Beispiel #1
0
def main(args=sys.argv[1:]):
    """Main process: sends the command line to the matching subcommand

    `args` is the list of command line arguments. When its first element
    is not one of SUBCOMMANDS (compared in lowercase), the "main"
    subcommand is assumed and prepended. When `args` is empty, the process
    exits with a message that lists the available `--help` commands.

    """
    if not args:
        sys.exit(
            "BigMLer used with no arguments. Check:\nbigmler --help\n\nor"
            "\n\nbigmler sample --help\n\n"
            "\n\nbigmler analyze --help\n\n"
            "\n\nbigmler cluster --help\n\n"
            "\n\nbigmler anomaly --help\n\n"
            "\n\nbigmler report --help\n\n"
            "\n\nbigmler reify --help\n\n"
            "\n\nbigmler project --help\n\n"
            "\n\nbigmler association --help\n\n"
            "\n\nbigmler logistic-regression --help\n\n"
            "\n\nbigmler execute --help\n\n"
            "\n\nbigmler whizzml --help\n\n"
            "\n\nbigmler delete --help\n\n"
            " for a list of options"
        )

    if args[0].lower() in SUBCOMMANDS:
        # work on a copy, so that the list received from the caller (or
        # the shared default list) is never modified from here
        new_args = list(args)
    else:
        new_args = ["main"]
        new_args.extend(args)
    # checks if the old --delete syntax is used
    new_args = check_delete_option(new_args)
    if not PYTHON3:
        new_args = [arg.decode(SYSTEM_ENCODING) for arg in new_args]

    # The subcommand is recognized case-insensitively above, so it is
    # normalized before dispatching. Otherwise, an input like "ANALYZE"
    # would match none of the branches below and silently do nothing.
    subcommand = new_args[0].lower()
    new_args = [subcommand] + list(new_args[1:])

    if subcommand == "main":
        main_dispatcher(args=new_args)
    elif subcommand == "analyze":
        analyze_dispatcher(args=new_args)
    elif subcommand == "cluster":
        cluster_dispatcher(args=new_args)
    elif subcommand == "anomaly":
        anomaly_dispatcher(args=new_args)
    elif subcommand == "sample":
        sample_dispatcher(args=new_args)
    elif subcommand == "report":
        report_dispatcher(args=new_args)
    elif subcommand == "reify":
        reify_dispatcher(args=new_args)
    elif subcommand == "execute":
        execute_dispatcher(args=new_args)
    elif subcommand == "delete":
        delete_dispatcher(args=new_args)
    elif subcommand == "project":
        project_dispatcher(args=new_args)
    elif subcommand == "association":
        association_dispatcher(args=new_args)
    elif subcommand == "logistic-regression":
        logistic_regression_dispatcher(args=new_args)
    elif subcommand == "whizzml":
        whizzml_dispatcher(args=new_args)
Beispiel #2
0
def main(args=sys.argv[1:]):
    """Main process: sends the command line to the matching subcommand

    `args` is the list of command line arguments. When its first element
    is not one of SUBCOMMANDS (compared in lowercase), the "main"
    subcommand is assumed and prepended. When `args` is empty, the process
    exits with a message that lists the available `--help` commands.

    """
    if args:
        if args[0].lower() in SUBCOMMANDS:
            # copying avoids modifying the caller's list (or the shared
            # default list) from this function
            new_args = list(args)
        else:
            new_args = ["main"]
            new_args.extend(args)
        # checks if the old --delete syntax is used
        new_args = check_delete_option(new_args)
        if not PYTHON3:
            new_args = [arg.decode(SYSTEM_ENCODING) for arg in new_args]
        # The membership test above ignores case, so the subcommand is
        # lowercased before comparing: an uppercase subcommand used to
        # match no branch and the command silently did nothing.
        subcommand = new_args[0].lower()
        new_args = [subcommand] + list(new_args[1:])
        if subcommand == "main":
            main_dispatcher(args=new_args)
        elif subcommand == "analyze":
            analyze_dispatcher(args=new_args)
        elif subcommand == "cluster":
            cluster_dispatcher(args=new_args)
        elif subcommand == "anomaly":
            anomaly_dispatcher(args=new_args)
        elif subcommand == "sample":
            sample_dispatcher(args=new_args)
        elif subcommand == "report":
            report_dispatcher(args=new_args)
        elif subcommand == "reify":
            reify_dispatcher(args=new_args)
        elif subcommand == "execute":
            execute_dispatcher(args=new_args)
        elif subcommand == "delete":
            delete_dispatcher(args=new_args)
        elif subcommand == "project":
            project_dispatcher(args=new_args)
        elif subcommand == "association":
            association_dispatcher(args=new_args)
        elif subcommand == "logistic-regression":
            logistic_regression_dispatcher(args=new_args)
        elif subcommand == "whizzml":
            whizzml_dispatcher(args=new_args)
    else:
        sys.exit("BigMLer used with no arguments. Check:\nbigmler --help\n\nor"
                 "\n\nbigmler sample --help\n\n"
                 "\n\nbigmler analyze --help\n\n"
                 "\n\nbigmler cluster --help\n\n"
                 "\n\nbigmler anomaly --help\n\n"
                 "\n\nbigmler report --help\n\n"
                 "\n\nbigmler reify --help\n\n"
                 "\n\nbigmler project --help\n\n"
                 "\n\nbigmler association --help\n\n"
                 "\n\nbigmler logistic-regression --help\n\n"
                 "\n\nbigmler execute --help\n\n"
                 "\n\nbigmler whizzml --help\n\n"
                 "\n\nbigmler delete --help\n\n"
                 " for a list of options")
Beispiel #3
0
def whizzml_script(args, api):
    """Returns the ID of the script to be used to generate the output

    Unless `args.upgrade` is set, the script is first searched with
    `get_last_resource`, filtering by the name stored in the
    `metadata.json` file of the package for `args.language`. When no
    script is retrieved that way (or `args.upgrade` is set), the local
    scripts directory for the language is removed, the package is rebuilt
    using the `whizzml` subcommand and the ID is read from the rebuilt
    "scripts" file.

    """
    # each language has its own script directory
    language_dir = SCRIPT_FILE.get(args.language, args.language)

    reify_script = None
    if not args.upgrade:
        # the script is retrieved by name, so the name is read first
        # from the package metadata
        metadata_path = os.path.join(REIFY_PACKAGE_PATH, language_dir,
                                     "metadata.json")
        with open(metadata_path) as meta_file:
            meta = json.load(meta_file)
        reify_script = get_last_resource(
            "script",
            api=api,
            query_string="name=%s" % meta["name"])

    # create or retrieve the script to generate the output
    # if --upgrade, we force rebuilding the scriptified script
    if reify_script is None:
        try:
            shutil.rmtree(os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                       language_dir))
        except OSError:
            # best effort clean-up: e.g. there is nothing to remove when
            # the directory has not been created yet
            pass

        # new bigmler command: creating the scriptify scripts
        whizzml_command = [
            'whizzml', '--package-dir', REIFY_PACKAGE_PATH, '--output-dir',
            BIGMLER_SCRIPTS_DIRECTORY
        ]
        whizzml_dispatcher(args=whizzml_command)
        reify_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, language_dir,
                                  "scripts")
        reify_script = get_script_id(reify_file)
    return reify_script
Beispiel #4
0
    # if --upgrade, we force rebuilding the scriptified script
    if reify_script is None:
        try:
            shutil.rmtree(
                os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                             SCRIPT_FILE.get(args.language, args.language)))
        except Exception, exc:
            pass

    if reify_script is None:
        # new bigmler command: creating the scriptify scripts
        whizzml_command = [
            'whizzml', '--package-dir', REIFY_PACKAGE_PATH, '--output-dir',
            BIGMLER_SCRIPTS_DIRECTORY
        ]
        whizzml_dispatcher(args=whizzml_command)
        reify_file = os.path.join(
            BIGMLER_SCRIPTS_DIRECTORY,
            SCRIPT_FILE.get(args.language, args.language), "scripts")
        reify_script = get_script_id(reify_file)
    return reify_script


def reify_resources(args, api, logger):
    """ Extracts the properties of the created resources and generates
        code to rebuild them

    """

    resource_id = get_resource_id(args.resource_id)
    if resource_id is None:
Beispiel #5
0
    # create or retrieve the script to generate the output
    # if --upgrade, we force rebuilding the scriptified script
    if reify_script is None :
        try:
            shutil.rmtree(os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                       SCRIPT_FILE.get(args.language,
                                                       args.language)))
        except Exception, exc:
            pass

    if reify_script is None:
        # new bigmler command: creating the scriptify scripts
        whizzml_command = ['whizzml',
                           '--package-dir', REIFY_PACKAGE_PATH,
                           '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
        whizzml_dispatcher(args=whizzml_command)
        reify_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                  SCRIPT_FILE.get(args.language,
                                                  args.language), "scripts")
        reify_script = get_script_id(reify_file)
    return reify_script


def reify_resources(args, api, logger):
    """ Extracts the properties of the created resources and generates
        code to rebuild them

    """

    resource_id = get_resource_id(args.resource_id)
    if resource_id is None:
Beispiel #6
0
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    The model to be retrained is `args.resource_id` or, when that is not
    set, the resource retrieved with the first `<model_type>_tag` argument
    that is not None. Unless `args.upgrade` is set, an existing script
    tagged as `retrain:<resource_id>` is reused. Otherwise, the script is
    created by executing the script whose ID is stored in the local
    "retrain" scripts file (built with the `whizzml` subcommand when
    missing) with the model as input.

    The process exits with an error message when the model cannot be
    identified or checked.

    """

    # retrieve the modeling resource to be retrained by tag or id
    resource_id = None
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            query_string = "tags=%s" % tag
            resource_id = get_last_resource(
                model_type.replace("_", ""),
                api=api,
                query_string=query_string)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)." %
                         (model_type.replace("_", " "), tag))
            reference_tag = tag
            break
    if resource_id is None:
        # neither an ID nor a tag was provided: stop with a clear message
        # instead of failing later on an unbound variable
        sys.exit("Failed to find the model to be retrained. Please, provide"
                 " its ID or a tag to identify it.")

    if args.upgrade:
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            # best effort: e.g. the directory may not exist yet
            pass
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource(
            "script",
            api=api,
            query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                    "retrain",
                                    "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = ['whizzml',
                               '--package-dir', INCREMENTAL_PACKAGE_PATH,
                               '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = ['execute',
                           '--script', reify_script,
                           '--output-dir', args.output_dir]
        command_args, _, _, exe_session_file, _ = get_context(execute_command,
                                                              EXE_SETTINGS)
        # the model is the only input of the script
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
Beispiel #7
0
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    The model to be retrained is `args.resource_id` or, when that is not
    set, the resource retrieved with the first `<model_type>_tag` argument
    that is not None. Unless `args.upgrade` is set, an existing script
    tagged as `retrain:<resource_id>` is reused. Otherwise, the script is
    created by executing the script whose ID is stored in the local
    "retrain" scripts file (built with the `whizzml` subcommand when
    missing) with the model as input.

    The process exits with an error message when the model cannot be
    identified or checked.

    """

    # retrieve the modeling resource to be retrained by tag or id
    resource_id = None
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            query_string = "tags=%s" % tag
            resource_id = get_last_resource(
                model_type.replace("_", ""),
                api=api,
                query_string=query_string)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)." %
                         (model_type.replace("_", " "), tag))
            reference_tag = tag
            break
    if resource_id is None:
        # without an ID or a tag the code below would fail on an unbound
        # variable, so the process stops here with a clear message
        sys.exit("Failed to find the model to be retrained. Please, provide"
                 " its ID or a tag to identify it.")

    if args.upgrade:
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            # best effort: e.g. the directory may not exist yet
            pass
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource(
            "script",
            api=api,
            query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, "retrain",
                                    "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = [
                'whizzml', '--package-dir', INCREMENTAL_PACKAGE_PATH,
                '--output-dir', BIGMLER_SCRIPTS_DIRECTORY
            ]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute', '--script', reify_script, '--output-dir',
            args.output_dir
        ]
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        # the model is the only input of the script
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
Beispiel #8
0
def retrain_model(args, api, command, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    Steps:
    - if UPGRADE_FILE can be removed, the local scripts directory is
      deleted so that the scripts are rebuilt with the `whizzml`
      subcommand; otherwise the ID stored in the local "retrain" scripts
      file is used
    - the model is identified by `args.resource_id` or by the first
      `<model_type>_tag` argument that is not None
    - the reference tag is appended to the tags of the model's dataset
    - unless `args.upgrade` is set, the last script with the reference
      tag is reused; otherwise the retrain script is created
    - if `args.add` is set, the retrain script is executed on the new
      data and a message with the resulting model is logged

    The process exits with an error message when the model cannot be
    identified or checked.

    """

    retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, "retrain",
                                "scripts")
    try:
        os.remove(UPGRADE_FILE)
    except OSError:
        # the upgrade file could not be removed:
        # look for the script that creates the rebuild script.
        reify_script = get_script_id(retrain_file)
    else:
        # the upgrade file was there: local scripts are discarded
        reify_script = None
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass

    if reify_script is None:
        # new bigmler command: creating the scriptify scripts
        whizzml_command = [
            'whizzml', '--package-dir', INCREMENTAL_PACKAGE_PATH,
            '--output-dir', BIGMLER_SCRIPTS_DIRECTORY
        ]
        add_api_context(whizzml_command, args)
        whizzml_dispatcher(args=whizzml_command)
        reify_script = get_script_id(retrain_file)

    # retrieve the modeling resource to be retrained by tag or id
    resource_id = None
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            query_string = "tags=%s" % tag
            resource_id = get_first_resource(
                model_type.replace("_", ""),
                api=api,
                query_string=query_string)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)." %
                         (model_type.replace("_", " "), tag))
            reference_tag = tag
            break
    if resource_id is None:
        # neither an ID nor a tag was provided: stop with a clear message
        # instead of failing below on an unbound variable
        sys.exit("Failed to find the model to be retrained. Please, provide"
                 " its ID or a tag to identify it.")

    # updating the dataset that generated the model with the reference tag
    model = api.getters[get_resource_type(resource_id)](resource_id)
    dataset_id = model["object"]["dataset"]
    dataset = api.get_dataset(dataset_id)
    tags = dataset["object"]["tags"]
    if reference_tag not in tags:
        tags.append(reference_tag)
        api.update_dataset(dataset_id, {"tags": tags})

    # if --upgrade, we force rebuilding the scriptified script
    if args.upgrade:
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=%s" % reference_tag
        script_id = get_last_resource(
            "script",
            api=api,
            query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute', '--script', reify_script, '--tag', reference_tag,
            '--output-dir', args.output_dir
        ]
        command.propagate(execute_command)
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        # the model is the only input of the script
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)

    # apply the retrain script to the new data:
    # add new data: depending on the script we will need to use
    # a source-url, a source or a dataset
    if args.add:
        script_inputs = api.get_script(script_id)['object']['inputs']
        # the type of the first input of the script is the one used to
        # create the new input
        input_type = script_inputs[0]['type']
        command_args, api, exe_session_file = \
            create_input(args, api, input_type, script_id, command)

        # process the command
        execute_whizzml(command_args, api, exe_session_file)

        # the result of the execution is read from the JSON output file
        with open("%s.json" % command_args.output) as file_handler:
            model_resource_id = json.load(file_handler)['result']
        query_url = last_resource_url(
            resource_id, api,
            "limit=1;full=yes;tags=%s" % reference_tag)
        message = (u'The new retrained model is: %s.\n'
                   u'You can use the\n\n%s\n\nquery to retrieve the latest'
                   u' retrained model.\n\n') % (model_resource_id, query_url)
        log_message(message, log_file=session_file, console=1)
Beispiel #9
0
def main(args=sys.argv[1:]):
    """Main process: sends the command line to the matching subcommand

    `args` is the list of command line arguments. When its first element
    is not one of SUBCOMMANDS (compared in lowercase), the "main"
    subcommand is assumed and prepended. When `args` is empty, the process
    exits with a message that lists the available `--help` commands. The
    "topic-model" subcommand also exits with a message when `no_stemmer`
    is set.

    """
    if not args:
        sys.exit("BigMLer used with no arguments. Check:\nbigmler --help\n\nor"
                 "\n\nbigmler sample --help\n\n"
                 "\n\nbigmler analyze --help\n\n"
                 "\n\nbigmler cluster --help\n\n"
                 "\n\nbigmler anomaly --help\n\n"
                 "\n\nbigmler report --help\n\n"
                 "\n\nbigmler reify --help\n\n"
                 "\n\nbigmler project --help\n\n"
                 "\n\nbigmler association --help\n\n"
                 "\n\nbigmler logistic-regression --help\n\n"
                 "\n\nbigmler topic-model --help\n\n"
                 "\n\nbigmler time-series --help\n\n"
                 "\n\nbigmler deepnet --help\n\n"
                 "\n\nbigmler execute --help\n\n"
                 "\n\nbigmler whizzml --help\n\n"
                 "\n\nbigmler export --help\n\n"
                 "\n\nbigmler retrain --help\n\n"
                 "\n\nbigmler delete --help\n\n"
                 " for a list of options")

    if args[0].lower() in SUBCOMMANDS:
        # work on a copy, so that the list received from the caller (or
        # the shared default list) is never modified from here
        new_args = list(args)
    else:
        new_args = ["main"]
        new_args.extend(args)
    # checks if the old --delete syntax is used
    new_args = check_delete_option(new_args)
    if not PYTHON3:
        new_args = [arg.decode(SYSTEM_ENCODING) for arg in new_args]

    # The subcommand is recognized case-insensitively above, so it is
    # normalized before dispatching. Otherwise, an input like "ANALYZE"
    # would match none of the branches below and silently do nothing.
    subcommand = new_args[0].lower()
    new_args = [subcommand] + list(new_args[1:])

    if subcommand == "main":
        main_dispatcher(args=new_args)
    elif subcommand == "analyze":
        analyze_dispatcher(args=new_args)
    elif subcommand == "cluster":
        cluster_dispatcher(args=new_args)
    elif subcommand == "anomaly":
        anomaly_dispatcher(args=new_args)
    elif subcommand == "sample":
        sample_dispatcher(args=new_args)
    elif subcommand == "report":
        report_dispatcher(args=new_args)
    elif subcommand == "reify":
        reify_dispatcher(args=new_args)
    elif subcommand == "execute":
        execute_dispatcher(args=new_args)
    elif subcommand == "delete":
        delete_dispatcher(args=new_args)
    elif subcommand == "project":
        project_dispatcher(args=new_args)
    elif subcommand == "association":
        association_dispatcher(args=new_args)
    elif subcommand == "logistic-regression":
        logistic_regression_dispatcher(args=new_args)
    elif subcommand == "topic-model":
        if no_stemmer:
            sys.exit("To use the bigmler topic-model command you need the"
                     " Pystemmer library. Please, install it and"
                     " retry your command.")
        topic_model_dispatcher(args=new_args)
    elif subcommand == "time-series":
        time_series_dispatcher(args=new_args)
    elif subcommand == "deepnet":
        deepnet_dispatcher(args=new_args)
    elif subcommand == "whizzml":
        whizzml_dispatcher(args=new_args)
    elif subcommand == "export":
        export_dispatcher(args=new_args)
    elif subcommand == "retrain":
        retrain_dispatcher(args=new_args)
Beispiel #10
0
def main(args=sys.argv[1:]):
    """Main process: sends the command line to the matching subcommand

    `args` is the list of command line arguments. When its first element
    is not one of SUBCOMMANDS (compared in lowercase), the "main"
    subcommand is assumed and prepended. When `args` is empty, the process
    exits with a message that lists the available `--help` commands. The
    "topic-model" subcommand also exits with a message when `no_stemmer`
    is set.

    """
    if args:
        if args[0].lower() in SUBCOMMANDS:
            # copying avoids modifying the caller's list (or the shared
            # default list) from this function
            new_args = list(args)
        else:
            new_args = ["main"]
            new_args.extend(args)
        # checks if the old --delete syntax is used
        new_args = check_delete_option(new_args)
        if not PYTHON3:
            new_args = [arg.decode(SYSTEM_ENCODING) for arg in new_args]
        # The membership test above ignores case, so the subcommand is
        # lowercased before comparing: an uppercase subcommand used to
        # match no branch and the command silently did nothing.
        subcommand = new_args[0].lower()
        new_args = [subcommand] + list(new_args[1:])
        if subcommand == "main":
            main_dispatcher(args=new_args)
        elif subcommand == "analyze":
            analyze_dispatcher(args=new_args)
        elif subcommand == "cluster":
            cluster_dispatcher(args=new_args)
        elif subcommand == "anomaly":
            anomaly_dispatcher(args=new_args)
        elif subcommand == "sample":
            sample_dispatcher(args=new_args)
        elif subcommand == "report":
            report_dispatcher(args=new_args)
        elif subcommand == "reify":
            reify_dispatcher(args=new_args)
        elif subcommand == "execute":
            execute_dispatcher(args=new_args)
        elif subcommand == "delete":
            delete_dispatcher(args=new_args)
        elif subcommand == "project":
            project_dispatcher(args=new_args)
        elif subcommand == "association":
            association_dispatcher(args=new_args)
        elif subcommand == "logistic-regression":
            logistic_regression_dispatcher(args=new_args)
        elif subcommand == "topic-model":
            if no_stemmer:
                sys.exit("To use the bigmler topic-model command you need the"
                         " Pystemmer library. Please, install it and"
                         " retry your command.")
            topic_model_dispatcher(args=new_args)
        elif subcommand == "time-series":
            time_series_dispatcher(args=new_args)
        elif subcommand == "deepnet":
            deepnet_dispatcher(args=new_args)
        elif subcommand == "whizzml":
            whizzml_dispatcher(args=new_args)
        elif subcommand == "export":
            export_dispatcher(args=new_args)
        elif subcommand == "retrain":
            retrain_dispatcher(args=new_args)
    else:
        sys.exit("BigMLer used with no arguments. Check:\nbigmler --help\n\nor"
                 "\n\nbigmler sample --help\n\n"
                 "\n\nbigmler analyze --help\n\n"
                 "\n\nbigmler cluster --help\n\n"
                 "\n\nbigmler anomaly --help\n\n"
                 "\n\nbigmler report --help\n\n"
                 "\n\nbigmler reify --help\n\n"
                 "\n\nbigmler project --help\n\n"
                 "\n\nbigmler association --help\n\n"
                 "\n\nbigmler logistic-regression --help\n\n"
                 "\n\nbigmler topic-model --help\n\n"
                 "\n\nbigmler time-series --help\n\n"
                 "\n\nbigmler deepnet --help\n\n"
                 "\n\nbigmler execute --help\n\n"
                 "\n\nbigmler whizzml --help\n\n"
                 "\n\nbigmler export --help\n\n"
                 "\n\nbigmler retrain --help\n\n"
                 "\n\nbigmler delete --help\n\n"
                 " for a list of options")