Example #1
def create_kfold_datasets(dataset,
                          args,
                          selecting_file_list,
                          command_obj,
                          resume=False):
    """Calling the bigmler procedure to create the k-fold datasets

    """
    args.output_dir = os.path.normpath(os.path.join(args.output_dir, "test"))
    output_dir = args.output_dir
    global subcommand_list
    # creating the selection datasets
    for selecting_file in selecting_file_list:
        command = COMMANDS["selection"] % (dataset, selecting_file,
                                           output_dir)
        command_args = command.split()
        command_obj.propagate(command_args)
        command = rebuild_command(command_args)
        if resume:
            next_command = subcommand_list.pop()
            if different_command(next_command, command):
                resume = False
                u.sys_log_message(command, log_file=subcommand_file)
                main_dispatcher(args=command_args)
            elif not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
    datasets_file = os.path.normpath(os.path.join(output_dir, "dataset_gen"))
    return datasets_file, resume
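Note: the replay logic above relies on two helpers, rebuild_command and different_command, that are not included in this listing. A minimal sketch of what they might look like, assuming subcommands are logged as whitespace-separated strings (hypothetical, not the actual BigMLer implementations):

def rebuild_command(command_args):
    """Hypothetical: joins the args list back into one loggable command."""
    return "%s\n" % " ".join(command_args)

def different_command(next_command, command):
    """Hypothetical: True when the stored command diverges from the new one."""
    return next_command.strip() != command.strip()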
Example #2
def delete_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    if command_args.resume:
        command_args, session_file, _ = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    delete_resources(command_args, api)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #3
def create_kfold_datasets(dataset, args,
                          selecting_file_list,
                          common_options, resume=False):
    """Calling the bigmler procedure to create the k-fold datasets

    """
    args.output_dir = os.path.normpath(os.path.join(args.output_dir, "test"))
    output_dir = args.output_dir
    global subcommand_list
    # creating the selection datasets
    for selecting_file in selecting_file_list:
        command = COMMANDS["selection"] % (
            dataset, selecting_file,
            output_dir)
        command_args = command.split()
        common_options_list = u.get_options_list(args, common_options,
                                                 prioritary=command_args)
        command_args.extend(common_options_list)
        command = rebuild_command(command_args)
        if resume:
            next_command = subcommand_list.pop()
            if different_command(next_command, command):
                resume = False
                u.sys_log_message(command, log_file=subcommand_file)
                main_dispatcher(args=command_args)
            elif not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
    datasets_file = os.path.normpath(os.path.join(output_dir, "dataset_gen"))
    return datasets_file, resume
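The COMMANDS mapping interpolated in these examples is not shown. A hypothetical entry for the "selection" key, inferred only from its three placeholders (dataset id, selection file, output directory); the real BigMLer template may well differ:

# Hypothetical template; only the placeholder count is taken from the examples.
COMMANDS = {
    "selection": ("main --dataset %s --new-fields %s"
                  " --no-model --output-dir %s")}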
Example #4
def create_prediction_dataset(base_path, folder, args, resume):
    """Creates batch prediction datasets and a multidataset with the prediction
    results for the best scoring model in the folder set by the argument

    """
    args.output_dir = os.path.join(base_path, "%s_pred" % folder)
    output_dir = args.output_dir
    folder = os.path.join(base_path, folder)
    model_type = "ensembles" if hasattr(args, "number_of_models") and \
        args.number_of_models > 1 else "models"
    global subcommand_list
    # creating the predictions CSV file
    command = COMMANDS["prediction"] % (base_path, model_type, folder,
                                        model_type, folder)
    command_args = command.split()
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    return resume
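All the builders in this listing share the same resume/replay logic. A distilled sketch of that pattern, mirroring the code above (subcommand_list and subcommand_file are module-level state; the helper name is hypothetical):

def replay_or_run(command, command_args, resume):
    """Hypothetical distillation of the replay pattern used above."""
    global subcommand_list
    if resume:
        # Replay mode: compare the next logged subcommand with the new one.
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            # The log diverges: stop resuming and issue the new command.
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            # Last logged subcommand: resume it rather than re-running it.
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        # Normal mode: log the subcommand and run it.
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    return resume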
Example #5
def main_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    default_output = ('evaluation' if command_args.evaluate
                      else 'predictions.csv')
    resume = command_args.resume
    if command_args.resume:
        command_args, session_file, output_dir = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
        default_output = ('evaluation' if command_args.evaluate
                          else 'predictions.csv')
        if command_args.predictions is None:
            command_args.predictions = os.path.join(output_dir,
                                                    default_output)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.predictions is None:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    default_output)
        if len(os.path.dirname(command_args.predictions).strip()) == 0:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    command_args.predictions)
        directory = u.check_dir(command_args.predictions)
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            defaults_file = open(DEFAULTS_FILE, 'r')
            contents = defaults_file.read()
            defaults_file.close()
            defaults_copy = open(os.path.join(directory, DEFAULTS_FILE),
                                 'w', 0)  # 0: unbuffered (Python 2 open)
            defaults_copy.write(contents)
            defaults_copy.close()
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.votes_dirs):
        output_args = a.get_output_args(api, command_args, resume)
        a.transform_args(command_args, command.flags, api,
                         command.user_defaults)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #6
def create_prediction_dataset(base_path, folder, args, resume):
    """Creates batch prediction datasets and a multidataset with the prediction
    results for the best scoring model in the folder set by the argument

    """
    args.output_dir = os.path.join(base_path, "%s_pred" % folder)
    folder = os.path.join(base_path, folder)
    model_type = "ensembles" if hasattr(args, "number_of_models") and \
        args.number_of_models > 1 else "models"
    global subcommand_list
    # creating the predictions CSV file
    command = COMMANDS["prediction"] % (base_path, model_type, folder,
                                        model_type, folder)
    command_args = command.split()
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    return resume
Example #7
def delete_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    if command_args.resume:
        command_args, session_file, _ = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    delete_resources(command_args, api)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #8
def cluster_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume
    if command_args.resume:
        command_args, session_file, output_dir = get_stored_command(
            args,
            command_args.debug,
            command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG,
            sessions_log=SESSIONS_LOG)
        if command_args.predictions is None:
            command_args.predictions = os.path.join(output_dir, DEFAULT_OUTPUT)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.predictions is None:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    DEFAULT_OUTPUT)
        if len(os.path.dirname(command_args.predictions).strip()) == 0:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    command_args.predictions)
        directory = u.check_dir(command_args.predictions)
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            defaults_file = open(DEFAULTS_FILE, 'r')
            contents = defaults_file.read()
            defaults_file.close()
            defaults_copy = open(os.path.join(directory, DEFAULTS_FILE), 'w',
                                 0)  # 0: unbuffered (Python 2 open)
            defaults_copy.write(contents)
            defaults_copy.close()
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    # Selects the action to perform
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.cluster_datasets is not None):
        output_args = a.get_output_args(api, command_args, resume)
        a.transform_args(command_args, command.flags, api,
                         command.user_defaults)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #9
def cluster_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume
    if command_args.resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = a.parse_and_check(command)
        if command_args.predictions is None:
            command_args.predictions = os.path.join(stored_command.output_dir,
                                                    DEFAULT_OUTPUT)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.predictions is None:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    DEFAULT_OUTPUT)
        if len(os.path.dirname(command_args.predictions).strip()) == 0:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    command_args.predictions)
        directory = u.check_dir(command_args.predictions)
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            defaults_file = open(DEFAULTS_FILE, "r")
            contents = defaults_file.read()
            defaults_file.close()
            defaults_copy = open(os.path.join(directory, DEFAULTS_FILE),
                                 "w", 0)  # 0: unbuffered (Python 2 open)
            defaults_copy.write(contents)
            defaults_copy.close()
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance
    if resume and debug:
        command_args.debug = True
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    # Selects the action to perform
    if (has_train(command_args) or has_test(command_args)
            or command_args.cluster_datasets is not None):
        output_args = a.get_output_args(api, command_args, resume)
        a.transform_args(command_args, command.flags, api,
                         command.user_defaults)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #10
def create_kfold_evaluations(datasets_file,
                             args,
                             command_obj,
                             resume=False,
                             counter=0):
    """ Create k-fold cross-validation from a datasets file

    """
    global subcommand_list
    output_dir = os.path.normpath(
        u.check_dir(
            os.path.join(u"%s%s" % (args.output_dir, counter),
                         u"evaluation.json")))
    model_fields = args.model_fields
    name_suffix = "_subset_%s" % counter
    name_max_length = NAME_MAX_LENGTH - len(name_suffix)
    name = "%s%s" % (args.name[0:name_max_length], name_suffix)
    dataset_id = u.read_datasets(datasets_file)[0]
    model_dataset = os.path.normpath(
        os.path.join(u.check_dir(datasets_file), dataset_id.replace("/", "_")))
    command = COMMANDS["create_cv"] % (datasets_file, output_dir, name,
                                       model_dataset)
    command_args = command.split()

    if model_fields:
        command_args.append("--model-fields")
        command_args.append(model_fields)
    command_args.append("--objective")
    command_args.append(args.objective_field)
    command_args = add_model_options(command_args, args)
    """
    common_options_list = u.get_options_list(args, command_obj.common_options,
                                             prioritary=command_args)
    command_args.extend(common_options_list)
    """
    command_obj.propagate(
        command_args, exclude=["--dataset", "--datasets", "--dataset-file"])
    command = rebuild_command(command_args)
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
        return evaluation, resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
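add_model_options is referenced in these examples but not defined in the listing. A purely illustrative sketch of what such a helper might do; the flag names below are assumptions, not the actual BigMLer option set:

def add_model_options(command_args, args):
    """Hypothetical: appends model-level flags carried in args, when set."""
    if getattr(args, "balance", False):
        command_args.append("--balance")
    if getattr(args, "weight_field", None):
        command_args.extend(["--weight-field", args.weight_field])
    return command_args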
Example #11
def logistic_regression_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    default_output = ('evaluation'
                      if command_args.evaluate else 'predictions.csv')
    resume = command_args.resume
    if command_args.resume:
        command_args, session_file, output_dir = get_stored_command(
            args,
            command_args.debug,
            command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG,
            sessions_log=SESSIONS_LOG)
        default_output = ('evaluation'
                          if command_args.evaluate else 'predictions.csv')
        if command_args.predictions is None:
            command_args.predictions = os.path.join(output_dir, default_output)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.predictions is None:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    default_output)
        if len(os.path.dirname(command_args.predictions).strip()) == 0:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    command_args.predictions)
        directory = u.check_dir(command_args.predictions)
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    # Selects the action to perform
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.export_fields):
        output_args = a.get_output_args(api, command_args, resume)
        a.transform_args(command_args, command.flags, api,
                         command.user_defaults)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #12
def create_kfold_evaluations(datasets_file, args, command_obj,
                             resume=False, counter=0):
    """ Create k-fold cross-validation from a datasets file

    """
    global subcommand_list
    output_dir = os.path.normpath(
        u.check_dir(os.path.join(u"%s%s" % (args.output_dir, counter),
                                 u"evaluation.json")))
    model_fields = args.model_fields
    name_suffix = "_subset_%s" % counter
    name_max_length = NAME_MAX_LENGTH - len(name_suffix)
    name = "%s%s" % (args.name[0: name_max_length], name_suffix)
    dataset_id = u.read_datasets(datasets_file)[0]
    model_dataset = os.path.normpath(
        os.path.join(u.check_dir(datasets_file), dataset_id.replace("/", "_")))
    command = COMMANDS["create_cv"] % (datasets_file, output_dir, name,
                                       model_dataset)
    command_args = command.split()

    if model_fields:
        command_args.append("--model-fields")
        command_args.append(model_fields)
    command_args.append("--objective")
    command_args.append(args.objective_field)
    command_args = add_model_options(command_args, args)
    """
    common_options_list = u.get_options_list(args, command_obj.common_options,
                                             prioritary=command_args)
    command_args.extend(common_options_list)
    """
    command_obj.propagate(command_args, exclude=["--dataset",
                                                 "--datasets",
                                                 "--dataset-file"])
    command = rebuild_command(command_args)
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(os.path.join(output_dir,
                                                    "evaluation.json"))
    try:
        with open(evaluation_file) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
        return evaluation, resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
Example #13
def whizzml_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer whizzml

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        command_args, session_file, _ = get_stored_command(
            args,
            command_args.debug,
            command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG,
            sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear-logs is set, the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance from the parsed args
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    a.transform_dataset_options(command_args, api)

    # package_dir
    if command_args.package_dir is not None:
        create_package(command_args,
                       api,
                       command.common_options,
                       resume=resume)
    else:
        sys.exit("You must use the --package-dir flag pointing to the"
                 " directory where the metadata.json file is. Type\n"
                 "    bigmler whizzml --help\n"
                 " to see all the available options.")
Example #14
def whizzml_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer whizzml

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        command_args, session_file, _ = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir,
                                    SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear-logs is set, the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance from the parsed args
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    a.transform_dataset_options(command_args, api)

    # package_dir
    if command_args.package_dir is not None:
        create_package(command_args, api, command.common_options,
                       resume=resume)
    else:
        sys.exit("You must use the --package-dir flag pointing to the"
                 " directory where the metadata.json file is. Type\n"
                 "    bigmler whizzml --help\n"
                 " to see all the available options.")
Example #15
def cluster_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume
    if command_args.resume:
        command_args, session_file, output_dir = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
        if command_args.predictions is None:
            command_args.predictions = os.path.join(output_dir,
                                                    DEFAULT_OUTPUT)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.predictions is None:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    DEFAULT_OUTPUT)
        if len(os.path.dirname(command_args.predictions).strip()) == 0:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    command_args.predictions)
        directory = u.check_dir(command_args.predictions)
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    # Selects the action to perform
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.cluster_datasets is not None
            or command_args.export_fields is not None):
        output_args = a.get_output_args(api, command_args, resume)
        a.transform_args(command_args, command.flags, api,
                         command.user_defaults)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #16
def command_handling(args, log=COMMAND_LOG):
    """Rebuilds command string, logs it for --resume future requests and
       parses it.

    """
    # Create the Command object
    command = Command(args, None)

    # Resume calls are not logged
    if not command.resume:
        u.sys_log_message(command.command.replace("\\", "\\\\"), log_file=log)

    return command
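A minimal usage sketch, matching how the dispatchers in this listing consume command_handling (the training file path is illustrative):

command = command_handling(["main", "--train", "data/iris.csv"], COMMAND_LOG)
command_args = a.parse_and_check(command)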
Example #17
def command_handling(args, log=COMMAND_LOG):
    """Rebuilds command string, logs it for --resume future requests and
       parses it.

    """
    # Create the Command object
    command = Command(args, None)

    # Resume calls are not logged
    if not command.resume:
        u.sys_log_message(command.command.replace('\\', '\\\\'), log_file=log)

    return command
Example #18
def delete_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume
    if command_args.resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = a.parse_and_check(command)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            defaults_file = open(DEFAULTS_FILE, 'r')
            contents = defaults_file.read()
            defaults_file.close()
            defaults_copy = open(os.path.join(directory, DEFAULTS_FILE),
                                 'w', 0)  # 0: unbuffered (Python 2 open)
            defaults_copy.write(contents)
            defaults_copy.close()
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    # Creates the corresponding api instance
    if resume and debug:
        command_args.debug = True
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    delete_resources(command_args, api)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #19
def delete_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume
    if command_args.resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = a.parse_and_check(command)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            defaults_file = open(DEFAULTS_FILE, 'r')
            contents = defaults_file.read()
            defaults_file.close()
            defaults_copy = open(os.path.join(directory, DEFAULTS_FILE), 'w',
                                 0)  # 0: unbuffered (Python 2 open)
            defaults_copy.write(contents)
            defaults_copy.close()
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    # Creates the corresponding api instance
    if resume and debug:
        command_args.debug = True
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    delete_resources(command_args, api)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #20
def get_cmd_context(args, settings):
    """Parses the args array to create an args object storing the defaults and
    user-given values. It also sets the output directory and the log files.

    """

    command = command_handling(args, settings['command_log'])

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume

    if command_args.resume:
        command_args, session_file, output_dir = get_stored_command(
            args,
            command_args.debug,
            command_log=settings['command_log'],
            dirs_log=settings["dirs_log"],
            sessions_log=settings['sessions_log'])
        if settings.get('default_output') is None:
            settings['default_output'] = "tmp.txt"
        if not hasattr(command_args, "output") or command_args.output is None:
            command_args.output = os.path.join(output_dir,
                                               settings['default_output'])
    else:
        if hasattr(command_args, "output") and \
                command_args.output is not None:
            command_args.output_dir = u.check_dir(command_args.output)
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if settings.get('default_output') is None:
            settings['default_output'] = "tmp.txt"
        if not hasattr(command_args, "output") or command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               settings['default_output'])
        if not os.path.dirname(command_args.output).strip():
            command_args.output = os.path.join(command_args.output_dir,
                                               command_args.output)
        directory = u.check_dir(command_args.output)
        session_file = os.path.join(directory, settings['sessions_log'])
        u.log_message(command.command + "\n", log_file=session_file)
        if settings.get('defaults_file') is not None:
            try:
                shutil.copy(settings['defaults_file'],
                            os.path.join(directory, settings['defaults_file']))
            except IOError:
                pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=settings['dirs_log'])
    return command_args, command, session_file, resume
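The settings argument is a plain mapping; the keys below are exactly the ones the function body reads (default_output and defaults_file are optional), with illustrative values:

SETTINGS = {
    "command_log": COMMAND_LOG,
    "dirs_log": DIRS_LOG,
    "sessions_log": SESSIONS_LOG,
    "default_output": "predictions.csv",
    "defaults_file": DEFAULTS_FILE}

command_args, command, session_file, resume = get_cmd_context(
    sys.argv[1:], SETTINGS)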
Example #21
def create_candidates_evaluations(datasets_file,
                                  args,
                                  command_obj,
                                  resume=False,
                                  random_candidates=DEFAULT_MIN_CANDIDATES):
    """ Create random candidates ensembles evaluations

    """
    global subcommand_list
    output_dir = os.path.normpath(
        u.check_dir(
            os.path.join(u"%s%s" % (args.output_dir, random_candidates),
                         "evaluation.json")))
    command = COMMANDS["random_candidates"] % (datasets_file,
                                               random_candidates, output_dir)
    command_args = command.split()
    """
    common_options_list = u.get_options_list(args, command_obj.common_options,
                                             prioritary=command_args)
    command_args.extend(common_options_list)
    """
    command_args.append("--objective")
    command_args.append(args.objective_field)
    command_args = add_model_options(command_args, args)

    command_obj.propagate(
        command_args, exclude=["--dataset", "--datasets", "--dataset-file"])
    command = rebuild_command(command_args)
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file, u.open_mode("r")) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
        return evaluation, resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
Example #22
def execute_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)
    default_output = 'whizzml_results'
    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume
    if command_args.resume:
        command_args, session_file, output_dir = get_stored_command(
            args,
            command_args.debug,
            command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG,
            sessions_log=SESSIONS_LOG)
        if command_args.output is None:
            command_args.output = os.path.join(output_dir, default_output)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               default_output)
        if len(os.path.dirname(command_args.output).strip()) == 0:
            command_args.output = os.path.join(command_args.output_dir,
                                               command_args.output)
        directory = u.check_dir(command_args.output)
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    _ = a.get_output_args(api, command_args, resume)
    a.transform_args(command_args, command.flags, api, command.user_defaults)
    execute_whizzml(command_args, api, session_file)
    u.log_message("_" * 80 + "\n", log_file=session_file)
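A hypothetical invocation; the --code flag is assumed to be one of the ways to pass WhizzML source to bigmler execute, and the output directory is illustrative:

execute_dispatcher(["execute", "--code", "(+ 1 2)",
                    "--output-dir", "./whizzml_run"])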
Example #23
def get_cmd_context(args, settings):
    """Parses the args array to create an args object storing the defaults and
    user-given values. It also sets the output directory and the log files.

    """

    command = command_handling(args, settings['command_log'])

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume

    if command_args.resume:
        command_args, session_file, output_dir = get_stored_command(
            args, command_args.debug, command_log=settings['command_log'],
            dirs_log=settings["dirs_log"],
            sessions_log=settings['sessions_log'])
        if settings.get('default_output') is None:
            settings['default_output'] = "tmp.txt"
        if not hasattr(command_args, "output") or command_args.output is None:
            command_args.output = os.path.join(output_dir,
                                               settings['default_output'])
    else:
        if hasattr(command_args, "output") and \
                command_args.output is not None:
            command_args.output_dir = u.check_dir(command_args.output)
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if settings.get('default_output') is None:
            settings['default_output'] = "tmp.txt"
        if not hasattr(command_args, "output") or command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               settings['default_output'])
        if len(os.path.dirname(command_args.output).strip()) == 0:
            command_args.output = os.path.join(command_args.output_dir,
                                               command_args.output)
        directory = u.check_dir(command_args.output)
        session_file = os.path.join(directory, settings['sessions_log'])
        u.log_message(command.command + "\n", log_file=session_file)
        if settings.get('defaults_file') is not None:
            try:
                shutil.copy(settings['defaults_file'],
                            os.path.join(directory, settings['defaults_file']))
            except IOError:
                pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=settings['dirs_log'])
    return command_args, command, session_file, resume
Example #24
def execute_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)
    default_output = 'whizzml_results'
    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume
    if command_args.resume:
        command_args, session_file, output_dir = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
        if command_args.output is None:
            command_args.output = os.path.join(output_dir,
                                               default_output)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               default_output)
        if len(os.path.dirname(command_args.output).strip()) == 0:
            command_args.output = os.path.join(command_args.output_dir,
                                               command_args.output)
        directory = u.check_dir(command_args.output)
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    _ = a.get_output_args(api, command_args, resume)
    a.transform_args(command_args, command.flags, api,
                     command.user_defaults)
    execute_whizzml(command_args, api, session_file)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #25
def create_candidates_evaluations(datasets_file, args, command_obj,
                                  resume=False,
                                  random_candidates=DEFAULT_MIN_CANDIDATES):
    """ Create random candidates ensembles evaluations

    """
    global subcommand_list
    output_dir = os.path.normpath(u.check_dir(
        os.path.join(u"%s%s" % (args.output_dir, random_candidates),
                     "evaluation.json")))
    command = COMMANDS["random_candidates"] % (
        datasets_file, random_candidates, output_dir)
    command_args = command.split()
    """
    common_options_list = u.get_options_list(args, command_obj.common_options,
                                             prioritary=command_args)
    command_args.extend(common_options_list)
    """
    command_args.append("--objective")
    command_args.append(args.objective_field)
    command_args = add_model_options(command_args, args)

    command_obj.propagate(command_args, exclude=["--dataset",
                                                 "--datasets",
                                                 "--dataset-file"])
    command = rebuild_command(command_args)
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(os.path.join(output_dir,
                                                    "evaluation.json"))
    try:
        with open(evaluation_file, u.open_mode("r")) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
        return evaluation, resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
Example #26
def create_node_th_evaluations(datasets_file,
                               args,
                               common_options,
                               resume=False,
                               node_threshold=DEFAULT_MIN_NODES):
    """ Create node_threshold evaluations

    """
    global subcommand_list
    output_dir = os.path.normpath(
        u.check_dir(
            os.path.join(u"%s%s" % (args.output_dir, node_threshold),
                         "evaluation.json")))
    command = COMMANDS["node_threshold"] % (datasets_file, node_threshold,
                                            output_dir)
    command_args = command.split()
    common_options_list = u.get_options_list(args,
                                             common_options,
                                             prioritary=command_args)
    command_args.extend(common_options_list)
    command_args.append("--objective")
    command_args.append(args.objective_field)
    command_args = add_model_options(command_args, args)
    command = rebuild_command(command_args)
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file, u.open_mode("r")) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
        return evaluation, resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
Example #27
def create_node_th_evaluations(datasets_file, args, common_options,
                               resume=False,
                               node_threshold=DEFAULT_MIN_NODES):
    """ Create node_threshold evaluations

    """
    global subcommand_list
    output_dir = os.path.normpath(u.check_dir(
        os.path.join(u"%s%s" % (args.output_dir, node_threshold),
                     "evaluation.json")))
    command = COMMANDS["node_threshold"] % (
        datasets_file, node_threshold, output_dir)
    command_args = command.split()
    common_options_list = u.get_options_list(args, common_options,
                                             prioritary=command_args)
    command_args.extend(common_options_list)
    command_args.append("--objective")
    command_args.append(args.objective_field)
    command_args = add_model_options(command_args, args)
    command = rebuild_command(command_args)
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(os.path.join(output_dir,
                                                    "evaluation.json"))
    try:
        with open(evaluation_file, u.open_mode("r")) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
        return evaluation, resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
Example #28
def analyze_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer analyze

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = command.parser.parse_args(command.args)
        command_args.debug = debug
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear-logs is set, the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        if command_args.model_fields:
            model_fields = command_args.model_fields.split(',')
            command_args.model_fields_ = [
                model_field.strip() for model_field in model_fields
            ]
        else:
            command_args.model_fields_ = []  # empty list, matching the branch above
        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
    # Create the api instance from the parsed args
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    # --maximize flag will be deprecated. Use --optimize flag.
    if command_args.maximize is not None and command_args.optimize is None:
        command_args.optimize = command_args.maximize
    incompatible_flags = [
        command_args.cv, command_args.features, command_args.nodes
    ]
    if sum([int(bool(flag)) for flag in incompatible_flags]) > 1:
        sys.exit("The following flags cannot be used together:\n    --features"
                 "\n    --cross-validation\n    --nodes")
    # k-fold cross-validation
    if command_args.cv and command_args.dataset is not None:
        create_kfold_cv(command_args,
                        api,
                        command.common_options,
                        resume=resume)

    # features analysis
    if command_args.features:
        create_features_analysis(command_args,
                                 api,
                                 command.common_options,
                                 resume=resume)

    # node threshold analysis
    if command_args.nodes:
        create_nodes_analysis(command_args,
                              api,
                              command.common_options,
                              resume=resume)
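Hypothetical invocations for the three mutually exclusive analyses; the flag names come from the incompatible_flags error message above, and the dataset id is illustrative:

analyze_dispatcher(["analyze", "--dataset",
                    "dataset/53b1f71437203f5ac30004ed", "--cross-validation"])
analyze_dispatcher(["analyze", "--dataset",
                    "dataset/53b1f71437203f5ac30004ed", "--features"])
analyze_dispatcher(["analyze", "--dataset",
                    "dataset/53b1f71437203f5ac30004ed", "--nodes"])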
Example #29
def create_kfold_datasets(dataset, args,
                          selecting_file_list, objective,
                          common_options, resume=False):
    """Calling the bigmler procedure to create the k-fold datasets

    """
    args.output_dir = os.path.normpath(os.path.join(args.output_dir, "test"))
    output_dir = args.output_dir
    global subcommand_list
    # creating the selection datasets
    for selecting_file in selecting_file_list:
        command = COMMANDS["selection"] % (
            dataset, selecting_file,
            output_dir)
        command_args = command.split()
        common_options_list = u.get_options_list(args, common_options,
                                                 prioritary=command_args)
        command_args.extend(common_options_list)
        command = rebuild_command(command_args)
        if resume:
            next_command = subcommand_list.pop()
            if different_command(next_command, command):
                resume = False
                u.sys_log_message(command, log_file=subcommand_file)
                main_dispatcher(args=command_args)
            elif not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
    # updating the datasets to set the objective field
    datasets_file = os.path.normpath(os.path.join(output_dir, "dataset_gen"))
    with open(datasets_file) as datasets_handler:
        for index, line in enumerate(datasets_handler):
            dataset_id = line.strip()
            command = COMMANDS["objective"] % (dataset_id,
                                               "dataset_%s" % index,
                                               output_dir)
            command_args = command.split()
            command_args.append("--objective")
            command_args.append(objective)
            common_options_list = u.get_options_list(args, common_options,
                                                     prioritary=command_args)
            command_args.extend(common_options_list)
            command = rebuild_command(command_args)
            if resume:
                next_command = subcommand_list.pop()
                if different_command(next_command, command):
                    resume = False
                    u.sys_log_message(command, log_file=subcommand_file)
                    main_dispatcher(args=command_args)
                elif not subcommand_list:
                    main_dispatcher(args=['main', '--resume'])
                    resume = False
            else:
                u.sys_log_message(command, log_file=subcommand_file)
                main_dispatcher(args=command_args)
            index += 1

    return datasets_file, resume
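
The resume branch above replays a log of previously issued subcommands: each
rebuilt command is popped against subcommand_list, replay stops at the first
command that differs, and when the log is exhausted the last stored
subcommand is resumed. Below is a minimal, self-contained sketch of that
pattern; replay_or_run and the helpers are simplified stand-ins, not
bigmler's actual implementations.

# Toy version of the resume/replay pattern used above; every name here is
# a simplified stand-in for illustration only.
subcommand_list = ["main --dataset d2", "main --dataset d1"]  # newest last

def different_command(logged, current):
    # stand-in: bigmler's version also normalizes volatile flags
    return logged.strip() != current.strip()

def main_dispatcher(args):
    print("dispatching:", args)

def replay_or_run(command, resume):
    """Skips commands already in the log; executes once the log diverges."""
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            # the log diverged: stop resuming and execute normally
            resume = False
            main_dispatcher(args=command.split())
        elif not subcommand_list:
            # log exhausted: resume the last stored subcommand
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        main_dispatcher(args=command.split())
    return resume

resume = True
for cmd in ["main --dataset d1", "main --dataset d2"]:
    resume = replay_or_run(cmd, resume)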
Example #30
def project_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    if command_args.resume:
        command_args, session_file, _ = get_stored_command(
            args,
            command_args.debug,
            command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG,
            sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
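        # check_dir takes a file path, so a placeholder file name is
        # appended; it creates the enclosing directory if needed and
        # returns it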
        directory = u.check_dir("%s/x.txt" % command_args.output_dir)
        command_args.output_dir = directory
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)

        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    path = u.check_dir("%s/x.txt" % command_args.output_dir)
    session_file = u"%s%s%s" % (path, os.sep, SESSIONS_LOG)
    # If logging is required, set the file for logging
    log = None
    if command_args.log_file:
        u.check_dir(command_args.log_file)
        log = command_args.log_file
        # If --clear-logs is set, the log files are cleared
        clear_log_files([log])

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    a.get_output_args(api, command_args, command_args.resume)
    a.attribute_args(command_args)

    if not command_args.project_id and command_args.name:
        command_args.project = command_args.name
    if command_args.project:
        # create project
        pp.project_processing(api,
                              command_args,
                              command_args.resume,
                              session_file=session_file,
                              path=path,
                              log=log,
                              create=True)
    if command_args.project_id and (command_args.project_attributes
                                    or command_args.name or command_args.tag
                                    or command_args.description
                                    or command_args.category):
        # update project's attributes
        pp.update_project(command_args, api, command_args.resume, \
            session_file=session_file)

    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir,
                            log_file=session_file,
                            verbosity=command_args.verbosity)
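
A hypothetical invocation of the dispatcher above; the flag spellings are
assumed from the command_args attributes it reads, since argparse
conventionally maps --flag-name to flag_name.

# assumed flags, derived from the attributes used in the dispatcher above
project_dispatcher(args=["--name", "my project",
                         "--output-dir", "./project_out"])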
Example #31
def project_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    if command_args.resume:
        command_args, session_file, _ = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        directory = u.check_dir("%s/x.txt" % command_args.output_dir)
        command_args.output_dir = directory
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)

        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    path = u.check_dir("%s/x.txt" % command_args.output_dir)
    session_file = u"%s%s%s" % (path, os.sep, SESSIONS_LOG)
    # If logging is required, set the file for logging
    log = None
    if command_args.log_file:
        u.check_dir(command_args.log_file)
        log = command_args.log_file
        # If --clear-logs is set, the log files are cleared
        clear_log_files([log])

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    a.get_output_args(api, command_args, command_args.resume)
    a.attribute_args(command_args)

    if not command_args.project_id and command_args.name:
        command_args.project = command_args.name
    if command_args.project:
        # create project
        pp.project_processing(
            api, command_args, command_args.resume, session_file=session_file,
            path=path, log=log, create=True)
    if command_args.project_id and (
            command_args.project_attributes or
            command_args.name or command_args.tag or command_args.description
            or command_args.category):
        # update project's attributes
        pp.update_project(command_args, api, command_args.resume, \
            session_file=session_file)

    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)
Example #32
def create_package(args, api, command_obj, resume=False):
    """Creates the package whizzml resources as referred in the metadata.json
    file.

    """
    set_subcommand_file(args.output_dir)
    if resume:
        retrieve_subcommands()
    # read the metadata.json information
    message = ('Reading the metadata.json files.........\n')
    u.log_message(message, log_file=session_file, console=args.verbosity)
    package_dir = args.package_dir
    output_dir = args.output_dir
    metadata_file = os.path.join(package_dir, METADATA_FILE)
    metadata = None

    with open(metadata_file) as metadata_handler:
        metadata = json.load(metadata_handler)
    # recurse into components/directories, if any
    if metadata.get("kind") == "package" and 'components' in metadata:
        components = metadata.get("components")
        for component in components:
            message = ('Inspecting component %s.........\n' % component)
            u.log_message(message,
                          log_file=session_file,
                          console=args.verbosity)
            args.package_dir = os.path.join(package_dir, component)
            create_package(args, api, command_obj, resume=resume)
            args.package_dir = package_dir
    else:
        # create libraries or scripts
        imports = []
        category = str(metadata.get("category", DFT_CATEGORY))
        if metadata.get("imports") is not None:
            lib_imports = metadata.get("imports")
            for lib_import in lib_imports:
                args.package_dir = os.path.join(package_dir, lib_import)
                if args.embed_libs:
                    library_ref = create_package( \
                        args, api, command_obj, resume=resume)
                    u.log_created_resources("imports", output_dir, library_ref)
                else:
                    try:
                        # try to read the library id, if it is already there
                        library_ref = read_library_id(os.path.join( \
                            output_dir, os.path.basename(args.package_dir)))
                    except IOError:
                        library_ref = create_package( \
                            args, api, command_obj, resume=resume)
                        library_ref = read_library_id(os.path.join( \
                            output_dir, os.path.basename(args.package_dir)))
                imports.append(library_ref)
                args.package_dir = package_dir
        # create the resource described by the metadata
        message = ('Creating the %s.........\n' % metadata.get("kind"))
        u.log_message(message, log_file=session_file, console=args.verbosity)
        if metadata.get("kind") in WHIZZML_RESOURCES:
            whizzml_code = os.path.normpath(os.path.join(args.package_dir, \
                metadata.get("source_code", "%s.whizzml" % \
                metadata.get("kind"))))
            if args.embed_libs and metadata.get("kind") == WHIZZML_LIBRARY:
                return whizzml_code

            args.output_dir = os.path.join(output_dir, \
                os.path.basename(package_dir))
            # creating command to create the resource
            command = COMMANDS[metadata.get("kind")] % (whizzml_code,
                                                        args.output_dir)
            command_args = command.split()
            bigml.util.check_dir(args.output_dir)

            # getting inputs and outputs for the script from metadata
            if "inputs" in metadata:
                inputs_file = os.path.join(args.output_dir, "inputs.json")
                u.write_to_utf8(inputs_file,
                                json.dumps(metadata.get("inputs")))
                command_args.extend(["--declare-inputs", inputs_file])
            if "outputs" in metadata:
                outputs_file = os.path.join(args.output_dir, "outputs.json")
                u.write_to_utf8(outputs_file,
                                json.dumps(metadata.get("outputs")))
                command_args.extend(["--declare-outputs", outputs_file])
            if "description" in metadata:
                desc_file = os.path.join(args.output_dir, "description.txt")
                u.write_to_utf8(desc_file, metadata.get("description"))
                command_args.extend(["--description", desc_file])
            if metadata.get("name"):
                command_args.extend(["--name", metadata.get("name")])
            if args.tag:
                for tag in args.tag:
                    command_args.extend(["--tag", tag])
            command_args.extend(["--category", category])

            # adding imports, if any
            if imports:
                if args.embed_libs:
                    # imports to be embedded are in the same output directory
                    command_args.extend( \
                        ["--embedded-imports", os.path.join(output_dir,
                                                            "imports")])
                else:
                    # imports to be referenced by ID
                    command_args.extend(["--imports", ",".join(imports)])
            command_args.extend(["--verbosity", str(args.verbosity)])
            command_obj.propagate(command_args)
            # u.add_api_context(command_args, args)
            if args.upgrade:
                command_args.extend(["--upgrade"])

            if resume:
                next_command = subcommand_list.pop()
                if different_command(next_command, command):
                    resume = False
                    u.sys_log_message(command, log_file=subcommand_file)
                    execute_dispatcher(args=command_args)
                elif not subcommand_list:
                    execute_dispatcher(args=['execute', '--resume'])
                    resume = False
            else:
                u.sys_log_message(command, log_file=subcommand_file)
                execute_dispatcher(args=command_args)
            args.output_dir = output_dir
            return whizzml_code
    return ""
Example #33
def analyze_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer analyze

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        command_args, session_file, _ = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir,
                                    SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear-logs is set, the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        if command_args.model_fields:
            model_fields = command_args.model_fields.split(',')
            command_args.model_fields_ = [model_field.strip()
                                          for model_field in model_fields]
        else:
            command_args.model_fields_ = []
        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
    # create an api instance from args
    api = a.get_api_instance(command_args,
                             u.check_dir(session_file))
    # The --maximize flag will be deprecated; use --optimize instead.
    if command_args.maximize is not None and command_args.optimize is None:
        command_args.optimize = command_args.maximize
    incompatible_flags = [command_args.cv, command_args.features,
                          command_args.nodes, command_args.random_fields]
    if sum([int(bool(flag)) for flag in incompatible_flags]) > 1:
        sys.exit("The following flags cannot be used together:\n    --features"
                 "\n    --cross-validation\n    --nodes\n    --random-fields")
    if (command_args.dataset is None and command_args.datasets is None and
            command_args.dataset_file is None):
        sys.exit("The analyze command needs an existing dataset ID. Please, "
                 "use the --dataset flag.")
    if not any(incompatible_flags):
        sys.exit("You need to specify the type of analysis: features, node "
                 "threshold, cross validation or random fields.")
    # k-fold cross-validation
    if command_args.cv and command_args.dataset is not None:
        create_kfold_cv(command_args, api, command.common_options,
                        resume=resume)

    # features analysis
    elif command_args.features:
        create_features_analysis(command_args, api, command.common_options,
                                 resume=resume)

    # node threshold analysis
    elif command_args.nodes:
        create_nodes_analysis(command_args, api, command.common_options,
                              resume=resume)

    # random fields analysis
    elif command_args.random_fields:
        create_candidates_analysis(command_args, api, command.common_options,
                                   resume=resume)
    else:
        sys.exit("You must choose one of the available analysis: --features,"
                 " --nodes, --random-fields or --cross-validation. Add"
                 " your prefered option to"
                 " the command line or type\n    bigmler analyze --help\n"
                 " to see all the available options.")
Example #34
def anomaly_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    resume = command_args.resume
    if command_args.resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = a.parse_and_check(command)
        if command_args.predictions is None:
            command_args.predictions = os.path.join(stored_command.output_dir,
                                                    DEFAULT_OUTPUT)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.predictions is None:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    DEFAULT_OUTPUT)
        if len(os.path.dirname(command_args.predictions).strip()) == 0:
            command_args.predictions = os.path.join(command_args.output_dir,
                                                    command_args.predictions)
        directory = u.check_dir(command_args.predictions)
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            with open(DEFAULTS_FILE, 'r') as defaults_file:
                contents = defaults_file.read()
            # buffering=0 is only valid for binary files in Python 3,
            # so the copy is written with default buffering
            with open(os.path.join(directory, DEFAULTS_FILE),
                      'w') as defaults_copy:
                defaults_copy.write(contents)
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # Creates the corresponding api instance
    if resume and debug:
        command_args.debug = True
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    # Selects the action to perform
    if (has_train(command_args) or has_test(command_args)):
        output_args = a.get_output_args(api, command_args, resume)
        a.transform_args(command_args, command.flags, api,
                         command.user_defaults)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #35
def create_package(args, api, common_options, resume=False):
    """Creates the package whizzml resources as referred in the metadata.json
    file.

    """
    set_subcommand_file(args.output_dir)
    if resume:
        retrieve_subcommands()
    # read the metadata.json information
    message = ('Reading the metadata.json files.........\n')
    u.log_message(message, log_file=session_file,
                  console=args.verbosity)
    package_dir = args.package_dir
    output_dir = args.output_dir
    metadata_file = os.path.join(package_dir, METADATA_FILE)
    metadata = None
    created_resources = []
    with open(metadata_file) as metadata_handler:
        metadata = json.load(metadata_handler)
    # recurse into components/directories, if any
    if metadata.get("kind") == "package" and 'components' in metadata:
        components = metadata.get("components")
        for component in components:
            message = ('Inspecting component %s.........\n' % component)
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
            args.package_dir = os.path.join(package_dir, component)
            create_package(args, api, common_options, resume=resume)
            args.package_dir = package_dir
    else:
        # create libraries or scripts
        imports = []
        if metadata.get("imports") is not None:
            lib_imports = metadata.get("imports")
            for lib_import in lib_imports:
                args.package_dir = os.path.join(package_dir, lib_import)
                create_package(args, api, common_options, resume=resume)
                imports.append(read_library_id(os.path.join( \
                    output_dir, os.path.basename(args.package_dir))))
                args.package_dir = package_dir
        # create the resource described by the metadata
        message = ('Creating the %s.........\n' % metadata.get("kind"))
        u.log_message(message, log_file=session_file,
                      console=args.verbosity)
        if metadata.get("kind") in WHIZZML_RESOURCES:
            whizzml_code = os.path.join(args.package_dir, \
                metadata.get("source_code", "%s.whizzml" % \
                metadata.get("kind")))
            args.output_dir = os.path.join(output_dir, \
                os.path.basename(package_dir))
            # creating command to create the resource
            command = COMMANDS[metadata.get("kind")] % (whizzml_code,
                                                        args.output_dir)
            command_args = command.split()
            bigml.util.check_dir(args.output_dir)
            # getting inputs and outputs for the script from metadata
            if "inputs" in metadata:
                inputs_file = os.path.join(args.output_dir, "inputs.json")
                with open(inputs_file, "w") as inputs_handler:
                    json.dump(metadata.get("inputs"), inputs_handler)
                command_args.extend(["--declare-inputs", inputs_file])
            if "outputs" in metadata:
                outputs_file = os.path.join(args.output_dir, "outputs.json")
                with open(outputs_file, "w") as outputs_handler:
                    json.dump(metadata.get("outputs"), outputs_handler)
                command_args.extend(["--declare-outputs", outputs_file])
            if "description" in metadata:
                desc_file = os.path.join(args.output_dir, "description.txt")
                with open(desc_file, "w") as desc_handler:
                    desc_handler.write(metadata.get("description"))
                command_args.extend(["--description", desc_file])
            if metadata.get("name"):
                command_args.extend(["--name", metadata.get("name")])
            # adding imports, if any
            if imports:
                command_args.extend(["--imports", ",".join(imports)])
            command_args.extend(["--verbosity", str(args.verbosity)])

            if resume:
                next_command = subcommand_list.pop()
                if different_command(next_command, command):
                    resume = False
                    u.sys_log_message(command, log_file=subcommand_file)
                    execute_dispatcher(args=command_args)
                elif not subcommand_list:
                    execute_dispatcher(args=['execute', '--resume'])
                    resume = False
            else:
                u.sys_log_message(command, log_file=subcommand_file)
                execute_dispatcher(args=command_args)
            args.output_dir = output_dir
Example #36
def analyze_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer analyze

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        command_args, session_file, _ = get_stored_command(
            args,
            command_args.debug,
            command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG,
            sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear-logs is set, the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        if command_args.model_fields:
            model_fields = command_args.model_fields.split(',')
            command_args.model_fields_ = [
                model_field.strip() for model_field in model_fields
            ]
        else:
            command_args.model_fields_ = []
        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
    # create an api instance from args
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    a.transform_dataset_options(command_args, api)

    # The --maximize flag will be deprecated; use --optimize instead.
    if command_args.maximize is not None and command_args.optimize is None:
        command_args.optimize = command_args.maximize
    incompatible_flags = [
        command_args.cv, command_args.features, command_args.nodes,
        command_args.random_fields
    ]
    if sum([int(bool(flag)) for flag in incompatible_flags]) > 1:
        sys.exit("The following flags cannot be used together:\n    --features"
                 "\n    --cross-validation\n    --nodes\n    --random-fields")
    if (command_args.dataset is None and command_args.datasets is None
            and command_args.dataset_file is None):
        sys.exit("The analyze command needs an existing dataset ID. Please, "
                 "use the --dataset flag.")
    if not any(incompatible_flags):
        sys.exit("You need to specify the type of analysis: features, node "
                 "threshold, cross validation or random fields.")
    # k-fold cross-validation
    if command_args.cv and command_args.dataset is not None:
        create_kfold_cv(command_args,
                        api,
                        command.common_options,
                        resume=resume)

    # features analysis
    elif command_args.features:
        create_features_analysis(command_args,
                                 api,
                                 command.common_options,
                                 resume=resume)

    # node threshold analysis
    elif command_args.nodes:
        create_nodes_analysis(command_args,
                              api,
                              command.common_options,
                              resume=resume)

    # random fields analysis
    elif command_args.random_fields:
        create_candidates_analysis(command_args,
                                   api,
                                   command.common_options,
                                   resume=resume)
    else:
        sys.exit("You must choose one of the available analysis: --features,"
                 " --nodes, --random-fields or --cross-validation. Add"
                 " your prefered option to"
                 " the command line or type\n    bigmler analyze --help\n"
                 " to see all the available options.")
Example #37
def analyze_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer analyze

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = command.parser.parse_args(command.args)
        command_args.debug = debug
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir,
                                    SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear-logs is set, the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        if command_args.model_fields:
            model_fields = command_args.model_fields.split(',')
            command_args.model_fields_ = [model_field.strip()
                                          for model_field in model_fields]
        else:
            command_args.model_fields_ = []
        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
    # create an api instance from args
    api = a.get_api_instance(command_args,
                             u.check_dir(session_file))
    # The --maximize flag will be deprecated; use --optimize instead.
    if command_args.maximize is not None and command_args.optimize is None:
        command_args.optimize = command_args.maximize
    incompatible_flags = [command_args.cv, command_args.features,
                          command_args.nodes]
    if sum([int(bool(flag)) for flag in incompatible_flags]) > 1:
        sys.exit("The following flags cannot be used together:\n    --features"
                 "\n    --cross-validation\n    --nodes")
    # k-fold cross-validation
    if command_args.cv and command_args.dataset is not None:
        create_kfold_cv(command_args, api, command.common_options,
                        resume=resume)

    # features analysis
    if command_args.features:
        create_features_analysis(command_args, api, command.common_options,
                                 resume=resume)

    # node threshold analysis
    if command_args.nodes:
        create_nodes_analysis(command_args, api, command.common_options,
                              resume=resume)
Example #38
def reify_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    if command_args.resume:
        command_args, session_file, _ = get_stored_command(
            args,
            command_args.debug,
            command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG,
            sessions_log=SESSIONS_LOG)
        if command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               DEFAULT_OUTPUT)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               DEFAULT_OUTPUT)
        if len(os.path.dirname(command_args.output).strip()) == 0:
            command_args.output = os.path.join(command_args.output_dir,
                                               command_args.output)
        directory = u.check_dir(command_args.output)
        command_args.output_dir = directory
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)

        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    def logger(message):
        """Partial to log messages according to args.verbosity

        """
        u.log_message(u.dated(message), \
            log_file=session_file, console=command_args.verbosity)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    message = "Starting reification for %s\n\n" % command_args.resource_id
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    reify_resources(command_args, api, logger)
    message = "\nReification complete. See the results in %s\n\n" % \
        command_args.output
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    u.log_message("_" * 80 + "\n", log_file=session_file)

    u.print_generated_files(command_args.output_dir,
                            log_file=session_file,
                            verbosity=command_args.verbosity)
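
The nested logger function above is a closure over session_file and the
verbosity setting; functools.partial gives the same effect. A minimal
sketch, with a stand-in for u.log_message:

from functools import partial

def log_message(message, log_file=None, console=False):
    # stand-in for u.log_message: just print when console is truthy
    if console:
        print(message, end="")

session_file = "session.log"  # hypothetical
verbosity = 1

# equivalent to the nested logger() closure above
logger = partial(log_message, log_file=session_file, console=verbosity)
logger("Starting reification...\n")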
Example #39
def create_package(args, api, common_options, resume=False):
    """Creates the package whizzml resources as referred in the metadata.json
    file.

    """
    set_subcommand_file(args.output_dir)
    if resume:
        retrieve_subcommands()
    # read the metadata.json information
    message = ('Reading the metadata.json files.........\n')
    u.log_message(message, log_file=session_file,
                  console=args.verbosity)
    package_dir = args.package_dir
    output_dir = args.output_dir
    metadata_file = os.path.join(package_dir, METADATA_FILE)
    metadata = None

    with open(metadata_file) as metadata_handler:
        metadata = json.load(metadata_handler)
    # recurse into components/directories, if any
    if metadata.get("kind") == "package" and 'components' in metadata:
        components = metadata.get("components")
        for component in components:
            message = ('Inspecting component %s.........\n' % component)
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
            args.package_dir = os.path.join(package_dir, component)
            create_package(args, api, common_options, resume=resume)
            args.package_dir = package_dir
    else:
        # create libraries or scripts
        imports = []
        if metadata.get("imports") is not None:
            lib_imports = metadata.get("imports")
            for lib_import in lib_imports:
                args.package_dir = os.path.join(package_dir, lib_import)
                # try to read the library id, if it is already there
                try:
                    library_id = read_library_id(os.path.join( \
                        output_dir, os.path.basename(args.package_dir)))
                except IOError:
                    create_package(args, api, common_options, resume=resume)
                    library_id = read_library_id(os.path.join( \
                        output_dir, os.path.basename(args.package_dir)))
                imports.append(library_id)
                args.package_dir = package_dir
        # create the resource described by the metadata
        message = ('Creating the %s.........\n' % metadata.get("kind"))
        u.log_message(message, log_file=session_file,
                      console=args.verbosity)
        if metadata.get("kind") in WHIZZML_RESOURCES:
            whizzml_code = os.path.join(args.package_dir, \
                metadata.get("source_code", "%s.whizzml" % \
                metadata.get("kind")))
            args.output_dir = os.path.join(output_dir, \
                os.path.basename(package_dir))
            # creating command to create the resource
            command = COMMANDS[metadata.get("kind")] % (whizzml_code,
                                                        args.output_dir)
            command_args = command.split()
            bigml.util.check_dir(args.output_dir)
            # getting inputs and outputs for the script from metadata
            if "inputs" in metadata:
                inputs_file = os.path.join(args.output_dir, "inputs.json")
                with open(inputs_file, "w") as inputs_handler:
                    json.dump(metadata.get("inputs"), inputs_handler)
                command_args.extend(["--declare-inputs", inputs_file])
            if "outputs" in metadata:
                outputs_file = os.path.join(args.output_dir, "outputs.json")
                with open(outputs_file, "w") as outputs_handler:
                    json.dump(metadata.get("outputs"), outputs_handler)
                command_args.extend(["--declare-outputs", outputs_file])
            if "description" in metadata:
                desc_file = os.path.join(args.output_dir, "description.txt")
                with open(desc_file, "w") as desc_handler:
                    desc_handler.write(metadata.get("description"))
                command_args.extend(["--description", desc_file])
            if metadata.get("name"):
                command_args.extend(["--name", metadata.get("name")])
            # adding imports, if any
            if imports:
                command_args.extend(["--imports", ",".join(imports)])
            command_args.extend(["--verbosity", str(args.verbosity)])

            if resume:
                next_command = subcommand_list.pop()
                if different_command(next_command, command):
                    resume = False
                    u.sys_log_message(command, log_file=subcommand_file)
                    execute_dispatcher(args=command_args)
                elif not subcommand_list:
                    execute_dispatcher(args=['execute', '--resume'])
                    resume = False
            else:
                u.sys_log_message(command, log_file=subcommand_file)
                execute_dispatcher(args=command_args)
            args.output_dir = output_dir
Example #40
def reify_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    if command_args.resume:
        command_args, session_file, _ = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
        if command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               DEFAULT_OUTPUT)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               DEFAULT_OUTPUT)
        if len(os.path.dirname(command_args.output).strip()) == 0:
            command_args.output = os.path.join(command_args.output_dir,
                                               command_args.output)
        directory = u.check_dir(command_args.output)
        command_args.output_dir = directory
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)

        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    def logger(message):
        """Partial to log messages according to args.verbosity

        """
        u.log_message(u.dated(message), \
            log_file=session_file, console=command_args.verbosity)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    message = "Starting reification for %s\n\n" % command_args.resource_id
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    reify_resources(command_args, api, logger)
    message = "\nReification complete. See the results in %s\n\n" % \
        command_args.output
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    u.log_message("_" * 80 + "\n", log_file=session_file)

    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)
Example #41
def analyze_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer analyze

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = command.parser.parse_args(command.args)
        command_args.debug = debug
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir,
                                    SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear-logs is set, the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        if command_args.model_fields:
            model_fields = command_args.model_fields.split(',')
            # map() is lazy in Python 3, so materialize the list
            command_args.model_fields_ = [model_field.strip()
                                          for model_field in model_fields]
        else:
            command_args.model_fields_ = []
        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
    # create an api instance from args
    api = a.get_api_instance(command_args,
                             u.check_dir(session_file))

    # k-fold cross-validation
    if command_args.cv and command_args.dataset is not None:
        create_kfold_cv(command_args, api, command.common_options,
                        resume=resume)

    # features analysis
    if command_args.features:
        create_features_analysis(command_args, api, command.common_options,
                                 resume=resume)

    # node threshold analysis
    if command_args.nodes:
        create_nodes_analysis(command_args, api, command.common_options,
                              resume=resume)