Ejemplo n.º 1
0
def create_kfold_datasets(dataset, args,
                          selecting_file_list,
                          common_options, resume=False):
    """Issue one "selection" subcommand per entry in `selecting_file_list`

    Side effect: `args.output_dir` is replaced by its normalized "test"
    subdirectory, which is also the output directory handed to every
    subcommand.

    While `resume` is true, each rebuilt command is compared (through
    `different_command`) with the next entry popped from the module-level
    `subcommand_list`. A differing command is logged and dispatched and
    resuming stops. A matching command is not dispatched again; if it was
    the last stored entry, the dispatcher is invoked with `--resume` and
    resuming stops.

    Returns a `(datasets_file, resume)` tuple, where `datasets_file` is
    the normalized path of "dataset_gen" inside the output directory.

    """
    global subcommand_list
    test_dir = os.path.normpath(os.path.join(args.output_dir, "test"))
    # later option building reads the updated directory from `args`
    args.output_dir = test_dir
    for selecting_file in selecting_file_list:
        template_values = (dataset, selecting_file, test_dir)
        command_args = (COMMANDS["selection"] % template_values).split()
        # the options already in the command are passed as `prioritary`
        command_args.extend(
            u.get_options_list(args, common_options,
                               prioritary=command_args))
        command = rebuild_command(command_args)
        must_run = True
        if resume:
            stored_command = subcommand_list.pop()
            if different_command(stored_command, command):
                # the stored history diverges here: run from now on
                resume = False
            else:
                must_run = False
                if not subcommand_list:
                    # last stored command reached: resume it and go on
                    main_dispatcher(args=['main', '--resume'])
                    resume = False
        if must_run:
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
    return os.path.normpath(os.path.join(test_dir, "dataset_gen")), resume
Ejemplo n.º 2
0
def create_kfold_datasets(dataset, args,
                          selecting_file_list,
                          common_options, resume=False):
    """Build and run the "selection" subcommands for the k-fold datasets

    `args.output_dir` is overwritten with its normalized "test"
    subdirectory before any command is built, and every command uses
    that directory as output.

    For each selecting file the command is rebuilt and, when resuming,
    checked against the next command popped from the module-level
    `subcommand_list`. Commands equal to the stored one are skipped
    (calling the dispatcher with `--resume` once the stored list is
    exhausted); the first differing command ends the resume mode and is
    logged and executed like any non-resumed command.

    Returns the normalized "dataset_gen" path under the output directory
    together with the resulting `resume` flag.

    """
    global subcommand_list
    args.output_dir = os.path.normpath(os.path.join(args.output_dir, "test"))
    output_dir = args.output_dir
    selection_template = COMMANDS["selection"]
    for selecting_file in selecting_file_list:
        command_args = (
            selection_template % (dataset, selecting_file, output_dir)
        ).split()
        extra_options = u.get_options_list(args, common_options,
                                           prioritary=command_args)
        command_args += extra_options
        command = rebuild_command(command_args)
        if resume:
            logged_command = subcommand_list.pop()
            if not different_command(logged_command, command):
                # already issued in the previous run: do not repeat it
                if not subcommand_list:
                    main_dispatcher(args=['main', '--resume'])
                    resume = False
                continue
            resume = False
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    datasets_file = os.path.normpath(os.path.join(output_dir, "dataset_gen"))
    return datasets_file, resume
Ejemplo n.º 3
0
def create_kfold_evaluations(datasets_file,
                             args,
                             common_options,
                             resume=False,
                             counter=0):
    """Run the "create_cv" subcommand and load the evaluation it produces

    The output directory is derived from `args.output_dir` with `counter`
    appended, passed through `u.check_dir` and normalized. The resource
    name is `args.name` truncated so that, with the "_subset_<counter>"
    suffix, it does not exceed NAME_MAX_LENGTH.

    When `resume` is true the rebuilt command is compared with the next
    entry popped from the module-level `subcommand_list`: a differing
    command is logged and executed, a matching one is skipped (calling
    the dispatcher with `--resume` if the stored list became empty).

    Returns `(evaluation, resume)`, where `evaluation` is the parsed
    contents of "evaluation.json" in the output directory. Exits the
    program if that file cannot be opened or parsed.

    """
    global subcommand_list
    counter_dir = u"%s%s" % (args.output_dir, counter)
    output_dir = os.path.normpath(
        u.check_dir(os.path.join(counter_dir, u"evaluation.json")))
    suffix = "_subset_%s" % counter
    # leave room for the suffix within the maximum name length
    name = "%s%s" % (args.name[:NAME_MAX_LENGTH - len(suffix)], suffix)
    first_dataset = u.read_datasets(datasets_file)[0]
    model_dataset = os.path.normpath(
        os.path.join(u.check_dir(datasets_file),
                     first_dataset.replace("/", "_")))
    template_values = (datasets_file, output_dir, name, model_dataset)
    command_args = (COMMANDS["create_cv"] % template_values).split()
    if args.model_fields:
        command_args.extend(["--model-fields", args.model_fields])
    command_args.extend(["--objective", args.objective_field])
    command_args = add_model_options(command_args, args)
    command_args.extend(
        u.get_options_list(args, common_options, prioritary=command_args))
    command = rebuild_command(command_args)
    must_run = True
    if resume:
        stored_command = subcommand_list.pop()
        if different_command(stored_command, command):
            resume = False
        else:
            must_run = False
            if not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
    if must_run:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
    return evaluation, resume
Ejemplo n.º 4
0
def create_kfold_evaluations(datasets_file, args, common_options,
                             resume=False, counter=0):
    """Create the cross-validation for a datasets file and read its result

    Builds the "create_cv" command from the datasets file, an output
    directory based on `args.output_dir` plus `counter`, a name ending in
    "_subset_<counter>" (the `args.name` prefix is cut to fit
    NAME_MAX_LENGTH) and a path derived from the first dataset id read
    from `datasets_file`. The model fields (if any), the objective field,
    the options added by `add_model_options` and the common options are
    appended in that order.

    In resume mode the command is only executed when it differs from the
    next one popped from the module-level `subcommand_list`; when it
    matches and no stored commands remain, the dispatcher is called with
    `--resume`. Either of those two events switches resume mode off.

    Returns the parsed "evaluation.json" and the updated `resume` flag;
    terminates with an error message if the file cannot be read or is
    not valid JSON.

    """
    global subcommand_list
    evaluation_path = os.path.join(u"%s%s" % (args.output_dir, counter),
                                   u"evaluation.json")
    output_dir = os.path.normpath(u.check_dir(evaluation_path))
    fields = args.model_fields
    name_suffix = "_subset_%s" % counter
    prefix_length = NAME_MAX_LENGTH - len(name_suffix)
    name = args.name[0: prefix_length] + name_suffix
    dataset_id = u.read_datasets(datasets_file)[0]
    datasets_dir = u.check_dir(datasets_file)
    # the dataset id is turned into a file name by replacing its slashes
    model_dataset = os.path.normpath(
        os.path.join(datasets_dir, dataset_id.replace("/", "_")))
    command_args = (COMMANDS["create_cv"] % (
        datasets_file, output_dir, name, model_dataset)).split()
    if fields:
        command_args += ["--model-fields", fields]
    command_args += ["--objective", args.objective_field]
    command_args = add_model_options(command_args, args)
    command_args.extend(u.get_options_list(args, common_options,
                                           prioritary=command_args))
    command = rebuild_command(command_args)
    if not resume:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    else:
        previous_command = subcommand_list.pop()
        if different_command(previous_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file) as evaluation_handler:
            evaluation = json.load(evaluation_handler)
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
    else:
        return evaluation, resume
Ejemplo n.º 5
0
def create_node_th_evaluations(datasets_file,
                               args,
                               common_options,
                               resume=False,
                               node_threshold=DEFAULT_MIN_NODES):
    """Run the "node_threshold" subcommand and load its evaluation

    The output directory is `args.output_dir` with `node_threshold`
    appended, passed through `u.check_dir` and normalized. The command
    is completed with the common options, the objective field and the
    options added by `add_model_options`, in that order.

    When `resume` is true the rebuilt command is compared with the next
    entry popped from the module-level `subcommand_list`: a differing
    command is logged and executed, a matching one is skipped (calling
    the dispatcher with `--resume` if the stored list became empty).

    Returns `(evaluation, resume)`, where `evaluation` is the parsed
    contents of "evaluation.json" in the output directory. Exits the
    program if that file cannot be opened or parsed.

    """
    global subcommand_list
    threshold_dir = u"%s%s" % (args.output_dir, node_threshold)
    output_dir = os.path.normpath(
        u.check_dir(os.path.join(threshold_dir, "evaluation.json")))
    template_values = (datasets_file, node_threshold, output_dir)
    command_args = (COMMANDS["node_threshold"] % template_values).split()
    command_args.extend(
        u.get_options_list(args, common_options, prioritary=command_args))
    command_args.extend(["--objective", args.objective_field])
    command_args = add_model_options(command_args, args)
    command = rebuild_command(command_args)
    must_run = True
    if resume:
        stored_command = subcommand_list.pop()
        if different_command(stored_command, command):
            resume = False
        else:
            must_run = False
            if not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
    if must_run:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file, u.open_mode("r")) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
    return evaluation, resume
Ejemplo n.º 6
0
def create_kfold_evaluations(datasets_file, args, common_options,
                             resume=False, counter=0):
    """Run the "create_cv" subcommand and load the evaluation it produces

    The output directory is what `u.check_dir` returns for the
    "evaluation.json" path under `args.output_dir` with `counter`
    appended. The resource name is `args.name` cut so that, with the
    "_subset_<counter>" suffix, it fits in NAME_MAX_LENGTH.

    The command is the space-joined argument list. In resume mode it is
    compared with the stripped entry popped from the module-level
    `subcommand_list`: a different command is logged and executed, an
    equal one is skipped (calling the dispatcher with `--resume` if the
    stored list became empty). Both events switch resume mode off.

    Returns `(evaluation, resume)` with the parsed "evaluation.json";
    exits the program if the file cannot be opened or parsed.

    """
    global subcommand_list
    counter_dir = "%s%s" % (args.output_dir, counter)
    output_dir = u.check_dir(os.path.join(counter_dir, "evaluation.json"))
    suffix = "_subset_%s" % counter
    # leave room for the suffix within the maximum name length
    name = "%s%s" % (args.name[:NAME_MAX_LENGTH - len(suffix)], suffix)
    command_args = (
        COMMANDS["create_cv"] % (datasets_file, output_dir, name)).split()
    if args.model_fields:
        command_args.extend(["--model-fields", args.model_fields])
    command_args.extend(
        u.get_options_list(args, common_options, prioritary=command_args))
    command = " ".join(command_args)
    must_run = True
    if resume:
        stored_command = subcommand_list.pop().strip()
        if stored_command == command:
            must_run = False
            if not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            resume = False
    if must_run:
        u.log_message("%s\n" % command, log_file=subcommand_file,
                      console=False)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.join(output_dir, "evaluation.json")
    try:
        with open(evaluation_file) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
    return evaluation, resume
Ejemplo n.º 7
0
def create_node_th_evaluations(datasets_file, args, common_options,
                               resume=False,
                               node_threshold=DEFAULT_MIN_NODES):
    """Create the evaluation for a node threshold and read its result

    Builds the "node_threshold" command from the datasets file, the
    threshold and an output directory based on `args.output_dir` plus
    the threshold. The common options, the objective field and the
    options added by `add_model_options` are appended in that order.

    In resume mode the command is only executed when it differs from the
    next one popped from the module-level `subcommand_list`; when it
    matches and no stored commands remain, the dispatcher is called with
    `--resume`. Either of those two events switches resume mode off.

    Returns the parsed "evaluation.json" and the updated `resume` flag;
    terminates with an error message if the file cannot be read or is
    not valid JSON.

    """
    global subcommand_list
    evaluation_path = os.path.join(
        u"%s%s" % (args.output_dir, node_threshold), "evaluation.json")
    output_dir = os.path.normpath(u.check_dir(evaluation_path))
    command_args = (COMMANDS["node_threshold"] % (
        datasets_file, node_threshold, output_dir)).split()
    command_args += u.get_options_list(args, common_options,
                                       prioritary=command_args)
    command_args += ["--objective", args.objective_field]
    command_args = add_model_options(command_args, args)
    command = rebuild_command(command_args)
    if not resume:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    else:
        previous_command = subcommand_list.pop()
        if different_command(previous_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file, u.open_mode("r")) as evaluation_handler:
            evaluation = json.load(evaluation_handler)
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
    else:
        return evaluation, resume
Ejemplo n.º 8
0
def create_node_th_evaluations(datasets_file, args, common_options,
                               resume=False,
                               node_threshold=DEFAULT_MIN_NODES):
    """Run the "node_threshold" subcommand and load its evaluation

    The output directory is what `u.check_dir` returns for the
    "evaluation.json" path under `args.output_dir` with `node_threshold`
    appended. The command is the space-joined argument list, including
    the common options.

    In resume mode the command is compared with the stripped entry
    popped from the module-level `subcommand_list`: a different command
    is logged and executed, an equal one is skipped (calling the
    dispatcher with `--resume` if the stored list became empty). Both
    events switch resume mode off.

    Returns `(evaluation, resume)` with the parsed "evaluation.json";
    exits the program if the file cannot be opened or parsed.

    """
    global subcommand_list
    threshold_dir = "%s%s" % (args.output_dir, node_threshold)
    output_dir = u.check_dir(os.path.join(threshold_dir, "evaluation.json"))
    template_values = (datasets_file, node_threshold, output_dir)
    command_args = (COMMANDS["node_threshold"] % template_values).split()
    command_args.extend(
        u.get_options_list(args, common_options, prioritary=command_args))
    command = " ".join(command_args)
    must_run = True
    if resume:
        stored_command = subcommand_list.pop().strip()
        if stored_command == command:
            must_run = False
            if not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            resume = False
    if must_run:
        u.log_message("%s\n" % command, log_file=subcommand_file,
                      console=False)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.join(output_dir, "evaluation.json")
    try:
        with open(evaluation_file) as evaluation_handler:
            evaluation = json.loads(evaluation_handler.read())
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
    return evaluation, resume
Ejemplo n.º 9
0
def _dispatch_kfold_subcommand(command_args, resume):
    """Log and run one subcommand, or skip it when resuming

    The command is the space-joined `command_args`. When `resume` is
    true it is compared with the stripped entry popped from the
    module-level `subcommand_list`: an equal command is not run again
    (the dispatcher is called with `--resume` once the stored list is
    empty), while a different one is logged and run like any
    non-resumed command.

    Returns the updated `resume` flag.

    """
    command = " ".join(command_args)
    if resume:
        stored_command = subcommand_list.pop().strip()
        if stored_command == command:
            if subcommand_list:
                # more stored commands to check: keep resuming
                return True
            # last stored command reached: resume it and go on normally
            main_dispatcher(args=['main', '--resume'])
            return False
    u.log_message("%s\n" % command, log_file=subcommand_file,
                  console=False)
    main_dispatcher(args=command_args)
    return False


def create_kfold_datasets(dataset, args,
                          selecting_file_list, objective, kfold_field,
                          common_options, resume=False):
    """Calling the bigmler procedure to create the k-fold datasets

    Side effect: `args.output_dir` is replaced by its "test"
    subdirectory, which is the output directory of every subcommand.

    First, one "selection" subcommand is issued per entry of
    `selecting_file_list`. Then the "dataset_gen" file in the output
    directory is read and one "objective" subcommand is issued per line
    (each line holding a dataset id), naming the datasets "dataset_0",
    "dataset_1", ... after their position in the file and setting
    `objective` as their objective field.

    `kfold_field` is accepted for interface compatibility but is not
    used here.

    Returns a `(datasets_file, resume)` tuple with the path of the
    "dataset_gen" file and the updated `resume` flag.

    """
    args.output_dir = os.path.join(args.output_dir, "test")
    output_dir = args.output_dir
    # creating the selecting datasets
    for selecting_file in selecting_file_list:
        command_args = (COMMANDS["selection"] % (
            dataset, selecting_file, output_dir)).split()
        command_args.extend(
            u.get_options_list(args, common_options,
                               prioritary=command_args))
        resume = _dispatch_kfold_subcommand(command_args, resume)
    # updating the datasets to set the objective field
    datasets_file = os.path.join(output_dir, "dataset_gen")
    with open(datasets_file) as datasets_handler:
        # each dataset is numbered by its own line; reusing the index
        # left over from the selection loop would give all of them the
        # same name and fail if there were no selecting files
        for index, line in enumerate(datasets_handler):
            dataset_id = line.strip()
            command_args = (COMMANDS["objective"] % (
                dataset_id, "dataset_%s" % index, output_dir)).split()
            command_args.extend(["--objective", objective])
            command_args.extend(
                u.get_options_list(args, common_options,
                                   prioritary=command_args))
            resume = _dispatch_kfold_subcommand(command_args, resume)

    return datasets_file, resume