def create_kfold_datasets(dataset, args, selecting_file_list,
                          common_options, resume=False):
    """Call the bigmler procedure that builds the k-fold datasets.

    One "selection" subcommand is generated per sampling file. Each
    command is either replayed (when resuming) or logged and executed
    through ``main_dispatcher``.

    Returns the path of the generated datasets file and the
    (possibly updated) resume flag.
    """
    global subcommand_list
    # Every generated resource goes to a "test" subdirectory of the
    # configured output directory.
    args.output_dir = os.path.normpath(os.path.join(args.output_dir, "test"))
    output_dir = args.output_dir
    # Create one selecting dataset per sampling file.
    for selecting_file in selecting_file_list:
        raw_command = COMMANDS["selection"] % (dataset, selecting_file,
                                               output_dir)
        command_args = raw_command.split()
        command_args.extend(u.get_options_list(args, common_options,
                                               prioritary=command_args))
        command = rebuild_command(command_args)
        if resume:
            next_command = subcommand_list.pop()
            if different_command(next_command, command):
                # Stored command diverges: stop resuming and execute.
                resume = False
                u.sys_log_message(command, log_file=subcommand_file)
                main_dispatcher(args=command_args)
            elif not subcommand_list:
                # Last stored command: let bigmler resume it itself.
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
    datasets_file = os.path.normpath(os.path.join(output_dir, "dataset_gen"))
    return datasets_file, resume
def create_kfold_evaluations(datasets_file, args, common_options,
                             resume=False, counter=0):
    """Create the k-fold cross-validation evaluation for one fold.

    Builds a "create_cv" subcommand from the datasets file, dispatches
    it (or replays it when resuming) and returns the parsed contents of
    the resulting ``evaluation.json`` together with the resume flag.
    Exits the process if the evaluation file cannot be read or parsed.
    """
    global subcommand_list
    output_dir = os.path.normpath(
        u.check_dir(os.path.join(u"%s%s" % (args.output_dir, counter),
                                 u"evaluation.json")))
    # Suffix the resource name with the fold counter, truncating the
    # base name so the result never exceeds NAME_MAX_LENGTH.
    name_suffix = "_subset_%s" % counter
    name = "%s%s" % (args.name[0:NAME_MAX_LENGTH - len(name_suffix)],
                     name_suffix)
    dataset_id = u.read_datasets(datasets_file)[0]
    model_dataset = os.path.normpath(
        os.path.join(u.check_dir(datasets_file),
                     dataset_id.replace("/", "_")))
    command_args = (COMMANDS["create_cv"] % (datasets_file, output_dir,
                                             name, model_dataset)).split()
    if args.model_fields:
        command_args.extend(["--model-fields", args.model_fields])
    command_args.extend(["--objective", args.objective_field])
    command_args = add_model_options(command_args, args)
    command_args.extend(u.get_options_list(args, common_options,
                                           prioritary=command_args))
    command = rebuild_command(command_args)
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            # Stored command diverges: stop resuming and execute.
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            # Last stored command: let bigmler resume it itself.
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file) as evaluation_handler:
            return json.load(evaluation_handler), resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
def create_kfold_evaluations(datasets_file, args, common_options,
                             resume=False, counter=0):
    """Create the k-fold cross-validation evaluation for one fold.

    Dispatches a "create_cv" subcommand built from the datasets file
    (replaying it when resuming) and returns the parsed
    ``evaluation.json`` plus the resume flag. Exits the process when
    the evaluation file cannot be read or decoded.
    """
    global subcommand_list
    output_dir = os.path.normpath(u.check_dir(os.path.join(
        u"%s%s" % (args.output_dir, counter), u"evaluation.json")))
    model_fields = args.model_fields
    # Name gets a per-fold suffix; base is trimmed to NAME_MAX_LENGTH.
    suffix = "_subset_%s" % counter
    name = "%s%s" % (args.name[0:NAME_MAX_LENGTH - len(suffix)], suffix)
    dataset_id = u.read_datasets(datasets_file)[0]
    model_dataset = os.path.normpath(os.path.join(
        u.check_dir(datasets_file), dataset_id.replace("/", "_")))
    cmd = COMMANDS["create_cv"] % (datasets_file, output_dir, name,
                                   model_dataset)
    cmd_args = cmd.split()
    if model_fields:
        cmd_args += ["--model-fields", model_fields]
    cmd_args += ["--objective", args.objective_field]
    cmd_args = add_model_options(cmd_args, args)
    cmd_args += u.get_options_list(args, common_options, prioritary=cmd_args)
    cmd = rebuild_command(cmd_args)
    if resume:
        stored = subcommand_list.pop()
        if different_command(stored, cmd):
            # Divergence from the stored command ends the replay.
            resume = False
            u.sys_log_message(cmd, log_file=subcommand_file)
            main_dispatcher(args=cmd_args)
        elif not subcommand_list:
            # Nothing left to replay: hand over to bigmler's own resume.
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(cmd, log_file=subcommand_file)
        main_dispatcher(args=cmd_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file) as handler:
            evaluation = json.loads(handler.read())
        return evaluation, resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
def create_node_th_evaluations(datasets_file, args, common_options,
                               resume=False,
                               node_threshold=DEFAULT_MIN_NODES):
    """Create the evaluation for one node_threshold value.

    Dispatches a "node_threshold" subcommand (replaying it when
    resuming) and returns the parsed ``evaluation.json`` plus the
    resume flag. Exits the process when the evaluation file cannot be
    read or decoded.
    """
    global subcommand_list
    output_dir = os.path.normpath(u.check_dir(os.path.join(
        u"%s%s" % (args.output_dir, node_threshold), "evaluation.json")))
    command_args = (COMMANDS["node_threshold"] % (
        datasets_file, node_threshold, output_dir)).split()
    command_args.extend(u.get_options_list(args, common_options,
                                           prioritary=command_args))
    command_args.extend(["--objective", args.objective_field])
    command_args = add_model_options(command_args, args)
    command = rebuild_command(command_args)
    if resume:
        stored_command = subcommand_list.pop()
        if different_command(stored_command, command):
            # Stored command diverges: stop resuming and execute.
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            # Last stored command: let bigmler resume it itself.
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file, u.open_mode("r")) as evaluation_handler:
            return json.loads(evaluation_handler.read()), resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
def create_kfold_evaluations(datasets_file, args, common_options,
                             resume=False, counter=0):
    """Create the k-fold cross-validation evaluation for one fold.

    Older command-string variant: commands are compared as joined
    strings and logged with ``u.log_message``. Returns the parsed
    ``evaluation.json`` plus the resume flag; exits the process if the
    evaluation file cannot be read or decoded.
    """
    global subcommand_list
    output_dir = u.check_dir(os.path.join(
        "%s%s" % (args.output_dir, counter), "evaluation.json"))
    model_fields = args.model_fields
    # Per-fold suffix; the base name is trimmed to NAME_MAX_LENGTH.
    suffix = "_subset_%s" % counter
    name = "%s%s" % (args.name[0:NAME_MAX_LENGTH - len(suffix)], suffix)
    command_args = (COMMANDS["create_cv"] % (datasets_file, output_dir,
                                             name)).split()
    if model_fields:
        command_args.extend(["--model-fields", model_fields])
    command_args.extend(u.get_options_list(args, common_options,
                                           prioritary=command_args))
    command = " ".join(command_args)
    if resume:
        stored_command = subcommand_list.pop().strip()
        if stored_command != command:
            # Stored command diverges: stop resuming and execute.
            resume = False
            u.log_message("%s\n" % command, log_file=subcommand_file,
                          console=False)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            # Last stored command: let bigmler resume it itself.
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.log_message("%s\n" % command, log_file=subcommand_file,
                      console=False)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.join(output_dir, "evaluation.json")
    try:
        with open(evaluation_file) as evaluation_handler:
            return json.load(evaluation_handler), resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
def create_node_th_evaluations(datasets_file, args, common_options,
                               resume=False,
                               node_threshold=DEFAULT_MIN_NODES):
    """Create the evaluation for one node_threshold value.

    Builds the "node_threshold" subcommand, replays it when resuming or
    logs and dispatches it, then returns the parsed
    ``evaluation.json`` plus the resume flag. Exits the process when
    the evaluation file cannot be read or decoded.
    """
    global subcommand_list
    output_dir = os.path.normpath(u.check_dir(os.path.join(
        u"%s%s" % (args.output_dir, node_threshold), "evaluation.json")))
    cmd = COMMANDS["node_threshold"] % (datasets_file, node_threshold,
                                        output_dir)
    cmd_args = cmd.split()
    cmd_args += u.get_options_list(args, common_options, prioritary=cmd_args)
    cmd_args += ["--objective", args.objective_field]
    cmd_args = add_model_options(cmd_args, args)
    cmd = rebuild_command(cmd_args)
    if resume:
        stored = subcommand_list.pop()
        if different_command(stored, cmd):
            # Divergence from the stored command ends the replay.
            resume = False
            u.sys_log_message(cmd, log_file=subcommand_file)
            main_dispatcher(args=cmd_args)
        elif not subcommand_list:
            # Nothing left to replay: hand over to bigmler's own resume.
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(cmd, log_file=subcommand_file)
        main_dispatcher(args=cmd_args)
    evaluation_file = os.path.normpath(
        os.path.join(output_dir, "evaluation.json"))
    try:
        with open(evaluation_file, u.open_mode("r")) as handler:
            evaluation = json.loads(handler.read())
        return evaluation, resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
def create_node_th_evaluations(datasets_file, args, common_options,
                               resume=False,
                               node_threshold=DEFAULT_MIN_NODES):
    """Create the evaluation for one node_threshold value.

    Older command-string variant: commands are compared as joined
    strings and logged with ``u.log_message``. Returns the parsed
    ``evaluation.json`` plus the resume flag; exits the process if the
    evaluation file cannot be read or decoded.
    """
    global subcommand_list
    output_dir = u.check_dir(os.path.join(
        "%s%s" % (args.output_dir, node_threshold), "evaluation.json"))
    command_args = (COMMANDS["node_threshold"] % (
        datasets_file, node_threshold, output_dir)).split()
    command_args.extend(u.get_options_list(args, common_options,
                                           prioritary=command_args))
    command = " ".join(command_args)
    if resume:
        stored_command = subcommand_list.pop().strip()
        if stored_command != command:
            # Stored command diverges: stop resuming and execute.
            resume = False
            u.log_message("%s\n" % command, log_file=subcommand_file,
                          console=False)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            # Last stored command: let bigmler resume it itself.
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.log_message("%s\n" % command, log_file=subcommand_file,
                      console=False)
        main_dispatcher(args=command_args)
    evaluation_file = os.path.join(output_dir, "evaluation.json")
    try:
        with open(evaluation_file) as evaluation_handler:
            return json.load(evaluation_handler), resume
    except (ValueError, IOError):
        sys.exit("Failed to retrieve evaluation.")
def create_kfold_datasets(dataset, args, selecting_file_list, objective,
                          kfold_field, common_options, resume=False):
    """Call the bigmler procedure that creates the k-fold datasets.

    First dispatches one "selection" subcommand per sampling file, then
    walks the generated datasets file and dispatches one "objective"
    subcommand per dataset to set its objective field.

    Returns the datasets file path and the (possibly updated) resume
    flag.
    """
    global subcommand_list

    def _replay_or_dispatch(command, command_args, resume):
        # Shared step: when resuming, compare against the stored
        # subcommand; otherwise log the command and run it.
        if resume:
            next_command = subcommand_list.pop().strip()
            if next_command != command:
                resume = False
                u.log_message("%s\n" % command, log_file=subcommand_file,
                              console=False)
                main_dispatcher(args=command_args)
            elif not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            u.log_message("%s\n" % command, log_file=subcommand_file,
                          console=False)
            main_dispatcher(args=command_args)
        return resume

    args.output_dir = os.path.join(args.output_dir, "test")
    output_dir = args.output_dir
    k = args.k_folds  # read for parity with the original code; unused below
    # creating the selecting datasets
    for index in range(0, len(selecting_file_list)):
        command = COMMANDS["selection"] % (
            dataset, selecting_file_list[index], output_dir)
        command_args = command.split()
        command_args.extend(u.get_options_list(args, common_options,
                                               prioritary=command_args))
        resume = _replay_or_dispatch(" ".join(command_args), command_args,
                                     resume)
    # updating the datasets to set the objective field
    datasets_file = os.path.join(output_dir, "dataset_gen")
    with open(datasets_file) as datasets_handler:
        for line in datasets_handler:
            dataset_id = line.strip()
            # NOTE(review): "index" here is the leftover value from the
            # selection loop above, so every dataset gets the same
            # "dataset_<last index>" name (and this raises NameError if
            # selecting_file_list is empty). Looks suspicious but is
            # preserved as-is — confirm the intended naming.
            command = COMMANDS["objective"] % (dataset_id,
                                               "dataset_%s" % index,
                                               output_dir)
            command_args = command.split()
            command_args.extend(["--objective", objective])
            command_args.extend(u.get_options_list(args, common_options,
                                                   prioritary=command_args))
            resume = _replay_or_dispatch(" ".join(command_args),
                                         command_args, resume)
    return datasets_file, resume