def create_kfold_datasets(dataset, args, selecting_file_list, command_obj,
                          resume=False):
    """Calling the bigmler procedure to create the k-fold datasets

    """
    args.output_dir = os.path.normpath(os.path.join(args.output_dir, "test"))
    output_dir = args.output_dir
    global subcommand_list
    # creating the selecting datasets
    for index in range(0, len(selecting_file_list)):
        command = COMMANDS["selection"] % (dataset,
                                           selecting_file_list[index],
                                           output_dir)
        command_args = command.split()
        command_obj.propagate(command_args)
        command = rebuild_command(command_args)
        if resume:
            next_command = subcommand_list.pop()
            if different_command(next_command, command):
                resume = False
                u.sys_log_message(command, log_file=subcommand_file)
                main_dispatcher(args=command_args)
            elif not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
    datasets_file = os.path.normpath(os.path.join(output_dir, "dataset_gen"))
    return datasets_file, resume
def delete_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    if command_args.resume:
        command_args, session_file, _ = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    delete_resources(command_args, api)

    u.log_message("_" * 80 + "\n", log_file=session_file)
def create_kfold_datasets(dataset, args, selecting_file_list, common_options,
                          resume=False):
    """Calling the bigmler procedure to create the k-fold datasets

    """
    args.output_dir = os.path.normpath(os.path.join(args.output_dir, "test"))
    output_dir = args.output_dir
    global subcommand_list
    # creating the selecting datasets
    for index in range(0, len(selecting_file_list)):
        command = COMMANDS["selection"] % (
            dataset, selecting_file_list[index], output_dir)
        command_args = command.split()
        common_options_list = u.get_options_list(args, common_options,
                                                 prioritary=command_args)
        command_args.extend(common_options_list)
        command = rebuild_command(command_args)
        if resume:
            next_command = subcommand_list.pop()
            if different_command(next_command, command):
                resume = False
                u.sys_log_message(command, log_file=subcommand_file)
                main_dispatcher(args=command_args)
            elif not subcommand_list:
                main_dispatcher(args=['main', '--resume'])
                resume = False
        else:
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
    datasets_file = os.path.normpath(os.path.join(output_dir, "dataset_gen"))
    return datasets_file, resume
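# The resume-or-dispatch block just used appears verbatim in most of the
# subcommand builders in this file. A sketch of how it could be factored
# out is shown below; checked_call is a hypothetical helper name, not part
# of the BigMLer codebase, and it assumes the module-level subcommand_list
# and subcommand_file names used by the functions above.
def checked_call(command, command_args, resume):
    """Replays or logs `command` and hands `command_args` to main_dispatcher.

    Returns the possibly updated resume flag, mirroring the inline pattern
    in create_kfold_datasets and its siblings. (create_package uses the
    same pattern with execute_dispatcher and ['execute', '--resume'].)
    """
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            # the stored command differs: stop resuming and re-issue it
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            # last stored command consumed: resume the main subcommand
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    return resume

# usage inside the loop above would then collapse to:
# resume = checked_call(command, command_args, resume)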
def create_prediction_dataset(base_path, folder, args, resume):
    """Creates batch prediction datasets and a multidataset with the
       prediction results for the best scoring model in the folder set
       by the argument

    """
    args.output_dir = os.path.join(base_path, "%s_pred" % folder)
    output_dir = args.output_dir
    folder = os.path.join(base_path, folder)
    model_type = "ensembles" if hasattr(args, "number_of_models") and \
        args.number_of_models > 1 else "models"
    global subcommand_list
    # creating the predictions CSV file
    command = COMMANDS["prediction"] % (base_path, model_type, folder,
                                        model_type, folder)
    command_args = command.split()
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    return resume
def main_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) default_output = ('evaluation' if command_args.evaluate else 'predictions.csv') resume = command_args.resume if command_args.resume: command_args, session_file, output_dir = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) default_output = ('evaluation' if command_args.evaluate else 'predictions.csv') if command_args.predictions is None: command_args.predictions = os.path.join(output_dir, default_output) else: if command_args.output_dir is None: command_args.output_dir = a.NOW if command_args.predictions is None: command_args.predictions = os.path.join(command_args.output_dir, default_output) if len(os.path.dirname(command_args.predictions).strip()) == 0: command_args.predictions = os.path.join(command_args.output_dir, command_args.predictions) directory = u.check_dir(command_args.predictions) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: defaults_file = open(DEFAULTS_FILE, 'r') contents = defaults_file.read() defaults_file.close() defaults_copy = open(os.path.join(directory, DEFAULTS_FILE), 'w', 0) defaults_copy.write(contents) defaults_copy.close() except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) if (a.has_train(command_args) or a.has_test(command_args) or command_args.votes_dirs): output_args = a.get_output_args(api, command_args, resume) a.transform_args(command_args, command.flags, api, command.user_defaults) compute_output(**output_args) u.log_message("_" * 80 + "\n", log_file=session_file)
def create_prediction_dataset(base_path, folder, args, resume):
    """Creates batch prediction datasets and a multidataset with the
       prediction results for the best scoring model in the folder set
       by the argument

    """
    args.output_dir = os.path.join(base_path, "%s_pred" % folder)
    folder = os.path.join(base_path, folder)
    model_type = "ensembles" if hasattr(args, "number_of_models") and \
        args.number_of_models > 1 else "models"
    global subcommand_list
    # creating the predictions CSV file
    command = COMMANDS["prediction"] % (base_path, model_type, folder,
                                        model_type, folder)
    command_args = command.split()
    if resume:
        next_command = subcommand_list.pop()
        if different_command(next_command, command):
            resume = False
            u.sys_log_message(command, log_file=subcommand_file)
            main_dispatcher(args=command_args)
        elif not subcommand_list:
            main_dispatcher(args=['main', '--resume'])
            resume = False
    else:
        u.sys_log_message(command, log_file=subcommand_file)
        main_dispatcher(args=command_args)
    return resume
def cluster_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) resume = command_args.resume if command_args.resume: command_args, session_file, output_dir = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) if command_args.predictions is None: command_args.predictions = os.path.join(output_dir, DEFAULT_OUTPUT) else: if command_args.output_dir is None: command_args.output_dir = a.NOW if command_args.predictions is None: command_args.predictions = os.path.join(command_args.output_dir, DEFAULT_OUTPUT) if len(os.path.dirname(command_args.predictions).strip()) == 0: command_args.predictions = os.path.join(command_args.output_dir, command_args.predictions) directory = u.check_dir(command_args.predictions) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: defaults_file = open(DEFAULTS_FILE, 'r') contents = defaults_file.read() defaults_file.close() defaults_copy = open(os.path.join(directory, DEFAULTS_FILE), 'w', 0) defaults_copy.write(contents) defaults_copy.close() except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) # Selects the action to perform if (a.has_train(command_args) or a.has_test(command_args) or command_args.cluster_datasets is not None): output_args = a.get_output_args(api, command_args, resume) a.transform_args(command_args, command.flags, api, command.user_defaults) compute_output(**output_args) u.log_message("_" * 80 + "\n", log_file=session_file)
def cluster_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) resume = command_args.resume if command_args.resume: # Keep the debug option if set debug = command_args.debug # Restore the args of the call to resume from the command log file stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG) command = Command(None, stored_command=stored_command) # Logs the issued command and the resumed command session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG) stored_command.log_command(session_file=session_file) # Parses resumed arguments. command_args = a.parse_and_check(command) if command_args.predictions is None: command_args.predictions = os.path.join(stored_command.output_dir, DEFAULT_OUTPUT) else: if command_args.output_dir is None: command_args.output_dir = a.NOW if command_args.predictions is None: command_args.predictions = os.path.join(command_args.output_dir, DEFAULT_OUTPUT) if len(os.path.dirname(command_args.predictions).strip()) == 0: command_args.predictions = os.path.join(command_args.output_dir, command_args.predictions) directory = u.check_dir(command_args.predictions) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: defaults_file = open(DEFAULTS_FILE, "r") contents = defaults_file.read() defaults_file.close() defaults_copy = open(os.path.join(directory, DEFAULTS_FILE), "w", 0) defaults_copy.write(contents) defaults_copy.close() except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) # Creates the corresponding api instance if resume and debug: command_args.debug = True api = a.get_api_instance(command_args, u.check_dir(session_file)) # Selects the action to perform if has_train(command_args) or has_test(command_args) or command_args.cluster_datasets is not None: output_args = a.get_output_args(api, command_args, resume) a.transform_args(command_args, command.flags, api, command.user_defaults) compute_output(**output_args) u.log_message("_" * 80 + "\n", log_file=session_file)
def create_kfold_evaluations(datasets_file, args, command_obj, resume=False, counter=0): """ Create k-fold cross-validation from a datasets file """ global subcommand_list output_dir = os.path.normpath( u.check_dir( os.path.join(u"%s%s" % (args.output_dir, counter), u"evaluation.json"))) model_fields = args.model_fields name_suffix = "_subset_%s" % counter name_max_length = NAME_MAX_LENGTH - len(name_suffix) name = "%s%s" % (args.name[0:name_max_length], name_suffix) dataset_id = u.read_datasets(datasets_file)[0] model_dataset = os.path.normpath( os.path.join(u.check_dir(datasets_file), dataset_id.replace("/", "_"))) command = COMMANDS["create_cv"] % (datasets_file, output_dir, name, model_dataset) command_args = command.split() if model_fields: command_args.append("--model-fields") command_args.append(model_fields) command_args.append("--objective") command_args.append(args.objective_field) command_args = add_model_options(command_args, args) """ common_options_list = u.get_options_list(args, command_obj.common_options, prioritary=command_args) command_args.extend(common_options_list) """ command_obj.propagate( command_args, exclude=["--dataset", "--datasets", "--dataset-file"]) command = rebuild_command(command_args) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) elif not subcommand_list: main_dispatcher(args=['main', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) evaluation_file = os.path.normpath( os.path.join(output_dir, "evaluation.json")) try: with open(evaluation_file) as evaluation_handler: evaluation = json.loads(evaluation_handler.read()) return evaluation, resume except (ValueError, IOError): sys.exit("Failed to retrieve evaluation.")
def logistic_regression_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) default_output = ('evaluation' if command_args.evaluate else 'predictions.csv') resume = command_args.resume if command_args.resume: command_args, session_file, output_dir = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) default_output = ('evaluation' if command_args.evaluate else 'predictions.csv') if command_args.predictions is None: command_args.predictions = os.path.join(output_dir, default_output) else: if command_args.output_dir is None: command_args.output_dir = a.NOW if command_args.predictions is None: command_args.predictions = os.path.join(command_args.output_dir, default_output) if len(os.path.dirname(command_args.predictions).strip()) == 0: command_args.predictions = os.path.join(command_args.output_dir, command_args.predictions) directory = u.check_dir(command_args.predictions) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE)) except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) # Selects the action to perform if (a.has_train(command_args) or a.has_test(command_args) or command_args.export_fields): output_args = a.get_output_args(api, command_args, resume) a.transform_args(command_args, command.flags, api, command.user_defaults) compute_output(**output_args) u.log_message("_" * 80 + "\n", log_file=session_file)
def create_kfold_evaluations(datasets_file, args, command_obj, resume=False, counter=0): """ Create k-fold cross-validation from a datasets file """ global subcommand_list output_dir = os.path.normpath( u.check_dir(os.path.join(u"%s%s" % (args.output_dir, counter), u"evaluation.json"))) model_fields = args.model_fields name_suffix = "_subset_%s" % counter name_max_length = NAME_MAX_LENGTH - len(name_suffix) name = "%s%s" % (args.name[0: name_max_length], name_suffix) dataset_id = u.read_datasets(datasets_file)[0] model_dataset = os.path.normpath( os.path.join(u.check_dir(datasets_file), dataset_id.replace("/", "_"))) command = COMMANDS["create_cv"] % (datasets_file, output_dir, name, model_dataset) command_args = command.split() if model_fields: command_args.append("--model-fields") command_args.append(model_fields) command_args.append("--objective") command_args.append(args.objective_field) command_args = add_model_options(command_args, args) """ common_options_list = u.get_options_list(args, command_obj.common_options, prioritary=command_args) command_args.extend(common_options_list) """ command_obj.propagate(command_args, exclude=["--dataset", "--datasets", "--dataset-file"]) command = rebuild_command(command_args) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) elif not subcommand_list: main_dispatcher(args=['main', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) evaluation_file = os.path.normpath(os.path.join(output_dir, "evaluation.json")) try: with open(evaluation_file) as evaluation_handler: evaluation = json.loads(evaluation_handler.read()) return evaluation, resume except (ValueError, IOError): sys.exit("Failed to retrieve evaluation.")
def whizzml_dispatcher(args=sys.argv[1:]): """Main processing of the parsed options for BigMLer whizzml """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = command.parser.parse_args(command.args) resume = command_args.resume if resume: command_args, session_file, _ = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) else: if command_args.output_dir is None: command_args.output_dir = a.NOW session_file = os.path.join(command_args.output_dir, SESSIONS_LOG) # If logging is required, open the file for logging log = None if command_args.log_file: u.check_dir(command_args.log_file) log = command_args.log_file # If --clear_logs the log files are cleared if command_args.clear_logs: clear_log_files([log]) u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir), log_file=DIRS_LOG) session_file = os.path.join(command_args.output_dir, SESSIONS_LOG) # create api instance form args api = a.get_api_instance(command_args, u.check_dir(session_file)) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) a.transform_dataset_options(command_args, api) # package_dir if command_args.package_dir is not None: create_package(command_args, api, command.common_options, resume=resume) else: sys.exit("You must use the --package-dir flag pointing to the" " directory where the metadata.json file is. Type\n" " bigmler whizzml --help\n" " to see all the available options.")
def cluster_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) resume = command_args.resume if command_args.resume: command_args, session_file, output_dir = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) if command_args.predictions is None: command_args.predictions = os.path.join(output_dir, DEFAULT_OUTPUT) else: if command_args.output_dir is None: command_args.output_dir = a.NOW if command_args.predictions is None: command_args.predictions = os.path.join(command_args.output_dir, DEFAULT_OUTPUT) if len(os.path.dirname(command_args.predictions).strip()) == 0: command_args.predictions = os.path.join(command_args.output_dir, command_args.predictions) directory = u.check_dir(command_args.predictions) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE)) except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) # Selects the action to perform if (a.has_train(command_args) or a.has_test(command_args) or command_args.cluster_datasets is not None or command_args.export_fields is not None): output_args = a.get_output_args(api, command_args, resume) a.transform_args(command_args, command.flags, api, command.user_defaults) compute_output(**output_args) u.log_message("_" * 80 + "\n", log_file=session_file)
def command_handling(args, log=COMMAND_LOG):
    """Rebuilds command string, logs it for --resume future requests and
       parses it.

    """
    # Create the Command object
    command = Command(args, None)

    # Resume calls are not logged
    if not command.resume:
        u.sys_log_message(command.command.replace("\\", "\\\\"),
                          log_file=log)

    return command
def command_handling(args, log=COMMAND_LOG):
    """Rebuilds command string, logs it for --resume future requests and
       parses it.

    """
    # Create the Command object
    command = Command(args, None)

    # Resume calls are not logged
    if not command.resume:
        u.sys_log_message(command.command.replace('\\', '\\\\'),
                          log_file=log)

    return command
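# Every *_dispatcher in this module follows the same opening sequence:
# command_handling() rebuilds and logs the command, a.parse_and_check()
# turns it into an args object, and a --resume call restores the stored
# context instead. The condensed skeleton below (example_dispatcher is a
# hypothetical name, not part of BigMLer) just distills that shared shape
# from the dispatchers in this file:
def example_dispatcher(args=sys.argv[1:]):
    """Minimal sketch of the common dispatcher skeleton."""
    command = command_handling(args, COMMAND_LOG)
    command_args = a.parse_and_check(command)
    if command_args.resume:
        # restore args, session file and output dir from the stored command
        command_args, session_file, output_dir = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    # ... subcommand-specific processing would go here ...
    u.log_message("_" * 80 + "\n", log_file=session_file)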
def delete_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) resume = command_args.resume if command_args.resume: # Keep the debug option if set debug = command_args.debug # Restore the args of the call to resume from the command log file stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG) command = Command(None, stored_command=stored_command) # Logs the issued command and the resumed command session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG) stored_command.log_command(session_file=session_file) # Parses resumed arguments. command_args = a.parse_and_check(command) else: if command_args.output_dir is None: command_args.output_dir = a.NOW directory = u.check_dir(os.path.join(command_args.output_dir, "tmp")) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: defaults_file = open(DEFAULTS_FILE, 'r') contents = defaults_file.read() defaults_file.close() defaults_copy = open(os.path.join(directory, DEFAULTS_FILE), 'w', 0) defaults_copy.write(contents) defaults_copy.close() except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) # Creates the corresponding api instance if resume and debug: command_args.debug = True api = a.get_api_instance(command_args, u.check_dir(session_file)) delete_resources(command_args, api) u.log_message("_" * 80 + "\n", log_file=session_file)
def get_cmd_context(args, settings): """Parses the args array to create an args object storing the defaults and user-given values. It also sets the output directory and the log files. """ command = command_handling(args, settings['command_log']) # Parses command line arguments. command_args = a.parse_and_check(command) resume = command_args.resume if command_args.resume: command_args, session_file, output_dir = get_stored_command( args, command_args.debug, command_log=settings['command_log'], dirs_log=settings["dirs_log"], sessions_log=settings['sessions_log']) if settings.get('default_output') is None: settings['default_output'] = "tmp.txt" if not hasattr(command_args, "output") or command_args.output is None: command_args.output = os.path.join(output_dir, settings['default_output']) else: if hasattr(command_args, "output") and \ command_args.output is not None: command_args.output_dir = u.check_dir(command_args.output) if command_args.output_dir is None: command_args.output_dir = a.NOW if settings.get('default_output') is None: settings['default_output'] = "tmp.txt" if not hasattr(command_args, "output") or command_args.output is None: command_args.output = os.path.join(command_args.output_dir, settings['default_output']) if not os.path.dirname(command_args.output).strip(): command_args.output = os.path.join(command_args.output_dir, command_args.output) directory = u.check_dir(command_args.output) session_file = os.path.join(directory, settings['sessions_log']) u.log_message(command.command + "\n", log_file=session_file) if settings.get('defaults_file') is not None: try: shutil.copy(settings['defaults_file'], os.path.join(directory, settings['defaults_file'])) except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=settings['dirs_log']) return command_args, command, session_file, resume
def create_candidates_evaluations(datasets_file, args, command_obj, resume=False, random_candidates=DEFAULT_MIN_CANDIDATES): """ Create random candidates ensembles evaluations """ global subcommand_list output_dir = os.path.normpath( u.check_dir( os.path.join(u"%s%s" % (args.output_dir, random_candidates), "evaluation.json"))) command = COMMANDS["random_candidates"] % (datasets_file, random_candidates, output_dir) command_args = command.split() """ common_options_list = u.get_options_list(args, command_obj.common_options, prioritary=command_args) command_args.extend(common_options_list) """ command_args.append("--objective") command_args.append(args.objective_field) command_args = add_model_options(command_args, args) command_obj.propagate( command_args, exclude=["--dataset", "--datasets", "--dataset-file"]) command = rebuild_command(command_args) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) elif not subcommand_list: main_dispatcher(args=['main', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) evaluation_file = os.path.normpath( os.path.join(output_dir, "evaluation.json")) try: with open(evaluation_file, u.open_mode("r")) as evaluation_handler: evaluation = json.loads(evaluation_handler.read()) return evaluation, resume except (ValueError, IOError): sys.exit("Failed to retrieve evaluation.")
def execute_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) default_output = 'whizzml_results' # Parses command line arguments. command_args = a.parse_and_check(command) resume = command_args.resume if command_args.resume: command_args, session_file, output_dir = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) if command_args.output is None: command_args.output = os.path.join(output_dir, default_output) else: if command_args.output_dir is None: command_args.output_dir = a.NOW if command_args.output is None: command_args.output = os.path.join(command_args.output_dir, default_output) if len(os.path.dirname(command_args.output).strip()) == 0: command_args.output = os.path.join(command_args.output_dir, command_args.output) directory = u.check_dir(command_args.output) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE)) except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) _ = a.get_output_args(api, command_args, resume) a.transform_args(command_args, command.flags, api, command.user_defaults) execute_whizzml(command_args, api, session_file) u.log_message("_" * 80 + "\n", log_file=session_file)
def get_cmd_context(args, settings): """Parses the args array to create an args object storing the defaults and user-given values. It also sets the output directory and the log files. """ command = command_handling(args, settings['command_log']) # Parses command line arguments. command_args = a.parse_and_check(command) resume = command_args.resume if command_args.resume: command_args, session_file, output_dir = get_stored_command( args, command_args.debug, command_log=settings['command_log'], dirs_log=settings["dirs_log"], sessions_log=settings['sessions_log']) if settings.get('default_output') is None: settings['default_output'] = "tmp.txt" if not hasattr(command_args, "output") or command_args.output is None: command_args.output = os.path.join(output_dir, settings['default_output']) else: if hasattr(command_args, "output") and \ command_args.output is not None: command_args.output_dir = u.check_dir(command_args.output) if command_args.output_dir is None: command_args.output_dir = a.NOW if settings.get('default_output') is None: settings['default_output'] = "tmp.txt" if not hasattr(command_args, "output") or command_args.output is None: command_args.output = os.path.join(command_args.output_dir, settings['default_output']) if len(os.path.dirname(command_args.output).strip()) == 0: command_args.output = os.path.join(command_args.output_dir, command_args.output) directory = u.check_dir(command_args.output) session_file = os.path.join(directory, settings['sessions_log']) u.log_message(command.command + "\n", log_file=session_file) if settings.get('defaults_file') is not None: try: shutil.copy(settings['defaults_file'], os.path.join(directory, settings['defaults_file'])) except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=settings['dirs_log']) return command_args, command, session_file, resume
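# get_cmd_context drives its behaviour from the `settings` dict passed in.
# The keys it reads above are 'command_log', 'dirs_log', 'sessions_log',
# 'default_output' and 'defaults_file'. A minimal illustrative caller
# follows; the constant names are the ones used elsewhere in this module
# and the "tmp.txt" default mirrors the fallback inside the function:
SETTINGS = {
    "command_log": COMMAND_LOG,      # log of issued commands (for --resume)
    "dirs_log": DIRS_LOG,            # log of output directories
    "sessions_log": SESSIONS_LOG,    # per-directory session log file name
    "default_output": "tmp.txt",     # used when no --output is given
    "defaults_file": DEFAULTS_FILE,  # optional, copied to the output dir
}

command_args, command, session_file, resume = get_cmd_context(
    sys.argv[1:], SETTINGS)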
def create_candidates_evaluations(datasets_file, args, command_obj, resume=False, random_candidates=DEFAULT_MIN_CANDIDATES): """ Create random candidates ensembles evaluations """ global subcommand_list output_dir = os.path.normpath(u.check_dir( os.path.join(u"%s%s" % (args.output_dir, random_candidates), "evaluation.json"))) command = COMMANDS["random_candidates"] % ( datasets_file, random_candidates, output_dir) command_args = command.split() """ common_options_list = u.get_options_list(args, command_obj.common_options, prioritary=command_args) command_args.extend(common_options_list) """ command_args.append("--objective") command_args.append(args.objective_field) command_args = add_model_options(command_args, args) command_obj.propagate(command_args, exclude=["--dataset", "--datasets", "--dataset-file"]) command = rebuild_command(command_args) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) elif not subcommand_list: main_dispatcher(args=['main', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) evaluation_file = os.path.normpath(os.path.join(output_dir, "evaluation.json")) try: with open(evaluation_file, u.open_mode("r")) as evaluation_handler: evaluation = json.loads(evaluation_handler.read()) return evaluation, resume except (ValueError, IOError): sys.exit("Failed to retrieve evaluation.")
def create_node_th_evaluations(datasets_file, args, common_options, resume=False, node_threshold=DEFAULT_MIN_NODES): """ Create node_threshold evaluations """ global subcommand_list output_dir = os.path.normpath( u.check_dir( os.path.join(u"%s%s" % (args.output_dir, node_threshold), "evaluation.json"))) command = COMMANDS["node_threshold"] % (datasets_file, node_threshold, output_dir) command_args = command.split() common_options_list = u.get_options_list(args, common_options, prioritary=command_args) command_args.extend(common_options_list) command_args.append("--objective") command_args.append(args.objective_field) command_args = add_model_options(command_args, args) command = rebuild_command(command_args) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) elif not subcommand_list: main_dispatcher(args=['main', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) evaluation_file = os.path.normpath( os.path.join(output_dir, "evaluation.json")) try: with open(evaluation_file, u.open_mode("r")) as evaluation_handler: evaluation = json.loads(evaluation_handler.read()) return evaluation, resume except (ValueError, IOError): sys.exit("Failed to retrieve evaluation.")
def create_node_th_evaluations(datasets_file, args, common_options, resume=False, node_threshold=DEFAULT_MIN_NODES): """ Create node_threshold evaluations """ global subcommand_list output_dir = os.path.normpath(u.check_dir( os.path.join(u"%s%s" % (args.output_dir, node_threshold), "evaluation.json"))) command = COMMANDS["node_threshold"] % ( datasets_file, node_threshold, output_dir) command_args = command.split() common_options_list = u.get_options_list(args, common_options, prioritary=command_args) command_args.extend(common_options_list) command_args.append("--objective") command_args.append(args.objective_field) command_args = add_model_options(command_args, args) command = rebuild_command(command_args) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) elif not subcommand_list: main_dispatcher(args=['main', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) evaluation_file = os.path.normpath(os.path.join(output_dir, "evaluation.json")) try: with open(evaluation_file, u.open_mode("r")) as evaluation_handler: evaluation = json.loads(evaluation_handler.read()) return evaluation, resume except (ValueError, IOError): sys.exit("Failed to retrieve evaluation.")
def analyze_dispatcher(args=sys.argv[1:]): """Main processing of the parsed options for BigMLer analyze """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = command.parser.parse_args(command.args) resume = command_args.resume if resume: # Keep the debug option if set debug = command_args.debug # Restore the args of the call to resume from the command log file stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG) command = Command(None, stored_command=stored_command) # Logs the issued command and the resumed command session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG) stored_command.log_command(session_file=session_file) # Parses resumed arguments. command_args = command.parser.parse_args(command.args) command_args.debug = debug else: if command_args.output_dir is None: command_args.output_dir = a.NOW session_file = os.path.join(command_args.output_dir, SESSIONS_LOG) # If logging is required, open the file for logging log = None if command_args.log_file: u.check_dir(command_args.log_file) log = command_args.log_file # If --clear_logs the log files are cleared if command_args.clear_logs: clear_log_files([log]) if command_args.model_fields: model_fields = command_args.model_fields.split(',') command_args.model_fields_ = [ model_field.strip() for model_field in model_fields ] else: command_args.model_fields_ = {} u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir), log_file=DIRS_LOG) session_file = os.path.join(command_args.output_dir, SESSIONS_LOG) # create api instance form args api = a.get_api_instance(command_args, u.check_dir(session_file)) # --maximize flag will be deprecated. Use --optimize flag. if command_args.maximize is not None and command_args.optimize is None: command_args.optimize = command_args.maximize incompatible_flags = [ command_args.cv, command_args.features, command_args.nodes ] if sum([int(bool(flag)) for flag in incompatible_flags]) > 1: sys.exit("The following flags cannot be used together:\n --features" "\n --cross-validation\n --nodes") # k-fold cross-validation if command_args.cv and command_args.dataset is not None: create_kfold_cv(command_args, api, command.common_options, resume=resume) # features analysis if command_args.features: create_features_analysis(command_args, api, command.common_options, resume=resume) # node threshold analysis if command_args.nodes: create_nodes_analysis(command_args, api, command.common_options, resume=resume)
def create_kfold_datasets(dataset, args, selecting_file_list, objective, common_options, resume=False): """Calling the bigmler procedure to create the k-fold datasets """ args.output_dir = os.path.normpath(os.path.join(args.output_dir, "test")) output_dir = args.output_dir global subcommand_list # creating the selecting datasets for index in range(0, len(selecting_file_list)): command = COMMANDS["selection"] % ( dataset, selecting_file_list[index], output_dir) command_args = command.split() common_options_list = u.get_options_list(args, common_options, prioritary=command_args) command_args.extend(common_options_list) command = rebuild_command(command_args) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) elif not subcommand_list: main_dispatcher(args=['main', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) # updating the datasets to set the objective field datasets_file = os.path.normpath(os.path.join(output_dir, "dataset_gen")) with open(datasets_file) as datasets_handler: index = 0 for line in datasets_handler: dataset_id = line.strip() command = COMMANDS["objective"] % (dataset_id, "dataset_%s" % index, output_dir) command_args = command.split() command_args.append("--objective") command_args.append(objective) common_options_list = u.get_options_list(args, common_options, prioritary=command_args) command_args.extend(common_options_list) command = rebuild_command(command_args) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) elif not subcommand_list: main_dispatcher(args=['main', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) main_dispatcher(args=command_args) index += 1 return datasets_file, resume
def project_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) if command_args.resume: command_args, session_file, _ = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) else: if command_args.output_dir is None: command_args.output_dir = a.NOW directory = u.check_dir("%s/x.txt" % command_args.output_dir) command_args.output_dir = directory session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) directory = u.check_dir(os.path.join(command_args.output_dir, "tmp")) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE)) except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) path = u.check_dir("%s/x.txt" % command_args.output_dir) session_file = u"%s%s%s" % (path, os.sep, SESSIONS_LOG) # If logging is required set the file for logging log = None if command_args.log_file: u.check_dir(command_args.log_file) log = command_args.log_file # If --clear_logs the log files are cleared clear_log_files([log]) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) a.get_output_args(api, command_args, command_args.resume) a.attribute_args(command_args) if not command_args.project_id and command_args.name: command_args.project = command_args.name if command_args.project: # create project pp.project_processing(api, command_args, command_args.resume, session_file=session_file, path=path, log=log, create=True) if command_args.project_id and (command_args.project_attributes or command_args.name or command_args.tag or command_args.description or command_args.category): # update project's attributes pp.update_project(command_args, api, command_args.resume, \ session_file=session_file) u.log_message("_" * 80 + "\n", log_file=session_file) u.print_generated_files(command_args.output_dir, log_file=session_file, verbosity=command_args.verbosity)
def project_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) if command_args.resume: command_args, session_file, _ = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) else: if command_args.output_dir is None: command_args.output_dir = a.NOW directory = u.check_dir("%s/x.txt" % command_args.output_dir) command_args.output_dir = directory session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) directory = u.check_dir(os.path.join(command_args.output_dir, "tmp")) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE)) except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) path = u.check_dir("%s/x.txt" % command_args.output_dir) session_file = u"%s%s%s" % (path, os.sep, SESSIONS_LOG) # If logging is required set the file for logging log = None if command_args.log_file: u.check_dir(command_args.log_file) log = command_args.log_file # If --clear_logs the log files are cleared clear_log_files([log]) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) a.get_output_args(api, command_args, command_args.resume) a.attribute_args(command_args) if not command_args.project_id and command_args.name: command_args.project = command_args.name if command_args.project: # create project pp.project_processing( api, command_args, command_args.resume, session_file=session_file, path=path, log=log, create=True) if command_args.project_id and ( command_args.project_attributes or command_args.name or command_args.tag or command_args.description or command_args.category): # update project's attributes pp.update_project(command_args, api, command_args.resume, \ session_file=session_file) u.log_message("_" * 80 + "\n", log_file=session_file) u.print_generated_files(command_args.output_dir, log_file=session_file, verbosity=command_args.verbosity)
def create_package(args, api, command_obj, resume=False): """Creates the package whizzml resources as referred in the metadata.json file. """ set_subcommand_file(args.output_dir) if resume: retrieve_subcommands() # read the metadata.json information message = ('Reading the metadata.json files.........\n') u.log_message(message, log_file=session_file, console=args.verbosity) package_dir = args.package_dir output_dir = args.output_dir metadata_file = os.path.join(package_dir, METADATA_FILE) metadata = None with open(metadata_file) as metadata_handler: metadata = json.load(metadata_handler) # recurse into components/directories, if any if metadata.get("kind") == "package" and 'components' in metadata: components = metadata.get("components") for component in components: message = ('Inspecting component %s.........\n' % component) u.log_message(message, log_file=session_file, console=args.verbosity) args.package_dir = os.path.join(package_dir, component) create_package(args, api, command_obj, resume=resume) args.package_dir = package_dir else: # create libraries or scripts imports = [] category = str(metadata.get("category", DFT_CATEGORY)) if metadata.get("imports") is not None: lib_imports = metadata.get("imports") for lib_import in lib_imports: args.package_dir = os.path.join(package_dir, lib_import) if args.embed_libs: library_ref = create_package( \ args, api, command_obj, resume=resume) u.log_created_resources("imports", output_dir, library_ref) else: try: # try to read the library id, if it is already there library_ref = read_library_id(os.path.join( \ output_dir, os.path.basename(args.package_dir))) except IOError: library_ref = create_package( \ args, api, command_obj, resume=resume) library_ref = read_library_id(os.path.join( \ output_dir, os.path.basename(args.package_dir))) imports.append(library_ref) args.package_dir = package_dir # read the metadata.json information message = ('Creating the %s.........\n' % metadata.get("kind")) u.log_message(message, log_file=session_file, console=args.verbosity) if metadata.get("kind") in WHIZZML_RESOURCES: whizzml_code = os.path.normpath(os.path.join(args.package_dir, \ metadata.get("source_code", "%s.whizzml" % \ metadata.get("kind")))) if args.embed_libs and metadata.get("kind") == WHIZZML_LIBRARY: return whizzml_code args.output_dir = os.path.join(output_dir, \ os.path.basename(package_dir)) # creating command to create the resource command = COMMANDS[metadata.get("kind")] % (whizzml_code, args.output_dir) command_args = command.split() bigml.util.check_dir(args.output_dir) # getting inputs and outputs for the script from metadata if "inputs" in metadata: inputs_file = os.path.join(args.output_dir, "inputs.json") u.write_to_utf8(inputs_file, json.dumps(metadata.get("inputs"))) command_args.extend(["--declare-inputs", inputs_file]) if "outputs" in metadata: outputs_file = os.path.join(args.output_dir, "outputs.json") u.write_to_utf8(outputs_file, json.dumps(metadata.get("outputs"))) command_args.extend(["--declare-outputs", outputs_file]) if "description" in metadata: desc_file = os.path.join(args.output_dir, "description.txt") u.write_to_utf8(desc_file, metadata.get("description")) command_args.extend(["--description", desc_file]) if metadata.get("name"): command_args.extend(["--name", metadata.get("name")]) if args.tag: for tag in args.tag: command_args.extend(["--tag", tag]) command_args.extend(["--category", category]) # adding imports, if any if imports: if args.embed_libs: # imports to be embedded are in the same output directory 
command_args.extend( \ ["--embedded-imports", os.path.join(output_dir, "imports")]) else: # imports to be refereced by ID command_args.extend(["--imports", ",".join(imports)]) command_args.extend(["--verbosity", str(args.verbosity)]) command_obj.propagate(command_args) # u.add_api_context(command_args, args) if args.upgrade: command_args.extend(["--upgrade"]) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) execute_dispatcher(args=command_args) elif not subcommand_list: execute_dispatcher(args=['execute', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) execute_dispatcher(args=command_args) args.output_dir = output_dir return whizzml_code return ""
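# create_package is driven entirely by the package's metadata.json. The
# keys the code above reads are "kind", "components", "imports",
# "source_code", "inputs", "outputs", "description", "name" and
# "category". A minimal illustrative metadata for a single script follows;
# the concrete field values are made-up examples, not taken from any real
# package:
import json

example_metadata = {
    "kind": "script",                    # "package", "library" or "script"
    "name": "example script",
    "description": "Illustrative script metadata.",
    "source_code": "script.whizzml",     # defaults to "<kind>.whizzml"
    "imports": [],                       # subdirectories holding libraries
    "inputs": [{"name": "input-dataset", "type": "dataset-id"}],
    "outputs": [{"name": "result", "type": "map"}],
}

with open("metadata.json", "w") as metadata_handler:
    json.dump(example_metadata, metadata_handler, indent=4)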
def analyze_dispatcher(args=sys.argv[1:]): """Main processing of the parsed options for BigMLer analyze """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = command.parser.parse_args(command.args) resume = command_args.resume if resume: command_args, session_file, _ = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) else: if command_args.output_dir is None: command_args.output_dir = a.NOW session_file = os.path.join(command_args.output_dir, SESSIONS_LOG) # If logging is required, open the file for logging log = None if command_args.log_file: u.check_dir(command_args.log_file) log = command_args.log_file # If --clear_logs the log files are cleared if command_args.clear_logs: clear_log_files([log]) if command_args.model_fields: model_fields = command_args.model_fields.split(',') command_args.model_fields_ = [model_field.strip() for model_field in model_fields] else: command_args.model_fields_ = {} u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir), log_file=DIRS_LOG) session_file = os.path.join(command_args.output_dir, SESSIONS_LOG) # create api instance form args api = a.get_api_instance(command_args, u.check_dir(session_file)) # --maximize flag will be deprecated. Use --optimize flag. if command_args.maximize is not None and command_args.optimize is None: command_args.optimize = command_args.maximize incompatible_flags = [command_args.cv, command_args.features, command_args.nodes, command_args.random_fields] if sum([int(bool(flag)) for flag in incompatible_flags]) > 1: sys.exit("The following flags cannot be used together:\n --features" "\n --cross-validation\n --nodes\n --random-fields") if (command_args.dataset is None and command_args.datasets is None and command_args.dataset_file is None): sys.exit("The analyze command needs an existing dataset ID. Please, " "use the --dataset flag.") if not any(incompatible_flags): sys.exit("You need to specify the type of analysis: features, node " "threshold, cross validation or random fields.") # k-fold cross-validation if command_args.cv and command_args.dataset is not None: create_kfold_cv(command_args, api, command.common_options, resume=resume) # features analysis elif command_args.features: create_features_analysis(command_args, api, command.common_options, resume=resume) # node threshold analysis elif command_args.nodes: create_nodes_analysis(command_args, api, command.common_options, resume=resume) # random fields analysis elif command_args.random_fields: create_candidates_analysis(command_args, api, command.common_options, resume=resume) else: sys.exit("You must choose one of the available analysis: --features," " --nodes, --random-fields or --cross-validation. Add" " your prefered option to" " the command line or type\n bigmler analyze --help\n" " to see all the available options.")
def anomaly_dispatcher(args=sys.argv[1:]): """Parses command line and calls the different processing functions """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = a.parse_and_check(command) resume = command_args.resume if command_args.resume: # Keep the debug option if set debug = command_args.debug # Restore the args of the call to resume from the command log file stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG) command = Command(None, stored_command=stored_command) # Logs the issued command and the resumed command session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG) stored_command.log_command(session_file=session_file) # Parses resumed arguments. command_args = a.parse_and_check(command) if command_args.predictions is None: command_args.predictions = os.path.join(stored_command.output_dir, DEFAULT_OUTPUT) else: if command_args.output_dir is None: command_args.output_dir = a.NOW if command_args.predictions is None: command_args.predictions = os.path.join(command_args.output_dir, DEFAULT_OUTPUT) if len(os.path.dirname(command_args.predictions).strip()) == 0: command_args.predictions = os.path.join(command_args.output_dir, command_args.predictions) directory = u.check_dir(command_args.predictions) session_file = os.path.join(directory, SESSIONS_LOG) u.log_message(command.command + "\n", log_file=session_file) try: defaults_file = open(DEFAULTS_FILE, 'r') contents = defaults_file.read() defaults_file.close() defaults_copy = open(os.path.join(directory, DEFAULTS_FILE), 'w', 0) defaults_copy.write(contents) defaults_copy.close() except IOError: pass u.sys_log_message(u"%s\n" % os.path.abspath(directory), log_file=DIRS_LOG) # Creates the corresponding api instance if resume and debug: command_args.debug = True api = a.get_api_instance(command_args, u.check_dir(session_file)) # Selects the action to perform if (has_train(command_args) or has_test(command_args)): output_args = a.get_output_args(api, command_args, resume) a.transform_args(command_args, command.flags, api, command.user_defaults) compute_output(**output_args) u.log_message("_" * 80 + "\n", log_file=session_file)
def create_package(args, api, common_options, resume=False): """Creates the package whizzml resources as referred in the metadata.json file. """ set_subcommand_file(args.output_dir) if resume: retrieve_subcommands() # read the metadata.json information message = ('Reading the metadata.json files.........\n') u.log_message(message, log_file=session_file, console=args.verbosity) package_dir = args.package_dir output_dir = args.output_dir metadata_file = os.path.join(package_dir, METADATA_FILE) metadata = None created_resources = [] with open(metadata_file) as metadata_handler: metadata = json.load(metadata_handler) # recurse into components/directories, if any if metadata.get("kind") == "package" and 'components' in metadata: components = metadata.get("components") for component in components: message = ('Inspecting component %s.........\n' % component) u.log_message(message, log_file=session_file, console=args.verbosity) args.package_dir = os.path.join(package_dir, component) create_package(args, api, common_options, resume=resume) args.package_dir = package_dir else: # create libraries or scripts imports = [] if metadata.get("imports") is not None: lib_imports = metadata.get("imports") for lib_import in lib_imports: args.package_dir = os.path.join(package_dir, lib_import) create_package(args, api, common_options, resume=resume) imports.append(read_library_id(os.path.join( \ output_dir, os.path.basename(args.package_dir)))) args.package_dir = package_dir # read the metadata.json information message = ('Creating the %s.........\n' % metadata.get("kind")) u.log_message(message, log_file=session_file, console=args.verbosity) if metadata.get("kind") in WHIZZML_RESOURCES: whizzml_code = os.path.join(args.package_dir, \ metadata.get("source_code", "%s.whizzml" % \ metadata.get("kind"))) args.output_dir = os.path.join(output_dir, \ os.path.basename(package_dir)) # creating command to create the resource command = COMMANDS[metadata.get("kind")] % (whizzml_code, args.output_dir) command_args = command.split() bigml.util.check_dir(args.output_dir) # getting inputs and outputs for the script from metadata if "inputs" in metadata: inputs_file = os.path.join(args.output_dir, "inputs.json") with open(inputs_file, "w") as inputs_handler: json.dump(metadata.get("inputs"), inputs_handler) command_args.extend(["--declare-inputs", inputs_file]) if "outputs" in metadata: outputs_file = os.path.join(args.output_dir, "outputs.json") with open(outputs_file, "w") as outputs_handler: json.dump(metadata.get("outputs"), outputs_handler) command_args.extend(["--declare-outputs", outputs_file]) if "description" in metadata: desc_file = os.path.join(args.output_dir, "description.txt") with open(desc_file, "w") as desc_handler: desc_handler.write(metadata.get("description")) command_args.extend(["--description", desc_file]) if metadata.get("name"): command_args.extend(["--name", metadata.get("name")]) # adding imports, if any if imports: command_args.extend(["--imports", ",".join(imports)]) command_args.extend(["--verbosity", str(args.verbosity)]) if resume: next_command = subcommand_list.pop() if different_command(next_command, command): resume = False u.sys_log_message(command, log_file=subcommand_file) execute_dispatcher(args=command_args) elif not subcommand_list: execute_dispatcher(args=['execute', '--resume']) resume = False else: u.sys_log_message(command, log_file=subcommand_file) execute_dispatcher(args=command_args) args.output_dir = output_dir
def analyze_dispatcher(args=sys.argv[1:]): """Main processing of the parsed options for BigMLer analyze """ # If --clear-logs the log files are cleared if "--clear-logs" in args: clear_log_files(LOG_FILES) command = command_handling(args, COMMAND_LOG) # Parses command line arguments. command_args = command.parser.parse_args(command.args) resume = command_args.resume if resume: command_args, session_file, _ = get_stored_command( args, command_args.debug, command_log=COMMAND_LOG, dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG) else: if command_args.output_dir is None: command_args.output_dir = a.NOW session_file = os.path.join(command_args.output_dir, SESSIONS_LOG) # If logging is required, open the file for logging log = None if command_args.log_file: u.check_dir(command_args.log_file) log = command_args.log_file # If --clear_logs the log files are cleared if command_args.clear_logs: clear_log_files([log]) if command_args.model_fields: model_fields = command_args.model_fields.split(',') command_args.model_fields_ = [ model_field.strip() for model_field in model_fields ] else: command_args.model_fields_ = {} u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir), log_file=DIRS_LOG) session_file = os.path.join(command_args.output_dir, SESSIONS_LOG) # create api instance form args api = a.get_api_instance(command_args, u.check_dir(session_file)) # Creates the corresponding api instance api = a.get_api_instance(command_args, u.check_dir(session_file)) a.transform_dataset_options(command_args, api) # --maximize flag will be deprecated. Use --optimize flag. if command_args.maximize is not None and command_args.optimize is None: command_args.optimize = command_args.maximize incompatible_flags = [ command_args.cv, command_args.features, command_args.nodes, command_args.random_fields ] if sum([int(bool(flag)) for flag in incompatible_flags]) > 1: sys.exit("The following flags cannot be used together:\n --features" "\n --cross-validation\n --nodes\n --random-fields") if (command_args.dataset is None and command_args.datasets is None and command_args.dataset_file is None): sys.exit("The analyze command needs an existing dataset ID. Please, " "use the --dataset flag.") if not any(incompatible_flags): sys.exit("You need to specify the type of analysis: features, node " "threshold, cross validation or random fields.") # k-fold cross-validation if command_args.cv and command_args.dataset is not None: create_kfold_cv(command_args, api, command.common_options, resume=resume) # features analysis elif command_args.features: create_features_analysis(command_args, api, command.common_options, resume=resume) # node threshold analysis elif command_args.nodes: create_nodes_analysis(command_args, api, command.common_options, resume=resume) # random fields analysis elif command_args.random_fields: create_candidates_analysis(command_args, api, command.common_options, resume=resume) else: sys.exit("You must choose one of the available analysis: --features," " --nodes, --random-fields or --cross-validation. Add" " your prefered option to" " the command line or type\n bigmler analyze --help\n" " to see all the available options.")
def analyze_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer analyze

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = command.parser.parse_args(command.args)
        command_args.debug = debug
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear_logs the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        if command_args.model_fields:
            model_fields = command_args.model_fields.split(',')
            command_args.model_fields_ = [model_field.strip()
                                          for model_field in model_fields]
        else:
            command_args.model_fields_ = {}
        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)

    # Create the api instance from the parsed args
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    # --maximize flag will be deprecated. Use --optimize flag.
    if command_args.maximize is not None and command_args.optimize is None:
        command_args.optimize = command_args.maximize

    incompatible_flags = [command_args.cv, command_args.features,
                          command_args.nodes]
    if sum([int(bool(flag)) for flag in incompatible_flags]) > 1:
        sys.exit("The following flags cannot be used together:\n --features"
                 "\n --cross-validation\n --nodes")

    # k-fold cross-validation
    if command_args.cv and command_args.dataset is not None:
        create_kfold_cv(command_args, api, command.common_options,
                        resume=resume)
    # features analysis
    if command_args.features:
        create_features_analysis(command_args, api, command.common_options,
                                 resume=resume)
    # node threshold analysis
    if command_args.nodes:
        create_nodes_analysis(command_args, api, command.common_options,
                              resume=resume)
def reify_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = a.parse_and_check(command)
    if command_args.resume:
        command_args, session_file, _ = get_stored_command(
            args, command_args.debug, command_log=COMMAND_LOG,
            dirs_log=DIRS_LOG, sessions_log=SESSIONS_LOG)
        if command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               DEFAULT_OUTPUT)
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        if command_args.output is None:
            command_args.output = os.path.join(command_args.output_dir,
                                               DEFAULT_OUTPUT)
        if len(os.path.dirname(command_args.output).strip()) == 0:
            command_args.output = os.path.join(command_args.output_dir,
                                               command_args.output)
        directory = u.check_dir(command_args.output)
        command_args.output_dir = directory
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)

        directory = u.check_dir(os.path.join(command_args.output_dir, "tmp"))
        session_file = os.path.join(directory, SESSIONS_LOG)
        u.log_message(command.command + "\n", log_file=session_file)
        try:
            shutil.copy(DEFAULTS_FILE, os.path.join(directory, DEFAULTS_FILE))
        except IOError:
            pass
        u.sys_log_message(u"%s\n" % os.path.abspath(directory),
                          log_file=DIRS_LOG)

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    def logger(message):
        """Partial to log messages according to args.verbosity

        """
        u.log_message(u.dated(message),
                      log_file=session_file, console=command_args.verbosity)

    # Creates the corresponding api instance
    api = a.get_api_instance(command_args, u.check_dir(session_file))
    message = "Starting reification for %s\n\n" % command_args.resource_id
    u.log_message(message,
                  log_file=session_file, console=command_args.verbosity)
    reify_resources(command_args, api, logger)
    message = "\nReification complete. See the results in %s\n\n" % \
        command_args.output
    u.log_message(message,
                  log_file=session_file, console=command_args.verbosity)
    u.log_message("_" * 80 + "\n", log_file=session_file)

    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)
def create_package(args, api, common_options, resume=False):
    """Creates the package whizzml resources as described in the
       metadata.json file.

    """
    set_subcommand_file(args.output_dir)
    if resume:
        retrieve_subcommands()
    # read the metadata.json information
    message = ('Reading the metadata.json files.........\n')
    u.log_message(message, log_file=session_file,
                  console=args.verbosity)
    package_dir = args.package_dir
    output_dir = args.output_dir
    metadata_file = os.path.join(package_dir, METADATA_FILE)
    metadata = None

    with open(metadata_file) as metadata_handler:
        metadata = json.load(metadata_handler)
    # recurse into components/directories, if any
    if metadata.get("kind") == "package" and 'components' in metadata:
        components = metadata.get("components")
        for component in components:
            message = ('Inspecting component %s.........\n' % component)
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
            args.package_dir = os.path.join(package_dir, component)
            create_package(args, api, common_options, resume=resume)
            args.package_dir = package_dir
    else:
        # create libraries or scripts
        imports = []
        if metadata.get("imports") is not None:
            lib_imports = metadata.get("imports")
            for lib_import in lib_imports:
                args.package_dir = os.path.join(package_dir, lib_import)
                # try to read the library id, if it is already there
                try:
                    library_id = read_library_id(os.path.join(
                        output_dir, os.path.basename(args.package_dir)))
                except IOError:
                    create_package(args, api, common_options, resume=resume)
                    library_id = read_library_id(os.path.join(
                        output_dir, os.path.basename(args.package_dir)))
                imports.append(library_id)
                args.package_dir = package_dir
        # create the script or library described in the metadata.json file
        message = ('Creating the %s.........\n' % metadata.get("kind"))
        u.log_message(message, log_file=session_file,
                      console=args.verbosity)
        if metadata.get("kind") in WHIZZML_RESOURCES:
            whizzml_code = os.path.join(
                args.package_dir,
                metadata.get("source_code",
                             "%s.whizzml" % metadata.get("kind")))
            args.output_dir = os.path.join(output_dir,
                                           os.path.basename(package_dir))
            # creating command to create the resource
            command = COMMANDS[metadata.get("kind")] % (whizzml_code,
                                                        args.output_dir)
            command_args = command.split()
            bigml.util.check_dir(args.output_dir)
            # getting inputs and outputs for the script from metadata
            if "inputs" in metadata:
                inputs_file = os.path.join(args.output_dir, "inputs.json")
                with open(inputs_file, "w") as inputs_handler:
                    json.dump(metadata.get("inputs"), inputs_handler)
                command_args.extend(["--declare-inputs", inputs_file])
            if "outputs" in metadata:
                outputs_file = os.path.join(args.output_dir, "outputs.json")
                with open(outputs_file, "w") as outputs_handler:
                    json.dump(metadata.get("outputs"), outputs_handler)
                command_args.extend(["--declare-outputs", outputs_file])
            if "description" in metadata:
                desc_file = os.path.join(args.output_dir, "description.txt")
                with open(desc_file, "w") as desc_handler:
                    desc_handler.write(metadata.get("description"))
                command_args.extend(["--description", desc_file])
            if metadata.get("name"):
                command_args.extend(["--name", metadata.get("name")])
            # adding imports, if any
            if imports:
                command_args.extend(["--imports", ",".join(imports)])
            command_args.extend(["--verbosity", str(args.verbosity)])
            if resume:
                next_command = subcommand_list.pop()
                if different_command(next_command, command):
                    resume = False
                    u.sys_log_message(command, log_file=subcommand_file)
                    execute_dispatcher(args=command_args)
                elif not subcommand_list:
                    execute_dispatcher(args=['execute', '--resume'])
                    resume = False
            else:
                u.sys_log_message(command, log_file=subcommand_file)
                execute_dispatcher(args=command_args)
            args.output_dir = output_dir
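# For reference, a metadata.json file of the shape consumed by create_package
# above could be generated like this. All field values are illustrative; only
# the keys ("kind", "components", "imports", "source_code", "inputs",
# "outputs", "description" and "name") are the ones the function actually
# reads.
import json

example_metadata = {
    "kind": "script",                    # "library" or "package" also appear
    "name": "example script",
    "description": "An illustrative WhizzML script.",
    "source_code": "script.whizzml",
    "imports": ["my_library"],           # sub-directories holding libraries
    "inputs": [{"name": "dataset-id", "type": "dataset-id"}],
    "outputs": [{"name": "result", "type": "string"}]
}

with open("metadata.json", "w") as metadata_handler:
    json.dump(example_metadata, metadata_handler, indent=4)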
def analyze_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer analyze

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command = command_handling(args, COMMAND_LOG)

    # Parses command line arguments.
    command_args = command.parser.parse_args(command.args)
    resume = command_args.resume
    if resume:
        # Keep the debug option if set
        debug = command_args.debug
        # Restore the args of the call to resume from the command log file
        stored_command = StoredCommand(args, COMMAND_LOG, DIRS_LOG)
        command = Command(None, stored_command=stored_command)
        # Logs the issued command and the resumed command
        session_file = os.path.join(stored_command.output_dir, SESSIONS_LOG)
        stored_command.log_command(session_file=session_file)
        # Parses resumed arguments.
        command_args = command.parser.parse_args(command.args)
        command_args.debug = debug
    else:
        if command_args.output_dir is None:
            command_args.output_dir = a.NOW
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)
        # If logging is required, open the file for logging
        log = None
        if command_args.log_file:
            u.check_dir(command_args.log_file)
            log = command_args.log_file
            # If --clear_logs the log files are cleared
            if command_args.clear_logs:
                clear_log_files([log])

        if command_args.model_fields:
            model_fields = command_args.model_fields.split(',')
            # list() makes the stripped names reusable under Python 3
            command_args.model_fields_ = list(map(str.strip, model_fields))
        else:
            command_args.model_fields_ = {}
        u.sys_log_message(u"%s\n" % os.path.abspath(command_args.output_dir),
                          log_file=DIRS_LOG)
        session_file = os.path.join(command_args.output_dir, SESSIONS_LOG)

    # Create the api instance from the parsed args
    api = a.get_api_instance(command_args, u.check_dir(session_file))

    # k-fold cross-validation
    if command_args.cv and command_args.dataset is not None:
        create_kfold_cv(command_args, api, command.common_options,
                        resume=resume)
    # features analysis
    if command_args.features:
        create_features_analysis(command_args, api, command.common_options,
                                 resume=resume)
    # node threshold analysis
    if command_args.nodes:
        create_nodes_analysis(command_args, api, command.common_options,
                              resume=resume)
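# As a usage sketch only: the dispatchers in this module accept the same
# argv-style list the command line would produce, including the subcommand
# name (see the main_dispatcher(args=['main', '--resume']) calls above). The
# flag names below are taken from the options these dispatchers handle and
# from the bigmler analyze documentation, so treat the exact spelling as an
# assumption, and replace the dataset id placeholder with a real one.
#
#     analyze_dispatcher(args=["analyze",
#                              "--dataset", "dataset/<existing-dataset-id>",
#                              "--cross-validation", "--k-folds", "5"])
#
# would run the k-fold cross-validation branch (create_kfold_cv) on the
# given dataset.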