def create_input(args, api, input_type, script_id):
    """Creates the resources used as input for the retrain script
    when adding new data.

    When remote sources are used, the input is usually the remote url.
    If a local source is used, then the input should be a source-id
    or a dataset-id

    """
    if input_type in ['source-id', 'dataset-id']:
        source_command = ["main", "--train", args.add,
                          "--output-dir", args.output_dir,
                          STOP_WORKFLOW[input_type]]
        command_args, _, _, main_session_file, _ = get_context( \
            source_command, MAIN_SETTINGS)
        command_args.predictions = command_args.output
        a.get_output_args(api, command_args, False)
        compute_output(api, command_args)
        resource_type = input_type[:-3]
        resource_id = getattr(command_args, resource_type)
    else:
        resource_type = "source-url"
        resource_id = args.add
    # apply the retrain script to the new resource
    execute_command = ['execute',
                       '--script', script_id,
                       '--output-dir', args.output_dir]
    add_api_context(execute_command, args)
    command_args, _, _, exe_session_file, _ = get_context( \
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["%s1" % resource_type, resource_id],
                               ["datasets-limit", args.window_size]]
    command_args.inputs = json.dumps(command_args.arguments_)
    return command_args


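# A minimal sketch (not part of the original module) of the execution inputs
# built by create_input, assuming a hypothetical dataset ID and a window size
# of 2. The input name is the resource type plus "1" ("dataset-id" ->
# "dataset1", "source-id" -> "source1"), and the pairs are JSON-serialized
# for the WhizzML execution.
import json

_example_arguments = [["dataset1", "dataset/5af06df94e17277501000000"],
                      ["datasets-limit", 2]]
_example_inputs = json.dumps(_example_arguments)
# -> '[["dataset1", "dataset/5af06df94e17277501000000"],
#      ["datasets-limit", 2]]'

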
def connector_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    command_args, _, api, session_file, _ = get_context(args, SETTINGS)

    path = u.check_dir(command_args.output)
    log = None
    if command_args.log_file:
        u.check_dir(command_args.log_file)
        log = command_args.log_file
        # If --clear_logs the log files are cleared
        clear_log_files([log])

    if not command_args.external_connector_id and \
            u.has_connection_info(command_args):
        # create connector
        pec.connector_processing(api, command_args, command_args.resume,
                                 session_file=session_file, path=path,
                                 log=log)

    if command_args.external_connector_id and (
            command_args.connector_attributes or
            command_args.name or command_args.tag or
            command_args.description or command_args.category):
        # update connector's attributes
        pec.update_external_connector(command_args, api, \
            command_args.resume, session_file=session_file)

    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)


def retrain_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer retrain

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    # parses the command line to get the context args and the log files to use
    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # --id or --model-tag, --ensemble-tag, etc. is compulsory
    if check_compulsory_options(command.flags, command_args):
        retrain_model(command_args, api, command.common_options,
                      session_file=session_file)
        u.log_message("_" * 80 + "\n", log_file=session_file)
    else:
        sys.exit("You must provide the ID of the resource to be"
                 " retrained in the --id option or a unique tag"
                 " to retrieve such ID."
                 " Type bigmler retrain --help\n"
                 " to see all the available options.")


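# Hypothetical invocation sketch (ID and file names made up): the flag list
# mirrors what the shell command
#     bigmler retrain --id model/5af06df94e17277501000000 --add new_data.csv
# would pass in, assuming args.add maps to an --add flag.
retrain_dispatcher(["--id", "model/5af06df94e17277501000000",
                    "--add", "new_data.csv",
                    "--output-dir", "./retrain_output"])

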
def reify_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, _, api, session_file, _ = get_context(args, SETTINGS)

    def logger(message):
        """Partial to log messages according to args.verbosity

        """
        u.log_message(u.dated(message), \
            log_file=session_file, console=command_args.verbosity)

    message = "Starting reification for %s\n\n" % command_args.resource_id
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    reify_resources(command_args, api)
    message = "\nReification complete. See the results in %s\n\n" % \
        command_args.output
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)


def project_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)
    path = u.check_dir(command_args.output)
    log = None
    if command_args.log_file:
        u.check_dir(command_args.log_file)
        log = command_args.log_file
        # If --clear_logs the log files are cleared
        clear_log_files([log])

    if not command_args.project_id and command_args.name:
        command_args.project = command_args.name
    if command_args.project:
        # create project
        pp.project_processing(
            api, command_args, command_args.resume, session_file=session_file,
            path=path, log=log, create=True)
    if command_args.project_id and (
            command_args.project_attributes or
            command_args.name or command_args.tag or
            command_args.description or command_args.category):
        # update project's attributes
        pp.update_project(command_args, api, command_args.resume, \
            session_file=session_file)

    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)


def reify_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    def logger(message):
        """Partial to log messages according to args.verbosity

        """
        u.log_message(u.dated(message), \
            log_file=session_file, console=command_args.verbosity)

    message = "Starting reification for %s\n\n" % command_args.resource_id
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    reify_resources(command_args, api, logger)
    message = "\nReification complete. See the results in %s\n\n" % \
        command_args.output
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)


def execute_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, _, api, session_file, _ = get_context(args, SETTINGS)

    # process the command
    execute_whizzml(command_args, api, session_file)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def create_input(args, api, input_type, script_id, command):
    """Creates the resources used as input for the retrain script
    when adding new data.

    When remote sources are used, the input is usually the remote url.
    If a local source is used, then the input should be a source-id
    or a dataset-id

    """
    if input_type in ['source-id', 'dataset-id']:
        source_command = ["main", "--train", args.add,
                          "--output-dir", args.output_dir,
                          STOP_WORKFLOW[input_type]]
        command.propagate(source_command)
        command_args, _, _, main_session_file, _ = get_context( \
            source_command, MAIN_SETTINGS)
        command_args.predictions = command_args.output
        a.get_output_args(api, command_args, False)
        compute_output(api, command_args)
        resource_type = input_type[:-3]
        resource_id = getattr(command_args, resource_type)
    else:
        resource_type = "source-url"
        resource_id = args.add
    # apply the retrain script to the new resource
    execute_command = ['execute',
                       '--script', script_id,
                       '--output-dir', args.output_dir]
    command.propagate(execute_command)
    command_args, _, _, exe_session_file, _ = get_context( \
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["%s1" % resource_type, resource_id],
                               ["datasets-limit", args.window_size]]
    command_args.inputs = json.dumps(command_args.arguments_)
    return command_args, api, exe_session_file


def execute_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # process the command
    execute_whizzml(command_args, api, session_file)
    u.log_message("_" * 80 + "\n", log_file=session_file)


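# Hypothetical invocation sketch (script ID made up), assuming the parsed
# args map to the --script and --output-dir flags used elsewhere in this
# module:
execute_dispatcher(["--script", "script/5af06df94e17277501000000",
                    "--output-dir", "./execute"])

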
def association_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, _, api, session_file, resume = get_context(args, SETTINGS)

    # Selects the action to perform
    if a.has_train(command_args) or a.has_test(command_args):
        output_args = a.get_output_args(api, command_args, resume)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def association_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # Selects the action to perform
    if a.has_train(command_args) or a.has_test(command_args):
        output_args = a.get_output_args(api, command_args, resume)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def pca_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    command_args, _, api, session_file, _ = get_context(args, settings)

    # Selects the action to perform
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.export_fields):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def fusion_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    if '--evaluate' in args:
        settings.update({"default_output": "evaluation"})

    command_args, _, api, session_file, _ = get_context(args, settings)

    # Selects the action to perform
    if (a.has_value(command_args, "fusion_models_") or
            a.has_test(command_args)):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def whizzml_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer whizzml

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, _, resume = get_context(args, SETTINGS)

    # package_dir
    if command_args.package_dir is not None:
        command_args.package_dir = os.path.expanduser(
            command_args.package_dir)
        create_package(command_args, api, command, resume=resume)
    else:
        sys.exit("You must use the --package-dir flag pointing to the"
                 " directory where the metadata.json file is. Type\n"
                 " bigmler whizzml --help\n"
                 " to see all the available options.")


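# Hypothetical invocation sketch (paths made up): builds the WhizzML package
# described by the metadata.json file found in the given directory.
whizzml_dispatcher(["--package-dir", "~/whizzml/my_package",
                    "--output-dir", "./whizzml_build"])

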
def pca_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    command_args, command, api, session_file, resume = get_context(args,
                                                                   settings)

    # Selects the action to perform
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.export_fields):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def main_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    if '--evaluate' in args:
        settings.update({"default_output": "evaluation"})

    command_args, _, api, session_file, _ = get_context(args, settings)

    # the predictions flag prevails to store the results
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.votes_dirs):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def main_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    if '--evaluate' in args:
        settings.update({"default_output": "evaluation"})

    command_args, command, api, session_file, resume = get_context(args,
                                                                   settings)

    # the predictions flag prevails to store the results
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.votes_dirs):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def fusion_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    if '--evaluate' in args:
        settings.update({"default_output": "evaluation"})

    command_args, command, api, session_file, resume = get_context(args,
                                                                   settings)

    # Selects the action to perform
    if (a.has_value(command_args, "fusion_models_") or
            a.has_test(command_args)):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)


def export_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different export functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, _, api, session_file, _ = get_context(args, SETTINGS)

    # Selects the resource whose code will be generated
    resource = command_args.ensemble or command_args.model
    message = "Generating %s code for %s\n\n" % (command_args.language,
                                                 resource)
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    export_code(command_args, api)
    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)


def export_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different export functions

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # Selects the resource whose code will be generated
    resource = command_args.ensemble or command_args.model
    message = "Generating %s code for %s\n\n" % (command_args.language,
                                                 resource)
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    export_code(command_args, api)
    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)


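# Hypothetical invocation sketch (ID made up), assuming command_args.model
# and command_args.language map to --model and --language flags (flag names
# not shown in this section):
export_dispatcher(["--model", "model/5af06df94e17277501000000",
                   "--language", "python",
                   "--output-dir", "./export"])

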
def whizzml_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer whizzml

    """
    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # package_dir
    if command_args.package_dir is not None:
        command_args.package_dir = os.path.expanduser(
            command_args.package_dir)
        create_package(command_args, api, command.common_options,
                       resume=resume)
    else:
        sys.exit("You must use the --package-dir flag pointing to the"
                 " directory where the metadata.json file is. Type\n"
                 " bigmler whizzml --help\n"
                 " to see all the available options.")


# check whether the resource exists
try:
    check_resource(resource_id, raise_on_error=True, api=api)
except Exception:
    sys.exit("Failed to find the resource %s. Please, check its ID and"
             " the connection info (domain and credentials)."
             % resource_id)

reify_script = whizzml_script(args, api)

# apply the reify script to the resource
execute_command = ['execute',
                   '--script', reify_script,
                   '--output-dir', args.output_dir]
command_args, _, _, exe_session_file, _ = get_context( \
    execute_command, EXE_SETTINGS)
command_args.arguments_ = [["res-id", resource_id]]
command_args.inputs = json.dumps(command_args.arguments_)

# process the command
session_file = None
execute_whizzml(command_args, api, session_file)
with open("%s.json" % command_args.output) as file_handler:
    exe_output = json.load(file_handler)['result']

if args.language == "nb":
    write_nb_output(resource_id, \
        exe_output, args.output.replace(".py", ".ipynb"), api)
    return
elif args.language == "whizzml":
    output = exe_output["source_code"]


command_args, _, _, exe_session_file, _ = get_context(
    execute_command, EXE_SETTINGS)
command_args.arguments_ = [["model-resource", resource_id]]
command_args.inputs = json.dumps(command_args.arguments_)

# process the command
execute_whizzml(command_args, api, session_file)
script_id = extract_retrain_id(command_args, api, session_file)

# apply the retrain script to the new data:
# create a source with the new data
if args.add:
    source_command = ["main", "--train", args.add, "--no-dataset",
                      "--output-dir", args.output_dir]
    command_args, _, _, main_session_file, _ = get_context(
        source_command, MAIN_SETTINGS)
    command_args.predictions = command_args.output
    a.get_output_args(api, command_args, False)
    compute_output(api, command_args)
    source_id = command_args.source

    # apply the retrain script to the new source
    execute_command = ['execute',
                       '--script', script_id,
                       '--output-dir', args.output_dir]
    command_args, _, _, exe_session_file, _ = get_context(
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["source1", source_id],
                               ["datasets-limit", args.window_size]]
    command_args.inputs = json.dumps(command_args.arguments_)
    # process the command
    execute_whizzml(command_args, api, session_file)


def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    """
    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            if hasattr(args, "%s_tag" % model_type) and \
                    getattr(args, "%s_tag" % model_type) is not None:
                tag = getattr(args, "%s_tag" % model_type)
                query_string = "tags=%s" % tag
                resource_id = get_last_resource( \
                    model_type.replace("_", ""),
                    api=api,
                    query_string=query_string)
                if resource_id is None:
                    sys.exit("Failed to find the %s with tag %s. "
                             "Please, check the tag and"
                             " the connection info (domain and credentials)."
                             % (model_type.replace("_", " "), tag))
                reference_tag = tag
                break
    if args.upgrade:
        shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource( \
            "script", api=api, query_string=query_string)
    if script_id is None:
        # if the script to retrain does not exist:
        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True,
                                     api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)."
                     % resource_id)
        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                    "retrain", "scripts")
        reify_script = get_script_id(retrain_file)
        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = ['whizzml',
                               '--package-dir', INCREMENTAL_PACKAGE_PATH,
                               '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)
        # new bigmler command: creating the retrain script
        execute_command = ['execute',
                           '--script', reify_script,
                           '--output-dir', args.output_dir]
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)


def retrain_model(args, api, command, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    """
    retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, "retrain",
                                "scripts")
    try:
        os.remove(UPGRADE_FILE)
        reify_script = None
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass
    except OSError:
        # look for the script that creates the rebuild script.
        reify_script = get_script_id(retrain_file)

    if reify_script is None:
        # new bigmler command: creating the scriptify scripts
        whizzml_command = ['whizzml',
                           '--package-dir', INCREMENTAL_PACKAGE_PATH,
                           '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
        add_api_context(whizzml_command, args)
        whizzml_dispatcher(args=whizzml_command)
        reify_script = get_script_id(retrain_file)

    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            if hasattr(args, "%s_tag" % model_type) and \
                    getattr(args, "%s_tag" % model_type) is not None:
                tag = getattr(args, "%s_tag" % model_type)
                query_string = "tags=%s" % tag
                resource_id = get_first_resource( \
                    model_type.replace("_", ""),
                    api=api,
                    query_string=query_string)
                if resource_id is None:
                    sys.exit("Failed to find the %s with tag %s. "
                             "Please, check the tag and"
                             " the connection info (domain and credentials)."
                             % (model_type.replace("_", " "), tag))
                reference_tag = tag
                break

    # updating the dataset that generated the model with the reference tag
    model = api.getters[get_resource_type(resource_id)](resource_id)
    dataset_id = model["object"]["dataset"]
    dataset = api.get_dataset(dataset_id)
    tags = dataset["object"]["tags"]
    if reference_tag not in tags:
        tags.append(reference_tag)
        api.update_dataset(dataset_id, {"tags": tags})

    # if --upgrade, we force rebuilding the scriptified script
    if args.upgrade:
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=%s" % reference_tag
        script_id = get_last_resource( \
            "script", api=api, query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:
        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True,
                                     api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)."
                     % resource_id)
        # new bigmler command: creating the retrain script
        execute_command = ['execute',
                           '--script', reify_script,
                           '--tag', reference_tag,
                           '--output-dir', args.output_dir]
        command.propagate(execute_command)
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)

    # apply the retrain script to the new data:
    # add new data: depending on the script we will need to use
    # a source-url, a source or a dataset
    if args.add:
        script_inputs = api.get_script(script_id)['object']['inputs']
        input_type = script_inputs[0]['type']
        command_args, api, exe_session_file = \
            create_input(args, api, input_type, script_id, command)
        # process the command
        execute_whizzml(command_args, api, exe_session_file)
        with open("%s.json" % command_args.output) as file_handler:
            model_resource_id = json.load(file_handler)['result']
        message = (u'The new retrained model is: %s.\n'
                   u'You can use the\n\n%s\n\nquery to retrieve the latest'
                   u' retrained model.\n\n') % \
            (model_resource_id, last_resource_url( \
                resource_id, api, \
                "limit=1;full=yes;tags=%s" % reference_tag))
        log_message(message, log_file=session_file, console=1)


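# Sketch (not in the original module) of the reference-tag convention used
# above, with a made-up model ID: the dataset, the retrain script and each
# retrained model share the tag, so the newest model is one tag-filtered,
# single-result query away.
_reference_tag = "retrain:model/5af06df94e17277501000000"  # hypothetical
_query_string = "limit=1;full=yes;tags=%s" % _reference_tag

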
def reify_resources(args, api):
    """Extracts the properties of the created resources and generates
    code to rebuild them

    """
    resource_id = get_resource_id(args.resource_id)
    if resource_id is None:
        sys.exit("Failed to match a valid resource ID. Please, check: %s"
                 % args.resource_id)

    # check whether the resource exists
    try:
        check_resource(resource_id, raise_on_error=True, api=api)
    except Exception:
        sys.exit("Failed to find the resource %s. Please, check its ID and"
                 " the connection info (domain and credentials)."
                 % resource_id)

    reify_script = whizzml_script(args, api)

    # apply the reify script to the resource
    execute_command = ['execute',
                       '--script', reify_script,
                       '--output-dir', args.output_dir]
    command_args, _, _, _, _ = get_context( \
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["res-id", resource_id]]
    command_args.inputs = json.dumps(command_args.arguments_)

    # process the command
    session_file = None
    execute_whizzml(command_args, api, session_file)
    with open("%s.json" % command_args.output) as file_handler:
        exe_output = json.load(file_handler)['result']

    if args.language == "nb":
        write_nb_output(resource_id, \
            exe_output, args.output.replace(".py", ".ipynb"), api)
        return
    elif args.language == "whizzml":
        output = exe_output["source_code"]
        args.output = args.output.replace(".py", ".whizzml")
        exe_output["source_code"] = args.output
        exe_output["kind"] = "script"
        with open(os.path.join(os.path.dirname(args.output),
                               "metadata.json"), "w") as meta_handler:
            meta_handler.write(json.dumps(exe_output))
    else:
        output = python_output(exe_output, api)
        prefix = u"""\
#!/usr/bin/env python
# -*- coding: utf-8 -*-
\"\"\"Python code to reify %s

Generated by BigMLer
\"\"\"


def main():

""" % resource_id
        suffix = u"""\
if __name__ == "__main__":
    main()
"""
        output = "%s%s\n%s" % (prefix, output, suffix)

    write_to_utf8(args.output, output)
    sts = os.stat(args.output)
    os.chmod(args.output, sts.st_mode | stat.S_IEXEC)


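# Hypothetical driver sketch for reify_resources (the Namespace fields are
# inferred from the attribute accesses above; IDs and paths are made up):
from argparse import Namespace

_args = Namespace(resource_id="source/5af06df94e17277501000000",
                  output_dir="./reify",
                  output="./reify/reify.py",
                  language="python")
# reify_resources(_args, api)  # api: an authenticated BigML connection

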