def main(args=sys.argv[1:]):
    """Main process

    Dispatches the command-line tokens in `args` (program name excluded)
    to the dispatcher of the requested subcommand. When the first token is
    not one of SUBCOMMANDS (compared case-insensitively) the `main`
    subcommand is assumed and prepended. With no arguments the process
    exits with a usage hint.

    """
    if not args:
        sys.exit("BigMLer used with no arguments. Check:\nbigmler --help\n\nor"
                 "\n\nbigmler sample --help\n\n"
                 "\n\nbigmler analyze --help\n\n"
                 "\n\nbigmler cluster --help\n\n"
                 "\n\nbigmler anomaly --help\n\n"
                 "\n\nbigmler report --help\n\n"
                 "\n\nbigmler reify --help\n\n"
                 "\n\nbigmler project --help\n\n"
                 "\n\nbigmler association --help\n\n"
                 "\n\nbigmler logistic-regression --help\n\n"
                 "\n\nbigmler execute --help\n\n"
                 "\n\nbigmler whizzml --help\n\n"
                 "\n\nbigmler delete --help\n\n"
                 " for a list of options")

    # Build a fresh list so that neither the caller's list nor the shared
    # default argument is aliased, and store the subcommand in lowercase:
    # it is detected case-insensitively but dispatched with exact
    # comparisons, so an upper-case subcommand used to match no dispatcher.
    subcommand = args[0].lower()
    if subcommand in SUBCOMMANDS:
        new_args = [subcommand] + list(args[1:])
    else:
        new_args = ["main"] + list(args)

    # checks if the old --delete syntax is used
    new_args = check_delete_option(new_args)
    if not PYTHON3:
        new_args = [arg.decode(SYSTEM_ENCODING) for arg in new_args]

    dispatchers = {
        "main": main_dispatcher,
        "analyze": analyze_dispatcher,
        "cluster": cluster_dispatcher,
        "anomaly": anomaly_dispatcher,
        "sample": sample_dispatcher,
        "report": report_dispatcher,
        "reify": reify_dispatcher,
        "execute": execute_dispatcher,
        "delete": delete_dispatcher,
        "project": project_dispatcher,
        "association": association_dispatcher,
        "logistic-regression": logistic_regression_dispatcher,
        "whizzml": whizzml_dispatcher,
    }
    # a subcommand listed in SUBCOMMANDS but without a dispatcher here is
    # ignored, as it was in the previous if/elif chain
    dispatcher = dispatchers.get(new_args[0])
    if dispatcher is not None:
        dispatcher(args=new_args)
def main(args=sys.argv[1:]):
    """Main process

    Dispatches the command-line tokens in `args` (program name excluded)
    to the dispatcher of the requested subcommand. When the first token is
    not one of SUBCOMMANDS (compared case-insensitively) the `main`
    subcommand is assumed and prepended. With no arguments the process
    exits with a usage hint.

    """
    if not args:
        sys.exit("BigMLer used with no arguments. Check:\nbigmler --help\n\nor"
                 "\n\nbigmler sample --help\n\n"
                 "\n\nbigmler analyze --help\n\n"
                 "\n\nbigmler cluster --help\n\n"
                 "\n\nbigmler anomaly --help\n\n"
                 "\n\nbigmler report --help\n\n"
                 "\n\nbigmler reify --help\n\n"
                 "\n\nbigmler project --help\n\n"
                 "\n\nbigmler association --help\n\n"
                 "\n\nbigmler logistic-regression --help\n\n"
                 "\n\nbigmler execute --help\n\n"
                 "\n\nbigmler whizzml --help\n\n"
                 "\n\nbigmler delete --help\n\n"
                 " for a list of options")

    # Build a fresh list so that neither the caller's list nor the shared
    # default argument is aliased, and store the subcommand in lowercase:
    # it is detected case-insensitively but dispatched with exact
    # comparisons, so an upper-case subcommand used to match no dispatcher.
    subcommand = args[0].lower()
    if subcommand in SUBCOMMANDS:
        new_args = [subcommand] + list(args[1:])
    else:
        new_args = ["main"] + list(args)

    # checks if the old --delete syntax is used
    new_args = check_delete_option(new_args)
    if not PYTHON3:
        new_args = [arg.decode(SYSTEM_ENCODING) for arg in new_args]

    dispatchers = {
        "main": main_dispatcher,
        "analyze": analyze_dispatcher,
        "cluster": cluster_dispatcher,
        "anomaly": anomaly_dispatcher,
        "sample": sample_dispatcher,
        "report": report_dispatcher,
        "reify": reify_dispatcher,
        "execute": execute_dispatcher,
        "delete": delete_dispatcher,
        "project": project_dispatcher,
        "association": association_dispatcher,
        "logistic-regression": logistic_regression_dispatcher,
        "whizzml": whizzml_dispatcher,
    }
    # a subcommand listed in SUBCOMMANDS but without a dispatcher here is
    # ignored, as it was in the previous if/elif chain
    dispatcher = dispatchers.get(new_args[0])
    if dispatcher is not None:
        dispatcher(args=new_args)
def whizzml_script(args, api):
    """Returns the ID of the script to be used to generate the output

    Unless `args.upgrade` is set, the script is first looked up with
    `get_last_resource` using the name stored in the `metadata.json` file
    of the language's package directory. When no script is found, or when
    an upgrade is requested, the local scripts directory for the language
    is removed, the scripts are rebuilt through the `whizzml` subcommand
    and the ID is read from the regenerated directory.

    """
    # each language has its own script directory; unknown languages fall
    # back to the language name itself
    script_name = SCRIPT_FILE.get(args.language, args.language)

    if not args.upgrade:
        # the script is retrieved by name
        # Reading the name of the script
        metadata_path = os.path.join(
            REIFY_PACKAGE_PATH, script_name, "metadata.json")
        with open(metadata_path) as meta_file:
            meta = json.load(meta_file)
        reify_script = get_last_resource(
            "script", api=api, query_string="name=%s" % meta["name"])
        if reify_script is not None:
            return reify_script

    # From here on the script has to be (re)built: either --upgrade was
    # used or no script with that name was found.
    local_scripts_dir = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, script_name)
    try:
        shutil.rmtree(local_scripts_dir)
    except OSError:
        # best effort: the directory may simply not exist yet
        pass

    # new bigmler command: creating the scriptify scripts
    whizzml_command = ['whizzml',
                       '--package-dir', REIFY_PACKAGE_PATH,
                       '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
    whizzml_dispatcher(args=whizzml_command)
    return get_script_id(os.path.join(local_scripts_dir, "scripts"))
# if --upgrade, we force rebuilding the scriptified script if reify_script is None: try: shutil.rmtree( os.path.join(BIGMLER_SCRIPTS_DIRECTORY, SCRIPT_FILE.get(args.language, args.language))) except Exception, exc: pass if reify_script is None: # new bigmler command: creating the scriptify scripts whizzml_command = [ 'whizzml', '--package-dir', REIFY_PACKAGE_PATH, '--output-dir', BIGMLER_SCRIPTS_DIRECTORY ] whizzml_dispatcher(args=whizzml_command) reify_file = os.path.join( BIGMLER_SCRIPTS_DIRECTORY, SCRIPT_FILE.get(args.language, args.language), "scripts") reify_script = get_script_id(reify_file) return reify_script def reify_resources(args, api, logger): """ Extracts the properties of the created resources and generates code to rebuild them """ resource_id = get_resource_id(args.resource_id) if resource_id is None:
# create or retrieve the script to generate the output # if --upgrade, we force rebuilding the scriptified script if reify_script is None : try: shutil.rmtree(os.path.join(BIGMLER_SCRIPTS_DIRECTORY, SCRIPT_FILE.get(args.language, args.language))) except Exception, exc: pass if reify_script is None: # new bigmler command: creating the scriptify scripts whizzml_command = ['whizzml', '--package-dir', REIFY_PACKAGE_PATH, '--output-dir', BIGMLER_SCRIPTS_DIRECTORY] whizzml_dispatcher(args=whizzml_command) reify_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, SCRIPT_FILE.get(args.language, args.language), "scripts") reify_script = get_script_id(reify_file) return reify_script def reify_resources(args, api, logger): """ Extracts the properties of the created resources and generates code to rebuild them """ resource_id = get_resource_id(args.resource_id) if resource_id is None:
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    The model is taken from `args.resource_id` or, when that is not set,
    from the first `<model_type>_tag` attribute of `args` that is not
    None. The process exits if no resource carries that tag or if the
    model cannot be checked through the API.

    NOTE(review): `common_options` is not used in this part of the
    function.

    """

    # retrieve the modeling resource to be retrained by tag or id
    # NOTE(review): if args.resource_id is falsy and no *_tag attribute is
    # set, resource_id is never bound and the code below fails.
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is not None:
                query_string = "tags=%s" % tag
                resource_id = get_last_resource(
                    model_type.replace("_", ""),
                    api=api,
                    query_string=query_string)
                if resource_id is None:
                    sys.exit("Failed to find the %s with tag %s. "
                             "Please, check the tag and"
                             " the connection info (domain and credentials)."
                             % (model_type.replace("_", " "), tag))
                reference_tag = tag
                break

    if args.upgrade:
        # --upgrade forces rebuilding the scripts: drop the local copies.
        # A failure to remove them (e.g. the directory does not exist yet)
        # must not abort the command.
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource(
            "script", api=api, query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True,
                                     api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                    "retrain",
                                    "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = ['whizzml',
                               '--package-dir', INCREMENTAL_PACKAGE_PATH,
                               '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = ['execute',
                           '--script', reify_script,
                           '--output-dir', args.output_dir]
        command_args, _, _, _, _ = get_context(execute_command,
                                               EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    The model is taken from `args.resource_id` or, when that is not set,
    from the first `<model_type>_tag` attribute of `args` that is not
    None. The process exits if no resource carries that tag or if the
    model cannot be checked through the API.

    NOTE(review): `common_options` is not used in this part of the
    function.

    """

    # retrieve the modeling resource to be retrained by tag or id
    # NOTE(review): if args.resource_id is falsy and no *_tag attribute is
    # set, resource_id is never bound and the code below fails.
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is not None:
                query_string = "tags=%s" % tag
                resource_id = get_last_resource(
                    model_type.replace("_", ""),
                    api=api,
                    query_string=query_string)
                if resource_id is None:
                    sys.exit("Failed to find the %s with tag %s. "
                             "Please, check the tag and"
                             " the connection info (domain and credentials)."
                             % (model_type.replace("_", " "), tag))
                reference_tag = tag
                break

    if args.upgrade:
        # --upgrade forces rebuilding the scripts: drop the local copies.
        # A failure to remove them (e.g. the directory does not exist yet)
        # must not abort the command.
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource(
            "script", api=api, query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True,
                                     api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                    "retrain",
                                    "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = ['whizzml',
                               '--package-dir', INCREMENTAL_PACKAGE_PATH,
                               '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = ['execute',
                           '--script', reify_script,
                           '--output-dir', args.output_dir]
        command_args, _, _, _, _ = get_context(execute_command,
                                               EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
def retrain_model(args, api, command, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    Steps, in order: make sure the script that builds retrain scripts is
    available locally, resolve the model by ID or tag, tag the model's
    dataset with the reference tag, find or create the retrain script
    and, when `args.add` is set, run it on the new data and log the ID of
    the resulting model.

    """

    retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                "retrain",
                                "scripts")
    try:
        os.remove(UPGRADE_FILE)
    except OSError:
        # the upgrade file could not be removed:
        # look for the script that creates the rebuild script.
        reify_script = get_script_id(retrain_file)
    else:
        # the upgrade file was removed: discard the local scripts so that
        # they are rebuilt below
        reify_script = None
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass

    if reify_script is None:
        # new bigmler command: creating the scriptify scripts
        whizzml_command = [
            'whizzml',
            '--package-dir', INCREMENTAL_PACKAGE_PATH,
            '--output-dir', BIGMLER_SCRIPTS_DIRECTORY,
        ]
        add_api_context(whizzml_command, args)
        whizzml_dispatcher(args=whizzml_command)
        reify_script = get_script_id(retrain_file)

    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            resource_id = get_first_resource(
                model_type.replace("_", ""),
                api=api,
                query_string="tags=%s" % tag)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)."
                         % (model_type.replace("_", " "), tag))
            reference_tag = tag
            break

    # updating the dataset that generated the model with the reference tag
    get_model = api.getters[get_resource_type(resource_id)]
    model = get_model(resource_id)
    dataset_id = model["object"]["dataset"]
    dataset_tags = api.get_dataset(dataset_id)["object"]["tags"]
    if reference_tag not in dataset_tags:
        dataset_tags.append(reference_tag)
        api.update_dataset(dataset_id, {"tags": dataset_tags})

    # if --upgrade, we force rebuilding the scriptified script;
    # otherwise check for the last script used to retrain the model
    script_id = None
    if not args.upgrade:
        script_id = get_last_resource(
            "script", api=api, query_string="tags=%s" % reference_tag)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True,
                                     api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute',
            '--script', reify_script,
            '--tag', reference_tag,
            '--output-dir', args.output_dir,
        ]
        command.propagate(execute_command)
        command_args, _, _, _, _ = get_context(execute_command,
                                               EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)

    # apply the retrain script to the new data:
    # add new data: depending on the script we will need to use
    # a source-url, a source or a dataset
    if args.add:
        first_input = api.get_script(script_id)['object']['inputs'][0]
        input_type = first_input['type']
        command_args, api, exe_session_file = create_input(
            args, api, input_type, script_id, command)

        # process the command
        execute_whizzml(command_args, api, exe_session_file)

        with open("%s.json" % command_args.output) as file_handler:
            model_resource_id = json.load(file_handler)['result']
            query_url = last_resource_url(
                resource_id, api,
                "limit=1;full=yes;tags=%s" % reference_tag)
            template = (u'The new retrained model is: %s.\n'
                        u'You can use the\n\n%s\n\nquery to retrieve the latest'
                        u' retrained model.\n\n')
            message = template % (model_resource_id, query_url)
            log_message(message, log_file=session_file, console=1)
def main(args=sys.argv[1:]):
    """Main process

    Dispatches the command-line tokens in `args` (program name excluded)
    to the dispatcher of the requested subcommand. When the first token is
    not one of SUBCOMMANDS (compared case-insensitively) the `main`
    subcommand is assumed and prepended. With no arguments the process
    exits with a usage hint. The `topic-model` subcommand exits with an
    explanatory message when the `no_stemmer` flag is set.

    """
    if not args:
        sys.exit("BigMLer used with no arguments. Check:\nbigmler --help\n\nor"
                 "\n\nbigmler sample --help\n\n"
                 "\n\nbigmler analyze --help\n\n"
                 "\n\nbigmler cluster --help\n\n"
                 "\n\nbigmler anomaly --help\n\n"
                 "\n\nbigmler report --help\n\n"
                 "\n\nbigmler reify --help\n\n"
                 "\n\nbigmler project --help\n\n"
                 "\n\nbigmler association --help\n\n"
                 "\n\nbigmler logistic-regression --help\n\n"
                 "\n\nbigmler topic-model --help\n\n"
                 "\n\nbigmler time-series --help\n\n"
                 "\n\nbigmler deepnet --help\n\n"
                 "\n\nbigmler execute --help\n\n"
                 "\n\nbigmler whizzml --help\n\n"
                 "\n\nbigmler export --help\n\n"
                 "\n\nbigmler retrain --help\n\n"
                 "\n\nbigmler delete --help\n\n"
                 " for a list of options")

    # Build a fresh list so that neither the caller's list nor the shared
    # default argument is aliased, and store the subcommand in lowercase:
    # it is detected case-insensitively but dispatched with exact
    # comparisons, so an upper-case subcommand used to match no dispatcher.
    subcommand = args[0].lower()
    if subcommand in SUBCOMMANDS:
        new_args = [subcommand] + list(args[1:])
    else:
        new_args = ["main"] + list(args)

    # checks if the old --delete syntax is used
    new_args = check_delete_option(new_args)
    if not PYTHON3:
        new_args = [arg.decode(SYSTEM_ENCODING) for arg in new_args]

    if new_args[0] == "topic-model" and no_stemmer:
        sys.exit("To use the bigmler topic-model command you need the"
                 " Pystemmer library. Please, install it and"
                 " retry your command.")

    dispatchers = {
        "main": main_dispatcher,
        "analyze": analyze_dispatcher,
        "cluster": cluster_dispatcher,
        "anomaly": anomaly_dispatcher,
        "sample": sample_dispatcher,
        "report": report_dispatcher,
        "reify": reify_dispatcher,
        "execute": execute_dispatcher,
        "delete": delete_dispatcher,
        "project": project_dispatcher,
        "association": association_dispatcher,
        "logistic-regression": logistic_regression_dispatcher,
        "topic-model": topic_model_dispatcher,
        "time-series": time_series_dispatcher,
        "deepnet": deepnet_dispatcher,
        "whizzml": whizzml_dispatcher,
        "export": export_dispatcher,
        "retrain": retrain_dispatcher,
    }
    # a subcommand listed in SUBCOMMANDS but without a dispatcher here is
    # ignored, as it was in the previous if/elif chain
    dispatcher = dispatchers.get(new_args[0])
    if dispatcher is not None:
        dispatcher(args=new_args)