def get_full_metadata_by_uuid(model_uuid, collection_name=None):
    """Retrieve model parameter metadata for the given model_uuid and collection.

    The returned metadata dictionary will include training run performance metrics
    and training dataset metadata.

    Args:
        model_uuid (str): model unique identifier

        collection_name (str): collection to search (optional, searches all
        collections if not specified)

    Returns:
        Matching metadata dictionary. Raises MongoQueryException if the query fails.
    """
    if not mlmt_supported:
        print("Model tracker not supported in your environment; can load models from filesystem only.")
        return None

    mlmt_client = dsf.initialize_model_tracker()
    if collection_name is None:
        collection_name = get_model_collection_by_uuid(model_uuid, mlmt_client=mlmt_client)
    return mlmt_client.get_model(collection_name=collection_name, model_uuid=model_uuid)
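# Usage sketch for get_full_metadata_by_uuid(). The uuid and collection name below
# are hypothetical placeholders, and this assumes an environment where the model
# tracker is reachable (mlmt_supported is True).
def _example_get_full_metadata_by_uuid():
    metadata = get_full_metadata_by_uuid('00000000-0000-0000-0000-000000000000',
                                         collection_name='my_models')
    if metadata is not None:
        # Unlike get_metadata_by_uuid(), this dictionary includes the
        # training run metrics.
        print(metadata['training_metrics'])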
def get_model_collection_by_uuid(model_uuid, mlmt_client=None):
    """Retrieve the name of the collection containing the model with the given uuid.

    Args:
        model_uuid (str): model uuid

        mlmt_client: Ignored; a new client is initialized unconditionally.

    Returns:
        Matching collection name

    Raises:
        ValueError if there is no collection containing a model with the given uuid.
    """
    if not mlmt_supported:
        print("Model tracker not supported in your environment; can load models from filesystem only.")
        return None

    mlmt_client = dsf.initialize_model_tracker()
    collections = mlmt_client.collections.get_collection_names().result()
    for col in collections:
        # Skip archived collections, whose names start with 'old_'.
        if not col.startswith('old_'):
            if mlmt_client.count_models(collection_name=col, model_uuid=model_uuid) > 0:
                return col
    raise ValueError('Collection not found for uuid: ' + model_uuid)
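# Usage sketch for get_model_collection_by_uuid(). The uuid is a placeholder;
# ValueError signals that no non-archived collection contains the model.
def _example_find_collection():
    try:
        col = get_model_collection_by_uuid('00000000-0000-0000-0000-000000000000')
        print('Model lives in collection %s' % col)
    except ValueError:
        print('No collection contains this model uuid.')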
def get_full_metadata(filter_dict, collection_name=None):
    """Retrieve full metadata (including training run metrics) of models matching the given criteria.

    Args:
        filter_dict (dict): dictionary to filter on

        collection_name (str): Name of collection to search. Required despite the
        None default; passing None raises ValueError.

    Returns:
        A list of matching full model metadata (including training run metrics)
        dictionaries. Raises MongoQueryException if the query fails.
    """
    if not mlmt_supported:
        print("Model tracker not supported in your environment; can load models from filesystem only.")
        return None

    if filter_dict is None:
        raise ValueError('Parameter filter_dict cannot be None.')
    if collection_name is None:
        raise ValueError('Parameter collection_name cannot be None.')

    mlmt_client = dsf.initialize_model_tracker()

    query_params = {
        "match_metadata": filter_dict,
    }
    metadata_list = mlmt_client.model.query_model_metadata(
        collection_name=collection_name,
        query_params=query_params).result()
    return list(metadata_list)
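# Usage sketch for get_full_metadata(): query a collection for all models trained
# on a particular dataset. The collection name, dataset key and bucket are
# hypothetical placeholders; the filter keys mirror the 'training_dataset' fields
# referenced elsewhere in this module.
def _example_query_by_dataset():
    filter_dict = {
        'training_dataset.dataset_key': 'my_project/my_dataset.csv',
        'training_dataset.bucket': 'my_bucket',
    }
    for meta in get_full_metadata(filter_dict, collection_name='my_models') or []:
        print(meta['model_uuid'])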
def get_metadata_by_uuid(model_uuid, collection_name=None):
    """Retrieve model parameter metadata by model_uuid.

    The resulting metadata dictionary can be passed to parameter_parser.wrapper();
    it does not contain performance metrics or training dataset metadata.

    Args:
        model_uuid (str): model unique identifier

        collection_name (str): collection to search (optional, searches all
        collections if not specified)

    Returns:
        Matching metadata dictionary. Raises MongoQueryException if the query fails.
    """
    if not mlmt_supported:
        print("Model tracker not supported in your environment; can load models from filesystem only.")
        return None

    mlmt_client = dsf.initialize_model_tracker()
    if collection_name is None:
        collection_name = get_model_collection_by_uuid(model_uuid, mlmt_client=mlmt_client)

    # Strip the bulky fields that parameter parsing doesn't need.
    exclude_fields = [
        "training_metrics",
        "time_built",
        "training_dataset.dataset_metadata"
    ]
    return mlmt_client.get_model(collection_name=collection_name,
                                 model_uuid=model_uuid,
                                 exclude_fields=exclude_fields)
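# Usage sketch for get_metadata_by_uuid(): the trimmed metadata can be fed to the
# parameter parser, as train_model_from_tracker() below does. The uuid is a
# placeholder, and 'parse' is assumed to be this codebase's parameter_parser import.
def _example_rebuild_params():
    config = get_metadata_by_uuid('00000000-0000-0000-0000-000000000000')
    if config is not None:
        params = parse.wrapper(config)
        print(params.model_uuid)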
def train_model_from_tracker(model_uuid, output_dir):
    """Retrain a model saved in the model tracker, but save it to output_dir and
    don't insert it into the model tracker.

    Args:
        model_uuid (str): model tracker model_uuid

        output_dir (str): path to output directory

    Returns:
        the model pipeline object with trained model
    """
    if not mlmt_supported:
        logger.debug("Model tracker not supported in your environment; can load models from filesystem only.")
        return None

    mlmt_client = dsf.initialize_model_tracker()

    collection_name = mt.get_model_collection_by_uuid(model_uuid, mlmt_client=mlmt_client)

    # Get the model metadata from the tracker.
    config = mt.get_metadata_by_uuid(model_uuid)

    # Check whether the training dataset lives in the datastore.
    try:
        result = dsf.retrieve_dataset_by_datasetkey(
            config['training_dataset']['dataset_key'],
            bucket=config['training_dataset']['bucket'])
        if result is not None:
            config['datastore'] = True
    except Exception:
        pass

    # Parse parameters
    params = parse.wrapper(config)

    params.result_dir = output_dir
    # otherwise this will have the same uuid as the source model
    params.model_uuid = None
    # use the same split
    params.previously_split = True
    params.split_uuid = config['splitting_parameters']['split_uuid']
    # specify collection
    params.collection_name = collection_name

    logger.debug("model params %s" % str(params))

    # Create model pipeline
    model = mp.ModelPipeline(params)

    # Train model
    model.train_model()

    return model
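# Usage sketch for train_model_from_tracker(): retrain a tracked model into a
# local directory without touching the tracker. The uuid and output path are
# hypothetical placeholders.
def _example_retrain():
    pipeline = train_model_from_tracker('00000000-0000-0000-0000-000000000000',
                                        '/tmp/retrained_model')
    if pipeline is not None:
        print('Retrained model written under /tmp/retrained_model')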
def instantiate_mlmt_client(self, use_production_server=True):
    """Instantiate the mlmt_client.

    Args:
        use_production_server (bool): True if the production server should be used,
        False if the local server should be used. Default True. The local server
        should only be used for testing.
    """
    # =====================================================
    # Set up machine learning model tracker (mlmt) client.
    # =====================================================
    # Toggle True/False to use the production server or the forsyth2 personal
    # server. The former should almost always be used, unless testing code that
    # only runs on the latter.
    self.ds_client, self.mlmt_client = dsf.initialize_model_tracker(
        use_production_server, self.ds_client)
def get_model_collection_by_uuid(model_uuid, mlmt_client=None):
    """Retrieve model collection given a uuid.

    Args:
        model_uuid (str): model uuid

        mlmt_client: Ignored

    Returns:
        Matching collection name

    Raises:
        ValueError if there is no collection containing a model with the given uuid.
    """
    mlmt_client = dsf.initialize_model_tracker()
    collections = mlmt_client.collections.get_collection_names().result()
    for col in collections:
        if mlmt_client.count_models(collection_name=col, model_uuid=model_uuid) > 0:
            return col
    raise ValueError('Collection not found for uuid: ' + model_uuid)
def save_model(pipeline, collection_name='model_tracker', log=True):
    """Save the model.

    Save the model files to the datastore and save the model metadata dict to the
    Mongo database.

    Args:
        pipeline (ModelPipeline object): the pipeline to use

        collection_name (str): the name of the Mongo DB collection to use

        log (bool): True if logs should be printed, default True

    Returns:
        None if insertion was successful, raises UnableToTarException,
        DatastoreInsertionException, MLMTClientInstantiationException or
        MongoInsertionException otherwise
    """
    if pipeline is None:
        raise Exception('pipeline cannot be None.')

    if not mlmt_supported:
        print("Model tracker not supported in your environment; can save models in filesystem only.")
        return

    # ModelPipeline.create_model_metadata() should be called before the call to save_model.
    # Get the metadata dictionary from the model pipeline.
    metadata_dict = pipeline.model_metadata
    model_uuid = metadata_dict['model_uuid']
    if model_uuid is None:
        raise ValueError("model_uuid is missing from pipeline metadata.")

    #### Part 1: Save the model tarball ####
    model = pipeline.model_wrapper
    # best_model_dir is an absolute path.
    directory_to_tar = model.best_model_dir
    # Put the tar file in a temporary directory that will automatically be destroyed when we're done.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tar_file = os.path.join(tmp_dir, 'model_{model_uuid}.tar.gz'.format(model_uuid=model_uuid))
        tar_flags = 'czf'
        # Change directory to model_dir so that paths in the tarball are relative to model_dir.
        tar_command = 'tar -{tar_flags} {tar_file} -C {directory_to_tar} .'.format(
            tar_flags=tar_flags, tar_file=tar_file, directory_to_tar=directory_to_tar)
        try:
            subprocess.check_output(tar_command.split())
        except subprocess.CalledProcessError as e:
            pipeline.log.error('Command to create model tarball returned status {return_code}'.format(
                return_code=e.returncode))
            pipeline.log.error('Command was: "{cmd}"'.format(cmd=e.cmd))
            pipeline.log.error('Output was: "{output}"'.format(output=e.output))
            pipeline.log.error('stderr was: "{stderr}"'.format(stderr=e.stderr))
            raise UnableToTarException('Unable to tar {directory_to_tar}.'.format(
                directory_to_tar=directory_to_tar))
        title = '{model_uuid} model tarball'.format(model_uuid=model_uuid)
        uploaded_results = dsf.upload_file_to_DS(
            bucket=pipeline.params.model_bucket,
            title=title,
            description=title,
            tags=[],
            key_values={'model_uuid': model_uuid, 'file_category': 'ml_model'},
            filepath=tmp_dir,
            filename=tar_file,
            dataset_key='model_' + model_uuid + '_tarball',
            client=pipeline.ds_client,
            return_metadata=True)
        if uploaded_results is None:
            raise DatastoreInsertionException('Unable to upload title={title} to datastore.'.format(
                title=title))

    # Get the dataset_oid for the actual metadata file stored in the datastore.
    model_dataset_oid = uploaded_results['dataset_oid']
    # By adding dataset_oid to the dict, we can immediately find the datastore file associated with a model.
    metadata_dict['model_parameters']['model_dataset_oid'] = model_dataset_oid

    #### Part 2: Save the model metadata ####
    mlmt_client = dsf.initialize_model_tracker()
    mlmt_client.save_metadata(collection_name=collection_name,
                              model_uuid=metadata_dict['model_uuid'],
                              model_metadata=metadata_dict)
    if log:
        print('Successfully inserted into the database with model_uuid %s.' % model_uuid)
def save_model(pipeline, collection_name='model_tracker', log=True):
    """Save the model.

    Save the model files to the datastore and save the model metadata dict to the
    Mongo database.

    Args:
        pipeline (ModelPipeline object): the pipeline to use

        collection_name (str): the name of the Mongo DB collection to use

        log (bool): True if logs should be printed, default True

    Returns:
        None if insertion was successful, raises DatastoreInsertionException,
        MLMTClientInstantiationException or MongoInsertionException otherwise
    """
    if pipeline is None:
        raise Exception('pipeline cannot be None.')

    if not mlmt_supported:
        logger.error("Model tracker not supported in your environment; can save models in filesystem only.")
        return

    # ModelPipeline.create_model_metadata() should be called before the call to save_model.
    # Get the metadata dictionary from the model pipeline.
    metadata_dict = pipeline.model_metadata
    model_uuid = metadata_dict['model_uuid']
    if model_uuid is None:
        raise ValueError("model_uuid is missing from pipeline metadata.")

    #### Part 1: Save the model tarball in the datastore ####
    model = pipeline.model_wrapper
    # Put the tar file in a temporary directory that will automatically be destroyed when we're done.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tarball_path = os.path.join(tmp_dir, f"model_{model_uuid}.tar.gz")
        save_model_tarball(pipeline.params.output_dir, tarball_path)

        title = f"{model_uuid} model tarball"
        ds_key = f"model_{model_uuid}_tarball"
        uploaded_results = dsf.upload_file_to_DS(
            bucket=pipeline.params.model_bucket,
            title=title,
            description=title,
            tags=[],
            key_values={'model_uuid': model_uuid, 'file_category': 'ml_model'},
            filepath=tmp_dir,
            filename=tarball_path,
            dataset_key=ds_key,
            client=pipeline.ds_client,
            return_metadata=True)
        if uploaded_results is None:
            raise DatastoreInsertionException('Unable to upload title={title} to datastore.'.format(
                title=title))

    # Get the dataset_oid for the actual metadata file stored in the datastore.
    model_dataset_oid = uploaded_results['dataset_oid']
    # By adding dataset_oid to the dict, we can immediately find the datastore file associated with a model.
    metadata_dict['model_parameters']['model_dataset_oid'] = model_dataset_oid

    #### Part 2: Save the model metadata in the model tracker ####
    mlmt_client = dsf.initialize_model_tracker()
    mlmt_client.save_metadata(collection_name=collection_name,
                              model_uuid=metadata_dict['model_uuid'],
                              model_metadata=metadata_dict)
    if log:
        logger.info('Successfully inserted into the database with model_uuid %s.' % model_uuid)
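# Usage sketch for save_model(): after training (and after
# ModelPipeline.create_model_metadata() has populated pipeline.model_metadata),
# push the model tarball and metadata to the tracker. 'params' is assumed to be a
# fully populated parameter namespace, and 'my_models' is a hypothetical
# collection name.
def _example_save_model(params):
    pipeline = mp.ModelPipeline(params)
    pipeline.train_model()
    save_model(pipeline, collection_name='my_models', log=True)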
def __init__(self, params, hyperparam_uuid=None):
    """
    Args:
        params: The input hyperparameter parameters

        hyperparam_uuid: Optional, UUID for the hyperparameter run if you want to
        group this run with a previous run. We ended up mainly doing this via
        collections, so it's not really used.
    """
    self.hyperparam_layers = {'layer_sizes', 'dropouts', 'weight_init_stddevs', 'bias_init_consts'}
    self.hyperparam_keys = {'model_type', 'featurizer', 'splitter', 'learning_rate', 'weight_decay_penalty',
                            'rf_estimators', 'rf_max_features', 'rf_max_depth', 'umap_dim', 'umap_targ_wt',
                            'umap_metric', 'umap_neighbors', 'umap_min_dist', 'xgb_learning_rate', 'xgb_gamma'}
    self.nn_specific_keys = {'learning_rate', 'layers', 'weight_decay_penalty'}
    self.rf_specific_keys = {'rf_estimators', 'rf_max_features', 'rf_max_depth'}
    self.xgboost_specific_keys = {'xgb_learning_rate', 'xgb_gamma'}
    self.hyperparam_keys |= self.hyperparam_layers
    self.excluded_keys = excluded_keys
    self.convert_to_float = parse.convert_to_float_list
    self.convert_to_int = parse.convert_to_int_list
    self.params = params

    # Simplify NN layer construction: expand layer/node/dropout choices into
    # concrete layer_sizes and dropouts combinations.
    if (params.layer_nums is not None) and (params.node_nums is not None) and (params.dropout_list is not None):
        self.params.layer_sizes, self.params.dropouts = permutate_NNlayer_combo_params(
            params.layer_nums, params.node_nums, params.dropout_list, params.max_final_layer_size)

    if hyperparam_uuid is None:
        self.hyperparam_uuid = str(uuid.uuid4())
    else:
        self.hyperparam_uuid = hyperparam_uuid

    self.hyperparams = {}
    self.new_params = {}
    self.layers = {}
    self.param_combos = []
    self.num_rows = {}
    self.log = logging.getLogger("hyperparam_search")

    # Create handlers
    c_handler = logging.StreamHandler()
    log_path = os.path.join(self.params.result_dir, 'logs')
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    f_handler = logging.FileHandler(os.path.join(log_path, '{0}.log'.format(self.hyperparam_uuid)))
    self.out_file = open(os.path.join(log_path, '{0}.json'.format(self.hyperparam_uuid)), 'a')
    c_handler.setLevel(logging.WARNING)
    f_handler.setLevel(logging.INFO)

    # Create formatters and add them to the handlers
    c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
    f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    c_handler.setFormatter(c_format)
    f_handler.setFormatter(f_format)

    # Add handlers to the logger
    self.log.addHandler(c_handler)
    self.log.addHandler(f_handler)

    self.mlmt_client = dsf.initialize_model_tracker()

    # Write the SLURM batch script used to launch each model training run.
    slurm_path = os.path.join(self.params.result_dir, 'slurm_files')
    if not os.path.exists(slurm_path):
        os.makedirs(slurm_path)
    self.shell_script = os.path.join(self.params.script_dir, 'utils', 'run.sh')
    with open(self.shell_script, 'w') as f:
        hostname = ''.join(list(filter(lambda x: x.isalpha(), socket.gethostname())))
        f.write("#!/bin/bash\n#SBATCH -A {2}\n#SBATCH -N 1\n#SBATCH -p partition={0}\n#SBATCH -t 24:00:00"
                "\n#SBATCH -p {3}\n#SBATCH --export=ALL\n#SBATCH -D {1}\n".format(
                    hostname, slurm_path, self.params.lc_account, self.params.slurm_partition))
        f.write('start=`date +%s`\necho $3\n$1 $2/pipeline/model_pipeline.py $3\nend=`date +%s`\n'
                'runtime=$((end-start))\necho "runtime: " $runtime')