def _write_model_to_db(
    self,
    class_path,
    parameters,
    feature_names,
    model_hash,
    trained_model,
    model_group_id,
    misc_db_parameters
):
    """Writes model and feature importance data to a database

    If a model sharing the given hash is already stored, nothing is written
    and the id of the stored model is returned instead.

    Args:
        class_path (string) A full classpath to the model class
        parameters (dict) hyperparameters to give to the model constructor
        feature_names (list) feature names in order given to model
        model_hash (string) a unique id for the model
        trained_model (object) a trained model object
        model_group_id (object) id of the model group, stored on the model row
        misc_db_parameters (dict) params to pass through to the database

    Returns: the model id of the newly written (or already stored) model
    """
    saved_model_id = retrieve_model_id_from_hash(self.db_engine, model_hash)
    if saved_model_id:
        logging.warning('model meta data already stored %s', saved_model_id)
        return saved_model_id

    session = self.sessionmaker()
    # try/finally so the session is released even if building the rows or
    # committing raises.
    try:
        model = Model(
            model_hash=model_hash,
            model_type=class_path,
            model_parameters=parameters,
            model_group_id=model_group_id,
            experiment_hash=self.experiment_hash,
            **misc_db_parameters
        )
        session.add(model)

        importances = get_feature_importances(trained_model)
        temp_df = pandas.DataFrame({'feature_importance': importances})
        features_index = temp_df.index.tolist()
        # Dense ranks, largest importance first; the pct variant expresses
        # the same ranking as a fraction.
        rankings_abs = temp_df['feature_importance'].rank(
            method='dense',
            ascending=False
        )
        rankings_pct = temp_df['feature_importance'].rank(
            method='dense',
            ascending=False,
            pct=True
        )
        for feature_index, importance, rank_abs, rank_pct in zip(
            features_index,
            importances,
            rankings_abs,
            rankings_pct
        ):
            # Distinct name: do not rebind the list being iterated.
            importance_row = FeatureImportance(
                model=model,
                feature_importance=round(float(importance), 10),
                feature=feature_names[feature_index],
                rank_abs=int(rank_abs),
                rank_pct=round(float(rank_pct), 10)
            )
            session.add(importance_row)
        session.commit()
        # Read the id while the session is still open.
        return model.model_id
    finally:
        session.close()
def fake_trained_model(project_path, model_storage_engine, db_engine):
    """Creates and stores a trivial trained model

    The model object is written to the store under the hash 'abcd', and a
    database row with the same hash is committed.

    Args:
        project_path (string) a desired fs/s3 project path
            (not used by this function; kept in the signature for callers)
        model_storage_engine (triage.storage.ModelStorageEngine)
        db_engine (sqlalchemy.engine)

    Returns: (tuple) the trained model object and its model id for
        database retrieval
    """
    trained_model = MockTrainedModel()
    model_storage_engine.get_store('abcd').write(trained_model)

    session = sessionmaker(db_engine)()
    # Close the session even if the insert fails, so the connection is not
    # left checked out.
    try:
        db_model = Model(model_hash='abcd')
        session.add(db_model)
        session.commit()
        # Read the generated id while the session is still open.
        model_id = db_model.model_id
    finally:
        session.close()
    return trained_model, model_id
def _write_model_to_db(
    self,
    class_path,
    parameters,
    feature_names,
    model_hash,
    trained_model,
    model_group_id,
    misc_db_parameters
):
    """Writes model and feature importance data to a database

    What happens to an existing model that shares the hash depends on the
    replace flag on the object:

    If the replace flag is set, the existing model row is merged with the
    newly built attributes (keeping its model id) and feature importances
    are saved again via self._save_feature_importances.

    If the replace flag is not set, the existing model id is returned and
    nothing is written.

    If no model with the hash exists, a new model row is added and its
    feature importances are saved.

    Args:
        class_path (string) A full classpath to the model class
        parameters (dict) hyperparameters to give to the model constructor
        feature_names (list) feature names in order given to model
        model_hash (string) a unique id for the model
        trained_model (object) a trained model object
        model_group_id (object) id of the model group, stored on the model row
        misc_db_parameters (dict) params to pass through to the database

    Returns: the model id of the written (or reused) model
    """
    model_id = retrieve_model_id_from_hash(self.db_engine, model_hash)
    if model_id and not self.replace:
        logging.info(
            'Metadata for model_id %s found in database. Reusing model metadata.',
            model_id
        )
        return model_id

    model = Model(
        model_hash=model_hash,
        model_type=class_path,
        model_parameters=parameters,
        model_group_id=model_group_id,
        experiment_hash=self.experiment_hash,
        **misc_db_parameters
    )
    session = self.sessionmaker()
    # try/finally so the session is released even if merge/add/commit raises.
    try:
        if model_id:
            logging.info('Found model id %s, updating non-unique attributes', model_id)
            # Reuse the existing primary key so merge updates that row.
            model.model_id = model_id
            session.merge(model)
            session.commit()
        else:
            session.add(model)
            session.commit()
            # Read the generated id while the session is still open.
            model_id = model.model_id
            logging.info('Added new model id %s', model_id)
    finally:
        session.close()

    logging.info('Saving feature importances for model_id %s', model_id)
    self._save_feature_importances(
        model_id,
        get_feature_importances(trained_model),
        feature_names
    )
    logging.info('Done saving feature importances for model_id %s', model_id)
    return model_id