Beispiel #1
0
    def _write_model_to_db(self, class_path, parameters, feature_names,
                           model_hash, trained_model, model_group_id,
                           model_size, misc_db_parameters):
        """Write model metadata and feature importances to the database.

        The model hash is looked up first via ``retrieve_model_id_from_hash``:

        - If a model with this hash exists and ``self.replace`` is not set,
          nothing is written and the existing model id is returned.
        - If a model with this hash exists and ``self.replace`` is set, the
          newly built ``Model`` is merged into the existing row (updating its
          non-unique attributes, e.g. timestamps) and the feature importances
          are saved again.
        - If no model with this hash exists, a new row is added and its
          feature importances are saved.

        Args:
            class_path (string) A full classpath to the model class
            parameters (dict) hyperparameters to give to the model constructor
            feature_names (list) feature names in order given to model
            model_hash (string) a unique id for the model
            trained_model (object) a trained model object
            model_group_id (int) the unique id for the model group
            model_size (float) the size of the stored model in kB
            misc_db_parameters (dict) params to pass through to the database

        Returns:
            The model id of the reused, updated, or newly added model row.
        """
        model_id = retrieve_model_id_from_hash(self.db_engine, model_hash)
        if model_id and not self.replace:
            logging.info(
                'Metadata for model_id %s found in database. Reusing model metadata.',
                model_id)
            return model_id

        model = Model(model_hash=model_hash,
                      model_type=class_path,
                      hyperparameters=parameters,
                      model_group_id=model_group_id,
                      experiment_hash=self.experiment_hash,
                      model_size=model_size,
                      **misc_db_parameters)
        session = self.sessionmaker()
        try:
            if model_id:
                logging.info(
                    'Found model id %s, updating non-unique attributes',
                    model_id)
                # Reusing the existing primary key makes merge() update
                # the stored row instead of inserting a new one.
                model.model_id = model_id
                session.merge(model)
                session.commit()
            else:
                session.add(model)
                session.commit()
                model_id = model.model_id
                logging.info('Added new model id %s', model_id)
        finally:
            # Release the session even when the write fails, so a failed
            # commit does not leave a connection/transaction dangling.
            session.close()

        logging.info('Saving feature importances for model_id %s', model_id)
        self._save_feature_importances(model_id,
                                       get_feature_importances(trained_model),
                                       feature_names)
        logging.info('Done saving feature importances for model_id %s',
                     model_id)
        return model_id
Beispiel #2
0
def fake_trained_model(db_engine,
                       train_matrix_uuid="efgh",
                       train_end_time=datetime.datetime(2016, 1, 1)):
    """Store a trivial model row and its training matrix row.

    Args:
        db_engine (sqlalchemy.engine) engine the session is created from
        train_matrix_uuid (string) uuid of the Matrix row that is merged and
            referenced by the model row
        train_end_time (datetime.datetime) train end time stored on the model

    Returns:
        (tuple) the MockTrainedModel instance and the model id of the
        stored model row, for database retrieval
    """
    db_session = sessionmaker(db_engine)()
    training_matrix = Matrix(matrix_uuid=train_matrix_uuid)
    db_session.merge(training_matrix)

    # Only the metadata row is written here; the mock object itself is
    # just handed back to the caller.
    mock_model = MockTrainedModel()
    model_fields = {
        "model_hash": "abcd",
        "train_matrix_uuid": train_matrix_uuid,
        "train_end_time": train_end_time,
    }
    model_row = Model(**model_fields)
    db_session.add(model_row)
    db_session.commit()

    # Read the generated id while the row is still attached to the session.
    stored_id = model_row.model_id
    db_session.close()
    return mock_model, stored_id
Beispiel #3
0
def prepare():
    """Set up a stored mock model and yield the handles needed to use it.

    Inside ``rig_engines()`` this adds a Matrix row, writes a
    MockTrainedModel to the project's model storage engine under the hash
    'abcd', and commits a Model row referencing both.

    Yields:
        (tuple) project_storage, db_engine, and the model id of the stored
        model row

    NOTE(review): this is a generator; presumably it is wrapped by a
    context-manager or fixture decorator outside this view -- confirm.
    """
    with rig_engines() as (db_engine, project_storage):
        train_matrix_uuid = '1234'
        session = sessionmaker(db_engine)()
        try:
            session.add(Matrix(matrix_uuid=train_matrix_uuid))

            # Create the fake trained model and store in db
            trained_model = MockTrainedModel()
            model_hash = 'abcd'
            project_storage.model_storage_engine().write(trained_model, model_hash)
            db_model = Model(model_hash=model_hash,
                             train_matrix_uuid=train_matrix_uuid)
            session.add(db_model)
            session.commit()
            yield project_storage, db_engine, db_model.model_id
        finally:
            # Close the session before rig_engines() tears down, whether
            # setup failed or the consumer is finished.
            session.close()
Beispiel #4
0
def fake_trained_model(project_path, model_storage_engine, db_engine):
    """Creates and stores a trivial trained model

    The mock model is written to the storage engine under the hash "abcd"
    and a Model row with the same hash is committed to the database.

    Args:
        project_path (string) a desired fs/s3 project path (not used by
            this function itself)
        model_storage_engine (triage.storage.ModelStorageEngine)
        db_engine (sqlalchemy.engine)

    Returns:
        (tuple) the MockTrainedModel instance and the model id for
        database retrieval
    """
    trained_model = MockTrainedModel()
    model_storage_engine.write(trained_model, "abcd")
    session = sessionmaker(db_engine)()
    try:
        db_model = Model(model_hash="abcd")
        session.add(db_model)
        session.commit()
        # Read the generated id while the row is still attached.
        model_id = db_model.model_id
    finally:
        # Always release the session so no connection is left open.
        session.close()
    return trained_model, model_id
Beispiel #5
0
def fake_trained_model(db_engine, train_matrix_uuid='efgh'):
    """Creates and stores a trivial trained model and training matrix

    Args:
        db_engine (sqlalchemy.engine)
        train_matrix_uuid (string) uuid of the Matrix row that is merged and
            referenced by the model row

    Returns:
        (tuple) the MockTrainedModel instance and the model id for
        database retrieval
    """
    session = sessionmaker(db_engine)()
    try:
        session.merge(Matrix(matrix_uuid=train_matrix_uuid))

        # Create the fake trained model and store in db
        trained_model = MockTrainedModel()
        db_model = Model(model_hash='abcd', train_matrix_uuid=train_matrix_uuid)
        session.add(db_model)
        session.commit()
        # Read the generated id while the row is still attached.
        model_id = db_model.model_id
    finally:
        # Always release the session so no connection is left open.
        session.close()
    return trained_model, model_id
Beispiel #6
0
def prepare():
    """Set up a stored mock model and yield the handles needed to use it.

    Inside ``rig_engines()`` this adds a Matrix row, writes a
    MockTrainedModel to the project's model storage engine under the hash
    "abcd", and commits a Model row (with ``MODEL_RANDOM_SEED``) that
    references both.

    Yields:
        (tuple) project_storage, db_engine, and the model id of the stored
        model row

    NOTE(review): this is a generator; presumably it is wrapped by a
    context-manager or fixture decorator outside this view -- confirm.
    """
    with rig_engines() as (db_engine, project_storage):
        train_matrix_uuid = "1234"
        # Create the session before entering the try block: if creation
        # itself fails there is nothing to close, and the finally clause
        # must not fail on an unbound name and mask the real error.
        session = sessionmaker(db_engine)()
        try:
            session.add(Matrix(matrix_uuid=train_matrix_uuid))

            # Create the fake trained model and store in db
            trained_model = MockTrainedModel()
            model_hash = "abcd"
            project_storage.model_storage_engine().write(
                trained_model, model_hash)
            db_model = Model(model_hash=model_hash,
                             train_matrix_uuid=train_matrix_uuid,
                             random_seed=MODEL_RANDOM_SEED)
            session.add(db_model)
            session.commit()
            yield project_storage, db_engine, db_model.model_id
        finally:
            session.close()
Beispiel #7
0
def fake_trained_model(project_path,
                       model_storage_engine,
                       db_engine,
                       train_matrix_uuid='efgh'):
    """Creates and stores a trivial trained model and training matrix

    Args:
        project_path (string) a desired fs/s3 project path (not used by
            this function itself)
        model_storage_engine (triage.storage.ModelStorageEngine)
        db_engine (sqlalchemy.engine)
        train_matrix_uuid (string) uuid of the Matrix row that is added and
            referenced by the model row

    Returns:
        (tuple) the MockTrainedModel instance and the model id for
        database retrieval
    """
    session = sessionmaker(db_engine)()
    try:
        session.add(Matrix(matrix_uuid=train_matrix_uuid))

        # Create the fake trained model and store in db
        trained_model = MockTrainedModel()
        model_storage_engine.get_store('abcd').write(trained_model)
        db_model = Model(model_hash='abcd', train_matrix_uuid=train_matrix_uuid)
        session.add(db_model)
        session.commit()
        # Read the generated id while the row is still attached.
        model_id = db_model.model_id
    finally:
        # Always release the session so no connection is left open.
        session.close()
    return trained_model, model_id
Beispiel #8
0
    def _write_model_to_db(
        self,
        class_path,
        parameters,
        feature_names,
        model_hash,
        trained_model,
        model_group_id,
        model_size,
        misc_db_parameters,
        retrain,
    ):
        """Write model metadata and feature importances to the database.

        The model hash is looked up first via ``retrieve_model_id_from_hash``:

        - If a model with this hash exists and neither ``self.replace`` nor
          ``retrain`` is set, nothing is written and the existing model id is
          returned.
        - If a model with this hash exists and ``self.replace`` or ``retrain``
          is set, the newly built ``Model`` is merged into the existing row
          (updating its non-unique attributes, e.g. timestamps) and the
          feature importances are saved again.
        - If no model with this hash exists, a new row is added and its
          feature importances are saved.

        Args:
            class_path (string) A full classpath to the model class
            parameters (dict) hyperparameters to give to the model constructor
            feature_names (list) feature names in order given to model
            model_hash (string) a unique id for the model
            trained_model (object) a trained model object
            model_group_id (int) the unique id for the model group
            model_size (float) the size of the stored model in kB
            misc_db_parameters (dict) params to pass through to the database
            retrain (bool) when truthy, write the model even if its hash
                already exists and ``self.replace`` is not set

        Returns:
            The model id of the reused, updated, or newly added model row.
        """
        model_id = retrieve_model_id_from_hash(self.db_engine, model_hash)
        if model_id and not self.replace and not retrain:
            logger.notice(
                f"Metadata for model {model_id} found in database. Reusing model metadata."
            )
            return model_id

        if retrain:
            logger.debug("Retrain model...")

        # NOTE: the retrain and experiment paths store exactly the same
        # fields. Separate provenance columns (built_by_retrain /
        # built_by_experiment) were commented out in both paths, so a
        # single constructor call covers both.
        model = Model(
            model_hash=model_hash,
            model_type=class_path,
            hyperparameters=parameters,
            model_group_id=model_group_id,
            built_in_triage_run=self.run_id,
            model_size=model_size,
            **misc_db_parameters,
        )
        session = self.sessionmaker()
        try:
            if model_id:
                logger.notice(
                    f"Found model {model_id}, updating non-unique attributes")
                # Reusing the existing primary key makes merge() update
                # the stored row instead of inserting a new one.
                model.model_id = model_id
                session.merge(model)
                session.commit()
            else:
                session.add(model)
                session.commit()
                model_id = model.model_id
                logger.notice(
                    f"Model {model_id}, not found from previous runs. Adding the new model"
                )
        finally:
            # Release the session even when the write fails, so a failed
            # commit does not leave a connection/transaction dangling.
            session.close()

        logger.spam(f"Saving feature importances for model_id {model_id}")
        self._save_feature_importances(model_id,
                                       get_feature_importances(trained_model),
                                       feature_names)
        logger.debug(f"Saved feature importances for model_id {model_id}")
        return model_id