예제 #1
0
def test_predictor_get_train_columns():
    """Predict on the train and test stores and check the stored rows.

    Both matrix stores come from ``sample_metta_csv_diff_order``; the
    training store's columns are passed as ``train_matrix_columns`` on
    every ``predict`` call.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _ignored, model_id = fake_trained_model(
                project_path,
                model_storage_engine,
                db_engine,
                train_matrix_uuid=train_store.uuid,
            )
            predictor = Predictor(
                project_path,
                model_storage_engine,
                db_engine,
            )

            # fake_trained_model was given the train store's uuid above; the
            # test store's uuid is handed to MatrixFactory here and committed
            # (presumably so a matrix record exists for it -- confirm).
            MatrixFactory(matrix_uuid=test_store.uuid)
            session.commit()

            # Run identical checks for the training and the testing matrix.
            stores_by_type = (("train", train_store), ("test", test_store))
            for mat_type, store in stores_by_type:
                predict_proba = predictor.predict(
                    model_id,
                    store,
                    misc_db_parameters={},
                    train_matrix_columns=train_store.columns(),
                )
                # 1. predict returned something non-empty
                assert len(predict_proba) > 0

                # 2. rows exist in the <mat_type>_results.<mat_type>_predictions
                # table and join to model_metadata.models on model_id
                query = '''select entity_id, as_of_date
                    from {}_results.{}_predictions
                    join model_metadata.models using (model_id)'''.format(
                    mat_type, mat_type)
                records = list(db_engine.execute(query))
                assert len(records) > 0
예제 #2
0
def test_calculate_and_save():
    """Save individual importances twice and expect identical rows.

    The calculator is built with ``methods=['sample']`` and
    ``replace=False`` and is run two times against the same model and
    test store.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)
            model_storage_engine = InMemoryModelStorageEngine(project_path)
            calculator = IndividualImportanceCalculator(
                db_engine,
                methods=['sample'],
                replace=False,
            )

            # NOTE(review): the query has no ORDER BY, so the equality check
            # at the end relies on the row order being stable -- confirm.
            importances_query = '''select entity_id, as_of_date
                from test_results.individual_importances
                join model_metadata.models using (model_id)'''

            def fetch_importances():
                """Return all joined importance rows currently stored."""
                return list(db_engine.execute(importances_query))

            # Given: a trained model (keyed to the train store's uuid) and a
            # test matrix.
            _ignored, model_id = fake_trained_model(
                project_path,
                model_storage_engine,
                db_engine,
                train_matrix_uuid=train_store.uuid,
            )

            # When: importances are calculated and saved ...
            calculator.calculate_and_save_all_methods_and_dates(
                model_id,
                test_store,
            )
            # Then: rows show up in the results schema.
            records = fetch_importances()
            assert len(records) > 0

            # When: the same calculation is run a second time ...
            calculator.calculate_and_save_all_methods_and_dates(
                model_id,
                test_store,
            )
            # Then: the stored rows are unchanged.
            new_records = fetch_importances()
            assert len(records) == len(new_records)
            assert records == new_records
예제 #3
0
def test_predictor_get_train_columns():
    """Predict on the test store and check that prediction rows are stored.

    Both matrix stores come from ``sample_metta_csv_diff_order``; the
    training store's columns are passed as ``train_matrix_columns``.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _ignored, model_id = fake_trained_model(
                project_path,
                model_storage_engine,
                db_engine,
                train_matrix_uuid=train_store.uuid,
            )
            predictor = Predictor(
                project_path,
                model_storage_engine,
                db_engine,
            )

            predict_proba = predictor.predict(
                model_id,
                test_store,
                misc_db_parameters={},
                train_matrix_columns=train_store.columns(),
            )
            # 1. predict returned something non-empty
            assert len(predict_proba) > 0

            # 2. rows exist in results.predictions and join to
            # results.models on model_id
            query = '''select entity_id, as_of_date
                from results.predictions
                join results.models using (model_id)'''
            records = list(db_engine.execute(query))
            assert len(records) > 0