Esempi in Python per InMemoryMatrixStore, esempi in Python per triage.storage.InMemoryMatrixStore

Esempio n. 1

0

Mostra file

File: run_models.py Progetto: snowdj/police-eis

    def setup_train_models(self, model_storage):
        """Load the training matrix and start model training on it.

        The train-matrix metadata is built from ``self.temporal_split`` and
        stored (JSON-encoded) under ``self.misc_db_parameters['config']``.
        The matrix is loaded through ``self.load_store_matrix``, wrapped in
        an ``InMemoryMatrixStore`` and handed to a ``ModelTrainer``.

        Args:
            model_storage: forwarded to ``ModelTrainer`` as
                ``model_storage_engine``.

        Returns:
            A ``(train_matrix_uuid, model_ids_generator)`` pair, or
            ``(None, None)`` when the last column of the loaded matrix
            contains exactly one unique value.
        """
        date_format = "%Y-%m-%d"
        split = self.temporal_split
        as_of_dates = split['train_as_of_dates']

        train_matrix_id = str(
            [sorted(as_of_dates), self.labels, split['prediction_window']])

        # Metadata describing the train matrix
        window_start = datetime.datetime.strptime(split['train_start_date'],
                                                  date_format)
        window_end = datetime.datetime.strptime(split['train_end_date'],
                                                date_format)
        train_metadata = self._make_metadata(window_start, window_end,
                                             train_matrix_id, as_of_dates)

        # Include the metadata in the config that is written to the db
        serialized_metadata = json.dumps(train_metadata,
                                         default=self.dt_handler,
                                         sort_keys=True)
        self.misc_db_parameters['config']['train_metadata'] = \
            serialized_metadata
        self.misc_db_parameters['config']['labels_config'] = self.labels_config

        # Load the train matrix
        load_message = 'Load train matrix using as of dates: {}'.format(
            as_of_dates)
        log.info(load_message)
        train_df, train_matrix_uuid = self.load_store_matrix(
            train_metadata, as_of_dates)

        # The last column is the one passed to the matrix store separately
        # below; a single unique value there means there is nothing to train.
        if len(train_df.iloc[:, -1].unique()) == 1:
            log.warning(
                '''Train Matrix %s had only one
                        unique value, no point in training this model. Skipping
                        ''', train_matrix_uuid)
            return None, None

        # Drop the index columns from the data-frame
        for index_column in train_metadata['indices']:
            if index_column not in train_df.columns:
                continue
            del train_df[index_column]

        log.info('Store in metta')
        # Recorded so the matrix uuid is stored in the db with the models
        self.misc_db_parameters['train_matrix_uuid'] = train_matrix_uuid
        all_but_last = train_df.iloc[:, :-1]
        last_column = train_df.iloc[:, -1]
        train_matrix_store = InMemoryMatrixStore(all_but_last, train_metadata,
                                                 last_column)

        trainer = ModelTrainer(
            project_path=self.project_path,
            experiment_hash=self.experiment_hash,
            model_storage_engine=model_storage,
            matrix_store=train_matrix_store,
            db_engine=self.db_engine,
        )
        log.info('Train Models')
        model_ids_generator = trainer.generate_trained_models(
            grid_config=self.grid_config,
            misc_db_parameters=self.misc_db_parameters,
            replace=True,
        )
        return train_matrix_uuid, model_ids_generator

Esempio n. 2

0

Mostra file

def test_n_jobs_not_new_model():
    """Check that varying ``n_jobs`` adds no train tasks and no model groups.

    The random-forest grid lists two ``n_jobs`` values. The test expects 35
    train tasks (3 AdaBoost + 32 random forest) instead of 67, and expects
    no ``n_jobs`` entry in any stored ``model_parameters``.
    """
    random_forest_grid = {
        'n_estimators': [10, 100],
        'max_features': ['sqrt', 'log2'],
        'max_depth': [5, 10, 15, 20],
        'criterion': ['gini', 'entropy'],
        'n_jobs': [12, 24],
    }
    grid_config = {
        'sklearn.ensemble.AdaBoostClassifier': {
            'n_estimators': [10, 100, 1000],
        },
        'sklearn.ensemble.RandomForestClassifier': random_forest_grid,
    }

    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            storage_engine = S3ModelStorageEngine(s3_conn,
                                                  'econ-dev/inspections')
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=storage_engine,
                db_engine=engine,
                model_group_keys=['label_name', 'label_window'],
            )

            training_frame = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': ['good', 'bad'],
            })
            matrix_metadata = {
                'label_window': '1d',
                'end_time': datetime.datetime.now(),
                'beginning_of_time': datetime.date(2012, 12, 20),
                'label_name': 'label',
                'metta-uuid': '1234',
                'feature_names': ['ft1', 'ft2'],
            }
            matrix_store = InMemoryMatrixStore(training_frame,
                                               matrix_metadata)
            train_tasks = trainer.generate_train_tasks(
                grid_config, dict(), matrix_store)

            # 32+3; it would be (32*2)+3 if n_jobs variations were kept
            assert len(train_tasks) == 35
            tasks_with_n_jobs = [
                task for task in train_tasks
                if 'n_jobs' in task['parameters']
            ]
            assert len(tasks_with_n_jobs) == 32

            for train_task in train_tasks:
                trainer.process_train_task(**train_task)

            stored_groups = engine.execute(
                'select model_parameters from results.model_groups')
            for row in stored_groups:
                model_parameters = row[0]
                assert 'n_jobs' not in model_parameters

Esempio n. 3

0

Mostra file

    def test_retry_recovery(self):
        """Training should succeed when the database returns after one sleep.

        The trainer is built against a temporary Postgres server that is
        shut down before training starts. ``time.sleep`` is patched so that
        the first call brings a new server up on the same port; the test
        then expects exactly one recorded call on the patched sleep.
        """
        grid_config = {
            'sklearn.ensemble.AdaBoostClassifier': {'n_estimators': [10]},
        }

        with testing.postgresql.Postgresql() as postgresql:
            port = postgresql.settings['port']
            engine = create_engine(postgresql.url())
            ensure_db(engine)
            storage_engine = InMemoryModelStorageEngine(project_path='')
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=storage_engine,
                db_engine=engine,
                model_group_keys=['label_name', 'label_window'],
            )

            training_frame = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': ['good', 'bad'],
            })
            matrix_metadata = {
                'label_window': '1d',
                'end_time': datetime.datetime.now(),
                'beginning_of_time': datetime.date(2012, 12, 20),
                'label_name': 'label',
                'metta-uuid': '1234',
                'feature_names': ['ft1', 'ft2'],
            }
            matrix_store = InMemoryMatrixStore(training_frame,
                                               matrix_metadata)

        # Training starts without a database server; the server is brought
        # back after the first sleep. It is kept on self so that it does not
        # go out of scope too early and shut down.
        self.new_server = None

        def replace_db(arg):
            # Side effect for the patched sleep: start a fresh server on the
            # port the trainer's engine was created for.
            self.new_server = testing.postgresql.Postgresql(port=port)
            revived_engine = create_engine(self.new_server.url())
            ensure_db(revived_engine)

        with patch('time.sleep') as time_mock:
            time_mock.side_effect = replace_db
            try:
                trainer.train_models(grid_config, dict(), matrix_store)
            finally:
                if self.new_server is not None:
                    self.new_server.stop()
            assert len(time_mock.mock_calls) == 1

Esempio n. 4

0

Mostra file

    def train_test_models(self, train_matrix_uuid, model_ids_generator,
                          model_storage):
        """Predict and evaluate each trained model on every test as-of date.

        For each model id yielded by ``model_ids_generator`` and each date in
        ``self.temporal_split['test_as_of_dates']``, the test matrix is
        loaded, predictions are made with a ``Predictor`` and passed to
        ``self.evaluations``. The model is deleted from the predictor once
        all its dates are done.

        Args:
            train_matrix_uuid: not used in this method.
            model_ids_generator: iterable of trained model ids.
            model_storage: forwarded to ``Predictor`` as
                ``model_storage_engine``.

        Returns:
            None.
        """
        date_format = "%Y-%m-%d"
        predictor = Predictor(
            project_path=self.project_path,
            model_storage_engine=model_storage,
            db_engine=self.db_engine,
        )

        for trained_model_id in model_ids_generator:
            # Prediction
            log.info('Predict for model_id: {}'.format(trained_model_id))

            for test_date in self.temporal_split['test_as_of_dates']:
                load_message = 'Load test matrix for as of date: {}'.format(
                    test_date)
                log.info(load_message)

                test_matrix_id = str([
                    test_date,
                    self.labels,
                    self.temporal_split['prediction_window'],
                ])

                # The same date is used as both bounds of the metadata
                as_of = datetime.datetime.strptime(test_date, date_format)
                test_metadata = self._make_metadata(
                    as_of, as_of, test_matrix_id, [test_date])

                test_df, test_uuid = self.load_store_matrix(
                    test_metadata, [test_date])
                misc_db_parameters = {'matrix_uuid': test_uuid}

                # Drop the index columns from the data-frame
                for index_column in test_metadata['indices']:
                    if index_column not in test_df.columns:
                        continue
                    del test_df[index_column]

                # Wrap the matrix: all-but-last columns, then the last one
                all_but_last = test_df.iloc[:, :-1]
                last_column = test_df.iloc[:, -1]
                test_matrix_store = InMemoryMatrixStore(
                    all_but_last, test_metadata, last_column)

                predictions_binary, predictions_proba = predictor.predict(
                    trained_model_id, test_matrix_store, misc_db_parameters)

                # Evaluation
                log.info('Generate Evaluations for model_id: {}'.format(
                    trained_model_id))
                self.evaluations(
                    predictions_proba,
                    predictions_binary,
                    test_df.iloc[:, -1],
                    trained_model_id,
                    test_date,
                )

            # Free the trained model from memory
            predictor.delete_model(trained_model_id)

Esempio n. 5

0

Mostra file

    def test_retry_max(self):
        """Training against a stopped database should retry, then raise.

        The trainer is built against a temporary Postgres server that no
        longer exists when training starts. With ``time.sleep`` patched, the
        test expects ``sqlalchemy.exc.OperationalError`` and more than five
        recorded calls on the patched sleep.
        """
        grid_config = {
            'sklearn.ensemble.AdaBoostClassifier': {'n_estimators': [10]},
        }

        # Set up a basic model training run.
        # TODO abstract the setup of a basic model training run where
        # we don't worry about the specific values used? It would make
        # tests like this require a bit less noise to read past.
        with testing.postgresql.Postgresql() as postgresql:
            engine = create_engine(postgresql.url())
            ensure_db(engine)
            storage_engine = InMemoryModelStorageEngine(project_path='')
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=storage_engine,
                db_engine=engine,
                model_group_keys=['label_name', 'label_window'],
            )

            training_frame = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': ['good', 'bad'],
            })
            matrix_metadata = {
                'label_window': '1d',
                'end_time': datetime.datetime.now(),
                'beginning_of_time': datetime.date(2012, 12, 20),
                'label_name': 'label',
                'metta-uuid': '1234',
                'feature_names': ['ft1', 'ft2'],
            }
            matrix_store = InMemoryMatrixStore(training_frame,
                                               matrix_metadata)

        # The postgres server went out of scope above and no longer exists
        with patch('time.sleep') as time_mock:
            with self.assertRaises(sqlalchemy.exc.OperationalError):
                trainer.train_models(grid_config, dict(), matrix_store)
            # Check only that the retrying module is used sanely, rather
            # than matching the exact number of calls specified by the code
            assert len(time_mock.mock_calls) > 5

Esempio n. 6

0

Mostra file

File: test_predictors.py Progetto: pvdb2178/triage

def test_predictor_composite_index():
    """Predict on a matrix indexed by ``(entity_id, as_of_date)``.

    Expects four returned predictions and four rows in
    ``results.predictions`` that join to ``results.models``.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)
        _, model_id = fake_trained_model(
            project_path, model_storage_engine, db_engine)
        predictor = Predictor(project_path, model_storage_engine, db_engine)

        # Build the prediction set: two entities on each of two days
        first_day = datetime.datetime(2011, 1, 1)
        second_day = datetime.datetime(2011, 1, 2)
        raw_frame = pandas.DataFrame.from_dict({
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [first_day, first_day, second_day, second_day],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7],
        })
        matrix = raw_frame.set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_window': '3month',
            'metta-uuid': '1234',
        }
        matrix_store = InMemoryMatrixStore(matrix, metadata)
        predict_proba = predictor.predict(
            model_id, matrix_store, misc_db_parameters=dict())

        # 1. the returned predictions have the desired length
        assert len(predict_proba) == 4

        # 2. the predictions table entries are present and can be linked
        # to the original models
        linked_predictions_sql = '''select entity_id, as_of_date
            from results.predictions
            join results.models using (model_id)'''
        records = list(db_engine.execute(linked_predictions_sql))
        assert len(records) == 4

Esempio n. 7

0

Mostra file

def test_model_trainer():
    """Exercise ``ModelTrainer`` against a temporary database and mocked S3.

    Trains a four-combination logistic-regression grid, then checks the
    stored rows, the cached model pickles, and the model ids returned when
    training is repeated after deleting database rows or cached models.
    """
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)

        def fetch(sql):
            # Run ``sql`` and materialize every returned row.
            return list(engine.execute(sql))

        grid_config = {
            'sklearn.linear_model.LogisticRegression': {
                'C': [0.00001, 0.0001],
                'penalty': ['l1', 'l2'],
                'random_state': [2193],
            },
        }

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')

            # Build the training set
            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': ['good', 'bad'],
            })
            metadata = {
                'beginning_of_time': datetime.date(2012, 12, 20),
                'end_time': datetime.date(2016, 12, 20),
                'label_name': 'label',
                'label_window': '1y',
                'metta-uuid': '1234',
                'feature_names': ['ft1', 'ft2'],
            }
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(s3_conn, project_path)
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                db_engine=engine,
                model_group_keys=['label_name', 'label_window'],
            )
            matrix_store = InMemoryMatrixStore(matrix, metadata)

            def train():
                # Every training run in this test uses the same arguments.
                return trainer.train_models(
                    grid_config=grid_config,
                    misc_db_parameters=dict(),
                    matrix_store=matrix_store,
                )

            model_ids = train()

            # 1. the models and feature importances table entries are present
            importances = fetch('select * from results.feature_importances')
            assert len(importances) == 4 * 3  # maybe exclude entity_id?

            model_rows = fetch('select model_hash from results.models')
            assert len(model_rows) == 4

            cache_keys = [
                model_cache_key(project_path, model_row[0], s3_conn)
                for model_row in model_rows
            ]

            # 2. the model groups are distinct
            group_rows = fetch(
                'select distinct model_group_id from results.models')
            assert len(group_rows) == 4

            # 3. all four models are cached
            model_pickles = [
                pickle.loads(cache_key.get()['Body'].read())
                for cache_key in cache_keys
            ]
            assert len(model_pickles) == 4
            loaded = [model for model in model_pickles if model is not None]
            assert len(loaded) == 4

            # 4. the cached models can make predictions
            test_matrix = pandas.DataFrame.from_dict({
                'entity_id': [3, 4],
                'feature_one': [4, 4],
                'feature_two': [6, 5],
            })
            for model_pickle in model_pickles:
                predictions = model_pickle.predict(test_matrix)
                assert len(predictions) == 2

            # 5. when run again, the same models are returned
            new_model_ids = train()
            assert len(fetch('select model_hash from results.models')) == 4
            assert model_ids == new_model_ids

            # 6. if metadata is deleted but the cache is still there,
            # that one is retrained and its feature importance records
            # are replaced
            engine.execute(
                'delete from results.feature_importances where model_id = 3')
            engine.execute('delete from results.models where model_id = 3')
            new_model_ids = train()
            expected_model_ids = [1, 2, 4, 5]
            assert expected_model_ids == sorted(new_model_ids)
            stored_model_ids = [
                row['model_id'] for row in engine.execute(
                    'select model_id from results.models order by 1 asc')
            ]
            assert stored_model_ids == expected_model_ids

            importances = fetch('select * from results.feature_importances')
            assert len(importances) == 4 * 3  # maybe exclude entity_id?

            # 7. if the cache is missing but the metadata is still there,
            # the metadata is reused
            for row in engine.execute('select model_hash from results.models'):
                model_storage_engine.get_store(row[0]).delete()
            expected_model_ids = [1, 2, 4, 5]
            new_model_ids = train()
            assert expected_model_ids == sorted(new_model_ids)

            # 8. the generator interface works the same way
            new_model_ids = trainer.generate_trained_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=matrix_store,
            )
            assert expected_model_ids == sorted(new_model_ids)

Esempio n. 8

0

Mostra file

File: test_predictors.py Progetto: pvdb2178/triage

def test_predictor():
    """Exercise ``Predictor`` against a temporary database and mocked S3.

    Checks the length of the returned predictions, the rows written to
    ``results.predictions``, that re-predicting for an ``end_time`` already
    stored does not add rows, and that a deleted model loads as ``None``.
    """
    # Both row checks below read the same join, so the query is defined once.
    linked_predictions_sql = '''select entity_id, as_of_date
                from results.predictions
                join results.models using (model_id)'''

    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(s3_conn, project_path)
            _, model_id = fake_trained_model(
                project_path, model_storage_engine, db_engine)
            predictor = Predictor(project_path, model_storage_engine, db_engine)

            # create prediction set
            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE,
                'label_window': '3month',
                'metta-uuid': '1234',
            }

            matrix_store = InMemoryMatrixStore(matrix, metadata)
            predict_proba = predictor.predict(
                model_id, matrix_store, misc_db_parameters=dict())

            # assert
            # 1. that the returned predictions are of the desired length
            assert len(predict_proba) == 2

            # 2. that the predictions table entries are present and
            # can be linked to the original models
            records = list(db_engine.execute(linked_predictions_sql))
            assert len(records) == 2

            # 3. that the contained as_of_dates match what we sent in
            for record in records:
                assert record[1].date() == AS_OF_DATE

            # 4. that the entity ids match the given dataset
            assert sorted([record[0] for record in records]) == [1, 2]

            # 5. running with same model_id, different as of date
            # then with same as of date only replaces the records
            # with the same date
            new_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            new_metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE + datetime.timedelta(days=1),
                'label_window': '3month',
                'metta-uuid': '1234',
            }
            new_matrix_store = InMemoryMatrixStore(new_matrix, new_metadata)
            predictor.predict(
                model_id, new_matrix_store, misc_db_parameters=dict())
            predictor.predict(
                model_id, matrix_store, misc_db_parameters=dict())
            records = list(db_engine.execute(linked_predictions_sql))
            assert len(records) == 4

            # 6. That we can delete the model when done predicting on it.
            # Identity comparison: an object with a permissive __eq__ must
            # not be able to satisfy this check.
            predictor.delete_model(model_id)
            assert predictor.load_model(model_id) is None

Esempio n. 9

0

Mostra file

File: test_predictors.py Progetto: pvdb2178/triage

def test_predictor_retrieve():
    """Predictions read back from the database must keep the matrix order.

    With ``replace=False`` a second ``predict`` call is expected to return
    the same array as the first without calling ``load_model``, even after
    the physical row order of the predictions table has been changed.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)
        _, model_id = fake_trained_model(
            project_path, model_storage_engine, db_engine)
        predictor = Predictor(
            project_path, model_storage_engine, db_engine, replace=False)

        # Build the prediction set: two entities on each of two days
        first_day = datetime.date(2011, 1, 1).isoformat()
        second_day = datetime.date(2011, 1, 2).isoformat()
        matrix_data = {
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [first_day, first_day, second_day, second_day],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7],
        }
        raw_frame = pandas.DataFrame.from_dict(matrix_data)
        matrix = raw_frame.set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_window': '3month',
            'metta-uuid': '1234',
        }
        matrix_store = InMemoryMatrixStore(matrix, metadata)
        predict_proba = predictor.predict(
            model_id, matrix_store, misc_db_parameters=dict())

        # A second run should return matching predictions retrieved from
        # the database. The subtle part is ordering:
        #
        # Predictions fetched from the database and presented as a numpy
        # array can come back in a bad order, because the given matrix may
        # not be 'ordered' by any criteria that an ORDER BY clause can
        # easily express.
        #
        # Without ORDER BY the rows come back in the table's physical
        # order, which, unless something has happened to the table, is the
        # insertion order and could very well be the matrix order. So the
        # bug would not necessarily show itself immediately, but when it
        # does go wrong the scores are garbage.
        #
        # To simulate a table order mutation that can happen over time, the
        # first row is removed and put back at the end. If the Predictor
        # does not explicitly reorder the results, this test fails.
        new_session = sessionmaker(bind=db_engine)
        session = new_session()
        first_prediction = session.query(Prediction).first()
        session.delete(first_prediction)
        session.commit()

        make_transient(first_prediction)
        session = new_session()
        session.add(first_prediction)
        session.commit()

        # Replaced with a Mock so any call to load_model can be detected
        predictor.load_model = Mock()
        new_predict_proba = predictor.predict(
            model_id, matrix_store, misc_db_parameters=dict())
        assert_array_equal(new_predict_proba, predict_proba)
        assert not predictor.load_model.called

Esempio n. 10

0

Mostra file

def test_integration():
    """Run trainer, predictor and scorer together and check the stored rows.

    Trains a four-combination logistic-regression grid, predicts and scores
    each model on two test stores, then compares the contents of
    ``results.predictions`` and ``results.evaluations`` with expected rows.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'

            # Build the train matrix and its store
            train_frame = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8],
            })
            train_matrix = train_frame.set_index('entity_id')
            train_metadata = {
                'start_time': datetime.date(2012, 12, 20),
                'end_time': datetime.date(2016, 12, 20),
                'label_name': 'label',
                'prediction_window': '1y',
                'feature_names': ['ft1', 'ft2'],
            }
            train_store = InMemoryMatrixStore(train_matrix, train_metadata)

            as_of_dates = [
                datetime.date(2016, 12, 21),
                datetime.date(2017, 1, 21),
            ]

            def build_test_store(as_of_date):
                # One single-entity test store whose end_time is as_of_date.
                test_frame = pandas.DataFrame.from_dict({
                    'entity_id': [3],
                    'feature_one': [8],
                    'feature_two': [5],
                    'label': [5],
                })
                return InMemoryMatrixStore(
                    test_frame.set_index('entity_id'),
                    {'label_name': 'label', 'end_time': as_of_date},
                )

            test_stores = [
                build_test_store(as_of_date) for as_of_date in as_of_dates
            ]

            model_storage_engine = S3ModelStorageEngine(s3_conn, project_path)

            # Instantiate the pipeline objects
            trainer = ModelTrainer(
                project_path=project_path,
                model_storage_engine=model_storage_engine,
                matrix_store=None,
                db_engine=db_engine,
            )
            predictor = Predictor(project_path, model_storage_engine,
                                  db_engine)
            scorer_config = [
                {'metrics': ['precision@'], 'thresholds': {'top_n': [5]}},
            ]
            model_scorer = ModelScorer(scorer_config, db_engine)

            # Run the pipeline
            grid_config = {
                'sklearn.linear_model.LogisticRegression': {
                    'C': [0.00001, 0.0001],
                    'penalty': ['l1', 'l2'],
                    'random_state': [2193],
                },
            }
            model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=train_store,
            )

            for model_id in model_ids:
                for as_of_date, test_store in zip(as_of_dates, test_stores):
                    predictions, predictions_proba = predictor.predict(
                        model_id, test_store, misc_db_parameters=dict())

                    model_scorer.score(
                        predictions_proba,
                        predictions,
                        test_store.labels(),
                        model_id,
                        as_of_date,
                        as_of_date,
                        '6month',
                    )

            # Rows are expected date by date, and by model id within a date
            expected_days = [
                datetime.datetime(2016, 12, 21),
                datetime.datetime(2017, 1, 21),
            ]
            expected_model_ids = (1, 2, 3, 4)

            # 1. the predictions table entries are present and can be
            # linked to the original models
            prediction_rows = list(
                db_engine.execute('''select entity_id, model_id, as_of_date
                from results.predictions
                join results.models using (model_id)
                order by 3, 2'''))
            expected_predictions = [
                (3, model_id, day)
                for day in expected_days
                for model_id in expected_model_ids
            ]
            assert prediction_rows == expected_predictions

            # 2. the evaluations are there
            evaluation_rows = list(
                db_engine.execute('''
                    select model_id, evaluation_start_time, metric, parameter
                    from results.evaluations order by 2, 1'''))
            expected_evaluations = [
                (model_id, day, 'precision@', '5_abs')
                for day in expected_days
                for model_id in expected_model_ids
            ]
            assert evaluation_rows == expected_evaluations