Example #1
    def test_benchmark_metrics_exception(self):

        signals = [self.signal]
        datasets = {self.dataset: signals}
        pipelines = {self.name: self.pipeline}

        metric = 'does-not-exist'
        metrics = [metric]

        with self.assertRaises(ValueError) as ex:
            benchmark.benchmark(pipelines, datasets, self.hyper, metrics, self.rank)

        self.assertTrue(metric in str(ex.exception))
Example #2
    def test_benchmark_metrics_exception(self, evaluate_datasets_mock):
        test_split = False
        detrend = False

        signals = [self.signal]
        datasets = {self.dataset: signals}
        pipelines = {self.name: self.pipeline}

        metric = 'does-not-exist'
        metrics = [metric]

        score = self.set_score(1, ANY, test_split)
        evaluate_datasets_mock.return_value = pd.DataFrame.from_records(
            [score])

        with self.assertRaises(ValueError) as ex:
            benchmark.benchmark(pipelines, datasets, self.hyper, metrics,
                                self.rank, self.distributed, test_split,
                                detrend)

        self.assertTrue(metric in str(ex.exception))
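
Examples #2 through #5 and #7 receive a mock (evaluate_datasets_mock or run_job_mock) as their second parameter, which points to a unittest.mock.patch decorator that was stripped when these snippets were extracted. The sketch below shows one way such a test could be wired up; the import path 'mypackage' and the patch target '_evaluate_datasets' are placeholders inferred from the parameter name, not taken from the source.

# Minimal sketch, not the original test module: the patch target
# 'mypackage.benchmark._evaluate_datasets' and the import path are
# placeholders inferred from the mock parameter name above.
from unittest import TestCase
from unittest.mock import patch

from mypackage import benchmark  # placeholder import path


class TestBenchmark(TestCase):

    @patch('mypackage.benchmark._evaluate_datasets')
    def test_benchmark_metrics_exception(self, evaluate_datasets_mock):
        # patch() injects the mock as the parameter after self, which is
        # how evaluate_datasets_mock arrives in the examples above.
        ...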
Example #3
    def test_benchmark_metrics_list(self, run_job_mock):

        signals = [self.signal]
        datasets = {self.dataset: signals}
        pipelines = {self.name: self.pipeline}

        metric = Mock(autospec=METRICS['f1'], return_value=1)
        metric.__name__ = 'metric-name'
        metrics = [metric]
        metrics_ = {metric.__name__: metric}

        output = self.set_output(1, ANY, ANY)
        output[metric.__name__] = metric
        run_job_mock.return_value = pd.DataFrame.from_records([output])

        order = [
            'pipeline',
            'rank',
            'dataset',
            'signal',
            'iteration',
            'metric-name',
            'status',
            'elapsed',
            'split',
            'run_id']

        expected_return = pd.DataFrame.from_records([{
            'metric-name': metric,
            'rank': 1,
            'elapsed': ANY,
            'split': ANY,
            'status': 'OK',
            'iteration': self.iteration,
            'pipeline': self.name,
            'dataset': self.dataset,
            'signal': self.signal,
            'run_id': self.run_id
        }])[order]

        returned = benchmark.benchmark(
            pipelines, datasets, self.hyper, metrics, self.rank)[order]

        pd.testing.assert_frame_equal(returned, expected_return)

        args = list(self.args())
        args[5] = metrics_

        run_job_mock.assert_called_once_with(tuple(args))
Example #4
    def test_benchmark_pipelines_list(self, run_job_mock):
        signals = [self.signal]
        datasets = {self.dataset: signals}
        pipelines = [self.pipeline]

        hyper = {}

        output = self.set_output(1, ANY, ANY)
        output['pipeline'] = self.pipeline
        run_job_mock.return_value = pd.DataFrame.from_records([output])

        order = [
            'pipeline',
            'rank',
            'dataset',
            'signal',
            'iteration',
            'metric-name',
            'status',
            'elapsed',
            'split',
            'run_id']

        expected_return = pd.DataFrame.from_records([{
            'metric-name': 1,
            'rank': 1,
            'elapsed': ANY,
            'split': ANY,
            'status': 'OK',
            'iteration': self.iteration,
            'pipeline': self.pipeline,
            'dataset': self.dataset,
            'signal': self.signal,
            'run_id': self.run_id
        }])[order]

        returned = benchmark.benchmark(
            pipelines, datasets, hyper, self.metrics, self.rank)[order]

        pd.testing.assert_frame_equal(returned, expected_return)

        args = list(self.args())
        args[1] = self.pipeline
        args[4] = hyper

        run_job_mock.assert_called_once_with(tuple(args))
Example #5
    def test_benchmark_metrics_list(self, evaluate_datasets_mock):
        test_split = False
        detrend = False

        signals = [self.signal]
        datasets = {self.dataset: signals}
        pipelines = {self.name: self.pipeline}

        metric = Mock(autospec=METRICS['f1'], return_value=1)
        metric.__name__ = 'metric-name'
        metrics = [metric]
        metrics_ = {metric.__name__: metric}

        score = self.set_score(1, ANY, test_split)
        score[metric.__name__] = metric
        evaluate_datasets_mock.return_value = pd.DataFrame.from_records(
            [score])

        order = [
            'pipeline', 'rank', 'dataset', 'elapsed', 'metric-name', 'signal',
            'split', 'status'
        ]

        expected_return = pd.DataFrame.from_records([{
            'rank': 1,
            'metric-name': metric,
            'elapsed': ANY,
            'split': test_split,
            'pipeline': self.name,
            'dataset': self.dataset,
            'signal': self.signal,
            'status': 'OK'
        }])[order]

        returned = benchmark.benchmark(pipelines, datasets, self.hyper,
                                       metrics, self.rank, self.distributed,
                                       test_split, detrend)

        pd.testing.assert_frame_equal(returned, expected_return)

        evaluate_datasets_mock.assert_called_once_with(pipelines, datasets,
                                                       self.hyper, metrics_,
                                                       self.distributed,
                                                       test_split, detrend)
Example #6
def _evaluate(args):
    if args.all:
        pipelines = get_available_templates()
    else:
        pipelines = args.pipeline

    scores = benchmark(pipelines=pipelines, datasets=args.signal, metrics=args.metric,
                       rank=args.rank, test_split=args.holdout)

    if args.output:
        print('Writing results in {}'.format(args.output))
        scores.to_csv(args.output, index=False)

    print(tabulate.tabulate(
        scores,
        showindex=False,
        tablefmt='github',
        headers=scores.columns
    ))
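
Example #6 forwards parsed CLI arguments into benchmark() by keyword. Based only on that call pattern, a direct invocation could look like the sketch below; the import path and the pipeline/signal names are placeholders, not values confirmed by the source.

# Sketch mirroring the keyword pattern used in Example #6. 'mypackage',
# 'lstm_pipeline' and 'signal-1' are placeholders; the source only confirms
# the keyword names pipelines, datasets, metrics, rank and test_split, and
# that the return value behaves like a pandas DataFrame.
from mypackage.benchmark import benchmark  # placeholder import path

scores = benchmark(
    pipelines=['lstm_pipeline'],   # pipeline names or objects to evaluate
    datasets=['signal-1'],         # the tests above also pass a {dataset: signals} dict
    metrics=['f1'],                # 'f1' is a key of METRICS in the examples above
    rank='f1',                     # assumed to name the metric used for the rank column
    test_split=False,              # mapped from args.holdout in Example #6
)

scores.to_csv('results.csv', index=False)  # same persistence step as Example #6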
Example #7
    def test_benchmark_pipelines_list(self, evaluate_datasets_mock):
        test_split = False
        detrend = False

        signals = [self.signal]
        datasets = {self.dataset: signals}
        pipelines = [self.pipeline]
        pipelines_ = {self.pipeline: self.pipeline}

        score = self.set_score(1, ANY, test_split)
        score['pipeline'] = self.pipeline
        evaluate_datasets_mock.return_value = pd.DataFrame.from_records(
            [score])

        order = [
            'pipeline', 'rank', 'dataset', 'elapsed', 'metric-name', 'signal',
            'split', 'status'
        ]

        expected_return = pd.DataFrame.from_records([{
            'rank': 1,
            'metric-name': 1,
            'elapsed': ANY,
            'split': test_split,
            'pipeline': self.pipeline,
            'dataset': self.dataset,
            'signal': self.signal,
            'status': 'OK'
        }])[order]

        returned = benchmark.benchmark(pipelines, datasets, self.hyper,
                                       self.metrics, self.rank,
                                       self.distributed, test_split, detrend)

        pd.testing.assert_frame_equal(returned, expected_return)

        evaluate_datasets_mock.assert_called_once_with(pipelines_, datasets,
                                                       self.hyper,
                                                       self.metrics,
                                                       self.distributed,
                                                       test_split, detrend)