def test__publish_flow_if_necessary(self):
        """Check that publishing an equivalent flow twice yields one flow id.

        A flow is built from a ``LogisticRegression``, given a sentinel name
        and published. A second flow built from the same estimator and the
        same sentinel must end up with the identical ``flow_id``.
        """
        # The task is fetched but its result is not used by this test.
        openml.tasks.get_task(115)

        estimator = LogisticRegression()

        # First publication: a fresh sentinel is generated (None requested).
        first_flow, sentinel = self._add_sentinel_to_flow_name(
            sklearn_to_flow(estimator), None)
        openml.runs.functions._publish_flow_if_necessary(first_flow)
        self.assertIsNotNone(first_flow.flow_id)

        # Second publication: the sentinel from the first flow is re-used.
        second_flow, _ = self._add_sentinel_to_flow_name(
            sklearn_to_flow(estimator), sentinel)
        openml.runs.functions._publish_flow_if_necessary(second_flow)
        self.assertEqual(second_flow.flow_id, first_flow.flow_id)
Esempio n. 2
0
    def test__publish_flow_if_necessary(self):
        """Publishing the same flow a second time must not change its id.

        Both flows are derived from one ``LogisticRegression`` instance and
        share the same sentinel in their name; after each has gone through
        ``_publish_flow_if_necessary`` their ``flow_id`` values must match.
        """
        publish = openml.runs.functions._publish_flow_if_necessary

        # Task lookup happens first; nothing below reads its result.
        openml.tasks.get_task(115)

        model = LogisticRegression()

        original = sklearn_to_flow(model)
        original, sentinel = self._add_sentinel_to_flow_name(original, None)
        publish(original)
        self.assertIsNotNone(original.flow_id)

        repeat = sklearn_to_flow(model)
        repeat, _ = self._add_sentinel_to_flow_name(repeat, sentinel)
        publish(repeat)
        self.assertEqual(repeat.flow_id, original.flow_id)
Esempio n. 3
0
    def test_parse_parameters_flow_not_on_server(self):
        """``_parse_parameters`` must raise for flows lacking a ``flow_id``.

        Two cases are exercised: a plain ``LogisticRegression`` flow with no
        id at all, and an ``AdaBoostClassifier`` flow whose own id is set
        while the expected error still names the ``LogisticRegression`` flow.
        """
        # ``assertRaisesRegexp`` is a deprecated alias that was removed in
        # Python 3.12. Prefer the current name and only fall back to the
        # old one on interpreters that do not provide it.
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                               or self.assertRaisesRegexp)
        expected_message = (
            'Flow sklearn.linear_model.logistic.LogisticRegression'
            ' has no flow_id!')

        model = LogisticRegression()
        flow = sklearn_to_flow(model)
        assert_raises_regex(
            ValueError, expected_message, OpenMLRun._parse_parameters, flow)

        model = AdaBoostClassifier(base_estimator=LogisticRegression())
        flow = sklearn_to_flow(model)
        # Only the outer flow receives an id here.
        flow.flow_id = 1
        assert_raises_regex(
            ValueError, expected_message, OpenMLRun._parse_parameters, flow)
Esempio n. 4
0
    def test_parse_parameters_flow_not_on_server(self):
        """Parsing parameters of an unpublished flow must raise ValueError.

        Covers a flow with no ``flow_id`` and a flow whose top-level
        ``flow_id`` is set; in both cases the expected message names the
        ``LogisticRegression`` flow as the one without an id.
        """
        # The ``assertRaisesRegexp`` spelling is deprecated and no longer
        # exists on Python 3.12+; use ``assertRaisesRegex`` when available
        # and keep the old name only as a fallback.
        raises_regex = (getattr(self, 'assertRaisesRegex', None)
                        or self.assertRaisesRegexp)
        message = ('Flow sklearn.linear_model.logistic.LogisticRegression '
                   'has no flow_id!')

        # Case 1: the flow itself was never given an id.
        flow = sklearn_to_flow(LogisticRegression())
        with raises_regex(ValueError, message):
            OpenMLRun._parse_parameters(flow)

        # Case 2: only the outer flow gets an id.
        flow = sklearn_to_flow(
            AdaBoostClassifier(base_estimator=LogisticRegression()))
        flow.flow_id = 1
        with raises_regex(ValueError, message):
            OpenMLRun._parse_parameters(flow)
Esempio n. 5
0
    def test_parse_parameters(self):
        """Check the parameter dicts produced by ``_parse_parameters``.

        A ``RandomizedSearchCV`` wrapping a ``RandomForestClassifier`` is
        converted to a flow; the outer flow gets id 1 and its ``estimator``
        component id 2. Every parsed parameter must carry a component id,
        and ``n_estimators`` must be reported with value ``'5'`` for
        component 2.
        """
        model = RandomizedSearchCV(
            estimator=RandomForestClassifier(n_estimators=5),
            param_distributions={"max_depth": [3, None],
                                 "max_features": [1, 2, 3, 4],
                                 "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
                                 "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                                 "bootstrap": [True, False],
                                 "criterion": ["gini", "entropy"]},
            cv=StratifiedKFold(n_splits=2, random_state=1),
            n_iter=5)
        flow = sklearn_to_flow(model)
        flow.flow_id = 1
        flow.components['estimator'].flow_id = 2
        parameters = OpenMLRun._parse_parameters(flow)

        # Track whether the parameter of interest was actually returned;
        # otherwise the value/component checks below would be skipped
        # silently and the test would pass without verifying anything.
        n_estimators_seen = False
        for parameter in parameters:
            self.assertIsNotNone(parameter['oml:component'], msg=parameter)
            if parameter['oml:name'] == 'n_estimators':
                n_estimators_seen = True
                self.assertEqual(parameter['oml:value'], '5')
                self.assertEqual(parameter['oml:component'], 2)
        self.assertTrue(
            n_estimators_seen,
            msg='n_estimators missing from parsed parameters')
Esempio n. 6
0
    def test_parse_parameters(self):
        """Verify component ids and values in parsed flow parameters.

        The flow is built from a ``RandomizedSearchCV`` around a
        ``RandomForestClassifier(n_estimators=5)``. The search flow is
        assigned id 1 and its ``estimator`` component id 2; the parsed
        ``n_estimators`` entry must have value ``'5'`` and component 2, and
        no parsed entry may have a ``None`` component.
        """
        model = RandomizedSearchCV(
            estimator=RandomForestClassifier(n_estimators=5),
            param_distributions={
                "max_depth": [3, None],
                "max_features": [1, 2, 3, 4],
                "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
                "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                "bootstrap": [True, False], "criterion": ["gini", "entropy"]},
            cv=StratifiedKFold(n_splits=2, random_state=1),
            n_iter=5)
        flow = sklearn_to_flow(model)
        flow.flow_id = 1
        flow.components['estimator'].flow_id = 2
        parameters = OpenMLRun._parse_parameters(flow)

        # Count the matching entries so the test cannot pass vacuously when
        # ``n_estimators`` is absent from the parsed output.
        n_estimators_hits = 0
        for parameter in parameters:
            self.assertIsNotNone(parameter['oml:component'], msg=parameter)
            if parameter['oml:name'] == 'n_estimators':
                n_estimators_hits += 1
                self.assertEqual(parameter['oml:value'], '5')
                self.assertEqual(parameter['oml:component'], 2)
        self.assertGreater(
            n_estimators_hits, 0,
            msg='n_estimators missing from parsed parameters')
    def _perform_run(self,
                     task_id,
                     num_instances,
                     clf,
                     random_state_value=None,
                     check_setup=True):
        """Publish ``clf`` as a flow, run it on a task and verify the result.

        Parameters
        ----------
        task_id
            Identifier passed to ``openml.tasks.get_task``.
        num_instances
            Expected number of rows in ``run.data_content``.
        clf
            Model converted to a flow with ``sklearn_to_flow``.
        random_state_value
            Value the ``random_state`` flow parameter is expected to have
            on flows re-created from the model after the run.
        check_setup
            When true, additionally re-instantiate the model from the
            server (via the setup and via the run) and compare flows.

        Returns
        -------
        The run object produced by ``openml.runs.run_flow_on_task`` (after
        it has been published).
        """
        def _remove_random_state(flow):
            """Recursively drop ``random_state`` from a flow and components."""
            if 'random_state' in flow.parameters:
                del flow.parameters['random_state']
            for component in flow.components.values():
                _remove_random_state(component)

        flow = sklearn_to_flow(clf)
        flow, _ = self._add_sentinel_to_flow_name(flow, None)
        flow.publish()

        task = openml.tasks.get_task(task_id)
        run = openml.runs.run_flow_on_task(
            task,
            flow,
            seed=1,
            avoid_duplicate_runs=openml.config.avoid_duplicate_runs)
        run_ = run.publish()
        self.assertEqual(run_, run)
        self.assertIsInstance(run.dataset_id, int)

        # check arff output
        self.assertEqual(len(run.data_content), num_instances)

        if check_setup:
            # The random_state assertions are skipped for these flow classes.
            checks_random_state = flow.class_name not in (
                'sklearn.model_selection._search.GridSearchCV',
                'sklearn.pipeline.Pipeline')

            # test the initialize setup function
            run_id = run_.run_id
            run_server = openml.runs.get_run(run_id)
            clf_server = openml.setups.initialize_model(run_server.setup_id)

            flow_local = openml.flows.sklearn_to_flow(clf)
            flow_server = openml.flows.sklearn_to_flow(clf_server)

            if checks_random_state:
                # If the flow is initialized from a model without a random
                # state, the flow is on the server without any random state
                self.assertEqual(flow.parameters['random_state'], 'null')
                # As soon as a flow is run, a random state is set in the
                # model. A flow re-created from the model (locally or from
                # the server) is expected to carry that random state.
                self.assertEqual(flow_local.parameters['random_state'],
                                 random_state_value)
                self.assertEqual(flow_server.parameters['random_state'],
                                 random_state_value)
            _remove_random_state(flow_local)
            _remove_random_state(flow_server)
            openml.flows.assert_flows_equal(flow_local, flow_server)

            # and test the initialize setup from run function
            clf_server2 = openml.runs.initialize_model_from_run(
                run_server.run_id)
            flow_server2 = openml.flows.sklearn_to_flow(clf_server2)
            if checks_random_state:
                self.assertEqual(flow_server2.parameters['random_state'],
                                 random_state_value)

            _remove_random_state(flow_server2)
            openml.flows.assert_flows_equal(flow_local, flow_server2)

        downloaded = openml.runs.get_run(run_.run_id)
        # Use a TestCase assertion rather than a bare ``assert`` so the
        # check survives ``python -O`` and reports the offending tags.
        self.assertIn('openml-python', downloaded.tags)

        return run
Esempio n. 8
0
    def _perform_run(self, task_id, num_instances, clf,
                     random_state_value=None, check_setup=True):
        """Run ``clf`` on an OpenML task, publish the run and validate it.

        The model is converted to a flow (with a sentinel added to its
        name), published, executed on the task and the resulting run is
        published as well. Basic properties of the run are asserted and,
        optionally, the model is rebuilt from the server and compared to
        the local one.

        Parameters
        ----------
        task_id
            Identifier handed to ``openml.tasks.get_task``.
        num_instances
            Number of rows ``run.data_content`` must contain.
        clf
            Model to convert with ``sklearn_to_flow`` and run.
        random_state_value
            Expected ``random_state`` parameter of flows rebuilt from the
            model after it has been run.
        check_setup
            If true, also verify ``openml.setups.initialize_model`` and
            ``openml.runs.initialize_model_from_run`` against the local flow.

        Returns
        -------
        The published run object.
        """

        def _remove_random_state(flow):
            """Delete ``random_state`` from ``flow`` and all sub-components."""
            if 'random_state' in flow.parameters:
                del flow.parameters['random_state']
            for component in flow.components.values():
                _remove_random_state(component)

        # Flow classes for which the random_state assertions are skipped.
        classes_without_random_state_check = (
            'sklearn.model_selection._search.GridSearchCV',
            'sklearn.pipeline.Pipeline',
        )

        flow = sklearn_to_flow(clf)
        flow, _ = self._add_sentinel_to_flow_name(flow, None)
        flow.publish()

        task = openml.tasks.get_task(task_id)
        run = openml.runs.run_flow_on_task(
            task, flow, seed=1,
            avoid_duplicate_runs=openml.config.avoid_duplicate_runs)
        run_ = run.publish()
        self.assertEqual(run_, run)
        self.assertIsInstance(run.dataset_id, int)

        # check arff output
        self.assertEqual(len(run.data_content), num_instances)

        if check_setup:
            verify_random_state = (
                flow.class_name not in classes_without_random_state_check)

            # test the initialize setup function
            run_id = run_.run_id
            run_server = openml.runs.get_run(run_id)
            clf_server = openml.setups.initialize_model(run_server.setup_id)

            flow_local = openml.flows.sklearn_to_flow(clf)
            flow_server = openml.flows.sklearn_to_flow(clf_server)

            if verify_random_state:
                # If the flow is initialized from a model without a random
                # state, the flow is on the server without any random state
                self.assertEqual(flow.parameters['random_state'], 'null')
                # As soon as a flow is run, a random state is set in the
                # model, so flows re-created from the local model and from
                # the server model are expected to expose that value.
                self.assertEqual(flow_local.parameters['random_state'],
                                 random_state_value)
                self.assertEqual(flow_server.parameters['random_state'],
                                 random_state_value)
            _remove_random_state(flow_local)
            _remove_random_state(flow_server)
            openml.flows.assert_flows_equal(flow_local, flow_server)

            # and test the initialize setup from run function
            clf_server2 = openml.runs.initialize_model_from_run(
                run_server.run_id)
            flow_server2 = openml.flows.sklearn_to_flow(clf_server2)
            if verify_random_state:
                self.assertEqual(flow_server2.parameters['random_state'],
                                 random_state_value)

            _remove_random_state(flow_server2)
            openml.flows.assert_flows_equal(flow_local, flow_server2)

        downloaded = openml.runs.get_run(run_.run_id)
        # A bare ``assert`` is stripped under ``python -O`` and gives no
        # context on failure; ``assertIn`` reports the actual tags.
        self.assertIn('openml-python', downloaded.tags)

        return run