def test__run_exists(self):
    # would be better to not sentinel these clfs, so we would not have to
    # perform the actual runs and could just check their status online
    clfs = [
        sklearn.pipeline.Pipeline(steps=[
            ('Imputer', Imputer(strategy='mean')),
            ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
            ('Estimator', DecisionTreeClassifier(max_depth=4)),
        ]),
        sklearn.pipeline.Pipeline(steps=[
            ('Imputer', Imputer(strategy='most_frequent')),
            ('VarianceThreshold', VarianceThreshold(threshold=0.1)),
            ('Estimator', DecisionTreeClassifier(max_depth=4)),
        ]),
    ]

    task = openml.tasks.get_task(115)

    for clf in clfs:
        try:
            # first populate the server with this run;
            # skip the run if it was already performed
            run = openml.runs.run_model_on_task(
                task, clf, avoid_duplicate_runs=True)
            run.publish()
        except openml.exceptions.PyOpenMLError:
            # run already existed on the server. Great.
            pass

        flow = openml.flows.sklearn_to_flow(clf)
        flow_exists = openml.flows.flow_exists(flow.name,
                                               flow.external_version)
        self.assertGreater(flow_exists, 0)

        downloaded_flow = openml.flows.get_flow(flow_exists)
        setup_exists = openml.setups.setup_exists(downloaded_flow, clf)
        self.assertGreater(setup_exists, 0)

        run_ids = _run_exists(task.task_id, setup_exists)
        self.assertTrue(run_ids, msg=(run_ids, clf))
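# A sentinel-free variant of the lookup above (a sketch, not part of the test
# suite): instead of re-running the pipelines, the matching runs could be
# looked up on the server directly via the listing API. This assumes
# openml.runs.list_runs accepts task and setup filters as lists, as in
# openml-python:
#
#     run_ids = openml.runs.list_runs(task=[task.task_id],
#                                     setup=[setup_exists])
#     self.assertTrue(run_ids)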
def test_get_run_trace(self):
    # get_run_trace is already tested implicitly in test_run_and_publish;
    # this test adds a few additional checks
    num_iterations = 10
    num_folds = 1
    task_id = 119
    task = openml.tasks.get_task(task_id)

    # IMPORTANT! Do not sentinel this flow: the test is faster if we do not
    # have to wait on the openml server
    clf = RandomizedSearchCV(
        RandomForestClassifier(random_state=42),
        {
            "max_depth": [3, None],
            "max_features": [1, 2, 3, 4],
            "bootstrap": [True, False],
            "criterion": ["gini", "entropy"],
        },
        num_iterations,
        random_state=42,
    )

    # [SPEED] make the unit test faster by reusing run information from a
    # previous run on the server
    try:
        # in case the run does not exist yet
        run = openml.runs.run_model_on_task(
            task, clf, avoid_duplicate_runs=True)
        trace = openml.runs.functions._create_trace_from_arff(
            run._generate_trace_arff_dict())
        self.assertEqual(
            len(trace.trace_iterations),
            num_iterations * num_folds,
        )
        run = run.publish()
        self._wait_for_processed_run(run.run_id, 200)
        run_id = run.run_id
    except openml.exceptions.PyOpenMLError as e:
        if 'Run already exists in server' not in e.message:
            # the error was not the one we expected
            raise e
        # the run was already performed: recover its id via the flow,
        # setup, and run listings instead of re-running
        flow = openml.flows.sklearn_to_flow(clf)
        flow_exists = openml.flows.flow_exists(flow.name,
                                               flow.external_version)
        self.assertIsInstance(flow_exists, int)
        self.assertGreater(flow_exists, 0)

        downloaded_flow = openml.flows.get_flow(flow_exists)
        setup_exists = openml.setups.setup_exists(downloaded_flow)
        self.assertIsInstance(setup_exists, int)
        self.assertGreater(setup_exists, 0)

        run_ids = _run_exists(task.task_id, setup_exists)
        self.assertGreater(len(run_ids), 0)
        run_id = random.choice(list(run_ids))

    # now the actual unit test ...
    run_trace = openml.runs.get_run_trace(run_id)
    self.assertEqual(len(run_trace.trace_iterations),
                     num_iterations * num_folds)
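# For reference (assuming the openml-python trace API, not verified against
# this exact version): OpenMLRunTrace.trace_iterations is a dict keyed by
# (repeat, fold, iteration) tuples, one entry per RandomizedSearchCV candidate
# per fold, which is why num_iterations * num_folds entries are expected
# above. A hypothetical inspection loop, not part of the test:
#
#     for (repeat, fold, iteration), entry in run_trace.trace_iterations.items():
#         print(repeat, fold, iteration, entry.evaluation, entry.selected)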