def _create():
    run_id = str(uuid.uuid4())
    experiment_id = str(random_int(10, 2000))
    user_id = random_str(random_int(10, 25))
    status = RunStatus.to_string(random.choice(RunStatus.all_status()))
    start_time = random_int(1, 10)
    end_time = start_time + random_int(1, 10)
    lifecycle_stage = LifecycleStage.ACTIVE
    artifact_uri = random_str(random_int(10, 40))
    ri = RunInfo(
        run_uuid=run_id,
        run_id=run_id,
        experiment_id=experiment_id,
        user_id=user_id,
        status=status,
        start_time=start_time,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=artifact_uri,
    )
    return (
        ri,
        run_id,
        experiment_id,
        user_id,
        status,
        start_time,
        end_time,
        lifecycle_stage,
        artifact_uri,
    )

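# The fixtures in this file lean on shared test helpers (random_int, random_str,
# random_file) whose definitions live elsewhere in the test suite. A minimal
# sketch of what they are assumed to look like -- names and defaults here are
# illustrative, not the canonical definitions:
import random
import string


def random_int(lo=1, hi=1000000000):
    # Uniform random integer in [lo, hi]; the int() cast tolerates float bounds
    # such as random_int(-1e4, 1e4) used in the fixtures above.
    return random.randint(int(lo), int(hi))


def random_str(size=10, chars=string.ascii_uppercase + string.digits):
    # Random alphanumeric string of the given length.
    return "".join(random.choice(chars) for _ in range(size))


def random_file(ext):
    # Random file name with the given extension, e.g. "temp_test_123.yaml".
    return "temp_test_%d.%s" % (random_int(), ext)
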
def test_yaml_read_and_write(tmpdir):
    temp_dir = str(tmpdir)
    yaml_file = random_file("yaml")
    long_value = long(1) if six.PY2 else 1  # pylint: disable=undefined-variable
    data = {
        "a": random_int(),
        "B": random_int(),
        "text_value": u"中文",
        "long_value": long_value,
        "int_value": 32,
        "text_value_2": u"hi",
    }
    file_utils.write_yaml(temp_dir, yaml_file, data)
    read_data = file_utils.read_yaml(temp_dir, yaml_file)
    assert data == read_data
    yaml_path = os.path.join(temp_dir, yaml_file)
    with codecs.open(yaml_path, encoding="utf-8") as handle:
        contents = handle.read()
    assert "!!python" not in contents
    # Check that UTF-8 strings are written properly to the file (rather than as ASCII
    # representations of their byte sequences).
    assert u"中文" in contents

    def edit_func(old_dict):
        old_dict["more_text"] = u"西班牙语"
        return old_dict

    assert "more_text" not in file_utils.read_yaml(temp_dir, yaml_file)
    with safe_edit_yaml(temp_dir, yaml_file, edit_func):
        edited_dict = file_utils.read_yaml(temp_dir, yaml_file)
        assert "more_text" in edited_dict
        assert edited_dict["more_text"] == u"西班牙语"
    assert "more_text" not in file_utils.read_yaml(temp_dir, yaml_file)

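# test_yaml_read_and_write relies on a safe_edit_yaml context manager that
# applies edit_func to the YAML file on entry and restores the original
# contents on exit. A minimal sketch of the assumed behavior (the real helper
# may differ, e.g. in whether write_yaml accepts an overwrite flag):
import copy
from contextlib import contextmanager


@contextmanager
def safe_edit_yaml(root, file_name, edit_func):
    original = file_utils.read_yaml(root, file_name)
    edited = edit_func(copy.deepcopy(original))
    file_utils.write_yaml(root, file_name, edited, overwrite=True)
    try:
        yield
    finally:
        # Restore the pre-edit contents so the surrounding test sees the
        # original data again after the with-block exits.
        file_utils.write_yaml(root, file_name, original, overwrite=True)
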
def _create():
    metrics = [
        Metric(random_str(10), random_int(0, 1000), int(time.time()) + random_int(-1e4, 1e4))
    ]
    params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    rd = RunData(metrics=metrics, params=params, tags=tags)
    return rd, metrics, params, tags

def test_parse_json_input_split_oriented():
    size = 200
    data = {
        "col_m": [random_int(0, 1000) for _ in range(size)],
        "col_z": [random_str(4) for _ in range(size)],
        "col_a": [random_int() for _ in range(size)],
    }
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.parse_json_input(p1.to_json(orient="split"), orient="split")
    assert all(p1 == p2)

def test_parse_json_input_records_oriented():
    size = 20
    data = {
        "col_m": [random_int(0, 1000) for _ in range(size)],
        "col_z": [random_str(4) for _ in range(size)],
        "col_a": [random_int() for _ in range(size)],
    }
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.parse_json_input(p1.to_json(orient="records"), orient="records")
    # The "records" orient may shuffle column ordering, so compare each column
    # Series individually.
    for col in data.keys():
        assert all(p1[col] == p2[col])

def _create():
    metrics = [
        Metric(
            key=random_str(10),
            value=random_int(0, 1000),
            timestamp=int(time.time()) + random_int(-1e4, 1e4),
            step=random_int(),
        )
    ]
    params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]
    tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]
    rd = RunData(metrics=metrics, params=params, tags=tags)
    return rd, metrics, params, tags

def test_parse_json_input_split_oriented_to_numpy_array():
    size = 200
    data = OrderedDict(
        [
            ("col_m", [random_int(0, 1000) for _ in range(size)]),
            ("col_z", [random_str(4) for _ in range(size)]),
            ("col_a", [random_int() for _ in range(size)]),
        ]
    )
    p0 = pd.DataFrame.from_dict(data)
    np_array = np.array(
        [[a, b, c] for a, b, c in zip(data["col_m"], data["col_z"], data["col_a"])],
        dtype=object,
    )
    p1 = pd.DataFrame(np_array).infer_objects()
    p2 = pyfunc_scoring_server.parse_split_oriented_json_input_to_numpy(
        p0.to_json(orient="split")
    )
    np.testing.assert_array_equal(p1, p2)

def test_infer_and_parse_json_input():
    size = 20
    # Input is correctly recognized as a list, and parsed as a pandas DataFrame
    # with orient "records".
    data = {
        "col_m": [random_int(0, 1000) for _ in range(size)],
        "col_z": [random_str(4) for _ in range(size)],
        "col_a": [random_int() for _ in range(size)],
    }
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.infer_and_parse_json_input(p1.to_json(orient="records"))
    assert all(p1 == p2)

    # Input is correctly recognized as a dict, and parsed as a pandas DataFrame
    # with orient "split".
    data = {
        "col_m": [random_int(0, 1000) for _ in range(size)],
        "col_z": [random_str(4) for _ in range(size)],
        "col_a": [random_int() for _ in range(size)],
    }
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.infer_and_parse_json_input(p1.to_json(orient="split"))
    assert all(p1 == p2)

    # Input is correctly recognized as TensorFlow Serving input.
    arr = [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[3, 2, 1], [6, 5, 4], [9, 8, 7]],
    ]
    tfserving_input = {"instances": arr}
    result = pyfunc_scoring_server.infer_and_parse_json_input(json.dumps(tfserving_input))
    assert result.shape == (2, 3, 3)
    assert (result == np.array(arr)).all()

    # Input is unrecognized JSON input.
    with pytest.raises(MlflowException) as ex:
        pyfunc_scoring_server.infer_and_parse_json_input(json.dumps('"just a string"'))
    assert (
        "Failed to parse input from JSON. Ensure that input is a valid JSON"
        " list or dictionary." in str(ex)
    )

    # Input is not a JSON string.
    with pytest.raises(MlflowException) as ex:
        pyfunc_scoring_server.infer_and_parse_json_input("(not a json string)")
    assert (
        "Failed to parse input from JSON. Ensure that input is a valid JSON"
        " formatted string." in str(ex)
    )

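# For reference, a hypothetical helper (illustration only, not part of the
# test suite) that builds one payload of each JSON shape exercised by
# test_infer_and_parse_json_input:
def _example_payloads():
    df = pd.DataFrame({"col_m": [1, 2], "col_z": ["a", "b"]})
    return {
        "records": df.to_json(orient="records"),  # JSON list -> records orient
        "split": df.to_json(orient="split"),  # JSON dict -> split orient
        "tf_serving": json.dumps({"instances": [[1, 2], [3, 4]]}),  # tensor input
    }
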
def test_mkdir(self):
    new_dir_name = "mkdir_test_%d" % random_int()
    file_utils.mkdir(self.test_folder, new_dir_name)
    self.assertEqual(os.listdir(self.test_folder), [new_dir_name])
    with self.assertRaises(OSError):
        file_utils.mkdir("/ bad directory @ name ", "ouch")

def test_delete_tags(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    run_id = self.exp_data[exp_id]["runs"][0]
    fs.set_tag(run_id, RunTag("tag0", "value0"))
    fs.set_tag(run_id, RunTag("tag1", "value1"))
    tags = fs.get_run(run_id).data.tags
    assert tags["tag0"] == "value0"
    assert tags["tag1"] == "value1"
    fs.delete_tag(run_id, "tag0")
    new_tags = fs.get_run(run_id).data.tags
    assert "tag0" not in new_tags.keys()
    # Test that you cannot delete tags that don't exist.
    with pytest.raises(MlflowException):
        fs.delete_tag(run_id, "fakeTag")
    # Test that you cannot delete tags for nonexistent runs.
    with pytest.raises(MlflowException):
        fs.delete_tag("random_id", "tag0")
    fs = FileStore(self.test_root)
    fs.delete_run(run_id)
    # Test that you cannot delete tags for deleted runs.
    assert fs.get_run(run_id).info.lifecycle_stage == LifecycleStage.DELETED
    with pytest.raises(MlflowException):
        fs.delete_tag(run_id, "tag0")

def test_rename_experiment(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]

    # Error cases
    with self.assertRaises(Exception):
        fs.rename_experiment(exp_id, None)
    with self.assertRaises(Exception):
        # Test that names of existing experiments are checked before renaming.
        other_exp_id = None
        for exp in self.experiments:
            if exp != exp_id:
                other_exp_id = exp
                break
        fs.rename_experiment(exp_id, fs.get_experiment(other_exp_id).name)

    exp_name = self.exp_data[exp_id]["name"]
    new_name = exp_name + "!!!"
    self.assertNotEqual(exp_name, new_name)
    self.assertEqual(fs.get_experiment(exp_id).name, exp_name)
    fs.rename_experiment(exp_id, new_name)
    self.assertEqual(fs.get_experiment(exp_id).name, new_name)

    # Ensure that we cannot rename deleted experiments.
    fs.delete_experiment(exp_id)
    with pytest.raises(Exception) as e:
        fs.rename_experiment(exp_id, exp_name)
    assert "non-active lifecycle" in str(e.value)
    self.assertEqual(fs.get_experiment(exp_id).name, new_name)

    # Restore the experiment, and confirm that we can now rename it.
    fs.restore_experiment(exp_id)
    self.assertEqual(fs.get_experiment(exp_id).name, new_name)
    fs.rename_experiment(exp_id, exp_name)
    self.assertEqual(fs.get_experiment(exp_id).name, exp_name)

def test_creation_and_hydration(self):
    path = random_str(random_int(10, 50))
    is_dir = random_int(10, 2500) % 2 == 0
    size_in_bytes = random_int(1, 10000)
    fi1 = FileInfo(path, is_dir, size_in_bytes)
    self._check(fi1, path, is_dir, size_in_bytes)

    as_dict = {"path": path, "is_dir": is_dir, "file_size": size_in_bytes}
    self.assertEqual(dict(fi1), as_dict)

    proto = fi1.to_proto()
    fi2 = FileInfo.from_proto(proto)
    self._check(fi2, path, is_dir, size_in_bytes)

    fi3 = FileInfo.from_dictionary(as_dict)
    self._check(fi3, path, is_dir, size_in_bytes)

def test_create_run_with_parent_id(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    run = fs.create_run(exp_id, 'user', 'name', 'source_type', 'source_name',
                        'entry_point_name', 0, None, [], 'test_parent_run_id')
    assert any([t.key == MLFLOW_PARENT_RUN_ID and t.value == 'test_parent_run_id'
                for t in fs.get_all_tags(run.info.run_uuid)])

def test_delete_restore_experiment(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    exp_name = self.exp_data[exp_id]["name"]

    # Delete it.
    fs.delete_experiment(exp_id)
    self.assertTrue(exp_id not in self._extract_ids(fs.list_experiments(ViewType.ACTIVE_ONLY)))
    self.assertTrue(exp_id in self._extract_ids(fs.list_experiments(ViewType.DELETED_ONLY)))
    self.assertTrue(exp_id in self._extract_ids(fs.list_experiments(ViewType.ALL)))
    self.assertEqual(fs.get_experiment(exp_id).lifecycle_stage, Experiment.DELETED_LIFECYCLE)

    # Restore it.
    fs.restore_experiment(exp_id)
    restored_1 = fs.get_experiment(exp_id)
    self.assertEqual(restored_1.experiment_id, exp_id)
    self.assertEqual(restored_1.name, exp_name)
    restored_2 = fs.get_experiment_by_name(exp_name)
    self.assertEqual(restored_2.experiment_id, exp_id)
    self.assertEqual(restored_2.name, exp_name)
    self.assertTrue(exp_id in self._extract_ids(fs.list_experiments(ViewType.ACTIVE_ONLY)))
    self.assertTrue(exp_id not in self._extract_ids(fs.list_experiments(ViewType.DELETED_ONLY)))
    self.assertTrue(exp_id in self._extract_ids(fs.list_experiments(ViewType.ALL)))
    self.assertEqual(fs.get_experiment(exp_id).lifecycle_stage, Experiment.ACTIVE_LIFECYCLE)

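# _extract_ids is assumed to be a small method on this test class that pulls
# the ids out of a list of Experiment entities; a minimal sketch:
def _extract_ids(self, experiments):
    return [e.experiment_id for e in experiments]
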
def test_create_run_in_deleted_experiment(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    # Delete the experiment, then verify that new runs cannot be created in it.
    fs.delete_experiment(exp_id)
    with pytest.raises(Exception):
        fs.create_run(exp_id, 'user', 0, [])

def _create():
    metrics = [
        Metric(random_str(10), random_int(), int(time.time() + random_int(-1e4, 1e4)))
        for x in range(100)
    ]  # noqa
    params = [
        Param(random_str(10), random_str(random_int(10, 35))) for x in range(10)
    ]  # noqa
    rd = RunData()
    for p in params:
        rd.add_param(p)
    for m in metrics:
        rd.add_metric(m)
    return rd, metrics, params

def test_creation_and_hydration(self):
    exp_id = random_int()
    name = "exp_%d_%d" % (random_int(), random_int())
    location = random_file(".json")
    exp = Experiment(exp_id, name, location)
    self._check(exp, exp_id, name, location)

    as_dict = {"experiment_id": exp_id, "name": name, "artifact_location": location}
    self.assertEqual(dict(exp), as_dict)

    proto = exp.to_proto()
    exp2 = Experiment.from_proto(proto)
    self._check(exp2, exp_id, name, location)

    exp3 = Experiment.from_dictionary(as_dict)
    self._check(exp3, exp_id, name, location)

def test_mkdir(tmpdir):
    temp_dir = str(tmpdir)
    new_dir_name = "mkdir_test_%d" % random_int()
    file_utils.mkdir(temp_dir, new_dir_name)
    assert os.listdir(temp_dir) == [new_dir_name]
    with pytest.raises(OSError):
        file_utils.mkdir("/ bad directory @ name ", "ouch")

def test_create_run_in_deleted_experiment(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    # Delete the experiment, then verify that new runs cannot be created in it.
    fs.delete_experiment(exp_id)
    with pytest.raises(Exception):
        fs.create_run(exp_id, 'user', 'name', 'source_type', 'source_name',
                      'entry_point_name', 0, None, [], None)

def test_get_deleted_run(self):
    """
    Getting metrics/tags/params/run info should be allowed on deleted runs.
    """
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    run_id = self.exp_data[exp_id]['runs'][0]
    fs.delete_run(run_id)
    assert fs.get_run(run_id)

def test_create_run_with_parent_id(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    run = fs.create_run(exp_id, 'user', 'name', 'source_type', 'source_name',
                        'entry_point_name', 0, None, [], 'test_parent_run_id')
    assert fs.get_run(run.info.run_uuid).data.tags[MLFLOW_PARENT_RUN_ID] == \
        'test_parent_run_id'

def test_creation_and_hydration(self):
    key = random_str(random_int(10, 25))  # random string of length in [10, 25]
    value = random_str(random_int(55, 75))  # random string of length in [55, 75]
    param = Param(key, value)
    self._check(param, key, value)

    as_dict = {"key": key, "value": value}
    self.assertEqual(dict(param), as_dict)

    proto = param.to_proto()
    param2 = Param.from_proto(proto)
    self._check(param2, key, value)

    param3 = Param.from_dictionary(as_dict)
    self._check(param3, key, value)

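# The _check helper used by these entity hydration tests is assumed to verify
# each field of the constructed entity against the expected values; for Param,
# a minimal sketch of such a method:
def _check(self, param, key, value):
    self.assertIsInstance(param, Param)
    self.assertEqual(param.key, key)
    self.assertEqual(param.value, value)
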
def test_delete_restore_run(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    run_id = self.exp_data[exp_id]['runs'][0]
    # Should not throw.
    assert fs.get_run(run_id).info.lifecycle_stage == 'active'
    fs.delete_run(run_id)
    assert fs.get_run(run_id).info.lifecycle_stage == 'deleted'
    fs.restore_run(run_id)
    assert fs.get_run(run_id).info.lifecycle_stage == 'active'

def test_get_experiment(self):
    fs = FileStore(self.test_root)
    for exp_id in self.experiments:
        self._verify_experiment(fs, exp_id)

    # Test that fake experiments don't exist.
    # Look for random experiment ids between 8000 and 15000, since created ones
    # are in the range (100, 2000).
    for exp_id in set(random_int(8000, 15000) for x in range(20)):
        with self.assertRaises(Exception):
            fs.get_experiment(exp_id)

def test_yaml_read_and_write(self):
    yaml_file = random_file("yaml")
    long_value = long(1) if six.PY2 else 1  # pylint: disable=undefined-variable
    data = {
        "a": random_int(),
        "B": random_int(),
        "text_value": u"中文",
        "long_value": long_value,
        "int_value": 32,
        "text_value_2": u"hi",
    }
    file_utils.write_yaml(self.test_folder, yaml_file, data)
    read_data = file_utils.read_yaml(self.test_folder, yaml_file)
    self.assertEqual(data, read_data)
    yaml_path = file_utils.build_path(self.test_folder, yaml_file)
    with codecs.open(yaml_path, encoding="utf-8") as handle:
        contents = handle.read()
    self.assertNotIn("!!python", contents)
    # Check that UTF-8 strings are written properly to the file (rather than as ASCII
    # representations of their byte sequences).
    self.assertIn(u"中文", contents)

def test_hard_delete_run(self):
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    run_id = self.exp_data[exp_id]["runs"][0]
    fs._hard_delete_run(run_id)
    with self.assertRaises(MlflowException):
        fs.get_run(run_id)
    with self.assertRaises(MlflowException):
        fs.get_all_tags(run_id)
    with self.assertRaises(MlflowException):
        fs.get_all_metrics(run_id)
    with self.assertRaises(MlflowException):
        fs.get_all_params(run_id)

def test_get_experiment(self):
    fs = FileStore(self.test_root)
    for exp_id in self.experiments:
        exp = fs.get_experiment(exp_id)
        self.assertEqual(exp.experiment_id, exp_id)
        self.assertEqual(exp.name, self.exp_data[exp_id]["name"])
        self.assertEqual(exp.artifact_location, self.exp_data[exp_id]["artifact_location"])

    # Test that fake experiments don't exist.
    # Look for random experiment ids between 8000 and 15000, since created ones
    # are in the range (100, 2000).
    for exp_id in set(random_int(8000, 15000) for x in range(20)):
        with self.assertRaises(Exception):
            fs.get_experiment(exp_id)

def _create():
    metrics = [
        Metric(random_str(10), random_int(0, 1000), int(time.time() + random_int(-1e4, 1e4)))
        for _ in range(100)
    ]
    params = [
        Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)
    ]  # noqa
    tags = [
        RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)
    ]  # noqa
    rd = RunData()
    for p in params:
        rd._add_param(p)
    for m in metrics:
        rd._add_metric(m)
    for t in tags:
        rd._add_tag(t)
    return rd, metrics, params, tags

def test_creation_and_hydration(self):
    exp_id = str(random_int())
    name = "exp_%d_%d" % (random_int(), random_int())
    lifecycle_stage = LifecycleStage.ACTIVE
    location = random_file(".json")
    exp = Experiment(exp_id, name, location, lifecycle_stage)
    self._check(exp, exp_id, name, location, lifecycle_stage)

    as_dict = {
        "experiment_id": exp_id,
        "name": name,
        "artifact_location": location,
        "lifecycle_stage": lifecycle_stage,
    }
    self.assertEqual(dict(exp), as_dict)

    proto = exp.to_proto()
    exp2 = Experiment.from_proto(proto)
    self._check(exp2, exp_id, name, location, lifecycle_stage)

    exp3 = Experiment.from_dictionary(as_dict)
    self._check(exp3, exp_id, name, location, lifecycle_stage)

def test_set_deleted_run(self):
    """
    Setting metrics/tags/params/updating run info should not be allowed on deleted runs.
    """
    fs = FileStore(self.test_root)
    exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
    run_id = self.exp_data[exp_id]['runs'][0]
    fs.delete_run(run_id)
    assert fs.get_run(run_id).info.lifecycle_stage == LifecycleStage.DELETED
    with pytest.raises(MlflowException):
        fs.set_tag(run_id, RunTag('a', 'b'))
    with pytest.raises(MlflowException):
        fs.log_metric(run_id, Metric('a', 0.0, timestamp=0))
    with pytest.raises(MlflowException):
        fs.log_param(run_id, Param('a', 'b'))