Esempio n. 1
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """
     Create the directory and metadata file for a new, active experiment.

     When ``artifact_uri`` is falsy, a location built from ``self.artifact_root_uri`` and
     ``experiment_id`` is used instead. Returns ``experiment_id``.
     """
     self._check_root_dir()
     experiment_dir = mkdir(self.root_directory, experiment_id)
     if not artifact_uri:
         artifact_uri = build_path(self.artifact_root_uri, experiment_id)
     new_experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
     serialized = dict(new_experiment)
     write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, serialized)
     return experiment_id
Esempio n. 2
0
def test_yaml_read_and_write(tmpdir):
    """Round-trip a dict through write_yaml/read_yaml, then exercise safe_edit_yaml."""
    root = str(tmpdir)
    file_name = random_file("yaml")
    long_value = 1
    expected = {"a": random_int(), "B": random_int(), "text_value": u"中文",
                "long_value": long_value, "int_value": 32, "text_value_2": u"hi"}
    file_utils.write_yaml(root, file_name, expected)
    assert expected == file_utils.read_yaml(root, file_name)

    with codecs.open(os.path.join(root, file_name), encoding="utf-8") as handle:
        raw_text = handle.read()
    # No Python-specific YAML tags may leak into the file.
    assert "!!python" not in raw_text
    # Check that UTF-8 strings are written properly to the file (rather than as ASCII
    # representations of their byte sequences).
    assert u"中文" in raw_text

    def add_more_text(current):
        current["more_text"] = u"西班牙语"
        return current

    # The extra key is only visible while the context manager is active.
    assert "more_text" not in file_utils.read_yaml(root, file_name)
    with safe_edit_yaml(root, file_name, add_more_text):
        edited = file_utils.read_yaml(root, file_name)
        assert "more_text" in edited
        assert edited["more_text"] == u"西班牙语"
    assert "more_text" not in file_utils.read_yaml(root, file_name)
    def test_bad_experiment_id_recorded_for_run(self):
        """
        Corrupt one run's recorded experiment ID and check how the store reacts.

        The first run of the default experiment has the ``experiment_id`` in its
        ``meta.yaml`` rewritten to ``1``. The test then expects ``get_run`` on that run
        to raise ``MlflowException``, the search over the experiment to return one run
        fewer, and ``get_run`` to still work for the remaining run IDs.
        """
        fs = FileStore(self.test_root)
        exp_0 = fs.get_experiment(FileStore.DEFAULT_EXPERIMENT_ID)
        all_runs = self._search(fs, exp_0.experiment_id)

        # Baseline: the search returns as many runs as the fixture recorded.
        all_run_ids = self.exp_data[exp_0.experiment_id]["runs"]
        assert len(all_runs) == len(all_run_ids)

        # Rewrite the experiment ID stored in the run's metadata file so that it no
        # longer matches the directory the run lives in.
        bad_run_id = str(self.exp_data[exp_0.experiment_id]['runs'][0])
        path = os.path.join(self.test_root, str(exp_0.experiment_id),
                            bad_run_id)
        experiment_data = read_yaml(path, "meta.yaml")
        experiment_data["experiment_id"] = 1
        # Fourth positional argument is presumably the overwrite flag - TODO confirm.
        write_yaml(path, "meta.yaml", experiment_data, True)

        # NOTE(review): the assert inside this block never runs when get_run raises as
        # expected, because the exception exits the block first. It would also fail if
        # reached: ``e`` is pytest's ExceptionInfo (no ``message`` attribute) and str has
        # no ``contains`` method. Confirm the expected message, then check it after the
        # block (e.g. via ``e.value``) or with ``pytest.raises(..., match=...)``.
        with pytest.raises(MlflowException) as e:
            fs.get_run(bad_run_id)
            assert e.message.contains("not found")

        # The corrupted run should drop out of the search results.
        valid_runs = self._search(fs, exp_0.experiment_id)
        assert len(valid_runs) == len(all_runs) - 1

        # The untouched runs must still load without raising.
        for rid in all_run_ids:
            if rid != bad_run_id:
                fs.get_run(rid)
Esempio n. 4
0
 def _overwrite_run_info(self, run_info):
     """Rewrite the metadata file in the run's directory from ``run_info``."""
     target_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
     persisted = _make_persisted_run_info_dict(run_info)
     # overwrite=True: the metadata file is replaced rather than created.
     write_yaml(target_dir, FileStore.META_DATA_FILE_NAME, persisted, overwrite=True)
Esempio n. 5
0
 def create_run(self, experiment_id, user_id, run_name, source_type,
                source_name, entry_point_name, start_time, source_version,
                tags):
     """
     Creates a run with the specified attributes.

     The run gets a fresh UUID, is stored with RUNNING status and no end time, and is
     named after the number of runs already listed for the experiment (``run_name`` is
     not used by this implementation). The metadata file and the metrics, params and
     artifacts folders are created before a ``Run`` with ``run_data=None`` is returned.

     Raises ``Exception`` when ``get_experiment`` returns ``None`` for ``experiment_id``.
     """
     if self.get_experiment(experiment_id) is None:
         message = ("Could not create run under experiment with ID %s - no such experiment "
                    "exists." % experiment_id)
         raise Exception(message)
     new_uuid = uuid.uuid4().hex
     artifact_location = self._get_artifact_dir(experiment_id, new_uuid)
     existing_run_count = len(self._list_run_uuids(experiment_id))
     info_fields = dict(
         run_uuid=new_uuid,
         experiment_id=experiment_id,
         name="Run %s" % existing_run_count,
         artifact_uri=artifact_location,
         source_type=source_type,
         source_name=source_name,
         entry_point_name=entry_point_name,
         user_id=user_id,
         status=RunStatus.RUNNING,
         start_time=start_time,
         end_time=None,
         source_version=source_version,
         tags=tags,
     )
     run_info = RunInfo(**info_fields)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
     mkdir(run_dir)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, dict(run_info))
     for subfolder in (FileStore.METRICS_FOLDER_NAME,
                       FileStore.PARAMS_FOLDER_NAME,
                       FileStore.ARTIFACTS_FOLDER_NAME):
         mkdir(run_dir, subfolder)
     return Run(run_info=run_info, run_data=None)
Esempio n. 6
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """
     Create the directory and metadata file for a new, active experiment.

     When ``artifact_uri`` is falsy, the experiment's own directory under
     ``self.root_directory`` is converted with ``path_to_local_file_uri`` and used as the
     artifact location. Returns ``experiment_id``.
     """
     self._check_root_dir()
     dir_name = str(experiment_id)
     experiment_dir = mkdir(self.root_directory, dir_name)
     if not artifact_uri:
         default_location = os.path.join(self.root_directory, dir_name)
         artifact_uri = path_to_local_file_uri(default_location)
     new_experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
     write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, dict(new_experiment))
     return experiment_id
Esempio n. 7
0
 def update_run_info(self, run_uuid, run_status, end_time):
     """
     Apply a new status and end time to a run and overwrite its metadata file.

     Returns the updated info object produced by ``copy_with_overrides``.
     """
     current_info = self.get_run(run_uuid).info
     updated_info = current_info.copy_with_overrides(run_status, end_time)
     target_dir = self._get_run_dir(current_info.experiment_id, current_info.run_uuid)
     serialized = dict(updated_info)
     write_yaml(target_dir, FileStore.META_DATA_FILE_NAME, serialized, overwrite=True)
     return updated_info
Esempio n. 8
0
 def rename_experiment(self, experiment_id, new_name):
     """
     Rename an existing, active experiment and persist the new name.

     :param experiment_id: ID of the experiment to rename.
     :param new_name: Name to store in the experiment's metadata file.
     :raises MlflowException: with ``RESOURCE_DOES_NOT_EXIST`` when ``_get_experiment``
                              returns ``None`` for ``experiment_id``.
     :raises Exception: when the experiment is not in the active lifecycle stage.
     """
     # str() keeps os.path.join from raising TypeError when the ID is an integer.
     meta_dir = os.path.join(self.root_directory, str(experiment_id))
     # if experiment is malformed, will raise error
     experiment = self._get_experiment(experiment_id)
     if experiment is None:
         raise MlflowException("Experiment '%s' does not exist." % experiment_id,
                               databricks_pb2.RESOURCE_DOES_NOT_EXIST)
     # Validate the lifecycle stage before mutating, so a rejected rename leaves the
     # experiment object untouched.
     if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise Exception("Cannot rename experiment in non-active lifecycle stage."
                         " Current stage: %s" % experiment.lifecycle_stage)
     experiment._set_name(new_name)
     write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, dict(experiment), overwrite=True)
Esempio n. 9
0
 def rename_experiment(self, experiment_id, new_name):
     """
     Set a new name on an experiment and overwrite its metadata file.

     Raises ``Exception`` when the experiment's lifecycle stage is not
     ``Experiment.ACTIVE_LIFECYCLE``; nothing is written in that case.
     """
     meta_dir = os.path.join(self.root_directory, str(experiment_id))
     experiment = self._get_experiment(experiment_id)
     experiment._set_name(new_name)
     is_active = experiment.lifecycle_stage == Experiment.ACTIVE_LIFECYCLE
     if not is_active:
         message = ("Cannot rename experiment in non-active lifecycle stage."
                    " Current stage: %s" % experiment.lifecycle_stage)
         raise Exception(message)
     serialized = dict(experiment)
     write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, serialized, overwrite=True)
Esempio n. 10
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """
     Create the directory and metadata file for a new, active experiment.

     When ``artifact_uri`` is falsy, ``self.artifact_root_uri`` joined with the experiment
     ID is used. The ``tags`` entry is left out of the metadata file. Returns
     ``experiment_id``.
     """
     if not artifact_uri:
         artifact_uri = posixpath.join(self.artifact_root_uri, str(experiment_id))
     self._check_root_dir()
     experiment_dir = mkdir(self.root_directory, str(experiment_id))
     new_experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
     meta = dict(new_experiment)
     # tags are added to the file system and are not written to this dict on write
     # As such, we should not include them in the meta file.
     meta.pop('tags')
     write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, meta)
     return experiment_id
Esempio n. 11
0
 def create_run(self, experiment_id, user_id, run_name, source_type,
                source_name, entry_point_name, start_time, source_version,
                tags, parent_run_id):
     """
     Creates a run with the specified attributes.

     :param experiment_id: ID of the owning experiment; ``None`` selects
                           ``FileStore.DEFAULT_EXPERIMENT_ID``.
     :param run_name: If truthy, stored under the ``MLFLOW_RUN_NAME`` tag.
     :param tags: Iterable of tags to set on the run; ``None`` is treated as no tags.
     :param parent_run_id: If truthy, stored under the ``MLFLOW_PARENT_RUN_ID`` tag.
     :return: ``Run`` wrapping the new ``RunInfo``, with ``run_data=None``.
     :raises MlflowException: with ``RESOURCE_DOES_NOT_EXIST`` when the experiment is
                              missing, or ``INVALID_STATE`` when it is not active.
     """
     experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
     experiment = self.get_experiment(experiment_id)
     if experiment is None:
         raise MlflowException(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id,
             databricks_pb2.RESOURCE_DOES_NOT_EXIST)
     if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise MlflowException(
             "Could not create run under non-active experiment with ID "
             "%s." % experiment_id, databricks_pb2.INVALID_STATE)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(run_uuid=run_uuid,
                        experiment_id=experiment_id,
                        name="",
                        artifact_uri=artifact_uri,
                        source_type=source_type,
                        source_name=source_name,
                        entry_point_name=entry_point_name,
                        user_id=user_id,
                        status=RunStatus.RUNNING,
                        start_time=start_time,
                        end_time=None,
                        source_version=source_version,
                        lifecycle_stage=LifecycleStage.ACTIVE)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
     mkdir(run_dir)
     run_info_dict = _make_persisted_run_info_dict(run_info)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     # Tolerate tags=None: failing here would leave a half-initialised run on disk,
     # since the directory and metadata file have already been written.
     for tag in tags or ():
         self.set_tag(run_uuid, tag)
     if parent_run_id:
         self.set_tag(run_uuid,
                      RunTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
     if run_name:
         self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
     return Run(run_info=run_info, run_data=None)
Esempio n. 12
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri, tags):
     """
     Create the directory and metadata file for a new, active experiment.

     When ``artifact_uri`` is falsy, the experiment ID is appended to
     ``self.artifact_root_uri``. The ``tags`` entry is left out of the metadata file;
     each tag in ``tags`` (when not ``None``) is applied through ``set_experiment_tag``.
     Returns ``experiment_id``.
     """
     if not artifact_uri:
         artifact_uri = append_to_uri_path(self.artifact_root_uri, str(experiment_id))
     self._check_root_dir()
     experiment_dir = mkdir(self.root_directory, str(experiment_id))
     new_experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
     meta = dict(new_experiment)
     # tags are added to the file system and are not written to this dict on write
     # As such, we should not include them in the meta file.
     meta.pop("tags")
     write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, meta)
     if tags is None:
         return experiment_id
     for experiment_tag in tags:
         self.set_experiment_tag(experiment_id, experiment_tag)
     return experiment_id
Esempio n. 13
0
def test_yaml_read_and_write(tmpdir):
    """Round-trip a dict through write_yaml/read_yaml and inspect the raw file text."""
    root = str(tmpdir)
    file_name = random_file("yaml")
    if six.PY2:
        long_value = long(1)  # pylint: disable=undefined-variable
    else:
        long_value = 1
    expected = {
        "a": random_int(),
        "B": random_int(),
        "text_value": u"中文",
        "long_value": long_value,
        "int_value": 32,
        "text_value_2": u"hi",
    }
    file_utils.write_yaml(root, file_name, expected)
    assert expected == file_utils.read_yaml(root, file_name)

    with codecs.open(file_utils.build_path(root, file_name), encoding="utf-8") as handle:
        raw_text = handle.read()
    # No Python-specific YAML tags may leak into the file.
    assert "!!python" not in raw_text
    # Check that UTF-8 strings are written properly to the file (rather than as ASCII
    # representations of their byte sequences).
    assert u"中文" in raw_text
Esempio n. 14
0
 def create_run(self, experiment_id, user_id, run_name, source_type,
                source_name, entry_point_name, start_time, source_version,
                tags):
     """
     Creates a run with the specified attributes.

     :param experiment_id: ID of the owning experiment.
     :param run_name: If truthy, stored under the ``MLFLOW_RUN_NAME`` tag.
     :param tags: Iterable of tags to set on the run; ``None`` is treated as no tags.
     :return: ``Run`` wrapping the new ``RunInfo``, with ``run_data=None``.
     :raises Exception: when the experiment does not exist or is not in the active
                        lifecycle stage.
     """
     experiment = self.get_experiment(experiment_id)
     if experiment is None:
         raise Exception(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id)
     if experiment.lifecycle_stage != Experiment.ACTIVE_LIFECYCLE:
         raise Exception(
             'Could not create run under non-active experiment with ID '
             '%s.' % experiment_id)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(run_uuid=run_uuid,
                        experiment_id=experiment_id,
                        name="",
                        artifact_uri=artifact_uri,
                        source_type=source_type,
                        source_name=source_name,
                        entry_point_name=entry_point_name,
                        user_id=user_id,
                        status=RunStatus.RUNNING,
                        start_time=start_time,
                        end_time=None,
                        source_version=source_version,
                        lifecycle_stage=RunInfo.ACTIVE_LIFECYCLE)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
     mkdir(run_dir)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME,
                _make_persisted_run_info_dict(run_info))
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     # Tolerate tags=None: failing here would leave a half-initialised run on disk,
     # since the directory and metadata file have already been written.
     for tag in tags or ():
         self.set_tag(run_uuid, tag)
     if run_name:
         self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
     return Run(run_info=run_info, run_data=None)
Esempio n. 15
0
 def create_run(self, experiment_id, user_id, start_time, tags):
     """
     Creates a run with the specified attributes.

     :param experiment_id: ID of the owning experiment; ``None`` selects
                           ``FileStore.DEFAULT_EXPERIMENT_ID``.
     :param user_id: Stored as the run's ``user_id``.
     :param start_time: Stored as the run's ``start_time``.
     :param tags: Iterable of tags to set on the run; ``None`` is treated as no tags.
     :return: The run as read back through ``self.get_run``.
     :raises MlflowException: with ``RESOURCE_DOES_NOT_EXIST`` when the experiment is
                              missing, or ``INVALID_STATE`` when it is not active.
     """
     experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
     experiment = self.get_experiment(experiment_id)
     if experiment is None:
         raise MlflowException(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id,
             databricks_pb2.RESOURCE_DOES_NOT_EXIST,
         )
     if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise MlflowException(
             "Could not create run under non-active experiment with ID %s."
             % experiment_id,
             databricks_pb2.INVALID_STATE,
         )
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(
         run_uuid=run_uuid,
         run_id=run_uuid,
         experiment_id=experiment_id,
         artifact_uri=artifact_uri,
         user_id=user_id,
         status=RunStatus.to_string(RunStatus.RUNNING),
         start_time=start_time,
         end_time=None,
         lifecycle_stage=LifecycleStage.ACTIVE,
     )
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
     mkdir(run_dir)
     run_info_dict = _make_persisted_run_info_dict(run_info)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     # Tolerate tags=None: failing here would leave a half-initialised run on disk,
     # since the directory and metadata file have already been written.
     for tag in tags or ():
         self.set_tag(run_uuid, tag)
     return self.get_run(run_id=run_uuid)
Esempio n. 16
0
 def test_yaml_read_and_write(self):
     """Round-trip a dict through write_yaml/read_yaml and inspect the raw file text."""
     file_name = random_file("yaml")
     if six.PY2:
         long_value = long(1)  # pylint: disable=undefined-variable
     else:
         long_value = 1
     expected = {"a": random_int(), "B": random_int(), "text_value": u"中文",
                 "long_value": long_value, "int_value": 32, "text_value_2": u"hi"}
     file_utils.write_yaml(self.test_folder, file_name, expected)
     actual = file_utils.read_yaml(self.test_folder, file_name)
     self.assertEqual(expected, actual)

     raw_path = file_utils.build_path(self.test_folder, file_name)
     with codecs.open(raw_path, encoding="utf-8") as handle:
         raw_text = handle.read()
     # No Python-specific YAML tags may leak into the file.
     self.assertNotIn("!!python", raw_text)
     # Check that UTF-8 strings are written properly to the file (rather than as ASCII
     # representations of their byte sequences).
     self.assertIn(u"中文", raw_text)
Esempio n. 17
0
 def _create_root(self, root):
     """
     Build a randomly populated file-store directory tree under ``root`` for the tests.

     Creates ``self.test_root`` and, for three random experiment IDs plus
     ``Experiment.DEFAULT_EXPERIMENT_ID``, an experiment folder with a metadata file and
     two runs. Each run gets a metadata file, five param files, three metric files with
     ten values each, and an empty artifacts folder. Everything written is mirrored in
     ``self.exp_data`` (keyed by experiment ID) and ``self.run_data`` (keyed by run UUID)
     so tests can compare the store's output against it.
     """
     self.test_root = os.path.join(root, "test_file_store_%d" % random_int())
     os.mkdir(self.test_root)
     self.experiments = [random_int(100, int(1e9)) for _ in range(3)]
     self.exp_data = {}
     self.run_data = {}
     # Include default experiment
     self.experiments.append(Experiment.DEFAULT_EXPERIMENT_ID)
     for exp in self.experiments:
         # create experiment
         exp_folder = os.path.join(self.test_root, str(exp))
         os.makedirs(exp_folder)
         d = {"experiment_id": exp, "name": random_str(), "artifact_location": exp_folder}
         self.exp_data[exp] = d
         write_yaml(exp_folder, FileStore.META_DATA_FILE_NAME, d)
         # add runs
         # The "runs" key is added to the in-memory dict only after the write_yaml call
         # above.
         self.exp_data[exp]["runs"] = []
         for _ in range(2):
             run_uuid = uuid.uuid4().hex
             self.exp_data[exp]["runs"].append(run_uuid)
             run_folder = os.path.join(exp_folder, run_uuid)
             os.makedirs(run_folder)
             run_info = {"run_uuid": run_uuid,
                         "experiment_id": exp,
                         "name": random_str(random_int(10, 40)),
                         "source_type": random_int(1, 4),
                         "source_name": random_str(random_int(100, 300)),
                         "entry_point_name": random_str(random_int(100, 300)),
                         "user_id": random_str(random_int(10, 25)),
                         "status": random_int(1, 5),
                         "start_time": random_int(1, 10),
                         "end_time": random_int(20, 30),
                         "source_version": random_str(random_int(10, 30)),
                         "tags": [],
                         "artifact_uri": "%s/%s" % (run_folder, FileStore.ARTIFACTS_FOLDER_NAME),
                         }
             write_yaml(run_folder, FileStore.META_DATA_FILE_NAME, run_info)
             # The same dict is kept in run_data; "params" and "metrics" are attached to
             # it below, after the metadata file has been written.
             self.run_data[run_uuid] = run_info
             # params
             # One file per param: the file name is the param name, its content the value.
             params_folder = os.path.join(run_folder, FileStore.PARAMS_FOLDER_NAME)
             os.makedirs(params_folder)
             params = {}
             for _ in range(5):
                 param_name = random_str(random_int(4, 12))
                 param_value = random_str(random_int(10, 15))
                 param_file = os.path.join(params_folder, param_name)
                 with open(param_file, 'w') as f:
                     f.write(param_value)
                 params[param_name] = param_value
             self.run_data[run_uuid]["params"] = params
             # metrics
             # One file per metric, holding one "<timestamp> <value>" line per value.
             metrics_folder = os.path.join(run_folder, FileStore.METRICS_FOLDER_NAME)
             os.makedirs(metrics_folder)
             metrics = {}
             for _ in range(3):
                 metric_name = random_str(random_int(6, 10))
                 timestamp = int(time.time())
                 metric_file = os.path.join(metrics_folder, metric_name)
                 values = []
                 for _ in range(10):
                     metric_value = random_int(100, 2000)
                     # Each value's timestamp is later than the previous one.
                     timestamp += random_int(10000, 2000000)
                     values.append((timestamp, metric_value))
                     # The file is reopened in append mode for every value.
                     with open(metric_file, 'a') as f:
                         f.write("%d %d\n" % (timestamp, metric_value))
                 metrics[metric_name] = values
             self.run_data[run_uuid]["metrics"] = metrics
             # artifacts
             os.makedirs(os.path.join(run_folder, FileStore.ARTIFACTS_FOLDER_NAME))
Esempio n. 18
0
 def _create_experiment_with_id(self, name, experiment_id):
     """
     Create the directory and metadata file for a new experiment.

     The directory returned by ``mkdir`` is also passed to ``Experiment`` as its third
     argument. Returns ``experiment_id``.
     """
     self._check_root_dir()
     experiment_dir = mkdir(self.root_directory, str(experiment_id))
     new_experiment = Experiment(experiment_id, name, experiment_dir)
     serialized = dict(new_experiment)
     write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, serialized)
     return experiment_id
Esempio n. 19
0
 def test_yaml_read_and_write(self):
     """Check that a dict written with write_yaml is read back unchanged."""
     file_name = random_file("yaml")
     expected = dict(a=random_int(), B=random_int())
     file_utils.write_yaml(self.test_folder, file_name, expected)
     actual = file_utils.read_yaml(self.test_folder, file_name)
     self.assertEqual(expected, actual)
Esempio n. 20
0
 def __exit__(self, *args):
     """Overwrite the file with the original contents held in ``self._original``."""
     root, file_name = self._root, self._file_name
     write_yaml(root, file_name, self._original, overwrite=True)
Esempio n. 21
0
 def __enter__(self):
     """Overwrite the file with the result of applying the edit function to a copy."""
     # The edit function receives a copy, so self._original is not replaced here.
     working_copy = self._original.copy()
     edited = self._edit_func(working_copy)
     write_yaml(self._root, self._file_name, edited, overwrite=True)