def test_list_artifacts(self, databricks_artifact_repo):
    """Check ``list_artifacts`` for a directory listing and for a file path.

    ``_call_endpoint`` is patched to hand back canned ``ListArtifacts.Response``
    messages. A two-entry listing must come back as two ``FileInfoEntity``
    objects (the directory entry with ``file_size`` of ``None``), while a
    listing that consists only of the requested file must come back empty.
    """
    file_listing = [FileInfo(path="a.txt", is_dir=False, file_size=0)]
    dir_listing = [
        FileInfo(path="test/a.txt", is_dir=False, file_size=100),
        FileInfo(path="test/dir", is_dir=True, file_size=0),
    ]
    endpoint_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY + "._call_endpoint")
    with endpoint_patch as endpoint_mock:
        endpoint_mock.return_value = ListArtifacts.Response(
            root_uri="", files=dir_listing, next_page_token=None)
        listed = databricks_artifact_repo.list_artifacts("test/")
        assert isinstance(listed, list)
        assert isinstance(listed[0], FileInfoEntity)
        assert len(listed) == 2

        file_entry, dir_entry = listed
        assert file_entry.path == "test/a.txt"
        assert file_entry.is_dir is False
        assert file_entry.file_size == 100
        assert dir_entry.path == "test/dir"
        assert dir_entry.is_dir is True
        assert dir_entry.file_size is None

        # Calling list_artifacts() on a path that's a file should return an empty list
        endpoint_mock.return_value = ListArtifacts.Response(
            root_uri="", files=file_listing)
        listed = databricks_artifact_repo.list_artifacts("a.txt")
        assert len(listed) == 0
def list_artifacts(self, path=None):
    """List the artifacts found under ``path`` for this repository's run.

    Pages through the ``ListArtifacts`` endpoint, sending the previous
    response's ``next_page_token`` as ``page_token``, until a page is empty
    or carries no token.

    :param path: Optional path joined onto the repository's run-relative root.
                 When falsy, the root itself is listed.
    :return: A list of ``FileInfo`` objects whose paths are expressed relative
             to the repository's run-relative root; directory entries carry a
             size of ``None``. An empty list is returned when a page holds a
             single non-directory entry whose path equals the requested path.
    """
    root = self.run_relative_artifact_repo_root_path
    target = posixpath.join(root, path) if path else root

    collected = []
    next_token = None
    while True:
        # Only include page_token once a previous response has supplied one.
        request_fields = {"run_id": self.run_id, "path": target}
        if next_token:
            request_fields["page_token"] = next_token
        payload = message_to_json(ListArtifacts(**request_fields))
        response = self._call_endpoint(MlflowService, ListArtifacts, payload)
        page = response.files

        # If `path` is a file, ListArtifacts returns a single list element with the
        # same name as `path`. The list_artifacts API expects us to return an empty
        # list in this case, so we do so here.
        if len(page) == 1:
            only_entry = page[0]
            if only_entry.path == target and not only_entry.is_dir:
                return []

        for entry in page:
            relative = posixpath.relpath(path=entry.path, start=root)
            size = None if entry.is_dir else entry.file_size
            collected.append(FileInfo(relative, entry.is_dir, size))

        next_token = response.next_page_token
        if not page or not next_token:
            return collected
def test_list_artifacts_with_relative_path(self):
    """Check ``list_artifacts`` for a repository rooted at ``MOCK_SUBDIR_ROOT_URI``.

    ``_get_run_artifact_root``, ``message_to_json`` and ``_call_endpoint`` are
    patched. Returned paths must not include ``MOCK_SUBDIR``, while the request
    passed to ``message_to_json`` must use the path joined onto ``MOCK_SUBDIR``.
    """
    file_listing = [
        FileInfo(path=posixpath.join(MOCK_SUBDIR, "a.txt"), is_dir=False, file_size=0),
    ]
    dir_listing = [
        FileInfo(path=posixpath.join(MOCK_SUBDIR, "test/a.txt"), is_dir=False, file_size=100),
        FileInfo(path=posixpath.join(MOCK_SUBDIR, "test/dir"), is_dir=True, file_size=0),
    ]
    root_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY + "._get_run_artifact_root")
    json_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY_PACKAGE + ".message_to_json")
    endpoint_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY + "._call_endpoint")
    with root_patch as root_mock, json_patch as json_mock, endpoint_patch as endpoint_mock:
        root_mock.return_value = MOCK_RUN_ROOT_URI
        endpoint_mock.return_value = ListArtifacts.Response(
            root_uri="", files=dir_listing, next_page_token=None)
        json_mock.return_value = None

        repo = get_artifact_repository(MOCK_SUBDIR_ROOT_URI)
        listed = repo.list_artifacts("test")
        assert isinstance(listed, list)
        assert isinstance(listed[0], FileInfoEntity)
        assert len(listed) == 2

        file_entry, dir_entry = listed
        assert file_entry.path == "test/a.txt"
        assert file_entry.is_dir is False
        assert file_entry.file_size == 100
        assert dir_entry.path == "test/dir"
        assert dir_entry.is_dir is True
        assert dir_entry.file_size is None
        json_mock.assert_called_with(
            ListArtifacts(run_id=MOCK_RUN_ID, path=posixpath.join(MOCK_SUBDIR, "test")))

        # Calling list_artifacts() on a relative path that's a file should return an empty list
        endpoint_mock.return_value = ListArtifacts.Response(
            root_uri="", files=file_listing, next_page_token=None)
        listed = repo.list_artifacts("a.txt")
        assert len(listed) == 0
def _list_artifacts():
    """Handle a ``ListArtifacts`` request and return the listing as JSON.

    The run is looked up by the request's ``run_uuid``; its artifact repository
    is listed at the request's ``path`` (or at ``None`` when the request has no
    ``path`` field).

    :return: A ``Response`` with mimetype ``application/json`` whose body is the
             serialized ``ListArtifacts.Response`` (``files`` and ``root_uri``).
    """
    request_message = _get_request_message(ListArtifacts())
    path = request_message.path if request_message.HasField('path') else None
    run = _get_store().get_run(request_message.run_uuid)

    # Resolve the artifact repository once and reuse it for both the listing
    # and the root URI instead of resolving it twice per request.
    artifact_repo = _get_artifact_repo(run)
    artifact_entities = artifact_repo.list_artifacts(path)

    response_message = ListArtifacts.Response()
    response_message.files.extend([a.to_proto() for a in artifact_entities])
    response_message.root_uri = artifact_repo.artifact_uri

    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def _list_artifacts():
    """Handle a ``ListArtifacts`` request and return the listing as JSON.

    The request message is parsed with ``from_get=True``; the run is looked up
    in ``store`` by the request's ``run_uuid`` and its artifact repository is
    listed at the request's ``path`` (or at ``None`` when the request has no
    ``path`` field).

    :return: A ``Response`` with mimetype ``application/json`` whose body is the
             ``ListArtifacts.Response`` serialized with proto field names
             preserved.
    """
    request_message = _get_request_message(ListArtifacts(), from_get=True)
    path = request_message.path if request_message.HasField('path') else None
    run = store.get_run(request_message.run_uuid)

    # Resolve the artifact repository once and reuse it for both the listing
    # and the root URI instead of resolving it twice per request.
    artifact_repo = _get_artifact_repo(run)
    artifact_entities = artifact_repo.list_artifacts(path)

    response_message = ListArtifacts.Response()
    response_message.files.extend([a.to_proto() for a in artifact_entities])
    response_message.root_uri = artifact_repo.artifact_uri

    response = Response(mimetype='application/json')
    response.set_data(
        MessageToJson(response_message, preserving_proto_field_name=True))
    return response
def test_paginated_list_artifacts(self, databricks_artifact_repo):
    """Check that ``list_artifacts`` follows page tokens until an empty page.

    Four canned responses are queued on ``_call_endpoint``; the last one has no
    files. The test expects the entries of the first three pages to be returned
    and ``message_to_json`` to have received one request without a token
    followed by requests carrying the tokens "2", "4" and "6".
    """
    def page_of(file_name, dir_name):
        # One file entry plus one directory entry, mirroring each mocked page.
        return [
            FileInfo(path=file_name, is_dir=False, file_size=100),
            FileInfo(path=dir_name, is_dir=True, file_size=0),
        ]

    page_files = [page_of("a.txt", "b"), page_of("c.txt", "d"), page_of("e.txt", "f"), []]
    page_tokens = ["2", "4", "6", "8"]

    json_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY_PACKAGE + ".message_to_json")
    endpoint_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY + "._call_endpoint")
    with json_patch as json_mock, endpoint_patch as endpoint_mock:
        endpoint_mock.side_effect = [
            ListArtifacts.Response(root_uri="", files=files, next_page_token=token)
            for files, token in zip(page_files, page_tokens)
        ]
        json_mock.return_value = None

        listed = databricks_artifact_repo.list_artifacts()
        assert {"a.txt", "b", "c.txt", "d", "e.txt", "f"} == {info.path for info in listed}

        # The token of the final (empty) page must never be sent back.
        expected_requests = [mock.call(ListArtifacts(run_id=MOCK_RUN_ID, path=""))]
        expected_requests.extend(
            mock.call(ListArtifacts(run_id=MOCK_RUN_ID, path="", page_token=token))
            for token in page_tokens[:-1]
        )
        json_mock.assert_has_calls(expected_requests)
def test_paginated_list_artifacts(self, databricks_artifact_repo):
    """Check that ``list_artifacts`` keeps paging until it receives an empty page.

    ``_call_endpoint`` yields four canned responses in turn, the last of which
    has no files. The entries from the first three must be returned, and
    ``message_to_json`` must have been called with a token-less request followed
    by requests carrying page tokens '2', '4' and '6'.
    """
    # (file name, directory name, next_page_token) for each non-empty page.
    populated_pages = [
        ('a.txt', 'b', '2'),
        ('c.txt', 'd', '4'),
        ('e.txt', 'f', '6'),
    ]
    queued_responses = []
    for file_name, dir_name, token in populated_pages:
        entries = [
            FileInfo(path=file_name, is_dir=False, file_size=100),
            FileInfo(path=dir_name, is_dir=True, file_size=0),
        ]
        queued_responses.append(
            ListArtifacts.Response(root_uri='', files=entries, next_page_token=token))
    # Final page: no files, but still advertises a token that must be ignored.
    queued_responses.append(
        ListArtifacts.Response(root_uri='', files=[], next_page_token='8'))

    json_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY_PACKAGE + '.message_to_json')
    endpoint_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY + '._call_endpoint')
    with json_patch as json_mock, endpoint_patch as endpoint_mock:
        endpoint_mock.side_effect = queued_responses
        json_mock.return_value = None

        listed = databricks_artifact_repo.list_artifacts()
        expected_paths = {'a.txt', 'b', 'c.txt', 'd', 'e.txt', 'f'}
        assert expected_paths == {info.path for info in listed}

        expected_requests = [mock.call(ListArtifacts(run_id=MOCK_RUN_ID, path=""))]
        for _, _, token in populated_pages:
            expected_requests.append(
                mock.call(ListArtifacts(run_id=MOCK_RUN_ID, path="", page_token=token)))
        json_mock.assert_has_calls(expected_requests)
def _list_artifacts():
    """Handle a ``ListArtifacts`` request and return the listing as JSON.

    The run is looked up by the request's ``run_id``, falling back to
    ``run_uuid`` when ``run_id`` is falsy. If the run's artifact URI is a
    servable proxied artifact root, the listing is delegated to
    ``_list_artifacts_for_proxied_run_artifact_root``; otherwise the run's
    artifact repository is listed directly.

    :return: A ``Response`` with mimetype ``application/json`` whose body is the
             serialized ``ListArtifacts.Response``, with ``root_uri`` set to the
             run's artifact URI.
    """
    request_message = _get_request_message(ListArtifacts())
    response_message = ListArtifacts.Response()
    relative_path = request_message.path if request_message.HasField("path") else None

    run = _get_tracking_store().get_run(request_message.run_id or request_message.run_uuid)
    artifact_root = run.info.artifact_uri

    if _is_servable_proxied_run_artifact_root(artifact_root):
        entities = _list_artifacts_for_proxied_run_artifact_root(
            proxied_artifact_root=artifact_root,
            relative_path=relative_path,
        )
    else:
        entities = _get_artifact_repo(run).list_artifacts(relative_path)

    response_message.files.extend([entity.to_proto() for entity in entities])
    response_message.root_uri = artifact_root

    response = Response(mimetype="application/json")
    response.set_data(message_to_json(response_message))
    return response
def test_list_artifacts_with_relative_path(self):
    """Check ``list_artifacts`` for a repository rooted at ``MOCK_SUBDIR_ROOT_URI``.

    ``_get_run_artifact_root``, ``message_to_json`` and ``_call_endpoint`` are
    patched. Returned paths must not include ``MOCK_SUBDIR``, while the request
    passed to ``message_to_json`` must use the path joined onto ``MOCK_SUBDIR``.
    """
    dir_listing = [
        FileInfo(path=posixpath.join(MOCK_SUBDIR, 'test/a.txt'), is_dir=False, file_size=100),
        FileInfo(path=posixpath.join(MOCK_SUBDIR, 'test/dir'), is_dir=True, file_size=0),
    ]
    root_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY + '._get_run_artifact_root')
    json_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY_PACKAGE + '.message_to_json')
    endpoint_patch = mock.patch(DATABRICKS_ARTIFACT_REPOSITORY + '._call_endpoint')
    with root_patch as root_mock, json_patch as json_mock, endpoint_patch as endpoint_mock:
        root_mock.return_value = MOCK_RUN_ROOT_URI
        endpoint_mock.return_value = ListArtifacts.Response(
            root_uri='', files=dir_listing, next_page_token=None)
        json_mock.return_value = None

        repo = get_artifact_repository(MOCK_SUBDIR_ROOT_URI)
        listed = repo.list_artifacts('test')
        assert isinstance(listed, list)
        assert isinstance(listed[0], FileInfoEntity)
        assert len(listed) == 2

        file_entry, dir_entry = listed
        assert file_entry.path == 'test/a.txt'
        assert file_entry.is_dir is False
        assert file_entry.file_size == 100
        assert dir_entry.path == 'test/dir'
        assert dir_entry.is_dir is True
        assert dir_entry.file_size is None

        expected_request = ListArtifacts(
            run_id=MOCK_RUN_ID, path=posixpath.join(MOCK_SUBDIR, "test"))
        json_mock.assert_called_with(expected_request)
def _list_artifacts_mlflow_artifacts():
    """
    Request handler for `GET /mlflow-artifacts/artifacts?path=<value>` that lists the
    artifacts located under `path` (a relative path from the root artifact directory).

    Each entry in the response carries only the basename of the artifact's path, together
    with its `is_dir` flag and file size.
    """
    request_message = _get_request_message(ListArtifactsMlflowArtifacts())
    if request_message.HasField("path"):
        path = request_message.path
    else:
        path = None

    artifact_repo = _get_artifact_repo_mlflow_artifacts()
    file_protos = [
        FileInfo(posixpath.basename(info.path), info.is_dir, info.file_size).to_proto()
        for info in artifact_repo.list_artifacts(path)
    ]

    response_message = ListArtifacts.Response()
    response_message.files.extend(file_protos)

    response = Response(mimetype="application/json")
    response.set_data(message_to_json(response_message))
    return response