def test_file_retrieval(self):
        """Checks resolution and chunked retrieval of file artifacts.

        A one-byte file must come back as a single response, while a 37-byte
        file read with a chunk size of 10 must come back as chunks of
        10, 10, 10 and 7 bytes.
        """
        chunk_size = 10
        files = InMemoryFileManager({
            'path/to/a': b'a',
            'path/to/b': b'b' * 37
        })
        service = artifact_service.ArtifactRetrievalService(
            files.file_reader, chunk_size=chunk_size)

        def fetch(artifact):
            # Drain the GetArtifact stream into a list for comparison.
            request = beam_artifact_api_pb2.GetArtifactRequest(
                artifact=artifact)
            return list(service.GetArtifact(request))

        def chunk(data):
            return beam_artifact_api_pb2.GetArtifactResponse(data=data)

        small = self.file_artifact('path/to/a')

        # Resolving a file artifact is expected to return it unchanged.
        resolved = service.ResolveArtifacts(
            beam_artifact_api_pb2.ResolveArtifactsRequest(artifacts=[small]))
        self.assertEqual(
            resolved,
            beam_artifact_api_pb2.ResolveArtifactsResponse(
                replacements=[small]))

        # Smaller than one chunk: a single response.
        self.assertEqual(fetch(small), [chunk(b'a')])

        # Larger than one chunk: split on chunk_size boundaries.
        large = self.file_artifact('path/to/b')
        self.assertEqual(
            fetch(large), [chunk(b'b' * size) for size in (10, 10, 10, 7)])
Beispiel #2
0
def resolve_artifacts(artifacts, service, dest_dir):
  """Resolves artifacts via the service and hands each to maybe_store_artifact.

  Args:
    artifacts: the artifacts to resolve. A falsy value (e.g. an empty list
      or None) is returned as-is without contacting the service.
    service: object exposing `ResolveArtifacts(request)`; it is also passed
      through to `maybe_store_artifact`.
    dest_dir: passed through to `maybe_store_artifact`.

  Returns:
    The original `artifacts` object when it is falsy; otherwise a list with
    the result of `maybe_store_artifact` for every replacement returned by
    the service.
  """
  # Nothing to resolve: skip the service call entirely.
  if not artifacts:
    return artifacts

  request = beam_artifact_api_pb2.ResolveArtifactsRequest(artifacts=artifacts)
  response = service.ResolveArtifacts(request)
  return [
      maybe_store_artifact(replacement, service, dest_dir)
      for replacement in response.replacements
  ]
Beispiel #3
0
def resolve_as_files(retrieval_service, file_writer, dependencies):
  """Translates a set of dependencies into file-based dependencies.

  This is a generator: nothing is resolved, fetched or written until the
  result is iterated.

  Args:
    retrieval_service: object exposing `GetArtifact(request)`, returning an
      iterable of chunks with a `data` attribute, and a resolve method taking
      a `ResolveArtifactsRequest`. The resolve method is looked up as
      `ResolveArtifactss` (the misspelling this function originally called)
      and, if that is absent, as `ResolveArtifacts`.
    file_writer: callable taking a file name and returning a
      `(file_handle, path)` pair. `file_handle` is used as a context manager
      and receives the artifact bytes via `write`.
    dependencies: the artifacts to resolve.

  Yields:
    One `ArtifactInformation` per resolved dependency, typed as a FILE
    artifact pointing at the written path and carrying the dependency's
    original role urn and role payload.
  """
  # This function originally called the misspelled `ResolveArtifactss`.
  # Keep honouring that name first so existing duck-typed services behave
  # exactly as before, but also accept services that only implement the
  # correctly spelled `ResolveArtifacts`.
  resolve = getattr(retrieval_service, 'ResolveArtifactss', None)
  if resolve is None:
    resolve = retrieval_service.ResolveArtifacts

  # A single resolution pass, so that what comes back can be fetched.
  resolution = resolve(
      beam_artifact_api_pb2.ResolveArtifactsRequest(
          artifacts=dependencies,
          # Anything fetchable will do.
          # TODO(robertwb): Take advantage of shared filesystems, urls.
          preferred_urns=[],
      ))

  # Fetch each of the dependencies, using file_writer to store them as
  # file-based artifacts.
  # TODO(robertwb): Consider parallelizing the actual writes.
  for dep in resolution.replacements:
    if dep.role_urn == common_urns.artifact_roles.STAGING_TO.urn:
      staged_name = proto_utils.parse_Bytes(
          dep.role_payload,
          beam_runner_api_pb2.ArtifactStagingToRolePayload).staged_name
      base_name = os.path.basename(staged_name)
    else:
      base_name = None

    # The content hash keeps names unique; the staged base name, when there
    # is a non-empty one, is appended to keep the file recognisable.
    name_parts = [hashlib.sha256(dep.SerializeToString()).hexdigest()]
    if base_name:
      name_parts.append(base_name)
    unique_name = '-'.join(name_parts)

    file_handle, path = file_writer(unique_name)
    with file_handle as fout:
      # Stream chunk by chunk rather than buffering the whole artifact.
      for chunk in retrieval_service.GetArtifact(
          beam_artifact_api_pb2.GetArtifactRequest(artifact=dep)):
        fout.write(chunk.data)

    yield beam_runner_api_pb2.ArtifactInformation(
        type_urn=common_urns.artifact_types.FILE.urn,
        type_payload=beam_runner_api_pb2.ArtifactFilePayload(
            path=path).SerializeToString(),
        role_urn=dep.role_urn,
        role_payload=dep.role_payload)