def test_file_retrieval(self):
  """Artifacts resolve to themselves and stream back in chunk-size pieces."""
  manager = InMemoryFileManager({
      'path/to/a': b'a', 'path/to/b': b'b' * 37
  })
  service = artifact_service.ArtifactRetrievalService(
      manager.file_reader, chunk_size=10)

  artifact_a = self.file_artifact('path/to/a')
  # A file artifact is already fetchable, so resolution returns it unchanged.
  self.assertEqual(
      service.ResolveArtifacts(
          beam_artifact_api_pb2.ResolveArtifactsRequest(
              artifacts=[artifact_a])),
      beam_artifact_api_pb2.ResolveArtifactsResponse(
          replacements=[artifact_a]))

  # A payload smaller than chunk_size arrives as a single response.
  self.assertEqual(
      list(
          service.GetArtifact(
              beam_artifact_api_pb2.GetArtifactRequest(artifact=artifact_a))),
      [beam_artifact_api_pb2.GetArtifactResponse(data=b'a')])

  # 37 bytes with chunk_size=10 splits into 10+10+10+7.
  expected_chunks = [
      beam_artifact_api_pb2.GetArtifactResponse(data=b'b' * size)
      for size in (10, 10, 10, 7)
  ]
  self.assertEqual(
      list(
          service.GetArtifact(
              beam_artifact_api_pb2.GetArtifactRequest(
                  artifact=self.file_artifact('path/to/b')))),
      expected_chunks)
def resolve_artifacts(artifacts, service, dest_dir):
  """Resolves artifacts via the service, storing each replacement as needed.

  Returns the (possibly empty) input unchanged when there is nothing to
  resolve; otherwise returns the service's replacements, each passed through
  maybe_store_artifact with the given destination directory.
  """
  # Guard clause: nothing to resolve, nothing to call.
  if not artifacts:
    return artifacts
  request = beam_artifact_api_pb2.ResolveArtifactsRequest(artifacts=artifacts)
  response = service.ResolveArtifacts(request)
  return [
      maybe_store_artifact(replacement, service, dest_dir)
      for replacement in response.replacements
  ]
def resolve_as_files(retrieval_service, file_writer, dependencies):
  """Translates a set of dependencies into file-based dependencies.

  Each dependency is resolved through the retrieval service, fetched in
  chunks, and written to a file obtained from file_writer; a FILE-typed
  ArtifactInformation proto pointing at that file is yielded for each.

  Args:
    retrieval_service: service exposing ResolveArtifacts and GetArtifact.
    file_writer: callable taking a name and returning (file_handle, path).
    dependencies: iterable of ArtifactInformation protos to materialize.

  Yields:
    beam_runner_api_pb2.ArtifactInformation with FILE type, preserving each
    original dependency's role_urn and role_payload.
  """
  # Resolve until nothing changes. This ensures that they can be fetched.
  # BUG FIX: was retrieval_service.ResolveArtifactss (typo) — that attribute
  # does not exist; the correct RPC name is ResolveArtifacts.
  resolution = retrieval_service.ResolveArtifacts(
      beam_artifact_api_pb2.ResolveArtifactsRequest(
          artifacts=dependencies,
          # Anything fetchable will do.
          # TODO(robertwb): Take advantage of shared filesystems, urls.
          preferred_urns=[],
      ))
  dependencies = resolution.replacements

  # Fetch each of the dependencies, using file_writer to store them as
  # file-based artifacts.
  # TODO(robertwb): Consider parallelizing the actual writes.
  for dep in dependencies:
    # Use the declared staged name (when present) to keep the file name
    # recognizable; the sha256 prefix guarantees uniqueness regardless.
    if dep.role_urn == common_urns.artifact_roles.STAGING_TO.urn:
      base_name = os.path.basename(
          proto_utils.parse_Bytes(
              dep.role_payload,
              beam_runner_api_pb2.ArtifactStagingToRolePayload).staged_name)
    else:
      base_name = None
    unique_name = '-'.join(
        filter(
            None,
            [hashlib.sha256(dep.SerializeToString()).hexdigest(), base_name]))
    file_handle, path = file_writer(unique_name)
    # Stream the artifact's chunks straight into the destination file.
    with file_handle as fout:
      for chunk in retrieval_service.GetArtifact(
          beam_artifact_api_pb2.GetArtifactRequest(artifact=dep)):
        fout.write(chunk.data)
    yield beam_runner_api_pb2.ArtifactInformation(
        type_urn=common_urns.artifact_types.FILE.urn,
        type_payload=beam_runner_api_pb2.ArtifactFilePayload(
            path=path).SerializeToString(),
        role_urn=dep.role_urn,
        role_payload=dep.role_payload)