def real_gcs_session_tmp_url_prefix(real_gcs_url_stem) -> Optional[str]:
    """
    Sets up and tears down a temporary "directory" on GCS to be shared by all
    of our tests. Not applicable for fake GCS.

    This is a generator: it yields exactly once. When ``real_gcs_url_stem`` is
    None it yields None and does nothing else. Otherwise it yields a URL of
    the form ``<stem>/<user>/BNTESTDATA/<16 hex digits>/`` and, once resumed,
    recursively removes everything under that URL.
    """
    if real_gcs_url_stem is None:
        yield None
        return

    gcs_fs = get_gcs_fs_without_warnings()

    # getrandbits(64) never exceeds 64 bits, so the formatted string is always
    # exactly 16 hex digits. (randint(0, 2**64) includes 2**64 itself, which
    # would format to 17 digits.)
    random_hex_str = "%016x" % random.getrandbits(64)
    path_str = f"{getpass.getuser()}/BNTESTDATA/{random_hex_str}"
    gs_url = real_gcs_url_stem + "/" + path_str + "/"
    # The prefix is supposed to be fresh; refuse to reuse an existing one.
    assert not gcs_fs.exists(gs_url)

    yield gs_url

    # This will throw an exception if the URL doesn't exist at this point.
    # Currently every test using this fixture does write some objects under
    # this URL, *and* doesn't clean all of them up. If this changes, we may
    # need to start handling this more gracefully.
    gcs_fs.rm(gs_url, recursive=True)
def _stage(self):
    """
    Serializes this task with cloudpickle and writes it to the location
    returned by ``self.inputs_uri()`` on GCS.
    """
    # cloudpickle is an optional dependency, so it is resolved at call time.
    cloudpickle = import_optional_dependency("cloudpickle")

    path = self.inputs_uri()
    logging.info(f"Staging task {self.name} at {path}")

    with get_gcs_fs_without_warnings().open(path, "wb") as sink:
        cloudpickle.dump(self, sink)
def gcs_fs(use_fake_gcp, multiprocessing_manager):
    """
    Returns the GCS filesystem object to use: a ``FakeGcsFs`` when
    ``use_fake_gcp`` is truthy, otherwise the real filesystem from
    ``get_gcs_fs_without_warnings()``.
    """
    if not use_fake_gcp:
        return get_gcs_fs_without_warnings()

    # When running an AIP job, the GCS filesystem is serialized and then
    # deserialized. Any data written to the deserialized instance needs to
    # be readable by the original instance. Hence, a proxy object is used to
    # store the fake GCS data. This is independent of whether parallel
    # processing has been enabled.
    shared_store = multiprocessing_manager.dict()
    return FakeGcsFs(shared_store)
def read_bytes_from_url(url):
    """
    Reads the contents of a URL and returns them as a bytes object.

    File URLs are read via the path returned by ``path_from_url``; GCS URLs
    are read with the GCS filesystem's ``cat_file``. Any other URL raises an
    AssertionError.
    """
    if is_file_url(url):
        return path_from_url(url).read_bytes()

    if is_gcs_url(url):
        return get_gcs_fs_without_warnings().cat_file(url)

    raise AssertionError(f"Unexpected scheme in URL: {url}")
def result(self, timeout: int = None):
    """
    Returns the unpickled object stored at ``self.output``.

    If ``self.exception(timeout)`` returns an exception, that exception is
    raised instead. Any failure while opening or unpickling the output is
    logged as a warning and then re-raised unchanged.
    """
    # This will need an update to support other serializers.
    exc = self.exception(timeout)
    if exc is not None:
        raise exc

    try:
        with get_gcs_fs_without_warnings().open(self.output, "rb") as stream:
            payload = pickle.load(stream)
    except:  # NOQA
        # Deliberately broad: the failure is only logged here, never swallowed.
        logging.warning(
            f"Failed to load output from succesful job at {self.output}"
        )
        raise
    return payload
def _run(ipath, gcs_fs=None):
    """
    Loads a cloudpickled task from ``ipath``, runs it, and uploads the pickled
    result to the task's output URI.

    Args:
        ipath: Location of the cloudpickled task; opened through ``gcs_fs``.
        gcs_fs: Filesystem object used for both the read and the write. When
            omitted (or None), ``get_gcs_fs_without_warnings()`` is used, which
            keeps the single-argument call form working. Accepting it
            explicitly also lets ``run()`` pass the filesystem it constructs.
    """
    cloudpickle = import_optional_dependency("cloudpickle")

    if gcs_fs is None:
        gcs_fs = get_gcs_fs_without_warnings()

    # NOTE: unpickling executes arbitrary code; ipath must only ever point at
    # data staged by a trusted party.
    with gcs_fs.open(ipath, "rb") as f:
        task = cloudpickle.load(f)

    # Now that we have the task, set up logging.
    _set_up_logging(task.job_id(), task.config.project_name)
    logging.info(f"Read task from {ipath}")

    result = task.function()

    opath = task.output_uri()
    logging.info(f"Uploading result to {opath}")
    with gcs_fs.open(opath, "wb") as f:
        pickle.dump(result, f)
def gcs_builder(builder, tmp_gcs_url_prefix, use_fake_gcp, gcs_fs):
    """
    Returns a fresh builder, derived from ``builder``, with the GCS persistent
    cache enabled and pointed at ``tmp_gcs_url_prefix``.

    ``tmp_gcs_url_prefix`` must start with "gs://"; the text up to the first
    following "/" is used as the bucket name and the remainder as the object
    path. With fake GCP the supplied ``gcs_fs`` is installed on the builder;
    otherwise it is only checked against the real filesystem.
    """
    scheme = "gs://"
    assert tmp_gcs_url_prefix.startswith(scheme)
    bucket_and_object = tmp_gcs_url_prefix[len(scheme):]
    bucket_name, object_path = bucket_and_object.split("/", 1)

    builder = builder.build().to_builder()

    cache_settings = (
        ("core__persistent_cache__gcs__bucket_name", bucket_name),
        ("core__persistent_cache__gcs__object_path", object_path),
        ("core__persistent_cache__gcs__enabled", True),
    )
    for key, value in cache_settings:
        builder.set(key, value)

    if not use_fake_gcp:
        # Since gcs is enabled, if core__persistent_cache__gcs__fs is not set,
        # the builder should use get_gcs_fs_without_warnings() by default.
        # The passed in gcs_fs is used in other places, verify that it is
        # not a fake.
        assert gcs_fs == get_gcs_fs_without_warnings()
        return builder

    builder.set("core__persistent_cache__gcs__fs", gcs_fs)
    return builder
def gcs_fs(use_fake_gcp, make_dict):
    """
    Returns a ``FakeGcsFs`` constructed from ``make_dict`` when
    ``use_fake_gcp`` is truthy; otherwise returns the real filesystem from
    ``get_gcs_fs_without_warnings()``.
    """
    if not use_fake_gcp:
        return get_gcs_fs_without_warnings()
    return FakeGcsFs(make_dict)
def gcs_fs_download(url, path):
    """
    Copies ``url`` from GCS to the local ``path``.

    A URL the filesystem reports as a directory is fetched recursively;
    anything else is fetched as a single file.
    """
    remote = get_gcs_fs_without_warnings()

    if not remote.isdir(url):
        remote.get_file(url, str(path))
        return

    remote.get(url, str(path), recursive=True)
def gcs_fs_path_exists(url):
    """Returns whatever the GCS filesystem's ``exists`` reports for ``url``."""
    return get_gcs_fs_without_warnings().exists(url)
def gcs_fs_wipe_path(url):
    """
    Recursively removes ``url`` from GCS.

    As a safety guard, the URL must contain the marker "BNTESTDATA";
    otherwise the assertion fails before anything is touched.
    """
    assert "BNTESTDATA" in url
    get_gcs_fs_without_warnings().rm(url, recursive=True)
def run():
    """
    This method is a proxy to _run which does the actual work. The proxy
    exists so that _run can be replaced for testing.

    The last command-line argument and a GCS filesystem obtained from
    get_gcs_fs_without_warnings() are forwarded to _run.
    """
    ipath = sys.argv[-1]
    _run(ipath, get_gcs_fs_without_warnings())