예제 #1
0
def real_gcs_session_tmp_url_prefix(real_gcs_url_stem) -> "Iterator[Optional[str]]":
    """
    Sets up and tears down a temporary "directory" on GCS to be shared by all
    of our tests. Not applicable for fake GCS.

    Yields
    ------
    Optional[str]
        The ``gs://`` URL prefix (with a trailing slash) of the temporary
        directory, or None when ``real_gcs_url_stem`` is None (fake GCS).
    """

    if real_gcs_url_stem is None:
        yield None
        return

    gcs_fs = get_gcs_fs_without_warnings()

    # getrandbits(64) is uniform over [0, 2**64 - 1], so the hex string is
    # always exactly 16 characters. (The previous randint(0, 2**64) was
    # inclusive of 2**64, which would format as 17 characters.)
    random_hex_str = "%016x" % random.getrandbits(64)
    path_str = f"{getpass.getuser()}/BNTESTDATA/{random_hex_str}"

    gs_url = real_gcs_url_stem + "/" + path_str + "/"
    assert not gcs_fs.exists(gs_url)

    yield gs_url

    # This will throw an exception if the URL doesn't exist at this point.
    # Currently every test using this fixture does write some objects under this URL,
    # *and* doesn't clean all of them up. If this changes, we may need to start
    # handling this more gracefully.
    gcs_fs.rm(gs_url, recursive=True)
예제 #2
0
    def _stage(self):
        """Serialize this task with cloudpickle and upload it to its inputs URI."""
        cloudpickle = import_optional_dependency("cloudpickle")

        path = self.inputs_uri()
        logging.info(f"Staging task {self.name} at {path}")

        # Write the pickled task directly to GCS so the remote job can load it.
        with get_gcs_fs_without_warnings().open(path, "wb") as out_file:
            cloudpickle.dump(self, out_file)
예제 #3
0
def gcs_fs(use_fake_gcp, multiprocessing_manager):
    """Return the GCS filesystem to test against: fake or real."""
    if not use_fake_gcp:
        return get_gcs_fs_without_warnings()

    # When running an AIP job, the GCS filesystem is serialized and then
    # deserialized. Any data written to the deserialized instance needs to
    # be readable by the original instance. Hence, a proxy object is used to
    # store the fake GCS data. This is independent of whether parallel
    # processing has been enabled.
    shared_store = multiprocessing_manager.dict()
    return FakeGcsFs(shared_store)
예제 #4
0
def read_bytes_from_url(url):
    """Reads the contents of a URL and returns them as a bytes object."""

    # Local file URLs are read via pathlib; GCS URLs via the gcsfs client.
    if is_file_url(url):
        return path_from_url(url).read_bytes()
    if is_gcs_url(url):
        return get_gcs_fs_without_warnings().cat_file(url)
    raise AssertionError(f"Unexpected scheme in URL: {url}")
예제 #5
0
    def result(self, timeout: "Optional[int]" = None):
        """
        Return the job's unpickled output, waiting up to ``timeout`` seconds.

        Re-raises the job's own exception if it failed; otherwise loads the
        pickled result from ``self.output`` on GCS.
        """
        # This will need an update to support other serializers.
        exc = self.exception(timeout)
        if exc is not None:
            raise exc

        try:
            gcs_fs = get_gcs_fs_without_warnings()
            with gcs_fs.open(self.output, "rb") as f:
                return pickle.load(f)
        except BaseException:
            # Record which job's output failed to load, then propagate.
            logging.warning(
                f"Failed to load output from successful job at {self.output}"
            )
            raise
예제 #6
0
File: main.py Project: HiveWang/bionic
def _run(ipath):
    """Load a pickled task from GCS, execute it, and upload its pickled result."""
    cloudpickle = import_optional_dependency("cloudpickle")

    fs = get_gcs_fs_without_warnings()
    with fs.open(ipath, "rb") as task_file:
        task = cloudpickle.load(task_file)

    # Now that we have the task, set up logging.
    _set_up_logging(task.job_id(), task.config.project_name)
    logging.info(f"Read task from {ipath}")

    result = task.function()

    opath = task.output_uri()
    logging.info(f"Uploading result to {opath}")
    with fs.open(opath, "wb") as result_file:
        pickle.dump(result, result_file)
예제 #7
0
def gcs_builder(builder, tmp_gcs_url_prefix, use_fake_gcp, gcs_fs):
    """Return a builder configured to persist its cache under the GCS prefix."""
    URL_PREFIX = "gs://"
    assert tmp_gcs_url_prefix.startswith(URL_PREFIX)
    # Split "gs://bucket/some/path/" into the bucket and the object path.
    bucket_name, object_path = tmp_gcs_url_prefix[len(URL_PREFIX):].split("/", 1)

    builder = builder.build().to_builder()

    gcs_settings = {
        "core__persistent_cache__gcs__bucket_name": bucket_name,
        "core__persistent_cache__gcs__object_path": object_path,
        "core__persistent_cache__gcs__enabled": True,
    }
    for setting_name, setting_value in gcs_settings.items():
        builder.set(setting_name, setting_value)

    if not use_fake_gcp:
        # Since gcs is enabled, if core__persistent_cache__gcs__fs is not set,
        # the builder should use get_gcs_fs_without_warnings() by default.
        # The passed in gcs_fs is used in other places, verify that it is
        # not a fake.
        assert gcs_fs == get_gcs_fs_without_warnings()
    else:
        builder.set("core__persistent_cache__gcs__fs", gcs_fs)

    return builder
예제 #8
0
def gcs_fs(use_fake_gcp, make_dict):
    """Return a fake GCS filesystem when faking GCP, otherwise the real one."""
    return FakeGcsFs(make_dict) if use_fake_gcp else get_gcs_fs_without_warnings()
예제 #9
0
File: helpers.py Project: HiveWang/bionic
def gcs_fs_download(url, path):
    """Copy the GCS object (or directory tree) at ``url`` to the local ``path``."""
    destination = str(path)
    fs = get_gcs_fs_without_warnings()
    # Directories need a recursive copy; single objects use the file API.
    if not fs.isdir(url):
        fs.get_file(url, destination)
    else:
        fs.get(url, destination, recursive=True)
예제 #10
0
File: helpers.py Project: HiveWang/bionic
def gcs_fs_path_exists(url):
    """Return whether anything exists at the given GCS URL."""
    return get_gcs_fs_without_warnings().exists(url)
예제 #11
0
File: helpers.py Project: HiveWang/bionic
def gcs_fs_wipe_path(url):
    """
    Recursively delete everything under ``url`` on GCS.

    As a safety guard, refuses to touch any URL outside the test sandbox
    (identified by the "BNTESTDATA" marker in the path).

    Raises
    ------
    AssertionError
        If ``url`` does not contain "BNTESTDATA".
    """
    # Explicit raise instead of a bare `assert`: asserts are stripped when
    # Python runs with -O, and this guard must hold unconditionally.
    if "BNTESTDATA" not in url:
        raise AssertionError(f"Refusing to wipe non-test GCS path: {url}")
    fs = get_gcs_fs_without_warnings()
    fs.rm(url, recursive=True)
예제 #12
0
def run():
    """
    Thin entry point that delegates to _run, which does the actual work. The
    indirection exists so tests can replace _run.
    """
    input_path = sys.argv[-1]
    _run(input_path, get_gcs_fs_without_warnings())