Example #1
0
def test_abspath_get_abspath_if_exists():
    """AbsPath.get_abspath_if_exists() should expand a relative path to an
    absolute one only when the file actually exists on local storage;
    otherwise the input should come back unchanged.
    """
    # target file lives on CWD
    local_abspath = os.path.join(os.getcwd(), "test.txt")
    local_file = AbsPath(local_abspath)
    if local_file.exists:
        local_file.rm()

    # file is missing: the relative path is returned untouched
    for relpath in ("test.txt", AutoURI("test.txt")):
        assert AbsPath.get_abspath_if_exists(relpath) == "test.txt"

    local_file.write("hello-world")

    # file now exists: the relative path is expanded to an absolute one
    for relpath in ("test.txt", AutoURI("test.txt")):
        assert AbsPath.get_abspath_if_exists(relpath) == local_abspath

    # non-existing inputs always come back unchanged
    for missing in ("tttttttttest.txt", AutoURI("tttttttttest.txt")):
        assert AbsPath.get_abspath_if_exists(missing) == "tttttttttest.txt"
    assert (AbsPath.get_abspath_if_exists("~/if-it-does-not-exist")
            == "~/if-it-does-not-exist")
    assert (AbsPath.get_abspath_if_exists("non-existing-file")
            == "non-existing-file")

    local_file.rm()
Example #2
0
def run_write_v6_txt(prefix, nth):
    """Write v6.txt concurrently from `nth` worker processes.

    Removes any stale copy of v6.txt under `prefix` first, then fans out
    write_v6_txt() over a process pool so that lock contention on the same
    URI is exercised.

    Args:
        prefix: URI prefix (directory) where v6.txt is written.
        nth: number of worker processes (and write attempts).
    """
    s = os.path.join(prefix, "v6.txt")
    u = AutoURI(s)
    if u.exists:
        u.rm()
    p = Pool(nth)
    try:
        # each worker receives a (uri, worker_index) tuple
        p.map(write_v6_txt, list(zip([s] * nth, range(nth))))
    finally:
        # always clean up the pool, even if a worker raised
        p.close()
        p.join()
def test_autouri_lock_with_context_raise(local_v6_txt, gcs_v6_txt, s3_v6_txt):
    """The .lock file must be removed even when an exception escapes the
    lock's context manager.
    """
    for txt_uri in (local_v6_txt, gcs_v6_txt, s3_v6_txt):
        lock_file = AutoURI(txt_uri + URIBase.LOCK_FILE_EXT)

        raised = False
        try:
            with AutoURI(txt_uri).get_lock(no_lock=False):
                assert lock_file.exists
                time.sleep(1)
                raise AutoURIFileLockTestException
        except AutoURIFileLockTestException:
            raised = True
            # lock released on context exit despite the exception
            assert not lock_file.exists
        assert raised
Example #4
0
def recurse_raise_if_uri_not_exist(uri):
    """Walk a .json/.tsv/.csv file recursively and raise if any URI
    referenced inside it does not exist. Unknown extensions are ignored.
    """
    uri = AutoURI(uri)
    if uri.is_valid:
        if not uri.exists:
            raise Exception("URI is a valid path but does not exist.")
        # dispatch the recursion helper on file extension
        recurse_fnc = {
            ".json": recurse_json,
            ".tsv": recurse_tsv,
            ".csv": recurse_csv,
        }.get(uri.ext)
        if recurse_fnc is not None:
            recurse_fnc(uri.read(), recurse_raise_if_uri_not_exist)
    return None, False
def test_autouri_lock_with_context(local_v6_txt, gcs_v6_txt, s3_v6_txt):
    """get_lock() as a context manager: a real lock creates the .lock file
    while held and removes it on exit; a dummy (no_lock=True) lock never
    creates it.
    """
    for txt_uri in (local_v6_txt, gcs_v6_txt, s3_v6_txt):
        lock_file = AutoURI(txt_uri + URIBase.LOCK_FILE_EXT)

        # real lock: lock file present while held, gone afterwards
        with AutoURI(txt_uri).get_lock(no_lock=False):
            assert lock_file.exists
            time.sleep(1)
        assert not lock_file.exists

        # dummy lock: lock file never appears
        with AutoURI(txt_uri).get_lock(no_lock=True):
            assert not lock_file.exists
            time.sleep(1)
        assert not lock_file.exists
Example #6
0
def test_localize_self_ref(
    local_test_path,
    gcs_j1_json_self_ref,
    gcs_v41_json_self_ref,
    gcs_v421_tsv_self_ref,
    gcs_v5_csv_self_ref,
    gcs_v6_txt_self_ref,
) -> Tuple[str, bool]:
    """Detect direct/indirect self references (and any recursion deeper
    than the default limit of 10) while localizing GCS files on local
    storage.

    The *_self_ref GCS fixtures reference each other indirectly:
        v1.json -> v421.tsv -> v1.json -> ...
    so recursive localization must raise AutoURIRecursionError.
    """
    loc_prefix = os.path.join(local_test_path, "test_localize_self_ref")

    # localization from remote storages
    for j1_json in (gcs_j1_json_self_ref, ):
        u = AutoURI(j1_json)
        dest_prefix = loc_prefix + u.__class__.get_loc_suffix()

        with pytest.raises(AutoURIRecursionError):
            AbsPath.localize(u, recursive=True, loc_prefix=dest_prefix)
def test_autouri_lock(local_v6_txt, gcs_v6_txt, s3_v6_txt):
    """Manual acquire()/release(): the .lock file exists while held and
    is removed on release. With no_lock=True a trivial dummy (not a
    BaseFileLock) is returned.
    """
    for txt_uri in (local_v6_txt, gcs_v6_txt, s3_v6_txt):
        lock_file = AutoURI(txt_uri + URIBase.LOCK_FILE_EXT)

        real_lock = AutoURI(txt_uri).get_lock(no_lock=False)
        real_lock.acquire()
        try:
            assert lock_file.exists
            time.sleep(1)
        finally:
            real_lock.release()
        assert not lock_file.exists

        # no_lock=True returns a trivial dummy lock object
        dummy_lock = AutoURI(txt_uri).get_lock(no_lock=True)
        assert not isinstance(dummy_lock, BaseFileLock)
Example #8
0
def test_abspath_cp_url(local_v6_txt, url_test_path) -> "AutoURI":
    """Copying local_v6_txt onto URL storage (url_test_path) must fail
    with ReadOnlyStorageError since URL storage is read-only.
    """
    src = AbsPath(local_v6_txt)
    basename = os.path.basename(local_v6_txt)

    for test_path in (url_test_path, ):
        dest = AutoURI(os.path.join(test_path, "test_abspath_cp", basename))
        with pytest.raises(ReadOnlyStorageError):
            src.cp(dest, return_flag=True)
Example #9
0
def make_files_in_dir(prefix, make_local_empty_dir_d_a=False):
    """Make a complicated directory structure with empty files.

    Directory structure and empty files in it:
        $prefix (as root)
            a
            b/
                a
                b
            c/
                a/
                    a
                    b
                b/
                    a/
                        a
                c
            d/ (optional if make_local_empty_dir_d_a)
                a/ (optional if make_local_empty_dir_d_a)
    Args:
        make_local_empty_dir_d_a:
            Make a local empty dir ($prefix/d/a/).
            This flag should not be used for cloud buckets since
            they don't support sub-directories.
    Returns:
        List of file URIs.
    """
    # relative paths of all empty files to create under prefix
    rel_paths = (
        "a",
        "b/a",
        "b/b",
        "c/a/a",
        "c/a/b",
        "c/b/a/a",
        "c/c",
    )
    all_files = [os.path.join(prefix, rel_path) for rel_path in rel_paths]

    for uri in all_files:
        AutoURI(uri).write("")
    if make_local_empty_dir_d_a:
        # local-only: cloud buckets have no real sub-directories
        os.makedirs(os.path.join(prefix, "d/a"), exist_ok=True)

    return all_files
Example #10
0
def j1_json(
    prefix, prefix_v41=None, prefix_v421=None, prefix_v5=None, loc_suffix="", make=False
):
    """Build the URI of j1{loc_suffix}.json under `prefix`.

    If `make` is True, also write the JSON contents (which reference files
    under the given prefixes) to that URI.
    """
    uri = f"{prefix}/j1{loc_suffix}.json"
    if make:
        contents = j1_json_contents(
            prefix=prefix,
            prefix_v41=prefix_v41,
            prefix_v421=prefix_v421,
            prefix_v5=prefix_v5,
            loc_suffix=loc_suffix,
        )
        AutoURI(uri).write(contents)
    return uri
Example #11
0
def write_v6_txt(x):
    """Lock -> write lock-free -> read back -> compare -> unlock.

    Each worker writes a different text (suffixed with its index).

    Args:
        x: tuple of (uri, worker_index).
    """
    uri, worker_index = x
    expected = v6_txt_contents() + str(worker_index)
    u = AutoURI(uri, thread_id=worker_index)

    with u.get_lock(no_lock=False):
        u.write(expected, no_lock=True)
        assert u.read() == expected
def test_autouri_lock_raise(local_v6_txt, gcs_v6_txt, s3_v6_txt):
    """A manually acquired lock must be released (and its .lock file
    removed) even when the critical section raises.
    """
    for txt_uri in (local_v6_txt, gcs_v6_txt, s3_v6_txt):
        lock_file = AutoURI(txt_uri + URIBase.LOCK_FILE_EXT)
        lock = AutoURI(txt_uri).get_lock(no_lock=False)

        lock.acquire()
        raised = False
        try:
            assert lock_file.exists
            time.sleep(1)
            raise AutoURIFileLockTestException
        except AutoURIFileLockTestException:
            raised = True
        finally:
            lock.release()
            # lock file gone after release despite the exception
            assert not lock_file.exists
        assert raised
Example #13
0
def test_httpurl_localize(url_test_path, gcs_j1_json, gcs_v41_json,
                          gcs_v421_tsv, gcs_v5_csv,
                          gcs_v6_txt) -> Tuple[str, bool]:
    """Localizing onto URL storage should fail since it is read-only.
    """
    loc_prefix = os.path.join(url_test_path, "test_httpurl_localize")

    for j1_json in (gcs_j1_json, ):
        u = AutoURI(j1_json)
        dest_prefix = loc_prefix + u.__class__.get_loc_suffix()

        # URL storage is read-only: localization must raise
        with pytest.raises(ReadOnlyStorageError):
            HTTPURL.localize(u, recursive=False, loc_prefix=dest_prefix)
Example #14
0
def test_localize_mixed(
    local_test_path,
    mixed_j1_json,
    mixed_v41_json,
    mixed_v421_tsv,
    mixed_v5_csv,
    mixed_v6_txt,
) -> Tuple[str, bool]:
    """Recursively localize files spread over mixed storages onto local
    storage, then verify that every URI referenced by the localized JSON
    file exists.
    """
    loc_prefix = os.path.join(local_test_path, "test_localize_mixed")

    # localization from remote storages
    for j1_json in (mixed_j1_json, ):
        u = AutoURI(j1_json)
        dest_prefix = loc_prefix + u.__class__.get_loc_suffix()

        loc_uri, _ = AbsPath.localize(
            u, recursive=True, return_flag=True, loc_prefix=dest_prefix
        )
        # every URI referenced in the localized JSON must exist
        recurse_raise_if_uri_not_exist(loc_uri)
Example #15
0
def v421_tsv(
    prefix,
    prefix_v5=None,
    prefix_v1=None,
    prefix_v6=None,
    loc_suffix="",
    make=False,
    make_link_to_j1_json=False,
):
    """Build the URI of deeper/v421{loc_suffix}.tsv under `prefix`.

    If `make` is True, also write its TSV contents (which reference files
    under the given prefixes) to that URI.
    """
    uri = f"{prefix}/deeper/v421{loc_suffix}.tsv"
    if make:
        contents = v421_tsv_contents(
            prefix=prefix,
            prefix_v5=prefix_v5,
            prefix_v1=prefix_v1,
            prefix_v6=prefix_v6,
            loc_suffix=loc_suffix,
            make_link_to_j1_json=make_link_to_j1_json,
        )
        AutoURI(uri).write(contents)
    return uri
Example #16
0
def test_abspath_localize(
    local_test_path,
    local_j1_json,
    local_v41_json,
    local_v421_tsv,
    local_v5_csv,
    local_v6_txt,
    s3_j1_json,
    s3_v41_json,
    s3_v421_tsv,
    s3_v5_csv,
    s3_v6_txt,
    gcs_j1_json,
    gcs_v41_json,
    gcs_v421_tsv,
    gcs_v5_csv,
    gcs_v6_txt,
    url_j1_json,
    url_v41_json,
    url_v421_tsv,
    url_v5_csv,
    url_v6_txt,
) -> Tuple[str, bool]:
    """Recursive localization is supported for the following file extensions:
        .json:
            Files defined only in values (not keys) can be recursively localized.
        .tsv/.csv:
            Files defined in all values can be recursively localized.

    This function will test localizing j1.json file on each remote storage.
    This JSON file has file paths including .tsv and .csv, which also include
    other files in its contents.
    Therefore, when the recursive flag is on, all files in these JSON, TSV, CSV
    files should be localized recursively with correct file names
    (controlled by cls.loc_prefix and cls.loc_suffix).

    File naming for (recursive) localization:
        cls.loc_prefix + remote_file_path_without_scheme + cls.loc_suffix (recursively only)

    For example,
    s3://test-bucket/j1.json has some file paths on s3://.

    With recursive localization, all these files must be localized on /tmp/user/loc_prefix/ with
    a correct directory structure (keeping original structure on source: i.e. bucket name, path)
    and the name of the JSON file should be j1.local.json since contents of this file should be
    modified to point to localized files in it. This is recursively done for all files in it too.

    Without recursive localization, autouri doesn't look inside that JSON file and just localize
    the file itself alone on /tmp/user/loc_prefix/ while keeping the same filename j1.local.json.

    Test localizing on a local storage from the following remote storages:
        local_test_path: local -> local
        s3_test_path: s3 -> local
        gcs_test_path: gcs -> local
        url_test_path: url -> local

    Parameters to be tested:
        make_md5_file:
            Make md5 file on destination only when it's REQUIRED.
            It's required only if we need to compare md5 hash of source and target.
            This is already tested in cp and it's actually needed for local storage.
            Cloud URIs will provide md5 hash info in their metadata so md5 file
            is not required and hence will not be created even with this flag on.
        recursive:
            j1.json
    """
    loc_prefix = os.path.join(local_test_path, "test_abspath_localize")

    for j1_json in (local_j1_json, ):
        # localization from local storage
        u_j1_json = AutoURI(j1_json)
        loc_prefix_ = loc_prefix + u_j1_json.__class__.get_loc_suffix()

        # for localization both with and without the recursive flag,
        # nothing should actually be localized since source files are
        # already on local storage, so the loc_prefix directory itself
        # shouldn't even be created
        loc_uri, localized = AbsPath.localize(u_j1_json,
                                              recursive=False,
                                              return_flag=True,
                                              loc_prefix=loc_prefix_)
        # source URI returned as-is, flagged as not localized
        assert loc_uri == u_j1_json.uri and not localized
        assert not os.path.exists(loc_prefix)

        loc_uri, localized = AbsPath.localize(u_j1_json,
                                              recursive=True,
                                              return_flag=True,
                                              loc_prefix=loc_prefix_)
        assert loc_uri == u_j1_json.uri and not localized
        assert not os.path.exists(loc_prefix)
        # check if all URIs defined in localized JSON file exist
        recurse_raise_if_uri_not_exist(loc_uri)

    # localization from remote storages
    for j1_json in (gcs_j1_json, s3_j1_json, url_j1_json):
        u_j1_json = AutoURI(j1_json)
        loc_prefix_ = loc_prefix + u_j1_json.__class__.get_loc_suffix()

        # non-recursive: only j1.json itself is copied, keeping its
        # original basename under loc_prefix_/<source directory structure>
        loc_uri, localized = AbsPath.localize(u_j1_json,
                                              recursive=False,
                                              return_flag=True,
                                              loc_prefix=loc_prefix_)
        assert loc_uri == os.path.join(loc_prefix_, u_j1_json.loc_dirname,
                                       u_j1_json.basename)
        assert localized and os.path.exists(loc_uri)

        # recursive: contents are modified so the localized file gets the
        # storage-specific loc_suffix (e.g. j1.local.json)
        loc_uri, localized = AbsPath.localize(u_j1_json,
                                              recursive=True,
                                              return_flag=True,
                                              loc_prefix=loc_prefix_)
        assert loc_uri == os.path.join(
            loc_prefix_,
            u_j1_json.loc_dirname,
            u_j1_json.basename_wo_ext + AbsPath.get_loc_suffix() +
            u_j1_json.ext,
        )
        assert localized and os.path.exists(loc_uri)
        # check if all URIs defined in localized JSON file exist
        recurse_raise_if_uri_not_exist(loc_uri)
def test_autouri_lock_timeout(local_v6_txt):
    """Timeout = 3, 8 sec
    For local storage (AbsPath) only.
    Default poll_interval (10 sec) is too long for test remote files.

    While a first lock is held, a second lock on the same URI with a
    finite timeout must raise Timeout after roughly that many seconds.
    """
    for v6_txt in (local_v6_txt, ):
        u_lock = AutoURI(v6_txt + URIBase.LOCK_FILE_EXT)

        # same scenario for each timeout value (was duplicated code)
        for timeout in (3, 8):
            time_s = time.time()
            lock = AutoURI(v6_txt).get_lock(no_lock=False)
            lock.acquire()
            try:
                lock2 = AutoURI(v6_txt).get_lock(no_lock=False,
                                                 timeout=timeout)
                lock2.acquire()
                try:
                    pass
                finally:
                    lock2.release()
            except Timeout:
                # elapsed time should be within 1 sec of requested timeout
                assert timeout - 1 < time.time() - time_s < timeout + 1
            else:
                # second acquire must not succeed while first lock is held
                assert False
            finally:
                lock.release()
            assert not u_lock.exists
Example #18
0
def v6_txt(prefix, make=False):
    """Build the URI of v6.txt under `prefix`; write its contents there
    when `make` is True."""
    uri = f"{prefix}/v6.txt"
    if make:
        AutoURI(uri).write(v6_txt_contents())
    return uri
Example #19
0
def v5_csv(prefix, prefix_v6=None, make=False):
    """Build the URI of even/deeper/v5.csv under `prefix`; write its
    contents (referencing `prefix`/`prefix_v6`) when `make` is True."""
    uri = f"{prefix}/even/deeper/v5.csv"
    if make:
        AutoURI(uri).write(v5_csv_contents(prefix=prefix, prefix_v6=prefix_v6))
    return uri
Example #20
0
def v41_json(prefix, make=False):
    """Build the URI of v41.json under `prefix`; write its contents there
    when `make` is True."""
    uri = f"{prefix}/v41.json"
    if make:
        AutoURI(uri).write(v41_json_contents())
    return uri
Example #21
0
def test_gcsuri_is_valid(path) -> bool:
    """GCSURI.is_valid is True only for gs:// paths; AutoURI must
    auto-convert such paths to GCSURI (it dispatches on is_valid).
    """
    should_be_valid = path.startswith("gs://")
    assert GCSURI(path).is_valid == should_be_valid
    if should_be_valid:
        assert type(AutoURI(path)) == GCSURI
Example #22
0
def test_abspath_cp(local_v6_txt, local_test_path, s3_test_path, gcs_test_path,
                    url_test_path) -> "AutoURI":
    """Test copying local_v6_txt to the following destination storages:
        local_test_path: local -> local
        s3_test_path: local -> s3
        gcs_test_path: local -> gcs

    Parameters to be tested:
        no_lock:
            Copy with no locking mechanism. There is no way to test this thoroughly here.
            This will be tested with multiple threads later in test_race_cond.py.
        no_checksum:
            Don't check md5-hash/size/mtime to skip copying (even if file already exists on destination).
        make_md5_file:
            Make md5 file on destination only when it's REQUIRED.
            It's required only if we need to compare md5 hash of source and target.
    """
    u = AbsPath(local_v6_txt)
    basename = os.path.basename(local_v6_txt)

    for test_path in (local_test_path, s3_test_path, gcs_test_path):
        u_dest = AutoURI(os.path.join(test_path, "test_abspath_cp", basename))
        if u_dest.exists:
            u_dest.rm()

        # basic copy with default locking: ret == 0 means actually copied
        assert not u_dest.exists
        _, ret = u.cp(u_dest, return_flag=True)
        assert u_dest.exists and u.read() == u_dest.read() and ret == 0
        u_dest.rm()

        assert not u_dest.exists
        # cp without lock will be tested thoroughly in test_race_cond.py
        _, ret = u.cp(u_dest, no_lock=True, return_flag=True)
        assert u_dest.exists and u.read() == u_dest.read() and ret == 0
        u_dest.rm()

        # trivial: copy without checksum when target doesn't exist
        assert not u_dest.exists
        _, ret = u.cp(u_dest, no_checksum=True, return_flag=True)
        assert u_dest.exists and u.read() == u_dest.read() and ret == 0

        # copy without checksum when target exists: must overwrite
        m_dest = u_dest.get_metadata()
        assert m_dest.exists
        # sleep so an overwrite produces a strictly larger mtime
        time.sleep(1)
        _, ret = u.cp(u_dest, no_checksum=True, return_flag=True)
        # compare new mtime vs old mtime
        # new time should be larger if it's overwritten as intended
        assert u_dest.mtime > m_dest.mtime and u.read() == u_dest.read(
        ) and ret == 0

        # copy with checksum when target exists: must skip (ret == 1)
        m_dest = u_dest.get_metadata()
        assert m_dest.exists
        _, ret = u.cp(u_dest, return_flag=True)
        # compare new mtime vs old mtime
        # new time should be the same as old time (copy was skipped)
        assert u_dest.mtime == m_dest.mtime and u.read() == u_dest.read(
        ) and ret == 1

        # make_md5_file works only when it's required
        # i.e. when we need to compare md5 hash of src vs target
        # so target must exist prior to testing it
        assert u_dest.exists
        # delete md5 file if it exists
        u_dest_md5_file = AutoURI(u_dest.uri + URIBase.MD5_FILE_EXT)
        if u_dest_md5_file.exists:
            u_dest_md5_file.rm()
        _, ret = u.cp(u_dest, make_md5_file=True, return_flag=True)
        assert u_dest.exists and u.read() == u_dest.read() and ret == 1
        u_dest.rm()
Example #23
0
def test_abspath_is_valid(path) -> bool:
    """AbsPath.is_valid is True only for paths that are absolute after
    ~-expansion; AutoURI must auto-convert such paths to AbsPath.
    """
    should_be_valid = os.path.isabs(os.path.expanduser(path))
    assert AbsPath(path).is_valid == should_be_valid
    if should_be_valid:
        assert type(AutoURI(path)) == AbsPath
Example #24
0
def test_httpurl_is_valid(path) -> bool:
    """HTTPURL.is_valid is True only for http(s):// paths; AutoURI must
    auto-convert such paths to HTTPURL (it dispatches on is_valid).
    """
    should_be_valid = path.startswith(("https://", "http://"))
    assert HTTPURL(path).is_valid == should_be_valid
    if should_be_valid:
        assert type(AutoURI(path)) == HTTPURL