コード例 #1
0
def test_local_storage(request, tmpdir):
    """Exercise get/rename/delete on plain local paths with a default client.

    A single corpus file is copied into ``tmpdir``, renamed and deleted; the
    whole corpus directory is then copied and deleted, checking that the
    directory variants require ``directory=True`` / ``recursive=True``.
    """
    client = systran_storages.StorageClient()
    corpus_dir = str(request.config.rootdir / "corpus")

    source_file = os.path.join(
        corpus_dir, "train", "europarl-v7.de-en.10K.tok.de")
    first_copy = str(tmpdir.join("localcopy"))
    second_copy = str(tmpdir.join("localcopy2"))
    copied_dir = str(tmpdir.join("localdir"))

    # Single file: copy it in, move it, remove it.
    client.get(source_file, first_copy)
    assert os.path.isfile(first_copy)

    client.rename(first_copy, second_copy)
    assert not os.path.exists(first_copy)
    assert os.path.isfile(second_copy)

    client.delete(second_copy)
    assert not os.path.exists(second_copy)

    # A directory cannot be fetched unless directory mode is requested.
    with pytest.raises(Exception):
        client.get(corpus_dir, copied_dir)

    client.get(corpus_dir, copied_dir, directory=True)
    fetched_file = str(
        tmpdir.join("localdir", "train", "europarl-v7.de-en.10K.tok.de"))
    assert os.path.isfile(fetched_file)

    # Deleting a directory is refused unless recursive=True is passed.
    with pytest.raises(ValueError):
        client.delete(copied_dir)
    client.delete(copied_dir, recursive=True)
    assert not os.path.exists(copied_dir)
コード例 #2
0
def test_is_managed_path():
    """Check ``is_managed_path`` against paths with and without a known storage prefix."""
    storage_config = {"s3_models": {}, "s3_test": {}, "launcher": {}}
    client = systran_storages.StorageClient(config=storage_config)

    # No prefix, an empty prefix, or a prefix that is not a configured id.
    unmanaged_paths = (
        "/home/ubuntu/file.txt",
        ":ubuntu/file.txt",
        "storage:ubuntu/file.txt",
    )
    # Prefix matches a configured storage id (even with an empty remainder).
    managed_paths = (
        "s3_models:ubuntu/file.txt",
        "s3_test:",
    )

    for candidate in unmanaged_paths:
        assert not client.is_managed_path(candidate)
    for candidate in managed_paths:
        assert client.is_managed_path(candidate)
コード例 #3
0
ファイル: test_storage.py プロジェクト: SYSTRAN/storages
def test_storage_manager(tmpdir):
    """Check that ``_get_storage`` resolves each configured id to its storage class.

    The configuration declares two S3 storages, one HTTP storage and one
    corpus-manager storage (all with placeholder settings); a path without a
    storage prefix is expected to resolve to the local storage, and an
    unknown prefix to raise ``ValueError``.
    """

    def placeholder_credentials():
        # A fresh dict per storage, so the two S3 entries share no state.
        return {
            "access_key_id": "AAAAAAAAAAAAAAAAAAAA",
            "secret_access_key": "abcdefghijklmnopqrstuvwxyz0123456789ABCD",
            "region_name": "us-east-2",
        }

    s3_models_conf = {
        "description": "model storage on S3",
        "type": "s3",
        "bucket": "my-model-storage",
        "aws_credentials": placeholder_credentials(),
        "default_ms": True,
    }
    s3_test_conf = {
        "description": "some test files",
        "type": "s3",
        "bucket": "my-testfiles-storage",
        "aws_credentials": placeholder_credentials(),
    }
    launcher_conf = {
        "description": "launcher file storage",
        "type": "http",
        "get_pattern": "hereget/%s",
        "post_pattern": "herepost/%s",
    }
    corpus_manager_conf = {
        "account_id": "",
        "type": "systran_corpusmanager",
        "description": "CorpusManager file storage",
        "host_url": "localhost:8889",
    }
    config = {
        "s3_models": s3_models_conf,
        "s3_test": s3_test_conf,
        "launcher": launcher_conf,
        "corpus_manager": corpus_manager_conf,
    }
    client = systran_storages.StorageClient(config=config)

    # Storage id given as a "<id>:" prefix of the path.
    storage, remainder = client._get_storage("s3_models:pathdir/mysupermodel")
    assert isinstance(storage, systran_storages.storages.S3Storage)
    assert remainder == "pathdir/mysupermodel"
    assert storage._storage_id == "s3_models"

    # Storage id given as a separate argument.
    storage, remainder = client._get_storage(
        "pathdir/mysupermodel", "s3_models")
    assert isinstance(storage, systran_storages.storages.S3Storage)

    # No storage id at all: resolved to the local storage.
    storage, remainder = client._get_storage("/pathdir/mysupermodel")
    assert isinstance(storage, systran_storages.storages.LocalStorage)
    assert storage._storage_id == "local"

    storage, remainder = client._get_storage("launcher:/hereget/mysupermodel")
    assert isinstance(storage, systran_storages.storages.HTTPStorage)

    storage, remainder = client._get_storage(
        "corpus_manager:pathdir/mysupermodel")
    assert isinstance(storage, systran_storages.storages.CMStorages)
    assert storage._storage_id == "corpus_manager"

    # A prefix that is not a configured storage id is rejected.
    with pytest.raises(ValueError):
        client._get_storage("unknown:/hereget/mysupermodel")
コード例 #4
0
def test_storages(request, tmpdir, storages, storage_id):
    """Run one end-to-end scenario against the storage named ``storage_id``.

    The scenario works inside the remote directory ``myremotedirectory``:
    it is wiped, repopulated with pushes (single file, file under a new
    name, whole directory), then exercised with rename, get, stream,
    delete and listdir before being removed again.

    ``storages`` is the configuration mapping passed to ``StorageClient``;
    ids starting with an underscore are not exercised.
    """
    if storage_id.startswith('_'):
        return

    corpus_dir = str(request.config.rootdir / "corpus")
    client = systran_storages.StorageClient(config=storages)

    # Reference content used later to validate the round trip.
    with open(os.path.join(corpus_dir, "vocab", "en-vocab.txt"), "rb") as f:
        en_vocab = f.read()

    stor_tmp_dir = str(tmpdir.join("test_storages", storage_id))
    os.makedirs(stor_tmp_dir)

    # Paths that are used repeatedly below.
    remote_dir = "myremotedirectory"
    local_corpus_file = os.path.join(
        corpus_dir, "train", "europarl-v7.de-en.10K.tok.de")
    local_vocab = os.path.join(stor_tmp_dir, "en-vocab.txt")
    remote_vocab = os.path.join(remote_dir, "vocab", "en-vocab.txt")
    remote_vocab_renamed = os.path.join(remote_dir, "vocab-2", "en-vocab.txt")
    local_remote_copy = os.path.join(stor_tmp_dir, "myremotedirectory")

    # The storage root must be reachable.
    assert client.exists("/", storage_id=storage_id)

    # Start from a clean slate: remove the working directory if present.
    maindir_exists = client.exists(remote_dir, storage_id=storage_id)
    try:
        print("==> delete myremotedirectory")
        client.delete(remote_dir, recursive=True, storage_id=storage_id)
    except Exception as e:
        # A failure is only acceptable when there was nothing to delete.
        assert not maindir_exists, "cannot remove main directory (%s)" % str(e)
    assert not client.exists(remote_dir, storage_id=storage_id)

    # Push a file into a directory (trailing slash on the destination).
    client.push(local_corpus_file, "myremotedirectory/",
                storage_id=storage_id)
    assert client.exists(remote_dir, storage_id=storage_id)
    assert client.exists(
        os.path.join(remote_dir, "europarl-v7.de-en.10K.tok.de"),
        storage_id=storage_id)

    # Push the same file under a new name.
    client.push(
        local_corpus_file,
        os.path.join(remote_dir, "test", "copy-europarl-v7.de-en.10K.tok.de"),
        storage_id=storage_id)

    # Push to a new file in a completely new directory tree.
    if client.exists(storage_id + ":" + "myremotedirectory-new/"):
        client.delete("myremotedirectory-new",
                      recursive=True,
                      storage_id=storage_id)
    absolute_new_copy = os.path.join(
        "/myremotedirectory-new", "test-new",
        "copy-europarl-v7.de-en.10K.tok.de")
    storage_conf = storages[storage_id]
    if storage_conf["type"] == "local" and "basedir" not in storage_conf:
        # Without a basedir, a leading "/" on a local storage is a real
        # absolute path, so this push is expected to fail.
        with pytest.raises(Exception):
            client.push(local_corpus_file, absolute_new_copy,
                        storage_id=storage_id)
        client.push(
            local_corpus_file,
            os.path.join("myremotedirectory-new", "test-new",
                         "copy-europarl-v7.de-en.10K.tok.de"),
            storage_id=storage_id)
    else:
        client.push(local_corpus_file, absolute_new_copy,
                    storage_id=storage_id)

    # Rename a file.
    client.rename(
        os.path.join(remote_dir, "test", "copy-europarl-v7.de-en.10K.tok.de"),
        os.path.join(remote_dir, "test", "copy2-europarl-v7.de-en.10K.tok.de"),
        storage_id=storage_id)

    # Push a full directory.
    client.push(os.path.join(corpus_dir, "vocab"),
                os.path.join(remote_dir, "vocab"),
                storage_id=storage_id)

    # Fetch one file back into the local temp directory.
    client.get(remote_vocab, stor_tmp_dir, storage_id=storage_id)
    assert os.path.exists(local_vocab)

    # Fetch it a second time after a pause: the local copy must keep its
    # mtime, i.e. the second get must not rewrite the file.
    stat = os.stat(local_vocab)
    time.sleep(1)
    client.get(remote_vocab, stor_tmp_dir, storage_id=storage_id)
    new_stat = os.stat(local_vocab)
    assert stat.st_mtime == new_stat.st_mtime, "file should not have changed"

    os.remove(local_vocab)

    # Rename a directory, then fetch the file through its new location.
    client.rename(os.path.join(remote_dir, "vocab"),
                  os.path.join(remote_dir, "vocab-2"),
                  storage_id=storage_id)
    client.get(remote_vocab_renamed, stor_tmp_dir, storage_id=storage_id)
    assert os.path.isfile(local_vocab)
    with open(local_vocab, "rb") as f:
        back_en_vocab = f.read()
    assert back_en_vocab == en_vocab

    # Fetching a file that does not exist must fail.
    with pytest.raises(Exception):
        client.get(os.path.join(remote_dir, "vocab-2", "troc"),
                   os.path.join(stor_tmp_dir, "troc"),
                   storage_id=storage_id)

    # Stream the file back and check total size and chunk count.
    generator = client.stream(remote_vocab_renamed,
                              buffer_size=100,
                              storage_id=storage_id)
    chunk_sizes = [len(chunk) for chunk in generator]
    size = sum(chunk_sizes)
    nchunk = len(chunk_sizes)
    assert size == len(en_vocab)
    assert nchunk >= int(math.ceil(len(en_vocab) / 100.))

    # Delete a single file.
    client.delete(remote_vocab_renamed, storage_id=storage_id)
    assert not client.exists(remote_vocab_renamed, storage_id=storage_id)

    # Flat listing: direct children only, directories with a trailing slash.
    lsdir = sorted(
        client.listdir("myremotedirectory/", storage_id=storage_id))
    assert lsdir == [
        'myremotedirectory/europarl-v7.de-en.10K.tok.de',
        'myremotedirectory/test/',
        'myremotedirectory/vocab-2/',
    ]

    # Recursive listing: files only.
    lsdir = sorted(
        client.listdir("myremotedirectory/",
                       recursive=True,
                       storage_id=storage_id))
    assert lsdir == [
        'myremotedirectory/europarl-v7.de-en.10K.tok.de',
        'myremotedirectory/test/copy2-europarl-v7.de-en.10K.tok.de',
        'myremotedirectory/vocab-2/de-vocab.txt',
    ]

    # Fetching a directory without directory mode must fail ...
    with pytest.raises(Exception):
        client.get(remote_dir, stor_tmp_dir, storage_id=storage_id)
    # ... while directory=True and directory=None are both accepted.
    client.get(remote_dir, local_remote_copy,
               directory=True,
               storage_id=storage_id)
    client.get(remote_dir, local_remote_copy,
               directory=None,
               storage_id=storage_id)
    # NOTE(review): this listing is computed but never asserted on; it only
    # verifies that the local directory exists and can be listed.
    local_listdir = sorted([
        f for f in os.listdir(local_remote_copy)
        if not f.endswith('#md5')
    ])

    # Remove the whole working directory and confirm it is gone.
    client.delete(remote_dir, recursive=True, storage_id=storage_id)
    assert not client.exists(remote_dir, storage_id=storage_id)
コード例 #5
0
ファイル: test_storage.py プロジェクト: pj-liard/storages
def test_cm_storage(request, storages, storage_id):
    """Exercise push/listdir/delete/partition on the ``corpus_manager`` storage.

    The test is a no-op for any other ``storage_id``. ``storages`` is the
    configuration mapping passed to ``StorageClient``.
    """
    if storage_id != 'corpus_manager':
        return

    corpus_dir = str(request.config.rootdir / "corpus")
    client = systran_storages.StorageClient(config=storages)

    # Paths that are used repeatedly below.
    remote_dir = "myremotedirectory"
    local_txt = os.path.join(corpus_dir, "train", "testFormat.txt")
    remote_txt = os.path.join(remote_dir, "testFormat.txt")
    remote_tmx = os.path.join(remote_dir, "test", "testFormat.tmx")
    remote_europarl = os.path.join(remote_dir, "europarl-v7.de-en.10K.tok.de")

    # Push a new corpus.
    client.push(local_txt, "myremotedirectory/", storage_id=storage_id)
    assert client.exists(remote_txt, storage_id=storage_id)

    # Pushing a corpus that already exists must fail.
    with pytest.raises(Exception):
        client.push(local_txt, "myremotedirectory/", storage_id=storage_id)

    # The pushed corpus appears in the flat listing.
    lsdir = sorted(
        client.listdir("myremotedirectory/", storage_id=storage_id))
    assert 'myremotedirectory/testFormat.txt' in lsdir

    # Delete the pushed corpus.
    client.delete(remote_txt, storage_id=storage_id)
    assert not client.exists(remote_txt, storage_id=storage_id)

    # Push a second corpus into a sub-directory.
    client.push(os.path.join(corpus_dir, "train", "testFormat.tmx"),
                os.path.join(remote_dir, "test/"),
                storage_id=storage_id)
    assert client.exists(remote_tmx, storage_id=storage_id)

    # Flat listing shows the sub-directory, recursive listing shows the file.
    lsdir = sorted(
        client.listdir("myremotedirectory/", storage_id=storage_id))
    assert 'myremotedirectory/test/' in lsdir

    lsdir = sorted(
        client.listdir("myremotedirectory/",
                       recursive=True,
                       storage_id=storage_id))
    assert 'myremotedirectory/test/testFormat.tmx' in lsdir

    client.delete(remote_tmx, storage_id=storage_id)
    assert not client.exists(remote_tmx, storage_id=storage_id)

    # This push is expected to be rejected and must leave nothing behind
    # (presumably because of the file's format -- confirm against the
    # corpus-manager backend).
    with pytest.raises(Exception):
        client.push(
            os.path.join(corpus_dir, "train", "europarl-v7.de-en.10K.tok.de"),
            "myremotedirectory/",
            storage_id=storage_id)
    assert not client.exists(remote_europarl, storage_id=storage_id)

    # Deleting that never-created corpus must fail as well.
    with pytest.raises(Exception):
        client.delete(remote_europarl, storage_id=storage_id)

    # A source named "testFormat" (no extension) is rejected too.
    with pytest.raises(Exception):
        client.push(os.path.join(corpus_dir, "train", "testFormat"),
                    "myremotedirectory/",
                    storage_id=storage_id)
    assert not client.exists(os.path.join(remote_dir, "testFormat"),
                             storage_id=storage_id)

    # Partition a corpus into training and testing directories.
    # NOTE(review): testFormat.txt was deleted earlier in this test and is
    # not pushed again before this call -- confirm the intended setup.
    training_data_path = os.path.join(remote_dir, "train") + os.path.sep
    testing_data_path = os.path.join(remote_dir, "test") + os.path.sep
    client.partition_auto(remote_txt,
                          training_path=training_data_path,
                          testing_path=testing_data_path,
                          storage_id=storage_id,
                          percent=10)

    # The corpus now exists in both partitions and no longer at its
    # original location.
    assert client.exists(
        os.path.join("myremotedirectory/train", "testFormat.txt"),
        storage_id=storage_id)
    assert client.exists(
        os.path.join("myremotedirectory/test", "testFormat.txt"),
        storage_id=storage_id)
    assert not client.exists(remote_txt, storage_id=storage_id)
コード例 #6
0
ファイル: test_storage.py プロジェクト: pj-liard/storages
def test_path_join():
    """Check ``StorageClient.join`` and ``StorageClient.split`` on a storage-prefixed path."""
    local_conf = {"type": "local", "basedir": "/tmp"}
    client = systran_storages.StorageClient({"local": local_conf})

    # Joining keeps the "local:" storage prefix and does not double the slash.
    joined = client.join("local:dir/", "file.txt")
    assert joined == "local:dir/file.txt"

    # Splitting yields (storage prefix, directory, file name).
    parts = client.split("local:dir/file.txt")
    assert parts == ("local:", "dir", "file.txt")