async def output_file(user_id: int, project_id: str,
                      postgres_engine: Engine) -> AsyncIterable[FileMetaData]:

    node_id = "fd6f9737-1988-341b-b4ac-0614b646fa82"

    # pylint: disable=no-value-for-parameter

    file = FileMetaData()
    file.simcore_from_uuid(f"{project_id}/{node_id}/filename.txt",
                           bucket_name="master-simcore")
    file.entity_tag = "df9d868b94e53d18009066ca5cd90e9f"
    file.user_name = "test"
    file.user_id = str(user_id)

    async with postgres_engine.acquire() as conn:
        stmt = (
            file_meta_data.insert()
            .values(**attr.asdict(file))
            .returning(literal_column("*"))
        )
        result = await conn.execute(stmt)
        row = await result.fetchone()

        # workaround: cast ids to str so the in-memory object compares equal
        # to the row returned by postgres
        file.user_id = str(user_id)
        file.location_id = str(file.location_id)
        # --
        assert file == FileMetaData(**dict(row))  # type: ignore

        yield file

        result = await conn.execute(file_meta_data.delete().where(
            file_meta_data.c.file_uuid == row.file_uuid))
Example #2
def test_fmd_build():
    file_uuid = str(Path("1234") / Path("abcd") / Path("xx.dat"))
    fmd = FileMetaData()
    fmd.simcore_from_uuid(file_uuid, "test-bucket")

    assert fmd.node_id == "abcd"
    assert fmd.project_id == "1234"
    assert fmd.file_name == "xx.dat"
    assert fmd.object_name == "1234/abcd/xx.dat"
    assert fmd.file_uuid == file_uuid
    assert fmd.location == SIMCORE_S3_STR
    assert fmd.location_id == SIMCORE_S3_ID
    assert fmd.bucket_name == "test-bucket"
Example #3
def _create_file_meta_for_s3(postgres_url, s3_client, tmp_file):
    utils.create_tables(url=postgres_url)
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # create file and upload
    filename = os.path.basename(tmp_file)
    project_id = "22"
    node_id = "1006"
    file_name = filename
    file_uuid = os.path.join(str(project_id), str(node_id), str(file_name))

    d = {
        'object_name': os.path.join(str(project_id), str(node_id), str(file_name)),
        'bucket_name': bucket_name,
        'file_name': filename,
        'user_id': "42",
        'user_name': "starbucks",
        'location': SIMCORE_S3_STR,
        'project_id': project_id,
        'project_name': "battlestar",
        'node_id': node_id,
        'node_name': "this is the name of the node",
        'file_uuid': file_uuid,
    }

    fmd = FileMetaData(**d)

    return fmd
Example #4
def test_fmd_build():
    file_uuid = str(Path("api") / Path("abcd") / Path("xx.dat"))
    fmd = FileMetaData()
    fmd.simcore_from_uuid(file_uuid, "test-bucket")

    assert not fmd.node_id
    assert not fmd.project_id
    assert fmd.file_name == "xx.dat"
    assert fmd.object_name == "api/abcd/xx.dat"
    assert fmd.file_uuid == file_uuid
    assert fmd.location == SIMCORE_S3_STR
    assert fmd.location_id == SIMCORE_S3_ID
    assert fmd.bucket_name == "test-bucket"

    file_uuid = f"{uuid.uuid4()}/{uuid.uuid4()}/xx.dat"
    fmd.simcore_from_uuid(file_uuid, "test-bucket")

    assert fmd.node_id == file_uuid.split("/")[1]
    assert fmd.project_id == file_uuid.split("/")[0]
    assert fmd.file_name == "xx.dat"
    assert fmd.object_name == file_uuid
    assert fmd.file_uuid == file_uuid
    assert fmd.location == SIMCORE_S3_STR
    assert fmd.location_id == SIMCORE_S3_ID
    assert fmd.bucket_name == "test-bucket"
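A rough sketch of the path parsing these assertions imply; this is an illustrative assumption, not the actual simcore_from_uuid implementation:

def _split_simcore_file_uuid(file_uuid: str):
    # assumption: a "<project_id>/<node_id>/<file_name>" uuid yields all three
    # parts, while an "api/..." prefixed uuid leaves project/node unset,
    # mirroring the two cases asserted above
    parts = file_uuid.split("/")
    if len(parts) == 3 and parts[0] != "api":
        return parts[0], parts[1], parts[2]
    return None, None, parts[-1]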
Example #5
def _create_file_meta_for_s3(postgres_url, s3_client, tmp_file):

    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # create file and upload
    filename = os.path.basename(tmp_file)
    project_id = "api"  # "357879cc-f65d-48b2-ad6c-074e2b9aa1c7"
    project_name = "battlestar"
    node_name = "galactica"
    node_id = "b423b654-686d-4157-b74b-08fa9d90b36e"
    file_name = filename
    file_uuid = os.path.join(str(project_id), str(node_id), str(file_name))
    display_name = os.path.join(str(project_name), str(node_name),
                                str(file_name))
    created_at = str(datetime.datetime.now())
    file_size = 1234

    d = {
        "object_name": os.path.join(str(project_id), str(node_id),
                                    str(file_name)),
        "bucket_name": bucket_name,
        "file_name": filename,
        "user_id": USER_ID,
        "user_name": "starbucks",
        "location": SIMCORE_S3_STR,
        "location_id": SIMCORE_S3_ID,
        "project_id": project_id,
        "project_name": project_name,
        "node_id": node_id,
        "node_name": node_name,
        "file_uuid": file_uuid,
        "file_id": file_uuid,
        "raw_file_path": file_uuid,
        "display_file_path": display_name,
        "created_at": created_at,
        "last_modified": created_at,
        "file_size": file_size,
    }

    fmd = FileMetaData(**d)

    return fmd
Example #6
def _create_file_meta_for_s3(postgres_url, s3_client, tmp_file):
    utils.create_tables(url=postgres_url)
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # create file and upload
    filename = os.path.basename(tmp_file)
    project_id = "22"
    project_name = "battlestar"
    node_name = "galactica"
    node_id = "1006"
    file_name = filename
    file_uuid = os.path.join(str(project_id), str(node_id), str(file_name))
    display_name = os.path.join(str(project_name), str(node_name),
                                str(file_name))
    created_at = str(datetime.datetime.now())
    file_size = 1234

    d = {
        "object_name": os.path.join(str(project_id), str(node_id),
                                    str(file_name)),
        "bucket_name": bucket_name,
        "file_name": filename,
        "user_id": USER_ID,
        "user_name": "starbucks",
        "location": SIMCORE_S3_STR,
        "location_id": SIMCORE_S3_ID,
        "project_id": project_id,
        "project_name": project_name,
        "node_id": node_id,
        "node_name": node_name,
        "file_uuid": file_uuid,
        "file_id": file_uuid,
        "raw_file_path": file_uuid,
        "display_file_path": display_name,
        "created_at": created_at,
        "last_modified": created_at,
        "file_size": file_size,
    }

    fmd = FileMetaData(**d)

    return fmd
Example #7
    def list_dataset_files_recursively(self, files: List[FileMetaData],
                                       base: BaseCollection,
                                       current_root: Path):
        for item in base:
            if isinstance(item, Collection):
                _current_root = current_root / Path(item.name)
                self.list_dataset_files_recursively(files, item, _current_root)
            else:
                parts = current_root.parts
                bucket_name = parts[0]
                file_name = item.name
                file_size = 0
                # let's assume we have only one file
                if item.files:
                    file_name = Path(
                        item.files[0].as_dict()["content"]["s3key"]).name
                    file_size = item.files[0].as_dict()["content"]["size"]
                # if this is in the root directory, the object_name is the filename only
                if len(parts) > 1:
                    object_name = str(Path(*list(parts)[1:]) / Path(file_name))
                else:
                    object_name = str(Path(file_name))

                file_uuid = str(Path(bucket_name) / Path(object_name))
                file_id = item.id
                created_at = item.created_at
                last_modified = item.updated_at
                fmd = FileMetaData(
                    bucket_name=bucket_name,
                    file_name=file_name,
                    object_name=object_name,
                    location=DATCORE_STR,
                    location_id=DATCORE_ID,
                    file_uuid=file_uuid,
                    file_id=file_id,
                    raw_file_path=file_uuid,
                    display_file_path=file_uuid,
                    created_at=created_at,
                    last_modified=last_modified,
                    file_size=file_size,
                )
                files.append(fmd)
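A hedged sketch of how this walker might be invoked from a dataset root; the dataset object and the choice of starting path are assumptions inferred from the bucket_name = parts[0] line above:

# illustrative call site, assumed to live in another method of the same class
files: List[FileMetaData] = []
self.list_dataset_files_recursively(files, dataset, Path(dataset.name))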
Example #8
def dsm_mockup_db(
    postgres_service_url, s3_client, mock_files_factory
) -> Dict[str, FileMetaData]:

    # s3 client
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # TODO: use pip install Faker
    users = ["alice", "bob", "chuck", "dennis"]

    projects = [
        "astronomy",
        "biology",
        "chemistry",
        "dermatology",
        "economics",
        "futurology",
        "geology",
    ]
    location = SIMCORE_S3_STR

    nodes = ["alpha", "beta", "gamma", "delta"]

    N = 100
    files = mock_files_factory(count=N)
    counter = 0
    data = {}
    for _file in files:
        idx = randrange(len(users))
        user_name = users[idx]
        user_id = idx + 10
        idx = randrange(len(projects))
        project_name = projects[idx]
        project_id = idx + 100
        idx = randrange(len(nodes))
        node = nodes[idx]
        node_id = idx + 10000
        file_name = str(counter)
        object_name = Path(str(project_id), str(node_id), str(counter)).as_posix()
        file_uuid = Path(object_name).as_posix()
        raw_file_path = file_uuid
        display_file_path = str(Path(project_name) / Path(node) / Path(file_name))
        created_at = str(datetime.datetime.now())
        file_size = 1234
        assert s3_client.upload_file(bucket_name, object_name, _file)

        d = {
            "file_uuid": file_uuid,
            "location_id": "0",
            "location": location,
            "bucket_name": bucket_name,
            "object_name": object_name,
            "project_id": str(project_id),
            "project_name": project_name,
            "node_id": str(node_id),
            "node_name": node,
            "file_name": file_name,
            "user_id": str(user_id),
            "user_name": user_name,
            "file_id": str(uuid.uuid4()),
            "raw_file_path": file_uuid,
            "display_file_path": display_file_path,
            "created_at": created_at,
            "last_modified": created_at,
            "file_size": file_size,
        }

        counter = counter + 1

        data[object_name] = FileMetaData(**d)

        # pylint: disable=no-member
        tests.utils.insert_metadata(postgres_service_url, data[object_name])

    total_count = 0
    for _obj in s3_client.list_objects(bucket_name, recursive=True):
        total_count = total_count + 1

    assert total_count == N

    yield data

    # s3 client
    s3_client.remove_bucket(bucket_name, delete_contents=True)
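A minimal sketch of a test consuming this fixture; the test name is illustrative and the assertions only restate what the fixture itself sets up:

def test_dsm_mockup_db_is_consistent(dsm_mockup_db: Dict[str, FileMetaData]):
    # the fixture uploads N=100 files and yields them keyed by object_name
    assert len(dsm_mockup_db) == 100
    for object_name, fmd in dsm_mockup_db.items():
        assert fmd.object_name == object_name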
Example #9
    def list_files_raw_dataset(self, dataset_id: str) -> List[FileMetaDataEx]:
        files = []  # raw packages
        _files = []  # fmds
        data = {}  # map to keep track of parents-child

        cursor = ""
        page_size = 1000
        api = self._bf._api.datasets

        dataset = self._bf.get_dataset(dataset_id)
        if dataset is not None:
            while True:
                resp = api._get(
                    api._uri(
                        "/{id}/packages?cursor={cursor}&pageSize={pageSize}&includeSourceFiles={includeSourceFiles}",
                        id=dataset_id,
                        cursor=cursor,
                        pageSize=page_size,
                        includeSourceFiles=False,
                    ))
                for package in resp.get("packages", list()):
                    id = package["content"]["id"]
                    data[id] = package
                    files.append(package)
                cursor = resp.get("cursor")
                if cursor is None:
                    break

            for f in files:
                if f["content"]["packageType"] != "Collection":
                    filename = f["content"]["name"]
                    file_path = ""
                    file_id = f["content"]["nodeId"]
                    _f = f
                    while "parentId" in _f["content"].keys():
                        parentid = _f["content"]["parentId"]
                        _f = data[parentid]
                        file_path = _f["content"]["name"] + "/" + file_path

                    bucket_name = dataset.name
                    file_name = filename
                    file_size = 0
                    object_name = str(Path(file_path) / file_name)

                    file_uuid = str(Path(bucket_name) / object_name)
                    created_at = f["content"]["createdAt"]
                    last_modified = f["content"]["updatedAt"]
                    parent_id = dataset_id
                    if "parentId" in f["content"]:
                        parentId = f["content"]["parentId"]
                        parent_id = data[parentId]["content"]["nodeId"]

                    fmd = FileMetaData(
                        bucket_name=bucket_name,
                        file_name=file_name,
                        object_name=object_name,
                        location=DATCORE_STR,
                        location_id=DATCORE_ID,
                        file_uuid=file_uuid,
                        file_id=file_id,
                        raw_file_path=file_uuid,
                        display_file_path=file_uuid,
                        created_at=created_at,
                        last_modified=last_modified,
                        file_size=file_size,
                    )
                    fmdx = FileMetaDataEx(fmd=fmd, parent_id=parent_id)
                    _files.append(fmdx)

        return _files
Example #10
def test_file_entry_valid(file_size: Optional[int], entity_tag: Optional[str],
                          expected_validity: bool):
    file_meta_data = FileMetaData(file_size=file_size, entity_tag=entity_tag)
    assert is_file_entry_valid(file_meta_data) == expected_validity
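The test signature implies a pytest parametrization along these lines; the concrete cases and expected outcomes are illustrative assumptions, not taken from the source:

# placed above test_file_entry_valid; values are assumptions for illustration
@pytest.mark.parametrize(
    "file_size, entity_tag, expected_validity",
    [
        (None, None, False),                # nothing uploaded yet (assumed invalid)
        (1234, None, False),                # size known but no entity tag (assumed invalid)
        (1234, "a_fake_entity_tag", True),  # assumed to be a valid entry
    ],
)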
Example #11
def dsm_mockup_db(postgres_service_url, s3_client, mock_files_factory):
    # db
    utils.create_tables(url=postgres_service_url)

    # s3 client
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # TODO: use pip install Faker
    users = ['alice', 'bob', 'chuck', 'dennis']

    projects = [
        'astronomy', 'biology', 'chemistry', 'dermatology', 'economics',
        'futurology', 'geology'
    ]
    location = SIMCORE_S3_STR

    nodes = ['alpha', 'beta', 'gamma', 'delta']

    N = 100
    files = mock_files_factory(count=N)
    counter = 0
    data = {}
    for _file in files:
        idx = randrange(len(users))
        user_name = users[idx]
        user_id = idx + 10
        idx = randrange(len(projects))
        project_name = projects[idx]
        project_id = idx + 100
        idx = randrange(len(nodes))
        node = nodes[idx]
        node_id = idx + 10000
        file_name = str(counter)
        object_name = Path(str(project_id), str(node_id),
                           str(counter)).as_posix()
        file_uuid = Path(object_name).as_posix()

        assert s3_client.upload_file(bucket_name, object_name, _file)

        d = {
            'file_uuid': file_uuid,
            'location_id': "0",
            'location': location,
            'bucket_name': bucket_name,
            'object_name': object_name,
            'project_id': str(project_id),
            'project_name': project_name,
            'node_id': str(node_id),
            'node_name': node,
            'file_name': file_name,
            'user_id': str(user_id),
            'user_name': user_name
        }

        counter = counter + 1

        data[object_name] = FileMetaData(**d)

        # pylint: disable=no-member
        utils.insert_metadata(postgres_service_url, data[object_name])

    total_count = 0
    for _obj in s3_client.list_objects_v2(bucket_name, recursive=True):
        total_count = total_count + 1

    assert total_count == N
    yield data

    # s3 client
    s3_client.remove_bucket(bucket_name, delete_contents=True)

    # db
    utils.drop_tables(url=postgres_service_url)