Example 1
async def start_web_socket(ws: WebSocket):
    while True:
        try:
            _ = await ws.receive_text()
        except WebSocketDisconnect:
            log_info(
                f'{ws.scope["client"]} - "WebSocket {ws.scope["path"]}" [disconnected]'
            )
            break
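A minimal sketch of how this loop could be attached to a FastAPI WebSocket route; the route path and the accept() handshake are assumptions, only start_web_socket comes from the example:

from fastapi import FastAPI, WebSocket

app = FastAPI()

@app.websocket("/ws")
async def ws_endpoint(ws: WebSocket):
    await ws.accept()           # complete the handshake before reading
    await start_web_socket(ws)  # blocks until the client disconnects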
Example 2
async def catch_all_no_namespace(request: Request, rest_of_path: str):
    headers = generate_headers_downstream(request.headers)
    segments = rest_of_path.strip('/').split('/')
    namespace, name, version, resource = (
        get_info(segments)
        if segments[0].startswith('@')
        else get_info([""] + segments)
    )
    log_info("forward request to CDN:",
             namespace=namespace,
             name=name,
             version=version,
             resource=resource)
    return await get_raw_resource(namespace, name, version, resource, headers)
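The helper get_info is not shown in the excerpt; a hypothetical sketch of what it might do, assuming paths of the form @namespace/name/version/...resource with the namespace segment optional:

from typing import List, Tuple

def get_info(segments: List[str]) -> Tuple[str, str, str, str]:
    # Hypothetical: segments = [namespace-or-empty, name, version, *resource parts]
    namespace, name, version, *resource_parts = segments
    return namespace.lstrip('@'), name, version, "/".join(resource_parts)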
Example 3
async def init_resources(config: Configuration):
    log_info("Ensure database resources")
    # admin_headers is assumed to be either None or an awaitable resolving to auth headers
    headers = await config.admin_headers if config.admin_headers else {}

    log_info("Successfully retrieved authorization for resources creation")
    await config.storage.ensure_bucket(headers=headers)
    log_info("resources initialization done")
Example 4
async def init_resources(config: Configuration):

    log_info("Ensure database resources")
    headers = await config.admin_headers if config.admin_headers else {}
    log_info("Successfully retrieved authorization for resources creation")
    doc_db = config.data_client.docdb
    storage = config.data_client.storage
    table_ok, bucket_ok = await asyncio.gather(
        doc_db.ensure_table(headers=headers),
        storage.ensure_bucket(headers=headers))
    if not bucket_ok or not table_ok:
        raise Exception(
            f"Problem during resources initialisation: table ok? {table_ok}; bucket ok? {bucket_ok}"
        )

    log_info("resources initialization done")
Example 5
async def post_metadata(
        request: Request,
        project_id: str,
        metadata_body: EditMetadata,
        configuration: Configuration = Depends(get_configuration)):

    headers = generate_headers_downstream(request.headers)
    doc_db, storage, assets_gtw = configuration.doc_db, configuration.storage, configuration.assets_gtw_client
    owner = configuration.default_owner

    req, workflow, description = await asyncio.gather(
        storage.get_json(path="projects/{}/requirements.json".format(project_id), owner=owner, headers=headers),
        storage.get_json(path="projects/{}/workflow.json".format(project_id), owner=owner, headers=headers),
        storage.get_json(path="projects/{}/description.json".format(project_id), owner=owner, headers=headers)
        )
    log_info("Flux-Backend@Post metadata: got requirements and workflow")
    libraries = {**req['libraries'], **metadata_body.libraries}

    def get_package_id(factory_id: Union[str, Mapping[str, str]]):
        # legacy string ids of the form '<pack>@<name>' map to '@youwol/<name>';
        # dict-shaped ids carry the full pack name under 'pack'
        return "@youwol/" + factory_id.split("@")[1] if isinstance(factory_id, str) else factory_id['pack']

    used_packages = {get_package_id(m["factoryId"]) for m in workflow["modules"] + workflow["plugins"]}
    log_info("Flux-Backend@Post metadata: used_packages", used_packages=used_packages)

    body = {
        "libraries": {name: version for name, version in libraries.items() if name in used_packages},
        "using": {name: version for name, version in libraries.items()}
        }
    loading_graph = await assets_gtw.cdn_loading_graph(body=body, headers=headers)
    flux_packs = [p['name'] for p in loading_graph['lock'] if p['type'] == 'flux-pack']
    log_info("Flux-Backend@Post metadata: got loading graph", loading_graph=loading_graph)

    used_libraries = {lib["name"]: lib["version"] for lib in loading_graph["lock"]}
    requirements = Requirements(fluxComponents=[], fluxPacks=flux_packs,
                                libraries=used_libraries, loadingGraph=loading_graph)

    schema_version = description.get('schemaVersion', '0')
    coroutines = update_metadata(project_id=project_id, schema_version=schema_version,
                                 name=metadata_body.name, description=metadata_body.description,
                                 requirements=requirements, owner=owner, storage=storage, docdb=doc_db, headers=headers)
    await asyncio.gather(*coroutines)
    return {}
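The inner helper handles two factory-id shapes; lifted to module level for illustration, it behaves like this (the concrete ids are made up, the exact legacy string format is not shown in the excerpt):

from typing import Mapping, Union

def get_package_id(factory_id: Union[str, Mapping[str, str]]):
    # same body as the inner helper above
    return "@youwol/" + factory_id.split("@")[1] if isinstance(factory_id, str) else factory_id['pack']

# illustrative ids only
assert get_package_id("flux-pack@some-module") == "@youwol/some-module"
assert get_package_id({"pack": "@youwol/some-module"}) == "@youwol/some-module"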
Example 6
async def format_download_form(file_path: Path, base_path: Path, dir_path: Path, compress: bool, rename: str = None) \
        -> FormData:

    if compress and get_content_encoding(file_path) == "br":
        path_log = "/".join(file_path.parts[2:])
        start = time.time()
        if which('brotli'):
            log_info(f'brotlify (system) {path_log} ...')
            p = await asyncio.create_subprocess_shell(
                f'brotli {file_path}',
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE)

            async for line in p.stderr:
                log_error(line.decode('utf-8'))
            await p.communicate()
            # brotli writes '<file>.br' next to the original; swap them in place
            file_path.unlink()
            Path(f'{file_path}.br').rename(file_path)
        else:
            log_info(f'brotlify (python) {path_log}')
            compressed = brotli.compress(file_path.read_bytes())
            with file_path.open("wb") as f:
                f.write(compressed)
        log_info(f'...{path_log} => {time.time() - start} s')

    data = file_path.read_bytes()
    path_bucket = base_path / rename if rename else base_path / file_path.relative_to(dir_path)

    return FormData(objectName=path_bucket,
                    objectData=data,
                    owner=Configuration.owner,
                    objectSize=len(data),
                    content_type=get_content_type(file_path.name),
                    content_encoding=get_content_encoding(file_path.name))
Example 7
async def init_resources(config: Configuration):

    log_info("Ensure database resources")
    headers = await config.admin_headers if config.admin_headers else {}
    log_info("Successfully retrieved authorization for resources creation")
    doc_dbs = config.doc_dbs
    log_info("Ensure items_db table")
    items_ok = await doc_dbs.items_db.ensure_table(headers=headers)
    log_info("Ensure folders_db table")
    folders_ok = await doc_dbs.folders_db.ensure_table(headers=headers)
    log_info("Ensure drives_db table")
    drives_ok = await doc_dbs.drives_db.ensure_table(headers=headers)
    log_info("Ensure deleted_db table")
    deleted_ok = await doc_dbs.deleted_db.ensure_table(headers=headers)

    if not (items_ok and folders_ok and drives_ok and deleted_ok):
        raise Exception(f"Problem during doc-db's table initialisation {[items_ok, folders_ok, drives_ok, deleted_ok]}")

    log_info("resources initialization done")
Example 8
async def resolve_loading_tree(
    request: Request,
    body: LoadingGraphBody,
    configuration: Configuration = Depends(get_configuration)):

    doc_db = configuration.doc_db
    headers = generate_headers_downstream(request.headers)
    libraries = {name: version for name, version in body.libraries.items()}

    log_info(f"Start resolving loading graph: {libraries}")

    latest_queries = [
        name for name, version in libraries.items() if version == "latest"
    ]
    versions_resp = await asyncio.gather(
        *[list_versions(request=request, name=name, configuration=configuration)
          for name in latest_queries],
        return_exceptions=True)

    if any(isinstance(v, Exception) for v in versions_resp):
        packages_error = [
            f"{name}#latest" for e, name in zip(versions_resp, latest_queries)
            if isinstance(e, Exception)
        ]
        raise PackagesNotFound(
            detail="Failed to retrieve the latest version of package(s)",
            packages=packages_error)

    latest_versions = {
        name: resp.versions[0]
        for name, resp in zip(latest_queries, versions_resp)
    }
    explicit_versions = {**libraries, **latest_versions}

    log_info(f"Latest versions resolved",
             latest_versions=latest_versions,
             explicit_versions=explicit_versions)

    queries = [
        doc_db.get_document(
            partition_keys={"library_name": name},
            clustering_keys={"version_number": get_version_number_str(version)},
            owner=configuration.owner,
            headers=headers)
        for name, version in explicit_versions.items()
    ]

    dependencies = await asyncio.gather(*queries, return_exceptions=True)

    if any(isinstance(v, Exception) for v in dependencies):
        packages_error = [
            f"{name}#{version}"
            for e, (name, version) in zip(dependencies, explicit_versions.items())
            if isinstance(e, Exception)
        ]
        raise PackagesNotFound(
            detail="Failed to retrieve the explicit version of package(s)",
            packages=packages_error)

    dependencies_dict = {d["library_name"]: d for d in dependencies}

    async def add_missing_dependencies(missing_previous_loop=None):
        """Some dependencies may be missing from the provided body; fetch them
        using 'body.using', falling back to their latest version."""
        flatten_dependencies = set(
            flatten([[p.split("#")[0] for p in package['dependencies']]
                     for package in dependencies_dict.values()]))

        missing = [
            d for d in flatten_dependencies if d not in dependencies_dict
        ]
        if not missing:
            return dependencies_dict

        if missing_previous_loop and missing == missing_previous_loop:
            raise PackagesNotFound(
                detail="Indirect dependencies not found in the CDN",
                packages=missing)

        def get_dependency(dependency):
            if dependency in body.using:
                return get_query_version(configuration.doc_db, dependency,
                                         body.using[dependency], headers)
            return get_query_latest(configuration.doc_db, dependency, headers)

        versions = await asyncio.gather(
            *[get_dependency(dependency) for dependency in missing],
            return_exceptions=True)
        if any(isinstance(v, Exception) or len(v["documents"]) == 0 for v in versions):
            raise PackagesNotFound(
                detail="Failed to retrieve a version of indirect dependencies",
                packages=[
                    f"{name}#{body.using.get(name, 'latest')}"
                    for v, name in zip(versions, missing)
                    if isinstance(v, Exception) or len(v["documents"]) == 0
                ])

        versions = list(flatten([d['documents'] for d in versions]))
        for version in versions:
            lib_name = version["library_name"]
            dependencies_dict[lib_name] = version

        return await add_missing_dependencies(missing_previous_loop=missing)

    await add_missing_dependencies()
    items_dict = {
        d["library_name"]: [to_package_id(d["library_name"]),
                            get_url(d)]
        for d in dependencies_dict.values()
    }
    r = loading_graph([], dependencies_dict.values(), items_dict)

    lock = [
        Library(name=d["library_name"],
                version=d["version"],
                namespace=d["namespace"],
                id=to_package_id(d["library_name"]),
                type=d["type"],
                fingerprint=d["fingerprint"])
        for d in dependencies_dict.values()
    ]

    return LoadingGraphResponseV1(graphType="sequential-v1",
                                  lock=lock,
                                  definition=r)
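add_missing_dependencies computes a transitive closure over dependency names; a stripped-down sketch of the same fixed-point idea, with plain dicts standing in for the doc-db records (names here are illustrative):

from typing import Dict, List, Set

def transitive_closure(roots: Set[str], deps_of: Dict[str, List[str]]) -> Set[str]:
    # repeatedly add dependencies of already-resolved packages until no new name appears
    resolved: Set[str] = set(roots)
    while True:
        missing = {d for name in resolved for d in deps_of.get(name, [])} - resolved
        if not missing:
            return resolved
        resolved |= missing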
Example 9
async def init_resources(config: Configuration):
    log_info("Ensure database resources")
    headers = await config.admin_headers if config.admin_headers else {}

    log_info("Successfully retrieved authorization for resources creation")
    log_info("Ensure assets table")
    table1_ok = await config.doc_db_asset.ensure_table(headers=headers)
    if not table1_ok:
        raise Exception("Problem during docdb_asset resources initialisation")
    log_info("Ensure assets bucket")
    bucket_ok = await config.storage.ensure_bucket(headers=headers)
    if not bucket_ok:
        raise Exception("Problem during bucket initialisation")
    log_info("Ensure access policy table")
    table2_ok = await config.doc_db_access_policy.ensure_table(headers=headers)
    if not table2_ok:
        raise Exception(
            "Problem during docdb_access_policy resources initialisation")
    log_info("Ensure access history table")
    table3_ok = await config.doc_db_access_history.ensure_table(headers=headers)
    if not table3_ok:
        raise Exception(
            "Problem during docdb_access_history resources initialisation")

    log_info("resources initialization done")
Example 10
async def publish_package(file: IO, filename: str, content_encoding,
                          configuration, headers):

    if content_encoding not in ['identity', 'brotli']:
        raise HTTPException(
            status_code=422,
            detail="Only identity and brotli encodings are accepted")
    need_compression = content_encoding == 'identity'
    dir_path = Path("./tmp_zips") / str(uuid4())
    zip_path = (dir_path / filename).with_suffix('.zip')

    os.makedirs(dir_path)
    try:
        log_info("extract .zip file...")
        compressed_size = extract_zip_file(file,
                                           zip_path,
                                           dir_path,
                                           delete_original=False)
        log_info("...zip extracted", compressed_size=compressed_size)

        package_path = next(
            flatten([[Path(root) / f for f in files if f == "package.json"]
                     for root, _, files in os.walk(dir_path)]))

        package_json = json.loads(package_path.read_text())
        library_id = package_json["name"].replace("@", '')
        version = package_json["version"]
        base_path = Path('libraries') / library_id / version
        storage = configuration.storage

        paths = flatten([[Path(root) / f for f in files]
                         for root, _, files in os.walk(dir_path)])
        paths = [p for p in paths if p != zip_path]
        form_original = await format_download_form(zip_path, base_path,
                                                   package_path.parent,
                                                   need_compression,
                                                   '__original.zip')
        forms = await asyncio.gather(*[
            format_download_form(path, base_path, package_path.parent,
                                 need_compression) for path in paths
        ])
        forms = list(forms) + [form_original]
        # the fingerprint is the md5 checksum of the included files, computed after any compression
        os.remove(zip_path)
        md5_stamp = md5_from_folder(dir_path)

        post_requests = [
            storage.post_object(path=form.objectName,
                                content=form.objectData,
                                content_type=form.content_type,
                                owner=Configuration.owner,
                                headers=headers) for form in forms
        ]

        log_info(f"Clean directory {str(base_path)}")
        await storage.delete_group(prefix=base_path,
                                   owner=Configuration.owner,
                                   headers=headers)

        log_info(f"Send {len(post_requests)} files to storage")
        await asyncio.gather(*post_requests)
        record = format_doc_db_record(package_path=package_path,
                                      fingerprint=md5_stamp)
        log_info("Create docdb document", record=record)
        await configuration.doc_db.create_document(record,
                                                   owner=Configuration.owner,
                                                   headers=headers)

        log_info("Done", md5_stamp=md5_stamp)
        return PublishResponse(
            name=package_json["name"],
            version=version,
            compressedSize=compressed_size,
            id=to_package_id(package_json["name"]),
            fingerprint=md5_stamp,
            url=f"{to_package_id(package_json['name'])}/{record['version']}/{record['bundle']}"
        )
    finally:
        shutil.rmtree(dir_path)
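A sketch of how publish_package might be exposed as an upload endpoint, assuming FastAPI; the route path and endpoint name are illustrative, while generate_headers_downstream, get_configuration, and Configuration are reused from the examples above:

from fastapi import APIRouter, Depends, File, Request, UploadFile

router = APIRouter()

@router.post("/publish")
async def publish(
        request: Request,
        file: UploadFile = File(...),
        configuration: Configuration = Depends(get_configuration)):
    headers = generate_headers_downstream(request.headers)
    # UploadFile.file is the underlying IO object expected by publish_package
    return await publish_package(
        file.file, file.filename,
        request.headers.get("content-encoding", "identity"),
        configuration, headers)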