コード例 #1
0
def _get_data_product_url(
    data_product_name: str,
    namespace: str,
    version: str,
    component_name: str,
    data_registry_url: str,
    token: str,
) -> str:
    """
    Gets the url reference of a data product component

    Resolves the namespace to its registry reference, queries the data product
    by namespace, name and version, then queries the named component of the
    data product's object and returns that component's url.

    :param data_product_name: Name of the data product
    :param namespace: namespace that the data product is a member of
    :param version: version of the data product
    :param component_name: name of the data product component used as input
    :param data_registry_url: base url of the data registry
    :param token: personal access token
    :return: url reference to the data product version component
    :raises ValueError: if the namespace, the data product or the component
        cannot be found in the data registry
    """
    namespace_ref = get_reference(
        {DataRegistryField.name: namespace},
        DataRegistryTarget.namespace,
        data_registry_url,
        token,
    )
    if namespace_ref is None:
        raise ValueError(f"No namespace found for {namespace}")

    data_product = get_data(
        {
            DataRegistryField.namespace: namespace_ref,
            DataRegistryField.name: data_product_name,
            DataRegistryField.version: version,
        },
        DataRegistryTarget.data_product,
        data_registry_url,
        token,
    )
    # get_data may return None when nothing matches; fail with a clear message
    # instead of a TypeError from subscripting None.
    if data_product is None:
        raise ValueError(
            f"No data product found for {namespace}/{data_product_name}/{version}"
        )

    object_component = get_data(
        {
            DataRegistryField.object: data_product["object"],
            DataRegistryField.name: component_name,
        },
        DataRegistryTarget.object_component,
        data_registry_url,
        token,
    )
    if object_component is None:
        raise ValueError(
            f"No component found for {namespace}/{data_product_name}/{version}/{component_name}"
        )

    url = object_component["url"]
    logger.info(
        f"Retrieved {url} for {namespace}/{data_product_name}/{version}/{component_name}"
    )
    return url
コード例 #2
0
 def _resolve_objects(self,
                      input_blocks: List[DownloaderDict],
                      external: bool = False) -> List[DownloaderDict]:
     """
     Attaches the registry object to every input block that can be resolved

     The object reference is read from the block under the external_object
     target when ``external`` is true, otherwise under the data_product target.
     If the block names a component, the object is only fetched when a
     non-exact component query for that name on the object returns something;
     otherwise the object is fetched and kept only if its components field is
     non-empty.

     :param input_blocks: blocks to resolve
     :param external: read the object reference from the external object
         target instead of the data product target
     :return: copies of the resolved blocks with the object's fields stored
         under (DataRegistryTarget.object, <field>) keys
     """
     if external:
         target = DataRegistryTarget.external_object
     else:
         target = DataRegistryTarget.data_product
     component_key = (DataRegistryTarget.object_component,
                      DataRegistryField.name)

     resolved = []
     for block in input_blocks:
         registry_object = None
         component_name = block.get(component_key)
         object_ref = block[target, DataRegistryField.object]
         if component_name:
             # a component was requested: the object only counts if it has it
             matches = get_data(
                 {
                     DataRegistryField.name: component_name,
                     DataRegistryField.object: object_ref
                 },
                 DataRegistryTarget.object_component,
                 self._data_registry_url,
                 self._token,
                 exact=False,
             )
             if matches:
                 registry_object = get_on_end_point(object_ref, self._token)
         else:
             registry_object = get_on_end_point(object_ref, self._token)
             matches = registry_object[DataRegistryField.components]
         if not matches:
             continue
         resolved_block = block.copy()
         for field, value in registry_object.items():
             resolved_block[DataRegistryTarget.object, field] = value
         resolved.append(resolved_block)
     return resolved
コード例 #3
0
def _get_external_object_url(
    doi_or_unique_name: str,
    version: str,
    component_name: str,
    data_registry_url: str,
    token: str,
) -> str:
    """
    Gets the url reference of an external object component

    Queries the external object by identifier and version, then queries the
    named component of the external object's object and returns that
    component's url.

    :param doi_or_unique_name: Identifier of the external object
    :param version: version of the external object
    :param component_name: name of the external object component used as input
    :param data_registry_url: base url of the data registry
    :param token: personal access token
    :return: url reference to the external object version component
    :raises ValueError: if the external object or the component cannot be
        found in the data registry
    """
    external_object = get_data(
        {
            DataRegistryField.doi_or_unique_name: doi_or_unique_name,
            DataRegistryField.version: version,
        },
        DataRegistryTarget.external_object,
        data_registry_url,
        token,
    )
    # get_data may return None when nothing matches; fail with a clear message
    # instead of a TypeError from subscripting None.
    if external_object is None:
        raise ValueError(
            f"No external object found for {doi_or_unique_name}/{version}"
        )

    object_component = get_data(
        {
            DataRegistryField.object: external_object["object"],
            DataRegistryField.name: component_name,
        },
        DataRegistryTarget.object_component,
        data_registry_url,
        token,
    )
    if object_component is None:
        raise ValueError(
            f"No component found for {doi_or_unique_name}/{version}/{component_name}"
        )

    url = object_component["url"]
    logger.info(f"Retrieved {url} for {doi_or_unique_name}/{version}/{component_name}")
    return url
コード例 #4
0
 def _resolve_external_objects(
         self, input_blocks: List[DownloaderDict]) -> List[DownloaderDict]:
     """
     Expands each input block into one block per matching external object

     The registry is queried (non-exact) by doi_or_unique_name, plus version
     and title when the block provides them. Matches are ordered with
     sort_by_semver. When the block does not name a component, only the first
     match in that order is kept for each (doi_or_unique_name, title) pair.

     :param input_blocks: blocks describing the external objects to look up
     :return: copies of the blocks with the external object's fields stored
         under (DataRegistryTarget.external_object, <field>) keys; blocks with
         no match contribute nothing
     """
     target = DataRegistryTarget.external_object
     optional_fields = (DataRegistryField.version, DataRegistryField.title)

     resolved = []
     for block in input_blocks:
         query_data = {
             DataRegistryField.doi_or_unique_name:
             block[target, DataRegistryField.doi_or_unique_name]
         }
         # version and title only narrow the query when the block sets them
         for field in optional_fields:
             value = block.get((target, field))
             if value is not None:
                 query_data[field] = value

         matches = get_data(query_data,
                            target,
                            self._data_registry_url,
                            self._token,
                            exact=False)
         if not matches:
             continue

         matches = sort_by_semver(matches)
         component_name = block.get(
             (DataRegistryTarget.object_component, DataRegistryField.name))
         if component_name is None:
             # keep the first match per (name, title); setdefault never
             # overwrites an earlier entry
             first_per_group = {}
             for candidate in matches:
                 group_key = (candidate[DataRegistryField.doi_or_unique_name],
                              candidate[DataRegistryField.title])
                 first_per_group.setdefault(group_key, candidate)
             matches = list(first_per_group.values())

         for match in matches:
             resolved_block = block.copy()
             for field, value in match.items():
                 resolved_block[target, field] = value
             resolved.append(resolved_block)
     return resolved
コード例 #5
0
 def _resolve_namespaces(
         self, input_blocks: List[DownloaderDict]) -> List[DownloaderDict]:
     """
     Expands each input block into one block per matching namespace

     The registry is queried (non-exact) with the namespace name held by the
     block.

     :param input_blocks: blocks holding a namespace name under the
         (DataRegistryTarget.namespace, DataRegistryField.name) key
     :return: copies of the blocks with the namespace's fields stored under
         (DataRegistryTarget.namespace, <field>) keys; blocks with no match
         contribute nothing
     """
     target = DataRegistryTarget.namespace

     resolved = []
     for block in input_blocks:
         matches = get_data(
             {DataRegistryField.name: block[target, DataRegistryField.name]},
             target,
             self._data_registry_url,
             self._token,
             exact=False,
         )
         if not matches:
             continue
         for match in matches:
             resolved_block = block.copy()
             for field, value in match.items():
                 resolved_block[target, field] = value
             resolved.append(resolved_block)
     return resolved
コード例 #6
0
def _add_storage_root(
    posts: List[YamlDict],
    remote_uri: str,
    accessibility: int,
    data_registry_url: str,
    token: str,
) -> Union[YamlDict, str]:
    """
    Looks up the storage root for a remote URI, queuing its creation if absent

    If the data registry already has a storage root whose root is remote_uri,
    its url is returned. Otherwise a new storage root post is built, appended
    to posts, and returned.

    :param posts: List of posts to the data registry, will be modified
    :param remote_uri: URI to the root of the storage
    :param accessibility: accessibility level of the storage root
    :param data_registry_url: base url of the data registry
    :param token: personal access token
    :return: the reference url of the existing storage root, or the dict that
        was appended to posts
    """
    existing = get_data(
        {DataRegistryField.root: remote_uri},
        DataRegistryTarget.storage_root,
        data_registry_url,
        token,
    )
    if existing is not None:
        return existing["url"]

    logger.info(
        f"No storage_root found for {remote_uri}, creating new storage_root"
    )
    # the remote uri doubles as the name of the new storage root
    new_storage_root = _create_target_data_dict(
        DataRegistryTarget.storage_root,
        {
            DataRegistryField.name: remote_uri,
            DataRegistryField.root: remote_uri,
            DataRegistryField.accessibility: accessibility,
        },
    )
    posts.append(new_storage_root)
    return new_storage_root
コード例 #7
0
 def _resolve_data_products(
         self, input_blocks: List[DownloaderDict]) -> List[DownloaderDict]:
     """
     Expands each input block into one block per matching data product

     The registry is queried (non-exact) by data product name and namespace
     url, plus version when the block provides one. Matches are ordered with
     sort_by_semver. When the block does not name a component, only the first
     match in that order is kept for each data product name.

     :param input_blocks: blocks holding the data product name and the url of
         an already resolved namespace
     :return: copies of the blocks with the data product's fields stored under
         (DataRegistryTarget.data_product, <field>) keys; blocks with no match
         contribute nothing
     """
     target = DataRegistryTarget.data_product

     resolved = []
     for block in input_blocks:
         query_data = {
             DataRegistryField.name:
             block[target, DataRegistryField.name],
             DataRegistryField.namespace:
             block[DataRegistryTarget.namespace, DataRegistryField.url],
         }
         requested_version = block.get((target, DataRegistryField.version))
         if requested_version is not None:
             query_data[DataRegistryField.version] = requested_version

         matches = get_data(query_data,
                            target,
                            self._data_registry_url,
                            self._token,
                            exact=False)
         if not matches:
             continue

         matches = sort_by_semver(matches)
         component_name = block.get(
             (DataRegistryTarget.object_component, DataRegistryField.name))
         if component_name is None:
             # globbing may have matched several data products; keep the first
             # one per name in semver-sorted order (setdefault never
             # overwrites an earlier entry)
             first_per_name = {}
             for candidate in matches:
                 first_per_name.setdefault(candidate[DataRegistryField.name],
                                           candidate)
             matches = list(first_per_name.values())

         for match in matches:
             resolved_block = block.copy()
             for field, value in match.items():
                 resolved_block[target, field] = value
             resolved.append(resolved_block)
     return resolved
コード例 #8
0
def upload_data_product_cli(
    data_product_path,
    namespace,
    storage_root_name,
    storage_location_path,
    accessibility,
    data_product_name,
    data_product_description,
    data_product_version,
    component,
    data_registry,
    token,
    remote_uri,
    remote_option,
    remote_uri_override,
):
    """
    Uploads a data product file to remote storage and posts it to the data registry

    The file is hashed and uploaded, a version is chosen, the
    ``templates/data_product.yaml`` template is filled in, one object component
    post is added per requested component, and the resulting config is passed
    to upload_from_config.

    Version selection: if the namespace exists and the registry already holds
    matching data products, the version becomes the minor bump of the first
    entry returned by sort_by_semver. Otherwise the given version is used,
    falling back to "0.1.0" when none was given.

    :param data_product_path: path of the file to upload
    :param namespace: namespace name, also used as the upload path prefix
    :param storage_root_name: name of the storage root, defaults to the network
        location of the (overridden) remote uri
    :param storage_location_path: upload path passed to upload_to_storage
    :param accessibility: accessibility value written into the template
    :param data_product_name: name of the data product
    :param data_product_description: description of the data product
    :param data_product_version: optional version of the data product
    :param component: optional iterable of (name, description) pairs; when
        empty a single component named after the data product is posted
    :param data_registry: base url of the data registry, defaults to
        DEFAULT_DATA_REGISTRY_URL
    :param token: personal access token
    :param remote_uri: URI of the storage to upload to
    :param remote_option: optional iterable of (key, value) pairs overriding
        the options returned by get_remote_options
    :param remote_uri_override: URI recorded as the storage root, defaults to
        remote_uri
    """
    configure_cli_logging()

    template_file = Path(__file__).parent / Path("templates/data_product.yaml")
    with open(template_file, "r") as f:
        template = f.read()

    data_registry = data_registry or DEFAULT_DATA_REGISTRY_URL
    remote_uri_override = remote_uri_override or remote_uri
    remote_uri = remote_uri.strip()
    remote_uri_override = remote_uri_override.strip()
    storage_root_name = storage_root_name or urllib.parse.urlparse(
        remote_uri_override).netloc
    storage_root = remote_uri_override

    # options given as arguments take precedence over the configured ones
    remote_options = get_remote_options()
    remote_options.update(dict(remote_option) if remote_option else {})
    data_product_path = Path(data_product_path)

    storage_location_hash = FileAPI.calculate_hash(data_product_path)

    # pass the merged options: previously the raw ``remote_option`` argument
    # was forwarded and the merged dictionary was never used
    path = upload_to_storage(remote_uri,
                             remote_options,
                             data_product_path.parent,
                             data_product_path,
                             upload_path=storage_location_path,
                             path_prefix=namespace)

    namespace_ref = get_reference({DataRegistryField.name: namespace},
                                  DataRegistryTarget.namespace, data_registry,
                                  token)
    data_products = None
    if namespace_ref:
        query = {
            DataRegistryField.name: data_product_name,
            DataRegistryField.namespace: namespace_ref
        }
        if data_product_version:
            query[DataRegistryField.version] = data_product_version
        # NOTE(review): the trailing False is presumably ``exact`` - the result
        # is treated as a collection below; confirm against get_data
        data_products = get_data(query, DataRegistryTarget.data_product,
                                 data_registry, token, False)
    if data_products:
        latest = next(iter(sort_by_semver(data_products)))
        data_product_version = str(
            semver.VersionInfo.parse(
                latest[DataRegistryField.version]).bump_minor())
    elif not data_product_version:
        # also reached when the namespace does not exist yet, so the template
        # never receives a missing version
        data_product_version = "0.1.0"

    populated_yaml = template.format(
        namespace=namespace,
        storage_root_name=storage_root_name,
        storage_root=storage_root,
        accessibility=accessibility,
        storage_location_path=path,
        storage_location_hash=storage_location_hash,
        data_product_name=data_product_name,
        data_product_description=data_product_description,
        data_product_version=data_product_version,
        component_name="COMPONENT_NAME",
        component_description="COMPONENT_DESCRIPTION",
    )
    config = yaml.safe_load(populated_yaml)

    # the last post of the template is a placeholder component; replace it
    # with one post per requested component
    component_template = config["post"].pop(-1)
    components = component or [(data_product_name, data_product_description)]
    for component_name, component_description in components:
        component_data = component_template["data"].copy()
        component_data["name"] = component_name
        component_data["description"] = component_description
        config["post"].append({
            "data": component_data,
            "target": DataRegistryTarget.object_component
        })
    upload_from_config(config, data_registry, token)