def _get_data_product_url(
    data_product_name: str,
    namespace: str,
    version: str,
    component_name: str,
    data_registry_url: str,
    token: str,
) -> str:
    """
    Gets the url reference of a data product component

    The namespace is resolved to a reference first, then the data product is looked up by
    namespace, name and version, and finally the named component of the data product's
    object is looked up.

    :param data_product_name: Name of the data product
    :param namespace: namespace that the data product is a member of
    :param version: version of the data product
    :param component_name: name of the data product component used as input
    :param data_registry_url: base url of the data registry
    :param token: personal access token
    :return: url reference to the data product version component
    :raises ValueError: if the namespace, the data product or the component cannot be found
    """
    namespace_ref = get_reference(
        {DataRegistryField.name: namespace},
        DataRegistryTarget.namespace,
        data_registry_url,
        token,
    )
    if namespace_ref is None:
        raise ValueError(f"No namespace found for {namespace}")

    data_product = get_data(
        {
            DataRegistryField.namespace: namespace_ref,
            DataRegistryField.name: data_product_name,
            DataRegistryField.version: version,
        },
        DataRegistryTarget.data_product,
        data_registry_url,
        token,
    )
    # get_data yields None when nothing matches; fail with context rather than
    # with an opaque "NoneType is not subscriptable" error
    if data_product is None:
        raise ValueError(
            f"No data product found for {namespace}/{data_product_name}/{version}"
        )

    object_component = get_data(
        {
            DataRegistryField.object: data_product["object"],
            DataRegistryField.name: component_name,
        },
        DataRegistryTarget.object_component,
        data_registry_url,
        token,
    )
    if object_component is None:
        raise ValueError(
            f"No component found for {namespace}/{data_product_name}/{version}/{component_name}"
        )

    url = object_component["url"]
    logger.info(
        f"Retrieved {url} for {namespace}/{data_product_name}/{version}/{component_name}"
    )
    return url
def _resolve_objects(self, input_blocks: List[DownloaderDict], external: bool = False) -> List[DownloaderDict]:
    """
    Attaches the registry object to every input block whose object has usable components

    For a block that names a component, the object is only fetched when the registry
    returns at least one matching component for it. For a block without a component name,
    the object is fetched and kept only when its components field is non-empty.

    :param input_blocks: blocks that already carry the object reference of their
        data product (or external object when ``external`` is true)
    :param external: read the object reference from the external object fields instead of
        the data product fields
    :return: copies of the kept blocks with the object's fields added under
        ``(DataRegistryTarget.object, <field>)`` keys
    """
    output = []
    for entry in input_blocks:
        component_name = entry.get(
            (DataRegistryTarget.object_component, DataRegistryField.name)
        )
        if external:
            source_target = DataRegistryTarget.external_object
        else:
            source_target = DataRegistryTarget.data_product
        reference = entry[source_target, DataRegistryField.object]

        if component_name:
            # a component was requested: skip objects that do not have it
            matching_components = get_data(
                {
                    DataRegistryField.name: component_name,
                    DataRegistryField.object: reference,
                },
                DataRegistryTarget.object_component,
                self._data_registry_url,
                self._token,
                exact=False,
            )
            if not matching_components:
                continue
            registry_object = get_on_end_point(reference, self._token)
        else:
            registry_object = get_on_end_point(reference, self._token)
            if not registry_object[DataRegistryField.components]:
                continue

        expanded = entry.copy()
        for field, value in registry_object.items():
            expanded[DataRegistryTarget.object, field] = value
        output.append(expanded)
    return output
def _get_external_object_url(
    doi_or_unique_name: str,
    version: str,
    component_name: str,
    data_registry_url: str,
    token: str,
) -> str:
    """
    Gets the url reference of an external object component

    The external object is looked up by identifier and version, then the named component
    of its object is looked up.

    :param doi_or_unique_name: Identifier of the external object
    :param version: version of the external object
    :param component_name: name of the external object component used as input
    :param data_registry_url: base url of the data registry
    :param token: personal access token
    :return: url reference to the external object version component
    :raises ValueError: if the external object or the component cannot be found
    """
    external_object = get_data(
        {
            DataRegistryField.doi_or_unique_name: doi_or_unique_name,
            DataRegistryField.version: version,
        },
        DataRegistryTarget.external_object,
        data_registry_url,
        token,
    )
    # get_data yields None when nothing matches; fail with context rather than
    # with an opaque "NoneType is not subscriptable" error
    if external_object is None:
        raise ValueError(
            f"No external object found for {doi_or_unique_name}/{version}"
        )

    object_component = get_data(
        {
            DataRegistryField.object: external_object["object"],
            DataRegistryField.name: component_name,
        },
        DataRegistryTarget.object_component,
        data_registry_url,
        token,
    )
    if object_component is None:
        raise ValueError(
            f"No component found for {doi_or_unique_name}/{version}/{component_name}"
        )

    url = object_component["url"]
    logger.info(f"Retrieved {url} for {doi_or_unique_name}/{version}/{component_name}")
    return url
def _resolve_external_objects(
        self, input_blocks: List[DownloaderDict]) -> List[DownloaderDict]:
    """
    Expands each input block into one block per matching external object in the registry

    The registry is queried by doi_or_unique_name plus, when the block provides them,
    version and title. Matches are ordered with ``sort_by_semver``. When the block does not
    name a component, only the first match for each (doi_or_unique_name, title) pair is kept.

    :param input_blocks: blocks carrying at least the external object doi_or_unique_name
    :return: copies of the blocks with the fields of each matching external object added
        under ``(DataRegistryTarget.external_object, <field>)`` keys
    """
    output = []
    for entry in input_blocks:
        query = {
            DataRegistryField.doi_or_unique_name:
            entry[DataRegistryTarget.external_object,
                  DataRegistryField.doi_or_unique_name]
        }
        # version and title are optional filters
        requested_version = entry.get(
            (DataRegistryTarget.external_object, DataRegistryField.version))
        if requested_version is not None:
            query[DataRegistryField.version] = requested_version
        requested_title = entry.get(
            (DataRegistryTarget.external_object, DataRegistryField.title))
        if requested_title is not None:
            query[DataRegistryField.title] = requested_title

        matches = get_data(query,
                           DataRegistryTarget.external_object,
                           self._data_registry_url,
                           self._token,
                           exact=False)
        if not matches:
            continue

        matches = sort_by_semver(matches)
        component_name = entry.get(
            (DataRegistryTarget.object_component, DataRegistryField.name))
        if component_name is None:
            # keep only the first match (in semver order) per name/title pair
            first_by_identity = {}
            for match in matches:
                identity = (match[DataRegistryField.doi_or_unique_name],
                            match[DataRegistryField.title])
                first_by_identity.setdefault(identity, match)
            matches = list(first_by_identity.values())

        for match in matches:
            expanded = entry.copy()
            for field, value in match.items():
                expanded[DataRegistryTarget.external_object, field] = value
            output.append(expanded)
    return output
def _resolve_namespaces(
        self, input_blocks: List[DownloaderDict]) -> List[DownloaderDict]:
    """
    Expands each input block into one block per namespace matching its namespace name

    :param input_blocks: blocks carrying the namespace name to look up
    :return: copies of the blocks with the fields of each matching namespace added under
        ``(DataRegistryTarget.namespace, <field>)`` keys; blocks without a match are dropped
    """
    output = []
    for entry in input_blocks:
        matches = get_data(
            {
                DataRegistryField.name:
                entry[DataRegistryTarget.namespace, DataRegistryField.name]
            },
            DataRegistryTarget.namespace,
            self._data_registry_url,
            self._token,
            exact=False,
        )
        if not matches:
            continue
        for match in matches:
            expanded = entry.copy()
            for field, value in match.items():
                expanded[DataRegistryTarget.namespace, field] = value
            output.append(expanded)
    return output
def _add_storage_root(
    posts: List[YamlDict],
    remote_uri: str,
    accessibility: int,
    data_registry_url: str,
    token: str,
) -> Union[YamlDict, str]:
    """
    Finds the storage root for a remote URI, queueing a new one for posting if it does not exist

    :param posts: List of posts to the data registry; a new storage root entry is appended
        to it when the registry has none for ``remote_uri``
    :param remote_uri: URI to the root of the storage
    :param accessibility: accessibility level of the storage root
    :param data_registry_url: base url of the data registry
    :param token: personal access token
    :return: the url of the existing storage root, or the newly created storage root dict
    """
    existing = get_data(
        {DataRegistryField.root: remote_uri},
        DataRegistryTarget.storage_root,
        data_registry_url,
        token,
    )
    if existing is not None:
        # already registered: refer to it by url
        return existing["url"]

    logger.info(
        f"No storage_root found for {remote_uri}, creating new storage_root"
    )
    new_storage_root = _create_target_data_dict(
        DataRegistryTarget.storage_root,
        {
            DataRegistryField.name: remote_uri,
            DataRegistryField.root: remote_uri,
            DataRegistryField.accessibility: accessibility,
        },
    )
    posts.append(new_storage_root)
    return new_storage_root
def _resolve_data_products(
        self, input_blocks: List[DownloaderDict]) -> List[DownloaderDict]:
    """
    Expands each input block into one block per matching data product in the registry

    The registry is queried by data product name and namespace url plus, when the block
    provides it, version. Matches are ordered with ``sort_by_semver``. When the block does
    not name a component, only the first match for each data product name is kept.

    :param input_blocks: blocks carrying the data product name and the resolved namespace url
    :return: copies of the blocks with the fields of each matching data product added under
        ``(DataRegistryTarget.data_product, <field>)`` keys
    """
    output = []
    for entry in input_blocks:
        query = {
            DataRegistryField.name:
            entry[DataRegistryTarget.data_product, DataRegistryField.name],
            DataRegistryField.namespace:
            entry[DataRegistryTarget.namespace, DataRegistryField.url],
        }
        requested_version = entry.get(
            (DataRegistryTarget.data_product, DataRegistryField.version))
        if requested_version is not None:
            query[DataRegistryField.version] = requested_version

        matches = get_data(query,
                           DataRegistryTarget.data_product,
                           self._data_registry_url,
                           self._token,
                           exact=False)
        if not matches:
            continue

        matches = sort_by_semver(matches)
        component_name = entry.get(
            (DataRegistryTarget.object_component, DataRegistryField.name))
        if component_name is None:
            # globbing may have matched several data products; the matches are
            # sorted by semver, so keep the first one seen for each name
            first_by_name = {}
            for match in matches:
                first_by_name.setdefault(match[DataRegistryField.name], match)
            matches = list(first_by_name.values())

        for match in matches:
            expanded = entry.copy()
            for field, value in match.items():
                expanded[DataRegistryTarget.data_product, field] = value
            output.append(expanded)
    return output
def upload_data_product_cli(
    data_product_path,
    namespace,
    storage_root_name,
    storage_location_path,
    accessibility,
    data_product_name,
    data_product_description,
    data_product_version,
    component,
    data_registry,
    token,
    remote_uri,
    remote_option,
    remote_uri_override,
):
    """
    Uploads a data product file to remote storage and registers it in the data registry

    The file is uploaded with ``upload_to_storage``, the ``templates/data_product.yaml``
    template is filled in with the details of the upload, one object component entry is
    added per requested component, and the result is passed to ``upload_from_config``.

    :param data_product_path: path of the data product file to upload
    :param namespace: namespace of the data product, also used as path prefix for the upload
    :param storage_root_name: name of the storage root, defaults to the network location of
        the (overridden) remote uri
    :param storage_location_path: upload path passed on to ``upload_to_storage``
    :param accessibility: accessibility value written into the template
    :param data_product_name: name of the data product
    :param data_product_description: description of the data product
    :param data_product_version: version of the data product; when falsy, or when matching
        data products already exist in the registry, it is derived (see below)
    :param component: iterable of (name, description) pairs, when falsy a single component
        named and described like the data product is registered
    :param data_registry: base url of the data registry, defaults to
        ``DEFAULT_DATA_REGISTRY_URL``
    :param token: personal access token
    :param remote_uri: URI of the storage to upload to
    :param remote_option: key/value pairs (anything ``dict()`` accepts) that override the
        options returned by ``get_remote_options()``
    :param remote_uri_override: URI recorded as the storage root in the registry, defaults
        to ``remote_uri``
    """
    configure_cli_logging()
    template_file = Path(__file__).parent / Path("templates/data_product.yaml")
    template = template_file.read_text()

    data_registry = data_registry or DEFAULT_DATA_REGISTRY_URL
    remote_uri_override = remote_uri_override or remote_uri
    remote_uri = remote_uri.strip()
    remote_uri_override = remote_uri_override.strip()
    storage_root_name = storage_root_name or urllib.parse.urlparse(
        remote_uri_override).netloc
    storage_root = remote_uri_override

    # options given as arguments take precedence over those from get_remote_options()
    remote_options = get_remote_options()
    remote_options.update(dict(remote_option) if remote_option else {})

    data_product_path = Path(data_product_path)
    storage_location_hash = FileAPI.calculate_hash(data_product_path)
    # pass the merged options; previously the raw remote_option argument was
    # passed and the merged dict was never used
    path = upload_to_storage(remote_uri,
                             remote_options,
                             data_product_path.parent,
                             data_product_path,
                             upload_path=storage_location_path,
                             path_prefix=namespace)

    namespace_ref = get_reference({DataRegistryField.name: namespace},
                                  DataRegistryTarget.namespace, data_registry,
                                  token)
    if namespace_ref:
        query = {
            DataRegistryField.name: data_product_name,
            DataRegistryField.namespace: namespace_ref
        }
        if data_product_version:
            query[DataRegistryField.version] = data_product_version
        data_products = get_data(query, DataRegistryTarget.data_product,
                                 data_registry, token, False)
        if data_products:
            # matching data products already exist: use a minor bump of the
            # first one in semver order
            latest = next(iter(sort_by_semver(data_products)))
            data_product_version = str(
                semver.VersionInfo.parse(
                    latest[DataRegistryField.version]).bump_minor())
    if not data_product_version:
        # nothing to derive a version from (no version given and either the
        # namespace or the data product does not exist yet)
        data_product_version = "0.1.0"

    populated_yaml = template.format(
        namespace=namespace,
        storage_root_name=storage_root_name,
        storage_root=storage_root,
        accessibility=accessibility,
        storage_location_path=path,
        storage_location_hash=storage_location_hash,
        data_product_name=data_product_name,
        data_product_description=data_product_description,
        data_product_version=data_product_version,
        component_name="COMPONENT_NAME",
        component_description="COMPONENT_DESCRIPTION",
    )
    config = yaml.safe_load(populated_yaml)

    # the last post of the template is a placeholder component; replace it with
    # one entry per requested component
    component_template = config["post"].pop(-1)
    components = component or [(data_product_name, data_product_description)]
    for component_name, component_description in components:
        component_data = component_template["data"].copy()
        component_data["name"] = component_name
        component_data["description"] = component_description
        config["post"].append({
            "data": component_data,
            "target": DataRegistryTarget.object_component
        })
    upload_from_config(config, data_registry, token)