def test_dmf_find():
    """Populate a newly created DMF and check the counts returned by `find()`.

    The DMF is created under `scratch_dir`/dmf_find. Resources are added in
    batches; the test then checks how many records come back for an
    unfiltered search, a search on the shared tag, and a search using the
    'tags!' key with two tags.
    """
    workspace = Path(scratch_dir) / "dmf_find"
    dmf = DMF(path=workspace, create=True)

    # Populate with batches of records:
    #  - every record carries the tag 'all'
    #  - records of batch N additionally carry the tag 'batch<N>'
    #  - records of batch N are given version 1.0.<N>
    #  - individual resources will have data of {i: 0..<batch_size-1>}
    batch_size, batch_count = 10, 9
    all_ids = []
    for batch_num in range(1, batch_count + 1):
        batch_tag = "batch{:d}".format(batch_num)
        batch_version = resource.version_list([1, 0, batch_num])
        all_ids.extend(
            add_resources(
                dmf,
                num=batch_size,
                tags=["all", batch_tag],
                version_info={"version": batch_version},
            )
        )

    # Fetching a resource just to print it is skipped unless debugging is on
    if _log.isEnabledFor(logging.DEBUG):
        first = dmf.fetch_one(all_ids[0])
        _log.debug("First resource:\n{}".format(first))

    expected_total = batch_size * batch_count

    # Every record must come back, both without a filter ...
    everything = list(dmf.find())
    assert len(everything) == expected_total
    # ... and when filtering on the tag shared by all records
    tagged_all = list(dmf.find({"tags": ["all"]}))
    assert len(tagged_all) == expected_total

    # With the 'tags!' key and both tags, exactly one batch is expected
    first_batch = list(dmf.find({"tags!": ["all", "batch1"]}))
    assert len(first_batch) == batch_size
def test_dmf_find(tmp_dmf):
    """Populate the `tmp_dmf` fixture and check the counts returned by `find()`.

    Resources are added in batches; the test then checks how many records
    come back for an unfiltered search, a search on the shared tag, and a
    search using the 'tags!' key with two tags.
    """
    # Populate with batches of records:
    #  - every record carries the tag 'all'
    #  - records of batch N additionally carry the tag 'batch<N>'
    #  - records of batch N are given version 1.0.<N>
    #  - individual resources will have data of {i: 0..<per_batch-1>}
    per_batch, n_batches = 10, 9
    all_ids = []
    for seq in range(1, n_batches + 1):
        tag = 'batch{:d}'.format(seq)
        ver = resource.version_list([1, 0, seq])
        new_ids = add_resources(
            tmp_dmf,
            num=per_batch,
            tags=['all', tag],
            version_info={'version': ver},
        )
        all_ids.extend(new_ids)

    # Fetching a resource just to print it is skipped unless debugging is on
    if _log.isEnabledFor(logging.DEBUG):
        sample = tmp_dmf.fetch_one(all_ids[0])
        _log.debug("First resource:\n{}".format(sample))

    grand_total = per_batch * n_batches

    # Find all records, 2 ways: no filter, and the tag every record has
    unfiltered = list(tmp_dmf.find())
    assert len(unfiltered) == grand_total
    by_common_tag = list(tmp_dmf.find({'tags': ['all']}))
    assert len(by_common_tag) == grand_total

    # With the 'tags!' key and both tags, exactly one batch is expected
    only_batch1 = list(tmp_dmf.find({'tags!': ['all', 'batch1']}))
    assert len(only_batch1) == per_batch
def __init__(self, dmf, default_version=None):
    """Constructor.

    Args:
        dmf (idaes.dmf.DMF): Data management framework.
        default_version (Union[None, list, tuple]): Default version to
            give the class, if the containing module does not have
            a `__version__` variable. If None, the absence of that
            variable will cause an error.

    Raises:
        TypeError: if `default_version` isn't something that
            :func:`resource.version_list` can convert. The original
            `ValueError` is attached as the exception's cause.
    """
    self._dmf = dmf
    # Stays None when no default was given; otherwise holds the value
    # returned by `resource.version_list()`.
    self._defver = None
    if default_version is not None:
        try:
            self._defver = resource.version_list(default_version)
        except ValueError as err:
            # Chain explicitly so the traceback shows the conversion
            # failure as the direct cause of the TypeError.
            raise TypeError('Bad "default_version": {}'.format(err)) from err
def visit_metadata(self, obj, meta):
    """Add a "code" resource to the DMF that indexes one property class.

    Called for each property class encountered during the "walk"
    initiated by `index_property_metadata()`.

    The new resource records the class' default units and properties,
    and a single "codes" entry giving the dotted class name and version.
    If a resource for the same class with the same version is already
    in the DMF, nothing is added. Otherwise the resource is added and,
    if resources for other versions of the same class were found, it is
    linked by a "version" relation to the one with the highest version.

    Args:
        obj (property_base.PropertyParameterBase): Property class instance
        meta (property_base.PropertyClassMetadata): Associated metadata

    Returns:
        None

    Raises:
        AttributeError: if the module containing `obj` has no
            `__version__` attribute and no default version was given
            to the constructor.
    """
    import sys  # function-scope import; only needed for the module lookup

    qualified_name = ".".join([obj.__module__, obj.__name__])
    _log.debug(
        "Adding resource to DMF that indexes the property package "
        '"{}"'.format(qualified_name)
    )
    r = resource.Resource(type_=resource.ResourceTypes.code)
    r.data = {"units": meta.default_units, "properties": meta.properties}

    # `obj.__module__` is only the module's *name* (a string), which never
    # has a `__version__`. Look up the module object itself so that its
    # version, when present, takes precedence over the default.
    containing_module = sys.modules.get(obj.__module__)
    if hasattr(containing_module, "__version__"):
        obj_ver = resource.version_list(containing_module.__version__)
    elif self._defver is None:
        raise AttributeError(
            "No __version__ for module {}, and no "
            "default".format(obj.__module__)
        )
    else:
        obj_ver = self._defver

    r.v["codes"].append({
        "type": "class",
        "language": "python",
        "name": qualified_name,
        "version": obj_ver,
    })
    r.v["tags"].append(self.INDEXED_PROPERTY_TAG)

    # Search for existing indexed codes.
    # A match exists if all 3 of these are the same:
    #   codes.type == class
    #   codes.language == python
    #   codes.name == <module>.<class>
    info = {k: r.v["codes"][0][k] for k in ("type", "language", "name")}
    rsrc_list, dup_rsrc = [], None
    # Loop through all the right kind of resources. The query uses the same
    # tag constant that is attached to new resources above, so the two
    # cannot drift apart.
    for rsrc in self._dmf.find({
        r.TYPE_FIELD: resource.ResourceTypes.code,
        "tags": [self.INDEXED_PROPERTY_TAG],
    }):
        codes = rsrc.v["codes"]
        # skip any resources without exactly one code
        if len(codes) != 1:
            continue
        code = codes[0]
        # skip any resource of wrong code type, name, lang.
        if any(code[k] != info[k] for k in info):
            continue
        # skip any resources missing the recorded metadata
        if any(data_key not in rsrc.data for data_key in r.data):
            continue
        # If the version of the found code is the same as the
        # version of the one to be added, then it is a duplicate
        if code["version"] == obj_ver:
            dup_rsrc = rsrc
            break
        rsrc_list.append(rsrc)

    if dup_rsrc is not None:
        # This is considered a normal, non-exceptional situation
        _log.debug(
            "DMFVisitor: Not adding duplicate index for "
            "{}v{}".format(info["name"], obj_ver)
        )
        return

    # add the resource
    r.validate()
    _log.debug(
        'DMFVisitor: Adding resource for code "{}"v{} type={}'.format(
            r.v["codes"][0]["name"],
            r.v["codes"][0]["version"],
            r.v["codes"][0]["type"],
        )
    )
    self._dmf.add(r)
    if rsrc_list:
        # Connect to most recent (highest) version
        rsrc_list.sort(key=lambda rs: rs.v["codes"][0]["version"])
        rsrc = rsrc_list[-1]
        rel = resource.Triple(r, resource.Predicates.version, rsrc)
        resource.create_relation(rel)
        # Both ends of the relation are written back to the DMF
        self._dmf.update(rsrc)
        self._dmf.update(r)
def register(
    resource_type,
    url,
    info,
    copy,
    strict,
    unique,
    contained,
    derived,
    used,
    prev,
    is_subject,
    version,
):
    """Create a resource from a file and add it to the DMF.

    On any failure a message is echoed with `click.echo` and the process
    exits through `sys.exit` with one of the `Code` values.

    Args:
        resource_type: Passed to `Resource.from_file` as `as_type`.
        url: Object with `scheme` and `path` attributes (presumably a
            parsed URL -- confirm against the caller). Only the "file"
            scheme or an empty scheme is accepted.
        info: If equal to "yes", show the new resource with `_ShowInfo`;
            otherwise echo only the new resource identifier.
        copy: Passed to `Resource.from_file` as `do_copy`.
        strict: Passed to `Resource.from_file` as `strict`.
        unique: If true, refuse to register a file whose SHA1 and path
            match the first datafile of a resource already in the DMF.
        contained: Identifiers of resources for the "contains" relation.
        derived: Identifiers of resources for the "derived" relation.
        used: Identifiers of resources for the "uses" relation.
        prev: Identifiers of resources for the "version" relation.
        is_subject: If equal to "yes", the new resource is passed first
            to `create_relation_args`; otherwise the existing resource
            is passed first.
        version: If non-empty, converted with `resource.version_list` and
            stored as the new resource's version.
    """
    _log.debug(f"Register object type='{resource_type}' url/path='{url.path}'")
    # process url
    if url.scheme not in ("file", ""):
        click.echo("Currently, URL must be a file")
        sys.exit(Code.NOT_SUPPORTED.value)
    path = url.path

    # create the resource
    _log.debug("create resource")
    try:
        rsrc = resource.Resource.from_file(
            path, as_type=resource_type, strict=strict, do_copy=copy
        )
    except resource.Resource.InferResourceTypeError as err:
        click.echo(f"Failed to infer resource: {err}")
        sys.exit(Code.IMPORT_RESOURCE.value)
    except resource.Resource.LoadResourceError as err:
        click.echo(f"Failed to load resource: {err}")
        sys.exit(Code.IMPORT_RESOURCE.value)

    # connect to DMF
    try:
        dmf = DMF()
    except errors.WorkspaceError as err:
        click.echo(f"Failed to connect to DMF: {err}")
        sys.exit(Code.WORKSPACE_NOT_FOUND.value)
    except errors.DMFError as err:
        click.echo(f"Failed to connect to DMF: {err}")
        sys.exit(Code.DMF.value)

    # check uniqueness
    if unique:
        df = rsrc.v["datafiles"][0]  # file info for this upload
        query = {"datafiles": [{"sha1": df["sha1"]}]}
        # NOTE(review): the path comparison is a substring test (`in`),
        # not equality -- confirm that this is intended.
        dup_ids = [
            dup.id
            for dup in dmf.find(query)
            if dup.v["datafiles"][0]["path"] in df["path"]
        ]
        n_dup = len(dup_ids)
        if n_dup > 0:
            click.echo(
                f"This file is already in {n_dup} resource(s): "
                f"{' '.join(dup_ids)}"
            )
            sys.exit(Code.DMF_OPER.value)

    # process relations
    _log.debug("add relations")
    rel_to_add = {  # translate into standard relation names
        resource.PR_CONTAINS: contained,
        resource.PR_DERIVED: derived,
        resource.PR_USES: used,
        resource.PR_VERSION: prev,
    }
    target_resources = {}  # keep target resources in dict, update at end
    for rel_name, rel_ids in rel_to_add.items():
        for rel_id in rel_ids:
            # Fetch each target only once, so that several relations to the
            # same resource accumulate on a single object.
            if rel_id in target_resources:
                rel_subj = target_resources[rel_id]
            else:
                rel_subj = dmf.fetch_one(rel_id)
                target_resources[rel_id] = rel_subj
            if rel_subj is None:
                click.echo(f"Relation {rel_name} target not found: {rel_id}")
                sys.exit(Code.DMF_OPER.value)
            if is_subject == "yes":
                resource.create_relation_args(rsrc, rel_name, rel_subj)
            else:
                resource.create_relation_args(rel_subj, rel_name, rsrc)
            _log.debug(f"added relation {rsrc.id} <-- {rel_name} -- {rel_id}")

    # add metadata
    # The version is validated *before* the target resources are written
    # back, so that an invalid value cannot leave relations in the DMF that
    # point at a resource which was never added.
    if version:
        try:
            vlist = resource.version_list(version)
        except ValueError:
            click.echo(f"Invalid version `{version}`")
            sys.exit(Code.INPUT_VALUE.value)
        else:
            rsrc.v["version_info"]["version"] = vlist

    _log.debug("update resource relations")
    for rel_rsrc in target_resources.values():
        dmf.update(rel_rsrc)

    # add the resource
    # NOTE(review): if this fails as a duplicate, the target updates above
    # have already been applied -- confirm whether that is acceptable.
    _log.debug("add resource begin")
    try:
        new_id = dmf.add(rsrc)
    except errors.DuplicateResourceError as err:
        click.echo(f"Failed to add resource: {err}")
        sys.exit(Code.DMF_OPER.value)
    _log.debug(f"added resource: {new_id}")

    if info == "yes":
        pfxlen = len(new_id)
        si = _ShowInfo("term", pfxlen)
        for found in dmf.find_by_id(new_id):
            si.show(found)
    else:
        click.echo(new_id)