def fuzzymatch(set, value):
    """Fuzzy-look up *value* and return the best-scoring candidate.

    Queries the fuzzy *set* for *value*; when the top candidate scores
    above 0.2 it is suggested via log.warning and returned, otherwise
    None is returned.
    """
    result = set.get(value)
    # Guard: no candidates at all, or best score too weak to suggest.
    if not result or result[0][0] <= 0.2:
        return None
    log.warning(f"Did you mean {result[0][1]} (matched from {value})?")
    return result[0][1]
def lint(session, model):
    """
    Check every instance in the database for schema violations.

    Two checks are run per class property: mandatory properties that are
    NULL ("Missing"), and references whose target does not exist
    ("Invalid").

    :param session: SQLAlchemy session bound to the dataset database.
    :param model: Parsed CIM model; its schema provides the class hierarchy.
    :return: pandas pivot table of violation counts, indexed by
        (Type, Class, Total, Property).
    """
    events = []
    # Walk the class hierarchy depth-first so every mapped class is linted.
    for CIM_class in tqdm(model.schema.class_hierarchy("dfs"),
                          desc=f"Linting...", leave=True):
        query = session.query(CIM_class.class_)
        for prop in CIM_class.props:
            if not prop.optional and prop.used:
                total = query.count()
                # Instances of this class where the mandatory property is NULL.
                objects = query.filter_by(**{prop.full_label: None}).count()
                if objects:
                    events.append({
                        "Class": CIM_class.label,
                        "Property": prop.full_label,
                        "Total": total,
                        "Type": "Missing",
                        "Violations": objects,
                        "Unique": None
                    })
                    log.debug(
                        f"Missing mandatory property {prop.full_label} for "
                        f"{objects} instances of type {CIM_class.label}.")
            if prop.range:
                # NOTE(review): `total` is only bound when the property is
                # mandatory-and-used, and `col`/`validity` only when the range
                # is a CIMClass/CIMEnum -- other combinations would raise
                # UnboundLocalError below. Presumably ranged properties always
                # satisfy these conditions; verify against the schema.
                try:
                    if isinstance(prop.range, CIMClass):
                        # Class references are stored in a "<label>_id" column;
                        # EXCEPT the referenced ids to find dangling references.
                        col = getattr(CIM_class.class_, prop.full_label + "_id")
                        validity = session.query(col).except_(
                            session.query(prop.range.class_.id))
                    elif isinstance(prop.range, CIMEnum):
                        # Enum references are stored in a "<label>_name" column.
                        col = getattr(CIM_class.class_, prop.full_label + "_name")
                        validity = session.query(col).except_(
                            session.query(CIMEnumValue.name))
                except AttributeError:
                    log.warning(
                        f"Couldn't determine validity of {prop.full_label} on "
                        f"{CIM_class.label}. The linter does not yet support "
                        f"many-to-many relationships.")
                    # ToDo: Association table errors are currently not caught
                else:
                    count = validity.count()
                    # query.except() returns (None) if right hand side table is empty
                    if count > 1 or (count == 1 and tuple(
                            validity.one())[0] is not None):
                        # Count instances whose reference value is one of the
                        # dangling values found above.
                        non_unique = query.filter(
                            col.in_(val[0] for val in validity.all())).count()
                        events.append({
                            "Class": CIM_class.label,
                            "Property": prop.full_label,
                            "Total": total,
                            "Type": "Invalid",
                            "Violations": non_unique,
                            "Unique": count
                        })
    return pivot_table(DataFrame(events), values=["Violations", "Unique"],
                       index=["Type", "Class", "Total", "Property"])
def parse_values(self, el, session):
    """
    Parse an XML element's property values into constructor arguments and
    pending association-table inserts.

    Recurses into the parent class first, so inherited properties are
    collected before this class's own properties.

    :param el: lxml element describing one object instance.
    :param session: Database session (threaded through to the parent call).
    :return: Tuple ``(argmap, insertables)`` -- ``argmap`` maps property
        keys to parsed values, ``insertables`` accumulates insert
        statements for many-to-many association tables.
    """
    if not self.parent:
        argmap = {}
        insertables = []
    else:
        # Inherited properties first; this class's values may extend them.
        argmap, insertables = self.parent.parse_values(el, session)
    props = [prop for prop in self.props if prop.used]
    for prop in props:
        value = prop.xpath(el)
        if prop.many_remote and prop.used and value:
            # Many-to-many: build (local_id, remote_id) rows for the
            # association table instead of a scalar argument.
            # Assumes the element's first attribute is its rdf ID -- verify.
            _id = [el.attrib.values()[0]]
            _remote_ids = []
            if len(set(value)) > 1:
                # Multiple reference attributes: collect ids from each one.
                for raw_value in value:
                    _remote_ids = _remote_ids + [
                        v for v in raw_value.split("#") if len(v)
                    ]
            else:
                _remote_ids = [v for v in value[0].split("#") if len(v)]
            # Repeat the local id so it pairs with every remote id.
            _ids = _id * len(_remote_ids)
            # Insert tuples in chunks of 400 elements max
            for chunk in chunks(list(zip(_ids, _remote_ids)), 400):
                _ins = prop.association_table.insert([{
                    f"{prop.domain.label}_id": _id,
                    f"{prop.range.label}_id": _remote_id
                } for (_id, _remote_id) in chunk])
                insertables.append(_ins)
        elif len(value) == 1 or len(set(value)) == 1:
            # Exactly one (distinct) value: coerce it to the mapped datatype.
            value = value[0]
            if isinstance(prop.range, CIMEnum):
                argmap[prop.key] = shorten_namespace(value, self.nsmap)
            else:
                try:
                    t = prop.mapped_datatype
                    if t == "Float":
                        argmap[prop.key] = float(value)
                    elif t == "Boolean":
                        argmap[prop.key] = value.lower() == "true"
                    elif t == "Integer":
                        argmap[prop.key] = int(value)
                    elif len([v for v in value.split("#") if v]) > 1:
                        # Several "#"-separated fragments in a single value.
                        # NOTE(review): at this point `value` is a string, so
                        # len(set(value)) counts unique characters -- the
                        # message's "unique values" count looks off; verify.
                        log.warning(
                            f"Ambiguous data values for {self.name}:{prop.key}: {len(set(value))} unique values. "
                            f"(Skipped)")
                        # If reference doesn't resolve value is set to None (Validation
                        # has to catch missing obligatory values)
                    else:
                        argmap[prop.key] = value.replace("#", "")
                except ValueError:
                    # Coercion failed: fall back to the raw reference string.
                    argmap[prop.key] = value.replace("#", "")
        elif len(value) > 1:
            # Multiple distinct values for a single-valued property: skip it.
            log.warning(
                f"Ambiguous data values for {self.name}:{prop.key}: {len(set(value))} unique values. "
                f"(Skipped)")
            # If reference doesn't resolve value is set to None (Validation
            # has to catch missing obligatory values)
    return argmap, insertables
def _raw_property(self, property_identifier) -> Union[list, str, None]: """ Extract a property from the CIM entity :param property_identifier: property name :return: The CIM entity's property as a list, a string, or None """ if self.Attributes[property_identifier] is None: xp = self.XPathMap if property_identifier not in xp.keys(): raise KeyError(f"Invalid property_identifier name {property_identifier}.") results = xp[property_identifier](self.description) # pylint: disable=unsubscriptable-object if len(set(results)) == 1: self.Attributes[property_identifier] = results[0] elif not results: self.Attributes[property_identifier] = None else: log.warning(f"Ambiguous class property_identifier ({property_identifier}) for {self.name}.") self.Attributes[property_identifier] = [result for result in set(results)] return self.Attributes[property_identifier]
def merge_sources(sources):
    """
    Merge the elements of several CIM sources into one nested dict.

    :param sources: Iterable of parsed sources, each exposing a ``tree``
        (lxml ElementTree) whose root children are RDF elements.
    :return: ``defaultdict`` mapping ``classname -> {uuid: element}``; an
        element seen again in another source is merged by appending its
        children to the first occurrence.
    """
    d_ = defaultdict(dict)
    from lxml.etree import XPath
    # Hoisted out of the loops: the namespace map is invariant across all
    # sources/elements (the original recomputed it per element).
    nsmap = get_nsmap(sources)
    xp = {
        "id": XPath("@rdf:ID", namespaces=nsmap),
        "about": XPath("@rdf:about", namespaces=nsmap)
    }
    hashable_nsmap = HDict(nsmap)
    for source in sources:
        for element in source.tree.getroot():
            try:
                uuid = determine_uuid(element, xp)
                classname = shorten_namespace(element.tag, hashable_nsmap)
                if classname not in d_ or uuid not in d_[classname]:
                    d_[classname][uuid] = element
                else:
                    # Same object described in another source (e.g. via
                    # rdf:about): merge by appending its children.
                    for sub in element:
                        d_[classname][uuid].append(sub)
            except ValueError:
                # determine_uuid found neither rdf:ID nor rdf:about.
                log.warning(f"Skipped element during merge: {element}.")
    return d_
def update_path(self, path):
    """Resolve the database file location and store it in ``self.path``.

    *path* may be None (use the cwd), a single datasource path, or a list
    of datasource paths (use their common root). A relative ``self.path``
    is anchored next to the datasources; if the resolved directory is not
    an actual directory, fall back to ``out.db`` beside it.
    """
    # 1) Determine the directory the datasource(s) live in.
    if path is None:
        out_dir = os.getcwd()
    elif isinstance(path, list):
        try:
            out_dir = os.path.commonpath(
                [os.path.abspath(p) for p in path])
        except ValueError:
            # Paths are on different drives - default to cwd.
            log.warning(
                f"Datasources have no common root. Database file will be saved to {os.getcwd()}"
            )
            out_dir = os.getcwd()
    else:
        out_dir = os.path.abspath(path)
    # 2) Anchor the database file relative to that directory.
    if os.path.isabs(self.path):
        self.path = os.path.abspath(self.path)
    elif os.path.isdir(out_dir):
        self.path = os.path.join(out_dir, self.path)
    else:
        self.path = os.path.join(os.path.dirname(out_dir), "out.db")
def _generate(self):
    """
    Build the schema's element catalogue from the RDF schema root.

    Two passes over ``self.root``: the first classifies each element as
    class / enum / datatype / package, postponing property elements and
    any other typed elements; the second resolves the postponed elements
    into properties, datatype sub-properties, or enum values. Finally the
    per-name element lists are merged via ``self._merge_elements()``.
    """
    xp_type_res = XPath(f"rdf:type/@rdf:resource",
                        namespaces=self.root.nsmap)
    xp_stype_res = XPath(f"cims:stereotype/@rdf:resource",
                         namespaces=self.root.nsmap)
    xp_stype_txt = XPath(f"cims:stereotype/text()",
                         namespaces=self.root.nsmap)
    postponed = []
    # Pass 1: classes, enums, datatypes and packages; postpone the rest.
    for element in self.root:
        type_res = xp_type_res(element)
        stype_res = xp_stype_res(element)
        stype_txt = xp_stype_txt(element)
        if Schema._isclass(type_res):
            if Schema._isenum(stype_res):
                obj = CIMEnum(element)
                self.Elements["CIMEnum"][obj.name].append(obj)
            elif Schema._isdt(stype_txt):
                obj = CIMDT(element)
                self.Elements["CIMDT"][obj.name].append(obj)
            else:
                obj = CIMClass(element)
                self.Elements["CIMClass"][obj.name].append(obj)
        elif Schema._isprop(type_res):
            # Properties need their domain classes first; handle in pass 2.
            postponed.append(element)
        elif Schema._ispackage(type_res):
            obj = CIMPackage(element)
            self.Elements["CIMPackage"][obj.name].append(obj)
        elif type_res:
            # Typed but unrecognised -- presumably enum values; retry in pass 2.
            postponed.append(element)
        else:
            obj = SchemaElement(element)
            log.warning(f"Element skipped: {obj.name}")
    # Pass 2: resolve postponed elements now that classes/DTs/enums exist.
    for element in postponed:
        type_res = xp_type_res(element)
        if Schema._isprop(type_res):
            obj = CIMProp(element)
            if obj._domain in self.Elements["CIMDT"].keys():
                # Property of a datatype: specialise by its suffix.
                if obj.name.endswith(".unit"):
                    obj = CIMDTUnit(element)
                    self.Elements["CIMDTUnit"][obj.name].append(obj)
                elif obj.name.endswith(".value"):
                    obj = CIMDTValue(element)
                    self.Elements["CIMDTValue"][obj.name].append(obj)
                elif obj.name.endswith(".multiplier"):
                    obj = CIMDTMultiplier(element)
                    self.Elements["CIMDTMultiplier"][obj.name].append(obj)
                elif obj.name.endswith(".denominatorUnit"):
                    obj = CIMDTDenominatorUnit(element)
                    self.Elements["CIMDTDenominatorUnit"][obj.name].append(
                        obj)
                elif obj.name.endswith(".denominatorMultiplier"):
                    obj = CIMDTDenominatorMultiplier(element)
                    self.Elements["CIMDTDenominatorMultiplier"][
                        obj.name].append(obj)
                else:
                    obj = CIMDTProperty(element)
                    self.Elements["CIMDTProperty"][obj.name].append(obj)
            else:
                self.Elements["CIMProp"][obj.name].append(obj)
            continue
        # Not a property: try to resolve it as an enum value.
        obj = CIMEnumValue(element)
        if obj._enum_name and obj._enum_name in self.Elements[
                "CIMEnum"].keys():
            self.Elements["CIMEnumValue"][obj.name].append(obj)
        else:
            log.debug(f"Failed to identify purpose for {type_res}")
    self._merge_elements()
    for key, value in self.Elements.items():
        if value:
            log.debug(f"Generated {len(value)} {key}.")
def test_all(runslow=False): if runslow: pytest.main([os.path.join(_TESTROOT), "--runslow"]) else: pytest.main([os.path.join(_TESTROOT)]) except ModuleNotFoundError: pass try: # See if we already know a schemaroot CONFIG["Paths"]["SCHEMAROOT"] = get_path("SCHEMAROOT") if not os.path.isdir(CONFIG["Paths"]["SCHEMAROOT"]): # Is schemaroot an actual directory? log.warning(f"Invalid schema path in configuration.") raise NotADirectoryError except (KeyError, NotADirectoryError): if os.path.isdir(os.path.join(_PACKAGEROOT, "res", "schemata")): # Look in the default path CONFIG["Paths"]["SCHEMAROOT"] = os.path.join(_PACKAGEROOT, "res", "schemata") log.info(f"Found schemata in default location.") else: # Ask user to configure log.warning(f"No schemata configured. Use cimpyorm.configure(path_to_schemata) to set-up.") from cimpyorm.api import configure try: # See if we already know a datasetroot CONFIG["Paths"]["DATASETROOT"] = get_path("DATASETROOT") if not os.path.isdir(CONFIG["Paths"]["DATASETROOT"]):