Example #1
0
def fuzzymatch(set, value):
    """
    Return the best fuzzy match for ``value`` if it is good enough.

    :param set: Lookup object whose ``get(value)`` yields a sequence of
        ``(score, match)`` entries, or something falsy when there is no candidate.
    :param value: The value to look up.
    :return: The match of the first candidate when its score is above 0.2
        (a warning suggesting the match is logged), otherwise None.
    """
    result = set.get(value)
    # No candidates at all, or the first candidate's score is not above the threshold.
    if not result or not result[0][0] > 0.2:
        return None
    log.warning(f"Did you mean {result[0][1]} (matched from {value})?")
    return result[0][1]
Example #2
0
def lint(session, model):
    """
    Check the stored dataset for violations of the schema.

    For every class of the schema and each of its mandatory (non-optional) properties
    that are in use, two checks are performed:

    * ``"Missing"``: number of instances for which the property is NULL.
    * ``"Invalid"``: for properties whose range is a class or an enum, values that do not
      exist in the referenced class table / the enum-value table.

    :param session: Session object used to issue the queries.
    :param model: Model object; its ``schema.class_hierarchy("dfs")`` is iterated.
    :return: Pivot table of the collected violations (values ``Violations`` and ``Unique``,
        indexed by ``Type``, ``Class``, ``Total`` and ``Property``).
    """
    events = []
    for CIM_class in tqdm(model.schema.class_hierarchy("dfs"),
                          desc=f"Linting...",
                          leave=True):
        query = session.query(CIM_class.class_)
        for prop in CIM_class.props:
            # Only mandatory properties that are actually used are linted.
            if prop.optional or not prop.used:
                continue
            total = query.count()
            objects = query.filter_by(**{prop.full_label: None}).count()
            if objects:
                events.append({
                    "Class": CIM_class.label,
                    "Property": prop.full_label,
                    "Total": total,
                    "Type": "Missing",
                    "Violations": objects,
                    "Unique": None
                })
                log.debug(
                    f"Missing mandatory property {prop.full_label} for "
                    f"{objects} instances of type {CIM_class.label}.")
            if not prop.range:
                continue
            try:
                if isinstance(prop.range, CIMClass):
                    col = getattr(CIM_class.class_,
                                  prop.full_label + "_id")
                    validity = session.query(col).except_(
                        session.query(prop.range.class_.id))
                elif isinstance(prop.range, CIMEnum):
                    col = getattr(CIM_class.class_,
                                  prop.full_label + "_name")
                    validity = session.query(col).except_(
                        session.query(CIMEnumValue.name))
                else:
                    # Any other kind of range has no reference table to validate against.
                    # Skip it explicitly: otherwise `col`/`validity` would be unbound or
                    # left over from a previously checked property.
                    continue
            except AttributeError:
                log.warning(
                    f"Couldn't determine validity of {prop.full_label} on "
                    f"{CIM_class.label}. The linter does not yet support "
                    f"many-to-many relationships.")
                # ToDo: Association table errors are currently not caught
            else:
                count = validity.count()
                # query.except() returns (None) if right hand side table is empty
                if count > 1 or (count == 1 and tuple(
                        validity.one())[0] is not None):
                    # Materialize the invalid values; in_() is given a concrete list
                    # rather than a one-shot generator.
                    invalid_values = [val[0] for val in validity.all()]
                    non_unique = query.filter(
                        col.in_(invalid_values)).count()
                    events.append({
                        "Class": CIM_class.label,
                        "Property": prop.full_label,
                        "Total": total,
                        "Type": "Invalid",
                        "Violations": non_unique,
                        "Unique": count
                    })

    return pivot_table(DataFrame(events),
                       values=["Violations", "Unique"],
                       index=["Type", "Class", "Total", "Property"])
Example #3
0
 def parse_values(self, el, session):
     """
     Extract the values of this class's used properties (and those of its parents)
     from an element.

     :param el: The element to parse; passed to each property's ``xpath``.
     :param session: Handed through unchanged to the parent's ``parse_values``.
     :return: Tuple ``(argmap, insertables)``. ``argmap`` maps property keys to the parsed
         values; ``insertables`` collects the insert statements created for the
         association tables of ``many_remote`` properties.
     """
     if self.parent:
         argmap, insertables = self.parent.parse_values(el, session)
     else:
         argmap, insertables = {}, []
     for prop in self.props:
         if not prop.used:
             continue
         value = prop.xpath(el)
         if prop.many_remote and value:
             local_id = el.attrib.values()[0]
             # If all raw values are identical, only the first one is evaluated.
             raw_values = value if len(set(value)) > 1 else value[:1]
             remote_ids = []
             for raw_value in raw_values:
                 remote_ids.extend(v for v in raw_value.split("#") if v)
             pairs = [(local_id, remote_id) for remote_id in remote_ids]
             # Insert tuples in chunks of 400 elements max
             for chunk in chunks(pairs, 400):
                 _ins = prop.association_table.insert([{
                     f"{prop.domain.label}_id":
                     _id,
                     f"{prop.range.label}_id":
                     _remote_id
                 } for (_id, _remote_id) in chunk])
                 insertables.append(_ins)
         elif len(value) == 1 or len(set(value)) == 1:
             value = value[0]
             if isinstance(prop.range, CIMEnum):
                 argmap[prop.key] = shorten_namespace(value, self.nsmap)
             else:
                 try:
                     t = prop.mapped_datatype
                     if t == "Float":
                         argmap[prop.key] = float(value)
                     elif t == "Boolean":
                         argmap[prop.key] = value.lower() == "true"
                     elif t == "Integer":
                         argmap[prop.key] = int(value)
                     else:
                         references = [v for v in value.split("#") if v]
                         if len(references) > 1:
                             # Report the number of distinct references; counting
                             # set(value) on the string would count characters.
                             log.warning(
                                 f"Ambiguous data values for {self.name}:{prop.key}: {len(set(references))} unique values. "
                                 f"(Skipped)")
                             # The key stays unset in argmap (Validation has to
                             # catch missing obligatory values)
                         else:
                             argmap[prop.key] = value.replace("#", "")
                 except ValueError:
                     # Value could not be converted to the mapped datatype: keep it as text.
                     argmap[prop.key] = value.replace("#", "")
         elif len(value) > 1:
             log.warning(
                 f"Ambiguous data values for {self.name}:{prop.key}: {len(set(value))} unique values. "
                 f"(Skipped)")
             # The key stays unset in argmap (Validation has to catch missing
             # obligatory values)
     return argmap, insertables
Example #4
0
 def _raw_property(self, property_identifier) -> Union[list, str, None]:
     """
     Return a property of the CIM entity, evaluating its XPath on first access.

     A value already stored in ``self.Attributes`` is returned as is. Otherwise the
     XPath registered for the property is evaluated against ``self.description`` and
     the outcome is stored in ``self.Attributes`` before it is returned.

     :param property_identifier: property name
     :return: The single (unique) result, None if there are no results, or a list of the
         distinct results if they are ambiguous
     :raises KeyError: if no XPath is registered under ``property_identifier``
     """
     if self.Attributes[property_identifier] is not None:
         return self.Attributes[property_identifier]
     xpath_map = self.XPathMap
     if property_identifier not in xpath_map:
         raise KeyError(f"Invalid property_identifier name {property_identifier}.")
     results = xpath_map[property_identifier](self.description)  # pylint: disable=unsubscriptable-object
     distinct = set(results)
     if len(distinct) == 1:
         resolved = results[0]
     elif not results:
         resolved = None
     else:
         log.warning(f"Ambiguous class property_identifier ({property_identifier}) for {self.name}.")
         resolved = list(distinct)
     self.Attributes[property_identifier] = resolved
     return self.Attributes[property_identifier]
Example #5
0
def merge_sources(sources):
    d_ = defaultdict(dict)
    from lxml.etree import XPath
    xp = {
        "id": XPath("@rdf:ID", namespaces=get_nsmap(sources)),
        "about": XPath("@rdf:about", namespaces=get_nsmap(sources))
    }
    for source in sources:
        for element in source.tree.getroot():
            try:
                uuid = determine_uuid(element, xp)
                classname = shorten_namespace(element.tag,
                                              HDict(get_nsmap(sources)))
                if classname not in d_ or uuid not in d_[classname].keys():
                    d_[classname][uuid] = element
                else:
                    [d_[classname][uuid].append(sub) for sub in element]  # pylint: disable=expression-not-assigned
            except ValueError:
                log.warning(f"Skipped element during merge: {element}.")
    return d_
Example #6
0
 def update_path(self, path):
     """
     Resolve ``self.path`` (the database file) relative to the given datasource path(s).

     :param path: None (use the current working directory), a list of paths (use their
         common root, falling back to the current working directory if there is none),
         or a single path.

     An absolute ``self.path`` is only normalized. A relative ``self.path`` is joined onto
     the resolved directory; if the resolved location is not a directory, the database is
     placed as ``out.db`` in its parent directory.
     """
     if isinstance(path, list):
         try:
             out_dir = os.path.commonpath(
                 [os.path.abspath(entry) for entry in path])
         except ValueError:
             # Paths are on different drives - default to cwd.
             log.warning(
                 f"Datasources have no common root. Database file will be saved to {os.getcwd()}"
             )
             out_dir = os.getcwd()
     elif path is None:
         out_dir = os.getcwd()
     else:
         out_dir = os.path.abspath(path)

     if os.path.isabs(self.path):
         self.path = os.path.abspath(self.path)
         return
     if os.path.isdir(out_dir):
         self.path = os.path.join(out_dir, self.path)
     else:
         self.path = os.path.join(os.path.dirname(out_dir), "out.db")
Example #7
0
 def _generate(self):
     """
     Instantiate the schema elements found among the children of ``self.root``.

     First pass: classes, enums, datatypes and packages are created right away, while
     properties and all other typed elements are postponed. Second pass: the postponed
     elements are created as (datatype-)properties or enum values, using the datatypes and
     enums collected in the first pass. Afterwards ``_merge_elements`` is invoked and the
     number of generated elements per kind is logged.
     """
     nsmap = self.root.nsmap
     xp_type_res = XPath(f"rdf:type/@rdf:resource", namespaces=nsmap)
     xp_stype_res = XPath(f"cims:stereotype/@rdf:resource", namespaces=nsmap)
     xp_stype_txt = XPath(f"cims:stereotype/text()", namespaces=nsmap)

     # First pass: everything that does not depend on other elements.
     postponed = []
     for element in self.root:
         type_res = xp_type_res(element)
         stype_res = xp_stype_res(element)
         stype_txt = xp_stype_txt(element)
         if Schema._isclass(type_res):
             if Schema._isenum(stype_res):
                 bucket, obj = "CIMEnum", CIMEnum(element)
             elif Schema._isdt(stype_txt):
                 bucket, obj = "CIMDT", CIMDT(element)
             else:
                 bucket, obj = "CIMClass", CIMClass(element)
             self.Elements[bucket][obj.name].append(obj)
             continue
         if Schema._isprop(type_res):
             postponed.append(element)
             continue
         if Schema._ispackage(type_res):
             obj = CIMPackage(element)
             self.Elements["CIMPackage"][obj.name].append(obj)
             continue
         if type_res:
             postponed.append(element)
             continue
         obj = SchemaElement(element)
         log.warning(f"Element skipped: {obj.name}")

     # Second pass: properties and enum values.
     for element in postponed:
         type_res = xp_type_res(element)
         if not Schema._isprop(type_res):
             obj = CIMEnumValue(element)
             if obj._enum_name and obj._enum_name in self.Elements["CIMEnum"]:
                 self.Elements["CIMEnumValue"][obj.name].append(obj)
             else:
                 log.debug(f"Failed to identify purpose for {type_res}")
             continue
         obj = CIMProp(element)
         if obj._domain not in self.Elements["CIMDT"]:
             self.Elements["CIMProp"][obj.name].append(obj)
             continue
         # Property of a datatype: pick the specialized type by the name's suffix.
         prop_name = obj.name
         if prop_name.endswith(".unit"):
             bucket, obj = "CIMDTUnit", CIMDTUnit(element)
         elif prop_name.endswith(".value"):
             bucket, obj = "CIMDTValue", CIMDTValue(element)
         elif prop_name.endswith(".multiplier"):
             bucket, obj = "CIMDTMultiplier", CIMDTMultiplier(element)
         elif prop_name.endswith(".denominatorUnit"):
             bucket, obj = "CIMDTDenominatorUnit", CIMDTDenominatorUnit(element)
         elif prop_name.endswith(".denominatorMultiplier"):
             bucket, obj = ("CIMDTDenominatorMultiplier",
                            CIMDTDenominatorMultiplier(element))
         else:
             bucket, obj = "CIMDTProperty", CIMDTProperty(element)
         self.Elements[bucket][obj.name].append(obj)

     self._merge_elements()
     for key, value in self.Elements.items():
         if not value:
             continue
         log.debug(f"Generated {len(value)} {key}.")
Example #8
0
    def test_all(runslow=False):
        """
        Run pytest on the test root.

        :param runslow: If truthy, additionally pass ``--runslow`` to pytest.
        """
        pytest_args = [os.path.join(_TESTROOT)]
        if runslow:
            pytest_args.append("--runslow")
        pytest.main(pytest_args)
except ModuleNotFoundError:
    pass


# Resolve the schema directory and store it in CONFIG["Paths"]["SCHEMAROOT"]:
#   1. Use the path returned by get_path("SCHEMAROOT") if it is an existing directory.
#   2. Otherwise (lookup raised KeyError, or the path is not a directory) fall back to
#      <_PACKAGEROOT>/res/schemata if that directory exists.
#   3. Otherwise only log a warning; CONFIG["Paths"]["SCHEMAROOT"] then keeps whatever the
#      try-block assigned (if anything).
try:
    # See if we already know a schemaroot
    CONFIG["Paths"]["SCHEMAROOT"] = get_path("SCHEMAROOT")
    if not os.path.isdir(CONFIG["Paths"]["SCHEMAROOT"]):
        # Is schemaroot an actual directory?
        log.warning(f"Invalid schema path in configuration.")
        # Raised deliberately to reach the fallback handler below.
        raise NotADirectoryError
except (KeyError, NotADirectoryError):
    if os.path.isdir(os.path.join(_PACKAGEROOT, "res", "schemata")):
        # Look in the default path
        CONFIG["Paths"]["SCHEMAROOT"] = os.path.join(_PACKAGEROOT, "res", "schemata")
        log.info(f"Found schemata in default location.")
    else:
        # Ask user to configure
        log.warning(f"No schemata configured. Use cimpyorm.configure(path_to_schemata) to set-up.")
        # NOTE(review): presumably imported here so that `configure` is reachable from this
        # module's namespace, as the warning above suggests - confirm.
        from cimpyorm.api import configure

try:
    # See if we already know a datasetroot
    CONFIG["Paths"]["DATASETROOT"] = get_path("DATASETROOT")
    if not os.path.isdir(CONFIG["Paths"]["DATASETROOT"]):