def _attach_attribute_annotation(node, record):
    """Attach an annotation node listing the extra attributes of a record.

    Attributes whose name is in ``PROV_ATTRIBUTE_QNAMES`` are skipped. If
    nothing is left, no annotation is created. Otherwise one row is rendered
    per remaining attribute, a new ``ann<N>`` node carrying those rows as its
    label is added to the graph, and an edge from the annotation to ``node``
    is added.

    ``count`` and ``dot`` are not defined in this function; they are taken
    from the surrounding scope.

    :param node: The graph node the annotation gets linked to.
    :param record: The record whose attributes are displayed.
    """
    attributes = [
        (attr_name, value)
        for attr_name, value in record.attributes
        if attr_name not in PROV_ATTRIBUTE_QNAMES
    ]

    if not attributes:
        return  # No attribute to display

    # Sort the attributes.
    attributes = sorted_attributes(record.get_type(), attributes)

    def _render_row(attr, value):
        # Render a single annotation row for one (attribute, value) pair.
        #
        # The URIs end up inside the label markup just like the visible
        # text, so they are passed through ``escape`` as well; a raw "&" or
        # "<" in a URI would otherwise produce malformed markup.
        # NOTE(review): whether quotes get escaped too depends on which
        # ``escape`` this module imports - confirm.
        if isinstance(value, Identifier):
            href = ' href=\"%s\"' % escape(value.uri)
        else:
            href = ''
        if isinstance(value, datetime):
            text = six.text_type(value.isoformat())
        else:
            text = six.text_type(value)
        return ANNOTATION_ROW_TEMPLATE % (
            escape(attr.uri),
            escape(six.text_type(attr)),
            href,
            escape(text),
        )

    ann_rows = [ANNOTATION_START_ROW]
    ann_rows.extend(_render_row(attr, value) for attr, value in attributes)
    ann_rows.append(ANNOTATION_END_ROW)

    # Bump the shared counter so every annotation node gets a unique name.
    count[3] += 1
    annotations = pydot.Node(
        'ann%d' % count[3], label='\n'.join(ann_rows),
        **ANNOTATION_STYLE
    )
    dot.add_node(annotations)
    dot.add_edge(pydot.Edge(annotations, node, **ANNOTATION_LINK_STYLE))
def _attach_attribute_annotation(node, record):
    """Add a node showing all non-PROV attributes of ``record``.

    Every attribute of ``record`` whose name is not in
    ``PROV_ATTRIBUTE_QNAMES`` becomes one row of the annotation label. When
    no such attribute exists the function returns without touching the
    graph. The annotation node is named ``ann<N>`` using the counter slot
    ``count[3]`` and is connected to ``node`` by an edge.

    ``count`` and ``dot`` come from the surrounding scope.

    :param node: The graph node the annotation gets linked to.
    :param record: The record whose attributes are displayed.
    """
    attributes = [(attr_name, value)
                  for attr_name, value in record.attributes
                  if attr_name not in PROV_ATTRIBUTE_QNAMES]

    if not attributes:
        return  # No attribute to display

    # Sort the attributes.
    attributes = sorted_attributes(record.get_type(), attributes)

    ann_rows = [ANNOTATION_START_ROW]
    for attr, value in attributes:
        # Link the value cell only when the value is an identifier. The URI
        # is escaped like the visible text because it is interpolated into
        # the same markup; an unescaped "&" or "<" would break the label.
        # NOTE(review): quote handling depends on which ``escape`` this
        # module imports - confirm.
        value_href = (' href=\"%s\"' % escape(value.uri)
                      if isinstance(value, Identifier) else '')
        # datetime values are shown in ISO format, everything else as text.
        value_text = (six.text_type(value.isoformat())
                      if isinstance(value, datetime)
                      else six.text_type(value))
        ann_rows.append(
            ANNOTATION_ROW_TEMPLATE % (escape(attr.uri),
                                       escape(six.text_type(attr)),
                                       value_href,
                                       escape(value_text)))
    ann_rows.append(ANNOTATION_END_ROW)

    # The counter makes the name of each annotation node unique.
    count[3] += 1
    annotations = pydot.Node('ann%d' % count[3],
                             label='\n'.join(ann_rows),
                             **ANNOTATION_STYLE)
    dot.add_node(annotations)
    dot.add_edge(pydot.Edge(annotations, node, **ANNOTATION_LINK_STYLE))
def serialize_bundle(self, bundle, element=None, force_types=False):
    """
    Serializes a bundle or document to PROV XML.

    :param bundle: The bundle or document.
    :param element: The XML element to write to. Will be created if None.
    :type force_types: boolean, optional
    :param force_types: Will force xsd:types to be written for most
        attributes mainly PROV-"attributes", e.g. tags not in the
        PROV namespace. Off by default meaning xsd:type attributes will
        only be set for prov:type, prov:location, and prov:value as is
        done in the official PROV-XML specification. Furthermore the
        types will always be set if the Python type requires it. False
        is a good default and it should rarely require changing.
    :returns: The XML element the records were written to: a new
        ``document`` element if ``element`` is None, otherwise the
        ``bundleContent`` sub-element created below ``element``.
    """
    # Build the namespace map for lxml and attach it to the root XML
    # element.
    nsmap = {
        ns.prefix: ns.uri
        for ns in self.document._namespaces.get_registered_namespaces()
    }
    if self.document._namespaces._default:
        nsmap[None] = self.document._namespaces._default.uri
    for namespace in bundle.namespaces:
        # NOTE(review): nsmap is keyed by prefix but this tests the
        # namespace object itself, so the guard possibly never skips
        # anything - confirm whether ``namespace.prefix`` was intended.
        if namespace not in nsmap:
            nsmap[namespace.prefix] = namespace.uri

    for default_ns in DEFAULT_NAMESPACES.values():
        uri = default_ns.uri
        if default_ns.prefix == "xsd":
            # The XSD namespace for some reason has no hash at the end
            # for PROV XML, but for all other serializations it does.
            uri = uri.rstrip("#")
        nsmap[default_ns.prefix] = uri

    if element is not None:
        xml_bundle_root = etree.SubElement(
            element, _ns_prov("bundleContent"), nsmap=nsmap)
    else:
        xml_bundle_root = etree.Element(_ns_prov("document"), nsmap=nsmap)

    if bundle.identifier:
        xml_bundle_root.attrib[_ns_prov("id")] = str(bundle.identifier)

    # Constants of the xsd type inference further down. They do not depend
    # on the record or attribute, so they are built once instead of once
    # per attribute.
    #
    # To enable a mapping of Python types to XML and back, the XSD type
    # must be written for these types.
    always_check = (
        bool,
        datetime.datetime,
        float,
        int,
        prov.identifier.Identifier,
    )
    typed_attrs = (PROV_TYPE, PROV_LOCATION, PROV_VALUE)
    untyped_attrs = (PROV_ATTR_TIME, PROV_LABEL)

    for record in bundle._records:
        rec_type = record.get_type()
        identifier = str(record._identifier) if record._identifier else None
        attrs = {_ns_prov("id"): identifier} if identifier else None

        # Derive the record label from its attributes which is sometimes
        # needed.
        attributes = list(record.attributes)
        rec_label = self._derive_record_label(rec_type, attributes)

        elem = etree.SubElement(xml_bundle_root, _ns_prov(rec_label), attrs)

        for attr, value in sorted_attributes(rec_type, attributes):
            subelem = etree.SubElement(
                elem, _ns(attr.namespace.uri, attr.localpart))
            if isinstance(value, prov.model.Literal):
                if value.datatype not in [
                    None, PROV["InternationalizedString"]
                ]:
                    subelem.attrib[_ns_xsi("type")] = "%s:%s" % (
                        value.datatype.namespace.prefix,
                        value.datatype.localpart,
                    )
                if value.langtag is not None:
                    subelem.attrib[_ns_xml("lang")] = value.langtag
                v = value.value
            elif isinstance(value, prov.model.QualifiedName):
                if attr not in PROV_ATTRIBUTE_QNAMES:
                    subelem.attrib[_ns_xsi("type")] = "xsd:QName"
                v = str(value)
            elif isinstance(value, datetime.datetime):
                v = value.isoformat()
            else:
                v = str(value)

            # xsd type inference.
            #
            # This is a bit messy and there are all kinds of special
            # rules but it appears to get the job done.
            #
            # If it is a type element and does not yet have an
            # associated xsi type, try to infer it from the value.
            # The not startswith("prov:") check is a little bit hacky to
            # avoid type interference when the type is a standard prov
            # type.
            #
            # ``type(value) in always_check`` is an exact type match, so
            # instances of subclasses of the listed types do not trigger
            # the check on their own.
            if ((force_types
                 or type(value) in always_check
                 or attr in typed_attrs)
                    and _ns_xsi("type") not in subelem.attrib
                    and not str(value).startswith("prov:")
                    and not (attr in PROV_ATTRIBUTE_QNAMES and v)
                    and attr not in untyped_attrs):
                xsd_type = None
                # bool has to be tested before int because bool is a
                # subclass of int.
                if isinstance(value, bool):
                    xsd_type = XSD_BOOLEAN
                    v = v.lower()
                elif isinstance(value, str):
                    xsd_type = XSD_STRING
                elif isinstance(value, float):
                    xsd_type = XSD_DOUBLE
                elif isinstance(value, int):
                    xsd_type = XSD_INT
                elif isinstance(value, datetime.datetime):
                    # Exception of the exception, while technically
                    # still correct, do not write XSD dateTime type for
                    # attributes in the PROV namespaces as the type is
                    # already declared in the XSD and PROV XML also does
                    # not specify it in the docs.
                    if (attr.namespace.prefix != "prov"
                            or "time" not in attr.localpart.lower()):
                        xsd_type = XSD_DATETIME
                elif isinstance(value, prov.identifier.Identifier):
                    xsd_type = XSD_ANYURI

                if xsd_type is not None:
                    subelem.attrib[_ns_xsi("type")] = str(xsd_type)

            # PROV attributes with a non-empty value are written as a
            # prov:ref attribute, everything else as element text.
            if attr in PROV_ATTRIBUTE_QNAMES and v:
                subelem.attrib[_ns_prov("ref")] = v
            else:
                subelem.text = v
    return xml_bundle_root
def serialize_bundle(self, bundle, element=None, force_types=False):
    """
    Serializes a bundle or document to PROV XML.

    :param bundle: The bundle or document.
    :param element: The XML element to write to. Will be created if None.
    :type force_types: boolean, optional
    :param force_types: Will force xsd:types to be written for most
        attributes mainly PROV-"attributes", e.g. tags not in the
        PROV namespace. Off by default meaning xsd:type attributes will
        only be set for prov:type, prov:location, and prov:value as is
        done in the official PROV-XML specification. Furthermore the
        types will always be set if the Python type requires it. False
        is a good default and it should rarely require changing.
    :returns: The XML element the records were written to: a new
        ``document`` element if ``element`` is None, otherwise the
        ``bundleContent`` sub-element created below ``element``.
    """
    # Build the namespace map for lxml and attach it to the root XML
    # element. No dictionary comprehension in Python 2.6!
    nsmap = dict(
        (ns.prefix, ns.uri)
        for ns in self.document._namespaces.get_registered_namespaces())
    if self.document._namespaces._default:
        nsmap[None] = self.document._namespaces._default.uri
    for namespace in bundle.namespaces:
        # NOTE(review): nsmap is keyed by prefix but this tests the
        # namespace object itself, so the guard possibly never skips
        # anything - confirm whether ``namespace.prefix`` was intended.
        if namespace not in nsmap:
            nsmap[namespace.prefix] = namespace.uri

    for default_ns in DEFAULT_NAMESPACES.values():
        uri = default_ns.uri
        if default_ns.prefix == "xsd":
            # The XSD namespace for some reason has no hash at the end
            # for PROV XML, but for all other serializations it does.
            uri = uri.rstrip("#")
        nsmap[default_ns.prefix] = uri

    if element is not None:
        xml_bundle_root = etree.SubElement(
            element, _ns_prov("bundleContent"), nsmap=nsmap)
    else:
        xml_bundle_root = etree.Element(_ns_prov("document"), nsmap=nsmap)

    if bundle.identifier:
        xml_bundle_root.attrib[_ns_prov("id")] = \
            six.text_type(bundle.identifier)

    # Constants of the xsd type inference further down. They do not depend
    # on the record or attribute, so they are built once instead of once
    # per attribute.
    #
    # To enable a mapping of Python types to XML and back, the XSD type
    # must be written for these types. six.integer_types adds long and
    # int on Python 2, only int on Python 3.
    always_check = (bool, datetime.datetime, float,
                    prov.identifier.Identifier) + tuple(six.integer_types)
    typed_attrs = (PROV_TYPE, PROV_LOCATION, PROV_VALUE)
    untyped_attrs = (PROV_ATTR_TIME, PROV_LABEL)

    for record in bundle._records:
        rec_type = record.get_type()
        identifier = six.text_type(record._identifier) \
            if record._identifier else None
        attrs = {_ns_prov("id"): identifier} if identifier else None

        # Derive the record label from its attributes which is sometimes
        # needed.
        attributes = list(record.attributes)
        rec_label = self._derive_record_label(rec_type, attributes)

        elem = etree.SubElement(xml_bundle_root, _ns_prov(rec_label), attrs)

        for attr, value in sorted_attributes(rec_type, attributes):
            subelem = etree.SubElement(
                elem, _ns(attr.namespace.uri, attr.localpart))
            if isinstance(value, prov.model.Literal):
                if value.datatype not in \
                        [None, PROV["InternationalizedString"]]:
                    subelem.attrib[_ns_xsi("type")] = "%s:%s" % (
                        value.datatype.namespace.prefix,
                        value.datatype.localpart)
                if value.langtag is not None:
                    subelem.attrib[_ns_xml("lang")] = value.langtag
                v = value.value
            elif isinstance(value, prov.model.QualifiedName):
                if attr not in PROV_ATTRIBUTE_QNAMES:
                    subelem.attrib[_ns_xsi("type")] = "xsd:QName"
                v = six.text_type(value)
            elif isinstance(value, datetime.datetime):
                v = value.isoformat()
            else:
                v = six.text_type(value)

            # xsd type inference.
            #
            # This is a bit messy and there are all kinds of special
            # rules but it appears to get the job done.
            #
            # If it is a type element and does not yet have an
            # associated xsi type, try to infer it from the value.
            # The not startswith("prov:") check is a little bit hacky to
            # avoid type interference when the type is a standard prov
            # type.
            #
            # ``type(value) in always_check`` is an exact type match, so
            # instances of subclasses of the listed types do not trigger
            # the check on their own.
            if (force_types
                    or type(value) in always_check
                    or attr in typed_attrs) and \
                    _ns_xsi("type") not in subelem.attrib and \
                    not six.text_type(value).startswith("prov:") and \
                    not (attr in PROV_ATTRIBUTE_QNAMES and v) and \
                    attr not in untyped_attrs:
                xsd_type = None
                # bool has to be tested before the integer types because
                # bool is a subclass of int.
                if isinstance(value, bool):
                    xsd_type = XSD_BOOLEAN
                    v = v.lower()
                elif isinstance(value, six.string_types):
                    xsd_type = XSD_STRING
                elif isinstance(value, float):
                    xsd_type = XSD_DOUBLE
                elif isinstance(value, six.integer_types):
                    xsd_type = XSD_INT
                elif isinstance(value, datetime.datetime):
                    # Exception of the exception, while technically
                    # still correct, do not write XSD dateTime type for
                    # attributes in the PROV namespaces as the type is
                    # already declared in the XSD and PROV XML also does
                    # not specify it in the docs.
                    if attr.namespace.prefix != "prov" \
                            or "time" not in attr.localpart.lower():
                        xsd_type = XSD_DATETIME
                elif isinstance(value, prov.identifier.Identifier):
                    xsd_type = XSD_ANYURI

                if xsd_type is not None:
                    subelem.attrib[_ns_xsi("type")] = \
                        six.text_type(xsd_type)

            # PROV attributes with a non-empty value are written as a
            # prov:ref attribute, everything else as element text.
            if attr in PROV_ATTRIBUTE_QNAMES and v:
                subelem.attrib[_ns_prov("ref")] = v
            else:
                subelem.text = v
    return xml_bundle_root