def embedding_pred(self, parent, child, ns_mapping):
    """
    Predicate for recognizing inlined content in an XML; to
    be used for DINGO's xml-import hook 'embedded_predicate'.

    The question this predicate must answer is whether
    the child should be extracted into a separate object.

    The function returns either

    - False (the child is not to be extracted)
    - True (the child is extracted but nothing can be inferred
      about what kind of object is extracted)
    - a string giving some indication about the object type
      (if nothing else is known: the name of the element)

    Note: the 'parent' and 'child' arguments are XMLNodes as defined
    by the Python libxml2 bindings. If you have never worked with these,
    have a look at

    - Mike Kneller's brief intro:
      http://mikekneller.com/kb/python/libxml2python/part1
    - the functions in django-dingos core.xml_utils module

    For iodef import, we extract only Incident elements: the element
    name 'Incident' is returned for such a child, False for anything else.
    The 'parent' and 'ns_mapping' arguments are part of the hook
    signature but are not consulted by this predicate.
    """
    # Incident - see RFC5070 page 12
    if child.name == 'Incident':
        return child.name
    return False
def id_and_revision_extractor(self, xml_elt):
    """
    Function for generating a unique identifier for extracted embedded
    content; to be used for DINGO's xml-import hook 'embedded_id_gen'.

    This function is called

    - for the top-level node of the XML to be imported.
    - for each node at which an embedded object is extracted from the XML
      (when this occurs is governed by the embedding predicate).

    It must return an identifier and, where applicable, a revision and/or
    timestamp, in the form of a dictionary
    {'id': <identifier>, 'timestamp': <timestamp>}.
    How you format the identifier is up to you, because you will have to
    adapt the code in function xml_import such that the Information Objects
    are created with the proper identifier (consisting of qualifying
    namespace and uri).

    Note: the xml_elt is an XMLNode defined by the Python libxml2 bindings.
    If you have never worked with these, have a look at

    - Mike Kneller's brief intro:
      http://mikekneller.com/kb/python/libxml2python/part1
    - the functions in django-dingos core.xml_utils module

    For the iodef import, we only extract embedded 'Incident' objects and
    therefore must teach this function to extract identifier and timestamp
    for incidents. For any other element, both 'id' and 'timestamp' are
    returned as None. The timestamp also stays None when the 'ReportTime'
    content cannot be parsed.
    """
    result = {'id': None, 'timestamp': None}

    if xml_elt.name != "Incident":
        return result

    # So we have an Incident node. These have the following shape::
    #
    #    <Incident purpose="mitigation">
    #      <IncidentID name="csirt.example.com">908711</IncidentID>
    #      <ReportTime>2006-06-08T05:44:53-05:00</ReportTime>
    #      <Description>Large bot-net</Description>
    #      ...
    #
    # So we must find the child nodes 'IncidentID' and 'ReportTime' ...

    child = xml_elt.children

    found_id = False
    found_ts = False

    while child:
        if child.name == "IncidentID":
            # Attributes are only needed for the IncidentID element, so
            # they are extracted here rather than for every child node.
            attributes = extract_attributes(child, prefix_key_char='')
            result['id'] = '%s:%s' % (attributes.get('name'), child.content)
            found_id = True
        elif child.name == "ReportTime":
            found_ts = True
            # Surrounding whitespace would keep the value from parsing.
            naive = parse_datetime((child.content or '').strip())
            # parse_datetime yields None for a value it cannot parse; in
            # that case the timestamp is left as None instead of failing
            # in the timezone check below.
            if naive is not None:
                # Timestamps without timezone information are treated
                # as UTC.
                # NOTE(review): newer Django releases dropped the
                # 'timezone.utc' alias -- confirm against the Django
                # version in use.
                if not timezone.is_aware(naive):
                    aware = timezone.make_aware(naive, timezone.utc)
                else:
                    aware = naive
                result['timestamp'] = aware

        if found_id and found_ts:
            # Both pieces of information seen; no need to scan further.
            break
        child = child.next

    return result
def openioc_embedding_pred(self,parent, child, ns_mapping):
    """
    Predicate for recognizing inlined content in an XML; to
    be used for DINGO's xml-import hook 'embedded_predicate'.
    The question this predicate must answer is whether
    the child should be extracted into a separate object.

    The function returns either

    - False (the child is not to be extracted)
    - True (the child is extracted but nothing can be inferred
      about what kind of object is extracted)
    - a string giving some indication about the object type
      (if nothing else is known: the name of the element, often the
      namespace of the embedded object)
    - a dictionary, of the following form::

         {'id_and_revision_info' : { 'id': something/None,
                                     'ts': something/None,
                                     ... other information you want to
                                     record for this object for later usage,
                                   },
          'embedded_ns': False/True/some indication about object type as string}

    This OpenIOC predicate itself only ever returns False, True or a string.

    Note: the 'parent' and 'child' arguments are XMLNodes as defined
    by the Python libxml2 bindings. If you have never worked with these,
    have a look at

    - Mike Kneller's brief intro:
      http://mikekneller.com/kb/python/libxml2python/part1
    - the functions in django-dingos core.xml_utils module
    """

    # For openIOC, we extract the Indicator-Item elements,
    # since those correspond to observables.

    # Cheap name check first: the attributes of nodes that are not
    # IndicatorItems are never needed, so they are not extracted.
    if child.name != 'IndicatorItem':
        return False

    child_attributes = extract_attributes(child, prefix_key_char='')

    # Only IndicatorItems carrying an 'id' attribute are extracted.
    if 'id' not in child_attributes:
        return False

    # The embedding predicate is supposed to not only return
    # 'True' or 'False', but in case there is an embedding,
    # it should also contain information regarding the type of
    # object that is embedded. This is used, for example, to
    # create the DataType information for the embedding element
    # (it is a reference to an object of type X).

    # In OpenIOC, the IndicatorItems have the following form::
    #
    #      <IndicatorItem id="b9ef2559-cc59-4463-81d9-52800545e16e" condition="contains">
    #         <Context document="FileItem" search="FileItem/PEInfo/Sections/Section/Name" type="mir"/>
    #         <Content type="string">.stub</Content>
    #      </IndicatorItem>
    #
    # We take the 'document' attribute of the 'Context' element as object type
    # of the embedded object (as we shall see below, upon import, we rewrite
    # the IndicatorItem such that it corresponds to the 'fact_term = value'
    # structure used for STIX/CybOX data).

    grandchild = child.children
    type_info = None

    while grandchild is not None:
        if grandchild.name == 'Context':
            context_attributes = extract_attributes(grandchild, prefix_key_char='')
            if 'document' in context_attributes:
                type_info = context_attributes['document']
            # The first 'Context' element decides; stop scanning.
            break
        grandchild = grandchild.next

    # Without usable type information (no 'Context', no 'document'
    # attribute, or an empty one) we can only say "extract".
    if type_info:
        return type_info
    return True
def id_and_revision_extractor(self,xml_elt):
    """
    Function for determining an identifier (and, where applicable,
    timestamp/revision information) for extracted embedded content;
    to be used for DINGO's xml-import hook 'id_and_revision_extractor'.

    This function is called

    - for the top-level node of the XML to be imported.
    - for each node at which an embedded object is extracted from the XML
      (when this occurs is governed by the embedding predicate).

    It must return an identifier and, where applicable, a revision and/or
    timestamp, in the form of a dictionary
    {'id': <identifier>, 'timestamp': <timestamp>}.
    How you format the identifier is up to you, because you will have to
    adapt the code in function xml_import such that the Information Objects
    are created with the proper identifier (consisting of qualifying
    namespace and uri).

    In OpenIOC, the identifier is contained in the 'id' attribute of an
    element; the top-level 'ioc' element carries a timestamp in the
    'last-modified' attribute.

    Note: the xml_elt is an XMLNode defined by the Python libxml2 bindings.
    If you have never worked with these, have a look at

    - Mike Kneller's brief intro:
      http://mikekneller.com/kb/python/libxml2python/part1
    - the functions in django-dingos core.xml_utils module
    """
    attrs = extract_attributes(xml_elt, prefix_key_char='@')
    info = {'id': None, 'timestamp': None}

    # Identifier: taken verbatim from the 'id' attribute, if present.
    if '@id' in attrs:
        info['id'] = attrs['@id']

    # Time stamp: only available when 'last-modified' is set ...
    if '@last-modified' not in attrs:
        return info

    # ... and only usable when it parses into a datetime.
    parsed = parse_datetime(attrs['@last-modified'].strip())
    if not parsed:
        return info

    # Make sure the time stamp carries timezone information. Where it
    # does not, UTC is chosen as default: assuming that one producer of
    # OpenIOC data always fills in 'last-modified' using the same
    # timezone, this still serves the main purpose of the time stamp,
    # namely finding the latest revision of a given piece of data.
    info['timestamp'] = (
        parsed
        if timezone.is_aware(parsed)
        else timezone.make_aware(parsed, timezone.utc)
    )
    return info