def parsexml_(*args, **kwargs):
    """Parse an XML source via ``etree_.parse``.

    When lxml is the active XML backend and the caller supplied no parser,
    install lxml's ElementTree-compatible parser so that, e.g., comments
    are ignored; ``huge_tree=True`` lifts lxml's size limits.
    """
    lxml_active = XMLParser_import_library == XMLParser_import_lxml
    if lxml_active and 'parser' not in kwargs:
        kwargs['parser'] = etree_.ETCompatXMLParser(huge_tree=True)
    document = etree_.parse(*args, **kwargs)
    return document
def readwsdl(self, soap_client, wsdl_url):
    """Fetch the WSDL document at *wsdl_url* and parse it.

    Stores the raw document in ``self.wsdl_string`` (also sent to the
    comm log) and the parsed element in ``self.wsdl``.  On a syntax error
    only an error is logged -- NOTE(review): ``self.wsdl`` is left unset
    in that case; callers must tolerate its absence.
    """
    p = urllib.parse.urlparse(wsdl_url)
    # Preserve the query string, if any, when requesting the document.
    actual_path = p.path + '?{}'.format(p.query) if p.query else p.path
    self.wsdl_string = soap_client.getUrl(actual_path, msg='{}:getwsdl'.format(self.log_prefix))
    commlog.defaultLogger.logWsdl(self.wsdl_string, self.service_id)
    try:
        # Parse with lxml's ElementTree-compatible parser (ignores comments).
        self.wsdl = etree_.fromstring(self.wsdl_string, parser=etree_.ETCompatXMLParser())
    except etree_.XMLSyntaxError as ex:
        self._logger.error('could not read wsdl from {}: error={}, data=\n{}'.format(actual_path, ex, self.wsdl_string))
def get_xml_parser():
    """Returns an ``etree.ETCompatXMLParser`` instance."""
    # huge_tree lifts lxml's input-size limits; entity resolution is off
    # (XXE hardening); comments and CDATA sections are kept as-is, and
    # ignorable whitespace between elements is dropped.
    return etree.ETCompatXMLParser(
        huge_tree=True,
        resolve_entities=False,
        remove_comments=False,
        strip_cdata=False,
        remove_blank_text=True,
    )
def load_file(filename):
    """Read *filename*, sanitise and wrap its XML payload, and parse it.

    The raw bytes are passed through ``strip_bad_header`` and embedded in
    ``WRAPPER`` before parsing; comments are filtered from the resulting
    tree.  Entity resolution is disabled to avoid XXE-style expansion.

    :raise: re-raises any parse failure after logging the filename.
    """
    # Context manager fixes the original's leaked file handle.
    with open(filename, 'rb') as fh:
        data = WRAPPER % (strip_bad_header(fh.read()))
    parser = ET.ETCompatXMLParser(resolve_entities=False)
    try:
        return filter_comments(ET.XML(data, parser))
    except Exception:
        log.error("Failure loading file: %r", filename)
        raise
def parse_GDML( gdml , version=None , schema=None , lxmlParseKwargs={} ):
    """
    Converts XML semantics into Python data entities (GenerateDS classes).
    @param gdml file-like object or string text to parse XML/GDML from
    @param version is expected to be a 3-ints tuple denoting one of the
        available schema versions
    @param lxmlParseKwargs is keyword arguments that are forwarded into the
        corresponding `lxml.parse*()` functions
    Returns three entities:
        - GDML data (instance of GenerateDS `gdml') or `None' if an
          exception occurred
        - list of warnings (may be empty)
        - exception if occurred, otherwise `None'
    Performs validation and parsing of the GDML document applying XSD-schema
    validation (if a schema document is provided). On the validation stage
    all the <restriction/> (including fixed="" and default="" tags) will be
    taken into account and, thus, the output XML object will be supplied
    with additional information. Note that, contrary to the usual behaviour
    where an XML parser generates a separate PSVI object, the lxml module
    will modify the original document after validation.

    NOTE(review): the ``lxmlParseKwargs={}`` mutable default is benign here
    (it is only unpacked, never mutated), but a ``None`` sentinel would be
    the conventional form.
    """
    L = logging.getLogger(__name__)
    parser = None
    try:
        # Prefer lxml's ElementTree-compatible parser with schema validation
        # and XSD attribute defaults applied.
        parser = lxmlETree.ETCompatXMLParser(schema=schema, attribute_defaults=True)
    except AttributeError:
        L.debug('Failed to instantiate ETCompatXMLParser; fallback to xml.etree')
        parser = lxmlETree.XMLParser(schema=schema, attribute_defaults=True)
    try:
        # File-like text input goes through parse(); anything else is treated
        # as raw markup.  NOTE(review): fromstring() returns an Element, which
        # has no getroot() -- verify the string-input path actually works.
        if isinstance(gdml, io.TextIOBase):
            lxmlDoc = lxmlETree.parse(gdml, parser=parser, **lxmlParseKwargs)
        else:
            lxmlDoc = lxmlETree.fromstring(gdml, parser=parser, **lxmlParseKwargs)
    except Exception as e:
        return None, [], e
    if version is None:
        # Default to the newest schema version known to gGDMLStructs.
        version = sorted(gGDMLStructs.keys())[-1]
        L.debug('No GDML schema version forced; assuming %s'%(
            '.'.join([str(v) for v in version])))
    m = gGDMLStructs[version]
    # get tag of root node
    rootNode = lxmlDoc.getroot()
    rootTag, rootClass = get_root_tag(rootNode, m)
    rootObj = m.gdml.factory()
    warnings = []
    try:
        # Collect non-fatal issues into `warnings` while building the tree.
        rootObj.build(rootNode, gds_collector_=WarningsCollector(warnings))
    except Exception as e:
        return None, warnings, e
    return rootObj, warnings, None
def setUp(self):
    """Load the LROE chapter references and the four test XSD schema docs."""
    super().setUp()
    self.lroe_op_model = self.env['lroe.operation']
    schemas_version_dirname = LROEXMLSchema.schemas_version_dirname
    script_dirpath = os.path.abspath(os.path.dirname(__file__))
    schemas_dirpath = os.path.join(script_dirpath, 'schemas')
    lroe_240_chapter_1 = self.env.ref(
        "l10n_es_ticketbai_api_batuz.lroe_chapter_pj_240_1")
    lroe_240_subchapter_1 = self.env.ref(
        "l10n_es_ticketbai_api_batuz.lroe_subchapter_pj_240_1_1")
    lroe_140_chapter_1 = self.env.ref(
        "l10n_es_ticketbai_api_batuz.lroe_chapter_pf_140_1")
    lroe_140_subchapter_1 = self.env.ref(
        "l10n_es_ticketbai_api_batuz.lroe_subchapter_pf_140_1_1")

    def _load_schema_doc(xsd_filename):
        # Resolve the versioned schema path and parse the XSD (which pulls
        # in XADES imports) with lxml's ElementTree-compatible parser.
        xsd_filepath = os.path.abspath(os.path.join(
            schemas_dirpath,
            '%s/%s' % (schemas_version_dirname, xsd_filename)))
        return etree.parse(xsd_filepath, parser=etree.ETCompatXMLParser())

    # Load XSD files with XADES imports (replaces four copy-pasted stanzas).
    self.test_01_schema_doc = _load_schema_doc(TEST_01_XSD_SCHEMA)
    self.test_02_schema_doc = _load_schema_doc(TEST_02_XSD_SCHEMA)
    self.test_03_schema_doc = _load_schema_doc(TEST_03_XSD_SCHEMA)
    self.test_04_schema_doc = _load_schema_doc(TEST_04_XSD_SCHEMA)
    self.lroe_240_chapter_1 = lroe_240_chapter_1
    self.lroe_240_subchapter_1 = lroe_240_subchapter_1
    self.lroe_140_chapter_1 = lroe_140_chapter_1
    self.lroe_140_subchapter_1 = lroe_140_subchapter_1
def setUp(self):
    """Prepare prefixes, the XML catalog, test XSD schemas and partner refs."""
    super().setUp()
    test_dir_path = os.path.abspath(os.path.dirname(__file__))
    self.company_values_json_filepath = os.path.join(
        test_dir_path, 'company.json')
    # Disabled by default for automatic tests
    self.send_to_tax_agency = False  # Enable for local testing
    # Random prefixes keep invoice numbers unique across test runs.
    self.number_prefix = '%d/' % randrange(1, 10**19)
    self.refund_number_prefix = '%d/' % randrange(1, 10**19)
    schemas_version_dirname = XMLSchema.schemas_version_dirname
    script_dirpath = os.path.abspath(os.path.dirname(__file__))
    schemas_dirpath = os.path.join(script_dirpath, '../ticketbai/schemas')
    # Point libxml2's catalog resolver at the bundled schema catalog so the
    # XADES imports inside the XSDs can be resolved offline.
    url = pathname2url(os.path.join(schemas_dirpath, 'catalog.xml'))
    catalog_path = "file:%s" % url
    os.environ['XML_CATALOG_FILES'] = catalog_path
    # Load XSD file with XADES imports
    test_xml_invoice_filepath = os.path.abspath(
        os.path.join(
            schemas_dirpath,
            '%s/test_ticketBai V1-2.xsd' % schemas_version_dirname))
    self.test_xml_invoice_schema_doc = etree.parse(
        test_xml_invoice_filepath, parser=etree.ETCompatXMLParser())
    # Load XSD file with XADES imports
    test_xml_cancellation_filepath = os.path.abspath(
        os.path.join(
            schemas_dirpath,
            '%s/test_Anula_ticketBai V1-2.xsd' % schemas_version_dirname))
    self.test_xml_cancellation_schema_doc = etree.parse(
        test_xml_cancellation_filepath, parser=etree.ETCompatXMLParser())
    self.main_company = self.env.ref('base.main_company')
    self._prepare_company(self.main_company)
    self.partner = self.env.ref("l10n_es_ticketbai_api.res_partner_binovo")
    self.partner_extracommunity = self.env.ref(
        'l10n_es_ticketbai_api.res_partner_yamaha_jp')
    self.partner_intracommunity = self.env.ref(
        'l10n_es_ticketbai_api.res_partner_oca')
    self.group_system = self.env.ref('base.group_system')  # Settings
    self.group_user = self.env.ref('base.group_user')  # Employee
    # Contact creation
    self.demo_user = self.env.ref('base.user_demo')  # Demo user
    self.tech_user = self.env.ref(
        'l10n_es_ticketbai_api.user_tech')  # Root user
def parsexml_(infile, parser=None, **kwargs):
    """Parse *infile* into a document tree.

    When no parser is supplied, prefer lxml's ElementTree-compatible
    parser (which ignores comments); fall back to the stock ``XMLParser``
    when ``etree_`` does not provide ``ETCompatXMLParser``.
    """
    if parser is None:
        try:
            parser = etree_.ETCompatXMLParser()
        except AttributeError:
            # etree_ is plain xml.etree here; use its standard parser.
            parser = etree_.XMLParser()
    return etree_.parse(infile, parser=parser, **kwargs)
def parsexmlstring_(instring, parser=None, **kwargs):
    """Parse the XML string *instring* and return its root element.

    Defaults to lxml's ElementTree-compatible parser (ignores comments),
    falling back to the stock ``XMLParser`` when it is unavailable.
    """
    if parser is None:
        try:
            parser = etree_.ETCompatXMLParser()
        except AttributeError:
            # Plain xml.etree fallback.
            parser = etree_.XMLParser()
    return etree_.fromstring(instring, parser=parser, **kwargs)
def _read_cansas(self, path):
    """
    Load data and P(r) information from a CanSAS XML file.

    :param path: file path
    :return: Data1D object if a single SASentry was found,
        or a list of Data1D objects if multiple entries were found,
        or None of nothing was found
    :raise RuntimeError: when the file can't be opened
    :raise ValueError: when the length of the data vectors are inconsistent
    """
    output = []
    if os.path.isfile(path):
        basename = os.path.basename(path)
        root, extension = os.path.splitext(basename)
        #TODO: eventually remove the check for .xml once
        # the P(r) writer/reader is truly complete.
        if extension.lower() in self.ext or extension.lower() == '.xml':
            tree = etree.parse(path, parser=etree.ETCompatXMLParser())
            # Check the format version number
            # Specifying the namespace will take care of the file
            # format version
            root = tree.getroot()
            entry_list = root.xpath('/ns:SASroot/ns:SASentry',
                                    namespaces={'ns': CANSAS_NS})
            for entry in entry_list:
                prstate = self._parse_prstate(entry)
                # prstate could be None when a .svs file is loaded;
                # in this case, skip appending to output
                if prstate is not None:
                    sas_entry, _ = self._parse_entry(entry)
                    sas_entry.meta_data['prstate'] = prstate
                    sas_entry.filename = prstate.file
                    output.append(sas_entry)
    else:
        raise RuntimeError("%s is not a file" % path)
    # Return output consistent with the loader's api
    if len(output) == 0:
        return None
    elif len(output) == 1:
        # Call back to post the new state
        self.call_back(output[0].meta_data['prstate'], datainfo=output[0])
        #self.state = output[0].meta_data['prstate']
        return output[0]
    else:
        return output
def setUp(self): super().setUp() # can only set this environment variable once because lxml # loads it only at startup. Luckily having several catalogs is # supported so we provide the catalogs variable for related # addons to plug any required additional catalog. os.environ['XML_CATALOG_FILES'] = ' '.join(self.catalogs) test_dir_path = os.path.abspath(os.path.dirname(__file__)) self.company_values_json_filepath = os.path.join( test_dir_path, 'company.json') # Disabled by default for automatic tests self.send_to_tax_agency = False # Enable for local testing self.number_prefix = '%d/' % randrange(1, 10**19) self.refund_number_prefix = '%d/' % randrange(1, 10**19) schemas_version_dirname = XMLSchema.schemas_version_dirname script_dirpath = os.path.abspath(os.path.dirname(__file__)) schemas_dirpath = os.path.join(script_dirpath, 'schemas') # Load XSD file with XADES imports test_xml_invoice_filepath = os.path.abspath( os.path.join( schemas_dirpath, '%s/test_ticketBai V1-2.xsd' % schemas_version_dirname)) self.test_xml_invoice_schema_doc = etree.parse( test_xml_invoice_filepath, parser=etree.ETCompatXMLParser()) # Load XSD file with XADES imports test_xml_cancellation_filepath = os.path.abspath( os.path.join( schemas_dirpath, '%s/test_Anula_ticketBai V1-2.xsd' % schemas_version_dirname)) self.test_xml_cancellation_schema_doc = etree.parse( test_xml_cancellation_filepath, parser=etree.ETCompatXMLParser()) self.main_company = self.env.ref('base.main_company') self._prepare_company(self.main_company) self.partner = self.env.ref("l10n_es_ticketbai_api.res_partner_binovo") self.partner_extracommunity = self.env.ref( 'l10n_es_ticketbai_api.res_partner_yamaha_jp') self.partner_intracommunity = self.env.ref( 'l10n_es_ticketbai_api.res_partner_peugeot') # Contact creation self.tech_user = self.env.ref( 'l10n_es_ticketbai_api.user_tech') # Root user
def fromXMLString(cls, xmlString, schema=None, **kwargs):
    """Build an instance from the raw XML message *xmlString*.

    :param xmlString: XML document text/bytes (kept as ``rawdata``)
    :param schema: optional lxml schema; when given, the first child of
        the ``s12:Body`` element is validated against it
    :raise: re-raises any parse error after printing it
    """
    parser = etree_.ETCompatXMLParser()
    try:
        doc = etree_.fromstring(xmlString, parser=parser, **kwargs)
    except Exception as ex:
        print('load error "{}" in "{}"'.format(ex, xmlString))
        raise
    if schema is not None:
        # Validate the payload (first element inside the SOAP 1.2 Body).
        msgNode = doc.find('s12:Body', nsmap)[0]
        schema.assertValid(msgNode)
    return cls(doc=doc, rawdata=xmlString)
def parsexml_(infile, parser=None, **kwargs):
    """Parse *infile* (path string, PathLike or file object) into a tree.

    When *parser* is None the lxml ElementTree-compatible parser is used
    so that, e.g., comments are ignored.
    """
    if parser is None:
        parser = etree_.ETCompatXMLParser()
    try:
        # Normalise PathLike objects to plain path strings.  os.fspath is
        # the canonical conversion (the original used the confusing
        # single-argument os.path.join(infile), which did the same thing).
        if isinstance(infile, os.PathLike):
            infile = os.fspath(infile)
    except AttributeError:
        # os.PathLike is unavailable on very old Pythons; pass through.
        pass
    doc = etree_.parse(infile, parser=parser, **kwargs)
    return doc
def from_dict(cls, d, return_obj=None):
    """Build a MAECInstance (or populate *return_obj*) from dict *d*.

    Returns None when *d* is falsy.  The embedded 'maec' XML string, if
    present, is parsed into an element tree with huge_tree enabled to
    allow very large documents.
    """
    if not d:
        return None
    if not return_obj:
        return_obj = cls()
    # Let the parent binding populate the common fields first.
    super(MAECInstance, cls).from_dict(d, return_obj)
    if 'maec' in d:
        parser = etree.ETCompatXMLParser(huge_tree=True)
        return_obj.maec = etree.parse(StringIO(d['maec']), parser=parser)
    return return_obj
def from_dict(cls, d, return_obj=None):
    """Build an OpenIOCTestMechanism (or populate *return_obj*) from *d*.

    Returns None when *d* is falsy.  The embedded 'ioc' XML string, if
    present, is parsed into an element tree with huge_tree enabled to
    allow very large documents.
    """
    if not d:
        return None
    if not return_obj:
        return_obj = cls()
    # Let the parent binding populate the common fields first.
    super(OpenIOCTestMechanism, cls).from_dict(d, return_obj)
    if 'ioc' in d:
        parser = etree.ETCompatXMLParser(huge_tree=True)
        return_obj.ioc = etree.parse(StringIO(d['ioc']), parser=parser)
    return return_obj
def parsexml_(*args, **kwargs):
    """
    parsexml_

    Parse an XML source via ``eTree.parse``.  When lxml is the active XML
    library and the caller supplied no parser, install the ElementTree
    compatible parser so that, e.g., comments are ignored.

    :param args: positional arguments forwarded to ``eTree.parse``
    :param kwargs: keyword arguments forwarded to ``eTree.parse``
    :return: doc
    """
    lxml_active = XMLParser_import_library == XMLParser_import_lxml
    if lxml_active and 'parser' not in kwargs:
        kwargs['parser'] = eTree.ETCompatXMLParser()
    return eTree.parse(*args, **kwargs)
def main():
    """Build a STIX package embedding a MAEC malware instance and print it.

    NOTE(review): uses the Python 2 ``print`` statement; this script is
    Python 2 only.
    """
    maec_malware_instance = MAECInstance()
    maec_malware_instance.add_name("Poison Ivy Variant v4392-acc")
    maec_malware_instance.add_type("Remote Access Trojan")
    # Parse the raw MAEC XML with lxml's ElementTree-compatible parser.
    maec_malware_instance.maec = etree.fromstring(
        MAEC_XML, parser=etree.ETCompatXMLParser())
    ttp = TTP(title="Poison Ivy Variant v4392-acc")
    ttp.behavior = Behavior()
    ttp.behavior.add_malware_instance(maec_malware_instance)
    stix_package = STIXPackage()
    stix_package.add_ttp(ttp)
    print stix_package.to_xml()
def open_file(self, filename): self.frame.hide() # load and parse the file if lxml_loaded: parser = ElementTree.ETCompatXMLParser() self.tree = ElementTree.parse(filename, parser=parser) else: self.tree = ElementTree.parse(filename) # validate the file if lxml is available if lxml_loaded: xmlschema_tree = ElementTree.parse(xmlschema_f) xmlschema = ElementTree.XMLSchema(xmlschema_tree) xmlschema.assertValid(self.tree) root = self.tree.getroot() self.layout(root, list(), self.main_sizer) self.frame.show() return self.anonymous_controls, self.controls
def parsexml_(infile, parser=None, keep_signature=False, **kwargs):
    """Parse *infile* and return an ElementTree over its root element.

    When *parser* is None the lxml ElementTree-compatible parser is used
    so that, e.g., comments are ignored.  Unless *keep_signature* is true,
    any XML-DSig ``Signature`` child of the root is dropped before the
    tree is returned (useful for XML comparison).
    """
    if parser is None:
        parser = etree_.ETCompatXMLParser()
    doc = etree_.parse(infile, parser=parser, **kwargs)
    root = doc.getroot()
    # remove Signature element before XML comparison
    if not keep_signature:
        # Iterate over a snapshot: removing children from the element being
        # iterated skips siblings (the original had this bug).  The original
        # also listed a second tag variant that was corrupted by a line
        # continuation inside the string literal and could never match --
        # element tags are always '{namespace}localname', never 'ds:'-prefixed.
        for child in list(root):
            if child.tag == "{http://www.w3.org/2000/09/xmldsig#}Signature":
                root.remove(child)
    subtree = etree_.ElementTree(root)
    return subtree
def parse_lshw(raw_data):
    """Parse ``lshw`` XML output into a nested dict structure.

    The element tree is re-shaped in place according to the
    TAG_TRANSLATION_PAIRS / TEXT_TRANSLATION_PAIRS tables before being
    converted via ``etree_to_dict`` and ``nullify``.

    :raise Error: when the root element is neither LIST nor NODE.
    """
    # recover=True: tolerate the occasionally malformed lshw output.
    parser = ET.ETCompatXMLParser(recover=True)
    response = ET.fromstring(raw_data, parser=parser)
    if response.tag and response.tag.upper() == 'LIST':
        # Unwrap the single node child of a LIST root.
        response = response[0]
    elif response.tag and response.tag.upper() == 'NODE':
        pass
    else:
        raise Error('Lshw parse error.')
    for element in response.findall('.//'):
        for k in element.attrib.keys():
            try:
                v = element.attrib[k]
            except UnicodeDecodeError:
                continue  # value has bytes not possible to decode with UTF-8
            if (element.tag, k) in TAG_TRANSLATION_PAIRS:
                # Attribute value becomes the element's tag name.
                try:
                    element.tag = v
                except ValueError:
                    pass
                continue
            if (element.tag, k) in TEXT_TRANSLATION_PAIRS:
                # Attribute value becomes the element's text.
                element.text = v
                continue
            if k == 'units':
                # Move the element's text into a nested <value> child so the
                # unit attribute can become a sibling child element below.
                value = ET.Element(b'value')
                value.text = element.text
                element.text = ''
                element.append(value)
            # Promote every remaining attribute to a child element.
            child = ET.Element(k)
            child.text = v
            element.append(child)
    return nullify(
        etree_to_dict(
            response,
            _converters=[
                _nullify,
                int,
                float,
                lck.xml.converters._datetime,
                lck.xml.converters._datetime_strip_tz,
            ],
        ),
    )[1]
def main(self, surface_x3d, faces_csv, vertices_csv, vertexlimits_csv):
    """Extract faces and vertices from an X3D surface into CSV files.

    Writes one face per line to *faces_csv*, one vertex (x,y,z) per line
    to *vertices_csv*, and the per-axis min/max rows to *vertexlimits_csv*.
    Returns a FancyDict of the three output paths.
    """
    # custom parser needed to parse files larger than approx. 10 MB.
    tree = etree.parse(surface_x3d,
                       parser=etree.ETCompatXMLParser(huge_tree=True))
    rootNode = tree.getroot()
    faceNode = rootNode.find('.//IndexedFaceSet')
    # coordIndex is a flat, -1-terminated index list; each -1 separator
    # becomes a newline so every CSV line holds one face.
    faces = re.sub('[\s,]+', ',', faceNode.attrib['coordIndex'].strip())
    faces = re.sub(',-1,', '\n', faces)
    faces = re.sub(',-1$', '', faces)
    with open(faces_csv, 'w') as fp:
        fp.write(faces)
    # experimental: save binary data (works well, but gz is more useful)
    """
    faces = json.loads('[['+re.sub('\n','],[',faces)+']]')
    faces = numpy.array(faces,numpy.dtype('<u4'))
    faces_dat = re.sub('\.csv$','',faces_csv)+'.dat';
    with open(faces_dat,'w') as fp:
        fp.write(faces.tostring())
    """
    vertexNode = faceNode.find('Coordinate')
    with open(vertices_csv, 'w') as fp:
        # Flatten whitespace/commas, then group coordinates in triples.
        vertices = re.sub('[\s,]+', ' ', vertexNode.attrib['point'].strip())
        vertices = vertices.split(' ')
        vertices = [
            ','.join(vertices[i:i + 3]) for i in range(0, len(vertices), 3)
        ]
        fp.write('\n'.join(vertices))
    # Re-load the freshly written vertices to compute the bounding box.
    V = LoadVertices().setInput(vertices_csv).getOutput('vertices')
    V = numpy.array(V, float)
    minmax = [
        ','.join([str(v) for v in V.min(axis=0)]),
        ','.join([str(v) for v in V.max(axis=0)])
    ]
    with open(vertexlimits_csv, 'w') as fp:
        fp.write('\n'.join(minmax))
    return FancyDict(faces_csv=faces_csv,
                     vertices_csv=vertices_csv,
                     vertexlimits_csv=vertexlimits_csv)
def parsexml_(infile, parser=None, keep_signature=False, **kwargs):
    "accepts both NFe and nfeProc documents"
    if parser is None:
        # Use the lxml ElementTree compatible parser so that, e.g.,
        # we ignore comments.
        parser = etree_.ETCompatXMLParser()
    doc = etree_.parse(infile, parser=parser, **kwargs)
    # For nfeProc wrapper documents, descend to the enclosed NFe element.
    if doc.getroot().tag == '{http://www.portalfiscal.inf.br/nfe}nfeProc':
        root = doc.getroot()[0]
    else:
        root = doc.getroot()
    # remove Signature element before XML comparison
    if not keep_signature:
        # Iterate over a snapshot: removing children from the element being
        # iterated skips siblings (the original had this bug).  The second
        # tag variant of the original was corrupted by a line continuation
        # inside the string literal and could never match -- element tags
        # are always '{namespace}localname', never 'ds:'-prefixed.
        for child in list(root):
            if child.tag == "{http://www.w3.org/2000/09/xmldsig#}Signature":
                root.remove(child)
    subtree = etree_.ElementTree(root)
    return subtree
def parseString(inString, silence=False):
    """Parse the XML text *inString* into a generated-class tree.

    Falls back to ``supermod.valuesType`` when the root tag is unknown,
    and echoes the document to stdout unless *silence* is true.
    """
    if sys.version_info.major == 2:
        from StringIO import StringIO
    else:
        from io import BytesIO as StringIO
    parser = etree_.ETCompatXMLParser(strip_cdata=False)
    root_node = parsexmlstring_(inString, parser)
    root_tag, root_class = get_root_tag(root_node)
    if root_class is None:
        root_tag = 'valuesType'
        root_class = supermod.valuesType
    root_obj = root_class.factory()
    root_obj.build(root_node)
    # Enable Python to collect the space used by the DOM.
    if not SaveElementTreeNode:
        root_node = None
    if not silence:
        sys.stdout.write('<?xml version="1.0" ?>\n')
        root_obj.export(sys.stdout, 0, name_=root_tag, namespacedef_='')
    return root_obj
def main():
    """Parse every cached Project Gutenberg RDF file and dump the records.

    Writes the parsed book list to both ``gutenberg.pkl`` (pickle) and
    ``gutenberg.json``, printing a progress count every *report_freq* files.
    """
    # expanduser: os.listdir()/open() do not understand a literal '~'
    # (the original passed the raw '~/...' string and would raise).
    rdf_path = os.path.expanduser('~/gutenberg/rdf-files/cache/epub')
    report_freq = 500
    books = []
    for i, book_dir in enumerate(os.listdir(rdf_path)):
        if i % report_freq == 0:
            print(i)
        doc = etree.parse(f'{rdf_path}/{book_dir}/pg{book_dir}.rdf',
                          etree.ETCompatXMLParser())
        parsed = parse_doc(doc)
        parsed['id'] = book_dir
        books.append(parsed)
    with open('gutenberg.pkl', 'wb') as f:
        pickle.dump(books, f)
    print('Saved books pickle file.')
    # Plain literal: the original used an f-string with no placeholders.
    with open('gutenberg.json', 'w') as f:
        json.dump(books, f)
    print('Saved books json file.')
def read_x3d(path):
    """Read x3d files.

    This code has been adapted from :
    https://github.com/INCF/Scalable-Brain-Atlas

    Parameters
    ----------
    path : string
        Full path to a .x3d file.

    Returns
    -------
    vertices : array_like
        Array of vertices of shape (n_vertices, 3)
    faces : array_like
        Array of faces of shape (n_faces, 3)
    """
    from lxml import etree
    import re
    logger.info(' X3D file detected')
    # Read root node :
    tree = etree.parse(path, parser=etree.ETCompatXMLParser(huge_tree=True))
    root_node = tree.getroot()
    # Get mesh faces :
    face_node = root_node.find('.//IndexedFaceSet')
    # coordIndex is a flat, -1-terminated index list; each -1 separator is
    # turned into a newline, then the whole thing reshaped to (n_faces, 3).
    faces = re.sub('[\s,]+', ',', face_node.attrib['coordIndex'].strip())
    faces = re.sub(',-1,', '\n', faces)
    faces = re.sub(',-1$', '', faces)
    faces = np.array(faces.replace('\n', ',').split(',')).astype(int)
    faces = faces.reshape(int(faces.shape[0] / 3), 3)
    # Get mesh vertices :
    vertex_node = face_node.find('Coordinate')
    vertices = re.sub('[\s,]+', ' ', vertex_node.attrib['point'].strip())
    # NOTE(review): [0:-1] drops the final token -- this assumes the 'point'
    # attribute ends with a delimiter that leaves a trailing empty string
    # after split; verify against real x3d data.
    vertices = np.array(vertices.split(' ')[0:-1]).astype(float)
    vertices = vertices.reshape(int(vertices.shape[0] / 3), 3)
    return vertices, faces
def read(self, path):
    """
    Load data and corfunc information from a CanSAS file.

    NOTE(review): uses Python 2 ``raise IOError, msg`` syntax; this
    module is Python 2 only.

    :param path: The file path to read from
    :return: Data1D object, a list of Data1D objects, or None
    :raise IOError: When the file can't be found
    :raise IOError: When the file is an invalid file type
    :raise ValueError: When the length of the data vectors are inconsistent
    """
    output = []
    if os.path.isfile(path):
        # Load file
        basename = os.path.basename(path)
        root, ext = os.path.splitext(basename)
        if not ext.lower() in self.ext:
            raise IOError, "{} is not a supported file type".format(ext)
        tree = etree.parse(path, parser=etree.ETCompatXMLParser())
        root = tree.getroot()
        entry_list = root.xpath('/ns:SASroot/ns:SASentry',
                                namespaces={'ns': CANSAS_NS})
        for entry in entry_list:
            corstate = self._parse_state(entry)
            # corstate can be None (e.g. other saved-state files); skip those.
            if corstate is not None:
                sas_entry, _ = self._parse_entry(entry)
                sas_entry.meta_data['corstate'] = corstate
                sas_entry.filename = corstate.file
                output.append(sas_entry)
    else:
        # File not found
        msg = "{} is not a valid file path or doesn't exist".format(path)
        raise IOError, msg
    if len(output) == 0:
        return None
    elif len(output) == 1:
        # Post the loaded state back through the callback.
        self.callback(output[0].meta_data['corstate'], datainfo=output[0])
        return output[0]
    else:
        return output
def parse(inFilename, silence=False):
    """Parse the XML file *inFilename* into a generated-class tree.

    Falls back to ``supermod.Wheel`` for unknown root tags; pretty-prints
    the document to stdout unless *silence* is true.
    """
    parser = etree_.ETCompatXMLParser(strip_cdata=False)
    document = parsexml_(inFilename, parser)
    root_node = document.getroot()
    root_tag, root_class = get_root_tag(root_node)
    if root_class is None:
        root_tag = 'Wheel'
        root_class = supermod.Wheel
    root_obj = root_class.factory()
    root_obj.build(root_node)
    # Enable Python to collect the space used by the DOM.
    if not SaveElementTreeNode:
        document = None
        root_node = None
    if not silence:
        sys.stdout.write('<?xml version="1.0" ?>\n')
        root_obj.export(
            sys.stdout, 0, name_=root_tag,
            namespacedef_='xmlns:tns="http://www.example.org/wheel/"',
            pretty_print=True)
    return root_obj
def parseLiteral(inFilename, silence=False):
    """Parse *inFilename* and emit Python literal construction code.

    Writes a small importable module snippet to stdout unless *silence*
    is true; falls back to ``supermod.Wheel`` for unknown root tags.
    """
    parser = etree_.ETCompatXMLParser(strip_cdata=False)
    document = parsexml_(inFilename, parser)
    root_node = document.getroot()
    root_tag, root_class = get_root_tag(root_node)
    if root_class is None:
        root_tag = 'Wheel'
        root_class = supermod.Wheel
    root_obj = root_class.factory()
    root_obj.build(root_node)
    # Enable Python to collect the space used by the DOM.
    if not SaveElementTreeNode:
        document = None
        root_node = None
    if not silence:
        sys.stdout.write('#from preserve_cdata_tags2_sup import *\n\n')
        sys.stdout.write('import preserve_cdata_tags2_sup as model_\n\n')
        sys.stdout.write('rootObj = model_.rootClass(\n')
        root_obj.exportLiteral(sys.stdout, 0, name_=root_tag)
        sys.stdout.write(')\n')
    return root_obj
def parse_rdf(db: MongodbCache):
    """Parse every unpacked Gutenberg RDF file and store books/authors in *db*.

    Updates a progress bar per file and flushes the cache once at the end.
    """
    files = [d for d in listdir(settings.CACHE_UNPACK_DIRECTORY)
             if d.startswith("pg") and d.endswith(".rdf")]
    total = len(files)
    for index, file_name in enumerate(files):
        # File names look like "pg<id>.rdf"; extract the numeric id.
        file_name_stripped = re.search("pg(.*?).rdf", file_name).group(1)
        Utils.update_progress_bar(f"Processing progress: {index} / {total}")
        file_path = path.join(settings.CACHE_UNPACK_DIRECTORY, file_name)
        doc = etree.parse(file_path, etree.ETCompatXMLParser())
        gutenberg_book_id = int(file_name_stripped)
        author_aliases = parse_author(doc)
        gutenberg_author_id = parse_author_id(doc)
        newbook = Book(
            gutenberg_id=gutenberg_book_id,
            number_of_downloads=parse_downloads(doc),
            date_issued=parse_date_issued(doc),
            title=parse_title(doc),
            doc_type=parse_type(doc),
            language=parse_languages(doc),
            author=author_aliases,
            gutenberg_author_id=gutenberg_author_id,
            formats=parse_formats(doc),
            publisher=parse_publisher(doc),
            rights=parse_rights(doc),
            subjects=parse_subjects(doc),
            bookshelves=parse_bookshelves(doc),
        )
        author = Author(
            gutenberg_id=gutenberg_author_id,
            aliases=author_aliases,
        )
        db.insert_book(newbook)
        db.insert_author(author)
    db.flush()
def parse_xml_to_obj(self, xml_file, check_version=True, check_root=True):
    """Creates a STIX binding object from the supplied xml file.

    Arguments:
    xml_file -- A filename/path or a file-like object representing a STIX
        instance document
    check_version -- Inspect the version before parsing.
    check_root -- Inspect the root element before parsing.
    """
    # huge_tree lifts lxml's size limits for very large STIX documents.
    parser = etree.ETCompatXMLParser(huge_tree=True)
    tree = etree.parse(xml_file, parser=parser)
    if check_version:
        self._check_version(tree)
    if check_root:
        self._check_root(tree)
    # Imported at call time rather than module level --
    # NOTE(review): presumably to avoid a circular import; confirm.
    import stix.bindings.stix_core as stix_core_binding
    stix_package_obj = stix_core_binding.STIXType().factory()
    stix_package_obj.build(tree.getroot())
    return stix_package_obj