Ejemplos de ETCompatXMLParser en Python, ejemplos de lxml.etree.ETCompatXMLParser en Python

Ejemplo n.º 1

0

Mostrar archivo

def parsexml_(*args, **kwargs):
    """Parse an XML source with ``etree_.parse`` and return the result.

    All positional and keyword arguments are forwarded to ``etree_.parse``.
    When lxml is the selected XML library and the caller supplied no
    ``parser`` keyword, an ``ETCompatXMLParser`` built with
    ``huge_tree=True`` is injected.
    """
    lxml_selected = XMLParser_import_library == XMLParser_import_lxml
    if lxml_selected and 'parser' not in kwargs:
        # The ElementTree-compatible lxml parser is used so that, e.g.,
        # comments are ignored.
        kwargs['parser'] = etree_.ETCompatXMLParser(huge_tree=True)
    return etree_.parse(*args, **kwargs)

Ejemplo n.º 2

0

Mostrar archivo

 def readwsdl(self, soap_client, wsdl_url):
     """Fetch the WSDL behind ``wsdl_url`` via ``soap_client`` and parse it.

     The raw response is stored in ``self.wsdl_string`` and passed to the
     communication logger; the parsed element is stored in ``self.wsdl``.
     An ``XMLSyntaxError`` raised while parsing is logged rather than
     propagated, and ``self.wsdl`` is not assigned in that case.
     """
     url_parts = urllib.parse.urlparse(wsdl_url)
     # Only the path (plus the query string, when present) is requested.
     if url_parts.query:
         actual_path = url_parts.path + '?{}'.format(url_parts.query)
     else:
         actual_path = url_parts.path
     request_label = '{}:getwsdl'.format(self.log_prefix)
     self.wsdl_string = soap_client.getUrl(actual_path, msg=request_label)
     commlog.defaultLogger.logWsdl(self.wsdl_string, self.service_id)
     try:
         compat_parser = etree_.ETCompatXMLParser()
         # fromstring() yields the root element of the parsed document.
         self.wsdl = etree_.fromstring(self.wsdl_string, parser=compat_parser)
     except etree_.XMLSyntaxError as ex:
         error_text = 'could not read wsdl from {}: error={}, data=\n{}'.format(
             actual_path, ex, self.wsdl_string)
         self._logger.error(error_text)

Ejemplo n.º 3

0

Mostrar archivo

def get_xml_parser():
    """Build and return a configured ``etree.ETCompatXMLParser``.

    The parser is created with ``huge_tree`` and ``remove_blank_text``
    enabled and with ``resolve_entities``, ``remove_comments`` and
    ``strip_cdata`` disabled.
    """
    parser_options = dict(
        huge_tree=True,
        resolve_entities=False,
        remove_comments=False,
        strip_cdata=False,
        remove_blank_text=True,
    )
    return etree.ETCompatXMLParser(**parser_options)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: generate.py Proyecto: QPanScience/GPU_PY-pyopengl

def load_file(filename):
    """Read ``filename``, wrap its contents and parse the result as XML.

    The raw bytes are passed through ``strip_bad_header`` and interpolated
    into ``WRAPPER`` before being parsed with an ``ETCompatXMLParser`` that
    has entity resolution disabled. The parsed tree is returned after
    ``filter_comments`` has been applied.

    Any exception raised while parsing or filtering is logged together with
    the file name and then re-raised unchanged.
    """
    # Read through a context manager so the handle is closed promptly
    # instead of being left to the garbage collector.
    with open(filename, 'rb') as source:
        raw = source.read()
    data = WRAPPER % (strip_bad_header(raw))
    parser = ET.ETCompatXMLParser(resolve_entities=False)
    try:
        return filter_comments(ET.XML(data, parser))
    except Exception:
        log.error("Failure loading file: %r", filename)
        raise

Ejemplo n.º 5

0

Mostrar archivo

Archivo: library.py Proyecto: CrankOne/geode

def parse_GDML(gdml,
               version=None,
               schema=None,
               lxmlParseKwargs=None):
    """
    Converts XML semantics into Python data entities (GenerateDS classes).
        @param gdml file-like object or string text to parse XML/GDML from
        @param version is expected to be a 3-ints tuple denoting one of the
            available schema version; when omitted, the greatest key of
            `gGDMLStructs' is used
        @param schema is forwarded to the parser constructor as `schema='
        @param lxmlParseKwargs is keyword arguments that have to be forwarded
            into corresponding functions of `lxml.parse*()`; `None' means
            no extra arguments
    Returns three entities:
        - GDML data (instance of GenerateDS `gdml') or `None' if exception occurred
        - list of warnings (may be empty)
        - exception if occurred, otherwise `None'

    Performs validation and parsing of GDML document applying XSD-schema
    validation (if schema document is provided). On the validation stage all
    the <restriction/> (including fixed="", and default="" tags) will be taken
    into account and, thus, output XML object will be supplied with additional
    information.

    Note, that contrary to usual behaviour when XML parser generates the
    separate PSVI object, lxml module will modify original document after
    validation.
    """
    L = logging.getLogger(__name__)
    # Avoid a shared mutable default; None stands for "no extra kwargs".
    if lxmlParseKwargs is None:
        lxmlParseKwargs = {}
    try:
        parser = lxmlETree.ETCompatXMLParser(schema=schema, attribute_defaults=True)
    except AttributeError:
        L.debug('Failed to instantiate ETCompatXMLParser; fallback to xml.etree')
        parser = lxmlETree.XMLParser(schema=schema, attribute_defaults=True)
    try:
        if isinstance(gdml, io.IOBase):
            # Any file object (text or binary) goes through parse(), which
            # returns a tree whose root has to be extracted.
            rootNode = lxmlETree.parse(
                gdml, parser=parser, **lxmlParseKwargs).getroot()
        else:
            # fromstring() already returns the root element; elements have
            # no getroot() method, so it must not be called on the result.
            rootNode = lxmlETree.fromstring(
                gdml, parser=parser, **lxmlParseKwargs)
    except Exception as e:
        return None, [], e
    if version is None:
        version = sorted(gGDMLStructs)[-1]
        L.debug('No GDML schema version forced; assuming %s',
                '.'.join(str(v) for v in version))
    m = gGDMLStructs[version]
    # The result is not used below; the call is kept because it may raise
    # for an unexpected root node -- TODO confirm and drop if it cannot.
    _rootTag, _rootClass = get_root_tag(rootNode, m)
    rootObj = m.gdml.factory()
    warnings = []
    try:
        rootObj.build(rootNode, gds_collector_=WarningsCollector(warnings))
    except Exception as e:
        return None, warnings, e
    return rootObj, warnings, None

Ejemplo n.º 6

0

Mostrar archivo

 def setUp(self):
     """Prepare the LROE model, chapter records and test XSD documents.

     Looks up the chapter/subchapter records, then parses the four test
     schema files found under ``schemas/<version dir>`` next to this file
     and stores each parsed document on the test case.
     """
     super().setUp()
     self.lroe_op_model = self.env['lroe.operation']
     version_dirname = LROEXMLSchema.schemas_version_dirname
     tests_dirpath = os.path.abspath(os.path.dirname(__file__))
     schemas_root = os.path.join(tests_dirpath, 'schemas')
     chapter_240 = self.env.ref(
         "l10n_es_ticketbai_api_batuz.lroe_chapter_pj_240_1")
     subchapter_240 = self.env.ref(
         "l10n_es_ticketbai_api_batuz.lroe_subchapter_pj_240_1_1")
     chapter_140 = self.env.ref(
         "l10n_es_ticketbai_api_batuz.lroe_chapter_pf_140_1")
     subchapter_140 = self.env.ref(
         "l10n_es_ticketbai_api_batuz.lroe_subchapter_pf_140_1_1")

     def load_schema_doc(xsd_filename):
         # Load XSD file with XADES imports
         xsd_filepath = os.path.abspath(os.path.join(
             schemas_root, '%s/%s' % (version_dirname, xsd_filename)))
         return etree.parse(xsd_filepath, parser=etree.ETCompatXMLParser())

     self.test_01_schema_doc = load_schema_doc(TEST_01_XSD_SCHEMA)
     self.test_02_schema_doc = load_schema_doc(TEST_02_XSD_SCHEMA)
     self.test_03_schema_doc = load_schema_doc(TEST_03_XSD_SCHEMA)
     self.test_04_schema_doc = load_schema_doc(TEST_04_XSD_SCHEMA)
     self.lroe_240_chapter_1 = chapter_240
     self.lroe_240_subchapter_1 = subchapter_240
     self.lroe_140_chapter_1 = chapter_140
     self.lroe_140_subchapter_1 = subchapter_140

Ejemplo n.º 7

0

Mostrar archivo

 def setUp(self):
     """Prepare paths, random number prefixes, schemas and records.

     Points ``XML_CATALOG_FILES`` at the ``catalog.xml`` of the TicketBAI
     schemas directory, parses the invoice and cancellation test schemas,
     prepares the main company and looks up the partner, group and user
     records used by the tests.
     """
     super().setUp()
     tests_dirpath = os.path.abspath(os.path.dirname(__file__))
     self.company_values_json_filepath = os.path.join(
         tests_dirpath, 'company.json')
     # Disabled by default for automatic tests
     self.send_to_tax_agency = False  # Enable for local testing
     prefix_upper_bound = 10**19
     self.number_prefix = '%d/' % randrange(1, prefix_upper_bound)
     self.refund_number_prefix = '%d/' % randrange(1, prefix_upper_bound)
     version_dirname = XMLSchema.schemas_version_dirname
     schemas_root = os.path.join(tests_dirpath, '../ticketbai/schemas')
     catalog_url = pathname2url(os.path.join(schemas_root, 'catalog.xml'))
     os.environ['XML_CATALOG_FILES'] = "file:%s" % catalog_url

     def load_schema_doc(relative_template):
         # Load XSD file with XADES imports
         xsd_filepath = os.path.abspath(os.path.join(
             schemas_root, relative_template % version_dirname))
         return etree.parse(xsd_filepath, parser=etree.ETCompatXMLParser())

     self.test_xml_invoice_schema_doc = load_schema_doc(
         '%s/test_ticketBai V1-2.xsd')
     self.test_xml_cancellation_schema_doc = load_schema_doc(
         '%s/test_Anula_ticketBai V1-2.xsd')
     env_ref = self.env.ref
     self.main_company = env_ref('base.main_company')
     self._prepare_company(self.main_company)
     self.partner = env_ref("l10n_es_ticketbai_api.res_partner_binovo")
     self.partner_extracommunity = env_ref(
         'l10n_es_ticketbai_api.res_partner_yamaha_jp')
     self.partner_intracommunity = env_ref(
         'l10n_es_ticketbai_api.res_partner_oca')
     self.group_system = env_ref('base.group_system')  # Settings
     self.group_user = env_ref('base.group_user')  # Employee
     # Contact creation
     self.demo_user = env_ref('base.user_demo')  # Demo user
     self.tech_user = env_ref(
         'l10n_es_ticketbai_api.user_tech')  # Root user

Ejemplo n.º 8

0

Mostrar archivo

Archivo: oneperType03_2One.py Proyecto: hurta2yaisel/generateds

def parsexml_(infile, parser=None, **kwargs):
    """Parse ``infile`` with ``etree_.parse`` and return the document.

    If no parser is given, an ``ETCompatXMLParser`` is tried first; when
    that raises ``AttributeError`` a plain ``XMLParser`` is used instead.
    Extra keyword arguments are forwarded to ``etree_.parse``.
    """
    if parser is None:
        try:
            # The lxml ElementTree compatible parser is preferred so that,
            # e.g., comments are ignored.
            parser = etree_.ETCompatXMLParser()
        except AttributeError:
            # fallback to xml.etree
            parser = etree_.XMLParser()
    return etree_.parse(infile, parser=parser, **kwargs)

Ejemplo n.º 9

0

Mostrar archivo

def parsexmlstring_(instring, parser=None, **kwargs):
    """Parse XML text with ``etree_.fromstring`` and return the element.

    If no parser is given, an ``ETCompatXMLParser`` is tried first; when
    that raises ``AttributeError`` a plain ``XMLParser`` is used instead.
    Extra keyword arguments are forwarded to ``etree_.fromstring``.
    """
    if parser is None:
        try:
            # The lxml ElementTree compatible parser is preferred so that,
            # e.g., comments are ignored.
            parser = etree_.ETCompatXMLParser()
        except AttributeError:
            # fallback to xml.etree
            parser = etree_.XMLParser()
    return etree_.fromstring(instring, parser=parser, **kwargs)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: inversion_state.py Proyecto: zattala/sasview

    def _read_cansas(self, path):
        """
        Load data and P(r) information from a CanSAS XML file.

        :param path: file path

        :return: Data1D object if a single SASentry was found,
                    or a list of Data1D objects if multiple entries were found,
                    or None if nothing was found

        :raise RuntimeError: when ``path`` is not an existing file
        :raise ValueError: when the length of the data vectors are inconsistent

        """
        if not os.path.isfile(path):
            raise RuntimeError("%s is not a file" % path)

        entries = []
        suffix = os.path.splitext(os.path.basename(path))[1].lower()
        #TODO: eventually remove the check for .xml once
        # the P(r) writer/reader is truly complete.
        if suffix in self.ext or suffix == '.xml':
            tree = etree.parse(path, parser=etree.ETCompatXMLParser())
            # Specifying the namespace takes care of the file format
            # version check.
            sas_root = tree.getroot()
            entry_nodes = sas_root.xpath('/ns:SASroot/ns:SASentry',
                                         namespaces={'ns': CANSAS_NS})
            for node in entry_nodes:
                prstate = self._parse_prstate(node)
                # prstate could be None when .svs file is loaded;
                # such entries are not added to the result
                if prstate is None:
                    continue
                sas_entry, _ = self._parse_entry(node)
                sas_entry.meta_data['prstate'] = prstate
                sas_entry.filename = prstate.file
                entries.append(sas_entry)

        # Return output consistent with the loader's api
        if not entries:
            return None
        if len(entries) > 1:
            return entries
        only_entry = entries[0]
        # Call back to post the new state
        self.call_back(only_entry.meta_data['prstate'], datainfo=only_entry)
        return only_entry

Ejemplo n.º 11

0

Mostrar archivo

 def setUp(self):
     """Prepare the catalog environment, paths, schemas and records.

     Sets ``XML_CATALOG_FILES`` from ``self.catalogs``, parses the invoice
     and cancellation test schemas found under ``schemas/<version dir>``
     next to this file, prepares the main company and looks up the partner
     and user records used by the tests.
     """
     super().setUp()
     # can only set this environment variable once because lxml
     # loads it only at startup. Luckily having several catalogs is
     # supported so we provide the catalogs variable for related
     # addons to plug any required additional catalog.
     os.environ['XML_CATALOG_FILES'] = ' '.join(self.catalogs)
     tests_dirpath = os.path.abspath(os.path.dirname(__file__))
     self.company_values_json_filepath = os.path.join(
         tests_dirpath, 'company.json')
     # Disabled by default for automatic tests
     self.send_to_tax_agency = False  # Enable for local testing
     prefix_upper_bound = 10**19
     self.number_prefix = '%d/' % randrange(1, prefix_upper_bound)
     self.refund_number_prefix = '%d/' % randrange(1, prefix_upper_bound)
     version_dirname = XMLSchema.schemas_version_dirname
     schemas_root = os.path.join(tests_dirpath, 'schemas')

     def load_schema_doc(relative_template):
         # Load XSD file with XADES imports
         xsd_filepath = os.path.abspath(os.path.join(
             schemas_root, relative_template % version_dirname))
         return etree.parse(xsd_filepath, parser=etree.ETCompatXMLParser())

     self.test_xml_invoice_schema_doc = load_schema_doc(
         '%s/test_ticketBai V1-2.xsd')
     self.test_xml_cancellation_schema_doc = load_schema_doc(
         '%s/test_Anula_ticketBai V1-2.xsd')
     env_ref = self.env.ref
     self.main_company = env_ref('base.main_company')
     self._prepare_company(self.main_company)
     self.partner = env_ref("l10n_es_ticketbai_api.res_partner_binovo")
     self.partner_extracommunity = env_ref(
         'l10n_es_ticketbai_api.res_partner_yamaha_jp')
     self.partner_intracommunity = env_ref(
         'l10n_es_ticketbai_api.res_partner_peugeot')
     # Contact creation
     self.tech_user = env_ref(
         'l10n_es_ticketbai_api.user_tech')  # Root user

Ejemplo n.º 12

0

Mostrar archivo

Archivo: soapenvelope.py Proyecto: hhleroy97/sdc11073

    def fromXMLString(cls, xmlString, schema=None, **kwargs):
        """Parse ``xmlString`` and build an instance of ``cls`` from it.

        Parsing uses an ``ETCompatXMLParser``; extra keyword arguments are
        forwarded to ``etree_.fromstring``. A parse failure is printed and
        re-raised. When ``schema`` is given, the first child of the
        ``s12:Body`` element is validated with ``schema.assertValid``.
        """
        compat_parser = etree_.ETCompatXMLParser()

        try:
            doc = etree_.fromstring(xmlString, parser=compat_parser, **kwargs)
        except Exception as ex:
            print('load error "{}" in "{}"'.format(ex, xmlString))
            raise
        if schema is None:
            return cls(doc=doc, rawdata=xmlString)
        body_payload = doc.find('s12:Body', nsmap)[0]
        schema.assertValid(body_payload)
        return cls(doc=doc, rawdata=xmlString)

Ejemplo n.º 13

0

Mostrar archivo

def parsexml_(infile, parser=None, **kwargs):
    """Parse ``infile`` with ``etree_.parse`` and return the document.

    An ``ETCompatXMLParser`` is created when no parser is supplied. An
    ``os.PathLike`` input is first passed through ``os.path.join``; an
    ``AttributeError`` raised during that step is ignored. Extra keyword
    arguments are forwarded to ``etree_.parse``.
    """
    # The lxml ElementTree compatible parser is the default so that, e.g.,
    # comments are ignored.
    parser = etree_.ETCompatXMLParser() if parser is None else parser
    try:
        path_like = isinstance(infile, os.PathLike)
        if path_like:
            infile = os.path.join(infile)
    except AttributeError:
        pass
    return etree_.parse(infile, parser=parser, **kwargs)

Ejemplo n.º 14

0

Mostrar archivo

    def from_dict(cls, d, return_obj=None):
        """Populate a ``MAECInstance`` from the dictionary ``d``.

        Returns ``None`` for an empty or missing ``d``. A new ``cls()`` is
        created when ``return_obj`` is falsy. The parent ``from_dict`` is
        applied first; when ``d`` has a ``'maec'`` key its value is parsed
        with a ``huge_tree`` parser and stored on ``return_obj.maec``.
        """
        if not d:
            return None
        return_obj = return_obj or cls()

        super(MAECInstance, cls).from_dict(d, return_obj)
        if 'maec' in d:
            huge_parser = etree.ETCompatXMLParser(huge_tree=True)
            maec_stream = StringIO(d['maec'])
            return_obj.maec = etree.parse(maec_stream, parser=huge_parser)

        return return_obj

Ejemplo n.º 15

0

Mostrar archivo

Archivo: open_ioc_2010_test_mechanism.py Proyecto: sidjames/crits_dependencies

    def from_dict(cls, d, return_obj=None):
        """Populate an ``OpenIOCTestMechanism`` from the dictionary ``d``.

        Returns ``None`` for an empty or missing ``d``. A new ``cls()`` is
        created when ``return_obj`` is falsy. The parent ``from_dict`` is
        applied first; when ``d`` has an ``'ioc'`` key its value is parsed
        with a ``huge_tree`` parser and stored on ``return_obj.ioc``.
        """
        if not d:
            return None
        return_obj = return_obj or cls()

        super(OpenIOCTestMechanism, cls).from_dict(d, return_obj)
        if 'ioc' in d:
            huge_parser = etree.ETCompatXMLParser(huge_tree=True)
            ioc_stream = StringIO(d['ioc'])
            return_obj.ioc = etree.parse(ioc_stream, parser=huge_parser)

        return return_obj

Ejemplo n.º 16

0

Mostrar archivo

Archivo: cableplan.py Proyecto: zhangineer/acitoolkit

def parsexml_(*args, **kwargs):
    """
    Parse an XML source with ``eTree.parse``.

    When lxml is the selected XML library and no ``parser`` keyword was
    passed, an ``ETCompatXMLParser`` is supplied.

    :param args: positional arguments forwarded to ``eTree.parse``
    :param kwargs: keyword arguments forwarded to ``eTree.parse``
    :return: the parsed document
    """
    lxml_selected = XMLParser_import_library == XMLParser_import_lxml
    if lxml_selected and 'parser' not in kwargs:
        # The ElementTree-compatible lxml parser is used so that, e.g.,
        # comments are ignored.
        kwargs['parser'] = eTree.ETCompatXMLParser()
    return eTree.parse(*args, **kwargs)

Ejemplo n.º 17

0

Mostrar archivo

def main():
    """Build a STIX package with one TTP carrying a MAEC malware instance.

    The malware instance gets a name, a type and the parsed ``MAEC_XML``
    document; it is attached to a TTP behavior, the TTP is added to a new
    ``STIXPackage`` and the package XML is printed to standard output.
    """
    maec_malware_instance = MAECInstance()
    maec_malware_instance.add_name("Poison Ivy Variant v4392-acc")
    maec_malware_instance.add_type("Remote Access Trojan")
    maec_malware_instance.maec = etree.fromstring(
        MAEC_XML, parser=etree.ETCompatXMLParser())

    ttp = TTP(title="Poison Ivy Variant v4392-acc")
    ttp.behavior = Behavior()
    ttp.behavior.add_malware_instance(maec_malware_instance)

    stix_package = STIXPackage()
    stix_package.add_ttp(ttp)

    # Function-call form: valid on Python 3 and, for a single argument,
    # prints the same text on Python 2.
    print(stix_package.to_xml())

Ejemplo n.º 18

0

Mostrar archivo

Archivo: html.py Proyecto: eunjongkim/Pythics

 def open_file(self, filename):
     """Load ``filename``, validate it when lxml is available, and lay it out.

     The frame is hidden while the file is parsed. With lxml loaded the
     tree is parsed with an ``ETCompatXMLParser`` and checked against the
     schema read from ``xmlschema_f``; otherwise it is parsed without
     validation. The root is then passed to ``self.layout`` and the frame
     is shown again.

     Returns the ``(anonymous_controls, controls)`` attributes.
     """
     self.frame.hide()
     if lxml_loaded:
         # load and parse the file
         compat_parser = ElementTree.ETCompatXMLParser()
         self.tree = ElementTree.parse(filename, parser=compat_parser)
         # validate the file against the schema
         schema_doc = ElementTree.parse(xmlschema_f)
         validator = ElementTree.XMLSchema(schema_doc)
         validator.assertValid(self.tree)
     else:
         self.tree = ElementTree.parse(filename)
     self.layout(self.tree.getroot(), [], self.main_sizer)
     self.frame.show()
     return self.anonymous_controls, self.controls

Ejemplo n.º 19

0

Mostrar archivo

def parsexml_(infile, parser=None, keep_signature=False, **kwargs):
    """Parse ``infile`` and return an ElementTree built around its root.

    An ``ETCompatXMLParser`` is created when no parser is supplied; extra
    keyword arguments are forwarded to ``etree_.parse``. Unless
    ``keep_signature`` is true, direct children of the root whose tag is an
    XML-DSig ``Signature`` are removed (done before XML comparison).
    """
    if parser is None:
        # Use the lxml ElementTree compatible parser so that, e.g.,
        #   we ignore comments.
        parser = etree_.ETCompatXMLParser()
    doc = etree_.parse(infile, parser=parser, **kwargs)
    root = doc.getroot()
    # remove Signature element before XML comparison
    if not keep_signature:
        # The second tag used to be written with a backslash continuation
        # inside the literal, which embedded the next line's indentation in
        # the tag; presumably the unbroken form was intended -- confirm.
        signature_tags = (
            "{http://www.w3.org/2000/09/xmldsig#}Signature",
            "{http://www.w3.org/2000/09/xmldsig#}ds:Signature",
        )
        # Iterate over a snapshot so removals cannot disturb the iteration.
        for child in list(root):
            if child.tag in signature_tags:
                root.remove(child)
    return etree_.ElementTree(root)

Ejemplo n.º 20

0

Mostrar archivo

def parse_lshw(raw_data):
    """Parse lshw XML output into a nested structure.

    The root element must have the tag ``list`` (its first child is then
    used) or ``node`` (used as is), compared case-insensitively; anything
    else raises ``Error``. Attributes of the elements found below that root
    are then folded into the tree as tags, text or child elements, and the
    tree is converted with ``etree_to_dict`` and passed to ``nullify``.

    Returns item ``[1]`` of the ``nullify(...)`` result.
    """
    # NOTE(review): recover=True presumably lets the parser continue past
    # malformed input -- confirm against the lxml parser documentation.
    parser = ET.ETCompatXMLParser(recover=True)
    response = ET.fromstring(raw_data, parser=parser)
    if response.tag and response.tag.upper() == 'LIST':
        response = response[0]
    elif response.tag and response.tag.upper() == 'NODE':
        pass
    else:
        raise Error('Lshw parse error.')
    # NOTE(review): './/' presumably selects every descendant element --
    # confirm the path semantics for the lxml version in use.
    for element in response.findall('.//'):
        for k in element.attrib.keys():
            try:
                v = element.attrib[k]
            except UnicodeDecodeError:
                continue   # value has bytes not possible to decode with UTF-8
            # Listed (tag, attribute) pairs: the attribute value replaces
            # the element's tag. Later lookups in this loop see the new tag.
            if (element.tag, k) in TAG_TRANSLATION_PAIRS:
                try:
                    element.tag = v
                except ValueError:
                    # the value is not accepted as a tag; keep the old tag
                    pass
                continue
            # Listed (tag, attribute) pairs: the value becomes the text.
            if (element.tag, k) in TEXT_TRANSLATION_PAIRS:
                element.text = v
                continue
            if k == 'units':
                # Move the current text into a new `value` child so the
                # `units` child appended below sits next to it.
                value = ET.Element(b'value')
                value.text = element.text
                element.text = ''
                element.append(value)
            # Any attribute not handled above becomes a child element named
            # after the attribute, holding the attribute value as text.
            child = ET.Element(k)
            child.text = v
            element.append(child)
    return nullify(
        etree_to_dict(
            response,
            _converters=[
                _nullify,
                int,
                float,
                lck.xml.converters._datetime,
                lck.xml.converters._datetime_strip_tz,
            ],
        ),
    )[1]

Ejemplo n.º 21

0

Mostrar archivo

Archivo: x3d_to_mesh.py Proyecto: rbakker/sba-dev

    def main(self, surface_x3d, faces_csv, vertices_csv, vertexlimits_csv):
        """Extract faces, vertices and vertex limits from an X3D surface.

        Reads the first ``IndexedFaceSet`` of ``surface_x3d`` and writes:

        - ``faces_csv``: the ``coordIndex`` values, one face per line, with
          the ``-1`` terminators removed;
        - ``vertices_csv``: the ``point`` values of its ``Coordinate``
          child, three comma-separated values per line;
        - ``vertexlimits_csv``: two lines holding the per-column minimum
          and maximum of the vertices loaded back from ``vertices_csv``.

        Returns a ``FancyDict`` with the three output paths.
        """
        # custom parser needed to parse files larger than approx. 10 MB.
        tree = etree.parse(surface_x3d,
                           parser=etree.ETCompatXMLParser(huge_tree=True))
        rootNode = tree.getroot()
        faceNode = rootNode.find('.//IndexedFaceSet')
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        separators = r'[\s,]+'
        faces = re.sub(separators, ',', faceNode.attrib['coordIndex'].strip())
        faces = re.sub(',-1,', '\n', faces)
        faces = re.sub(',-1$', '', faces)
        with open(faces_csv, 'w') as fp:
            fp.write(faces)
        # experimental: save binary data (works well, but gz is more useful)
        #   faces = json.loads('[['+re.sub('\n','],[',faces)+']]')
        #   faces = numpy.array(faces,numpy.dtype('<u4'))
        #   faces_dat = re.sub(r'\.csv$','',faces_csv)+'.dat';
        #   with open(faces_dat,'w') as fp:
        #     fp.write(faces.tostring())

        vertexNode = faceNode.find('Coordinate')
        with open(vertices_csv, 'w') as fp:
            vertices = re.sub(separators, ' ',
                              vertexNode.attrib['point'].strip())
            vertices = vertices.split(' ')
            # Group the flat coordinate list into lines of three values.
            vertices = [
                ','.join(vertices[i:i + 3])
                for i in range(0, len(vertices), 3)
            ]
            fp.write('\n'.join(vertices))

        V = LoadVertices().setInput(vertices_csv).getOutput('vertices')
        V = numpy.array(V, float)
        minmax = [
            ','.join([str(v) for v in V.min(axis=0)]),
            ','.join([str(v) for v in V.max(axis=0)])
        ]
        with open(vertexlimits_csv, 'w') as fp:
            fp.write('\n'.join(minmax))

        return FancyDict(faces_csv=faces_csv,
                         vertices_csv=vertices_csv,
                         vertexlimits_csv=vertexlimits_csv)

Ejemplo n.º 22

0

Mostrar archivo

Archivo: leiauteNFe_sub.py Proyecto: tiagocardosos/nfelib

def parsexml_(infile, parser=None, keep_signature=False, **kwargs):
    """Parse ``infile`` and return an ElementTree around the NFe root.

    Accepts both NFe and nfeProc documents: when the document root is an
    ``nfeProc`` element, its first child is used as the root instead. An
    ``ETCompatXMLParser`` is created when no parser is supplied; extra
    keyword arguments are forwarded to ``etree_.parse``. Unless
    ``keep_signature`` is true, direct children of the chosen root whose
    tag is an XML-DSig ``Signature`` are removed (done before XML
    comparison).
    """
    if parser is None:
        # Use the lxml ElementTree compatible parser so that, e.g.,
        #   we ignore comments.
        parser = etree_.ETCompatXMLParser()
    doc = etree_.parse(infile, parser=parser, **kwargs)
    root = doc.getroot()
    if root.tag == '{http://www.portalfiscal.inf.br/nfe}nfeProc':
        root = root[0]
    # remove Signature element before XML comparison
    if not keep_signature:
        # The second tag used to be written with a backslash continuation
        # inside the literal, which embedded the next line's indentation in
        # the tag; presumably the unbroken form was intended -- confirm.
        signature_tags = (
            "{http://www.w3.org/2000/09/xmldsig#}Signature",
            "{http://www.w3.org/2000/09/xmldsig#}ds:Signature",
        )
        # Iterate over a snapshot so removals cannot disturb the iteration.
        for child in list(root):
            if child.tag in signature_tags:
                root.remove(child)
    return etree_.ElementTree(root)

Ejemplo n.º 23

0

Mostrar archivo

def parseString(inString, silence=False):
    """Parse XML text and build the corresponding binding object.

    The text is parsed with an ``ETCompatXMLParser`` that keeps CDATA
    sections (``strip_cdata=False``). The root class is looked up with
    ``get_root_tag``; when none is found, ``valuesType`` is used. Unless
    ``silence`` is true, the built object is exported to standard output
    after an XML declaration line.

    Returns the built root object.
    """
    # The former version-dependent StringIO import was never used here and
    # has been dropped.
    parser = etree_.ETCompatXMLParser(strip_cdata=False)
    rootNode = parsexmlstring_(inString, parser)
    rootTag, rootClass = get_root_tag(rootNode)
    if rootClass is None:
        rootTag = 'valuesType'
        rootClass = supermod.valuesType
    rootObj = rootClass.factory()
    rootObj.build(rootNode)
    # Enable Python to collect the space used by the DOM.
    if not SaveElementTreeNode:
        rootNode = None
    if not silence:
        sys.stdout.write('<?xml version="1.0" ?>\n')
        rootObj.export(sys.stdout, 0, name_=rootTag, namespacedef_='')
    return rootObj

Ejemplo n.º 24

0

Mostrar archivo

Archivo: parse_gutenberg_catalog.py Proyecto: gokererdogan/gutenberg

def main():
    """Parse every Gutenberg RDF catalog entry and dump the results.

    Each sub-directory of the RDF cache is expected to contain
    ``pg<name>.rdf``; the parsed record gets the directory name as ``id``.
    All records are written to ``gutenberg.pkl`` and ``gutenberg.json`` in
    the current directory. Progress is printed every ``report_freq``
    entries.
    """
    # os.listdir() does not expand '~', so resolve the home directory
    # explicitly before using the path.
    rdf_path = os.path.expanduser('~/gutenberg/rdf-files/cache/epub')
    report_freq = 500
    books = []
    for i, book_dir in enumerate(os.listdir(rdf_path)):
        if i % report_freq == 0:
            print(i)
        doc = etree.parse(f'{rdf_path}/{book_dir}/pg{book_dir}.rdf',
                          etree.ETCompatXMLParser())
        parsed = parse_doc(doc)
        parsed['id'] = book_dir
        books.append(parsed)

    with open('gutenberg.pkl', 'wb') as f:
        pickle.dump(books, f)
    print('Saved books pickle file.')

    with open('gutenberg.json', 'w') as f:
        json.dump(books, f)
    print('Saved books json file.')

Ejemplo n.º 25

0

Mostrar archivo

Archivo: read_data.py Proyecto: skjerns/visbrain

def read_x3d(path):
    """Read x3d files.

    This code has been adapted from :
    https://github.com/INCF/Scalable-Brain-Atlas

    Parameters
    ----------
    path : string
        Full path to a .x3d file.

    Returns
    -------
    vertices : array_like
        Array of vertices of shape (n_vertices, 3)
    faces : array_like
        Array of faces of shape (n_faces, 3)
    """
    from lxml import etree
    import re
    logger.info('    X3D file detected')

    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    separators = r'[\s,]+'

    # Read root node :
    tree = etree.parse(path, parser=etree.ETCompatXMLParser(huge_tree=True))
    root_node = tree.getroot()

    # Get mesh faces :
    face_node = root_node.find('.//IndexedFaceSet')
    faces = re.sub(separators, ',', face_node.attrib['coordIndex'].strip())
    faces = re.sub(',-1,', '\n', faces)
    faces = re.sub(',-1$', '', faces)
    faces = np.array(faces.replace('\n', ',').split(',')).astype(int)
    faces = faces.reshape(int(faces.shape[0] / 3), 3)

    # Get mesh vertices :
    vertex_node = face_node.find('Coordinate')
    vertices = re.sub(separators, ' ', vertex_node.attrib['point'].strip())
    # NOTE(review): the last token is always dropped; presumably the
    # `point` attribute ends with a trailing separator -- confirm.
    vertices = np.array(vertices.split(' ')[0:-1]).astype(float)
    vertices = vertices.reshape(int(vertices.shape[0] / 3), 3)

    return vertices, faces

Ejemplo n.º 26

0

Mostrar archivo

Archivo: corfunc_state.py Proyecto: tothuynga/sasview

    def read(self, path):
        """
        Load data and corfunc information from a CanSAS file.

        :param path: The file path to read from
        :return: Data1D object, a list of Data1D objects, or None
        :raise IOError: When the file can't be found
        :raise IOError: When the file is an invalid file type
        :raise ValueError: When the length of the data vectors are inconsistent
        """
        if not os.path.isfile(path):
            # File not found
            msg = "{} is not a valid file path or doesn't exist".format(path)
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError(msg)

        output = []
        # Load file
        basename = os.path.basename(path)
        ext = os.path.splitext(basename)[1]
        if ext.lower() not in self.ext:
            raise IOError("{} is not a supported file type".format(ext))
        tree = etree.parse(path, parser=etree.ETCompatXMLParser())
        root = tree.getroot()
        entry_list = root.xpath('/ns:SASroot/ns:SASentry',
                                namespaces={'ns': CANSAS_NS})
        for entry in entry_list:
            corstate = self._parse_state(entry)

            # Entries without a corfunc state are not returned.
            if corstate is not None:
                sas_entry, _ = self._parse_entry(entry)
                sas_entry.meta_data['corstate'] = corstate
                sas_entry.filename = corstate.file
                output.append(sas_entry)

        if not output:
            return None
        if len(output) == 1:
            # A single entry also posts its state through the callback.
            self.callback(output[0].meta_data['corstate'], datainfo=output[0])
            return output[0]
        return output

Ejemplo n.º 27

0

Mostrar archivo

def parse(inFilename, silence=False):
    """Parse the XML file ``inFilename`` and build its binding object.

    Parsing keeps CDATA sections (``strip_cdata=False``). The root class
    comes from ``get_root_tag``; ``Wheel`` is used when none is found.
    Unless ``silence`` is true, the object is pretty-printed to standard
    output after an XML declaration line.

    Returns the built root object.
    """
    cdata_parser = etree_.ETCompatXMLParser(strip_cdata=False)
    doc = parsexml_(inFilename, cdata_parser)
    rootNode = doc.getroot()
    rootTag, rootClass = get_root_tag(rootNode)
    if rootClass is None:
        rootTag, rootClass = 'Wheel', supermod.Wheel
    rootObj = rootClass.factory()
    rootObj.build(rootNode)
    if not SaveElementTreeNode:
        # Drop the references so Python can collect the DOM.
        doc = rootNode = None
    if silence:
        return rootObj
    sys.stdout.write('<?xml version="1.0" ?>\n')
    rootObj.export(
        sys.stdout, 0, name_=rootTag,
        namespacedef_='xmlns:tns="http://www.example.org/wheel/"',
        pretty_print=True)
    return rootObj

Ejemplo n.º 28

0

Mostrar archivo

def parseLiteral(inFilename, silence=False):
    """Parse ``inFilename`` and emit the object as Python literal source.

    Parsing keeps CDATA sections (``strip_cdata=False``). The root class
    comes from ``get_root_tag``; ``Wheel`` is used when none is found.
    Unless ``silence`` is true, import lines and a ``rootObj = ...``
    literal produced by ``exportLiteral`` are written to standard output.

    Returns the built root object.
    """
    cdata_parser = etree_.ETCompatXMLParser(strip_cdata=False)
    doc = parsexml_(inFilename, cdata_parser)
    rootNode = doc.getroot()
    rootTag, rootClass = get_root_tag(rootNode)
    if rootClass is None:
        rootTag, rootClass = 'Wheel', supermod.Wheel
    rootObj = rootClass.factory()
    rootObj.build(rootNode)
    if not SaveElementTreeNode:
        # Drop the references so Python can collect the DOM.
        doc = rootNode = None
    if silence:
        return rootObj
    emit = sys.stdout.write
    emit('#from preserve_cdata_tags2_sup import *\n\n')
    emit('import preserve_cdata_tags2_sup as model_\n\n')
    emit('rootObj = model_.rootClass(\n')
    rootObj.exportLiteral(sys.stdout, 0, name_=rootTag)
    emit(')\n')
    return rootObj

Ejemplo n.º 29

0

Mostrar archivo

def parse_rdf(db: MongodbCache):
    """Parse every ``pg*.rdf`` file in the cache directory into ``db``.

    For each matching file in ``settings.CACHE_UNPACK_DIRECTORY`` the RDF
    document is parsed, a ``Book`` and an ``Author`` are built from it and
    inserted through ``db``; ``db.flush()`` is called once at the end.

    Raises ``ValueError`` when the part of a file name between ``pg`` and
    ``.rdf`` is not an integer.
    """
    files = [d for d in listdir(settings.CACHE_UNPACK_DIRECTORY) if d.startswith("pg") and d.endswith(".rdf")]
    total = len(files)
    # Compiled once outside the loop; the dot is escaped so that only a
    # literal ".rdf" ends the captured id.
    name_pattern = re.compile(r"pg(.*?)\.rdf")

    for index, file_name in enumerate(files):
        file_name_stripped = name_pattern.search(file_name).group(1)

        Utils.update_progress_bar(f"Processing progress: {index} / {total}")
        file_path = path.join(settings.CACHE_UNPACK_DIRECTORY, file_name)
        doc = etree.parse(file_path, etree.ETCompatXMLParser())

        gutenberg_book_id = int(file_name_stripped)
        author_aliases = parse_author(doc)
        gutenberg_author_id = parse_author_id(doc)

        newbook = Book(
            gutenberg_id=gutenberg_book_id,
            number_of_downloads=parse_downloads(doc),
            date_issued=parse_date_issued(doc),
            title=parse_title(doc),
            doc_type=parse_type(doc),
            language=parse_languages(doc),
            author=author_aliases,
            gutenberg_author_id=gutenberg_author_id,
            formats=parse_formats(doc),
            publisher=parse_publisher(doc),
            rights=parse_rights(doc),
            subjects=parse_subjects(doc),
            bookshelves=parse_bookshelves(doc),
        )

        author = Author(
            gutenberg_id=gutenberg_author_id,
            aliases=author_aliases,
        )

        db.insert_book(newbook)
        db.insert_author(author)

    db.flush()

Ejemplo n.º 30

0

Mostrar archivo

    def parse_xml_to_obj(self, xml_file, check_version=True, check_root=True):
        """Create a STIX binding object from the supplied XML file.

        Arguments:
        xml_file -- A filename/path or a file-like object representing a STIX instance document
        check_version -- Inspect the version before parsing.
        check_root -- Inspect the root element before parsing.

        """
        tree = etree.parse(
            xml_file, parser=etree.ETCompatXMLParser(huge_tree=True))

        # Optional sanity checks, run in this order before building.
        if check_version:
            self._check_version(tree)
        if check_root:
            self._check_root(tree)

        import stix.bindings.stix_core as stix_core_binding
        package_binding = stix_core_binding.STIXType().factory()
        package_binding.build(tree.getroot())
        return package_binding