Ejemplo n.º 1
0
def main(input_file, output_file):
    """Run ``process`` over every screen and its children, then save the tree.

    The XML document at ``input_file`` is parsed; each ``screen`` element
    directly below the root is announced on stdout and handed to ``process``,
    followed by each of its child elements. The tree is finally written to
    ``output_file``.
    """
    document_root = ElementTree().parse(input_file)
    screens = document_root.findall('screen')
    for current_screen in screens:
        print("====", current_screen.get('name'))
        process(current_screen)
        for child in current_screen:
            process(child)
    ElementTree(document_root).write(output_file)
Ejemplo n.º 2
0
    def test_from_element(self):
        """Check that a tree built around an element exposes that element.

        Covers a freshly created element as well as the root of a document
        parsed from ``sample.xml``; in the latter case the new tree must
        report the same base and document URL as the parsed tree.
        """
        fresh_tree = ElementTree(Element('document'))
        self.assertEqual('document', fresh_tree.getroot().tag)

        with open('sample.xml') as sample:
            parsed_tree = ElementTree(file=sample)
        parsed_root = parsed_tree.getroot()

        rewrapped = ElementTree(parsed_root)
        self.assertEqual(parsed_root.base, rewrapped.getroot().base)
        self.assertEqual(parsed_tree.docinfo.URL, rewrapped.docinfo.URL)
Ejemplo n.º 3
0
    def parse(self, data):
        """ Parse a webdav reply. Retrieve any resources as objects
            and return them as a list.

            Every ``response`` element found in the reply becomes one
            ``Response`` object that is appended to
            ``self.response_objects``; each ``lockentry`` below it becomes a
            ``Lock`` appended to that response's ``locks``.

            :param data: The webdav reply to parse
            :type data: String


            :return: self.response_objects

        """
        # Properties copied verbatim from the element of the same name;
        # a missing element yields None.
        simple_properties = (
            'creationdate',
            'getcontentlength',
            'getlastmodified',
            'getetag',
            'getcontenttype',
            'status',
        )

        def text_of(tree, tag):
            # Text of the first matching descendant, or None when absent.
            return getattr(tree.find('//' + tag), 'text', None)

        reply_tree = ElementTree(HTML(data))
        for response in reply_tree.findall("//response"):
            resp_tree = ElementTree(response)
            new_response = Response()
            new_response.href = resp_tree.find('//href').text
            if resp_tree.find('//collection') is not None:
                new_response.resourcetype = 'collection'
            else:
                new_response.resourcetype = 'resource'
                # The executable property is only looked up for
                # non-collection resources.
                new_response.executable = text_of(resp_tree, 'executable')
            for name in simple_properties:
                setattr(new_response, name, text_of(resp_tree, name))

            # Now we have the properties that are easy to get,
            # lets get the lock information
            for lock_entry in resp_tree.findall('//lockentry'):
                entry_tree = ElementTree(lock_entry)
                lock_obj = Lock()
                # The lock type/scope is the tag of the last child element.
                # Index the element directly rather than going through the
                # deprecated getchildren().
                lock_obj.locktype = entry_tree.find('//locktype')[-1].tag
                lock_obj.lockscope = entry_tree.find('//lockscope')[-1].tag
                new_response.locks.append(lock_obj)

            self.response_objects.append(new_response)

        return self.response_objects
Ejemplo n.º 4
0
    def test_etree_from_file(self):
        """Check root tag, prefix and nsmap of trees parsed from open files.

        ``sample.xml`` is expected to have a namespaced root bound to the
        ``x`` prefix; ``hello.xml`` is expected to have a root without a
        prefix and with an empty namespace map.
        """
        with open('sample.xml') as sample:
            namespaced_tree = ElementTree(file=sample)
        namespaced_root = namespaced_tree.getroot()
        self.assertEqual('{http://example.tld}document', namespaced_root.tag)
        self.assertEqual('x', namespaced_root.prefix)
        self.assertTrue('x' in namespaced_root.nsmap)

        with open('hello.xml') as hello:
            plain_tree = ElementTree(file=hello)
        plain_root = plain_tree.getroot()
        self.assertEqual('hello', plain_root.tag)
        self.assertEqual(None, plain_root.prefix)
        self.assertEqual({}, plain_root.nsmap)
Ejemplo n.º 5
0
def export(info_root, config_root, package_path):
    """
    Exports the root elements and saves them to installer files.

    Children listed in a node's ``hidden_children`` are detached while the
    files are written and re-attached at their original positions afterwards,
    even when writing fails.

    :param info_root: The root element of the info.xml file.
    :param config_root: The root element of the moduleconfig.xml file.
    :param package_path: The path to save the files to.
    """
    # Entries are (parent, hidden child, index the child had when detached).
    hidden_nodes_pairs = []
    for root in (info_root, config_root):
        for node in root:
            if node.hidden_children:
                for hidden_node in node.hidden_children:
                    hidden_nodes_pairs.append(
                        (node, hidden_node, node.index(hidden_node)))
                    node.remove(hidden_node)

    try:
        try:
            fomod_folder = _check_file(package_path, "fomod")
        except MissingFileError as e:
            makedirs(join(package_path, e.file))
            # Keep only the folder name: it is joined with package_path just
            # below, and joining an already-joined relative path would
            # repeat the package prefix.
            fomod_folder = e.file

        fomod_folder_path = join(package_path, fomod_folder)

        try:
            info_file = _check_file(fomod_folder_path, "Info.xml")
        except MissingFileError as e:
            info_file = e.file

        try:
            config_file = _check_file(fomod_folder_path, "ModuleConfig.xml")
        except MissingFileError as e:
            config_file = e.file

        info_path = join(fomod_folder_path, info_file)
        config_path = join(fomod_folder_path, config_file)

        with open(info_path, "wb") as infofile:
            info_tree = ElementTree(info_root)
            info_tree.write(infofile, pretty_print=True)

        with open(config_path, "wb") as configfile:
            config_tree = ElementTree(config_root)
            config_tree.write(configfile, pretty_print=True)
    finally:
        # Each index was recorded after the earlier removals had already
        # happened, so undo them in reverse order to put every child back
        # on its original position.
        for parent, hidden_node, position in reversed(hidden_nodes_pairs):
            parent.insert(position, hidden_node)
Ejemplo n.º 6
0
def envelope(**kwargs):
    """Create OAI-PMH envelope for response.

    The keyword arguments become attributes of the ``request`` element:
    ``from_`` and ``until`` values are converted to datestamps and a
    ``resumptionToken`` value is reduced to its ``token`` entry.

    :returns: A ``(tree, root element)`` tuple.
    """
    root_tag = etree.QName(NS_OAIPMH, 'OAI-PMH')
    e_oaipmh = Element(root_tag, nsmap=NSMAP)
    schema_location = '{0} {1}'.format(NS_OAIPMH, NS_OAIPMH_XSD)
    e_oaipmh.set(etree.QName(NS_XSI, 'schemaLocation'), schema_location)
    e_tree = ElementTree(element=e_oaipmh)

    # Optional stylesheet reference, placed in front of the root element.
    xsl_url = current_app.config['OAISERVER_XSL_URL']
    if xsl_url:
        stylesheet = etree.ProcessingInstruction(
            'xml-stylesheet',
            'type="text/xsl" href="{0}"'.format(xsl_url))
        e_oaipmh.addprevious(stylesheet)

    e_responseDate = SubElement(
        e_oaipmh, etree.QName(NS_OAIPMH, 'responseDate'))
    # date should be first possible moment
    e_responseDate.text = datetime_to_datestamp(datetime.utcnow())

    e_request = SubElement(e_oaipmh, etree.QName(NS_OAIPMH, 'request'))
    for key, value in kwargs.items():
        if key in ('from_', 'until'):
            value = datetime_to_datestamp(value)
        elif key == 'resumptionToken':
            value = value['token']
        e_request.set(key, value)
    e_request.text = url_for('invenio_oaiserver.response', _external=True)
    return e_tree, e_oaipmh
Ejemplo n.º 7
0
def export_tree(record, **kwargs):
    """Create an OAI-PMH response tree holding a single record.

    A ``record`` element with header and metadata is added to the element
    returned by ``verb(**kwargs)``; the children of that tree's root are
    then moved under a freshly created ``OAI-PMH`` root element.

    :param record: Mapping read for ``control_number``, ``updated`` and
        ``_oai.sets``; it is also passed to ``dumps_etree``.
    :param kwargs: Passed to ``verb``; ``metadataPrefix`` selects the
        schema type used for the metadata.
    :returns: The tree built around the new ``OAI-PMH`` root.
    """
    e_tree, e_getrecord = verb(**kwargs)
    e_record = SubElement(e_getrecord, etree.QName(NS_OAIPMH, 'record'))

    import dateutil.parser
    header(
        e_record,
        identifier=record.get('control_number'),
        datestamp=dateutil.parser.parse(record.get('updated')),
        sets=record.get('_oai', {}).get('sets', []),
    )
    e_metadata = SubElement(e_record, etree.QName(NS_OAIPMH, 'metadata'))
    e_metadata.append(
        dumps_etree(records=record, schema_type=kwargs['metadataPrefix']))

    root = e_tree.getroot()
    e_oaipmh = Element(etree.QName(NS_OAIPMH, 'OAI-PMH'), nsmap=NSMAP)
    e_oaipmh.set(etree.QName(NS_XSI, 'schemaLocation'),
                 '{0} {1}'.format(NS_OAIPMH, NS_OAIPMH_XSD))
    e_tree = ElementTree(element=e_oaipmh)
    # Snapshot the children before re-parenting them; list(root) replaces
    # the deprecated getchildren().
    for e in list(root):
        e_oaipmh.append(e)
    root.clear()

    return e_tree
Ejemplo n.º 8
0
 def __str__ (self) :
     """Serialise the ``HUHU`` header elements under a ``Header`` root.

     Returns the pretty-printed document as a unicode string.
     """
     header_root = Element ('Header')
     document = ElementTree ()
     document._setroot (header_root)
     root = document.getroot ()
     for child in self.header ('HUHU') :
         root.append (child)
     return tostring (document, pretty_print = True, encoding = 'unicode')
Ejemplo n.º 9
0
def make_tree(html):
    """
    Returns an lxml tree for the given HTML string (either Unicode or
    bytestring).

    This is better than lxml.html.document_fromstring because this takes care
    of a few known issues.

    Input that is empty after stripping yields a tree wrapping an empty
    ``body`` element instead of being handed to the parser.
    """
    # Normalize newlines. Otherwise, "\r" gets converted to an HTML entity
    # by lxml.
    html = re.sub('\r\n', '\n', html)

    # Remove <?xml> declaration in Unicode objects, because it causes an error:
    # "ValueError: Unicode strings with encoding declaration are not supported."
    # Note that the error only occurs if the <?xml> tag has an "encoding"
    # attribute, but we remove it in all cases, as there's no downside to
    # removing it.
    if isinstance(html, unicode):
        html = re.sub(r'^\s*<\?xml\s+.*?\?>', '', html)
    else:
        html = UnicodeDammit(html, isHTML=True).unicode
    html = html.strip()
    if html:
        try:
            return document_fromstring(html)
        except Exception:
            # Fall back to using the (slow) BeautifulSoup parser. Only
            # ordinary errors trigger the fallback, so KeyboardInterrupt and
            # SystemExit still propagate.
            return lxml.html.soupparser.fromstring(html)
    else:
        root = Element('body')
        root.text = u''
        return ElementTree(root)
Ejemplo n.º 10
0
def write_nfo(root_name, nfo_fields, media_filename, link="", filename=None):
    """
    Write the NFO file

    Each non-empty field becomes one child element per value. When ``link``
    is given it is appended as plain text after the XML document.

    Args:
        root_name (str): NFO root name.
        nfo_fields (dict): Fields. A value may be a single item or a list;
            falsy values are skipped.
        media_filename (str): Media file name. Without ``filename`` the NFO
            is written to ``<media_filename>.nfo``.
        link (str): Scrapper URL link.
        filename (str): NFO file name; when given it is joined onto
            ``media_filename``.
    """
    nfo_root = Element(root_name)

    for field_name, values in nfo_fields.items():
        if not values:
            continue
        if not isinstance(values, list):
            values = [values]
        for value in values:
            SubElement(nfo_root, field_name).text = f"{value}"

    if not filename:
        filename = f"{media_filename}.nfo"
    else:
        filename = join(media_filename, filename)

    ElementTree(nfo_root).write(filename,
                                encoding="utf-8",
                                xml_declaration=True,
                                pretty_print=True)

    if link:
        # Append with the same encoding the XML was written in; the platform
        # default could otherwise produce a mixed-encoding file.
        with open(filename, "at", encoding="utf-8") as nfo_file:
            nfo_file.write(link)
Ejemplo n.º 11
0
    def delete_record(self, identifier: str) -> None:
        """
        Deletes an existing record

        Uses the CSW transactional profile to delete an existing record from a CSW catalogue.

        :type identifier str
        :param identifier: ISO 19115 file identifier
        :raises RecordServerException: the response does not report exactly one deleted record
        :raises CSWDatabaseNotInitialisedException: the server answered "Catalogue not yet available."
        :raises CSWAuthMissingException: the server answered "Missing authorisation token."
        :raises CSWAuthInsufficientException: the server answered "Insufficient authorisation token."
        """
        _csw = self._get_client()
        try:
            _csw.transaction(ttype=CSWTransactionType.DELETE.value,
                             identifier=identifier)
            _csw.results["deleted"] = int(
                ElementTree(fromstring(_csw.response)).xpath(
                    "/csw:TransactionResponse/csw:TransactionSummary/csw:totalDeleted/text()",
                    namespaces=csw_namespaces,
                )[0])
            # noinspection PyTypeChecker
            if _csw.results["deleted"] != 1:
                raise RecordServerException()
        except HTTPError as e:
            if e.response.content.decode() == "Catalogue not yet available.":
                raise CSWDatabaseNotInitialisedException() from e
            # Unrecognised HTTP failures must not look like a successful delete.
            raise
        except XMLSyntaxError as e:
            # A non-XML body may be a plain-text authorisation error.
            reply = _csw.response.decode()
            if reply == "Missing authorisation token.":
                raise CSWAuthMissingException() from e
            if reply == "Insufficient authorisation token.":
                raise CSWAuthInsufficientException() from e
            # Anything else is an unexpected response; let the caller see it.
            raise
Ejemplo n.º 12
0
Archivo: po.py Proyecto: pete-woods/hud
    def save_xml(self, path):
        """Write a ``keywordMapping`` document for the original entries.

        One ``mapping`` element is emitted per entry that is neither
        obsolete nor a keyword entry itself. Keywords already present in the
        mappings read from ``path`` are kept; other entries get two blank
        ``keyword`` placeholders.
        """
        existing = self._read_existing_mappings(path)
        keyword_mapping = Element('keywordMapping')

        for entry in self.original:
            # No point adding obsolete entries, and keyword entries must not
            # be added again.
            if entry.obsolete or entry.msgid.startswith(u'hud-keywords:'):
                continue

            mapping = SubElement(keyword_mapping,
                                 'mapping',
                                 original=entry.msgid)

            # Either use the old mappings or add some blank ones
            if entry.msgid in existing:
                keywords = existing[entry.msgid]
            else:
                keywords = ('', '')
            for keyword in keywords:
                SubElement(mapping, 'keyword', name=keyword)

        document = ElementTree(keyword_mapping)
        document.write(path,
                       encoding='utf-8',
                       xml_declaration=True,
                       pretty_print=True)
Ejemplo n.º 13
0
 def __init__(self, file):
     """Load the XML database at ``file``, or start with an empty ``db`` root.

     :param file: Path of the XML file; it is remembered in ``self.file``
         whether or not it could be loaded.
     """
     if os.path.isfile(file):
         self.root = ElementTree(file=file).getroot()
     else:
         # Interpolate the path into the message; passing it as a second
         # print argument left a literal "%s" in the output.
         print('Cannot open file %s' % (file,))
         self.root = Element("db")
     self.file = file
Ejemplo n.º 14
0
def getterm(request, lang, code):
    """Query the DeCS service and return term data as a JSON response.

    When the reply has no ``tree/self/term_list/term`` element, the terms
    of ``tree/term_list`` for ``lang`` are listed. Otherwise a single
    multilingual entry is built from the record's descriptors.

    :param request: The incoming request (not read here).
    :param lang: Language code sent to the service and used to select the
        term list.
    :param code: Tree id to look up; a falsy value is sent as ``''``.
    :returns: ``HttpResponse`` with mimetype ``application/json``.
    """
    params = urllib.urlencode({
        'tree_id': code or '',
        'lang': lang,
    })
    resource = urllib.urlopen(settings.DECS_SERVICE, params)

    tree = ElementTree()
    try:
        tree.parse(resource)
    finally:
        # The document is fully read by parse(); release the connection
        # even when parsing fails.
        resource.close()

    result = tree.find("decsws_response/tree/self/term_list/term")
    if result is None:
        result = tree.findall(
            'decsws_response/tree/term_list[@lang="%s"]/term' % lang)
        json = '[%s]' % ','.join(
            (JSON_TERM % (r.text.capitalize(), r.attrib['tree_id'])
             for r in result))
    else:
        descriptors = tree.findall(
            'decsws_response/record_list/record/descriptor_list/descriptor')
        description = ','.join(
            ['"%s":"%s"' % (d.attrib['lang'], d.text) for d in descriptors])
        json = '[%s]' % (JSON_MULTILINGUAL_TERM %
                         (description, result.attrib['tree_id']))

    return HttpResponse(json, mimetype='application/json')
 def _processRecords(self, lxmlNode):
     """Generator passing each item of the current verb's response to ``self._add``.

     For every item the header is located (either a child of the item or
     the item itself), identifier and datestamp are read from it, and the
     result of ``self._add`` is yielded, followed by a bare ``yield``.
     Once all items are done the resumption token is stored on
     ``self._resumptionToken``.

     Raises ``IndexError`` when an item has no header child and is not a
     header itself. Errors from ``self._add`` are logged, recorded in
     ``self._errorState`` and re-raised.
     """
     verbNode = xpathFirst(lxmlNode, "/oai:OAI-PMH/oai:%s" % self._verb)
     for item in verbNode.iterchildren(tag=VERB_TAGNAME[self._verb]):
         # First header child, if there is one.
         header = next(
             (h for h in item.iterchildren() if h.tag == HEADER_TAG), None)
         if header is None:
             # No header child: the item itself has to be the header.
             if item.tag != HEADER_TAG:
                 raise IndexError("Invalid oai header")
             header = item
         for child in header.iterchildren():
             tag = child.tag
             if tag == IDENTIFIER_TAG:
                 identifier = child.text
             elif tag == DATESTAMP_TAG:
                 datestamp = child.text
         try:
             yield self._add(identifier=identifier,
                             lxmlNode=ElementTree(item),
                             datestamp=datestamp)
         except Exception as e:
             self._logError(format_exc())
             self._logError("While processing:")
             self._logError(lxmltostring(item))
             self._errorState = "ERROR while processing '%s': %s" % (
                 identifier, str(e))
             raise
         yield  # some room for others
     self._resumptionToken = xpathFirst(verbNode,
                                        "oai:resumptionToken/text()")
Ejemplo n.º 16
0
    def dumpVOCAnnotations(output_folder, filename, size, names,
                           bounding_boxes):
        """Write an ``annotation`` XML file describing one image.

        The file is written to
        ``<output_folder>/annotations/<filename>.xml``.

        ``size`` is indexed as ``[0]`` height, ``[1]`` width, ``[2]`` depth.
        ``names`` and ``bounding_boxes`` are paired up; each box is indexed
        as ``[0]`` xmin, ``[1]`` ymin, ``[2]`` xmax, ``[3]`` ymax.
        """
        def add_text(parent, tag, text):
            # Append a child element that carries only text.
            SubElement(parent, tag).text = text

        annotation = Element('annotation')

        add_text(annotation, 'folder', 'images')
        add_text(annotation, 'filename', filename + '.jpg')
        add_text(annotation, 'path', os.path.join(output_folder, 'images'))

        size_node = SubElement(annotation, 'size')
        for tag, index in (('width', 1), ('height', 0), ('depth', 2)):
            add_text(size_node, tag, str(size[index]))

        add_text(annotation, 'segmented', '0')

        corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
        for name, bb in zip(names, bounding_boxes):
            object_node = SubElement(annotation, 'object')
            add_text(object_node, 'name', name)
            add_text(object_node, 'pose', 'Unspecified')
            add_text(object_node, 'truncated', '0')
            add_text(object_node, 'difficult', '0')

            box_node = SubElement(object_node, 'bndbox')
            for index, tag in enumerate(corner_tags):
                add_text(box_node, tag, str(bb[index]))

        target = os.path.join(output_folder, 'annotations', filename) + '.xml'
        ElementTree(annotation).write(target, pretty_print=True)
Ejemplo n.º 17
0
    def toXmlFileCanonical(elem, file, *args, **kwds):
        """Write an element to an XML file in canonical (C14N) form.

        The element is wrapped in a tree and serialised with ``write_c14n``;
        extra positional and keyword arguments are passed through unchanged.

        @see: All arguments are the same as C{toXmlFile}.
        """
        canonical_tree = ElementTree(elem)
        canonical_tree.write_c14n(file, *args, **kwds)
    def exportXMLToFile(self):
        """Save a copy of the selected item's node to a user-chosen XML file.

        A "Save As..." dialog is shown, starting in
        ``$OPUS_HOME/project_configs`` when ``OPUS_HOME`` is set. If the
        user cancels, nothing is written. Otherwise a deep copy of the node
        is placed under an ``opus_project`` root and written to the chosen
        path.
        """
        assert self.has_selected_item()

        # Ask the users where they want to save the file
        start_dir = ''
        opus_home = os.environ.get('OPUS_HOME')
        if opus_home:
            candidate_dir = os.path.join(opus_home, 'project_configs')
            # NOTE(review): a joined path is never empty, so this guard
            # always passes; an existence check may have been intended.
            if candidate_dir:
                start_dir = candidate_dir
        dialog = QFileDialog()
        name_filter = QString("*.xml")
        chosen = dialog.getSaveFileName(self.manager.base_widget,
                                        QString("Save As..."),
                                        QString(start_dir), name_filter)
        # Check for cancel
        if len(chosen) == 0:
            return
        chosen_info = QFileInfo(QString(chosen))
        base_name = chosen_info.fileName().trimmed()
        directory = chosen_info.absolutePath().trimmed()
        save_name = os.path.join(str(directory), str(base_name))

        # proper root node for XmlConfiguration
        project_root = Element('opus_project')
        import copy
        node_copy = copy.deepcopy(self.selected_item().node)
        project_root.append(node_copy)

        # Write out the file
        ElementTree(project_root).write(save_name)
Ejemplo n.º 19
0
def create_mlt(talk, output_file, dv_frame_rate):
    """Write an ``mlt`` document with one producer and playlist entry per cut.

    Every item of ``talk["cut_list"]`` yields a ``producer`` element whose
    ``resource`` property is the joined ``filepath``/``filename``, plus an
    ``entry`` in ``playlist0``. Truthy ``in``/``out`` values are converted
    to frames as whole seconds times ``dv_frame_rate``.
    """
    mlt = Element('mlt')
    playlist = Element("playlist", id="playlist0")
    for cut_file in talk["cut_list"]:
        # Create the producer
        producer = SubElement(mlt, 'producer', id=cut_file["filename"])
        resource = SubElement(producer, "property", name="resource")
        resource.text = os.path.join(cut_file["filepath"],
                                     cut_file["filename"])

        # Create the playlist entry
        entry_attributes = {'producer': cut_file['filename']}
        # Check if we have any cuts, and calculate the appropriate frame
        for bound in ('in', 'out'):
            if bound in cut_file and cut_file[bound]:
                whole_seconds = int(cut_file[bound].total_seconds())
                entry_attributes[bound] = str(whole_seconds * dv_frame_rate)
        SubElement(playlist, "entry", entry_attributes)

    # The playlist must be after the producers
    mlt.append(playlist)

    # Write out the tree
    document = ElementTree(mlt)
    document.write(output_file, pretty_print=True)
Ejemplo n.º 20
0
def write_sbml_model(cobra_model, filename, use_fbc_package=True, **kwargs):
    """Write ``cobra_model`` as SBML to ``filename``.

    :param cobra_model: Model passed to ``model_to_xml`` (or
        ``write_sbml2``).
    :param filename: Either an object with a ``write`` method, which is
        written to and left open, or a path. Paths ending in ``.gz`` or
        ``.bz2`` are written compressed.
    :param use_fbc_package: When false the work is delegated to
        ``write_sbml2``, which requires libSBML.
    :param kwargs: Forwarded to ``model_to_xml`` / ``write_sbml2``.
    :raises Exception: non-fbc output requested without libSBML.
    """
    if not use_fbc_package:
        if libsbml is None:
            raise Exception("libSBML required to write non-fbc models")
        write_sbml2(cobra_model, filename, use_fbc_package=False, **kwargs)
        return
    # create xml
    xml = model_to_xml(cobra_model, **kwargs)
    write_args = {"encoding": "UTF-8", "xml_declaration": True}
    if _with_lxml:
        write_args["pretty_print"] = True
    else:
        indent_xml(xml)
    # write xml to file
    if hasattr(filename, "write"):
        # Caller-owned stream: write to it but leave closing to the caller.
        ElementTree(xml).write(filename, **write_args)
        return
    if filename.endswith(".gz"):
        xmlfile = GzipFile(filename, "wb")
    elif filename.endswith(".bz2"):
        xmlfile = BZ2File(filename, "wb")
    else:
        xmlfile = open(filename, "wb")
    try:
        ElementTree(xml).write(xmlfile, **write_args)
    finally:
        # Close the file we opened even when serialisation fails.
        xmlfile.close()
Ejemplo n.º 21
0
 def test_propfind_all_names(self):
     """A ``propname`` PROPFIND on the sub object answers 207 with all property names."""
     self.sub_object.get_descendants.return_value += [self.sub_object]
     request = Mock(META={})
     path = 'collection/sub_object'
     view = DavView(base_url='/base/', path=path, request=request, acl_class=FullAcl, xml_pretty_print=True)
     view.__dict__['resource'] = self.sub_object

     # Request body: <propfind><propname/></propfind>
     request_tree = ElementTree(D.propfind(D.propname()))
     evaluator = etree.XPathDocumentEvaluator(
         request_tree, namespaces=WEBDAV_NSMAP)
     resp = view.propfind(request, path, evaluator)

     self.assertEqual(resp.status_code, 207)

     expected_props = D.prop(
         D.getcontentlength(),
         D.creationdate(),
         D.getlastmodified(),
         D.resourcetype(),
         D.displayname(),
     )
     expected_document = D.multistatus(
         D.response(
             D.href('/base/collection/sub_object'),
             D.propstat(
                 expected_props,
                 D.status("HTTP/1.1 200 OK")
             )
         ),
     )
     expected_content = etree.tostring(
         expected_document,
         pretty_print=True, xml_declaration=True, encoding='utf-8')
     self.assertEqual(resp.content, expected_content)
Ejemplo n.º 22
0
    def __init__(self, parent_widget):
        """Build the dialog and show the database server configurations.

        The configuration is read from
        ``$OPUS_HOME/settings/database_server_configurations.xml`` and shown
        in an ``XmlView`` added to the dialog's grid layout. If reading
        raises ``IOError`` an error box is shown, ``xml_root`` is set to
        ``None`` and the stored file name is cleared.

        :param parent_widget: Parent passed on to ``QDialog.__init__``.
        """
        QDialog.__init__(self, parent_widget)
        self.setupUi(self)

        settings_directory = os.path.join(os.environ['OPUS_HOME'], 'settings')
        self._config_filename = os.path.join(settings_directory, 'database_server_configurations.xml')
        try:
            root = ElementTree(file=self._config_filename).getroot()
            view = XmlView(self)
            model = XmlModel(root)
            delegate = XmlItemDelegate(view)
            view.setModel(model)
            # Turns out that Qt Garbage collects the model (and delegate) if we don't explicitly
            # bind it to a Python object in addition to using the PyQt .setModel() method.
            view._model = model
            view._delegate = delegate
            view.setItemDelegate(delegate)
            view.openDefaultItems()

            self.gridlayout.addWidget(view)

            self.tree_view = view
            self.xml_root = root
            return

        # "except X as name" is accepted by Python 2.6+ and Python 3,
        # unlike the comma form.
        except IOError as ex:
            MessageBox.error(mainwindow = self,
                          text = 'Could not initialize Database Settings',
                          detailed_text = str(ex))
            self.xml_root = None
            self._config_filename = ''
            self.configFile = None
Ejemplo n.º 23
0
def serialize_browse_report(browse_report, stream=None, pretty_print=False):
    """Serialise ``browse_report`` as a ``browseReport`` XML document.

    The root carries ``version="1.3"`` and is followed by the responsible
    organisation name, the ISO-formatted date/time, the browse type and one
    serialised element per browse in the report.

    :param browse_report: Iterable report object providing
        ``responsible_org_name``, ``date_time`` and ``browse_type``.
    :param stream: Target to write to; a new ``StringIO`` is used when it
        is falsy.
    :param pretty_print: Forwarded to the tree's ``write``.
    :returns: The stream that was written to.
    """
    if not stream:
        stream = StringIO()

    report_root = Element(ns_rep("browseReport"),
                          nsmap={"rep": ns_rep.uri},
                          attrib={"version": "1.3"})

    def add_text(tag, text):
        # Append a namespaced child of the root that carries only text.
        SubElement(report_root, ns_rep(tag)).text = text

    add_text("responsibleOrgName", browse_report.responsible_org_name)
    add_text("dateTime", browse_report.date_time.isoformat("T"))
    add_text("browseType", browse_report.browse_type)

    for browse in browse_report:
        browse_elem = _serialize_browse(browse)
        report_root.append(browse_elem)

    document = ElementTree(report_root)
    document.write(stream,
                   pretty_print=pretty_print,
                   encoding="utf-8",
                   xml_declaration=True)

    return stream
    def _to_xmltv(
        self, channel_ids: List[str], days: int, offset: int
    ) -> ElementTree:
        """Build the XMLTV tree for the requested channels and period.

        The ``tv`` root lists one channel element per channel that has at
        least one program (in order of first appearance), followed by all
        program elements. ``None`` programs are skipped.
        """
        root_attributes = {
            "source-info-name": "Bouygues Telecom",
            "source-info-url": "https://www.bouyguestelecom.fr/tv-direct/",
            "source-data-url": self._API_URL,
            "generator-info-name": self._generator,
            "generator-info-url": self._generator_url,
        }
        xmltv = self._xmltv_element("tv", **root_attributes)

        channels_by_id = {}  # type: Dict[str, Element]
        programs = []

        for program in self._get_xmltv_programs(channel_ids, days, offset):
            if program is None:
                continue
            channel_id = program.get("channel")
            # Build each channel element only once, on first use.
            if channel_id not in channels_by_id:
                channels_by_id[channel_id] = self._to_xmltv_channel(channel_id)
            programs.append(program)

        # Channels first, then programs.
        xmltv.extend(channels_by_id.values())
        xmltv.extend(programs)

        return ElementTree(xmltv)
Ejemplo n.º 25
0
    def create_pascalvoc_xml_tree(self, img_file, bbxs):
        """Build an ``annotation`` XML tree for ``img_file``.

        The tree records the image's parent folder name, file name and full
        path, an ``Unknown`` source database, the size taken from
        ``self.resize_w`` / ``self.resize_h`` with depth ``1``, and a
        ``segmented`` flag of ``0``. Each entry of ``bbxs`` is then added
        through ``self._create_pascalvoc_object``.

        :returns: The tree wrapping the ``annotation`` root.
        """
        parent_dir, filename = os.path.split(img_file)
        folder = os.path.split(parent_dir)[1]

        def add_text(parent, tag, text):
            # Append a child element that carries only text.
            child = SubElement(parent, tag)
            child.text = text

        annotation = Element('annotation')

        # Folder, filename, and path
        add_text(annotation, 'folder', folder)
        add_text(annotation, 'filename', filename)
        add_text(annotation, 'path', img_file)

        # Data source
        source = SubElement(annotation, 'source')
        add_text(source, 'database', "Unknown")

        # Image size and segmented
        size = SubElement(annotation, 'size')
        add_text(size, 'width', str(self.resize_w))
        add_text(size, 'height', str(self.resize_h))
        add_text(size, 'depth', "1")
        add_text(annotation, 'segmented', "0")

        for bbx in bbxs:
            self._create_pascalvoc_object(annotation, bbx)

        return ElementTree(annotation)
Ejemplo n.º 26
0
    def extract_custom_properties_data_from_eventdb(self, curs):
        """Collect custom-property data from the skipped-recipient events.

        Runs a fixed query, parses the first column of every fetched row as
        an XML document and walks the ``prop`` children of its root. Inside
        a ``prop``, a ``name`` child opens a new entry in the result and
        every child other than ``name``/``value`` is stored under that
        entry, keyed by its lower-cased tag.

        :param curs: Cursor providing ``execute`` and ``fetchall``.
        :returns: Dict mapping each property name to a dict of its fields.
        """
        import io

        query = "SELECT CUSTOM_PROPERTIES FROM E_RECIPIENT_SKIPPED WHERE LAUNCH_ID=19081 ORDER BY EVENT_STORED_DT"
        curs.execute(query)
        custom_properties = curs.fetchall()
        xml_tag = "<?xml version='1.0'?>"
        tree = ElementTree()

        data = {}
        for val in custom_properties:
            # parse() expects a file name or file object, so the row's XML
            # text is fed through an in-memory buffer.
            document = xml_tag + str(val[0])
            tree.parse(io.BytesIO(document.encode("utf-8")))
            for row in tree.findall("prop"):
                for elem in row.findall("*"):
                    if elem.tag == "name":
                        name = elem.text
                        data[name] = {}
                    elif elem.tag == "value":
                        # NOTE(review): the value text becomes its own empty
                        # top-level entry, as before; confirm this is
                        # intended rather than storing it under the name.
                        value = elem.text
                        data[value] = {}
                    else:
                        data[name][elem.tag.lower()] = elem.text
        return data
Ejemplo n.º 27
0
    def update_record(self, record: str) -> None:
        """
        Updates an existing record

        Uses the CSW transactional profile to update an existing record in a CSW catalogue.

        This method requires complete/replacement records, partial record updates are not supported.

        :type record str
        :param record: ISO 19115-2 record encoded as an XML string
        :raises RecordServerException: the response does not report exactly one updated record
        :raises CSWDatabaseNotInitialisedException: the server answered "Catalogue not yet available."
        :raises CSWAuthMissingException: the server answered "Missing authorisation token."
        :raises CSWAuthInsufficientException: the server answered "Insufficient authorisation token."
        """
        _csw = self._get_client()
        try:
            _csw.transaction(ttype=CSWTransactionType.UPDATE.value,
                             typename="gmd:MD_Metadata",
                             record=record)
            # Workaround for https://github.com/geopython/OWSLib/issues/678
            _csw.results["updated"] = int(
                ElementTree(fromstring(_csw.response)).xpath(
                    "/csw:TransactionResponse/csw:TransactionSummary/csw:totalUpdated/text()",
                    namespaces=csw_namespaces,
                )[0])
            if _csw.results["updated"] != 1:
                raise RecordServerException()
        except HTTPError as e:
            if e.response.content.decode() == "Catalogue not yet available.":
                raise CSWDatabaseNotInitialisedException() from e
            # Unrecognised HTTP failures must not look like a successful update.
            raise
        except XMLSyntaxError as e:
            # A non-XML body may be a plain-text authorisation error.
            reply = _csw.response.decode()
            if reply == "Missing authorisation token.":
                raise CSWAuthMissingException() from e
            if reply == "Insufficient authorisation token.":
                raise CSWAuthInsufficientException() from e
            # Anything else is an unexpected response; let the caller see it.
            raise
Ejemplo n.º 28
0
    def parse_bill(self, bill_url, session, chamber):
        """Parse the bill page at ``bill_url`` and return the bill.

        Anchors on the page are scanned in document order: a status link
        leads to ``parse_bill_status_page``, and a "This bill in WP" link
        leads to ``add_bill_versions``. When no status link produced a bill,
        the status page is looked up through ``search_url_template`` using
        the bill type and number taken from the page name.
        """
        bill = None
        page_root = lxml.html.fromstring(self.urlopen(bill_url))
        bill_page = ElementTree(page_root)
        status_prefixes = ('status of', 'Detailed Information (status)')
        for anchor in bill_page.findall('//a'):
            label = anchor.text_content()
            if label.startswith(status_prefixes):
                href = anchor.attrib['href']
                status_url = href.replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url,
                                                   session, chamber)
            elif label.startswith('This bill in WP'):
                href = anchor.attrib['href']
                index_url = href[0:href.rindex('/')]
                # this looks weird.  See http://data.opi.mt.gov/bills/BillHtml/SB0002.htm for why
                index_url = index_url[index_url.rindex("http://"):]
                self.add_bill_versions(bill, index_url)

        if bill is not None:
            return bill

        # No bill was found.  Maybe something like HB0790 in the 2005 session?
        # We can search for the bill metadata.
        page_name = bill_url.split("/")[-1].split(".")[0]
        bill_type, bill_number = page_name[0:2], page_name[2:]
        session_years = self.metadata['session_details'][session]['years']
        laws_year = session_years[0] % 100

        status_url = self.search_url_template % (laws_year, bill_type,
                                                 bill_number)
        return self.parse_bill_status_page(status_url, bill_url, session,
                                           chamber)
Ejemplo n.º 29
0
    def scrape(self, chamber, session):
        """Scrape and save every bill of ``chamber`` for ``session``.

        The start year of the term containing ``session`` selects the bill
        index page. Links whose text starts with ``H`` (``lower`` chamber)
        or ``S`` (``upper`` chamber) are parsed with ``parse_bill`` and
        saved; links containing ``govlineveto`` are ignored.

        :raises ValueError: no term in the metadata contains ``session``.
        """
        for term in self.metadata['terms']:
            if session in term['sessions']:
                year = term['start_year']
                break
        else:
            # Fail with a clear message instead of an unbound ``year`` below.
            raise ValueError("No term found for session %r" % (session,))

        self.versions_dict = self._versions_dict(year)

        base_bill_url = 'http://data.opi.mt.gov/bills/%d/BillHtml/' % year
        index_page = ElementTree(
            lxml.html.fromstring(self.urlopen(base_bill_url)))

        # House bills start with H, Senate bills start with S
        prefix = {'lower': 'H', 'upper': 'S'}.get(chamber)

        bill_urls = []
        for bill_anchor in index_page.findall('//a'):
            name = bill_anchor.text
            # An anchor's text can be None (e.g. when it only wraps child
            # elements); such anchors cannot name a bill.
            if not name or prefix is None:
                continue
            # See 2009 HB 645
            if "govlineveto" in name:
                continue
            if name.startswith(prefix):
                bill_urls.append("%s%s" % (base_bill_url, name))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            if bill:
                self.save_bill(bill)
Ejemplo n.º 30
0
def export(html, custom_filename=''):
    """Extract the content element from ``html`` and write it to a file.

    The single element matched by ``CONTENT_PATH`` is cleaned of everything
    matched by ``BAD_XPATHS`` and written with ``HTML_ENCODING``.

    :param html: HTML source to parse.
    :param custom_filename: Target file name; when empty, a timestamped
        ``.html`` file is created in the current working directory.
    :raises SystemExit: the HTML cannot be parsed, or ``CONTENT_PATH`` does
        not match exactly one result.
    """
    try:
        root = fromstring(html, parser=HTMLParser(collect_ids=False))
    except (ParserError, XMLSyntaxError):
        raise SystemExit('Error while parsing content')

    matches = root.xpath(CONTENT_PATH)
    try:
        content, = matches
    except ValueError:
        # Unpacking fails for zero matches as well as for several; report
        # which of the two actually happened.
        if not matches:
            raise SystemExit('No content found while parsing the content')
        raise SystemExit('Multiple results while parsing the content')

    for xpath in BAD_XPATHS:
        for bad in content.xpath(xpath):
            bad.getparent().remove(bad)

    if custom_filename:
        filename = custom_filename
    else:
        file_options = dict(
            mode='w+b',
            suffix='.html',
            prefix=f'{int(time())}_',
            dir=getcwd(),
            delete=False,
        )
        # The temporary file is only used to reserve a unique name; it is
        # kept on disk (delete=False) and overwritten below.
        with NamedTemporaryFile(**file_options) as html_file:
            filename = html_file.name

    ElementTree(content).write(filename, encoding=HTML_ENCODING)