Esempio n. 1
0
        def parsegeneral(self, path):
                """Tokenize every ``context`` element found in the XML file at ``path``.

                The file is parsed with a recovering parser, each ``context``
                element is serialized, a fixed-width prefix and suffix are cut
                from the serialized text, and the remainder is split into words
                with ``nltk.word_tokenize``. Stop words and words rejected by
                ``self.isalfa`` are then dropped.

                Returns a tuple ``(t, data)``: ``t`` holds one filtered word
                list per serialized fragment and ``data`` holds the serialized
                fragments themselves.
                """
                # recover=True lets the parser keep going when it meets malformed XML
                recovering_parser = etree.XMLParser(recover=True)
                root = etree.parse(path, parser=recovering_parser).getroot()

                # serialized form of every <context> node, in document order
                data = []
                for context_node in root.findall('.//context'):
                        data.extend(etree.tostringlist(context_node, encoding="us-ascii", method="xml"))

                # Cut the fixed-width head and tail off each fragment (the
                # offsets are assumed to cover the context tags - TODO confirm)
                # and tokenize what is left into words.
                t = []
                for fragment in data:
                        text = str(fragment)
                        t.append(nltk.word_tokenize(text[13:len(text) - 16]))

                # English stop words plus a few corpus-specific extras
                stop = set(stopwords.words('english'))
                stop.update(['head','Mr','I','In','Mrs','A','So','To','us','He','And','Yes'])

                # keep a word only if it is not a stop word and passes isalfa
                t = [[word for word in words if word not in stop and self.isalfa(word)]
                     for words in t]

                return t, data
Esempio n. 2
0
def process_xpaths(forest: dict, xpaths: dict):
    """Collect the text of every ``h3`` found under each configured XPath.

    For each key of ``xpaths`` the tree stored under the same key in
    ``forest`` is queried with ``xpaths[key] + '//h3'``; every match is
    serialized as text and the first serialized chunk is kept.

    Returns a dict mapping each key of ``xpaths`` to its list of strings.
    """
    def headings(key):
        # all <h3> nodes below the configured location
        nodes = forest[key].xpath(xpaths[key] + '//h3')
        return [
            etree.tostringlist(node, encoding='unicode', method='text')[0]
            for node in nodes
        ]

    return {key: headings(key) for key in xpaths}
Esempio n. 3
0
def _write_xml_tree(tree, path):
    """Write ``tree`` to ``path`` preceded by a fixed UTF-8 XML declaration."""
    xml = [b'<?xml version="1.0" encoding="UTF-8"?>\n'] + etree.tostringlist(tree)
    # The context manager closes the file even when writing fails.
    with open(path, 'wb') as out:
        out.writelines(xml)


def setup_project(product):
    """Create the Android project for ``product`` and patch its metadata files.

    Steps, in order:

    1. publish ``{'product': product}`` as the module-level ``confs``;
    2. run the SDK ``android create project`` command targeting
       ``../<product>``;
    3. remove the generated ``src/br/ufrn`` tree;
    4. copy ``.classpath``, ``.project`` and ``AndroidManifest.xml`` from the
       current directory into the project directory;
    5. set the ``name`` element of ``.project`` to the product name;
    6. append ``.<product>`` to the ``package`` attribute of the manifest
       root element.

    :param product: product name; used in paths, the project name and the
        package name.
    :raises OSError: if a template file cannot be copied or a project file
        cannot be written (``shutil.copy`` / ``open``).
    """
    global confs
    confs = {'product': product}
    log.info("Creating project to %(product)s product." % confs)
    android = configspl.sdkdir + "/tools/android "
    # These activity and package params will be rewritten
    params = ("create project --target 1 --name %(product)s "
              "--path ../%(product)s --activity MainActivity "
              "--package br.ufrn.dimap.%(product)s") % confs
    log.info(android + params)
    # NOTE(review): the command line is built by string concatenation and run
    # through the shell; `product` is assumed to come from trusted configuration.
    os.system(android + params)
    shutil.rmtree("../%(product)s/src/br/ufrn" % confs)

    project_dir = "../%(product)s/" % confs
    # shutil.copy replaces the former `cp` shell call: it is portable and a
    # missing template raises instead of being silently ignored.
    for f in ('.classpath', '.project', 'AndroidManifest.xml'):
        shutil.copy('./' + f, project_dir)

    # Changes in .project file
    project_file = project_dir + ".project"
    tree = etree.parse(project_file)
    name = tree.find('name')
    name.text = confs['product']
    _write_xml_tree(tree, project_file)

    # Changes in AndroidManifest.xml file
    manifest_file = project_dir + "AndroidManifest.xml"
    tree = etree.parse(manifest_file)
    root = tree.getroot()
    root.attrib['package'] += '.' + confs['product']
    _write_xml_tree(tree, manifest_file)
Esempio n. 4
0
def extract_mail(path, suffix):
    """Serialize the messages of an mbox file as a ``<meta><mbox>`` XML tree.

    Each message becomes a ``<msg>`` element with ``to``, ``from``,
    ``subject``, ``date`` and ``message`` children, each filled from the
    message field of the same name.

    :param path: path of the mbox file.
    :param suffix: mailbox kind selector; only ``""`` (mbox) is supported.
    :returns: the result of ``etree.tostringlist`` on the ``<meta>`` root,
        pretty-printed.
    :raises ValueError: if ``suffix`` is not ``""``. Previously this case
        failed with an unrelated TypeError, because the loop ended up
        iterating over the builtin ``object`` type.
    """
    # Reject unsupported kinds up front instead of failing later in the loop.
    if suffix != "":
        raise ValueError("unsupported mailbox suffix: %r" % (suffix,))

    tree_root = etree.Element("meta")
    box = mailbox.mbox(path)
    mbox = etree.SubElement(tree_root, "mbox")
    try:
        for message in box:
            msg = etree.SubElement(mbox, "msg")
            # Child tag names match the message field names.
            # NOTE(review): 'message' is looked up like a header here;
            # presumably the message body was intended - confirm.
            for field in ("to", "from", "subject", "date", "message"):
                etree.SubElement(msg, field).text = message[field]
    finally:
        # release the file handle held by the mailbox
        box.close()
    tree = etree.tostringlist(tree_root, pretty_print=True)
    return tree
Esempio n. 5
0
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-0"><a href="link5.html">a属性</a>
     </ul>
 </div>
'''
# # Initialize the XPath object
# html = etree.HTML(text)
# # Serialize the parsed object back to markup
# result = etree.tostring(html,encoding='utf-8')

# etree.parse reads the file with an HTMLParser, which repairs missing HTML elements
html = etree.parse(r'C:\Users\dell\Desktop\xml.html', etree.HTMLParser())
# Serialize the parsed document to bytes
result = etree.tostring(html)
# Serialize the parsed document to a list
result1 = etree.tostringlist(html)
# Select every element node in the document
element = html.xpath('//*')

print(type(html))
print(type(result))
# Print the processed markup
# (etree automatically repairs the missing nodes)
print(result.decode('utf-8'))
print(result1)
print(element)

print('-' * 20 + '这是分隔符' + '-' * 20)

#!/usr/bin/env  python
#coding :utf-8
Esempio n. 6
0
from lxml import etree

# Build a small <table><tbody>...</tbody></table> tree with one header row
# and one data row, dump it to stdout and save it to homemade2.xml.
page = etree.Element('table')
doc = etree.ElementTree(page)
headElt = etree.SubElement(page, 'tbody')
# Header
tr = etree.SubElement(headElt, 'tr')
th = etree.SubElement(tr, 'th')
th.text = 'Type'
th = etree.SubElement(tr, 'th')
th.text = 'Server'
# Body
tr = etree.SubElement(headElt, 'tr')
td = etree.SubElement(tr, 'td')
td.text = 'Element1'
# etree.dump() writes to stdout itself and returns None, so wrapping it in
# print only added a stray "None" line (and was Python 2-only syntax).
etree.dump(page)
# Alternative: doc.write(outFile, pretty_print=True, xml_declaration=True,
# encoding="utf-8") would serialize the whole tree in one call.
chunks = etree.tostringlist(page)
# lxml returns bytes here, so the file is opened in binary mode; the context
# manager guarantees the data is flushed and the handle closed.
with open('homemade2.xml', 'wb') as outFile:
    for item in chunks:
        outFile.write(item + b"\n")
Esempio n. 7
0
from lxml import etree
import csv
from lxml.builder import E


def TYPE(*args):
    """Return a ``{"type": ...}`` mapping whose value is ``args`` joined by spaces."""
    joined = ' '.join(args)
    return {"type": joined}


def STATUT(*args):
    """Return a ``{"statut": ...}`` mapping whose value is ``args`` joined by spaces."""
    joined = ' '.join(args)
    return {"statut": joined}


# Convert every row of sanisettesparis.csv into a <toilette> element and write
# them all inside a <toilettes> document that references wc.dtd.
xmlFile = 'toilettes-paris-librairieLXML.xml'
# Both files are managed by the same `with`, so the output is closed even if a
# row fails to convert. The XML declaration names no encoding, which means
# UTF-8, so the output file is opened as UTF-8 explicitly.
with open('sanisettesparis.csv', newline='') as csvfile, \
        open(xmlFile, 'w', encoding='utf-8') as xmlData:
    spamreader = csv.reader(csvfile, delimiter=';', quotechar='|')
    xmlData.write('<?xml version="1.0"?>' + "\n")
    xmlData.write('<!DOCTYPE toilettes SYSTEM "wc.dtd">' + "\n")
    xmlData.write('<toilettes>' + "\n")
    for row in spamreader:
        # TYPE/STATUT return dicts, which the E factory turns into attributes
        page = E.toilette(
            TYPE(row[0]), STATUT(row[1]),
            E.adresse(E.libelle(row[2]), E.arrondissement(row[3])),
            E.horaire(row[4]),
            E.services(E.acces_pmr(row[5]), E.relais_bebe(row[6])),
            E.equipement(row[7]))
        # Write the markup itself: str(tostringlist(...)) produced the repr of
        # a list of bytes ("[b'<toilette ...>']"), which is not valid XML.
        xmlData.write(etree.tostring(page, encoding='unicode') + "\n")
    xmlData.write('</toilettes>' + "\n")
Esempio n. 8
0
def get_entity_list_xml(
    entity_dict,
    nsmap,
    root_element_tag,
    sub_element_tag,
    root_prefix='None',
):
    """ Get entity list in xml format
        :params entity_dict: dictionary whose root key is the entity name.
        Its value is a list of entity dictionaries; every key of an entity
        dictionary becomes an attribute of the generated sub element, except
        the optional 'links' key, whose value is a list of dictionaries that
        become atom link elements below that sub element. A second,
        optional key ending in "_links" holds page-level links that are
        attached to the root element. Example:
        entity_dict = {
        'vmhosts': [{
            "id": 'host-1234',
            "name": 'newhost',
            "links": [
                {
                    "rel": "self",
                    "href": 'http://localhost:8774/v2/admin/vmhosts'
                },
                {
                    "rel": "bookmark",
                    "href": 'http://localhost:8774/admin/vmhosts'
                }
             ],
        }],
        "vmhosts_links": [
                {
                    "rel": "next",
                    "href": 'http://localhost:8774/v2/admin/vmhosts&marker=4'
                }
        ]}
        The dictionaries passed in are not modified.
        :params nsmap: namespace map to be used for the generated xml.
        :params root_element_tag: element tag of the root element.
        :params sub_element_tag: element tag for each sub element. i.e for each
        entity dictionary.
        :params root_prefix: root prefix to be used for identifying the
        namespace of the document from the nsmap.
        :returns: list of entities in xml format using the entity dictionary,
        or '' when entity_dict is empty.
        :raises LookupError: If there is more than one root(key) element in the
        entity_dict.
    """
    if not entity_dict:
        return ''
    # TODO(siva): add check for entities_links
    # list() yields an indexable snapshot of the keys on Python 2 and Python 3
    # alike; indexing dict.keys() directly fails on Python 3.
    keys = list(entity_dict)
    if len(keys) > 2:
        raise LookupError('More than one root element in entity')
    page_links = []
    if len(keys) == 2:
        # exactly one of the two keys must be the "<entity>_links" companion
        if keys[0].endswith("_links"):
            links_key, root_key = keys
        elif keys[1].endswith("_links"):
            root_key, links_key = keys
        else:
            raise LookupError('More than one root element in entity')
        page_links = entity_dict[links_key]
    else:
        root_key = keys[0]
    root_namespace = ''
    if nsmap is not None and root_prefix in nsmap:
        root_namespace = '{%s}' % nsmap[root_prefix]
    root = Element(root_namespace + root_element_tag, nsmap=nsmap)
    for ent in entity_dict[root_key]:
        if not ent:
            continue
        # Build the attributes without deleting 'links' from the caller's
        # dictionary; None values are emitted as empty attributes.
        attrib = {}
        for (key, val) in ent.items():
            if key is None or key == 'links':
                continue
            attrib[key] = '' if val is None else val
        entity_sub = SubElement(root, root_namespace + sub_element_tag,
                                attrib)
        for link in ent.get('links') or []:
            SubElement(entity_sub, constants.ATOM + 'link', link)

    for link in page_links:
        SubElement(root, constants.ATOM + 'link', link)
    return etree.tostringlist(root)[0]
Esempio n. 9
0
    logger = logging.getLogger('session_test')

    logger.critical("Started")

    switch1 = Switch("172.16.1.166", port="8443")
    switch1.set_variable('uid', 'cisco')
    switch1.set_variable('pwd', 'cisco')
    print(switch1)
    resp = switch1.run_commands(data=['show version'], format='json')
    time.sleep(1)
    pprint(resp)

    resp = switch1.run_commands(data=['show version'], format='xml')
    time.sleep(1)
    ET.dump(resp)
    pprint(ET.tostringlist(resp))
    pprint(resp.findall(".//kickstart_ver_str")[0].text)

    resp = switch1.run_commands(data=['show vrf', 'show ip arp', 'show mac address-table'], format='xml')
    time.sleep(1)
    ET.dump(resp)
    print(ET.tostringlist(resp))

    print()
    print("=" * 80)
    print("Printing vrf")
    print(ET.tostring(switch1.run_commands(['show vrf'], transport='xss', port=22), encoding='unicode', pretty_print=True))
    print("Printing arp")
    print(ET.tostring(switch1.run_commands(['show ip arp'], transport='xss', port=22), encoding='unicode', pretty_print=True))
    print("Print macs")
    print(ET.tostring(switch1.run_commands(['show mac address-table'], transport='xss'), encoding='unicode', pretty_print=True))
Esempio n. 10
0
			w = 1
			if int(item.tag[1:])==lasthai:
				discard2 = ET.SubElement(game, "discard2")
				discard2.text=th2txt(int(item.tag[1:]))
			else:
				discard1 = ET.SubElement(game, "discard1")
				discard1.text=th2txt(int(item.tag[1:]))
			lasthai = int(item.tag[1:])
		elif item.tag[0] == 'F':
			w = 2
			if int(item.tag[1:])==lasthai:
				discard2 = ET.SubElement(game, "discard2")
				discard2.text=th2txt(int(item.tag[1:]))
			else:
				discard1 = ET.SubElement(game, "discard1")
				discard1.text=th2txt(int(item.tag[1:]))
			lasthai = int(item.tag[1:])
		elif item.tag[0] == 'G':
			w = 3
			if int(item.tag[1:])==lasthai:
				discard2 = ET.SubElement(game, "discard2")
				discard2.text=th2txt(int(item.tag[1:]))
			else:
				discard1 = ET.SubElement(game, "discard1")
				discard1.text=th2txt(int(item.tag[1:]))
			lasthai = int(item.tag[1:])
		continue
# Write the serialized tree, then close the file. The bare attribute
# reference `mjr.close` never called the method, so the file stayed open.
mjr.writelines(ET.tostringlist(root,encoding="utf-8",pretty_print=True))
mjr.close()

Esempio n. 11
0
def save(dbcon, filename):
    ''' Dump every table of a plexos sqlite database into an XML file.

        The root element name and namespace are read from META_TABLE; when
        that table cannot be queried a warning is logged and the defaults
        "MasterDataSet" / "http://tempuri.org/MasterDataSet.xsd" are used.
        Each row of every other table becomes a "t_<table>" element with one
        child per non-NULL column. Tables that cannot be queried are logged
        and skipped.

        Args:   dbcon - sqlite database connection (its row_factory is set
                        to sql.Row)
                filename - Location to save plexos XML file.  The file will be
                           overwritten if it exists

        No Return
    '''
    # TODO: Check for overwrite existing xml
    dbcon.row_factory = sql.Row
    cur = dbcon.cursor()
    cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tables = [record[0] for record in cur.fetchall()]
    # Opening with an explicit codec lets text be written the same way on
    # Python 2 and Python 3.
    with codecs.open(filename, "w", "utf-8-sig") as fout:
        plexos_meta = {}
        try:
            cur.execute("SELECT name, value FROM '%s'" % (META_TABLE))
        except sql.Error:
            LOGGER.warning("No metadata found in table %s", META_TABLE)
            plexos_meta.update(
                namespace="http://tempuri.org/MasterDataSet.xsd",
                root_element="MasterDataSet")
        else:
            plexos_meta.update(
                (meta_row[0], meta_row[1]) for meta_row in cur.fetchall())
        root_tag = plexos_meta['root_element']
        fout.write('<%s xmlns="%s">\r\n' %
                   (root_tag, plexos_meta['namespace']))
        for table_name in sorted(tables):
            if table_name == META_TABLE:
                continue
            try:
                cur.execute("SELECT * FROM '%s'" % (table_name))
            except sql.Error:
                LOGGER.warning("Bad table %s", table_name)
                continue
            columns = [col[0] for col in cur.description]
            # cElementTree has no pretty print, so the layout is produced by
            # hand: indent, serialize, then break the line between tags.
            for row in iter(cur.fetchone, None):
                fout.write('  ')
                ele = etree.Element('t_' + table_name)
                for (column, val) in zip(columns, row):
                    # Columns without a value get no subelement.
                    # See issue #54
                    if val is not None:
                        cell = etree.SubElement(ele, column)
                        cell.text = str(val) if isinstance(val, int) else val
                parts = etree.tostringlist(ele)
                # The serializer yields str chunks on Python 2 and bytes
                # chunks on Python 3; either way end up with one text string.
                # TODO: Will this ever process an element with no data?
                if isinstance(parts[0], str):
                    ele_s = "".join(parts)
                else:
                    ele_s = "".join(part.decode('UTF-8') for part in parts)
                # one tag per line, children indented, closing tag outdented
                ele_s = ele_s.replace('><', '>\r\n    <')
                ele_s = ele_s.replace('  </t_', '</t_')
                fout.write(ele_s)
                fout.write('\r\n')
        fout.write('</%s>\r\n' % root_tag)
Esempio n. 12
0
def get_entity_list_xml(
    entity_dict,
    nsmap,
    root_element_tag,
    sub_element_tag,
    root_prefix='None',
):
    """ Get entity list in xml format
        :params entity_dict: dictionary whose root key is the entity name.
        Its value is a list of entity dictionaries; every key of an entity
        dictionary becomes an attribute of the generated sub element, except
        the optional 'links' key, whose value is a list of dictionaries that
        become atom link elements below that sub element. A second,
        optional key ending in "_links" holds page-level links that are
        attached to the root element. Example:
        entity_dict = {
        'vmhosts': [{
            "id": 'host-1234',
            "name": 'newhost',
            "links": [
                {
                    "rel": "self",
                    "href": 'http://localhost:8774/v2/admin/vmhosts'
                },
                {
                    "rel": "bookmark",
                    "href": 'http://localhost:8774/admin/vmhosts'
                }
             ],
        }],
        "vmhosts_links": [
                {
                    "rel": "next",
                    "href": 'http://localhost:8774/v2/admin/vmhosts&marker=4'
                }
        ]}
        The dictionaries passed in are not modified.
        :params nsmap: namespace map to be used for the generated xml.
        :params root_element_tag: element tag of the root element.
        :params sub_element_tag: element tag for each sub element. i.e for each
        entity dictionary.
        :params root_prefix: root prefix to be used for identifying the
        namespace of the document from the nsmap.
        :returns: list of entities in xml format using the entity dictionary,
        or '' when entity_dict is empty.
        :raises LookupError: If there is more than one root(key) element in the
        entity_dict.
    """

    if not entity_dict:
        return ''
    #TODO(siva): add check for entities_links
    # list() yields an indexable snapshot of the keys on Python 2 and Python 3
    # alike; indexing dict.keys() directly fails on Python 3.
    keys = list(entity_dict)
    if len(keys) > 2:
        raise LookupError('More than one root element in entity')
    page_links = []
    if len(keys) == 2:
        # exactly one of the two keys must be the "<entity>_links" companion
        if keys[0].endswith("_links"):
            links_key, root_key = keys
        elif keys[1].endswith("_links"):
            root_key, links_key = keys
        else:
            raise LookupError('More than one root element in entity')
        page_links = entity_dict[links_key]
    else:
        root_key = keys[0]
    root_namespace = ''
    if nsmap is not None and root_prefix in nsmap:
        root_namespace = '{%s}' % nsmap[root_prefix]
    root = Element(root_namespace + root_element_tag, nsmap=nsmap)
    for ent in entity_dict[root_key]:
        if not ent:
            continue
        # Build the attributes without deleting 'links' from the caller's
        # dictionary; None values are emitted as empty attributes.
        attrib = {}
        for (key, val) in ent.items():
            if key is None or key == 'links':
                continue
            attrib[key] = '' if val is None else val
        entity_sub = SubElement(root, root_namespace + sub_element_tag, attrib)
        for link in ent.get('links') or []:
            SubElement(entity_sub, constants.ATOM + 'link', link)
    for link in page_links:
        SubElement(root, constants.ATOM + 'link', link)
    return etree.tostringlist(root)[0]
Esempio n. 13
0
                        element.getparent().replace(element, replace_element)

                for (element, replace_element, out_dict) in \
                        elements_to_be_replaced:
                    LOG.debug(
                        _('Replaced element path: %s' %
                          replace_element.getroottree().getpath(
                              replace_element)))
                    replace_dict_out.update(
                        {tree.getpath(replace_element): out_dict})
            except (KeyError, IndexError, ValueError), err:
                LOG.error(
                    _('Lookup Error while finding tag healthnmon api... %s ' %
                      str(err)),
                    exc_info=1)
    return etree.tostringlist(tree.getroot())[0]


def dump_resource_xml(resource_obj, tag):
    """Return the text written by ``resource_obj.export`` for element ``tag``.

    The object exports itself into an in-memory buffer at indentation
    level 0 with ``name_`` set to ``tag``; the buffer content is returned.
    """

    message = 'Exporting tag: %s as xml...' % tag
    LOG.debug(_(message))
    buf = StringIO.StringIO()
    resource_obj.export(buf, 0, name_=tag)
    xml_text = buf.getvalue()
    return xml_text


def get_project_context(req):
    """ Get project context from request
    :param req: request object from which context would be fetched.
    :returns: project context tuple (context, project_id)
Esempio n. 14
0
 def print_tree(self):
   """Print ``self.tree`` to stdout as indented XML text."""
   # Serialize to text before printing: printing the tostringlist() result
   # showed the repr of a list (escaped newlines), which hid the indentation
   # that pretty_print=True produces.
   print(etree.tostring(self.tree, pretty_print=True, encoding='unicode'))
Esempio n. 15
0
                        tree._setroot(replace_element)
                    else:
                        element.getparent().replace(element,
                                                    replace_element)

                for (element, replace_element, out_dict) in \
                        elements_to_be_replaced:
                    LOG.debug(_('Replaced element path: %s'
                                % replace_element.getroottree().getpath(
                                    replace_element)))
                    replace_dict_out.update(
                        {tree.getpath(replace_element): out_dict})
            except (KeyError, IndexError, ValueError), err:
                LOG.error(_('Lookup Error while finding tag \
                healthnmon api... %s ' % str(err)), exc_info=1)
    return etree.tostringlist(tree.getroot())[0]


def dump_resource_xml(resource_obj, tag):
    """Return the text written by ``resource_obj.export`` for element ``tag``.

    The object exports itself into an in-memory buffer at indentation
    level 0 with ``name_`` set to ``tag``; the buffer content is returned.
    """

    message = 'Exporting tag: %s as xml...' % tag
    LOG.debug(_(message))
    buf = StringIO.StringIO()
    resource_obj.export(buf, 0, name_=tag)
    xml_text = buf.getvalue()
    return xml_text


def get_project_context(req):
    """ Get project context from request
    :param req: request object from which context would be fetched.
    :returns: project context tuple (context, project_id)
Esempio n. 16
0
        elif item.tag[0] == 'E':
            w = 1
            if int(item.tag[1:]) == lasthai:
                discard2 = ET.SubElement(game, "discard2")
                discard2.text = th2txt(int(item.tag[1:]))
            else:
                discard1 = ET.SubElement(game, "discard1")
                discard1.text = th2txt(int(item.tag[1:]))
            lasthai = int(item.tag[1:])
        elif item.tag[0] == 'F':
            w = 2
            if int(item.tag[1:]) == lasthai:
                discard2 = ET.SubElement(game, "discard2")
                discard2.text = th2txt(int(item.tag[1:]))
            else:
                discard1 = ET.SubElement(game, "discard1")
                discard1.text = th2txt(int(item.tag[1:]))
            lasthai = int(item.tag[1:])
        elif item.tag[0] == 'G':
            w = 3
            if int(item.tag[1:]) == lasthai:
                discard2 = ET.SubElement(game, "discard2")
                discard2.text = th2txt(int(item.tag[1:]))
            else:
                discard1 = ET.SubElement(game, "discard1")
                discard1.text = th2txt(int(item.tag[1:]))
            lasthai = int(item.tag[1:])
        continue
# Write the serialized tree, then close the file. The bare attribute
# reference `mjr.close` never called the method, so the file stayed open.
mjr.writelines(ET.tostringlist(root, encoding="utf-8", pretty_print=True))
mjr.close()
Esempio n. 17
0
    for new in xpaths.keys():
        pages = []
        book = forest[new].xpath(xpaths[new] + '//h3')
        for page in book:
            pages.append(
                etree.tostringlist(page, encoding='unicode', method='text')[0])
        news[new] = pages
    return news


def get_news(urls=None, xpaths=None):
    """Fetch the configured pages and return their headlines.

    :param urls: mapping passed to ``forest``; ``DEFAULT_URLS`` is used when
        it is omitted or empty.
    :param xpaths: mapping passed to ``process_xpaths``;
        ``DEFAULT_NEWS_XPATHS`` is used when it is omitted or empty.
    :returns: the result of ``process_xpaths`` for the fetched trees.
    """
    # None replaces the former mutable `{}` defaults; any falsy value still
    # selects the module defaults, so existing callers behave the same.
    if not urls:
        urls = DEFAULT_URLS
    if not xpaths:
        xpaths = DEFAULT_NEWS_XPATHS

    garden = forest(urls)
    return process_xpaths(garden, xpaths)


if __name__ == '__main__':
    # news = get_news()

    # Headlines of the main slideshow pager on the Cointelegraph front page
    tree = tree_from_html('https://cointelegraph.com/')
    data = tree.xpath('//*[@id="js-main-slideshow-pager"]//h3')
    news = [
        etree.tostringlist(new, encoding='unicode', method='text')[0]
        for new in data
    ]

    # Headlines for the default URL / XPath configuration
    breaking_news = get_news()