Beispiel #1
0
def _newstocks(data, pageNo, retry_count, pause):
    """Fetch one page of the new-stock listing and append it to ``data``.

    Recurses onto the following page for as long as the pager of the fetched
    page contains a "下一页" (next page) link.

    Parameters:
        data: DataFrame holding the rows collected so far.
        pageNo: number of the page to request.
        retry_count: how many attempts are made for this page.
        pause: seconds slept before every attempt.

    Returns:
        The accumulated DataFrame; ``data`` unchanged when the page contains
        no table rows; ``None`` when every attempt raised (the exception is
        printed, not propagated).
    """
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            html = lxml.html.parse(rv.NEW_STOCKS_URL%(ct.P_TYPE['http'],ct.DOMAINS['vsf'],
                         ct.PAGES['newstock'], pageNo))
            res = html.xpath('//table[@id=\"NewStockTable\"]/tr')
            if not res:
                return data
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            # drop the red asterisk markers before the rows are parsed
            sarr = sarr.replace('<font color="red">*</font>', '')
            sarr = '<table>%s</table>'%sarr
            df = pd.read_html(StringIO(sarr), skiprows=[0, 1])[0]
            df = df.drop([df.columns[idx] for idx in [12, 13, 14]], axis=1)
            df.columns = rv.NEW_STOCKS_COLS
            # codes are parsed as numbers; restore the leading zeros
            df['code'] = df['code'].map(lambda x : str(x).zfill(6))
            df['xcode'] = df['xcode'].map(lambda x : str(x).zfill(6))
            res = html.xpath('//table[@class=\"table2\"]/tr[1]/td[1]/a/text()')
            tag = '下一页' if ct.PY3 else unicode('下一页', 'utf-8')
            hasNext = tag in res
            # DataFrame.append was removed in pandas 2.0; concat works on
            # every pandas version and yields the same frame.
            data = pd.concat([data, df], ignore_index=True)
            pageNo += 1
            if hasNext:
                data = _newstocks(data, pageNo, retry_count, pause)
        except Exception as ex:
            print(ex)
        else:
            return data
Beispiel #2
0
    def update(self, resp = None):
        """Refresh this object's table/user state from a server response.

        When ``resp`` is None the response is first fetched with a GET
        request built from ``update_template``.  An error response is printed
        and ignored; a response without ``tableInfo`` raises ``Exception``.
        """
        if resp is None:
            request_path = update_template % (self.nick, self.uuid, self.tableid)
            resp = self.make_request(request_path, "GET")

        if resp.get("type") == "error":
            print("Looks like we've got an error:")
            print(find_subelem(resp, "message").text)
            return

        table_info = find_subelem(resp, "tableInfo")
        if table_info is None:
            print("wtf mate")
            print(etree.tostring(resp, pretty_print=True))
            raise Exception

        # Table id and colour are only filled in while still unset.
        if not self.tableid:
            self.tableid = find_subelem(table_info, "tableid").text
        if not self.color:
            is_black = find_subelem(table_info, "blackPlayer").text == self.nick
            self.color = "black" if is_black else "white"

        # A board element in the response marks the game as in progress.
        if find_subelem(table_info, "board") is not None:
            self.playing = True
            for board_row in find_subelem(table_info, "board"):
                print(board_row.text)

        user_info = find_subelem(resp, "userInfo")
        if user_info is None:
            return
        self.ratio = float(find_subelem(user_info, "ratio").text)
        self.points = int(find_subelem(user_info, "points").text)
Beispiel #3
0
def _get_storage_xml(params, ignore_source=False):
    """Serialize a ``<disk>`` element described by ``params``.

    Keys read from ``params``: 'src_type', 'type', 'format', 'dev', 'bus',
    optionally 'address' (with 'controller', 'bus', 'unit') and 'path'.
    With ``ignore_source`` set, no ``<source>`` child is emitted.

    Returns the result of ``ET.tostring`` on the disk element.
    """
    src_type = params.get('src_type')

    disk = E.disk(type=src_type, device=params.get('type'))
    disk.append(E.driver(name='qemu', type=params['format']))
    disk.append(E.target(dev=params.get('dev'), bus=params['bus']))

    address = params.get('address')
    if address:
        # ide disk target id is always '0'
        disk.append(E.address(type='drive',
                              controller=address['controller'],
                              bus=address['bus'],
                              target='0',
                              unit=address['unit']))

    if not ignore_source:
        if src_type == 'network':
            # Working with url paths
            url = urlparse.urlparse(params.get('path'))
            # NOTE(review): port may be an int here (url.port / getservbyname);
            # confirm the element factory accepts non-string attribute values.
            port = url.port or socket.getservbyname(url.scheme)
            host = E.host(name=url.hostname, port=port)
            source = E.source(protocol=url.scheme, name=url.path)
            source.append(host)
        else:
            # The attribute that carries the path depends on the source type
            source = E.source()
            source.set(DEV_TYPE_SRC_ATTR_MAP[src_type], params.get('path'))
        disk.append(source)

    return ET.tostring(disk)
Beispiel #4
0
def remove_spaces(data):
    """Remove spaces from ``.htm`` file names and from references to them.

    Spaces need to be consistent with splits and the TOC:
    "Foo Bar.htm" and "Foo%20Bar.htm" are seen as different.

    Parameters:
        data: dict mapping file name to file content; it must contain
            "toc.ncx" and "content.opf".  The dict is modified in place:
            ``.htm`` keys are renamed, and every ``src`` attribute in the
            TOC and every ``href`` attribute in the inventory is sanitized
            the same way.

    Returns:
        None.
    """

    def fnsan(fn):
        return fn.replace(" ", "").replace("%20", "")

    def rewrite_refs(file_name, attribute):
        # Sanitize one attribute on every descendant and store the document
        # back with an explicit XML declaration.
        root = etree.fromstring(data[file_name])
        for element in root.findall('.//*'):
            ref = element.get(attribute)
            if ref:
                element.set(attribute, fnsan(ref))
        markup = etree.tostring(root)
        # On Python 3 the serializer returns bytes, which cannot be
        # concatenated with the text declaration; Python 2 str is untouched.
        if not isinstance(markup, str):
            markup = markup.decode("utf-8")
        data[file_name] = "<?xml version='1.0' encoding='utf-8'?>\n" + markup

    # rename actual files; iterate over a snapshot of the keys because the
    # dict is modified inside the loop
    for file_name in list(data):
        clean_name = fnsan(file_name)
        if file_name.endswith(".htm") and file_name != clean_name:
            data[clean_name] = data.pop(file_name)

    # update TOC
    rewrite_refs("toc.ncx", "src")

    # update inventory
    rewrite_refs("content.opf", "href")
Beispiel #5
0
 def test_fromstring_kml_document(self):
     """Tests that a valid KML string parses with and without a schema"""
     test_kml = b'<kml xmlns="http://www.opengis.net/kml/2.2"/>'
     # First pass validates against the OGC KML 2.2 schema, second pass
     # parses without one; both must serialize back to the input.
     for parse_kwargs in ({'schema': Schema('ogckml22.xsd')}, {}):
         tree = fromstring(test_kml, **parse_kwargs)
         serialized = etree.tostring(tree, encoding='ascii')
         self.assertEqual(serialized, test_kml)
    def test_get_html(self):
        """Check get_html() with the default, an HTML5-only and a broken template.

        The last two cases replace ``capa_system.render_template`` with a
        fake; the original renderer is always put back afterwards.
        """
        # usual output
        output = self.the_input.get_html()
        self.assertEqual(
            etree.tostring(output),
            """<div>{\'status\': Status(\'queued\'), \'button_enabled\': True, \'rows\': \'10\', \'queue_len\': \'3\', \'mode\': \'\', \'cols\': \'80\', \'STATIC_URL\': \'/dummy-static/\', \'linenumbers\': \'true\', \'queue_msg\': \'\', \'value\': \'print "good evening"\', \'msg\': u\'Submitted. As soon as a response is returned, this message will be replaced by that feedback.\', \'matlab_editor_js\': \'/dummy-static/js/vendor/CodeMirror/octave.js\', \'hidden\': \'\', \'id\': \'prob_1_2\', \'tabsize\': 4}</div>"""
        )

        old_render_template = self.the_input.capa_system.render_template
        # Restore in ``finally`` so a failing assertion below cannot leave the
        # fake renderer installed.
        try:
            # test html, that is correct HTML5 html, but is not parsable by XML parser.
            self.the_input.capa_system.render_template = lambda *args: textwrap.dedent("""
                    <div class='matlabResponse'><div id='mwAudioPlaceHolder'>
                    <audio controls autobuffer autoplay src='data:audio/wav;base64='>Audio is not supported on this browser.</audio>
                    <div>Right click <a href=https://endpoint.mss-mathworks.com/media/filename.wav>here</a> and click \"Save As\" to download the file</div></div>
                    <div style='white-space:pre' class='commandWindowOutput'></div><ul></ul></div>
                """).replace('\n', '')
            output = self.the_input.get_html()
            self.assertEqual(
                etree.tostring(output),
                textwrap.dedent("""
                <div class='matlabResponse'><div id='mwAudioPlaceHolder'>
                <audio src='data:audio/wav;base64=' autobuffer="" controls="" autoplay="">Audio is not supported on this browser.</audio>
                <div>Right click <a href="https://endpoint.mss-mathworks.com/media/filename.wav">here</a> and click \"Save As\" to download the file</div></div>
                <div style='white-space:pre' class='commandWindowOutput'/><ul/></div>
                """).replace('\n', '').replace('\'', '\"')
            )

            # check that exception is raised during parsing for html.
            self.the_input.capa_system.render_template = lambda *args: "<aaa"
            with self.assertRaises(etree.XMLSyntaxError):
                self.the_input.get_html()
        finally:
            self.the_input.capa_system.render_template = old_render_template
Beispiel #7
0
 def assertEqualsXML(self, result, expect):
     """Assert that two XML strings are logically equal.

     Both inputs are parsed with blank text removed and serialized again,
     so differences in ignorable whitespace do not fail the comparison.
     """
     parser = etree.XMLParser(remove_blank_text=True)
     normalized_result = etree.tostring(etree.XML(result, parser))
     normalized_expect = etree.tostring(etree.XML(expect, parser))
     # assertEquals is a deprecated alias that newer unittest versions no
     # longer provide; assertEqual is the supported spelling.
     self.assertEqual(normalized_result, normalized_expect)
Beispiel #8
0
    def test_negotiated_rate_fetching(self):
        """
        Test the rate fetching with negotiated rates.
        This will fail if your shipper number is not eligible for negotiated
        rates
        """
        shipment = E.Shipment(
            Helper.get_shipper(self.shipper_number, "US"),
            Helper.get_ship_to("US"),
            Helper.get_ship_from("US"),
            RatingService.service_type(Code='03'),    # UPS Ground
            Helper.get_package("US", package_type_code="00"),
            RatingService.rate_information_type(negotiated=True)
        )
        rating_request = RatingService.rating_request_type(shipment)
        response = self.rating_api.request(rating_request)

        # The rated shipment must carry negotiated rates and a charge value.
        has_negotiated = hasattr(response.RatedShipment, 'NegotiatedRates')
        self.assertTrue(has_negotiated)
        self.assertTrue(
            response.RatedShipment.RatedPackage.TotalCharges.MonetaryValue
        )
        print(etree.tostring(response, pretty_print=True))
Beispiel #9
0
def extMainText(html, threshold = 0.5, filterMode = False):
    """
    Parses HTML and keeps only main text parts.

    PARAMETERS:
    html - Input html text, MUST BE UNICODE!
    threshold - The density threshold to distinguish major content & others.
    filterMode - Use normal 'Extract' mode or the other 'Filter' mode.

    RETURN:
    Final text of the page: the main html fragments are selected first and
    then passed through get_text().
    """
    html = _removeControlChars(html)
    # If we prepare a BeautifulSoup instance manually and pass it to lxml.html.soupparser.convert_tree()
    # then this func work well as 'import ExtMainText' but will throw strange error for 'import jqhtml.ExtMainText'.
    root = lxml.html.soupparser.fromstring(html)
    # densDic returns {'self': (tag density, length of pure text, total length of html tags and text, etree instance), 'child': list of density dics for child entities }
    densDic = _calcDensity(root)
    if filterMode:
        finalHtml = _filterSpam(densDic, threshold)
    else:
        maxPart, textLen, maxPartChilds, textLenChilds = _getMainText(densDic, threshold)
        if textLenChilds > textLen:
            # The children together hold more text than the single best part.
            # Missing parts are tested by identity ("is not None"), not by "!=".
            finalHtml = ''.join(
                etree.tostring(tree, encoding = unicode) if tree is not None else ''
                for tree in maxPartChilds)
        else:
            finalHtml = etree.tostring(maxPart, encoding = unicode) if maxPart is not None else ''
    return get_text(finalHtml)
Beispiel #10
0
    def test_dhcp_xml(self):
        """
        Test network dhcp xml: no element without params, then range
        attributes, then host entries.
        """
        dhcp_range = dict(start='192.168.122.100', end='192.168.122.254')
        host1 = dict(mac='00:16:3e:77:e2:ed',
                     name='foo.example.com',
                     ip='192.168.122.10')
        host2 = dict(mac='00:16:3e:3e:a9:1a',
                     name='bar.example.com',
                     ip='192.168.122.11')
        params = {}

        # Without any parameter no dhcp element is produced.
        self.assertEqual(None, nxml._get_dhcp_elem(**params))

        # Range only.
        params['range'] = dhcp_range
        range_xml = ET.tostring(nxml._get_dhcp_elem(**params))
        start = xpath_get_text(range_xml, '/dhcp/range/@start')
        end = xpath_get_text(range_xml, '/dhcp/range/@end')
        self.assertEqual(dhcp_range['start'], start[0])
        self.assertEqual(dhcp_range['end'], end[0])

        # Range plus static hosts, reported in the order given.
        params['hosts'] = [host1, host2]
        hosts_xml = ET.tostring(nxml._get_dhcp_elem(**params))
        host_ips = xpath_get_text(hosts_xml, '/dhcp/host/@ip')
        self.assertEqual(host_ips, [host1['ip'], host2['ip']])
Beispiel #11
0
    def test_ip_xml(self):
        """
        Test network ip xml: dhcp range, address and netmask, for both the
        dotted-netmask and the prefix-length notation.
        """
        dhcp_range = dict(start='192.168.122.100', end='192.168.122.254')
        params = {}

        self.assertEqual(None, nxml._get_dhcp_elem(**params))

        params['net'] = '192.168.122.0/255.255.255.0'
        params['dhcp'] = {'range': dhcp_range}
        ip_xml = ET.tostring(nxml._get_ip_elem(**params))
        start = xpath_get_text(ip_xml, '/ip/dhcp/range/@start')[0]
        end = xpath_get_text(ip_xml, '/ip/dhcp/range/@end')[0]
        self.assertEqual(dhcp_range['start'], start)
        self.assertEqual(dhcp_range['end'], end)

        address = xpath_get_text(ip_xml, '/ip/@address')[0]
        netmask = xpath_get_text(ip_xml, '/ip/@netmask')[0]
        self.assertEqual(address, params['net'].split('/')[0])
        self.assertEqual(netmask, params['net'].split('/')[1])

        # _get_ip_elem also accepts prefix-length strings: '192.168.122.0/24'
        # is the same network as "192.168.122.0/255.255.255.0"
        params['net'] = '192.168.122.0/24'
        prefix_xml = ET.tostring(nxml._get_ip_elem(**params))
        netmask = xpath_get_text(prefix_xml, '/ip/@netmask')[0]
        expected_netmask = str(ipaddr.IPNetwork(params['net']).netmask)
        self.assertEqual(netmask, expected_netmask)
def extMainText(html, threshold = 0.5, filterMode = False):
    """
    Parses HTML and keeps only main text parts.

    PARAMETERS:
    html - Input html text, MUST BE UNICODE!
    threshold - The density threshold to distinguish major content & others.
    filterMode - Use normal 'Extract' mode or the other 'Filter' mode.

    RETURN:
    html fragments of main text
    """
    html = _removeControlChars(html)
    # If we prepare a BeautifulSoup instance manually and pass it to lxml.html.soupparser.convert_tree()
    # then this func work well as 'import ExtMainText' but will throw strange error for 'import jqhtml.ExtMainText'.
    root = lxml.html.soupparser.fromstring(html)
    densDic = _calcDensity(root)
    if filterMode:
        return _filterSpam(densDic, threshold)

    maxPart, textLen, maxPartChilds, textLenChilds = _getMainText(densDic, threshold)
    if textLenChilds > textLen:
        # The children together hold more text than the single best part.
        # Missing parts are tested by identity ("is not None"), not by "!=".
        return ''.join(
            etree.tostring(tree, encoding = unicode) if tree is not None else ''
            for tree in maxPartChilds)
    return etree.tostring(maxPart, encoding = unicode) if maxPart is not None else ''
def make_files(job_links,i,j,k,l):
	"""Download each job posting and save its main section to "<i>.html".

	Parameters:
		job_links: iterable of link paths appended to the workopolis.com URL.
		i: running sub category count; also the name of the next output file.
		j: running city count.
		k: running count of empty / unparsable pages.
		l: running category count.

	Returns:
		[i, j, k, l] with the updated counters.
	"""
	for j_link in job_links:
		try:
			page = make_connection("http://www.workopolis.com/" + j_link)
			tree = html.fromstring(page.content)
			section = tree.xpath('//section[@class="main-content job-view-main-content js-analyticsJobView"]')
			side_bar = tree.xpath('//section[@class="sidebar-block sidebar-clean"]')
			# Pages are deliberately not filtered by their category header
			# link: results are too restrictive if all other categories are
			# cut (too many postings are miscategorized).
			if not section:
				k += 1 # Increment empty count
				continue

			page_str = etree.tostring(section[0]).decode("utf-8")
			# NOTE(review): a page without the sidebar section raises an
			# uncaught IndexError here - confirm whether that is intended.
			page_str += etree.tostring(side_bar[0]).decode("utf-8")
			page_str = page_str.replace("&#13;","")
			if not page_str:
				k += 1 # Increment empty count
				continue

			file3.write("http://www.workopolis.com" + j_link + "\n")
			# "with" closes the file even when a write fails
			with open(str(i) +".html", 'w') as page_file:
				page_file.write("<a class=\"page-link\" href=\"www.workopolis.com"+j_link+"\"></a>\n")
				page_file.write(page_str)
			i += 1 # Increment sub category count
			j += 1 # Increment city count
			l += 1 # Increment category count
		except etree.XMLSyntaxError:
			k += 1 # Increment empty count
	return [i,j,k,l]
Beispiel #14
0
    def test_simple_two_tags(self):
        """Two spyne_id targets in different branches of the cloth are both filled."""
        class SomeObject(ComplexModel):
            s = Unicode
            i = Integer

        v = SomeObject(s='s', i=5)

        cloth = E.a(
            E.b1(),
            E.b2(
                E.c1(spyne_id="s"),
                E.c2(),
            ),
            E.e(
                E.g1(),
                E.g2(spyne_id="i"),
                E.g3(),
            ),
        )

        elt = self._run(v, cloth=cloth)

        print(etree.tostring(elt, pretty_print=True))

        # (child index path, attribute, expected value), checked in order
        expectations = (
            ((0,), 'tag', 'b1'),
            ((1,), 'tag', 'b2'),
            ((1, 0), 'tag', 'c1'),
            ((1, 0), 'text', 's'),
            ((1, 1), 'tag', 'c2'),
            ((2,), 'tag', 'e'),
            ((2, 0), 'tag', 'g1'),
            ((2, 1), 'tag', 'g2'),
            ((2, 1), 'text', '5'),
            ((2, 2), 'tag', 'g3'),
        )
        for path, attr, wanted in expectations:
            node = elt
            for index in path:
                node = node[index]
            assert getattr(node, attr) == wanted
def init(handler, ctxInfo):
  """Run ``handler`` in the mode named by the first command-line argument.

  The mode must be one of ARGS_LIST, otherwise usage() is consulted.  In
  execute mode the handler is executed against a fresh ConfigInfo and the
  result is printed as XML; in setup mode the handler is set up and an
  empty ConfigInfo is printed.  Any exception is reported on stdout as an
  <eai_error> document instead of being propagated.

  Parameters:
    handler: callable invoked as handler(mode, ctxInfo); the result must
      provide execute(), setup() and toXml().
    ctxInfo: passed through to the handler unchanged.
  """
  try:
    # TODO: handle dependence on sys.argv intelligently for testing/etc imports.
    # An explicit branch instead of "cond and value or fallback", which
    # silently falls through to the fallback when the value is falsy.
    if len(sys.argv) > 1 and sys.argv[1] in ARGS_LIST:
      mode = sys.argv[1]
    else:
      mode = usage()

    hand = handler(mode, ctxInfo)
    if ARG_EXECUTE == mode:
      info = ConfigInfo()
      hand.execute(info)
      print(hand.toXml(info))
    elif ARG_SETUP == mode:
      hand.setup()
      print(hand.toXml(ConfigInfo()))
    else:
      raise InternalException("Can't get here, boss (%s)." % mode)
  except Exception as e:
    exType, exMsg = sys.exc_info()[:2]
    # <eai_error>
    root            = et.Element("eai_error")
    #   <recognized>[true|false]</recognized>
    knownNode       = et.SubElement(root, "recognized")
    knownNode.text  = "true" if isinstance(e, AdminManagerException) else "false"
    #   <type>[exception class]</type>
    typeNode        = et.SubElement(root, "type")
    typeNode.text   = unicode(exType)
    #   <message>[exception value]</message>
    msgNode         = et.SubElement(root, "message")
    msgNode.text    = unicode(exMsg)
    #   <stacktrace>[bt]</stacktrace>
    stackNode       = et.SubElement(root, "stacktrace")
    stackNode.text  = traceback.format_exc()
    # </eai_error>
    print(et.tostring(root))
def test_payment_create_customer_xml():
    """The create-customer request XML matches the expected document."""
    gateway = dinero.get_gateway('authorize.net')
    options = dict(
        email='*****@*****.**',
        number='4' + '1' * 15,
        month='12',
        year='2012',
    )
    xml = gateway._create_customer_xml(options)
    template = """<createCustomerProfileRequest xmlns="AnetApi/xml/v1/schema/AnetApiSchema.xsd">
                    <merchantAuthentication>
                        <name>{login_id}</name>
                        <transactionKey>{transaction_key}</transactionKey>
                    </merchantAuthentication>
                    <profile>
                        <email>[email protected]</email>
                        <paymentProfiles>
                            <payment>
                                <creditCard>
                                    <cardNumber>4111111111111111</cardNumber>
                                    <expirationDate>2012-12</expirationDate>
                                </creditCard>
                            </payment>
                        </paymentProfiles>
                    </profile>
                </createCustomerProfileRequest>"""
    should_be = trimmy(template.format(
        login_id=gateway.login_id,
        transaction_key=gateway.transaction_key,
    ))
    actual = etree.tostring(xml)
    assert actual == should_be, "Invalid XML (\n\t%s\n\t%s\n)" % (actual, should_be)
Beispiel #17
0
def bookmarcs(request):
    """Render the bookmark list of the requesting user.

    The user's bookmarks are loaded newest first, and every bookmark is
    paired with the Record / Ebook from the 'records' database that shares
    its gen_id, together with the XSLT-rendered description of that record.

    The template receives 'records': a list of dicts with the key
    'bookmarc' and, when a matching record exists, 'record' and 'bib'.
    """
    bookmarcs = Bookmarc.objects.values('id', 'gen_id').filter(user=request.user).order_by('-add_date')

    # values() yields plain dicts, so fields are read by key, not attribute.
    gen_ids = {}
    for bookmarc in bookmarcs:
        gen_ids[bookmarc['gen_id']] = {'bookmarc': bookmarc}

    # Records and ebooks are attached the same way; an ebook sharing a gen_id
    # with a record overwrites it because it is processed second.
    for model in (Record, Ebook):
        for record in model.objects.using('records').filter(gen_id__in=list(gen_ids)):
            doc_tree = xslt_bib_draw_transformer(etree.XML(record.content))
            entry = gen_ids[record.gen_id]
            entry['record'] = record
            # No trailing comma here: 'bib' must be the markup itself, not a
            # one-element tuple wrapping it.
            entry['bib'] = etree.tostring(doc_tree).replace(u'<b/>', u' ')

    records = [gen_ids[bookmarc['gen_id']] for bookmarc in bookmarcs]

    return render(request, 'rbooks/frontend/bookmarcs.html', {
        'records': records
    })
Beispiel #18
0
    def populate_summary(self):
        """
        Load the vulnerability summary listing into ``self.vulnerabilities``.

        Sends a VulnerabilityListingRequest, stores the serialized response
        in ``self.vulnxml`` and keys every VulnerabilitySummary's attributes
        by its 'id'.

        Returns True on success, False when not logged in or when the
        response does not report success.
        """
        if not self.isLoggedIn():
            self.log.warn("No Nexpose API instance provided...")
            return False

        request_xml = self.make_xml('VulnerabilityListingRequest', {}, isroot=True)
        self.log.debug("Sending VulnerabilityListingRequest:\n%s" % (request_xml))

        tree = etree.parse(self.send_command(request_xml))
        self.log.debug("Result: %s" % (etree.tostring(tree)))

        data = tree.getroot()
        if data.attrib['success'] != '1':
            self.log.warn("VulnerabilityListing failed")
            return False

        summaries = data.findall('VulnerabilitySummary')
        self.vulnxml = etree.tostring(data)
        for summary in summaries:
            self.vulnerabilities[summary.attrib['id']] = summary.attrib

        self.log.debug("Loaded %s Vulnerabilities..." % (len(self.vulnerabilities)))
        return True
def test_charge_customer_xml():
    """The charge-customer request XML matches the expected document."""
    gateway = dinero.get_gateway('authorize.net')
    price = 123.45
    customer_id = '123456789'
    card_id = '987654321'
    options = dict(cvv='123')
    xml = gateway._charge_customer_xml(customer_id, card_id, price, options)
    template = """<createCustomerProfileTransactionRequest xmlns="AnetApi/xml/v1/schema/AnetApiSchema.xsd">
                    <merchantAuthentication>
                        <name>{login_id}</name>
                        <transactionKey>{transaction_key}</transactionKey>
                    </merchantAuthentication>
                    <transaction>
                        <profileTransAuthCapture>
                            <amount>{price}</amount>
                            <customerProfileId>{customer_id}</customerProfileId>
                            <customerPaymentProfileId>{card_id}</customerPaymentProfileId>
                            <cardCode>{cvv}</cardCode>
                        </profileTransAuthCapture>
                    </transaction>
                </createCustomerProfileTransactionRequest>"""
    should_be = trimmy(template.format(
        login_id=gateway.login_id,
        transaction_key=gateway.transaction_key,
        price=price,
        customer_id=customer_id,
        card_id=card_id,
        **options
    ))
    actual = etree.tostring(xml)
    assert actual == should_be, "Invalid XML (\n\t%s\n\t%s\n)" % (actual, should_be)
Beispiel #20
0
def remove_module(request):
    """Detach the module marker attribute from one template element.

    Reads the element's xpath from POST, and the default prefix, namespace
    map and template content from the session.  The module attribute is
    removed from the element and the serialized template is written back to
    the session.  Responds with an empty JSON object.
    """
    xpath = request.POST['xpath']

    session = request.session
    default_prefix = session['moduleDefaultPrefix']
    namespace = session['moduleNamespaces'][default_prefix]
    template_content = session['moduleTemplateContent']

    dom = etree.parse(BytesIO(template_content.encode('utf-8')))

    # Replace the prefix with the namespace so that find() can resolve it.
    element = dom.find(xpath.replace(default_prefix +":", namespace))

    module_attr = '{http://mdcs.ns}_mod_mdcs_'
    if module_attr in element.attrib:
        del element.attrib[module_attr]

    # Drop the first prefix that is bound to the module namespace.
    nsmap = element.nsmap
    module_prefixes = [prefix for prefix, ns in nsmap.iteritems()
                       if ns == 'http://mdcs.ns']
    if module_prefixes:
        del nsmap[module_prefixes[0]]

    # create a new element to replace the previous one (can't replace directly the nsmap using lxml)
    # NOTE(review): the new element is only printed below and never attached
    # to dom, so the saved template keeps the old nsmap - confirm intent.
    element = etree.Element(element.tag, nsmap = nsmap)

    # save the tree in the session
    request.session['moduleTemplateContent'] = etree.tostring(dom)
    print(etree.tostring(element))

    return HttpResponse(json.dumps({}), content_type='application/javascript')
Beispiel #21
0
def set_target_inner(module, tree, xpath, namespaces, attribute, value):
    """Set the text or an attribute of every node matched by ``xpath``.

    The target node is created first when it does not exist yet.

    Parameters:
        module: object whose fail_json() reports errors.
        tree: the parsed document.
        xpath: expression selecting the nodes to change.
        namespaces: prefix -> namespace URI mapping used for the xpath and
            for a prefixed attribute name.
        attribute: attribute to set, optionally as "prefix:name"; when
            falsy the node text is set instead.
        value: the text / attribute value to store.

    Returns:
        True when the target had to be created or any matched node was
        modified, False otherwise.
    """
    changed = False

    try:
        if not is_node(tree, xpath, namespaces):
            changed = check_or_make_target(module, tree, xpath, namespaces)
    except Exception as e:
        missing_namespace = ""
        # NOTE: This checks only the namespaces defined in root element!
        # TODO: Implement a more robust check to check for child namespaces' existence
        if tree.getroot().nsmap and ":" not in xpath:
            missing_namespace = "XML document has namespace(s) defined, but no namespace prefix(es) used in xpath!\n"
        module.fail_json(msg="%sXpath %s causes a failure: %s\n  -- tree is %s" %
                             (missing_namespace, xpath, e, etree.tostring(tree, pretty_print=True)), exception=traceback.format_exc())

    if not is_node(tree, xpath, namespaces):
        module.fail_json(msg="Xpath %s does not reference a node! tree is %s" %
                             (xpath, etree.tostring(tree, pretty_print=True)))

    # Resolve "prefix:name" to "{uri}name" once, before the loop.  Doing it
    # per element re-split the already resolved name (whose URI usually
    # contains ":") on the second match, and compared the unresolved name
    # against the element, which reported a change on every run.
    if attribute and ":" in attribute and not attribute.startswith("{"):
        attr_ns, attr_name = attribute.split(":", 1)
        attribute = "{{{0}}}{1}".format(namespaces[attr_ns], attr_name)

    for element in tree.xpath(xpath, namespaces=namespaces):
        if not attribute:
            if element.text != value:
                element.text = value
                changed = True
        elif element.get(attribute) != value:
            element.set(attribute, value)
            changed = True

    return changed
Beispiel #22
0
def _parse_fq_data(url, index, retry_count, pause):
    """Download ``url`` and parse its "FundHoldSharesTable" into a DataFrame.

    Parameters:
        url: address of the page to download; the body is decoded as GBK.
        index: when truthy only the first seven HIST_FQ_COLS names are used.
        retry_count: number of attempts before giving up.
        pause: seconds slept before every attempt.

    Returns:
        The parsed frame with duplicate dates dropped, or an empty
        DataFrame when the table has no rows.

    Raises:
        IOError: when every attempt failed (each failure is printed).
    """
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url)
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath('//table[@id=\"FundHoldSharesTable\"]')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            # Pass a file-like object, as the other parsers in this file do;
            # literal markup strings are deprecated as read_html input.
            df = pd.read_html(StringIO(sarr), skiprows = [0, 1])[0]
            if len(df) == 0:
                return pd.DataFrame()
            if index:
                df.columns = ct.HIST_FQ_COLS[0:7]
            else:
                df.columns = ct.HIST_FQ_COLS
            # np.object was only an alias of the builtin and no longer exists
            # in current NumPy; a unit-less datetime64 target is rejected by
            # current pandas, so the unit is spelled out.
            if df['date'].dtypes == object:
                df['date'] = df['date'].astype('datetime64[ns]')
            df = df.drop_duplicates('date')
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Beispiel #23
0
    def addServiceItem(self, item_name, description, rate, account):
        """
        Build an ItemServiceAdd request for the given item and submit it.

        item_name, description and account are converted with str(); rate is
        formatted with two decimals.  The response is only printed, and only
        when self.debug is set.
        """
        msgs_rq = etree.Element('QBXMLMsgsRq')
        msgs_rq.set("onError", "continueOnError")

        add_rq = etree.SubElement(msgs_rq,'ItemServiceAddRq')
        add_rq.set("requestID", "")

        item = etree.SubElement(add_rq, "ItemServiceAdd")
        etree.SubElement(item, "Name").text = str(item_name)

        # Description, price and account all live under SalesOrPurchase.
        sales = etree.SubElement(item, "SalesOrPurchase")
        etree.SubElement(sales, "Desc").text = str(description)
        etree.SubElement(sales, "Price").text = "%.2f" % rate
        account_ref = etree.SubElement(sales, "AccountRef")
        etree.SubElement(account_ref, "FullName").text = str(account)

        xmldoc = self.__submitQBXMLReq(self.__makeQBXMLReq(msgs_rq))

        if self.debug:
            print(etree.tostring(xmldoc, pretty_print=True, encoding="utf-8", xml_declaration=True))
Beispiel #24
0
    def __makeQBXMLReq(self, data):
        """
        Add the session authentication information to the specified qbXML
        document in preparation for submission to Quickbooks.

        Returns the ElementTree whose QBXML root holds the sign-on ticket
        request followed by ``data``.
        """
        qbxml = etree.Element("QBXML")
        document = etree.ElementTree(qbxml)
        qbxml.addprevious(etree.ProcessingInstruction ('qbxml', 'version="6.0"'))

        signon = etree.SubElement(qbxml, "SignonMsgsRq")
        ticket_rq = etree.SubElement(signon, 'SignonTicketRq')

        # Children of the ticket request, in document order.
        ticket_fields = (
            ('ClientDateTime', self.__getXMLDatetime()),
            ('SessionTicket', self.__session_ticket),
            ('Language', 'English'),
            ('AppID', self.app_name_id),
            ('AppVer', self.app_name_ver),
        )
        for tag, text in ticket_fields:
            etree.SubElement(ticket_rq, tag).text = text

        qbxml.append(data)

        if self.debug:
            print(etree.tostring(document, pretty_print=True, encoding="utf-8", xml_declaration=True))
        return document
Beispiel #25
0
    def __makeSignInReq(self):
        """
        Generate the XML document that contains the sign-in request for the
        Quickbooks API.

        Returns the ElementTree whose QBXML root holds the application
        certificate sign-on request.
        """
        qbxml = etree.Element("QBXML")
        document = etree.ElementTree(qbxml)
        qbxml.addprevious(etree.ProcessingInstruction ('qbxml', 'version="6.0"'))

        signon = etree.SubElement(qbxml, "SignonMsgsRq")
        app_cert_rq = etree.SubElement(signon, 'SignonAppCertRq')

        # Children of the certificate request, in document order.
        cert_fields = (
            ('ClientDateTime', self.__getXMLDatetime()),
            ('ApplicationLogin', self.app_name),
            ('ConnectionTicket', self.conn_ticket),
            ('Language', 'English'),
            ('AppID', self.app_name_id),
            ('AppVer', self.app_name_ver),
        )
        for tag, text in cert_fields:
            etree.SubElement(app_cert_rq, tag).text = text

        if self.debug:
            print(etree.tostring(document, pretty_print=True, encoding="utf-8", xml_declaration=True))
        return document
    def test_preprocess_xml(self, content):
        """preprocess_xml replaces the element matched by a macro's xpath.

        ``content.Macros`` is set to return one (xpath, replacement) pair;
        the document is then expected to hold the replacement markup in
        place of the matched GPH element.
        """
        document = etree.fromstring("""
        <CFRGRANULE>
          <PART>
            <APPENDIX>
              <TAG>Other Text</TAG>
              <GPH DEEP="453" SPAN="2">
                <GID>ABCD.0123</GID>
              </GPH>
            </APPENDIX>
          </PART>
        </CFRGRANULE>""")
        macro_xpath = "//GID[./text()='ABCD.0123']/.."
        macro_replacement = """
              <HD SOURCE="HD1">Some Title</HD>
              <GPH DEEP="453" SPAN="2">
                <GID>EFGH.0123</GID>
              </GPH>"""
        content.Macros.return_value = [(macro_xpath, macro_replacement)]

        reg_text.preprocess_xml(document)

        expected = etree.fromstring("""
        <CFRGRANULE>
          <PART>
            <APPENDIX>
              <TAG>Other Text</TAG>
              <HD SOURCE="HD1">Some Title</HD>
              <GPH DEEP="453" SPAN="2">
                <GID>EFGH.0123</GID>
              </GPH></APPENDIX>
          </PART>
        </CFRGRANULE>""")

        actual_markup = etree.tostring(document)
        expected_markup = etree.tostring(expected)
        self.assertEqual(actual_markup, expected_markup)
    def extract_each(self, filename):
        """Extract the courses listed in one catalog page into a Catalog.

        Every <p> below <body> is treated as one course: its bold text is
        the title (whose first two words form the course id), its italic
        text the instructor and its direct text the description.

        Parameters:
            filename: path of the HTML file to read.

        Returns:
            A Catalog built from a DataFrame with the columns course_title,
            desc, instructor, course_id, school and id, or an empty Catalog
            when the file cannot be parsed.
        """
        with open(filename) as f:
            data = f.read()
        try:
            html = etree.HTML(data)
        except Exception:
            return Catalog()
        if html is None:
            # Guard for a parse that yields no root element.
            return Catalog()

        def joined_text(fragment, tag):
            # All text nodes directly below any <tag> of the fragment.
            return ' '.join(fragment.xpath('//%s/text()' % tag))

        def words(text):
            # Python 2 unicode is encoded as before; native str is kept so
            # that the pieces can be joined again on Python 3 as well.
            if not isinstance(text, str):
                text = text.encode('utf-8')
            return text.split(' ')

        # Re-parse every paragraph once so that the absolute xpaths below
        # only see that paragraph.
        fragments = [etree.HTML(etree.tostring(e)) for e in html.xpath('//body//p')]

        course_title = [joined_text(frag, 'b').strip() for frag in fragments]
        course_desc = [
            joined_text(frag, 'p').replace('\n', ' ').replace('\t', ' ').replace('\r', ' ').strip()
            for frag in fragments]
        course_instructor = [joined_text(frag, 'i') for frag in fragments]

        d = DataFrame({'course_title': course_title, 'desc': course_desc, 'instructor': course_instructor})

        d['course_id'] = d['course_title'].map(lambda x: ' '.join(words(x)[:2]).strip())
        d['course_title'] = d['course_title'].map(lambda x: ' '.join(words(x)[2:]).strip())
        d['desc'] = d['desc'].apply(lambda x: x.replace('\n', ' ').replace('\r', ' ').strip('"').strip())

        d['school'] = 'uw'
        # str.strip() removes a *set of characters* from both ends, which
        # also ate leading/trailing letters of the file name; cut the
        # directory prefix explicitly instead.
        doc_id = filename
        if doc_id.startswith(self.input_directory):
            doc_id = doc_id[len(self.input_directory):]
        d['id'] = doc_id.strip('/')

        return Catalog(data=d)
Beispiel #28
0
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    """Fetch one page of today's tick table for ``symbol`` as a DataFrame.

    Up to ``retry_count`` attempts are made, sleeping ``pause`` seconds
    before each one.  Any exception raised while downloading or parsing a
    page is printed and the next attempt starts.

    Returns:
        The parsed DataFrame with ``ct.TODAY_TICK_COLUMNS`` as column names
        and the ``%`` sign removed from the ``pchange`` values.

    Raises:
        IOError: carrying ``ct.NETWORK_URL_ERROR_MSG`` when no attempt
            succeeded.
    """
    ct._write_console()
    for _attempt in range(retry_count):
        time.sleep(pause)
        try:
            url = ct.TODAY_TICKS_URL % (
                ct.P_TYPE['http'],
                ct.DOMAINS['vsf'],
                ct.PAGES['t_ticks'],
                symbol,
                tdate,
                pageNo,
            )
            page = lxml.html.parse(url)
            rows = page.xpath('//table[@id="datatbl"]/tbody/tr')
            # etree.tostring returns bytes on Python 3; decode so the rows
            # can be joined into one text document.
            if ct.PY3:
                row_markup = [etree.tostring(row).decode('utf-8') for row in rows]
            else:
                row_markup = [etree.tostring(row) for row in rows]
            table_markup = '<table>%s</table>' % ''.join(row_markup)
            # '--' placeholders in the page are turned into '0'.
            table_markup = table_markup.replace('--', '0')
            ticks = pd.read_html(StringIO(table_markup), parse_dates=False)[0]
            ticks.columns = ct.TODAY_TICK_COLUMNS
            ticks['pchange'] = ticks['pchange'].map(
                lambda value: value.replace('%', ''))
        except Exception as err:
            print(err)
        else:
            return ticks
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
    def test_iframe_youtube(self):
        """Both YouTube iframes come out as cnxml ``media`` wrapping ``iframe``."""
        html = etree.parse(os.path.join(TEST_DATA_DIR, 'media-video.html'))
        cnxml = self.call_target(html).getroot()

        # (id of the expected media element, expected iframe src)
        expectations = (
            ('test_media_video_youtube',
             'http://www.youtube.com/v/k9oSQNTHUZM'),
            ('test_media_video_youtube_2',
             'http://www.youtube.com/embed/r-FonWBEb0o'),
        )

        for media_id, expected_src in expectations:
            try:
                elm = cnxml.xpath('//*[@id="%s"]' % media_id)[0]
            except IndexError:
                # Nothing carries the id: dump the transformed document so
                # the failure shows what was actually produced.
                transformed_cnxml = etree.tostring(cnxml)
                self.fail('Failed to pass through media@id and/or '
                          'the iframe->iframe tag transform: '
                          + transformed_cnxml)

            self.assertEqual(elm.tag, '{http://cnx.rice.edu/cnxml}media')
            # The media element must have exactly one child.
            (iframe,) = elm.getchildren()
            self.assertEqual(iframe.tag, '{http://cnx.rice.edu/cnxml}iframe')
            self.assertEqual(iframe.attrib['src'], expected_src)
Beispiel #30
0
def _get_report_data(year, quarter, pageNo, dataArr, orderby):
    """Download the report table for ``year``/``quarter`` starting at ``pageNo``.

    The rows of the requested page are appended to ``dataArr``; when the page
    exposes a "next" link the function recurses with the page number taken
    from that link.

    Args:
        year, quarter, pageNo, orderby: values substituted into
            ``ct.REPORT_URL``.
        dataArr: DataFrame holding the rows collected so far.

    Returns:
        The accumulated DataFrame after the last page.  If anything raises
        while a page is fetched or parsed, the exception is printed and
        ``None`` is returned (rows collected so far are discarded); this
        contract is kept unchanged for existing callers.
    """
    ct._write_console()
    try:
        request = Request(ct.REPORT_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'],
                                           year, quarter, pageNo, ct.PAGE_NUM[1], orderby))
        # With the default ordering the scraped pages contain duplicated and
        # missing rows, hence the explicit ``orderby`` parameter.
        text = urlopen(request, timeout=10).read()
        text = text.decode('GBK')
        text = text.replace('--', '')
        html = lxml.html.parse(StringIO(text))
        res = html.xpath("//table[@class=\"list_table\"]/tr")
        if ct.PY3:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = ''.join(sarr)
        sarr = '<table>%s</table>' % sarr
        # Hand the markup over as a file-like object, as _today_ticks does;
        # passing literal HTML to read_html is deprecated in recent pandas.
        df = pd.read_html(StringIO(sarr))[0]
        df = df.drop(11, axis=1)
        df.columns = ct.REPORT_COLS
        # DataFrame.append was removed in pandas 2.0; concat is the
        # documented replacement and also works on older releases.
        dataArr = pd.concat([dataArr, df], ignore_index=True)
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
        if len(nextPage) > 0:
            # The page number is the first run of digits in the onclick value.
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_report_data(year, quarter, pageNo, dataArr, orderby)
        else:
            return dataArr
    except Exception as e:
        print(e)
Beispiel #31
0
def getAdmin():
    """Build and return the administration page.

    Anyone other than the ``admin`` user gets a meta-refresh redirect to
    ``/home/login``.  For the admin the page is assembled from four tabs
    (Admin, Users, Genes, Strains), each holding an "add"/"change" form and,
    where applicable, a "remove" form whose drop-down is filled from the
    database.  The markup, together with inline CSS and JavaScript, is handed
    to ``glamsTemplate``.

    NOTE(review): several ``E.*`` expressions below contain ``******`` runs
    that look like redaction damage; as written this function does not
    parse.  The original arguments need to be restored from version control.
    """
    username = checkPassword()
    if not username or username != 'admin':
        return """<meta http-equiv="refresh" content="0;url=/home/login" />"""


########################  CREATE ARTICLE ###############################################################################

###### ADMIN
    # Form for changing the admin password (submitted via changepassword()).
    changepass = E.div(
        E.h2('Change admin password:'******'Current password:'******'name': 'oldpassword',
                      'type': 'password'
                  })),
            E.div(E.label('New password:'******'name': 'newpassword1',
                      'type': 'password'
                  })),
            E.div(E.label('Confirm new password:'******'name': 'newpassword2',
                      'type': 'password'
                  })),
            E.a(
                {
                    'class': 'button-link',
                    'onclick': "changepassword($(this).parent().serialize());"
                }, 'Change password')))

    # Form for changing the admin email address (submitted via changeemail()).
    changeemail = E.div(
        E.h2('Change admin email address:'),
        E.form(
            E.div(
                E.label('Password:'******'name': 'password',
                    'id': 'emailpassword',
                    'type': 'password'
                })),
            E.div(
                E.label('New email:'),
                E.input({
                    'name': 'newemail1',
                    'id': 'newemail1',
                    'type': 'text'
                })),
            E.div(
                E.label('Confirm new email:'),
                E.input({
                    'name': 'newemail2',
                    'id': 'newemail2',
                    'type': 'text'
                })),
            E.a(
                {
                    'class': 'button-link',
                    'onclick': "changeemail($(this).parent().serialize());"
                }, 'Change email')))

    ###### USERS
    adduser = E.div(
        E.h2('Add User:'******'id': 'adduserform'},
               E.div(E.label('User name:'),
                     E.input({
                         'name': 'newusername',
                         'type': 'text'
                     })),
               E.div(E.label('Email address:'),
                     E.input({
                         'name': 'email',
                         'type': 'text'
                     })),
               E.div(E.label('User password:'******'name': 'newpassword1',
                         'type': 'password'
                     })),
               E.div(E.label('Confirm user password:'******'name': 'newpassword2',
                         'type': 'password'
                     })),
               E.a(
                   {
                       'class': 'button-link',
                       'onclick': "adduser($(this).parent().serialize());"
                   }, 'Add user')))

    # Drop-down of existing lab members; a single empty option is used when
    # the query returns an empty list.
    userlist = E.select({'name': 'user'})
    vals = db.execute("SELECT name FROM lab_members")
    if vals == []:
        userlist.append(E.option(''))
    else:
        for val in vals:
            userlist.append(E.option(val[0]))
    removeuser = E.div(
        E.h2('Remove User:'******'User name:'), userlist),
            E.a(
                {
                    'class': 'button-link',
                    'onclick': "removeuser($(this).parent().serialize());"
                }, 'Remove user')))

    ##### GENES
    addgene = E.div(
        E.h2('Add Gene:'),
        E.form(
            E.div(E.label('Gene Name:'),
                  E.input({
                      'name': 'genename',
                      'type': 'text'
                  })),
            E.div(
                E.label('Wild Type Presence:'),
                E.select({'name': 'genedefaultstatus'}, E.option('+'),
                         E.option('-'))),
            E.a(
                {
                    'class': 'button-link',
                    'onclick': "addgene($(this).parent().serialize());"
                }, 'Add gene')))
    # Drop-down of existing genes, built the same way as the user list.
    genelist = E.select({'name': 'gene'})
    vals = db.execute("SELECT name FROM genes")
    if vals == []:
        genelist.append(E.option(''))
    else:
        for val in vals:
            genelist.append(E.option(val[0]))
    removegene = E.div(
        E.h2('Remove Gene:'),
        E.form(
            E.div(E.label('Gene Name:'), genelist),
            E.a(
                {
                    'class': 'button-link',
                    'onclick': "removegene($(this).parent().serialize());"
                }, 'Remove gene')))

    ##### STRAINS
    addstrain = E.div(
        E.h2('Add Strain:'),
        E.form(
            E.div(E.label('Strain Name:'),
                  E.input({
                      'name': 'strainname',
                      'type': 'text'
                  })),
            E.a(
                {
                    'class': 'button-link',
                    'onclick': "addstrain($(this).parent().serialize());"
                }, 'Add Strain')))
    # Drop-down of existing strains, built the same way as the user list.
    strainlist = E.select({'name': 'strain'})
    vals = db.execute("SELECT name FROM strains")
    if vals == []:
        strainlist.append(E.option(''))
    else:
        for val in vals:
            strainlist.append(E.option(val[0]))
    removestrain = E.div(
        E.h2('Remove strain:'),
        E.form({'class': 'form'}, E.div(E.label('Strain Name:'), strainlist),
               E.a(
                   {
                       'class': 'button-link',
                       'onclick': "removestrain($(this).parent().serialize());"
                   }, 'Remove strain')))

    ### PUT ALL THE PIECES TOGETHER
    # One tab per area; the tab bodies are addressed by the #tabN anchors.
    article = E.div(
        E.div({'id': 'notification'}, ''),
        E.div({'class': 'tabs'},
              E.ul(E.li(E.a({'href': '#tab1'}, 'Admin')),
                   E.li(E.a({'href': '#tab2'}, 'Users')),
                   E.li(E.a({'href': '#tab3'}, 'Genes')),
                   E.li(E.a({'href': '#tab4'}, 'Strains'))),
              E.div(
                  E.div({'id': 'tab1'}, changepass, changeemail),
                  E.div({'id': 'tab2'}, adduser, removeuser),
                  E.div({'id': 'tab3'}, addgene, removegene),
                  E.div({'id': 'tab4'}, addstrain, removestrain),
              )))

    article = etree.tostring(article, pretty_print=True)

    ####################### STYLE AND JAVASCRIPT #############################################################################
    style = """ 
h2    {
    margin-bottom: 10px;
    padding-bottom: 5px;
    border-bottom: 1px solid #D8D8D8;
    }
input{
    webkit-border-radius: 8px;
    -moz-border-radius: 8px;
    border-radius: 8px;
    padding: 4px;
}

table{
    width:500px;
    margin:10px;
}
#notification{
    display:none;
    padding:5px;
    margin:3px;
    background-color:#FFFF66;
    webkit-border-radius: 8px;
    -moz-border-radius: 8px;
    border-radius: 8px;}
label{
    display: inline-block;
    float: left;
    clear: left;
    width: 200px;
    margin-right: 10px;
    white-space: nowrap;}
    
form .button-link{
    margin-left:0px;
}
form div{margin-bottom:10px;}
    """
    javascript = """ 
var t;
function notify(data){
    $('#notification').html(data);
    $('#notification').show();
    var fadefunc="$('#notification').hide('fade', {}, 200);";
    t=setTimeout(fadefunc,15000);
}


function changepassword(fields){
    $.post('/home/ajax/changepassword/',{fields:fields}, function(data){ notify(data);});}
function changeemail(fields){
    alert(fields);
    $.post('/home/ajax/changeemail/',{fields:fields}, function(data){ notify(data);});}
    
function adduser(fields){
    $.post('/admin/adduser/',{fields:fields}, function(data){ notify(data);});}
function removeuser(fields){
    $.post('/admin/removeuser/',{fields:fields}, function(data){ notify(data);});}
    

 

function addgene(fields){
    $.post('/admin/addgene/',{fields:fields}, function(data){ notify(data);refreshgenes();});}
function removegene(fields){
    $.post('/admin/removegene/',{fields:fields}, function(data){ notify(data);refreshgenes();});}
function refreshgenes(){
    $.post('/admin/refreshgenes/', function(data){ $("select[name='gene']").html(data);});}  
function addstrain(fields){
    $.post('/admin/addstrain/',{fields:fields}, function(data){ notify(data);refreshstrains();});}
function removestrain(fields){
    $.post('/admin/removestrain/',{fields:fields}, function(data){ notify(data);refreshstrains();});}
function refreshstrains(){
    $.post('/admin/refreshstrains/', function(data){ $("select[name='strain']").html(data);});}  


$(document).ready(function(){
   $( ".tabs" ).tabs();
});

    """
    # The CSS and JavaScript are inlined into the page through the template's
    # ``resources`` argument.
    resources = "<style type='text/css'>" + style + '</style><script type="text/javascript">' + javascript + '</script>'
    return glamsTemplate(article, username, resources=resources)
Beispiel #32
0
# Load the source component specification.  remove_blank_text=True drops
# ignorable whitespace nodes while parsing.
print("Reading source component specification from '" + src_spec_path + "'")
src_spec_name = os.path.basename(src_spec_path)
xml_parser = etree.XMLParser(remove_blank_text=True)
src_spec = etree.parse(src_spec_path, xml_parser).getroot()

# Register the Linux binary in the specification, using its file name and
# on-disk size, at LINUX_VIRTUAL_ADDRESS.
print("Processing Linux binary '" + src_bin_path + "'")
binary_name = os.path.basename(src_bin_path)
binary_size = os.path.getsize(src_bin_path)
add_provides_memory(src_spec, "binary", "subject_binary",
                    LINUX_VIRTUAL_ADDRESS, binary_name, binary_size, "true",
                    "true")

# An initramfs is optional.  When one is given, its address is looked up in
# the source system policy.
if src_initramfs_path is not None:
    print("Reading source system policy from '" + src_policy_path + "'")
    src_policy = etree.parse(src_policy_path, xml_parser).getroot()
    initramfs_addr = get_initramfs_address(src_policy)

    if initramfs_addr is None:
        # No address found in the policy: nothing is added automatically.
        print("Warning: Manually add mappings for " + src_initramfs_path)
    else:
        print("Processing initramfs '" + src_initramfs_path + "'")
        initramfs_name = os.path.basename(src_initramfs_path)
        initramfs_size = os.path.getsize(src_initramfs_path)
        add_provides_memory(src_spec, "modules_initramfs", "subject_initrd",
                            initramfs_addr, initramfs_name, initramfs_size,
                            "false", "false")

# etree.tostring returns bytes here, hence the binary mode.
with open(out_spec_path, 'wb') as out_spec:
    print("Writing component specification to '" + out_spec_path + "'")
    out_spec.write(etree.tostring(src_spec, pretty_print=True))
Beispiel #33
0
from lxml import etree
import lxml.html
from io import StringIO, BytesIO

# Deliberately malformed markup: <title>, <head>, <body> and <html> are
# never closed.
broken_html = (
    '<html><head><title>test<body><h1>page title</h1>'
    '<a href="hupu">aa</a>'
)

# Parse the markup with lxml's HTML parser from an in-memory text stream.
parser = etree.HTMLParser()
html_stream = StringIO(broken_html)
tree = etree.parse(html_stream, parser)

# Serialise the resulting tree as UTF-8 and show it.
serialized = etree.tostring(tree, encoding='utf-8')
print(serialized)
Beispiel #34
0
    def __init__(self, system, xml, state):
        """
        Instantiate an InputType class.  Arguments:

        - system    : ModuleSystem instance which provides OS, rendering, and user context.
                      Specifically, must have a render_template function.
        - xml       : Element tree of this Input element
        - state     : a dictionary with optional keys:
                      * 'value'  -- the current value of this input
                                    (what the student entered last time)
                      * 'id' -- the id of this input, typically
                                "{problem-location}_{response-num}_{input-num}"
                      * 'status' (answered, unanswered, unsubmitted)
                      * 'input_state' -- dictionary containing any inputtype-specific state
                                        that has been preserved
                      * 'feedback' (dictionary containing keys for hints, errors, or other
                         feedback from previous attempt.  Specifically 'message', 'hint',
                         'hintmode'.  If 'hintmode' is 'always', the hint is always displayed.)

        Raises:

        - ValueError : if neither `state` nor the xml element provides an id.
        - Exception  : if process_requirements() or setup() fails; the message
                       contains the serialized xml and the original error text.
        """

        self.xml = xml
        self.tag = xml.tag
        self.system = system

        # NOTE: ID should only come from one place.  If it comes from multiple,
        # we use state first, XML second (in case the xml changed, but we have
        # existing state with an old id). Since we don't make this guarantee,
        # we can swap this around in the future if there's a more logical
        # order.

        self.input_id = state.get('id', xml.get('id'))
        if self.input_id is None:
            raise ValueError("input id state is None. xml is {0}".format(
                etree.tostring(xml)))

        # Missing state entries fall back to empty / "unanswered" defaults.
        self.value = state.get('value', '')

        feedback = state.get('feedback', {})
        self.msg = feedback.get('message', '')
        self.hint = feedback.get('hint', '')
        self.hintmode = feedback.get('hintmode', None)
        self.input_state = state.get('input_state', {})

        # put hint above msg if it should be displayed
        # (a <br/> separates the two only when there is a message)
        if self.hintmode == 'always':
            self.msg = self.hint + ('<br/>' if self.msg else '') + self.msg

        self.status = state.get('status', 'unanswered')

        try:
            # Pre-parse and process all the declared requirements.
            self.process_requirements()

            # Call subclass "constructor" -- means they don't have to worry about calling
            # super().__init__, and are isolated from changes to the input
            # constructor interface.
            self.setup()
        except Exception as err:
            # Something went wrong: add xml to message, but keep the traceback
            msg = "Error in xml '{x}': {err} ".format(x=etree.tostring(xml),
                                                      err=str(err))
            # Python 2 three-argument raise: re-raises as Exception(msg) with
            # the traceback of the original error.  This form is a syntax
            # error on Python 3.
            raise Exception, msg, sys.exc_info()[2]
Beispiel #35
0
 def to_string(self, pretty=False, method='xml', encoding='UTF-8'):
     """Serialize the wrapped element via ``etree.tostring``.

     ``pretty`` is forwarded as ``pretty_print``; ``method`` and
     ``encoding`` are forwarded under their own names.
     """
     serialize_options = {
         'method': method,
         'encoding': encoding,
         'pretty_print': pretty,
     }
     return etree.tostring(self._element, **serialize_options)

from lxml import etree

# Build a <Package> document from changedTypes, which is iterated below as
# {type name: {date: {person: changes}}}.
package = etree.Element('Package')
doc = etree.ElementTree(package)

# One <types> block per type name, in sorted order.
for namedtype,dates in sorted( changedTypes.items() ):
  typeBlock =  etree.SubElement(package,'types')
  for date,people in sorted( dates.items() ):
    

    for person,changes in sorted( people.items() ):
      # An XML comment introduces each person's group of members for a date.
      typeBlock.append( etree.Comment( '{} On {} START'.format( person, date.strftime('%D') ) ) )

      for change in sorted( changes ):
        el = etree.Element('members')
        el.text = change
        typeBlock.append( el )

  # <name> is added once per block, after all of its <members>.
  nameElem =  etree.SubElement(typeBlock,'name')
  nameElem.text = namedtype

# <version> is the last child of <Package>.
ver = etree.Element('version')
ver.text = str( VERSION )
package.append(ver)


# tostring returns bytes when an encoding is given, hence the binary mode.
with open('./package.xml', 'wb') as f:
    f.write( etree.tostring(doc,pretty_print=True, xml_declaration=True, encoding='UTF-8') )
Beispiel #37
0
def _sbgn_glyph(parent, node_id, glyph_class):
    """Append a ``glyph`` with id ``id<node_id>`` and class *glyph_class* to *parent*."""
    glyph = etree.SubElement(parent, 'glyph', id='id{0}'.format(node_id))
    glyph.set('class', glyph_class)
    return glyph


def _sbgn_bbox(glyph, width, height):
    """Append a placeholder ``bbox`` at the origin with the given size to *glyph*."""
    etree.SubElement(glyph, 'bbox', x='0', y="0", w=width, h=height)


def _sbgn_state_variable(parent, node_id, variable):
    """Append a 54x15 ``state variable`` glyph carrying *variable* to *parent*."""
    component = _sbgn_glyph(parent, node_id, "state variable")
    etree.SubElement(component, 'state', variable=variable)
    _sbgn_bbox(component, "54", "15")
    return component


def _sbgn_assignment_arc(sbgnmap, source, target, arc_id):
    """Append an ``assignment`` arc from ``id<source>`` to ``id<target>`` to the map."""
    arctag = etree.SubElement(sbgnmap, 'arc')
    arctag.set('class', 'assignment')
    arctag.set('source', 'id{0}'.format(source))
    arctag.set('target', 'id{0}'.format(target))
    arctag.set('id', arc_id)
    etree.SubElement(arctag, 'start', x='0', y='0')
    etree.SubElement(arctag, 'end', x='0', y='0')


def createSBNG_ER_gml(graph):
    """Serialize *graph* as an SBGN "entity relationship" map.

    The graph is walked three times so that parents exist before children:

    1. nodes without a ``gid`` attribute become ``entity`` glyphs;
    2. nodes with ``gid`` and ``isGroup`` become ``state variable`` glyphs
       nested in the glyph registered for their ``gid``;
    3. remaining nodes with a ``gid`` become either a ``state variable``
       nested in their entity, or a free-standing ``variable value`` glyph
       linked to its state variable by an ``assignment`` arc.

    Finally every graph edge becomes an ``assignment`` arc.  All coordinates
    are written as zero placeholders.

    Args:
        graph: graph object exposing ``nodes()``, ``edges()`` and per-node
            attribute dicts through ``graph.node[...]``.

    Returns:
        The pretty-printed XML as produced by ``etree.tostring``.

    Raises:
        KeyError: if a node's ``gid`` refers to a node for which no glyph
            has been registered, or a node lacks ``LabelGraphics``/``text``.
    """
    page = etree.Element('sbgn', xmlns="http://sbgn.org/libsbgn/0.2")
    sbgnmap = etree.SubElement(page, 'map', language="entity relationship")
    # node id -> glyph element that children of that node are attached to
    tagDictionary = {}

    # Pass 1: top-level entities.
    for nodeId in graph.nodes():
        attrs = graph.node[nodeId]
        if 'gid' not in attrs:
            glyphtag = _sbgn_glyph(sbgnmap, nodeId, 'entity')
            etree.SubElement(glyphtag,
                             'label',
                             text=attrs['LabelGraphics']['text'])
            _sbgn_bbox(glyphtag, "120", "60")
            tagDictionary[nodeId] = glyphtag

    # Pass 2: grouped nodes become state variables inside their parent glyph
    # and are registered so that pass 3 can attach values to them.
    for nodeId in graph.nodes():
        attrs = graph.node[nodeId]
        if 'gid' in attrs and 'isGroup' in attrs:
            parent = tagDictionary[attrs['gid']]
            tagDictionary[nodeId] = _sbgn_state_variable(
                parent, nodeId, attrs['LabelGraphics']['text'])

    # Pass 3: leaf nodes, handled according to the class of their parent.
    gidx = 0
    for nodeId in graph.nodes():
        attrs = graph.node[nodeId]
        if 'gid' not in attrs or 'isGroup' in attrs:
            continue
        parent = tagDictionary[attrs['gid']]
        parent_class = parent.get('class')
        if parent_class == 'entity':
            _sbgn_state_variable(parent, nodeId,
                                 attrs['LabelGraphics']['text'])
        elif parent_class == 'state variable':
            component = _sbgn_glyph(sbgnmap, nodeId, "variable value")
            etree.SubElement(component,
                             'label',
                             text=attrs['LabelGraphics']['text'])
            _sbgn_assignment_arc(sbgnmap, nodeId, attrs['gid'],
                                 'as{0}'.format(gidx))
            _sbgn_bbox(component, "20", "20")
            gidx += 1

    # Graph edges become assignment arcs numbered a0, a1, ...
    for idx, edgeId in enumerate(graph.edges()):
        _sbgn_assignment_arc(sbgnmap, edgeId[0], edgeId[1],
                             'a{0}'.format(idx))
    return etree.tostring(page, pretty_print=True)
Beispiel #38
0
    def _handle_exception(cls, exception):
        """Render a website error page for *exception*.

        Non-website requests are delegated unchanged to the parent
        implementation.  For website requests the parent handler runs first;
        a plain response from it is returned as-is, otherwise the resulting
        exception is mapped to an HTTP status code (500 by default, the
        exception's own code for HTTP exceptions, 403 for access errors) and
        the matching ``website.<view>`` template is rendered, falling back to
        ``website.http_error`` when that rendering fails.

        For 500 errors caused by a ``QWebException`` the views involved are
        looked up and passed to the template.

        Returns:
            A werkzeug response, or the exception itself for an
            ``HTTPException`` whose ``code`` is ``None``.
        """
        code = 500  # default code
        is_website_request = bool(getattr(request, 'is_frontend', False) and getattr(request, 'website', False))
        if not is_website_request:
            # Don't touch non website requests exception handling
            return super(Http, cls)._handle_exception(exception)
        else:
            try:
                response = super(Http, cls)._handle_exception(exception)

                if isinstance(response, Exception):
                    exception = response
                else:
                    # if parent explicitly returns a plain response, then we don't touch it
                    return response
            except Exception as e:
                if 'werkzeug' in config['dev_mode']:
                    raise e
                exception = e

            values = dict(
                exception=exception,
                traceback=traceback.format_exc(),
            )

            if isinstance(exception, werkzeug.exceptions.HTTPException):
                if exception.code is None:
                    # Hand-crafted HTTPException likely coming from abort(),
                    # usually for a redirect response -> return it directly
                    return exception
                else:
                    code = exception.code

            if isinstance(exception, odoo.exceptions.AccessError):
                code = 403

            if isinstance(exception, QWebException):
                values.update(qweb_exception=exception)
                if type(exception.error) == odoo.exceptions.AccessError:
                    code = 403

            values.update(
                status_message=werkzeug.http.HTTP_STATUS_CODES[code],
                status_code=code,
            )

            view_id = code
            if request.website.is_publisher() and isinstance(exception, werkzeug.exceptions.NotFound):
                view_id = 'page_404'
                values['path'] = request.httprequest.path[1:]

            if not request.uid:
                cls._auth_method_public()

            # Render on a separate cursor opened for this purpose.
            with registry(request.env.cr.dbname).cursor() as cr:
                env = api.Environment(cr, request.uid, request.env.context)
                if code == 500:
                    logger.error("500 Internal Server Error:\n\n%s", values['traceback'])
                    View = env["ir.ui.view"]
                    if 'qweb_exception' in values:
                        if 'load could not load template' in exception.args:
                            # When t-calling an inexisting template, we don't have reference to
                            # the view that did the t-call. We need to find it.
                            values['views'] = View.search([
                                ('type', '=', 'qweb'),
                                '|',
                                ('arch_db', 'ilike', 't-call="%s"' % exception.name),
                                ('arch_db', 'ilike', "t-call='%s'" % exception.name)
                            ], order='write_date desc', limit=1)
                        else:
                            try:
                                # exception.name might be int, string
                                exception_template = int(exception.name)
                            except (ValueError, TypeError):
                                # Not a numeric id: use the name as given.
                                exception_template = exception.name
                            view = View._view_obj(exception_template)
                            et = etree.fromstring(view.with_context(inherit_branding=False).read_combined(['arch'])['arch'])
                            node = et.find(exception.path.replace('/templates/t/', './'))
                            line = node is not None and etree.tostring(node, encoding='unicode')
                            # line = exception.html  # FALSE -> contains branding <div t-att-data="request.browse('ok')"/>
                            if line:
                                # If QWebException occurs in a child view, the parent view is raised
                                values['editable'] = request.uid and request.website.is_publisher()
                                values['views'] = View._views_get(exception_template).filtered(
                                    lambda v: line in v.arch
                                )
                            else:
                                values['views'] = view
                        # Keep only views that we can reset
                        values['views'] = values['views'].filtered(
                            lambda view: view._get_original_view().arch_fs or 'oe_structure' in view.key
                        )
                elif code == 403:
                    # Logger.warn is a deprecated alias of Logger.warning.
                    logger.warning("403 Forbidden:\n\n%s", values['traceback'])
                try:
                    html = env['ir.ui.view'].render_template('website.%s' % view_id, values)
                except Exception:
                    html = env['ir.ui.view'].render_template('website.http_error', values)

            return werkzeug.wrappers.Response(html, status=code, content_type='text/html;charset=utf-8')
Beispiel #39
0
 def pprint(self):
     """Print the pretty-printed XML form of this object."""
     serialized = etree.tostring(self.to_xml(), pretty_print=True)
     print(serialized.decode())
Beispiel #40
0
def _eval_xml(self, node, env):
    """Evaluate a data-file XML *node* and return the Python value it denotes.

    Supported tags:

    * ``field`` / ``value``: the value is taken, in this order, from a
      ``search`` attribute (a domain that is evaluated and searched on
      ``model``), an ``eval`` attribute, or the node content interpreted
      according to ``type`` (``xml``, ``html``, ``base64``, ``file``,
      ``char`` (default), ``int``, ``float``, ``list``, ``tuple``).
      Any other ``type`` falls through and yields ``None``.
    * ``function``: calls method ``name`` on ``model`` with arguments taken
      from ``eval`` and from the evaluated child nodes.
    * ``test``: returns the node text.

    Raises:
        IOError: for ``type="file"`` when the path is not a resource of the
            current module.
        Exception: whatever ``safe_eval`` raises for an ``eval`` attribute,
            after logging it.
    """
    if node.tag in ('field','value'):
        t = node.get('type','char')
        f_model = node.get('model')
        if node.get('search'):
            f_search = node.get("search")
            f_use = node.get("use",'id')
            f_name = node.get("name")
            idref2 = {}
            if f_search:
                idref2 = _get_idref(self, env, f_model, self.idref)
            q = safe_eval(f_search, idref2)
            ids = env[f_model].search(q).ids
            if f_use != 'id':
                # "use" selects another field of the found records instead of their ids.
                ids = [x[f_use] for x in env[f_model].browse(ids).read([f_use])]
            _fields = env[f_model]._fields
            if (f_name in _fields) and _fields[f_name].type == 'many2many':
                return ids
            # Any other field gets the first match only, or False if nothing matched.
            f_val = False
            if len(ids):
                f_val = ids[0]
                if isinstance(f_val, tuple):
                    f_val = f_val[0]
            return f_val
        a_eval = node.get('eval')
        if a_eval:
            idref2 = _get_idref(self, env, f_model, self.idref)
            try:
                return safe_eval(a_eval, idref2)
            except Exception:
                logging.getLogger('odoo.tools.convert.init').error(
                    'Could not eval(%s) for %s in %s', a_eval, node.get('name'), env.context)
                raise
        def _process(s):
            # Replace every %(xml_id)d / %(xml_id)s placeholder in s with the
            # resolved database id.  The pattern requires a preceding
            # character other than '%'; that character is part of the match
            # and is dropped again with [1:].
            matches = re.finditer(br'[^%]%\((.*?)\)[ds]'.decode('utf-8'), s)
            done = set()
            for m in matches:
                found = m.group()[1:]
                if found in done:
                    continue
                done.add(found)
                id = m.groups()[0]
                if not id in self.idref:
                    self.idref[id] = self.id_get(id)
                # So funny story: in Python 3, bytes(n: int) returns a
                # bytestring of n nuls. In Python 2 it obviously returns the
                # stringified number, which is what we're expecting here
                s = s.replace(found, pycompat.text_type(self.idref[id]))
            s = s.replace('%%', '%') # Quite weird but it's for (somewhat) backward compatibility sake
            return s

        if t == 'xml':
            _fix_multiple_roots(node)
            return '<?xml version="1.0"?>\n'\
                +_process("".join(etree.tostring(n, encoding='unicode') for n in node))
        if t == 'html':
            return _process("".join(etree.tostring(n, encoding='unicode') for n in node))

        # Remaining types work on the node text, or on the content of the
        # file named by the "file" attribute when there is one.
        data = node.text
        if node.get('file'):
            with file_open(node.get('file'), 'rb') as f:
                data = f.read()

        if t == 'base64':
            return base64.b64encode(data)

        # after that, only text content makes sense
        data = pycompat.to_text(data)
        if t == 'file':
            from ..modules import module
            path = data.strip()
            if not module.get_module_resource(self.module, path):
                raise IOError("No such file or directory: '%s' in %s" % (
                    path, self.module))
            return '%s,%s' % (self.module, path)

        if t == 'char':
            return data

        if t == 'int':
            d = data.strip()
            # The literal text "None" maps to None rather than to an integer.
            if d == 'None':
                return None
            return int(d)

        if t == 'float':
            return float(data.strip())

        if t in ('list','tuple'):
            # Each <value> child is evaluated recursively.
            res=[]
            for n in node.iterchildren(tag='value'):
                res.append(_eval_xml(self, n, env))
            if t=='tuple':
                return tuple(res)
            return res
    elif node.tag == "function":
        args = []
        a_eval = node.get('eval')
        # FIXME: should probably be exclusive
        if a_eval:
            self.idref['ref'] = self.id_get
            args = safe_eval(a_eval, self.idref)
        # Child nodes contribute further positional arguments; children that
        # evaluate to None are skipped.
        for n in node:
            return_val = _eval_xml(self, n, env)
            if return_val is not None:
                args.append(return_val)
        model = env[node.get('model')]
        method = node.get('name')
        # this one still depends on the old API
        return odoo.api.call_kw(model, method, args, {})
    elif node.tag == "test":
        return node.text
#!/usr/bin/python

from jnpr.junos import Device
from lxml import etree

if __name__ == '__main__':
    # Lab convenience only: the connection details are hardcoded here.
    lab_device = Device(host='66.129.235.12',
                        port=45002,
                        user='******',
                        passwd='Juniper!1')

    # The device is used as a context manager, so the connection is not
    # opened or closed by hand.
    with lab_device as dev:
        # Subtree filter passed to the get-config RPC, parsed with lxml.
        interfaces_filter = etree.XML(
            '<configuration><interfaces/></configuration>')
        reply = dev.rpc.get_config(filter_xml=interfaces_filter)
        print(etree.tostring(reply))
Beispiel #42
0
def _get_iiif_error(parameter, text):
    """Build and serialize an ``error`` XML element.

    The element is created with the module's ``NSMAP`` and receives two
    children, ``parameter`` and ``text``, whose text content is taken from
    the arguments of the same name.

    Returns the result of ``etree.tostring(..., method='xml')``.
    """
    root = Element('error', nsmap=NSMAP)
    for tag, content in (('parameter', parameter), ('text', text)):
        SubElement(root, tag).text = content
    return etree.tostring(root, method='xml')
Beispiel #43
0
            "mode", self.handler.get_argument('debug', 'text'))
        self.debug_log_handler.log_data.set("request-id",
                                            str(self.handler.request_id))

        # if we have 500 but have "noxsl" in args without "debug" in args
        # apply xsl for debug info anyway
        if self.handler.xml.apply_xsl or not self.debug_mode:
            # show 'awesome' debug page
            try:
                xsl_file = open(tornado.options.options.debug_xsl)
                tranform = etree.XSLT(etree.XML(xsl_file.read()))
                xsl_file.close()
                log_document = str(tranform(self.debug_log_handler.log_data))
                self.handler.set_header('Content-Type',
                                        'text/html; charset=UTF-8')
            except Exception, e:
                self.handler.log.exception('XSLT debug file error')
                self.handler.set_header('Content-Type',
                                        'application/xml; charset=UTF-8')
                log_document = etree.tostring(self.debug_log_handler.log_data,
                                              encoding='UTF-8',
                                              xml_declaration=True)
        else:
            self.handler.set_header('Content-Type',
                                    'application/xml; charset=UTF-8')
            log_document = etree.tostring(self.debug_log_handler.log_data,
                                          encoding='UTF-8',
                                          xml_declaration=True)

        return log_document
Beispiel #44
0
	def check(self, **kwargs):
		'''Perform the checks on the metadata.xml of the package.

		Problems are reported through self.qatracker.add_error(); the USE
		flags found in metadata.xml are stored in self.musedict and
		self.muselist.

		@param xpkg: the package being checked
		@param checkdir: string, directory path
		@param checkdirlist: object whose get() returns the list of
			file names in checkdir
		@param repolevel: integer
		@returns: boolean, always False
		'''
		xpkg = kwargs.get('xpkg')
		checkdir = kwargs.get('checkdir')
		checkdirlist = kwargs.get('checkdirlist').get()

		self.musedict = {}
		if self.options.mode in ['manifest']:
			self.muselist = frozenset(self.musedict)
			return False

		# metadata.xml file check
		if "metadata.xml" not in checkdirlist:
			self.qatracker.add_error("metadata.missing", xpkg + "/metadata.xml")
			self.muselist = frozenset(self.musedict)
			return False

		# metadata.xml parse check
		metadata_bad = False

		# read metadata.xml into memory
		try:
			_metadata_xml = etree.parse(os.path.join(checkdir, 'metadata.xml'))
		except (ParserError, SyntaxError, EnvironmentError) as e:
			metadata_bad = True
			self.qatracker.add_error("metadata.bad", "%s/metadata.xml: %s" % (xpkg, e))
			del e
			self.muselist = frozenset(self.musedict)
			return False

		# Collect every leading-whitespace byte used in the serialized
		# document; more than one distinct byte means mixed indentation.
		indentation_chars = Counter()
		for l in etree.tostring(_metadata_xml).splitlines():
			indentation_chars.update(re.match(rb"\s*", l).group(0))
		if len(indentation_chars) > 1:
			self.qatracker.add_error("metadata.warning", "%s/metadata.xml: %s" %
				(xpkg, "inconsistent use of tabs and spaces in indentation")
			)

		xml_encoding = _metadata_xml.docinfo.encoding
		if xml_encoding.upper() != metadata_xml_encoding:
			self.qatracker.add_error(
				"metadata.bad", "%s/metadata.xml: "
				"xml declaration encoding should be '%s', not '%s'" %
				(xpkg, metadata_xml_encoding, xml_encoding))

		if not _metadata_xml.docinfo.doctype:
			metadata_bad = True
			self.qatracker.add_error(
				"metadata.bad",
				"%s/metadata.xml: %s" % (xpkg, "DOCTYPE is missing"))
		else:
			doctype_system = _metadata_xml.docinfo.system_url
			# Test for a missing SYSTEM identifier first: calling
			# .replace() on None would raise AttributeError and the
			# "undefined" report could never be produced.
			if doctype_system is None:
				system_problem = "but it is undefined"
			elif doctype_system.replace('http://', 'https://') != metadata_dtd_uri:
				system_problem = "not '%s'" % doctype_system
			else:
				system_problem = None
			if system_problem is not None:
				self.qatracker.add_error(
					"metadata.bad", "%s/metadata.xml: "
					"DOCTYPE: SYSTEM should refer to '%s', %s" %
					(xpkg, metadata_dtd_uri, system_problem))
			doctype_name = _metadata_xml.docinfo.doctype.split(' ')[1]
			if doctype_name != metadata_doctype_name:
				self.qatracker.add_error(
					"metadata.bad", "%s/metadata.xml: "
					"DOCTYPE: name should be '%s', not '%s'" %
					(xpkg, metadata_doctype_name, doctype_name))

		# load USE flags from metadata.xml
		self.musedict = parse_metadata_use(_metadata_xml)
		for atom in chain(*self.musedict.values()):
			if atom is None:
				continue
			try:
				atom = Atom(atom)
			except InvalidAtom as e:
				self.qatracker.add_error(
					"metadata.bad",
					"%s/metadata.xml: Invalid atom: %s" % (xpkg, e))
			else:
				if atom.cp != xpkg:
					self.qatracker.add_error(
						"metadata.bad",
						"%s/metadata.xml: Atom contains "
						"unexpected cat/pn: %s" % (xpkg, atom))

		# Schema validation is skipped when the DOCTYPE was missing
		if not metadata_bad:
			validator = etree.XMLSchema(file=self.metadata_xsd)
			if not validator.validate(_metadata_xml):
				self._add_validate_errors(xpkg, validator.error_log)
		self.muselist = frozenset(self.musedict)
		return False
Beispiel #45
0
 def _xml_strip(self, xml):
     """Parse ``xml`` with blank text removed and serialize it again.

     The input is parsed by an ``etree.XMLParser`` created with
     ``remove_blank_text=True``; the return value is whatever
     ``etree.tostring`` produces for the parsed element.
     """
     blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
     parsed = etree.fromstring(xml, parser=blank_stripping_parser)
     return etree.tostring(parsed)
Beispiel #46
0
    # Command-line front end: parse the arguments, run the document
    # conversion and either print the result or write it next to the input.
    import argparse
    argparser = argparse.ArgumentParser(
        description='Converts KonText config.xml version 0.7.x '
        'to the version 0.8')
    argparser.add_argument('conf_file',
                           metavar='CONF_FILE',
                           help='an XML configuration file')
    argparser.add_argument(
        '-u',
        '--update',
        type=int,
        help='Perform a single update (identified by a number)')
    argparser.add_argument('-p',
                           '--print',
                           action='store_const',
                           const=True,
                           help='Print result instead of writing it to a file')
    args = argparser.parse_args()

    # process_document() receives the parsed tree and the optional update
    # number (None when -u was not given).
    doc = etree.parse(args.conf_file)
    process_document(doc, getattr(args, 'update'))

    # Serialized with an explicit encoding, so result_xml is a byte string.
    result_xml = etree.tostring(doc, encoding='utf-8', pretty_print=True)
    # The option is named "print", hence the getattr() access.
    if getattr(args, 'print'):
        print(result_xml)
    else:
        # Replace the input file's last extension with ".new.xml".
        output_path = '%s.new.xml' % args.conf_file.rsplit('.', 1)[0]
        with open(output_path, 'wb') as f:
            f.write(result_xml)
            print('DONE!\nConverted config written to %s\n' % output_path)
 def save_graph(self, filename):
     """Write the XML tree in ``self.root`` to ``filename``.

     The output starts with an XML declaration line followed by the
     pretty-printed serialization of ``self.root``.
     """
     with open(filename, 'wb') as out:
         # The file is opened in binary mode, so the declaration must be a
         # bytes literal (a text literal raises TypeError on Python 3).
         out.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
         out.write(etree.tostring(self.root, pretty_print=True))
Beispiel #48
0
 def _to_xml(self, root):
     """Serialize ``root`` as UTF-8 XML including the XML declaration."""
     serialized = etree.tostring(root, xml_declaration=True, encoding='UTF-8')
     return serialized
def create_metadata(metadata, filename):
    """Write an EarthObservation XML file and a properties file.

    ``metadata`` is a dict; each section is emitted only when its key is
    present:

    * ``startdate`` (with ``enddate``): ``om:phenomenonTime`` period.
    * ``wkt``: footprint polygon; the exterior ring coordinates are written
      with each coordinate tuple reversed.
    * ``product_type`` (with ``identifier``): ``eop:metaDataProperty``.
    * ``vs``: mapping of vendor-specific attribute names to values.
    * ``title`` / ``cat``: written to the properties file only; ``cat`` is
      a mapping whose values are themselves mappings of strings.

    Two files are written: ``filename + '.xml'`` and
    ``filename + '.properties'``. Returns both paths as a tuple, in that
    order.
    """
    namespaces = {
        'opt': 'http://www.opengis.net/opt/2.1',
        'om': 'http://www.opengis.net/om/2.0',
        'gml': 'http://www.opengis.net/gml/3.2',
        'eop': 'http://www.opengis.net/eop/2.1',
        'sar': 'http://www.opengis.net/sar/2.1',
        'ssp': 'http://www.opengis.net/ssp/2.1',
    }

    for prefix, uri in namespaces.items():
        etree.register_namespace(prefix, uri)

    def qname(prefix, local_name):
        """Return the namespace-qualified tag ``{uri}local_name``."""
        return '{{{}}}{}'.format(namespaces[prefix], local_name)

    def to_bytes(text):
        """Return ``text`` as bytes, encoding text strings as UTF-8."""
        return text if isinstance(text, bytes) else text.encode('utf-8')

    # The original format string lacked one brace pair, so the namespace
    # was never inserted and the tag came out as '{}EarthObservation'.
    root = etree.Element(qname('ssp', 'EarthObservation'))

    # Time
    if 'startdate' in metadata:
        phenomenon_time = etree.SubElement(root, qname('om', 'phenomenonTime'))
        time_period = etree.SubElement(phenomenon_time, qname('gml', 'TimePeriod'))
        begin_position = etree.SubElement(time_period, qname('gml', 'beginPosition'))
        end_position = etree.SubElement(time_period, qname('gml', 'endPosition'))

        begin_position.text = metadata['startdate']
        end_position.text = metadata['enddate']

    # geo
    if 'wkt' in metadata:
        feature_of_interest = etree.SubElement(root, qname('om', 'featureOfInterest'))
        footprint = etree.SubElement(feature_of_interest, qname('ssp', 'Footprint'))
        multi_extent_of = etree.SubElement(footprint, qname('ssp', 'multiExtentOf'))
        multi_surface = etree.SubElement(multi_extent_of, qname('gml', 'MultiSurface'))
        surface_members = etree.SubElement(multi_surface, qname('gml', 'surfaceMembers'))
        polygon = etree.SubElement(surface_members, qname('gml', 'Polygon'))
        exterior = etree.SubElement(polygon, qname('gml', 'exterior'))
        linear_ring = etree.SubElement(exterior, qname('gml', 'LinearRing'))
        poslist = etree.SubElement(linear_ring, qname('gml', 'posList'))

        # Reverse every coordinate tuple of the exterior ring.
        ring = loads(metadata['wkt']).exterior.coords
        coords = np.asarray([t[::-1] for t in list(ring)]).tolist()

        poslist.attrib['count'] = str(len(coords))
        # Each position is followed by a single space, as before.
        poslist.text = ''.join(
            ' '.join(str(e) for e in elem) + ' ' for elem in coords)

    if 'product_type' in metadata:
        # Metadata property
        metadata_property = etree.SubElement(root, qname('eop', 'metaDataProperty'))
        earth_observation_metadata = etree.SubElement(
            metadata_property, qname('eop', 'EarthObservationMetaData'))
        identifier = etree.SubElement(earth_observation_metadata, qname('eop', 'identifier'))
        product_type = etree.SubElement(earth_observation_metadata, qname('eop', 'productType'))
        identifier.text = metadata['identifier']
        product_type.text = metadata['product_type']

    if 'vs' in metadata:
        if 'product_type' not in metadata:
            # No metaDataProperty was created above; create it now.
            metadata_property = etree.SubElement(root, qname('eop', 'metaDataProperty'))
            earth_observation_metadata = etree.SubElement(
                metadata_property, qname('eop', 'EarthObservationMetaData'))

        vendor_specific = etree.SubElement(
            earth_observation_metadata, qname('eop', 'vendorSpecific'))

        # The original iterated over an undefined name ``vs``.
        for k, v in metadata['vs'].items():
            specific_information = etree.SubElement(
                vendor_specific, qname('eop', 'SpecificInformation'))
            local_attribute = etree.SubElement(
                specific_information, qname('eop', 'localAttribute'))
            local_value = etree.SubElement(
                specific_information, qname('eop', 'localValue'))

            local_attribute.text = k
            local_value.text = v

    eop_xml = filename + '.xml'
    with open(eop_xml, 'wb') as xml_file:
        # Binary file: write bytes so this also works on Python 3.
        xml_file.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
        xml_file.write(etree.tostring(root, pretty_print=True))

    with open(filename + '.properties', 'wb') as properties_file:
        if 'title' in metadata:
            properties_file.write(
                to_bytes('title={}\n'.format(metadata['title'])))

        # Both keys must be present; the original tested only 'enddate'.
        if 'startdate' in metadata and 'enddate' in metadata:
            properties_file.write(
                to_bytes('date={}/{}\n'.format(metadata['startdate'],
                                               metadata['enddate'])))

        if 'wkt' in metadata:
            properties_file.write(
                to_bytes('geometry={0}\n'.format(metadata['wkt'])))

        if 'cat' in metadata:
            # The original iterated over an undefined name ``cat``.
            cat_expression = '|'.join(
                ','.join(entry.values()) for entry in metadata['cat'].values())
            properties_file.write(
                to_bytes('category={}'.format(cat_expression)))

    return filename + '.xml', filename + '.properties'
Beispiel #50
0
 def write_to_file(self, file_path):
     """Append ``self.choices`` to ``self.xml`` and write it to ``file_path``.

     The pretty-printed serialization of ``self.xml`` replaces any existing
     content of the file. The file is closed when the write finishes or
     fails; the original never closed the handle.
     """
     self.xml.append(self.choices)
     # NOTE(review): etree.tostring() without an encoding returns a byte
     # string; on Python 3 a text-mode file would reject it - confirm the
     # intended interpreter before changing the mode.
     with open(file_path, 'w') as out:
         out.write(etree.tostring(self.xml, pretty_print=True))
Beispiel #51
0
    def egress(self, envelope, http_headers, operation, binding_options):
        """Print the headers and pretty-printed XML body of a request.

        ``operation`` and ``binding_options`` are accepted but not used.
        Nothing is returned.
        """
        body = etree.tostring(envelope, encoding='unicode', pretty_print=True)
        print(f'\nRequest\n-------\nHeaders:\n{http_headers}\n\nBody:\n{body}')
 def view_graph(self):
     """Print the pretty-printed XML serialization of ``self.root``."""
     # Parenthesized call form: prints the same on Python 2 and is valid
     # syntax on Python 3, where the print statement is a SyntaxError.
     print(etree.tostring(self.root, pretty_print=True))
Beispiel #53
0
def html2plaintext(html, body_id=None, encoding='utf-8'):
    """ From an HTML text, convert the HTML to plain text.
    If @param body_id is provided then this is the tag where the
    body (not necessarily <body>) starts.

    Links are replaced by their text followed by a ``[n]`` marker, and the
    list of numbered URLs is appended at the end of the result. A handful of
    tags are mapped to plain-text markup (``*``, ``**``, ``/``, newlines);
    every other tag is replaced by a space.

    @param html: the HTML text to convert
    @param body_id: id of the element to use as the body, or None to use
        the <body> element
    @param encoding: encoding passed to etree.tostring()
    @return: the plain text, or '' when ``html`` is empty
    """
    ## (c) Fry-IT, www.fry-it.com, 2007
    ## <*****@*****.**>
    ## download here: http://www.peterbe.com/plog/html2plaintext

    html = ustr(html)

    if not html:
        return ''

    tree = etree.fromstring(html, parser=etree.HTMLParser())

    # NOTE(review): body_id is interpolated into the XPath expression
    # without quotes, so a plain id such as "content" is not compared as a
    # string literal - confirm what callers pass before changing this.
    if body_id is not None:
        source = tree.xpath('//*[@id=%s]' % (body_id,))
    else:
        source = tree.xpath('//body')
    if len(source):
        tree = source[0]

    # Turn every link that has an href into "<text> [n]" and remember its URL.
    url_index = []
    for link in tree.findall('.//a'):
        url = link.get('href')
        if url:
            url_index.append(url)
            link.tag = 'span'
            # link.text is None when the anchor starts with a child element
            # (e.g. an image); do not render that as the word "None".
            link.text = '%s [%s]' % (link.text or '', len(url_index))

    html = ustr(etree.tostring(tree, encoding=encoding))
    # \r char is converted into &#13;, must remove it
    html = html.replace('&#13;', '')

    # Tags that have a plain-text equivalent, applied in this order.
    tag_markup = (
        ('<strong>', '*'), ('</strong>', '*'),
        ('<b>', '*'), ('</b>', '*'),
        ('<h3>', '*'), ('</h3>', '*'),
        ('<h2>', '**'), ('</h2>', '**'),
        ('<h1>', '**'), ('</h1>', '**'),
        ('<em>', '/'), ('</em>', '/'),
        ('<tr>', '\n'),
        ('</p>', '\n'),
    )
    for tag, markup in tag_markup:
        html = html.replace(tag, markup)

    # Raw strings: "\s" in a normal string literal is an invalid escape.
    html = re.sub(r'<br\s*/?>', '\n', html)
    html = re.sub(r'<.*?>', ' ', html)
    html = html.replace(' ' * 2, ' ')
    html = html.replace('&gt;', '>')
    html = html.replace('&lt;', '<')
    html = html.replace('&amp;', '&')

    # strip all lines
    html = '\n'.join([x.strip() for x in html.splitlines()])
    html = html.replace('\n' * 2, '\n')

    # Append the numbered URL list, separated from the text by a blank line.
    if url_index:
        html += '\n\n'
    for number, url in enumerate(url_index, 1):
        html += ustr('[%s] %s\n') % (number, url)

    return html
Beispiel #54
0
    def ingress(self, envelope, http_headers, operation):
        """Print the headers and pretty-printed XML body of a response.

        ``operation`` is accepted but not used. Nothing is returned.
        """
        body = etree.tostring(envelope, encoding='unicode', pretty_print=True)
        print(f'\nResponse\n-------\nHeaders:\n{http_headers}\n\nBody:\n{body}')
Beispiel #55
0
def _verify(t,
            keyspec,
            sig_path=".//{%s}Signature" % NS['ds'],
            drop_signature=False):
    """
    Verify the signature(s) in an XML document.

    Every element matched by `sig_path` is checked in turn. A signature that
    fails with an XMLSigException or ValueError is logged and skipped so the
    next signature can be tried.

    :param t: XML as lxml.etree
    :param keyspec: X.509 cert filename, string with fingerprint or X.509 cert as string
    :param sig_path: path expression used with t.findall() to locate the
                     Signature elements
    :param drop_signature: passed through unchanged to _process_references()
    :returns: list of the objects from the reference map of every signature
              that verified successfully
    :raises XMLSigException: if no signature could be validated (including
                             the case where no Signature element was found)
    """
    # Optional debugging aid: dump the document being verified.
    if config.debug_write_to_files:
        with open("/tmp/foo-sig.xml", "w") as fd:
            fd.write(etree_to_string(t))

    validated = []
    for sig in t.findall(sig_path):
        try:
            sv = sig.findtext(".//{%s}SignatureValue" % NS['ds'])
            if not sv:
                raise XMLSigException("No SignatureValue")

            log.debug("SignatureValue: {!s}".format(sv))
            this_cert = xmlsec.crypto.from_keyspec(keyspec,
                                                   signature_element=sig)
            log.debug("key size: {!s} bits".format(this_cert.keysize))

            si = sig.find(".//{%s}SignedInfo" % NS['ds'])
            log.debug("Found signedinfo {!s}".format(etree.tostring(si)))
            cm_alg = _cm_alg(si)
            try:
                sig_digest_alg = _sig_alg(si)
            except AttributeError:
                raise XMLSigException(
                    "Failed to validate {!s} because of unsupported hash format"
                    .format(etree.tostring(sig)))

            refmap = _process_references(t,
                                         sig,
                                         verify_mode=True,
                                         sig_path=sig_path,
                                         drop_signature=drop_signature)
            for ref, obj in refmap.items():

                # Transform SignedInfo with the algorithm returned by _cm_alg().
                log.debug("transform %s on %s" % (cm_alg, etree.tostring(si)))
                sic = _transform(cm_alg, si)
                log.debug("SignedInfo C14N: %s" % sic)
                if this_cert.do_digest:
                    # The key object asks for a digest: build the value to
                    # compare from the digest of the transformed SignedInfo.
                    digest = xmlsec.crypto._digest(sic, sig_digest_alg)
                    log.debug("SignedInfo digest: %s" % digest)
                    b_digest = b64d(digest)
                    actual = _signed_value(b_digest, this_cert.keysize, True,
                                           sig_digest_alg)
                else:
                    # Otherwise the transformed SignedInfo is passed as is.
                    actual = sic

                if not this_cert.verify(b64d(sv), actual, sig_digest_alg):
                    raise XMLSigException(
                        "Failed to validate {!s} using sig digest {!s} and cm {!s}"
                        .format(etree.tostring(sig), sig_digest_alg, cm_alg))
                validated.append(obj)
        except (XMLSigException,
                ValueError) as ex:  # we will try the next available signature
            log.error(ex)

    # Nothing validated (or no signature present at all) is an error.
    if not validated:
        raise XMLSigException("No valid ds:Signature elements found")

    return validated
Beispiel #56
0
    def parse_html(self, url_boj, response):
        """Extract doctor/result records from a search result page.

        Two sources are scanned: JSON payloads embedded in
        ``<script ... data-repeatable>`` tags, and result ``div`` elements
        selected by XPath. Every record is a dict with the keys ``keyword``,
        ``order``, ``query``, ``contentType``, ``contentStyle``, ``name``,
        ``hospital``, ``jobTitle`` and ``origin``; each one is printed, and
        the collected list is finally passed to ``self.write``.

        :param url_boj: mapping providing at least the ``keyword`` entry
        :param response: the page HTML as text (used directly with ``re``
            and ``HTML``)
        """
        # Handle encoding issues
        # charset = 'utf-8'
        # try:
        #     try:
        #         search_res = re.search('meta.*?charset="(.*?)"', response.text)
        #         charset = search_res.group(1)
        #     except:
        #         search_res = re.search('meta.*?charset=(.*?)"', response.text)
        #         charset = search_res.group(1)
        # except:
        #     pass

        html_text = response
        results = re.findall('<script.*?data-repeatable>({"data".*?)</script>',
                             html_text)
        order_list = []
        item_list = []
        for res in results:
            json_obj = json.loads(res)

            # Position in the search result list (order)
            order = json_obj['order']
            order_list.append(order)

            # contentType: audio, video, quality popular-science article,
            # expert answer, expert voice answer, Q&A
            if 'title' in json_obj['data']:
                contentType = json_obj['data']['title']
            elif 'extend_data' in json_obj['data']:
                contentType = json_obj['data']['extend_data']['title']
            elif 'extendData' in json_obj['data']:
                contentType = json_obj['data']['extendData']['title']
            else:
                contentType = '结构卡'

            # Reduce the title to a short type label; the later checks
            # override the earlier ones.
            if '_' in contentType:
                contentType = contentType.split('_')[1]
                contentType = re.sub('(\(.*?\))', '', contentType)
            if '-' in contentType:
                contentType = contentType.split('-')[0].strip()
            if 'hasVoice' in json_obj['data']:
                contentType = '音频'
            if 'video' in json_obj['data'] or (
                    'videoList' in json_obj['data']
                    and len(json_obj['data']['videoList']) > 0
            ) or 'media' in json_obj['data']:
                contentType = '视频'
            if '<em>' in contentType:
                contentType = '问答'

            # contentStyle: top1, smart search aggregation, authoritative
            # style (handled specially further below)
            if 'showLeftText' in json_obj['data']:
                contentStyle = json_obj['data']['showLeftText']
            elif 'extend_data' in json_obj[
                    'data'] and 'showLeftText' in json_obj['data'][
                        'extend_data']:
                contentStyle = json_obj['data']['extend_data']['showLeftText']
            elif 'extendData' in json_obj[
                    'data'] and 'showLeftText' in json_obj['data'][
                        'extendData']:
                contentStyle = json_obj['data']['extendData']['showLeftText']
            else:
                contentStyle = 'top1'

            # Doctor details
            # 1. top1 style, variant 1
            if 'info' in json_obj['data']:
                # Any error while reading this variant silently drops the record.
                try:
                    query = json_obj['data']['unhighTitle']
                    name = json_obj['data']['info']['author']['name']
                    hospital = json_obj['data']['info']['content'][1]
                    jobTitle = json_obj['data']['info']['content'][0]
                    origin = json_obj['data']['showurl_area']['logo_name']
                    obj = {
                        'keyword': url_boj['keyword'],
                        'order': order,
                        'query': query,
                        'contentType': contentType,
                        'contentStyle': contentStyle,
                        'name': name,
                        'hospital': hospital,
                        'jobTitle': jobTitle,
                        'origin': origin,
                    }
                    item_list.append(obj)
                    print(obj)
                except:
                    pass
            # 1. top1 style, variant 2: structured card
            # (example query: "what to do about a headache")
            elif 'tabList' in json_obj['data']:
                if 'imageCount' not in json_obj['data']:
                    query = json_obj['data']['sgTitle']
                    name = json_obj['data']['tabList'][0]['doctor']['name']
                    hospital = json_obj['data']['tabList'][0]['doctor'][
                        'hospital']
                    jobTitle = json_obj['data']['tabList'][0]['doctor'][
                        'level']
                    origin = ''
                    contentStyle = 'top1'
                    obj = {
                        'keyword': url_boj['keyword'],
                        'order': order,
                        'query': query,
                        'contentType': contentType,
                        'contentStyle': contentStyle,
                        'name': name,
                        'hospital': hospital,
                        'jobTitle': jobTitle,
                        'origin': origin,
                    }
                    item_list.append(obj)
                    print(obj)
            # 2. Quality popular-science article type, expert answer type
            elif 'list' in json_obj['data'] or 'extend_data' in json_obj[
                    'data']:
                if 'list' in json_obj['data']:
                    data = json_obj['data']['list']
                elif 'extend_data' in json_obj['data']:
                    data = json_obj['data']['extend_data']['list']

                for item in data:
                    query = item['title'].replace('<em>', '').replace(
                        '</em>', '').replace('?', '').replace('。', '')
                    # Fall back to the space-separated 'doctorInfo' field
                    # when the dedicated keys are missing.
                    if 'doctorInfo' in item:
                        name = item[
                            'doctorName'] if 'doctorName' in item else item[
                                'doctorInfo'].split(' ')[0]
                        hospital = item[
                            'hospital'] if 'hospital' in item else item[
                                'doctorInfo'].split(' ')[1]
                    else:
                        name = item[
                            'doctorName'] if 'doctorName' in item else ''
                        hospital = item[
                            'hospital'] if 'hospital' in item else ''

                    jobTitle = item[
                        'doctorTitle'] if 'doctorTitle' in item else ''
                    if 'list' in json_obj['data']:
                        origin = item['source'] if 'source' in item else ''
                    elif 'extend_data' in json_obj['data']:
                        origin = item['miptitle'] if 'miptitle' in item else ''
                    obj = {
                        'keyword': url_boj['keyword'],
                        'order': order,
                        'query': query,
                        'contentType': contentType,
                        'contentStyle': contentStyle,
                        'name': name,
                        'hospital': hospital,
                        'jobTitle': jobTitle,
                        'origin': origin,
                    }
                    # Records without a doctor name are skipped.
                    if name == '':
                        continue
                    item_list.append(obj)
                    print(obj)
            # 3. Video type
            elif 'videoList' in json_obj['data']:
                for item in json_obj['data']['videoList']:
                    query = item['title'].replace('<em>', '').replace(
                        '</em>', '').replace('?', '').replace('。', '')
                    name = item['doctor_name'] if 'doctor_name' in item else ''
                    hospital = item['hospital'] if 'hospital' in item else ''
                    jobTitle = item[
                        'doctor_level'] if 'doctor_level' in item else ''
                    origin = item['source'] if 'source' in item else ''
                    obj = {
                        'keyword': url_boj['keyword'],
                        'order': order,
                        'query': query,
                        'contentType': contentType,
                        'contentStyle': contentStyle,
                        'name': name,
                        'hospital': hospital,
                        'jobTitle': jobTitle,
                        'origin': origin,
                    }
                    # NOTE(review): a missing key yields '' above, so this
                    # only skips items whose 'doctor_name' is an explicit null.
                    if name == None:
                        continue
                    item_list.append(obj)
                    print(obj)
            # 4. Audio type
            elif 'extendData' in json_obj['data']:
                for item in json_obj['data']['extendData']['list']:
                    query = item['title'].replace('<em>', '').replace(
                        '</em>', '').replace('?', '').replace('。', '')
                    name = item['doctorName'] if 'doctorName' in item else ''
                    hospital = item['hospital'] if 'hospital' in item else ''
                    jobTitle = item[
                        'doctorTitle'] if 'doctorTitle' in item else ''
                    origin = item['miptitle'] if 'miptitle' in item else ''
                    obj = {
                        'keyword': url_boj['keyword'],
                        'order': order,
                        'query': query,
                        'contentType': contentType,
                        'contentStyle': contentStyle,
                        'name': name,
                        'hospital': hospital,
                        'jobTitle': jobTitle,
                        'origin': origin,
                    }
                    # NOTE(review): a missing key yields '' above, so this
                    # only skips items whose 'doctorName' is an explicit null.
                    if name == None:
                        continue
                    item_list.append(obj)
                    print(obj)

        # Match results that are not delivered as JS/JSON data: the
        # authoritative style
        html = HTML(html_text)
        # results = html.xpath('//div[@id="results"]/div[@class="c-result result c-clk-recommend"]')
        results = html.xpath(
            '//div[@id="results"]/div[@class="c-result result"]')
        for res in results:
            # Serialize the result node and re-parse it on its own so the
            # absolute XPath queries below only see this result.
            detail_html_text = etree.tostring(res)
            order_res = re.search('order="(\d+)"',
                                  detail_html_text.decode()).group(1)
            # Skip results already handled through the JSON payloads.
            # NOTE(review): order_res is a str taken from the markup while
            # order_list holds values decoded from JSON - confirm the types
            # match, otherwise this test never succeeds.
            if order_res in order_list:
                continue
            detail_html = HTML(detail_html_text.decode())
            order = order_res
            query = detail_html.xpath(
                'string(//span[@class="c-title-text"])').split('_')[0].replace(
                    '?', '')
            # contentType = detail_html.xpath('string(//span[@class="c-title-text"])').split('_')[1]
            contentType = '问答'
            contentStyle = '权威样式'
            name = detail_html.xpath(
                'string(//div[@class="c-span11 c-line-clamp1"]//span[1])')
            hospital = detail_html.xpath(
                'string(//div[@class="c-span11 c-line-clamp1"]//span[3])')
            jobTitle = detail_html.xpath(
                'string(//div[@class="c-span11 c-line-clamp1"]//span[2])')
            origin = detail_html.xpath('string(//span[@class="c-color-gray"])')
            obj = {
                'keyword': url_boj['keyword'],
                'order': order,
                'query': query,
                'contentType': contentType,
                'contentStyle': contentStyle,
                'name': name,
                'hospital': hospital,
                'jobTitle': jobTitle,
                'origin': origin,
            }
            # Records without a doctor name are skipped.
            if name == '':
                continue
            item_list.append(obj)
            print(obj)

        self.write(url_boj, item_list)
Beispiel #57
0
 def test_xml_roundtrip(self):
     """A Property converted to XML and read back equals the original."""
     original = Property("tau_m", 20.0, mV)
     element = original.to_xml()
     # The element is also serialized to text; the text is not inspected.
     etree.tostring(element, pretty_print=True)
     restored = Property.from_xml(element, Document(mV=mV))
     self.assertEqual(original, restored)
Beispiel #58
0
def sign(t,
         key_spec,
         cert_spec=None,
         reference_uri='',
         insert_index=0,
         sig_path=".//{%s}Signature" % NS['ds']):
    """
    Fill in ('complete') every Signature template found in an XML document.

    If no template matching ``sig_path`` is present, an enveloped signature
    template is added to the document first and then completed.

    :param t: XML as lxml.etree
    :param key_spec: private key reference, see xmlsec.crypto.from_keyspec() for syntax.
    :param cert_spec: None or public key reference (to add cert to document),
                      see xmlsec.crypto.from_keyspec() for syntax.
    :param reference_uri: Envelope signature reference URI
    :param insert_index: Insertion point for the Signature element,
                         Signature is inserted at beginning by default
    :param sig_path: An xpath expression identifying the Signature template element
    :returns: XML as lxml.etree (for convenience, 't' is modified in-place)
    :raises XMLSigException: if the public key cannot be loaded or its size
                             differs from the private key's size
    """
    signing_key = xmlsec.crypto.from_keyspec(key_spec, private=True)

    cert_key = None
    if cert_spec is not None:
        cert_key = xmlsec.crypto.from_keyspec(cert_spec)
        if cert_key is None:
            raise XMLSigException(
                "Unable to load public key from '%s'" % cert_spec)
        # XXX maybe one size set and the other unset should also be an error?
        if cert_key.keysize and signing_key.keysize:
            if cert_key.keysize != signing_key.keysize:
                raise XMLSigException(
                    "Public and private key sizes do not match ({!s}, {!s})".
                    format(cert_key.keysize, signing_key.keysize))
            # This might be incorrect for PKCS#11 tokens if we have no public key
            log.debug("Using {!s} bit key".format(signing_key.keysize))

    templates = [elt for elt in t.findall(sig_path) if _is_template(elt)]
    if not templates:
        # Nothing to complete yet: create an enveloped template ourselves.
        templates = [
            add_enveloped_signature(t,
                                    reference_uri=reference_uri,
                                    pos=insert_index)
        ]

    assert templates, XMLSigException(
        "Failed to both find and add a signing template")

    if config.debug_write_to_files:
        with open("/tmp/sig-ref.xml", "w") as dump:
            dump.write(etree_to_string(root_elt(t)))

    signed_info_path = ".//{%s}SignedInfo" % NS['ds']
    signature_value_path = ".//{%s}SignatureValue" % NS['ds']

    for template in templates:
        log.debug("processing sig template: %s" % etree.tostring(template))
        signed_info = template.find(signed_info_path)
        assert signed_info is not None
        c14n_method = _cm_alg(signed_info)
        signature_method = _sig_alg(signed_info)

        # Store the reference digests in the template before SignedInfo is
        # canonicalised below.
        _process_references(t, template, verify_mode=False, sig_path=sig_path)
        # XXX create signature reference duplicates/overlaps process references unless a c14 is part of transforms
        log.debug("transform %s on %s" %
                  (c14n_method, etree.tostring(signed_info)))
        canonical = _transform(c14n_method, signed_info)
        log.debug("SignedInfo C14N: %s" % canonical)

        # Either hand the canonical SignedInfo to the key as-is, or digest
        # (and wrap) it here first, depending on what the key asks for.
        if not signing_key.do_digest:
            to_be_signed = canonical
        else:
            digest = xmlsec.crypto._digest(canonical, signature_method)
            log.debug("SignedInfo digest: %s" % digest)
            raw_digest = b64d(digest)
            to_be_signed = _signed_value(raw_digest, signing_key.keysize,
                                         signing_key.do_padding,
                                         signature_method)

        raw_signature = signing_key.sign(to_be_signed, signature_method)
        signature = b64e(raw_signature)
        if isinstance(signature, six.binary_type):
            signature = six.text_type(signature, 'utf-8')
        log.debug("SignatureValue: %s" % signature)

        # Reuse an existing SignatureValue element, otherwise create one
        # right after SignedInfo.
        value_elt = template.find(signature_value_path)
        if value_elt is not None:
            value_elt.text = signature
        else:
            signed_info.addnext(DS.SignatureValue(signature))

        for cert_src in (cert_key, signing_key):
            if cert_src is None or not cert_src.cert_pem:
                continue
            # Insert cert_data as b64-encoded X.509 certificate into XML document,
            # directly after the element that follows SignedInfo.
            anchor = signed_info.getnext()
            anchor.addnext(
                DS.KeyInfo(
                    DS.X509Data(
                        DS.X509Certificate(pem2b64(cert_src.cert_pem)))))
            break  # add the first we find, no more

    return t
Beispiel #59
0
def from_ele(maybe_ele):
    """Return the serialised XML text of an element; pass any other value through unchanged."""
    if not etree.iselement(maybe_ele):
        return maybe_ele
    serialized = etree.tostring(maybe_ele)
    return serialized.decode("utf-8")
Beispiel #60
0
def _process_references(t,
                        sig,
                        verify_mode=True,
                        sig_path=".//{%s}Signature" % NS['ds'],
                        drop_signature=False):
    """
    Digest every Reference of a Signature element and check or store the result.

    For each Reference the referenced object is located, the Reference's
    transforms are applied, and the result is digested with the algorithm
    reported by ``_ref_digest``. In verify mode the digest is compared with
    the stored DigestValue; otherwise it is written into DigestValue.

    :param t: XML as lxml.etree
    :param sig: the Signature element whose Reference elements are processed
    :param verify_mode: True to compare digests, False to store them (signing)
    :param sig_path: An xpath expression identifying Signature elements
    :param drop_signature: in verify mode, remove elements matching sig_path
                           from the copy kept for each verified reference
    :returns: in verify mode, a dict mapping each Reference element whose
              digest matched to the copy of its referenced object;
              None when signing
    :raises XMLSigException: if a reference URI is unsupported or cannot be
                             resolved, or a Reference has no DigestValue
    """

    verified_objects = {}
    for ref in sig.findall(".//{%s}Reference" % NS['ds']):
        obj = None
        uri = ref.get('URI', None)
        if uri is None or uri == '#' or uri == '':
            ref_obj = _implicit_same_document(t, sig)
            if ref_obj is None:
                raise XMLSigException(
                    "Unable to find reference while processing implicit same document reference"
                )
            ct = _remove_child_comments(ref_obj)
            obj = root_elt(ct)
        elif uri.startswith('#'):
            # Look the fragment up in a copy so the caller's tree is untouched.
            ct = copy.deepcopy(t)
            ref_obj = _get_by_id(ct, uri[1:])
            if ref_obj is None:
                raise XMLSigException(
                    "Unable to find reference while processing '%s'" % uri)
            obj = _remove_child_comments(ref_obj)
        else:
            raise XMLSigException("Unknown reference %s" % uri)

        if obj is None:
            raise XMLSigException("Unable to dereference Reference URI='%s'" %
                                  uri)

        obj_copy = obj
        if verify_mode:
            # Keep a pre-transform copy; this is what gets returned on success.
            obj_copy = copy.deepcopy(obj)
            if drop_signature:
                # Use a dedicated loop variable: rebinding ``sig`` here would
                # make later Reference iterations resolve implicit references
                # against a detached element of the copy instead of the
                # Signature that was passed in.
                for nested_sig in obj_copy.findall(sig_path):
                    nested_sig.getparent().remove(nested_sig)

        if config.debug_write_to_files:
            with open("/tmp/foo-pre-transform.xml", "w") as fd:
                fd.write(etree_to_string(obj))

        for tr in ref.findall(".//{%s}Transform" % NS['ds']):
            obj = _transform(_alg(tr), obj, tr=tr, sig_path=sig_path)
            nslist = _find_nslist(tr)
            if nslist is not None:
                r = root_elt(t)
                for nsprefix in nslist:
                    if nsprefix in r.nsmap:
                        obj_copy.nsmap[nsprefix] = r.nsmap[nsprefix]

        if not isinstance(obj, six.string_types):
            # No transform produced text yet: fall back to inclusive C14N.
            if config.debug_write_to_files:
                with open("/tmp/foo-pre-serialize.xml", "w") as fd:
                    fd.write(etree_to_string(obj))
            obj = _transform(constants.TRANSFORM_C14N_INCLUSIVE, obj)

        if config.debug_write_to_files:
            with open("/tmp/foo-obj.xml", "w") as fd:
                if six.PY2:
                    obj = obj.encode('utf-8')
                fd.write(obj)

        hash_alg = _ref_digest(ref)
        log.debug("using hash algorithm %s" % hash_alg)
        digest = xmlsec.crypto._digest(obj, hash_alg)
        log.debug("computed %s digest %s for ref %s" % (hash_alg, digest, uri))
        dv = ref.find(".//{%s}DigestValue" % NS['ds'])
        if dv is None:
            # Fail with the module's own error instead of an incidental
            # AttributeError/TypeError further down.
            raise XMLSigException(
                "Reference URI='%s' has no DigestValue element" % uri)

        if verify_mode:
            log.debug("found %s digest %s for ref %s" %
                      (hash_alg, dv.text, uri))
            computed_digest_binary = b64d(digest)
            digest_binary = b64d(dv.text)
            if digest_binary == computed_digest_binary:  # no point in verifying signature if the digest doesn't match
                verified_objects[ref] = obj_copy
            else:
                log.error("not returning ref %s - digest mismatch" % uri)
        else:  # signing - lets store the digest
            log.debug("replacing digest in %s" % etree.tostring(dv))
            dv.text = digest

    if verify_mode:
        return verified_objects
    return None