def transformXML(repository, XSL):
    """Transform the OAI files of a repository into Solr files.

    Every file found in the repository's 'oai-temp' path is parsed, passed
    through ``XSL`` and serialised as UTF-8 XML under the same file name in
    the 'solr-temp' path. After a successful conversion the source file is
    handed to ``moveFile(fileName, OAITempPath, OAIPath)``. A failure on one
    file is reported through ``printError`` and processing continues with
    the next file.

    Args:
        repository: value forwarded to ``repositoryPath`` and ``printError``.
        XSL: callable invoked as ``XSL(tree, collections="'geoleo-oai'")``;
            its result is serialised with ``etree.tostring``.
    """
    printHeading('Transforming OAI records to Solr records')

    # Run through XML files in oai-temp folder
    OAITempPath = repositoryPath('oai-temp', repository)
    OAIPath = repositoryPath('oai', repository)
    solrTempPath = repositoryPath('solr-temp', repository, None, True)

    if not os.path.exists(OAITempPath):
        return

    for fileName in sorted(os.listdir(OAITempPath)):
        solrFilePath = solrTempPath + '/' + fileName
        OAIFilePath = OAITempPath + '/' + fileName
        try:
            fileXML = etree.parse(OAIFilePath)
            solrXML = XSL(fileXML, collections="'geoleo-oai'")
            # tostring(..., encoding='utf-8') yields encoded bytes, so the
            # file is opened in binary mode; the context manager closes it
            # even when the write fails.
            with open(solrFilePath, 'wb') as solrFile:
                solrFile.write(
                    etree.tostring(solrXML, encoding='utf-8', method='xml'))
            print(u'Created Solr file »' + solrFilePath + u'«')
            moveFile(fileName, OAITempPath, OAIPath)
        except Exception:
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            printError(u'Could not convert file »' + fileName + u'«', repository)
def parse_type(self, pattern=u'//div[@class="cui_subnav_wrap"]//a[@class="cui_ico_order"]'):
    """Set ``self.travel_type`` from the first node matched by *pattern*.

    The node is serialised and the last double-quoted string in that markup
    is compared against two known order labels; anything else falls back to
    the third travel type.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    Raises:
        IndexError: if *pattern* matches nothing or the markup contains
            fewer than two double quotes.
    """
    # Serialise to text: the default ASCII output replaces non-ASCII
    # characters with numeric character references, so the Chinese labels
    # below could never be found in it.
    markup = etree.tostring(self.html.xpath(pattern)[0], encoding='unicode')
    _type = markup.split('"')[-2]
    if u'自由行订单' in _type:
        self.travel_type = u"自助游"
    elif u'酒店+顶点订单' in _type:
        self.travel_type = u'周末游'
    else:
        self.travel_type = u'跟团游'
def process(path):
    """Return the XML document at *path* with its database marked clustered.

    The first ``database`` element found anywhere in the document gets the
    attribute ``clustered="true"``; the whole tree is then serialised with
    pretty-printing and returned.

    Raises:
        AttributeError: if the document has no ``database`` element.
    """
    tree = etree.parse(path)
    database = tree.find(".//database")
    database.set('clustered', 'true')
    return etree.tostring(tree, pretty_print=True)
def fields_view_get(self, view_id=None, view_type='tree', toolbar=False, submenu=False):
    """Return the parent view definition, optionally enabling record creation.

    When the context carries a truthy ``cantidad`` entry, every element
    named after *view_type* in the view architecture gets ``create="1"``
    and the architecture is re-serialised into ``res['arch']``. Otherwise
    the parent's result is returned untouched.
    """
    res = super(Cabacera, self).fields_view_get(
        view_id=view_id, view_type=view_type, toolbar=toolbar, submenu=submenu)

    if not self.env.context.get('cantidad', False):
        return res

    arch = etree.XML(res['arch'])
    for node in arch.xpath("//" + view_type):
        node.set('create', '1')
    res['arch'] = etree.tostring(arch)
    return res
def _send_request(self, req_str, root_name, service): resp_str = None try: resp_str = self.SOAP_post(req_str, service) root = etree.XML(resp_str) resp_body = root.find(root_name) if resp_body is not None and len(resp_body) > 0: resp_str = etree.tostring(resp_body) except Exception, ex: print 'RESPONSE:' print resp_str print 'send_request :: Unexpected exception :: %s', ex
def parse_begin_time(self, pattern=u'//table[@class="calendar_right"]//td[@class="on"]/a/span[@class="date basefix"]'):
    """Build ``self.begin_time`` as a list of 'year-month-day' strings.

    The year and month are the first two digit runs in the serialised
    calendar summary node. Each node matched by *pattern* contributes a day
    number (the last double-quoted string of its markup). Whenever a day is
    smaller than the one before it, the calendar is taken to have rolled
    over into the next month (and next year after month 12).

    Dates before the first roll-over keep the year/month text exactly as it
    appears in the summary; later dates use plain integer formatting.

    Args:
        pattern: XPath expression selecting the day nodes on ``self.html``.

    Raises:
        IndexError: if the summary node or its year/month digits are missing.
    """
    summary = self.html.xpath(u'//div[@class="calendar"]//span[@date-ele="summary"]')
    # Serialise the first match (xpath returns a list) as text; ASCII output
    # would turn non-ASCII characters into '&#NNNN;' and pollute the digit scan.
    date_fix = etree.tostring(summary[0], encoding='unicode')
    date_prefix = re.findall(r'\d+', date_fix)
    year_text, month_text = date_prefix[0], date_prefix[1]

    date_list = []
    previous_day = None
    for _date in self.html.xpath(pattern):
        day = etree.tostring(_date, encoding='unicode').split('"')[-2]
        if previous_day is not None and int(day) < int(previous_day):
            # Day numbers dropped: we crossed into the following month.
            month = int(month_text) + 1
            year = int(year_text)
            if month > 12:
                year = year + 1
                month = month - 12
            year_text, month_text = str(year), str(month)
        date_list.append(year_text + '-' + month_text + '-' + day)
        previous_day = day
    self.begin_time = date_list
def prettyPrintXml(self,):
    """Pretty-print ``self.xmlData`` into ``self.xmlText`` and ``self.xmlFile``.

    ``self.xmlText`` is first set to the decoded data. If that text is
    non-empty the document is parsed, ``self.xmlText`` is replaced by the
    pretty-printed serialisation, and the tree is written to
    ``self.xmlFile`` as UTF-8 when a file was provided. Otherwise a notice
    is printed.
    """
    # Generate xml text
    self.xmlText = self.xmlData.decode()
    if not self.xmlText:
        return

    # Parse the original bytes rather than the decoded text: lxml raises
    # ValueError for text strings that carry an XML encoding declaration.
    rootElement = etree.fromstring(self.xmlData)
    prettyPrintText = etree.tostring(rootElement, pretty_print=True).decode()
    if prettyPrintText:
        self.xmlText = prettyPrintText

    if self.xmlFile is not None:
        elementTree = etree.ElementTree(rootElement)
        elementTree.write(self.xmlFile, pretty_print=True, encoding='utf-8')
    else:
        print('Not writing xml text to file, because None was provided for xmlFile parameter')
def convert():
    """Reply to ``req`` with an XML status document covering every sensor.

    One ``TelStat`` element is built per entry of ``self._sensors`` with
    Name, Value, Status and Type children, plus Units when the parser entry
    defines them. Any exception raised while building the document is sent
    back as the reply text instead.

    NOTE(review): the failure reply also uses the "ok" status — confirm
    that this is intended.
    """
    try:
        root = etree.Element(
            "TelescopeStatus", attrib={"timestamp": str(time.time())})
        for sensor_name, sensor in self._sensors.items():
            entry = etree.Element("TelStat")
            make_elem(entry, "Name", sensor_name)
            make_elem(entry, "Value", str(sensor.value()))
            make_elem(entry, "Status", str(sensor.status()))
            spec = self._parser[sensor_name]
            make_elem(entry, "Type", spec["type"])
            if "units" in spec:
                make_elem(entry, "Units", spec["units"])
            root.append(entry)
    except Exception as error:
        req.reply("ok", str(error))
        return
    req.reply("ok", etree.tostring(root))
def get_latest_version(url):
    """Return the highest version suffix linked from the table cells at *url*.

    The page is parsed as HTML. For every ``td`` whose ``a`` child has an
    ``href``, the three characters before the final character of the first
    href are taken as a candidate. Candidates equal to ``'way'`` or
    ``'0.0'`` are skipped; the remaining ones are compared numerically and
    the largest (the latest on ties) is returned as a string.

    Args:
        url: address of the listing page.

    Returns:
        The winning candidate string, or ``'0'`` when none qualifies.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # The response was previously never closed.
        response.close()

    tree = etree.parse(BytesIO(html), etree.HTMLParser())

    version = '0'
    for cell in tree.iter('td'):
        hrefs = cell.xpath('a/@href')
        if not hrefs:
            continue
        candidate = hrefs[0][-4:-1]
        if candidate in ('way', '0.0'):
            continue
        try:
            if int(candidate) >= int(version):
                version = candidate
        except ValueError:
            # Ignore links whose suffix is not an integer instead of
            # aborting the whole scan.
            continue
    return version
def parse_begin_time(self, pattern=u'//div[@class="basefix CalendarBody"]//td[@class="new_date_on"]/a/span[@class="date basefix"]'):
    """Build ``self.begin_time`` as a list of 'year-month-day' strings.

    The year and month are the first two digit runs in the text of the
    first calendar heading node. Each node matched by *pattern* contributes
    a day number (the last double-quoted string of its markup). Whenever a
    day is smaller than the one before it, the calendar is taken to have
    rolled over into the next month (and next year after month 12).

    Dates before the first roll-over keep the year/month text exactly as it
    appears in the heading; later dates use plain integer formatting.

    The default *pattern* now tests ``@class``; without the ``@`` the
    predicate looked for a child element named ``class``.

    Args:
        pattern: XPath expression selecting the day nodes on ``self.html``.

    Raises:
        IndexError: if the heading node or its year/month digits are missing.
    """
    # xpath returns a list; the heading text lives on its first element.
    heading = self.html.xpath(u'//div[@class="detail_calendar_warp"]//strong')
    date_fix = heading[0].text
    date_prefix = re.findall(r'\d+', date_fix)
    year_text, month_text = date_prefix[0], date_prefix[1]

    date_list = []
    previous_day = None
    for _date in self.html.xpath(pattern):
        # Text serialisation keeps str.split usable on Python 3 as well.
        day = etree.tostring(_date, encoding='unicode').split('"')[-2]
        if previous_day is not None and int(day) < int(previous_day):
            # Day numbers dropped: we crossed into the following month.
            month = int(month_text) + 1
            year = int(year_text)
            if month > 12:
                year = year + 1
                month = month - 12
            year_text, month_text = str(year), str(month)
        date_list.append(year_text + '-' + month_text + '-' + day)
        previous_day = day
    self.begin_time = date_list
def send_cmd_int(cls, cmd, msg_type):
    '''Build an NX-API request for *cmd*, send it and decode the reply.

    Internal helper. For the show message types only a single command is
    accepted. Returns ``[output, status, msg_text]`` where ``status`` is 0
    for reply code "200" and the integer reply code otherwise.
    '''
    show_types = ("cli_show", "cli_show_ascii")
    if msg_type in show_types and " ;" in cmd:
        raise cmd_exec_error("Only single show command supported in internal api")

    req_msg_str = cls.req_obj.get_req_msg_str(
        msg_type=msg_type,
        input_cmd=cmd,
        out_format=cls.out_format,
        do_chunk=cls.do_chunk,
        sid=cls.sid,
    )
    resp_headers, resp_str = cls.req_fetcher.get_resp(
        req_msg_str, cls.cookie, cls.timeout)

    # Remember the session cookie handed back by the device, if any.
    if 'Set-Cookie' in resp_headers:
        cls.cookie = resp_headers['Set-Cookie']
    # Value is unused, but the lookup still fails when the header is absent.
    content_type = resp_headers['Content-Type']

    root = etree.fromstring(resp_str)
    body = root.findall('.//body')
    code = root.findall('.//code')
    msg = root.findall('.//msg')

    output = ""
    if body:
        first_body = body[0]
        if msg_type == 'cli_show':
            output = etree.tostring(first_body)
        else:
            output = first_body.text
    if output is None:
        output = ""

    reply_code = code[0].text
    status = 0 if reply_code == "200" else int(reply_code)
    return [output, status, msg[0].text]
def objectify(self):
    """Parse ``self.Xml`` with lxml.objectify and store the result in ``self.root``.

    If lxml.objectify is unavailable a message is printed and nothing else
    happens. When the first parse raises ValueError, ``self.Xml`` is
    replaced by its ``etree.tostring`` serialisation and parsed again. Any
    other exception from the first parse is printed rather than raised.

    NOTE(review): earlier documentation listed file names, file objects and
    URLs as accepted sources; the code only passes ``self.Xml`` to
    ``objectify.XML`` — confirm which inputs are really supported.
    """
    if not is_available_xml_objectify:
        print("Module lxml.objectify not available")
        return

    try:
        self.root = objectify.XML(self.Xml)
    except ValueError:
        # Second attempt on the serialised form; errors here propagate.
        serialised = etree.tostring(self.Xml)
        self.Xml = serialised
        self.root = objectify.XML(serialised)
    except Exception as err:
        print("Objectify error: %s " % err)
def transformXML(repository, XSL):
    """Transform the OAI files of a repository into Solr files.

    Every file found in the repository's 'oai-temp' path is parsed, passed
    through ``XSL`` and serialised as UTF-8 XML under the same file name in
    the 'solr-temp' path. After a successful conversion the source file is
    handed to ``moveFile(fileName, OAITempPath, OAIPath)``. A failure on one
    file is reported through ``printError`` and processing continues with
    the next file.

    Args:
        repository: value forwarded to ``repositoryPath`` and ``printError``.
        XSL: callable invoked as ``XSL(tree, collections="'geoleo-oai'")``;
            its result is serialised with ``etree.tostring``.
    """
    printHeading('Transforming OAI records to Solr records')

    # Run through XML files in oai-temp folder
    OAITempPath = repositoryPath('oai-temp', repository)
    OAIPath = repositoryPath('oai', repository)
    solrTempPath = repositoryPath('solr-temp', repository, None, True)

    if not os.path.exists(OAITempPath):
        return

    for fileName in sorted(os.listdir(OAITempPath)):
        solrFilePath = solrTempPath + '/' + fileName
        OAIFilePath = OAITempPath + '/' + fileName
        try:
            fileXML = etree.parse(OAIFilePath)
            solrXML = XSL(fileXML, collections="'geoleo-oai'")
            # tostring(..., encoding='utf-8') yields encoded bytes, so the
            # file is opened in binary mode; the context manager closes it
            # even when the write fails.
            with open(solrFilePath, 'wb') as solrFile:
                solrFile.write(
                    etree.tostring(solrXML, encoding='utf-8', method='xml'))
            print(u'Created Solr file »' + solrFilePath + u'«')
            moveFile(fileName, OAITempPath, OAIPath)
        except Exception:
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            printError(u'Could not convert file »' + fileName + u'«', repository)
def parse_price_pre(self, pattern=u'//div[@class="bg_miancolor"]//div[@class="main_price_wrap_left"]/strong[@class="total_price"]'):
    """Store in ``self.price_pre`` the last double-quoted string of the first match.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    Raises:
        IndexError: if *pattern* matches nothing or the markup contains
            fewer than two double quotes.
    """
    # Serialise as text so the split works on Python 3 too and non-ASCII
    # characters are not turned into character references.
    markup = etree.tostring(self.html.xpath(pattern)[0], encoding='unicode')
    self.price_pre = markup.split('"')[-2]
def parse_price(self, pattern=u'//div[@class="new_calendar"]//td[@class="bg_blue on"]/a/span[@class="calendar_price01"]'):
    """Append one price per matched node to ``self.price``.

    For every node matched by *pattern* the last double-quoted string of
    its serialised markup is appended, in document order.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    NOTE(review): assumes ``self.price`` is a list; the previous code called
    ``.push``, which lists do not provide.
    """
    for _price in self.html.xpath(pattern):
        # Text serialisation keeps str.split usable on Python 3 as well.
        markup = etree.tostring(_price, encoding='unicode')
        self.price.append(markup.split('"')[-2])
def parse_price_pre(self, pattern=u'//div[@class="detail_media_favorable"]/div[@class="detail_price"]'):
    """Store in ``self.price_pre`` the second double-quoted string of the first match.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    Raises:
        IndexError: if *pattern* matches nothing or the markup contains
            fewer than three double quotes.
    """
    # Serialise as text so the split works on Python 3 too and non-ASCII
    # characters are not turned into character references.
    markup = etree.tostring(self.html.xpath(pattern)[0], encoding='unicode')
    # Splitting on '"' puts quoted strings at the odd indexes; 3 is the second.
    self.price_pre = markup.split('"')[3]
def parse_price(self, pattern=u'//table[@class="calendar_right"]//td[@class="on"]/a/span[@class="calendar_price01"]'):
    """Append one price per matched node to ``self.price``.

    For every node matched by *pattern* the last double-quoted string of
    its serialised markup is appended, in document order.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    NOTE(review): assumes ``self.price`` is a list; the previous code called
    ``.push``, which lists do not provide.
    """
    prices = [
        # Text serialisation keeps str.split usable on Python 3 as well.
        etree.tostring(_price, encoding='unicode').split('"')[-2]
        for _price in self.html.xpath(pattern)
    ]
    self.price.extend(prices)
def element_to_string(element):
    """Return the content of *element* as one text string.

    The result is the element's leading text, followed by the serialised
    markup of each child element, followed by the element's own tail text.
    Missing text or tail counts as an empty string.

    Args:
        element: an etree element.

    Returns:
        The concatenated text.
    """
    parts = [element.text or ""]
    # encoding='unicode' yields text, so the pieces can be joined with the
    # element's text on Python 2 and Python 3 alike.
    parts.extend(
        etree.tostring(sub_element, encoding='unicode')
        for sub_element in element
    )
    # tail is None when nothing follows the element; guard like text above.
    parts.append(element.tail or "")
    return "".join(parts)
def addToJson(self, xml):
    """Return *xml* converted to an indented JSON string with sorted keys.

    The element is pretty-printed, re-parsed with ``fromstring``, converted
    through ``xml2json.data`` and dumped with a two-space indent.
    """
    serialised = etree.tostring(xml, pretty_print=True)
    reparsed = fromstring(serialised)
    converted = xml2json.data(reparsed)
    return dumps(converted, sort_keys=True, indent=2)
def addToJson(self, xml):
    """Serialise *xml* and return its JSON rendering.

    The pretty-printed markup is parsed again with ``fromstring``, mapped
    to plain data by ``xml2json.data`` and dumped with sorted keys and an
    indent of 2.
    """
    markup = etree.tostring(xml, pretty_print=True)
    data = xml2json.data(fromstring(markup))
    json_text = dumps(data, sort_keys=True, indent=2)
    return json_text
def parse_title(self, pattern=u'//div[@class="detail_media_title"]/h2'):
    """Store in ``self.title`` the third '>'-separated piece of the first match.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    Raises:
        IndexError: if *pattern* matches nothing or the markup has fewer
            than two '>' characters.
    """
    # xpath returns a list, so serialise its first element; text output
    # keeps non-ASCII titles readable instead of character references.
    markup = etree.tostring(self.html.xpath(pattern)[0], encoding='unicode')
    self.title = markup.split('>')[2]
def parse_title(self, pattern=u'//div[@class="vacation_bd"]//h1[@itemprop="name"]'):
    """Store in ``self.title`` the last double-quoted string of the first match.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    Raises:
        IndexError: if *pattern* matches nothing or the markup contains
            fewer than two double quotes.
    """
    # xpath returns a list, so serialise its first element; text output
    # keeps non-ASCII titles readable instead of character references.
    markup = etree.tostring(self.html.xpath(pattern)[0], encoding='unicode')
    self.title = markup.split('"')[-2]
def saveConfig(config, path):
    """Serialise the element *config* with ``ET.tostring`` and write it to *path*.

    Args:
        config: element to serialise.
        path: destination file path; an existing file is overwritten.
    """
    print("saving %s" % path)
    # ET.tostring returns encoded bytes on Python 3, hence binary mode. The
    # context manager closes the file; the old code only referenced
    # ``file.close`` without calling it.
    with open(path, "wb") as config_file:
        config_file.write(ET.tostring(config))
def parse_begin_city(self, pattern=u'//li[@class="product_city"]/span[@class="city"]'):
    """Set ``self.begin_city`` from the first node matched by *pattern*.

    The node's markup is split on ':'; the second piece, minus its final
    character, is stored.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    Raises:
        IndexError: if *pattern* matches nothing or the markup contains no ':'.
    """
    # xpath returns a list, so serialise its first element; text output
    # keeps non-ASCII city names readable instead of character references.
    markup = etree.tostring(self.html.xpath(pattern)[0], encoding='unicode')
    self.begin_city = markup.split(":")[1][0:-1]
def parse_days(self, pattern=u'//ul[@class="detail_media_content"]//span[@class="border_t01 border_t01_cur"]'):
    """Store in ``self.days`` the last double-quoted string of the first match.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    Raises:
        IndexError: if *pattern* matches nothing or the markup contains
            fewer than two double quotes.
    """
    # Serialise as text so the split works on Python 3 too and non-ASCII
    # characters are not turned into character references.
    markup = etree.tostring(self.html.xpath(pattern)[0], encoding='unicode')
    self.days = markup.split('"')[-2]
def parse_price_pre(self, pattern=u'//div[@class="vacation_bd"]//div[@class="main_price_col basefix"]/strong[@class="total_price"]'):
    """Store in ``self.price_pre`` the last double-quoted string of the first match.

    Args:
        pattern: XPath expression evaluated on ``self.html``.

    Raises:
        IndexError: if *pattern* matches nothing or the markup contains
            fewer than two double quotes.
    """
    # Serialise as text so the split works on Python 3 too and non-ASCII
    # characters are not turned into character references.
    markup = etree.tostring(self.html.xpath(pattern)[0], encoding='unicode')
    self.price_pre = markup.split('"')[-2]
#Author:Tom_Fish
#-*- coding:utf-8 -*-
# Download the page at the URL below, parse it as HTML and print the
# serialised document.
import requests
# etree.HTML is an lxml API; the standard library's xml.etree package has
# no HTML attribute, so ``from xml import etree`` failed at etree.HTML.
from lxml import etree

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'
}
res = requests.get('http://kaijiang.500.com/shtml/dlt/18001.shtml', headers=headers)
html = etree.HTML(res.text)
result = etree.tostring(html)
print(result)
print('hello world')