def run_test_in_docker(self, test, full_name, result):
    # Run one test module inside the nose Docker container and record the
    # outcome (success / failure / error) on the unittest-style *result*.
    try:
        # Preserve nosetests' exit status while still dumping the XUnit XML
        # report to stdout so the except-branch below can parse it.
        exit_with_proper_code = 'EXIT=$?; cat /app/nosetests.xml; exit $EXIT'
        xml = docker.run(
            '--rm', '-v', '%s:/app' % abspath(os.curdir),
            'nose-docker:%s' % self.container_tag,
            '/bin/bash',
            c="cd /app && echo 'running tests for %s...' && nosetests --with-xunit %s; %s" % (
                full_name, full_name, exit_with_proper_code
            ),
        )
        result.addSuccess(test)
    except sh.ErrorReturnCode:
        # Non-zero exit: the container's stdout contains the XUnit report
        # (after whatever the test run printed) — slice from the XML prolog.
        err = sys.exc_info()[1]
        xml = err.stdout[err.stdout.index('<?xml'):]
        root = etree.fromstring(xml)
        failure = FAILURE_SELECTOR(root)
        if failure:
            # Literal backslash-n sequences in the report become real newlines.
            failure_message = su.unescape(failure[0].text).replace('\\n', '\n')
            result.addFailure(test, failure_message)
        error = ERROR_SELECTOR(root)
        if error:
            result.addError(test, su.unescape(error[0].text))
    finally:
        # Count the run whether it passed, failed or errored.
        result.testsRun += 1
def parse_markup(markup_text):
    'Return plain text and a list of start, end TextTag'
    stream = BytesIO(_markup(markup_text))
    plain = ''
    open_tags = []   # (element, offset where its span starts)
    spans = []       # collected (start, end, element) triples
    for event, node in ET.iterparse(stream, events=['start', 'end']):
        if node.tag == 'markup':
            # The wrapper element contributes its own text/tail but no span.
            if event == 'start' and node.text:
                plain += unescape(node.text)
            if event == 'end' and node.tail:
                plain += unescape(node.tail)
        elif event == 'start':
            open_tags.append((node, len(plain)))
            if node.text:
                plain += unescape(node.text)
        else:  # event == 'end'
            if node.tag == 'div':
                plain += '\n'
            assert open_tags[-1][0] == node
            _, begin = open_tags.pop()
            spans.append((begin, len(plain), node))
            if node.tail:
                plain += unescape(node.tail)
    return plain, spans
def get_tag_content(doc, tag_name, unescape_xml=1):
    """Get a tag's value by tag name, honouring its 'type' attribute.

    The last occurrence of *tag_name* in *doc* wins. 'integer' and 'boolean'
    types are converted; anything else is treated as a string and, when
    *unescape_xml* is truthy, XML-unescaped. Returns None when the tag is
    missing or empty. (Refactor: the duplicated string/unescape branch was
    merged, and the local no longer shadows the builtin `type`.)
    """
    tags = doc.getElementsByTagName(tag_name)
    if len(tags) > 0:
        tag = tags.pop()  # Take the last occurrence
        text_node = tag.firstChild
        if text_node and len(text_node.data) > 0:
            type_attr = tag.getAttribute('type') if tag.hasAttribute('type') else None
            if type_attr == 'integer':
                return int(text_node.data)
            if type_attr == 'boolean':
                return XmlHelper.str_as_bool(text_node.data)
            # Unknown or absent type: treat as a string.
            inner_text = str(text_node.data)
            if unescape_xml:
                inner_text = unescape(inner_text)
            return inner_text
    return None
def sanitize_token(self, token):
    # Whitelist-based HTML sanitizer (Python 2 era: has_key, u"" literals).
    # Allowed elements keep only safe attributes; disallowed elements are
    # re-emitted as escaped character data.
    # accommodate filters which use token_type differently
    token_type = token["type"]
    if token_type in tokenTypes.keys():
        token_type = tokenTypes[token_type]
    if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
                      tokenTypes["EmptyTag"]):
        if token["name"] in self.allowed_elements:
            if token.has_key("data"):
                # Reversed so the FIRST occurrence of a duplicate attribute
                # wins when the dict is built.
                attrs = dict([(name,val) for name,val in
                              token["data"][::-1]
                              if name in self.allowed_attributes])
                for attr in self.attr_val_is_uri:
                    if not attrs.has_key(attr):
                        continue
                    # Strip control/space characters and entity-decode before
                    # checking the URI scheme against the protocol whitelist.
                    val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                           unescape(attrs[attr])).lower()
                    #remove replacement characters from unescaped characters
                    val_unescaped = val_unescaped.replace(u"\ufffd", "")
                    if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
                        (val_unescaped.split(':')[0] not in
                         self.allowed_protocols)):
                        del attrs[attr]
                for attr in self.svg_attr_val_allows_ref:
                    if attr in attrs:
                        # Drop url(...) references that are not fragment-local.
                        attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                             ' ',
                                             unescape(attrs[attr]))
                if (token["name"] in self.svg_allow_local_href and
                    'xlink:href' in attrs and
                    re.search('^\s*[^#\s].*', attrs['xlink:href'])):
                    # Non-fragment xlink:href on SVG elements is disallowed.
                    del attrs['xlink:href']
                if attrs.has_key('style'):
                    attrs['style'] = self.sanitize_css(attrs['style'])
                token["data"] = [[name,val] for name,val in attrs.items()]
            return token
        else:
            # Disallowed element: serialize it back as plain character data.
            if token_type == tokenTypes["EndTag"]:
                token["data"] = "</%s>" % token["name"]
            elif token["data"]:
                attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
                token["data"] = "<%s%s>" % (token["name"],attrs)
            else:
                token["data"] = "<%s>" % token["name"]
            if token.get("selfClosing"):
                token["data"]=token["data"][:-1] + "/>"
            if token["type"] in tokenTypes.keys():
                token["type"] = "Characters"
            else:
                token["type"] = tokenTypes["Characters"]
            del token["name"]
            return token
    elif token_type == tokenTypes["Comment"]:
        # Comments are dropped entirely (implicit None return).
        pass
    else:
        return token
def __init__(self, filename=None):
    """Load author/description/instructions metadata from *filename*.

    The XML file's <templatedata> element is scanned for the three fields;
    each missing or empty field falls back to ''. With filename=None all
    fields stay empty. (Refactor: the three identical try/except blocks
    were collapsed into one loop.)
    """
    self.author = ''
    self.description = ''
    self.instructions = ''
    self.filename = filename
    if filename is None:
        return
    filexml = minidom.parse(filename)
    # we have no use for all the xml data in the file. We only care
    # about what is between the "templatedata" tags
    templatedata = filexml.getElementsByTagName('templatedata')
    if not len(templatedata):
        return
    desc_xml = templatedata[0]
    for field in ('author', 'description', 'instructions'):
        try:
            value = saxutils.unescape(
                desc_xml.getElementsByTagName(field)[0].firstChild.data)
        except (IndexError, AttributeError):
            # Tag absent (IndexError) or present but empty (AttributeError).
            value = ''
        setattr(self, field, value)
def endElement(self, name):
    # SAX end-tag handler for a single vampire record (Python 2: note the
    # bare `print` statements, apparently debug output).
    if name == 'vampire':
        assert self.current_vampire
        self.add_vampire(self.current_vampire)
        self.current_vampire = None
    elif name == 'experience':
        assert self.current_experience
        self.current_experience = None
    elif name == 'traitlist':
        assert self.current_traitlist
        self.current_traitlist = None
    elif name == 'biography':
        assert self.reading_biography
        self.reading_biography = False
        if self.current_vampire:
            # Accumulated character data becomes the biography text.
            self.current_vampire['biography'] = unescape(self.current_biography)
            print self.current_biography
            self.current_biography = ''
    elif name == 'notes':
        assert self.reading_notes
        self.reading_notes = False
        if self.current_vampire:
            self.current_vampire['notes'] = unescape(self.current_notes)
            print self.current_notes
            self.current_notes = ''
def read_vampire(v, user, date_hint):
    """Build, populate and persist a vampire record from a <vampire> element.

    Reads trait lists, experience entries and the optional biography/notes
    text, then saves the record and returns it. (Fixes: removed the unused
    `vampire = {}` local; manual trait counter replaced with enumerate.)
    """
    previous_entry = None
    current_vampire = create_base_vampire(v.attrib, user, date_hint=date_hint)
    for tl in v.findall('traitlist'):
        read_traitlist_properties(tl.attrib, current_vampire)
        # Traits keep their document order, starting at 1.
        for order, t in enumerate(tl.findall('trait'), start=1):
            read_trait(t.attrib, tl.attrib, current_vampire, order)
    exp = v.find('experience')
    for ee in exp.findall('entry'):
        # Entries chain: each one links back to the previous.
        previous_entry = read_experience_entry(ee.attrib, current_vampire,
                                               previous_entry,
                                               date_hint=date_hint)
    biography = v.find('biography')
    if biography is not None:
        current_vampire.biography = unescape(biography.text).strip()
    notes = v.find('notes')
    if notes is not None:
        current_vampire.notes = unescape(notes.text).strip()
    current_vampire.update_experience_total()
    current_vampire.save()
    current_vampire.add_default_traitlist_properties()
    return current_vampire
def write_csv_file(filename, dictionary):
    """Write *dictionary* to *filename* as two-column CSV (key, value).

    XML entities in keys and values are unescaped before writing; the file
    is written UTF-8 encoded. (Fix: the original called unescape() on
    utf-8-encoded bytes, which fails on Python 3, and opened the file
    without newline='', which lets the csv module double line endings.)
    """
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for key, value in dictionary.items():
            writer.writerow([unescape(key), unescape(value)])
def parse(client, xml_data):
    """Parse an MSNObject XML descriptor into an MSNObject instance.

    Raises ParseError when *xml_data* is not well-formed XML. The parsed
    SHA1C is discarded (with a warning) when it is not valid base64.
    """
    data = StringIO.StringIO(xml_data)
    try:
        element = ElementTree.parse(data).getroot().attrib
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        raise ParseError('Invalid MSNObject')
    creator = element["Creator"]
    size = int(element["Size"])
    type = int(element["Type"])  # shadows the builtin; kept for interface compat
    location = xml.unescape(element["Location"])
    friendly = base64.b64decode(xml.unescape(element["Friendly"]))
    shad = element.get("SHA1D", None)
    if shad is not None:
        shad = _decode_shad(shad)
    shac = element.get("SHA1C", None)
    if shac is not None:
        try:
            shac = base64.b64decode(shac)
        except TypeError:
            logger.warning("Invalid SHA1C in MSNObject: %s" % shac)
            shac = None
    result = MSNObject(creator, size, type, location, friendly, shad, shac)
    # Cache the raw XML for repr/round-tripping.
    result._repr = xml_data
    return result
def endElement(self, name):
    # SAX end-tag handler for the top-level chronicle parser. While inside a
    # creature element, events are forwarded to that creature's sub-parser.
    if self.reading_creature:
        if self.creatures[self.reading_creature]:
            self.creatures[self.reading_creature].endElement(name)
        return
    if name in self.creatures_elements:
        # NOTE(review): this expects reading_creature to still be set when a
        # creature element closes, but the branch above returns whenever
        # reading_creature is truthy — confirm the intended nesting/order;
        # the original source's indentation was not recoverable here.
        assert self.reading_creature
        self.reading_creature = ''
        if self.creatures[name]:
            self.creatures[name].endElement(name)
        return
    if name == 'grapevine':
        assert self.chronicle
    elif name == 'usualplace':
        assert self.reading_usualplace
        self.reading_usualplace = False
        if self.chronicle:
            # Accumulated character data becomes the 'usualplace' field.
            self.chronicle['usualplace'] = unescape(self.current_usualplace)
            self.current_usualplace = ''
    elif name == 'description':
        assert self.reading_description
        self.reading_description = False
        if self.chronicle:
            self.chronicle['description'] = unescape(self.current_description)
            self.current_description = ''
def add_entry(self, result):
    """Register a search result in the Rhythmbox DB, skipping duplicates."""
    if not result.url:
        return
    # add only distinct songs (unique by title+artist+duration) to prevent duplicates
    fingerprint = ('%s%s%s' % (result.title, result.artist, result.duration)).lower()
    if fingerprint in self.entries_hashes:
        return
    self.entries_hashes.append(fingerprint)
    try:
        # first, let's try to find if the song with this url is already in db
        if self.db.entry_lookup_by_location(result.url) is not None:
            return
        # add song to db
        new_entry = RB.RhythmDBEntry.new(self.db, self.entry_type, result.url)
        self.db.commit()
        if new_entry is None:
            return
        # update metadata
        db = self.db
        db.entry_set(new_entry, RB.RhythmDBPropType.TITLE, unescape(result.title))
        db.entry_set(new_entry, RB.RhythmDBPropType.DURATION, result.duration)
        db.entry_set(new_entry, RB.RhythmDBPropType.ARTIST, unescape(result.artist))
        # all the songs will get "vk.com" album
        db.entry_set(new_entry, RB.RhythmDBPropType.ALBUM, "vk.com")
        db.commit()
    except Exception as e:
        # This happens on duplicate uris being added
        sys.excepthook(*sys.exc_info())
        print("Couldn't add %s - %s" % (result.artist, result.title), e)
def get_parallelsentences(self):
    """
    This function parses a ranking xml file and returns a list of parallel
    sentence objects.
    @return ps_list: list of ParallelSentence objects, each carrying its
        ranking item's sentence_id as a 'sentence_id' attribute
    @type ps_list: list
    """
    r_items = self.xmlObject.getElementsByTagName('ranking-item')
    ps_list = []
    for r_item in r_items:
        stc_id = r_item.getAttribute('sentence_id')
        src = ''
        tgt_list = []
        for rank_child in r_item.childNodes:
            if rank_child.nodeName == 'source':
                # Source sentence text is the first (text) child node.
                src = SimpleSentence(unescape(rank_child.childNodes[0].nodeValue))
            elif rank_child.nodeName != '#text':
                # Any other element is a system translation; copy all of its
                # XML attributes onto the sentence, plus 'system' = @name.
                tgt = SimpleSentence(unescape(rank_child.childNodes[0].nodeValue))
                for attribute_name in rank_child.attributes.keys():
                    attribute_value = rank_child.getAttribute(attribute_name)
                    tgt.add_attribute(attribute_name, attribute_value)
                tgt.add_attribute('system', rank_child.getAttribute('name'))
                # tgt.add_attribute('rank', rank_child.getAttribute('rank'))
                tgt_list.append(tgt)
        ps = ParallelSentence(src, tgt_list)
        # TODO: this was old, may have to change the attribute key. Commented
        # because overlapping with other features
        # if not ps.get_attributes().has_key("id"):
        #     ps.add_attributes({'id': stc_id})
        ps.add_attributes({'sentence_id': stc_id})
        ps_list.append(ps)
    return ps_list
def read(glos, filename):
    """Read an XDB glossary file into *glos*.

    First scans for each tag in infoKeys and stores it via glos.setInfo,
    then collects every <word><in>...</in><out>...</out></word> entry into
    glos.data as (word, definition) pairs. (Fixes: file handle was never
    closed; a missing </word> made the scan restart near the beginning,
    risking an infinite loop.)
    """
    glos.data = []
    with open(filename, 'rb') as fp:
        xdbText = fp.read()
    # NOTE(review): string searching assumes Python 2 semantics where a
    # 'rb' read yields str — confirm if this ever runs under Python 3.
    i = 0
    for item in infoKeys:  ## method should be changed
        inf0 = xdbText.find('<'+item+'>', i)
        if inf0 == -1:
            continue
        inf0 += (len(item)+2)
        inf1 = xdbText.find('</'+item+'>', inf0)
        inf = unescape(xdbText[inf0:inf1])
        glos.setInfo(item, inf)
        i = inf1
    while True:
        i = xdbText.find('<word>', i)
        if i == -1:
            break
        in0 = xdbText.find('<in>', i) + 4
        in1 = xdbText.find('</in>', in0)
        out0 = xdbText.find('<out>', in1) + 5
        out1 = xdbText.find('</out>', out0)
        word = unescape(xdbText[in0:in1])
        defi = unescape(xdbText[out0:out1])
        glos.data.append((word, defi))
        close = xdbText.find('</word>', out1)
        if close == -1:
            # Malformed entry: don't rescan from the start forever.
            break
        i = close + 7
def sql_connection(xml_element_userid, xml_element_password, database_name):
    """Read SQL credentials from Credentials.xml and open a MySQL connection.

    *xml_element_userid* / *xml_element_password* are the tag names holding
    the user id and password; *database_name* selects the schema. Returns an
    open mysql.connector connection.
    """
    # Pull the Logon info for SQL
    # "with" opens, reads and closes Credentials.xml
    with open("Credentials.xml") as doc_scan:
        # Open the XML document in the minidom parser
        xml_docscan = minidom.parse(doc_scan)
    # Search for the element, take the first Child Node and
    # convert to XML then strip the extra white spaces
    sql_userid = saxutils.unescape(xml_docscan.getElementsByTagName\
                                   (xml_element_userid)\
                                   [0].childNodes[0].toxml().strip())
    sql_password = saxutils.unescape(xml_docscan.getElementsByTagName\
                                     (xml_element_password)[0].childNodes[0].toxml()\
                                     .strip())
    # MSSQL Connection String
    # sql_connection = pyodbc.connect('DRIVER={SQL Server}; SERVER=ECCO-SQL;\
    #    DATABASE=Workstation; UID=UserName; PWD=Password')
    # MySQL Connection String
    sql_connection = mysql.connector.Connect(host="localhost"\
                                             , user=sql_userid, password=sql_password\
                                             , database=database_name)
    # Clear variables (credentials shouldn't linger in locals)
    del(sql_userid)
    del(sql_password)
    return sql_connection
def allowed_token(self, token, token_type):
    # Sanitize the attributes of an element that is already on the allowed
    # list: whitelist attributes, vet URI schemes, scrub url() references
    # and non-local xlink:href values, and sanitize inline CSS.
    if "data" in token:
        # Reversed so the FIRST occurrence of a duplicate attribute wins
        # when the dict is built.
        attrs = dict([(name, val) for name, val in
                      token["data"][::-1]
                      if name in self.allowed_attributes])
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Strip control/space characters and entity-decode before
            # checking the URI scheme against the protocol whitelist.
            val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace("\ufffd", "")
            if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                    (val_unescaped.split(':')[0] not in
                     self.allowed_protocols)):
                del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # Drop url(...) references that are not fragment-local.
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and
                re.search('^\s*[^#\s].*', attrs['xlink:href'])):
            # Non-fragment xlink:href on SVG elements is disallowed.
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        token["data"] = [[name, val] for name, val in list(attrs.items())]
    return token
def __init__(self,document=None): if document: #http://bugs.python.org/issue5762 parser=etree.XMLParser(resolve_entities=False) try: if type(input) == type(""): self.tree=etree.parse(StringIO.StringIO(unescape(document,entities={""":"'"})),parser) #<?xml version='1.0' encoding='UTF-8'?> will cause a crash #lxml.etree.XMLSyntaxError: XML declaration allowed only at the start of the document, line 1, column 103 else: try: self.tree = etree.parse(StringIO.StringIO(unescape(document.toxml(),entities={""":"'"})),parser) # Not very efficient, the XML is converted to string and then back again to XML except: document=doCleanBug5762(document) self.tree = etree.parse(StringIO.StringIO(unescape(document.toxml(),entities={""":"'"})),parser) except etree.XMLSyntaxError,e: # Generic parsing error raise pywps.NoApplicableCode(e.message) self.root=self.tree.getroot() #Searching of a Envelope element (case sensitive) self.envElement=self.root.xpath("//*[contains(local-name(),'Envelope')]") #It actually retunrs the node #Check for SOAP name space self.nameSpaceSet=set(self.root.nsmap.values()) & set(soap_env_NS) self.nsIndex = soap_env_NS.index(self.nameSpaceSet.pop()) if (self.nsIndex==1): self.soapVersion=11 else: self.soapVersion=12 #Check for ExecuteProcess self.isSoapExecute=bool(self.root.xpath("//*[contains(local-name(),'ExecuteProcess')]")) # just to be certain that is a bool
def parse(client, xml_data):
    """Parse an MSNObject XML descriptor, resolving the creator contact.

    The Creator account is looked up in the client's address book (None
    when unknown). Raises ParseError for malformed XML; invalid base64
    hashes propagate from b64decode as in the original.
    """
    data = StringIO.StringIO(xml_data)
    try:
        element = ElementTree.parse(data).getroot().attrib
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        raise ParseError('Invalid MSNObject')
    try:
        creator = client.address_book.contacts.\
            search_by_account(element["Creator"]).\
            search_by_network_id(NetworkID.MSN)[0]
    except IndexError:
        # Creator not in the address book.
        creator = None
    size = int(element["Size"])
    type = int(element["Type"])  # shadows the builtin; kept for interface compat
    location = xml.unescape(element["Location"])
    friendly = base64.b64decode(xml.unescape(element["Friendly"]))
    shad = element.get("SHA1D", None)
    if shad is not None:
        shad = base64.b64decode(shad)
    shac = element.get("SHA1C", None)
    if shac is not None:
        shac = base64.b64decode(shac)
    result = MSNObject(creator, size, type, location, \
                       friendly, shad, shac)
    # Cache the raw XML for repr/round-tripping.
    result._repr = xml_data
    return result
def _parse_html_source(self, htmlSource):
    """Scrape revision-log entries from a Google Code updates page into self.log.

    Each <li> item is mined for revision, date, author and log message;
    entries are filtered by self.category and self.revision when set.
    Malformed items are skipped (best effort). (Fixes: the entity-dict
    literals were garbled in the source and are restored to
    {'&apos;': "'", '&quot;': '"'}; the bare except is narrowed to the
    exceptions the scraping can actually raise.)
    """
    # regex's
    regex_items = re.compile('<li>(.+?)</li>', re.DOTALL)
    regex_revisions = re.compile('<a class="ot-revision-link" href="/p/[^/]+/source/detail\?r=[0-9]+">([^<]+)</a>')
    regex_dates = re.compile('<span class="date below-more" title="([^"]+)"')
    regex_authors = re.compile('<a class="ot-profile-link-2" href="/u/[^/]+/">([^<]+)</a></span>')
    regex_details = re.compile('<div class="details"><span class="ot-logmessage">(.+?)</span></div>', re.DOTALL)
    # Entities Google Code leaves encoded in log messages.
    extra_entities = {"&apos;": "'", "&quot;": '"'}
    if self.category is not None:
        regex_subst = re.compile("\[%s\]" % self.category, re.IGNORECASE)
    # scrape info, then enumerate thru and combine it
    items = regex_items.findall(htmlSource)
    for item in items:
        try:
            revision = regex_revisions.findall(item)[0]
            date = regex_dates.findall(item)[0]
            author = regex_authors.findall(item)[0]
            detail = regex_details.findall(item)[0]
            # add to log
            if (self.category is not None and
                    re.findall("\[.*%s.*\]" % self.category, detail, re.IGNORECASE)):
                # Category tag is stripped out of the message body.
                cleaned = unescape(re.sub(regex_subst, "", detail).strip(), extra_entities)
                if self.revision is not None and int(revision[1:]) <= self.revision:
                    self.log += "[I]%s - %s - %s[/I]\n%s\n----\n" % (revision, date, author, cleaned)
                elif self.revision is None:
                    self.log += "[I]%s - %s - %s[/I]\n%s\n----\n" % (revision, date, author, cleaned)
            elif self.category is None:
                # If the message starts with a [tag], keep it as a prefix.
                pos = detail.find("]")
                if pos >= 0:
                    msg = unescape("%s [I]%s - %s - %s[/I]\n%s\n----\n" % (
                        detail[:pos + 1].strip(), revision, date, author,
                        detail[pos + 1:].strip()), extra_entities)
                else:
                    msg = unescape("[I]%s - %s - %s[/I]\n%s\n----\n" % (
                        revision, date, author, detail.strip()), extra_entities)
                self.log += msg
        except (IndexError, ValueError):
            # not a valid log message
            pass
def play(self, page, mode=''):
    # Resolve a playable video URL for the current XBMC list item and start
    # playback. *mode* selects how *page* is interpreted.
    if Debug: self.LOG('DEBUG: _play()\nurl: %s' % page)
    # Get current list item details...
    title = unicode(xbmc.getInfoLabel("ListItem.Title"), "utf-8")
    thumbnail = xbmc.getInfoImage("ListItem.Thumb")
    plot = unicode(xbmc.getInfoLabel("ListItem.Plot"), "utf-8")
    if mode == 'smil':
        # Fetch the SMIL manifest and assemble an rtmp URL from it.
        smil = BSS(self._get(page))
        rtmp = smil.meta['base']
        video = smil.video['src']
        swfUrl = 'http://medici.tv/medici.swf'
        # rtmpdump script for console use
        rtmpdump = "rtmpdump -r %s --swfUrl http://medici.tv/medici.swf --tcUrl '%s' --playpath '%s' -o '%s.mp4'" % \
            (rtmp, rtmp, saxutils.unescape(video), saxutils.unescape(title))
        # Build rtmp url...
        video_url = rtmp + ' swfUrl=' + swfUrl + ' tcUrl=' + rtmp + ' playpath=' + saxutils.unescape(video)
        if Debug: self.LOG('DEBUG: rtmp link details.\n\trtmp: %s\n\tswfUrl: %s\n\ttcUrl: %s\n\tplaypath: %s\n\trtmpdump: %s' % \
            (rtmp, swfUrl, rtmp, saxutils.unescape(video), rtmpdump))
    elif mode == 'rtmp_daily':
        # Page already carries the rtmp URL; strip the trailing flag.
        video_url = page.split('&rtmp=1')[0]
        if Debug: self.LOG('DEBUG: video link details.\n\turl: %s' % video_url)
    else:
        video_url = ''
        if Debug: self.LOG('DEBUG: no video link!')
        # NOTE(review): bare `raise` with no active exception triggers a
        # runtime error here — presumably an intentional hard abort; confirm.
        raise
    # only need to add label, icon and thumbnail, setInfo() and addSortMethod() takes care of label2
    listitem = xbmcgui.ListItem(title, iconImage="DefaultVideo.png", thumbnailImage=thumbnail)
    # set listitem information
    listitem.setInfo('video', {'title': title,
                               'label' : title,
                               'plot': plot,
                               'plotoutline': plot,
                               })
    # Play video...
    xbmc.Player().play(video_url , listitem)
def parse_execute_anonymous_xml(result):
    """
    Get the compile result in the xml result
    @result: execute anonymous result, it's a xml
    @return: formated string
    """
    compiled = result["compiled"]
    debugLog = result["debugLog"]
    view_result = ''
    if compiled == "true":
        view_result = debugLog
    elif compiled == "false":
        line = result["line"]
        column = result["column"]
        compileProblem = result["compileProblem"]
        view_result = compileProblem + " at line " + line +\
            " column " + column + "\n" + "-" * 100 + "\n" + debugLog
    print(view_result)
    # Decode the entities the API leaves encoded, then URL-unquote.
    # (Entity dict repaired — the original literal was garbled by a prior
    # unescape of the source itself.)
    extra_entities = {"&apos;": "'", "&quot;": '"'}
    if is_python3x():
        view_result = urllib.parse.unquote(unescape(view_result, extra_entities))
    else:
        view_result = urllib.unquote(unescape(view_result, extra_entities))
    return view_result
def _ofx_txn(self, txn):
    """Build an OFX STMTTRN aggregate from a parsed transaction dict.

    Fields absent from *txn* (per self._check_field) are omitted. Free-text
    fields (Payee/Memo/Category) are unescaped then re-escaped so already-
    escaped input is not double-escaped. (Refactor: eight copy-pasted
    blocks collapsed into one table-driven loop; field order preserved.)
    """
    # (field key, OFX element constructor, needs escape round-trip)
    field_spec = (
        ("Type", TRNTYPE, False),
        ("Date", DTPOSTED, False),
        ("Amount", TRNAMT, False),
        ("Number", CHECKNUM, False),
        ("ID", FITID, False),
        ("Payee", NAME, True),
        ("Memo", MEMO, True),
        ("Category", CATEGORY, True),
    )
    fields = []
    for key, element, sanitize in field_spec:
        if self._check_field(key, txn):
            value = txn[key].strip()
            if sanitize:
                value = sax.escape(sax.unescape(value))
            fields.append(element(value))
    return STMTTRN(*fields)
def parsePlaylistsJSON(doc):
    """ Parse Playlists JSON using eval to [ [], [], [] ... ] """
    log("parsePlaylistsJSON()")
    data = []
    try:
        # evals to [ {}, {}, .. ]
        # SECURITY NOTE(review): eval() on service-supplied text can execute
        # arbitrary code; json.loads would be safer, but it maps null to
        # None while the replace() below maps it to '' — switching would
        # change the returned tuples. Flagged rather than changed.
        items = eval( doc.replace('null', '\"\"' ) )
        # convert to [ [], [], .. ] as its easier to unpack without key knowlegde
        for item in items:
            try:
                updated = item.get('updated_at','')[:10]      # yyyy/mm/dd
            except:
                updated = ''
            data.append( (unescape(item.get('title','')), \
                          str(item.get('id','')), \
                          unescape(item.get('description','')), \
                          item.get('contents_count',0), \
                          updated, \
                          item.get('icon_url','')) )
        data.sort()
    except:
        # Best-effort: any malformed payload yields an empty list.
        traceback.print_exc()
        data = []
    return data
def flow():
    # Render the album cover-flow widget (ContentFlow) as an HTML fragment
    # from all albums in the database. (Python 2: bare `print` statements
    # below are debug output.)
    itemcount = 0
    out = ''
    out += '<div id="contentflow" class="ContentFlow">'
    out += '<div class="loadIndicator"><div class="indicator"></div></div>'
    out += '<div class="flow" id="flowtarget">'
    for row in db().select(db.album.title,db.album.creator,db.album.artname,db.album.data, orderby=db.album.title.upper()):
        print row.title
        print row.creator
        print row.artname
        print row.data
        title = unescape(row.title, unescape_entities_quotepos)
        # Titles stored as '<...>' are displayed as '[...]'
        if title.startswith('<') and title.endswith('>'):
            title = '[' + title[1:-1] + ']'
        creator = unescape(row.creator, unescape_entities_quotepos)
        outtitle = title + "<br/>" + creator
        outtitle = escape(outtitle, escape_entities_quotepos)
        data = unescape(row.data, unescape_entities_quotepos)
        # data is '::'-separated; field 0 is the id, field 2 the menu.
        datas = data.split('::')
        id = datas[0]
        menu = datas[2]
        # if itemcount > 20: continue
        out += '<img class="item" id="' + id + '" menu="' + menu + '" src="' + row.artname + '" title="' + outtitle + '" data="' + data + '"></img>'
        itemcount += 1
    out += '</div>'
    out += '<div class="globalCaption"></div><br/><br/><br/>'
    out += '<div class="scrollbar"><div class="slider"><div class="position"></div></div></div>'
    out += '</div>'
    # print out
    return out
def unescape_html(input_file, output_file=os.getcwd() + '/'):
    """Unescape HTML entities in *input_file* line by line.

    Each unescaped line is passed to save_result_file, targeting
    '<output_file><input_file>_escape' when *output_file* keeps its default
    (the working directory at import time), otherwise '<output_file>_escape'.
    (Fixes: replaced the Python-2-only file()/xreadlines() pair with a
    `with open(...)` loop so the handle is always closed.)
    """
    default_dir = os.getcwd() + '/'
    with open(input_file, 'r') as f:
        for line in f:
            if output_file == default_dir:
                save_result_file(unescape(line, html_unescaped_dict),
                                 output_file + input_file + '_escape')
            else:
                save_result_file(unescape(line, html_unescaped_dict),
                                 output_file + '_escape')
def opmlimport(feeds, args):
    """Import configuration from OPML.

    Reads OPML from args.file (or stdin), creates a feed for every outline
    with an xmlUrl, and saves the feed list. Raises OPMLReadError (chained)
    when the document cannot be parsed. (Fix: the input file was leaked
    when parsing raised; it is now closed in a finally block.)
    """
    if args.file:
        _LOG.info('importing feeds from {}'.format(args.file))
        f = open(args.file, 'rb')
    else:
        _LOG.info('importing feeds from stdin')
        f = _sys.stdin
    try:
        try:
            dom = _minidom.parse(f)
            new_feeds = dom.getElementsByTagName('outline')
        except Exception as e:
            raise _error.OPMLReadError() from e
    finally:
        # Only close handles we opened ourselves, never stdin.
        if args.file:
            f.close()
    # Feed names are slugs derived from the OPML 'text' attribute.
    name_slug_regexp = _re.compile('[^\w\d.-]+')
    for feed in new_feeds:
        if feed.hasAttribute('xmlUrl'):
            url = _saxutils.unescape(feed.getAttribute('xmlUrl'))
            name = None
            if feed.hasAttribute('text'):
                text = _saxutils.unescape(feed.getAttribute('text'))
                if text != url:
                    name = name_slug_regexp.sub('-', text)
            feed = feeds.new_feed(name=name, url=url)
            _LOG.info('add new feed {}'.format(feed))
    feeds.save()
def AnnoFileParseOutput(annofile):
    """
    A generator that extracts log output from an emake annotation file,
    performs an XML-unescape on it and "yields" it line by line.

    *annofile* may be a path or an open file object. (Fix: duration and
    availability now have defaults so the closing summary can't raise
    NameError when those <metric> lines are absent.)
    """
    if isinstance(annofile, str):
        af = open(annofile, "r")
    else:
        af = annofile
    inOutput = False
    buildid = ""
    duration = "0:0"          # default when the duration metric is missing
    availability = "unknown"  # default when clusterAvailability is missing
    for line in af:
        line = line.rstrip("\n\r")
        if not inOutput:
            o = string_following("<output>", line)
            if not o:
                o = string_following('<output src="prog">', line)
            if o:
                inOutput = True
                yield unescape(o)+'\n'
                continue
            o = string_following('<build id="', line)
            if o:
                buildid = o[:o.find('"')]
                yield "Starting build: "+buildid+"\n"
                continue
            o = string_following('<metric name="duration">', line)
            if o:
                secs = int(o[:o.find('<')])
                if secs != 0:
                    # // keeps integer minutes under Python 3 as well.
                    duration = "%d:%d" % (secs // 60, secs % 60)
                else:
                    duration = "0:0"
                continue
            o = string_following('<metric name="clusterAvailability">', line)
            if o:
                availability = o[:o.find('<')]
                continue
        else:
            end_output = line.find("</output>")
            if end_output != -1:
                line = line[:end_output]
                inOutput = False
            if line != "":
                yield unescape(line)+'\n'
    yield "Finished build: %s Duration: %s (m:s) Cluster availability: %s%%\n" %(buildid,duration,availability)
    # NOTE: only runs when the generator is exhausted; also closes
    # caller-supplied file objects, preserving the original behaviour.
    af.close()
def show_byArtist(djSets):
    """Adds the set by artist list to XBMC"""
    handle = int(sys.argv[1])
    total = len(djSets)
    for entry in djSets:
        label = unescape(entry["title"])
        item = xbmcgui.ListItem(label=label)
        item.setInfo(type='music', infoLabels={"title": label})
        target = "%s?mode=ShowSet&url=%s" % (sys.argv[0], entry["path"])
        # Stop adding as soon as XBMC rejects an item.
        if not xbmcplugin.addDirectoryItem(handle, target, item, True, total):
            break
def xml_to_plain_text(xml_string):
    """Strip tags from *xml_string* and decode entities, returning stripped text.

    Tags (and any whitespace before them) become newlines; the entity pass
    runs twice for weird situations where things are double escaped.
    (Fix: the extra-entities literal was garbled in the source and is
    restored to numeric apostrophe and named quote.)
    """
    remove_tags = re.compile(r'\s*<.*?>', re.DOTALL | re.UNICODE)
    plain_text = remove_tags.sub('\n', xml_string)
    extra_entities = {'&#39;': "'", '&quot;': '"'}
    plain_text = xmlutils.unescape(plain_text, extra_entities)
    plain_text = xmlutils.unescape(plain_text, extra_entities)
    return plain_text.strip()
def show_sets(djSets):
    """Adds the set list to XBMC"""
    handle = int(sys.argv[1])
    total = len(djSets)
    for entry in djSets:
        label = unescape(entry["title"])
        item = xbmcgui.ListItem(label=label, thumbnailImage=entry["thumb_url"])
        item.setInfo(type='music', infoLabels={"title": label})
        target = "%s?mode=SetsByArtist&url=%s" % (sys.argv[0], entry["browse_path"])
        xbmcplugin.addDirectoryItem(handle, target, item, True, total)
def show_all_shows(shows):
    """Adds the show list to XBMC"""
    handle = int(sys.argv[1])
    total = len(shows)
    for show in shows:
        label = unescape(show["title"])
        item = xbmcgui.ListItem(label=label)
        item.setInfo(type='movie', infoLabels={"title": label})
        target = "%s?mode=Episodes&url=%s" % (sys.argv[0], show["url"])
        xbmcplugin.addDirectoryItem(handle, target, item, True, total)
########################################################### # Récupère le liens url2 = "http://release.theplatform.com/content.select?pid=%s" % id print "" print url2 xml = urllib2.urlopen(url2).read() ind1 = int(xml.find("rtmp")) ind2 = int(xml.find(".mp4")) # unescape converts entity references back to the corresponding characters # http://wiki.python.org/moin/EscapingXml from xml.sax.saxutils import unescape target = unescape(xml[ind1:ind2 + len('.mp4')]) print "" if len(target) > 0: print "Trouvé la source:\n\n", target else: print "Ne semble pas avoir trouvé la source!" print "" exit() print "" ########################################################### # Crée le nom du fichier téléchargé à partir de l'url ss = url.split("/") nom = ss[-2] + '_' + ss[-1] + '.flv' ###########################################################
def test_unescape_extra():
    """Check that unescape() applies caller-supplied extra entities.

    (Fix: the original literal was garbled — the &aring; entity had been
    decoded in place, making both strings identical and the check vacuous.)
    """
    return unescape("Hei p&aring; deg", {"&aring;": "å"}) == "Hei på deg"
def test_unescape_basic():
    """Check that unescape() decodes the built-in &amp; entity.

    (Fix: the original literal was garbled — the entity had been decoded
    in place, making both strings identical and the check vacuous.)
    """
    return unescape("Donald Duck &amp; Co") == "Donald Duck & Co"
def chars_keyword(self, txt): parent, pattr = self.__stack[-2] if not parent == "keywordlist": return self.__keywords[-1] += unescape(txt)
def searchState(request):
    """XML API endpoint: look up jurisdictions by ``state``, ``parent_id``
    or ``jurisdiction_id`` GET parameters and render the matching rows as
    hand-built XML via the ``website/api.xml`` template.

    NOTE(review): all queries are built by string concatenation guarded
    only by conn.escape_string(); parameterized queries
    (cursor.execute(sql, params)) would be safer against SQL injection.
    """
    conn = MySQLdb.connect(host=settings.DATABASES['default']['HOST'],
                           user=settings.DATABASES['default']['USER'],
                           passwd=settings.DATABASES['default']['PASSWORD'],
                           db=settings.DATABASES['default']['NAME'],
                           cursorclass=MySQLdb.cursors.DictCursor)
    cursor = conn.cursor()
    error = None
    #state = request.args.get('state')
    #parent_id = request.args.get('parent_id')
    #jurisdiction_id = request.args.get('jurisdiction_id')
    state = request.GET.get('state', False)
    parent_id = request.GET.get('parent_id', False)
    jurisdiction_id = request.GET.get('jurisdiction_id', False)
    jurisdiction_type = request.GET.get('jurisdiction_type', False)
    if state:
        if jurisdiction_type:
            query = "select id,name,jurisdiction_type,parent_id,state,latitude,longitude from website_jurisdiction where `state` = '" + conn.escape_string(str(state)) + "' AND `jurisdiction_type` = '" + conn.escape_string(str(jurisdiction_type)) + "'"
        else:
            query = "select id,name,jurisdiction_type,parent_id,state,latitude,longitude from website_jurisdiction where `state` = '" + conn.escape_string(str(state)) + "'"
        cursor.execute(query)
        output = "<result>\n"
        # we need to account for empty result sets.
        if cursor.fetchone():
            row = cursor.fetchall()
            for value in row:
                output += "\t<jurisdiction>\n"
                for key, value2 in value.items():
                    output += "\t\t<" + saxutils.escape(saxutils.unescape(str(key))) + ">" + saxutils.escape(saxutils.unescape(str(value2))) + "</" + saxutils.escape(saxutils.unescape(str(key))) + ">\n"
                output += "\t</jurisdiction>\n"
        else:
            output += "\t<error>Jurisdictions Matching Your Criteria Were Not Found</error>"
        output += "</result>"
    elif parent_id:
        cursor.execute("SELECT * FROM website_jurisdiction where `parent_id` = " + conn.escape_string(str(parent_id)))
        if cursor.fetchone():
            row = cursor.fetchall()
            output = "<result>\n"
            for value in row:
                output += "\t<jurisdiction>\n"
                for key, value2 in value.items():
                    output += "\t\t<" + saxutils.escape(saxutils.unescape(str(key))) + ">" + saxutils.escape(saxutils.unescape(str(value2))) + "</" + saxutils.escape(saxutils.unescape(str(key))) + ">\n"
                output += "\t</jurisdiction>\n"
            output += "</result>"
        else:
            output = "<result>\n"
            output += "\t<error>No Jurisdictions Matching Your Criteria Were Found</error>"
            output += "</result>"
    elif jurisdiction_id:
        strippedDescriptionJurisdiction = {}
        strippedDescriptionQuestions = {}
        strippedDescriptionAnswers = {}
        strippedDescriptionComments = {}
        strippedDescriptionVotes = {}
        buildDescriptionJurisdiction = {}
        buildDescriptionAnswers = {}
        buildDescriptionQuestions = {}
        buildDescriptionComments = {}
        buildDescriptionVotes = {}
        buildAnswerID = []
        buildQuestionID = []
        buildRelationID = {}
        buildJurisdictionAnswers = {}
        jurisdictionQuestions = AutoVivification()
        oldkey = []
        newkey = []
        queryJurisdiction = "SELECT * FROM website_jurisdiction where `id` = '" + conn.escape_string(str(jurisdiction_id)) + "'"
        cursor.execute(queryJurisdiction)
        jurisdictionInfo = cursor.fetchone()
        if jurisdictionInfo != None:
            # Latest approved answer per question (newest modify_datetime wins),
            # excluding 'CF' form-type questions.
            queryAnswers = "SELECT * FROM (SELECT website_answerreference.id AS id, website_answerreference.question_id AS question_id, website_answerreference.value AS value, website_answerreference.file_upload AS file_upload, website_answerreference.create_datetime AS create_datetime, website_answerreference.modify_datetime AS modify_datetime, website_answerreference.jurisdiction_id AS jurisdiction_id, website_answerreference.is_current AS is_current, website_answerreference.rating AS rating, website_answerreference.is_callout AS is_callout, website_answerreference.rating_status AS rating_status, website_answerreference.approval_status AS approval_status, website_answerreference.creator_id AS creator_id, website_answerreference.migrated_answer_id AS migrated_answer_id, website_answerreference.status_datetime AS status_datetime, website_answerreference.organization_id AS organization_id FROM website_answerreference, website_question WHERE (website_question.id = website_answerreference.question_id) AND jurisdiction_id = '" + conn.escape_string(str(jurisdiction_id)) + "' AND approval_status = 'A' AND website_question.form_type != 'CF' ORDER BY question_id ASC, modify_datetime DESC) as tempTable GROUP BY question_id ASC"
            cursor.execute(queryAnswers)
            jurisdictionAnswers = cursor.fetchall()
            #find the table describers
            cursor.execute("DESCRIBE website_jurisdiction")
            descriptionJurisdiction = cursor.fetchall()
            cursor.execute("DESCRIBE website_answerreference")
            descriptionAnswers = cursor.fetchall()
            cursor.execute("DESCRIBE website_comment")
            descriptionComments = cursor.fetchall()
            cursor.execute("DESCRIBE website_question")
            descriptionQuestions = cursor.fetchall()
            output = "<result>\n"
            output += "\t<jurisdiction>\n"
            # Map column name -> column type for each described table.
            c = 0
            i = len(descriptionJurisdiction)
            while c < i:
                strippedDescriptionJurisdiction[descriptionJurisdiction[c]["Field"]] = descriptionJurisdiction[c]["Type"]
                c = c + 1
            c = 0
            i = len(strippedDescriptionJurisdiction)
            # Pre-build the opening tag (with type attribute) per column.
            for key, value in strippedDescriptionJurisdiction.items():
                buildDescriptionJurisdiction[key] = "\t\t<" + str(key) + " type='" + str(value) + "'>"
                c = c + 1
            for key in jurisdictionInfo.keys():
                output += str(buildDescriptionJurisdiction[key]) + saxutils.escape(saxutils.unescape(str(jurisdictionInfo[key]))) + "</" + saxutils.escape(saxutils.unescape(key)) + ">\n"
            if jurisdictionAnswers:
                for value in jurisdictionAnswers:
                    buildQuestionID.append(value.get('question_id'))
                    buildRelationID[value.get('id')] = value.get('question_id')
            else:
                pass
            #build question query
            queryQuestions = "SELECT * FROM website_question WHERE "
            count = 0
            for value in buildQuestionID:
                if count < 1:
                    queryQuestions += "`id` = '" + conn.escape_string(str(value)) + "'"
                    count = 1
                else:
                    queryQuestions += " OR `id` = '" + conn.escape_string(str(value)) + "'"
            if buildQuestionID:
                cursor.execute(queryQuestions)
                jurisdictionQuestions = cursor.fetchall()
            else:
                pass
            #build comment query
            queryComments = "SELECT * FROM website_comment WHERE "
            count = 0
            for value in buildRelationID:
                if count < 1:
                    queryComments += "`entity_id` = '" + conn.escape_string(str(value)) + "'"
                    count = 1
                else:
                    queryComments += " OR `entity_id` = '" + conn.escape_string(str(value)) + "'"
            if buildRelationID:
                cursor.execute(queryComments)
                jurisdictionComments = cursor.fetchall()
            else:
                pass
            if buildQuestionID:
                c = 0
                i = len(descriptionQuestions)
                while c < i:
                    strippedDescriptionQuestions[descriptionQuestions[c]["Field"]] = descriptionQuestions[c]["Type"]
                    c = c + 1
                c = 0
                i = len(descriptionAnswers)
                while c < i:
                    strippedDescriptionAnswers[descriptionAnswers[c]["Field"]] = descriptionAnswers[c]["Type"]
                    c = c + 1
                c = 0
                i = len(descriptionComments)
                while c < i:
                    strippedDescriptionComments[descriptionComments[c]["Field"]] = descriptionComments[c]["Type"]
                    c = c + 1
                c = 0
                i = len(strippedDescriptionAnswers)
                for key, value in strippedDescriptionAnswers.items():
                    buildDescriptionAnswers[key] = "\t\t\t\t<" + str(key) + " type='" + str(value) + "'>"
                    c = c + 1
                c = 0
                i = len(strippedDescriptionQuestions)
                for key, value in strippedDescriptionQuestions.items():
                    buildDescriptionQuestions[key] = "\t\t\t<" + str(key) + " type='" + str(value) + "'>"
                    c = c + 1
                c = 0
                i = len(strippedDescriptionComments)
                for key, value in strippedDescriptionComments.items():
                    buildDescriptionComments[key] = "\t\t\t\t\t<" + str(key) + " type='" + str(value) + "'>"
                    c = c + 1
                buildCount = len(buildRelationID)
                buildInc = 0
                while buildCount > buildInc:
                    output += "\t\t<question>\n"
                    try:
                        if jurisdictionQuestions:
                            for key, value in jurisdictionQuestions[buildInc].items():
                                raw = str(jurisdictionQuestions[buildInc][key])
                                # Pre-escape bare ampersands so unescape/escape
                                # round-trips cleanly (was corrupted to a no-op
                                # replace("&", "&") in the checked-in copy).
                                phased = raw.replace("&", "&amp;")
                                output += str(buildDescriptionQuestions[key]) + saxutils.escape(saxutils.unescape(phased)) + "</" + saxutils.escape(saxutils.unescape(key)) + ">\n"
                        output += "\t\t\t<answer>\n"
                        if jurisdictionAnswers:
                            for key, value in jurisdictionAnswers[buildInc].items():
                                if key == "id":
                                    entity_id = value
                                raw = str(jurisdictionAnswers[buildInc][key])
                                phased = raw.replace("&", "&amp;")
                                output += str(buildDescriptionAnswers[key]) + saxutils.escape(saxutils.unescape(phased)) + "</" + saxutils.escape(saxutils.unescape(key)) + ">\n"
                            commentInc = 0
                            commentCount = len(jurisdictionComments)
                            while commentInc < commentCount:
                                try:
                                    if jurisdictionComments[commentInc]["entity_id"] == entity_id:
                                        output += "\t\t\t\t<comments>\n"
                                        for key, value in jurisdictionComments[commentInc].items():
                                            raw = str(jurisdictionComments[commentInc][key])
                                            phased = raw.replace("&", "&amp;")
                                            output += str(buildDescriptionComments[key]) + saxutils.escape(saxutils.unescape(phased)) + "</" + saxutils.escape(saxutils.unescape(key)) + ">\n"
                                        output += "\t\t\t\t</comments>\n"
                                except IndexError:
                                    pass
                                commentInc = commentInc + 1
                        output += "\t\t\t</answer>\n"
                    except IndexError:
                        pass
                    output += "\t\t</question>\n"
                    buildInc = buildInc + 1
            else:
                pass
            output += "\t</jurisdiction>\n"
            output += "</result>"
            output = output.replace("\t\t</question>\n\t\t<question>\n\t</jurisdiction>\n</result>", "\t\t</question>\n\t</jurisdiction>\n</result>")
        else:
            #jurisdiction does not exist
            output = "<result>\n"
            output += "\t<error>No Jurisdictions Matching Your Criteria Were Found</error>"
            output += "</result>"
    else:
        # No filter given: list the distinct states that have zipcodes.
        cursor.execute("SELECT DISTINCT(state) FROM website_zipcode")
        row = cursor.fetchall()
        output = "<result>\n"
        output += "\t<states>\n"
        for value in row:
            for key, value2 in value.items():
                output += "\t\t<" + saxutils.escape(saxutils.unescape(str(key))) + ">" + saxutils.escape(saxutils.unescape(str(value2))) + "</" + saxutils.escape(saxutils.unescape(str(key))) + ">\n"
        output += "\t</states>\n"
        output += "</result>"
    data = {}
    data['xml'] = output
    cursor.close()
    conn.close()
    requestProcessor = HttpRequestProcessor(request)
    return requestProcessor.render_to_response(request, 'website/api.xml', data, 'application/xml')
def parameter_unescape(text):
    """Reverse XML escaping on *text* using the UNQUOTEDATTRS entity map."""
    result = saxutils.unescape(text, UNQUOTEDATTRS)
    return result
def chars_ends(self, txt):
    """Accumulate XML-unescaped character data into the end pattern."""
    decoded = unescape(txt)
    self.__end_pattern = self.__end_pattern + decoded
def __init__(self, phonenumber=None, text=None):
    """Store the phone number and the XML-unescaped message text.

    An empty/None *text* is normalized to the empty string.
    """
    self.phonenumber = phonenumber
    if text:
        self.text = unescape(text)
    else:
        self.text = ''
def html_unescape(text):
    """Undo XML escaping of ``&amp;``, ``&lt;`` and ``&gt;`` in *text*."""
    result = saxutils.unescape(text)
    return result
def test_unescape_extra(self): self.assertEqual(unescape("Hei på deg", {"å": "å"}), "Hei på deg")
def test_unescape_amp_extra(self): self.assertEqual(unescape("&foo;", {"&foo;": "splat"}), "&foo;")
def test_unescape_all(self): self.assertEqual(unescape("<Donald Duck & Co>"), "<Donald Duck & Co>")
def test_unescape_basic(self): self.assertEqual(unescape("Donald Duck & Co"), "Donald Duck & Co")
def unescape_full(text):
    """Unescape the default XML entities plus ``&apos;`` and ``&quot;``."""
    return unescape(text, {"&apos;": "'", "&quot;": '"'})
def _xml_unescape(s):
    """Unescape XML entities in *s*, including ``&quot;``.

    Raises Error if any named entity reference survives the pass,
    i.e. something we do not have a mapping for.
    """
    from xml.sax.saxutils import unescape
    u = unescape(s, {"&quot;": '"'})
    # Raw string: \w in a plain literal is an invalid escape sequence.
    if re.search(r"&\w+;", u):
        raise Error("XXX missed XML-unescaping something in: %r" % u)
    return u
def _save_file(file_content):
    """Save the decrypted content as result.xml in the working directory.

    saxutils.unescape leaves ``&quot;`` alone by default, so any remaining
    quote entities are stripped explicitly before writing.
    """
    with open('result.xml', 'w') as file_handle:
        file_handle.write(unescape(file_content).replace("&quot;", ''))
def task_from_xml(task, xmlnode):
    """Populate *task* from a serialized <task> DOM node and return it.

    Reads uuid/status/tags from attributes and title/dates/content/
    subtasks/attributes from child nodes, calling the corresponding
    task setters in order.  `read_node`, `get_text`, `Date` are project
    helpers defined elsewhere in this module — semantics assumed.
    """
    # print "********************************"
    # print xmlnode.toprettyxml()
    task.set_uuid(xmlnode.getAttribute("uuid"))
    task.set_title(read_node(xmlnode, "title"))
    status = xmlnode.getAttribute("status")
    donedate = Date.parse(read_node(xmlnode, "donedate"))
    task.set_status(status, donedate=donedate)
    duedate = Date(read_node(xmlnode, "duedate"))
    task.set_due_date(duedate)
    startdate = Date(read_node(xmlnode, "startdate"))
    task.set_start_date(startdate)
    # Only set the modified timestamp when the node is non-empty.
    modified = read_node(xmlnode, "modified")
    if modified != "":
        modified = datetime.strptime(modified, "%Y-%m-%dT%H:%M:%S")
        task.set_modified(modified)
    # Tags come as a comma-separated attribute; blanks are dropped.
    tags = xmlnode.getAttribute("tags").replace(' ', '')
    tags = (tag for tag in tags.split(',') if tag.strip() != "")
    for tag in tags:
        # FIXME why unescape????
        task.tag_added(saxutils.unescape(tag))
    # FIXME why we need to convert that through an XML?
    content = read_node(xmlnode, "content")
    if content != "":
        content = "<content>%s</content>" % content
        content = minidom.parseString(content).firstChild.toxml()
        task.set_text(content)
    for subtask in xmlnode.getElementsByTagName("subtask"):
        task.add_child(get_text(subtask))
    for attr in xmlnode.getElementsByTagName("attribute"):
        if len(attr.childNodes) > 0:
            value = get_text(attr)
        else:
            value = ""
        key = attr.getAttribute("key")
        namespace = attr.getAttribute("namespace")
        task.set_attribute(key, value, namespace=namespace)
    # FIXME do we need remote task ids? I don't think so
    # FIXME if so => rework them into a more usable structure!!!
    # (like attributes)
    # REMOTE TASK IDS
    '''
    remote_ids_list = xmlnode.getElementsByTagName("task-remote-ids")
    for remote_id in remote_ids_list:
        if remote_id.childNodes:
            node = remote_id.childNodes[0]
            backend_id = node.firstChild.nodeValue
            remote_task_id = node.childNodes[1].firstChild.nodeValue
            task.add_remote_id(backend_id, remote_task_id)
    '''
    return task
def _read(elem):
    """Return the element's text with XML entities unescaped, or None
    when the element has no (or empty) text."""
    raw = elem.text
    if not raw:
        return None
    return saxutils.unescape(raw)
predictions = dataframe["predictions"].tolist() # ------------------------------------------------------------- # Score model if not clustering and not anomaly # # if alg.type != 'clustering' and alg.type != 'anomaly': # score = loaded_model.score(dataframe_test, dataframe_label) # print("MODEL SCORE: %.2f" % score) # ------------------------------------------------------------- # CLASSIFICATION AND ANOMALY DETECTION SCORE # if alg.type == 'classification' or alg.type == 'anomaly': reponse_good = '✅' reponse_bad = '❌' dataframe['results'] = np.where( (dataframe[LABEL_COLUMN] == dataframe['predictions']), saxutils.unescape(reponse_good), saxutils.unescape(reponse_bad)) accuracy_score_result = accuracy_score(dataframe_label.values.ravel(), predictions) precision_score_result = precision_score( dataframe_label.values.ravel(), predictions, average='micro') confusion_matrix_result = confusion_matrix( dataframe_label.values.ravel(), predictions) print( "********************** CLASSIFICATION SCORE **********************" ) print("ACCURACY SCORE: %.2f" % accuracy_score_result) print("PRECISION SCORE: %.2f" % precision_score_result) print("CONFUSION MATRIX:\n%s" % confusion_matrix_result) print( "*******************************************************************" )
def __normFormulaExp__(self, formulaExp):
    """Normalize an escaped formula expression.

    Undo standard XML escaping, then map the malformed ``&quote;`` entity
    (which saxutils.unescape leaves alone) to '"' and the numeric
    ``&#10;`` reference (also untouched by saxutils) to a newline.
    """
    tempExp = saxutils.unescape(formulaExp)
    tempExp = tempExp.replace('&quote;', '"')
    tempExp = tempExp.replace('&#10;', '\n')
    return tempExp
def chars_starts(self, txt):
    """Accumulate XML-unescaped character data into the start pattern."""
    decoded = unescape(txt)
    self.__start_pattern = self.__start_pattern + decoded
import html

# Demonstrate HTML/XML escaping and unescaping (Python Cookbook style).
s = 'Elements are written as "<tag>text</tag>".'
print(s)
print(html.escape(s))
# quote=False leaves double quotes untouched.
print(html.escape(s, quote=False))
print('*' * 50)

# Non-ASCII characters can be emitted as character references when
# encoding to a pure-ASCII byte string.
s = 'Spicy Jalapeño'
res = s.encode('ascii', errors='xmlcharrefreplace')
print(res)

# html.unescape replaces HTMLParser().unescape, which was deprecated and
# removed in Python 3.9.
s = 'Spicy &quot;Jalape&ntilde;o&quot;.'
print(html.unescape(s))

t = 'The prompt is &gt;&gt;&gt;'
from xml.sax.saxutils import unescape
print(unescape(t))
def chars_pattern(self, txt):
    """Accumulate XML-unescaped character data into the pattern."""
    decoded = unescape(txt)
    self.__pattern = self.__pattern + decoded
def unesc(text):
    """Unescape XML entities in *text*, including &quot; and &apos;.

    Falsy values (None, '') are returned unchanged.  The parameter was
    renamed from ``str`` to avoid shadowing the builtin.
    """
    if not text:
        return text
    return su.unescape(text, {'&quot;': '"', '&apos;': "'"})
def test_unescape_amp_extra(): 
    """&amp; must not be re-expanded against the extra entity map."""
    return unescape("&amp;foo;", {"&foo;": "splat"}) == "&foo;"
# parse the incoming requestInfo XML string try: requestXml = et.fromstring(requestInfo) except Exception, e: logger.error('Python REST dispatcher received invalid XML from HTTP server') logger.debug('XML DUMP >>>\n%s' % requestInfo) logger.exception(e) raise requestDict = {} try: httpVerb = requestXml.findtext('method').upper() explicitOutMode = su.unescape(requestXml.findtext('output_mode/explicit_request')).lower() requestDict.update({ 'userName': su.unescape(requestXml.findtext('user/name')), 'userId': su.unescape(requestXml.findtext('user/id')), 'remoteAddr': su.unescape(requestXml.findtext('connectionData/ip')), 'output_mode': su.unescape(requestXml.findtext('output_mode/mode')), 'explicit_output_mode': explicitOutMode == "true" and True or False, 'path': REST_ROOT_PATH + '/' + su.unescape(requestXml.findtext('path')).strip('/'), 'headers': {}, 'query': {}, 'form': {}, 'payload': su.unescape(requestXml.findtext('payload')), 'restmap': {} }) for node in requestXml.findall('headers/header'): requestDict['headers'][node.get('key','').lower()] = su.unescape(node.text or "")
def test_unescape_all(): 
    """All three default entities (&lt;, &amp;, &gt;) are unescaped."""
    return unescape("&lt;Donald Duck &amp; Co&gt;") == "<Donald Duck & Co>"
def is_active(self):
    """Return True when the TV's currently selected source ID equals
    this source's id."""
    response = self._parent.MainTVAgent2.GetSourceList()
    xml_payload = saxutils.unescape(response[1])
    doc = etree.fromstring(xml_payload)
    return int(doc.find('ID').text) == self.id
def clean_value(self, value):
    """Strip and XML-unescape string values; return anything else untouched."""
    if not isinstance(value, basestring):
        return value
    return unescape(value.strip())
def unquote(txt):
    """XML-unescape *txt*, additionally mapping &apos; and &quot;."""
    from xml.sax.saxutils import unescape
    return unescape(txt, {"&apos;": "'", "&quot;": '"'})