def process(self, wealth, imported_file, account=None):
    gzip_file = GzipFile(fileobj=imported_file.file)
    decompressed = gzip_file.read()
    parser = make_parser()
    model = {
        'accounts': {},
        'categories': {},
        'currency': [],
        'transactions': [],
        'category_splits': [],
        'account_splits': [],
        'wealth': wealth
    }
    handler = KMYXmlHandler(model)
    parser.setContentHandler(handler)
    parseString(decompressed, handler)
    accounts = model['accounts']
    categories = self.__build_category_tree(model['categories'])
    transactions = model['transactions']
    account_splits = model['account_splits']
    category_splits = model['category_splits']
    # if main currencies differ, re-calculate
    if model['currency'] != model['wealth'].currency:
        exchange_rate = get_rate(model['currency'], model['wealth'].currency)
        for split in category_splits:
            split.amount *= exchange_rate
    self.accounts = accounts.values()
    self.categories = categories.values()
    self.transactions = [transaction for transaction in transactions
                         if transaction.date]
    self.category_splits = [split for split in category_splits
                            if split.category]
    self.account_splits = [split for split in account_splits
                           if split.account]
    self.currency = model['currency']
def __init__(self, msw):
    """
    A list of objects in the iServer database.

    The msw argument can either be a string containing the XML contents of the
    database or a session object.  If a session is passed, it will be used to
    get the database.

    Each type is a callable object (for use in XML parser dispatching) and has
    an associated long name and a more convenient short name for use in test
    scripts.  Beware of the short names when using 'from iserver import *' -
    they are common and could collide with test script objects.  First
    character is capitalized.
    """
    if type(msw) is ListType:
        # A text stream created by file.readlines()
        self.rawXml = string.join(msw)
    else:
        self.msw = msw
        self._getDatabaseFromMSW()
    parser = sax.make_parser()
    handler = IServerHandler()
    sax.parseString(self.rawXml, handler)
    for i in handler.objList:
        self.append(i)
def test_process_guid_feed(self):
    p = FakeItemProcessor()
    handler = RssHandler(p)
    parseString(guid_rss, handler)
    self.assertEqual(2, p.process_calls)
    self.assertEqual(p.last_guid, 'http://example/b')
    self.assertEqual(p.last_url, 'http://example/?page=download&tid=413995')
def test_case1(self):
    str1 = bytes(
        '<?xml version="1.0" encoding="utf-8" standalone="no" ?>\n'
        '<root>\r\n'
        ' <value1>5</value1>\r\n'
        ' <value2>1.23</value2>\n'
        ' <section first="1" second="long string">\n'
        ' <value3>on</value3>\n'
        ' <value4>1</value4>\n'
        ' <value4>2</value4>\n'
        ' <value4>42</value4>\n'
        ' </section>\n'
        '</root>'.encode("utf-8")
    )
    result = str(
        '<?xml version="1.0" encoding="utf-8"?>\n'
        '<root>\n'
        ' <value1>5</value1>\n'
        ' <value2>1.23</value2>\n'
        ' <section first="1" second="long string">\n'
        ' <value3>on</value3>\n'
        ' <value4>1</value4>\n'
        ' <value4>2</value4>\n'
        ' <value4>42</value4>\n'
        ' </section>\n'
        '</root>\n'
    )
    parseString(str1, XmlReader(self.handler1))
    #print( self.stdout.getvalue() )
    self.assertEqual(result, self.stdout.getvalue())
def parseInfo(self):
    infoXml = self.checkOutput((self.svnCmd, "info", "--xml"))  # bytes in python 3.
    infoHandler = SvnInfoHandler()
    sax.parseString(infoXml, infoHandler)
    self.uuid = infoHandler.uuid
    self.url = infoHandler.url
    self.lastChangeRev = infoHandler.getLastChangeRevision()
def parseToolXML( self, xml, encoding=None ):
    """ Pseudo API.
    """
    parser = _TypesToolParser( encoding )
    parseString( xml, parser )
    return parser._types
def check(self, dialog_on_none=False):
    """
    This function should be called whenever a version test is desired
    (e.g. in the mainloop on a timer). If a dialog is requested, set
    dialog_on_none.
    """
    data = self.__get_update_file()
    if data:
        print data
        sax.parseString(data, self)
        for type in ["major", "minor"]:
            available, version = self.__new_version_available[type]
            if available:
                if self.__remind_again[type]:
                    dialog.info(_("A version update is available"),
                                _("You are running version %(version)s.\n\n"
                                  "Version %(newer_version)s is available "
                                  "at %(URL)s.")
                                % {"version": VERSION,
                                   "newer_version": self.__format_version(version),
                                   "URL": dialog.urlwrap("http://www.gdesklets.de")},
                                (_("_Stop reminding me"),
                                 lambda t=type: self.__remind(t, False)),
                                (_("_Remind me again"), None))
            elif dialog_on_none:
                dialog.info(_("No version updates available"),
                            _("You are running the latest version (%(version)s).")
                            % {"version": VERSION})
                break
    # Run again next timer expiration
    return True
def process_file(file):
    """Walk through a file and return a list containing:
    (level, tag, attribute, value) or (level, tag, None, value)
    or (level, None, None, value)."""
    if False:
        # create the parser class that will walk through the XML file
        p = XML.make_parser()
        x = XML_List()
        # register an instance of the XML_List class as the parser's handler
        p.setContentHandler(x)
        # read the file line by line;
        # each line is fed to the XML parser
        f = open(file, "r")
        for l in f:
            p.feed(l)
        p.close()
        f.close()
        return x.get_list()
    else:
        # alternative: read the file
        # so as to obtain a single string
        f = open(file, "r")
        li = f.readlines()
        f.close()
        s = string.join(li)
        # then use a function that parses the complete string
        x = XML_List()
        XML.parseString(s, x)
        return x.get_list()
def handle_xml_info(self, xml_info):
    handler = MSCDDBParser()
    try:
        parseString(xml_info, handler)
    except Exception, e:
        print e
        return None
def get_leo_data(source):
    """Return the root node for the specified .leo file (path or file)"""
    parser = LeoReader()
    if g.os_path_isfile(source):
        source = g.readFileIntoEncodedString(source)
    parseString(source, parser)
    return parser.root
def xml2yaml(fname):
    handler = XMLNodeHandler()
    parseString(caffeine_cml, handler)
    PrintVisitor(handler.getroot())
    #YAMLPrintVisitor(handler.getroot())
    #print PythonObjectVisitor(handler.getroot())
    return
def fetch_index():
    """Return an iterable of every project name on PyPI."""
    r = requests.get('https://pypi.python.org/simple/')
    sax_handler = PyPIIndexHandler()
    sax.parseString(r.text, sax_handler)
    return sax_handler.projects
def read_applexml_string(data, sql_filename):
    '''Parses the data as Apple XML format. Returns the top node.'''
    #parser = sax.make_parser()
    handler = AppleXMLHandler()
    #parser.setContentHandler(handler)
    #parser.setEntityResolver(AppleXMLResolver())
    sax.parseString(data, handler)
    album_xml = handler.gettopnode()
    if sql_filename:
        # keywords are no longer available in XML
        # quick hack to pull them out of the sqlite database instead
        conn = sqlite3.connect(sql_filename)
        c = conn.cursor()
        photos = album_xml['Master Image List']
        for key in photos:
            photo = photos[key]
            if 'Keywords' not in photo:
                photo['Keywords'] = []
            c.execute('select keywordId from RKKeywordForVersion where versionId is ?',
                      (key,))
            for keyword in c.fetchall():
                if keyword:
                    photo['Keywords'].append(keyword[0])
        album_xml['List of Keywords'] = {}
        c.execute('select modelId, name from RKKeyword')
        for keyword in c.fetchall():
            album_xml['List of Keywords'][keyword[0]] = keyword[1]
    return album_xml
def checkStatus(seHost):
    # Check if the node is in status downtime
    cmdString = CURL_CMD + "\"" + GOCDB_DOWNTIME_URL + seHost + "\""
    if DEBUG:
        print "Command: " + cmdString
    status, output = commands.getstatusoutput(cmdString)
    if DEBUG:
        print "Response: " + output
    if status != 0:
        print "Error when querying the GOCDB for downtimes: " + output
    else:
        parseString(output, SaxDowntimeHandle())

    # Check if the node is in status "not in production" or "not monitored"
    cmdString = CURL_CMD + "\"" + GOCDB_SERVICE_URL + seHost + "\""
    if DEBUG:
        print "Command: " + cmdString
    status, output = commands.getstatusoutput(cmdString)
    if DEBUG:
        print "Response: " + output
    if status != 0:
        print "Error when querying the GOCDB for service status: " + output
    else:
        parseString(output, SaxServiceHandle())

    # Display the node status
    if len(serviceStatus) != 0:
        if PRETTY:
            sys.stdout.write(" %-8s%-48s" % (service, seHost))
        else:
            sys.stdout.write(service + "|" + seHost + "|")
        isFirst = True
        for status in serviceStatus:
            if isFirst:
                sys.stdout.write(status)
            else:
                sys.stdout.write(", " + status)
            isFirst = False
        print
def __init__(self, uri):
    self.uri = uri
    self.xml = download_http_content(uri)
    self.handler = None
    self._capabilities = []
    self.uriHandler = UriGadgetHandler()
    parseString(self.xml, self.uriHandler)
def weatherResponse(xml):
    handler = WeatherHandler()
    parseString(xml, handler)
    if handler.city == "Aachen":
        return "<weather>The weather in %s is terrible.</weather>" % handler.city
    else:
        return "<error>Unknown city %s</error>" % handler.city[:500]
def test_parent_child_stop_point(self):
    xml_handler = NaptanXMLHandler(['639'], 'identifiers')
    parseString(test_stop_areas, xml_handler)
    areas = xml_handler.annotate_stop_area_ancestry(xml_handler.stop_areas)
    points, areas = xml_handler.annotate_stop_point_ancestry(xml_handler.stop_points, areas)
    self.assertEqual(points['639000022']['child_of'][0], areas['639GSHI21581']['id'])
    self.assertEqual(areas['639GSHI21581']['parent_of'][0], points['639000022']['id'])
def backrefs(href):
    """
    href -> ([trackbacks], [pingbacks])

    Parse a given html page, and retrieve the rdf:about, X-Pingback header,
    or pingback link information associated with a given href.  At most one
    is returned (in the above priority).
    """
    base = href.split("#")[0]
    file = urllib.urlopen(base)
    info = file.info()
    data = file.read().replace('\n', ' ')
    file.close()
    trackback = []
    pingback = pb_re.findall(data)[:1]
    for x in tb_re.findall(data):
        try:
            parseString(x, rdf())
        except SAXParseException:
            pass
    if info.has_key("X-Pingback"):
        pingback = [info["X-Pingback"]]
    if rdf.ids.has_key(href):
        trackback = [rdf.ids[href]]
    if not trackback and not pingback and href.find("#") > 0:
        if rdf.ids.has_key(base):
            trackback = [rdf.ids[base]]
    if trackback:
        pingback = []
    if pingback:
        pingback = [(href, pingback[0])]
    return (trackback, pingback)
def run_instances(self, image_id, instance_type_id, blocks=None, instance_count=-1,
                  subnet_id="", private_ip_address="", security_group_ids=None,
                  key_name=""):
    """
    Launch specified number of instances in your account.

    param args: Arguments passed to the function
    The function expects following arguments -
        1. image id
        2. instance type id
        3. subnet id (optional)
        4. security group id (optional)
        5. key name (optional, but needed to access machine)
        6. instance count (optional)
        7. private ip address (optional)
        8. block device mapping (optional)
    """
    response = instance.run_instances(self.url, self.verb, self.headers,
                                      self.version, image_id, instance_type_id,
                                      blocks, instance_count, subnet_id,
                                      private_ip_address, security_group_ids,
                                      key_name)
    if response is not None:
        res = RunInstancesResponse.RunInstancesResponse()
        print response.text
        parseString(str(response.text), res)
        return res
    else:
        return None
def get_ping_urls(url):
    """ returns a two-tuple of lists, ([pingback urls], [trackback urls]) """
    ping_urls = []
    tb_urls = []
    txt = urllib.urlopen(url).read()
    print "Got %d bytes" % len(txt)
    soup = bs(txt)
    # walk through the links, looking for ping-entries
    for a in soup.findAll('link'):
        print a
        rel = a.get('rel')
        if rel == 'pingback':
            print "Got pingback URL:", a.href
            ping_urls.append(a.get('href'))
    # now do the trackbacks...
    tb_re = re.compile('(<rdf:RDF .*?</rdf:RDF>)')
    rdfdata = RDF()
    for x in tb_re.findall(txt.replace('\n', ' ')):
        parseString(x, rdfdata)
        # print rdf.ids
        print "URL:", rdfdata.attrs.get('dc:identifier')
        print "Trackback URL:", rdfdata.attrs.get('trackback:ping')
        tb_urls.append(rdfdata.attrs.get('trackback:ping'))
    return ping_urls, tb_urls
def test_case1(self):
    str1 = bytes(
        '<?xml version="1.0" encoding="UTF-8" standalone="no" ?>\n'
        '<root>\r\n'
        ' <value1>5</value1>\r\n'
        ' <value2>1.23</value2>\n'
        ' <section>\n'
        ' <value3>on</value3>\n'
        ' <value4>1</value4>\n'
        ' <value4>2</value4>\n'
        ' <value4>42</value4>\n'
        ' </section>\n'
        '</root>'.encode("utf-8")
    )
    parseString(str1, XmlReader(self.handler1))
    # except Exception as ex:
    #     print("In Line {0}:{1}:\n{2}".format(handler._parent.locator.getLineNumber(),
    #                                          handler._parent.locator.getColumnNumber(),
    #                                          ex))
    self.assertEqual(self.val1, 5)
    self.assertEqual(self.val2, 1.23)
    self.assertEqual(self.val3, True)
    self.assertEqual(self.val4, [1, 2, 42])
    self.assertEqual(self.stderr.getvalue(), "")
    self.assertEqual(self.stdout.getvalue(), "")
def do_build(self, source, fromFile, catalog=None, bagcls=Bag, empty=None, testmode=False):
    """TODO

    :param source: TODO
    :param fromFile: TODO
    :param catalog: TODO
    :param bagcls: TODO
    :param empty: TODO
    :param testmode: TODO"""
    if not testmode:
        bagImport = _SaxImporter()
    else:
        bagImport = sax.handler.ContentHandler()
    if not catalog:
        catalog = gnrclasses.GnrClassCatalog()
    bagImport.catalog = catalog
    bagImport.bagcls = bagcls
    bagImport.empty = empty
    bagImportError = _SaxImporterError()
    if fromFile:
        infile = open(source)
        source = infile.read()
        infile.close()
    if isinstance(source, unicode):
        if source.startswith('<?xml'):
            source = source[source.index('?>'):]
        source = "<?xml version='1.0' encoding='UTF-8'?>%s" % source.encode('UTF-8')
    # escape bare ampersands that are not already part of an entity
    source = re.sub("&(?!([a-zA-Z][a-zA-Z0-9]*|#\d+);)", "&amp;", source)
    sax.parseString(source, bagImport)
    if not testmode:
        result = bagImport.bags[0][0]
        if bagImport.format == 'GenRoBag':
            result = result['GenRoBag']
        if result == None:
            result = []
        return result
def parseToolXML( self, xml, encoding=None ):
    """ Pseudo API.
    """
    parser = _WorkflowToolParser( encoding )
    parseString( xml, parser )
    return parser._workflows, parser._bindings
def parse_media_basic(self, result, filename):
    log.debug('parse_media_basic: entering "%s"' % type(result))
    fullpath = str(self._fc.abspath(filename))
    parser = Parser()
    parseString(result.encode('utf-8'), parser)
    features = parser.parsed
    self._save_features(features, 'media_basic')
    return 'ok'
def parse(self, string_or_fileobj):
    self.handler.reset()
    if isinstance(string_or_fileobj, str):
        sax.parseString(string_or_fileobj, self.handler)
        return self.handler.getValues()
    elif isinstance(string_or_fileobj, file):
        self.parser.parse(string_or_fileobj)
        return self.handler.getValues()
def read_applexml_string(data):
    '''Parses the data as Apple XML format. Returns the top node.'''
    #parser = sax.make_parser()
    handler = AppleXMLHandler()
    #parser.setContentHandler(handler)
    #parser.setEntityResolver(AppleXMLResolver())
    sax.parseString(data, handler)
    return handler.gettopnode()
def test_process_truncated_feed(self):
    p = FakeItemProcessor()
    handler = RssHandler(p)
    with self.assertRaises(SAXParseException):
        parseString(truncated_rss, handler)
    self.assertEqual(1, p.process_calls)
    self.assertEqual(p.last_guid, 'http://example/' + p.last_url)
    self.assertEqual(p.last_url, 'http://example/?page=download&tid=413994')
def discover(self):
    btx = self.src.encode('utf8')
    xs.parseString(btx, self)
    self.zones.sort(key=lambda xz: xz.start)
    self.rzones = sorted(self.zones, key=lambda xz: xz.end)
    self.discoverURLs()
def regonize(self, content):
    try:
        logging.debug('content:%s' % content)
        sax.parseString(content, self)
    except StopSAXException:
        pass
    return self.downloads
def get_objects_recursive(self, objtype, ids=[], recursive=False):
    """
    Recursively get all osm objects that are listed in the ids.
    If recursive=False, then you get only the objects that are directly
    referenced in relations.
    If recursive=True, then you get all objects hierarchically referenced
    from the relations.
    """
    relationids = set([])
    wayids = set([])
    nodeids = set([])
    relationdata, waydata, nodedata = '', '', ''
    if objtype == 'node':
        nodeids = set(ids)
    elif objtype == 'way':
        wayids = set(ids)
    elif objtype == 'relation':
        relationids = set(ids)
    else:
        return ""
    if recursive:
        recursions = 100  # maximum recursion level
    else:
        recursions = 1    # only get all direct members
    loaded_relationids = set([])
    while relationids:
        r_data = self.get_objects('relation', relationids)
        relationdata += '\n' + r_data
        if not recursions:
            break
        else:
            recursions -= 1
        parser = make_parser()
        osm_handler = SubobjectHandler()
        parser.setContentHandler(osm_handler)
        parseString(OSMHEAD + r_data + OSMTAIL, osm_handler)
        nodeids |= osm_handler.nodes
        wayids |= osm_handler.ways
        loaded_relationids |= relationids
        relationids = osm_handler.relations - loaded_relationids
    if wayids:
        waydata = self.get_objects('way', wayids)
        parser = make_parser()
        osm_handler = SubobjectHandler()
        parser.setContentHandler(osm_handler)
        parseString(OSMHEAD + waydata + OSMTAIL, osm_handler)
        nodeids |= osm_handler.nodes
    if nodeids:
        nodedata = self.get_objects('node', nodeids)
    return nodedata + waydata + relationdata
def create_handler(hyd_path=HYD_DEF_PATH):
    """Create and populate a hydrogen handler.

    :param hyd_path: path to hydrogen definition file
    :type hyd_path: string or pathlib.Path object
    :return: HydrogenHandler object
    :rtype: HydrogenHandler
    """
    handler = HydrogenHandler()
    hyd_path = io.test_dat_file(hyd_path)
    with open(hyd_path, "rt") as hyd_file:
        sax.make_parser()
        sax.parseString(hyd_file.read(), handler)
    return handler
def frameReceived(self, frame):
    if self.client_protocol is None:
        pm = ProxyManager()
        pm.registerServerProtocol(frame, self)
    else:
        handler = CommandHandler()
        parseString(frame, handler)
        if handler.command is not None and handler.command in ['create', 'check', 'logout']:
            self.client_protocol.sendFrame(frame)
        else:
            log.msg(frame)
            log.msg(handler.command)
def __init__(self):
    """
    Create a new Definition Object
    """
    self.map = {}
    self.patches = {}
    handler = DefinitionHandler()
    sax.make_parser()

    for path in [AAPATH, NAPATH]:
        defpath = getDatFile(path)
        if defpath == "":
            raise ValueError, "%s not found!" % path

        file = open(defpath)
        sax.parseString(file.read(), handler)
        file.close()

        self.map.update(handler.map)

    # Now handle patches
    defpath = getDatFile(PATCHPATH)
    if defpath == "":
        raise ValueError, "%s not found!" % PATCHPATH

    handler.map = {}
    file = open(defpath)
    sax.parseString(file.read(), handler)
    file.close()

    # Apply specific patches to the reference object, allowing users
    # to specify protonation states in the PDB file
    for patch in handler.patches:
        if patch.newname != "":
            # Find all residues matching applyto
            resnames = self.map.keys()
            for name in resnames:
                regexp = re.compile(patch.applyto).match(name)
                if not regexp:
                    continue
                newname = patch.newname.replace("*", name)
                self.addPatch(patch, name, newname)

        # Either way, make sure the main patch name is available
        self.addPatch(patch, patch.applyto, patch.name)
def parseXML(self, text, encoding=None):
    """ Parse 'text' into a clean registry.
    """
    self._clear()
    reader = getattr(text, 'read', None)
    if reader is not None:
        text = reader()
    parser = _ExportStepRegistryParser(encoding)
    parseString(text, parser)
    return parser._parsed
def process_site(site, noisy):
    """ Process the feeds of a site """
    logging.info("")
    logging.info("* site: %s", site)
    logging.info("")
    result = subr_rss.fetch(site, noisy=noisy)
    if not result or not result[0]:
        return
    body = result[0]
    if "<rss" not in body:
        handler = sax_atom.AtomHandler()
    else:
        handler = sax_rss.RssHandler()
    sax.parseString(body, handler)
    content = zip(handler.links, handler.pub_dates)
    for link, date in content:
        if date[0] < 2013:
            continue
        if date[1] != 5:
            continue
        if date[2] < 15:
            continue
        logging.info("")
        logging.info("- <%s>", link)
        logging.info("")
        folder = subr_misc.make_post_folder(date, site)
        subr_misc.mkdir_recursive_idempotent(folder)
        time.sleep(random.randrange(5, 8))
        link = subr_bitly.shorten(link, noisy=noisy)
        filename = subr_misc.bitlink_to_filename(link)
        pname = os.sep.join([folder, filename])
        if os.path.isfile(pname):
            logging.info("main: file already exists: %s", pname)
            continue
        time.sleep(random.randrange(5, 8))
        _, body = subr_http.fetch_url(link, noisy=noisy)
        filep = open(pname, "w")
        filep.write(body)
        filep.close()
def testBucketConditionalSetXmlAcl(self):
    b = self._MakeVersionedBucket()
    k = b.new_key("foo")
    s1 = "test1"
    k.set_contents_from_string(s1)

    g1 = k.generation
    mg1 = k.metageneration
    self.assertEqual(str(mg1), "1")

    acl_xml = (
        '<ACCESSControlList><EntrIes><Entry>' +
        '<Scope type="AllUsers"></Scope><Permission>READ</Permission>' +
        '</Entry></EntrIes></ACCESSControlList>')
    acl = ACL()
    h = handler.XmlHandler(acl, b)
    sax.parseString(acl_xml, h)
    acl = acl.to_xml()

    b.set_xml_acl(acl, key_name="foo")
    k = b.get_key("foo")
    g2 = k.generation
    mg2 = k.metageneration
    self.assertEqual(g2, g1)
    self.assertGreater(mg2, mg1)

    with self.assertRaisesRegexp(ValueError, ("Received if_metageneration "
                                              "argument with no "
                                              "if_generation argument")):
        b.set_xml_acl(acl, key_name="foo", if_metageneration=123)

    with self.assertRaisesRegexp(GSResponseError, VERSION_MISMATCH):
        b.set_xml_acl(acl, key_name="foo", if_generation=int(g2) + 1)

    with self.assertRaisesRegexp(GSResponseError, VERSION_MISMATCH):
        b.set_xml_acl(acl, key_name="foo", if_generation=g2,
                      if_metageneration=int(mg2) + 1)

    b.set_xml_acl(acl, key_name="foo", if_generation=g2)

    k = b.get_key("foo")
    g3 = k.generation
    mg3 = k.metageneration
    self.assertEqual(g3, g2)
    self.assertGreater(mg3, mg2)

    b.set_xml_acl(acl, key_name="foo", if_generation=g3, if_metageneration=mg3)
def save_title_and_metadata(self, html):
    handler = MetaDataSaveHandler()
    parseString(html, handler)
    version = self.get_editable()
    if ICaseStudyVersion.providedBy(version):
        version.set_subjects(handler.metadata['subjects'])
        version.set_category(handler.metadata['category'])
        version.set_format(handler.metadata['format'])
        version.set_shortdescription(handler.metadata['shortdescription'][0])
        version.set_image(handler.metadata['image'][0])
        version.set_document(handler.metadata['document'][0])
    version.set_title(handler.title)
def get_response(self, http_request, uri, response_headers):
    uri = urllib.unquote(uri)
    xml = uri[uri.find('=') + 1:]
    # A very vulnerable parser that loads remote files over https
    handler = NoOpContentHandler()
    try:
        sax.parseString(xml, handler)
    except Exception as e:
        body = str(e)
    else:
        body = handler.chars
    return self.status, response_headers, body
def loadunitfile(self, messagecallback, unitdirectory, unitfile):
    unitname = unitfile[0:-4]
    filehandle = open(join(unitdirectory, unitfile), "r")
    lines = filehandle.readlines()
    unit = kodi.Unit(unitname, unitfile, lines)
    contenthandler = ContentHandler()
    try:
        parseString("".join(lines), contenthandler)
    except SAXParseException as exception:
        messagecallback(
            "error",
            "- XML Parsing error in file " + unitfile + ": " + str(exception))
    return unit
def parseskinsettings(self, resolution, messagecallback):
    for unit in resolution.units:
        contenthandler = SkinSettingContentHandler(unit)
        parseString("".join(unit.lines), contenthandler)
        self.skinsettings.extend(contenthandler.skinsettings)
        messages = contenthandler.messages
        for message in messages:
            messagecallback("warning", "- File " + unit.name + ": " + message)
    messagecallback(
        "info",
        "- Number of skin settings: " + str(len(self.skinsettings)))
def testVersionedBucketXmlAcl(self):
    b = self._MakeVersionedBucket()
    k = b.new_key("foo")
    s1 = "test1"
    k.set_contents_from_string(s1)

    k = b.get_key("foo")
    g1 = k.generation

    s2 = "test2"
    k.set_contents_from_string(s2)
    k = b.get_key("foo")
    g2 = k.generation

    acl1g1 = b.get_acl("foo", generation=g1)
    acl1g2 = b.get_acl("foo", generation=g2)
    owner1g1 = acl1g1.owner.id
    owner1g2 = acl1g2.owner.id
    self.assertEqual(owner1g1, owner1g2)
    entries1g1 = acl1g1.entries.entry_list
    entries1g2 = acl1g2.entries.entry_list
    self.assertEqual(len(entries1g1), len(entries1g2))

    acl_xml = (
        '<ACCESSControlList><EntrIes><Entry>' +
        '<Scope type="AllUsers"></Scope><Permission>READ</Permission>' +
        '</Entry></EntrIes></ACCESSControlList>')
    aclo = acl.ACL()
    h = handler.XmlHandler(aclo, b)
    sax.parseString(acl_xml, h)

    b.set_acl(aclo, key_name="foo", generation=g1)

    acl2g1 = b.get_acl("foo", generation=g1)
    acl2g2 = b.get_acl("foo", generation=g2)
    entries2g1 = acl2g1.entries.entry_list
    entries2g2 = acl2g2.entries.entry_list
    self.assertEqual(len(entries2g2), len(entries1g2))

    public_read_entries1 = [e for e in entries2g1
                            if e.permission == "READ"
                            and e.scope.type == acl.ALL_USERS]
    public_read_entries2 = [e for e in entries2g2
                            if e.permission == "READ"
                            and e.scope.type == acl.ALL_USERS]

    self.assertEqual(len(public_read_entries1), 1)
    self.assertEqual(len(public_read_entries2), 0)
def parseXML(self, text, encoding=None):
    """ Pseudo-API
    """
    reader = getattr(text, 'read', None)
    if reader is not None:
        text = reader()
    parser = _ToolsetParser(encoding)
    parseString(text, parser)
    for tool_id in parser._forbidden:
        self.addForbiddenTool(tool_id)
    for tool_id, dotted_name in parser._required.items():
        self.addRequiredTool(tool_id, dotted_name)
def _parseRDF(self):
    """Parse RDF text"""
    result = {}
    # Get encoding
    charset = None
    match = FIND_ENCODING_RE.match(self.rdf_text)
    if match:
        charset = match.group(1)
    result['charset'] = charset
    # Parse XML
    handler = RDFHandler()
    parseString(self.rdf_text, handler)
    result.update(handler.info)
    return result
def process_aws_zone(zoneId, zoneName):
    if IMPORT_ZONES and zoneName not in IMPORT_ZONES:
        print(
            "Skipping zone {0} with id {1} (not in IMPORT_ZONES list)".format(
                zoneName, zoneId))
        return
    print("Processing zone {0} with id {1}".format(zoneName, zoneId))
    domainId = linode_create_domain(zoneName)
    if not domainId:
        return
    recordXmlText = execute_aws_request("/hostedzone/{0}/rrset".format(zoneId))
    recordXmlHandler = AWSRecordSetParser(domainId, zoneName)
    parseString(recordXmlText, recordXmlHandler)
def each_post(data):
    """ Parse content and yield a dictionary for each entry """
    data = data.strip()
    handler = AtomHandler()
    sax.parseString(data, handler)
    for index in range(len(handler.titles)):
        yield ({
            'year': handler.pub_dates[index][0],
            'month': handler.pub_dates[index][1],
            'day': handler.pub_dates[index][2],
            'hour': handler.pub_dates[index][3],
            'minute': handler.pub_dates[index][4],
            'second': handler.pub_dates[index][5],
            'title': handler.titles[index],
            'link': handler.links[index],
        })
def parsevariables(self, resolution, messagecallback):
    for unit in resolution.units:
        contenthandler = VariableContentHandler(unit)
        parseString("".join(unit.lines), contenthandler)
        self.definitions.extend(contenthandler.definitions)
        self.references.extend(contenthandler.references)
        messages = contenthandler.messages
        for message in messages:
            messagecallback("warning", "- " + unit.name + ": " + message)
    messagecallback("info", "- Number of variables: " + str(len(self.definitions)))
    messagecallback("info", "- Number of references: " + str(len(self.references)))
def describe_key_pairs(self):
    """
    Describes all key pairs in your account

    param args: Arguments passed to the function
    The function expects no arguments
    """
    response = key_pair.describe_key_pairs(self.url, self.verb, self.headers,
                                           self.version)
    if response is not None:
        res = DescribeKeyPairsResponse.DescribeKeyPairsResponse()
        parseString(str(response.text), res)
        return res
    else:
        return None
def post(self):
    try:
        # 1. unzip epub
        epub_file = self.request.POST.get('epub').file
        buffer = cStringIO.StringIO(epub_file.read())
        zobj = zipfile.ZipFile(buffer)

        # 2. find opf file path
        container_file = zobj.read('META-INF/container.xml')
        container_dom = minidom.parseString(container_file)
        rootfile = container_dom.getElementsByTagName('rootfile')
        opf_path = rootfile[0].getAttribute('full-path')
        root_path = opf_path[:opf_path.rfind('/') + 1]

        # 3. parse book info
        opf_file = zobj.read(opf_path)
        opf_dom = minidom.parseString(opf_file)
        book = Book()
        book.user_id = users.get_current_user()
        book.title = opf_dom.getElementsByTagName(
            'dc:title')[0].firstChild.nodeValue
        book.author = opf_dom.getElementsByTagName(
            'dc:creator')[0].firstChild.nodeValue
        book.isbn = opf_dom.getElementsByTagName(
            'dc:identifier')[0].firstChild.nodeValue
        book.put()

        # 4. parse table of contents
        item_list = opf_dom.getElementsByTagName('item')
        for item in item_list:
            if item.getAttribute('id') == 'ncx':
                ncx_path = item.getAttribute('href')
                break
        else:
            raise Exception('Wrong opf file. There is no ncx file.')
        ncx_file = zobj.read(root_path + ncx_path)
        p = re.compile(r'<!DOCTYPE[\s]+?[^>]*>', re.I)
        ncx_file = p.sub('', ncx_file)
        handler = self.TocHandler(zobj, root_path, book)
        parseString(ncx_file, handler)

        # 5. clean up
        zobj.close()

        # 6. response
        self.redirect('/bookshelf')
    except Exception, err:
        self.generate('exception.html', {'error': err})
def main():
    xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"
    # bad
    xml.sax.parseString(xmlString, ExampleContentHandler())
    xml.sax.parse("notaxmlfilethatexists.xml", ExampleContentHandler())
    sax.parseString(xmlString, ExampleContentHandler())
    sax.parse("notaxmlfilethatexists.xml", ExampleContentHandler)

    # good
    defusedxml.sax.parseString(xmlString, ExampleContentHandler())

    # bad
    xml.sax.make_parser()
    sax.make_parser()
    print("nothing")

    # good
    defusedxml.sax.make_parser()
def tokens(self, text):
    xhtmlsaxhandler = XHTMLSaxHandler(styles=self.styles, tags=self.tags)
    xhtmlsaxerrhandler = XHTMLSaxErrorHandler(self, self.ignore)
    # the next conversion seems like a bug in python (why does it need bytes, not str?!)
    if type(text) is str:
        parsetext = core.strtobytes23(text)
    else:
        parsetext = text
    sax.parseString(parsetext, xhtmlsaxhandler, xhtmlsaxerrhandler)
    text = ''.join(xhtmlsaxhandler.text)

    #_inlcode_re = r'@start_inline@(.*?)@end_inline@'
    _blkcode_re = r'@start_block@(.*?)@end_block@'
    #inlcode_re = re.compile(_inlcode_re, re.MULTILINE)
    blkcode_re = re.compile(_blkcode_re, re.DOTALL | re.MULTILINE)

    cmds = core.Cmd.syntax.findtokens('cmddef', text)
    #inlcodes = inlcode_re.finditer(text)
    blkcodes = blkcode_re.finditer(text)

    tokens = []
    for m in cmds:
        token = core.CmdToken(
            XHTMLSaxHandler._decode_spec_chars(m.group(1)),
            m.start(0), m.end(0))
        tokens.append(token)

    #for m in inlcodes:
    #    # XXX break line in HTML is coded with paragraph styling and this linearizing
    #    # does not help
    #    tokentext = XHTMLSaxHandler._decode_spec_chars(m.group(1))
    #    tokentext = core.InlCodeToken.linearize(tokentext)
    #    token = core.InlCodeToken(tokentext, m.start(0), m.end(0))
    #    tokens.append(token)

    for m in blkcodes:
        tokentext = m.group(1).lstrip('\n').rstrip('\n ')
        tokentext = core.deltextindent(tokentext)
        token = core.BlkCodeToken(
            XHTMLSaxHandler._decode_spec_chars(tokentext),
            m.start(0), m.end(0))
        tokens.append(token)

    tokens.sort(key=lambda tok: tok.start)
    tokens.append(core.EndToken(None))
    return tokens
def parse(file):
    """Parses an extension from a definition file.

    :param file: The path of the file containing the extension, or a url string
        that points to the extension definition file.
    :type file: str
    :return: The extension object, as defined in the provided file.
    :rtype: XExtension
    """
    handler = XExtensionParser.XExtensionHandler()
    if os.path.isfile(file):
        with open(file) as data:
            xml_parse(data, handler)
    elif not os.path.isdir(file):
        parseString(request.urlopen(file).read(), handler)
    return handler.get_extension()
def get(self, url="", query={}): """ Perform the GET request and return the parsed results """ qs = urllib.urlencode(query) if qs: qs = "?%s" % qs url = "%s%s%s" % (self.base_url, url, qs) log.debug("GET %s" % (url)) self.__connection.connect() request = self.__connection.request("GET", url, None, self.__headers) response = self.__connection.getresponse() data = response.read() self.__connection.close() log.debug("GET %s status %d" % (url, response.status)) result = {} # Check the return status if response.status == 200: log.debug("%s" % data) parser = DetailsToDict() parseString(data, parser) return parser.data elif response.status == 204: raise EmptyResponseWarning( "%d %s @ https://%s%s" % (response.status, response.reason, self.host, url)) elif response.status == 404: log.debug("%s returned 404 status" % url) raise HTTPException( "%d %s @ https://%s%s" % (response.status, response.reason, self.host, url)) elif response.status >= 400: _result = simplejson.loads(data) log.debug("OUTPUT %s" % _result) raise HTTPException( "%d %s @ https://%s%s" % (response.status, response.reason, self.host, url)) return result
def parseXML(self, text, encoding='utf-8'):
    """ Parse 'text'.
    """
    reader = getattr(text, 'read', None)
    if reader is not None:
        text = reader()
    if not six.PY2:
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        encoding = None
    parser = self.RegistryParser(encoding)
    parseString(text, parser)
    return parser._parsed
def delete_snapshot(self, snapshot_id):
    """
    Delete an existing and completed snapshot.
    The snapshot should be in 'completed' state to delete.

    param args: Arguments passed to the function
    The function expects snapshot id to be deleted
    """
    response = snapshot.delete_snapshot(self.url, self.verb, self.headers,
                                        self.version, snapshot_id)
    if response is not None:
        res = DeleteSnapshotResponse.DeleteSnapshotResponse()
        parseString(str(response.text), res)
        return res
    else:
        return None
def delete_volume(self, volume_id):
    """
    Delete an existing and available volume.
    The volume should be in 'available' state to delete.

    param args: Arguments passed to the function
    The function expects volume id to be deleted
    """
    response = volume.delete_volume(self.url, self.verb, self.headers,
                                    self.version, volume_id)
    if response is not None:
        res = DeleteVolumeResponse.DeleteVolumeResponse()
        parseString(str(response.text), res)
        return res
    else:
        return None
def reboot_instances(self, instance_ids):
    """
    Reboot instances in your account

    param args: Arguments passed to the function
    The function expects one or more instances to be rebooted.
    """
    response = instance.reboot_instances(self.url, self.verb, self.headers,
                                         self.version, instance_ids)
    if response is not None:
        res = RebootInstancesResponse.RebootInstancesResponse()
        parseString(str(response.text), res)
        return res
    else:
        return None
def delete_key_pair(self, key_name):
    """
    Delete a key pair from your account

    param args: Arguments passed to the function
    The function expects a key-name as necessary input
    """
    response = key_pair.delete_key_pair(self.url, self.verb, self.headers,
                                        self.version, key_name)
    if response is not None:
        res = DeleteKeyPairResponse.DeleteKeyPairResponse()
        parseString(str(response.text), res)
        return res
    else:
        return None
def export(self, note):
    '''Export the given note as an entry in the master org mode file.'''
    # trace the output if we're verbose
    if self['verbose']:
        print('.', end='', flush=True)

    # output all attachments
    self.exportAttachments(note)

    # convert HTML to org restructured text
    html = HTMLParser(note, titles=1)
    parseString(note['content'], html)
    rst = html._buffer

    print('* ' + note['title'], file=self._f)
    print('', file=self._f)
    print(rst, file=self._f)
    print('', file=self._f)
def create_key_pair(self, key_name):
    """
    Create a key pair to be used during instance creation

    param args: Arguments passed to the function
    The function expects a key-name as necessary input
    """
    response = key_pair.create_key_pair(self.url, self.verb, self.headers,
                                        self.version, key_name)
    if response is not None:
        res = CreateKeyPairResponse.CreateKeyPairResponse()
        parseString(str(response.text), res)
        return res
    else:
        return None
def describe_images(self, image_ids=None):
    """
    Gives a detailed list of all images visible in the account

    param args: Arguments passed to the function
    The function expects either no input or a list of specific images to describe
    """
    response = image.describe_images(self.url, self.verb, self.headers,
                                     self.version, image_ids)
    if response is not None:
        res = DescribeImagesResponse.DescribeImagesResponse()
        parseString(str(response.text), res)
        return res
    else:
        return None