def parse(self):
    """Feed self.config (if any) through an XMLParser whose target
    collects XML attributes, and return that collector."""
    collector = GetXmlAttrs()
    xml_parser = XMLParser(target=collector)
    if self.config:
        xml_parser.feed(self.config)
    return collector
def load(file, env):
    """Parse the XML document at *file* into *env* via CXMLParser.

    :param file: path of the XML file to read
    :param env: environment object handed to CXMLParser
    :return: the (possibly updated) environment held by the CXMLParser target
    """
    cXMLParser = CXMLParser(env)
    parser = XMLParser(target=cXMLParser)
    # 'with' closes the handle even if read() raises (the original leaked it),
    # and plain 'r' avoids requesting needless write access ('r+').
    with open(file, 'r') as f:
        read_data = f.read()
    parser.feed(read_data)
    parser.close()
    return cXMLParser.env
def build(self, root=None):
    # Render this element and its children into a TreeBuilder.  The
    # outermost call (no root passed) owns the builder and returns the
    # serialized HTML string; recursive calls just emit events into it.
    if root is None:
        was_root = True
        root = TreeBuilder()
    else:
        was_root = False
    root.start(self.tagname(), self.attrs())
    for i, child in enumerate(self.children):
        if isinstance(child, HTMLBuilder):
            # Nested builder: recurse into the same tree.
            child.build(root=root)
        else:
            if i in self._formatted:
                # Child is pre-formatted markup: parse it through a
                # TreeProxy so its parse events land under `root`.
                try:
                    proxy = TreeProxy(root)
                    # NOTE(review): XMLParser's `html=` argument was removed in
                    # Python 3 — confirm the intended runtime/parser class.
                    parser = XMLParser(html=True, target=proxy)
                    parser.feed(child)
                    proxy.cleanup()
                except Exception as e:
                    # Fall back to emitting the raw text on parse failure.
                    print("Bad formatting", e)
                    root.data(str(child))
            else:
                root.data(str(child))
    root.end(self.tagname())
    if was_root:
        root = root.close()
        return str(tostring(root, method="html").decode('utf-8'))
def get_max_depth(exampleXml):
    """Parse *exampleXml* with a MaxDepth target and return the depth it reports."""
    depth_tracker = MaxDepth()
    xml_parser = XMLParser(target=depth_tracker)
    xml_parser.feed(exampleXml)
    return xml_parser.close()
def getQueryResult(query,detailedLog=True):
    """Fetch *query* over HTTP, optionally log the raw XML answer, and
    return the result produced by ProcessCSQueryResult."""
    result_parser = XMLParser(target=ProcessCSQueryResult())
    answer_xml = urllib.urlopen(query).read()
    if detailedLog:
        log_CSQuery(answer_xml)
    result_parser.feed(answer_xml)
    return result_parser.close()
def getQueryResult(query, detailedLog=True):
    """Run *query* against the CS service and return the parsed result.

    When *detailedLog* is set, the raw XML answer is logged first.
    """
    answer = urllib.urlopen(query).read()
    if detailedLog:
        log_CSQuery(answer)
    csq_parser = XMLParser(target=ProcessCSQueryResult())
    csq_parser.feed(answer)
    return csq_parser.close()
def from_dataframe(cls, df, **kwargs):
    """Build an element tree (with *cls* as element factory) from a pandas
    DataFrame or Styler.

    DataFrame: parse ``df.to_html(**kwargs)`` directly.
    Styler: render to HTML, quote bare ``colspan=N`` attributes so the markup
    is well-formed XML, and on a ParseError fall back to the underlying
    DataFrame's table plus the raw render and error text for debugging.
    """
    if isinstance(df, pandas.DataFrame):
        return xml.etree.ElementTree.fromstring(
            df.to_html(**kwargs),
            parser=XMLParser(target=TreeBuilder(element_factory=cls)))
    elif isinstance(df, pandas.io.formats.style.Styler):
        render = df.render()
        # Styler may emit unquoted colspan=2 attributes, which is invalid XML;
        # quote them.  Raw strings fix the invalid '\g' escape sequence the
        # original used in a non-raw replacement string.
        render = re.sub(r"colspan=([0-9]*)>", r'colspan="\g<1>">', render, 0)
        try:
            return xml.etree.ElementTree.fromstring(
                f"<div>{render}</div>",
                parser=XMLParser(target=TreeBuilder(element_factory=cls)))
        except Exception as parse_err:
            # Compared by name so we don't depend on which ParseError class
            # (etree vs expat re-export) is raised.
            if type(parse_err).__name__ == 'ParseError':
                x = Elem('div')
                x << xml.etree.ElementTree.fromstring(
                    df.data.to_html(**kwargs),
                    parser=XMLParser(target=TreeBuilder(
                        element_factory=cls)))
                x << Elem('pre', text=render)
                x << Elem('pre', text=str(parse_err))
                return x
            else:
                raise
def Parse(self, data):
    # Parse a CryXmlB binary blob: validate the header, read the attribute,
    # child-index and node tables, then walk the tree starting at node 0.
    if len(data) < sizeof(CryXMLBHeader):
        raise ValueError(
            "File is not a binary XML file (file size is too small).")
    self._data = data
    self._header = CryXMLBHeader.from_buffer(data, 0)
    # TODO: actually do header validation - see references
    if self._header.signature != b"CryXmlB":
        if self._header.signature.startswith(b"<"):
            # try parsing as a normal xml file
            parser = XMLParser(target=self.target)
            parser.feed(self._data)
            # Signal the caller that the data was plain XML and has already
            # been fed to self.target.
            raise _StandardXmlFile()
        raise ParseError("Invalid CryXmlB Signature")
    # Materialise the three fixed-size tables described by the header.
    self._attributes = [
        self._read_attribute(i)
        for i in range(self._header.attributes_count)
    ]
    self._child_indices = [
        self._read_child_index(i)
        for i in range(self._header.child_table_count)
    ]
    self._nodes = [
        self._read_node(i)
        for i in range(self._header.node_count)
    ]
    root_node = self._read_node(0)
    # Only the root node may have no parent.
    assert root_node.parent_index == CRYXML_NO_PARENT
    self._iter_parse_nodes(root_node)
def get_ebelge_users():
    """Parse the cached user-list XML in the site's private files and
    return whatever the EbelgeUsers target produces on close()."""
    users_parser = XMLParser(target=EbelgeUsers())
    xml_path = frappe.get_site_path("private", "files", "KullaniciListesiXml",
                                    "newUserPkList.xml")
    users_parser.feed(frappe.read_file(xml_path))
    return users_parser.close()
def __init__(self, xml_encode):
    """Parse *xml_encode* for its maximum nesting depth and keep the raw XML.

    :param xml_encode: XML document as a string
    """
    super().__init__()
    depth_parser = XMLParser(target=MaxDepth())
    # feed() returns None; the original bound it to an unused `pepe` local.
    depth_parser.feed(xml_encode)
    depth_parser.close()
    # NOTE(review): `resul_property` is never assigned in this block — it must
    # be set by the superclass __init__, otherwise this raises AttributeError.
    print(self.resul_property)
    self.xml_encode = xml_encode
def __init__(self, html=0, target=None, encoding=None,
             forbid_dtd=False, forbid_entities=True,
             forbid_external=True):
    # "Defused" XMLParser: wraps the stdlib parser and installs expat
    # handlers that abort on DTDs, entity declarations and external entity
    # references (XML-bomb / XXE hardening).
    if PY26 or PY31:
        # These versions take no encoding argument.
        _XMLParser.__init__(self, html, target)
    else:
        # Python 2.x old style class
        _XMLParser.__init__(self, html, target, encoding)
    self.forbid_dtd = forbid_dtd
    self.forbid_entities = forbid_entities
    self.forbid_external = forbid_external
    # The attribute holding the underlying expat parser was renamed
    # between Python versions.
    if PY3 and not PY31:
        parser = self.parser
    else:
        parser = self._parser
    if self.forbid_dtd:
        parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl
    if self.forbid_entities:
        parser.EntityDeclHandler = self.defused_entity_decl
        parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl
    if self.forbid_external:
        parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler
def parse(self, fIn, oHolder):
    """Parse XML file into the card set holder"""
    feeder = XMLParser(target=self._cState(oHolder))
    try:
        # Stream the file line by line instead of reading it whole.
        for sLine in fIn:
            feeder.feed(sLine)
    except ParseError as oExp:
        raise IOError('Not an valid XML file') from oExp
def new_parsetree_from_xml(xml):
    """Build a zim ParseTree from raw XML text."""
    # For some reason this does not work with cElementTree.XMLBuilder ...
    from xml.etree.ElementTree import XMLParser
    from zim.formats import ParseTree
    xml_builder = XMLParser()
    xml_builder.feed(xml)
    tree_root = xml_builder.close()
    return ParseTree(tree_root)
def parse(self, fIn, oHolder):
    """Parse XML file into the card set holder"""
    oParser = XMLParser(target=self._cState(oHolder))
    try:
        # Stream the file line by line to the parser.
        for sLine in fIn:
            oParser.feed(sLine)
    except ParseError, oExp:  # Python 2 except syntax
        raise IOError('Not an XML file: %s' % oExp)
def __init__(self, stream, *pos, **kw):
    """Incremental parser: _QueueBuilder queues parse events from *stream*,
    which are replayed into a stack of TreeBuilders as they are read."""
    self._stream = stream
    self._pending = deque()
    queue_builder = _QueueBuilder(self._pending)
    self._parser = XMLParser(*pos, target=queue_builder, **kw)
    self._builders = [TreeBuilder()]
    # Pull the first event and apply it to the top-of-stack builder.
    method, pos, kw = self._read()
    self.element = getattr(self._builders[-1], method)(*pos, **kw)
def HTMLOfENML(text, resources=None):
    """Convert ENML markup *text* to an HTML byte string.

    :param text: ENML document to convert
    :param resources: optional mapping of note resources passed through to
        the HTML builder target; defaults to an empty dict.  (The original
        used a mutable default argument — the classic shared-state pitfall.)
    :return: HTML as a UTF-8 byte string
    """
    if resources is None:
        resources = {}
    target = HTMLCreatorTarget(resources)
    parser = XMLParser(target=target)
    parser.feed(text)
    parser.close()
    return tostring(target.root, encoding='utf8', method='html')
def _parse_xml(self, xml_data):
    """ Parse the xml into a python dictionary """
    parser = XMLParser()
    # feed() returns None — the original bound it to an unused `tree` local.
    parser.feed(xml_data)
    root = parser.close()
    return XmlDictConfig(root)
def runTest(self): '''Test OldParseTreeBuilder class''' # - Test \n before and after h / p / pre # - Test break line into lines input = '''\ <?xml version='1.0' encoding='utf-8'?> <zim-tree> foo<h level="1">bar</h>baz dus<pre>ja</pre>hmm <h level="2">foo </h>bar dus ja <emphasis>hmm dus ja </emphasis>grrr <strong>foo bar </strong> <strike></strike><emphasis> </emphasis>. </zim-tree>''' wanted = '''\ <?xml version='1.0' encoding='utf-8'?> <zim-tree> foo <h level="1">bar</h> baz dus <pre>ja </pre>hmm <h level="2">foo</h> bar dus ja <emphasis>hmm</emphasis> <emphasis>dus ja</emphasis> grrr <strong>foo</strong> <strong>bar</strong> . </zim-tree>''' from xml.etree.ElementTree import XMLParser builder = XMLParser(target=OldParseTreeBuilder()) builder.feed(input) root = builder.close() tree = ParseTree(root) self.assertEqual(tree.tostring(), wanted)
def __init__(self, destPacketList, parse_context_type=structinfo.ParseContext):
    """Set up an XML parser whose events are dispatched back to this object;
    parsed packets are delivered to *destPacketList*."""
    #self.interface = interface
    self.destination = destPacketList
    self.parse_context_type = parse_context_type
    # No protocol / context selected until parsing begins.
    self.cur_proto = None
    self.parse_context = None
    self.parser = XMLParser(target=self)
def mm2otl(*arg, **kwarg):
    """Convert a FreeMind .mm file into an Outline.

    arg[0][0] is the path of the .mm file.  Only the first line is fed to
    the parser — presumably the whole map sits on one line; TODO confirm.
    """
    fname = arg[0][0]
    outline = Outline()
    parser = XMLParser(target=outline, encoding='utf-8')
    # 'with' closes the handle; the original left the file open.
    with codecs.open(fname, 'r', encoding='utf-8') as fh:
        first_line = fh.readline()
    parser.feed(first_line.encode('utf-8'))
    parser.close()
def main(name): with open(name, "r") as f: parser = XMLParser() for line in f: parser.feed(line) xml = XMLTree(parser.close(), False) M.run(xml.get("files"), xml.get("structure"), xml.get("pages"))
def init(self, args):
    """Initialise the SVG painter from an (image_draw_mode, image,
    image_draw, image_size, background) tuple; any element may be None,
    in which case a default is derived."""
    (image_draw_mode, image, image_draw, image_size, background) = args
    # `is None` instead of the original `== None` / `!= None` comparisons
    # (PEP 8; also safe against objects with a permissive __eq__).
    image_draw_mode = "RGBA" if image_draw_mode is None else image_draw_mode
    if image is not None:
        self.setimage(image)
    image_draw = ImageDraw.Draw(
        self.im, image_draw_mode) if image_draw is None else image_draw
    self.imagesize = (self.im.width,
                      self.im.height) if image_size is None else image_size
    self.target = SvgPainter(image_draw, self.imagesize, background)
    self.parser = XMLParser(target=self.target)
def __init__(self, component, config):
    """Open an async XMPP client socket; stream XML events are handled by
    this object itself (it is the parser target)."""
    asyncore.dispatcher_with_send.__init__(self)
    self.component = component
    self.config = config
    self.parser = XMLParser(target=self)
    self.mapping = getOnMapping(self)
    self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
    self.connect((self.config["xmpp_host"], self.config["xmpp_port"]))
def fromStream(cls, source):
    # Read *source* in 64 KiB chunks and feed them to a WebDAV content
    # handler; parse failures are surfaced as ValueError.
    parser = XMLParser(target=WebDAVContentHandler())
    try:
        while 1:
            data = source.read(65536)
            if not data:
                break
            parser.feed(data)
    except XMLParseError, e:  # Python 2 except syntax
        raise ValueError(e)
def kplist_parse(plist):
    """Parse a kernel-style property list."""
    try:
        kp_parser = XMLParser(target=_KPlistBuilder())
        kp_parser.feed(plist)
        return kp_parser.close()
    except Exception as e:
        # Best-effort contract: report the failure and signal it with None.
        print(e)
        return None
def __init__(self):
    """Load and parse the XML configuration named by self.config_file."""
    # 'with' guarantees the file is closed even if read() raises
    # (the original leaked the handle on error).
    with open(self.config_file) as f:
        xml_src = f.read()
    parser = XMLParser()
    parser.feed(xml_src)
    self.config_tree = parser.close()
    self.parse()
def read(filename):
    """ Read an atlas from a XML file. """
    # NOTE(review): this TreeBuilder takes the filename — it is a project
    # class, not xml.etree.ElementTree.TreeBuilder.
    builder = TreeBuilder(filename)
    parser = XMLParser(target=builder)
    # 'with' closes the file; the original left the handle dangling.
    with open(filename) as fh:
        parser.feed(fh.read())
    return parser.close()
def compare_vtk(vtk1, vtk2, absolute=1.2e-7, relative=1e-2, zeroValueThreshold={}, verbose=True): """ Take two vtk files and fuzzy compare them. Returns an exit key as return value. :param vtk1: The filename of the vtk files to compare :type vtk1: string :param vtk2: The filename of the vtk files to compare :type vtk2: string :param absolute: The epsilon used for comparing numbers with an absolute criterion :type absolute: float :param relative: The epsilon used for comparing numbers with an relative criterion :type relative: float :param zeroValueThreshold: A dictionary of parameter value pairs that set the threshold under which a number is treated as zero for a certain parameter. Use this parameter if you have to avoid comparisons of very small numbers for a certain parameter. :type zeroValueThreshold: dict :param verbose: If the script should produce informative output. Enabled by default as the details give the tester a lot more information on why tests fail. :type verbose: bool """ # construct element tree from vtk file root1 = ET.parse(vtk1, parser=XMLParser(target=VTKTreeBuilder())).getroot() root2 = ET.parse(vtk2, parser=XMLParser(target=VTKTreeBuilder())).getroot() # sort the vtk file in case nodes appear in different positions # e.g. because of minor changes in the output code sortedroot1 = sort_vtk(root1) sortedroot2 = sort_vtk(root2) if verbose: print("Comparing {} and {}".format(vtk1, vtk2)) print( "...with a maximum relative error of {} and a maximum absolute error of {}*p_max, where p_max is highest absolute parameter value." .format(relative, absolute)) # sort the vtk file so that the comparison is independent of the # index numbering (coming e.g. from different grid managers) sortedroot1, sortedroot2 = sort_vtk_by_coordinates(sortedroot1, sortedroot2, verbose) # do the fuzzy compare if is_fuzzy_equal_node(sortedroot1, sortedroot2, absolute, relative, zeroValueThreshold, verbose): return 0 else: return 1
def get_tree(xml_string):
    """Get the xml tree associated to an xml string.

    :param xml_string: XML document as a string
    :return: whatever MyParser.close() produces
    """
    my_parser = XMLParser(target=MyParser())
    my_parser.feed(xml_string)
    return my_parser.close()
def makeparser():
    """Return an XML parser that knows about a few non-standard XML
    entities.  If your XML source uses other non-standard XML entities,
    add these to ENTITIES above."""
    entity_parser = XMLParser()
    # Let expat accept a foreign DTD, then register our known entities
    # on the parser's entity table.
    entity_parser.parser.UseForeignDTD(True)
    for name, value in ENTITIES.items():
        entity_parser.entity[name] = value
    return entity_parser
def load(self, path):
    """Stream the corpus file at *path* through the InterTASS XML parser,
    adding each completed tweet document as it is seen."""
    handler = CorpusInterTASSXMLParser()
    feeder = XMLParser(target=handler)
    with open(path, encoding=self.__encoding) as corpus_file:
        for line in corpus_file:
            feeder.feed(line)
            # A document just completed: collect it.
            if handler.full_doc:
                raw_tweet = handler.doc
                self.__add_document(raw_tweet)
def find_depth(chaine):
    """Parse *chaine* and print its maximum element depth (root at level 0,
    hence the -1 applied to the parser's close() result)."""
    target = MaxDepth()
    parser = XMLParser(target=target)
    # The original built an unused `exampleXml` sample document here;
    # that dead code has been removed.
    parser.feed(chaine)
    print(parser.close() - 1)
def main():
    # Read N lines of XML from stdin (Python 2: input() evaluates to an
    # int, raw_input reads raw text) and print the maximum nesting depth.
    N = input()
    lines = []
    for n in xrange(N):
        line = raw_input()
        lines.append(line)
    parser = XMLParser(target=MaxDepth())
    parser.feed("\n".join(lines))
    # -1 presumably discounts the root level — depends on MaxDepth.close().
    print (parser.close() - 1)
def __init__(self, config_file=None):
    """Parse the XML configuration file.

    :param config_file: optional path overriding self.config_file
    """
    if config_file:
        self.config_file = config_file
    # 'with' closes the handle even if read() raises (the original leaked it).
    with open(self.config_file) as f:
        xml_src = f.read()
    parser = XMLParser()
    parser.feed(xml_src)
    self.config_tree = parser.close()
    self.parse()
def from_xml(xml):
    """Deserialize an XML string into an object tree.

    :param xml: XML document as a string
    :rtype: object tree built by XMLHandler
    """
    handler = XMLHandler()
    xml_parser = XMLParser(target=handler)
    xml_parser.feed(xml)
    xml_parser.close()
    return handler.root
def __init__(self, conf):
    """Read conf.uwsgifile, configure logging at conf.debuglevel, and
    parse the XML configuration."""
    # 'with' replaces the open/read/close triple and closes the handle
    # even when read() raises (the original leaked it on error).
    with open(conf.uwsgifile) as f:
        logging.basicConfig(level=conf.debuglevel)
        self.log = logging
        xml_src = f.read()
    parser = XMLParser()
    parser.feed(xml_src)
    self.config_tree = parser.close()
    self.parse()
def main():
    """Read XML from argv[1] (or stdin), strip the default namespace
    declaration, and run it through _Parser_xml (argv[2] as its argument)."""
    if 1 < len(sys.argv):
        # .xml file path in $1 argument, else use /dev/stdin
        source_text = open(sys.argv[1]).read()
    else:
        source_text = "\n".join(sys.stdin.readlines())
    ntg = sys.argv[2] if 2 < len(sys.argv) else None
    xml_parser = XMLParser(target=_Parser_xml(ntg))
    # Drop the first xmlns declaration so tags are not namespace-qualified.
    source_text = re.sub('\\sxmlns="[^"]+"', '', source_text, count=1)
    xml_parser.feed(source_text)
    xml_parser.close()
def main():
    """Parse conf['source_file'] with CustomParser, then log per-category
    totals and the elapsed time."""
    init()
    log.info("Parsing '%s'..." % os.path.basename(conf['source_file']))
    stopwatch_set()
    target = CustomParser()
    parser = XMLParser(target=target)
    # 'with' closes the source file; the original leaked the handle.
    with open(conf['source_file']) as src:
        parser.feed(src.read())
    log.info('')
    totals = 'Total: posts: {post}; pages: {page}; comments: {comment}'
    log.info(totals.format(**stats))
    log.info('Elapsed time: %s s' % stopwatch_get())
def parse(self, source=None, parser=None):
    """Merge screen configs chunk-by-chunk into an element tree.

    :param source: unused (kept for interface compatibility)
    :param parser: optional pre-configured parser; a TreeBuilder-backed
        XMLParser is created when omitted
    :return: the parsed root element, or None on any failure (the original
        deliberately swallowed all errors; that best-effort contract is kept,
        but SystemExit/KeyboardInterrupt now propagate)
    """
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        while 1:
            data = self.mergeScreenConfigs()
            if not data:
                break
            parser.feed(data)
        return parser.close()
        # self._root = parser.close()
        # return self._root
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C and SystemExit escape.
        return None
def main():
    # Rewrite a BEAST XML: strip the default HKY substitution model and
    # splice in per-partition models chosen by modeltest, updating the
    # operators, priors and log sections to match.
    super_concat = False
    options, args = interface()
    xml = ElementTree().parse(options.input, parser=XMLParser(target=MyTreeBuilder()))
    # delete the older subs. models from the xml file
    for node in ['HKYModel','siteModel']:
        xml = delete_node(xml, node, 1)
    if super_concat:
        xml = delete_node(xml, 'treeLikehood', 1)
    # delete the kappa and frequency parameters in 'operators'
    for parameter in ['kappa', 'frequencies']:
        xml = delete_children_from_node(xml, 'operators', parameter)
        xml = delete_children_from_node(xml, 'prior', parameter, 2)
        xml = delete_children_from_log_node(xml, 'log', parameter)
    # jettison some comments
    xml = comment_remover(xml, ['HKY substitution model','site model'])
    # get our subs model information
    # (tab-separated file: partition-name <TAB> something-modelname)
    sub_models_from_modeltest = {line.strip().split('\t')[0]:line.strip().split('\t')[1].split('-')[1] for line in open(options.subs, 'rU')}
    model_names, site_names = get_xml_model_names(set(sub_models_from_modeltest.values()))
    model_data = ElementTree().parse(options.params, parser=XMLParser(target=MyTreeBuilder()))
    # get the xml data that we need to add for the models and their parameters
    models_to_add = get_generic_section_to_add(model_names, model_data, 'models')
    sites_to_add = get_generic_section_to_add(site_names, model_data, 'sites')
    operators_to_add = get_generic_section_children_to_add(model_names, model_data, 'operators')
    log_entries_to_add = get_log_entries_to_add(model_names.union(site_names), model_data)
    priors_to_add = get_generic_section_children_to_add(model_names, model_data, 'priors')
    if super_concat:
        likelihood_framework_to_add = get_generic_section_to_add(model_names, model_data, 'likelihoods')
    # get the last position of the strictClockBranchRates
    insert_position = get_position(xml, 'strictClockBranchRates')
    # insert the models and sites we need
    insert_position = insert_models_and_sites(xml, insert_position, models_to_add, sites_to_add)
    # modify the tree likelihood statements
    if not super_concat:
        xml = update_tree_likelihoods(xml, sub_models_from_modeltest)
    else:
        insert_position = get_position(xml, 'siteModel')
        xml = insert_tree_likelihoods(xml, sub_models_from_modeltest)
    # insert the operators we need
    xml = insert_to_generic_sections(xml, operators_to_add, 'operators', 'operators')
    # insert the priors we need
    xml = insert_to_generic_sections(xml, priors_to_add, 'prior', 'prior')
    # alter the log node to collect data
    xml = insert_to_generic_sections(xml, log_entries_to_add, 'log', 'fileLog')
    # write to the output file
    write(xml, options.output)
def __init__(self, html=0, target=None, encoding=None,
             forbid_dtd=False, forbid_entities=True,
             forbid_external=True):
    # Hardened XMLParser: installs expat handlers that reject DTDs, entity
    # declarations and external entity references (XXE / billion-laughs
    # mitigation).
    # Python 2.x old style class
    _XMLParser.__init__(self, html, target, encoding)
    self.forbid_dtd = forbid_dtd
    self.forbid_entities = forbid_entities
    self.forbid_external = forbid_external
    parser = self._parser
    if self.forbid_dtd:
        parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl
    if self.forbid_entities:
        parser.EntityDeclHandler = self.defused_entity_decl
        parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl
    if self.forbid_external:
        parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler
def parse_reqdata(self, get_data, post_data):
    """ parse query params and POST data into dictionary """
    print 'debuggery: runing parse_reqdata...'  # Python 2 print statement
    data_dict = {}
    parser = XMLParser()
    # NOTE(review): feed() returns None, so `tree` is always None here.
    tree = parser.feed(post_data)
    root = parser.close()
    data_dict = XmlDictConfig(root)
    # merge the query_params data
    for k,v in self.parse_getparams(get_data).iteritems():
        data_dict[k] = v
    print 'debuggery: parsed !'
    # return the dictionary data
    return data_dict
def read_transaction(self):
    # Coroutine: read one complete XML transaction from the connection and
    # return the parsed object when an 'end' instruction arrives.
    parser = XMLParser()
    line = ""
    # Skip anything before the XML declaration ('<?xml ...').
    while not line.startswith('<?'):
        line = yield from self.readline()
    parser.feed(line)
    # NOTE(review): read_instruction presumably consumes (and feeds) the
    # remaining lines of the transaction before reporting the final
    # instruction key — confirm against its definition.
    key, attrs = yield from self.read_instruction(line)
    if key == 'cancel':
        self.logger.info('Transaction cancelled')
    elif key == 'end':
        self.logger.info('Transaction complete')
        obj = parser.close()
        self.logger.debug(pretty_xml_str(obj))
        # Generator return: delivered as StopIteration.value to the caller.
        return obj
    elif key == 'start':
        self.report_error("Start instruction given mid-transaction")
    else:
        self.report_error("Unrecognised instruction during transaction: key = {}, attrs = {}".format(key, attrs))
def _unimplement_path_by_id(self,id,spec="9606"):
    # Debug helper (Python 2 print statements): query the service for *id*
    # and dump the resulting XML structure.  *spec* is currently unused —
    # presumably a taxon id; TODO confirm.
    self.pars["cmd"] = "search"
    self.pars["q"] = id
    content = self.tnf_obj.ask(self.pars)
    parser = XMLParser()
    parser.feed(content)
    elem = parser.close()
    # print content
    print elem.tag
    print elem.attrib
    for path in elem:
        # NOTE(review): concatenating str + attrib.keys() (a list in Py2)
        # raises TypeError — confirm this branch was ever exercised.
        print "hear "+path.tag+" "+path.attrib.keys()
    return
def main():
    """Parse the configured source file with CustomParser, then log the
    collected per-category statistics and the elapsed time."""
    init()
    log.info("Parsing '%s'..." % os.path.basename(conf['source_file']))
    stopwatch_set()
    target = CustomParser()
    parser = XMLParser(target=target)
    # 'with' closes the file handle the original leaked.
    if PY2:
        with open(conf['source_file']) as fh:
            text = fh.read()
    else:
        with codecs.open(conf['source_file'], encoding='utf-8') as fh:
            text = fh.read()
    parser.feed(text)
    log.info('')
    # join() instead of repeated += concatenation (quadratic worst case);
    # output format is unchanged: one "key: value" per line.
    total = ''.join('%s: %s\n' % (key, value) for key, value in stats.items())
    log.info(total)
    log.info('Elapsed time: %s s' % stopwatch_get())
def fromScrollBoxToSigPlus(self,repertoireScroll,repertoireSigPlus):
    # Convert a ScrollBox gallery directory into a SigPlus one: parse the
    # gallery XML, copy each image across and write one descriptor line
    # per image (filename|title|comment), preserving image timestamps.
    galerieEnCours=None
    fichierScrollBox=os.path.join(repertoireScroll,self.fichierScrollBox)
    if os.path.exists(repertoireScroll) and os.path.exists(fichierScrollBox):
        if not os.path.exists(repertoireSigPlus):
            # create the target directory with the same permissions as the source
            os.mkdir(repertoireSigPlus,os.stat(repertoireScroll).st_mode)
        # parse the gallery description file
        handler = ImageParser()
        parser = XMLParser(target=handler)
        ficToParse = codecs.open(fichierScrollBox, "r", "utf-8")
        donneesXML = u'%s' %(ficToParse.read())
        parser.feed(donneesXML)
        galerieEnCours=parser.close()
        ficSigPlus=open(os.path.join(repertoireSigPlus,self.fichierDescrSigPlus),'w')
        for image in galerieEnCours.images:
            # descriptor line: filename|title|comment
            ficSigPlus.write(u'%s|%s|%s\n' %(image.nomFic,image.titre,image.comment))
            shutil.copy(os.path.join(repertoireScroll,image.nomFic), repertoireSigPlus)
            # carry the image's original timestamp over to the copy
            msImage=time.mktime(image.date.timetuple())
            os.utime(os.path.join(repertoireSigPlus,image.nomFic), (msImage, msImage))
        ficSigPlus.close()
    else:
        print u'le fichier %s n\'a pu être trouvé, abandon\n' %(fichierScrollBox)
def parse_reqdata(self, get_data, post_data):
    """ parse query params and POST data into dictionary """
    print 'debuggery: runing parse_reqdata...'  # Python 2 print statement
    data_dict = {}
    parser = XMLParser()
    try:
        parser.feed(post_data)
        root = parser.close()
        execElement = root.find("executions/execution")
        # NOTE(review): `if not execElement` is also true for a childless
        # Element (falsy in Py2), not only for None — confirm intended.
        if not execElement:
            raise BleepParserError("Did not find executions/execution element in post data")
        # Copy the execution attributes and child-element texts of interest.
        data_dict['execution_id'] = execElement.attrib["id"]
        data_dict['execution_href'] = execElement.attrib["href"]
        data_dict['execution_status']= execElement.attrib["status"]
        data_dict['execution_user'] = execElement.find("user").text
        data_dict['execution_date_started']= execElement.find("date-started").text
        data_dict['execution_date_ended']= execElement.find("date-ended").text
        data_dict['execution_description']= execElement.find("description").text
        data_dict['execution_job_name']= execElement.find("job/name").text
        data_dict['execution_job_group']= execElement.find("job/group").text
        data_dict['execution_job_project']=execElement.find("job/project").text
        data_dict['execution_job_description']= execElement.find("job/description").text
    except KeyError as (keyerr):  # Python 2 parenthesised except target
        print "Oops! missing key error: " + str(keyerr)
    except AttributeError as (atterr):
        print "Oops! missing attribute error" + str(atterr)
    except Exception as (parserr):
        raise BleepParserError("Unexpected error when parsing post data. " + "Cause: " + str(parserr))
    # merge the query_params data
    for k,v in self.parse_getparams(get_data).iteritems():
        data_dict[k] = v
    print 'debuggery: parsed !'
    # return the dictionary data
    return data_dict
def __init__(self, config):
    """Connect to the XMPP server; any bound method carrying a `callback`
    attribute is registered as an event handler."""
    asyncore.dispatcher_with_send.__init__(self)
    self.config = config
    self.database = get_database(self.config.database)
    self.parser = XMLParser(target = self)
    # Register decorated methods in the callback mapping.
    for name, method in inspect.getmembers(self, predicate=inspect.ismethod):
        if hasattr(method, "callback"):
            self.mapping[method.callback] = getattr(self, name)
    self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
    self.connect( (self.config.xmpp_host, self.config.xmpp_port) )
def get_tournaments(self, **kwargs):
    """Returns set of tournaments from Challonge.

    Keywords:
        state: One of { all, pending, in_progress, ended }
        type: One of { single_elimination, double_elimination, round_robin, swiss }
        created_after: YYYY-MM-DD
        created_before: YYYY-MM-DD
        subdomain: String
    """
    # Validate filter keywords before hitting the API (Python 2 has_key).
    if kwargs.has_key('state') and kwargs['state'] not in ['all', 'pending', 'in_progress', 'ended']:
        raise Exception("Invalid state parameter")
    if kwargs.has_key('type') and kwargs['type'] not in ['single_elimination', 'double_elimination', 'round_robin', 'swiss']:
        raise Exception("Invalid type parameter")
    response = self._call("tournaments", **kwargs)
    print "got", response  # debug output (Python 2 print statement)
    target = TournamentConstructor()
    parser = XMLParser(target=target)
    parser.feed(response)
    return parser.close()
from itertools import imap, islice, izip from operator import itemgetter from xml.etree.ElementTree import iterparse, XMLParser from StatNewsWriter import DatabaseController, StatNewsWriterInterface import htmlentitydefs import datetime import calendar import nltk, string class CustomEntity: def __getitem__(self, key): if key == 'umml': key = 'uuml' # Fix invalid entity return unichr(htmlentitydefs.name2codepoint[key]) parser = XMLParser() parser.parser.UseForeignDTD(True) parser.entity = CustomEntity() records = [] count = 0 it = imap(itemgetter(1), iter(iterparse('./input/dblp.xml', events=['start'], parser=parser))); root = next(it) dbc = DatabaseController('./output/dblp_full.db') db = StatNewsWriterInterface(dbc) for node in it: if (node.tag == 'article'): count = count + 1 if (count % 1000 == 0):
import xml.etree.ElementTree as etree
from xml.etree.ElementTree import XMLParser


class MaxDepth:
    """Parser target that tracks the maximum element nesting depth."""

    maxDepth = 0
    depth = 0

    def start(self, tag, attrib):
        # Entering an element: one level deeper; remember the high-water mark.
        self.depth += 1
        self.maxDepth = max(self.maxDepth, self.depth)

    def end(self, tag):
        self.depth -= 1

    def data(self, data):
        pass

    def close(self):
        # Report depth with the root discounted (-1); 0 for an empty document.
        return self.maxDepth - 1 if self.maxDepth > 0 else self.maxDepth


# Read the line count, then concatenate that many lines of XML from stdin.
S = ""
for _ in range(int(input())):
    S += input()

target = MaxDepth()
parser = XMLParser(target=target)
parser.feed(S)
print(parser.close())