def make_node_page(self, ident):
    """Render the index page for one node, or the top-level contents page.

    Builds an ``<index>`` XML document with one section per child of
    ``ident``, pipes it through ``xsltproc`` using ``index.xsl``, replaces
    the ``<!--linkbar-->`` placeholder with the link bar and writes the
    result into the HTML output directory.

    ident -- node identifier.  A false value selects the contents page,
             which is written to ``index.html`` and additionally receives
             a link to ``revisions.html``.
    """
    if ident:
        index_attribs = {"title": self._make_title(ident, True),
                         "ident": ".".join(self.fcm.get_pslcode(ident)),
                         "version": self.versionstring}
        filename = self._make_href(ident)
    else:
        # contents page
        index_attribs = {"title": "Contents",
                         "version": self.versionstring}
        filename = "index.html"

    tb = et.TreeBuilder()
    tb.start("index", index_attribs)
    if not ident:
        # inject revisions list link into index.html
        tb.start("page", {"href": "revisions.html"})
        tb.data("Revision list")
        tb.end("page")
    for i in self.fcm.get_children(ident):
        self._recursive_add_index_section(i, tb)
    tb.end("index")

    # The XML document is fed to xsltproc on stdin ("-") and the
    # transformed page is read back from stdout.
    xslt = subprocess.Popen(["xsltproc", "--nonet", "--novalid",
                             g_paths.xsldir + "index.xsl", "-"],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    page_string = xslt.communicate(et.tostring(tb.close(), "utf-8"))[0]
    page_string = page_string.replace("<!--linkbar-->",
                                      self._build_linkbar(ident))

    # Single-argument form prints the same text under Python 2 and 3.
    print("Creating %s" % filename)
    # Use a context manager so the page is flushed and the handle released
    # even if the write fails.
    with open(g_paths.html_output + filename, "w") as of:
        of.write(page_string)
def save(self):
    """Serialise the session as XML and write it to the session file.

    A ``correcting-session`` root element is opened, ``_save_session`` is
    asked to add its content to the builder, and the serialised result is
    written to the path returned by ``__session_file()``.
    """
    builder = cElementTree.TreeBuilder()
    # TreeBuilder.start() is documented as start(tag, attrs); omitting attrs
    # only works with some C accelerator versions, so pass an empty mapping.
    builder.start('correcting-session', {})
    self._save_session(builder)
    xml_str = cElementTree.tostring(builder.end('correcting-session'))
    # The file is only written, never read back, so plain binary write
    # mode is sufficient.
    with open(self.__session_file(), 'wb') as session_file:
        session_file.write(xml_str)
def __init__(self, filename=None, run=None, overwrite=False):
    """Prepare the writer by replaying the header of the run's input file.

    The file named by ``run.info['filename']`` is walked with ``iterparse``
    and its events are replayed into ``self.TreeBuilder`` until the start
    of a ``spectrumList`` or ``chromatogramList`` element is seen.
    Elements completed on the way are appended to ``self.newTree``.

    filename  -- stored as ``self.filename``; not otherwise used here.
    run       -- object whose ``info['filename']`` names the input file; a
                 name ending in ``.gz`` is opened through ``gzip``.
    overwrite -- accepted but not referenced in this method.
    """
    # Register the mzML namespace under the empty prefix.
    cElementTree.register_namespace("", "http://psi.hupo.org/ms/mzml")
    self.filename = filename
    self.lookup = {}
    self.newTree = None
    self.TreeBuilder = cElementTree.TreeBuilder()
    self.run = run
    self.info = {'counters': ddict(int)}
    if self.run.info['filename'].endswith('.gz'):
        import gzip
        import codecs
        # Wrap the gzip stream in a UTF-8 decoding reader.
        io = codecs.getreader("utf-8")(gzip.open(
            self.run.info['filename']))
    else:
        io = open(self.run.info['filename'], 'r')
    # NOTE(review): `io` is not closed anywhere in this method.
    for event, element in cElementTree.iterparse(io, events=(b'start', b'end')):
        # The first element reported by the parser seeds the new tree.
        if self.newTree == None:
            self.newTree = cElementTree.Element(element.tag, element.attrib)
        if event == b'start' and element.tag.endswith("}mzML"):
            self.TreeBuilder.start(element.tag, element.attrib)
        else:
            if event == b'start':
                self.TreeBuilder.start(element.tag, element.attrib)
                if element.tag.endswith('}run'):
                    self.lookup['run'] = cElementTree.Element(
                        element.tag, element.attrib)
                if element.tag.endswith('}spectrumList'):
                    self.lookup['spectrumList'] = cElementTree.Element(
                        element.tag, element.attrib)
                    # NOTE(review): the trailing comma makes this value a
                    # 1-tuple holding the Element, not the Element itself --
                    # confirm whether that is intended.
                    self.lookup['spectrumIndeces'] = cElementTree.Element(
                        'index', {'name': 'spectrum'}),
                    # Stop replaying once the spectrum list starts.
                    break
                elif element.tag.endswith('}chromatogramList'):
                    # Likewise stop at the start of the chromatogram list.
                    break
                else:
                    pass
            else:
                if element.tag.endswith('}softwareList'):
                    ### Insert pymzML software tag
                    ## Example :software id="pwiz_Reader_Thermo"><softwareParam accession="MS:1000615" cvRef="MS" name="ProteoWizard" version="1.0" />
                    self.TreeBuilder.start('software', {'id': 'pymzML 0.7.1'})
                    self.TreeBuilder.start(
                        'softwareParam', {
                            'accession': 'MS:0000000',
                            'cvRef': 'MS',
                            'name': 'pymzML writer',
                            'version': '0.7.1'
                        })
                    self.newTree.append(
                        self.TreeBuilder.end('softwareParam'))
                    self.newTree.append(self.TreeBuilder.end('software'))
                # 'end' event: add the element's text, close it in the
                # builder and append the closed element to the new tree.
                self.TreeBuilder.data(element.text)
                self.newTree.append(self.TreeBuilder.end(element.tag))
    return
def obj_to_doc(obj, *args, **kwargs):
    """Convert an object to an XML document object.

    A fresh tree builder is handed to ``obj.build`` together with any extra
    positional and keyword arguments; whatever the builder holds afterwards
    is returned.

    :rtype: xml.etree.ElementTree.Element
    """
    tree_builder = ET.TreeBuilder()
    obj.build(tree_builder, *args, **kwargs)
    document = tree_builder.close()
    return document
def WriteQhp(symbols, files, qhp_filename):
    """Write the ``QtHelpProject`` XML document to *qhp_filename*.

    symbols      -- iterable of objects providing ``full_name`` and
                    ``filename``; each becomes one ``keyword`` entry.
    files        -- iterable of file names listed in the ``files`` section.
    qhp_filename -- path of the file to write.
    """
    xml = ElementTree.TreeBuilder()
    with Element(xml, "QtHelpProject", {"version": "1.0"}):
        Data(xml, "namespace", NAMESPACE)
        Data(xml, "virtualFolder", "doc")

        with Element(xml, "customFilter", {"name": FILTER_NAME}):
            Data(xml, "filterAttribute", FILTER_ID)
            Data(xml, "filterAttribute", VERSION)

        with Element(xml, "filterSection"):
            Data(xml, "filterAttribute", FILTER_ID)
            Data(xml, "filterAttribute", VERSION)

            # The table of contents is emitted as an empty element.
            with Element(xml, "toc"):
                pass

            with Element(xml, "keywords"):
                for symbol in symbols:
                    keyword_attributes = {
                        "name": symbol.full_name,
                        "id": symbol.full_name,
                        "ref": symbol.filename
                    }
                    Data(xml, "keyword", args=keyword_attributes)

            with Element(xml, "files"):
                for listed_file in files:
                    Data(xml, "file", listed_file)

    with open(qhp_filename, "w") as out:
        document = xml.close()
        out.write(ElementTree.tostring(document))
def fromstringlist(sequence, parser=None):
    """Parse an XML document supplied as a sequence of text fragments.

    Each fragment is fed to the parser in order and the parser's result is
    returned.  When no parser is given, an ``XMLParser`` targeting a new
    ``TreeBuilder`` is used.

    Taken from the Python 2.7 source.
    """
    active_parser = parser or ET.XMLParser(target=ET.TreeBuilder())
    for fragment in sequence:
        active_parser.feed(fragment)
    return active_parser.close()
def make_revision_list(self):
    """Render the revision list page and write it to ``revisions.html``.

    Builds a ``<revisions>`` XML document via
    ``_recursive_add_revision_node``, pipes it through ``xsltproc`` using
    ``revisions.xsl``, replaces the ``<!--linkbar-->`` placeholder with the
    link bar for ``"REV"`` and writes the page into the HTML output
    directory.
    """
    # Single-argument form prints the same text under Python 2 and 3.
    print("Writing revision list")
    tb = et.TreeBuilder()
    tb.start("revisions", {"title": "Revision list",
                           "version": self.versionstring})
    self._recursive_add_revision_node(tb, None)
    tb.end("revisions")

    # The XML document is fed to xsltproc on stdin ("-") and the
    # transformed page is read back from stdout.
    xslt = subprocess.Popen(["xsltproc", "--nonet", "--novalid",
                             g_paths.xsldir + "revisions.xsl", "-"],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    page_string = xslt.communicate(et.tostring(tb.close(), "utf-8"))[0]
    page_string = page_string.replace("<!--linkbar-->",
                                      self._build_linkbar("REV"))

    # Use a context manager so the page is flushed and the handle released
    # even if the write fails.
    with open(g_paths.html_output + "revisions.html", "w") as of:
        of.write(page_string)
def handle_starttag(self, tag, attrs):
    """Record an opening ``<a href=...>`` tag as the current link.

    For an ``a`` tag carrying an ``href`` attribute, ``self.link`` is set to
    a dict with the re-serialised opening tag (``original_tag``), the link
    target (``url``) and an empty ``text``; ``self.found_link`` is set to
    True.  Every other tag is ignored.

    tag   -- tag name as reported by the HTML parser.
    attrs -- list of ``(name, value)`` attribute pairs.
    """
    if tag != 'a':
        return
    attributes = dict(attrs)
    if 'href' not in attributes:
        # An anchor without href (e.g. <a name="top">) has no target; skip
        # it instead of failing with KeyError.
        return

    builder = etree.TreeBuilder()
    builder.start(tag, attributes)
    builder.end(tag)
    serialized = etree.tostring(builder.close()).decode("utf-8")
    # The empty element is serialised as `<a ... />`.  Strip only that
    # trailing marker; a blanket replace(' /', '') would also eat " /"
    # inside attribute values such as title="Docs / API".
    if serialized.endswith(' />'):
        serialized = serialized[:-3] + '>'

    self.link = {
        'original_tag': serialized,
        'url': attributes['href'],
        'text': ''
    }
    self.found_link = True
def WriteQhcp(qhp_filenames, qch_filenames, qhcp_filename):
    """Write the ``QHelpCollectionProject`` XML document to *qhcp_filename*.

    qhp_filenames -- input files; entry ``i`` is paired with
                     ``qch_filenames[i]`` in the ``generate`` section.
    qch_filenames -- output files; each is also listed in the ``register``
                     section.
    qhcp_filename -- path of the file to write.
    """
    xml = ElementTree.TreeBuilder()
    with Element(xml, "QHelpCollectionProject", {"version": "1.0"}):
        with Element(xml, "docFiles"):

            with Element(xml, "generate"):
                for index, qhp_path in enumerate(qhp_filenames):
                    with Element(xml, "file"):
                        Data(xml, "input", qhp_path)
                        Data(xml, "output", qch_filenames[index])

            with Element(xml, "register"):
                for qch_path in qch_filenames:
                    Data(xml, "file", qch_path)

    with open(qhcp_filename, "w") as out:
        document = xml.close()
        out.write(ElementTree.tostring(document))
def parse_xml(self, xml_output):
    """
    Parse XML text into an element tree.

    TODO: Write custom parser to just read the nodes that we need instead
    of reading the whole document.

    @param xml_output XML text to parse.
    @return xml_tree The root of the parsed tree. None if the text could
                     not be parsed (the error is printed).
    """
    try:
        xml_parser = ET.XMLParser(target=ET.TreeBuilder())
        xml_parser.feed(xml_output)
        return xml_parser.close()
    except SyntaxError as err:
        print("SyntaxError: %s. %s" % (err, xml_output))
    return None
def parse(file, builder=None, encoding=None):
    """Parse a document with BeautifulSoup and return it as an element tree.

    file     -- a file-like object (anything with ``read``), a file name,
                or, if it cannot be opened/read, the document text itself.
    builder  -- optional tree builder receiving the start/data/end events;
                a new ``ET.TreeBuilder`` is used when omitted.
    encoding -- optional source encoding.  When omitted, ``utf-8`` is used
                if the text decodes as such, ``iso-8859-1`` otherwise.

    Returns the ``html`` element: the single ``html`` child of the built
    root when there is exactly one, otherwise the root itself renamed to
    ``html``.
    """
    bob = builder

    def emit(soup):
        # Replay one soup node (and, recursively, its children) into bob.
        if isinstance(soup, BS.NavigableString):
            if isinstance(soup, ignorable_soup):
                return
            bob.data(unescape(soup))
        else:
            attrib = dict((k, unescape(v)) for k, v in soup.attrs)
            bob.start(soup.name, attrib)
            for s in soup:
                emit(s)
            bob.end(soup.name)

    # determine encoding (the document charset is not reliable)
    try:
        if hasattr(file, "read"):
            text = file.read()
        else:
            # Close the handle we opened ourselves; caller-supplied file
            # objects stay under the caller's control.
            handle = open(file)
            try:
                text = handle.read()
            finally:
                handle.close()
    except Exception:
        # Best effort: the argument could not be opened or read, so treat
        # it as the document text.  KeyboardInterrupt and SystemExit are
        # deliberately no longer swallowed.
        text = file
    if not encoding:
        try:
            encoding = "utf-8"
            unicode(text, encoding)
        except UnicodeError:
            encoding = "iso-8859-1"
    soup = BS.BeautifulSoup(
        text, convertEntities="html", fromEncoding=encoding
    )

    # build the tree
    if not bob:
        bob = ET.TreeBuilder()
    emit(soup)
    root = bob.close()

    # wrap the document in a html root element, if necessary
    if len(root) == 1 and root[0].tag == "html":
        return root[0]
    root.tag = "html"
    return root
def make_page(self, sid, prevsid, nextsid):
    """Render the content page for ``sid`` and write it to the output dir.

    Builds a ``<page>`` XML document via ``_recursive_build_node``, pipes it
    through ``xsltproc`` (using ``page.xsl``, or a temporary stylesheet when
    revisions were collected), post-processes the resulting markup and
    writes it into the HTML output directory.

    sid     -- identifier of the page to render.
    prevsid -- identifier of the previous page; a false value omits the
               ``prev``/``prevtitle`` attributes.
    nextsid -- identifier of the next page; a false value omits the
               ``next``/``nexttitle`` attributes.
    """
    filename = self._make_href(sid)
    # Single-argument form prints the same text under Python 2 and 3.
    print("Creating: %s" % filename)

    tb = et.TreeBuilder()
    # Both lists are handed to _recursive_build_node and read afterwards.
    revs = []
    jsarray = []
    page_attributes = {"title": self._make_title(sid, True),
                       "version": self.versionstring}
    if prevsid:
        page_attributes["prev"] = self._make_href(prevsid)
        page_attributes["prevtitle"] = self._make_title(prevsid)
    if nextsid:
        page_attributes["next"] = self._make_href(nextsid)
        page_attributes["nexttitle"] = self._make_title(nextsid)
    tb.start("page", page_attributes)
    self._recursive_build_node(tb, sid, jsarray=jsarray, revs=revs)
    tb.end("page")

    stylesheet_name = g_paths.xsldir + "page.xsl"
    tf = None
    if revs:
        tf = self._make_temporary_stylesheet(stylesheet_name, revs)
        stylesheet_name = tf.name
    try:
        xslt = subprocess.Popen(["xsltproc", "--nonet", "--novalid",
                                 stylesheet_name, "-"],
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        page_string = xslt.communicate(et.tostring(tb.close(), "utf-8"))[0]
    finally:
        # Remove the temporary stylesheet even if running xsltproc failed.
        if tf:
            os.unlink(tf.name)

    # create javascript variables for controlling folding
    page_string = page_string.replace(
        "<!--jsvariable-->", "var folding = " + str(jsarray) + ";")
    # replace xml links with xhtml links
    page_string = self._process_links(page_string)
    # insert link bar
    page_string = page_string.replace("<!--linkbar-->",
                                      self._build_linkbar(sid))
    # convert cgm images to png images
    page_string = re.sub("<img[^>]*></img>", self.cgmtopng, page_string)

    # write the file; the context manager guarantees it is closed
    with open(g_paths.html_output + filename, "w") as of:
        of.write(page_string)
def _build_linkbar(self, ident):
    """Return the serialised link bar ``<div>`` for the page ``ident``.

    The bar holds a breadcrumb paragraph (a "Contents" link followed by one
    link per ancestor of ``ident``) and an "otherlinks" block with the
    search link.  A false ``ident`` (the contents page) yields a paragraph
    containing only a single space; ``"REV"`` yields the "Contents" link
    without ancestors.  Link texts are cropped to 30 characters and
    suffixed with "..." when longer.
    """
    max_title_length = 30
    bar = et.TreeBuilder()

    def add_link(attributes, *text_parts):
        # Emit one <a> element whose text is the given parts in order.
        bar.start("a", attributes)
        for part in text_parts:
            bar.data(part)
        bar.end("a")

    bar.start("div", {"class": "linkbar"})
    bar.start("p", {})
    if not ident:
        # contents page passes in empty list
        # NOTE(review): the u prefix here and on u"| " below suggests these
        # may have been non-ASCII characters originally -- confirm.
        bar.data(u" ")
    else:
        add_link({"title": "Contents", "href": "index.html"}, "Contents")
        ancestors = [] if ident == "REV" else self.fcm.get_ancestors(ident)
        for ancestor in ancestors:
            bar.data(" >> ")
            title = self._make_title(ancestor)
            link_attributes = {"title": title,
                               "href": self._make_href(ancestor)}
            if len(title) > max_title_length:
                add_link(link_attributes, title[:max_title_length], "...")
            else:
                add_link(link_attributes, title[:max_title_length])
    bar.end("p")

    bar.start("div", {"class": "otherlinks"})
    bar.start("p", {})
    bar.data(u"| ")
    add_link({"href": "search.html"}, "Search")
    bar.end("p")
    bar.end("div")

    bar.end("div")
    return et.tostring(bar.close(), "utf-8")
def __init__(self):
    """Initialise the base class and open the ``root`` element.

    The tree builder creates its elements through ``H`` and is stored as
    ``self.tb`` with a ``root`` element already started.
    """
    super().__init__()
    self.tb = builder = etree.TreeBuilder(element_factory=H)
    # Open the root element straight away, with no attributes.
    builder.start("root", {})
def __init__(self, target=None):
    """Initialise the HTML parser and select the builder to drive.

    target -- object to use as the builder; when it is omitted (or
              otherwise false) a new ``ET.TreeBuilder`` is created.
    """
    HTMLParser.__init__(self)
    if not target:
        target = ET.TreeBuilder()
    self.__builder = target
def __init__(self):
    """Create the underlying tree builder and expose its shortcuts.

    ``close`` and ``data`` are bound directly to the builder's methods of
    the same name; ``stack`` starts out as an empty list.
    """
    builder = ET.TreeBuilder()
    self.tree_builder = builder
    self.close = builder.close
    self.data = builder.data
    self.stack = []
def __init__(self, filename=None, run=None, overwrite=False):
    """Prepare the writer from the header of the run's input file.

    The input file named by ``run.info['filename']`` is read twice:

    1. as text, to copy every line before the first line containing
       ``run`` into a new document (lines containing ``indexedmzML`` are
       skipped, a pymzML ``software`` entry is inserted after the first
       line containing ``softwareList``); the result is parsed into
       ``self.newTree``;
    2. with ``iterparse``, to capture copies of the ``run`` and
       ``spectrumList`` start tags in ``self.lookup``.

    filename  -- stored as ``self.filename``; not otherwise used here.
    run       -- object whose ``info['filename']`` names the input file; a
                 name ending in ``.gz`` is read through ``gzip``.
    overwrite -- accepted but not referenced in this method.
    """
    cElementTree.register_namespace("", "http://psi.hupo.org/ms/mzml")
    self.filename = filename
    self.lookup = {}
    self.newTree = None
    self.TreeBuilder = cElementTree.TreeBuilder()
    self.run = run
    self.info = {'counters': ddict(int)}
    source_path = self.run.info['filename']

    def open_source():
        # Open the input as text, decompressing gzip files on the fly, so
        # that both passes below see the same (decoded) content.
        if source_path.endswith('.gz'):
            import gzip
            import codecs
            return codecs.getreader("utf-8")(gzip.open(source_path))
        return open(source_path, 'r')

    # Pass 1: read the header as text.
    header_parts = []
    pymzml_tag_written = False
    text_input = open_source()
    try:
        for line in text_input:
            if 'indexedmzML' in line:
                # writing of indexed mzML is not possible at the moment
                continue
            if 'run' in line:
                # the run is appended from the original parser to avoid
                # messing with the new xml tree, we break before the run
                # data starts
                break
            header_parts.append(line)
            if 'softwareList' in line and not pymzml_tag_written:
                addon = cElementTree.Element(
                    'software',
                    {
                        'id': 'pymzML',
                        'version': "0.7.6"
                    }
                )
                cElementTree.SubElement(
                    addon,
                    'cvParam',
                    {
                        'accession': 'MS:1000531',
                        'cvRef': 'MS',
                        'name': 'pymzML Writer',
                        'version': '0.7.6',
                    }
                )
                new_line = cElementTree.tostring(addon, encoding='utf-8')
                # tostring() yields bytes on Python 3; decode so it can be
                # joined with the text lines (a no-op on Python 2).
                if not isinstance(new_line, str):
                    new_line = new_line.decode('utf-8')
                header_parts.append(new_line)
                pymzml_tag_written = True
    finally:
        text_input.close()
    # Close the root element that was left open by stopping before <run>.
    header_parts.append('</mzML>\n')
    self.newTree = cElementTree.fromstring(''.join(header_parts))

    # Pass 2: capture the run / spectrumList start tags.
    xml_input = open_source()
    try:
        # Plain string event names are accepted by every ElementTree
        # implementation (and equal the byte strings on Python 2).
        for event, element in cElementTree.iterparse(
                xml_input, events=('start', 'end')):
            if event != 'start':
                continue
            if element.tag.endswith('}run'):
                self.lookup['run'] = cElementTree.Element(
                    element.tag, element.attrib)
            if element.tag.endswith('}spectrumList'):
                self.lookup['spectrumList'] = cElementTree.Element(
                    element.tag, element.attrib)
                self.lookup['spectrumIndeces'] = cElementTree.Element(
                    'index', {'name': 'spectrum'})
                # Nothing past the start of the spectrum list is needed.
                break
    finally:
        xml_input.close()
def __init__(self):
    """Initialise the HTML parser base and attach a new tree builder.

    The builder is stored as ``self.tb``.
    """
    HTMLParser.__init__(self)
    tree_builder = etree.TreeBuilder()
    self.tb = tree_builder
def __str__(self):
    """Return the object serialised as XML text.

    The object builds itself into a fresh tree builder; the resulting tree
    is serialised as UTF-8 and decoded back to text.
    """
    tree_builder = ET.TreeBuilder()
    self.build(tree_builder)
    root = tree_builder.close()
    encoded = ET.tostring(root, encoding='utf-8')
    return encoded.decode('utf-8')
def unflatten(tree):
    """Rebuild an XML document from a flattened list of rows.

    Each row is a sequence of strings.  Entries starting with ``!`` carry
    text (everything after the ``!``) for the element addressed by the
    row's other entries, which together form the element path.  A row
    without a ``!`` entry opens a new element named after the last path
    component.

    tree -- sequence of rows; an empty/false value yields ``''``.

    Returns the serialised document as produced by
    ``cElementTree.tostring``, or ``''`` when there is nothing to
    serialise.
    """
    builder = cElementTree.TreeBuilder()
    if not tree:
        return ''
    tag = None
    root_element = None
    paths = []
    for row in tree:
        # Split the row into its element path and its optional text.
        current_path = []
        data = ''
        create_new_element = True
        for column in row:
            if column[0] == '!':
                create_new_element = False
                data = column[1:]
            else:
                current_path.append(column)

        # Work out which open elements must be closed before this row:
        # the previous element when the same path repeats without data,
        # plus every component where the previous path diverges.
        tags_to_end = []
        last_path = paths[-1] if paths else []
        if not data and current_path == last_path:
            tags_to_end.append(tag)
        for i, a in enumerate(last_path):
            tag_to_end = None
            if i >= len(current_path):
                tag_to_end = a
            else:
                b = current_path[i]
                if a != b:
                    tag_to_end = a
            if tag_to_end:
                tags_to_end.append(tag_to_end)
        # Close innermost first.
        for tag_to_end in tags_to_end[::-1]:
            builder.end(tag_to_end)
            paths.pop()
        last_path = paths[-1] if paths else []
        if not data and current_path == last_path:
            builder.end(tag_to_end)
            paths.pop()

        tag = current_path[-1]
        if create_new_element:
            # TreeBuilder.start() is documented as start(tag, attrs);
            # omitting attrs raises TypeError on implementations without a
            # default, so pass an empty mapping explicitly.
            builder.start(tag, {})
        if data:
            builder.data(data)
        paths.append(current_path)
    # NOTE(review): only the last element is closed explicitly here; any
    # still-open ancestors are left to builder.close() -- confirm this is
    # acceptable for the ElementTree implementation in use.
    builder.end(tag)
    root_element = builder.close()
    if root_element is None:
        return ''
    return cElementTree.tostring(root_element)
def __init__(self, partial=False, _parsetree_roundtrip=False):
    """Store the options and reset the builder state.

    partial              -- stored as ``self.partial``.
    _parsetree_roundtrip -- stored as ``self._parsetree_roundtrip``.
    """
    self.partial = partial
    self._parsetree_roundtrip = _parsetree_roundtrip
    self._b = ElementTreeModule.TreeBuilder()
    #: keeps track of current open elements
    self.stack = []
    self._last_char = None
def __init__(self):
    """Initialise the HTML parser base and the tree-building state.

    ``tree_builder`` receives a new ``TreeBuilder``; ``tree`` starts out as
    ``None``.
    """
    HTMLParser.HTMLParser.__init__(self)
    self.tree = None
    self.tree_builder = ElementTree.TreeBuilder()
def __init__(self, encoding=None):
    """Initialise the HTML parser base and attach a new tree builder.

    encoding -- accepted but not referenced in this method.
    """
    _HTMLParser.__init__(self)
    target = ElementTree.TreeBuilder()
    self._target = target
def getroot(self):
    """Build the object into a new tree and return the tree's root."""
    tree_builder = ET.TreeBuilder()
    self.build(tree_builder)
    root = tree_builder.close()
    return root