Beispiel #1
0
 def make_node_page(self, ident):
     global g_paths
     tb = et.TreeBuilder()
     if ident:
         index_attribs = {"title": self._make_title(ident, True),
                          "ident": ".".join(self.fcm.get_pslcode(ident)),
                          "version": self.versionstring}
         filename = self._make_href(ident)
     else: #contents page
         index_attribs =  {"title": "Contents",
                           "version": self.versionstring}
         filename = "index.html"
     tb.start("index", index_attribs)
     if not ident:#inject revisions list link into index.html
         tb.start("page", {"href": "revisions.html"})
         tb.data("Revision list")
         tb.end("page")
     for i in self.fcm.get_children(ident):
         self._recursive_add_index_section(i, tb)
     tb.end("index")
     page_string= subprocess.Popen(["xsltproc", "--nonet", "--novalid", g_paths.xsldir + "index.xsl", "-"],
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE
                               ).communicate(et.tostring(tb.close(), "utf-8"))[0]
     page_string = page_string.replace("<!--linkbar-->", self._build_linkbar(ident))
     print "Creating", filename
     of = open(g_paths.html_output + filename, "w")
     of.write(page_string)
 def save(self):
     """Serialise the session to XML and write it to the session file.

     A ``<correcting-session>`` root element is opened, ``self._save_session``
     is given the builder to add its content, and the resulting element is
     written as bytes to the path returned by ``self.__session_file()``.
     """
     builder = cElementTree.TreeBuilder()
     # Pass the attribute dict explicitly: the documented signature is
     # start(tag, attrs), and not every implementation defaults ``attrs``.
     builder.start('correcting-session', {})
     self._save_session(builder)
     xml_str = cElementTree.tostring(builder.end('correcting-session'))
     with open(self.__session_file(), 'bw+') as session_file:
         session_file.write(xml_str)
Beispiel #3
0
    def __init__(self, filename=None, run=None, overwrite=False):
        """Set up the writer by copying the header of the run's mzML file.

        The input file named by ``run.info['filename']`` (gzip-decoded when the
        name ends in ``.gz``) is parsed incrementally.  Every element seen
        before the first ``spectrumList`` / ``chromatogramList`` start tag is
        replayed into ``self.TreeBuilder`` and, once closed, appended to
        ``self.newTree``.  A ``software`` entry describing pymzML is injected
        when the end of ``softwareList`` is reached.

        :param filename: stored on ``self.filename``; not otherwise used here.
        :param run: object exposing ``info['filename']`` (the input path).
        :param overwrite: accepted but not used in this method.

        Populates ``self.lookup`` with fresh, childless copies of the ``run``
        and ``spectrumList`` elements plus a ``spectrumIndeces`` index element.
        """
        cElementTree.register_namespace("", "http://psi.hupo.org/ms/mzml")
        self.filename = filename
        self.lookup = {}

        self.newTree = None
        self.TreeBuilder = cElementTree.TreeBuilder()
        self.run = run
        self.info = {'counters': ddict(int)}
        if self.run.info['filename'].endswith('.gz'):
            import gzip
            import codecs
            source = codecs.getreader("utf-8")(gzip.open(
                self.run.info['filename']))
        else:
            source = open(self.run.info['filename'], 'r')

        # The loop stops early (break), so close the handle explicitly
        # instead of relying on garbage collection.
        try:
            for event, element in cElementTree.iterparse(source,
                                                         events=(b'start',
                                                                 b'end')):
                if self.newTree is None:
                    # First event: the root of the new tree mirrors the first
                    # element reported by the parser.
                    self.newTree = cElementTree.Element(element.tag,
                                                        element.attrib)
                    if event == b'start' and element.tag.endswith("}mzML"):
                        self.TreeBuilder.start(element.tag, element.attrib)
                else:
                    if event == b'start':
                        self.TreeBuilder.start(element.tag, element.attrib)
                        if element.tag.endswith('}run'):
                            self.lookup['run'] = cElementTree.Element(
                                element.tag, element.attrib)
                        if element.tag.endswith('}spectrumList'):
                            self.lookup['spectrumList'] = cElementTree.Element(
                                element.tag, element.attrib)
                            # Store the Element itself; a trailing comma here
                            # would silently wrap it in a 1-tuple.
                            self.lookup['spectrumIndeces'] = cElementTree.Element(
                                'index', {'name': 'spectrum'})
                            break
                        elif element.tag.endswith('}chromatogramList'):
                            break
                    else:
                        if element.tag.endswith('}softwareList'):
                            ### Insert pymzML software tag
                            ## Example :software id="pwiz_Reader_Thermo"><softwareParam accession="MS:1000615" cvRef="MS" name="ProteoWizard" version="1.0" />
                            self.TreeBuilder.start('software',
                                                   {'id': 'pymzML 0.7.1'})
                            self.TreeBuilder.start(
                                'softwareParam', {
                                    'accession': 'MS:0000000',
                                    'cvRef': 'MS',
                                    'name': 'pymzML writer',
                                    'version': '0.7.1'
                                })
                            self.newTree.append(
                                self.TreeBuilder.end('softwareParam'))
                            self.newTree.append(self.TreeBuilder.end('software'))
                        self.TreeBuilder.data(element.text)
                        self.newTree.append(self.TreeBuilder.end(element.tag))
        finally:
            source.close()
        return
Beispiel #4
0
def obj_to_doc(obj, *args, **kwargs):
    """Convert an object to an XML document object.

    ``obj.build`` is called with a fresh ``TreeBuilder`` followed by any extra
    positional and keyword arguments; whatever the builder holds afterwards
    is closed and returned.

    :rtype: xml.etree.ElementTree.Element
    """
    tree_builder = ET.TreeBuilder()
    obj.build(tree_builder, *args, **kwargs)
    document = tree_builder.close()
    return document
Beispiel #5
0
def WriteQhp(symbols, files, qhp_filename):
    """Write a ``QtHelpProject`` XML description to ``qhp_filename``.

    The document contains the namespace, a custom filter, and a filter
    section with an empty ``toc``, one ``keyword`` per entry of ``symbols``
    (using its ``full_name`` and ``filename`` attributes) and one ``file``
    per entry of ``files``.

    ``Element`` and ``Data`` are helpers defined elsewhere in this module;
    presumably they open/close a tag and emit a simple element on the
    builder -- confirm against their definitions.
    """
    xml = ElementTree.TreeBuilder()

    with Element(xml, "QtHelpProject", {"version": "1.0"}):
        Data(xml, "namespace", NAMESPACE)
        Data(xml, "virtualFolder", "doc")

        with Element(xml, "customFilter", {"name": FILTER_NAME}):
            for attribute in (FILTER_ID, VERSION):
                Data(xml, "filterAttribute", attribute)

        with Element(xml, "filterSection"):
            for attribute in (FILTER_ID, VERSION):
                Data(xml, "filterAttribute", attribute)

            with Element(xml, "toc"):
                pass

            with Element(xml, "keywords"):
                for symbol in symbols:
                    keyword_attrs = {
                        "name": symbol.full_name,
                        "id": symbol.full_name,
                        "ref": symbol.filename,
                    }
                    Data(xml, "keyword", args=keyword_attrs)

            with Element(xml, "files"):
                for path in files:
                    Data(xml, "file", path)

    with open(qhp_filename, "w") as out:
        document = xml.close()
        out.write(ElementTree.tostring(document))
Beispiel #6
0
def fromstringlist(sequence, parser=None):
    """Parse an XML document supplied as a sequence of string fragments.

    Taken from the Python 2.7 source.  Each fragment is fed to ``parser`` in
    order; when no (or a falsy) parser is given, an ``XMLParser`` targeting a
    fresh ``TreeBuilder`` is used.  Returns whatever ``parser.close()`` yields.
    """
    parser = parser or ET.XMLParser(target=ET.TreeBuilder())
    for fragment in sequence:
        parser.feed(fragment)
    return parser.close()
Beispiel #7
0
 def make_revision_list(self):
     global g_paths
     print "Writing revision list"
     tb = et.TreeBuilder()
     tb.start("revisions", {"title": "Revision list",
                            "version": self.versionstring})
     self._recursive_add_revision_node(tb, None)
     tb.end("revisions")
     page_string= subprocess.Popen(["xsltproc", "--nonet", "--novalid", g_paths.xsldir + "revisions.xsl", "-"],
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE
                               ).communicate(et.tostring(tb.close(), "utf-8"))[0]
     page_string = page_string.replace("<!--linkbar-->", self._build_linkbar("REV"))
     of = open(g_paths.html_output + "revisions.html", "w")
     of.write(page_string)
Beispiel #8
0
 def handle_starttag(self, tag, attrs):
     """Record details of an ``<a>`` start tag; every other tag is ignored.

     For an anchor, ``self.link`` is set to a dict holding the re-serialised
     opening tag (``original_tag``), the ``href`` value (``url``) and an empty
     ``text`` entry, and ``self.found_link`` is set to True.  An anchor
     without an ``href`` attribute raises ``KeyError``.
     """
     if tag != 'a':
         return
     attributes = dict(attrs)
     tb = etree.TreeBuilder()
     tb.start(tag, attributes)
     tb.end(tag)
     anchor = tb.close()
     # The empty element serialises as "<a ... />"; dropping " /" leaves
     # the form of an opening tag.
     markup = etree.tostring(anchor).decode("utf-8").replace(' /', '')
     self.link = {
         'original_tag': markup,
         'url': attributes['href'],
         'text': '',
     }
     self.found_link = True
Beispiel #9
0
def WriteQhcp(qhp_filenames, qch_filenames, qhcp_filename):
    """Write a ``QHelpCollectionProject`` XML description to ``qhcp_filename``.

    The ``generate`` section pairs each entry of ``qhp_filenames`` (input)
    with the entry of ``qch_filenames`` at the same index (output); the
    ``register`` section lists every entry of ``qch_filenames``.

    ``Element`` and ``Data`` are helpers defined elsewhere in this module;
    presumably they open/close a tag and emit a simple element on the
    builder -- confirm against their definitions.
    """
    xml = ElementTree.TreeBuilder()

    with Element(xml, "QHelpCollectionProject", {"version": "1.0"}):
        with Element(xml, "docFiles"):
            with Element(xml, "generate"):
                for index, source in enumerate(qhp_filenames):
                    with Element(xml, "file"):
                        Data(xml, "input", source)
                        Data(xml, "output", qch_filenames[index])

            with Element(xml, "register"):
                for compiled in qch_filenames:
                    Data(xml, "file", compiled)

    with open(qhcp_filename, "w") as out:
        document = xml.close()
        out.write(ElementTree.tostring(document))
Beispiel #10
0
    def parse_xml(self, xml_output):
        """
        Parse XML text into an element tree.

        TODO: Write custom parser to just read the nodes that we need instead of
        reading the whole document.

        @param xml_output The XML text to parse.
        @return The root element of the parsed tree. None if a SyntaxError
                (which includes XML parse errors) is raised; the error and
                the offending input are printed in that case.
        """
        try:
            xml_parser = ET.XMLParser(target=ET.TreeBuilder())
            xml_parser.feed(xml_output)
            root = xml_parser.close()
        except SyntaxError as err:
            print("SyntaxError: %s. %s" % (err, xml_output))
            return None
        else:
            return root
Beispiel #11
0
def parse(file, builder=None, encoding=None):
    """Parse HTML with BeautifulSoup and rebuild it as an element tree.

    :param file: an object with a ``read`` method, a path to open, or --
        as a best-effort fallback when reading fails -- the markup itself.
    :param builder: tree builder receiving ``start``/``data``/``end`` calls;
        an ``ET.TreeBuilder`` is created when none is given.
    :param encoding: source encoding.  When omitted, "utf-8" is tried first
        and "iso-8859-1" is used if decoding fails.
    :return: the ``html`` element; the document root is renamed to ``html``
        when it does not consist of a single ``html`` child.

    Note: this is Python 2 code (``unicode`` builtin, BeautifulSoup 3 API).
    """
    bob = builder

    def emit(soup):
        # Replay one soup node (and, recursively, its children) on the builder.
        if isinstance(soup, BS.NavigableString):
            if isinstance(soup, ignorable_soup):
                return
            bob.data(unescape(soup))
        else:
            attrib = dict((k, unescape(v)) for k, v in soup.attrs)
            bob.start(soup.name, attrib)
            for s in soup:
                emit(s)
            bob.end(soup.name)

    # determine encoding (the document charset is not reliable)
    try:
        if hasattr(file, "read"):
            text = file.read()
        else:
            # We opened it, so we are responsible for closing it.
            with open(file) as handle:
                text = handle.read()
    except Exception:
        # Best effort: treat the argument itself as the markup.  Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        text = file
    if not encoding:
        try:
            encoding = "utf-8"
            unicode(text, encoding)
        except UnicodeError:
            encoding = "iso-8859-1"
    soup = BS.BeautifulSoup(text,
                            convertEntities="html",
                            fromEncoding=encoding)
    # build the tree
    if not bob:
        bob = ET.TreeBuilder()
    emit(soup)
    root = bob.close()
    # wrap the document in a html root element, if necessary
    if len(root) == 1 and root[0].tag == "html":
        return root[0]
    root.tag = "html"
    return root
Beispiel #12
0
 def make_page(self, sid, prevsid, nextsid):
     global g_paths
     filename = self._make_href(sid)
     print "Creating:", filename
     tb = et.TreeBuilder()
     revs = []
     jsarray = []
     page_attributes = {"title": self._make_title(sid, True),
                        "version": self.versionstring}
     if prevsid:
         page_attributes["prev"] = self._make_href(prevsid)
         page_attributes["prevtitle"] = self._make_title(prevsid)
     if nextsid:
         page_attributes["next"] = self._make_href(nextsid)
         page_attributes["nexttitle"] = self._make_title(nextsid)
     tb.start("page", page_attributes)
     self._recursive_build_node(tb, sid, jsarray=jsarray, revs=revs)
     tb.end("page")
     stylesheet_name = g_paths.xsldir + "page.xsl"
     tf = None
     if revs:
         tf = self._make_temporary_stylesheet(stylesheet_name, revs)
         stylesheet_name = tf.name
     page_string= subprocess.Popen(["xsltproc", "--nonet", "--novalid", stylesheet_name, "-"],
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE
                               ).communicate(et.tostring(tb.close(), "utf-8"))[0]
     if tf: os.unlink(tf.name)
     #create javascript variables for controlling folding
     page_string = page_string.replace(
         "<!--jsvariable-->",
         "var folding = " + str(jsarray) + ";")
     #replace xml links with xhtml links
     page_string = self._process_links(page_string)
     #insert link bar
     page_string = page_string.replace("<!--linkbar-->", self._build_linkbar(sid))
     #convert cgm images to png images
     page_string = re.sub("<img[^>]*></img>", self.cgmtopng, page_string)
     #write the file
     of = open(g_paths.html_output + filename, "w")
     of.write(page_string)
Beispiel #13
0
 def _build_linkbar(self, ident):
     """Return the serialised ``<div class="linkbar">`` markup for a page.

     For a falsy ``ident`` (the contents page) the breadcrumb paragraph holds
     a single space.  Otherwise it starts with a "Contents" link followed by
     one " >> "-separated link per entry of ``self.fcm.get_ancestors(ident)``
     (none for the special ident "REV"); link texts longer than 30
     characters are cropped and suffixed with "...".  A nested
     ``otherlinks`` block with a "Search" link is always appended.
     """
     max_title_len = 30
     bar = et.TreeBuilder()
     bar.start("div", {"class": "linkbar"})
     bar.start("p", {})
     if not ident:
         # contents page passes in empty list
         bar.data(u" ")
     else:
         bar.start("a", {"title": "Contents",
                         "href": "index.html"})
         bar.data("Contents")
         bar.end("a")
         ancestors = [] if ident == "REV" else self.fcm.get_ancestors(ident)
         for ancestor in ancestors:
             bar.data(" >> ")
             label = self._make_title(ancestor)
             bar.start("a", {"title": label,
                             "href": self._make_href(ancestor)})
             bar.data(label[:max_title_len])
             if len(label) > max_title_len:
                 bar.data("...")
             bar.end("a")
     bar.end("p")
     bar.start("div", {"class": "otherlinks"})
     bar.start("p", {})
     bar.data(u"| ")
     bar.start("a", {"href": "search.html"})
     bar.data("Search")
     bar.end("a")
     bar.end("p")
     bar.end("div")
     bar.end("div")
     markup = et.tostring(bar.close(), "utf-8")
     return markup
Beispiel #14
0
 def __init__(self):
     """Initialise the base class and open a ``root`` element on ``self.tb``.

     ``self.tb`` is a ``TreeBuilder`` that creates its elements through the
     ``H`` factory (defined elsewhere in this module).
     """
     super().__init__()
     self.tb = builder = etree.TreeBuilder(element_factory=H)
     builder.start("root", {})
Beispiel #15
0
 def __init__(self, target=None):
     """Initialise the HTML parser and remember the tree-builder target.

     A fresh ``ET.TreeBuilder`` is used when ``target`` is omitted or falsy.
     """
     HTMLParser.__init__(self)
     if not target:
         target = ET.TreeBuilder()
     self.__builder = target
Beispiel #16
0
 def __init__(self):
     """Create the underlying ``TreeBuilder`` and an empty element stack.

     ``self.data`` and ``self.close`` are bound directly to the builder's
     methods of the same name.
     """
     builder = ET.TreeBuilder()
     self.tree_builder = builder
     self.close = builder.close
     self.data = builder.data
     self.stack = []
Beispiel #17
0
    def __init__(self, filename=None, run=None, overwrite=False):
        """Set up the writer from the header of the run's mzML file.

        Two passes are made over ``run.info['filename']``:

        1. The file is read as plain text up to (not including) the first
           line containing the substring ``run``; lines containing
           ``indexedmzML`` are skipped.  A pymzML ``software`` entry is added
           after the first line containing ``softwareList``.  The collected
           text, closed with ``</mzML>``, is parsed into ``self.newTree``.
        2. The file (gzip-decoded when the name ends in ``.gz``) is parsed
           incrementally to capture fresh, childless copies of the ``run``
           and ``spectrumList`` elements in ``self.lookup``, together with a
           ``spectrumIndeces`` index element.

        :param filename: stored on ``self.filename``; not otherwise used here.
        :param run: object exposing ``info['filename']`` (the input path).
        :param overwrite: accepted but not used in this method.

        NOTE(review): pass 1 opens the path as plain text even for ``.gz``
        inputs -- confirm whether gzip input is expected to work here.
        """

        cElementTree.register_namespace("", "http://psi.hupo.org/ms/mzml")
        self.filename = filename
        self.lookup = {}

        self.newTree = None
        self.TreeBuilder = cElementTree.TreeBuilder()
        self.run = run
        self.info = {'counters': ddict(int)}

        source_path = self.run.info['filename']

        # Pass 1: read the header as original text.  Pieces are collected in
        # a list and joined once; the handle is closed by the context manager.
        header_parts = []
        pymzml_tag_written = False
        with open(source_path, 'r') as text_source:
            for line in text_source:
                if 'indexedmzML' in line:
                    # writing of indexed mzML is not possible at the moment
                    continue
                if 'run' in line:
                    # the run is appended from the original parser to avoid messing
                    # with the new xml tree, we break before the run data starts
                    break

                header_parts.append(line)
                if 'softwareList' in line and pymzml_tag_written is False:
                    addon = cElementTree.Element(
                        'software',
                        {
                            'id'      : 'pymzML',
                            'version' : "0.7.6"
                        }
                    )
                    cElementTree.SubElement(
                        addon,
                        'cvParam',
                        {
                            'accession' : 'MS:1000531',
                            'cvRef'     : 'MS',
                            'name'      : 'pymzML Writer',
                            'version'   : '0.7.6',
                        }
                    )
                    header_parts.append(
                        cElementTree.tostring(addon, encoding='utf-8'))
                    pymzml_tag_written = True
        header_parts.append('</mzML>\n')

        self.newTree = cElementTree.fromstring(''.join(header_parts))

        # Pass 2: structural parse, stopping at the first spectrumList.
        if source_path.endswith('.gz'):
            import gzip
            import codecs
            parse_source = codecs.getreader("utf-8")(gzip.open(source_path))
        else:
            parse_source = open(source_path, 'r')

        # The loop stops early (break), so close the handle explicitly.
        try:
            for event, element in cElementTree.iterparse(parse_source, events=(b'start', b'end')):
                if event == b'start':
                    if element.tag.endswith('}run'):
                        self.lookup['run'] = cElementTree.Element(element.tag, element.attrib)
                    if element.tag.endswith('}spectrumList'):
                        self.lookup['spectrumList'] = \
                            cElementTree.Element(element.tag, element.attrib)
                        self.lookup['spectrumIndeces'] = \
                            cElementTree.Element('index', {'name': 'spectrum'})
                        break
        finally:
            parse_source.close()
        return
Beispiel #18
0
 def __init__(self):
     """Initialise the HTML parser and attach a fresh ``TreeBuilder`` as ``self.tb``."""
     HTMLParser.__init__(self)
     builder = etree.TreeBuilder()
     self.tb = builder
Beispiel #19
0
 def __str__(self):
     """Return the XML serialisation of this object as text.

     ``self.build`` fills a fresh ``TreeBuilder``; the resulting element is
     serialised as UTF-8 and decoded back to a string.
     """
     tree_builder = ET.TreeBuilder()
     self.build(tree_builder)
     serialized = ET.tostring(tree_builder.close(), encoding='utf-8')
     return serialized.decode('utf-8')
Beispiel #20
0
def unflatten(tree):
    """Rebuild serialised XML from a flattened list of path rows.

    Each row of ``tree`` is a sequence of strings.  Columns that do not start
    with ``'!'`` form the element path of the row; a column starting with
    ``'!'`` carries text data (everything after the ``'!'``) for the element
    named by the last path component, and marks the row as not opening a new
    element.

    :param tree: sequence of rows as described above.
    :return: ``''`` when ``tree`` is empty or no root element was produced,
        otherwise the result of ``cElementTree.tostring`` on the root.
    """
    builder = cElementTree.TreeBuilder()

    if not tree:
        return ''

    tag = None
    root_element = None
    paths = []
    for row in tree:
        current_path = []
        data = ''
        create_new_element = True
        for column in row:
            if column[0] == '!':
                create_new_element = False
                data = column[1:]
            else:
                current_path.append(column)

        tags_to_end = []

        last_path = paths[-1] if paths else []

        # Same path repeated without data: the previous element is closed
        # so that a new sibling with the same name can be opened.
        if not data and current_path == last_path:
            tags_to_end.append(tag)

        # Close every component of the previous path that is missing from,
        # or differs from, the current path.
        for i, a in enumerate(last_path):
            tag_to_end = None
            if i >= len(current_path):
                tag_to_end = a
            else:
                b = current_path[i]
                if a != b:
                    tag_to_end = a
            if tag_to_end:
                tags_to_end.append(tag_to_end)

        for tag_to_end in tags_to_end[::-1]:
            builder.end(tag_to_end)
            paths.pop()

        last_path = paths[-1] if paths else []
        # NOTE(review): ``tag_to_end`` is whatever the loops above left
        # behind -- confirm this is the tag intended to be closed here.
        if not data and current_path == last_path:
            builder.end(tag_to_end)
            paths.pop()

        tag = current_path[-1]
        if create_new_element:
            # Pass the attribute dict explicitly: the documented signature
            # is start(tag, attrs), and not every implementation defaults
            # ``attrs``.
            builder.start(tag, {})

        if data:
            builder.data(data)

        paths.append(current_path)

    builder.end(tag)

    root_element = builder.close()
    if root_element is None:
        return ''
    return cElementTree.tostring(root_element)
Beispiel #21
0
	def __init__(self, partial=False, _parsetree_roundtrip=False):
		"""Store the given flags and create the underlying ``TreeBuilder``.

		Both arguments are kept unchanged on the instance; the element stack
		starts empty and no last character is recorded yet.
		"""
		self._b = ElementTreeModule.TreeBuilder()
		self.partial = partial
		self._parsetree_roundtrip = _parsetree_roundtrip
		self._last_char = None
		self.stack = []  #: keeps track of current open elements
 def __init__(self):
     """Initialise the HTML parser; no tree exists yet and a fresh builder is attached."""
     HTMLParser.HTMLParser.__init__(self)
     self.tree = None
     builder = ElementTree.TreeBuilder()
     self.tree_builder = builder
Beispiel #23
0
 def __init__(self, encoding=None):
     """Initialise the HTML parser and attach a fresh ``TreeBuilder`` as ``self._target``.

     ``encoding`` is accepted but not used in this method.
     """
     _HTMLParser.__init__(self)
     target = ElementTree.TreeBuilder()
     self._target = target
Beispiel #24
0
 def getroot(self):
     """Build the XML representation of this object and return its root.

     ``self.build`` fills a fresh ``TreeBuilder``; the element returned by
     closing that builder is the result.
     """
     tree_builder = ET.TreeBuilder()
     self.build(tree_builder)
     root = tree_builder.close()
     return root