Esempio n. 1
0
    def write_as_html(self, foutput, name: str, url: str,
                      tables: List[ContentTable], html_doc: html.Element):
        """Render one page summary, write it standalone and add it to html_doc.

        The summary is a <div> holding: an <h1> with ``name``, a <div> with
        the string returned by ``self.cache.read_date_time_str(name + ".html")``,
        the ``new_element`` of every table, a <br> and a link to ``url``.

        A deep copy of that <div> is wrapped in <html><body> and written to
        ``foutput`` (pretty-printed); the <div> itself, followed by an <hr>,
        is appended to ``html_doc``.
        """
        section = html.Element("div")

        title = html.Element("h1")
        title.text = name
        section.append(title)

        stamp = html.Element("div")
        stamp.text = self.cache.read_date_time_str(name + ".html")
        section.append(stamp)

        for table in tables:
            section.append(table.new_element)

        section.append(html.Element("br"))

        link = html.Element("a")
        link.attrib["href"] = url
        link.text = url
        section.append(link)

        # Standalone document gets a copy so the original <div> can still be
        # attached to the combined document below.
        page = html.Element("html")
        page.append(html.Element("body"))
        page[0].append(deepcopy(section))
        foutput.write(html.tostring(page, pretty_print=True))

        html_doc.append(section)
        html_doc.append(html.Element("hr"))
Esempio n. 2
0
    def write_miss_to_html(self, name: str, url: str, msg: str,
                           html_doc: html.Element):
        """Append a summary block for a page that produced only a message.

        The block is a <div> holding: an <h1> with ``name``, a <div> with the
        string returned by ``self.cache.read_date_time_str(name + ".html")``,
        a <span> with ``msg``, a <br> and a link to ``url``. The <div> and a
        trailing <hr> are appended to ``html_doc``.
        """
        section = html.Element("div")

        title = html.Element("h1")
        title.text = name
        section.append(title)

        stamp = html.Element("div")
        stamp.text = self.cache.read_date_time_str(name + ".html")
        section.append(stamp)

        note = html.Element("span")
        note.text = msg
        section.append(note)

        section.append(html.Element("br"))

        link = html.Element("a")
        link.attrib["href"] = url
        link.text = url
        section.append(link)

        html_doc.append(section)
        html_doc.append(html.Element("hr"))
Esempio n. 3
0
def get_progressbar_element(percentage):
    """Build a Bootstrap-style progress bar element for ``percentage``.

    ``percentage`` is truncated with ``int()``. The inner bar gets the
    ``progress-bar-success`` class at 70 and above, ``progress-bar-warning``
    at 50 and above, and ``progress-bar-danger`` otherwise. Its width, label
    and ``aria-valuenow`` all reflect the truncated percentage.

    Returns the outer ``<div class="progress">`` element containing the bar.
    """
    percentage = int(percentage)
    if percentage >= 70:
        state = "success"  # green
    elif percentage >= 50:
        state = "warning"
    else:
        state = "danger"

    progressbar_child_element = Element("div")
    progressbar_child_element.attrib["class"] = (
        "progress-bar progress-bar-{} progress-bar-striped".format(state))
    progressbar_child_element.attrib["role"] = "progressbar"
    # Report the real value to assistive technology; this used to be the
    # hard-coded placeholder "10" regardless of the percentage shown.
    progressbar_child_element.attrib["aria-valuenow"] = "{}".format(percentage)
    progressbar_child_element.attrib["aria-valuemin"] = "0"
    progressbar_child_element.attrib["aria-valuemax"] = "100"
    progressbar_child_element.attrib[
        "style"] = "width:{}%; text-align:left; padding-left: 5px;".format(
            percentage)
    progressbar_child_element.text = "{}%".format(percentage)

    progressbar_element = Element("div")
    progressbar_element.attrib["class"] = "progress"
    progressbar_element.attrib["style"] = "margin-bottom:0;"
    progressbar_element.append(progressbar_child_element)
    return progressbar_element
Esempio n. 4
0
def process_img(self, doc, el):
    """Process an <img> tag in the source document.

    Steps, in order:

    * ``self.add_alt_tags(el)`` is always called.
    * If the element carries a non-empty ``nomobileresize`` attribute, that
      attribute is removed and nothing else is done.
    * If the element has a ``src``, it is rewritten through ``self.rewrite``
      and replaced by a ``@@shortimageurl`` link registered in the
      ``IMobileImageShortURLStorage`` utility. Any failure in that step is
      swallowed so a broken image cannot prevent the page from rendering;
      in that case the ``alt`` attribute is dropped.
    * The image is wrapped in an ``<a>`` pointing at the original ``src``
      and its explicit ``width``/``height`` attributes are removed.
    * Floats are cleared when ``self.needs_clearing(el)`` says so, and
      ``self.add_processed_class(el)`` is called.

    ``doc`` is not used by this function.
    """
    self.add_alt_tags(el)

    # Skip over images with the nomobileresize attribute
    if el.attrib.pop("nomobileresize", "") != "":
        return

    src = el.attrib.get("src", None)
    if src:
        originalSrc = src
        site = getSite()
        # catch exceptions to ensure broken images don't
        # prevent the page from rendering
        try:
            src = self.rewrite(src)
            shorturl = getUtility(IMobileImageShortURLStorage)
            key = shorturl.getkey(src)
            if key is None:
                key = shorturl.suggest()
                # just check that suggest() is working as expected
                assert shorturl.get(key) is None
                shorturl.add(key, src)
            src = '%s/@@shortimageurl/%s' % (site.absolute_url(), key)
            el.attrib["src"] = src
        except Exception:
            # "except Exception" rather than a bare except so that
            # SystemExit / KeyboardInterrupt still propagate.
            #
            # blank alt text; pop() because the attribute may be missing and
            # a KeyError here would defeat the purpose of this handler
            el.attrib.pop("alt", None)
            el.attrib["src"] = src
            # Image processing errors are deliberately not logged (they only
            # created noise in the error log), so no diagnostic message is
            # built here: reading site.REQUEST.* could itself raise.

        # Make image clickable and point to original src
        a = Element('a')
        a.attrib['href'] = originalSrc
        el.getparent().replace(el, a)
        a.append(el)

        # Remove explicit width declarations
        el.attrib.pop("width", None)
        el.attrib.pop("height", None)

    if self.needs_clearing(el):
        self.clear_floats(el)

    self.add_processed_class(el)
Esempio n. 5
0
File: xml.py Progetto: brabadu/pml
def _dumps_xml_from_pml_nodes(root_node):
    """Recursively turn a ``(name, attributes, sub_nodes)`` triple into an element.

    ``attributes`` is passed to ``Element`` as keyword arguments and every
    entry of ``sub_nodes`` is converted the same way and added as a child,
    in order.
    """
    tag, attrs, children = root_node
    node = Element(tag, **attrs)
    node.extend([_dumps_xml_from_pml_nodes(child) for child in children])
    return node
Esempio n. 6
0
 def footer(self, node):
     """Return the standard <footer> element for generated HTML files.

     The footer holds a horizontal rule followed by a paragraph naming
     ``node.sourcefile`` and the current local time.
     """
     block = Element('footer')
     block.append(E.HR())
     notice = "Generated automatically from {source} at {time:%d %b %Y %H:%M}.".format(
         source=node.sourcefile, time=datetime.datetime.now())
     block.append(E.P(notice))
     return block
Esempio n. 7
0
def get_user(username, data):
    """Return a <span> element representing ``username``.

    With falsy ``data`` the span only contains the user name as text.
    Otherwise the span gets the CSS class computed by
    ``Profile.get_user_css_class(*data)`` and wraps a link to the
    ``user_page`` URL for that user.
    """
    if data:
        wrapper = Element('span', {'class': Profile.get_user_css_class(*data)})
        anchor = Element('a', {'href': reverse('user_page', args=[username])})
        anchor.text = username
        wrapper.append(anchor)
        return wrapper

    plain = Element('span')
    plain.text = username
    return plain
Esempio n. 8
0
def get_user(username, data):
    """Build the <span> used to display ``username``.

    Falsy ``data``: a bare span whose text is the user name.
    Otherwise: a span carrying the class returned by
    ``Profile.get_user_css_class(*data)``, containing an <a> that links to
    the reversed ``user_page`` URL and shows the user name.
    """
    has_profile_data = bool(data)
    if not has_profile_data:
        bare_span = Element('span')
        bare_span.text = username
        return bare_span

    css_class = Profile.get_user_css_class(*data)
    styled_span = Element('span', {'class': css_class})
    profile_url = reverse('user_page', args=[username])
    profile_link = Element('a', {'href': profile_url})
    profile_link.text = username
    styled_span.append(profile_link)
    return styled_span
Esempio n. 9
0
def wrap_set(dom, child_tag, parent_tag):
    """Wrap unbroken sets of elements in a parent container:
        - <li> in a <ul>
        - <tr> in a <table>

    Every element matching ``child_tag`` is moved into a ``parent_tag``
    container. A new container is started whenever the match is not the
    element that was expected to follow the previous one.
    """
    # Sentinel that never equals an element, so the first match always
    # opens a container.
    expected = 0
    for child in dom.cssselect(child_tag):
        if expected != child:
            container = Element(parent_tag)
            # NOTE(review): presumably places the container where the child
            # currently sits - confirm against the insert() helper.
            insert(container, child)
        container.append(child)
        # Prefer whatever follows the child's parent (the container at this
        # point, assuming parent() returns the direct parent); fall back to
        # the child's own next sibling.
        following = parent(child).getnext()
        expected = child.getnext() if following is None else following
Esempio n. 10
0
 def _apply_headers_anchors(html: str) -> str:
     """Insert a self-linking anchor icon into every top-level header.

     ``html`` is wrapped via ``wrap_unwrap_fake_tag`` and parsed; each direct
     child whose tag is in ``HEADERS`` gets an ``<a id=... href=#...>``
     (containing an ``iconify`` <span>) inserted as its first child, and a
     space appended to its leading text. The id comes from
     ``make_header_id``. The serialised result is unwrapped again and
     returned.
     """
     root_element = fromstring(wrap_unwrap_fake_tag(html))
     for element in root_element:
         if element.tag not in HEADERS:
             continue
         # element.text is None when the header starts with a child element
         # (e.g. <h2><code>x</code></h2>); fall back to the header's full
         # text so an id can still be derived.
         header_text = element.text or ''.join(element.itertext())
         id_ = make_header_id(header_text)
         a_element = Element('a', {'id': id_, 'href': f'#{id_}'})
         span_element = Element('span', attrib={'class': 'iconify',
                                                'data-icon': HTMLGen.ANCHOR_LINK_ICON_CLASS})
         a_element.append(span_element)
         # "+=" on a None text would raise TypeError.
         element.text = (element.text or '') + ' '
         element.insert(0, a_element)
     html = tostring(root_element)
     html = wrap_unwrap_fake_tag(html, wrap=False)
     return html
Esempio n. 11
0
def wrap_set(dom, child_tag, parent_tag):
    """Wrap unbroken sets of elements in a parent container:
        - <li> in a <ul>
        - <tr> in a <table>

    Each ``child_tag`` match ends up inside a ``parent_tag`` element; a
    fresh container is created whenever a match is not the element that was
    recorded as the expected successor of the previous match.
    """
    next_in_run = 0  # placeholder: compares unequal to any element
    for item in dom.cssselect(child_tag):
        starts_new_run = next_in_run != item
        if starts_new_run:
            wrapper = Element(parent_tag)
            # NOTE(review): insert() is defined elsewhere; it is assumed to
            # position the wrapper relative to the item - confirm.
            insert(wrapper, item)
        wrapper.append(item)
        # Record which element would continue this run.
        after_parent = parent(item).getnext()
        if after_parent is not None:
            next_in_run = after_parent
        else:
            next_in_run = item.getnext()
Esempio n. 12
0
    def _add_html_info_row(self, t: html.Element, label: str, val: str, cls: str = None):
        """Append a two-cell row (``label``, ``val``) to the element ``t``.

        Both <td> cells receive ``cls`` as their ``class`` attribute when
        ``cls`` is not None. The row's tail is set to a newline plus
        indentation so the serialised markup stays readable.
        """
        tr = html.Element("tr")

        # Label and value cells are built identically.
        for cell_text in (label, val):
            td = html.Element("td")
            td.text = cell_text
            if cls is not None:
                td.attrib["class"] = cls
            tr.append(td)

        tr.tail = "\n      "
        t.append(tr)
Esempio n. 13
0
def brs_to_paragraphs(tree, inline_tags=None):
    """
    Return an lxml tree with all <br> elements stripped and paragraphs put in
    place where necessary.

    ``inline_tags`` lists the tags that are kept inside the paragraph
    currently being built (default: ``['a']``); any other tag closes the
    current paragraph. Trees without children or without any <br> are
    returned unchanged (the same object). Otherwise a new element with the
    same tag is built and returned; attributes of rebuilt elements are not
    copied.
    """
    # add these tags to p's that we're currently building, any other tags will
    # close the current p
    inline_tags = inline_tags or ['a']

    # if this tree doesn't have any child elements, just return it as is
    if len(tree) == 0:
        return tree

    # if this tree doesn't contain any <br> tags, we don't need to touch it
    if tree.find('.//br') is None:
        return tree

    # XXX: We're building a whole new tree here and leaving out any attributes.
    # A) That might be a little slower and more memory intensive than modifying
    # the tree in place, and B) we're dropping any attributes on block elements.
    # The latter is probably fine for current use, but certainly not ideal.
    new_tree = Element(tree.tag)

    # the paragraph currently being built; None while no paragraph is open
    p = None

    # if this tree starts out with text, create a new paragraph for it, and
    # add it to the tree
    if tree.text:
        p = E.P()
        p.text = tree.text
        new_tree.append(p)

    for e in tree:
        if e.tag == 'br':
            # avoid adding empty p elements
            if e.tail is None:
                continue
            # start a new p
            p = E.P()
            p.text = e.tail
            new_tree.append(p)
        # if this is a block tag, and it has trailing text, that text needs to
        # go into a new paragraph... only if the tail has actual content and
        # not just whitespace though.
        elif e.tail and re.match(r'[^\s]', e.tail) and e.tag not in inline_tags:
            p = E.P()
            p.text = e.tail
            e.tail = ''
            new_tree.append(e)
            new_tree.append(p)
        # keep inline tags inside the current paragraph
        elif e.tag in inline_tags:
            if p is None:
                # no paragraph open yet (inline tag comes first, or right
                # after a block element): open one instead of failing on an
                # unbound name or reusing a paragraph that sits earlier in
                # the output
                p = E.P()
                new_tree.append(p)
            p.append(e)
        else:
            # propagate inline_tags so nested levels follow the same rules
            new_tree.append(brs_to_paragraphs(e, inline_tags))
            # a block element closes the current paragraph; content after
            # it must not be appended to a paragraph placed before it
            p = None

    return new_tree
Esempio n. 14
0
def brs_to_paragraphs(tree, inline_tags=None):
    """
    Return an lxml tree with all <br> elements stripped and paragraphs put in
    place where necessary.

    ``inline_tags`` lists the tags that are kept inside the paragraph
    currently being built (default: ``["a"]``); any other tag closes the
    current paragraph. Trees without children or without any <br> are
    returned unchanged (the same object). Otherwise a new element with the
    same tag is built and returned; attributes of rebuilt elements are not
    copied.
    """
    # add these tags to p's that we're currently building, any other tags will
    # close the current p
    inline_tags = inline_tags or ["a"]

    # if this tree doesn't have any child elements, just return it as is
    if len(tree) == 0:
        return tree

    # if this tree doesn't contain any <br> tags, we don't need to touch it
    if tree.find(".//br") is None:
        return tree

    # XXX: We're building a whole new tree here and leaving out any attributes.
    # A) That might be a little slower and more memory intensive than modifying
    # the tree in place, and B) we're dropping any attributes on block elements.
    # The latter is probably fine for current use, but certainly not ideal.
    new_tree = Element(tree.tag)

    # the paragraph currently being built; None while no paragraph is open
    p = None

    # if this tree starts out with text, create a new paragraph for it, and
    # add it to the tree
    if tree.text:
        p = E.P()
        p.text = tree.text
        new_tree.append(p)

    for e in tree:
        if e.tag == "br":
            # avoid adding empty p elements
            if e.tail is None:
                continue
            # start a new p
            p = E.P()
            p.text = e.tail
            new_tree.append(p)
        # if this is a block tag, and it has trailing text, that text needs to
        # go into a new paragraph... only if the tail has actual content and
        # not just whitespace though.
        elif e.tail and re.match(r"[^\s]", e.tail) and e.tag not in inline_tags:
            p = E.P()
            p.text = e.tail
            e.tail = ""
            new_tree.append(e)
            new_tree.append(p)
        # keep inline tags inside the current paragraph
        elif e.tag in inline_tags:
            if p is None:
                # no paragraph open yet (inline tag comes first, or right
                # after a block element): open one instead of failing on an
                # unbound name or reusing a paragraph that sits earlier in
                # the output
                p = E.P()
                new_tree.append(p)
            p.append(e)
        else:
            # propagate inline_tags so nested levels follow the same rules
            new_tree.append(brs_to_paragraphs(e, inline_tags))
            # a block element closes the current paragraph; content after
            # it must not be appended to a paragraph placed before it
            p = None

    return new_tree
Esempio n. 15
0
 def clean(self, element):
     """Return a cleaned copy of ``element``, or None when it is dropped.

     * ``<img>`` elements with a ``src`` are downloaded (resolved against
       ``self.url``), moved into the task's ``download`` directory and their
       ``src`` is replaced by the stored file name.
     * ``<a>`` elements whose ``href`` contains ``/Category`` are dropped
       (moin specific).
     * ``span``/``p``/``div`` elements with neither children nor
       non-whitespace text are dropped.
     * Everything else is copied without its ``class``, ``style`` and ``id``
       attributes, and its children are cleaned recursively.

     NOTE(review): only ``element.text`` is copied, so tail text following
     an element is not carried over - confirm this is intended.
     """
     cleanElement = None
     dropEmpty = ('span', 'p', 'div')
     downloadDir = self.task.getProperty('download')
     # images without a src cannot be downloaded; they are just copied below
     if 'img' == element.tag and element.attrib.get('src'):
         src = urlparse.urljoin(self.url, element.attrib['src'])
         tmpPath, info = urllib.urlretrieve(src)
         disposition = info.getheader('Content-Disposition')
         filename = None
         if disposition:
             # The header may hold zero or several ";"-separated parameters
             # (e.g. just "inline"), so look for filename= explicitly
             # instead of unpacking a fixed number of parts.
             for param in disposition.split(';')[1:]:
                 key, sep, value = param.partition('=')
                 if sep and key.strip().lower() == 'filename':
                     filename = value.strip().strip('"')
                     break
         if filename:
             # the header is remote input: never let it carry a directory
             filename = os.path.basename(filename)
         if not filename:
             filename = os.path.basename(tmpPath)
         # make sure the name ends with the MIME subtype as extension
         splitf = filename.split('.')
         if len(splitf) < 2 or info.subtype != splitf[-1]:
             filename = '.'.join((filename, info.subtype))
         element.attrib['src'] = filename
         os.rename(tmpPath, '/'.join((downloadDir, filename)))
     #moin specific hack for now
     # .get(): anchors without an href (e.g. named anchors) must not fail
     if 'a' == element.tag and '/Category' in element.attrib.get('href', ''):
         return cleanElement
     hasText = bool(element.text) and bool(element.text.strip())
     if element.tag not in dropEmpty or len(element) > 0 or hasText:
         cleanElement = Element(element.tag)
         cleanElement.text = element.text
         stripattribs = ('class', 'style', 'id')
         for name in element.attrib:
             if name not in stripattribs:
                 cleanElement.set(name, element.attrib[name])
         for child in element:
             cleaned = self.clean(child)
             if cleaned is not None:
                 cleanElement.append(cleaned)
     return cleanElement
Esempio n. 16
0
    def load_info(self, item: ChangeItem, body: html.Element):
        """Fill ``body`` with the header section for ``item``.

        Appends, in order: an <h3> with ``item.name``, the element returned
        by ``html_helpers.make_source_links("extract", item.name,
        item.source)`` and a <br>. Text/tail values are set only to control
        the whitespace between the serialised elements.
        """
        indent = "\n    "

        body.text = indent

        heading = html.Element("h3")
        heading.text = item.name
        heading.tail = "\n\n    "
        body.append(heading)

        links = html_helpers.make_source_links("extract", item.name, item.source)
        body.append(links)
        # the links element is the last child at this point
        body[-1].tail = indent

        line_break = html.Element("br")
        line_break.tail = indent
        body.append(line_break)
Esempio n. 17
0
def get_user_rating(username, rating):
    """Return an <a class="rate-group"> element linking to the user's page.

    With a truthy ``rating`` the link contains a ``rate-box`` span (whose
    inner span height comes from ``rating_progress(rating)``) followed by a
    ``rating`` span with the user name; both carry the class returned by
    ``rating_class(rating)``. Otherwise the link just shows the user name.
    """
    link = Element('a', {
        'class': 'rate-group',
        'href': reverse('user_page', args=[username])
    })
    if not rating:
        link.text = username
        return link

    css = rating_class(rating)
    box = Element('span', {'class': 'rate-box ' + css})
    fill = Element('span',
                   {'style': 'height: %3.fem' % rating_progress(rating)})
    box.append(fill)
    label = Element('span', {'class': 'rating ' + css})
    label.text = username
    link.append(box)
    link.append(label)
    return link
Esempio n. 18
0
def get_user_rating(username, data):
    """Return the element used to display ``username`` with its rating.

    Falsy ``data``: a plain <span> with the user name.
    Otherwise ``data[1]`` is taken as the rating and an
    <a class="rate-group"> linking to the user's page is returned. With a
    truthy rating it contains a ``rate-box`` span (inner span height from
    ``rating_progress(rating)``) and a ``rating`` span with the user name;
    with a falsy rating it only shows the user name.
    """
    if not data:
        plain = Element('span')
        plain.text = username
        return plain

    rating = data[1]
    group = Element('a', {'class': 'rate-group', 'href': reverse('user_page', args=[username])})
    if not rating:
        group.text = username
        return group

    css = rating_class(rating)
    box = Element('span', {'class': 'rate-box ' + css})
    box.append(Element('span', {'style': 'height: %3.fem' % rating_progress(rating)}))
    label = Element('span', {'class': 'rating ' + css})
    label.text = username
    group.append(box)
    group.append(label)
    return group
def massarge_input_file(input_file_name):
    """Clean up an InDesign-exported HTML file and return its root element.

    The file is checked with ``bad_classes`` and parsed, then modified in
    place:

    * the ``Contents-Box`` div is removed;
    * generated paragraph-override classes are stripped from paragraphs and
      headings;
    * the file's own name is removed from link targets, leaving internal
      links as bare fragments;
    * hanging-indent paragraphs get their text before the first tab wrapped
      in ``<span class="hanging1">``;
    * bullet marker spans are dropped;
    * question numbers and ministerial statement numbers are wrapped in
      ``<span class="number-span">``;
    * front page tables get ``role="presentation"`` and fixed column widths;
    * sponsor groups are split on tabs into grid-column spans;
    * redundant classes / wrapper spans are removed;
    * every heading is demoted one level (h1 -> h2, ..., h5 -> h6).

    Returns the modified root element.
    """
    # test for bad classes
    bad_classes(input_file_name)

    input_root = html.parse(input_file_name).getroot()

    # remove the contents div
    contents_div = input_root.xpath('body/div[@class="Contents-Box"]')
    if len(contents_div) > 0:
        contents_div[0].getparent().remove(contents_div[0])

    # remove all the _idGenParaOverrides
    # The previous pattern was spelled "_idGenParraOveride" with no hyphen.
    # NOTE(review): InDesign is believed to emit "_idGenParaOverride-<n>";
    # both spellings are accepted here - confirm against a real export.
    override_re = re.compile(r' ?_idGenParr?aOverr?ide-?\d{1,3}')
    for paragraph in input_root.xpath('//p|//h1|//h2|//h3|//h4|//h5|//h6'):
        class_attr = paragraph.get('class', default='')
        if override_re.search(class_attr) is not None:
            # strip() so the exact @class matches below still work when the
            # override was the first class in the attribute
            paragraph.set('class', override_re.sub('', class_attr).strip())

    # remove filename for internal hyperlinks
    inDesign_file_name = Path(input_file_name).name
    for link in input_root.xpath('//a'):
        if 'href' in link.attrib:
            link.attrib['href'] = link.attrib['href'].replace(
                inDesign_file_name, '')

    # there are 3 paragraph styles with hanging indents that must be manipulated
    for paragraph in input_root.xpath(
            '//p[@class="paraMotionSub-Paragraph" or '
            '@class="paraMotionSub-Sub-Paragraph" or '
            '@class="paraMotionSub-Sub-Sub-Paragraph"][text()]'):
        # [text()] also matches paragraphs whose only text follows a child
        # element, in which case paragraph.text is None
        leading_text = paragraph.text
        if not leading_text:
            continue
        label, tab, remainder = leading_text.partition('\u0009')
        if not tab:
            # dont do anything if there is no tab
            continue
        span_hanging = Element('span')
        span_hanging.set('class', 'hanging1')
        span_hanging.text = label
        span_hanging.tail = remainder
        # insert at the front: the span replaces the paragraph's leading
        # text, so it has to stay ahead of any existing child elements
        paragraph.insert(0, span_hanging)
        paragraph.text = ''

    # sort out all the bullets
    for bullet in input_root.xpath('//span[@class="pythonFindBullet"]'):
        bullet.drop_tree()

    # sort the numbers
    for number in input_root.xpath('//p[@class="paraQuestion"]/span[1]'):
        # consider changing this in InDesign
        number.attrib['class'] = 'charBallotNumber'
        new_span = Element('span')
        new_span.classes.add('number-span')
        number_parent = number.getparent()
        new_span.append(number)
        number_parent.insert(0, new_span)

    # sort ministerial statements
    for statement in input_root.xpath(
            '//p[@class="paraMinisterialStatement"]/span[1]'):
        statement.attrib['class'] = 'charItemNumber'
        # keep the text that followed the number outside the wrapper span
        statement_tail_text = statement.tail
        statement.tail = ''
        new_span = Element('span')
        new_span.classes.add('number-span')
        new_span.tail = statement_tail_text
        number_parent = statement.getparent()
        new_span.append(statement)
        number_parent.insert(0, new_span)

    # sort the front page tables
    for table in input_root.xpath('//table[@class="Front-Page-Table"]'):
        # added as a result of an accessibility audit
        table.set('role', 'presentation')
    for colgroup in input_root.xpath(
            '//table[@class="Front-Page-Table"]/colgroup'):
        # zip() stops early, so a colgroup with fewer than two columns no
        # longer raises IndexError
        for col, width in zip(colgroup, ('24%', '76%')):
            col.attrib.pop("class", None)
            col.attrib['width'] = width

    # sort motion sponsor groups
    sponsor_groups_xpath = '//p[@class="paraMotionSponsorGroup"]' \
                           '|//p[@class="MotionAmmendmentSponsorGroup"]' \
                           '|//p[@class="MotionAmmendmentSponsorGroup"]/span' \
                           '|//p[@class="A2A-SponsorGroup"]'
    for sponsor_group in input_root.xpath(sponsor_groups_xpath):
        if not sponsor_group.text:
            continue
        sponsor_group.classes.add('row')
        # split text on the tab character (InDesign puts in)
        sponsor_names = sponsor_group.text.split('\u0009')
        sponsor_group.text = None
        for sponsor_name in sponsor_names:
            sponsor_span = SubElement(sponsor_group, 'span')
            sponsor_span.classes.update(('col-12', 'col-sm-6', 'col-lg-4'))
            sponsor_span.text = sponsor_name

    # <strong class="Bold"> is overkill
    for strong_ele in input_root.xpath('//strong'):
        strong_ele.classes.discard('Bold')

    # dont need <span class="Hyperlink"> in a <a>
    for span in input_root.xpath('//a/span[@class="Hyperlink"]'):
        span.drop_tag()

    # seems like sometimes there are empty span.charStandingOrderReference
    for span in input_root.xpath(
            '//span[@class="charStandingOrderReference"]'):
        if not span.text or span.text.isspace():
            span.drop_tag()

    # Front-Page-Table doesnt need to be on the table the row and the td
    for tr in input_root.xpath(
            '//table[@class="Front-Page-Table"]//tr[@class="Front-Page-Table"]'
    ):
        tr.classes.discard('Front-Page-Table')
        for child in tr.iterchildren('td', 'th'):
            child.classes.discard('Front-Page-Table')

    # Demote every heading one level (h1 -> h2, ..., h5 -> h6). Working from
    # the deepest level upwards keeps a heading from being demoted twice.
    heading_tags = ['h6', 'h5', 'h4', 'h3', 'h2', 'h1']
    for new_heading_tag, heading_tag in zip(heading_tags, heading_tags[1:]):
        for heading in input_root.xpath(f'//{heading_tag}'):
            heading.tag = new_heading_tag

    # return the modified input html root element
    return input_root
Esempio n. 20
0
    perc = float(output[1].split(':')[1].split('%')[0])
    gcov = output[2].strip().split()[1].strip("'")

    # move generated gcov to coverage folder
    new_dir = os.path.join(target_dir, os.path.dirname(source))
    try:
        os.makedirs(new_dir)
    except OSError:
        pass
    os.rename(os.path.join(obspy_dir, gcov), os.path.join(new_dir, gcov))
    cov.append((filename, os.path.join(new_dir, gcov), perc))


# GENERATE HTML
# One table row per entry of ``cov``: a link to the gcov file (path made
# relative to ``target_dir``) and the coverage percentage.
page = fromstring("<html><table></table></html>")
table = page.xpath('.//table')[0]
for name, gcov, perc in cov:
    gcov = gcov.replace(target_dir, './')
    link = Element('a', attrib={'href': gcov})
    link.text = name
    name_cell = Element('td')
    name_cell.append(link)
    perc_cell = Element('td')
    perc_cell.text = "%6.2f%%" % perc
    row = Element('tr')
    row.append(name_cell)
    row.append(perc_cell)
    table.append(row)
index_path = os.path.join(target_dir, 'index.html')
with open(index_path, 'wb') as fp:
    fp.write(tostring(page))

cleanup('*.o')
Esempio n. 21
0
    def _add_data_row(self, t: html.Element, x: ChangeItem, kind: str):
        """Append a table row describing the change item ``x`` to ``t``.

        Nothing is added for ``main_sheet.html`` or for ``*_data.html`` items
        whose status is ``duplicate``. Otherwise the row has six cells: name
        (link), status, last changed (or failed) time, time difference to
        ``self.start_date``, live page link and the source links returned by
        ``html_helpers.make_source_links(kind, name, source)``.
        """

        # Example of the data behind a ChangeItem:
        # {
        #   "name": "AK.html",
        #   "status": "unchanged",
        #   "url": "http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-19/default.aspx",
        #   "msg": null,
        #   "complete": true,
        #   "added": "2020-03-13T06:17:50.550545",
        #   "checked": "2020-03-16T22:00:07.143700",
        #   "updated": "2020-03-16T21:40:10.611841",
        #   "failed": null,
        #   "source": "google-states"
        # }

        name = x.name
        status = x.status

        if name == "main_sheet.html":
            return
        if name.endswith("_data.html") and status == "duplicate":
            return

        # whitespace placed after each element to keep the markup readable
        prefix = "\n      "

        tr = html.Element("tr")
        tr.tail = prefix

        # Name
        td = html.Element("td")
        td.tail = prefix
        a = html.Element("a")
        a.attrib["href"] = name
        a.text = name.replace(".html", "")
        td.append(a)
        tr.append(td)

        # Status
        td = html.Element("td")
        td.tail = prefix
        td.attrib["class"] = status
        td.text = status
        tr.append(td)

        # Last Changed
        updated_at = x.updated
        failed_at = x.failed
        td = html.Element("td")
        td.tail = prefix
        if failed_at is not None:
            td.attrib["class"] = "failed"
            td.text = udatetime.to_displayformat(failed_at)
        else:
            td.text = udatetime.to_displayformat(updated_at)
        tr.append(td)

        # Delta
        td = html.Element("td")
        td.tail = prefix

        v = updated_at if failed_at is None else failed_at
        # NOTE(review): the sample above shows lower-case statuses; confirm
        # that "CHANGED" is the spelling actually produced.
        td.text = udatetime.format_difference(self.start_date, v) if status != "CHANGED" else ""
        tr.append(td)

        # Live Page
        url = x.url
        td = html.Element("td")
        td.tail = prefix
        a = html.Element("a")
        a.attrib["href"] = url
        if len(url) < 80:
            a.text = url
        else:
            # long URLs are truncated, with the full URL in a tooltip span
            a.text = url[0: 80] + " ..."
            a.attrib["class"] = "tooltip"
            s = html.Element("span")
            s.text = url
            s.attrib["class"] = "tooltiptext"
            a.append(s)
        td.append(a)
        tr.append(td)

        # Pipeline
        source = x.source
        if source is None:
            source = "google-states"
        td = html.Element("td")
        td.tail = prefix[:-2]
        div = html_helpers.make_source_links(kind, name, source)
        td.append(div)
        tr.append(td)

        # attach the finished row once (it used to be appended three times)
        t.append(tr)
Esempio n. 22
0
        # read stdout
        filename = fp.readline().strip().split()[1].strip("'")
        perc = float(fp.readline().split(':')[1].split('%')[0])
        gcov = fp.readline().strip().split()[1].strip("'")
        # move genereted gcov to coverage folder
        new_dir = join(target_dir, dirname(source))
        try:
            makedirs(new_dir)
        except OSError:
            pass
        rename(join(obspy_dir, gcov), join(new_dir, gcov))
        cov.append((filename, join(new_dir, gcov), perc))

# GENERATE HTML
# One table row per entry of ``cov``: a link to the gcov file (path made
# relative to ``target_dir``) and the coverage percentage.
page = fromstring("<html><table></table></html>")
table = page.xpath('.//table')[0]
for name, gcov, perc in cov:
    gcov = gcov.replace(target_dir, './')
    link = Element('a', attrib={'href': gcov})
    link.text = name
    name_cell = Element('td')
    name_cell.append(link)
    perc_cell = Element('td')
    perc_cell.text = "%6.2f%%" % perc
    row = Element('tr')
    row.append(name_cell)
    row.append(perc_cell)
    table.append(row)
index_path = join(target_dir, 'index.html')
with open(index_path, 'wb') as fp:
    fp.write(tostring(page))

cleanup('*.o')
Esempio n. 23
0
def message_proc(message):
  """Convert one t.me message element into a ``PyRSS2Gen.RSSItem``.

  The item links to ``https://t.me/s/<data-post>``. Its description is the
  serialised message text, preceded by the replied-to message and followed
  by the link preview (each in a <blockquote>) when present. The title is
  the plain text of the message, cut to 30 characters.
  """
  def _markup(node):
    # serialised element without surrounding whitespace or carriage returns
    return tostring(node, encoding=str).strip().replace('\r', '')

  url = f"https://t.me/s/{message.get('data-post')}"

  author_nodes = message.xpath('.//span[@class="tgme_widget_message_from_author"]')
  author = author_nodes[0].text_content() if author_nodes else ''

  time_node = message.xpath('.//a[@class="tgme_widget_message_date"]/time')[0]
  date = datetime.datetime.fromisoformat(time_node.get('datetime'))

  text = message.xpath('.//div[starts-with(@class, "tgme_widget_message_text ")]')[0]
  del text.attrib['class']
  content = _markup(text)

  replies = message.xpath('.//a[@class="tgme_widget_message_reply"]')
  if replies:
    # turn the reply link into a <div> whose first child carries the link
    reply = replies[0]
    reply.tag = 'div'
    first_child = reply[0]
    first_child.tag = 'a'
    first_child.set('href', reply.get('href').replace('https://t.me/', 'https://t.me/s/'))
    del reply.attrib['href']
    content = "<blockquote>%s</blockquote>" % _markup(reply) + content

  previews = message.xpath('.//a[@class="tgme_widget_message_link_preview"]')
  if previews:
    linkpreview = previews[0]
    linkpreview.tag = 'div'

    # site name becomes <div><strong>name</strong></div>
    sitename = linkpreview.xpath('.//div[@class="link_preview_site_name"]')[0]
    sitestrong = Element('strong')
    sitestrong.text = sitename.text_content()
    sitediv = Element('div')
    sitediv.append(sitestrong)
    sitename.getparent().replace(sitename, sitediv)

    # the title takes over the preview's link target
    previewtitle = linkpreview.xpath('.//div[@class="link_preview_title"]')[0]
    previewtitle.tag = 'a'
    previewtitle.set('href', linkpreview.get('href'))
    del linkpreview.attrib['href']

    images = linkpreview.xpath('.//i[@class="link_preview_right_image"]')
    if images:
      # the image URL is the quoted part of the inline style
      image = images[0]
      image.tag = 'img'
      image.set('src', image.attrib.pop('style').split("'")[1])
      image.set('style', 'max-height: 5em;')
    content += "<blockquote>%s</blockquote>" % _markup(linkpreview)

  content_text = text.text_content()
  title = "%s……" % (content_text[:30]) if len(content_text) > 30 else content_text

  return PyRSS2Gen.RSSItem(
    title = title,
    link = url,
    guid = url,
    description = content,
    author = author,
    pubDate = date,
  )