예제 #1
0
def xsltparse(xsltpath, xmlpath, resultpath):
    """Apply an XSLT stylesheet to an XML file and write the result to disk.

    Args:
        xsltpath: Path of the XSLT stylesheet; must be an existing file.
        xmlpath: Path of the XML document to transform; must be an existing
            file.
        resultpath: Destination handed to the ``write`` method of the
            transformation result.

    Returns:
        None in every case. Missing inputs and transformation failures are
        reported through the ``<module>.xsltparse`` logger, not raised.
    """
    name = "xsltparse"
    log = logging.getLogger(__name__ + "." + name)

    # Validate both inputs up front, stylesheet first, and stop at the first
    # one that is not a regular file.
    for label, path in (("xsltpath", xsltpath), ("xmlpath", xmlpath)):
        if not os.path.isfile(path):
            log.error("%s %s is not a valid file!", label, path)
            return

    try:
        transform = etree.XSLT(etree.parse(xsltpath))

        # https://lxml.de/parsing.html
        # ns_clean drops redundant namespace declarations, huge_tree lifts
        # the parser's size/depth safety limits, and recover keeps parsing
        # through malformed XML instead of failing.
        parser = etree.XMLParser(ns_clean=True, huge_tree=True, recover=True)
        source = etree.parse(xmlpath, parser)

        transform(source).write(resultpath)
    except Exception:
        # Best-effort boundary: record the traceback and give up quietly.
        log.exception("Failed to parse xml using xslt!")
        return

    # The original message lacked the separating space before the path.
    log.debug("Parsed xml written to %s", resultpath)
예제 #2
0
def get_deps_html(url, dest):
    """Queue downloads for the assets referenced by the HTML file ``dest``.

    The file is parsed with lxml and every ``src``/``href``/``data``
    reference found on img, input, link, object and script elements is
    resolved against ``url``. References that carry no host, or whose host is
    ``access.redhat.com``, are put on the queue ``q`` as
    ``(download, absolute_url, local_path)`` and passed to
    ``update_url_html`` together with the directory of ``dest``. All other
    hosts are ignored. Finally the parsed document is written back to
    ``dest``.
    """
    document = lxml.html.parse(dest)
    asset_refs = document.xpath(
        "//img/@src | //input/@src | //link/@href | //object/@data | //script/@src"
    )
    for ref in asset_refs:
        absolute = urllib.parse.urljoin(url, ref)
        parts = urllib.parse.urlparse(absolute)

        # Only same-site assets are mirrored: no host at all, or the Red Hat
        # portal host.
        if not parts.netloc or parts.netloc == "access.redhat.com":
            # Drop the leading "/" so the URL path becomes a relative
            # filesystem path.
            local_path = os.path.normpath(parts.path[1:])
            q.put((download, absolute, local_path))

            update_url_html(ref, os.path.dirname(dest))

    document.write(dest)
def post2markdown(tree):
    """Reduce a parsed blog page to its <article> and save it as HTML.

    Processes the HTML and keeps only the <article> content, which is where
    the blog post actually is. The breadcrumb paragraph and the article
    footer (when present) are removed, the iframes, images and article are
    handed to ``post_videos``, ``post_imgs`` and ``post_clean_html``, and the
    serialized article is written to ``tmp_article.html`` in the current
    directory.

    Args:
        tree: Parsed lxml document containing an ``<article>`` element.

    Returns:
        Tuple ``(date, author, title)``: the ``datetime`` attribute of the
        first ``<time>``, and the text of the first ``<a rel="author">`` and
        of the first ``<h1 class="entry-title single-title">``.

    Raises:
        IndexError: If one of the required elements is missing.
    """
    article = tree.xpath('//article')[0]
    header = tree.xpath('//header[@class="article-header"]')[0]
    p_blog = article.xpath('.//p[@id="breadcrumb"]')[0]  # contains: "Blog:"
    # NOTE(review): remove() only accepts a direct child, so this assumes the
    # breadcrumb sits immediately under the article header - confirm.
    header.remove(p_blog)

    # Evaluate the footer query once instead of once for the test and once
    # for the lookup.
    footers = article.xpath('.//footer')
    if footers:
        article.remove(footers[0])

    # '//iframe' is an absolute path, so this matches iframes anywhere in the
    # document, not only inside the article.
    iframes = article.xpath('//iframe')
    post_videos(iframes)  # videos: replace video's iframe with <a><img>

    images = article.xpath('.//img')
    post_imgs(images)
    post_clean_html(article)

    # get info
    date = article.xpath('//time')[0].attrib['datetime']
    author = article.xpath('//a[@rel="author"]')[0].text
    title = article.xpath('//h1[@class="entry-title single-title"]')[0].text

    # save modified html
    html_article = lxml.html.tostring(article,
                                      pretty_print=True,
                                      include_meta_content_type=True,
                                      encoding='utf-8',
                                      method='html',
                                      with_tail=False)
    # tostring() with an explicit encoding yields a byte string, so the file
    # must be opened in binary mode; text mode raises TypeError on Python 3.
    # The context manager closes the file even if the write fails.
    with open('tmp_article.html', 'wb') as out:
        out.write(html_article)
    return (date, author, title)
예제 #4
0
        # The event summary is the text of the first span whose class
        # contains 'discipline'.
        event.add(
            'SUMMARY',
            event_info.xpath("span[contains(@class, 'discipline')]")[0].text)

        # Both of these spans are indexed with [0], so a missing span raises
        # IndexError.
        kind = event_info.xpath("span[contains(@class, 'kindOfWork')]")[0].text
        lecturer = event_info.xpath(
            "span[contains(@class, 'lecturer')]")[0].text
        # The group span is optional: fall back to an empty string when the
        # query matches nothing.
        group_info = event_info.xpath("span[contains(@class, 'group')]")
        group = group_info[0].text if group_info else ''
        # Description is kind, lecturer and group, one per line.
        event.add('DESCRIPTION', '\n'.join([kind, lecturer, group]))
        event.add('COMMENT', status)
        cal.add_component(event)

# Write the output of cal.to_ical() to "<group_id>.ics"; the file is opened
# in binary mode.
with open(f'{group_id}.ics', 'w+b') as ics:
    ics.write(cal.to_ical())

# Write a small UTF-8 index.html page with a single list entry linking to the
# generated .ics file.
# NOTE(review): group_id, group_name and status are interpolated into the
# markup without HTML escaping - confirm these values are trusted.
with open('index.html', 'w', encoding='utf-8') as html:
    html.write(f'''
<!DOCTYPE html>
<head>
    <meta charset="utf-8">
</head>
<body>
    <ul>
        <li>
            <a href="{group_id}.ics">{group_name}</a> ({status})
        </li>
    </ul>
</body>
''')
                # Insert an extra <script> element right after the last entry
                # of script_list. The injected JavaScript walks
                # data["programs"] and prefixes every outputs[j]["file"] with
                # the path of meme_path relative to the directory that holds
                # reSt_html_path.
                # NOTE(review): "\/" in a non-raw Python string is an
                # unrecognized escape; the backslash is kept literally, but
                # newer Python versions warn about it - confirm intent.
                last_script = script_list[-1]
                add_link_script = """  
  var progs = data["programs"];
  for (var i = 0; i < progs.length; i++) {
    var prog = progs[i];
    if (prog["outputs"].length > 0) {
      var outputs = prog["outputs"];
      for (var j = 0; j < outputs.length; j++) {
        outputs[j]["file"] =  "%s\/" + outputs[j]["file"]
      }
    }
  }
"""%(os.path.relpath(meme_path,os.path.dirname(reSt_html_path)))
                script_el = lxml.html.builder.SCRIPT(add_link_script)
                last_script.addnext(script_el)
                # Save the patched page under a fixed name in the current
                # working directory.
                html.write("meme-chip_index.html",method="html")

                # Reference the saved page from the reSt document as a raw
                # HTML file.
                doc.add(ReStRaw(format="html",options={'file':"meme-chip_index.html"}))
            #end if (os.path.exists(meme_index_path)):

    doc.write()
    doc.close()

    # 6. convert reSt to PDF and HTML
#    rst2html_call = 'rst2html.py --stylesheet-path=/nfs/antdata/web_stage/css/lsr.css ' \
#                   '%s %s'%(reSt_path,reSt_html_path)
    # Build the rst2html.py command line from the stylesheet path, the reSt
    # input path and the HTML output path.
    rst2html_call = 'rst2html.py --stylesheet-path=%s ' \
        '%s %s'%(css_path,reSt_path,reSt_html_path)
    # Echo the command to stderr before running it.
    sys.stderr.write(rst2html_call+'\n')
    # NOTE(review): the command is a single string run with shell=True and the
    # paths are not quoted, so spaces or shell metacharacters in them would
    # break or alter the command - confirm the paths are trusted.
    r = call(rst2html_call,shell=True)