Esempio n. 1
0
def strip_article(url):
    """Download *url* and print the ``content`` of its Readability object.

    The response body is decoded as UTF-8 and passed, together with the
    URL, to ``readability.Readability``; the ``content`` attribute of the
    resulting object is written to standard output.

    Args:
        url: Address of the page to download.

    Returns:
        None.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    response = urllib2.urlopen(url)
    try:
        # Decoding is hard-wired to UTF-8 regardless of what the server
        # declares.
        htmlcode = response.read().decode('utf-8')
    finally:
        # Always release the connection, even when reading or decoding fails.
        response.close()

    article = readability.Readability(htmlcode, url)

    print(article.content)
Esempio n. 2
0
def get_html(url):
    """Fetch *url*, run it through Readability and save the article.

    The page is requested via the module-level ``s`` object using the
    module-level ``headers``. The article title (UTF-8 encoded) becomes the
    file name under ``./coolshell/`` and the article body is re-encoded as
    GBK, dropping characters GBK cannot represent, before being handed to
    ``save_file``.

    Args:
        url: Address of the page to download.

    Returns:
        None.
    """
    print(url)

    # NOTE(review): verify=False is forwarded to s.get; if ``s`` is a
    # requests session this turns off TLS certificate verification --
    # confirm that is intentional.
    response_text = s.get(url, headers=headers, verify=False).text
    page_bytes = response_text.encode("utf-8")

    doc = readability.Readability(page_bytes, url)

    title_bytes = doc.getArticleTitle().encode("utf-8")
    # NOTE(review): the title is used verbatim as a file name; characters
    # such as "/" are not filtered here.
    filename = "./coolshell/" + title_bytes + ".html"
    print(filename)

    # Round-trip through UTF-8 first, then emit GBK while ignoring anything
    # that has no GBK representation.
    utf8_body = doc.grabArticle().encode("utf-8")
    gbk_body = utf8_body.decode("utf-8").encode("gbk", "ignore")

    save_file(filename, gbk_body)
Esempio n. 3
0
def LIXscore(text):
    """Return ``readability.Readability(text).LIX()`` converted to float.

    Args:
        text: Input handed unchanged to ``readability.Readability``.

    Returns:
        The LIX value as a ``float``.
    """
    return float(readability.Readability(text).LIX())
Esempio n. 4
0
def ARIscore(text):
    """Return ``readability.Readability(text).ARI()`` converted to float.

    Args:
        text: Input handed unchanged to ``readability.Readability``.

    Returns:
        The ARI value as a ``float``.
    """
    return float(readability.Readability(text).ARI())
Esempio n. 5
0
def computeRead(text):
    """Return the Flesch-Kincaid grade level of *text* as an integer.

    The value comes from
    ``readability.Readability(text).FleschKincaidGradeLevel()`` and is
    converted with ``int()``, which discards the fractional part of a float
    result.

    Args:
        text: Input handed unchanged to ``readability.Readability``.

    Returns:
        The grade level as an ``int``.
    """
    grade_level = readability.Readability(text).FleschKincaidGradeLevel()
    return int(grade_level)