コード例 #1
0
ファイル: CommentsScrape.py プロジェクト: fro391/Investing
def StockKeyWords(symbol):
    """Fetch the Reuters company-profile page for *symbol* and record its keywords.

    The page text is downloaded and parsed through ``Article``, reduced to
    ASCII on a single line, and passed to ``articletext.getKeywords``. When
    more than one keyword comes back, a row of the form
    ``"<symbol>,<kw1> <kw2> ... \\n"`` is written to the module-level
    ``myfile`` while holding the module-level ``lock``.

    Any exception raised while scraping is reported on stdout and the symbol
    is skipped; nothing is written for it. The function always returns None.

    NOTE(review): ``Article``, ``articletext``, ``lock`` and ``myfile`` are
    defined outside this block; ``Article`` looks like newspaper's Article
    class -- confirm against the file's imports.
    """
    url = "http://www.reuters.com/finance/stocks/companyProfile?symbol=" + symbol
    try:
        a = Article(url)
        a.download()
        a.parse()
        # Drop non-ASCII characters and newlines so the row stays on one line.
        StrippedArticle = a.text.encode('ascii', 'ignore').replace('\n', '')
        KeyWords = articletext.getKeywords(StrippedArticle)
        # Each keyword is followed by a single space (including the last one),
        # matching the row format the original loop produced.
        toBeWritten = symbol + "," + ''.join(k + ' ' for k in KeyWords) + '\n'
    except Exception as ex:
        template = "An exception of type {0} occured. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print("%s %s" % (message, symbol))
        # Bail out here: KeyWords may never have been bound, and a partial row
        # should not reach the output file.
        return

    # Only write a row when more than one keyword was extracted.
    if len(KeyWords) > 1:
        lock.acquire()
        try:
            myfile.write(toBeWritten)
        finally:
            lock.release()
コード例 #2
0
ファイル: main.py プロジェクト: MannyAcevedo/adbnews
import gethtml
import articletext
import articletrans
import getarticle
import test1
from bs4 import BeautifulSoup
import paraphrase
import googlesearch
import articletext

# Crawl state. `visited` is declared but not used in the visible part of the script.
root_topic = "news"
visited = []
topics = []
urls = []

# Seed the URL list with the Google results for the root topic.
urls.extend(googlesearch.getGoogleLinks(root_topic))

# Fetch each article, collect keywords not seen before, and echo the text.
for u in urls:
    mytext = articletext.getArticle(u)
    keywords = articletext.getKeywords(mytext)
    for k in keywords:
        if k in topics:
            continue
        topics.append(k)
    print(mytext)

#print paraphrase.getTrans("http://sparkbrowser.com")
コード例 #3
0
import gethtml
import articletext

# Article whose keywords are printed.
# NOTE(review): the URL has no scheme prefix -- confirm articletext.getArticle accepts that.
url = "www.nytimes.com/2016/02/15/sports/westminster-dog-show-judge-is-alone-on-center-stage.html"

# Fetch the article, then print the extracted keywords in one go.
article = articletext.getArticle(url)
print(articletext.getKeywords(article))
コード例 #4
0
ファイル: main.py プロジェクト: shivam04/python_crawl
import gethtml
import articletext

# Article whose keywords are printed, one per line.
url = "http://english.jagran.com/uttar-pradesh-suspense-on-priyanka-gandhis-role-for-uttar-pradesh-assembly-polls-85069"

# An earlier variant (disabled) fetched the page with gethtml.getHtmlText(url)
# and printed articletext.getArticleText(...) of the result.
article = articletext.getArticle(url)
keywords = articletext.getKeywords(article)
for w in keywords:
    print(w)