def StockKeyWords(symbol):
    """Extract keywords from a symbol's Reuters profile page and append them to the output file.

    Builds the Reuters company-profile URL for ``symbol``, downloads and
    parses it with ``Article``, strips non-ASCII characters and newlines
    from the text, and asks ``articletext.getKeywords`` for its keywords.
    The resulting row has the form ``"<symbol>,<kw1> <kw2> ... \\n"`` (every
    keyword is followed by a single space) and is written to the
    module-level ``myfile`` while holding the module-level ``lock``.

    ``Article``, ``articletext``, ``lock`` and ``myfile`` are not defined in
    this block; they are expected to exist at module level.

    Args:
        symbol: Ticker symbol appended to the Reuters URL and used as the
            first field of the written row.

    Returns:
        None. Any exception raised while fetching or extracting is reported
        on stdout together with the symbol, and nothing is written for it.
    """
    url = "http://www.reuters.com/finance/stocks/companyProfile?symbol=" + symbol

    # Bound before the try so the length check below can never hit an
    # unbound name when the download/parse step fails.
    KeyWords = []
    try:
        a = Article(url)
        a.download()
        a.parse()
        StringArticle = a.text.encode('ascii', 'ignore')
        StrippedArticle = StringArticle.replace('\n', '')
        KeyWords = articletext.getKeywords(StrippedArticle)
        # Build the whole row in one go; a failure here leaves nothing
        # half-assembled to be written later.
        toBeWritten = symbol + "," + ''.join(word + ' ' for word in KeyWords) + '\n'
    except Exception as ex:
        template = "An exception of type {0} occured. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        # Single formatted string: same output as `print message, symbol`
        # and valid under both the print statement and the print function.
        print("%s %s" % (message, symbol))
        return

    # write variable to file if there are keywords
    # NOTE(review): `> 1` skips symbols with exactly one keyword; the comment
    # above suggests `> 0` may have been intended -- confirm before changing.
    if len(KeyWords) > 1:
        # Explicit acquire/release kept because the concrete type of `lock`
        # is not visible from this block.
        lock.acquire()
        try:
            myfile.write(toBeWritten)
        finally:
            lock.release()
import gethtml
import articletext
import articletrans
import getarticle
import test1
from bs4 import BeautifulSoup
import paraphrase
import googlesearch
import articletext  # repeated in the original import list; harmless no-op

# Script state: links to crawl, distinct keywords seen so far, and a
# (currently unused) list of visited links.
urls = []
topics = []
visited = []

root_topic = "news"

# Seed the crawl list with every link Google returns for the root topic.
urls.extend(googlesearch.getGoogleLinks(root_topic))

for u in urls:
    mytext = articletext.getArticle(u)
    # Collect each keyword once, in first-seen order.
    for k in articletext.getKeywords(mytext):
        if k in topics:
            continue
        topics.append(k)
    # NOTE(review): the original layout was lost; printing once per article
    # (inside this loop) is assumed -- confirm against the original file.
    print(mytext)

#print paraphrase.getTrans("http://sparkbrowser.com")
import gethtml
import articletext

# Fetch one article and print the keyword collection returned for it.
url = "www.nytimes.com/2016/02/15/sports/westminster-dog-show-judge-is-alone-on-center-stage.html"

article = articletext.getArticle(url)
keywords = articletext.getKeywords(article)
print(keywords)
import gethtml
import articletext

# Fetch one article and print each of its keywords on its own line.
url = "http://english.jagran.com/uttar-pradesh-suspense-on-priyanka-gandhis-role-for-uttar-pradesh-assembly-polls-85069"

# Alternative path via the raw HTML helpers, left disabled as in the original:
#web_text = gethtml.getHtmlText(url)
#print articletext.getArticleText(web_text)

article = articletext.getArticle(url)
keywords = articletext.getKeywords(article)
for keyword in keywords:
    print(keyword)