def scrap(request):
    """Collect NDTV world-news listing pages 1-14 and store their articles.

    For each page number the listing URL is handed to
    ``articletext.getArticle``; every item it returns is saved as a
    ``NewsNdtv`` row built from the item's ``title``, ``image``, ``place``,
    ``day`` and ``short_desc`` keys.  Progress is reported with ``print``.

    Args:
        request: The incoming request object; passed straight to ``render``.

    Returns:
        Whatever ``render`` returns for ``index.html`` with a context holding
        ``entry``.  ``entry`` is the title of the row returned by
        ``NewsNdtv.objects.all().first()``.  When that lookup yields ``None``
        (no stored rows), ``entry`` keeps the value set during the loop:
        ``"done"`` if at least one page produced articles, otherwise ``""``.

    NOTE(review): the original file had lost its indentation; the nesting
    below (if/else per page, final lookup after the loop) is the most
    plausible reading -- confirm against the project history.
    """
    entry = ""
    for page in range(1, 15):
        url = "http://www.ndtv.com/world-news/page-" + str(page)
        articles = articletext.getArticle(url)

        # Truthiness also covers a None result, where len() would raise.
        if not articles:
            print("Error")
            continue

        # Single-argument parenthesised print behaves the same as the
        # Python 2 print statement and also parses under Python 3.
        print("Entry Done Of page " + str(page) + " Of world news")
        for article in articles:
            news = NewsNdtv(
                title=article['title'],
                image=article['image'],
                place=article['place'],
                day=article['day'],
                short_desc=article['short_desc'],
            )
            news.save()
            print(news.title)
        entry = "done"

    # first() gives None when there are no rows (presumably NewsNdtv is a
    # Django model -- TODO confirm); guard so an empty table does not raise
    # AttributeError on ``.title``.
    first_news = NewsNdtv.objects.all().first()
    if first_news is not None:
        entry = first_news.title
    print(entry)
    return render(request, 'index.html', {'entry': entry})
# Script: gather links for a root topic, then collect the distinct keywords
# found in the article behind each link.
#
# Imports stay in their original order in case any of these project modules
# does work at import time (not visible from here).
import gethtml
import articletext
import articletrans
import getarticle
import test1
from bs4 import BeautifulSoup
import paraphrase
import googlesearch

urls = []
topics = []
visited = []  # never read or written below
root_topic = "news"

# Seed the work list with every link googlesearch yields for the root topic.
urls.extend(googlesearch.getGoogleLinks(root_topic))

for link in urls:
    mytext = articletext.getArticle(link)
    keywords = articletext.getKeywords(mytext)
    for keyword in keywords:
        # Keep first-seen order and skip keywords already recorded.
        if keyword in topics:
            continue
        topics.append(keyword)
    # Single-argument parenthesised print matches the Python 2 print
    # statement and also parses under Python 3.
    # NOTE(review): the original indentation was lost; printing once per link
    # is the assumed placement -- confirm.
    print(mytext)

# Disabled experiment kept for reference:
# print paraphrase.getTrans("http://sparkbrowser.com")
# articletext.py
# gethtml.py
#
# Script: pass a fixed YouTube channel URL to articletext.getArticle and
# print whatever it returns.
import gethtml
import articletext

url = "https://www.youtube.com/user/creeveshft/videos"

article = articletext.getArticle(url)
# Single-argument parenthesised print matches the Python 2 print statement
# and also parses under Python 3.
print(article)
# Script: run articletext.getArticle on a fixed NYTimes URL and print the
# result of articletext.getKeywords for it.
import gethtml
import articletext

# NOTE(review): this URL has no "http://" scheme -- confirm that
# articletext.getArticle accepts it in this form.
url = "www.nytimes.com/2016/02/15/sports/westminster-dog-show-judge-is-alone-on-center-stage.html"

article = articletext.getArticle(url)
keywords = articletext.getKeywords(article)
# Single-argument parenthesised print matches the Python 2 print statement
# and also parses under Python 3.
print(keywords)
# Script: run articletext.getArticle on a fixed Jagran URL and print each
# item that articletext.getKeywords returns for it, one per line.
import gethtml
import articletext

url = "http://english.jagran.com/uttar-pradesh-suspense-on-priyanka-gandhis-role-for-uttar-pradesh-assembly-polls-85069"

# Disabled two-step alternative kept for reference:
# web_text = gethtml.getHtmlText(url)
# print articletext.getArticleText(web_text)

article = articletext.getArticle(url)
keywords = articletext.getKeywords(article)
for keyword in keywords:
    # Single-argument parenthesised print matches the Python 2 print
    # statement and also parses under Python 3.
    print(keyword)