Example #1
0
def scrap(request):
    """Scrape NDTV world-news listing pages 1-14 and store every article.

    For each page, ``articletext.getArticle`` is asked for that page's
    articles.  Every article is saved as a ``NewsNdtv`` row built from its
    ``title``, ``image``, ``place``, ``day`` and ``short_desc`` keys.  The
    view then renders ``index.html`` with ``entry`` set to the title of the
    first ``NewsNdtv`` row, or to an empty string when no row exists.

    Args:
        request: the incoming request, handed straight to ``render``.

    Returns:
        The result of ``render(request, 'index.html', {'entry': entry})``.
    """
    for page in range(1, 15):
        url = "http://www.ndtv.com/world-news/page-" + str(page)
        articles = articletext.getArticle(url)

        # Truthiness rather than len(): an empty result and a None result
        # are both reported as an error instead of raising on len(None).
        if not articles:
            print("Error")
            continue

        print("Entry Done Of page " + str(page) + " Of world news")
        for article in articles:
            news = NewsNdtv(title=article['title'],
                            image=article['image'],
                            place=article['place'],
                            day=article['day'],
                            short_desc=article['short_desc'])
            news.save()
            print(news.title)

    # first() yields None on an empty table; fall back to an empty title
    # instead of failing with AttributeError on ``None.title``.
    first_news = NewsNdtv.objects.all().first()
    entry = first_news.title if first_news is not None else ""
    print(entry)
    return render(request, 'index.html', {'entry': entry})
Example #2
0
import gethtml
import articletext
import articletrans
import getarticle
import test1
from bs4 import BeautifulSoup
import paraphrase
import googlesearch
import articletext

# Working state for the crawl.  ``visited`` is initialised here but is not
# read or written anywhere in this part of the script.
urls = []
topics = []
visited = []
root_topic = "news"

# Seed the URL list with every link the search helper yields for the root
# topic (extend replaces the former one-append-per-item copy loop).
urls.extend(googlesearch.getGoogleLinks(root_topic))

for u in urls:
    mytext = articletext.getArticle(u)
    keywords = articletext.getKeywords(mytext)
    # Record each keyword once, keeping first-seen order.
    for k in keywords:
        if k not in topics:
            topics.append(k)
    # Single-argument parenthesised print: same output on Python 2, and
    # also valid syntax on Python 3.
    print(mytext)

#print paraphrase.getTrans("http://sparkbrowser.com")
# articletext.py
# gethtml.py

import gethtml
import articletext

# Page whose extracted article is printed below.
url = "https://www.youtube.com/user/creeveshft/videos"

# Show whatever articletext.getArticle returns for that page.
print(articletext.getArticle(url))
Example #4
0
import gethtml
import articletext

# The URL carries an explicit scheme, like the other scripts that call
# articletext.getArticle; a bare "www..." string is not a complete URL.
# NOTE(review): gethtml's fetch code is not visible here -- confirm it does
# not add a scheme of its own.
url = "http://www.nytimes.com/2016/02/15/sports/westminster-dog-show-judge-is-alone-on-center-stage.html"

article = articletext.getArticle(url)

# Print the keywords extracted from the article in one go.
print(articletext.getKeywords(article))
Example #5
0
import gethtml
import articletext

# Article whose keywords are listed below, one per line.
url = "http://english.jagran.com/uttar-pradesh-suspense-on-priyanka-gandhis-role-for-uttar-pradesh-assembly-polls-85069"

# The two disabled lines show a two-step route (fetch the HTML, then extract
# the text); presumably getArticle covers both steps -- unverified from here.
#web_text = gethtml.getHtmlText(url)
#print articletext.getArticleText(web_text)
article = articletext.getArticle(url)
keywords = articletext.getKeywords(article)
for keyword in keywords:
    print(keyword)