Esempio n. 1
0
	def get(self, news_id):
		"""Render the page for the news item ``news_id``.

		The item is first looked up with ``getNews``. When that returns
		``None`` it is fetched from ``newsUrl``, its ``body`` and ``image``
		fields are passed through ``imgReplace``, and it is stored through
		``News.save`` before rendering. If the remote fetch also returns
		``None``, ``notFound`` is invoked and nothing is rendered.
		"""
		data = getNews(news_id)
		if data is None:
			data = requestData(newsUrl + str(news_id))
			if data is None:
				notFound(self)
				# Stop here: the code below would otherwise index into None.
				return
			data['body'] = imgReplace(data['body'])
			try:
				data['image'] = imgReplace(data['image'])
			except KeyError:
				# No image supplied with the item: fall back to the default.
				data['image'] = 'default-lg.jpg'
			news = News(int(data['id']))
			news.save(data)
		self.render('news.html', data=data)
Esempio n. 2
0
def update_news():
    """Fetch the next Hacker News page and store the stories not yet saved.

    The module-level ``pages`` counter selects the page and is incremented
    on every call. A story is identified by its (title, author) pair.
    Each added story is printed, then the client is redirected to ``/news``.
    """
    global pages
    s = session()
    # 1. Fetch the data from the news site.
    newsdict = get_news(
        'https://news.ycombinator.com/' + "news?p=" + str(pages), 1)
    pages = pages + 1
    # 2. Work out which stories are not in the DB yet. A story is treated
    #    as uniquely identified by the combination of title and author.
    ex_news = s.query(News).options(load_only("title", "author")).all()
    known = {(news.title, news.author) for news in ex_news}
    for news in newsdict:
        key = (news['title'], news['author'])
        if key in known:
            continue
        # Remember the key so a story repeated within this batch is
        # stored only once.
        known.add(key)
        news_add = News(title=news['title'],
                        author=news['author'],
                        url=news['url'],
                        comments=news['comments'],
                        points=news['points'])
        print(news_add)
        # The original printed the literal text '/n'; a line break was meant.
        print('\n')
        s.add(news_add)
    # 3. Persist the stories that were missing.
    s.commit()
    print('end')
    redirect("/news")
Esempio n. 3
0
def update_news():
    """Fetch 10 pages of Hacker News and store the stories not yet saved.

    1. Fetch the data from the news site.
    2. Work out which stories are not in the DB yet; a story is treated as
       uniquely identified by the combination of title and author.
    3. Persist the missing stories, then redirect to ``/news``.
    """
    fetched = get_news('https://news.ycombinator.com/', 10)
    s = session()
    # A set gives constant-time membership tests against the stored rows.
    known = {(news.title, news.author) for news in s.query(News).all()}

    for entry in fetched:
        key = (entry['title'], entry['author'])
        if key in known:
            continue
        # Also guards against the same story appearing twice in the batch.
        known.add(key)
        print('adding to db...')
        s.add(News(title=entry['title'],
                   author=entry['author'],
                   url=entry['url'],
                   comments=entry['comments'],
                   points=entry['points']))

    # One commit for the whole batch instead of one per inserted row.
    s.commit()
    redirect("/news")
Esempio n. 4
0
def update_news():
    """Store up to 50 unseen stories from the Hacker News "newest" listing.

    Ten pages are fetched; a story counts as already stored when a row with
    the same title and author exists. At most 50 new rows are added per
    call. The session is always committed and the client redirected to
    ``/news``, including when fewer than 50 new stories were found.
    """
    pages_parsed = 10
    max_new_rows = 50
    s = session()
    known = {(old.title, old.author) for old in s.query(News).all()}
    new_rows = 0

    probable_news = get_news("https://news.ycombinator.com/newest",
                             pages_parsed)
    for news in probable_news:
        key = (news['title'], news['author'])
        if key in known:
            continue
        known.add(key)
        s.add(News(title=news['title'],
                   author=news['author'],
                   url=news['url'],
                   points=news['points']))
        new_rows += 1
        if new_rows == max_new_rows:
            break

    # Previously commit/redirect ran only when exactly 50 rows had been
    # added, so smaller batches were never saved.
    s.commit()
    print("Commited ", new_rows, " news")
    redirect("/news")
Esempio n. 5
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    1. Fetch the data from the news site.
    2. Work out which stories are not in the DB yet; a story is treated as
       uniquely identified by the combination of title and author.
    3. Persist the missing stories, then redirect to ``/news``.

    Processing stops at the first story that is already stored.
    """
    news = get_news("https://news.ycombinator.com/newest", 1)
    s = session()
    for new in news:
        # Chain .where() so that BOTH conditions are sent to the database.
        # Python's `and` cannot be overloaded by SQLAlchemy, so joining the
        # two comparisons with it does not build an SQL AND.
        (record_exists, ), = s.query(
            exists()
            .where(News.title == new['title'])
            .where(News.author == new['author']))
        if record_exists:
            # NOTE(review): presumably relies on the listing being ordered
            # newest-first, so everything after a known story is known too.
            break
        print('Adding record...')
        record = News(title=new['title'],
                      author=new['author'],
                      url=new['url'],
                      comments=new['comments'],
                      points=new['points'],
                      cleaned=new['cleaned'])
        s.add(record)
    s.commit()
    redirect("/news")
Esempio n. 6
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    1. Fetch the data from the news site.
    2. Work out which stories are not in the DB yet; a story is treated as
       uniquely identified by the combination of title and author.
    3. Persist the missing stories, then redirect to ``/news``.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/newest", n_pages=1)
    # Compare (title, author) pairs. Checking titles and authors in two
    # separate lists rejected any new story whose author (or title) had
    # been seen before in a different story.
    known = {(title, author)
             for title, author in s.query(News.title, News.author)}
    for item in news:
        key = (item.get('title'), item.get('author'))
        if key in known:
            continue
        known.add(key)
        s.add(
            News(author=item['author'],
                 title=item['title'],
                 url=item['url'],
                 points=item['points'],
                 comments=item['comments']))
    # NOTE(review): the purpose of this pause is not visible here; kept as is.
    time.sleep(0.333333334)
    s.commit()
    redirect('/news')
Esempio n. 7
0
def classify_news():
    """Predict labels for the unlabelled part of the DB and render them.

    Rows with ``id`` up to 1000 are used as training data (title -> label);
    rows with a larger ``id`` are classified. Titles are lower-cased and
    stripped of punctuation before being given to the classifier.

    Returns the rendered ``homework06/news_recommendations`` template, whose
    ``rows`` are ``[predicted_label, title, News]`` lists sorted by label.
    """
    s = session()
    strip_punctuation = str.maketrans("", "", string.punctuation)

    def normalize(title):
        """Lower-case ``title`` and remove punctuation characters."""
        return title.translate(strip_punctuation).lower()

    # Query.get() expects a primary-key value and returns a single object,
    # so it cannot be given a filter expression and iterated; filter instead.
    train_rows = (s.query(News)
                  .filter(News.id <= 1000)
                  .order_by(News.id)
                  .all())
    X = [normalize(row.title) for row in train_rows]
    y = [row.label for row in train_rows]

    # Selecting by id range (rather than counting rows) also covers tables
    # whose ids are not contiguous.
    test_rows = (s.query(News)
                 .filter(News.id > 1000)
                 .order_by(News.id)
                 .all())
    X_test = [row.title for row in test_rows]
    info = [News(author=row.author,
                 points=row.points,
                 comments=row.comments,
                 url=row.url)
            for row in test_rows]

    model = NaiveBayesClassifier(alpha=0.01)
    model.fit(X, y)
    predicted_news = model.predict([normalize(title) for title in X_test])

    # Pair every title with its PREDICTED label; the training labels in `y`
    # belong to different rows.
    classified_news = [[label, title, extra]
                       for label, title, extra
                       in zip(predicted_news, X_test, info)]
    classified_news.sort(key=lambda item: item[0])
    return template('homework06/news_recommendations', rows=classified_news)
Esempio n. 8
0
 def refresh_kommersant_corp(self):
     """Extend ``self.news`` with items built from the feed at ``self.links[6]``.

     Only the entries returned by ``self.new_rss_row`` are converted.
     """
     def to_news(entry):
         # Build one News item; argument order follows the News constructor.
         title = entry['title']
         link = entry['link']
         return News(
             title,
             link,
             self.clck_ru.short_link(link),
             # Third '/'-separated piece of the link minus its first four
             # characters (presumably drops a "www." prefix - confirm).
             link.split('/')[2][4:],
             self.parse_kommers_category(entry),
             int(calendar.timegm(entry['published_parsed'])),
         )

     feed = feedparser.parse(self.links[6])
     self.news += [to_news(entry)
                   for entry in self.new_rss_row(feed['entries'])]
Esempio n. 9
0
 def refresh_vedomosti_news(self):
     """Extend ``self.news`` with items built from the feed at ``self.links[2]``.

     Only the entries returned by ``self.new_rss_row`` are converted.
     """
     def to_news(entry):
         # Build one News item; argument order follows the News constructor.
         title = entry['title']
         link = entry['link']
         return News(
             title,
             link,
             self.clck_ru.short_link(link),
             # Third '/'-separated piece of the link minus its first four
             # characters (presumably drops a "www." prefix - confirm).
             link.split('/')[2][4:],
             # Fourth '/'-separated piece of the link.
             link.split('/')[3],
             int(calendar.timegm(entry['published_parsed'])),
         )

     feed = feedparser.parse(self.links[2])
     self.news += [to_news(entry)
                   for entry in self.new_rss_row(feed['entries'])]
Esempio n. 10
0
def update_news():
    """Store every fetched Habr item whose ``id`` is not in the DB yet.

    Uses the module-level session ``s``; each new row is committed right
    after it is added. Finishes by redirecting to ``/news``.
    """
    for item in get_news('https://habrahabr.ru/all/top50', n_pages=1):
        # The EXISTS query yields exactly one single-column row.
        (already_stored, ), = s.query(
            exists().where(News.id == item.get('id')))
        if already_stored:
            continue
        s.add(News(**item))
        s.commit()

    redirect('/news')
Esempio n. 11
0
def update_news():
    """Fetch 33 pages of Hacker News and store the stories not yet saved.

    A story counts as stored when a row with the same title and author
    exists. Finishes by committing and redirecting to ``/news``.
    """
    news = get_news(url='https://news.ycombinator.com/', n_pages=33)
    s = session()
    for item in news:
        duplicate = s.query(News).filter(News.title == item['title'],
                                         News.author == item['author']).first()
        if duplicate:
            # Skip only this story. Stopping here would drop every unseen
            # story listed after the first known one, and nothing visible
            # here guarantees the listing is ordered to make that safe.
            continue
        s.add(News(**item))
    s.commit()
    redirect("/news")
Esempio n. 12
0
 def refresh_rbc_money(self):
     """Extend ``self.news`` with items built from the feed at ``self.links[1]``.

     Only the entries returned by ``self.new_rss_row`` are converted. A
     category of ``'news'`` is reported as ``'money'``.
     """
     def category_of(segment):
         # Map the generic 'news' section onto 'money'; keep anything else.
         if segment == 'news':
             return 'money'
         return segment

     def to_news(entry):
         # Build one News item; argument order follows the News constructor.
         title = entry['title']
         link = entry['link']
         return News(
             title,
             link,
             self.clck_ru.short_link(link),
             # Third '/'-separated piece of the link minus its first six
             # characters.
             link.split('/')[2][6:],
             category_of(link.split('/')[3]),
             int(calendar.timegm(entry['published_parsed'])),
         )

     feed = feedparser.parse(self.links[1])
     self.news += [to_news(entry)
                   for entry in self.new_rss_row(feed['entries'])]
Esempio n. 13
0
def update_news():
    """Fetch 5 "newest" pages and store the stories not yet in the DB.

    A story counts as stored when a row with the same author and title
    exists. Finishes by committing and redirecting to ``/news``.
    """
    s = session()
    news_list = get_news("https://news.ycombinator.com/newest", n_pages=5)
    # With an empty table the set is empty and every story is added, so no
    # separate branch for that case is needed.
    known = {(row.author, row.title) for row in s.query(News).all()}
    for new_news in news_list:
        if (new_news['author'], new_news['title']) not in known:
            s.add(News(**new_news))
    s.commit()
    redirect("/news")
Esempio n. 14
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    A story is looked up by title and author; only stories without a
    matching row are added. Commits once, then redirects to ``/news``.
    """
    fetched = get_news('https://news.ycombinator.com/newest', 1)
    s = session()
    for entry in fetched:
        match = s.query(News).filter(
            News.title == entry['title'],
            News.author == entry['author'],
        ).first()
        if not match:
            s.add(News(**entry))

    s.commit()
    redirect("/news")
Esempio n. 15
0
def add():
    """Show the "new news" form, or create a news item from a POSTed form.

    On POST the module-level ``news_id`` counter is incremented, a ``News``
    built from the ``title`` and ``detail`` form fields is appended to
    ``all_news``, and the client is redirected to the ``news`` endpoint.
    Any other method renders ``new_news.html``.
    """
    global news_id
    if request.method != "POST":
        return render_template("new_news.html")

    news_id += 1
    all_news.append(
        News(news_id, request.form['title'], request.form['detail']))
    return redirect(url_for('news'))
Esempio n. 16
0
def update_news():
    """Store the stories returned by ``get_news()`` that are not saved yet.

    A story is skipped when the DB already holds at least one row with the
    same author and title. Commits once, then redirects to ``/news``.
    """
    s = session()
    for item in get_news():
        duplicates = s.query(News).filter(
            News.author == item['author'],
            News.title == item['title'],
        ).count()
        if duplicates != 0:
            continue
        s.add(News(title=item['title'],
                   author=item['author'],
                   url=item['url'],
                   comments=item['comments'],
                   points=item['points']))
    s.commit()
    redirect("/news")
Esempio n. 17
0
def update_news():
    """Add every story from the first "newest" page to the DB.

    Uses the module-level session ``s``. Commits once, then redirects to
    ``/news``.
    """
    # NOTE(review): no check for already stored stories is made here, so
    # repeated calls insert the same stories again - confirm this is intended.
    columns = ('title', 'author', 'url', 'comments', 'points')
    for entry in get_news('https://news.ycombinator.com/newest', 1):
        s.add(News(**{name: entry[name] for name in columns}))
    s.commit()
    redirect("/news")
Esempio n. 18
0
 def refresh_rbc(self):
     """Extend ``self.news`` with items built from the feed at ``self.links[0]``.

     Only the entries returned by ``self.new_rss_row`` are converted. Links
     whose fourth '/'-separated piece is ``'rbcfreenews'`` get their category
     from ``self.parse_rbcfreenews``.
     """
     feed = feedparser.parse(self.links[0])

     def source_of(host_tail):
         # NOTE(review): both outcomes are 'rbc.ru', so the comparison has
         # no effect on the result - confirm whether that is intended.
         if host_tail == 'y.rbc.ru':
             return 'rbc.ru'
         return 'rbc.ru'

     def category_of(link):
         # Use the fourth '/'-separated piece unless it is 'rbcfreenews'.
         if link.split('/')[3] != 'rbcfreenews':
             return link.split('/')[3]
         return self.parse_rbcfreenews(link)

     def to_news(entry):
         # Build one News item; argument order follows the News constructor.
         title = entry['title']
         link = entry['link']
         return News(
             title,
             link,
             self.clck_ru.short_link(link),
             source_of(link.split('/')[2][4:]),
             category_of(link),
             int(calendar.timegm(entry['published_parsed'])),
         )

     self.news += [to_news(entry)
                   for entry in self.new_rss_row(feed['entries'])]
Esempio n. 19
0
def update_news():
    """Store the first ten stories from the Hacker News "newest" listing.

    When fewer than ten stories are returned, all of them are stored.
    Commits once, then redirects to ``/news``.
    """
    s = session()
    news = get_news('https://news.ycombinator.com/newest')
    # Slicing tolerates results shorter than ten; indexing 0..9 raised
    # IndexError in that case.
    for entry in news[:10]:
        s.add(News(title=entry['title'],
                   author=entry['author'],
                   url=entry['url'],
                   comments=entry['comments'],
                   points=entry['score']))
    # One commit for the whole batch instead of one per inserted row.
    s.commit()
    redirect("/news")
Esempio n. 20
0
def update_news():
    """Fetch 5 "newest" pages and store the stories not yet in the DB.

    A story is skipped when a row with the same title and author exists.
    Commits once, then redirects to ``/news``.
    """
    news_lst = get_news('https://news.ycombinator.com/newest', 5)
    s = session()
    for item in news_lst:
        matches = s.query(News).filter(
            News.title == item['title'],
            News.author == item['author'],
        ).all()
        if matches:
            continue
        s.add(News(title=item['title'],
                   author=item.get('author'),
                   points=item['points'],
                   comments=item['comments'],
                   url=item['url']))
    s.commit()
    redirect("/news")
Esempio n. 21
0
 def refresh_vedomosti_material(self):
     """Extend ``self.news`` with items built from the feed at ``self.links[3]``.

     Only the entries returned by ``self.new_rss_row`` are converted.
     """
     def host_tail(link):
         # Third '/'-separated piece of the link minus its first four chars.
         return link.split('/')[2][4:]

     def source_of(link):
         # NOTE(review): both outcomes are 'vedomosti.ru', so the comparison
         # has no effect on the result - confirm whether that is intended.
         tail = host_tail(link)
         if tail == 'vedomosti.ru':
             return tail
         return 'vedomosti.ru'

     def category_of(link):
         # Links on another host get a fixed category instead of a path piece.
         if host_tail(link) == 'vedomosti.ru':
             return link.split('/')[3]
         return 'Правительство'

     def to_news(entry):
         # Build one News item; argument order follows the News constructor.
         title = entry['title']
         link = entry['link']
         return News(
             title,
             link,
             self.clck_ru.short_link(link),
             source_of(link),
             category_of(link),
             int(calendar.timegm(entry['published_parsed'])),
         )

     feed = feedparser.parse(self.links[3])
     self.news += [to_news(entry)
                   for entry in self.new_rss_row(feed['entries'])]
Esempio n. 22
0
def update_news():
    """Store the stories from the first "newest" page whose id is not saved.

    The ids already in the DB are loaded up front and each fetched story's
    ``id`` (converted to ``int``) is checked against them. Commits once,
    then redirects to ``/news``.
    """
    s = session()

    fetched = get_news('https://news.ycombinator.com/newest', 1)
    stored_rows = s.query(News).options(load_only('id')).all()
    stored_ids = {row.id for row in stored_rows}

    for headline in fetched:
        if int(headline['id']) in stored_ids:
            continue
        s.add(News(**headline))

    s.commit()
    redirect("/news")
Esempio n. 23
0
def save(pre_base):
    """Store the rows of ``pre_base`` whose title is not among unlabelled news.

    ``pre_base`` is an iterable of mappings with ``title``, ``announce`` and
    ``url`` keys. Titles are compared against the stored rows that have no
    label; rows with an unseen title are added and the session committed.
    """
    s = session()
    # Build the condition in SQL. Python's `not` is evaluated immediately
    # on the attribute object and hands a plain bool to filter() instead of
    # a column expression.
    # NOTE(review): "no label" is taken to mean NULL - confirm against the
    # model (an empty string would need an extra condition).
    unlabeled = s.query(News).filter(News.label.is_(None)).all()
    seen_titles = {row.title for row in unlabeled}
    for cur_row in pre_base:
        title = cur_row['title']
        if title in seen_titles:
            continue
        # Prevents storing a title twice when it repeats within pre_base.
        seen_titles.add(title)
        s.add(News(title=title,
                   announce=cur_row['announce'],
                   url=cur_row['url']))
    s.commit()
Esempio n. 24
0
def update_news():
    """Store the "newest" Hacker News stories that are not in the DB yet.

    A story counts as stored when a row with the same title and author
    exists. Commits once, then redirects to ``/``.
    """
    s = session()
    news_list = get_news("https://news.ycombinator.com/newest")
    for news in news_list:
        existing = s.query(News).filter(News.title == news["title"],
                                        News.author == news['author']).first()
        # The condition used to be inverted: only stories that were ALREADY
        # stored got inserted (again), and new ones were never saved.
        if existing is not None:
            continue
        s.add(News(title=news['title'],
                   author=news['author'],
                   url=news['url'],
                   comments=news['comments'],
                   points=news['points']))
    s.commit()
    redirect("/")
Esempio n. 25
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    A story counts as stored when a row with the same (title, author) pair
    exists. Commits once, then redirects to ``/news``.
    """
    s = session()
    latest_news = get_news("https://news.ycombinator.com/newest", n_pages=1)
    titles = [new['title'] for new in latest_news]
    authors = [new['author'] for new in latest_news]
    # The SQL filter only narrows the candidates: it also matches rows that
    # pair a fetched author with a different fetched title.
    candidates = s.query(News).filter(
        and_((News.author.in_(authors)), (News.title.in_(titles)))).all()
    # Decide on exact pairs. Testing title and author separately rejected
    # new stories by an author who already had another story stored.
    known = {(row.title, row.author) for row in candidates}
    for item in latest_news:
        key = (item['title'], item['author'])
        if key in known:
            continue
        known.add(key)
        s.add(News(**item))
    s.commit()
    redirect("/news")
Esempio n. 26
0
def update_news():
    """Fetch 5 "newest" pages and store the stories not yet in the DB.

    Each fetched story is compared, by author and title, with the rows
    loaded from the DB at the start. Commits once, then redirects to
    ``/news``.
    """
    s = session()
    news_list = get_news("https://news.ycombinator.com/newest", n_pages=5)
    stored = s.query(News).all()
    for candidate in news_list:
        duplicate = any(
            candidate["author"] == row.author
            and candidate["title"] == row.title
            for row in stored
        )
        if duplicate:
            continue
        s.add(News(**candidate))
    s.commit()
    redirect("/news")
Esempio n. 27
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    Uses the module-level session ``s``. A story counts as stored when a
    row with the same title and author exists. Commits once, then redirects
    to ``/news``.
    """
    fresh_news = get_news('https://news.ycombinator.com/newest', n_pages=1)
    for item in fresh_news:
        title = item.get('title')
        author = item.get('author')
        # Ask the DB for an exact match. The previous nested loop added one
        # copy of the story for every stored row that differed from it,
        # added nothing when the table was empty, and called .get() on
        # stored News objects.
        already_stored = s.query(News).filter(News.title == title,
                                              News.author == author).first()
        if already_stored is not None:
            continue
        s.add(News(title=title,
                   author=author,
                   url=item.get('url'),
                   points=item.get('points'),
                   comments=item.get('comments')))
    s.commit()
    redirect("/news")
Esempio n. 28
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    A story counts as stored when a row with the same title and author
    exists. Commits once, then redirects to ``/news``.
    """
    # A local named `session` would shadow the session factory and make
    # this very call fail with UnboundLocalError, hence `s`.
    s = session()
    link = get_news("https://news.ycombinator.com/newest", 1)
    for item in link:
        # The author comes from the same fetched item as the other fields
        # (the original read it from an undefined name).
        candidate = News(title=item["title"],
                         author=item["author"],
                         comments=item["comments"],
                         points=item["points"],
                         url=item["url"])
        # Pass both criteria to filter() so they are AND-ed in SQL; Python's
        # `and` cannot be overloaded to build that expression.
        duplicates = s.query(News).filter(
            News.title == candidate.title,
            News.author == candidate.author).count()
        if duplicates == 0:
            s.add(candidate)
    s.commit()
    redirect("/news")
Esempio n. 29
0
def update_news():
    """Fetch 5 pages of Hacker News and store the stories not yet saved.

    A story counts as stored when a row with the same title and author
    exists. Commits once, then redirects to ``/news``.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/", 5)
    for n in news:
        # Pass both criteria to filter() so they are AND-ed in SQL; Python's
        # `and` cannot be overloaded to build that expression.
        duplicate = s.query(News).filter(News.title == n["title"],
                                         News.author == n["author"]).first()
        if duplicate is not None:
            continue
        s.add(News(title=n["title"],
                   author=n["author"],
                   url=n["url"],
                   comments=n["comments"],
                   points=n["points"]))
    # One commit for the whole batch instead of one per inserted row.
    s.commit()
    redirect("/news")
Esempio n. 30
0
def update_news():
    """Fetch 3 "newest" pages and store the stories not yet in the DB.

    1. Fetch the data from the news site.
    2. Work out which stories are not in the DB yet; a story is treated as
       uniquely identified by the combination of title and author.
    3. Persist the missing stories, then redirect to ``/news``.
    """
    updates = get_news('https://news.ycombinator.com/newest', n_pages=3)
    s = session()
    for entry in updates:
        matches = s.query(News).filter(
            News.title == entry['title'],
            News.author == entry['author'],
        ).all()
        if matches:
            continue
        s.add(News(**entry))
    s.commit()

    redirect("/news")
Esempio n. 31
0
def update_news():
    """Store the first-page Hacker News stories that are not in the DB yet.

    The stored (title, author) pairs are loaded up front and each fetched
    story is checked against them. Commits once, then redirects to
    ``/news``.
    """
    s = session()
    fetched = get_news('https://news.ycombinator.com/', 1)
    stored = s.query(News).options(load_only("title", "author")).all()
    stored_pairs = {(row.title, row.author) for row in stored}
    for item in fetched:
        if (item['title'], item['author']) in stored_pairs:
            continue
        s.add(News(title=item['title'],
                   author=item['author'],
                   url=item['url'],
                   comments=item['comments'],
                   points=item['points']))
    s.commit()
    redirect("/news")
Esempio n. 32
0
def getNews(news_id):
	"""Return whatever ``News(news_id).get()`` yields for the given id."""
	return News(news_id).get()