def get(self, news_id):
    """Handle GET for a single news item: load it, normalise images, render."""
    article = getNews(news_id)
    if article is None:
        # Cache miss: pull the item from the remote news endpoint.
        article = requestData(newsUrl + str(news_id))
    if article is None:
        notFound(self)
    article['body'] = imgReplace(article['body'])
    try:
        article['image'] = imgReplace(article['image'])
    except KeyError:
        # Items without an image fall back to the default thumbnail.
        article['image'] = 'default-lg.jpg'
    News(int(article['id'])).save(article)
    self.render('news.html', data=article)
def update_news():
    """Fetch one more HN listing page and store news items not yet in the DB.

    Advances the module-level ``pages`` counter on each call so successive
    invocations walk through the listing pages.  A news item is considered
    unique by its (title, author) pair.
    """
    s = session()
    global pages
    # 1. Fetch the next listing page from the news site.
    newsdict = get_news(
        'https://news.ycombinator.com/' + "news?p=" + str(pages), 1)
    pages = pages + 1
    # 2. Determine which items are already stored; identity is the
    #    (title, author) pair.
    ex_news = s.query(News).options(load_only("title", "author")).all()
    ex_tit_au = [(news.title, news.author) for news in ex_news]
    for news in newsdict:
        if (news['title'], news['author']) not in ex_tit_au:
            news_add = News(title=news['title'],
                            author=news['author'],
                            url=news['url'],
                            comments=news['comments'],
                            points=news['points'])
            print(news_add)
            # BUG FIX: '/n' printed a literal slash-n; '\n' is the intended
            # newline separator.
            print('\n')
            s.add(news_add)
    # 3. Persist the newly collected rows.
    s.commit()
    print('end')
    redirect("/news")
def update_news():
    """Scrape 10 pages of Hacker News and persist previously unseen stories.

    A story counts as already stored when an existing row matches both its
    title and its author.
    """
    fresh = get_news('https://news.ycombinator.com/', 10)
    db = session()
    seen = [(row.title, row.author) for row in db.query(News).all()]
    for story in fresh:
        if (story['title'], story['author']) in seen:
            continue
        print('adding to db...')
        db.add(News(title=story['title'],
                    author=story['author'],
                    url=story['url'],
                    comments=story['comments'],
                    points=story['points']))
    db.commit()
    redirect("/news")
def update_news():
    """Scrape the HN "newest" listing and store stories not already in the DB.

    A story is considered a duplicate when an existing row matches both its
    title and its author.
    """
    pages_parsed = 10
    s = session()
    old_news_list = s.query(News).all()
    new_rows = 0
    probable_news = get_news("https://news.ycombinator.com/newest",
                             pages_parsed)
    for news in probable_news:
        for old_news in old_news_list:
            if (news['title'] == old_news.title
                    and news['author'] == old_news.author):
                break
        else:
            # for/else: no matching stored row was found — this story is new.
            new_news = News(title=news['title'],
                            author=news['author'],
                            url=news['url'],
                            points=news['points'])
            s.add(new_news)
            new_rows += 1
    # BUG FIX: the commit used to run only when exactly 50 rows had been
    # added, so any other number of new stories was silently discarded.
    # Commit whatever was collected.
    s.commit()
    print("Committed ", new_rows, " news")
    redirect("/news")
    return
def update_news():
    """Fetch one page of HN "newest" and store entries missing from the DB.

    A record is uniquely identified by the (title, author) pair.
    """
    news = get_news("https://news.ycombinator.com/newest", 1)
    s = session()
    for new in news:
        # BUG FIX: the original combined the two comparisons with Python's
        # ``and`` operator, which does not build a SQL conjunction; chain
        # where() calls so both clauses are ANDed server-side.
        (record_exists, ), = s.query(exists().where(
            News.title == new['title']).where(
            News.author == new['author']))
        if record_exists:
            # BUG FIX: ``break`` stopped processing the remaining scraped
            # items as soon as one duplicate was seen; skip just this item.
            continue
        print('Adding record...')
        record = News(title=new['title'],
                      author=new['author'],
                      url=new['url'],
                      comments=new['comments'],
                      points=new['points'],
                      cleaned=new['cleaned'])
        s.add(record)
    s.commit()
    redirect("/news")
def update_news():
    """Fetch one page of HN "newest" and store stories not already in the DB.

    A story is uniquely identified by its (title, author) pair.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/newest", n_pages=1)
    # BUG FIX: the original kept independent lists of titles and authors and
    # required the title AND the author each to be globally unseen, so a new
    # story sharing only its title (or only its author) with any stored row
    # was wrongly skipped.  Compare (title, author) pairs instead.
    existing = {(row.title, row.author) for row in s.query(News).all()}
    for item in news:
        if (item.get('title'), item.get('author')) not in existing:
            s.add(News(author=item['author'],
                       title=item['title'],
                       url=item['url'],
                       points=item['points'],
                       comments=item['comments']))
            # Pace inserts as the original did (roughly 3 per second).
            time.sleep(0.333333334)
    s.commit()
    redirect('/news')
def classify_news():
    """Train a Naive Bayes model on the first ~1000 labelled news rows and
    rank the remaining rows by their predicted label.

    Returns the rendered recommendations template.
    """
    X, y, info = [], [], []
    s = session()
    for i in range(1001):
        # BUG FIX: Query.get() expects a primary-key value, not a filter
        # expression; use filter() exactly like the test-set loop below.
        for item in s.query(News).filter(News.id == i).all():
            X.append(item.title)
            y.append(item.label)
    X_test = []
    for i in range(1001, len(s.query(News).all()) + 1):
        for item in s.query(News).filter(News.id == i).all():
            X_test.append(item.title)
            info.append(News(author=item.author, points=item.points,
                             comments=item.comments, url=item.url))
    # Normalise both corpora: strip punctuation, lowercase.
    X = [x.translate(str.maketrans("", "", string.punctuation)).lower()
         for x in X]
    X_cleared = [x.translate(str.maketrans("", "", string.punctuation)).lower()
                 for x in X_test]
    model = NaiveBayesClassifier(alpha=0.01)
    model.fit(X, y)
    predicted_news = model.predict(X_cleared)
    classified_news = []
    for i in range(len(predicted_news)):
        # BUG FIX: the original paired each test item with the *training*
        # label y[i]; the model's prediction for the item is
        # predicted_news[i].
        classified_news.append([predicted_news[i], X_test[i], info[i]])
    classified_news = sorted(classified_news, key=lambda item: item[0])
    return template('homework06/news_recommendations', rows=classified_news)
def refresh_kommersant_corp(self):
    """Pull the Kommersant corporate RSS feed and append its new entries."""
    feed = feedparser.parse(self.links[6])
    for entry in self.new_rss_row(feed['entries']):
        link = entry['link']
        self.news.append(
            News(entry['title'],
                 link,
                 self.clck_ru.short_link(link),
                 link.split('/')[2][4:],
                 self.parse_kommers_category(entry),
                 int(calendar.timegm(entry['published_parsed']))))
def refresh_vedomosti_news(self):
    """Pull the Vedomosti news RSS feed and append its new entries."""
    feed = feedparser.parse(self.links[2])
    fresh = []
    for entry in self.new_rss_row(feed['entries']):
        parts = entry['link'].split('/')
        fresh.append(News(entry['title'],
                          entry['link'],
                          self.clck_ru.short_link(entry['link']),
                          parts[2][4:],
                          parts[3],
                          int(calendar.timegm(entry['published_parsed']))))
    self.news += fresh
def update_news():
    """Scrape the Habrahabr top-50 page and store stories with unseen ids."""
    for item in get_news('https://habrahabr.ru/all/top50', n_pages=1):
        # EXISTS query against the stored id; session ``s`` is module-level.
        (already_stored, ), = s.query(exists().where(News.id == item.get('id')))
        if not already_stored:
            s.add(News(**item))
    s.commit()
    redirect('/news')
def update_news():
    """Scrape 33 pages of Hacker News and store stories not already stored.

    A story is considered stored when a row matches both title and author.
    """
    news = get_news(url='https://news.ycombinator.com/', n_pages=33)
    s = session()
    for i in news:
        # BUG FIX: on finding a duplicate the original executed ``break``,
        # abandoning every remaining scraped story; a duplicate should only
        # skip that one story.
        if s.query(News).filter(News.title == i['title'],
                                News.author == i['author']).first():
            continue
        s.add(News(**i))
    s.commit()
    redirect("/news")
def refresh_rbc_money(self):
    """Pull the RBC "money" RSS feed and append its new entries."""
    data = feedparser.parse(self.links[1])
    for entry in self.new_rss_row(data['entries']):
        link = entry['link']
        section = link.split('/')[3]
        # The feed labels the money section as generic "news"; normalise it.
        if section == 'news':
            section = 'money'
        self.news.append(News(entry['title'],
                              link,
                              self.clck_ru.short_link(link),
                              link.split('/')[2][6:],
                              section,
                              int(calendar.timegm(entry['published_parsed']))))
def update_news():
    """Scrape 5 pages of HN "newest" and store stories missing from the DB.

    Uniqueness is the (author, title) pair; an empty table simply means
    every scraped story is new.
    """
    db = session()
    scraped = get_news("https://news.ycombinator.com/newest", n_pages=5)
    stored = db.query(News).all()
    for candidate in scraped:
        duplicate = any(candidate['author'] == row.author
                        and candidate['title'] == row.title
                        for row in stored)
        if not duplicate:
            db.add(News(**candidate))
    db.commit()
    redirect("/news")
def update_news():
    """Scrape one page of HN "newest" and store unseen (title, author) items."""
    db = session()
    for entry in get_news('https://news.ycombinator.com/newest', 1):
        already_there = db.query(News).filter(
            News.title == entry['title'],
            News.author == entry['author']).first()
        if not already_there:
            db.add(News(**entry))
    db.commit()
    redirect("/news")
def add():
    """Show the creation form on GET; on POST create a news item and redirect."""
    if request.method != "POST":
        return render_template("new_news.html")
    global news_id
    news_id += 1
    item = News(news_id, request.form['title'], request.form['detail'])
    all_news.append(item)
    return redirect(url_for('news'))
def update_news():
    """Fetch the latest scraped news and store rows not yet in the DB."""
    s = session()
    for item in get_news():
        already_stored = s.query(News).filter(
            News.author == item['author'],
            News.title == item['title']).count()
        if not already_stored:
            s.add(News(title=item['title'],
                       author=item['author'],
                       url=item['url'],
                       comments=item['comments'],
                       points=item['points']))
    s.commit()
    redirect("/news")
def update_news():
    """Scrape one page of HN "newest" and store every scraped story.

    Uses the module-level session ``s``; no duplicate check is performed.
    """
    for entry in get_news('https://news.ycombinator.com/newest', 1):
        record = News(title=entry['title'],
                      author=entry['author'],
                      url=entry['url'],
                      comments=entry['comments'],
                      points=entry['points'])
        s.add(record)
    s.commit()
    redirect("/news")
def refresh_rbc(self):
    """Pull the main RBC RSS feed and append its new entries to self.news."""
    data = feedparser.parse(self.links[0])
    # NOTE(review): both branches of this lambda return 'rbc.ru', so the
    # comparison with 'y.rbc.ru' has no effect — possibly the else branch
    # was meant to return x unchanged; confirm intent before changing.
    for_money_rbc = lambda x: 'rbc.ru' if x == 'y.rbc.ru' else 'rbc.ru'
    # Category is the 4th URL segment, except "rbcfreenews" links, which
    # need dedicated parsing.
    for_rbcfreenews = lambda x: x.split('/')[3] if x.split('/')[
        3] != 'rbcfreenews' else self.parse_rbcfreenews(x)
    self.news += [
        News(i['title'], i['link'], self.clck_ru.short_link(i['link']),
             for_money_rbc(i['link'].split('/')[2][4:]),
             for_rbcfreenews(i['link']),
             int(calendar.timegm((i['published_parsed']))))
        for i in self.new_rss_row(data['entries'])
    ]
def update_news():
    """Store up to the first 10 stories from the HN "newest" page."""
    s = session()
    news = get_news('https://news.ycombinator.com/newest')
    # BUG FIX: the hard-coded range(0, 10) raised IndexError whenever fewer
    # than 10 stories were scraped; a slice copes with short result lists.
    for entry in news[:10]:
        # NOTE(review): this scraper variant exposes the score under the
        # 'score' key (siblings use 'points') — confirm get_news's schema.
        item = News(title=entry['title'],
                    author=entry['author'],
                    url=entry['url'],
                    comments=entry['comments'],
                    points=entry['score'])
        s.add(item)
    s.commit()
    redirect("/news")
def update_news():
    """Scrape 5 pages of HN "newest" and store unseen (title, author) items."""
    news_lst = get_news('https://news.ycombinator.com/newest', 5)
    s = session()
    # Iterate the items directly instead of indexing with range(len(...)),
    # and test existence with first() rather than materialising every
    # matching row just to count it.
    for item in news_lst:
        exists_already = s.query(News).filter(
            News.title == item['title'],
            News.author == item['author']).first()
        if exists_already is None:
            s.add(News(title=item['title'],
                       author=item.get('author'),
                       points=item['points'],
                       comments=item['comments'],
                       url=item['url']))
    s.commit()
    redirect("/news")
def refresh_vedomosti_material(self):
    """Pull the Vedomosti "material" RSS feed and append its new entries."""
    # NOTE(review): this lambda returns the domain fragment only when it is
    # already exactly 'vedomosti.ru' and otherwise returns the literal
    # 'vedomosti.ru' — so it always evaluates to 'vedomosti.ru'.  Possibly
    # the branches were swapped; confirm intent before changing.
    if_not_vedomosti = lambda x: x.split('/')[2][4:] if x.split('/')[2][
        4:] == 'vedomosti.ru' else 'vedomosti.ru'
    # Category: the 4th URL segment for vedomosti.ru links, otherwise a
    # fixed Russian label ("Government").
    for_vedomosti_material = lambda x: x.split('/')[3] if x.split('/')[2][
        4:] == 'vedomosti.ru' else 'Правительство'
    data = feedparser.parse(self.links[3])
    self.news += [
        News(i['title'], i['link'], self.clck_ru.short_link(i['link']),
             if_not_vedomosti(i['link']), for_vedomosti_material(i['link']),
             int(calendar.timegm((i['published_parsed']))))
        for i in self.new_rss_row(data['entries'])
    ]
def update_news():
    """Scrape one page of HN "newest" and store stories with unseen ids."""
    db = session()
    # Only the id column is needed for the duplicate check.
    known_ids = {row.id for row in db.query(News).options(load_only('id')).all()}
    for headline in get_news('https://news.ycombinator.com/newest', 1):
        if int(headline['id']) not in known_ids:
            db.add(News(**headline))
    db.commit()
    redirect("/news")
def save(pre_base):
    """Persist scraped items from *pre_base* whose titles are not stored yet.

    ``pre_base`` is an iterable of dicts with 'title', 'announce' and 'url'.
    """
    s = session()
    # BUG FIX: ``filter(not News.label)`` applied Python's ``not`` to the
    # column object instead of building a SQL expression; use the SQL-level
    # IS NULL test for unlabelled rows.
    # NOTE(review): assumes "no label" is stored as NULL — confirm schema.
    rows = s.query(News).filter(News.label.is_(None)).all()
    known_titles = [row.title for row in rows]
    for cur_row in pre_base:
        if cur_row['title'] not in known_titles:
            s.add(News(title=cur_row['title'],
                       announce=cur_row['announce'],
                       url=cur_row['url']))
    s.commit()
def update_news():
    """Scrape HN "newest" and store stories not already in the DB."""
    s = session()
    news_list = get_news("https://news.ycombinator.com/newest")
    for news in news_list:
        # BUG FIX: the condition was inverted — rows were added only when a
        # matching (title, author) record ALREADY existed, duplicating old
        # stories and dropping new ones.
        if not s.query(News).filter(News.title == news["title"],
                                    News.author == news['author']).first():
            row = News(title=news['title'],
                       author=news['author'],
                       url=news['url'],
                       comments=news['comments'],
                       points=news['points'])
            s.add(row)
    s.commit()
    redirect("/")
def update_news():
    """Scrape one page of HN "newest" and store unseen (title, author) items."""
    s = session()
    latest_news = get_news("https://news.ycombinator.com/newest", n_pages=1)
    titles = [new['title'] for new in latest_news]
    authors = [new['author'] for new in latest_news]
    # Narrow the existing-rows query to candidates that could collide.
    existing_news = s.query(News).filter(
        and_((News.author.in_(authors)), (News.title.in_(titles)))).all()
    # BUG FIX: the original required BOTH the title and the author to be
    # individually unseen, so a new story sharing only its author (or only
    # its title) with a stored row was wrongly skipped.  Uniqueness is the
    # (title, author) pair.
    existing_pairs = {(n.title, n.author) for n in existing_news}
    for item in latest_news:
        if (item['title'], item['author']) not in existing_pairs:
            s.add(News(**item))
    s.commit()
    redirect("/news")
def update_news():
    """Scrape 5 pages of HN "newest" and store stories missing from the DB."""
    db = session()
    candidates = get_news("https://news.ycombinator.com/newest", n_pages=5)
    stored = db.query(News).all()
    for candidate in candidates:
        # A candidate is a duplicate when some stored row matches both
        # its author and its title.
        if not any(candidate["author"] == row.author
                   and candidate["title"] == row.title
                   for row in stored):
            db.add(News(**candidate))
    db.commit()
    redirect("/news")
def update_news():
    """Scrape one page of HN "newest" and store unseen (title, author) items.

    Uses the module-level session ``s``.
    """
    fresh_news = get_news('https://news.ycombinator.com/newest', n_pages=1)
    # BUG FIX: the original called .get() on ORM row objects (rows are not
    # dicts) and, worse, added the item once for EVERY stored row that did
    # not match it, inserting massive duplicates.  Build the set of stored
    # (title, author) pairs once and add each scraped item at most once.
    stored_pairs = {(row.title, row.author) for row in s.query(News).all()}
    for item in fresh_news:
        if (item.get('title'), item.get('author')) not in stored_pairs:
            s.add(News(title=item.get('title'),
                       author=item.get('author'),
                       url=item.get('url'),
                       points=item.get('points'),
                       comments=item.get('comments')))
    s.commit()
    redirect("/news")
def update_news():
    """Scrape one page of HN "newest" and store unseen (title, author) items."""
    # BUG FIX: the original rebound the name ``session`` to its own result
    # (``session = session()``), shadowing the factory; use a local name.
    s = session()
    scraped = get_news("https://news.ycombinator.com/newest", 1)
    for entry in scraped:
        # BUG FIX: the author came from the undefined name ``author[i]`` —
        # the value lives in the scraped entry itself.
        record = News(title=entry["title"],
                      author=entry["author"],
                      comments=entry["comments"],
                      points=entry["points"],
                      url=entry["url"])
        # BUG FIX: ``News.tittle`` (typo) and Python's ``and`` operator do
        # not build the intended SQL; filter on both columns instead.
        duplicates = s.query(News).filter(
            News.title == record.title,
            News.author == record.author).count()
        if duplicates == 0:
            s.add(record)
    s.commit()
    redirect("/news")
def update_news():
    """Scrape 5 pages of Hacker News and store unseen (title, author) items."""
    s = session()
    news = get_news("https://news.ycombinator.com/", 5)
    for n in news:
        row = News(title=n["title"],
                   author=n["author"],
                   url=n["url"],
                   comments=n["comments"],
                   points=n["points"])
        # BUG FIX: the clauses were combined with Python's ``and``, which
        # does not produce a SQL conjunction; pass both clauses to filter()
        # so they are ANDed server-side.
        if s.query(News).filter(News.title == row.title,
                                News.author == row.author).all():
            continue
        s.add(row)
    s.commit()
    redirect("/news")
def update_news():
    """Scrape 3 pages of HN "newest" and store stories missing from the DB.

    A story is uniquely identified by its (title, author) pair.
    """
    scraped = get_news('https://news.ycombinator.com/newest', n_pages=3)
    db = session()
    for entry in scraped:
        matches = db.query(News) \
            .filter(News.title == entry['title']) \
            .filter(News.author == entry['author']).all()
        if not matches:
            db.add(News(**entry))
    db.commit()
    redirect("/news")
def update_news():
    """Scrape one page of Hacker News and store unseen (title, author) items."""
    db = session()
    scraped = get_news('https://news.ycombinator.com/', 1)
    # Only title and author are needed for the duplicate check.
    stored = db.query(News).options(load_only("title", "author")).all()
    stored_pairs = [(row.title, row.author) for row in stored]
    for entry in scraped:
        if (entry['title'], entry['author']) in stored_pairs:
            continue
        db.add(News(title=entry['title'],
                    author=entry['author'],
                    url=entry['url'],
                    comments=entry['comments'],
                    points=entry['points']))
    db.commit()
    redirect("/news")
def getNews(news_id):
    """Load and return the stored data for the news item *news_id*."""
    return News(news_id).get()