def update_news():
    """Fetch one Hacker News page, store previously unseen items, redirect.

    Advances the module-level ``pages`` counter so successive calls walk
    through the paginated listing.  An item is considered already stored
    when its (title, author) pair is present in the database.
    """
    s = session()
    # 1. Fetch data from the news site (one page per call).
    global pages
    newsdict = get_news(
        'https://news.ycombinator.com/' + "news?p=" + str(pages), 1)
    pages = pages + 1
    # 2. Determine which items are not in the DB yet.  Each item is
    # uniquely identified by the pair (title, author).
    ex_news = s.query(News).options(load_only("title", "author")).all()
    ex_tit_au = [(news.title, news.author) for news in ex_news]
    for news in newsdict:
        if (news['title'], news['author']) not in ex_tit_au:
            news_add = News(title=news['title'],
                            author=news['author'],
                            url=news['url'],
                            comments=news['comments'],
                            points=news['points'])
            print(news_add)
            # BUG FIX: the original printed the literal '/n'; a newline
            # separator was clearly intended.
            print('\n')
            s.add(news_add)
    # 3. Persist the staged items.
    s.commit()
    print('end')
    redirect("/news")
def update_news():
    """Fetch the newest Hacker News page and store items missing from the DB.

    1. Fetch data from the news site.
    2. Determine which items are not stored yet; an item is uniquely
       identified by the (title, author) pair.
    3. Persist the missing items.
    """
    news_ar = get_news('https://news.ycombinator.com/newest', n_pages=1)
    # BUG FIX: ``session`` is a factory and must be called to obtain a
    # session object (every sibling function does ``session()``).
    s = session()
    news_in_base = s.query(News).all()
    for new in news_ar:
        already_stored = any(
            new['title'] == stored.title and new['author'] == stored.author
            for stored in news_in_base)
        if not already_stored:
            new_new = News(title=new['title'],
                           author=new['author'],
                           url=new['url'],
                           comments=new['comments'],
                           points=new['points'])
            s.add(new_new)
    s.commit()
    redirect('/news')
def update_news(): """ # 1. Получить данные с новостного сайта # 2. Проверить, каких новостей еще нет в БД. Будем считать, # что каждая новость может быть уникально идентифицирована # по совокупности двух значений: заголовка и автора # 3. Сохранить в БД те новости, которых там нет """ news = get_news("https://news.ycombinator.com/newest", 1) s = session() for new in news: (record_exists, ), = s.query(exists().where( News.title == new['title'] and News.author == new['author'])) if record_exists: break print('Adding record...') record = News(title=new['title'], author=new['author'], url=new['url'], comments=new['comments'], points=new['points'], cleaned=new['cleaned']) s.add(record) s.commit() redirect("/news")
def update_news():
    """Scrape ten pages of newest Hacker News items and store unseen ones.

    An item is a duplicate when some stored row matches both its title
    and its author.
    """
    pages_parsed = 10
    s = session()
    old_news_list = s.query(News).all()
    new_rows = 0
    probable_news = get_news("https://news.ycombinator.com/newest",
                             pages_parsed)
    for news in probable_news:
        for old_news in old_news_list:
            if (news['title'] == old_news.title
                    and news['author'] == old_news.author):
                break
        else:
            # for/else: runs only when no matching row was found above.
            new_news = News(title=news['title'],
                            author=news['author'],
                            url=news['url'],
                            points=news['points'])
            s.add(new_news)
            new_rows += 1
    # BUG FIX: the original committed only when exactly 50 rows had been
    # staged, so any other count was silently discarded on session close.
    # Commit whatever was added once the scan completes.
    s.commit()
    print("Commited ", new_rows, " news")
    redirect("/news")
    return
def update_news():
    """Fetch the newest Hacker News page and store items missing from the DB.

    1. Fetch data from the news site.
    2. Determine which items are not stored yet; an item is uniquely
       identified by the (title, author) pair.
    3. Persist the missing items.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/newest", n_pages=1)
    # (title, author) pairs already stored, loaded once for O(1) lookups.
    stored_pairs = {(row.title, row.author)
                    for row in s.query(News.title, News.author).all()}
    for item in news:
        # BUG FIX: the original required the title AND the author to each
        # be individually unseen, so a new article by a known author (or a
        # reused title by a new author) was skipped.  Uniqueness is defined
        # by the pair, so test the pair.
        if (item.get('title'), item.get('author')) not in stored_pairs:
            s.add(
                News(author=item['author'],
                     title=item['title'],
                     url=item['url'],
                     points=item['points'],
                     comments=item['comments']))
            # NOTE(review): this delay was in the original; it only slows
            # down staging of rows and looks unnecessary — confirm intent.
            time.sleep(0.333333334)
    s.commit()
    redirect('/news')
def update_news():
    """Scrape newest and top Hacker News listings and store unseen items.

    An item is a duplicate when a stored row matches both its title and
    its author.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/newest", 10)
    top_news = get_news("https://news.ycombinator.com/", 11)
    news.extend(top_news)
    for item in news:
        row = News(title=item["title"],
                   author=item["author"],
                   url=item["url"],
                   comments=item["comments"],
                   points=item["points"])
        # BUG FIX: the original wrote ``A and B`` between the two column
        # comparisons, which is Python boolean logic and effectively drops
        # the title clause.  Passing both clauses to filter() ANDs them
        # in SQL.
        if s.query(News).filter(News.title == row.title,
                                News.author == row.author).all():
            continue
        s.add(row)
    s.commit()
    redirect("/news")
def transfer():
    """Copy three pages of Hacker News front-page items into the database."""
    # BUG FIX: the original referenced an undefined name ``s``; open a
    # session locally, as the sibling functions in this file do.
    s = session()
    for i in get_news('https://news.ycombinator.com/', 3):
        news = News(title=i['title'],
                    author=i['author'],
                    url=i["url"],
                    comments=i['comments'],
                    points=i['points'])
        s.add(news)
    s.commit()
def update_news():
    """Fetch recent news and store the ones not already in the database.

    An item is uniquely identified by its (title, author) pair.
    """
    # NOTE(review): ``s`` is not defined in this function — presumably a
    # module-level session; confirm it exists at import time.
    recent_news = get_news()
    # Narrow the candidate set to rows whose author appears in the scrape.
    authors = [news['author'] for news in recent_news]
    titles = s.query(News.title).filter(News.author.in_(authors)).subquery()
    existing_news = s.query(News).filter(News.title.in_(titles)).all()
    # BUG FIX: the original tested ``news not in existing_news`` with
    # ``news`` a dict and ``existing_news`` ORM rows; the test never
    # matched, so every scraped item was re-inserted.  Compare the
    # (title, author) pairs instead.
    existing_pairs = {(row.title, row.author) for row in existing_news}
    for news in recent_news:
        if (news['title'], news['author']) not in existing_pairs:
            fill(news)
    redirect("/news")
def db_News(url, n_pages=34):
    """Scrape ``n_pages`` pages starting at ``url`` and store every item."""
    s = session()
    for item in get_news(url, n_pages=n_pages):
        record = News(title=item['title'],
                      author=item['author'],
                      url=item['url'],
                      comments=item['comments'],
                      points=item['points'])
        s.add(record)
    s.commit()
def fill_db(n_pages=35):
    """Scrape ``n_pages`` of newest Hacker News items and store them all."""
    s = Session()
    scraped = get_news("https://news.ycombinator.com/newest", n_pages)
    for entry in scraped:
        s.add(News(title=entry['title'],
                   author=entry['author'],
                   url=entry['url'],
                   comments=entry['comments'],
                   points=entry['points']))
    s.commit()
def update_news():
    """Scrape the newest Hacker News page and stage every item for storage."""
    # NOTE(review): ``s`` is not defined in this function — presumably a
    # module-level session; confirm it exists at import time.
    for entry in get_news('https://news.ycombinator.com/newest', 1):
        record = News(title=entry['title'],
                      author=entry['author'],
                      url=entry['url'],
                      comments=entry['comments'],
                      points=entry['points'])
        s.add(record)
    s.commit()
    redirect("/news")
def update_news():
    """Store newly scraped items, skipping (title, author) duplicates."""
    scraped = get_news('https://news.ycombinator.com/newest', 1)
    db = session()
    for entry in scraped:
        duplicate = db.query(News).filter(
            News.title == entry['title'],
            News.author == entry['author']).first()
        if duplicate is None:
            db.add(News(**entry))
    db.commit()
    redirect("/news")
def update_news():
    """Fetch recent news and store items whose (title, author) is unseen."""
    db = session()
    for item in get_news():
        already_present = db.query(News).filter(
            News.author == item['author'],
            News.title == item['title']).count()
        if not already_present:
            db.add(News(title=item['title'],
                        author=item['author'],
                        url=item['url'],
                        comments=item['comments'],
                        points=item['points']))
    db.commit()
    redirect("/news")
def fill_database(n_pages=10):
    """Scrape ``n_pages`` of newest Hacker News items and store them all."""
    db = session()
    for entry in get_news('https://news.ycombinator.com/newest', n_pages):
        db.add(News(title=entry['title'],
                    author=entry['author'],
                    url=entry['url'],
                    comments=entry['comments'],
                    points=entry['points']))
    db.commit()
def update_news():
    """Fetch recent news and store the ones not already in the database.

    An item is uniquely identified by its (title, author) pair.
    """
    # NOTE(review): ``s`` is not defined in this function — presumably a
    # module-level session; confirm it exists at import time.
    recent_news = get_news()
    # Narrow the candidate set to rows whose author appears in the scrape.
    authors = [news['author'] for news in recent_news]
    titles = s.query(News.title).filter(News.author.in_(authors)).subquery()
    existing_news = s.query(News).filter(News.title.in_(titles)).all()
    # BUG FIX: the original required the title AND the author to each be
    # individually absent, so a new article by a known author (or a reused
    # title by a new author) was never stored.  Uniqueness is defined by
    # the pair, so test the pair.
    existing_pairs = {(row.title, row.author) for row in existing_news}
    for news in recent_news:
        if (news['title'], news['author']) not in existing_pairs:
            fill(news)
    redirect("/news")
def update_news():
    """Scrape the newest Hacker News page and store the first ten items."""
    s = session()
    news = get_news('https://news.ycombinator.com/newest')
    for entry in news[:10]:
        # BUG FIX: the scraped dicts carry the score under the key
        # 'points' (every sibling function reads it that way); the
        # original read ['score'], which raises KeyError.
        item = News(title=entry['title'],
                    author=entry['author'],
                    url=entry['url'],
                    comments=entry['comments'],
                    points=entry['points'])
        s.add(item)
    s.commit()
    redirect("/news")
def update_news():
    """Scrape newest items and hand the unseen ones to make_table_news."""
    scraped = get_news(url="https://news.ycombinator.com/newest")
    unseen = []
    for item in scraped:
        matches = session.query(News).filter(
            News.title == item["title"], News.author == item["author"])
        if not list(matches):
            unseen.append(item)
    make_table_news(session, unseen)
    redirect("/news")
def update_news():
    """Store scraped items whose id is not yet present in the database."""
    db = session()
    current_news = get_news('https://news.ycombinator.com/newest', 1)
    # Ids of every stored row; only the id column is loaded from the DB.
    known_ids = {row.id
                 for row in db.query(News).options(load_only('id')).all()}
    for headline in current_news:
        if int(headline['id']) not in known_ids:
            db.add(News(**headline))
    db.commit()
    redirect("/news")
def add_db():
    """Scrape ~1000 items from itnews.com.au and insert them one by one."""
    news_list_ = scraputils.get_news('https://www.itnews.com.au', 50)
    # PERF FIX: the original opened a brand-new session on every loop
    # iteration; a single session handles all inserts.
    s = db.session()
    q = 0
    for k in news_list_:
        news_ = db.News(title=k.get('title'),
                        author=k.get('author'),
                        url=k.get('url'),
                        comments=k.get('comments'))
        s.add(news_)
        s.commit()
        q += 1
        print("added news ", q)
def save_data(pages=1):
    """Scrape ``pages`` pages of newest Hacker News items and store them."""
    scraped = scraputils.get_news("https://news.ycombinator.com/newest",
                                  pages)
    s = db.session()
    for entry in scraped:
        s.add(db.News(title=entry["title"],
                      author=entry["author"],
                      url=entry["url"],
                      comments=entry["comments"],
                      points=entry["points"]))
    s.commit()
def update_news():
    """Fetch recent news and add the items not already stored.

    An item is uniquely identified by its (title, author) pair.
    """
    # NOTE(review): ``s`` is not defined in this function — presumably a
    # module-level session; confirm it exists at import time.
    news = get_news()
    rows = s.query(News).all()
    for n in news:
        # BUG FIX: the original matched on title alone, so a different
        # article that reuses a stored title was dropped.  The file's
        # convention identifies an item by the (title, author) pair.
        is_new = not any(
            n['title'] == row.title and n['author'] == row.author
            for row in rows)
        if is_new:
            add_to_db(n)
    redirect("/news")
def update_news():
    """Scrape five newest pages and store (title, author) unseen items."""
    scraped = get_news('https://news.ycombinator.com/newest', 5)
    db = session()
    for entry in scraped:
        matches = db.query(News).filter(
            News.title == entry['title'],
            News.author == entry['author']).all()
        if len(matches) == 0:
            db.add(News(title=entry['title'],
                        author=entry.get('author'),
                        points=entry['points'],
                        comments=entry['comments'],
                        url=entry['url']))
    db.commit()
    redirect("/news")
def update_news():
    """Fetch the newest page and store items missing from the database.

    An item is uniquely identified by its (title, author) pair.
    """
    s = session()
    latest_news = get_news("https://news.ycombinator.com/newest", n_pages=1)
    titles = [new['title'] for new in latest_news]
    authors = [new['author'] for new in latest_news]
    # Candidate duplicates: rows whose author AND title both appear in the
    # scrape (not necessarily on the same item — hence the pair check).
    existing_news = s.query(News).filter(
        and_((News.author.in_(authors)), (News.title.in_(titles)))).all()
    # BUG FIX: the original required the title AND the author to each be
    # individually absent from the matches, so a new article by a known
    # author was skipped.  Test the (title, author) pair instead.
    existing_pairs = {(n.title, n.author) for n in existing_news}
    for item in latest_news:
        if (item['title'], item['author']) not in existing_pairs:
            s.add(News(**item))
    s.commit()
    redirect("/news")
def recommendations():
    """Render label predictions for fresh news, or redirect to training.

    Falls back to /classify when no classifier has been trained yet.
    """
    global classifier
    if not classifier:
        return redirect('/classify')
    news_list = get_news('https://news.ycombinator.com/newest', 1)
    normalized_titles = [normalize(item['title']) for item in news_list]
    labels = classifier.predict(normalized_titles)
    for item, label in zip(news_list, labels):
        item['label'] = label
    return template('news_recommendations', rows=news_list)
def update_news():
    """Scrape five newest pages and store (title, author) unseen items."""
    db = session()
    scraped = get_news("https://news.ycombinator.com/newest", n_pages=5)
    stored_rows = db.query(News).all()
    for candidate in scraped:
        known = any(candidate["author"] == row.author
                    and candidate["title"] == row.title
                    for row in stored_rows)
        if not known:
            db.add(News(**candidate))
    db.commit()
    redirect("/news")
def update_news():
    """Scrape 35 pages of Hacker News and add the rows selected by ``has``."""
    news = get_news("https://news.ycombinator.com/", 35)
    db = session()
    for entry in news:
        # NOTE(review): adding when has(...) is TRUE looks inverted for a
        # dedup check — confirm the semantics of the ``has`` helper.
        if has(db, entry["author"], entry["title"]):
            db.add(News(title=entry["title"],
                        author=entry["author"],
                        url=entry["url"],
                        points=entry["points"],
                        comments=entry["comments"]))
    db.commit()
    redirect("/news")
def update_news():
    """Scrape the newest page and store (title, author) unseen items."""
    scraped = get_news('https://news.ycombinator.com/newest')
    db = session()
    for entry in scraped:
        matches = db.query(News).filter(News.author == entry['author'],
                                        News.title == entry['title']).all()
        if len(matches) == 0:
            db.add(News(author=entry['author'],
                        title=entry['title'],
                        points=entry['points'],
                        comments=entry['comments'],
                        url=entry['url']))
    db.commit()
    redirect("/news")
def update_news():
    """Fetch current news and store items whose (title, author) is unseen."""
    db = session()
    current_news = get_news()
    # Load only the two identifying columns for the duplicate check.
    existing = db.query(News).options(load_only("title", "author")).all()
    known = {(row.title, row.author) for row in existing}
    for item in current_news:
        if (item['title'], item['author']) not in known:
            db.add(News(title=item['title'],
                        author=item['author'],
                        url=item['url'],
                        comments=item['comments'],
                        points=item['points']))
    db.commit()
    redirect("/news")
def update_news():
    """Scrape three newest pages and store the unseen items.

    1. Fetch data from the news site.
    2. Determine which items are not stored yet; an item is uniquely
       identified by the (title, author) pair.
    3. Persist the missing items.
    """
    updates = get_news('https://news.ycombinator.com/newest', n_pages=3)
    db = session()
    for entry in updates:
        matches = db.query(News).filter(
            News.title == entry['title']).filter(
                News.author == entry['author']).all()
        if not matches:
            db.add(News(**entry))
    db.commit()
    redirect("/news")
def update_news():
    """Scrape five front pages of Hacker News and store unseen items.

    An item is a duplicate when a stored row matches both its title and
    its author.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/", 5)
    for n in news:
        row = News(title=n["title"],
                   author=n["author"],
                   url=n["url"],
                   comments=n["comments"],
                   points=n["points"])
        # BUG FIX: the original wrote ``A and B`` between the two column
        # comparisons, which is Python boolean logic and effectively drops
        # the title clause.  Passing both clauses to filter() ANDs them
        # in SQL.
        if s.query(News).filter(News.title == row.title,
                                News.author == row.author).all():
            continue
        s.add(row)
    s.commit()
    redirect("/news")