def update_news():
    """Fetch fresh news and persist only items missing from the database.

    Fixes two defects in the original:
    * the local result variable shadowed the function's own name;
    * ``news not in created_news`` compared plain dicts against News ORM
      rows — never equal — so every fetched item was re-filled. Dedup now
      keys on the (title, author) pair.
    """
    fresh_news = get_news()
    authors = [item['author'] for item in fresh_news]
    # Titles already stored for any of the fetched authors.
    titles = s.query(News.title).filter(News.author.in_(authors)).subquery()
    stored = s.query(News).filter(News.title.in_(titles)).all()
    known = {(row.title, row.author) for row in stored}
    for item in fresh_news:
        if (item['title'], item['author']) not in known:
            fill(item)
    redirect("/news")
def update_news():
    """Persist only news items not yet present in the database.

    Fix: ``news not in existing_news`` compared a plain dict against News
    ORM instances, which is never True, so every fetched item was written
    again. The check now compares (title, author) keys.
    """
    recent_news = get_news()
    authors = [news['author'] for news in recent_news]
    titles = s.query(News.title).filter(News.author.in_(authors)).subquery()
    existing_news = s.query(News).filter(News.title.in_(titles)).all()
    existing_keys = {(row.title, row.author) for row in existing_news}
    for news in recent_news:
        if (news['title'], news['author']) not in existing_keys:
            fill(news)
    redirect("/news")
def update_news():
    """Scrape HN and insert every article not already stored.

    Fix: the original hit ``break`` on the first duplicate, aborting the
    loop and skipping every remaining fresh item. Each scraped item is
    now checked and inserted independently.
    """
    news = get_news(url='https://news.ycombinator.com/', n_pages=33)
    s = session()
    for item in news:
        duplicate = s.query(News).filter(
            News.title == item['title'],
            News.author == item['author'],
        ).first()
        if duplicate is None:
            s.add(News(**item))
    s.commit()
    redirect("/news")
def db_create(url, n_pages=1):
    """Populate the database with news scraped from ``url``.

    ``n_pages`` controls how many pages the scraper walks (default 1).
    """
    s = session()
    for item in get_news(url, n_pages=n_pages):
        s.add(News(
            title=item['title'],
            author=item['author'],
            url=item['url'],
            comments=item['comments'],
            points=item['points'],
        ))
    s.commit()
def update_news():
    """Add newly scraped items that are missing from the database.

    Fix: the original condition was inverted — it added a row only when a
    matching (title, author) WAS already found, so duplicates were
    re-inserted and genuinely new articles were dropped.
    """
    s = session()
    news_list = get_news("https://news.ycombinator.com/newest")
    for news in news_list:
        already_stored = s.query(News).filter(
            News.title == news["title"],
            News.author == news['author'],
        ).first()
        if already_stored is None:
            s.add(News(
                title=news['title'],
                author=news['author'],
                url=news['url'],
                comments=news['comments'],
                points=news['points'],
            ))
    s.commit()
    redirect("/")
def update_news():
    """Fetch the HN front page and store articles not seen before.

    An article is considered already known when its (title, author)
    pair matches an existing row.
    """
    s = session()
    scraped = get_news('https://news.ycombinator.com/', 1)
    # Only title/author are needed for the duplicate check.
    stored = s.query(News).options(load_only("title", "author")).all()
    known = [(row.title, row.author) for row in stored]
    for item in scraped:
        if (item['title'], item['author']) in known:
            continue
        s.add(News(
            title=item['title'],
            author=item['author'],
            url=item['url'],
            comments=item['comments'],
            points=item['points'],
        ))
    s.commit()
    redirect("/news")
def update_news():
    """Insert scraped items that are not yet in the database.

    Fixes four defects in the original:
    * it read from an undefined name ``author`` instead of the scraped list;
    * it queried a non-existent column ``News.tittle`` (typo for title);
    * it combined filter clauses with Python ``and``, which silently drops
      the first condition — the two clauses are now separate filter args,
      which SQLAlchemy ANDs in SQL;
    * it shadowed the session factory by naming the session ``session``.
    """
    s = session()
    items = get_news("https://news.ycombinator.com/newest", 1)
    for item in items:
        row = News(
            title=item["title"],
            author=item["author"],
            comments=item["comments"],
            points=item["points"],
            url=item["url"],
        )
        count = s.query(News).filter(
            News.title == row.title,
            News.author == row.author,
        ).count()
        if count == 0:
            s.add(row)
    s.commit()
    redirect("/news")
def update_news():
    """Store scraped news rows that the database does not yet contain.

    Fix: ``News.title == ... and News.author == ...`` used Python ``and``
    on two SQL expressions, which evaluates to only the second clause —
    the title condition was silently discarded. The clauses are now
    combined with ``&`` so the generated SQL ANDs both. The loop variable
    also no longer shadows the scraped dict with the new ORM row.
    """
    url = "https://news.ycombinator.com/newest"
    all_news = get_news(url)
    s = session()
    for item in all_news:
        present = s.query(
            exists().where(
                (News.title == item['title'])
                & (News.author == item['author'])
            )
        ).scalar()
        if not present:
            s.add(News(
                title=item['title'],
                author=item['author'],
                url=item['url'],
                comments=item['comments'],
                points=item['points'],
            ))
    s.commit()
    redirect("/news")
def news_list():
    """Render unlabeled news, scraping a fresh batch when none remain.

    Fix: the original constructed a brand-new session on every loop
    iteration; one session now serves the whole request.

    Returns the rendered 'news_template' with rows newest-first.
    """
    s = session()
    rows = s.query(News).filter(News.label == None).all()
    if len(rows) == 0:
        fetched = get_news("https://news.ycombinator.com/newest", 5)
        # Insert oldest-first so the newest articles get the highest ids.
        for item in fetched[::-1]:
            s.add(News(
                title=item['title'],
                author=item['author'],
                url=item['url'],
                comments=item['comments'],
                points=item['points'],
            ))
        s.commit()
        # Re-query and reverse so freshly scraped news shows newest-first.
        rows = s.query(News).filter(News.label == None).all()[::-1]
    return template('news_template', rows=rows)
def update_news():
    """Scrape three HN pages and store items missing from the database.

    Fixes two defects in the original:
    * it rebuilt a ``db_news`` list with five separate queries per stored
      row — and then never used it (dead code);
    * its duplicate check keyed on title alone, so an article whose title
      matched an existing row by a *different* author was silently
      skipped. Uniqueness is now the (title, author) pair, checked with a
      single query per scraped item.
    """
    s = session()
    last_news = get_news('https://news.ycombinator.com/', 3)
    for item in last_news:
        duplicate = s.query(News).filter(
            News.title == item.get('title', ''),
            News.author == item.get('author', ''),
        ).first()
        if duplicate is None:
            s.add(News(
                title=item.get('title', ''),
                author=item.get('author', ''),
                url=item.get('url', ''),
                comments=item.get('comments', ''),
                points=item.get('points', ''),
            ))
    s.commit()
    redirect("/news")
def update_news():
    """Add news scraped since the most recent stored article.

    Keeps the original strategy — walk the scraped list oldest-first and
    stop at the article matching the stored marker (title, url) — while
    fixing two defects: an empty database crashed with IndexError on
    ``rows[0]``, and a fresh session was opened on every iteration.
    """
    s = session()
    rows = s.query(News).filter().all()
    # Marker identifying the stored article to stop at, if any exists.
    if rows:
        need_title, need_url = rows[0].title, rows[0].url
    else:
        need_title = need_url = None  # empty DB: add everything
    added_news = get_news("https://news.ycombinator.com/newest")
    for item in added_news[::-1]:
        if need_title == item["title"] and need_url == item["url"]:
            break
        s.add(News(
            title=item['title'],
            author=item['author'],
            url=item['url'],
            comments=item['comments'],
            points=item['points'],
        ))
    s.commit()
    redirect("/news")
def update_news():
    """Scrape five pages of HN 'newest' and insert unseen articles.

    An article is considered already stored when its ``article_id``
    matches an existing row.
    """
    s = session()
    scraped = get_news("https://news.ycombinator.com/newest", 5)
    for article in scraped:
        article_id = int(article['article_id'])
        matches = s.query(News).filter(News.article_id == article_id).all()
        if matches:
            continue
        s.add(News(
            title=article.get('title'),
            author=article.get('author'),
            url=article.get('url'),
            comments=article.get('comments'),
            points=article.get('points'),
            article_id=article.get('article_id'),
        ))
    s.commit()
    redirect("/news")
def update_news():
    """Fetch the latest news and save every item not yet in the DB.

    Steps:
    1. Pull fresh data from the news site.
    2. Check which items are missing from the database — an item is
       uniquely identified by the (title, author) pair.
    3. Persist only the missing items.
    """
    s = session()
    for item in get_news():
        is_known = s.query(News).filter(
            News.author == item['author'],
            News.title == item['title'],
        ).count() > 0
        if is_known:
            continue
        s.add(News(
            title=item['title'],
            author=item['author'],
            url=item['url'],
            comments=item['comments'],
            points=item['points'],
        ))
    s.commit()
    redirect("/news")
from twython import Twython
import auth
import scrapper
import time

# NOTE(review): the API key is hard-coded in the URL — move it to config
# or an environment variable instead of committing it to source.
url = 'http://newsapi.org/v2/top-headlines?country=us&apiKey=a30440902cbe4ed099dbf8b02fa7f7a7'
headlines = scrapper.get_news(url)
tweeted = []
twitter = Twython(auth.consumer_key, auth.consumer_secret,
                  auth.access_token, auth.access_token_secret)


def tweet():
    """Tweet each scraped headline once, pausing an hour between posts.

    Walks ``headlines`` by index; links already posted (tracked in
    ``tweeted``) are skipped without sleeping.
    """
    counter = 0
    while counter < len(headlines):
        description = headlines[counter][0]
        link = headlines[counter][1]
        if link not in tweeted:
            message = f"{description}\n{link}"
            # tweet news
            twitter.update_status(status=message)
            print(f"Tweeted: {message}")
            counter += 1
            tweeted.append(link)
            time.sleep(3600)
        else:
            counter += 1


# Fix: the original file ended with a bare `if __name__ == '__main__':`
# and no body — a SyntaxError. The guard now actually runs the bot.
if __name__ == '__main__':
    tweet()
def save():
    """Scrape 33 pages from the module-level ``url`` and bulk-insert them."""
    rows = [News(**item) for item in get_news(url, n_pages=33)]
    s.add_all(rows)
    s.commit()
engine = create_engine("sqlite:///news.db")
session = sessionmaker(bind=engine)


class News(Base):
    """ORM model for a scraped Hacker News article."""
    __tablename__ = "news"
    id = Column(Integer, primary_key=True)
    title = Column(String)
    author = Column(String)
    url = Column(String)
    comments = Column(Integer)
    points = Column(Integer)
    label = Column(String)  # classification label; empty until labeled


Base.metadata.create_all(bind=engine)

if __name__ == '__main__':
    # Seed the database with 100 pages of the "newest" feed.
    url = "https://news.ycombinator.com/newest"
    s = session()
    for item in get_news(url, 100):
        s.add(News(
            title=item['title'],
            author=item['author'],
            url=item['url'],
            comments=item['comments'],
            points=item['points'],
        ))
    s.commit()
engine = create_engine("sqlite:///news.db")
session = sessionmaker(bind=engine)


class News(Base):
    """ORM model for a scraped Hacker News article."""
    __tablename__ = "news"
    id = Column(Integer, primary_key=True)
    title = Column(String)
    author = Column(String)
    url = Column(String)
    comments = Column(Integer)
    points = Column(Integer)
    label = Column(String)  # classification label; empty until labeled


Base.metadata.create_all(bind=engine)

if __name__ == "__main__":
    n_pages = 14  # number of full pages to collect
    news_list = get_news('https://news.ycombinator.com/', n_pages)
    # Fixes: the original opened a brand-new session (and committed) for
    # every single item, and iterated range(n_pages * 30), which raises
    # IndexError whenever a page yields fewer than 30 articles. A single
    # session now inserts whatever the scraper actually returned.
    s = session()
    for item in news_list:
        s.add(News(
            title=item.get('title', ''),
            author=item.get('author', ''),
            url=item.get('url', ''),
            comments=item.get('comments', ''),
            points=item.get('points', ''),
        ))
    s.commit()