def scrape_article_name(article_id):
    """Offer to replace a stored article's title with a freshly scraped one.

    The article is fetched with ``db.get_article`` and displayed. If the user
    asks for a rescrape, the title is downloaded again from ``article.link``
    and shown next to the stored title; it is written back with
    ``db.update_article_name`` only after a second confirmation.

    A failed (or empty) scrape cancels the update instead of offering to store
    a placeholder string as the article title.

    Args:
        article_id: Identifier passed to ``db.get_article`` and
            ``db.update_article_name``.

    Returns:
        None. Outcomes are reported on stdout.
    """
    article = db.get_article(article_id)
    if article is None:
        print('There is no article with that ID. article NOT found.\n')
        return

    print()
    display_single_article(article, str(article.ArticleID))
    article_choice = btc.read_int_ranged(
        '1 to rescrape title, 2 to leave as is: ', min_value=1, max_value=2)
    if article_choice != 1:
        print('article update cancelled')
        return

    try:
        new_title = na.get_article_from_url(article.link).title
    except Exception as e:
        # Never offer a failure placeholder as a replacement title.
        print('Title scrape failed: {0}'.format(e))
        print('article update cancelled')
        return
    if not new_title:
        print('Title scrape failed: no title found')
        print('article update cancelled')
        return

    print('\nNew title: {0}\nOld title: {1}'.format(new_title, article.name))
    title_choice = btc.read_int_ranged(
        '1 to replace title, 2 to keep original title: ',
        min_value=1, max_value=2)
    if title_choice == 1:
        db.update_article_name(article_id, new_title)
    else:
        print('article update cancelled')
def csv_item_to_article(csv_list_item):
    """Build an ``Article`` from one row of CSV data.

    The row is read positionally: item 0 is handed to
    ``na.get_article_from_url``, item 1 to ``get_category_id`` and item 2 to
    ``parse`` (presumably a date parser returning a datetime -- confirm
    against the file's imports). The scraped title is echoed to stdout.

    Args:
        csv_list_item: Indexable row with at least three items.

    Returns:
        An ``Article`` whose description, author and publication are all set
        to ``'Not specified'``.
    """
    scraped = na.get_article_from_url(csv_list_item[0])
    link = scraped.url
    title = scraped.title
    print(title)

    category = get_category_id(csv_list_item[1])
    # Only the calendar date of the parsed timestamp is kept.
    published_on = parse(csv_list_item[2]).date()

    return Article(
        name=title,
        link=link,
        category=category,
        date=published_on,
        description='Not specified',
        author='Not specified',
        publication='Not specified',
    )
def update_article_name(article_id):
    """Interactively change the title of a stored article.

    The article is fetched with ``db.get_article`` and displayed. If the user
    chooses to edit, the title is rescraped from ``article.link`` and the user
    picks between keeping the existing title, storing the scraped title, or
    typing a new one. Changes are written with ``db.update_article_name``.

    When the scrape fails, the "scraped title" option refuses to write anything
    rather than storing a placeholder string as the title.

    Args:
        article_id: Identifier passed to ``db.get_article`` and
            ``db.update_article_name``.

    Returns:
        None. Outcomes are reported on stdout.
    """
    article = db.get_article(article_id)
    if article is None:
        print("There is no article with that ID. article NOT found.\n")
        return

    print()
    display_single_article(article, str(article.ArticleID))
    article_choice = btc.read_int_ranged(
        '1 to edit article title, 2 to leave as is: ',
        min_value=1, max_value=2)
    if article_choice != 1:
        print('Edit cancelled, article title unchanged')
        return

    # scraped_title stays None when nothing usable could be downloaded.
    scraped_title = None
    try:
        scraped_title = na.get_article_from_url(article.link).title
    except Exception as e:
        print('Scrape failed because of {0}'.format(e))
    if scraped_title:
        print('Rescraped title: {0}'.format(scraped_title))
    else:
        print('Rescraped title: not available')

    title_choice = btc.read_int_ranged(
        '1 - existing title, 2 - scraped title, 3 - manual input: ', 1, 3)
    if title_choice == 1:
        print('Title update cancelled, article title unchanged.')
        return

    if title_choice == 2:
        if not scraped_title:
            print('No scraped title available, article title unchanged.')
            return
        new_title = scraped_title
    else:
        new_title = btc.read_text('Enter new title or . to cancel: ')
        if new_title == '.':
            print('Edit cancelled, return to main menu')
            return

    db.update_article_name(article_id, new_title)
    print('Title update complete. Return to main menu.')
def from_newspaper(link):
    """Download an article with the newspaper helper and add it to the database.

    The article behind ``link`` is fetched with ``na.get_article_from_url``.
    Title, author, publication and date are taken from the download; for each
    of those that cannot be read, the user is prompted instead. Summary and
    keywords fall back to placeholder text. The user then picks a category,
    optionally reviews the summary, enters a description and confirms before
    ``db.add_article`` is called.

    Args:
        link: URL of the article; also stored as the article's link.

    Returns:
        None. The function returns early (adding nothing) when the download
        fails, no usable date is obtained, the category ID is unknown, the
        description is ``'.'``, or the user declines the final confirmation.
    """
    # Single-step progress bar shown while the download runs.
    for _ in tqdm.tqdm(range(1)):
        try:
            news_item = na.get_article_from_url(link)
        except Exception:
            print('Article download failed, invalid URL')
            print('Returning to main menu')
            return
    print(news_item)

    try:
        name = news_item.title
    except Exception as e:
        print(e)
        name = btc.read_text('Please enter title: ')

    try:
        author = ' '.join(news_item.authors)
    except Exception as e:
        print(e)
        author = btc.read_text('Please enter author: ')

    try:
        # works for most websites, but not Sudan Tribune
        publication = news_item.meta_data['og']['site_name']
    except Exception as e:
        print(e)
        publication = btc.read_text('Please enter publication: ')

    try:
        published = news_item.publish_date
        new_date = datetime.date(day=published.day, month=published.month,
                                 year=published.year)
    except Exception as e:
        print(e)
        try:
            new_date = btc.read_date('Enter article date MM/DD/YYYY: ')
        except Exception as e:
            # Without a date the summary line below cannot be formatted, so
            # stop here instead of failing on an unbound variable.
            print('invalid date', e)
            print('No usable article date. article NOT added.\n')
            return

    try:
        summary = news_item.summary
    except Exception as e:
        print(e)
        print('Summary download failed')
        summary = 'Summary not found'

    try:
        keywords = ', '.join(news_item.keywords)
    except Exception as e:
        print(e)
        print('Keyword download failed')
        keywords = 'keywords not found'

    print('TITLE - {0} - AUTHOR {1}'.format(name, author))
    print('DATE - {0} - PUBLICATION {1}'.format(
        new_date.strftime("%m/%d/%Y"), publication))
    print('KEYWORDS: ', keywords)

    display_categories()
    category_id = btc.read_text("Category ID: ")
    category = db.get_category(category_id)
    if category is None:
        print('There is no category with that ID. article NOT added.\n')
        return

    description_choice = btc.read_text('View article description? y/n: ')
    if description_choice == 'y':
        print('Title: {0}'.format(name))
        print('Summary: {0}'.format(summary))
        print('Keywords: {0}'.format(keywords))

    description = btc.read_text("Description or '.' to cancel: ")
    if description == ".":
        return

    new_article = Article(name=name, date=new_date, category=category,
                          link=link, description=description, author=author,
                          publication=publication)
    display_single_article(article=new_article, title_term=new_article.name)

    # This is the user's last chance to decide if they want to add the article.
    # NOTE(review): assumes btc.read_bool returns a bool -- confirm.
    confirm_article = btc.read_bool(decision="Finalize the article?",
                                    yes='y', no='n',
                                    yes_option='Confirm', no_option='Cancel')
    if confirm_article:
        db.add_article(new_article)
        print(new_article.name + " was added to database.\n")
    else:
        print('Article add cancelled. Return to main menu.')
def _rescrape_prompt_title(news_item):
    """Ask which title to store; return it, or None when the edit is cancelled."""
    scraped_title = None
    if news_item is not None:
        try:
            scraped_title = news_item.title
        except Exception as e:
            print(e)
    if scraped_title:
        print('Rescraped title: {0}'.format(scraped_title))
    else:
        print('Rescraped title: not available')

    title_choice = btc.read_int_ranged(
        '1 - existing title, 2 - scraped title, 3 - manual input: ', 1, 3)
    if title_choice == 1:
        print('Title update cancelled, article title unchanged.')
        return None
    if title_choice == 2:
        if not scraped_title:
            # Never store a failure placeholder as the article title.
            print('No scraped title available, article title unchanged.')
            return None
        return scraped_title

    new_title = btc.read_text('Enter new title or . to cancel: ')
    if new_title == '.':
        print('Edit cancelled, return to main menu')
        return None
    return new_title


def _rescrape_prompt_description(news_item):
    """Optionally show the scraped summary, then read a new description.

    Returns the description, or None when the user cancels with ".".
    """
    description_choice = btc.read_text('View article summary? y/n: ')
    if description_choice == 'y':
        article_summary = 'Not found'
        if news_item is not None:
            try:
                article_summary = news_item.summary
            except Exception as e:
                print(e)
        print(article_summary)

    new_description = btc.read_text('Enter new description or "." to cancel: ')
    if new_description == '.':
        print('Description update cancelled, return to main menu')
        return None
    return new_description


def rescrape(article_id, update_type):
    """Interactively update an article's name or description.

    The article is fetched with ``db.get_article`` and displayed. If the user
    chooses to edit, the page at ``article.link`` is downloaded again so the
    scraped title or summary can be offered, and the chosen value is written
    with ``db.update_article``.

    Args:
        article_id: Identifier passed to ``db.get_article`` and
            ``db.update_article``.
        update_type: ``'name'`` or ``'description'``. Any other value is
            reported and leaves the article unchanged.

    Returns:
        None. Outcomes are reported on stdout.
    """
    article = db.get_article(article_id)
    if article is None:
        print("There is no article with that ID. article NOT found.\n")
        return

    print()
    # NOTE(review): update_article_name() and scrape_article_name() in this
    # file read `article.ArticleID` (capital A) -- confirm which attribute
    # name the Article model actually defines.
    display_single_article(article, str(article.articleID))
    article_choice = btc.read_int_ranged(
        '1 to edit article {0}, 2 to leave as is: '.format(update_type),
        min_value=1, max_value=2)
    if article_choice != 1:
        print('Edit cancelled, article {0} unchanged'.format(update_type))
        return

    if update_type not in ('name', 'description'):
        print('Unsupported update type: {0}. Article unchanged.'.format(
            update_type))
        return

    # A failed download is not fatal: the user can still type a value by hand.
    try:
        news_item = na.get_article_from_url(article.link)
    except Exception as e:
        print(e)
        news_item = None

    if update_type == 'name':
        new_value = _rescrape_prompt_title(news_item)
    else:
        new_value = _rescrape_prompt_description(news_item)
    if new_value is None:
        return

    db.update_article(article_id=article_id, new_value=new_value,
                      update_type=update_type)
    print('Article {0} updated.\n'.format(update_type))