예제 #1
0
def scrape_article_name(article_id):
    '''
    Re-scrape an article's title from its stored link and optionally save it.

    The article is fetched with db.get_article and displayed, then the user
    is asked whether to rescrape. After a successful scrape the new and old
    titles are printed and the user decides whether the new title is written
    with db.update_article_name.

    article_id: identifier passed to db.get_article and
        db.update_article_name.

    Returns None; every outcome is reported on stdout.
    '''
    article = db.get_article(article_id)
    if article is None:
        print('There is no article with that ID. article NOT found.\n')
        return

    print()
    display_single_article(article, str(article.ArticleID))
    article_choice = btc.read_int_ranged(
        '1 to rescrape title, 2 to leave as is: ',
        min_value=1,
        max_value=2)
    if article_choice != 1:
        print('article update cancelled')
        return

    try:
        new_title = na.get_article_from_url(article.link).title
    except Exception as e:
        # Stop here instead of offering a placeholder string as the
        # replacement: otherwise the stored title could be overwritten
        # with an error message.
        print('Title scrape failed: {0}'.format(e))
        print('article update cancelled')
        return

    print('''
New title: {0}
Old title: {1}'''.format(new_title, article.name))
    title_choice = btc.read_int_ranged(
        '1 to replace title, 2 to keep original title: ',
        min_value=1,
        max_value=2)
    if title_choice == 1:
        db.update_article_name(article_id, new_title)
    else:
        print('article update cancelled')
예제 #2
0
def csv_item_to_article(csv_list_item):
    '''
    Build an Article from one CSV row.

    csv_list_item: indexable row where item 0 is handed to
        na.get_article_from_url, item 1 to get_category_id and item 2 to
        parse (presumably a date string parser - confirm against the
        file's imports).

    The scraped title is printed as a side effect. Description, author and
    publication are filled with the placeholder 'Not specified'.

    Returns the newly constructed Article.
    '''
    scraped = na.get_article_from_url(csv_list_item[0])
    link, title = scraped.url, scraped.title
    print(title)

    category = get_category_id(csv_list_item[1])
    published_on = parse(csv_list_item[2]).date()

    # Fields the CSV does not carry all share one placeholder value.
    placeholder = 'Not specified'
    return Article(
        name=title,
        link=link,
        category=category,
        date=published_on,
        description=placeholder,
        author=placeholder,
        publication=placeholder,
    )
예제 #3
0
def update_article_name(article_id):
    '''
    Interactively change an article's title.

    The article is fetched with db.get_article and displayed. If the user
    chooses to edit, the title is re-scraped from article.link and the user
    picks between keeping the existing title, using the scraped title, or
    typing one manually. Changes are written with db.update_article_name.

    article_id: identifier passed to db.get_article and
        db.update_article_name.

    Returns None; every outcome is reported on stdout.
    '''
    article = db.get_article(article_id)
    if article is None:
        print("There is no article with that ID. article NOT found.\n")
        return

    print()
    display_single_article(article, str(article.ArticleID))
    article_choice = btc.read_int_ranged(
        '1 to edit article title, 2 to leave as is: ',
        min_value=1,
        max_value=2)
    if article_choice != 1:
        print('Edit cancelled, article title unchanged')
        return

    try:
        updated_title = na.get_article_from_url(article.link).title
    except Exception as e:
        print('Scrape failed because of {0}'.format(e))
        updated_title = 'Invalid'
        scrape_ok = False
    else:
        scrape_ok = True
    print('Rescraped title: {0}'.format(updated_title))

    title_choice = btc.read_int_ranged(
        '1 - existing title, 2 - scraped title, 3 - manual input: ', 1, 3)
    if title_choice == 1:
        print('Title update cancelled, article title unchanged.')
    elif title_choice == 2:
        if not scrape_ok:
            # 'Invalid' is only a display placeholder; never persist it.
            print('No scraped title available, article title unchanged.')
            return
        db.update_article_name(article_id, updated_title)
        print('Title update complete. Return to main menu.')
    elif title_choice == 3:
        new_title = btc.read_text('Enter new title or . to cancel: ')
        if new_title == '.':
            print('Edit cancelled, return to main menu')
            return
        db.update_article_name(article_id, new_title)
        print('Title update complete. Return to main menu.')
예제 #4
0
def from_newspaper(link):
    '''
    Adds an article from the newspaper module after downloading it

    The article at link is fetched with na.get_article_from_url. Title,
    author, publication and date are taken from the scraped item; whenever
    reading one of them raises, the user is prompted for that value instead.
    Summary and keywords fall back to placeholder text. The user then picks
    a category, enters a description and confirms before db.add_article is
    called.

    link: URL of the article; also stored as the new Article's link.

    Returns None. The function returns early (adding nothing) when the
    download fails, no usable date is obtained, the category ID is unknown,
    the description is '.', or the user declines the final confirmation.
    '''
    # Single-iteration tqdm loop; presumably only there to show a progress
    # bar while the download runs.
    for _ in tqdm.tqdm(range(1)):
        try:
            newNewsItem = na.get_article_from_url(link)
        except Exception:
            print('Article download failed, invalid URL')
            print('Returning to main menu')
            return
    print(newNewsItem)

    try:
        name = newNewsItem.title  #get the title for the article
    except Exception as e:
        print(e)
        name = btc.read_text('Please enter title: ')

    #get article author
    try:
        author = ' '.join(newNewsItem.authors)
    except Exception as e:
        print(e)
        author = btc.read_text('Please enter author: ')

    #get article publication
    try:
        #works for most websites, but not Sudan Tribune
        publication = newNewsItem.meta_data['og']['site_name']
    except Exception as e:
        print(e)
        publication = btc.read_text('Please enter publication: ')

    # Prefer the scraped publish date; only ask the user when it is missing
    # or unusable, so a good scraped date is not thrown away.
    try:
        published = newNewsItem.publish_date
        new_date = datetime.date(day=published.day,
                                 month=published.month,
                                 year=published.year)
    except Exception as e:
        print(e)
        new_date = None
    if new_date is None:
        try:
            new_date = btc.read_date('Enter article date MM/DD/YYYY: ')
        except Exception as e:
            # Without a date the article cannot be displayed or stored.
            print('invalid date', e)
            print('Returning to main menu')
            return

    try:
        summary = newNewsItem.summary
    except Exception as e:
        print(e)
        print('Summary download failed')
        summary = 'Summary not found'

    try:
        keywords = ', '.join(newNewsItem.keywords)
    except Exception as e:
        print(e)
        print('Keyword download failed')
        keywords = 'keywords not found'

    print('TITLE - {0} - AUTHOR {1}'.format(name, author))
    print('DATE - {0} - PUBLICATION {1}'.format(new_date.strftime("%m/%d/%Y"),
                                                publication))
    print('KEYWORDS: ', keywords)

    display_categories()
    category_id = btc.read_text("Category ID: ")
    category = db.get_category(category_id)
    if category is None:
        print('There is no category with that ID. article NOT added.\n')
        return

    description_choice = btc.read_text('View article description? y/n: ')
    if description_choice == 'y':
        print('Title: {0}'.format(name))
        print('Summary: {0}'.format(summary))
        print('Keywords: {0}'.format(keywords))

    description = btc.read_text("Description or '.' to cancel: ")
    if description == ".":
        return

    new_article = Article(name=name,
                          date=new_date,
                          category=category,
                          link=link,
                          description=description,
                          author=author,
                          publication=publication)
    display_single_article(article=new_article,
                           title_term=new_article.name)
    #This is the user's last chance to decide if they want to add the article
    confirm_article = btc.read_bool(decision="Finalize the article?",
                                    yes='y',
                                    no='n',
                                    yes_option='Confirm',
                                    no_option='Cancel')
    if confirm_article:
        db.add_article(new_article)
        print(new_article.name + " was added to database.\n")
    else:
        print('Article add cancelled. Return to main menu.')
예제 #5
0
def _choose_rescraped_title(news_item):
    '''Ask which title to use; return it, or None if nothing should change.'''
    updated_title = None
    if news_item is not None:
        try:
            updated_title = news_item.title
        except Exception as e:
            print(e)
    if updated_title is not None:
        print('Rescraped title: {0}'.format(updated_title))

    title_choice = btc.read_int_ranged(
        '1 - existing title, 2 - scraped title, 3 - manual input: ', 1, 3)
    if title_choice == 2:
        if updated_title is None:
            # Never persist a placeholder when nothing was scraped.
            print('No scraped title available, article title unchanged.')
        return updated_title
    if title_choice == 3:
        new_title = btc.read_text('Enter new title or . to cancel: ')
        if new_title == '.':
            print('Edit cancelled, return to main menu')
            return None
        return new_title
    print('Title update cancelled, article title unchanged.')
    return None


def _read_new_description(news_item):
    '''Optionally show the scraped summary, then read a new description.

    Returns the entered text, or None when the user cancels with ".".
    '''
    description_choice = btc.read_text('View article summary? y/n: ')
    if description_choice == 'y':
        try:
            article_summary = news_item.summary
        except Exception as e:
            print(e)
            article_summary = 'Not found'
        print(article_summary)
    # Always prompt, even when the summary was not viewed; otherwise there
    # would be no value to save.
    new_value = btc.read_text('Enter new description or "." to cancel: ')
    if new_value == '.':
        print('Description update cancelled, return to main menu')
        return None
    return new_value


def rescrape(article_id, update_type):
    '''
    Interactively update an article's name or description.

    The article is fetched with db.get_article and displayed. If the user
    chooses to edit, the page at article.link is re-scraped and the new
    value is collected from the user, then written with db.update_article.

    article_id: identifier passed to db.get_article and db.update_article.
    update_type: 'name' or 'description'; any other value is rejected
        with a message before anything is looked up.

    Returns None; every outcome is reported on stdout.
    '''
    #update_types: name and description
    if update_type not in ('name', 'description'):
        print('Unknown update type: {0}'.format(update_type))
        return

    article = db.get_article(article_id)
    if article is None:
        print("There is no article with that ID. article NOT found.\n")
        return

    print()
    display_single_article(article, str(article.articleID))
    article_choice = btc.read_int_ranged(
        '1 to edit article {0}, 2 to leave as is: '.format(update_type),
        min_value=1,
        max_value=2)
    if article_choice != 1:
        print('Edit cancelled, article {0} unchanged'.format(update_type))
        return

    try:
        newsItem1 = na.get_article_from_url(article.link)
    except Exception as e:
        print(e)
        # Keep the name bound so the helpers can detect the failed download.
        newsItem1 = None

    if update_type == 'name':
        new_value = _choose_rescraped_title(newsItem1)
    else:
        new_value = _read_new_description(newsItem1)
    if new_value is None:
        return

    db.update_article(article_id=article_id,
                      new_value=new_value,
                      update_type=update_type)
    print('Article {0} updated.\n'.format(update_type))