Example #1
def cl_news_util(args, cache):
    if not cache:
        htmlfile = utils.get_html_file('http://bigstory.ap.org/')
        parser = APHTMLParser()
        parser.feed(htmlfile)
        articlelist = parser.articlelist
    else:
        articlelist = cache

    if len(args) > 1:
        if args[1] == '--headlines' or args[1] == '-h':
            utils.ap_headlines(articlelist)
            return articlelist

        if len(args) > 2:

            if args[1] == '--open' or args[1] == '-o':
                index = args[2]
                article = get_ap_article(articlelist, index)
                utils.go_to_page(article['url'])
                return articlelist

            if args[1] == '--read' or args[1] == '-r':
                index = args[2]
                article = get_ap_article(articlelist, index)
                htmlfile = utils.get_html_file(article['url'])
                content = re.search(
                    r'<meta name="description" content="(.+?)" />', htmlfile)
                print_article_header(article['title'], content.group(1))
                return articlelist

    utils.handle_error('ap_error')
Example #2
def cl_news_util(args, cache):
    if not cache:
        htmlfile = utils.get_html_file('https://www.washingtonpost.com')
        htmlfile = htmlfile.decode('utf-8')
        parser = WPHTMLParser()
        parser.feed(htmlfile)
        articlelist = parser.articlelist
    else:
        articlelist = cache

    if len(args) > 1:
        if args[1] == '--headlines' or args[1] == '-h':
            utils.wp_headlines(articlelist)
            return articlelist

        if len(args) > 2:

            if args[1] == '--open' or args[1] == '-o':
                index = int(args[2]) - 1
                article = articlelist[index]
                utils.go_to_page(article['url'])
                return articlelist

            if args[1] == '--read' or args[1] == '-r':
                index = int(args[2]) - 1
                article = articlelist[index]
                htmlfile = utils.get_html_file(article['url'])
                htmlfile = htmlfile.decode('utf-8')
                print('\n' + article['title'])
                print('==================\n')
                parser = WPARTICLEParser()
                parser.feed(htmlfile)
                return articlelist

    utils.handle_error('wp_error')
Example #3
def cl_news_util(args, cache):
    if not cache:
        article_list = get_article_list()
    else:
        article_list = cache

    if len(args) > 1:
        if args[1] == '--headlines' or args[1] == '-h':
            utils.cnn_headlines(article_list)
            return article_list

        if len(args) > 2:
            index = int(args[2]) - 1
            cnn_url = 'http://www.cnn.com/' + article_list[index]['uri']

            if args[1] == '--open' or args[1] == '-o':
                utils.go_to_page(cnn_url)
                return article_list

            if args[1] == '--read' or args[1] == '-r':
                os.system('clear')
                print(article_list[index]['headline'])
                cnn_article_abbreviator.main(cnn_url)
                return article_list

    utils.handle_error('cnn_error')
Example #4
def cl_news_util(args, cache):
    if not cache:
        htmlfile = utils.get_html_file('http://www.theguardian.com/us')
        parser = GUARDIANHTMLParser()
        parser.feed(htmlfile)
        articlelist = parser.articlelist
    else:
        articlelist = cache

    if len(args) > 1:
        if args[1] == '--headlines' or args[1] == '-h':
            utils.gu_headlines(articlelist)
            return articlelist

        if len(args) > 2:

            if args[1] == '--open' or args[1] == '-o':
                index = args[2]
                article = get_gu_article(articlelist, index)
                utils.go_to_page(article['url'])
                return articlelist

            if args[1] == '--read' or args[1] == '-r':
                index = args[2]
                article = get_gu_article(articlelist, index)
                htmlfile = utils.get_html_file(article['url'])
                abbrevurl = article['url'][28:]
                print('\n' + article['title'] + ' -- ' + abbrevurl)
                print('==================\n')
                htmlfile = htmlfile.decode('utf-8')
                parser = GUARDIANARTICLEParser()
                parser.feed(htmlfile)
                return articlelist

    utils.handle_error('gu_error')
Example #5
def main(cnn_url):
    uf = urllib.urlopen(cnn_url)
    htmlfile = uf.read()
    highlights = re.findall(r'storyhighlights__list">(.+?)</ul>', htmlfile)
    if highlights:
        highlights = re.findall(r'normal">(.+?)</li>', highlights[0])
        print('=== Story Highlights ===')
        for hl in highlights:
            print(hl)
        print('=========================')
        content = re.findall(r'body__paragraph">(.+?)</', htmlfile)
        print(content[0][-5:])
        del content[0]
        for p in content:
            print(p)
    else:
        description = re.findall(
            r'media__video-description--inline">(.+?)</div>', htmlfile)
        print(description[0])
        print('=========================')
        print('This is a video article, would you like to open the page? (y/n)')
        user_input = raw_input()
        if user_input == 'y':
            utils.go_to_page(cnn_url)
    return
Example #6
def cl_news_util(args, cache):
    if not cache:
        htmlfile = utils.get_html_file('http://www.reuters.com/')
        parser = REUTERSHTMLParser()
        parser.feed(htmlfile)
        articlelist = parser.articlelist
    else:
        articlelist = cache

    if len(args) > 1:
        if args[1] == '--headlines' or args[1] == '-h':
            utils.reuters_headlines(articlelist)
            return articlelist

        if len(args) > 2:

            if args[1] == '--open' or args[1] == '-o':
                index = args[2]
                article = get_reuters_article(articlelist, index)
                utils.go_to_page(article['url'])
                return articlelist

            if args[1] == '--read' or args[1] == '-r':
                index = args[2]
                article = get_reuters_article(articlelist, index)
                htmlfile = utils.get_html_file(article['url'])
                abbrevurl = article['url'][28:]
                print('\n' + article['title'] + ' -- ' + abbrevurl)
                print('==================\n')
                parser = REUTERSARTICLEParser()
                parser.feed(htmlfile)
                return articlelist

    utils.handle_error('reuters_error')
Example #7
def cl_news_util(args, cache):
    if not cache:
        # Used to parse the homepage; wire in a service-specific parser, e.g.:
        # htmlfile = utils.get_html_file('http://example.com/')
        # parser = SERVICEHTMLParser()
        # parser.feed(htmlfile)
        # articlelist = parser.articlelist
        articlelist = []  # placeholder so the skeleton runs before the parser exists
    else:
        articlelist = cache

    if len(args) > 1:
        if args[1] == '--headlines' or args[1] == '-h':
            utils.service_headlines(articlelist)
            return articlelist

        if len(args) > 2:
            # SERVICEARTICLEParser will be called for these options.
            # Don't worry about this: once reading works, opening works the same way.
            # If you use a service with multiple sources, like Hacker News, use open instead of read.
            if args[1] == '--open' or args[1] == '-o':
                index = args[2]
                article = get_service_article(articlelist, index)
                utils.go_to_page(article['url'])
                return articlelist

            if args[1] == '--read' or args[1] == '-r':
                # Used to parse articles
                index = args[2]

                return articlelist
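The skeleton above assumes a homepage parser that exposes an articlelist. Below is a minimal sketch of such a parser, built on Python's standard HTMLParser; the class name, the 'headline' class attribute, and the dict layout are assumptions for illustration, not part of the original project:

try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2

class SERVICEHTMLParser(HTMLParser):
    def __init__(self):
        HTMLParser.__init__(self)
        self.articlelist = []
        self._pending_url = None

    def handle_starttag(self, tag, attrs):
        # Remember the href of anchors that look like headlines (assumed markup).
        attrs = dict(attrs)
        if tag == 'a' and attrs.get('class') == 'headline':
            self._pending_url = attrs.get('href')

    def handle_data(self, data):
        # The text inside a headline anchor becomes the article title.
        if self._pending_url and data.strip():
            self.articlelist.append({'title': data.strip(),
                                     'url': self._pending_url})
            self._pending_url = None

Each entry then carries the 'url' key that the --open branch hands to utils.go_to_page.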
Example #8
def main():
    args = sys.argv
    if len(args) > 1:
        article_list = get_article_list()

        if args[1] == '--headlines' or args[1] == '-h':
            utils.cnn_headlines(article_list)
            return

        if len(args) > 2:
            index = int(args[2])
            cnn_url = 'http://www.cnn.com/' + article_list[index]['uri']

            if args[1] == '--open' or args[1] == '-o':
                utils.go_to_page(cnn_url)
                return

            if args[1] == '--read' or args[1] == '-r':
                print(article_list[index]['headline'])
                cnn_article_abbreviator.main(cnn_url)
                return

    utils.handle_error('cnn_error')
Example #9
def cl_news_util(args, cache):
    if not cache:
        htmlfile = utils.get_html_file('http://www.aljazeera.com/')
        htmlfile = htmlfile.decode('utf-8')
        parser = AJHTMLParser()
        parser.feed(htmlfile)
        articlelist = parser.articlelist
    else:
        articlelist = cache

    if len(args) > 1:
        if args[1] == '--headlines' or args[1] == '-h':
            utils.aj_headlines(articlelist)
            return articlelist

        if len(args) > 2:

            if args[1] == '--open' or args[1] == '-o':
                index = args[2]
                article = get_aj_article(articlelist, index)
                utils.go_to_page(article['url'])
                return articlelist

            if args[1] == '--read' or args[1] == '-r':
                index = args[2]
                article = get_aj_article(articlelist, index)
                htmlfile = utils.get_html_file('http://www.aljazeera.com/' +
                                               article['url'])
                htmlfile = htmlfile.decode('utf-8')
                print('\n' + article['title'])
                print('=====================')
                parser = AJARTICLEParser()
                parser.feed(htmlfile)
                return articlelist

    utils.handle_error('aj_error')
Example #10
def openpage(storylinks, index):
    url = storylinks[index - 1][0]
    utils.go_to_page(url)
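openpage takes a 1-based index (hence index - 1) into a sequence of tuples whose first element is a URL. A hypothetical call, assuming storylinks was collected elsewhere:

storylinks = [('http://example.com/story-one', 'Story One'),
              ('http://example.com/story-two', 'Story Two')]
openpage(storylinks, 2)  # opens http://example.com/story-two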