def __init__(self, folder):
        """Load the feeds stored under *folder* and cache their entries.

        Sets four attributes:
          folder  -- the value passed in, unchanged
          feeds   -- whatever load_feeds(folder) returns
          entries -- dict mapping each feed object to its get_entries() result
          nlp     -- a fresh English() instance
        """
        self.folder = folder
        self.feeds = load_feeds(folder)

        # Collect into a local dict first so the attribute is only bound
        # once every feed has been read.
        entries_by_feed = {}
        for feed in self.feeds:
            entries_by_feed[feed] = feed.get_entries()
        self.entries = entries_by_feed

        self.nlp = English()
Esempio n. 2
0
def run(metadata_folder):
    """Load the feeds found under *metadata_folder* and start the app.

    Module-level state touched:
      metadata_folder  -- set to the argument
      feeds            -- extended in place (``+=``) with load_feeds(...)
      entries_metadata -- gets one item per feed, keyed by ``feed.name``

    Returns whatever ``app.run`` returns.
    """
    global feeds, entries_metadata

    # The parameter shadows the module-level name with the same spelling,
    # so the global is written through the module namespace instead.
    globals().update(metadata_folder=metadata_folder)

    feeds += load_feeds(metadata_folder)
    for source in feeds:
        entries_metadata[source.name] = source.get_entries()

    server_options = {'threaded': False, 'port': 8080, 'debug': False}
    return app.run(**server_options)
Esempio n. 3
0
def run(metadata_folder):
    """Register the feeds stored in *metadata_folder*, then run the app.

    Side effects on module-level names: ``metadata_folder`` is set to the
    argument, ``feeds`` is augmented (``+=``) with the result of
    ``load_feeds``, and ``entries_metadata[feed.name]`` is set to
    ``feed.get_entries()`` for every feed in ``feeds``.

    Returns the result of ``app.run``.
    """
    global feeds, entries_metadata

    # A plain assignment would only rebind the parameter; go through the
    # module namespace to reach the global of the same name.
    module_namespace = globals()
    module_namespace['metadata_folder'] = metadata_folder

    loaded = load_feeds(metadata_folder)
    feeds += loaded

    for current in feeds:
        entries_metadata[current.name] = current.get_entries()

    return app.run(threaded=False, port=8080, debug=False)
Esempio n. 4
0
def load_entries(folder, it=None):
    """Build a BreakableEntry for every entry flagged with 'newsbreaker'.

    folder -- handed to load_feeds(); only used when *it* is None.
    it     -- optional iterable of (feed, entries) pairs. When None, the
              pairs are produced from load_feeds(folder), pairing each feed
              with its own get_entries().

    Returns a list of BreakableEntry objects in iteration order. Entries
    whose ``data.get('newsbreaker')`` is falsy are left out.
    """
    if it is None:
        # Normalise the default case to the same (feed, entries) shape the
        # caller-supplied iterable uses; get_entries() is still called
        # lazily, one feed at a time.
        it = ((feed, feed.get_entries()) for feed in load_feeds(folder))

    return [
        BreakableEntry(entry.folder, feed, entry.get_metadata())
        for feed, entries in it
        for entry in entries
        if entry.data.get('newsbreaker')
    ]
Esempio n. 5
0
def load_entries(folder, it=None):
    """Return BreakableEntry objects for all entries marked 'newsbreaker'.

    folder -- handed to load_feeds(); ignored when *it* is given.
    it     -- optional iterable of (feed, entries) pairs to use instead of
              loading the feeds from *folder*.

    The result is a list, ordered feed by feed and entry by entry.
    """
    breakable = []

    def collect(feed, entries):
        # Keep only entries whose data carries a truthy 'newsbreaker' value.
        for entry in entries:
            if entry.data.get('newsbreaker'):
                breakable.append(
                    BreakableEntry(entry.folder, feed, entry.get_metadata())
                )

    if it is None:
        for feed in load_feeds(folder):
            collect(feed, feed.get_entries())
    else:
        for feed, entries in it:
            collect(feed, entries)

    return breakable
# -*- coding: utf-8 -*-
# Author: Álvaro Parafita ([email protected])

import pandas as pd

from newsparser.data import load_feeds

# Hard-coded data folder that is handed to load_feeds() below.
folder = '/Users/alvaro_parafita/Desktop/TFG/data'

# Display statistics: load the feeds once and cache each feed's entries in a
# dict keyed by the feed object, so the table built below can look them up.
feeds = load_feeds(folder)
entries = { feed: feed.get_entries() for feed in feeds }

df = pd.DataFrame(
    [
        [
            feed.name, 
            len(entries[feed]),
            sum(
                not entry.data.get('filter', {}).get('discarded', True)
                for entry in entries[feed]
            ),
            sum(
                not entry.data.get('filter', {}).get('duplicate', True)
                for entry in entries[feed]
            ),
            sum(
                not entry.data.get('filter', {}).get('news_agency_discarded', True)
                for entry in entries[feed]
            ),
            sum(
Esempio n. 7
0
# -*- coding: utf-8 -*-
# Author: Álvaro Parafita ([email protected])

from newsparser.data import load_feeds

# Hard-coded data folder that is handed to load_feeds() below.
folder = '/Users/alvaro_parafita/Desktop/TFG/data'

feeds = load_feeds(folder)

# Print one line per feed: its name and the `published` value of one entry.
for feed in feeds:
    # NOTE(review): low=-1 / high=feed.num_entries presumably selects the
    # last entry of the feed -- confirm against get_entries().
    e = feed.get_entries(low=-1, high=feed.num_entries)[0]
    print(feed.name, e.published)
Esempio n. 8
0
    def __init__(self, folder):
        """Read the feeds under *folder* and keep their entries at hand.

        After construction the instance exposes ``folder`` (the argument),
        ``feeds`` (the result of load_feeds), ``entries`` (a dict from each
        feed object to its get_entries() result) and ``nlp`` (an English()
        instance).
        """
        self.folder = folder
        self.feeds = load_feeds(folder)

        # Build the mapping locally and bind the attribute in one step.
        per_feed = {}
        for item in self.feeds:
            per_feed[item] = item.get_entries()
        self.entries = per_feed

        self.nlp = English()