Esempio n. 1
0
def fetch(feed):
    """Download ``feed.url`` and persist every entry that is not stored yet.

    A ``Fetch`` row holding the number of parsed entries (as a string) is
    added to the session, and ``feed.last_fetched_at`` is set to the current
    local time. An entry is skipped when an ``Entry`` with the same URL
    already exists.

    Returns the number of entries newly written to the database.
    """
    parsed = feedparser.parse(feed.url)
    items = parsed['entries']

    # Record this fetch attempt together with how many entries came back.
    fetch_log = Fetch()
    fetch_log.feed_id = feed.id
    fetch_log.result = str(len(items))
    Session.add(fetch_log)

    feed.last_fetched_at = datetime.datetime.now()
    Session.add(feed)

    stored = 0
    for item in items:
        link = item.get('link')
        if Session.query(Entry).filter_by(url=link).first():
            # Already known under this URL; nothing to do.
            continue

        # Feeds disagree on which timestamp they provide, so fall through
        # published -> updated -> created and, when all of those are
        # missing, use the current GMT time instead.
        stamp = (item.get('published_parsed')
                 or item.get('updated_parsed')
                 or item.get('created_parsed')
                 or time.gmtime())

        # Build the row for this entry; the first six fields of the time
        # tuple (year .. second) become a datetime.
        record = Entry()
        record.feed_id = feed.id
        record.title = item.get('title')
        record.feed_title = feed.title
        record.url = link
        record.pubtime = datetime.datetime(*stamp[:6])
        record.summary = item.get('summary')
        record.host = get_host(feed.weburl)
        Session.add(record)
        # Committed per entry, so each stored row is in the database before
        # the next duplicate lookup runs.
        Session.commit()
        stored += 1

    Session.commit()
    return stored
Esempio n. 2
0
                      help=help_,
                      action='store_true',
                      default=False)
    # Parse the command line; options.ini and options.force are read below.
    (options, args) = parser.parse_args()

    # Load the config file named by --ini (resolved relative to the current
    # directory) and pass its global and local sections to load_environment.
    conf = appconfig('config:' + options.ini, relative_to='.')
    load_environment(conf.global_conf, conf.local_conf)

    # Engine for the configured database URL; echo=True makes SQLAlchemy log
    # the statements it emits.
    engine = create_engine(conf['sqlalchemy.url'], echo=True)
    meta = MetaData()
    conn = engine.connect()

    # NOTE(review): feed_table and query are not used in the visible part of
    # this function; the loop below reads feeds through Session instead.
    feed_table = sa.Table('feed', meta, autoload=True, autoload_with=engine)
    query = select([feed_table])

    # Visit every Feed and decide whether it should be fetched again.
    feeds = Session.query(Feed)
    for f in feeds:
        if f.last_fetched_at:
            # Age of the previous fetch in seconds.
            # NOTE(review): the two operands are converted differently
            # (time.mktime here, the unix_time helper below) -- confirm both
            # use the same epoch/timezone convention.
            ux_time_last_fetched = time.mktime(f.last_fetched_at.timetuple())
            print "last fetched: %s" % \
                (datetime.datetime.now() - f.last_fetched_at)
            seconds_ago = \
                unix_time(datetime.datetime.now()) - ux_time_last_fetched
        else:
            # No previous fetch recorded: use a very large age so the
            # throttle comparison below does not hold the feed back.
            seconds_ago = 99999999
        print seconds_ago
        new = 0
        # throttle is defined outside this view; it is multiplied by 60
        # before being compared with seconds, so presumably it is expressed
        # in minutes -- TODO confirm. --force bypasses the throttle.
        if seconds_ago < (throttle * 60) and not options.force:
            print "Not updating %s because inside throttle time." % f.title
        else:
            #try:
Esempio n. 3
0
    # --ini selects the settings file and defaults to development.ini.
    parser.add_option('--ini',
                      help='INI file to use for application settings',
                      type='str',
                      default='development.ini')
    # NOTE(review): --force is declared here but options.force is not read
    # anywhere in the visible part of this function.
    help_ = 'Force a download of the feed even if its within throttle time.'
    parser.add_option('--force',
                      help=help_,
                      action='store_true',
                      default=False)
    (options, args) = parser.parse_args()

    # Load the config file named by --ini (resolved relative to the current
    # directory) and pass its global and local sections to load_environment.
    conf = appconfig('config:' + options.ini, relative_to='.')
    load_environment(conf.global_conf, conf.local_conf)

    # Engine for the configured database URL; echo=True makes SQLAlchemy log
    # the statements it emits.
    # NOTE(review): meta and conn are not used in the visible code below.
    engine = create_engine(conf['sqlalchemy.url'], echo=True)
    meta = MetaData()
    conn = engine.connect()

    # Select the entries whose tags column is NULL and derive tags for each
    # one from its title.
    entries = Session.query(Entry).filter_by(tags=None)
    for e in entries:
        text = e.title
        tags = tag(text)
        print tags
        # The tags are stored twice: space-joined on e.tags and passed to
        # e.tag(). NOTE(review): e.tag() is defined elsewhere -- confirm what
        # it records.
        e.tags = ' '.join(tags)
        e.tag(tags)
        Session.add(e)
    # One commit after the whole loop persists all updated entries together.
    Session.commit()