Esempio n. 1
0
File: fetcher.py Progetto: fay/wt
def fetch_entries(feed, entries):
    """Create and save an ``Entry`` for every not-yet-known item in *entries*.

    An item is skipped, without affecting the remaining items, when:

    * ``dao.get_by_link`` already returns an ``Entry`` for its link,
    * its title is 200 characters or longer,
    * no summary/content text can be extracted from it, or
    * its tag-stripped summary is 100 characters or shorter.

    Args:
        feed: Object passed as ``feed=`` to every newly created ``Entry``.
        entries: Iterable of dict-like feed items. ``link`` and ``title``
            are read with ``[]`` (required); ``author``, ``summary``,
            ``content``, ``updated_parsed``, ``updated``, ``tags`` and
            ``categories`` are optional.

    Returns:
        None. Exceptions raised by ``dao.save_model`` are printed and
        swallowed so one failing entry does not stop the others.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u'')

    for entry in entries:
        entry_link = entry['link']
        if dao.get_by_link(entry_link, Entry):
            # Already known: existing entries are never updated.
            continue
        entry_model = Entry(feed=feed, link=entry_link)

        entry_model.title = entry['title']
        if len(entry_model.title) >= 200:
            continue

        entry_model.author = entry.get('author', 'unknow')

        summary = entry.get('summary', '')
        if not summary:
            # Fall back to the content: either a plain text value or a
            # sequence whose first element carries the text under 'value'.
            content = entry.get('content', '')
            try:
                if isinstance(content, text_type):
                    summary = content
                else:
                    summary = content[0].get('value', '')
            except (AttributeError, IndexError, KeyError, TypeError):
                # Missing or unexpectedly shaped content: skip this entry.
                continue

        # Clear HTML tags before measuring and storing the summary.
        entry_model.summary = strip_tags(summary)
        if len(entry_model.summary) <= 100:
            # Too short to be useful. Skip only this entry; the original
            # 'return' here aborted the import of all remaining entries.
            continue

        when = entry.get('updated_parsed')
        if not when:
            try:
                # Accepts a numeric timestamp; None means "now".
                when = time.localtime(entry.get('updated'))
            except TypeError:
                # 'updated' is an unparsed date string: use the current
                # local time instead of crashing the whole run.
                when = time.localtime()
        # Keep year, month, day, hour and minute; seconds are dropped.
        entry_model.when = datetime.datetime(*when[:5])

        category_name = None
        tags = entry.get('tags')
        if tags:
            category_name = tags[0].get('term', '')
        if not category_name:
            # NOTE(review): assumes 'categories' is dict-like (has
            # .values()) -- confirm against the feed parser in use.
            categories = entry.get('categories')
            if categories:
                category_name = list(categories.values())[0]

        if category_name:
            entry_model.category = dao.save_category(category_name)
        else:
            # The entry is still saved, just without a category.
            print('no tags.ignored...')

        try:
            dao.save_model(entry_model)
        except Exception as e:
            print('save error: %s' % (e,))
Esempio n. 2
0
File: fetcher.py Progetto: fay/wt
    # NOTE(review): the enclosing 'def' line is not part of this excerpt;
    # 'url' and 'is_write' are presumably its parameters -- confirm.
    # Parse the feed at 'url'; on any exception report it and give up
    # (returns None).
    try:
        soup = feedparser.parse(url) 
    except Exception,e:
        print 'parsing error',e
        return  
     
    # Look up a Feed by the parsed feed's link through dao; build a new
    # Feed object when the lookup returns nothing.
    feed_link = soup.feed.get('link','')
    feed = dao.get_by_link(feed_link, Feed)
    if not feed:
        feed = Feed(link=feed_link)
    feed.title = soup.feed.get('title','')
    # Stop with the string 'pass' when the title query returns any Feed.
    # NOTE(review): this presumably also matches an earlier saved copy of
    # this same feed, so a re-fetch would stop here -- confirm intent.
    if Feed.objects.filter(title=feed.title):
        return 'pass'
    feed.description = soup.feed.get('description','')
    feed.rss_link = url
    # The feed is saved before its entries are examined.
    dao.save_model(feed)
    if not soup['entries']:
        print 'this feed has not entries'
        return
    # Entries are only imported when 'is_write' is truthy.
    if is_write:
        fetch_entries(feed,soup['entries'])

def fetch_entries(feed,entries):

    for entry in entries:
        entry_link = entry['link']  
        entry_model = dao.get_by_link(entry_link, Entry)
        if not entry_model:
            entry_model = Entry(feed=feed, link=entry_link)
        else:
            continue