Esempio n. 1
0
def _build_post(feed_title, entry, text, bloglist):
    """Build the output record for one feed entry from its body *text*."""
    return {
        'blogtitle': feed_title,
        'content': cleanHtml(text),
        'link': entry.links[0].href,
        'links': le.extract_links(text),
        'bloglinks': le.extract_links_from_list(text, bloglist),
    }


def get_feed(blogurl, bloglist):
    """
    Fetch a blog's feed and write its contents out to a JSON file.

    The feed link is discovered from ``blogurl`` and parsed with feedparser.
    The output is a JSON list whose first element describes the blog itself
    ('blogurl', 'title', 'blogroll') and whose remaining elements describe
    one entry each ('blogtitle', 'content', 'link', 'links', 'bloglinks').
    It is written to ``out/<munged feed title>.json`` under the current
    working directory; the ``out`` directory is created if missing.

    Progress and failure messages are written to stderr.

    blogurl  -- URL of the blog to fetch.
    bloglist -- passed through to the ``le`` link-extraction helpers
                (presumably the known blogs to match links against --
                confirm against the ``le`` module).

    Returns the path of the written file, or None when the feed could not
    be retrieved/parsed or contained no entries.
    """
    try:
        feed_url = le.extract_feed_link_from_url(blogurl)
        fp = feedparser.parse(feed_url)
    except Exception:
        # Best effort: an unreachable or unparsable blog is reported and
        # skipped. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        sys.stderr.write("Unable to retrieve or parse %s\n" % (blogurl,))
        return None

    if not fp.entries:
        sys.stderr.write("Retrieved no entries from '%s'\n" % feed_url)
        return None

    feed_title = fp.feed.title
    # Drop non-ASCII characters so the message is safe to print on any
    # terminal; decode back to text so it formats the same on Python 2 and 3.
    printable_title = feed_title.encode('ascii', 'ignore').decode('ascii')
    sys.stderr.write("Fetched %s entries from '%s'\n"
                     % (len(fp.entries), printable_title))

    blog_data = {
        'blogurl': blogurl,
        'title': feed_title,
        'blogroll': le.extract_links_from_url(blogurl, bloglist),
    }
    blog_posts = [blog_data]
    for entry in fp.entries:
        try:
            post = _build_post(feed_title, entry,
                               entry.content[0].value, bloglist)
        except AttributeError:
            # The content-based build failed (e.g. the entry has no
            # 'content' attribute); build the record from the summary.
            post = _build_post(feed_title, entry, entry.summary, bloglist)
        blog_posts.append(post)

    out_dir = os.path.join(os.getcwd(), 'out')
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    out_path = os.path.join(out_dir, '%s.json' % (fm.munge(feed_title),))
    # 'with' guarantees the file is closed even if serialising or writing
    # fails part-way through.
    with codecs.open(out_path, 'w', encoding='iso-8859-1') as f:
        f.write(json.dumps(blog_posts))
    sys.stderr.write('Wrote output file to %s\n' % (out_path,))
    return out_path
Esempio n. 2
0
 def test_munge(self):
     """fm.munge turns the backslash, punctuation and spaces of the sample
     name into underscores."""
     bad_name = "t\\h!s is.a?bad file'n|ame"
     # assertEqual rather than the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(fm.munge(bad_name), "t_h_s_is_a_bad_file_n_ame")