def parse_json(url, item_type, callback, discriminator='id', list_key=None): print "Fetching %s items" % (item_type, ) fetched = urlopen(url).read() data = json.loads(fetched) if list_key: data = data[list_key] map_fun = 'function(doc) { emit(doc.%s, null); }' % (discriminator, ) for item in map(callback, data): item['item_type'] = item_type if len(db.query(map_fun, key=item[discriminator])) == 0: db.create(item) print "%s items fetched" % (item_type, )
def parse_json(url, item_type, callback, discriminator='id', list_key=None): print "Fetching %s items" % (item_type,) fetched = urlopen(url).read() data = json.loads(fetched) if list_key: data = data[list_key] map_fun = 'function(doc) { emit(doc.%s, null); }' % (discriminator,) for item in map(callback, data): item['item_type'] = item_type if len(db.query(map_fun, key=item[discriminator])) == 0: db.create(item) print "%s items fetched" % (item_type,)
def fetch_twitter_items():
    """Fetch the user's Twitter timeline and store new tweets via parse_json.

    Looks up the id of the most recently stored tweet in CouchDB and requests
    only newer statuses (since_id); when nothing is stored yet, falls back to
    fetching the latest 200 statuses.
    """
    from dateutil.parser import parse
    # View emitting (date, tweet id); queried descending so row 0 is newest.
    last_id_fun = 'function(doc) { emit(doc["couch_lifestream_date"], doc.id);}'
    try:
        since = 'since_id=%d' % db.query(last_id_fun, descending=True).rows[0].value
    except Exception:
        # Best-effort fallback for an empty database (rows[0] -> IndexError)
        # or a failed query.  The original bare `except:` also swallowed
        # KeyboardInterrupt/SystemExit, which this no longer does.
        since = 'count=200'

    def callback(item):
        # Normalize Twitter's created_at into an ISO-8601 sortable key.
        item['couch_lifestream_date'] = parse(item['created_at']).isoformat()
        return item

    url = 'http://twitter.com/statuses/user_timeline.json?id=%s&%s' % (un['TWITTER'], since)
    parse_json(url, 'twitter', callback)
def parse_feed(url, item_type, discriminator='id'): import feedparser print "Fetching %s items" % (item_type, ) d = feedparser.parse(url) map_fun = 'function(doc) { emit(doc.%s, null); }' % (discriminator, ) for item in map(dict, d['entries']): item['item_type'] = item_type item['couch_lifestream_date'] = datetime.datetime.fromtimestamp( time.mktime(item['updated_parsed'])) if len(db.query(map_fun, key=item[discriminator])) == 0: for (key, val) in item.items(): if 'parsed' in key: del item[key] elif isinstance(val, datetime.datetime): item[key] = val.isoformat() elif isinstance(val, datetime.date): item[key] = val.isoformat() db.create(item) print "%s items fetched" % (item_type, )
def parse_feed(url, item_type, discriminator='id'): import feedparser print "Fetching %s items" % (item_type,) d = feedparser.parse(url) map_fun = 'function(doc) { emit(doc.%s, null); }' % (discriminator,) for item in map(dict, d['entries']): item['item_type'] = item_type item['couch_lifestream_date'] = datetime.datetime.fromtimestamp( time.mktime(item['updated_parsed'])) if len(db.query(map_fun, key=item[discriminator])) == 0: for (key, val) in item.items(): if 'parsed' in key: del item[key] elif isinstance(val, datetime.datetime): item[key] = val.isoformat() elif isinstance(val, datetime.date): item[key] = val.isoformat() db.create(item) print "%s items fetched" % (item_type,)