import json

import public_bz   # project-local helpers (bigzhu's *_bz modules)
import time_bz
from public_bz import storage  # bare storage(...) below is assumed to come from here
# pg (PostgreSQL wrapper), public_db, wechat_oper and callGetMeidaApi are
# defined in the surrounding modules.


def saveBlogs(user_name, blogs, offset):
    # First variant: write each Tumblr post into its own 'tumblr_blog' table.
    if not blogs:
        return
    for blog in blogs:
        # Normalize the raw API record into the table's column layout.
        blog['created_date'] = time_bz.timestampToDateTime(blog['timestamp'])
        del blog['timestamp']
        blog['id_str'] = blog['id']
        del blog['id']
        del blog['date']
        del blog['recommended_source']
        del blog['recommended_color']
        del blog['highlighted']
        blog['user_name'] = user_name
        # Nested structures are stored as JSON strings.
        blog['tags'] = json.dumps(blog.get('tags'))
        blog['reblog'] = json.dumps(blog.get('reblog'))
        blog['trail'] = json.dumps(blog.get('trail'))
        blog['photos'] = json.dumps(blog.get('photos'))
        blog['post_author'] = json.dumps(blog.get('post_author'))
        blog['player'] = json.dumps(blog.get('player'))
        blog['dialogue'] = json.dumps(blog.get('dialogue'))
        result = pg.insertIfNotExist(pg, 'tumblr_blog', blog, "id_str='%s'" % blog['id_str'])
        if result is None:  # duplicate record found, stop here
            print 'have same data'
            return
        else:
            print 'new ', blog['id_str'], blog['type'], 'offset:', offset
    # fetch the next page
    new_offset = offset + 20
    new_blogs = callGetMeidaApi(user_name, offset=new_offset)['response']['posts']
    saveBlogs(user_name, new_blogs, new_offset)
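# saveBlogs paginates by recursion: each call saves one page of posts, then
# fetches the page 20 posts further back (matching the hard-coded offset step
# above) and stops at the first duplicate. A hypothetical kick-off, assuming
# callGetMeidaApi wraps Tumblr's posts endpoint as its call sites suggest
# (fetchAllBlogs is an invented name):
def fetchAllBlogs(user_name):
    # Seed the recursion with the newest page at offset 0 and let
    # saveBlogs walk back through history until it hits stored data.
    first_page = callGetMeidaApi(user_name, offset=0)['response']['posts']
    saveBlogs(user_name, first_page, 0)
# One page per stack frame means Python's default recursion limit of 1000
# caps a single run at roughly 20,000 posts of backlog.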
def saveBlogs(user_name, blogs, offset):
    # Later variant: write into the unified 'm' table shared by all services.
    if not blogs:
        return
    for blog in blogs:
        m = public_bz.storage()
        m.id_str = blog['id']
        m.m_type = 'tumblr'
        m.m_user_id = user_name
        m.created_at = time_bz.timestampToDateTime(blog['timestamp'])
        m.extended_entities = json.dumps(blog.get('photos'))
        m.content = None
        m.text = blog.get('caption')
        m.href = blog.get('short_url')
        m.type = blog.get('type')
        result = pg.insertIfNotExist(pg, 'm', m, "id_str='%s' and m_type='tumblr'" % m.id_str)
        if result is None:  # duplicate record found, stop here
            print 'have same data'
            return
        else:
            print 'new ', m.id_str, m.type, 'offset:', offset, 'name:', user_name
    # fetch the next page
    new_offset = offset + 20
    new_blogs = callGetMeidaApi(user_name, offset=new_offset)['response']['posts']
    saveBlogs(user_name, new_blogs, new_offset)
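# public_bz.storage is not shown here; from the way these functions use it,
# it behaves like web.py's Storage: a dict whose keys double as attributes.
# A hedged sketch of such a class (the real implementation may differ):
class storage(dict):
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        self[key] = value
# Usage: m = storage(); m.id_str = '123' means m['id_str'] == '123', so the
# insert helpers can treat each row as a plain dict of column values.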
def saveMedias(user, medias):
    '''
    create by bigzhu at 15/09/04 20:58:54
    Save Instagram medias. A raw media record looks like:
        "attribution":null,
        "tags":[],
        "type":"image",
        "location":{},
        "comments":{},
        "filter":"Normal",
        "created_time":"1441362020",
        "link":"https://instagram.com/p/7NIHiLJJs3/",
        "likes":{},
        "images":{},
        "users_in_photo":[],
        "caption":{},
        "user_has_liked":false,
        "id":"1066544388859271991_262341",
        "user":{}
    '''
    for media_d in medias['data']:
        media = storage(media_d)
        db_media = storage()
        #db_media.attribution = media.attribution
        #db_media.tags = json.dumps(media.tags, cls=public_bz.ExtEncoder)
        db_media.type = media.type
        #db_media.location = json.dumps(media.location, cls=public_bz.ExtEncoder)
        db_media.comments = json.dumps(media.comments, cls=public_bz.ExtEncoder)
        db_media.filter = media.filter
        #db_media.created_time = time_bz.timestampToDateTime(media.created_time) + timedelta(hours=8)
        db_media.created_time = time_bz.timestampToDateTime(media.created_time)
        db_media.link = media.link
        #db_media.likes = json.dumps(media.likes, cls=public_bz.ExtEncoder)
        db_media.low_resolution = json.dumps(media.images['low_resolution'])
        db_media.standard_resolution = json.dumps(media.images['standard_resolution'])
        db_media.thumbnail = json.dumps(media.images['thumbnail'])
        #db_media.users_in_photo = json.dumps(media.users_in_photo, cls=public_bz.ExtEncoder)
        if media.caption:
            # flatten the nested 'from' user object into a plain user_id
            caption = media.caption
            caption['user_id'] = caption['from']['id']
            del caption['from']
        else:
            caption = ''
        db_media.caption = json.dumps(caption, cls=public_bz.ExtEncoder)
        db_media.id_str = media.id
        db_media.user_id = user.id
        id = pg.insertIfNotExist(pg, 'instagram_media', db_media, "id_str='%s'" % db_media.id_str)
        if id is None:
            raise Exception('duplicate record id=%s, name=%s' % (media.id, user.username))
        else:
            print 'new=', media.id, user.username
        if id is not None and len(medias['data']) <= 2:  # notify WeChat only when the batch of new messages is 2 or fewer
            openids = public_db.getOpenidsByName('instagram', user.username)
            for data in openids:
                if caption != '':
                    text = caption.get('text')
                else:
                    text = ''
                wechat_oper.sendInstagram(data.openid, text, media.images['low_resolution']['url'], user.username, id)
    if medias['pagination']:
        next_url = medias['pagination']['next_url']
        medias = callGetMeidaApi(next_url=next_url)
        saveMedias(user, medias)
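# Both saveMedias variants flatten the caption the same way before
# JSON-encoding it: the nested 'from' user object becomes a flat user_id.
# With invented values:
#
#     caption = {'text': 'sunset', 'from': {'id': '262341'}}
#     caption['user_id'] = caption['from']['id']
#     del caption['from']
#     # caption == {'text': 'sunset', 'user_id': '262341'}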
def saveMedias(user, medias):
    '''
    create by bigzhu at 15/09/04 20:58:54
    Later variant: save Instagram medias into the unified 'm' table.
    (The raw media record has the same shape as documented above.)
    '''
    for media_d in medias['data']:
        media = storage(media_d)
        db_media = storage()
        #db_media.attribution = media.attribution
        #db_media.tags = json.dumps(media.tags, cls=public_bz.ExtEncoder)
        db_media.type = media.type
        #db_media.location = json.dumps(media.location, cls=public_bz.ExtEncoder)
        db_media.comments = json.dumps(media.comments, cls=public_bz.ExtEncoder)
        db_media.filter = media.filter
        #db_media.created_time = time_bz.timestampToDateTime(media.created_time) + timedelta(hours=8)
        db_media.created_time = time_bz.timestampToDateTime(media.created_time)
        db_media.link = media.link
        #db_media.likes = json.dumps(media.likes, cls=public_bz.ExtEncoder)
        db_media.low_resolution = json.dumps(media.images['low_resolution'])
        db_media.standard_resolution = json.dumps(media.images['standard_resolution'])
        db_media.thumbnail = json.dumps(media.images['thumbnail'])
        #db_media.users_in_photo = json.dumps(media.users_in_photo, cls=public_bz.ExtEncoder)
        if media.caption:
            # flatten the nested 'from' user object into a plain user_id
            caption = media.caption
            caption['user_id'] = caption['from']['id']
            del caption['from']
        else:
            caption = ''
        db_media.caption = json.dumps(caption, cls=public_bz.ExtEncoder)
        db_media.id_str = media.id
        db_media.user_id_str = user.id
        #id = pg.insertIfNotExist(pg, 'instagram_media', db_media, "id_str='%s'" % db_media.id_str)
        m = public_bz.storage()
        m.id_str = db_media.id_str
        m.m_type = 'instagram'
        m.m_user_id = db_media.user_id_str
        m.created_at = db_media.created_time
        m.extended_entities = db_media.standard_resolution
        m.content = db_media.comments
        if media.caption:
            m.text = media.caption['text']
        else:
            m.text = None
        m.href = db_media.link
        m.type = db_media.type
        id = pg.insertIfNotExist(pg, 'm', m, "id_str='%s'" % db_media.id_str)
        if id is None:
            # duplicates do seem to happen, so log instead of raising
            error = 'duplicate record id=%s, name=%s' % (media.id, user.username)
            print error
            #raise Exception(error)
        else:
            print 'new=', media.id, user.username
        # if id is not None and len(medias) <= 2:  # notify WeChat only when the batch of new messages is 2 or fewer
        #     openids = public_db.getOpenidsByName('instagram', user.username)
        #     for data in openids:
        #         if caption != '':
        #             text = caption.get('text')
        #         else:
        #             text = ''
        #         wechat_oper.sendInstagram(data.openid, text, media.images['low_resolution']['url'], user.username, id)
    if medias['pagination']:
        next_url = medias['pagination']['next_url']
        medias = callGetMeidaApi(next_url=next_url)
        saveMedias(user, medias)
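# Every save path funnels through pg.insertIfNotExist, whose contract can be
# read off the call sites: it takes the pg module itself, a table name, a row,
# and a SQL condition; it returns None when a matching row already exists,
# otherwise inserts the row and returns the new id. A hedged sketch of such a
# helper on top of a web.py-style database handle (the project's real pg
# wrapper may differ; connection values are placeholders):
import web

db = web.database(dbn='postgres', db='mydb', user='me')  # placeholder connection


def insertIfNotExist(pg, table, record, where):
    # `pg` mirrors the call sites above, which pass the module itself;
    # this sketch ignores it and uses the module-level db handle.
    if list(db.select(table, where=where, limit=1)):
        return None  # a matching row already exists
    return db.insert(table, **record)
# The "id_str='%s'" interpolation above is only safe because the ids come back
# from the APIs as numeric strings; web.py's vars binding, e.g.
# db.select(table, where="id_str=$id_str", vars=record), would avoid
# interpolating values into the condition string.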
def saveTwitter(tweet):
    '''
    create by bigzhu at 15/07/10 14:39:48
    Save a tweet.
    '''
    # Flatten the tweepy model into plain columns before inserting.
    del tweet.user._json
    #del tweet.user._api
    tweet.user.entities = json.dumps(tweet.user.entities)
    del tweet.user.id
    pg.insertOrUpdate(pg, 'twitter_user', vars(tweet.user), "id_str='%s'" % tweet.user.id_str)
    tweet.t_user_id = tweet.user.id_str
    screen_name = tweet.user.screen_name
    del tweet.user
    if hasattr(tweet, 'author'):
        #del tweet.author.id
        pg.insertOrUpdate(pg, 'twitter_user', vars(tweet.author), "id_str='%s'" % tweet.author.id_str)
        tweet.t_author_id = tweet.author.id_str
        del tweet.author
    if hasattr(tweet, '_api'):
        del tweet._api
    if hasattr(tweet, '_json'):
        del tweet._json
    # twitter ids are too big ("id": 618948810941673472) and raise
    # psycopg2.DataError: integer out of range, so keep only id_str
    if hasattr(tweet, 'id'):
        del tweet.id
    if hasattr(tweet, 'entities'):
        tweet.entities = json.dumps(tweet.entities)
    if hasattr(tweet, 'geo'):
        tweet.geo = json.dumps(tweet.geo)
    if hasattr(tweet, 'coordinates'):
        tweet.coordinates = json.dumps(tweet.coordinates)
    if hasattr(tweet, 'extended_entities'):
        tweet.extended_entities = json.dumps(tweet.extended_entities)
    if hasattr(tweet, 'scopes'):
        tweet.scopes = json.dumps(tweet.scopes)
    if hasattr(tweet, 'retweeted_status'):
        # store the retweeted tweet first, then keep only its id_str
        saveTwitter(tweet.retweeted_status)
        tweet.retweeted_status = tweet.retweeted_status.id_str
    if hasattr(tweet, 'quoted_status'):
        # print tweet.quoted_status
        del tweet.quoted_status
        # saveTwitter(tweet.quoted_status)
        # tweet.quoted_status = tweet.quoted_status.id_str
    # place is an object I don't know how to handle: Place(_api=<tweepy.api.API object at 0x1808050>)
    if hasattr(tweet, 'place'):
        del tweet.place
    # for k, v in vars(tweet).items():
    #     print '%s=%s' % (k, v)
    #return pg.insertIfNotExist(pg, 'twitter_message', vars(tweet), "id_str='%s'" % tweet.id_str)
    m = public_bz.storage()
    m.id_str = tweet.id_str
    m.m_type = 'twitter'
    m.m_user_id = tweet.t_user_id
    m.created_at = tweet.created_at
    if hasattr(tweet, 'extended_entities'):
        m.extended_entities = tweet.extended_entities
    m.content = None
    m.text = tweet.text
    m.href = 'https://twitter.com/' + screen_name + '/status/' + tweet.id_str
    return pg.insertIfNotExist(pg, 'm', m, "id_str='%s' and m_type='twitter'" % tweet.id_str)
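# A hypothetical driver for saveTwitter (fetchTimeline is an invented name,
# credentials are placeholders), assuming tweepy, whose model attributes
# (_json, _api, id_str) match the handling above. As with the other services,
# a None return from the insert means the tweet was already stored, so
# fetching can stop.
import tweepy


def fetchTimeline(screen_name):
    auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
    auth.set_access_token('ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')
    api = tweepy.API(auth)
    for tweet in api.user_timeline(screen_name=screen_name, count=20):
        if saveTwitter(tweet) is None:
            break  # duplicate: everything older is already stored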