def saveMessage(god_name, github_name, god_id, message):
    '''Persist one GitHub event for a followed user ("god") into `message`.

    create by bigzhu at 15/07/16 09:44:39 merge event data into `content`
    so it is easier to extract later.

    Returns the new row id, or None when a row with the same
    id_str/m_type already exists.
    '''
    # Pack the raw event pieces into one JSON string.
    content = storage()
    content.type = message.type
    content.repo = message.repo
    content.payload = message.payload
    content = json.dumps(content)
    m = public_bz.storage()
    m.god_id = god_id
    m.god_name = god_name
    m.name = github_name
    # m.avatar = message.actor['avatar_url']
    m.id_str = message['id']
    m.m_type = 'github'
    # GitHub returns UTC; shift to local (UTC+8) time.
    m.created_at = time_bz.unicodeToDateTIme(message.created_at)
    m.created_at += timedelta(hours=8)
    m.content = content
    m.text = None
    m.href = None
    id = pg.insertIfNotExist('message', m,
                             "id_str='%s' and m_type='github'" % m.id_str)
    if id is not None:
        print '%s new github %s' % (m.name, m.id_str)
    return id
def sendTwitter(openid, tweet, screen_name, id):
    '''Send one tweet to a WeChat user as article message(s).

    One article is built per media attachment; only the first article
    carries the title/description.  Without media a single text-only
    article is sent.
    '''
    articles = []
    if hasattr(tweet, 'extended_entities') and tweet.extended_entities['media']:
        for media in tweet.extended_entities['media']:
            article = storage()
            # Images are served through the local proxy.
            article.picurl = "http://follow.center/ProxyHandler/%s" % media[
                'media_url_https']
            article.url = "http://follow.center/message?t=twitter&id=%s" % id
            articles.append(article)
        if len(articles) == 1:
            articles[0].title = screen_name
        else:
            # Several images: put the tweet text into the title too.
            articles[0].title = screen_name + ': ' + tweet.text
        articles[0].description = tweet.text
    else:
        article = storage()
        article.title = screen_name
        article.url = "http://follow.center/message?t=twitter&id=%s" % id
        article.description = tweet.text
        articles = [article]
    sendArticle(openid, articles)
def saveMessage(message):
    '''Normalize a raw GitHub event dict and store it in the unified `m` table.

    create by bigzhu at 15/07/16 09:44:39 merge event data into `content`
    so it is easier to extract later.

    Returns the new row id, or None when the event was already stored.
    '''
    # GitHub ids are huge; keep them as strings.
    message.id_str = message.pop('id')
    content = storage()
    content.type = message.type
    content.repo = message.pop('repo')
    content.payload = message.pop('payload')
    message.content = json.dumps(content)
    if message.get('org'):
        message.org = json.dumps(message.org)
    # return db_bz.insertIfNotExist(pg, 'github_message', message, "id_str='%s'" % message.id_str)
    m = public_bz.storage()
    m.id_str = message.id_str
    m.m_type = 'github'
    m.m_user_id = message.actor
    m.created_at = message.created_at
    m.content = message.content
    m.text = None
    m.href = None
    return db_bz.insertIfNotExist(
        pg, 'm', m, "id_str='%s' and m_type='github'" % message.id_str)
def saveMessage(god_name, facebook_name, god_id, message):
    '''Persist one Facebook post for a followed user into `message`.

    Video posts are skipped because their URLs expire and cannot be
    replayed.  Returns the new row id, or None when skipped or already
    stored.
    '''
    message = public_bz.storage(message)
    m = public_bz.storage()
    m.god_id = god_id
    m.god_name = god_name
    m.name = facebook_name
    m.m_type = 'facebook'
    m.id_str = message.id
    m.created_at = message.created_time
    m.content = json.dumps({'description': message.get('description')})
    m.text = message.get('message')
    # NOTE: 'pictrue' is a long-standing typo baked into stored rows;
    # readers depend on it, so it must not be "fixed" here.
    m.extended_entities = json.dumps({
        'pictrue': message.get('full_picture'),
        'source': message.get('source')
    })
    m.type = message.get('type')
    if m.type == 'video':
        # Facebook video links are time-limited; do not sync them.
        print('facebook 不再同步 video, 有时效, 无法一直看')
        return
    m.href = message.get('link')
    id = pg.insertIfNotExist('message', m,
                             "id_str='%s' and m_type='facebook'" % m.id_str)
    if id is not None:
        print '%s new facebook message %s' % (m.name, m.id_str)
    return id
def saveMessage(message):
    '''Normalize a raw GitHub event dict and store it in the unified `m` table.

    create by bigzhu at 15/07/16 09:44:39 merge event data into `content`
    so it is easier to extract later.

    Returns the new row id, or None when the event was already stored.
    '''
    message.id_str = message.pop('id')
    content = storage()
    content.type = message.type
    content.repo = message.pop('repo')
    content.payload = message.pop('payload')
    message.content = json.dumps(content)
    if message.get('org'):
        message.org = json.dumps(message.org)
    # return db_bz.insertIfNotExist(pg, 'github_message', message, "id_str='%s'" % message.id_str)
    m = public_bz.storage()
    m.id_str = message.id_str
    m.m_type = 'github'
    m.m_user_id = message.actor
    m.created_at = message.created_at
    m.content = message.content
    m.text = None
    m.href = None
    return db_bz.insertIfNotExist(pg, 'm', m,
                                  "id_str='%s' and m_type='github'" % message.id_str)
def getHtmlListByNameLike(search_name):
    '''Find saved html files whose name fuzzily matches search_name.

    Returns a list of storage objects {name: '', time: ''} sorted by
    modification time, newest first.
    '''
    html_list = []
    for html in os.listdir(HTML_PATH):
        html_name = cutName(html)
        if html_name == '':
            continue
        if isNameLike(html_name, search_name):
            pass
        else:
            continue
        try:
            modify_time = getModifyTime(html_name)
        except OSError as e:
            # Non-html files may be present; skip them.
            print e
            continue
        the_html = storage()
        the_html.name = html_name
        the_html.time = modify_time
        html_list.append(the_html)
    # Sort by time, newest first.
    html_list = sorted(html_list, key=lambda d: d.time, reverse=True)
    return html_list
def saveMessage(god_name, twitter_name, god_id, tweet):
    '''Persist one tweet for a followed user into `message`.

    create by bigzhu at 15/07/10 14:39:48 save twitter
    create by bigzhu at 16/03/26 06:05:12 refactor, many changes
    modify by bigzhu at 16/03/26 20:33:59 refactor; user = this system's
    user info, tweet = the message itself
    modify by bigzhu at 17/01/13 15:38:11 removed unused parts

    Returns the new row id, or None when already stored.
    '''
    m = public_bz.storage()
    m.god_id = god_id
    m.god_name = god_name
    m.name = twitter_name
    m.id_str = tweet.id_str
    m.m_type = 'twitter'
    m.created_at = tweet.created_at
    m.content = None
    m.text = tweet.text
    if hasattr(tweet, 'extended_entities'):
        m.extended_entities = json.dumps(tweet.extended_entities)
        # Record the type of the first attachment (photo/video/...).
        m.type = tweet.extended_entities['media'][0]['type']
    m.href = 'https://twitter.com/' + m.name + '/status/' + m.id_str
    id = pg.insertIfNotExist('message', m,
                             "id_str='%s' and m_type='twitter'" % tweet.id_str)
    if id is not None:
        print '%s new twitter %s' % (m.name, m.id_str)
    return id
def post(self):
    '''Handle file upload: save each posted file to disk, record it in
    uploaded_files and return the new ids/paths as JSON.

    Response body: {"error": "0", "results": [{file_id, file_name,
    file_path, suffix}, ...]}.
    '''
    import hashlib  # local import: replaces the shared module-level md5 object
    self.set_header("Content-Type", "application/json")
    results = []
    if self.request.files:
        for i in self.request.files:
            fd = self.request.files[i]
            for f in fd:
                file_name = f.get("filename")
                file_suffix = file_name[file_name.rfind("."):]
                file_body = f["body"]
                # BUG FIX: the old code fed a shared global `md5` object that
                # was never reset, so a file's hash depended on every file
                # uploaded before it.  Hash each body independently.
                file_hash = hashlib.md5(file_body).hexdigest()
                file_path = "static/uploaded_files/%s_%s_%s" % (
                    self.current_user, int(time.time()), file_name)
                # BUG FIX: write in binary mode ('wb', not 'w') and close the
                # handle even if write() raises.
                with open(file_path, 'wb') as img:
                    img.write(file_body)
                new_file = storage(file_name=file_name,
                                   file_path="/" + file_path,
                                   file_hash=file_hash,
                                   file_type="file",
                                   suffix=file_suffix,
                                   seqname='uploaded_files_id_seq')
                file_id = self.pg.db.insert("uploaded_files", **new_file)
                r = {
                    'file_id': file_id,
                    'file_name': file_name,
                    'file_path': '/' + file_path,
                    'suffix': file_suffix
                }
                results.append(r)
    self.write(json.dumps({'error': '0', 'results': results}))
def render(self, comments, key_type, key):
    '''Render this module's template with the given comment context.'''
    context = storage(comments=comments, key_type=key_type, key=key)
    return self.render_string(self.html_name, parm=context)
def getUser(user_name):
    '''Look up an instagram user by name, preferring the local cache.

    modify by bigzhu at 15/07/31 16:41:16 user_search ranks by relevance,
    so the exact-name match is not necessarily first; scan the top 10.

    Returns the cached DB row, or None when the user does not exist.
    '''
    users = list(pg.select('instagram_user',
                           where="lower(username)=lower('%s')" % user_name))
    if users:
        return users[0]
    user = None
    for this_user in api.user_search(user_name, 10):
        if this_user.username.lower() == user_name.lower():
            user = this_user
            break
    # No such user.
    if user is None:
        public_db.delNoName('instagram', user_name)
        # BUG FIX: previously fell through and crashed on `user.id` below;
        # the later revisions of this function return here too.
        return None
    # user = api.user_search(user_name, 1)[0]
    user = api.user(user.id)
    db_user = storage()
    db_user.id = user.id
    db_user.username = user.username
    db_user.full_name = user.full_name
    db_user.profile_picture = user.profile_picture
    db_user.bio = user.bio
    db_user.website = user.website
    db_user.counts = json.dumps(user.counts)
    pg.insert('instagram_user', **db_user)
    # db_bz.insertIfNotExist(pg, 'instagram_user', db_user, "id=%s" % db_user.id)
    # Re-read through the cache path so callers always get a DB row.
    return getUser(user_name)
def main(god, wait):
    '''Sync recent GitHub events for one followed user ("god").

    create by bigzhu at 15/07/15 17:54:08 fetch github
    modify by bigzhu at 15/07/22 16:20:42 times must also have 8 hours
    added or they come out wrong

    Sends the stored etag as If-None-Match so an unchanged feed costs
    no API quota.  NOTE(review): `wait` is not used in this body.
    '''
    god_name = god.name
    github_name = god.github['name']
    god_id = god.id
    etag = god.github.get('sync_key')
    github_user = getGithubUser(github_name, god_name)
    if github_user is None:
        return
    headers = {'If-None-Match': etag}
    try:
        r = requests.get('https://api.github.com/users/%s/events' % github_name,
                         headers=headers, params=params)
    except requests.exceptions.ConnectionError:
        print public_bz.getExpInfoAll()
        return
    if r.status_code == 200:
        etag = r.headers['etag']
        limit = r.headers['X-RateLimit-Remaining']
        if limit == '0':
            # Out of API quota; try again on the next round.
            return
        for message in r.json():
            message = storage(message)
            saveMessage(god_name, github_name, god_id, message)
        # oper.noMessageTooLong('github', github_name)
        saveUser(god_name, github_name, github_user, etag)  # to update the etag
    if r.status_code == 404:
        # public_db.sendDelApply('github', god_name, github_name, '404')
        god_oper.delNoName('github', god_name)
def saveMessage(god_name, twitter_name, god_id, blog):
    '''Persist one tumblr post into `message`.

    Only text/photo/video types carry extra payload; other types are
    stored with just the common fields.  Prints when a new row is
    inserted; returns None either way.
    '''
    m = public_bz.storage()
    m.god_id = god_id
    m.god_name = god_name
    m.name = twitter_name
    m.id_str = blog['id']
    m.m_type = 'tumblr'
    m.created_at = time_bz.timestampToDateTime(blog['timestamp'])
    type = blog.get('type')
    m.href = blog.get('short_url')
    m.type = type
    if type == 'text':
        m.title = blog.get('title')
        m.text = blog.get('body')
    elif type == 'photo':
        m.text = blog.get('caption')
        m.extended_entities = json.dumps(blog.get('photos'))
    elif type == 'video':
        m.extended_entities = json.dumps({'video_url': blog.get('video_url')})
    m.content = None
    id = pg.insertIfNotExist('message', m,
                             "id_str='%s' and m_type='tumblr' " % m.id_str)
    if id is None:
        pass
    else:
        print '%s new tumblr message %s' % (m.name, m.id_str)
def saveBlogs(user_name, blogs, offset):
    '''Store one page of tumblr blogs into the unified `m` table, then
    recurse with offset+20 until a duplicate row is met.
    '''
    if blogs:
        pass
    else:
        return
    for blog in blogs:
        m = public_bz.storage()
        m.id_str = blog['id']
        m.m_type = 'tumblr'
        m.m_user_id = user_name
        m.created_at = time_bz.timestampToDateTime(blog['timestamp'])
        m.extended_entities = json.dumps(blog.get('photos'))
        m.content = None
        m.text = blog.get('caption')
        m.href = blog.get('short_url')
        m.type = blog.get('type')
        # NOTE(review): other call sites use pg.insertIfNotExist(table, ...)
        # or db_bz.insertIfNotExist(pg, table, ...); passing `pg` to its own
        # method here looks inconsistent — confirm the signature.
        result = pg.insertIfNotExist(pg, 'm', m,
                                     "id_str='%s' and m_type='tumblr' " % m.id_str)
        if result is None:
            # Hit a duplicate: everything older is already stored; stop.
            print 'have same data'
            return
        else:
            print 'new ', m.id_str, m.type, 'offset:', offset, 'name:', user_name
    # Whole page was new: fetch the next page.
    new_offset = offset + 20
    new_blogs = callGetMeidaApi(user_name, offset=new_offset)['response']['posts']
    saveBlogs(user_name, new_blogs, new_offset)
def saveGraphqlMessage(ins_name, user_name, god_id, message):
    '''Store one instagram post fetched through the Graphql endpoint.

    Handles single image, multi-image (GraphSidecar) and video posts.
    Returns the new row id, or None when already stored.
    '''
    message = storage(message)
    m = public_bz.storage()
    m.god_name = user_name
    m.name = ins_name
    m.m_type = 'instagram'
    m.id_str = message.id
    m.created_at = time_bz.timestampToDateTime(message.taken_at_timestamp)
    # The caption lives in edge_media_to_caption.edges[0].node.text.
    if message.get('edge_media_to_caption').get('edges'):
        m.text = message.get('edge_media_to_caption').get('edges')[0].get(
            'node').get('text')
    else:
        m.text = None
    m.extended_entities = json.dumps({'url': message.display_url})
    m.href = 'https://www.instagram.com/p/%s/' % message.shortcode
    if message.__typename == 'GraphSidecar':
        # Multiple images: fetch every image of the sidecar.
        edges = getMutipleImage(message.shortcode)
        images = []
        for edge in edges:
            url = edge['node']['display_url']
            images.append({'url': url})
        m.extended_entities = json.dumps(images)
        m.type = 'images'
    elif message.is_video:
        m.type = 'video'
        video_url = getVideoUrl(m.href)
        m.extended_entities = json.dumps({
            'url': message.display_url,
            'video_url': video_url
        })
    else:
        m.type = 'image'
    id = pg.insertIfNotExist('message', m,
                             "id_str='%s' and m_type='instagram'" % m.id_str)
    if id is not None:
        print '%s new instagram message %s' % (m.name, m.id_str)
    # A duplicate is expected on every sync run.
    # else:
    #     print '%s 重复记录 %s' % (m.user_name, m.id_str)
    return id
def addTimeLine(self, oper, user_id, target_type, target_id, other_info=None):
    '''Insert one timeline row; returns the new row id.'''
    row = storage(oper=oper, user_id=user_id,
                  target_type=target_type, target_id=target_id)
    if other_info:
        row.other_info = json.dumps(other_info, cls=public_bz.ExtEncoder)
    return self.pg.db.insert('timeline', **row)
def getTenContent(name):
    '''Return the newest SHOW_COUNT html entries (name/time/content) for name.'''
    top_entries = getList(name)[0][1][:SHOW_COUNT]
    return [
        public_bz.storage(name=entry[0], time=entry[1],
                          content=getHtmlContent(entry[0]))
        for entry in top_entries
    ]
def sendGithub(openid, text, user_name, id):
    '''
    create by bigzhu at 15/07/22 15:05:01 send a github message to WeChat
    as a single article.
    '''
    article = storage(
        title=user_name,
        url="http://follow.center/message?t=github&id=%s" % id,
        description=text,
    )
    sendArticle(openid, [article])
def post(self):
    '''Save (insert or update) the caller's anki credentials and refresh
    the cached anki session.'''
    self.set_header("Content-Type", "application/json")
    data = json.loads(self.request.body)
    anki_info = storage()
    anki_info.user_name = data['user_name']
    anki_info.password = data['password']
    anki_info.user_id = self.current_user
    db_bz.insertOrUpdate(pg, 'anki', anki_info,
                         "user_id='%s'" % anki_info.user_id)
    # Force a fresh login so the new credentials take effect immediately.
    anki.getMidAndCsrfTokenHolder(anki_info.user_id, reset_cookie=True)
    self.write(json.dumps(self.data))
def getUserEvent(user_name, etag): ''' create by bigzhu at 15/07/15 17:54:08 取github modify by bigzhu at 15/07/22 16:20:42 时间同样要加入8小时,否则不正确 ''' headers = {'If-None-Match': etag} try: r = requests.get('https://api.github.com/users/%s/events' % user_name, headers=headers) except requests.exceptions.ConnectionError: print public_bz.getExpInfoAll() return if r.status_code == 200: messages = r.json() if not messages: delGithubUser(user_name) # 没有这个github用户,取消 return actor = messages[0]['actor'] # actor不定是作者名字,有可能org才是 if actor['login'].lower() == user_name.lower(): the_user = actor else: org = messages[0]['org'] if org['login'].lower() == user_name.lower(): the_user = org # 如果是org,那么url不同 the_user['url'] = "https://api.github.com/users/" + user_name else: raise "in this github can't find user_name=%s" user_id = saveUser(the_user['id'], the_user['url']) # 更新etag etag = r.headers['etag'] updateEtag(user_name, etag) for i in r.json(): i['actor'] = user_id message = storage(i) message.created_at = time_bz.unicodeToDateTIme(message.created_at) message.created_at += timedelta(hours=8) saveMessage(copy.deepcopy(message)) #id = saveMessage(copy.deepcopy(message)) # if id is not None: # text = formatInfo(message) # print text # openids = public_db.getOpenidsByName('github', user_name) # for data in openids: # wechat_oper.sendGithub(data.openid, text, user_name, id) if r.status_code == 404: delGithubUser(user_name) else: print r.status_code
def sendInstagram(openid, text, img_url, user_name, id):
    '''
    create by bigzhu at 15/08/01 00:35:08
    modify by bigzhu at 15/08/01 00:57:28 no proxy needed, not blocked
    Send one instagram post to a WeChat user as a single article.
    '''
    article = storage(
        title=user_name,
        picurl='http://follow.center/sp/' + oper.encodeUrl(img_url),
        url="http://follow.center/message?t=instagram&id=%s" % id,
        description=text,
    )
    sendArticle(openid, [article])
def initialize(self):
    '''
    For oauth2: override this to set your own parameters; below are
    examples for google / twitter / douban.
    modify by bigzhu at 15/04/26 21:56:09 each oauth login's parameters
    should be set in its own oauth handler, not here.
    '''
    UserInfoHandler.initialize(self)
    oauth2 = storage()
    oauth2.google = storage(enabled=False, url='/google')
    oauth2.twitter = storage(enabled=False, url='/twitter')
    oauth2.douban = storage(enabled=False, url='/douban')
    oauth2.github = storage(enabled=False, url='/github')
    self.oauth2 = oauth2
    # User-operation helper bound to this handler's DB connection.
    self.user_oper = user_bz.UserOper(self.pg)
    # Whether to validate.
    self.validate = False
    # salt
    self.salt = "hold is watching you"
def post(self):
    '''Create or update the caller's remark for one god.'''
    self.set_header("Content-Type", "application/json")
    body = json.loads(self.request.body)
    row = storage(user_id=self.current_user,
                  remark=body['remark'],
                  god_id=body['god_id'])
    condition = "user_id='%s' and god_id=%s" % (self.current_user, row.god_id)
    db_bz.insertOrUpdate(pg, 'remark', row, condition)
    self.write(json.dumps({'error': '0'}))
def saveUser(god_name, tumblr_name, user):
    '''Refresh the cached tumblr profile stored on the god row.
    Returns the storage that was written.'''
    social_user = public_bz.storage()
    social_user.type = 'tumblr'
    # social_user.name = user['name']
    social_user.name = tumblr_name
    # Some users do not share their likes count.
    social_user.count = user.get('likes', -1)
    avatar_url = 'https://api.tumblr.com/v2/blog/%s.tumblr.com/avatar/512' % user[
        'name']
    social_user.avatar = tumblrRealAvatar(avatar_url)
    social_user.description = user['description']
    social_user.sync_key = user['updated']
    pg.update('god', where={'name': god_name}, tumblr=json.dumps(social_user))
    return social_user
def saveUser(god_name, twitter_name, twitter_user): social_user = public_bz.storage() # 不要用返回的name, 大小写会发生变化 # social_user.name = twitter_user.screen_name social_user.name = twitter_name social_user.type = 'twitter' social_user.count = twitter_user.followers_count social_user.avatar = twitter_user.profile_image_url_https.replace('_normal', '_400x400') social_user.description = twitter_user.description # 没有找到 # social_user.sync_key = twitter_user.description pg.update('god', where={'name': god_name}, twitter=json.dumps(social_user)) return social_user
def saveUser(god_name, ins_name, user, sync_key):
    '''Refresh the cached instagram profile stored on the god row.
    Returns the storage that was written.'''
    social_user = public_bz.storage()
    social_user.type = 'instagram'
    # social_user.name = user['username']
    social_user.name = ins_name
    social_user.count = user['followed_by']['count']
    social_user.avatar = user['profile_pic_url']
    social_user.description = user['biography']
    social_user.id = user['id']
    social_user.sync_key = sync_key
    pg.update('god', where={'name': god_name}, instagram=json.dumps(social_user))
    return social_user
def getContent(name, count=15, lenght=None):
    '''Return up to `count` html entries (name/time/content) for `name`.

    When `lenght` (sic — parameter name kept for callers) is given,
    entries whose content is shorter than it are skipped.
    '''
    ten_names = getList(name)[0][1][:count]
    lists = []
    for i in ten_names:
        c = public_bz.storage()
        c.name = i[0]
        c.time = i[1]
        c.content = getHtmlContent(c.name)
        if lenght:
            print len(c.content)
            if len(c.content) < lenght:
                # Too short; skip it.
                print 'continue ' + c.name
                continue
        lists.append(c)
    return lists
def sendTwitter(openid, tweet, screen_name, id):
    '''Deliver one tweet to a WeChat user as article message(s): one
    article per media attachment, or a single text-only article when
    the tweet carries no media.'''
    link = "http://follow.center/message?t=twitter&id=%s" % id
    media_list = []
    if hasattr(tweet, 'extended_entities'):
        media_list = tweet.extended_entities['media']
    if media_list:
        articles = []
        for media in media_list:
            item = storage()
            item.picurl = "http://follow.center/ProxyHandler/%s" % media['media_url_https']
            item.url = link
            articles.append(item)
        first = articles[0]
        if len(articles) == 1:
            first.title = screen_name
        else:
            first.title = screen_name + ': ' + tweet.text
        first.description = tweet.text
    else:
        only = storage()
        only.title = screen_name
        only.url = link
        only.description = tweet.text
        articles = [only]
    sendArticle(openid, articles)
def saveMessage(message):
    '''
    create by bigzhu at 15/07/16 09:44:39 merge event data into `content`
    so it is easier to extract later.

    Returns the new github_message row id, or None when already stored.
    '''
    message.id_str = message.pop('id')
    content = storage()
    content.type = message.type
    content.repo = message.pop('repo')
    content.payload = message.pop('payload')
    message.content = json.dumps(content)
    if message.get('org'):
        message.org = json.dumps(message.org)
    return db_bz.insertIfNotExist(pg, 'github_message', message,
                                  "id_str='%s'" % message.id_str)
def saveUser(god_name, facebook_name, user, etag):
    '''Refresh the cached facebook profile stored on the god row.
    Returns the storage that was written.'''
    social_user = public_bz.storage()
    social_user.type = 'facebook'
    # social_user.name = user['username']
    social_user.name = facebook_name
    # Facebook exposes no friend/follower count.
    social_user.count = -1
    social_user.avatar = user['picture']['data']['url']
    social_user.description = user.get('bio')  # bio may be absent
    if etag is not None:
        social_user.sync_key = etag
    social_user.out_id = user['id']
    pg.update('god', where={'name': god_name}, facebook=json.dumps(social_user))
    return social_user
def groupByCreatedDateDay(self, timelines):
    '''
    create by bigzhu at 15/02/03 13:44:24 group timelines at day
    granularity
    modify by bigzhu at 15/02/06 13:29:58 return a list instead of a
    dict so the days can stay ordered

    Returns [storage(day=..., timelines=[...]), ...] preserving first-
    appearance order of the days; each storage shares its list with
    group_time_line, so later appends show up in the result.
    '''
    day_time_lines = []
    group_time_line = {}
    for timeline in timelines:
        day = timeline.created_date.strftime('%Y年%m月%d日')
        this_day_timeline = group_time_line.get(day)
        if this_day_timeline:
            #this_day_timeline.insert(0, timeline)
            this_day_timeline.append(timeline)
        else:
            group_time_line[day] = [timeline]
            day_time_lines.append(storage(day = day, timelines = group_time_line[day]))
    return day_time_lines
def put(self):
    '''Record the client's last-read time and return the unread count.'''
    self.set_header("Content-Type", "application/json")
    data = json.loads(self.request.body)
    last_time = int(data.get('last_time'))
    last_time = time_bz.timestampToDateTime(last_time, True)
    # last_message_id = data.get('message_id')
    user_id = self.current_user
    if user_id is None:
        # Anonymous caller: nothing to save.
        pass
    else:
        last_oper.saveLast(last_time, user_id)
    data = storage()
    data.error = OK
    data.unread_message_count = oper.getUnreadCount(user_id)
    self.write(json.dumps(data, cls=json_bz.ExtEncoder))
def getUser(user_name, always_check=False):
    '''Look up an instagram user, preferring the local cache.

    modify by bigzhu at 15/07/31 16:41:16 user_search ranks by relevance,
    so the exact-name match is not necessarily first; scan the top 10.

    always_check forces a refresh from the API even when cached.
    Returns the cached row, or None when unavailable.
    '''
    users = list(
        pg.select('instagram_user',
                  where="lower(username)=lower('%s')" % user_name))
    if users and not always_check:
        return users[0]
    else:
        user = None
        try:
            for this_user in api.user_search(user_name, 10):
                if this_user.username.lower() == user_name.lower():
                    user = this_user
                    break
        except instagram.bind.InstagramClientError:
            print public_bz.getExpInfoAll()
            return
        # No such user.
        if user is None:
            public_db.delNoName('instagram', user_name)
            return
        try:
            user = api.user(user.id)
        except (instagram.bind.InstagramAPIError,
                instagram.bind.InstagramClientError):
            # Usually means no access permission.
            print public_bz.getExpInfoAll()
            public_db.delNoName('instagram', user_name)
            return
        db_user = storage()
        db_user.id_str = user.id
        db_user.username = user.username
        db_user.full_name = user.full_name
        db_user.profile_picture = user.profile_picture
        db_user.bio = user.bio
        db_user.website = user.website
        db_user.counts = json.dumps(user.counts)
        #pg.insert('instagram_user', **db_user)
        pg.insertOrUpdate(pg, 'instagram_user', db_user,
                          "id_str='%s'" % db_user.id_str)
        #db_bz.insertIfNotExist(pg, 'instagram_user', db_user, "id=%s" % db_user.id)
        # Re-read through the cache path so callers always get a DB row.
        return getUser(user_name)
def getUser(user_name, always_check=False):
    '''Look up an instagram user, preferring the local cache.

    modify by bigzhu at 15/07/31 16:41:16 user_search ranks by relevance,
    so the exact-name match is not necessarily first; scan the top 10.

    always_check forces a refresh from the API even when cached.
    Returns the cached row, or None when unavailable.
    '''
    users = list(pg.select('instagram_user',
                           where="lower(username)=lower('%s')" % user_name))
    if users and not always_check:
        return users[0]
    else:
        user = None
        try:
            for this_user in api.user_search(user_name, 10):
                if this_user.username.lower() == user_name.lower():
                    user = this_user
                    break
        except instagram.bind.InstagramClientError:
            print public_bz.getExpInfoAll()
            return
        # No such user.
        if user is None:
            public_db.delNoName('instagram', user_name)
            return
        try:
            user = api.user(user.id)
        except (instagram.bind.InstagramAPIError,
                instagram.bind.InstagramClientError):
            # Usually means no access permission.
            print public_bz.getExpInfoAll()
            public_db.delNoName('instagram', user_name)
            return
        db_user = storage()
        db_user.id_str = user.id
        db_user.username = user.username
        db_user.full_name = user.full_name
        db_user.profile_picture = user.profile_picture
        db_user.bio = user.bio
        db_user.website = user.website
        db_user.counts = json.dumps(user.counts)
        #pg.insert('instagram_user', **db_user)
        pg.insertOrUpdate(pg, 'instagram_user', db_user,
                          "id_str='%s'" % db_user.id_str)
        #db_bz.insertIfNotExist(pg, 'instagram_user', db_user, "id=%s" % db_user.id)
        # Re-read through the cache path so callers always get a DB row.
        return getUser(user_name)
def saveUser(god_name, github_name, user, sync_key=None):
    '''Refresh the cached github profile (and optionally the etag
    sync_key) stored on the god row.  Returns the storage written.'''
    social_user = public_bz.storage()
    try:
        # social_user.name = user['login']
        social_user.name = github_name
    except Exception as e:
        # NOTE(review): a plain assignment should not raise; this guard
        # presumably dates from the user['login'] version — confirm.
        print e
        print user
    social_user.type = 'github'
    if user.get('followers') is None:
        social_user.count = -1
    else:
        social_user.count = user['followers']
    social_user.avatar = user.get('avatar_url', '')
    social_user.description = user.get('bio')
    if sync_key is not None:
        social_user.sync_key = sync_key
    pg.update('god', where={'name': god_name}, github=json.dumps(social_user))
    return social_user
def get(self, parm=None):
    '''Return messages newer than a point in time, optionally filtered
    by search_key or god_name, plus the caller's unread count.

    `parm` is a JSON string with optional keys after / limit /
    search_key / god_name.
    '''
    starttime = datetime.datetime.now()
    self.set_header("Content-Type", "application/json")
    after = None
    limit = None
    search_key = None
    god_name = None
    if parm:
        parm = json.loads(parm)
        after = parm.get('after')  # only messages later than this time
        limit = parm.get('limit')
        search_key = parm.get('search_key')
        god_name = parm.get('god_name')  # restrict to this god only
    user_id = self.current_user
    if after:
        after = time_bz.timestampToDateTime(after, True)
    elif search_key is None and god_name is None:
        # These filtered queries must not be capped by the last-seen time.
        after = last_oper.getLastTime(user_id)
    messages = public_db.getNewMessages(user_id=user_id, after=after,
                                        limit=limit, god_name=god_name,
                                        search_key=search_key)
    data = storage()
    data.error = OK
    data.messages = messages
    data.unread_message_count = oper.getUnreadCount(user_id)
    if (len(messages) == 0):
        # Nothing to show: tell the client how many gods are followed so
        # it can render the right empty state.
        if (user_id):
            data.followed_god_count = god_oper.getFollowedGodCount(user_id)
        else:
            data.followed_god_count = 0
    endtime = datetime.datetime.now()
    # Crude request-duration log.
    print((endtime - starttime).seconds)
    self.write(json.dumps(data, cls=json_bz.ExtEncoder))
def saveUser(id, url):
    '''
    create by bigzhu at 15/07/15 21:27:19 save github user info
    create by bigzhu at 15/07/22 16:17:37 fix bug, not return id

    Returns the user id when already cached, otherwise the insert result.
    '''
    if list(pg.select('github_user', where='id=%s' % id)):
        return id
    r = requests.get(url)
    user = storage(r.json())
    # Strip the api-link fields we never store.  BUG FIX: the original
    # del'd each field unconditionally and crashed when the payload
    # omitted one (org records / error payloads) — the later revision of
    # this function added hasattr guards for the same reason.
    for field in ('url', 'followers_url', 'following_url', 'gists_url',
                  'starred_url', 'subscriptions_url', 'organizations_url',
                  'repos_url', 'events_url', 'received_events_url', 'type'):
        if hasattr(user, field):
            delattr(user, field)
    return db_bz.insertIfNotExist(pg, 'github_user', user)
def saveUser(id, url):
    '''
    create by bigzhu at 15/07/15 21:27:19 save github user info
    create by bigzhu at 15/07/22 16:17:37 fix bug, not return id

    Returns the user id when already cached, otherwise the insert result.
    '''
    if list(pg.select('github_user', where='id=%s' % id)):
        return id
    else:
        r = requests.get(url)
        user = storage(r.json())
        print user
        # Strip unused api-link fields.  NOTE(review): guards only on
        # 'url' — assumes the other link fields are present whenever
        # 'url' is; confirm against the GitHub payload.
        if hasattr(user, 'url'):
            del user.url
            del user.followers_url
            del user.following_url
            del user.gists_url
            del user.starred_url
            del user.subscriptions_url
            del user.organizations_url
            del user.repos_url
            del user.events_url
            del user.received_events_url
            del user.type
        return db_bz.insertIfNotExist(pg, 'github_user', user)
def getHtmlByName(name):
    '''Load one saved html entry (name, mtime, content) as a storage.'''
    return storage(
        name=name,
        time=getModifyTime(name),
        content=getHtmlContent(name),
    )
def saveMedias(user, medias):
    '''Store one page of instagram medias and recurse through pagination.

    create by bigzhu at 15/09/04 20:58:54 save medias

    Raw media payload:
    "attribution":null, "tags":[ ], "type":"image", "location":{ },
    "comments":{ }, "filter":"Normal", "created_time":"1441362020",
    "link":"https:\/\/instagram.com\/p\/7NIHiLJJs3\/", "likes":{ },
    "images":{ }, "users_in_photo":[ ], "caption":{ },
    "user_has_liked":false, "id":"1066544388859271991_262341", "user":{ }

    Raises on a duplicate row; WeChat-notifies followers for small
    batches.
    '''
    for media_d in medias['data']:
        media = storage(media_d)
        db_media = storage()
        #db_media.attribution = media.attribution
        #db_media.tags = json.dumps(media.tags, cls=public_bz.ExtEncoder)
        db_media.type = media.type
        #db_media.location = json.dumps(media.location, cls=public_bz.ExtEncoder)
        db_media.comments = json.dumps(media.comments, cls=public_bz.ExtEncoder)
        db_media.filter = media.filter
        #db_media.created_time = time_bz.timestampToDateTime(media.created_time) + timedelta(hours=8)
        db_media.created_time = time_bz.timestampToDateTime(media.created_time)
        db_media.link = media.link
        #db_media.likes = json.dumps(media.likes, cls=public_bz.ExtEncoder)
        db_media.low_resolution = json.dumps(media.images['low_resolution'])
        db_media.standard_resolution = json.dumps(media.images['standard_resolution'])
        db_media.thumbnail = json.dumps(media.images['thumbnail'])
        #db_media.users_in_photo = json.dumps(media.users_in_photo, cls=public_bz.ExtEncoder)
        if media.caption:
            caption = media.caption
            # Flatten the 'from' sub-object to just the author id.
            caption['user_id'] = caption['from']['id']
            del caption['from']
        else:
            caption = ''
        db_media.caption = json.dumps(caption, cls=public_bz.ExtEncoder)
        db_media.id_str = media.id
        db_media.user_id = user.id
        id = pg.insertIfNotExist(pg, 'instagram_media', db_media,
                                 "id_str='%s'" % db_media.id_str)
        if id is None:
            raise Exception('重复记录 id=%s, name=%s' % (media.id, user.username))
        else:
            print 'new=', media.id, user.username
        if id is not None and len(medias) <= 2:
            # WeChat-notify only when there are at most 2 new messages.
            # NOTE(review): `medias` is the raw dict, so len() counts its
            # keys ('data', 'pagination', ...), not the media count —
            # probably meant len(medias['data']); confirm.
            openids = public_db.getOpenidsByName('instagram', user.username)
            for data in openids:
                if caption != '':
                    text = caption.get('text')
                else:
                    text = ''
                wechat_oper.sendInstagram(data.openid, text,
                                          media.images['low_resolution']['url'],
                                          user.username, id)
    if medias['pagination']:
        # Fetch and store the next page recursively.
        next_url = medias['pagination']['next_url']
        medias = callGetMeidaApi(next_url=next_url)
        saveMedias(user, medias)
def getMedia(user_name=None, with_next_url=None, user=None):
    '''Fetch recent instagram medias and store them in instagram_media.

    First page is fetched by user_name; further pages by with_next_url
    (with `user` carried through).  Recurses over pagination and
    WeChat-notifies followers for small batches.
    '''
    if user_name:
        user = getUser(user_name)
        if user is None:
            return
        # min_id fetches medias with id >= the last one we saw.
        try:
            medias, next_ = api.user_recent_media(user_id=user.id,
                                                  min_id=user.last_id)
        except instagram.bind.InstagramClientError:
            print public_bz.getExpInfoAll()
            public_db.delNoName('instagram', user_name)
            return
        if medias:
            # Remember the newest id for the next incremental fetch.
            last_id = medias[0].id
            pg.update('instagram_user',
                      where="lower(username)=lower('%s')" % user_name,
                      last_id=last_id)
    else:
        medias, next_ = api.user_recent_media(with_next_url=with_next_url)
    for media in medias:
        db_media = storage()
        if media.caption:
            caption = media.caption.__dict__
            # Flatten the embedded User object to just its id.
            caption['user_id'] = caption['user'].id
            del caption['user']
        else:
            caption = ''
        db_media.caption = json.dumps(caption, cls=public_bz.ExtEncoder)
        db_media.comment_count = media.comment_count
        if media.comments:
            # Convert Comment/User objects to plain dicts for JSON.
            media.comments = [d.__dict__ for d in media.comments]
            for comment in media.comments:
                comment['user'] = comment['user'].__dict__
        db_media.comments = json.dumps(media.comments, cls=public_bz.ExtEncoder)
        db_media.created_time = media.created_time
        # 8-hour offset (UTC -> local).
        db_media.created_time += timedelta(hours=8)
        db_media.filter = media.filter
        db_media.low_resolution = json.dumps(media.images['low_resolution'].__dict__)
        db_media.standard_resolution = json.dumps(media.images['standard_resolution'].__dict__)
        db_media.thumbnail = json.dumps(media.images['thumbnail'].__dict__)
        db_media.id_str = media.id
        db_media.like_count = media.like_count
        # likes contain User objects; not stored for now.
        #db_media.likes = json.dumps(media.likes)
        db_media.link = media.link
        db_media.type = media.type
        db_media.user_id = user.id
        id = db_bz.insertIfNotExist(pg, 'instagram_media', db_media,
                                    "id_str='%s'" % db_media.id_str)
        print 'new=', media.id, user.username
        if id is not None and len(medias) <= 2:
            # WeChat-notify only when there are at most 2 new messages.
            openids = public_db.getOpenidsByName('instagram', user.username)
            for data in openids:
                if caption != '':
                    text = caption.get('text')
                else:
                    text = ''
                wechat_oper.sendInstagram(data.openid, text,
                                          media.images['standard_resolution'].url,
                                          user.username, id)
    # Recurse through the remaining pages.
    if next_ != with_next_url:
        getMedia(with_next_url=next_, user=user)
def getEmptyUserInfo(self):
    '''Return a user_info storage with every model attribute set to "".'''
    blank = {p: "" for p in model_oper_bz.getModelAttributes(model_bz.user_info)}
    return public_bz.storage(blank)
def saveMedias(user, medias):
    '''Store one page of instagram medias into the unified `m` table and
    recurse through pagination.

    create by bigzhu at 15/09/04 20:58:54 save medias

    Raw media payload:
    "attribution":null, "tags":[ ], "type":"image", "location":{ },
    "comments":{ }, "filter":"Normal", "created_time":"1441362020",
    "link":"https:\/\/instagram.com\/p\/7NIHiLJJs3\/", "likes":{ },
    "images":{ }, "users_in_photo":[ ], "caption":{ },
    "user_has_liked":false, "id":"1066544388859271991_262341", "user":{ }

    Duplicate rows are logged, not raised.
    '''
    for media_d in medias['data']:
        media = storage(media_d)
        db_media = storage()
        #db_media.attribution = media.attribution
        #db_media.tags = json.dumps(media.tags, cls=public_bz.ExtEncoder)
        db_media.type = media.type
        #db_media.location = json.dumps(media.location, cls=public_bz.ExtEncoder)
        db_media.comments = json.dumps(media.comments, cls=public_bz.ExtEncoder)
        db_media.filter = media.filter
        #db_media.created_time = time_bz.timestampToDateTime(media.created_time) + timedelta(hours=8)
        db_media.created_time = time_bz.timestampToDateTime(media.created_time)
        db_media.link = media.link
        #db_media.likes = json.dumps(media.likes, cls=public_bz.ExtEncoder)
        db_media.low_resolution = json.dumps(media.images['low_resolution'])
        db_media.standard_resolution = json.dumps(
            media.images['standard_resolution'])
        db_media.thumbnail = json.dumps(media.images['thumbnail'])
        #db_media.users_in_photo = json.dumps(media.users_in_photo, cls=public_bz.ExtEncoder)
        if media.caption:
            caption = media.caption
            # Flatten the 'from' sub-object to just the author id.
            caption['user_id'] = caption['from']['id']
            del caption['from']
        else:
            caption = ''
        db_media.caption = json.dumps(caption, cls=public_bz.ExtEncoder)
        db_media.id_str = media.id
        db_media.user_id_str = user.id
        #id = pg.insertIfNotExist(pg, 'instagram_media', db_media, "id_str='%s'" % db_media.id_str)
        # Map the media onto the unified `m` message row.
        m = public_bz.storage()
        m.id_str = db_media.id_str
        m.m_type = 'instagram'
        m.m_user_id = db_media.user_id_str
        m.created_at = db_media.created_time
        m.extended_entities = db_media.standard_resolution
        m.content = db_media.comments
        if media.caption:
            m.text = media.caption['text']
        else:
            m.text = None
        m.href = db_media.link
        m.type = db_media.type
        id = pg.insertIfNotExist(pg, 'm', m, "id_str='%s'" % db_media.id_str)
        if id is None:
            # Duplicates do happen; just log them.
            error = '重复记录 id=%s, name=%s' % (media.id, user.username)
            print error
            #raise Exception(error)
        else:
            print 'new=', media.id, user.username
        # if id is not None and len(medias) <= 2:  # WeChat-notify only for <=2 new messages (disabled)
        #     openids = public_db.getOpenidsByName('instagram', user.username)
        #     for data in openids:
        #         if caption != '':
        #             text = caption.get('text')
        #         else:
        #             text = ''
        #         wechat_oper.sendInstagram(data.openid, text, media.images['low_resolution']['url'], user.username, id)
    if medias['pagination']:
        # Fetch and store the next page recursively.
        next_url = medias['pagination']['next_url']
        medias = callGetMeidaApi(next_url=next_url)
        saveMedias(user, medias)
def saveTwitter(tweet):
    '''
    create by bigzhu at 15/07/10 14:39:48  -- save twitter

    Flatten a tweepy Status object into plain DB-storable columns
    (JSON-encoding the nested structures, deleting what cannot be stored),
    upsert its user/author into twitter_user, then insert the tweet into
    the unified message table 'm'. Returns the new row id, or None when the
    tweet was already stored.

    WARNING: destructively mutates `tweet` (and tweet.user) — statement
    order below matters.

    NOTE(review): an identical second definition of saveTwitter appears
    later in this file and silently shadows this one at import time.
    '''
    # _json is the raw API payload duplicate; drop it before persisting
    del tweet.user._json
    #del tweet.user._api
    tweet.user.entities = json.dumps(tweet.user.entities)
    # keep only id_str; the numeric id is dropped (see overflow note below)
    del tweet.user.id
    # NOTE(review): `pg` passed as both receiver and first arg — confirm
    # against the pg helper's signature (HEAD uses pg.insertIfNotExist('message', ...)).
    pg.insertOrUpdate(pg, 'twitter_user', vars(tweet.user),
                      "id_str='%s'" % tweet.user.id_str)
    tweet.t_user_id = tweet.user.id_str
    # capture before tweet.user is deleted — needed for the href below
    screen_name = tweet.user.screen_name
    del tweet.user
    if hasattr(tweet, 'author'):
        #del tweet.author.id
        pg.insertOrUpdate(pg, 'twitter_user', vars(tweet.author),
                          "id_str='%s'" % tweet.author.id_str)
        tweet.t_author_id = tweet.author.id_str
        del tweet.author
    # strip tweepy internals that cannot be serialized
    if hasattr(tweet, '_api'):
        del tweet._api
    if hasattr(tweet, '_json'):
        del tweet._json
    # twitter numeric ids are too large ("id": 618948810941673472) and raise
    # psycopg2.DataError: integer out of range — only id_str is kept
    if hasattr(tweet, 'id'):
        del tweet.id
    # JSON-encode every nested structure so it fits a text column
    if hasattr(tweet, 'entities'):
        tweet.entities = json.dumps(tweet.entities)
    if hasattr(tweet, 'geo'):
        tweet.geo = json.dumps(tweet.geo)
    if hasattr(tweet, 'coordinates'):
        tweet.coordinates = json.dumps(tweet.coordinates)
    if hasattr(tweet, 'extended_entities'):
        tweet.extended_entities = json.dumps(tweet.extended_entities)
    if hasattr(tweet, 'scopes'):
        tweet.scopes = json.dumps(tweet.scopes)
    if hasattr(tweet, 'retweeted_status'):
        # save the original tweet first, then keep only its id_str reference
        saveTwitter(tweet.retweeted_status)
        tweet.retweeted_status = tweet.retweeted_status.id_str
    if hasattr(tweet, 'quoted_status'):
        # print tweet.quoted_status
        # quoted tweets are dropped, not saved (see commented alternative)
        del tweet.quoted_status
        # saveTwitter(tweet.quoted_status)
        # tweet.quoted_status = tweet.quoted_status.id_str
    # place is an object (unclear how to persist it):
    # Place(_api=<tweepy.api.API object at 0x1808050>
    if hasattr(tweet, 'place'):
        del tweet.place
    # for k, v in vars(tweet).items():
    #     print '%s=%s' % (k, v)
    #return pg.insertIfNotExist(pg, 'twitter_message', vars(tweet), "id_str='%s'" % tweet.id_str)
    # build the unified message row
    m = public_bz.storage()
    m.id_str = tweet.id_str
    m.m_type = 'twitter'
    m.m_user_id = tweet.t_user_id
    m.created_at = tweet.created_at
    # only set when present — rows without media have no extended_entities
    if hasattr(tweet, 'extended_entities'):
        m.extended_entities = tweet.extended_entities
    m.content = None
    m.text = tweet.text
    m.href = 'https://twitter.com/'+screen_name+'/status/'+tweet.id_str
    return pg.insertIfNotExist(pg, 'm', m, "id_str='%s' and m_type='twitter'" % tweet.id_str)
def saveTwitter(tweet):
    '''
    create by bigzhu at 15/07/10 14:39:48  -- save twitter

    Flatten a tweepy Status object into plain DB-storable columns
    (JSON-encoding the nested structures, deleting what cannot be stored),
    upsert its user/author into twitter_user, then insert the tweet into
    the unified message table 'm'. Returns the new row id, or None when the
    tweet was already stored.

    WARNING: destructively mutates `tweet` (and tweet.user) — statement
    order below matters.

    NOTE(review): this is a duplicate of an earlier, identical saveTwitter
    definition in this file; being defined later, this copy is the one that
    takes effect. One of the two should be removed.
    '''
    # _json is the raw API payload duplicate; drop it before persisting
    del tweet.user._json
    #del tweet.user._api
    tweet.user.entities = json.dumps(tweet.user.entities)
    # keep only id_str; the numeric id is dropped (see overflow note below)
    del tweet.user.id
    # NOTE(review): `pg` passed as both receiver and first arg — confirm
    # against the pg helper's signature (HEAD uses pg.insertIfNotExist('message', ...)).
    pg.insertOrUpdate(pg, 'twitter_user', vars(tweet.user),
                      "id_str='%s'" % tweet.user.id_str)
    tweet.t_user_id = tweet.user.id_str
    # capture before tweet.user is deleted — needed for the href below
    screen_name = tweet.user.screen_name
    del tweet.user
    if hasattr(tweet, 'author'):
        #del tweet.author.id
        pg.insertOrUpdate(pg, 'twitter_user', vars(tweet.author),
                          "id_str='%s'" % tweet.author.id_str)
        tweet.t_author_id = tweet.author.id_str
        del tweet.author
    # strip tweepy internals that cannot be serialized
    if hasattr(tweet, '_api'):
        del tweet._api
    if hasattr(tweet, '_json'):
        del tweet._json
    # twitter numeric ids are too large ("id": 618948810941673472) and raise
    # psycopg2.DataError: integer out of range — only id_str is kept
    if hasattr(tweet, 'id'):
        del tweet.id
    # JSON-encode every nested structure so it fits a text column
    if hasattr(tweet, 'entities'):
        tweet.entities = json.dumps(tweet.entities)
    if hasattr(tweet, 'geo'):
        tweet.geo = json.dumps(tweet.geo)
    if hasattr(tweet, 'coordinates'):
        tweet.coordinates = json.dumps(tweet.coordinates)
    if hasattr(tweet, 'extended_entities'):
        tweet.extended_entities = json.dumps(tweet.extended_entities)
    if hasattr(tweet, 'scopes'):
        tweet.scopes = json.dumps(tweet.scopes)
    if hasattr(tweet, 'retweeted_status'):
        # save the original tweet first, then keep only its id_str reference
        saveTwitter(tweet.retweeted_status)
        tweet.retweeted_status = tweet.retweeted_status.id_str
    if hasattr(tweet, 'quoted_status'):
        # print tweet.quoted_status
        # quoted tweets are dropped, not saved (see commented alternative)
        del tweet.quoted_status
        # saveTwitter(tweet.quoted_status)
        # tweet.quoted_status = tweet.quoted_status.id_str
    # place is an object (unclear how to persist it):
    # Place(_api=<tweepy.api.API object at 0x1808050>
    if hasattr(tweet, 'place'):
        del tweet.place
    # for k, v in vars(tweet).items():
    #     print '%s=%s' % (k, v)
    #return pg.insertIfNotExist(pg, 'twitter_message', vars(tweet), "id_str='%s'" % tweet.id_str)
    # build the unified message row
    m = public_bz.storage()
    m.id_str = tweet.id_str
    m.m_type = 'twitter'
    m.m_user_id = tweet.t_user_id
    m.created_at = tweet.created_at
    # only set when present — rows without media have no extended_entities
    if hasattr(tweet, 'extended_entities'):
        m.extended_entities = tweet.extended_entities
    m.content = None
    m.text = tweet.text
    m.href = 'https://twitter.com/' + screen_name + '/status/' + tweet.id_str
    return pg.insertIfNotExist(
        pg, 'm', m, "id_str='%s' and m_type='twitter'" % tweet.id_str)