def load(self, dic):
    """Populate this status from a raw status dict and queue its batches.

    Resolves the author's id, processes an embedded ``retweeted_status``
    (if any), copies the attributes named in ``Status.attrs`` through
    ``self.setattr`` and appends every resulting batch to ``self.batches``.

    Args:
        dic: status data as a dict. It must provide either a ``user`` dict
            or a ``uid`` / ``user_id`` value, and an ``idstr`` or ``id``.

    Raises:
        ValueError: if neither user information nor a user id is present,
            or if the bare user id cannot be converted by ``long()``.
    """
    # Work out who posted the status.
    user = dic.get('user')
    if user is None:
        uid = dic.get('uid', dic.get('user_id'))
        if uid is None:
            raise ValueError(
                'Data Error: Either User information or user id is expected in the status data!'
            )
        # Normalise to long so self.uid has the same type as in the branch
        # below; it is handed on to Repost.load / UserStatuses.load as-is.
        self.uid = long(uid)
    else:
        from profile import UserProfile
        uprofile = UserProfile()
        uprofile.load(user)
        self.uid = long(uprofile.idstr)
        # Queue the author's profile so the user is inserted into the user table.
        self.batches.extend(uprofile.get_batches())

    # NOTE: long(None) raises TypeError when neither 'idstr' nor 'id' is present.
    self.mid = long(dic.get('idstr', dic.get('id')))

    # Handle a reposted (retweeted) status.
    r_id = 0
    repost = dic.get('retweeted_status')
    if repost is not None:
        from weibo import Repost
        # Store the repost itself (repost column family).
        retweet = Repost()
        retweet.load(self.uid, self.mid, repost)
        r_id = retweet.id
        self.batches.extend(retweet.get_batches())
        # Also keep a separate copy of the reposted status as an original
        # status, unless that status has been deleted.
        if repost.get('deleted', 0) == 0:
            original = Status()
            original.load(repost)
            self.batches.extend(original.get_batches())
    self.r_id = long(r_id)

    # Copy the remaining fields.
    counter_attrs = ('reposts_count', 'comments_count', 'attitudes_count')
    for attr in Status.attrs:
        # The 'key' attribute is taken from the status 'id' field.
        v = dic.get('id') if attr == 'key' else dic.get(attr)
        if v is None:
            if attr not in counter_attrs:
                # Missing non-counter fields are simply skipped.
                continue
            # Missing counters default to zero.
            v = 0
        self.setattr(attr, v)

    # Queue the per-user status entry for this status.
    ustatus = UserStatuses()
    ustatus.load(self, self.uid)
    self.batches.extend(ustatus.get_batches())
def get_batches(self):
    """Build the batches for the related users' profiles and the relation row.

    Returns:
        list: every batch returned by ``UserProfile.get_batches()`` for the
        entries of ``self.users``, followed by one dict with the keys
        ``tableName`` and ``rowBatches`` describing the relation row of
        ``self.self_uid``.
    """
    # Collects everything that has to be updated; this may span several
    # tables and column families.
    pending = []

    # Insert the profile of every related user.
    for _uid, profile_data in self.users.iteritems():
        profile = UserProfile()
        profile.load(profile_data)
        pending.extend(profile.get_batches())

    # One mutation per related user id in the relation column family.
    row_mutations = []
    for uid, relation in self.uids.iteritems():
        # Column qualifier: the related uid as a little-endian signed 64-bit int.
        qualifier = struct.pack('<q', uid)
        related = self.users[uid]
        gender = related['gender']
        raw_type = related['verified_type']
        verified = verified_type_map.get(raw_type, raw_type)
        male_bit = 4 if gender == 'm' else 0
        # Upper half: verified type (shifted by 3), male flag (bit 2) and the
        # relation value; lower 32 bits: util.now2epoch().
        # NOTE(review): presumably relation fits in the two low bits and the
        # epoch fits in 32 bits -- confirm against the callers.
        packed = (((verified << 3) | male_bit | relation) << 32) | util.now2epoch()
        # Cell value: big-endian signed 64-bit int.
        cell = struct.pack('>q', packed)
        row_mutations.append(
            Mutation(column="%s:%s" % (Relation.column_family, qualifier),
                     value=cell)
        )

    # The row key is the owning user's id, little-endian like the qualifiers.
    row_key = struct.pack('<q', self.self_uid)
    pending.append({
        'tableName': Relation.tableName,
        'rowBatches': BatchMutation(row=row_key, mutations=row_mutations)
    })
    return pending
def load(self, dic):
    """Populate this status from a raw status dict and queue its batches.

    Resolves the author's id, processes an embedded ``retweeted_status``
    (if any), copies the attributes named in ``Status.attrs`` through
    ``self.setattr`` and appends every resulting batch to ``self.batches``.

    Args:
        dic: status data as a dict. It must provide either a ``user`` dict
            or a ``uid`` / ``user_id`` value, and an ``idstr`` or ``id``.

    Raises:
        ValueError: if neither user information nor a user id is present,
            or if the bare user id cannot be converted by ``long()``.
    """
    # Work out who posted the status.
    user = dic.get('user')
    if user is None:
        uid = dic.get('uid', dic.get('user_id'))
        if uid is None:
            raise ValueError(
                'Data Error: Either User information or user id is expected in the status data!'
            )
        # Normalise to long so self.uid has the same type as in the branch
        # below; it is handed on to Repost.load / UserStatuses.load as-is.
        self.uid = long(uid)
    else:
        from profile import UserProfile
        uprofile = UserProfile()
        uprofile.load(user)
        self.uid = long(uprofile.idstr)
        # Queue the author's profile so the user is inserted into the user table.
        self.batches.extend(uprofile.get_batches())

    # NOTE: long(None) raises TypeError when neither 'idstr' nor 'id' is present.
    self.mid = long(dic.get('idstr', dic.get('id')))

    # Handle a reposted (retweeted) status.
    r_id = 0
    repost = dic.get('retweeted_status')
    if repost is not None:
        from weibo import Repost
        # Store the repost itself (repost column family).
        retweet = Repost()
        retweet.load(self.uid, self.mid, repost)
        r_id = retweet.id
        self.batches.extend(retweet.get_batches())
        # Also keep a separate copy of the reposted status as an original
        # status, unless that status has been deleted.
        if repost.get('deleted', 0) == 0:
            original = Status()
            original.load(repost)
            self.batches.extend(original.get_batches())
    self.r_id = long(r_id)

    # Copy the remaining fields.
    counter_attrs = ('reposts_count', 'comments_count', 'attitudes_count')
    for attr in Status.attrs:
        # The 'key' attribute is taken from the status 'id' field.
        v = dic.get('id') if attr == 'key' else dic.get(attr)
        if v is None:
            if attr not in counter_attrs:
                # Missing non-counter fields are simply skipped.
                continue
            # Missing counters default to zero.
            v = 0
        self.setattr(attr, v)

    # Queue the per-user status entry for this status.
    ustatus = UserStatuses()
    ustatus.load(self, self.uid)
    self.batches.extend(ustatus.get_batches())