コード例 #1
0
ファイル: status.py プロジェクト: zhang-zhan/miner
    def load(self, dic):
        """Fill this status from a raw status dictionary and queue its batches.

        Steps, in order:

        1. Resolve the author's uid, either from the embedded ``user`` profile
           (whose batches are queued too) or from the ``uid`` / ``user_id``
           field when no profile is embedded.
        2. Store the status id in ``self.mid``.
        3. If ``retweeted_status`` is present, load it as a ``Repost`` and,
           unless it is flagged as deleted, also as a standalone ``Status``.
        4. Copy every attribute listed in ``Status.attrs`` via ``self.setattr``.
        5. Queue the ``UserStatuses`` batches for this status.

        :param dic: mapping holding one status record.
        :raises ValueError: if ``dic`` carries neither a ``user`` profile nor
            a ``uid`` / ``user_id`` field.
        """
        # Resolve the user who published the status.
        user = dic.get('user')
        if user is None:
            # 'id' is the status id (it feeds self.mid and the 'key' attribute
            # below), so it must not be used as the author's uid.
            uid = dic.get('uid', dic.get('user_id'))
            if uid is None:
                raise ValueError('Data Error: Either User information or user id is expected in the status data!')
            self.uid = uid
        else:
            # Function-scope import; presumably avoids a circular import
            # between the weibo modules -- TODO confirm.
            from weibo import UserProfile
            uprofile = UserProfile()
            uprofile.load(user)
            self.uid = long(uprofile.idstr)

            # Queue the author's profile for insertion into the user table.
            self.batches.extend(uprofile.get_batches())

        self.mid = long(dic.get('idstr', dic.get('id')))

        # Handle an embedded retweeted status.
        r_id = 0
        repost = dic.get('retweeted_status')
        if repost is not None:
            from weibo import Repost
            # Store the retweeted status in the repost column family.
            retweet = Repost()
            retweet.load(self.uid, self.mid, repost)
            r_id = retweet.id
            self.batches.extend(retweet.get_batches())

            # Keep a separate copy of the retweeted status as an original
            # status, unless it has already been deleted.
            if repost.get('deleted', 0) == 0:
                original = Status()
                original.load(repost)
                self.batches.extend(original.get_batches())
        self.r_id = long(r_id)

        # Copy the remaining fields.
        for attr in Status.attrs:
            v = dic.get(attr)
            if attr == 'key':
                v = dic.get('id')
            if v is None:
                # Missing counters default to 0; other missing fields are skipped.
                if attr in ('reposts_count', 'comments_count', 'attitudes_count'):
                    v = 0
                else:
                    continue

            self.setattr(attr, v)

        ustatus = UserStatuses()
        ustatus.load(self, self.uid)
        self.batches.extend(ustatus.get_batches())
コード例 #2
0
    def load(self, original_uid, original_mid, dic):
        """Fill this repost record from the retweeted-status dictionary.

        ``original_uid`` and ``original_mid`` are stored unchanged on the
        instance. When ``dic`` carries a ``deleted`` field, ``self.id`` is set
        to 0 and nothing else is loaded. Otherwise the author's uid is resolved
        from the embedded ``user`` profile or from ``uid`` / ``user_id``, and,
        if that uid is greater than zero, every attribute named in
        ``Repost.attrs`` is copied through ``self.setattr``.

        :param original_uid: value stored as ``self.original_uid``.
        :param original_mid: value stored as ``self.original_mid``.
        :param dic: mapping holding the retweeted status.
        :raises ValueError: if the status is not deleted and carries neither a
            ``user`` profile nor a ``uid`` / ``user_id`` field.
        """
        self.original_uid = original_uid
        self.original_mid = original_mid

        if dic.get('deleted') is not None:
            # The retweeted status has been deleted: only mark the id.
            self.id = 0
            return

        # The retweeted status still exists: find out who wrote it.
        profile_data = dic.get('user')
        if profile_data is not None:
            from weibo import UserProfile
            profile = UserProfile()
            profile.load(profile_data)
            author_id = long(profile.idstr)
            self.uid = author_id

            # Queue the author's profile for insertion into the user table.
            self.batches.extend(profile.get_batches())
        else:
            author_id = dic.get('uid', dic.get('user_id'))
            if author_id is None:
                raise ValueError(
                    'Data Error: Either User information or user id is expected in the status data!'
                )
            self.uid = author_id

        if not (author_id > 0):
            return

        # Counters fall back to 0 when absent; other absent fields are skipped.
        zero_when_missing = ('reposts_count', 'comments_count', 'attitudes_count')
        for name in Repost.attrs:
            value = dic.get(name)
            if name == 'id':
                value = long(value)
            if value is None:
                if name not in zero_when_missing:
                    continue
                value = 0

            self.setattr(name, value)
コード例 #3
0
ファイル: repost.py プロジェクト: zhang-zhan/miner
    def load(self, repost_uid, repost_mid, dic):
        """Fill this repost record from the retweeted-status dictionary.

        ``repost_uid`` and ``repost_mid`` are stored unchanged on the instance.
        When ``dic`` carries a ``deleted`` field, ``self.id`` is set to 0 and
        nothing else is loaded. Otherwise the author's uid is resolved from the
        embedded ``user`` profile or from ``uid`` / ``user_id``; if that uid is
        greater than zero, every attribute named in ``Repost.attrs`` is copied
        through ``self.setattr`` and the ``UserStatuses`` batches are queued.

        :param repost_uid: value stored as ``self.repost_uid``.
        :param repost_mid: value stored as ``self.repost_mid``.
        :param dic: mapping holding the retweeted status.
        :raises ValueError: if the status is not deleted and carries neither a
            ``user`` profile nor a ``uid`` / ``user_id`` field.
        """
        self.repost_uid = repost_uid
        self.repost_mid = repost_mid

        uid = None
        deleted = dic.get('deleted')
        if deleted is None:
            # The retweeted status has not been deleted.
            user = dic.get('user')
            if user is None:
                # 'id' is the status id (it is converted and stored as the
                # record id in the attribute loop below), so it must not be
                # used as the author's uid.
                uid = dic.get('uid', dic.get('user_id'))
                if uid is None:
                    raise ValueError('Data Error: Either User information or user id is expected in the status data!')
                self.uid = uid
            else:
                # Function-scope import; presumably avoids a circular import
                # between the weibo modules -- TODO confirm.
                from weibo import UserProfile
                uprofile = UserProfile()
                uprofile.load(user)
                uid = long(uprofile.idstr)
                self.uid = uid

                # Queue the author's profile for insertion into the user table.
                self.batches.extend(uprofile.get_batches())
        else:
            # The retweeted status has been deleted.
            self.id = uid = 0

        if uid > 0:
            for attr in Repost.attrs:
                v = dic.get(attr)
                if attr == 'id':
                    v = long(v)
                if v is None:
                    # Missing counters default to 0; other missing fields are skipped.
                    if attr in ('reposts_count', 'comments_count', 'attitudes_count'):
                        v = 0
                    else:
                        continue

                self.setattr(attr, v)

            ustatus = UserStatuses()
            ustatus.load(self, uid)
            self.batches.extend(ustatus.get_batches())
コード例 #4
0
    def get_batches(self):
        """Build the list of batch updates for this relation set.

        The result holds the profile batches of every user in ``self.users``
        followed by a single entry for the ``Relation`` table whose row key is
        ``self.self_uid`` packed as a little-endian signed 64-bit integer.

        Each relation cell value is a big-endian signed 64-bit integer built
        as ``((verified_type << 3) | gender_bit | relation) << 32 | epoch``,
        where ``gender_bit`` is 4 for gender ``'m'`` and 0 otherwise, and
        ``epoch`` comes from ``util.now2epoch()``.

        :return: list of batch descriptors (may span several tables).
        """
        def encode_cell(related_uid, relation):
            # One column per related user; the qualifier is the packed uid.
            qualifier = struct.pack('<q', related_uid)

            profile = self.users[related_uid]
            gender = profile['gender']
            verified = profile['verified_type']
            verified = verified_type_map.get(verified, verified)

            flags = verified << 3
            flags |= long(4) if gender == 'm' else long(0)
            flags |= relation

            stamped = (flags << 32) | util.now2epoch()
            return Mutation(
                column="%s:%s" % (Relation.column_family, qualifier),
                value=struct.pack('>q', stamped))

        # Updates to emit; they may touch several tables / column families.
        batches = []

        # Insert the profile of every related user.
        for _uid, profile_data in self.users.iteritems():
            profile = UserProfile()
            profile.load(profile_data)
            batches.extend(profile.get_batches())

        # Record each related uid in the relation column family.
        mutations = [
            encode_cell(related_uid, relation)
            for related_uid, relation in self.uids.iteritems()
        ]

        row_key = struct.pack('<q', self.self_uid)
        batches.append({
            'tableName': Relation.tableName,
            'rowBatches': BatchMutation(row=row_key, mutations=mutations),
        })

        return batches
コード例 #5
0
ファイル: relation.py プロジェクト: zhang-zhan/miner
    def get_batches(self):
        """Collect every batch update implied by this relation set.

        First the profile batches of all users in ``self.users`` are gathered.
        Then one ``Relation`` table entry is appended; its row key is
        ``self.self_uid`` packed little-endian (``'<q'``) and it carries one
        mutation per entry of ``self.uids``.

        A mutation value packs, big-endian (``'>q'``), the mapped
        ``verified_type`` shifted left by 3, OR-ed with 4 when the gender is
        ``'m'`` and with the relation value, in the upper 32 bits, and the
        result of ``util.now2epoch()`` in the lower bits.

        :return: list of batch descriptors (may span several tables).
        """
        # Updates to emit; they may touch several tables / column families.
        result = []

        # Insert the profile of every related user.
        for _uid, raw_profile in self.users.iteritems():
            related_profile = UserProfile()
            related_profile.load(raw_profile)
            result.extend(related_profile.get_batches())

        # Record each related uid in the relation column family.
        cells = []
        for related_uid, relation in self.uids.iteritems():
            column_qualifier = struct.pack('<q', related_uid)

            info = self.users[related_uid]
            gender = info['gender']
            raw_type = info['verified_type']
            verified = verified_type_map.get(raw_type, raw_type)

            gender_bit = long(4) if gender == 'm' else long(0)
            packed = ((verified << 3) | gender_bit) | relation

            packed <<= 32
            packed |= util.now2epoch()

            column_name = "%s:%s" % (Relation.column_family, column_qualifier)
            cells.append(Mutation(column=column_name,
                                  value=struct.pack('>q', packed)))

        row = struct.pack('<q', self.self_uid)
        relation_batch = {
            'tableName': Relation.tableName,
            'rowBatches': BatchMutation(row=row, mutations=cells),
        }
        result.append(relation_batch)

        return result
コード例 #6
0
    def load(self, dic):
        """Fill this status from a raw status dictionary and queue its batches.

        The author's uid comes from the embedded ``user`` profile (whose
        batches are queued as well) or, failing that, from ``uid`` /
        ``user_id``. The status id goes to ``self.mid``; an embedded
        ``retweeted_status`` is loaded as a ``Repost`` and its id kept in
        ``self.r_id`` (0 when there is none). Every attribute named in
        ``Status.attrs`` is then copied via ``self.setattr`` and the
        ``UserStatuses`` batches are queued.

        :param dic: mapping holding one status record.
        :raises ValueError: if ``dic`` carries neither a ``user`` profile nor
            a ``uid`` / ``user_id`` field.
        """
        # Resolve the user who published the status.
        author = dic.get('user')
        if author is not None:
            from weibo import UserProfile
            profile = UserProfile()
            profile.load(author)
            self.uid = long(profile.idstr)

            # Queue the author's profile for insertion into the user table.
            self.batches.extend(profile.get_batches())
        else:
            author_id = dic.get('uid', dic.get('user_id'))
            if author_id is None:
                raise ValueError(
                    'Data Error: Either User information or user id is expected in the status data!'
                )
            self.uid = author_id

        fallback_id = dic.get('id')
        self.mid = long(dic.get('idstr', fallback_id))

        # Handle an embedded retweeted status.
        retweet_id = 0
        embedded = dic.get('retweeted_status')
        if embedded is not None:
            from weibo import Repost
            # Store the retweeted status in the repost column family.
            retweet = Repost()
            retweet.load(self.uid, self.mid, embedded)
            retweet_id = retweet.id
            self.batches.extend(retweet.get_batches())
            # NOTE: storing the retweeted status a second time as a
            # standalone original Status is disabled in this version.
        self.r_id = long(retweet_id)

        # Copy the remaining fields; counters default to 0 when missing.
        counters = ('reposts_count', 'comments_count', 'attitudes_count')
        for name in Status.attrs:
            value = dic.get('id') if name == 'key' else dic.get(name)
            if value is None:
                if name not in counters:
                    continue
                value = 0

            self.setattr(name, value)

        user_status = UserStatuses()
        user_status.load(self, self.uid)
        self.batches.extend(user_status.get_batches())