예제 #1
0
class GetRooms():
    """Collect live rooms from the channel feed and register them in Redis.

    For every feed entry whose owner has at least ``MIN_FOLLOWER_COUNT``
    followers and whose item list contains promotions, the room id and the
    owner's ``sec_uid`` are stored through ``RedisClient``.
    """

    # Owners with fewer followers than this are ignored.
    MIN_FOLLOWER_COUNT = 10000

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        # Accumulators filled by parse_channel() and emptied at the end of run().
        self.sec_user_id_list = []
        self.room_id_list = []

    def get_channel(self):
        """Fetch the channel feed, parse it, and push the results to Redis.

        Always returns None. If fetching or parsing raises, the error is
        logged and nothing is written to Redis for this call.
        """
        try:
            channel_raw_data = self.get_raw_data.get_channel()
        except Exception as e:
            # str(e) rather than e.args[0]: args may be empty or hold a
            # non-string, which would raise again inside this handler.
            logger.error('get_channel出错-' + str(e))
            return None

        try:
            self.parse_channel(channel_raw_data)
        except Exception as e:
            logger.error('parse_channel出错-' + str(e))
            return None

        for room_id in self.room_id_list:
            self.redis_client.add_rooms(room_id)
        for sec_user_id in self.sec_user_id_list:
            # NOTE(review): the meaning of the second argument (1) is defined
            # by RedisClient.add_users, which is not visible here.
            self.redis_client.add_users(sec_user_id, 1)

    def parse_channel(self, channel_raw_data):
        """Record rooms from the feed that have enough followers and promotions.

        Appends to ``self.room_id_list`` and ``self.sec_user_id_list``.
        Parsing stops at the first ``get_item_list`` failure (which is
        logged); entries collected before that point are kept.

        Args:
            channel_raw_data: mapping whose ``'data'`` value is an iterable of
                entries shaped like
                ``{'data': {'id_str', 'owner': {'sec_uid', 'follow_info'}}}``.

        Raises:
            Whatever a malformed entry triggers (e.g. AttributeError or
            TypeError on missing keys); the caller logs it.
        """
        for entry in channel_raw_data.get('data'):
            room = entry.get('data')
            room_id = room.get('id_str')
            owner = room.get('owner')
            sec_user_id = owner.get('sec_uid')
            follower = owner.get('follow_info').get('follower_count')

            if follower < self.MIN_FOLLOWER_COUNT:
                continue

            try:
                item_list = self.get_raw_data.get_item_list(
                    sec_user_id, room_id)
            except Exception as e:
                logger.error('get_item_list出错-' + str(e))
                return None

            # Truthiness also covers a response without a 'promotions' key,
            # which previously raised TypeError and discarded the whole batch.
            if item_list.get('promotions'):
                self.room_id_list.append(room_id)
                self.sec_user_id_list.append(sec_user_id)

    def run(self):
        """Run one collection pass, log the totals, then reset the accumulators."""
        tasks = [gevent.spawn(self.get_channel) for _ in range(1)]
        gevent.joinall(tasks)
        logger.info('本批次共获得room_id和sec_user_id-' +
                    str(len(self.sec_user_id_list)) + '-' +
                    str(len(self.room_id_list)))
        self.sec_user_id_list.clear()
        self.room_id_list.clear()
예제 #2
0
class GetItemLists():
    """Fetch and persist item lists for rooms queued in Redis.

    Room identifiers are read from ``RedisClient.get_item_lists()`` in the
    form ``"<room_id>_<sec_user_id>"``. For each one the item list is fetched,
    appended to a dated text file as one JSON line, and the identifier is
    then removed from Redis.
    """

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        # Identifiers handled in the current batch (safe to delete from Redis).
        self.item_lists_saved_list = []
        # "<promotion_id>_<room_id>_<sec_user_id>" keys found in the current batch.
        self.item_list = []

    def get_room_sec_ids(self):
        """Return the pending room identifiers from Redis."""
        return self.redis_client.get_item_lists()

    def delete_item_lists(self):
        """Remove every identifier handled in the current batch from Redis."""
        for room_sec_ids in self.item_lists_saved_list:
            self.redis_client.delete_item_lists(room_sec_ids)

    def save_items(self):
        """Store the item keys collected in the current batch in Redis."""
        self.redis_client.add_items(self.item_list)

    def get_item_lists(self, room_sec_ids):
        """Fetch, parse and persist the item list of one room.

        Args:
            room_sec_ids: ``"<room_id>_<sec_user_id>"``; only the first
                underscore separates the two parts.

        Always returns None. On any failure the error is logged and the
        identifier is NOT added to ``item_lists_saved_list``, so it stays in
        Redis.
        """
        room_id, separator, sec_user_id = room_sec_ids.partition('_')
        if not separator:
            # Previously an uncaught IndexError inside the greenlet.
            logger.error('room_sec_ids格式错误-' + room_sec_ids)
            return None

        try:
            item_list_raw_data = self.get_raw_data.get_item_list(
                sec_user_id, room_id)
        except Exception as e:
            # str(e) rather than e.args[0]: args may be empty or hold a
            # non-string, which would raise again inside this handler.
            logger.error('get_item_list出错-' + str(e) + '-room_sec_ids-' +
                         room_sec_ids)
            return None

        try:
            item_list = self.parse_item_lists(item_list_raw_data, room_id,
                                              sec_user_id)
        except Exception as e:
            logger.error(room_sec_ids + '-parse_item_list失败-' + str(e))
            return None

        if item_list:  # this live stream had products attached
            # Write to file first; the Redis entry is deleted only afterwards.
            self.write_to_file(json.dumps(item_list, ensure_ascii=False))
        self.item_lists_saved_list.append(room_sec_ids)

    def parse_item_lists(self, item_list_raw_data, room_id, sec_user_id):
        """Convert a raw item-list payload into a list of flat item dicts.

        Args:
            item_list_raw_data: mapping whose ``'promotions'`` value is an
                iterable of item mappings.
            room_id: room id copied into every item.
            sec_user_id: owner id copied into every item.

        Returns:
            One dict per promotion. ``price`` and ``min_price`` are the raw
            values divided by 100. ``coupon_tag`` / ``coupon_url`` are present
            only when the item has a non-empty ``coupons`` list (taken from
            its first element).

        Side effects:
            Extends ``self.item_list`` with one
            ``"<promotion_id>_<room_id>_<sec_user_id>"`` key per item, but
            only after the whole payload parsed successfully, so a failure
            half-way leaves ``self.item_list`` untouched.

        Raises:
            TypeError / AttributeError when required fields are missing or of
            an unexpected type.
        """
        item_list = []
        item_keys = []
        for item in item_list_raw_data.get('promotions'):
            # Key order is kept stable because it determines the JSON output.
            item_info = {
                'room_id': room_id,
                'sec_user_id': sec_user_id,
                'title': item.get('title'),
                'short_title': item.get('short_title'),
                'product_id': item.get('product_id'),
                'promotion_id': item.get('promotion_id'),
                'price': item.get('price') / 100,
                'min_price': item.get('min_price') / 100,
                'item_source': item.get('platform_label'),
                'shop_id': item.get('shop_id'),
                'item_type': item.get('item_type'),
                'cover': item.get('cover'),
                'index': item.get('index'),
            }

            coupon_info = item.get('coupons')
            if coupon_info:
                first_coupon = coupon_info[0]
                item_info['coupon_tag'] = first_coupon.get('tag')
                item_info['coupon_url'] = first_coupon.get('coupon_url')

            item_list.append(item_info)
            item_keys.append(item_info['promotion_id'] + '_' + room_id +
                             '_' + sec_user_id)

        # Commit only once every item parsed; a partial failure must not leak
        # keys for a room that was never written to file.
        self.item_list.extend(item_keys)
        return item_list

    def write_to_file(self, item_list):
        """Append one line to ``<FILE_DIRECTORY>/item_lists_<YYYYMMDD>.txt``.

        Args:
            item_list: already-serialised text (a JSON string in practice).
        """
        today = time.strftime('%Y%m%d', time.localtime())
        path = FILE_DIRECTORY + '/' + 'item_lists' + '_' + today + '.txt'
        with open(path, 'a', encoding='utf-8') as file:
            file.write(item_list + '\n')

    def run(self):
        """Process every pending room in batches of 200 concurrent greenlets."""
        all_room_sec_ids = self.get_room_sec_ids()
        logger.info('此前已结束直播并需要获取商品信息的直播间数量:' + str(len(all_room_sec_ids)))
        batch_size = 200
        for start in range(0, len(all_room_sec_ids), batch_size):
            stop = min(start + batch_size, len(all_room_sec_ids))
            logger.info('待获取的商品所对应的直播间-' + str(start + 1) + '-' + str(stop))
            tasks = [
                gevent.spawn(self.get_item_lists, room_sec_ids)
                for room_sec_ids in all_room_sec_ids[start:stop]
            ]
            gevent.joinall(tasks)

            logger.info('新获取商品列表/未挂商品的直播间数量-' +
                        str(len(self.item_lists_saved_list)))
            logger.info('新获取商品的数量-' + str(len(self.item_list)))
            # NOTE: the original author suspected this call is the slow step.
            self.save_items()
            self.delete_item_lists()
            self.item_lists_saved_list.clear()
            self.item_list.clear()
예제 #3
0
class CheckRooms():
    """Re-check rooms stored in Redis and flag the ones that are live with promotions.

    Rooms whose owner has dropped below ``MIN_FOLLOWER_COUNT`` followers are
    removed from Redis together with their owner.
    """

    # Owners with fewer followers than this are dropped from Redis.
    MIN_FOLLOWER_COUNT = 10000
    # Room status value that triggers the promotion check.
    # NOTE(review): presumably means "currently live" — confirm against the API.
    LIVE_STATUS = 2

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        # Room ids found live with promotions in the current batch.
        self.lives_on_list = []

    def get_rooms(self):
        """Return the room ids to check, as selected by ``RedisClient.get_rooms(0, 0)``."""
        return self.redis_client.get_rooms(0, 0)

    def change_room_status(self):
        """Re-register every room in ``lives_on_list`` via ``add_rooms(room_id, 1)``."""
        for room_id in self.lives_on_list:
            self.redis_client.add_rooms(room_id, 1)

    def check_room(self, room_id):
        """Inspect one room and update Redis / ``lives_on_list`` accordingly.

        Args:
            room_id: room identifier (a string; it is concatenated into log
                messages).

        Always returns None. Fetch and parse failures are logged and the room
        is left untouched.
        """
        try:
            room_raw_data = self.get_raw_data.get_live(room_id)
        except Exception as e:
            # str(e) rather than e.args[0]: args may be empty or hold a
            # non-string, which would raise again inside this handler.
            logger.error('get_live出错-' + str(e) + '-room_id-' + room_id)
            return None

        try:
            room = room_raw_data.get('data')
            owner = room.get('owner')
            follower_count = owner.get('follow_info').get('follower_count')
            sec_user_id = owner.get('sec_uid')
            # Compared inside the try so a missing count is reported as a
            # parse error instead of an uncaught TypeError in the greenlet.
            too_small = follower_count < self.MIN_FOLLOWER_COUNT
        except Exception as e:
            logger.error('解析room_raw_data出错-' + str(e) + '-room_id-' +
                         room_id)
            return None

        if too_small:
            self.redis_client.delete_users(sec_user_id)
            self.redis_client.delete_rooms(room_id)
            return None

        if room.get('status') != self.LIVE_STATUS:
            return None

        try:  # check whether this live stream is selling products
            item_list_raw_data = self.get_raw_data.get_item_list(
                sec_user_id, room_id)
        except Exception as e:
            logger.error('get_item_list出错' + str(e) +
                         '-sec_user_id和room_id-' + str(sec_user_id) + '-' +
                         room_id)
            return None

        # Truthiness also covers a response without a 'promotions' key.
        if item_list_raw_data.get('promotions'):
            self.lives_on_list.append(room_id)

    def run(self):
        """Check every stored room in batches of 200 concurrent greenlets."""
        all_room_ids = self.get_rooms()
        logger.info('此前未在直播的直播间数量:' + str(len(all_room_ids)))
        batch_size = 200
        for start in range(0, len(all_room_ids), batch_size):
            stop = min(start + batch_size, len(all_room_ids))
            logger.info('待查看的此前未在直播的直播间-' + str(start + 1) + '-' + str(stop))
            tasks = [
                gevent.spawn(self.check_room, room_id)
                for room_id in all_room_ids[start:stop]
            ]
            gevent.joinall(tasks)

            logger.info('新发现开始的直播数量-' + str(len(self.lives_on_list)))
            self.change_room_status()
            self.lives_on_list.clear()