Beispiel #1
0
class SnapshoterTest(unittest.TestCase):
    '''
    Tests for the class that creates a snapshot.
    '''

    def setUp(self):
        '''
        Build the fixture: authenticate an account and wrap the result of
        its oauth() call in an API object handed to a GroupSnapshoter.
        '''
        account = Account(LOGIN, PASSWORD)
        self.account = account
        account.auth()

        oauth_result = account.oauth(APP_ID, PERMISSIONS)
        self.snapshoter = GroupSnapshoter(GID, API(oauth_result))

    def test_count(self):
        '''
        The wall, video and photo counters must each be greater than zero.
        '''
        wall_total = self.snapshoter.get_wall_count()
        self.assertGreater(wall_total, 0)

        videos_total = self.snapshoter.get_videos_count()
        self.assertGreater(videos_total, 0)

        photos_total = self.snapshoter.get_photos_count()
        self.assertGreater(photos_total, 0)
Beispiel #2
0
 def fetch_likes(self, item, change, shares=False):
     '''
     Return the list of users who liked (or reposted) item.

     change -- [<start of the like list>, <end of the like list>]
     shares -- if True, collect the users who reposted the object
               instead of those who liked it
     '''
     # The same filter value is sent with every request.
     wanted = 'copies' if shares else 'likes'

     first, last = change[0], change[1]
     batches = GroupSnapshoter.create_requests_sequence(
         first, last, GroupSnapshoter.LIKES_COUNT)

     users = []
     for batch_offset, batch_size in batches:
         try:
             response = self.api.likes.getList(
                 type=item.type,
                 owner_id=item.owner,
                 item_id=item.data.internal_id,
                 offset=batch_offset,
                 count=batch_size,
                 filter=wanted
             )
             users += response['users']
         except APIError:
             # A failed batch is skipped; the remaining ones are still tried.
             pass

     return users
Beispiel #3
0
 def setUp(self):
     '''
     Build the fixture: authenticate an account and wrap the result of
     its oauth() call in an API object handed to a GroupSnapshoter.
     '''
     account = Account(LOGIN, PASSWORD)
     self.account = account
     account.auth()

     oauth_result = account.oauth(APP_ID, PERMISSIONS)
     self.snapshoter = GroupSnapshoter(GID, API(oauth_result))
Beispiel #4
0
    def run(self):
        '''
        Run the crawler for the group self.gid.

        Steps, in order:
          1. pick an account with self.choose_account() and build the API
             client and the Board helper from it;
          2. read the current wall / photo / video / topic counters and
             compare them with the ones stored on the VkGroup record;
          3. take a snapshot whose depth is the configured depth plus the
             number of newly appeared objects;
          4. build update tasks from the snapshot, fetch the updates and
             hand them to self.fix_updates().

        Sets self.account, self.api, self.board and self.save_actions as
        side effects and logs its progress. Returns nothing.

        Raises Exception if choose_account() returns None.
        '''
        self.account = self.choose_account()
        if self.account is None:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise Exception("No valid account for parsing")

        self.api = API(self.account.access_token)
        self.board = Board(self.gid, api=self.api, resolve_names=False)

        # If True, user actions are recorded for all (new and updated)
        # objects, otherwise only for updated ones.
        # NOTE(review): len() over Item.objects.all() presumably loads every
        # Item just to test for emptiness -- confirm whether a cheaper
        # check is available on this manager.
        self.save_actions = len(Item.objects.all()) != 0 or CREATE_ACTIONS_WHEN_EMPTY
        vk_group = VkGroup.objects.get(gid=self.gid)

        logging.info("Start crawling for group %s", self.gid)
        logging.info("Save actions = %s", self.save_actions)
        logging.info("Wall Scanning Depth = %s", vk_group.wall_max)
        logging.info("Video Scanning Depth = %s", vk_group.video_max)
        logging.info("Photo Scanning Depth = %s", vk_group.photo_max)
        logging.info("Board Scanning Depth = %s", vk_group.board_max)

        snapshoter = GroupSnapshoter(self.gid, self.api)

        logging.info("Old Wall Posts Count = %s", vk_group.wall_count)
        logging.info("Old Photos Count = %s", vk_group.photos_count)
        logging.info("Old Videos Count = %s", vk_group.videos_count)
        logging.info("Old Board Topics Count = %s", vk_group.board_count)

        wall_count = snapshoter.get_wall_count()
        photos_count = snapshoter.get_photos_count()
        videos_count = snapshoter.get_videos_count()
        board_count = self.board.get_count()

        # Compute the difference in object counts by which the scanning
        # depth has to be increased in order to collect all new objects and
        # update the old ones. A shrinking counter is clamped to zero.
        new_posts = max(wall_count - vk_group.wall_count, 0)
        new_videos = max(videos_count - vk_group.videos_count, 0)
        new_photos = max(photos_count - vk_group.photos_count, 0)
        new_topics = max(board_count - vk_group.board_count, 0)

        # NOTE(review): vk_group is neither saved nor passed on anywhere in
        # this method -- confirm the new counters are persisted elsewhere.
        vk_group.wall_count = wall_count
        vk_group.photos_count = photos_count
        vk_group.videos_count = videos_count
        vk_group.board_count = board_count

        logging.info("New Wall Posts Count = %s", wall_count)
        logging.info("New Photos Count = %s", photos_count)
        logging.info("New Videos Count = %s", videos_count)
        logging.info("New Board Topics Count = %s", board_count)

        # The wall snapshot is the base list; the other sections are
        # appended to it in place.
        snapshot = snapshoter.make_for_wall(size=vk_group.wall_max + new_posts)
        snapshot.extend(snapshoter.make_for_videos(size=vk_group.video_max + new_videos))
        snapshot.extend(snapshoter.make_for_photos(size=vk_group.photo_max + new_photos))
        snapshot.extend(snapshoter.make_for_board(size=vk_group.board_max + new_topics))

        logging.info("Crawling finished")
        logging.info("%s items are at snapshot", len(snapshot))

        logging.info("Creating tasks for updating")

        tasks = self.create_update_task(snapshot)

        logging.info("Created tasks for updating")
        # Count with a generator: no throwaway list, and it does not rely on
        # filter() returning a list (which is Python 2 only).
        logging.info("%s items are to be added",
                     sum(1 for task in tasks if task.action == 'add'))
        logging.info("%s items are to be updated",
                     sum(1 for task in tasks if task.action == 'update'))

        # Expected amount of new data: each *_change is an inclusive
        # [start, end] pair, hence the "+ 1".
        new_comments, new_likes, new_shares = 0, 0, 0
        for task in tasks:
            if task.comments_change:
                new_comments += task.comments_change[1] - task.comments_change[0] + 1
            if task.likes_change:
                new_likes += task.likes_change[1] - task.likes_change[0] + 1
            if task.shares_change:
                new_shares += task.shares_change[1] - task.shares_change[0] + 1

        logging.info("New comments: %s", new_comments)
        logging.info("New likes: %s", new_likes)
        logging.info("New shares: %s", new_shares)

        logging.info("Start fetching updates")

        tasks = self.fetch_updates(tasks)

        logging.info("Completed fetching updates")

        # Amount of data that was actually fetched for the same tasks.
        new_comments, new_likes, new_shares = 0, 0, 0
        for task in tasks:
            if task.comments_change:
                new_comments += len(task.comments)
            if task.likes_change:
                new_likes += len(task.likes)
            if task.shares_change:
                new_shares += len(task.shares)

        logging.info("New comments are fetched: %s", new_comments)
        logging.info("New likes are fetched: %s", new_likes)
        logging.info("New shares are fetched: %s", new_shares)

        logging.info("Start fixing updates")

        self.fix_updates(tasks)

        logging.info("Completed fixing updates")
Beispiel #5
0
    def fetch_comments(self, item, change):
        '''
        Return the list of comments for item.

        item   -- object whose type is 'post', 'topic', 'photo' or 'video';
                  item.owner and item.data.internal_id are passed to the
                  API calls
        change -- [<start of the comment list>, <end of the comment list>];
                  split into (offset, count) requests by
                  GroupSnapshoter.create_requests_sequence

        Every fetched comment is normalised in place:
          * 'uid' is copied to 'from_id' and 'message' to 'text' when
            those keys are present;
          * a string or int 'date' is converted with date.fromtimestamp();
            a missing or unparsable date becomes None.

        A batch whose request raises APIError is skipped.

        Raises ValueError if item.type is not one of the supported types.
        '''
        # Pick the batch size and the request callable for this item type.
        if item.type == 'post':
            batch_size = GroupSnapshoter.WALL_COUNT
            # The first element of the response is dropped
            # (presumably a total counter -- TODO confirm).
            get = lambda offset, count: self.api.wall.getComments(
                    owner_id=item.owner,
                    post_id=item.data.internal_id,
                    offset=offset,
                    count=count
                )[1:]
        elif item.type == 'topic':
            batch_size = GroupSnapshoter.BOARD_COMMENT_COUNT
            # NOTE(review): count is not forwarded to board.get_comments --
            # confirm that this is intended.
            get = lambda offset, count: self.board.get_comments(
                    topic_id=item.data.internal_id,
                    offset=offset,
            )
        elif item.type == 'photo':
            batch_size = GroupSnapshoter.PHOTOS_COUNT
            get = lambda offset, count: self.api.photos.getComments(
                    owner_id=item.owner,
                    pid=item.data.internal_id,
                    offset=offset,
                    count=count
                )[1:]
        elif item.type == 'video':
            batch_size = GroupSnapshoter.VIDEOS_COUNT
            get = lambda offset, count: self.api.video.getComments(
                    owner_id=item.owner,
                    vid=item.data.internal_id,
                    offset=offset,
                    count=count
                )[1:]
        else:
            # Fail with a clear message instead of an unbound-local error
            # further down.
            raise ValueError(
                "Unsupported item type for comments: %r" % (item.type,))

        sequence = GroupSnapshoter.create_requests_sequence(
            change[0],
            change[1],
            batch_size
        )

        retval = []
        for offset, count in sequence:
            try:
                comments = get(offset, count)
            except APIError:
                # Best effort: skip the failed batch and keep going.
                continue

            for comment in comments:
                if 'uid' in comment:
                    comment['from_id'] = comment['uid']

                if 'message' in comment:
                    comment['text'] = comment['message']

                # Look the date up under a guard so that a comment without
                # one is normalised to None instead of raising KeyError.
                try:
                    raw_date = comment['date']
                except KeyError:
                    raw_date = None

                if raw_date is None:
                    comment['date'] = None
                elif isinstance(raw_date, (basestring, int)):
                    try:
                        comment['date'] = date.fromtimestamp(int(raw_date))
                    except ValueError:
                        comment['date'] = None

            retval.extend(comments)

        return retval