class SnapshoterTest(unittest.TestCase):
    '''
    Tests for the class that creates a group snapshot.
    '''

    def setUp(self):
        # Authenticate first: the OAuth token is requested from the
        # authenticated account.
        account = Account(LOGIN, PASSWORD)
        self.account = account
        account.auth()

        token = account.oauth(APP_ID, PERMISSIONS)
        api = API(token)
        self.snapshoter = GroupSnapshoter(GID, api)

    def test_count(self):
        # Every counter is expected to report at least one object for the
        # test group; they are checked in the order wall, videos, photos.
        counter_names = (
            'get_wall_count',
            'get_videos_count',
            'get_photos_count',
        )
        for counter_name in counter_names:
            counter = getattr(self.snapshoter, counter_name)
            self.assertGreater(counter(), 0)
def fetch_likes(self, item, change, shares=False):
    '''
    Return the list of users who performed the "like" action on item.

    change -- [<start of the likes list>, <end of the likes list>]
    shares -- if True, the users who reposted the object are processed
              instead (the request is sent with filter='copies')

    A request that fails with APIError is skipped, so the result may be
    incomplete.
    '''
    if shares:
        likes_filter = 'copies'
    else:
        likes_filter = 'likes'

    requests = GroupSnapshoter.create_requests_sequence(
        change[0], change[1], GroupSnapshoter.LIKES_COUNT
    )

    users = []
    for offset, count in requests:
        try:
            response = self.api.likes.getList(
                type=item.type,
                owner_id=item.owner,
                item_id=item.data.internal_id,
                offset=offset,
                count=count,
                filter=likes_filter
            )
            users.extend(response['users'])
        except APIError:
            # Best effort: drop this page and go on with the next one.
            continue
    return users
def setUp(self):
    '''
    Prepare an authenticated account and a GroupSnapshoter for the tests.
    '''
    account = Account(LOGIN, PASSWORD)
    self.account = account
    account.auth()

    # The snapshoter talks to the API with the OAuth token of the account.
    token = account.oauth(APP_ID, PERMISSIONS)
    api = API(token)
    self.snapshoter = GroupSnapshoter(GID, api)
def run(self):
    '''
    Run the crawler.

    Steps, in order:
      1. choose an account and build the API and Board clients;
      2. read the stored group settings and the current object counts;
      3. take a snapshot of the wall, videos, photos and board, with the
         scanning depth enlarged by the number of newly appeared objects;
      4. create update tasks from the snapshot, fetch the updates and
         pass them to fix_updates().

    Progress and statistics are reported through the logging module.

    Raises Exception if choose_account() returns None.
    '''
    self.account = self.choose_account()
    if self.account is None:
        raise Exception("No valid account for parsing")

    self.api = API(self.account.access_token)
    self.board = Board(self.gid, api=self.api, resolve_names=False)

    # If True, user actions are recorded for all (new and updated)
    # objects, otherwise only for the updated ones.
    self.save_actions = len(Item.objects.all()) != 0 or CREATE_ACTIONS_WHEN_EMPTY

    vk_group = VkGroup.objects.get(gid=self.gid)

    logging.info("Start crawling for group %s", self.gid)
    logging.info("Save actions = %s", self.save_actions)
    logging.info("Wall Scanning Depth = %s", vk_group.wall_max)
    logging.info("Video Scanning Depth = %s", vk_group.video_max)
    logging.info("Photo Scanning Depth = %s", vk_group.photo_max)
    logging.info("Board Scanning Depth = %s", vk_group.board_max)

    snapshoter = GroupSnapshoter(self.gid, self.api)

    logging.info("Old Wall Posts Count = %s", vk_group.wall_count)
    logging.info("Old Photos Count = %s", vk_group.photos_count)
    logging.info("Old Videos Count = %s", vk_group.videos_count)
    logging.info("Old Board Topics Count = %s", vk_group.board_count)

    wall_count = snapshoter.get_wall_count()
    photos_count = snapshoter.get_photos_count()
    videos_count = snapshoter.get_videos_count()
    board_count = self.board.get_count()

    # Compute the difference in the number of objects by which the
    # scanning depth has to be increased in order to collect all new
    # objects and refresh the old ones. A shrinking count is treated as
    # "nothing new" (clamped to 0).
    new_posts = max(wall_count - vk_group.wall_count, 0)
    new_videos = max(videos_count - vk_group.videos_count, 0)
    new_photos = max(photos_count - vk_group.photos_count, 0)
    new_topics = max(board_count - vk_group.board_count, 0)

    # NOTE(review): the counts are only assigned on the object here;
    # vk_group is not saved in this method -- confirm that it is
    # persisted elsewhere.
    vk_group.wall_count = wall_count
    vk_group.photos_count = photos_count
    vk_group.videos_count = videos_count
    vk_group.board_count = board_count

    logging.info("New Wall Posts Count = %s", wall_count)
    logging.info("New Photos Count = %s", photos_count)
    logging.info("New Videos Count = %s", videos_count)
    logging.info("New Board Topics Count = %s", board_count)

    snapshot = snapshoter.make_for_wall(size=vk_group.wall_max + new_posts)
    snapshot.extend(snapshoter.make_for_videos(size=vk_group.video_max + new_videos))
    snapshot.extend(snapshoter.make_for_photos(size=vk_group.photo_max + new_photos))
    snapshot.extend(snapshoter.make_for_board(size=vk_group.board_max + new_topics))

    logging.info("Crawling finished")
    logging.info("%s items are at snapshot", len(snapshot))

    logging.info("Creating tasks for updating")
    tasks = self.create_update_task(snapshot)
    logging.info("Created tasks for updating")

    # Counting with a generator works the same on Python 2 and 3, unlike
    # len(filter(...)), and does not build a throwaway list.
    logging.info("%s items are to be added",
                 sum(1 for task in tasks if task.action == 'add'))
    logging.info("%s items are to be updated",
                 sum(1 for task in tasks if task.action == 'update'))

    # Expected amount of new data: each *_change is an inclusive
    # [start, end] pair, hence the "+ 1".
    new_comments, new_likes, new_shares = 0, 0, 0
    for task in tasks:
        if task.comments_change:
            new_comments += task.comments_change[1] - task.comments_change[0] + 1
        if task.likes_change:
            new_likes += task.likes_change[1] - task.likes_change[0] + 1
        if task.shares_change:
            new_shares += task.shares_change[1] - task.shares_change[0] + 1

    logging.info("New comments: %s", new_comments)
    logging.info("New likes: %s", new_likes)
    logging.info("New shares: %s", new_shares)

    logging.info("Start fetching updates")
    tasks = self.fetch_updates(tasks)
    logging.info("Completed fetching updates")

    # Amount of data that was actually fetched, for comparison with the
    # expected numbers logged above.
    new_comments, new_likes, new_shares = 0, 0, 0
    for task in tasks:
        if task.comments_change:
            new_comments += len(task.comments)
        if task.likes_change:
            new_likes += len(task.likes)
        if task.shares_change:
            new_shares += len(task.shares)

    logging.info("New comments are fetched: %s", new_comments)
    logging.info("New likes are fetched: %s", new_likes)
    logging.info("New shares are fetched: %s", new_shares)

    logging.info("Start fixing updates")
    self.fix_updates(tasks)
    logging.info("Completed fixing updates")
def fetch_comments(self, item, change):
    '''
    Return the list of comments for item.

    item   -- object whose type is one of 'post', 'topic', 'photo' or
              'video'
    change -- [<start of the comments range>, <end of the comments range>]

    Each returned comment is normalised in place:
      * 'uid' is copied to 'from_id' and 'message' to 'text' when present;
      * a string or integer 'date' is treated as a timestamp and converted
        with date.fromtimestamp(); a value that cannot be converted, or a
        missing 'date', becomes None.

    A request that fails with APIError is skipped, so the result may be
    incomplete.

    Raises ValueError if item.type is not one of the supported types.
    '''
    if item.type == 'post':
        sequence = GroupSnapshoter.create_requests_sequence(
            change[0], change[1], GroupSnapshoter.WALL_COUNT
        )

        def get(offset, count):
            # The first element of the response is not a comment
            # (presumably the total count -- verify against the API).
            return self.api.wall.getComments(
                owner_id=item.owner,
                post_id=item.data.internal_id,
                offset=offset,
                count=count
            )[1:]
    elif item.type == 'topic':
        sequence = GroupSnapshoter.create_requests_sequence(
            change[0], change[1], GroupSnapshoter.BOARD_COMMENT_COUNT
        )

        def get(offset, count):
            # NOTE(review): count is not forwarded to the board client --
            # confirm that Board.get_comments uses a fixed page size.
            return self.board.get_comments(
                topic_id=item.data.internal_id,
                offset=offset,
            )
    elif item.type == 'photo':
        sequence = GroupSnapshoter.create_requests_sequence(
            change[0], change[1], GroupSnapshoter.PHOTOS_COUNT
        )

        def get(offset, count):
            return self.api.photos.getComments(
                owner_id=item.owner,
                pid=item.data.internal_id,
                offset=offset,
                count=count
            )[1:]
    elif item.type == 'video':
        sequence = GroupSnapshoter.create_requests_sequence(
            change[0], change[1], GroupSnapshoter.VIDEOS_COUNT
        )

        def get(offset, count):
            return self.api.video.getComments(
                owner_id=item.owner,
                vid=item.data.internal_id,
                offset=offset,
                count=count
            )[1:]
    else:
        # Without this branch an unknown type failed later with an
        # UnboundLocalError on `sequence`.
        raise ValueError(
            "Unsupported item type for fetching comments: %r" % (item.type,)
        )

    retval = []
    for offset, count in sequence:
        try:
            comments = get(offset, count)
        except APIError:
            # Best effort: drop this page and go on with the next one.
            continue

        for comment in comments:
            if 'uid' in comment:
                comment['from_id'] = comment['uid']
            if 'message' in comment:
                comment['text'] = comment['message']

            # Read the date defensively: a comment without 'date' must not
            # abort the whole fetch with a KeyError.
            raw_date = comment['date'] if 'date' in comment else None
            if raw_date is None:
                comment['date'] = None
            elif isinstance(raw_date, (basestring, int)):
                try:
                    comment['date'] = date.fromtimestamp(int(raw_date))
                except ValueError:
                    comment['date'] = None

        retval.extend(comments)
    return retval