Exemple #1
0
    def _parse_content(self, url, content):
        """Process one fetched page: queue its new links and collect its data.

        A URL that is already in ``self._visited`` is ignored. Otherwise the
        URL is marked visited, every link found in ``content`` that has not
        been visited is handed to ``self._push_url``, and, depending on which
        marker substring the URL contains, ratings and/or metadata parsed
        from the page are stored on ``self._ratings`` / ``self._metadata``.

        Args:
            url: Address the content was fetched from.
            content: Raw page content passed to ``Parser``.
        """
        already_seen = url in self._visited
        if already_seen:
            return
        self._visited.add(url)
        self.logger.info(f'url {url} visited')

        page = Parser(content)
        # The subtraction assumes get_links() returns a set-like object
        # (presumably a set of URL strings -- confirm against Parser).
        links = page.get_links() - self._visited
        self.logger.info(f'{len(links)} links added')
        for target in links:
            self._push_url(target)

        # Pick the extraction strategy from the URL; the first matching
        # marker wins, and URLs matching none contribute no data.
        if 'other/moviePoint' in url:
            found = self._parse_ratings_per_user(url, page)
        elif 'moviedb/main' in url:
            # Main pages yield metadata first, then per-movie ratings.
            info = self._parse_metadata(url, page)
            if info:
                self._metadata.append(info)
            found = self._parse_ratings_per_movie(url, page)
        elif 'moviedb/grade' in url:
            found = self._parse_ratings_per_movie(url, page)
        else:
            return

        # Falsy results (None / empty) are skipped.
        if found:
            self._ratings.extend(found)
    async def test_get_links(self):
        """Check that links parsed from the loaded page contain known ids.

        The content returned by ``self._load_movie_main()`` is parsed with
        ``Parser``; the test passes when, for each expected query fragment,
        at least one extracted link contains it.
        """
        page = Parser(await self._load_movie_main())
        links = page.get_links()

        self.logger.info('# of links: %d', len(links))

        # Checked in order; the first missing fragment fails the test.
        expected_fragments = (
            'movieId=128635',
            'personId=271829',
            'personId=518464',
        )
        for fragment in expected_fragments:
            self.assertTrue(any(fragment in link for link in links))