def _parse_content(self, url, content):
    """Process one fetched page: queue its unseen links and collect its data.

    A URL that is already in ``self._visited`` is ignored entirely.
    Otherwise the URL is marked as visited, ``content`` is wrapped in a
    ``Parser``, and every link the parser reports that has not been
    visited yet is handed to ``self._push_url``.

    What gets extracted afterwards depends on which substring the URL
    contains (checked in this order, first match wins):

    * ``other/moviePoint`` -- per-user ratings are added to ``self._ratings``.
    * ``moviedb/main`` -- metadata is appended to ``self._metadata`` and
      per-movie ratings are added to ``self._ratings``.
    * ``moviedb/grade`` -- per-movie ratings are added to ``self._ratings``.

    Empty/falsy parse results are dropped silently.

    Args:
        url: Address of the page that was fetched.
        content: Page body passed to ``Parser``.
    """
    if url in self._visited:
        return

    self._visited.add(url)
    self.logger.info(f'url {url} visited')

    parser = Parser(content)
    # The subtraction implies get_links() yields a set-like value; the
    # current URL is already in _visited, so self-links are excluded too.
    unseen_links = parser.get_links() - self._visited
    self.logger.info(f'{len(unseen_links)} links added')
    for unseen in unseen_links:
        self._push_url(unseen)

    # Per-user rating pages take priority over the movie page checks.
    if 'other/moviePoint' in url:
        user_ratings = self._parse_ratings_per_user(url, parser)
        if user_ratings:
            self._ratings.extend(user_ratings)
        return

    if 'moviedb/main' in url:
        # Main pages contribute metadata before their ratings are read.
        movie_metadata = self._parse_metadata(url, parser)
        if movie_metadata:
            self._metadata.append(movie_metadata)
    elif 'moviedb/grade' not in url:
        # Neither a main page nor a grade page: nothing more to extract.
        return

    # Shared tail for both 'moviedb/main' and 'moviedb/grade' pages.
    movie_ratings = self._parse_ratings_per_movie(url, parser)
    if movie_ratings:
        self._ratings.extend(movie_ratings)
async def test_get_links(self):
    """Verify that ``Parser.get_links`` finds the expected movie/person links.

    The page body comes from ``self._load_movie_main()``. The test passes
    when, for each expected query fragment, at least one extracted link
    contains it.
    """
    page = await self._load_movie_main()
    extracted = Parser(page).get_links()
    self.logger.info('# of links: %d', len(extracted))

    # Checked in this order so the first missing fragment fails first.
    expected_fragments = (
        'movieId=128635',
        'personId=271829',
        'personId=518464',
    )
    for fragment in expected_fragments:
        self.assertTrue(any(fragment in href for href in extracted))