def test(self):
    """Check lookups on a config holding a single ``many`` relationship.

    A throwaway ``Post`` entity is registered under the key ``"n_post"``;
    the config must report both the relationship option and that key
    when queried with the ``Post`` class.
    """
    class Post(Entity):
        pass

    relationships = [
        Relationship(Post, Relationship.Option.many, "n_post"),
    ]
    config = RelationshipConfig(relationships)

    assert config.get_relationship(Post) == Relationship.Option.many
    assert config.get_n_child_key(Post) == "n_post"
def test_check_subclass_implementation_goodcase1(self):
    """Chain Country -> State -> Zipcode with ``many`` relationships.

    Each parent class defines an attribute named after the child-count key
    used in its relationship (``n_state`` / ``n_zipcode``), and
    ``Entity.validate_relationship_config()`` is then invoked; the test
    passes as long as that call does not raise.
    """
    class Country(Entity):
        n_state = "n_state_field"

    class State(Entity):
        n_zipcode = "n_zipcode_field"

    class Zipcode(Entity):
        pass

    # Country owns many State entities, counted under "n_state".
    country_to_state = Relationship(
        State, Relationship.Option.many, "n_state",
    )
    Country.CONF_RELATIONSHIP = RelationshipConfig([country_to_state])

    # State owns many Zipcode entities, counted under "n_zipcode".
    state_to_zipcode = Relationship(
        Zipcode, Relationship.Option.many, "n_zipcode",
    )
    State.CONF_RELATIONSHIP = RelationshipConfig([state_to_zipcode])

    Entity.validate_relationship_config()
def test_check_subclass_implementation_goodcase2(self):
    """Link ImagePage to ImageDownload with a ``one`` relationship.

    Both classes declare the same ``id`` value and the relationship is
    created with ``None`` as its child-count key.
    ``Entity.validate_relationship_config()`` is then invoked; the test
    passes as long as that call does not raise.
    """
    class ImagePage(Entity):
        id = "image_page_id"

    class ImageDownload(Entity):
        id = "image_page_id"

    page_to_download = Relationship(
        ImageDownload, Relationship.Option.one, None,
    )
    ImagePage.CONF_RELATIONSHIP = RelationshipConfig([page_to_download])

    Entity.validate_relationship_config()
class ArtistPage(MusicWebsiteEntity):
    """Entity for a single artist page of the music website.

    The document id doubles as the artist id. Parsing the page collects
    the music ids linked from the ``<div id="detail">`` element and emits
    one ``MusicPage`` child per id.
    """

    # NOTE(review): presumably seconds (i.e. one hour) -- confirm against
    # the base entity's contract.
    CONF_UPDATE_INTERVAL = 3600
    CONF_RELATIONSHIP = RelationshipConfig([
        Relationship(
            MusicPage,
            Relationship.Option.many,
            "n_music",
            recursive=False,
        ),
    ])

    _id = fields.IntField(primary_key=True)
    musics = fields.ListField(fields.IntField())
    n_music = fields.IntField()

    meta = {
        "collection": "site_music_artist",
        "db_alias": Config.MongoDB.database,
    }

    @property
    def artist_id(self):
        """Alias for the document primary key ``_id``."""
        return self._id

    def build_url(self):
        """Return the artist page url built from this entity's ``_id``."""
        return url_builder.url_artist(self._id)

    def parse_response(self, url, request, response, html=None, **kwargs):
        """Extract the linked music ids from an artist page.

        :param url: not used by this implementation.
        :param request: not used by this implementation.
        :param response: object whose ``.text`` supplies the markup when
            ``html`` is not given.
        :param html: optional markup; takes precedence over
            ``response.text`` when it is not ``None``.
        :return: a ``ParseResult`` whose entity is an ``ArtistPage``
            carrying the music ids, whose children are the matching
            ``MusicPage`` objects, with empty ``data`` and status
            ``Status.S50_Finished.id``.
        """
        markup = response.text if html is None else html
        detail = BeautifulSoup(markup, "html.parser").find("div", id="detail")

        # The music id is the last path segment of every link's href.
        music_ids = [
            int(link["href"].split("/")[-1])
            for link in detail.find_all("a")
        ]

        return ParseResult(
            entity=ArtistPage(musics=music_ids),
            children=[MusicPage(_id=music_id) for music_id in music_ids],
            data={},
            status=Status.S50_Finished.id,
        )
class ListPage(MovieWebsiteEntity):
    """Entity for one page of the paginated movie listing.

    The document id doubles as the page number. Parsing the page emits one
    ``MoviePage`` child for every link found inside the
    ``<div id="listpage">`` element.
    """

    # NOTE(review): presumably seconds (i.e. one day) -- confirm against
    # the base entity's contract.
    CONF_UPDATE_INTERVAL = 24 * 3600
    CONF_RELATIONSHIP = RelationshipConfig([
        Relationship(MoviePage, Relationship.Option.many, "n_movie"),
    ])

    _id = fields.IntField(primary_key=True)
    n_movie = fields.IntField()

    meta = {
        "collection": "site_movie_listpage",
        "db_alias": Config.MongoDB.database,
    }

    @property
    def page_num(self):
        """Alias for the document primary key ``_id``."""
        return self._id

    def build_url(self):
        """Return the url of the listing page numbered ``page_num``."""
        return url_builder.url_nth_listpage(self.page_num)

    def parse_response(self, url, request, response, html=None, **kwargs):
        """Extract the linked movie ids from a listing page.

        :param url: not used by this implementation.
        :param request: not used by this implementation.
        :param response: object whose ``.text`` supplies the markup when
            ``html`` is not given.
        :param html: optional markup; takes precedence over
            ``response.text`` when it is not ``None``.
        :return: a ``ParseResult`` whose entity is an empty ``ListPage``,
            whose children are ``MoviePage`` objects (one per link), with
            empty ``data`` and status ``Status.S50_Finished.id``.
        """
        markup = response.text if html is None else html
        listing = BeautifulSoup(markup, "html.parser").find(
            "div", id="listpage",
        )
        links = listing.find_all("a")

        return ParseResult(
            entity=ListPage(),
            # The movie id is the last path segment of each link's href.
            children=[
                MoviePage(_id=int(link["href"].split("/")[-1]))
                for link in links
            ],
            data={},
            status=Status.S50_Finished.id,
        )
class HomePage(MovieWebsiteEntity):
    """Entry-point entity of the movie website.

    Parsing reads the highest page number from the last link inside the
    ``<div id="pagination">`` element and emits one ``ListPage`` child
    for every page from 1 up to and including that number.
    """

    CONF_UPDATE_INTERVAL = 1
    CONF_RELATIONSHIP = RelationshipConfig([
        Relationship(ListPage, Relationship.Option.many, "n_listpage"),
    ])

    _id = fields.IntField(primary_key=True)
    description = fields.StringField()
    max_page_num = fields.IntField()
    n_listpage = fields.IntField()

    meta = {
        "collection": "site_movie_homepage",
        "db_alias": Config.MongoDB.database,
    }

    def build_url(self, **kwargs):
        """Return the url of the first listing page; ``kwargs`` are ignored."""
        return url_builder.url_first_listpage()

    def parse_response(self, url, request, response, html=None, **kwargs):
        """Discover how many listing pages exist.

        :param url: not used by this implementation.
        :param request: not used by this implementation.
        :param response: object whose ``.text`` supplies the markup when
            ``html`` is not given.
        :param html: optional markup; takes precedence over
            ``response.text`` when it is not ``None``.
        :return: a ``ParseResult`` whose entity is a ``HomePage`` carrying
            ``max_page_num``, whose children are ``ListPage`` objects with
            ids ``1..max_page_num``, with empty ``data`` and status
            ``Status.S50_Finished.id``.
        """
        markup = response.text if html is None else html
        pagination = BeautifulSoup(markup, "html.parser").find(
            "div", id="pagination",
        )

        # The last pagination link points at the final page; its trailing
        # path segment is the page count.
        last_href = pagination.find_all("a")[-1]["href"]
        page_count = int(last_href.split("/")[-1])

        return ParseResult(
            entity=HomePage(max_page_num=page_count),
            children=[
                ListPage(_id=number)
                for number in range(1, page_count + 1)
            ],
            data={},
            status=Status.S50_Finished.id,
        )
musics = [ int(a["href"].split("/")[-1]) for a in div.find_all("a") ] entity = GenrePage(musics=musics) children = list() for music_id in musics: music = MusicPage(_id=music_id) children.append(music) status = Status.S50_Finished.id pres = ParseResult( entity=entity, children=children, data={}, status=status, ) return pres MusicPage.CONF_RELATIONSHIP = RelationshipConfig([ Relationship(ArtistPage, Relationship.Option.many, "n_artist"), Relationship(GenrePage, Relationship.Option.many, "n_genre"), ]) MusicPage.validate_implementation() ArtistPage.validate_implementation() GenrePage.validate_implementation()