def load_reviews(self):
    """Fetch the MAL user reviews pages and set this user's reviews attributes.

    Iterates over every paginated reviews page until an empty page is
    returned, then merges all pages into a single reviews dict.

    :rtype: :class:`.User`
    :return: Current user object.
    """
    page = 0
    # collect all reviews over all pages.
    review_collection = []
    while True:
        # NOTE: the query string must begin with '?', not '&'. With '&' the
        # page number became part of the URL path, so every request fetched
        # the same (first) page and pagination never advanced.
        user_reviews = self.session.session.get(
            u'http://myanimelist.net/profile/' +
            utilities.urlencode(self.username) + u'/reviews?' +
            urlencode({u'p': page})
        ).text
        parse_result = self.parse_reviews(utilities.get_clean_dom(user_reviews))
        if page == 0:
            # only set attributes once the first time around.
            self.set(parse_result)
        if len(parse_result['reviews']) == 0:
            # an empty page means we have walked past the last review page.
            break
        review_collection.append(parse_result['reviews'])
        page += 1
    # merge the review collections into one review dict, and set it.
    self.set({
        'reviews': {k: v for d in review_collection for k, v in d.items()}
    })
    return self
def load_reviews(self):
    """Fetch the MAL user reviews pages and set this user's reviews attributes.

    Iterates over every paginated reviews page until an empty page is
    returned, then merges all pages into a single reviews dict.

    :rtype: :class:`.User`
    :return: Current user object.
    """
    page = 0
    # collect all reviews over all pages.
    review_collection = []
    while True:
        # NOTE: the query string must begin with '?', not '&'. With '&' the
        # page number became part of the URL path, so every request fetched
        # the same (first) page and pagination never advanced.
        user_reviews = self.session.session.get(
            u'http://myanimelist.net/profile/' +
            utilities.urlencode(self.username) + u'/reviews?' +
            urllib.urlencode({u'p': page})).text
        parse_result = self.parse_reviews(
            utilities.get_clean_dom(user_reviews))
        if page == 0:
            # only set attributes once the first time around.
            self.set(parse_result)
        if len(parse_result[u'reviews']) == 0:
            # an empty page means we have walked past the last review page.
            break
        review_collection.append(parse_result[u'reviews'])
        page += 1
    # merge the review collections into one review dict, and set it.
    self.set({
        'reviews': {k: v for d in review_collection for k, v in d.iteritems()}
    })
    return self
def load_clubs(self):
    """Fetch the MAL character clubs page and set this character's clubs attributes.

    :rtype: :class:`.Character`
    :return: Current character object.
    """
    clubs_url = (u'http://myanimelist.net/character/' + str(self.id) +
                 u'/' + utilities.urlencode(self.name) + u'/clubs')
    page_text = self.session.session.get(clubs_url).text
    self.set(self.parse_clubs(utilities.get_clean_dom(page_text)))
    return self
def load_characters(self):
    """Fetch the MAL media characters page and set this media's character attributes.

    :rtype: :class:`.Media`
    :return: current media object.
    """
    # the URL path segment for the media type is the lowercased class name.
    media_type = self.__class__.__name__.lower()
    characters_url = (u'http://myanimelist.net/' + media_type + u'/' +
                      str(self.id) + u'/' + utilities.urlencode(self.title) +
                      u'/characters')
    page_text = self.session.session.get(characters_url).text
    self.set(self.parse_characters(utilities.get_clean_dom(page_text)))
    return self
def load(self):
    """Fetch the MAL person page and set this person's attributes.

    :rtype: :class:`.Person`
    :return: Current person object.
    """
    page_url = u'http://myanimelist.net/people/' + str(self.id)
    page_text = self.session.session.get(page_url).text
    self.set(self.parse(utilities.get_clean_dom(page_text)))
    return self
def load(self):
    """Fetch the MAL character page and set this character's attributes.

    :rtype: :class:`.Character`
    :return: Current character object.
    """
    page_url = u'http://myanimelist.net/character/' + str(self.id)
    page_text = self.session.session.get(page_url).text
    self.set(self.parse(utilities.get_clean_dom(page_text)))
    return self
def load_friends(self):
    """Fetch the MAL user friends page and set this user's friends attributes.

    :rtype: :class:`.User`
    :return: Current user object.
    """
    friends_url = (u'http://myanimelist.net/profile/' +
                   utilities.urlencode(self.username) + u'/friends')
    page_text = self.session.session.get(friends_url).text
    self.set(self.parse_friends(utilities.get_clean_dom(page_text)))
    return self
def load(self):
    """Fetch the MAL media page and set this media's attributes.

    :rtype: :class:`.Media`
    :return: current media object.
    """
    # the URL path segment for the media type is the lowercased class name.
    media_type = self.__class__.__name__.lower()
    page_url = u'http://myanimelist.net/' + media_type + u'/' + str(self.id)
    page_text = self.session.session.get(page_url).text
    self.set(self.parse(utilities.get_clean_dom(page_text)))
    return self
def load(self):
    """Fetch the MAL user profile page and set this user's attributes.

    :rtype: :class:`.User`
    :return: Current user object.
    """
    profile_url = ('http://myanimelist.net/profile/' +
                   utilities.urlencode(self.username))
    page_text = self.session.session.get(profile_url).text
    self.set(self.parse(utilities.get_clean_dom(page_text)))
    return self
def load(self):
    """Fetch the MAL media page and set this media's attributes.

    :rtype: :class:`.Media`
    :return: current media object.
    """
    page_url = ('http://myanimelist.net/' +
                self.__class__.__name__.lower() + '/' + str(self.id))
    page_text = self.session.session.get(page_url).text
    # keep an untouched soup alongside the cleaned DOM; parse() takes both.
    raw_soup = bs4.BeautifulSoup(page_text, 'lxml')
    self.set(self.parse(utilities.get_clean_dom(page_text), raw_soup))
    return self
def load_pictures(self):
    """Fetch the MAL person pictures page and set this person's pictures attributes.

    :rtype: :class:`.Person`
    :return: Current person object.
    """
    pictures_url = (u'http://myanimelist.net/person/' + str(self.id) +
                    u'/' + utilities.urlencode(self.name) + u'/pictures')
    page_text = self.session.session.get(pictures_url).text
    self.set(self.parse_pictures(utilities.get_clean_dom(page_text)))
    return self
def load_pictures(self):
    """Fetch the MAL person pictures page and set this person's pictures attributes.

    :rtype: :class:`.Person`
    :return: Current person object.
    """
    pictures_url = (u'http://myanimelist.net/person/' + str(self.id) +
                    u'/' + utilities.urlencode(self.name) + u'/pictures')
    page_text = self.session.session.get(pictures_url).text
    self.set(self.parse_pictures(utilities.get_clean_dom(page_text)))
    return self
def load_stats(self):
    """Fetch the MAL media statistics page and set this media's statistics attributes.

    :rtype: :class:`.Media`
    :return: current media object.
    """
    stats_url = ('http://myanimelist.net/' +
                 self.__class__.__name__.lower() + '/' + str(self.id) +
                 '/' + utilities.urlencode(self.title) + '/stats')
    page_text = self.session.session.get(stats_url).text
    self.set(self.parse_stats(utilities.get_clean_dom(page_text)))
    return self
def load_recommendations(self):
    """Fetch the MAL user recommendations page and set this user's recommendations attributes.

    :rtype: :class:`.User`
    :return: Current user object.
    """
    recommendations_url = (u'http://myanimelist.net/profile/' +
                           utilities.urlencode(self.username) +
                           u'/recommendations')
    page_text = self.session.session.get(recommendations_url).text
    self.set(self.parse_recommendations(utilities.get_clean_dom(page_text)))
    return self
def load_characters(self):
    """Fetch the MAL media characters page and set this media's character attributes.

    :rtype: :class:`.Media`
    :return: current media object.
    """
    media_type = self.__class__.__name__.lower()
    characters_url = (u'http://myanimelist.net/' + media_type + u'/' +
                      str(self.id) + u'/' + utilities.urlencode(self.title) +
                      u'/characters')
    page_text = self.session.session.get(characters_url).text
    self.set(self.parse_characters(utilities.get_clean_dom(page_text)))
    return self
def load_characters(self):
    """Fetch the MAL media characters page and set this media's character attributes.

    :rtype: :class:`.Media`
    :return: current media object.
    """
    characters_url = ('http://myanimelist.net/' +
                      self.__class__.__name__.lower() + '/' + str(self.id) +
                      '/' + utilities.urlencode(self.title) + '/characters')
    page_text = self.session.session.get(characters_url).text
    # keep an untouched soup alongside the cleaned DOM; the parser takes both.
    raw_soup = bs4.BeautifulSoup(page_text, 'lxml')
    self.set(self.parse_characters(utilities.get_clean_dom(page_text),
                                   raw_soup))
    return self
def _search_generic(self, keyword, base_url_tmpl, parse_func):
    """Generic helper to fetch and parse a search page.

    `base_url_tmpl` is the url template for each search. As example for
    :func:`search_anime`:

    :Example:

        base_url_tmpl = 'https://myanimelist.net/manga.php?q={query}'

    It requires `{query}` to contain the `keyword`. `parse_func` must raise
    :class:`ValueError` when it finds no results on a page.

    :param base_url_tmpl: url template.
    :type base_url_tmpl: str
    :param parse_func: parser function that will run on each page.
    :type parse_func: func
    :return: parsed result from `parse_func`, or None when nothing is found.
    :rtype: `types.GeneratorType` or None
    """
    item_per_page = 50
    page_num = 0
    while True:
        page_num += 1
        # results beyond the first page are addressed with a 'show' offset.
        item_idx = (page_num - 1) * item_per_page
        if item_idx > 0:
            url_tmpl = base_url_tmpl + '&show={}'.format(item_idx)
        else:
            url_tmpl = base_url_tmpl
        page_url = url_tmpl.format(**{'query': keyword})
        page = self.session.get(page_url).text
        html_soup = utilities.get_clean_dom(page, fix_html=False)
        try:
            return parse_func(html_soup)
        except ValueError:
            # parse_func raises ValueError when the page holds no results.
            # BUGFIX: the old loop condition (`while not is_item_found ...`)
            # re-entered the loop after a failed parse and requested
            # ever-higher offsets forever; terminate instead.
            return None
def newest(cls, session):
    """Fetch the latest media added to MAL.

    :type session: :class:`myanimelist.session.Session`
    :param session: A valid MAL session

    :rtype: :class:`.Media`
    :return: the newest media on MAL

    :raises: :class:`.MalformedMediaPageError`
    """
    media_type = cls.__name__.lower()
    listing_url = (u'http://myanimelist.net/' + media_type +
                   '.php?o=9&c[]=a&c[]=d&cv=2&w=1')
    page_text = session.session.get(listing_url).text
    dom = utilities.get_clean_dom(page_text)
    latest_entry = dom.find(u"div", {u"class": u"hoverinfo"})
    if not latest_entry:
        raise MalformedMediaPageError(0, page_text, u"No media entries found on recently-added page")
    # the entry's rel attribute carries the id after a one-char prefix.
    latest_id = int(latest_entry[u'rel'][1:])
    return getattr(session, media_type)(latest_id)
def load(self):
    """Fetch the MAL club page and set this club's attributes.

    :return: Current club object.
    """
    page_url = 'https://myanimelist.net/clubs.php?cid={club_id}'.format(
        club_id=self.id)
    page_text = self.session.session.get(page_url).text
    dom = utilities.get_clean_dom(page_text, fix_html=False)
    # club name
    self._name = dom.select_one('h1').text
    # club information
    self._information = dom.select_one('.clearfix').text.strip()
    # club status (also carries the member count)
    self._status = self._parse_club_status(dom)
    self._num_members = self._status.members
    # club type
    self._type = self._parse_club_type(dom)
    # club officers
    self._officers = self._parse_club_officers(dom)
    return self
def newest(cls, session):
    """Fetch the latest media added to MAL.

    :type session: :class:`myanimelist.session.Session`
    :param session: A valid MAL session

    :rtype: :class:`.Media`
    :return: the newest media on MAL

    :raises: :class:`.MalformedMediaPageError`
    """
    media_type = cls.__name__.lower()
    listing_url = (u'http://myanimelist.net/' + media_type +
                   '.php?o=9&c[]=a&c[]=d&cv=2&w=1')
    page_text = session.session.get(listing_url).text
    dom = utilities.get_clean_dom(page_text)
    latest_entry = dom.find(u"div", {u"class": u"hoverinfo"})
    if not latest_entry:
        raise MalformedMediaPageError(
            0, page_text, u"No media entries found on recently-added page")
    # the entry's rel attribute carries the id after a one-char prefix.
    latest_id = int(latest_entry[u'rel'][1:])
    return getattr(session, media_type)(latest_id)
def load_videos(self):
    """Fetch the MAL media videos page and set this media's promotion videos attribute.

    :rtype: :class:`.Anime`
    :return: current media object.
    """
    # leading empty segment makes the joined path start with '/'.
    path_segments = ['', self.__class__.__name__.lower(), str(self.id),
                     utilities.urlencode(self.title), 'video']
    url = ParseResult(
        scheme='https',
        netloc='myanimelist.net',
        path='/'.join(path_segments),
        params='',
        query='',
        fragment='',
    ).geturl()
    page_text = self.session.session.get(url).text
    videos = self.parse_promotion_videos(utilities.get_clean_dom(page_text))
    self.set({'promotion_videos': videos})
    return self
def parse_page():
    """Yield each club member as a session user object, walking all member pages.

    NOTE(review): references `self` without a parameter — presumably a
    closure nested inside a method; confirm against the enclosing scope.
    """
    user_per_page = 36
    # second page url example
    # https://myanimelist.net/clubs.php?action=view&t=members&id={club_id}&show=36
    page_num = 0
    found_any = None
    while found_any or found_any is None:
        found_any = False
        page_num += 1
        url_tmpl = ('https://myanimelist.net/clubs.php'
                    '?id={club_id}&action=view&t=members')
        offset = (page_num - 1) * user_per_page
        if offset > 0:
            url_tmpl += '&show={}'.format(offset)
        page_url = url_tmpl.format(**{'club_id': self.id})
        page_text = self.session.session.get(page_url).text
        dom = utilities.get_clean_dom(page_text, fix_html=False)
        member_ids = [
            cell.select_one('a').get('href').split('/profile/')[1]
            for cell in dom.select('td')]
        found_any = bool(member_ids)
        for member_id in member_ids:
            yield self.session.user(member_id)
def search(self, keyword, mode='all'):
    """Search MAL using the given keyword and mode.

    :param keyword: keyword to search.
    :param mode: mode used to search.
    :type keyword: str
    :type mode: str
    :return: list of found media/object.
    :rtype: list
    """
    self._check_search_input(keyword)
    # anime search can received empty keyword but can't 1 or 2 characters.
    # so use the min, max limit used by mode 'all'
    if mode == 'anime':
        return self.search_anime(keyword)
    elif mode == 'manga':
        return self.search_manga(keyword)
    # the query have following format for each mode:
    query_dict = {
        'all': 'search/all?q={query}',
    }
    not_implemented_mode_dict = {
        # already implemented
        # 'anime': 'anime.php?q={query}',
        # 'manga': 'manga.php?q={query}',
        # not yet implemented.
        'character': 'character.php?q={query}',
        'people': 'people.php?q={query}',
        'clubs': 'clubs.php?action=find&cn={query}',
        'users': 'users.php?q={query}',
        # no object/class created for this search.
        'news': 'news/search?q={query}',
        'featured': 'featured/search?q={query}',
        'forum': 'forum/?action=search&u=&uloc=1&loc=-1&q={query}',
    }
    # check mode: reject unknown modes, and fail loudly for recognized but
    # unimplemented categories.
    if mode not in query_dict and mode not in not_implemented_mode_dict:
        raise ValueError('Search mode is not available.')
    elif mode in not_implemented_mode_dict:
        raise NotImplementedError('"{}" category search is not yet implemented.'.format(mode))
    url = 'https://myanimelist.net'
    search_page_url = '/'.join([url, query_dict[mode]])
    search_page_url = search_page_url.format(**{'query': keyword})
    search_page = self.session.get(search_page_url).text
    html_soup = utilities.get_clean_dom(search_page, fix_html=False)
    result = []
    # each category is a section of the combined 'search/all' result page.
    categories = ['characters', 'anime', 'manga', 'people', 'clubs']
    # links that appear inside result sections but are not result objects.
    disallowed_url_part = [
        'myanimelist.net/topanime.php',
        'myanimelist.net/login',
        '/login.php',
    ]
    for catg in categories:
        # the section's results live in the <article> that follows the
        # category's anchor element.
        article = html_soup.select_one('#{}'.format(catg)).find_next('article')
        if catg == 'clubs':
            # club results use a different markup: links sit in bare divs
            # rather than in '.information' containers.
            article_divs = [x for x in article.select('div') if x.select_one('a')]
            a_tags = [
                x.select_one('a') for x in article_divs if x.select_one('a').get('href')]
            links = [x.get('href') for x in a_tags]
            a_tags_result = list(map(self.load_from_url, links))
            result.extend(a_tags_result)
        else:
            a_tags = article.select('.information a')
            # find all link to correct object.
            a_tags_result = []
            for tag in a_tags:
                link = tag.get('href')
                # skip the login link and other non-result urls.
                is_skipped_url = any(x in link for x in disallowed_url_part)
                if is_skipped_url:
                    continue
                a_tags_result.append(self.load_from_url(link))
            # fix the bug on when parsing manga on search page.
            # it is caused by unclosed a tag on 'article > div > div.information > div'
            if catg == 'manga' and len(a_tags_result) == 1:
                a_tags_hrefs = [x.get('href') for x in html_soup.select('a') if x.get('href')]
                manga_link = [x for x in a_tags_hrefs if 'myanimelist.net/manga/' in x]
                a_tags_result = list(map(self.load_from_url, manga_link))
            result.extend(a_tags_result)
    # deduplicate results; NOTE: set() does not preserve result order.
    return list(set(result))