Ejemplo n.º 1
0
    def get_movie_id(self, movie, year=None):
        """Get the best matching movie id for `movie`, `year`.

        :param str movie: movie.
        :param year: year of the movie, if any.
        :type year: int
        :return: the movie id, if found.
        :rtype: int
        """
        movie_id = None

        # get the movie id
        logger.info('Getting movie id')

        r = self.session.get(self.server_url + 'search.php?search=' + quote_plus(movie), timeout=10)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # populate the movie id
        movies_table = soup.find('table', {'class': 'tabel'})
        movies = movies_table.find_all('tr')
        for item in movies:
            link = item.find('a', href=True)
            if link:
                if link['href'].startswith('movie/'):
                    splitted_uri = link['href'].split('/')
                    if len(splitted_uri) == 2:
                        media_id = splitted_uri[1]
                    else:
                        continue
                    media_title = link.text
                    match = re.search(r'(.+)\s\((\d{4})\)$', media_title)
                    if match:
                        media_name = match.group(1)
                        media_year = match.group(2)
                        if sanitize(media_name.lower()) == sanitize(movie.lower()) and media_year == str(year):
                            movie_id = media_id

        soup.decompose()
        soup = None

        logger.debug(f'Found this movie id: {movie_id}')

        if not movie_id:
            logging.debug(f"Addic7ed: Cannot find this movie with guessed year {year}: {movie}")

        return movie_id
Ejemplo n.º 2
0
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.
        :return: show id per series, lower case and without quotes.
        :rtype: dict

        # patch: add punctuation cleaning
        """
        # get the show page
        logger.info('Getting show ids')
        region.set(self.last_show_ids_fetch_key, datetime.datetime.now())

        r = self.session.get(self.server_url, timeout=10)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        shows = soup.find(id='qsShow')
        for show in shows:
            if hasattr(show, 'attrs'):
                try:
                    show_id = int(show.attrs['value'])
                except ValueError:
                    continue

                if show_id != 0:
                    show_clean = sanitize(show.text, default_characters=self.sanitize_characters)

                    show_ids[show_clean] = show_id
                    match = series_year_re.match(show_clean)
                    if match and match.group(2) and match.group(1) not in show_ids:
                        # year found, also add it without year
                        show_ids[match.group(1)] = show_id

        soup.decompose()
        soup = None

        logger.debug('Found %d show ids', len(show_ids))

        if not show_ids:
            raise Exception("Addic7ed: No show IDs found!")

        return show_ids