Example #1
    def query(self, series, season, year=None, country=None):
        # get the show id
        show_id = self.get_show_id(series, year, country)
        if show_id is None:
            logger.error('No show id found for %r (%r)', series, {
                'year': year,
                'country': country
            })
            return []

        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id,
                    season)
        r = self.session.get(self.server_url + 'show/%d' % show_id,
                             params={'season': season},
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            # Provider returns a 304 Not Modified status with empty content;
            # raise_for_status won't raise an exception for that status code
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # extract the series and year from the page header
        match = series_year_re.match(
            soup.select('#header font')[0].text.strip()[:-10])
        series = match.group('series')
        year = int(match.group('year')) if match.group('year') else None
        subtitles = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # ignore incomplete subtitles
            status = cells[5].text
            if status != 'Completed':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # read the item
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            page_link = self.server_url + cells[2].a['href'][1:]
            season = int(cells[0].text)
            episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = self.subtitle_class(language, hearing_impaired,
                                           page_link, series, season, episode,
                                           title, year, version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
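
Note: `series_year_re` is not defined in this snippet; it is presumably a module-level regex exposing the `series` and `year` groups read above. A minimal sketch under that assumption (the exact pattern may differ):

    # hypothetical module-level pattern; group names match the code above
    import re
    series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')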
Example #2
    def query(self, keyword, season=None, episode=None, year=None):
        params = keyword
        if season and episode:
            params += ' S{season:02d}E{episode:02d}'.format(season=season,
                                                            episode=episode)
        elif year:
            params += ' {:4d}'.format(year)

        logger.debug('Searching subtitles %r', params)
        subtitles = []
        search_link = self.server_url + text_type(
            self.search_url).format(params)
        while True:
            r = self.session.get(search_link, timeout=30)
            r.raise_for_status()

            if not r.content:
                logger.debug('No data returned from provider')
                return []

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['lxml', 'html.parser'])

            # loop over subtitles cells
            for cell in soup.select('td.latest_name > a:nth-of-type(1)'):
                # read the item
                subtitle_id = int(cell['href'].rsplit('/', 2)[1])
                page_link = cell['href']
                language = Language.fromalpha2(
                    cell.parent.find('img')['src'].split('/')[-1].split('.')
                    [0])
                version = cell.text.strip()

                subtitle = self.subtitle_class(
                    language, page_link, version,
                    self.download_url.format(subtitle_id))

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

            anchors = soup.select('td a')
            next_page_available = False
            for anchor in anchors:
                if 'Next' in anchor.text and 'search.php' in anchor['href']:
                    search_link = self.server_url + anchor['href']
                    next_page_available = True
                    break
            if not next_page_available:
                break

        return subtitles
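
The `text_type(...)` call suggests the module targets both Python 2 and 3 via `six`; an import along these lines is assumed but not shown in the snippet:

    # assumed module-level import for the snippet above
    from six import text_type  # unicode on py2, str on py3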
Example #3
    def query(self, series, season, year=None, country=None):
        # get the show id
        show_id = self.get_show_id(series, year, country)
        if show_id is None:
            logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
            return []

        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id, season)
        r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
        r.raise_for_status()

        if not r.content:
            # Provider returns a 304 Not Modified status with empty content;
            # raise_for_status won't raise an exception for that status code
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # extract the series and year from the page header
        match = series_year_re.match(soup.select('#header font')[0].text.strip()[:-10])
        series = match.group('series')
        year = int(match.group('year')) if match.group('year') else None
        subtitles = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # ignore incomplete subtitles
            status = cells[5].text
            if status != 'Completed':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # read the item
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            page_link = self.server_url + cells[2].a['href'][1:]
            season = int(cells[0].text)
            episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year,
                                           version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
Example #4
    def search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.
        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int or None
        :return: the show id, if any.
        :rtype: int or None
        """
        # make the search
        series_clean = self.clean_punctuation(series).lower()
        logger.info('Searching show id for %r', series_clean)
        r = self.session.post(self.server_url + 'search.php', data={'q': series_clean}, timeout=10)
        r.raise_for_status()

        # get the series out of the suggestions
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        show_id = None
        for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
            match = link_re.match(self.clean_punctuation(suggestion.text))
            if not match:
                logger.error('Failed to match %s', suggestion.text)
                continue

            if self.clean_punctuation(match.group('series')).lower() == series_clean:
                if year is not None and int(match.group('first_year')) != year:
                    logger.debug('Year does not match')
                    continue
                show_id = int(suggestion['href'][8:-5])
                logger.debug('Found show id %d', show_id)
                break

        return show_id
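
`link_re` is defined elsewhere in the provider; given the `series` and `first_year` groups read above, a compatible pattern could look like this (an assumption, not the provider's actual regex):

    # hypothetical pattern exposing the groups the code reads
    import re
    link_re = re.compile(r'^(?P<series>.+?) \((?P<first_year>\d{4})-\d{4}\)$')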
Example #5
    def _search_url_titles(self, title):
        """Search the URL titles by kind for the given `title`.

        :param str title: title to search for.
        :return: the URL titles by kind.
        :rtype: collections.defaultdict

        """
        # make the search
        logger.info('Searching title name for %r', title)
        r = self.session.get(self.server_url + 'subtitle/search/',
                             params={'q': title},
                             timeout=10)
        r.raise_for_status()

        # check for redirections
        if r.history and all([h.status_code == 302 for h in r.history]):
            logger.debug('Redirected to the subtitles page')
            links = [r.url]
        else:
            # get the suggestions (if needed)
            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            links = [
                link.attrs['href']
                for link in soup.select('#processes div.generalWindowTop a')
            ]
            logger.debug('Found %d suggestions', len(links))

        url_titles = defaultdict(list)
        for link in links:
            parts = link.split('/')
            url_titles[parts[-3]].append(parts[-2])

        return url_titles
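
Since the method returns a `defaultdict(list)` keyed by the kind segment of each link (`parts[-3]`), a caller might consume it as below; `provider` and the kind keys are illustrative assumptions:

    # illustrative usage of the returned defaultdict
    url_titles = provider._search_url_titles('Dexter')
    for kind in ('serie', 'movie'):  # hypothetical kind keys
        for url_title in url_titles[kind]:
            print(kind, url_title)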
Example #6
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict
        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + 'shows.php',
                             timeout=20,
                             cookies=self.cookies)
        r.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        show_cells = re.findall(show_cells_re, r.content)
        if show_cells:
            soup = ParserBeautifulSoup(b''.join(show_cells),
                                       ['lxml', 'html.parser'])
        else:
            # If RegEx fails, fall back to original r.content and use 'html.parser'
            soup = ParserBeautifulSoup(r.content, ['html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td.vr > h3 > a[href^="/show/"]'):
            show_ids[sanitize(show.text)] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
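
`show_cells_re` must be a bytes pattern, since it is applied to `r.content` and the matches are joined with `b''`. A sketch consistent with the `td.vr` cells selected below it (the real pattern may differ):

    # hypothetical bytes pattern isolating the show cells
    import re
    show_cells_re = re.compile(b'<td class="vr">.*?</td>', re.DOTALL)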
Example #7
    def _search_movie(self, movie_id):
        subs = []
        url = self.server_url + self.movie_info_url + movie_id
        r = self.session.get(url, timeout=10)
        r.raise_for_status()

        if len(r.content) < 10:
            logger.debug(
                "Too short content-length in response: [{}]. Treating as No Subtitles Found "
                .format(str(r.content)))
            return []

        html = ParserBeautifulSoup(r.content, ["html.parser"])
        sub_rows = html.select("table#subtitlesList tbody > tr")

        for row in sub_rows:
            columns = row.find_all("td")
            sub = {"id": movie_id}
            for index, column in enumerate(columns):
                if index == 0:
                    sub["rls"] = column.get_text().strip().split("\n")[0]
                if index == 5:
                    sub["sub_id"] = column.find(
                        "a", attrs={"data-subtitle-id":
                                    True})["data-subtitle-id"]

            if 'sub_id' in sub:
                subs.append(sub)
        return subs
Example #8
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get the suggestion
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if sanitize(suggestion[0].i.text.replace('\'', ' ')) != sanitize(series_year):
            logger.warning('Show id not found: suggestion does not match')
            return None
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id
Example #9
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + 'shows.php', timeout=10)
        r.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        show_cells = re.findall(show_cells_re, r.content)
        if show_cells:
            soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
        else:
            # If RegEx fails, fall back to original r.content and use 'html.parser'
            soup = ParserBeautifulSoup(r.content, ['html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td.version > h3 > a[href^="/show/"]'):
            show_ids[sanitize(show.text)] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Example #10
    def query(self, series, season, episode, year=None):
        # search the show id
        show_id = self.search_show_id(series, year)
        if show_id is None:
            logger.error('No show id found for %r (%r)', series, {'year': year})
            return []

        # get the episode ids
        episode_ids = self.retry(lambda: self.get_episode_ids(show_id, season))
        if episode not in episode_ids:
            logger.error('Episode %d not found', episode)
            return []

        # get the episode page
        logger.info('Getting the page for episode %d', episode_ids[episode])
        r = self.retry(lambda: self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10))
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitle rows
        subtitles = []
        for row in soup.select('.subtitlen'):
            # read the item
            language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
            subtitle_id = int(row.parent['href'][10:-5])
            page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
            rip = row.find('p', title='rip').text.strip() or None
            release = row.find('p', title='release').text.strip() or None

            subtitle = PatchedTVsubtitlesSubtitle(language, page_link, subtitle_id, series, season, episode, year, rip,
                                                  release)
            logger.info('Found subtitle %s', subtitle)
            subtitles.append(subtitle)

        return subtitles
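
`self.retry` is not shown in this snippet; a minimal sketch of such a helper, assuming simple fixed-count retries with a short delay (names and defaults are assumptions):

    import time

    def retry(self, func, attempts=3, delay=1):
        """Call `func` until it succeeds or `attempts` runs out."""
        for attempt in range(attempts):
            try:
                return func()
            except Exception:
                if attempt == attempts - 1:
                    raise
                time.sleep(delay)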
Example #11
    def _get_suggestions(self, title):
        """Search the show or movie id from the `title` and `year`.

        :param str title: title of the show.
        :return: the show suggestions found.
        :rtype: list of dict

        """
        # make the search
        logger.info('Searching show ids with %r', title)
        r = self.session.get(self.server_url + self.search_url.format(title),
                             headers={'Referer': self.server_url},
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['html.parser'])
        suggestions = [{
            'link': option.attrs['value'],
            'title': option.text
        } for option in soup.select('select[name="Mov_sel"] > option[value]')]
        logger.debug('Found suggestions: %r', suggestions)

        return suggestions
Example #12
    def _search_url_titles(self, title):
        """Search the URL titles by kind for the given `title`.

        :param str title: title to search for.
        :return: the URL titles by kind.
        :rtype: collections.defaultdict

        """
        # make the search
        logger.info('Searching title name for %r', title)
        r = self.session.get(self.server_url + 'subtitle/search/', params={'q': title}, timeout=10)
        r.raise_for_status()

        # check for redirections
        if r.history and all([h.status_code == 302 for h in r.history]):
            logger.debug('Redirected to the subtitles page')
            links = [r.url]
        else:
            # get the suggestions (if needed)
            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            links = [link.attrs['href'] for link in soup.select('#processes div.generalWindowTop a')]
            logger.debug('Found %d suggestions', len(links))

        url_titles = defaultdict(list)
        for link in links:
            parts = link.split('/')
            url_titles[parts[-3]].append(parts[-2])

        return url_titles
Example #13
    def query(self, show_id, series, season, episode, year=None):
        # get the episode ids
        episode_ids = self.get_episode_ids(show_id, season)
        # Provider doesn't store multi episode information
        episode = min(episode) if episode and isinstance(episode, list) else episode

        if episode not in episode_ids:
            logger.error('Episode %d not found', episode)
            return []

        # get the episode page
        logger.info('Getting the page for episode %d', episode_ids[episode])
        r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10)
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitle rows
        subtitles = []
        for row in soup.select('.subtitlen'):
            # read the item
            language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
            subtitle_id = int(row.parent['href'][10:-5])
            page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
            rip = row.find('p', title='rip').text.strip() or None
            release = row.find('h5').text.strip() or None

            subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip,
                                           release)
            logger.info('Found subtitle %s', subtitle)
            subtitles.append(subtitle)

        soup.decompose()
        soup = None

        return subtitles
Example #14
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `series.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.series_url, timeout=10)
        r.raise_for_status()

        if r.status_code != 200:
            logger.error('Error getting show ids')
            raise ProviderError('Error getting show ids')

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td > a[href^="/show/"]'):
            show_ids[sanitize(show.get_text())] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Example #15
    def search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.
        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int or None
        :return: the show id, if any.
        :rtype: int or None
        """
        # make the search
        logger.info('Searching show id for %r', series)
        r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10)
        r.raise_for_status()

        # get the series out of the suggestions
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        show_id = None
        for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
            match = link_re.match(suggestion.text)
            if not match:
                logger.error('Failed to match %s', suggestion.text)
                continue

            if sanitize(match.group('series')).lower() == series.lower():
                if year is not None and int(match.group('first_year')) != year:
                    logger.debug('Year does not match')
                    continue
                show_id = int(suggestion['href'][8:-5])
                logger.debug('Found show id %d', show_id)
                break

        soup.decompose()
        soup = None

        return show_id
Example #16
    def query(self, series, season, episode, year=None):
        # get the show id
        show_id = self.get_show_id(series, year)
        if show_id is None:
            logger.error('No show id found for %s (%r)', series, year)
            return []

        # get the episode url
        episode_url = self.get_episode_url(show_id, series, season, episode, year)
        if episode_url is None:
            logger.error('No episode url found for %s, season %d, episode %d', series, season, episode)
            return []

        # get the page of the episode of the show
        r = self.session.get(episode_url, timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get episode title
        title_pattern = re.compile('Subt.+tulos de {}(.+){}x{:02d} - (.+)'.format(series, season, episode).lower())
        title = title_pattern.search(soup.select('#cabecera-subtitulo')[0].get_text().strip().lower()).group(2)

        # loop over subtitle rows
        subtitles = []

        for sub in soup.find_all('div', attrs={'id': re.compile('version([0-9]+)')}):
            # read the release subtitle
            release = sanitize_release_group(release_pattern.search(sub.find('p', class_='title-sub')
                                                                    .contents[2]).group(1))

            for html_language in sub.select('ul.sslist'):
                language = Language.fromtusubtitulo(html_language.find_next('b').get_text().strip())
                hearing_impaired = False

                # remap Latin American Spanish to plain Spanish and flag it as hearing impaired:
                # when both variants exist for the same episode, the plain Spanish subtitle
                # should get the higher score and take priority
                if language == Language('spa', 'MX'):
                    language = Language('spa')
                    hearing_impaired = True

                # ignore incomplete subtitles
                status = sanitize(html_language.find_next('li', class_=re.compile('li-estado')).get_text())
                if status != 'completado':
                    logger.debug('Ignoring subtitle with status %s', status)
                    continue

                # get the most updated version of the subtitle and if it doesn't exist get the original version
                html_status = html_language.select('a[href^="updated/"]')
                if len(html_status) == 0:
                    html_status = html_language.select('a[href^="original/"]')

                subtitle_url = self.server_url + html_status[0]['href']
                subtitle = TuSubtituloSubtitle(language, hearing_impaired, episode_url, series, season, episode, title,
                                               year, release, subtitle_url)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

        return subtitles
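
`release_pattern` is referenced but defined elsewhere; it must expose a single capture group, since the code reads `.group(1)` from the third content node of the title paragraph. A heavily hedged sketch:

    # hypothetical single-group pattern; the provider's real regex may differ
    import re
    release_pattern = re.compile(r'Versi.n (.+)')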
Example #17
    def query(self, series, season, episode, year=None):
        # get the episode url
        episode_url = self._search_url_titles(series, season, episode, year)
        if episode_url is None:
            logger.info(
                f"[{self.provider_name}]: No episode url found for {series}, season {season}, episode {episode}"
            )
            return []

        r = self.session.get(episode_url,
                             headers={"Referer": self.server_url},
                             timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ["lxml", "html.parser"])

        # get episode title
        title_pattern = re.compile("{}(.+){}x{:02d}- (.+)".format(
            series, season, episode).lower())
        title = title_pattern.search(
            soup.select("#episode_title")[0].get_text().strip().lower()).group(
                2)

        subtitles = []
        for sub in soup.find_all("div", attrs={"id": "progress_buttons_row"}):
            # read the language
            language = Language.fromsubtitulamos(
                sub.find_previous(
                    "div", class_="subtitle_language").get_text().strip())
            hearing_impaired = False

            # remap Latin American Spanish to plain Spanish and flag it as hearing impaired:
            # when both variants exist for the same episode, the plain Spanish subtitle
            # should get the higher score and take priority
            if language == Language("spa", "MX"):
                language = Language("spa")
                hearing_impaired = True

            # read the release subtitle
            release = sub.find_next("div",
                                    class_="version_name").get_text().strip()

            # ignore incomplete subtitles
            status = sub.find_next("div",
                                   class_="subtitle_buttons").contents[1]
            if status.name != "a":
                logger.debug("Ignoring subtitle in [%s] not finished",
                             language)
                continue

            # read the subtitle url
            subtitle_url = self.server_url + status["href"][1:]
            subtitle = SubtitulamosSubtitle(language, hearing_impaired,
                                            episode_url, series, season,
                                            episode, title, year, release,
                                            subtitle_url)
            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

        return subtitles
Example #18
    def query(self, languages=None, title=None, imdb_id=None, video=None):
        subtitles = []

        params = self.getQueryParams(imdb_id, title)

        search_response = self.session.get(self.api_url, params=params, timeout=15)
        search_response.raise_for_status()

        if not search_response.content:
            logger.debug('[#### Provider: titrari.ro] No data returned from provider')
            return []

        soup = ParserBeautifulSoup(search_response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('td[rowspan=\'5\']')
        for index, row in enumerate(rows):
            result_anchor_el = row.select_one('a')

            # Download link
            href = result_anchor_el.get('href')
            download_link = self.api_url + href

            fullTitle = row.parent.find("h1").find("a").text

            # Get title
            try:
                title = fullTitle.split("(")[0]
            except Exception:
                logger.error("[#### Provider: titrari.ro] Error parsing title.")

            # Get downloads count
            downloads = 0
            try:
                downloads = int(row.parent.parent.select("span")[index].text[12:])
            except Exception:
                logger.error("[#### Provider: titrari.ro] Error parsing downloads.")

            # Get year
            try:
                year = int(fullTitle.split("(")[1].split(")")[0])
            except Exception:
                year = None
                logger.error("[#### Provider: titrari.ro] Error parsing year.")

            # Get imdbId
            sub_imdb_id = self.getImdbIdFromSubtitle(row)

            # Get comments
            comments = None
            try:
                comments = row.parent.parent.find_all("td", class_=re.compile("comment"))[index*2+1].text
            except Exception:
                logger.error("[#### Provider: titrari.ro] Error parsing comments.")

            subtitle = self.subtitle_class(next(iter(languages)), download_link, index, None, title, sub_imdb_id, year, downloads, comments)
            logger.debug('[#### Provider: titrari.ro] Found subtitle %r', str(subtitle))
            subtitles.append(subtitle)

        ordered_subs = self.order(subtitles, video)

        return ordered_subs
Example #19
    def query(self, title):
        subtitles = []

        data = {
            'ajax': '1',
            'sSearch': title,
        }

        r = self.session.post(self.search_url, data=data, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('tbody > tr')
        for row in rows:
            # title
            title_anchor_el = row.select_one('.title > a')
            title_inner_text = [
                element for element in title_anchor_el
                if isinstance(element, NavigableString)
            ]
            title = title_inner_text[0].strip()

            # year
            year = row.select_one('.year').text.strip('()')

            # download link
            href = title_anchor_el.get('href')
            download_link = self.server_url + href

            # imdb id
            imdb_td = row.select_one('td:nth-of-type(4)')
            imdb_link = imdb_td.select_one('a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # fps
            fps = row.select_one('.fps').text.strip()

            # additional notes
            notes = row.select_one('.notes').text.strip()

            # page link = download link (there is no separate subtitle page link)
            page_link = download_link

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'),
                                           page_link, download_link, title,
                                           year, imdb_id, fps, notes)
            logger.debug('nekur: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
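
The `NavigableString` check keeps only the anchor's direct text nodes and skips child tags (such as a nested year element). The import it relies on comes from BeautifulSoup:

    # assumed import for the NavigableString check above
    from bs4 import NavigableString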
Example #20
    def query(self, series, season, episode, year=None):
        # get the episode url
        episode_url = self._search_url_titles(series, season, episode, year)
        if episode_url is None:
            logger.error('No episode url found for %s, season %d, episode %d',
                         series, season, episode)
            return []

        r = self.session.get(episode_url,
                             headers={'Referer': self.server_url},
                             timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get episode title
        title_pattern = re.compile('{}(.+){}x{:02d}- (.+)'.format(
            series, season, episode).lower())
        title = title_pattern.search(
            soup.select('#episode_title')[0].get_text().strip().lower()).group(
                2)

        subtitles = []
        for sub in soup.find_all('div', attrs={'id': 'progress_buttons_row'}):
            # read the language
            language = Language.fromsubtitulamos(
                sub.find_previous(
                    'div', class_='subtitle_language').get_text().strip())
            hearing_impaired = False

            # remap Latin American Spanish to plain Spanish and flag it as hearing impaired:
            # when both variants exist for the same episode, the plain Spanish subtitle
            # should get the higher score and take priority
            if language == Language('spa', 'MX'):
                language = Language('spa')
                hearing_impaired = True

            # read the release subtitle
            release = sub.find_next('div',
                                    class_='version_name').get_text().strip()

            # ignore incomplete subtitles
            status = sub.find_next('div',
                                   class_='subtitle_buttons').contents[1]
            if status.name != 'a':
                logger.debug('Ignoring subtitle in [%s] not finished',
                             language)
                continue

            # read the subtitle url
            subtitle_url = self.server_url + status['href'][1:]
            subtitle = SubtitulamosSubtitle(language, hearing_impaired,
                                            episode_url, series, season,
                                            episode, title, year, release,
                                            subtitle_url)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
Example #21
    def query_movies(self, video, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=30)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['html.parser'])

        # loop over movie names
        movies_url = []
        self.is_perfect_match = False
        movies = soup.select('.film > h3 > a')
        for item in movies:
            # title
            if title.lower() in item.text.lower():
                movies_url.append(item.attrs['href'])
                self.is_perfect_match = True

        for movies_page in movies_url:
            page_link = self.server_url + movies_page
            r = self.session.get(page_link, timeout=30)
            r.raise_for_status()

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['html.parser'])

            movies_subs_archives = soup.select('a.subList')
            for item in movies_subs_archives:
                download_link = self.server_url + 'films/' + item.attrs['href']
                res = self.session.get(download_link, timeout=30)
                res.raise_for_status()

                archive = self._get_archive(res.content)
                # extract the subtitle
                if archive:
                    subtitles_from_archive = self._get_subtitle_from_archive(
                        archive, video)
                    for subtitle in subtitles_from_archive:
                        subtitle.page_link = page_link
                        subtitle.download_link = download_link
                        subtitles.append(subtitle)

        return subtitles
Example #22
    def query(self, series, season, episode, year=None):
        # get the show id
        show_id = self.get_show_id(series, year)
        if show_id is None:
            logger.error("No show id found for %s (%r)", series, year)
            return []

        # get the episode url
        episode_url = self.get_episode_url(show_id, series, season, episode, year)
        if episode_url is None:
            logger.info(f"[{self.provider_name}]: No episode url found for {series}, season {season}, episode {episode}")
            return []

        # get the page of the episode of the show
        r = self.session.get(episode_url, timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ["lxml", "html.parser"])

        # get episode title
        title_pattern = re.compile("Subt.+tulos de {}(.+){}x{:02d} - (.+)".format(series, season, episode).lower())
        title = title_pattern.search(soup.select("#cabecera-subtitulo")[0].get_text().strip().lower()).group(2)

        # loop over subtitle rows
        subtitles = []

        for sub in soup.find_all("div", attrs={"id": re.compile("version([0-9]+)")}):
            # read the release subtitle
            release = sanitize_release_group(release_pattern.search(sub.find("p", class_="title-sub").contents[2]).group(1))

            for html_language in sub.select("ul.sslist"):
                language = Language.fromtusubtitulo(html_language.find_next("b").get_text().strip())
                hearing_impaired = False

                # remap Latin American Spanish to plain Spanish and flag it as hearing impaired:
                # when both variants exist for the same episode, the plain Spanish subtitle
                # should get the higher score and take priority
                if language == Language("spa", "MX"):
                    language = Language("spa")
                    hearing_impaired = True

                # ignore incomplete subtitles
                status = sanitize(html_language.find_next("li", class_=re.compile("li-estado")).get_text())
                if status != "completado":
                    logger.debug("Ignoring subtitle with status %s", status)
                    continue

                # get the most updated version of the subtitle and if it doesn't exist get the original version
                html_status = html_language.select('a[href^="updated/"]')
                if len(html_status) == 0:
                    html_status = html_language.select('a[href^="original/"]')

                subtitle_url = self.server_url + html_status[0]["href"]
                subtitle = TuSubtituloSubtitle(language, hearing_impaired, episode_url, series, season, episode, title, year, release, subtitle_url)
                logger.debug("Found subtitle %r", subtitle)
                subtitles.append(subtitle)

        return subtitles
Example #23
    def query(self, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('.eBlock')
        for row in rows:
            result_anchor_el = row.select_one('.eTitle > a')

            # page link
            page_link = result_anchor_el.get('href')

            # fetch/parse additional info
            r = self.session.get(page_link, timeout=10)
            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['lxml', 'html.parser'])

            # title
            movie_titles_string = soup.select_one('.main-header').text.strip()
            movie_titles_list = movie_titles_string.split(' / ')
            title = movie_titles_list[-1]

            # year
            year = soup.select_one('#film-page-year').text.strip()

            # imdb id
            imdb_link = soup.select_one('#actors-page > a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # download link
            href = soup.select_one('.hvr').get('href')
            download_link = self.server_url + href

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'),
                                           page_link, download_link, title,
                                           year, imdb_id)
            logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
Example #24
    def get_episode_url(self, show_id, series, season, episode, year=None):
        """Get the url best matching show id for `series`, `season`, `episode` and `year`.

        :param int show_id: show id of the series
        :param str series: series of the episode.
        :param int season: season of the episode.
        :param int episode: number of the episode.
        :param int year: year of the series.
        :return: the episode url, if found.
        :rtype: str

        """
        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id,
                    season)

        series_sanitized = sanitize(series)
        episode_url = None

        r = self.session.get(self.subtitles_url,
                             params={
                                 'show': show_id,
                                 'season': season
                             },
                             timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over episode rows
        for html_episode in soup.select('td > a[href*="/episodes/"]'):
            title = sanitize(html_episode.get_text())

            # attempt series with year
            if sanitize('{} {} {}x{:02d}'.format(series_sanitized, year,
                                                 season, episode)) in title:
                episode_url = 'https://' + html_episode['href'][2:]
                logger.debug(
                    'Subtitle found for %s, season: %d, episode: %d. URL: %s',
                    series, season, episode, episode_url)
                break
            elif sanitize('{} {}x{:02d}'.format(series_sanitized, season,
                                                episode)) in title:
                episode_url = 'https://' + html_episode['href'][2:]
                logger.debug(
                    'Subtitle found for %s, season: %d, episode: %d. URL: %s',
                    series, season, episode, episode_url)
                break

        return episode_url
Example #25
    def query(self, movie_id, title, year):
        # get the subtitle list page of the movie
        logger.info('Getting the subtitle list of movie id %s', movie_id)
        if movie_id:
            page_link = self.server_url + '/' + movie_id
        else:
            page_link = self.server_url + self.search_url.format(' '.join(
                [title, str(year)]))

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['html.parser'])

        # use a separate name so the `year` search parameter is not clobbered
        year_num = None
        year_element = soup.select_one('td#dates_header > table div')
        matches = False
        if year_element:
            matches = year_re.match(str(year_element.contents[2]).strip())
        if matches:
            year_num = int(matches.group(1))

        title_tag = soup.select_one('td#dates_header > table u')
        show_title = str(title_tag.contents[0]).strip() if title_tag else None

        subtitles = []
        # loop over subtitle rows
        for subs_tag in soup.select('.movie-details'):
            # read common info
            version = subs_tag.find('span').text
            download_link = self.server_url + subs_tag.find('a')['href']
            uploader = subs_tag.select_one('.movie-info').find('p').find(
                'a').text
            language_code = subs_tag.select_one('.sprite')['class'][1].split(
                'gif')[0]
            language = Language.fromietf(language_code)

            subtitle = self.subtitle_class(language, page_link, show_title,
                                           year_num, version, download_link,
                                           uploader)

            logger.debug('Found subtitle {!r}'.format(subtitle))
            subtitles.append(subtitle)

        return subtitles
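
`year_re` must expose one capture group holding a four-digit year, since the code reads `matches.group(1)` and converts it to `int`. A sketch under that assumption:

    # hypothetical pattern with the single year group read above
    import re
    year_re = re.compile(r'(\d{4})')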
Example #26
    def query(self, movie_id, title, year):
        # get the subtitle list page of the movie
        logger.info('Getting the subtitle list of movie id %s', movie_id)
        if movie_id:
            page_link = self.server_url + '/' + movie_id
        else:
            page_link = self.server_url + text_type(self.search_url).format(
                ' '.join([title, str(year)]))

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['html.parser'])

        year_num = None
        year_element = soup.select_one('td#dates_header > table div')
        matches = False
        if year_element:
            matches = year_re.match(str(year_element.contents[2]).strip())
        if matches:
            year_num = int(matches.group(1))

        title_element = soup.select_one('td#dates_header > table u')
        show_title = str(
            title_element.contents[0]).strip() if title_element else None

        subtitles = []
        # loop over subtitle rows
        for sub_tag in soup.select(
                'table.table_border div[align="center"] > div'):
            # read common info
            version = sub_tag.find('b').text
            download_link = self.server_url + sub_tag.find('a')['href']
            language = Language.fromalpha2(
                sub_tag.find('img')['src'].split('/')[-1].split('.')[0])

            subtitle = self.subtitle_class(language, page_link, show_title,
                                           year_num, version, download_link)

            logger.debug('Found subtitle {!r}'.format(subtitle))
            subtitles.append(subtitle)

        return subtitles
Example #27
    def query(self, show_id, series, season, episode, title):
        # get the subtitle list of the show
        logger.info('Getting the subtitle list of show id %s', show_id)
        if all((show_id, season, episode)):
            page_link = self.server_url + self.episode_link.format(
                show_id=show_id, season=season, episode=episode)
        else:
            return []

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        year = None
        matches = year_re.match(
            str(soup.select_one(
                '#dates_header_br > table div').contents[2]).strip())
        if matches:
            year = int(matches.group(1))
        show_title = str(
            soup.select_one('#dates_header_br > table div u').string).strip()

        subtitles = []
        # loop over subtitle rows
        for subs_tag in soup.select('table .seeDark,.seeMedium'):
            # read common info
            version = subs_tag.find_all('b')[0].text
            download_link = self.server_url + subs_tag.find('a')['href']
            uploader = subs_tag.find_all('b')[1].text
            language = Language.fromalpha2(
                subs_tag.find('img')['src'].split('/')[-1].split('.')[0])

            subtitle = self.subtitle_class(language, page_link, show_title,
                                           year, version, download_link,
                                           uploader)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
Example #28
    def get_episode_ids(self, show_id, season):
        """Get episode ids from the show id and the season.

        :param int show_id: show id.
        :param int season: season of the episode.
        :return: episode ids per episode number.
        :rtype: dict

        """
        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id,
                    season)
        r = self.session.get(self.server_url + 'tvshow-%d-%d.html' %
                             (show_id, season),
                             timeout=10)
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over episode rows
        episode_ids = {}
        for row in soup.select('table#table5 tr'):
            # skip rows that do not have a link to the episode page
            if not row('a', href=episode_id_re):
                continue

            # extract data from the cells
            cells = row('td')
            episode = int(cells[0].text.split('x')[1])
            episode_id = int(cells[1].a['href'][8:-5])
            episode_ids[episode] = episode_id

        if episode_ids:
            logger.debug('Found episode ids %r', episode_ids)
        else:
            logger.warning('No episode ids found')

        soup.decompose()
        soup = None

        return episode_ids
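
`episode_id_re` filters anchors by href; the later slicing `cells[1].a['href'][8:-5]` implies links of the form `episode-<id>.html`, so a consistent sketch would be:

    # hypothetical pattern consistent with the href slicing above
    import re
    episode_id_re = re.compile(r'^episode-\d+\.html$')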
Example #29
    def query(self, show_id, series, season, episode, year=None):
        # get the episode ids
        episode_ids = self.get_episode_ids(show_id, season)
        # Provider doesn't store multi episode information
        episode = min(episode) if episode and isinstance(episode,
                                                         list) else episode

        if episode not in episode_ids:
            logger.error('Episode %d not found', episode)
            return []

        # get the episode page
        logger.info('Getting the page for episode %d', episode_ids[episode])
        r = self.session.get(self.server_url +
                             'episode-%d.html' % episode_ids[episode],
                             timeout=10)
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitle rows
        subtitles = []
        for row in soup.select('.subtitlen'):
            # read the item
            language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
            subtitle_id = int(row.parent['href'][10:-5])
            page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
            rip = row.find('p', title='rip').text.strip() or None
            release = row.find('h5').text.strip() or None

            subtitle = self.subtitle_class(language, page_link, subtitle_id,
                                           series, season, episode, year, rip,
                                           release)
            logger.info('Found subtitle %s', subtitle)
            subtitles.append(subtitle)

        soup.decompose()
        soup = None

        return subtitles
Example #30
    def get_episode_ids(self, show_id, season):
        """Get episode ids from the show id and the season.

        :param int show_id: show id.
        :param int season: season of the episode.
        :return: episode ids per episode number.
        :rtype: dict

        """
        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id, season)
        r = self.session.get(self.server_url + 'tvshow-%d-%d.html' % (show_id, season), timeout=10)
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over episode rows
        episode_ids = {}
        for row in soup.select('table#table5 tr'):
            # skip rows that do not have a link to the episode page
            if not row('a', href=episode_id_re):
                continue

            # extract data from the cells
            cells = row('td')
            episode = int(cells[0].text.split('x')[1])
            episode_id = int(cells[1].a['href'][8:-5])
            episode_ids[episode] = episode_id

        if episode_ids:
            logger.debug('Found episode ids %r', episode_ids)
        else:
            logger.warning('No episode ids found')

        soup.decompose()
        soup = None

        return episode_ids
Example #31
    def query_series(self, video, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=30)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['html.parser'])

        # loop over series names
        self.is_perfect_match = False
        series_url = []
        series = soup.select('.serie > h3 > a')
        for item in series:
            # title
            if title in item.text:
                series_url.append(item.attrs['href'])
                self.is_perfect_match = True

        for series_page in series_url:
            page_link = self.server_url + series_page
            r = self.session.get(page_link, timeout=30)
            r.raise_for_status()

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['html.parser'])

            series_subs_archives = soup.select('a.subList')
            for item in series_subs_archives:
                matching_archive = False
                subtitles_archive_name = unquote(
                    item.attrs['href'].split('/')[-1:][0][:-4])
                guessed_subs = guessit(subtitles_archive_name,
                                       {'type': 'episode'})
                try:
                    season, episode = item.select_one(
                        '.episodenum').text.split('×')
                    guessed_subs.update({
                        'season': int(season),
                        'episode': int(episode)
                    })
                except ValueError:
                    season = item.select_one('.episodenum').text[1:]
                    episode = None
                    guessed_subs.update({'season': int(season)})

                if guessed_subs['season'] == video.season:
                    if 'episode' in guessed_subs:
                        if guessed_subs['episode'] == video.episode:
                            matching_archive = True
                    else:
                        matching_archive = True

                if matching_archive:
                    download_link = self.server_url + 'series/' + item.attrs[
                        'href']
                    res = self.session.get(download_link, timeout=30)
                    res.raise_for_status()

                    archive = self._get_archive(res.content)
                    # extract the subtitle
                    if archive:
                        subtitles_from_archive = self._get_subtitle_from_archive(
                            archive, video)
                        for subtitle in subtitles_from_archive:
                            subtitle.page_link = page_link
                            subtitle.download_link = download_link
                            subtitles.append(subtitle)

        return subtitles
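
The fallback above leans on `guessit` to recover season and episode numbers from the archive file name. A quick illustration of the dict-like result it returns (the file name is made up):

    from guessit import guessit

    # hypothetical archive name, for illustration only
    guessed = guessit('Some.Show.S05E14.720p.HDTV', {'type': 'episode'})
    print(guessed.get('season'), guessed.get('episode'))  # -> 5 14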
Example #32
    def query(self, language=None, title=None, imdb_id=None, video=None):
        subtitles = []

        params = self.getQueryParams(imdb_id, title, language)

        search_response = self.session.get(self.api_url, params=params, timeout=15)
        search_response.raise_for_status()

        if not search_response.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(search_response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('td[rowspan="5"]')
        for index, row in enumerate(rows):
            result_anchor_el = row.select_one('a')

            # Download link
            href = result_anchor_el.get('href')
            download_link = self.api_url + href

            fullTitle = row.parent.select('h1 a')[0].text

            # Get title
            try:
                title = fullTitle.split("(")[0]
            except Exception:
                logger.error("Error parsing title")

            # Get downloads count
            downloads = 0
            try:
                downloads = int(row.parent.parent.select('span')[index].text[12:])
            except Exception:
                logger.error("Error parsing downloads")

            # Get year
            year = None
            try:
                year = int(fullTitle.split("(")[1].split(")")[0])
            except Exception:
                logger.error("Error parsing year")

            # Get imdbId
            sub_imdb_id = self.getImdbIdFromSubtitle(row)

            comments = ''
            try:
                comments = row.parent.parent.select('.comment')[1].text
            except Exception:
                logger.error("Error parsing comments")

            # Get page_link
            page_link = None
            try:
                page_link = self.api_url + row.parent.select('h1 a')[0].get('href')
            except Exception:
                logger.error("Error parsing page_link")

            # Get uploader
            uploader = None
            try:
                uploader = row.parent.select('td.row1.stanga a')[-1].text
            except Exception:
                logger.error("Error parsing uploader")

            episode_number = video.episode if isinstance(video, Episode) else None
            subtitle = self.subtitle_class(language, download_link, index, comments, title, sub_imdb_id, page_link, uploader,
                                           year, downloads, isinstance(video, Episode), episode_number)
            logger.debug('Found subtitle %r', str(subtitle))
            subtitles.append(subtitle)

        ordered_subs = self.order(subtitles)

        return ordered_subs
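The query ends with self.order(subtitles), whose body is not shown here. A plausible module-level sketch, assuming it simply ranks results by download count (the real provider may weigh other fields):

def order(subtitles):
    # a guess at what self.order() does: most-downloaded results first;
    # not the provider's actual implementation
    return sorted(subtitles, key=lambda subtitle: subtitle.downloads,
                  reverse=True)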
Ejemplo n.º 33
0
    def query(self, languages, video):
        subtitle_name = "%s %dx%02d" % (video.series, video.season,
                                        video.episode)
        logger.debug('Searching subtitles "%s"', subtitle_name)

        response = self.session.get(self.server_url + '/search/query',
                                    params={'q': video.series},
                                    timeout=10)
        response.raise_for_status()
        result = response.json()

        subtitles = []
        for serie in result:
            # skip non-matching series
            if video.series.lower() != serie['show_name'].lower():
                continue

            # season page
            response = self.session.get(self.server_url +
                                        "/shows/%d" % serie['show_id'],
                                        timeout=10)
            response.raise_for_status()
            soup = ParserBeautifulSoup(response.text, ['lxml', 'html.parser'])
            season_found = False
            for season in soup.select('#season-choices a'):
                if season.text.strip() == str(video.season):
                    season_found = True
                    if "selected" not in season.attrs['class']:
                        # go to the right season page
                        response = self.session.get(self.server_url +
                                                    season['href'],
                                                    timeout=10)
                        response.raise_for_status()
                        soup = ParserBeautifulSoup(response.text,
                                                   ['lxml', 'html.parser'])
                        break
            if not season_found:
                continue

            # episode page
            episode_found = False
            for episode in soup.select('#episode-choices a'):
                if episode.text.strip() == str(video.episode):
                    episode_found = True
                    if "selected" not in episode.attrs['class']:
                        # go to the right episode page
                        response = self.session.get(self.server_url +
                                                    episode['href'],
                                                    timeout=10)
                        response.raise_for_status()
                        soup = ParserBeautifulSoup(response.text,
                                                   ['lxml', 'html.parser'])
                        break
            if not episode_found:
                continue
            episode_url = response.url

            # subtitles
            for lang in soup.select("div.language-container"):
                lang_name = lang.select("div.language-name")[0].text
                if "English" in lang_name:
                    language = "en"
                elif "Español" in lang_name:
                    language = "es"
                else:
                    continue  # not supported yet
                logger.debug('Found subtitles in "%s" language.', language)

                for release in lang.select("div.version-container"):
                    if len(release.select('a[href*="/download"]')) != 1:
                        continue  # incomplete translation, download link is not available

                    release_name = release.select(
                        'div.version-container p')[1].text
                    release_url = self.server_url + release.select(
                        'a[href*="/download"]')[0]['href']

                    subtitles.append(
                        SubtitulamosTVSubtitle(Language.fromietf(language),
                                               episode_url, release_url,
                                               release_name))

        return subtitles
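The English/Español branch above is the only provider-specific language logic; it could be table-driven instead. A sketch of the same check, where LANGUAGE_MAP and detect_language are hypothetical names covering only the two languages the snippet supports:

from babelfish import Language

# hypothetical lookup table for the two languages the provider serves
LANGUAGE_MAP = {'English': 'en', 'Español': 'es'}


def detect_language(lang_name):
    """Return a babelfish Language for a supported name, else None."""
    for name, code in LANGUAGE_MAP.items():
        if name in lang_name:
            return Language.fromietf(code)
    return None

Unsupported names fall through to None, matching the `continue` in the loop above.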
Ejemplo n.º 34
0
    def query(self, languages=None, title=None, imdb_id=None, video=None):
        subtitles = []

        params = self.getQueryParams(imdb_id, title)
        search_response = self.session.post(self.api_url,
                                            data=params,
                                            timeout=15)
        search_response.raise_for_status()

        soup = ParserBeautifulSoup(
            search_response.content.decode('utf-8', 'ignore'),
            ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('div[id="round"]')

        if not rows:
            logger.debug('No data returned from provider')
            return []

        # release comments live outside the parent element that holds the
        # subtitle details, so collect them into a separate, index-aligned list
        comment_rows = soup.find_all('div',
                                     attrs={
                                         'class': None,
                                         'id': None,
                                         'align': None
                                     })

        for index, row in enumerate(rows):
            result_anchor_el = row.select_one('.buton').select('a')

            # Download link
            href = result_anchor_el[0]['href']
            download_link = self.server_url + href

            fullTitle = row.select_one('#content-main a').text

            # Get title
            try:
                title = fullTitle.split("(")[0]
            except Exception:
                logger.error("Error parsing title")

            # Get uploader
            uploader = None
            try:
                uploader = row.select('#content-main p')[4].text[10:]
            except Exception:
                logger.error("Error parsing uploader")

            # Get downloads count
            downloads = 0
            try:
                downloads = int(row.select_one('#content-right p').text[12:])
            except Exception:
                logger.error("Error parsing downloads")

            # Get year
            year = None
            try:
                year = int(fullTitle.split("(")[1].split(")")[0])
            except Exception:
                logger.error("Error parsing year")

            # Get imdbId
            sub_imdb_id = self.getImdbIdFromSubtitle(row)

            comments = ''
            try:
                comments = comment_rows[index].text
                logger.debug('Comments: %s', comments)
            except Exception:
                logger.error("Error parsing comments")

            # Get page_link
            page_link = None
            try:
                page_link = row.select_one('#content-main a')['href']
            except Exception:
                logger.error("Error parsing page_link")

            episode_number = video.episode if isinstance(video,
                                                         Episode) else None
            subtitle = self.subtitle_class(next(iter(languages)),
                                           download_link, index, comments,
                                           title, sub_imdb_id, uploader,
                                           page_link, year, downloads,
                                           isinstance(video,
                                                      Episode), episode_number)
            logger.debug('Found subtitle %r', str(subtitle))
            subtitles.append(subtitle)

        ordered_subs = self.order(subtitles)

        return ordered_subs
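The repeated try/except blocks in this and the previous example can be collapsed into one helper. A minimal sketch, where parse_or_default is a hypothetical name, not part of either provider:

import logging

logger = logging.getLogger(__name__)


def parse_or_default(label, extract, default=None):
    """Run a scraping extractor and log, rather than raise, on failure."""
    try:
        return extract()
    except Exception:
        logger.error('Error parsing %s', label)
        return default

With it, the year block shrinks to year = parse_or_default('year', lambda: int(fullTitle.split('(')[1].split(')')[0])).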
Ejemplo n.º 35
0
    def query(self, keyword, season=None, episode=None, year=None):
        params = keyword
        if season and episode:
            params += ' S{season:02d}E{episode:02d}'.format(season=season,
                                                            episode=episode)
        elif year:
            params += ' {:4d}'.format(year)

        logger.debug('Searching subtitles %r', params)
        subtitles = []
        search_link = self.server_url + text_type(
            self.search_url).format(params)

        r = self.session.get(search_link, timeout=30)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        for entity in soup.select('div.item.prel.clearfix a:nth-of-type(2)'):
            moviename = entity.text
            entity_url = self.server_url + entity['href']
            logger.debug('Looking into %s', entity_url)
            r = self.session.get(entity_url, timeout=30)
            r.raise_for_status()

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['lxml', 'html.parser']).find(
                                           "div", class_="subs box clearfix")
            if soup is None:
                continue

            # loop over subtitle cells
            subs = soup.tbody.find_all("tr")
            for sub in subs:
                page_link = '%s%s' % (self.server_url, sub.a.get('href'))
                version = sub.a.text or ''
                try:
                    td = sub.find("td", class_="tac lang")
                    langs = [img.get('title') for img in td.find_all("img")]
                except Exception:
                    langs = ['未知']
                name = '%s (%s)' % (version, ",".join(langs))

                if ('English' in langs) and not (('简体中文' in langs) or
                                                 ('繁體中文' in langs)):
                    language = Language('eng')
                else:
                    language = Language('zho')
                # read the item
                subtitle = self.subtitle_class(
                    language, page_link, version,
                    page_link.replace("detail", "dld"))

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

        return subtitles
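The language rule above (English only when no Chinese variant is listed alongside it) is compact enough to test in isolation. A sketch assuming babelfish's Language, as used throughout these examples; pick_language is a hypothetical helper name:

from babelfish import Language


def pick_language(langs):
    """English only when no Chinese variant accompanies it, else Chinese."""
    has_chinese = ('简体中文' in langs) or ('繁體中文' in langs)
    if 'English' in langs and not has_chinese:
        return Language('eng')
    return Language('zho')

For example, pick_language(['English']) yields English, while pick_language(['English', '简体中文']) yields Chinese, since bilingual releases are filed under Chinese.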
Ejemplo n.º 36
0
    def query(self, show_id, series, season, episode, title):
        # get the season list of the show
        logger.info('Getting the subtitle list of show id %s', show_id)
        is_episode = False
        if all((show_id, season, episode)):
            is_episode = True
            page_link = self.server_url + self.episode_link.format(
                show_id=show_id, season=season, episode=episode)
        elif all((show_id, title)):
            page_link = self.server_url + self.movie_link.format(show_id)
        else:
            return []

        r = self.session.get(page_link, timeout=10)
        if r.status_code == 404:
            return []

        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        year = None
        if not is_episode:
            year = int(soup.select_one('span.year').text)

        subtitles = []
        # loop over subtitle rows
        for subs_tag in soup.select('div[id="subtitles"] tr[data-id]'):
            # read common info
            version = subs_tag.find('td', {'class': 'name'}).text
            download_link = subs_tag.find(
                'a', {'class': 'btn-success'})['href'].strip('\'')

            # read the episode info
            if is_episode:
                episode_numbers = soup.select_one(
                    '#summary-wrapper > div.container.summary span.main-title-sxe'
                ).text
                season = None
                episode = None
                matches = episode_re.match(episode_numbers.strip())
                if matches:
                    season = int(matches.group(1))
                    episode = int(matches.group(2))

                series = soup.select_one(
                    '#summary-wrapper > div.summary h2 > a').string.strip()
                title = soup.select_one(
                    '#summary-wrapper > div.container.summary span.main-title'
                ).text

                subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                               page_link, series, season,
                                               episode, title, year, version,
                                               download_link)
            # read the movie info
            else:
                title = str(
                    soup.select_one('#summary-wrapper > div.summary h1').
                    contents[0]).strip()
                subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                               page_link, None, None, None,
                                               title, year, version,
                                               download_link)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
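This last query depends on an episode_re defined elsewhere in the module. A compatible sketch for 'season x episode' strings such as '5x12', as suggested by the .main-title-sxe element; the provider's actual pattern may differ:

import re

# hypothetical pattern; the provider's real episode_re lives elsewhere
episode_re = re.compile(r'^(\d+)\s*x\s*(\d+)', re.IGNORECASE)

matches = episode_re.match('5x12')
if matches:
    season = int(matches.group(1))   # -> 5
    episode = int(matches.group(2))  # -> 12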