Esempio n. 1
0
    def download_subtitle(self, subtitle):
        """Download a subtitle and store its content on ``subtitle``.

        Two requests are made: a POST to the ``download`` endpoint, whose JSON
        answer carries a ``link``, followed by a GET on that link. The link is
        kept on ``subtitle.download_link``; the fetched bytes, passed through
        ``fix_line_ending``, are stored on ``subtitle.content``. When the GET
        returns an empty body only a debug message is logged.

        :param subtitle: subtitle to download; its ``file_id`` is sent to the
            provider.
        :raises TooManyRequests: when either request is answered with HTTP 429.
        :raises DownloadLimitExceeded: when either request is answered with
            HTTP 406.
        :raises ProviderError: when the first answer is not valid JSON.

        Any other HTTP error status surfaces through ``raise_for_status()``.
        """
        def check_response(response):
            # The provider-specific codes must be mapped before calling
            # raise_for_status(): it raises for every 4xx/5xx status, which
            # would otherwise make the 429/406 branches unreachable.
            if response.status_code == 429:
                raise TooManyRequests()
            if response.status_code == 406:
                raise DownloadLimitExceeded("Daily download limit reached")
            response.raise_for_status()

        logger.info('Downloading subtitle %r', subtitle)

        headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
        res = self.session.post(self.server_url + 'download',
                                json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                                headers=headers,
                                timeout=10)
        check_response(res)

        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        r = self.session.get(subtitle.download_link, timeout=10)
        # Inspect the download response itself, not the earlier POST response.
        check_response(r)

        subtitle_content = r.content
        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
Esempio n. 2
0
    def search_titles(self, title):
        """Look up the provider's id for ``title``.

        The lower-cased title is sent to the ``features`` endpoint. A 401
        answer clears the cached token, logs in again and repeats the request
        once. Every returned entry that carries a title is compared, after
        ``fix_tv_naming`` (episodes) or ``fix_movie_naming`` (anything else),
        with the entry's title, and with ``self.video.year`` when that is set.

        :param str title: title of the series or movie to look for.
        :return: the id of the first matching entry, passed through
            ``sanitize_external_ids``; ``None`` when nothing matches.
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: on HTTP 503 or when the answer is not valid JSON.
        """
        title_id = None

        query = {'query': title.lower()}
        logging.debug('Searching using this title: {}'.format(title))

        def fetch():
            return self.session.get(self.server_url + 'features', params=query, timeout=30)

        results = fetch()
        if results.status_code == 401:
            logging.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()
            results = fetch()

        # Whichever response is current (first or retried) gets the same checks.
        if results.status_code == 429:
            raise TooManyRequests()
        if results.status_code == 503:
            raise ProviderError(results.reason)

        try:
            entries = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            for entry in entries:
                if 'title' not in entry['attributes']:
                    continue

                if isinstance(self.video, Episode):
                    wanted = fix_tv_naming(title).lower()
                else:
                    wanted = fix_movie_naming(title).lower()

                # The year is only compared when the video carries one.
                if wanted == entry['attributes']['title'].lower() and \
                        (not self.video.year or self.video.year == int(entry['attributes']['year'])):
                    title_id = entry['id']
                    break

            if title_id:
                logging.debug('Found this title ID: {}'.format(title_id))
                return self.sanitize_external_ids(title_id)
        finally:
            # Runs on every exit path, including the error paths above.
            if not title_id:
                logger.debug('No match found for {}'.format(title))
Esempio n. 3
0
    def download_subtitle(self, subtitle):
        """Download a subtitle and store its content on ``subtitle``.

        A login is attempted first when no token is cached. The method then
        POSTs the ``file_id`` to the ``download`` endpoint, reads the ``link``
        from the JSON answer into ``subtitle.download_link`` and GETs that
        link. The fetched bytes, passed through ``fix_line_ending``, are stored
        on ``subtitle.content``; an empty body only produces a debug message.

        :param subtitle: subtitle to download; its ``file_id`` is sent to the
            provider.
        :raises TooManyRequests: when either request is answered with HTTP 429.
        :raises DownloadLimitExceeded: when either request is answered with
            HTTP 406.
        :raises ProviderError: when no token can be obtained, when either
            request is answered with HTTP 503, or when the first answer is not
            valid JSON.
        """
        def check_response(response):
            # Translate the status codes the provider uses for throttling,
            # quota exhaustion and outages into the matching exceptions.
            if response.status_code == 429:
                raise TooManyRequests()
            if response.status_code == 406:
                raise DownloadLimitExceeded("Daily download limit reached")
            if response.status_code == 503:
                raise ProviderError(response.reason)

        if self.token is NO_VALUE:
            logger.debug("No cached token, we'll try to login again.")
            self.login()
        if self.token is NO_VALUE:
            logger.debug(
                "Unable to obtain an authentication token right now, we'll try again later."
            )
            raise ProviderError("Unable to obtain an authentication token")

        logger.info('Downloading subtitle %r', subtitle)

        # NOTE(review): the scheme is spelled 'Beaker' here; HTTP token
        # authentication normally uses 'Bearer' - confirm against the provider API.
        headers = {
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': 'Beaker ' + self.token
        }
        res = self.session.post(self.server_url + 'download',
                                json={
                                    'file_id': subtitle.file_id,
                                    'sub_format': 'srt'
                                },
                                headers=headers,
                                timeout=30)
        check_response(res)

        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        r = self.session.get(subtitle.download_link, timeout=30)
        # Inspect the download response itself, not the earlier POST response.
        check_response(r)

        subtitle_content = r.content
        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from {}'.format(
                subtitle.download_link))
Esempio n. 4
0
    def search_titles(self, title):
        """Look up the provider's id for ``title``.

        The ``features`` endpoint is queried by IMDB id when the video carries
        one (``series_imdb_id`` for episodes, ``imdb_id`` for movies) and by
        title otherwise. A 401 answer clears the cached token, logs in again
        and repeats the request once. The first entry whose title equals
        ``title`` (case-insensitively), and whose year equals
        ``self.video.year`` when that is set, wins.

        :param str title: title of the series or movie to look for.
        :return: the id of the first matching entry, or ``None``.
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: when the answer is not valid JSON.

        Any other HTTP error status surfaces through ``raise_for_status()``.
        """
        title_id = None
        imdb_id = None

        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.video.series_imdb_id
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.video.imdb_id

        if imdb_id:
            parameters = {'imdb_id': imdb_id}
            logging.debug('Searching using this IMDB id: {}'.format(imdb_id))
        else:
            parameters = {'query': title}
            logging.debug('Searching using this title: {}'.format(title))

        results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        # The 401/429 handling has to come before raise_for_status(): that call
        # raises for every 4xx status and would make these branches unreachable.
        if results.status_code == 401:
            logging.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()

            results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        if results.status_code == 429:
            raise TooManyRequests()
        results.raise_for_status()

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        # loop over results
        for result in results_dict:
            if title.lower() == result['attributes']['title'].lower() and \
                    (not self.video.year or self.video.year == int(result['attributes']['year'])):
                title_id = result['id']
                break

        if title_id:
            logging.debug('Found this title ID: {}'.format(title_id))
            return title_id

        # Only reached when the search completed without a match, so the
        # message is no longer emitted on error paths.
        logger.debug('No match found for {}'.format(title))
        return None
Esempio n. 5
0
    def download_subtitle(self, subtitle):
        """Fetch the archive behind ``subtitle.page_link`` and extract the subtitle.

        The request is preceded by a one second pause. The response body is
        handed to ``_get_archive`` and the result to
        ``_get_subtitle_from_archive``; extracted content is passed through
        ``fix_line_ending``, stored on ``subtitle.content`` and normalized.

        :param subtitle: subtitle to download.
        :return: ``subtitle`` when content was extracted, otherwise ``None``.
        :raises IPAddressBlocked: on an HTTP error whose body mentions
            "bloqueado".
        :raises TooManyRequests: on any other HTTP error.
        :raises ServiceUnavailable: on any other exception during the request.
        :raises DownloadLimitExceeded: when a 200 answer carries the daily
            limit message.
        """
        try:
            # sleep for a 1 second before another request
            sleep(1)
            res = self.session.get(subtitle.page_link)
            res.raise_for_status()
        except HTTPError as e:
            # Read the body from the response attached to the error instead of
            # ``res``, which is not bound if the error precedes the assignment.
            response = getattr(e, 'response', None)
            if response is not None and "bloqueado" in response.text.lower():
                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            # Exceptions do not interpolate logging-style arguments; format here.
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % (e,))
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % (e,))

        # make sure we haven't maxed out our daily limit
        if res.status_code == 200 and 'limite de downloads diário atingido' in res.text.lower():
            logger.error("LegendasDivx.pt :: Daily download limit reached!")
            raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")

        archive = self._get_archive(res.content)
        # extract the subtitle
        if archive:
            subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
                subtitle.normalize()
                return subtitle
        return None
Esempio n. 6
0
    def api_request(self, func_name="logIn", params="", tries=5):
        """Send a SOAP request to ``self.search_url`` and parse the answer.

        The call is attempted up to ``tries`` times. Any exception raised while
        posting or parsing is logged and followed by a one second pause; for
        the ``logIn`` function ``self.search_url`` is additionally replaced by
        the result of ``get_sub_domain()`` before the next attempt.

        :param str func_name: name of the remote function to invoke.
        :param str params: XML fragment placed inside the function element.
        :param int tries: maximum number of attempts.
        :return: the parsed response as returned by ``ElementTree.fromstring``.
        :raises TooManyRequests: when every attempt failed.
        """
        soap_action = f'"http://api.bsplayer-subtitles.com/v1.php#{func_name}"'
        request_headers = {
            "User-Agent": "BSPlayer/2.x (1022.12360)",
            "Content-Type": "text/xml; charset=utf-8",
            "Connection": "close",
            "SOAPAction": soap_action,
        }

        # The envelope is built once, with the search URL current at call time.
        envelope = "".join([
            '<?xml version="1.0" encoding="UTF-8"?>\n',
            '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" ',
            'xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" ',
            'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ',
            f'xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ns1="{self.search_url}">',
            '<SOAP-ENV:Body SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">',
            f"<ns1:{func_name}>{params}</ns1:{func_name}></SOAP-ENV:Body></SOAP-ENV:Envelope>",
        ])

        logger.debug("Sending request: %s." % func_name)

        for _ in range(tries):
            try:
                self.session.headers.update(request_headers.items())
                reply = self.session.post(self.search_url, envelope)
                return ElementTree.fromstring(reply.text.strip())
            except Exception as ex:
                logger.error(f"Exception parsing response: {ex}")
                if func_name == "logIn":
                    self.search_url = self.get_sub_domain()
                sleep(1)

        raise TooManyRequests(f"Too many retries: {tries}")
Esempio n. 7
0
 def login(self):
     """Authenticate against the provider and cache the returned token.

     Credentials are POSTed to the ``login`` endpoint. On HTTP 200 the token
     from the JSON answer is kept on ``self.token``, added to the session's
     ``Authorization`` header and stored in the cache under ``oscom_token``.

     :return: ``True`` once the token has been stored.
     :raises ServiceUnavailable: when the request fails with a connection
         error or a timeout.
     :raises AuthenticationError: on HTTP 401.
     :raises TooManyRequests: on HTTP 429.
     :raises ProviderError: on any other status, or when a 200 answer is not
         valid JSON.
     """
     try:
         r = self.session.post(self.server_url + 'login',
                               json={"username": self.username, "password": self.password},
                               allow_redirects=False,
                               timeout=10)
     except (ConnectionError, Timeout, ReadTimeout) as error:
         # No response object exists on this path, so report the transport
         # error itself rather than a status code.
         raise ServiceUnavailable('Unknown Error, empty response: %r' % (error,))

     # No ``finally: return False`` here: a return inside ``finally`` would
     # discard every exception raised in this method and override the
     # ``return True`` of the success path.
     if r.status_code == 200:
         try:
             self.token = r.json()['token']
         except ValueError:
             raise ProviderError('Invalid JSON returned by provider')

         # NOTE(review): the scheme is spelled 'Beaker' here; HTTP token
         # authentication normally uses 'Bearer' - confirm against the provider API.
         self.session.headers.update({'Authorization': 'Beaker ' + self.token})
         region.set("oscom_token", self.token)
         return True

     if r.status_code == 401:
         raise AuthenticationError('Login failed: {}'.format(r.reason))
     if r.status_code == 429:
         raise TooManyRequests()
     raise ProviderError('Bad status code: {}'.format(r.status_code))
Esempio n. 8
0
    def query(self, show_id, series, season, year=None, country=None):
        """List the completed subtitles of one season of a show.

        The season page is requested from ``ajax_loadShow.php`` and every
        ``tr.epeven`` row is turned into a ``subtitle_class`` instance. Rows
        whose status cell is not ``Completed`` are skipped.

        :param show_id: id of the show.
        :param series: series name handed to each subtitle.
        :param season: season number to request.
        :param year: year handed to each subtitle, if any.
        :param country: not used by this method.
        :return: the subtitles found; an empty list when the answer has no
            content.
        :raises TooManyRequests: when the provider answers with HTTP 304.
        """
        logger.info('Getting the page of show id %d, season %d', show_id, season)

        extra_headers = {
            "referer": "%sshow/%s" % (self.server_url, show_id),
            "X-Requested-With": "XMLHttpRequest"
        }
        r = self.session.get(self.server_url + 'ajax_loadShow.php',
                             params={'show': show_id, 'season': season},
                             timeout=10,
                             headers=extra_headers)
        r.raise_for_status()

        # raise_for_status() does not raise for 3xx codes, so 304 is mapped here.
        if r.status_code == 304:
            raise TooManyRequests()

        if not r.content:
            logger.error('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        found = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # Only finished subtitles are of interest.
            status = cells[5].text
            if status != 'Completed':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # Cells are read in column-independent but fixed order.
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            page_link = self.server_url + cells[2].a['href'][1:]
            row_season = int(cells[0].text)
            row_episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, row_season,
                                           row_episode, title, year, version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            found.append(subtitle)

        # Release the parsed tree before returning.
        soup.decompose()
        soup = None

        return found
Esempio n. 9
0
    def login(self):
        """Log in to the site and cache the resulting session cookies.

        The login page is fetched first so that every ``input`` field it
        contains can be sent back along with the credentials. After the POST,
        a copy of the session cookies restricted to a fixed set of names is
        stored in the cache under ``legendasdivx_cookies2``. Each request is
        preceded by a one second pause.

        :raises AuthenticationError: when a ``KeyError`` occurs, e.g. because
            no ``PHPSESSID`` cookie was set.
        :raises IPAddressBlocked: on an HTTP error whose body mentions
            "bloqueado".
        :raises TooManyRequests: on any other HTTP error.
        :raises ServiceUnavailable: on any other exception.
        """
        logger.debug('Legendasdivx.pt :: Logging in')
        try:
            # sleep for a 1 second before another request
            sleep(1)
            res = self.session.get(self.loginpage)
            res.raise_for_status()
            bsoup = ParserBeautifulSoup(res.content, ['lxml'])

            # necessary to set 'sid' for POST request
            data = {field.get('name'): field.get('value')
                    for field in bsoup.findAll('input')}

            # sleep for a 1 second before another request
            sleep(1)
            data['username'] = self.username
            data['password'] = self.password
            res = self.session.post(self.loginpage, data)
            res.raise_for_status()
            # make sure we're logged in
            logger.debug(
                'Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s',
                self.session.cookies.get_dict()['PHPSESSID'])
            cj = self.session.cookies.copy()
            store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k",
                         "phpbb3_2z8zs_u", "lang")
            # Names come from the live jar; deletions happen on the copy.
            for cn in self.session.cookies.keys():
                if cn not in store_cks:
                    del cj[cn]
            # store session cookies on cache
            logger.debug(
                "Legendasdivx.pt :: Storing legendasdivx session cookies: %r",
                cj)
            region.set("legendasdivx_cookies2", cj)

        except KeyError:
            logger.error(
                "Legendasdivx.pt :: Couldn't get session ID, check your credentials"
            )
            raise AuthenticationError(
                "Legendasdivx.pt :: Couldn't get session ID, check your credentials"
            )
        except HTTPError as e:
            # Read the body from the response attached to the error instead of
            # ``res``, which is not bound if the error precedes the assignment.
            response = getattr(e, 'response', None)
            if response is not None and "bloqueado" in response.text.lower():
                logger.error(
                    "LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked(
                    "LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            # Exceptions do not interpolate logging-style arguments; format here.
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % (e,))
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable(
                "LegendasDivx.pt :: Uncaught error: %r" % (e,))
Esempio n. 10
0
    def initialize(self):
        """Create the HTTP session and log in when credentials are configured.

        The session gets a ``Subliminal/<version>`` user agent, replaced by a
        randomly picked one (plus a ``Referer``) when
        ``USE_ADDICTED_RANDOM_AGENTS`` is set. With a username and password,
        cookies cached under ``addic7ed_cookies`` are tried first by requesting
        ``panel.php``; a 302 answer means the login expired. If no usable
        cached login exists, a fresh login is posted to ``dologin.php`` and the
        resulting cookies are cached.

        :raises TooManyRequests: when the login answer contains
            "relax, slow down".
        :raises AuthenticationError: when the login answer is not a 302
            redirect.
        """
        self.session = Session()
        self.session.headers[
            'User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

        if self.USE_ADDICTED_RANDOM_AGENTS:
            from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
            logger.debug("Addic7ed: using random user agents")
            self.session.headers['User-Agent'] = AGENT_LIST[randint(
                0,
                len(AGENT_LIST) - 1)]
            self.session.headers['Referer'] = self.server_url

        # Nothing more to do without credentials.
        if not (self.username and self.password):
            return

        ccks = region.get("addic7ed_cookies", expiration_time=86400)
        if ccks != NO_VALUE:
            try:
                self.session.cookies._cookies.update(ccks)
                r = self.session.get(self.server_url + 'panel.php',
                                     allow_redirects=False,
                                     timeout=10)
                if r.status_code == 302:
                    logger.info('Addic7ed: Login expired')
                    region.delete("addic7ed_cookies")
                else:
                    logger.info('Addic7ed: Reusing old login')
                    self.logged_in = True
                    return
            except Exception:
                # Reusing the cached login is best effort: fall through to a
                # fresh login, but leave a trace instead of failing silently.
                logger.debug('Addic7ed: Could not reuse the cached login',
                             exc_info=True)

        logger.info('Addic7ed: Logging in')
        data = {
            'username': self.username,
            'password': self.password,
            'Submit': 'Log in'
        }
        r = self.session.post(
            self.server_url + 'dologin.php',
            data,
            allow_redirects=False,
            timeout=10,
            headers={"Referer": self.server_url + "login.php"})

        # Compare against the decoded body: ``r.content`` is bytes, and a
        # str-in-bytes membership test raises TypeError on Python 3.
        if "relax, slow down" in r.text:
            raise TooManyRequests(self.username)

        if r.status_code != 302:
            raise AuthenticationError(self.username)

        region.set("addic7ed_cookies", self.session.cookies._cookies)

        logger.debug('Addic7ed: Logged in')
        self.logged_in = True
Esempio n. 11
0
    def download_subtitle(self, subtitle):
        """Download a subtitle while keeping track of the daily download cap.

        Download timestamps are kept in the cache under ``addic7ed_dls``;
        entries older than 24 hours are dropped on every call. The cap is 80
        when ``self.vip`` is truthy and 40 otherwise. On success the content,
        passed through ``fix_line_ending``, is stored on ``subtitle.content``
        and the current time is appended to the cached list.

        :param subtitle: subtitle to download.
        :raises DownloadLimitPerDayExceeded: when the cap is already reached
            before downloading, or is reached by this download (in that case
            ``subtitle.content`` has already been set).
        :raises TooManyRequests: when the provider answers with HTTP 304.
        :raises DownloadLimitExceeded: when the answer's content type is
            ``text/html``.
        """
        last_dls = region.get("addic7ed_dls")
        now = datetime.datetime.now()
        one_day = datetime.timedelta(hours=24)
        cap = 80 if self.vip else 40

        def raise_limit():
            logger.info("Addic7ed: Downloads per day exceeded (%s)", cap)
            raise DownloadLimitPerDayExceeded

        # ``isinstance(..., list)`` and a list comprehension behave the same on
        # Python 2 and 3; ``types.ListType`` and ``len(filter(...))`` only
        # work on Python 2.
        if isinstance(last_dls, list):
            # keep only the downloads of the last 24 hours
            last_dls = [t for t in last_dls if t + one_day > now]
            region.set("addic7ed_dls", last_dls)
        else:
            last_dls = []

        amount = len(last_dls)

        if amount >= cap:
            raise_limit()

        # download the subtitle
        r = self.session.get(self.server_url + subtitle.download_link,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        # raise_for_status() does not raise for 3xx codes, so 304 is mapped here.
        if r.status_code == 304:
            raise TooManyRequests()

        if not r.content:
            logger.error(
                'Addic7ed: Unable to download subtitle. No data returned from provider'
            )
            return

        # detect download limit exceeded
        if r.headers['Content-Type'] == 'text/html':
            raise DownloadLimitExceeded

        subtitle.content = fix_line_ending(r.content)
        last_dls.append(datetime.datetime.now())
        region.set("addic7ed_dls", last_dls)
        logger.info("Addic7ed: Used %s/%s downloads", amount + 1, cap)

        if amount + 1 >= cap:
            raise_limit()
Esempio n. 12
0
    def _search_show_id(self, series, year=None):
        """Find the id of a show through the site's search page.

        Single quotes in ``series`` are replaced by spaces before searching.
        The first suggestion is accepted only when its sanitized text equals
        the sanitized search string (series, plus year when given).

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, or ``None`` when there is no suggestion or the
            suggestion does not match.
        :rtype: int
        :raises TooManyRequests: when the provider answers with HTTP 304.

        """
        # quotes are not supported by the search
        series = series.replace('\'', ' ')

        if year is not None:
            series_year = '%s %d' % (series, year)
        else:
            series_year = series
        params = {'search': series_year, 'Submit': 'Search'}

        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'srch.php',
                             params=params,
                             timeout=10)
        r.raise_for_status()
        if r.status_code == 304:
            raise TooManyRequests()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        try:
            suggestion = soup.select('span.titulo > a[href^="/show/"]')
            if not suggestion:
                logger.warning('Show id not found: no suggestion')
                return None

            first = suggestion[0]
            suggested_name = sanitize(first.i.text.replace('\'', ' '),
                                      default_characters=self.sanitize_characters)
            wanted_name = sanitize(series_year, default_characters=self.sanitize_characters)
            if suggested_name != wanted_name:
                logger.warning('Show id not found: suggestion does not match')
                return None

            # The href looks like "/show/<id>"; drop the 6-character prefix.
            show_id = int(first['href'][6:])
            logger.debug('Found show id %d', show_id)

            return show_id
        finally:
            # Always release the parsed tree, whatever the outcome.
            soup.decompose()
            soup = None
Esempio n. 13
0
    def download_subtitle(self, subtitle):
        """Download a subtitle and store its content on ``subtitle``.

        The file behind ``subtitle.download_link`` is requested with the
        subtitle's page as referer. The bytes received, passed through
        ``fix_line_ending``, are stored on ``subtitle.content``. An empty
        answer is logged as an error and leaves the subtitle untouched.

        :param subtitle: subtitle to download.
        :raises TooManyRequests: when the provider answers with HTTP 304.
        :raises DownloadLimitExceeded: when the answer's content type is
            ``text/html``.
        """
        url = self.server_url + subtitle.download_link
        response = self.session.get(url, headers={'Referer': subtitle.page_link},
                                    timeout=10)
        response.raise_for_status()

        # raise_for_status() does not raise for 3xx codes, so 304 is mapped here.
        if response.status_code == 304:
            raise TooManyRequests()

        if not response.content:
            logger.error('Unable to download subtitle. No data returned from provider')
            return None

        # An HTML page instead of a subtitle file signals the download limit.
        if response.headers['Content-Type'] == 'text/html':
            raise DownloadLimitExceeded

        subtitle.content = fix_line_ending(response.content)
Esempio n. 14
0
    def query(self, languages, video):
        """Search the provider for subtitles of ``video`` in ``languages``.

        The title id is resolved with ``search_titles`` (series name for
        episodes, title otherwise). The ``subtitles`` endpoint is then queried
        with that id, the comma-joined languages and, when ``self.use_hash``
        is set, the video's ``opensubtitlescom`` hash. Episodes additionally
        send their season and episode numbers. Every returned item that has at
        least one file becomes an ``OpenSubtitlesComSubtitle`` built from its
        first file.

        :param languages: languages to search for; each is sent as ``str(lang)``.
        :param video: video to search subtitles for; stored on ``self.video``.
        :return: the subtitles found; an empty list when no title id is found.
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: when the answer is not valid JSON.

        Any other HTTP error status surfaces through ``raise_for_status()``.
        """
        self.video = video
        if self.use_hash:
            moviehash = self.video.hashes.get('opensubtitlescom')
            logging.debug('Searching using this hash: {}'.format(moviehash))
        else:
            moviehash = None

        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        title_id = self.search_titles(title)
        if not title_id:
            return []
        lang_strings = [str(lang) for lang in languages]
        langs = ','.join(lang_strings)
        logging.debug('Searching for this languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            params = {'parent_feature_id': title_id,
                      'languages': langs,
                      'episode_number': self.video.episode,
                      'season_number': self.video.season,
                      'moviehash': moviehash}
        else:
            params = {'id': title_id,
                      'languages': langs,
                      'moviehash': moviehash}
        res = self.session.get(self.server_url + 'subtitles', params=params, timeout=10)

        # 429 has to be mapped before raise_for_status(): that call raises for
        # every 4xx status and would make this branch unreachable.
        if res.status_code == 429:
            raise TooManyRequests()
        res.raise_for_status()

        try:
            result = res.json()
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        logging.debug('Query returned {} subtitles'.format(len(result['data'])))

        subtitles = []
        for item in result['data']:
            attributes = item['attributes']
            feature_details = attributes['feature_details']

            # Items without any file cannot be downloaded.
            if not attributes['files']:
                continue

            subtitle = OpenSubtitlesComSubtitle(
                language=Language.fromietf(attributes['language']),
                hearing_impaired=attributes['hearing_impaired'],
                page_link=attributes['url'],
                file_id=attributes['files'][0]['file_id'],
                releases=attributes['release'],
                uploader=attributes['uploader']['name'],
                title=feature_details['movie_name'],
                year=feature_details['year'],
                # Season/episode numbers and the hash flag are optional keys.
                season=feature_details.get('season_number'),
                episode=feature_details.get('episode_number'),
                hash_matched=attributes.get('moviehash_match', False)
            )
            subtitle.get_matches(self.video)
            subtitles.append(subtitle)

        return subtitles
Esempio n. 15
0
    def query_movie(self, movie_id, title, year=None):
        """List the subtitles offered on a movie page.

        The page ``movie/<movie_id>`` is fetched and every ``tabel95`` table
        that contains a ``NewsTitle`` cell is read as one subtitle. Tables
        whose status contains ``%`` and tables whose language cannot be
        resolved are skipped. Hearing-impaired subtitles get their language
        rebuilt with ``hi=True``.

        :param str movie_id: id of the movie, appended to the URL.
        :param title: title handed to each subtitle.
        :param year: year handed to each subtitle, if any.
        :return: the subtitles found; an empty list when the answer has no
            text.
        :raises TooManyRequests: when the provider answers with HTTP 304.
        """
        logger.info('Getting the page of movie id %s', movie_id)
        r = self.session.get(self.server_url + 'movie/' + movie_id,
                             timeout=10,
                             headers={
                                 "referer": self.server_url,
                                 "X-Requested-With": "XMLHttpRequest"
                             })
        r.raise_for_status()

        # raise_for_status() does not raise for 3xx codes, so 304 is mapped here.
        if r.status_code == 304:
            raise TooManyRequests()

        if not r.text:
            logger.error('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        table_attrs = {'align': 'center',
                       'border': '0',
                       'class': 'tabel95',
                       'width': '100%'}
        candidate_tables = [candidate for candidate in soup.find_all('table', table_attrs)
                            if candidate.find_all('td', {'class': 'NewsTitle'})]

        found = []
        for table in candidate_tables:
            # Only these three rows carry information that is used below.
            header_row = table.contents[1]
            detail_row = table.contents[4]
            flags_row = table.contents[6]

            # A percentage in the status marks an unfinished subtitle.
            status = detail_row.contents[6].text
            if "%" in status:
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            try:
                language = Language.fromaddic7ed(detail_row.contents[4].text.strip('\n'))
            except babelfish.exceptions.LanguageReverseError as error:
                logger.debug("Language error: %s, Ignoring subtitle", error)
                continue

            icon_src = flags_row.contents[1].contents[1].attrs['src']
            hearing_impaired = bool(icon_src.endswith('hi.jpg'))
            page_link = self.server_url + 'movie/' + movie_id

            # The first word is localized (Version, Versión, ...), so it is
            # dropped rather than matched.
            try:
                words = str(header_row.contents[1].contents[1]).split()
                version = " ".join(words[1:])
                version_matches = re.search(r"(.+),.+", version)
                if version_matches:
                    version = version_matches.group(1)
                else:
                    version = None
            except IndexError:
                version = None

            # The link sits at one of two positions inside the cell.
            link_cell = detail_row.contents[8]
            try:
                download_link = link_cell.contents[3].attrs['href'][1:]
            except IndexError:
                download_link = detail_row.contents[8].contents[2].attrs['href'][1:]
            uploader = header_row.contents[2].contents[8].text.strip()

            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, None, None, None,
                                           title, year, version, download_link, uploader)
            logger.debug('Found subtitle %r', subtitle)
            found.append(subtitle)

        # Release the parsed tree before returning.
        soup.decompose()
        soup = None

        return found
Esempio n. 16
0
    def initialize(self):
        """Create the HTTP session and, if credentials are set, log in to Addic7ed.

        A random user agent from ``AGENT_LIST`` is used for the session. When
        ``load_verification`` reports that a stored login is still usable, no new
        login is performed. Otherwise the login form is posted (at most three
        attempts), solving a reCAPTCHA through a pitcher when the login page
        contains one.

        :raise IPAddressBlocked: if the server cannot be reached while checking a stored login.
        :raise TooManyRequests: if the login answer contains "relax, slow down".
        :raise AuthenticationError: if the credentials are rejected or the login keeps failing.
        :raise Exception: if the captcha could not be solved on the last attempt.

        """
        self.session = Session()
        # baseline identification; replaced by a random user agent just below
        self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

        from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
        logger.debug("Addic7ed: using random user agents")
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

        # login
        if self.username and self.password:

            def check_verification(cache_region):
                # a 302 answer from panel.php is treated as an expired login
                try:
                    rr = self.session.get(self.server_url + 'panel.php',
                                          allow_redirects=False,
                                          timeout=10,
                                          headers={"Referer": self.server_url})
                    if rr.status_code == 302:
                        logger.info('Addic7ed: Login expired')
                        cache_region.delete("addic7ed_data")
                    else:
                        logger.info('Addic7ed: Re-using old login')
                        self.logged_in = True
                        return True
                except ConnectionError as e:
                    logger.debug("Addic7ed: There was a problem reaching the server: %s.", e)
                    raise IPAddressBlocked("Addic7ed: Your IP is temporarily blocked.")

            if load_verification("addic7ed", self.session, callback=check_verification):
                return

            logger.info('Addic7ed: Logging in')
            data = {
                'username': self.username,
                'password': self.password,
                'Submit': 'Log in',
                'url': '',
                'remember': 'true',
            }

            # (form field receiving the captcha answer, pattern extracting the site key)
            captcha_variants = (
                ("g-recaptcha-response", r'g-recaptcha.+?data-sitekey=\"(.+?)\"'),
                ("recaptcha_response", r'grecaptcha.execute\(\'(.+?)\','),
            )

            max_tries = 3
            # every pass ends with break (success), continue (retry), return or raise;
            # the last pass never continues, so the loop cannot run out silently
            for tries in range(1, max_tries + 1):
                r = self.session.get(self.server_url + 'login.php',
                                     timeout=10,
                                     headers={"Referer": self.server_url})
                if "g-recaptcha" in r.text or "grecaptcha" in r.text:
                    logger.info(
                        'Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
                        'happen once every so often')

                    site_key = None
                    response_field = None
                    for field, pattern in captcha_variants:
                        # re.search returns None on no match: guard it so the next
                        # pattern is still tried instead of raising AttributeError
                        match = re.search(pattern, r.text)
                        if match and match.group(1):
                            site_key = match.group(1)
                            response_field = field
                            break
                    if not site_key:
                        logger.error("Addic7ed: Captcha site-key not found!")
                        return

                    pitcher = pitchers.get_pitcher()(
                        "Addic7ed",
                        self.server_url + 'login.php',
                        site_key,
                        user_agent=self.session.headers["User-Agent"],
                        cookies=self.session.cookies.get_dict(),
                        is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        if tries >= max_tries:
                            raise Exception("Addic7ed: Couldn't solve captcha!")
                        logger.info("Addic7ed: Couldn't solve captcha! Retrying")
                        continue

                    data[response_field] = result

                r = self.session.post(
                    self.server_url + 'dologin.php',
                    data,
                    allow_redirects=False,
                    timeout=10,
                    headers={"Referer": self.server_url + "login.php"})

                if "relax, slow down" in r.text:
                    raise TooManyRequests(self.username)

                if "Wrong password" in r.text or "doesn't exist" in r.text:
                    raise AuthenticationError(self.username)

                # a successful login answers with a redirect
                if r.status_code != 302:
                    if tries >= max_tries:
                        logger.error("Addic7ed: Something went wrong when logging in")
                        raise AuthenticationError(self.username)
                    logger.info("Addic7ed: Something went wrong when logging in; retrying")
                    continue
                break

            store_verification("addic7ed", self.session)

            logger.debug('Addic7ed: Logged in')
            self.logged_in = True
Esempio n. 17
0
    def _search_show_id(self, series, year=None):
        """Look up the show id for `series` (optionally qualified by `year`).

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, or `None` when no suggestion is returned or the
            suggestion does not match the searched name.
        :rtype: int
        :raise TooManyRequests: if the last search answer has status code 304.

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        # build the params
        if year is None:
            series_year = series
        else:
            series_year = '%s %d' % (series, year)
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)

        # currently addic7ed searches via srch.php from the front page, then a re-search is needed which calls
        # search.php
        for endpoint in ("srch.php", "search.php"):
            # only the re-search carries a referer pointing at the first search page
            headers = {"referer": self.server_url + "srch.php"} if endpoint == "search.php" else None
            r = self.session.get(self.server_url + endpoint, params=params, timeout=10, headers=headers)
            r.raise_for_status()

            got_results = r.text and "Sorry, your search" not in r.text
            if got_results:
                break

            time.sleep(4)

        if r.status_code == 304:
            raise TooManyRequests()

        soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])

        # get the suggestion
        try:
            candidates = soup.select('span.titulo > a[href^="/show/"]')
            if not candidates:
                logger.warning('Show id not found: no suggestion')
                return None

            first = candidates[0]
            suggested_name = sanitize(first.i.text.replace('\'', ' '),
                                      default_characters=self.sanitize_characters)
            wanted_name = sanitize(series_year, default_characters=self.sanitize_characters)
            if not suggested_name == wanted_name:
                logger.warning('Show id not found: suggestion does not match')
                return None

            # the href looks like "/show/<id>": drop the 6-character prefix
            show_id = int(first['href'][6:])
            logger.debug('Found show id %d', show_id)
            return show_id
        finally:
            # always release the parsed document, whatever the outcome
            soup.decompose()
            soup = None
Esempio n. 18
0
    def initialize(self):
        """Create the HTTP session and, if credentials are set, log in to Addic7ed.

        A random user agent from ``AGENT_LIST`` is used for the session. When
        ``load_verification`` reports that a stored login is still usable, no new
        login is performed. Otherwise the login form is posted, solving a
        reCAPTCHA through a pitcher when the login page contains one. A
        "User doesn't exist" answer is retried, for at most three attempts in total.

        :raise TooManyRequests: if the login answer contains "relax, slow down".
        :raise AuthenticationError: if the login is rejected or still fails on the last attempt.
        :raise Exception: if the captcha could not be solved.

        """
        self.session = Session()
        # baseline identification; replaced by a random user agent just below
        self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

        from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
        logger.debug("Addic7ed: using random user agents")
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

        # login
        if self.username and self.password:
            def check_verification(cache_region):
                # a 302 answer from panel.php is treated as an expired login
                rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
                                      headers={"Referer": self.server_url})
                if rr.status_code == 302:
                    logger.info('Addic7ed: Login expired')
                    cache_region.delete("addic7ed_data")
                else:
                    logger.info('Addic7ed: Re-using old login')
                    self.logged_in = True
                    return True

            if load_verification("addic7ed", self.session, callback=check_verification):
                return

            logger.info('Addic7ed: Logging in')
            data = {'username': self.username, 'password': self.password, 'Submit': 'Log in', 'url': '',
                    'remember': 'true'}

            max_tries = 3
            # every pass ends with break (success), continue (retry), return or raise;
            # the last pass never continues, so a failed login can no longer fall
            # through to store_verification() below
            for attempt in range(1, max_tries + 1):
                r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
                # use the decoded body: substring tests against bytes fail on Python 3
                if "grecaptcha" in r.text:
                    logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
                                'happen once every so often')

                    # re.search returns None on no match, so guard before .group()
                    match = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.text)
                    site_key = match.group(1) if match else None
                    if not site_key:
                        logger.error("Addic7ed: Captcha site-key not found!")
                        return

                    pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php', site_key,
                                                     user_agent=self.session.headers["User-Agent"],
                                                     cookies=self.session.cookies.get_dict(),
                                                     is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        raise Exception("Addic7ed: Couldn't solve captcha!")

                    data["recaptcha_response"] = result

                r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
                                      headers={"Referer": self.server_url + "login.php"})

                if "relax, slow down" in r.text:
                    raise TooManyRequests(self.username)

                # a successful login answers with a redirect
                if r.status_code == 302:
                    break

                if "User <b></b> doesn't exist" in r.text and attempt < max_tries:
                    logger.info("Addic7ed: Error, trying again. (%s/%s)", attempt, max_tries)
                    continue

                raise AuthenticationError(self.username)

            store_verification("addic7ed", self.session)

            logger.debug('Addic7ed: Logged in')
            self.logged_in = True
Esempio n. 19
0
    def query(self, languages, video):
        """Search subtitles for `video` in the given `languages`.

        The feature is identified by its IMDB id when the video has one;
        otherwise a title search is performed first and its result is used.
        Results without any file are ignored.

        :param languages: languages to search for.
        :param video: the video (an `Episode` or a `Movie`) to search subtitles for.
        :return: the found subtitles; an empty list when there is no IMDB id and
            the title search finds nothing.
        :rtype: list
        :raise TooManyRequests: if the server answers with status code 429.
        :raise ProviderError: if the server answers with status code 503, or the
            answer is not JSON or has no ``data`` key.

        """
        self.video = video
        if self.use_hash:
            file_hash = self.video.hashes.get('opensubtitlescom')
            # log the computed file hash (not the `hash` builtin)
            logging.debug('Searching using this hash: {}'.format(file_hash))
        else:
            file_hash = None

        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        imdb_id = None
        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.series_imdb_id)
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.imdb_id)

        # fall back on a title search only when no IMDB id is available
        title_id = None
        if not imdb_id:
            title_id = self.search_titles(title)
            if not title_id:
                return []

        lang_strings = [str(lang.basename) for lang in languages]
        only_foreign = all(lang.forced for lang in languages)
        also_foreign = any(lang.forced for lang in languages)
        if only_foreign:
            forced = 'only'
        elif also_foreign:
            forced = 'include'
        else:
            forced = 'exclude'

        langs = ','.join(lang_strings)
        logging.debug('Searching for this languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            params = (
                ('episode_number', self.video.episode),
                ('foreign_parts_only', forced),
                ('languages', langs.lower()),
                ('moviehash', file_hash),
                ('parent_feature_id', title_id) if title_id else ('imdb_id', imdb_id),
                ('season_number', self.video.season),
                ('query', os.path.basename(self.video.name)),
            )
        else:
            params = (
                ('foreign_parts_only', forced),
                ('id', title_id) if title_id else ('imdb_id', imdb_id),
                ('languages', langs.lower()),
                ('moviehash', file_hash),
                ('query', os.path.basename(self.video.name)),
            )
        res = self.session.get(self.server_url + 'subtitles', params=params, timeout=30)

        if res.status_code == 429:
            raise TooManyRequests()
        elif res.status_code == 503:
            raise ProviderError(res.reason)

        try:
            result = res.json()
            if 'data' not in result:
                raise ValueError
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        logging.debug('Query returned {} subtitles'.format(len(result['data'])))

        subtitles = []
        for item in result['data']:
            attributes = item['attributes']
            feature_details = attributes['feature_details']

            # nothing to download for this result
            if not len(attributes['files']):
                continue

            subtitle = OpenSubtitlesComSubtitle(
                language=Language.fromietf(attributes['language']),
                forced=attributes['foreign_parts_only'],
                hearing_impaired=attributes['hearing_impaired'],
                page_link=attributes['url'],
                file_id=attributes['files'][0]['file_id'],
                releases=attributes['release'],
                uploader=attributes['uploader']['name'],
                title=feature_details['movie_name'],
                year=feature_details['year'],
                # these keys are optional in the answer
                season=feature_details.get('season_number'),
                episode=feature_details.get('episode_number'),
                hash_matched=attributes.get('moviehash_match', False))
            subtitle.get_matches(self.video)
            subtitles.append(subtitle)

        return subtitles
Esempio n. 20
0
def checked(fn,
            raise_api_limit=False,
            validate_token=False,
            validate_json=False,
            json_key_name=None,
            validate_content=False):
    """Run :fn: and check the response status before returning it.

    :param fn: the function to make an API call to OpenSubtitles.com.
    :param raise_api_limit: if False, an `APIThrottled` raised by :fn: is retried once after a
        short wait; if True it is re-raised immediately.
    :param validate_token: if True, a 401 answer returns the integer 401 instead of raising.
    :param validate_json: test if response is valid json.
    :param json_key_name: if given (with `validate_json`), test if returned json contain this key.
    :param validate_content: test if response have a content (used with download); returns
        False when it has none.
    :return: the response, or 401 / False as described above.
    :raise APIThrottled: if :fn: is still throttled on retry, or on a 502 answer.
    :raise ServiceUnavailable: if the request failed with a connection error or timeout.
    :raise AuthenticationError: on a 401 answer when `validate_token` is False.
    :raise DownloadLimitExceeded: on a 406 answer.
    :raise TooManyRequests: on a 429 answer.
    :raise ProviderError: on any other failure of :fn:, any other non-200 answer, or invalid json.

    """
    try:
        response = fn()
    except APIThrottled:
        if not raise_api_limit:
            logger.info(
                "API request limit hit, waiting and trying again once.")
            time.sleep(2)
            # retry once, keeping the validation options requested by the caller
            return checked(fn,
                           raise_api_limit=True,
                           validate_token=validate_token,
                           validate_json=validate_json,
                           json_key_name=json_key_name,
                           validate_content=validate_content)
        raise
    except (ConnectionError, Timeout, ReadTimeout) as error:
        # there is no response object here, so report the underlying error instead
        raise ServiceUnavailable(
            f'Unknown Error, no response received: {error!r}') from error
    except Exception:
        logging.exception('Unhandled exception raised.')
        raise ProviderError('Unhandled exception raised. Check log.')

    status_code = response.status_code

    if status_code == 401:
        if validate_token:
            return 401
        raise AuthenticationError(f'Login failed: {response.reason}')
    elif status_code == 403:
        raise ProviderError("Bazarr API key seems to be in problem")
    elif status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    elif status_code == 410:
        raise ProviderError("Download as expired")
    elif status_code == 429:
        raise TooManyRequests()
    elif status_code == 502:
        # this one should deal with Bad Gateway issue on their side.
        raise APIThrottled()
    elif 500 <= status_code <= 599:
        raise ProviderError(response.reason)

    if status_code != 200:
        raise ProviderError(f'Bad status code: {response.status_code}')

    if validate_json:
        try:
            json_test = response.json()
        except JSONDecodeError:
            raise ProviderError('Invalid JSON returned by provider')
        # the key check only applies when a key name was actually requested
        if json_key_name is not None and json_key_name not in json_test:
            raise ProviderError(
                f'Invalid JSON returned by provider: no {json_key_name} key in returned json.'
            )

    if validate_content:
        if not hasattr(response, 'content'):
            logging.error('Download link returned no content attribute.')
            return False
        elif not response.content:
            logging.error(
                f'This download link returned empty content: {response.url}'
            )
            return False

    return response
Esempio n. 21
0
    def query(self, video, languages):
        """Search Legendasdivx.pt for subtitles of `video` in each of `languages`.

        For every language the search page is requested (logging in again when
        the request is redirected), and up to ``MAX_PAGES`` result pages are
        parsed with ``_process_page``. For episodes without results the search
        is repeated with the episode number removed (season packs).

        :param video: the `Movie` or `Episode` to search subtitles for.
        :param languages: a single language or a tuple/list/set of languages.
        :return: the subtitles found for all languages.
        :rtype: list
        :raise IPAddressBlocked: if an HTTP error page contains "bloqueado".
        :raise TooManyRequests: on any other HTTP error.
        :raise ServiceUnavailable: on any other error while searching.

        """
        _searchurl = self.searchurl

        subtitles = []

        if isinstance(video, Movie):
            querytext = video.imdb_id if video.imdb_id else video.title

        if isinstance(video, Episode):
            querytext = '{} S{:02d}E{:02d}'.format(video.series, video.season,
                                                   video.episode)
            querytext = quote(querytext.lower())

        # language query filter
        if not isinstance(languages, (tuple, list, set)):
            languages = [languages]

        # search for more than 10 results (legendasdivx uses pagination)
        # don't throttle - maximum results = MAX_PAGES * RESULTS_PER_PAGE
        MAX_PAGES = 6
        RESULTS_PER_PAGE = 10

        for language in languages:
            logger.debug("Legendasdivx.pt :: searching for %s subtitles.",
                         language)
            language_id = language.opensubtitles
            if 'por' in language_id:
                lang_filter = '&form_cat=28'
            elif 'pob' in language_id:
                lang_filter = '&form_cat=29'
            else:
                lang_filter = ''

            # build the query for this language from the base query, so that the
            # filter (and the episode fallback below) of one language never
            # leaks into the search for the next one
            lang_query = querytext + lang_filter

            try:
                # sleep for a 1 second before another request
                sleep(1)
                self.headers['Referer'] = self.site + '/index.php'
                self.session.headers.update(self.headers)
                res = self.session.get(_searchurl.format(query=lang_query),
                                       allow_redirects=False)
                res.raise_for_status()
                if (res.status_code == 200
                        and "A legenda não foi encontrada" in res.text):
                    logger.warning(
                        'Legendasdivx.pt :: query %s return no results!',
                        lang_query)
                    # for series, if no results found, try again just with series and season (subtitle packs)
                    if isinstance(video, Episode):
                        logger.debug(
                            "Legendasdivx.pt :: trying again with just series and season on query."
                        )
                        lang_query = re.sub(r"(e|E)(\d{2})", "", lang_query)
                        # sleep for a 1 second before another request
                        sleep(1)
                        res = self.session.get(
                            _searchurl.format(query=lang_query),
                            allow_redirects=False)
                        res.raise_for_status()
                        if (res.status_code == 200 and
                                "A legenda não foi encontrada" in res.text):
                            logger.warning(
                                'Legendasdivx.pt :: query {0} return no results for language {1}(for series and season only).'
                                .format(lang_query, language_id))
                            continue
                if res.status_code == 302:  # got redirected to login page.
                    # seems that our session cookies are no longer valid... clean them from cache
                    region.delete("legendasdivx_cookies2")
                    logger.debug(
                        "Legendasdivx.pt :: Logging in again. Cookies have expired!"
                    )
                    # login and try again
                    self.login()
                    # sleep for a 1 second before another request
                    sleep(1)
                    res = self.session.get(_searchurl.format(query=lang_query))
                    res.raise_for_status()
            except HTTPError as e:
                if "bloqueado" in res.text.lower():
                    logger.error(
                        "LegendasDivx.pt :: Your IP is blocked on this server."
                    )
                    raise IPAddressBlocked(
                        "LegendasDivx.pt :: Your IP is blocked on this server."
                    )
                logger.error("Legendasdivx.pt :: HTTP Error %s", e)
                # exceptions do not interpolate %-args the way logger calls do
                raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
            except Exception as e:
                logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
                raise ServiceUnavailable(
                    "LegendasDivx.pt :: Uncaught error: %r" % (e,))

            bsoup = ParserBeautifulSoup(res.content, ['html.parser'])

            # get number of pages bases on results found
            results_found = 0
            page_header = bsoup.find("div", {"class": "pager_bar"})
            if page_header:
                # the pager may be present without a parsable count
                found = re.search(r'\((.*?) encontradas\)', page_header.text)
                if found:
                    try:
                        results_found = int(found.group(1))
                    except ValueError:
                        results_found = 0
            logger.debug("Legendasdivx.pt :: Found %s subtitles",
                         str(results_found))
            # ceiling division: an exact multiple of the page size needs no extra page
            num_pages = max(1, -(-results_found // RESULTS_PER_PAGE))
            num_pages = min(MAX_PAGES, num_pages)

            # process first page
            subtitles += self._process_page(video, bsoup)

            # more pages?
            for num_page in range(2, num_pages + 1):
                sleep(1)  # another 1 sec before requesting...
                _search_next = self.searchurl.format(
                    query=lang_query) + "&page={0}".format(str(num_page))
                logger.debug(
                    "Legendasdivx.pt :: Moving on to next page: %s",
                    _search_next)
                # sleep for a 1 second before another request
                sleep(1)
                res = self.session.get(_search_next)
                next_page = ParserBeautifulSoup(res.content,
                                                ['html.parser'])
                subtitles.extend(self._process_page(video, next_page))

        return subtitles