Example #1
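    # Addic7ed provider: set up the HTTP session with a random User-Agent, try
    # to reuse a cached login via load_verification(), and otherwise log in,
    # solving the login reCAPTCHA with a "pitcher" when one is present.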
    def initialize(self):
        self.session = Session()
        self.session.headers[
            'User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

        from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
        logger.debug("Addic7ed: using random user agents")
        self.session.headers['User-Agent'] = AGENT_LIST[randint(
            0,
            len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

        # login
        if self.username and self.password:

            def check_verification(cache_region):
                try:
                    rr = self.session.get(self.server_url + 'panel.php',
                                          allow_redirects=False,
                                          timeout=10,
                                          headers={"Referer": self.server_url})
                    if rr.status_code == 302:
                        logger.info('Addic7ed: Login expired')
                        cache_region.delete("addic7ed_data")
                    else:
                        logger.info('Addic7ed: Re-using old login')
                        self.logged_in = True
                        return True
                except ConnectionError as e:
                    logger.debug(
                        "Addic7ed: There was a problem reaching the server: %s."
                        % e)
                    raise IPAddressBlocked(
                        "Addic7ed: Your IP is temporarily blocked.")

            if load_verification("addic7ed",
                                 self.session,
                                 callback=check_verification):
                return

            logger.info('Addic7ed: Logging in')
            data = {
                'username': self.username,
                'password': self.password,
                'Submit': 'Log in',
                'url': '',
                'remember': 'true'
            }

            tries = 0
            while tries <= 3:
                tries += 1
                r = self.session.get(self.server_url + 'login.php',
                                     timeout=10,
                                     headers={"Referer": self.server_url})
                if "g-recaptcha" in r.text or "grecaptcha" in r.text:
                    logger.info(
                        'Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
                        'happen once every so often')

                    site_key = None
                    for g, s in (("g-recaptcha-response",
                                  r'g-recaptcha.+?data-sitekey=\"(.+?)\"'),
                                 ("recaptcha_response",
                                  r'grecaptcha.execute\(\'(.+?)\',')):
                        match = re.search(s, r.text)
                        if match:
                            site_key = match.group(1)
                            break
                    if not site_key:
                        logger.error("Addic7ed: Captcha site-key not found!")
                        return

                    pitcher = pitchers.get_pitcher()(
                        "Addic7ed",
                        self.server_url + 'login.php',
                        site_key,
                        user_agent=self.session.headers["User-Agent"],
                        cookies=self.session.cookies.get_dict(),
                        is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        if tries >= 3:
                            raise Exception(
                                "Addic7ed: Couldn't solve captcha!")
                        logger.info(
                            "Addic7ed: Couldn't solve captcha! Retrying")
                        continue

                    data[g] = result

                r = self.session.post(
                    self.server_url + 'dologin.php',
                    data,
                    allow_redirects=False,
                    timeout=10,
                    headers={"Referer": self.server_url + "login.php"})

                if "relax, slow down" in r.text:
                    raise TooManyRequests(self.username)

                if "Wrong password" in r.text or "doesn't exist" in r.text:
                    raise AuthenticationError(self.username)

                if r.status_code != 302:
                    if tries >= 3:
                        logger.error(
                            "Addic7ed: Something went wrong when logging in")
                        raise AuthenticationError(self.username)
                    logger.info(
                        "Addic7ed: Something went wrong when logging in; retrying"
                    )
                    continue
                break

            store_verification("addic7ed", self.session)

            logger.debug('Addic7ed: Logged in')
            self.logged_in = True
Example #2
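    # A simpler variant of the Addic7ed login: same cached-session check, but
    # only the invisible-reCAPTCHA pattern is handled and a failed captcha
    # solve raises immediately instead of retrying.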
    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

        from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
        logger.debug("Addic7ed: using random user agents")
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

        # login
        if self.username and self.password:
            def check_verification(cache_region):
                rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
                                      headers={"Referer": self.server_url})
                if rr.status_code == 302:
                    logger.info('Addic7ed: Login expired')
                    cache_region.delete("addic7ed_data")
                else:
                    logger.info('Addic7ed: Re-using old login')
                    self.logged_in = True
                    return True

            if load_verification("addic7ed", self.session, callback=check_verification):
                return

            logger.info('Addic7ed: Logging in')
            data = {'username': self.username, 'password': self.password, 'Submit': 'Log in', 'url': '',
                    'remember': 'true'}

            tries = 0
            while tries < 3:
                r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
                if "grecaptcha" in r.content:
                    logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
                                'happen once every so often')

                    site_key_match = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content)
                    site_key = site_key_match.group(1) if site_key_match else None
                    if not site_key:
                        logger.error("Addic7ed: Captcha site-key not found!")
                        return

                    pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php', site_key,
                                                     user_agent=self.session.headers["User-Agent"],
                                                     cookies=self.session.cookies.get_dict(),
                                                     is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        raise Exception("Addic7ed: Couldn't solve captcha!")

                    data["recaptcha_response"] = result

                r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
                                      headers={"Referer": self.server_url + "login.php"})

                if "relax, slow down" in r.content:
                    raise TooManyRequests(self.username)

                if r.status_code != 302:
                    if "User <b></b> doesn't exist" in r.content and tries <= 2:
                        logger.info("Addic7ed: Error, trying again. (%s/%s)", tries+1, 3)
                        tries += 1
                        continue

                    raise AuthenticationError(self.username)
                break

            store_verification("addic7ed", self.session)

            logger.debug('Addic7ed: Logged in')
            self.logged_in = True
Example #3
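    # CloudScraper request wrapper: decompress Brotli responses when enabled,
    # answer Cloudflare anti-bot challenges, and when a "Captcha" ValueError is
    # raised, solve the reCAPTCHA with a pitcher and resubmit the result to
    # /cdn-cgi/l/chk_captcha.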
    def _request(self, method, url, *args, **kwargs):
        ourSuper = super(CloudScraper, self)
        resp = ourSuper.request(method, url, *args, **kwargs)

        if resp.headers.get('Content-Encoding') == 'br':
            if self.allow_brotli and resp._content:
                resp._content = brotli.decompress(resp.content)
            else:
                logging.warning(
                    'Brotli content detected, but the allow_brotli option is '
                    'disabled; returning the response as-is.')
                return resp

        # Debug request
        if self.debug:
            self.debugRequest(resp)

        # Check if Cloudflare anti-bot is on
        try:
            if self.isChallengeRequest(resp):
                if resp.request.method != 'GET':
                    # Work around if the initial request is not a GET,
                    # Supersede with a GET then re-request the original METHOD.
                    CloudScraper.request(self, 'GET', resp.url)
                    resp = ourSuper.request(method, url, *args, **kwargs)
                else:
                    # Solve Challenge
                    resp = self.sendChallengeResponse(resp, **kwargs)

        except ValueError as e:
            if PY3:
                error = str(e)
            else:
                error = e.message
            if error == "Captcha":
                parsed_url = urlparse(url)
                domain = parsed_url.netloc
                # solve the captcha
                site_key = re.search(r'data-sitekey="(.+?)"', resp.text)
                challenge_s = re.search(
                    r'type="hidden" name="s" value="(.+?)"', resp.text)
                challenge_ray = re.search(r'data-ray="(.+?)"', resp.text)
                if not all([site_key, challenge_s, challenge_ray]):
                    raise Exception("cf: Captcha site-key not found!")
                site_key = site_key.group(1)
                challenge_s = challenge_s.group(1)
                challenge_ray = challenge_ray.group(1)

                pitcher = pitchers.get_pitcher()(
                    "cf: %s" % domain,
                    resp.request.url,
                    site_key,
                    user_agent=self.headers["User-Agent"],
                    cookies=self.cookies.get_dict(),
                    is_invisible=True)

                parsed_url = urlparse(resp.url)
                logger.info("cf: %s: Solving captcha", domain)
                result = pitcher.throw()
                if not result:
                    raise Exception("cf: Couldn't solve captcha!")

                submit_url = '{}://{}/cdn-cgi/l/chk_captcha'.format(
                    parsed_url.scheme, domain)
                method = resp.request.method

                cloudflare_kwargs = {
                    'allow_redirects': False,
                    'headers': {'Referer': resp.url},
                    'params': OrderedDict([('s', challenge_s),
                                           ('g-recaptcha-response', result)])
                }

                return CloudScraper.request(self, method, submit_url,
                                            **cloudflare_kwargs)

        return resp
Example #4
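    # Titlovi subtitle search: build the query parameters, walk the paginated
    # results, solve a Cloudflare captcha if the server answers 403, and parse
    # each page with BeautifulSoup into subtitle objects.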
    def query(self,
              languages,
              title,
              season=None,
              episode=None,
              year=None,
              video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info(
                'Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages'
            )
            used_languages = [
                l for l in used_languages
                if l != Language.fromietf('sr-Latn')
            ]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url,
                                     params=params,
                                     timeout=10)
                r.raise_for_status()
            except RequestException as e:
                captcha_passed = False
                if (e.response is not None and e.response.status_code == 403
                        and "data-sitekey" in e.response.content):
                    logger.info(
                        'titlovi: Solving captcha. This might take a couple of minutes, but should only '
                        'happen once every so often')

                    site_key = re.search(r'data-sitekey="(.+?)"',
                                         e.response.content)
                    challenge_s = re.search(
                        r'type="hidden" name="s" value="(.+?)"',
                        e.response.content)
                    challenge_ray = re.search(r'data-ray="(.+?)"',
                                              e.response.content)
                    if not all([site_key, challenge_s, challenge_ray]):
                        raise Exception("titlovi: Captcha site-key not found!")
                    site_key = site_key.group(1)
                    challenge_s = challenge_s.group(1)
                    challenge_ray = challenge_ray.group(1)

                    pitcher = pitchers.get_pitcher()(
                        "titlovi",
                        e.request.url,
                        site_key,
                        user_agent=self.session.headers["User-Agent"],
                        cookies=self.session.cookies.get_dict(),
                        is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        raise Exception("titlovi: Couldn't solve captcha!")

                    s_params = {
                        "s": challenge_s,
                        "id": challenge_ray,
                        "g-recaptcha-response": result,
                    }
                    r = self.session.get(self.server_url +
                                         "/cdn-cgi/l/chk_captcha",
                                         params=s_params,
                                         timeout=10,
                                         allow_redirects=False)
                    r.raise_for_status()
                    r = self.session.get(self.search_url,
                                         params=params,
                                         timeout=10)
                    r.raise_for_status()
                    store_verification("titlovi", self.session)
                    captcha_passed = True

                if not captcha_passed:
                    logger.exception('RequestException %s', e)
                    break
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(
                        soup.select_one('.results_count b').string)
                except Exception:
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug("No more subtitles found")
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select(
                        'section.titlovi > ul.titlovi > li.subtitleContainer.canEdit'
                    )
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={
                            'data-id': True
                        }).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']
                        # subtitle language
                        match = lang_re.search(
                            sub.select_one('.lang').attrs['src'])
                        if not match:
                            # language flag not recognised, skip this entry
                            continue
                        try:
                            # decode language
                            lang = Language.fromtitlovi(
                                match.group('lang') + match.group('script'))
                        except ValueError:
                            continue

                        # release year or series start year
                        r_year = None
                        match = year_re.search(
                            sub.find(attrs={'data-id': True}).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))

                        # fps
                        fps = None
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')
                        # releases
                        releases = str(
                            sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(
                                lang,
                                page_link,
                                download_link,
                                sid,
                                releases,
                                _title,
                                alt_title=alt_title,
                                season=r_season,
                                episode=r_episode,
                                year=r_year,
                                fps=fps,
                                asked_for_release_group=video.release_group,
                                asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(
                                lang,
                                page_link,
                                download_link,
                                sid,
                                releases,
                                _title,
                                alt_title=alt_title,
                                year=r_year,
                                fps=fps,
                                asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)

                finally:
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles
Example #5
    def request(self, method, url, *args, **kwargs):
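        # Reset the per-request headers, transparently decode Brotli bodies,
        # answer Cloudflare anti-bot challenges, and on a NeedsCaptchaException
        # solve the captcha with a pitcher and submit it to /cdn-cgi/l/chk_captcha.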
        # self.headers = (
        #     OrderedDict(
        #         [
        #             ('User-Agent', self.headers['User-Agent']),
        #             ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        #             ('Accept-Language', 'en-US,en;q=0.5'),
        #             ('Accept-Encoding', 'gzip, deflate'),
        #             ('Connection',  'close'),
        #             ('Upgrade-Insecure-Requests', '1')
        #         ]
        #     )
        # )
        self.headers = self._hdrs.copy()

        resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)
        if resp.headers.get('content-encoding') == 'br' and brotli_available:
            resp._content = brdec(resp._content)

        # Debug request
        if self.debug:
            self.debugRequest(resp)

        # Check if Cloudflare anti-bot is on
        try:
            if self.is_cloudflare_challenge(resp):
                self._was_cf = True
                # Work around if the initial request is not a GET,
                # Supersede with a GET then re-request the original METHOD.
                if resp.request.method != 'GET':
                    self.request('GET', resp.url)
                    resp = self.request(method, url, *args, **kwargs)
                else:
                    resp = self.solve_cf_challenge(resp, **kwargs)
        except NeedsCaptchaException:
            # solve the captcha
            self._was_cf = True
            site_key = re.search(r'data-sitekey="(.+?)"', resp.content)
            challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', resp.content)
            challenge_ray = re.search(r'data-ray="(.+?)"', resp.content)
            if not all([site_key, challenge_s, challenge_ray]):
                raise Exception("cf: Captcha site-key not found!")
            site_key = site_key.group(1)
            challenge_s = challenge_s.group(1)
            challenge_ray = challenge_ray.group(1)

            pitcher = pitchers.get_pitcher()("cf", resp.request.url, site_key,
                                             user_agent=self.headers["User-Agent"],
                                             cookies=self.cookies.get_dict(),
                                             is_invisible=True)

            parsed_url = urlparse(resp.url)
            domain = parsed_url.netloc
            logger.info("cf: %s: Solving captcha", domain)
            result = pitcher.throw()
            if not result:
                raise Exception("cf: Couldn't solve captcha!")

            submit_url = '{}://{}/cdn-cgi/l/chk_captcha'.format(parsed_url.scheme, domain)
            method = resp.request.method

            cloudflare_kwargs = {
                'allow_redirects': False,
                'headers': {'Referer': resp.url},
                'params': OrderedDict(
                    [
                        ('s', challenge_s),
                        ('g-recaptcha-response', result)
                    ]
                )
            }

            return self.request(method, submit_url, **cloudflare_kwargs)

        return resp
Example #6
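    # Another variant of the CloudScraper request wrapper: same Brotli and
    # Cloudflare challenge handling as above, with the captcha path triggered
    # by a "Captcha" ValueError and the result posted to /cdn-cgi/l/chk_captcha.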
    def _request(self, method, url, *args, **kwargs):
        ourSuper = super(CloudScraper, self)
        resp = ourSuper.request(method, url, *args, **kwargs)

        if resp.headers.get('Content-Encoding') == 'br':
            if self.allow_brotli and resp._content:
                resp._content = brotli.decompress(resp.content)
            else:
                logging.warning('Brotli content detected, but the allow_brotli option is disabled; returning the response as-is.')
                return resp

        # Debug request
        if self.debug:
            self.debugRequest(resp)

        # Check if Cloudflare anti-bot is on
        try:
            if self.isChallengeRequest(resp):
                if resp.request.method != 'GET':
                    # Work around if the initial request is not a GET,
                    # Supersede with a GET then re-request the original METHOD.
                    CloudScraper.request(self, 'GET', resp.url)
                    resp = ourSuper.request(method, url, *args, **kwargs)
                else:
                    # Solve Challenge
                    resp = self.sendChallengeResponse(resp, **kwargs)

        except ValueError as e:
            if str(e) == "Captcha":
                parsed_url = urlparse(url)
                domain = parsed_url.netloc
                # solve the captcha
                site_key = re.search(r'data-sitekey="(.+?)"', resp.content)
                challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', resp.content)
                challenge_ray = re.search(r'data-ray="(.+?)"', resp.content)
                if not all([site_key, challenge_s, challenge_ray]):
                    raise Exception("cf: Captcha site-key not found!")
                site_key = site_key.group(1)
                challenge_s = challenge_s.group(1)
                challenge_ray = challenge_ray.group(1)

                pitcher = pitchers.get_pitcher()("cf: %s" % domain, resp.request.url, site_key,
                                                 user_agent=self.headers["User-Agent"],
                                                 cookies=self.cookies.get_dict(),
                                                 is_invisible=True)

                parsed_url = urlparse(resp.url)
                logger.info("cf: %s: Solving captcha", domain)
                result = pitcher.throw()
                if not result:
                    raise Exception("cf: Couldn't solve captcha!")

                submit_url = '{}://{}/cdn-cgi/l/chk_captcha'.format(parsed_url.scheme, domain)
                method = resp.request.method

                cloudflare_kwargs = {
                    'allow_redirects': False,
                    'headers': {'Referer': resp.url},
                    'params': OrderedDict(
                        [
                            ('s', challenge_s),
                            ('g-recaptcha-response', result)
                        ]
                    )
                }

                return CloudScraper.request(self, method, submit_url, **cloudflare_kwargs)