Example #1
0
def get_cooked_url(url,
                   base_url,
                   timeout,
                   cookies=None,
                   data=None,
                   multipart_data=None,
                   headers=None,
                   allow_redirect=True,
                   cache_limit=8):
    """Fetch ``url`` via urllib2 and return the response body as a string.

    Cookies from ``cookies`` (plus the persisted jar for ``base_url``) are
    attached to the request, and response cookies are saved back to the jar.
    A Cloudflare 503 challenge is retried through ``cloudflare.solve``.
    Returns ``''`` on any error.

    :param url: absolute URL to fetch
    :param base_url: site base URL used to scope the cookie jar
    :param timeout: socket timeout in seconds; 0 means "no timeout"
    :param cookies: extra cookies to set before the request (dict)
    :param data: POST payload; a dict is form-encoded, a string is sent as-is
    :param multipart_data: pre-built multipart body; overrides ``data``
    :param headers: extra request headers (dict); NOTE: mutated in place when
        ``multipart_data`` is given
    :param allow_redirect: when False, return the redirect target URL instead
        of following it
    :param cache_limit: unused here; kept for interface compatibility
    """
    if cookies is None: cookies = {}
    if timeout == 0: timeout = None  # urllib2 interprets None as "no timeout"
    if headers is None: headers = {}
    referer = headers['Referer'] if 'Referer' in headers else url
    if kodi.get_setting('debug') == "true":
        log_utils.log(
            'Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' %
            (url, cookies, data, headers))
    # Form-encode dict payloads; string payloads are assumed already encoded.
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    try:
        cj = _set_cookies(base_url, cookies)
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', _get_ua())
        request.add_unredirected_header('Host', '9movies.to')
        request.add_unredirected_header('Referer', referer)
        for key in headers:
            request.add_header(key, headers[key])
        cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
        else:
            # build_opener() already includes HTTPRedirectHandler by default,
            # so a single cookie-processing opener is sufficient (the original
            # installed a redirect-only opener and immediately replaced it).
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)

        response = urllib2.urlopen(request, timeout=timeout)
        cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            print('Response Cookies: %s - %s' % (url, cookies_as_str(cj)))
        __fix_bad_cookies()
        cj.save(ignore_discard=True)
        if not allow_redirect and (response.getcode() in [301, 302, 303, 307]
                                   or response.info().getheader('Refresh')):
            # Surface the redirect target instead of following it.
            refresh = response.info().getheader('Refresh')
            if refresh is not None:
                return refresh.split(';')[-1].split('url=')[-1]
            return response.info().getheader('Location')

        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log(
                'Response exceeded allowed size. %s => %s / %s' %
                (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        # Never read more than MAX_RESPONSE bytes of the body.
        if response.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read(MAX_RESPONSE))
            html = gzip.GzipFile(fileobj=buf).read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.code == 503 and 'cf-browser-verification' in e.read():
            # Cloudflare anti-bot challenge: solve it and retry.
            html = cloudflare.solve(url, cj, _get_ua())
            if not html:
                return ''
        else:
            log_utils.log(
                'Error (%s) during THE scraper http get: %s' % (str(e), url),
                log_utils.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url),
                      log_utils.LOGWARNING)
        return ''

    return html
Example #2
0
def get_cooked_url(url, base_url, timeout, cookies=None, data=None, multipart_data=None, headers=None, allow_redirect=True, cache_limit=8):
    """Fetch ``url`` via urllib2 and return the response body as a string.

    Cookies from ``cookies`` (plus the persisted jar for ``base_url``) are
    attached to the request, and response cookies are saved back to the jar.
    A Cloudflare 503 challenge is retried through ``cloudflare.solve``.
    Returns ``''`` on any error.

    :param url: absolute URL to fetch
    :param base_url: site base URL used to scope the cookie jar
    :param timeout: socket timeout in seconds; 0 means "no timeout"
    :param cookies: extra cookies to set before the request (dict)
    :param data: POST payload; a dict is form-encoded, a string is sent as-is
    :param multipart_data: pre-built multipart body; overrides ``data``
    :param headers: extra request headers (dict); NOTE: mutated in place when
        ``multipart_data`` is given
    :param allow_redirect: when False, return the redirect target URL instead
        of following it
    :param cache_limit: unused here; kept for interface compatibility
    """
    if cookies is None: cookies = {}
    if timeout == 0: timeout = None  # urllib2 interprets None as "no timeout"
    if headers is None: headers = {}
    referer = headers['Referer'] if 'Referer' in headers else url
    if kodi.get_setting('debug') == "true":
        log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' % (url, cookies, data, headers))
    # Form-encode dict payloads; string payloads are assumed already encoded.
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    try:
        cj = _set_cookies(base_url, cookies)
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', _get_ua())
        request.add_unredirected_header('Host', '9movies.to')
        request.add_unredirected_header('Referer', referer)
        for key in headers:
            request.add_header(key, headers[key])
        cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
        else:
            # build_opener() already includes HTTPRedirectHandler by default,
            # so a single cookie-processing opener is sufficient (the original
            # installed a redirect-only opener and immediately replaced it).
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)

        response = urllib2.urlopen(request, timeout=timeout)
        cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            print('Response Cookies: %s - %s' % (url, cookies_as_str(cj)))
        __fix_bad_cookies()
        cj.save(ignore_discard=True)
        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or response.info().getheader('Refresh')):
            # Surface the redirect target instead of following it.
            refresh = response.info().getheader('Refresh')
            if refresh is not None:
                return refresh.split(';')[-1].split('url=')[-1]
            return response.info().getheader('Location')

        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log('Response exceeded allowed size. %s => %s / %s' % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        # Never read more than MAX_RESPONSE bytes of the body.
        if response.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read(MAX_RESPONSE))
            html = gzip.GzipFile(fileobj=buf).read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.code == 503 and 'cf-browser-verification' in e.read():
            # Cloudflare anti-bot challenge: solve it and retry.
            html = cloudflare.solve(url, cj, _get_ua())
            if not html:
                return ''
        else:
            log_utils.log('Error (%s) during THE scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
        return ''

    return html
Example #3
0
    def _cached_http_get(self,
                         url,
                         base_url,
                         timeout,
                         cookies=None,
                         data=None,
                         multipart_data=None,
                         headers=None,
                         allow_redirect=True,
                         method=None,
                         require_debrid=False,
                         read_error=False,
                         cache_limit=8):
        """Fetch ``url`` and return the response body as a string.

        Attaches cookies for ``base_url`` from the persistent jar, saves
        response cookies back to it, and handles Cloudflare 503/403
        challenges by delegating to ``cloudflare`` / ``cf_captcha``.
        Returns ``''`` on error, or the raw error body when ``read_error``
        is True.

        :param url: URL to fetch; scheme-relative ``//`` URLs get ``http:``
        :param base_url: site base URL used to scope the cookie jar and as
            the default Referer
        :param timeout: socket timeout in seconds; 0 means "no timeout"
        :param cookies: extra cookies to set before the request (dict)
        :param data: POST payload; a dict is form-encoded, a string is
            sent as-is
        :param multipart_data: pre-built multipart body; overrides ``data``
        :param headers: extra request headers (dict); NOTE: mutated in
            place when ``multipart_data`` is given
        :param allow_redirect: when False, return the redirect target URL
            instead of following it
        :param method: HTTP verb override (e.g. 'HEAD'); 'HEAD' returns ''
        :param require_debrid: currently unused -- the debrid-resolver gate
            was commented out upstream
        :param read_error: when True, return the HTTP error body instead of
            '' for non-challenge HTTP errors
        :param cache_limit: unused here; kept for interface compatibility
        """
        if cookies is None: cookies = {}
        if timeout == 0: timeout = None  # urllib2 interprets None as "no timeout"
        if headers is None: headers = {}
        if url.startswith('//'): url = 'http:' + url
        referer = headers['Referer'] if 'Referer' in headers else base_url
        log_utils.log(
            'Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' %
            (url, cookies, data, headers), log_utils.LOGDEBUG)
        # Form-encode dict payloads; string payloads are assumed already
        # encoded.
        if data is not None and not isinstance(data, basestring):
            data = urllib.urlencode(data, True)

        if multipart_data is not None:
            headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
            data = multipart_data

        try:
            self.cj = self._set_cookies(base_url, cookies)
            if isinstance(url, unicode):
                url = url.encode('utf-8')

            request = urllib2.Request(url, data=data)
            request.add_header('User-Agent', scraper_utils.get_ua())
            request.add_header('Accept', '*/*')
            request.add_unredirected_header('Host', request.get_host())
            request.add_unredirected_header('Referer', referer)
            for key in headers:
                request.add_header(key, headers[key])
            self.cj.add_cookie_header(request)
            if not allow_redirect:
                opener = urllib2.build_opener(NoRedirection)
            else:
                # build_opener() already includes HTTPRedirectHandler by
                # default, so a single cookie-processing opener is
                # sufficient (the original installed a redirect-only opener
                # and immediately replaced it).
                opener = urllib2.build_opener(
                    urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener)
            if method is not None: request.get_method = lambda: method.upper()
            response = urllib2.urlopen(request, timeout=timeout)
            self.cj.extract_cookies(response, request)
            if kodi.get_setting('cookie_debug') == 'true':
                log_utils.log(
                    'Response Cookies: %s - %s' %
                    (url, scraper_utils.cookies_as_str(self.cj)),
                    log_utils.LOGDEBUG)

            self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
            self.cj.save(ignore_discard=True)
            if not allow_redirect and (
                    response.getcode() in [301, 302, 303, 307]
                    or response.info().getheader('Refresh')):
                # Surface the redirect target instead of following it.
                refresh = response.info().getheader('Refresh')
                if refresh is not None:
                    return refresh.split(';')[-1].split('url=')[-1]
                redir_url = response.info().getheader('Location')
                if redir_url.startswith('='):
                    # Some servers emit a malformed "=URL" Location value.
                    redir_url = redir_url[1:]
                return redir_url

            content_length = response.info().getheader('Content-Length', 0)
            if int(content_length) > MAX_RESPONSE:
                log_utils.log(
                    'Response exceeded allowed size. %s => %s / %s' %
                    (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

            if method == 'HEAD':
                return ''
            # Never read more than MAX_RESPONSE bytes of the body.
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read(MAX_RESPONSE))
                html = gzip.GzipFile(fileobj=buf).read()
            else:
                html = response.read(MAX_RESPONSE)
        except urllib2.HTTPError as e:
            # Read the error body exactly once: challenge detection and the
            # read_error fallback both need it.
            html = e.read()
            if CF_CAPCHA_ENABLED and e.code == 403 and 'cf-captcha-bookmark' in html:
                # Cloudflare captcha challenge: solve it and retry.
                html = cf_captcha.solve(url, self.cj, scraper_utils.get_ua(),
                                        self.get_name())
                if not html:
                    return ''
            elif e.code == 503 and 'cf-browser-verification' in html:
                # Cloudflare anti-bot challenge: solve it and retry.
                html = cloudflare.solve(url, self.cj, scraper_utils.get_ua())
                if not html:
                    return ''
            else:
                log_utils.log(
                    'Error (%s) during first scraper http get: %s' %
                    (str(e), url), log_utils.LOGWARNING)
                if not read_error:
                    return ''
        except Exception as e:
            log_utils.log(
                'Error (%s) during scraper http get: %s' % (str(e), url),
                log_utils.LOGWARNING)
            return ''

        # TODO(review): result caching (db_connection().cache_url) is
        # disabled; cache_limit is accepted but ignored.
        return html