def get_sources(self, video, video_type):
    hosters = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        if video_type == 'movies':
            meta = scraper_utils.parse_movie_link(source_url)
            stream_url = source_url + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {
                'hostname': 'SeriesWatch',
                'host': self._get_direct_hostname(stream_url),
                'class': '',
                'quality': quality,
                'views': None,
                'rating': None,
                'url': BASE_URL + stream_url,
                'direct': True
            }
            if 'format' in meta:
                hoster['format'] = meta['format']
            hosters.append(hoster)
        else:
            # TV: build one hoster per matched episode link
            for episode in self.__match_episode(source_url, video):
                meta = scraper_utils.parse_episode_link(episode['title'])
                stream_url = episode['url'] + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                stream_url = stream_url.replace(self.base_url, '')
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {
                    'hostname': 'SeriesWatch',
                    'host': self._get_direct_hostname(stream_url),
                    'class': '',
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                if 'format' in meta:
                    hoster['format'] = meta['format']
                if 'size' in episode:
                    hoster['size'] = scraper_utils.format_size(int(episode['size']))
                hosters.append(hoster)
    main_scrape.apply_urlresolver(hosters)
    return hosters
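# get_sources() relies on scraper_utils.append_headers() to smuggle HTTP
# headers to the player via Kodi's "url|Header1=value1&Header2=value2"
# convention. A minimal sketch of what such a helper could look like --
# an illustration only, the real scraper_utils implementation may differ:
import urllib

def append_headers(headers):
    # URL-encode each value so characters like '&' or '/' cannot break the suffix
    return '|%s' % '&'.join('%s=%s' % (key, urllib.quote_plus(str(value)))
                            for key, value in headers.items())

# Example: append_headers({'User-Agent': 'Mozilla/5.0'}) -> '|User-Agent=Mozilla%2F5.0'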
def get_url(url, data=None, timeout=20, headers=None):
    if headers is None:
        headers = {}
    # Only send a body when form data was supplied; urlopen(data='') would
    # silently turn every request into a POST.
    post_data = urllib.urlencode(data, doseq=True) if data else None
    if 'User-Agent' not in headers:
        headers['User-Agent'] = scraper_utils.get_ua()
    log_utils.log('URL: |%s| Data: |%s| Headers: |%s|' % (url, post_data, headers),
                  log_utils.LOGDEBUG, COMPONENT)
    try:
        req = urllib2.Request(url)
        for key in headers:
            req.add_header(key, headers[key])
        response = urllib2.urlopen(req, data=post_data, timeout=timeout)
        result = response.read()
        response.close()
    except urllib2.HTTPError as e:
        log_utils.log('ReCaptcha.V2 HTTP Error: %s on url: %s' % (e.code, url),
                      log_utils.LOGWARNING, COMPONENT)
        result = ''
    except urllib2.URLError as e:
        log_utils.log('ReCaptcha.V2 URLError Error: %s on url: %s' % (e, url),
                      log_utils.LOGWARNING, COMPONENT)
        result = ''
    return result
def get_url(url, data=None, timeout=20, headers=None):
    if headers is None:
        headers = {}
    # As above: suppress the request body entirely when no form data was supplied.
    post_data = urllib.urlencode(data, doseq=True) if data else None
    if 'User-Agent' not in headers:
        headers['User-Agent'] = scraper_utils.get_ua()
    logger.log('URL: |%s| Data: |%s| Headers: |%s|' % (url, post_data, headers),
               log_utils.LOGDEBUG)
    try:
        req = urllib2.Request(url)
        for key in headers:
            req.add_header(key, headers[key])
        response = urllib2.urlopen(req, data=post_data, timeout=timeout)
        result = response.read()
        response.close()
    except urllib2.HTTPError as e:
        logger.log('ReCaptcha.V2 HTTP Error: %s on url: %s' % (e.code, url),
                   log_utils.LOGWARNING)
        result = ''
    except urllib2.URLError as e:
        logger.log('ReCaptcha.V2 URLError Error: %s on url: %s' % (e, url),
                   log_utils.LOGWARNING)
        result = ''
    return result
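# Usage sketch for the get_url() variants above (URL and payload are made up
# for illustration). With the empty-data fix, the first call goes out as a
# GET and the second as a POST carrying the urlencoded token:
if __name__ == '__main__':
    page = get_url('http://example.com/recaptcha', timeout=10)
    verdict = get_url('http://example.com/verify',
                      data={'g-recaptcha-response': 'token'},
                      headers={'Referer': 'http://example.com/'})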
def __get_links(self, html):
    hosters = []
    r = re.search(r'salt\("([^"]+)', html)
    if r:
        # un-rotate, decode via __get_f(), then un-rotate again to recover
        # the plaintext sources list
        plaintext = self.__caesar(self.__get_f(self.__caesar(r.group(1), 13)), 13)
        sources = self._parse_sources_list(plaintext)
        for source in sources:
            stream_url = source + scraper_utils.append_headers({
                'User-Agent': scraper_utils.get_ua(),
                'Cookie': self._get_stream_cookies()
            })
            host = self._get_direct_hostname(stream_url)
            hoster = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': sources[source]['quality'],
                'rating': None,
                'views': None,
                'direct': True
            }
            hosters.append(hoster)
    return hosters
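# __get_links() above decodes the salted payload with a private __caesar()
# helper that is not shown here. A plausible stand-in, assuming a classic
# Caesar rotation over ASCII letters only (the site's real scheme may
# differ) -- illustration, not the original implementation:
import string

def caesar(text, shift):
    lo, up = string.ascii_lowercase, string.ascii_uppercase
    out = []
    for ch in text:
        if ch in lo:
            out.append(lo[(lo.index(ch) + shift) % 26])
        elif ch in up:
            out.append(up[(up.index(ch) + shift) % 26])
        else:
            out.append(ch)  # digits and URL punctuation pass through in this sketch
    return ''.join(out)

# With shift 13 this is rot13, so applying it twice is the identity -- which
# matches how __get_links() brackets __get_f() with two __caesar(..., 13) calls.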
def get_url(url, data=None, timeout=20, headers=None):
    if headers is None:
        headers = {}
    # Same empty-data fix as the variants above; note this version has no
    # HTTPError/URLError handling, so network failures propagate to the caller.
    post_data = urllib.urlencode(data, doseq=True) if data else None
    if 'User-Agent' not in headers:
        headers['User-Agent'] = scraper_utils.get_ua()
    log_utils.log('URL: |%s| Data: |%s| Headers: |%s|' % (url, post_data, headers),
                  log_utils.LOGDEBUG)
    req = urllib2.Request(url)
    for key in headers:
        req.add_header(key, headers[key])
    response = urllib2.urlopen(req, data=post_data, timeout=timeout)
    result = response.read()
    response.close()
    return result
def _get_links(self, html):
    hosters = []
    for match in re.finditer(r'file\s*:\s*"([^"]+).*?label\s*:\s*"([^"]+)', html):
        url, resolution = match.groups()
        url += '|User-Agent=%s&Cookie=%s' % (scraper_utils.get_ua(),
                                             self._get_stream_cookies())
        hoster = {
            'multi-part': False,
            'url': url,
            'host': self._get_direct_hostname(url),
            'class': self,
            'quality': scraper_utils.height_get_quality(resolution),
            'rating': None,
            'views': None,
            'direct': True
        }
        hosters.append(hoster)
    return hosters
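# Quick sanity check for the _get_links() regex above against the sort of
# jwplayer setup block it targets (the sample HTML is fabricated):
import re

if __name__ == '__main__':
    sample = 'sources: [{file: "http://cdn.example/v.mp4", label: "720"}]'
    print re.findall(r'file\s*:\s*"([^"]+).*?label\s*:\s*"([^"]+)', sample)
    # -> [('http://cdn.example/v.mp4', '720')]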
def _cached_http_get(self, url, base_url, timeout, cookies=None, data=None,
                     multipart_data=None, headers=None, allow_redirect=True,
                     method=None, require_debrid=False, read_error=False,
                     cache_limit=8):
    # NOTE: the original require_debrid gate (bail out early when no universal
    # debrid resolver is installed) is disabled in this build.
    if cookies is None:
        cookies = {}
    if timeout == 0:
        timeout = None
    if headers is None:
        headers = {}
    if url.startswith('//'):
        url = 'http:' + url
    referer = headers['Referer'] if 'Referer' in headers else base_url
    log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|'
                  % (url, cookies, data, headers), log_utils.LOGDEBUG)
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)
    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data
    try:
        self.cj = self._set_cookies(base_url, cookies)
        if isinstance(url, unicode):
            url = url.encode('utf-8')
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', scraper_utils.get_ua())
        request.add_header('Accept', '*/*')
        request.add_unredirected_header('Host', request.get_host())
        request.add_unredirected_header('Referer', referer)
        for key in headers:
            request.add_header(key, headers[key])
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)
            opener2 = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener2)
        if method is not None:
            request.get_method = lambda: method.upper()
        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            log_utils.log('Response Cookies: %s - %s'
                          % (url, scraper_utils.cookies_as_str(self.cj)),
                          log_utils.LOGDEBUG)
        self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
        self.cj.save(ignore_discard=True)
        if not allow_redirect and (response.getcode() in [301, 302, 303, 307]
                                   or response.info().getheader('Refresh')):
            # surface the redirect target instead of the (unfollowed) body
            if response.info().getheader('Refresh') is not None:
                refresh = response.info().getheader('Refresh')
                return refresh.split(';')[-1].split('url=')[-1]
            else:
                redir_url = response.info().getheader('Location')
                if redir_url.startswith('='):
                    redir_url = redir_url[1:]
                return redir_url
        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log('Response exceeded allowed size. %s => %s / %s'
                          % (url, content_length, MAX_RESPONSE),
                          log_utils.LOGWARNING)
        if method == 'HEAD':
            return ''
        else:
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read(MAX_RESPONSE))
                f = gzip.GzipFile(fileobj=buf)
                html = f.read()
            else:
                html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        html = e.read()
        if CF_CAPCHA_ENABLED and e.code == 403 and 'cf-captcha-bookmark' in html:
            html = cf_captcha.solve(url, self.cj, scraper_utils.get_ua(), self.get_name())
            if not html:
                return ''
        elif e.code == 503 and 'cf-browser-verification' in html:
            html = cloudflare.solve(url, self.cj, scraper_utils.get_ua())
            if not html:
                return ''
        else:
            log_utils.log('Error (%s) during first scraper http get: %s'
                          % (str(e), url), log_utils.LOGWARNING)
            if not read_error:
                return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s'
                      % (str(e), url), log_utils.LOGWARNING)
        return ''
    # self.db_connection().cache_url(url, html, data)
    return html
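# _cached_http_get() above installs a NoRedirection handler that is defined
# elsewhere. One common urllib2 pattern that fits this usage is an error
# processor that hands 3xx responses back untouched, so the caller can read
# the Location/Refresh headers itself. A sketch only; the original class may
# differ:
import urllib2

class NoRedirection(urllib2.HTTPErrorProcessor):
    def http_response(self, request, response):
        # return the raw response (even 301/302/307) instead of following it
        return response

    https_response = http_response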