def _get_episode_url(self, show_url, video):
    params = urlparse.parse_qs(show_url)
    cmd = '{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", "params": {"tvshowid": %s, "season": %s, "filter": {"field": "%s", "operator": "is", "value": "%s"}, "limits": { "start" : 0, "end": 25 }, "properties" : ["title", "season", "episode", "file", "streamdetails"], "sort": { "order": "ascending", "method": "label", "ignorearticle": true }}, "id": "libTvShows"}'
    base_url = 'video_type=%s&id=%s'
    episodes = []
    force_title = scraper_utils.force_title(video)
    if not force_title:
        run = cmd % (params['id'][0], video.season, 'episode', video.episode)
        meta = xbmc.executeJSONRPC(run)
        meta = scraper_utils.parse_json(meta)
        log_utils.log('Episode Meta: %s' % (meta), log_utils.LOGDEBUG)
        if 'result' in meta and 'episodes' in meta['result']:
            episodes = meta['result']['episodes']
    else:
        log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and not episodes:
        run = cmd % (params['id'][0], video.season, 'title', video.ep_title)
        meta = xbmc.executeJSONRPC(run)
        meta = scraper_utils.parse_json(meta)
        log_utils.log('Episode Title Meta: %s' % (meta), log_utils.LOGDEBUG)
        if 'result' in meta and 'episodes' in meta['result']:
            episodes = meta['result']['episodes']

    for episode in episodes:
        if episode['file'].endswith('.strm'):
            continue
        return base_url % (video.video_type, episode['episodeid'])
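# Every snippet in this collection funnels raw responses through
# scraper_utils.parse_json(). Its real implementation isn't shown here; the
# sketch below is an assumption inferred from how callers use it (they index
# the result immediately, so failures must come back as an empty container).
import json


def parse_json_sketch(html, url=''):
    # Hypothetical stand-in for scraper_utils.parse_json: decode the payload,
    # and on any failure note the offending URL and return {} so callers can
    # .get() safely without None checks.
    try:
        if not html:
            return {}
        return json.loads(html)
    except ValueError:
        print('Invalid JSON returned for: %s' % (url))
        return {}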
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        for server_list in dom_parser.parse_dom(html, 'ul', {'class': 'episodes'}):
            for hash_id in dom_parser.parse_dom(server_list, 'a', ret='data-id'):
                now = time.localtime()
                url = urlparse.urljoin(self.base_url, HASH_URL)
                url = url % (hash_id, now.tm_hour + now.tm_min)
                html = self._http_get(url, headers=XHR, cache_limit=.5)
                js_result = scraper_utils.parse_json(html, url)
                if 'videoUrlHash' in js_result and 'grabber' in js_result:
                    query = {'flash': 1, 'json': 1, 's': now.tm_min, 'link': js_result['videoUrlHash'], '_': int(time.time())}
                    query['link'] = query['link'].replace('\/', '/')
                    grab_url = js_result['grabber'].replace('\/', '/')
                    grab_url += '?' + urllib.urlencode(query)
                    html = self._http_get(grab_url, headers=XHR, cache_limit=.5)
                    js_result = scraper_utils.parse_json(html, grab_url)
                    for result in js_result:
                        if 'label' in result:
                            quality = scraper_utils.height_get_quality(result['label'])
                        else:
                            quality = scraper_utils.gv_get_quality(result['file'])
                        sources[result['file']] = quality

    for source in sources:
        hoster = {'multi-part': False, 'host': self._get_direct_hostname(source), 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': source, 'direct': True}
        hosters.append(hoster)

    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        for server_list in dom_parser.parse_dom(html, 'ul', {'class': 'episodes'}):
            labels = dom_parser.parse_dom(server_list, 'a')
            hash_ids = dom_parser.parse_dom(server_list, 'a', ret='data-id')
            for label, hash_id in zip(labels, hash_ids):
                if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(label, video.episode):
                    continue

                now = time.localtime()
                url = urlparse.urljoin(self.base_url, HASH_URL)
                url = url % (hash_id, now.tm_hour + now.tm_min)
                html = self._http_get(url, headers=XHR, cache_limit=.5)
                js_result = scraper_utils.parse_json(html, url)
                if 'videoUrlHash' in js_result and 'grabber' in js_result:
                    query = {'flash': 1, 'json': 1, 's': now.tm_min, 'link': js_result['videoUrlHash'], '_': int(time.time())}
                    query['link'] = query['link'].replace('\/', '/')
                    grab_url = js_result['grabber'].replace('\/', '/')
                    grab_url += '?' + urllib.urlencode(query)
                    html = self._http_get(grab_url, headers=XHR, cache_limit=.5)
                    js_result = scraper_utils.parse_json(html, grab_url)
                    for result in js_result:
                        if 'label' in result:
                            quality = scraper_utils.height_get_quality(result['label'])
                        else:
                            quality = scraper_utils.gv_get_quality(result['file'])
                        sources[result['file']] = quality

    for source in sources:
        hoster = {'multi-part': False, 'host': self._get_direct_hostname(source), 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': source, 'direct': True}
        hosters.append(hoster)

    return hosters
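# __episode_match() is called above but not defined anywhere in this
# collection. The helper below is a minimal sketch of what it might look
# like, assuming the anchor's content carries the episode number (e.g.
# "Episode 5"); the name and matching rule are assumptions, not the site's
# actual markup contract.
import re


def episode_match_sketch(label_html, episode):
    # Pull the first integer out of the server-list label and compare it to
    # the episode being searched for.
    match = re.search('(\d+)', label_html)
    return bool(match) and int(match.group(1)) == int(episode)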
def __get_gk_links(self, html, page_url): sources = {} for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}): film_id = dom_parser.parse_dom(link, 'a', ret='data-film') name_id = dom_parser.parse_dom(link, 'a', ret='data-name') server_id = dom_parser.parse_dom(link, 'a', ret='data-server') if film_id and name_id and server_id: data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]} headers = XHR headers['Referer'] = page_url url = urlparse.urljoin(self.base_url, LINK_URL) html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 's' in js_data: url = urlparse.urljoin(self.base_url, LINK_URL2) params = {'u': js_data['s'], 'w': '100%', 'h': 420} html = self._http_get(url, params=params, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 'data' in js_data and js_data['data']: if isinstance(js_data['data'], basestring): sources[js_data['data']] = QUALITIES.HIGH else: for link in js_data['data']: stream_url = link['files'] if self._get_direct_hostname(stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'quality' in link: quality = scraper_utils.height_get_quality(link['quality']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def _parse_google(self, link):
    sources = []
    html = self._http_get(link, cache_limit=.25)
    match = re.search('pid=([^&]+)', link)
    if match:
        vid_id = match.group(1)
        sources = self.__parse_gplus(vid_id, html, link)
    else:
        if 'drive.google' in link or 'docs.google' in link:
            sources = self._parse_gdocs(link)
        if 'picasaweb' in link:
            i = link.rfind('#')
            if i > -1:
                link_id = link[i + 1:]
            else:
                link_id = ''
            match = re.search('feedPreload:\s*(.*}]}})},', html, re.DOTALL)
            if match:
                js = scraper_utils.parse_json(match.group(1), link)
                for item in js['feed']['entry']:
                    if not link_id or item['gphoto$id'] == link_id:
                        for media in item['media']['content']:
                            if media['type'].startswith('video'):
                                sources.append(media['url'].replace('%3D', '='))
            else:
                match = re.search('preload\'?:\s*(.*}})},', html, re.DOTALL)
                if match:
                    js = scraper_utils.parse_json(match.group(1), link)
                    for media in js['feed']['media']['content']:
                        if media['type'].startswith('video'):
                            sources.append(media['url'].replace('%3D', '='))

    sources = list(set(sources))
    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        html = self.__get_players(html, page_url)
        players = list(set(re.findall("load_player\(\s*'([^']+)'\s*,\s*'?(\d+)\s*'?", html)))
        player_url = urlparse.urljoin(self.base_url, PLAYER_URL)
        for link_id, height in players:
            params = {'id': link_id, 'quality': height, '_': int(time.time() * 1000)}
            player_url2 = player_url + '?' + urllib.urlencode(params)
            headers = {'Referer': page_url, 'Accept-Encoding': 'gzip, deflate', 'Server': 'cloudflare-nginx', 'Accept-Formating': 'application/json, text/javascript'}
            headers.update(XHR)
            html = self._http_get(player_url2, headers=headers, cache_limit=0)
            js_data = scraper_utils.parse_json(html, player_url)
            if js_data.get('playlist', ''):
                link_url = js_data['playlist']
            else:
                link_url = js_data.get('link', '')

            if link_url:
                headers = {'Referer': page_url}
                html = self._http_get(link_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
                if html.startswith('http'):
                    streams = [html]
                else:
                    headers = {'Referer': page_url}
                    html = self._http_get(link_url, headers=headers, cache_limit=0)
                    js_data = scraper_utils.parse_json(html, link_url)
                    try:
                        streams = [source['file'] for source in js_data[0]['sources']]
                    except:
                        streams = []

                for stream in streams:
                    if self._get_direct_hostname(stream) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream)
                        sources[stream] = {'quality': quality, 'direct': True}
                    else:
                        if height != '0':
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HIGH
                        sources[stream] = {'quality': quality, 'direct': False}

    for source in sources:
        direct = sources[source]['direct']
        quality = sources[source]['quality']
        if direct:
            host = self._get_direct_hostname(source)
        else:
            host = urlparse.urlparse(source).hostname
        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)

    return hosters
def _parse_google(self, link):
    sources = []
    html = self._http_get(link, cache_limit=.5)
    match = re.search('pid=([^&]+)', link)
    if match:
        vid_id = match.group(1)
        match = re.search('return\s+(\[\[.*?)\s*}}', html, re.DOTALL)
        if match:
            try:
                js = scraper_utils.parse_json(match.group(1), link)
                for item in js[1]:
                    vid_match = False
                    for e in item:
                        if e == vid_id:
                            vid_match = True

                        if vid_match:
                            if isinstance(e, dict):
                                for key in e:
                                    for item2 in e[key]:
                                        try:
                                            for item3 in item2:
                                                for item4 in item3:
                                                    if isinstance(item4, basestring):
                                                        for match in re.finditer('url=([^&]+)', item4):
                                                            sources.append(urllib.unquote(match.group(1)))
                                        except Exception as ex:  # renamed so the loop variable |e| isn't clobbered
                                            log_utils.log('Exception during google plus parse: %s' % (ex), log_utils.LOGDEBUG)
            except Exception as e:
                log_utils.log('Google Plus Parse failure: %s - %s' % (link, e), log_utils.LOGWARNING)
    else:
        if 'picasaweb' in link:
            i = link.rfind('#')
            if i > -1:
                link_id = link[i + 1:]
            else:
                link_id = ''
            match = re.search('feedPreload:\s*(.*}]}})},', html, re.DOTALL)
            if match:
                js = scraper_utils.parse_json(match.group(1), link)
                for item in js['feed']['entry']:
                    if not link_id or item['gphoto$id'] == link_id:
                        for media in item['media']['content']:
                            if media['type'].startswith('video'):
                                sources.append(media['url'].replace('%3D', '='))
            else:
                match = re.search('preload\'?:\s*(.*}})},', html, re.DOTALL)
                if match:
                    js = scraper_utils.parse_json(match.group(1), link)
                    for media in js['feed']['media']['content']:
                        if media['type'].startswith('video'):
                            sources.append(media['url'].replace('%3D', '='))

    sources = list(set(sources))
    return sources
def _parse_google(self, link):
    sources = []
    html = self._http_get(link, cache_limit=0.5)
    match = re.search("pid=([^&]+)", link)
    if match:
        vid_id = match.group(1)
        match = re.search("return\s+(\[\[.*?)\s*}}", html, re.DOTALL)
        if match:
            try:
                js = scraper_utils.parse_json(match.group(1), link)
                for item in js[1]:
                    vid_match = False
                    for e in item:
                        if e == vid_id:
                            vid_match = True

                        if vid_match:
                            if isinstance(e, dict):
                                for key in e:
                                    for item2 in e[key]:
                                        try:
                                            for item3 in item2:
                                                for item4 in item3:
                                                    if isinstance(item4, basestring):
                                                        for match in re.finditer("url=([^&]+)", item4):
                                                            sources.append(urllib.unquote(match.group(1)))
                                        except Exception as ex:  # renamed so the loop variable |e| isn't clobbered
                                            log_utils.log("Exception during google plus parse: %s" % (ex), log_utils.LOGDEBUG)
            except Exception as e:
                log_utils.log("Google Plus Parse failure: %s - %s" % (link, e), log_utils.LOGWARNING)
    else:
        i = link.rfind("#")
        if i > -1:
            link_id = link[i + 1:]
            match = re.search("feedPreload:\s*(.*}]}})},", html, re.DOTALL)
            if match:
                js = scraper_utils.parse_json(match.group(1), link)
                for item in js["feed"]["entry"]:
                    if item["gphoto$id"] == link_id:
                        for media in item["media"]["content"]:
                            if media["type"].startswith("video"):
                                sources.append(media["url"].replace("%3D", "="))
        else:
            match = re.search("preload'?:\s*(.*}})},", html, re.DOTALL)
            if match:
                js = scraper_utils.parse_json(match.group(1), link)
                for media in js["feed"]["media"]["content"]:
                    if media["type"].startswith("video"):
                        sources.append(media["url"].replace("%3D", "="))

    return sources
def _parse_google(self, link):
    sources = []
    html = self._http_get(link, cache_limit=.5)
    match = re.search('pid=([^&]+)', link)
    if match:
        vid_id = match.group(1)
        match = re.search('return\s+(\[\[.*?)\s*}}', html, re.DOTALL)
        if match:
            try:
                js = scraper_utils.parse_json(match.group(1), link)
                for item in js[1]:
                    vid_match = False
                    for e in item:
                        if e == vid_id:
                            vid_match = True

                        if vid_match:
                            if isinstance(e, dict):
                                for key in e:
                                    for item2 in e[key]:
                                        try:
                                            for item3 in item2:
                                                for item4 in item3:
                                                    if isinstance(item4, basestring):
                                                        for match in re.finditer('url=([^&]+)', item4):
                                                            sources.append(urllib.unquote(match.group(1)))
                                        except Exception as ex:  # renamed so the loop variable |e| isn't clobbered
                                            log_utils.log('Exception during google plus parse: %s' % (ex), log_utils.LOGDEBUG)
            except Exception as e:
                log_utils.log('Google Plus Parse failure: %s - %s' % (link, e), log_utils.LOGWARNING)
    else:
        i = link.rfind('#')
        if i > -1:
            link_id = link[i + 1:]
            match = re.search('feedPreload:\s*(.*}]}})},', html, re.DOTALL)
            if match:
                js = scraper_utils.parse_json(match.group(1), link)
                for item in js['feed']['entry']:
                    if item['gphoto$id'] == link_id:
                        for media in item['media']['content']:
                            if media['type'].startswith('video'):
                                sources.append(media['url'].replace('%3D', '='))
        else:
            match = re.search('preload\'?:\s*(.*}})},', html, re.DOTALL)
            if match:
                js = scraper_utils.parse_json(match.group(1), link)
                for media in js['feed']['media']['content']:
                    if media['type'].startswith('video'):
                        sources.append(media['url'].replace('%3D', '='))

    return sources
def _http_get(self, url, params=None, data=None, allow_redirect=True, cache_limit=8):
    if not self.username or not self.password:
        return {}

    if data is None: data = {}
    data.update({'customer_id': self.username, 'pin': self.password})
    result = super(self.__class__, self)._http_get(url, params=params, data=data, allow_redirect=allow_redirect, cache_limit=cache_limit)
    js_result = scraper_utils.parse_json(result, url)
    if 'status' in js_result and js_result['status'] == 'error':
        logger.log('Premiumize V2 Scraper Error: %s - (%s)' % (url, js_result.get('message', 'Unknown Error')), log_utils.LOGWARNING)
        js_result = {}

    return js_result
def search(self, video_type, title, year, season=''):
    self.__get_token()
    results = []
    search_url = urlparse.urljoin(self.base_url, '/api/v1/caut')
    timestamp = int(time.time() * 1000)
    query = {'q': title, 'limit': '100', 'timestamp': timestamp, 'verifiedCheck': self.__token}
    html = self._http_get(search_url, data=query, headers=XHR, cache_limit=1)
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        media_type = 'TV SHOW'
    else:
        media_type = 'MOVIE'

    for item in scraper_utils.parse_json(html, search_url):
        if item['meta'].upper().startswith(media_type):
            match_year = str(item['year']) if 'year' in item and item['year'] else ''
            if not year or not match_year or year == match_year:
                result = {'title': item['title'], 'url': scraper_utils.pathify_url(item['permalink']), 'year': match_year}
                results.append(result)

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    media_type = 'series' if video_type == VIDEO_TYPES.TVSHOW else 'movie'
    search_url = scraper_utils.urljoin(self.base_url, '/typeahead/%s' % (urllib.quote(title)))
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    html = self._http_get(search_url, headers=headers, require_debrid=True, cache_limit=.5)
    for item in scraper_utils.parse_json(html, search_url):
        match_title = item.get('title')
        match_url = item.get('link')
        match_year = ''  # the typeahead response carries no year, so the year filter below never rejects anything
        if item.get('type') == media_type and match_title and match_url:
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)

    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        pattern = '\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}'
        match = re.search(pattern, html)
        if match:
            post_url, post_data = match.groups()
            data = self.__get_data(post_data)
            html = self._http_get(post_url, data=data, cache_limit=.5)
            js_result = scraper_utils.parse_json(html, post_url)
            for key in js_result:
                stream_url = js_result[key]
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(key)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)

    return hosters
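# __get_data() above converts the captured "$.post(url, {...})" body into a
# form-data dict, but its definition isn't included in this collection. This
# is a guess at its shape, based solely on the regex that feeds it (a JS
# object literal of  key: 'value'  pairs); treat the name and regex as
# assumptions.
import re


def get_data_sketch(post_data):
    data = {}
    # Match  key: 'value'  or  key: "value"  pairs inside the object body.
    for key, value in re.findall('''(\w+)\s*:\s*['"]([^'"]*)['"]''', post_data):
        data[key] = value
    return data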
def get_sources(self, video):
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
    if not match: return hosters
    view_id = match.group(1)

    for lang in ['or', 'tr']:
        subs = lang == 'tr'  # 'or' appears to be the original-audio stream, 'tr' the Turkish-subtitled one
        view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
        html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        html = html.strip()
        html = re.sub(r'\\n|\\t', '', html)
        match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
        if match:
            raw_data = match.group(1)
            raw_data = raw_data.replace('\\', '')
        else:
            raw_data = html

        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' not in js_data: continue

        src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
        if not src: continue

        html = self._http_get(src[0].attrs['src'], cache_limit=.25)
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
            src = attrs['src']
            if not src.startswith('http'): continue
            sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs})

        sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
        if sources: break

    for source in sources:
        direct = source.get('direct', True)
        stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                continue
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(source['label'])

        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)

    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, url)
        if 'error' in js_result:
            log_utils.log('DD.tv API error: "%s" @ %s' % (js_result['error'], url), log_utils.LOGWARNING)
            return hosters

        sxe_str = '.S%02dE%02d.' % (int(video.season), int(video.episode))
        try:
            airdate_str = video.ep_airdate.strftime('.%Y.%m.%d.')
        except:
            airdate_str = ''

        for result in js_result:
            # an empty airdate_str is a substring of every release name, so only
            # use the airdate check when an airdate is actually available
            if sxe_str not in result['release'] and (not airdate_str or airdate_str not in result['release']):
                continue

            if result['quality'] in self.q_order:
                for key in result['links']:
                    url = result['links'][key][0]
                    if re.search('\.rar(\.|$)', url):
                        continue

                    hostname = urlparse.urlparse(url).hostname
                    hoster = {'multi-part': False, 'class': self, 'views': None, 'url': url, 'rating': None, 'host': hostname, 'quality': QUALITY_MAP[result['quality']], 'direct': False}
                    hoster['dd_qual'] = result['quality']
                    if 'x265' in result['release'] and result['quality'] != '1080P-X265':
                        hoster['dd_qual'] += '-x265'
                    hosters.append(hoster)

    return hosters
def __get_movie_sources(self, page_url):
    sources = []
    headers = {'Referer': ''}
    html = self._http_get(page_url, headers=headers, cache_limit=.5)
    match = re.search('APP_PATH\+"([^"]+)"\+([^"]+)\+"([^"]+)"', html)
    if match:
        url1, var, url2 = match.groups()
        match = re.search("%s\s*=\s*'([^']+)" % (var), html)
        if match:
            headers = {'Referer': page_url}
            headers.update(XHR)
            contents_url = '/' + url1 + match.group(1) + url2
            contents_url = scraper_utils.urljoin(self.base_url, contents_url)
            js_data = scraper_utils.parse_json(self._http_get(contents_url, headers=headers, cache_limit=2), contents_url)
            if js_data:
                sources = [item['src'] for item in js_data if 'src' in item]

    match = re.search("openloadLink\s*=\s*'([^']+)", html, re.I)
    if match:
        sources.append(match.group(1))

    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        params = urlparse.parse_qs(source_url)
        if video.video_type == VIDEO_TYPES.MOVIE:
            cmd = '{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovieDetails", "params": {"movieid": %s, "properties" : ["file", "playcount", "streamdetails"]}, "id": "libMovies"}'
            result_key = 'moviedetails'
        else:
            cmd = '{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodeDetails", "params": {"episodeid": %s, "properties" : ["file", "playcount", "streamdetails"]}, "id": "libTvShows"}'
            result_key = 'episodedetails'

        run = cmd % (params['id'][0])
        meta = xbmc.executeJSONRPC(run)
        meta = scraper_utils.parse_json(meta)
        log_utils.log('Source Meta: %s' % (meta), log_utils.LOGDEBUG)
        if 'result' in meta and result_key in meta['result']:
            details = meta['result'][result_key]
            # rank the quality labels by their sort key, then pick the one the user configured as default
            def_quality = [item[0] for item in sorted(SORT_KEYS['quality'].items(), key=lambda x: x[1])][self.def_quality]
            host = {'multi-part': False, 'class': self, 'url': details['file'], 'host': 'XBMC Library', 'quality': def_quality, 'views': details['playcount'], 'rating': None, 'direct': True}
            stream_details = details['streamdetails']
            if len(stream_details['video']) > 0 and 'width' in stream_details['video'][0]:
                host['quality'] = scraper_utils.width_get_quality(stream_details['video'][0]['width'])
            hosters.append(host)

    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=0)
    match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
    if not match: return hosters

    video_id = match.group(1)
    headers = {'Referer': page_url}
    headers.update(XHR)
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

    vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
    html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
    for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
        match = re.search('url=(.*)', value)
        if not match: continue

        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        hosters.append(hoster)

    return hosters
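# Several snippets here tack request headers onto stream URLs via
# scraper_utils.append_headers(). The '|Key=Value&Key2=Value2' suffix is
# Kodi's standard convention for attaching headers to a playable URL; the
# helper below is an assumed implementation of that convention, not the
# add-on's actual code.
import urllib


def append_headers_sketch(headers):
    # Render {'User-Agent': 'Foo'} as '|User-Agent=Foo', quoting values so
    # they survive inside the URL.
    return '|%s' % ('&'.join(['%s=%s' % (key, urllib.quote_plus(value))
                              for key, value in headers.items()]))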
def __get_links(self, url, video):
    hosters = []
    seen_urls = set()
    for search_type in SEARCH_TYPES:
        search_url, params = self.__translate_search(url, search_type)
        if not search_url: continue

        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        if js_result.get('status') != 'success':
            logger.log('Alluc API Error: |%s|%s|: %s' % (search_url, params, js_result.get('message', 'Unknown Error')), log_utils.LOGWARNING)
            continue

        for result in js_result['result']:
            stream_url = result['hosterurls'][0]['url']
            if len(result['hosterurls']) > 1: continue
            if result['extension'] == 'rar': continue
            if stream_url in seen_urls: continue

            if scraper_utils.release_check(video, result['title']):
                host = urlparse.urlsplit(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, self._get_title_quality(result['title']))
                hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': False}
                hoster['extra'] = scraper_utils.cleanse_title(result['title'])
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(hoster['extra'])
                else:
                    meta = scraper_utils.parse_episode_link(hoster['extra'])
                if 'format' in meta: hoster['format'] = meta['format']
                hosters.append(hoster)
                seen_urls.add(stream_url)

    return hosters
def __get_links_from_playlist(self, grab_url, headers):
    sources = {}
    grab_url = grab_url.replace('\\', '')
    grab_html = self._http_get(grab_url, headers=headers, cache_limit=.5)
    js_data = scraper_utils.parse_json(grab_html, grab_url)
    try:
        playlist = js_data['playlist'][0]['sources']
    except:
        playlist = []

    for item in playlist:
        stream_url = item.get('file')
        if stream_url:
            if stream_url.startswith('/'):
                stream_url = scraper_utils.urljoin(self.base_url, stream_url)
                redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD')
                if redir_url.startswith('http'):
                    stream_url = redir_url

            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in item:
                quality = scraper_utils.height_get_quality(item['label'])
            else:
                quality = QUALITIES.HIGH

            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
            sources[stream_url] = {'quality': quality, 'direct': True}
            if not kodi.get_setting('scraper_url'): break

    return sources
def __get_params(self, grab_url, episode_id, movie_id, page_url):
    hash_id, token, ts = None, None, None
    url = scraper_utils.urljoin(grab_url, '/token_v2.php', replace_path=True)
    headers = {'Referer': page_url}
    params = {'eid': episode_id, 'mid': movie_id, '_': int(time.time() * 1000)}
    html = self._http_get(url, params=params, headers=headers, cache_limit=0)
    if aa_decoder.is_aaencoded(html):
        # the token endpoint sometimes responds with aaencoded JS instead of JSON
        html = aa_decoder.decode(html)
        match1 = re.search("hash\s*=\s*'([^']+)", html)
        match2 = re.search("token\s*=\s*'([^']+)", html)
        match3 = re.search("_\s*=\s*'([^']+)", html)
        if match1 and match2 and match3:
            hash_id = match1.group(1)
            token = match2.group(1)
            ts = match3.group(1)
    else:
        js_data = scraper_utils.parse_json(html, url)
        hash_id, token, ts = js_data.get('hash'), js_data.get('token'), js_data.get('_')

    return hash_id, token, ts
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    referer = scraper_utils.urljoin(self.base_url, '/search/?q=%s')
    referer = referer % (urllib.quote_plus(title))
    headers = {'Referer': referer}
    headers.update(XHR)
    params = {'searchTerm': title, 'type': SEARCH_TYPES[video_type], 'limit': 500}
    html = self._http_get(search_url, params=params, headers=headers, auth=False, cache_limit=2)
    js_data = scraper_utils.parse_json(html, search_url)
    if 'results' in js_data:
        for result in js_data['results']:
            match_year = str(result.get('year', ''))
            match_url = result.get('permalink', '')
            match_title = result.get('title', '')
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)

    return results
def __grab_links(self, grab_url, query, referer):
    try:
        sources = {}
        query['mobile'] = '0'
        query.update(self.__get_token(query))
        grab_url = grab_url + '?' + urllib.urlencode(query)
        headers = dict(XHR)  # copy so the shared XHR constant isn't mutated
        headers['Referer'] = referer
        html = self._http_get(grab_url, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, grab_url)
        if 'data' in js_data:
            for link in js_data['data']:
                stream_url = link['file']
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in link:
                    quality = scraper_utils.height_get_quality(link['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'direct': True, 'quality': quality}
    except Exception as e:
        log_utils.log('9Movies Link Parse Error: %s' % (e), log_utils.LOGWARNING)

    return sources
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/ajax/search.php')
    timestamp = int(time.time() * 1000)
    query = {'q': title, 'limit': '100', 'timestamp': timestamp, 'verifiedCheck': ''}
    html = self._http_get(search_url, data=query, headers=XHR, cache_limit=1)
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        media_type = 'TV SHOW'
    else:
        media_type = 'MOVIE'

    js_data = scraper_utils.parse_json(html, search_url)
    for item in js_data:
        if item['meta'].upper().startswith(media_type):
            result = {'title': scraper_utils.cleanse_title(item['title']), 'url': scraper_utils.pathify_url(item['permalink']), 'year': ''}
            results.append(result)

    return results
def __get_links_from_json2(self, url, page_url, video_type):
    sources = {}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(url, headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        playlist = js_data.get('playlist', [])
        for source in playlist[0].get('sources', []):
            stream_url = source['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                if video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])

            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during yesmovies extract: %s' % (e), log_utils.LOGDEBUG)

    return sources
def __get_links(self, url, video):
    hosters = []
    search_url = self.__translate_search(url)
    html = self._http_get(search_url, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, search_url)
    if 'data' in js_result:
        for item in js_result['data']:
            post_hash, size, post_title, ext, duration = item['0'], item['4'], item['10'], item['11'], item['14']
            # each check flags a reason to exclude the post; any True means skip it
            checks = [False] * 6
            if not scraper_utils.title_check(video, post_title): checks[0] = True
            if 'alangs' in item and item['alangs'] and 'eng' not in item['alangs']: checks[1] = True  # no English audio
            if re.match('^\d+s', duration) or re.match('^[0-5]m', duration): checks[2] = True  # under ~6 minutes; too short to be the real thing
            if 'passwd' in item and item['passwd']: checks[3] = True
            if 'virus' in item and item['virus']: checks[4] = True
            if 'type' in item and item['type'].upper() != 'VIDEO': checks[5] = True
            if any(checks):
                log_utils.log('EasyNews Post excluded: %s - |%s|' % (checks, item), log_utils.LOGDEBUG)
                continue

            stream_url = urllib.quote('%s%s/%s%s' % (post_hash, ext, post_title, ext))
            stream_url = 'http://members.easynews.com/dl/%s' % (stream_url)
            stream_url = stream_url + '|Cookie=%s' % (self._get_stream_cookies())
            host = self._get_direct_hostname(stream_url)
            quality = scraper_utils.width_get_quality(item['width'])
            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
            if size: hoster['size'] = size
            if post_title: hoster['extra'] = post_title
            hosters.append(hoster)

    return hosters
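# _get_stream_cookies() above isn't defined in this collection. Judging by
# how it's used (spliced into the '|Cookie=...' URL suffix for EasyNews
# streams), it presumably serializes the session's cookie jar into a single
# Cookie header value; a hypothetical sketch:
import urllib


def get_stream_cookies_sketch(cookiejar):
    # Render a cookielib.CookieJar as 'name=value; name2=value2', URL-quoted
    # so it survives inside the Kodi '|Cookie=...' suffix.
    cookies = ['%s=%s' % (c.name, c.value) for c in cookiejar]
    return urllib.quote('; '.join(cookies))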
def __get_gk_links(self, html, page_url): sources = {} for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}): film_id = dom_parser.parse_dom(link, 'a', ret='data-film') name_id = dom_parser.parse_dom(link, 'a', ret='data-name') server_id = dom_parser.parse_dom(link, 'a', ret='data-server') if film_id and name_id and server_id: data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]} headers = XHR headers['Referer'] = page_url url = urlparse.urljoin(self.base_url, LINK_URL) html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 's' in js_data: if isinstance(js_data['s'], basestring): sources[js_data['s']] = QUALITIES.HIGH else: for link in js_data['s']: stream_url = link['file'] if self._get_direct_hostname(stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'label' in link: quality = scraper_utils.height_get_quality(link['label']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def __get_gk_links(self, html):
    sources = {}
    match = re.search('{link\s*:\s*"([^"]+)', html)
    if match:
        iframe_url = match.group(1)
        data = {'link': iframe_url}
        headers = {'Referer': iframe_url}
        html = self._http_get(self.gk_url, data=data, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, self.gk_url)
        links = js_data.get('link', [])
        if isinstance(links, basestring):
            links = [{'link': links}]

        for link in links:
            stream_url = link['link']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
                direct = True
            elif 'label' in link:
                quality = scraper_utils.height_get_quality(link['label'])
                direct = True
            else:
                quality = QUALITIES.HIGH
                direct = False
            sources[stream_url] = {'quality': quality, 'direct': direct}

    return sources
def __get_gk_links(self, html, page_url):
    sources = {}
    match = re.search('{link\s*:\s*"([^"]+)', html)
    if match:
        data = {'link': match.group(1)}
        url = urlparse.urljoin(self.base_url, LINK_URL)
        headers = {'Referer': page_url}
        html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
        js_data = scraper_utils.parse_json(html, url)
        if 'link' in js_data:
            for link in js_data['link']:
                if 'type' in link and link['type'] == 'mp4' and 'link' in link:
                    if self._get_direct_hostname(link['link']) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(link['link'])
                    elif 'label' in link:
                        quality = scraper_utils.height_get_quality(link['label'])
                    else:
                        quality = QUALITIES.HIGH
                    sources[link['link']] = quality

    return sources
def __login(self):
    url = scraper_utils.urljoin(self.base_url, '/api/v1/user/login')
    data = {'user': self.username, 'password': self.password}
    headers = {'Content-Type': 'application/json'}
    html = self._http_get(url, data=json.dumps(data), headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    if 'user' not in js_data:
        raise Exception('sit2play login failed')
def __get_json_links(self, html, sub):
    hosters = []
    js_data = scraper_utils.parse_json(html)
    if 'sources' in js_data:
        for source in js_data.get('sources', []):
            stream_url = source.get('file')
            if stream_url is None: continue

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                quality = QUALITIES.HIGH
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hoster['subs'] = sub
            hosters.append(hoster)

    return hosters
def _http_get(self, url, data=None, retry=True, allow_redirect=True, cache_limit=8):
    if not self.username or not self.password:
        return {}

    if data is None: data = {}
    data.update({'customer_id': self.username, 'pin': self.password})
    result = super(Premiumize_Scraper, self)._http_get(url, data=data, allow_redirect=allow_redirect, cache_limit=cache_limit)
    js_result = scraper_utils.parse_json(result, url)
    if 'status' in js_result and js_result['status'] == 'error':
        log_utils.log('Error received from premiumize.me (%s)' % (js_result.get('message', 'Unknown Error')), log_utils.LOGWARNING)
        js_result = {}

    return js_result
def get_sources(self, video):
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return sources
    object_id = self.__extract_id(source_url)
    if object_id is None: return sources
    source_url = TITLE_URL.format(id=object_id)
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._authed_http_get(page_url, cache_limit=.5)
    js_data = scraper_utils.parse_json(html, page_url)
    if video.video_type == VIDEO_TYPES.MOVIE:
        links = js_data.get('links', {})
    else:
        links = self.__episode_match(js_data, video)

    prefix = js_data.get('domain', {}).get('prefix')
    suffix = js_data.get('domain', {}).get('suffix')
    for key, path in links.get('links', {}).iteritems():
        for mirror in sorted(list(set(links.get('mirrors', [])))):
            stream_url = TEMPLATE.format(prefix=prefix, mirror=mirror, suffix=suffix, path=path)
            host = scraper_utils.get_direct_hostname(self, stream_url)
            quality = Q_MAP.get(key, QUALITIES.HIGH)
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            source['version'] = '(Mirror %d)' % (mirror)
            sources.append(source)

    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        if re.search('<span[^>]+>\s*Low Quality\s*</span>', html):
            quality = QUALITIES.LOW
        else:
            quality = QUALITIES.HIGH

        for match in re.finditer('gkpluginsphp.*?link\s*:\s*"([^"]+)', html):
            data = {'link': match.group(1)}
            headers = dict(XHR)  # copy so the shared XHR constant isn't mutated
            headers['Referer'] = url
            gk_url = urlparse.urljoin(self.base_url, GK_URL)
            html = self._http_get(gk_url, data=data, headers=headers, cache_limit=.25)
            js_result = scraper_utils.parse_json(html, gk_url)
            if 'link' in js_result and 'func' not in js_result:
                if isinstance(js_result['link'], list):
                    sources = dict((link['link'], scraper_utils.height_get_quality(link['label'])) for link in js_result['link'])
                else:
                    sources = {js_result['link']: quality}

                for source in sources:
                    host = self._get_direct_hostname(source)
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': sources[source], 'host': host, 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)

    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, url)
    if 'error' in js_result:
        logger.log('DD.tv API error: "%s" @ %s' % (js_result['error'], url), log_utils.LOGWARNING)
        return hosters

    for result in js_result:
        if not scraper_utils.release_check(video, result['release'], require_title=False): continue
        if result['quality'] in self.q_order:
            for key in result['links']:
                url = result['links'][key][0]
                if re.search('\.rar(\.|$)', url): continue

                hostname = urlparse.urlparse(url).hostname
                hoster = {'multi-part': False, 'class': self, 'views': None, 'url': url, 'rating': None, 'host': hostname, 'quality': QUALITY_MAP[result['quality']], 'direct': False}
                hoster['format'] = result['quality']
                if 'x265' in result['release'] and result['quality'] != '1080P-X265':
                    hoster['format'] += '-x265'  # was hoster['dd_qual'], which is never set in this variant and would raise KeyError
                hosters.append(hoster)

    return hosters
def __get_gk_links(self, html, page_url, video_type, episode):
    sources = {}
    phimid = dom_parser.parse_dom(html, 'input', {'name': 'phimid'}, ret='value')
    if phimid and video_type == VIDEO_TYPES.EPISODE:
        url = urlparse.urljoin(self.tv_base_url, '/ajax.php')
        data = {'ipos_server': 1, 'phimid': phimid[0], 'keyurl': episode}
        headers = dict(XHR)  # copy so the shared XHR constant isn't mutated
        headers['Referer'] = page_url
        html = self._http_get(url, data=data, headers=headers, cache_limit=.5)

    for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}):
        film_id = dom_parser.parse_dom(link, 'a', ret='data-film')
        name_id = dom_parser.parse_dom(link, 'a', ret='data-name')
        server_id = dom_parser.parse_dom(link, 'a', ret='data-server')
        if film_id and name_id and server_id:
            data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]}
            headers = dict(XHR)
            headers['Referer'] = page_url
            url = urlparse.urljoin(self.__get_base_url(video_type), LINK_URL)
            html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
            js_data = scraper_utils.parse_json(html, url)
            if 's' in js_data:
                if isinstance(js_data['s'], basestring):
                    sources[js_data['s']] = QUALITIES.HIGH
                else:
                    for link in js_data['s']:
                        stream_url = link['file']
                        if self._get_direct_hostname(stream_url) == 'gvideo':
                            quality = scraper_utils.gv_get_quality(stream_url)
                        elif 'label' in link:
                            quality = scraper_utils.height_get_quality(link['label'])
                        else:
                            quality = QUALITIES.HIGH
                        sources[stream_url] = quality

    return sources