def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        scrape = clean_search(title.lower())
        start_url = '%ssearch/ajax_search?q=%s' % (self.base_link, scrape)
        headers = {'User-Agent': User_Agent}  # was 'User_Agent', which sends a bogus header
        html = requests.get(start_url, headers=headers, timeout=5).content
        regex = re.compile('"value":"(.+?)","seo":"(.+?)"', re.DOTALL).findall(html)
        for name, link_title in regex:
            if not clean_title(title).lower() == clean_title(name).lower():
                continue
            show_page = self.base_link + link_title
            format_grab = 'season-%s-episode-%s-' % (season, episode)
            linkspage = requests.get(show_page, headers=headers, timeout=5).content
            series_links = re.compile('<div class="el-item.+?href="(.+?)"', re.DOTALL).findall(linkspage)
            for episode_url in series_links:
                if format_grab not in episode_url:
                    continue
                self.get_sources(episode_url, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+'))
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        match = re.compile('class="thumb".+?title="(.+?)".+?href="(.+?)">', re.DOTALL).findall(html)
        for name, item_url in match:
            if year not in name:
                continue
            if clean_title(search_id).lower() == clean_title(name).lower():
                self.get_source(item_url, title, year, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search?query=%s' % (self.base_link, urllib.quote_plus(search_id))
        html = client.request(start_url)
        posts = client.parseDOM(html, 'div', attrs={'class': 'one_movie-item'})
        for post in posts:
            data = dom.parse_dom(post, 'a', req='href', attrs={'class': 'movie-title'})[0]
            if not clean_title(title) == clean_title(data.content):
                continue
            qual = client.parseDOM(post, 'span', attrs={'data-title': 'Quality'})[0]
            qual = client.replaceHTMLCodes(qual)
            item_url = urlparse.urljoin(self.base_link, data.attrs['href'])
            self.get_source(item_url, title, year, start_time, qual)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search?query=%s' % (self.base_link, urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        match = re.compile('class="movie-item view-tenth".+?href="(.+?)">.+?alt="(.+?)" />.+?data-title="Quality">(.+?)<', re.DOTALL).findall(html)
        for link, name, qual in match:
            item_url = urlparse.urljoin(self.base_link, link)
            qual = qual.replace(' ', '')
            if clean_title(search_id) == clean_title(name):
                self.get_source(item_url, title, year, start_time, qual)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        count = 0
        start_time = time.time()
        search_id = urllib.quote_plus('%s %s' % (clean_search(title), year))
        start_url = '%s/?s=%s' % (self.base_link, search_id)
        html = client.request(start_url, referer=self.base_link)
        match = re.compile('class="thumb".+?title="(.+?)".+?href="(.+?)">', re.DOTALL).findall(html)
        for name, item_url in match:
            if year not in name:
                continue
            if not clean_title(title) == clean_title(name.split(year)[0][:-1]):
                continue
            OPEN = client.request(item_url, referer=self.base_link)
            link = client.parseDOM(OPEN, 'iframe', ret='src')[0]
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': 'HD', 'scraper': self.name,
                                 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
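# Several scrapers in this module repeat the same idiom for deriving a hoster
# name from an embed URL before calling filter_host(). A minimal standalone
# sketch of that idiom; the helper name _host_from_url is ours, not part of
# the addon, and the sample URL is made up:
def _host_from_url(link):
    # 'https://www.vidcloud.co/embed/x' -> 'vidcloud.co'
    host = link.split('//')[1].replace('www.', '')
    return host.split('/')[0]

# _host_from_url('https://www.vidcloud.co/embed/x') == 'vidcloud.co'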
def scrape_movie(self, title, year, imdb, debrid=False):
    # this source only lists old (black & white era) titles, so bail early on newer years
    if int(year) > 1980:
        return self.sources
    try:
        start_time = time.time()
        query = urllib.quote_plus(clean_search(title.lower()))
        start_url = urlparse.urljoin(self.base_link, self.search_link % query)
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        count = 0
        html = client.request(start_url, headers=headers)
        posts = client.parseDOM(html, 'div', attrs={'class': 'post'})
        posts = [dom_parser.parse_dom(i, 'a', req='href')[0] for i in posts if i]
        posts = [(i.attrs['href'], i.content) for i in posts if i]
        post = [i[0] for i in posts if clean_title(i[1]) == clean_title(title)][0]
        r = client.request(post, headers=headers)
        y = client.parseDOM(r, 'h1')[0]
        if year not in y:
            return self.sources
        links = client.parseDOM(r, 'source', ret='src')
        link = [i for i in links if i.endswith('mp4')][0]
        link += '|User-Agent=%s&Referer=%s' % (client.agent(), post)
        link = urllib.quote(link, ':/-_|&+=')
        count += 1
        self.sources.append({'source': 'bnw', 'quality': 'SD', 'scraper': self.name,
                             'url': link, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/?s=%s' % (self.base_link, urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        grab = re.compile('class="TPost C".+?<a href="(.+?)">.+?class="Title">(.+?)</div> <span class="Year">(.+?)</span>', re.DOTALL).findall(r)
        for url, name, date in grab:
            name = name.lower()
            if clean_title(title) == clean_title(name):
                if date == year:
                    self.get_source(url, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        scrape = urllib.quote_plus(clean_search(title))
        start_url = '%s/search.html?keyword=%s' % (self.base_link, scrape)
        html = client.request(start_url)
        thumbs = re.compile('<ul class="listing items">(.+?)</ul> ', re.DOTALL).findall(html)
        thumb = re.compile('href="(.+?)".+?alt="(.+?)"', re.DOTALL).findall(str(thumbs))
        for link, link_title in thumb:
            if clean_title(title).lower() == clean_title(link_title).lower():
                page_link = urlparse.urljoin(self.base_link, link)
                holdpage = client.request(page_link)
                datecheck = re.compile('<span>Release: </span>(.+?)</li>', re.DOTALL).findall(holdpage)[0]
                if year in datecheck:
                    movie_link = re.compile('<li class="child_episode".+?href="(.+?)"', re.DOTALL).findall(holdpage)[0]
                    movie_link = urlparse.urljoin(self.base_link, movie_link)
                    self.get_source(movie_link, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        hdlr = 'S%02dE%02d' % (int(season), int(episode))
        query = clean_search(title)
        query = urllib.quote_plus(query + ' ' + hdlr)
        urls = []
        for link in self.search_links:
            try:
                url = urlparse.urljoin(self.base_link, link % query)
                url = urlparse.urljoin(self.base_link, url)
                r = client.request(url)
                posts = client.parseDOM(r, 'tbody')
                posts = client.parseDOM(posts, 'tr')
                urls += [client.parseDOM(i, 'button', ret='data-clipboard-text')[0] for i in posts if i]
            except:
                pass
        count = 0
        for url in urls:
            name = url.split('/')[-1].lower()
            name = client.replaceHTMLCodes(name).replace('%20', '')
            if 'movies' in url:
                continue
            if any(x in url for x in ['italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub',
                                      'samples', 'extras', 'french', 'trailer', 'trailers', 'sample']):
                continue
            t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)', '', name, flags=re.I)
            if clean_title(t) not in clean_title(title):
                continue
            y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            res, info = quality_tags.get_release_quality(name, url)
            if any(x in url for x in ['hastidl', '1tehmovies', '62.210.103.107', '79.127',
                                      '213.32.113.82', 'dl5.downloadha', '89.163.255.42',
                                      '185.56.20.142', 's1.0music', 'dl3.yoozdl',
                                      'dl4.lavinmovie.net', 'dl6.lavinmovie.net',
                                      'dl3.upload08.com', 'dl8.uploadt.com', '163.172.6.218',
                                      'samba.allunix.ru', 'server417']):
                count += 1
                url += '|User-Agent=%s&Referer=%s' % (client.agent(), self.base_link)
                url = urllib.quote(url, '|:?/&+=_-')
                self.sources.append({'source': 'DirectLink', 'quality': res, 'scraper': self.name,
                                     'url': url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources

#filepursuit().scrape_movie('Black Panther', '2018', '')
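# A quick sanity check of the episode-tag extraction used above: the character
# class around the tag only requires a separator (dot, parenthesis, bracket or
# space) on each side, and the last match wins. The sample release name below
# is made up:
import re
name = 'some.show.s02e03.720p.web.x264'
tag = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper()
# tag == 'S02E03', which is then compared against hdlr built via
# 'S%02dE%02d' % (int(season), int(episode))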
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/?s=%s+season+%s' % (self.base_link, urllib.quote_plus(search_id), season)
        headers = {'User-Agent': client.agent()}  # was 'User_Agent', which sends a bogus header
        html = client.request(start_url, headers=headers)
        posts = client.parseDOM(html, 'div', attrs={'class': 'result-item'})
        posts = [client.parseDOM(i, 'div', attrs={'class': 'details'})[0] for i in posts if i]
        posts = [i for i in posts if 'SEO Checker' not in i]
        for post in posts:
            try:
                name = client.parseDOM(post, 'a')[0]
                name = re.sub('<.+?>', '', name)
                url = client.parseDOM(post, 'a', ret='href')[0]
            except:
                raise Exception()
            tit = re.sub('(\.|\(|\[|\s)(Season)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I)
            if not clean_title(title).lower() == clean_title(tit).lower():
                continue
            epi_id = '%sx%s/' % (season, episode)
            ep_link = url.replace('/seasons/', '/episodes/')
            ep_link = ep_link.split('-season')[0] + '-%s' % epi_id
            self.get_source(ep_link, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        hdlr = 'S%02dE%02d' % (int(season), int(episode))
        search_id = '%s %s' % (title, hdlr)
        start_url = self.base_link + self.search_link % urllib.quote_plus(search_id)
        html = client.request(start_url)
        items = client.parseDOM(html, 'item')
        for item in items:
            name = client.parseDOM(item, 'title')[0]
            name = client.replaceHTMLCodes(name)
            t = name.split(hdlr)[0]
            if not clean_title(title) == clean_title(t):
                continue
            if hdlr not in name:
                continue
            self.get_source(item, title, year, season, episode, debrid, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    count = 0
    try:
        start_time = time.time()
        search_id = '%s %s' % (clean_search(title), year)
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        html = requests.get(start_url, headers=headers, timeout=5).content
        posts = client.parseDOM(html, 'item')
        posts = [(client.parseDOM(i, 'title')[0], client.parseDOM(i, 'a', ret='href')) for i in posts if i]
        posts = [i[1] for i in posts if clean_title(i[0]) == clean_title(title)][0]
        for url in posts:
            if 'cmovies' in url:
                continue
            link = 'https:' + url if url.startswith('//') else url
            if '1080' in link:
                qual = '1080p'
            elif '720' in link:
                qual = '720p'
            else:
                qual = 'SD'
            host = url.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            count += 1
            self.sources.append({'source': host, 'quality': qual, 'scraper': self.name,
                                 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'name': 'hover'})
        for post in posts:
            data = dom.parse_dom(post, 'a', {'class': 'magnet'}, req=['href', 'title'])[0]
            url = data.attrs['href']
            name = data.attrs['title']
            t = name.split(self.hdlr)[0]
            if not clean_title(re.sub('(|)', '', t)) == clean_title(self.title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            items.append((name, url, size))
        return items
    except BaseException:
        return items
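# The size-normalisation snippet above (repeated in several scrapers) turns a
# human-readable torrent size into gigabytes. A standalone sketch of the same
# arithmetic; the sample value is made up:
import re
size = '716.8 MiB'
div = 1 if size.endswith(('GB', 'GiB')) else 1024
value = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
# value == 0.7; formatted downstream as '%.2f GB' % value -> '0.70 GB'.
# Note that a size with both a thousands separator and a decimal point
# (e.g. '1,436.2 MiB') raises ValueError here and falls into the
# except branch, which reports '0'.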
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        scrape = urllib.quote_plus(clean_search(title))
        start_url = '%s/search.html?keyword=%s' % (self.base_link, scrape)
        html = client.request(start_url)
        thumbs = re.compile('<ul class="listing items">(.+?)</ul> ', re.DOTALL).findall(html)
        thumb = re.compile('href="(.+?)".+?alt="(.+?)"', re.DOTALL).findall(str(thumbs))
        for link, link_title in thumb:
            if clean_title(title) in clean_title(link_title):
                season_chk = '-season-%s' % season
                if season_chk not in link:
                    continue
                page_link = urlparse.urljoin(self.base_link, link)
                holdpage = client.request(page_link)
                series_links = re.compile('<li class="child_episode".+?href="(.+?)"', re.DOTALL).findall(holdpage)
                for movie_link in series_links:
                    # append a sentinel so '-episode-1' cannot also match '-episode-10'
                    episode_chk = '-episode-%sBOLLOX' % episode
                    spoof_link = movie_link + 'BOLLOX'
                    if episode_chk in spoof_link:
                        movie_link = urlparse.urljoin(self.base_link, movie_link)
                        self.get_source(movie_link, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        start_url = urlparse.urljoin(self.base_link,
                                     self.moviesearch_link % urllib.quote_plus(clean_search(title)))
        html = client.request(start_url)
        posts = client.parseDOM(html, 'div', attrs={'class': 'index_item.+?'})
        posts = [dom.parse_dom(i, 'a', req='href')[0] for i in posts if i]
        # strip the '(YYYY) ...' tail from the anchor title before comparing;
        # re.I must be passed as flags= (positionally it would be read as count)
        post = [i.attrs['href'] for i in posts
                if clean_title(title) == clean_title(
                    re.sub('(\.|\(|\[|\s)(\d{4})(\.|\)|\]|\s|)(.+|)', '', i.attrs['title'], flags=re.I))][0]
        self.get_sources(post, title, year, '', '', start_time)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = urllib.quote_plus(clean_search(title))
        query = self.search_url % search_id
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
                   'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        r = scraper.get(query, headers=headers).content
        posts = client.parseDOM(r, 'div', attrs={'class': 'col-xs-4 col-sm-4 col-md-3 col-lg-3'})
        posts = [dom.parse_dom(i, 'a', req='href')[0] for i in posts if year in i]
        post = [i.attrs['href'] for i in posts if clean_title(title) == clean_title(i.attrs['title'])][0]
        self.get_source(post, title, year, '', '', start_time)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, 'Check Search')
    return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = urllib.quote_plus('%s %s' % (title, year))
        start_url = '%s/index.php?s=%s' % (self.base_link, search_id)
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        match = client.parseDOM(html, 'h1')
        match = [dom_parser.parse_dom(i, 'a', req='href') for i in match if i]
        match = [(i[0].attrs['href'], i[0].content) for i in match if i]
        for item_url, name in match:
            if year not in name:
                continue
            if not clean_title(title) == clean_title(name):
                continue
            self.get_source(item_url, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        start_url = urlparse.urljoin(self.base_link,
                                     self.search_link % urllib.quote_plus(clean_search(title)))
        html = client.request(start_url)
        data = json.loads(html)
        posts = data['series']
        post = [i['seo_name'] for i in posts
                if clean_title(title) == clean_title(i['original_name'])][0]
        show_page = self.base_link + post
        r = client.request(show_page)
        sepi = 'season-%s-episode-%s' % (int(season), int(episode))
        epi_link = client.parseDOM(r, 'a', ret='href')
        epi_link = [i for i in epi_link if sepi in i][0]
        self.get_sources(epi_link, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = urllib.quote_plus(title + ' ' + year).replace('+', '%2B')
        r = requests.get(self.search_url % search_id).content
        page = re.findall('id="main-col">(.+?)</section></div>', r, re.DOTALL)[0]
        Regex = re.compile('''-title.+?href=['"]([^'"]+)['"]>([^<]+)</a></div>''', re.DOTALL).findall(page)
        for item_url, name in Regex:
            if not clean_title(title).lower() == clean_title(name).lower():
                continue
            if year not in name:
                continue
            self.get_source(item_url, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        if not debrid:
            return []
        season_url = '0%s' % season if len(season) < 2 else season
        episode_url = '0%s' % episode if len(episode) < 2 else episode
        sea_epi = 's%se%s' % (season_url, episode_url)
        start_url = '%s/?s=%s+%s' % (self.base_link, title.replace(' ', '+').lower(), sea_epi)
        OPEN = client.request(start_url)
        content = re.compile('<h2><a href="([^"]+)"', re.DOTALL).findall(OPEN)
        for url in content:
            if clean_title(title).lower() not in clean_title(url).lower():
                continue
            self.get_source(url, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = '%s %s' % (clean_search(title.lower()), year)
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        data = client.parseDOM(html, 'div', attrs={'id': 'content_box'})[0]
        data = client.parseDOM(data, 'h2')  # one <h2> per search result
        data = [dom_parser.parse_dom(i, 'a', req=['href', 'title'])[0] for i in data if i]
        links = [(i.attrs['href'], i.attrs['title']) for i in data if i]  # (url, title) per result
        for m_url, m_title in links:
            movie_year = re.findall("(\d{4})", re.sub('\d{3,4}p', '', m_title))[-1]
            movie_name = m_title.split(movie_year)[0]
            if not clean_title(title) == clean_title(movie_name):
                continue
            if year not in movie_year:
                continue
            self.get_source(m_url, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/?s=%s' % (self.base_link, urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        posts = client.parseDOM(html, 'div', attrs={'class': 'result-item'})
        posts = [client.parseDOM(i, 'div', attrs={'class': 'details'})[0] for i in posts if i]
        posts = [i for i in posts if 'SEO Checker' not in i]
        for post in posts:
            try:
                name = client.parseDOM(post, 'a')[0]
                url = client.parseDOM(post, 'a', ret='href')[0]
                date = client.parseDOM(post, 'span', attrs={'class': 'year'})[0]
            except:
                raise Exception()
            name = re.sub('<.+?>', '', name)
            tit = re.sub('(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I)
            if not clean_title(title) == clean_title(tit):
                continue
            if not year == date:
                continue
            self.get_source(url, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        count = 0
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search/?keyword=%s' % (self.base_link, urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        match = re.compile('class="ml-item".+?href="(.+?)".+?<b>(.+?)</b>.+?<b>(.+?)</b>.+?alt="(.+?)"', re.DOTALL).findall(html)
        for item_url1, date, res, name in match:
            item_url = urlparse.urljoin(self.base_link, item_url1)
            if not clean_title(search_id) == clean_title(name):
                continue
            OPEN = scraper.get(item_url, headers=headers).content
            Endlinks = re.compile('class="movie_links"><li(.+?)<h3><b class="icon-share-alt"', re.DOTALL).findall(OPEN)[0]
            links = re.compile('target="_blank" href="(.+?)"', re.DOTALL).findall(Endlinks)
            for link in links:
                if not link.startswith('http'):
                    continue
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host):
                    continue
                count += 1  # count only hosts that pass the filter
                self.sources.append({'source': host, 'quality': res, 'scraper': self.name,
                                     'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, url, title, year, season, episode, start_time):
    sources = []
    try:
        count = 0
        if url is None:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        tit = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote(query))
        r = client.request(url)
        r = client.parseDOM(r, 'table', attrs={'id': 'searchResult'})[0]
        posts = client.parseDOM(r, 'td')
        posts = [i for i in posts if 'detName' in i]
        for post in posts:
            # presumably collapsing '&nbsp;' entities; the literal was rendered
            # away in the original listing
            post = post.replace('&nbsp;', ' ')
            name = client.parseDOM(post, 'a')[0]
            t = name.split(hdlr)[0]
            if not clean_title(re.sub('(|)', '', t)) == clean_title(tit):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            links = client.parseDOM(post, 'a', ret='href')
            magnet = [i for i in links if 'magnet:' in i][0]
            url = magnet.split('&tr')[0]  # drop tracker parameters from the magnet URI
            count += 1
            quality, info = quality_tags.get_release_quality(name, name)
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            info.append(size)
            info = ' | '.join(info)
            qual = '{0} | {1}'.format(quality, info)
            self.sources.append({'source': 'Torrent', 'quality': qual, 'scraper': self.name,
                                 'url': url, 'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - float(start_time)
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
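# The magnet handling above keeps only the infohash portion of the URI by
# cutting at the first tracker parameter. A standalone illustration with a
# made-up infohash:
magnet = 'magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567&tr=udp%3A%2F%2Ftracker.example%3A80'
url = magnet.split('&tr')[0]
# url == 'magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567'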
def get_muscic_url(scraper, title, artist, cache_location, maximum_age, debrid=False):
    cache_enabled = xbmcaddon.Addon('script.module.universalscrapers').getSetting("cache_enabled") == 'true'
    try:
        dbcon = database.connect(cache_location)
        dbcur = dbcon.cursor()
        try:
            dbcur.execute("SELECT * FROM version")
            match = dbcur.fetchone()
        except:
            universalscrapers.clear_cache()
            dbcur.execute("CREATE TABLE version (version TEXT)")
            dbcur.execute("INSERT INTO version Values ('0.5.4')")
            dbcon.commit()
        dbcur.execute(
            "CREATE TABLE IF NOT EXISTS rel_music_src ("
            "scraper TEXT, title Text, artist TEXT, urls TEXT, added TEXT, "
            "UNIQUE(scraper, title, artist));")
    except:
        pass
    if cache_enabled:
        try:
            sources = []
            dbcur.execute(
                "SELECT * FROM rel_music_src WHERE scraper = '%s' AND title = '%s' AND artist = '%s'" % (
                    scraper.name, clean_title(title).upper(), artist.upper()))
            match = dbcur.fetchone()
            t1 = int(re.sub('[^0-9]', '', str(match[4])))
            t2 = int(datetime.datetime.now().strftime("%Y%m%d%H%M"))
            update = abs(t2 - t1) > maximum_age
            if update == False:
                sources = json.loads(match[3])
                return sources
        except:
            pass
    try:
        sources = scraper.scrape_music(title, artist, debrid=debrid)
        if sources == None:
            sources = []
        elif cache_enabled:
            # note: artist.upper() was missing its parentheses here, so the
            # DELETE never matched and stale rows were left behind
            dbcur.execute(
                "DELETE FROM rel_music_src WHERE scraper = '%s' AND title = '%s' AND artist = '%s'" % (
                    scraper.name, clean_title(title).upper(), artist.upper()))
            dbcur.execute("INSERT INTO rel_music_src Values (?, ?, ?, ?, ?)", (
                scraper.name, clean_title(title).upper(), artist.upper(), json.dumps(sources),
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
            dbcon.commit()
        return sources
    except:
        pass
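# Note on the cache freshness test used above (and in get_url below): the
# stored and current timestamps are compared as the integers YYYYMMDDHHMM,
# so abs(t2 - t1) is not a true minute count across day or month boundaries;
# it only has to exceed maximum_age, which at worst forces an early refresh.
# Illustrative values (made up):
t1 = int('201812312355')    # cached at 2018-12-31 23:55
t2 = int('201901010005')    # checked at 2019-01-01 00:05
stale = abs(t2 - t1) > 180  # True, although only ten real minutes elapsed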
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        sepi = 'S%02dE%02d' % (int(season), int(episode))
        search_id = '%s %s' % (title, sepi)
        movie_url = self.base_link + self.search_link % urllib.quote_plus(search_id)
        r = client.request(movie_url)
        items = client.parseDOM(r, 'article', attrs={'id': 'post-\d+'})
        for item in items:
            name = client.parseDOM(item, 'a')[0]
            name = client.replaceHTMLCodes(name)
            t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I)
            if clean_title(title).lower() not in clean_title(t).lower():
                continue
            y = re.findall('[\.|\(|\[|\s](S\d*E\d*|S\d*)[\.|\)|\]|\s]', name, flags=re.I)[-1].upper()
            if y not in sepi:
                continue
            link = client.parseDOM(item, 'a', ret='href')[0]
            # exact episode hits (y == sepi) link one page short; append '/2'
            if y == sepi:
                link += '2' if link.endswith('/') else '/2'
            self.get_source(link, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        posts = client.parseDOM(r, 'div', attrs={'id': 'movie-\d+'})
        posts = [client.parseDOM(i, 'h4')[0] for i in posts if i]
        posts = [(client.parseDOM(i, 'a', ret='href')[0], client.parseDOM(i, 'a')[0]) for i in posts if i]
        count = 0
        for link, found_title in posts:
            link = urlparse.urljoin(self.base_link, link) if link.startswith('/') else link
            if not clean_title(title) == clean_title(found_title):
                continue
            result = client.request(link, headers=headers)
            y = client.parseDOM(result, 'div', attrs={'class': 'showValue showValueRelease'})[0]
            if not year == y:
                continue
            streams = client.parseDOM(result, 'div', attrs={'class': 'linkTr'})
            for stream in streams:
                quality = client.parseDOM(stream, 'div', attrs={'class': 'linkQualityText'})[0]
                link = client.parseDOM(stream, 'div', attrs={'class': 'linkHidden linkHiddenUrl'})[0]
                if 'vidnode' in link:
                    continue
                quality = 'HD' if 'HD' in quality else 'SD'
                host = quality_tags._give_host(link)
                count += 1
                self.sources.append({'source': host, 'quality': quality, 'scraper': self.name,
                                     'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    start_time = time.time()
    if season == "19":
        season = "1"
    try:
        uniques = []
        for base in [self.base_link_cartoons, self.dubbed_link_cartoons]:
            html = requests.get(base, timeout=5).content
            match = re.compile('<td><a href="(.+?)">(.+?)</a></td>', re.DOTALL).findall(html)
            bollox = '%s season %s' % (title, season)
            for item_url, name in match:
                # accept either an exact title match or a 'title season N' match
                # (the two branches in the original ran identical code)
                title_chk = clean_title(title).lower() == clean_title(name).lower()
                season_chk = (clean_title(bollox).lower().replace('!', '') == clean_title(name).lower().replace('!', '')
                              or clean_title(bollox).lower().replace('!', '') + 'season' + str(season) == clean_title(name).lower().replace('!', ''))
                if not (title_chk or season_chk):
                    continue
                headers = {'User-Agent': User_Agent}
                show_page = requests.get(item_url, headers=headers, allow_redirects=False).content
                Regex = re.compile('<div id="videos">(.+?)</ul>', re.DOTALL).findall(show_page)
                get_episodes = re.compile('<li>.+?href="(.+?)"', re.DOTALL).findall(str(Regex))
                for link in get_episodes:
                    # trailing '#' sentinel keeps '-episode-1' from matching '-episode-10'
                    spoof = link + '#'
                    if '-season-' not in link:
                        episode_format = '-episode-%s#' % episode
                    else:
                        episode_format = 'season-%s-episode-%s#' % (season, episode)
                    if episode_format in spoof and link not in uniques:
                        uniques.append(link)
                        self.check_for_play(link, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search.html?keyword=%s' % (self.base_link, search_id.replace(' ', '+'))
        html = client.request(start_url)
        match = re.compile('<figure>.+?href="(.+?)".+?title="(.+?)"', re.DOTALL).findall(html)
        for url, name in match:
            name = name.replace('Movie', '')
            if clean_title(title).lower() == clean_title(name).lower():
                url = self.base_link + url
                self.get_source(url, title, year, '', '', start_time)
        return self.sources
    except:
        return self.sources
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        scraper = cfscrape.create_scraper()
        r = scraper.get(url, headers=headers)
        posts = client.parseDOM(r.content, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = dom.parse_dom(post, 'a', req='href')[1]
            link = urlparse.urljoin(self.base_link, data.attrs['href'])
            name = data.content
            t = name.split(self.hdlr)[0]
            if not clean_title(re.sub('(|)', '', t)) == clean_title(self.title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            self.items.append((name, link, size))
        return self.items
    except BaseException:
        return self.items
def scrape_music(self, title, artist, debrid=False):
    try:
        song_search = clean_title(title.lower()).replace(' ', '+')
        artist_search = clean_title(artist.lower()).replace(' ', '+')
        start_url = '%sresults?search_query=%s+%s' % (self.base_link, artist_search, song_search)
        html = requests.get(start_url, headers=headers, timeout=20).content
        match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"', re.DOTALL).findall(html)
        count = 0
        for m, link in match:
            match4 = ' '.join(m.replace('\n', '').replace('\t', '').split())
            match5 = re.sub('&#(\d+);', '', match4)
            match5 = re.sub('(&#[0-9]+)([^;^0-9]+)', '\\1;\\2', match5)
            # entity literals, presumably rendered away in the original listing
            match5 = match5.replace('&quot;', '"').replace('&amp;', '&')
            match5 = re.sub('\\\|/|\(|\)|\[|\]|\{|\}|-|:|;|\*|\?|"|\'|<|>|\_|\.|\?', ' ', match5)
            match5 = ' '.join(match5.split())
            match2 = m.replace('\n', '').replace('\t', '').replace(' ', '')
            if clean_title(title).lower() in clean_title(match2).lower():
                if clean_title(artist).lower() in clean_title(match2).lower():
                    final_link = 'https://www.youtube.com/watch?v=' + link
                    count += 1
                    self.sources.append({'source': match5, 'quality': 'SD', 'scraper': self.name,
                                         'url': final_link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - self.start_time
            send_log(self.name, end_time, count)
        return self.sources
    except Exception, argument:
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search = clean_search(title)
        start_url = '%s/%s/%s' % (self.base_link, self.search_link, search.replace(' ', '-'))
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        match = re.compile('class="ml-item".+?href="(.+?)".+?alt="(.+?)"', re.DOTALL).findall(html)
        for item_url1, name in match:
            item_url = 'https://www2.series9.io' + item_url1 + '/watching.html'
            if clean_title(search) == clean_title(name):
                self.get_source(item_url, title, year, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        results = client.parseDOM(html, 'div', attrs={'class': 'video_title'})
        items = []
        for item in results:
            try:
                data = dom_parser.parse_dom(item, 'a', req=['href', 'title'])[0]
                t = data.content
                y = re.findall('\((\d{4})\)', data.attrs['title'])[0]
                qual = data.attrs['title'].split('-')[1]
                link = data.attrs['href']
                if not clean_title(t) == clean_title(title):
                    continue
                if not y == year:
                    continue
                items += [(link, qual)]
            except:
                pass
        count = 0  # initialise before the loop; it was reset per item, breaking send_log
        for item in items:
            try:
                url = item[0] if item[0].startswith('http') else urlparse.urljoin(self.base_link, item[0])
                r = client.request(url)
                qual = client.parseDOM(r, 'h1')[0]
                res = quality_tags.get_release_quality(item[1], qual)[0]
                url = re.findall('''frame_url\s*=\s*["']([^']+)['"]\;''', r, re.DOTALL)[0]
                url = url if url.startswith('http') else urlparse.urljoin('https://', url)
                if 'vidlink' in url:
                    html = client.request(url, headers=headers)
                    action = re.findall("action'\s*:\s*'([^']+)", html)[0]
                    postID = re.findall("postID\s*=\s*'([^']+)", html)[0]
                    url = 'https://vidlink.org' + re.findall("var\s*url\s*=\s*'([^']+)", html)[0]
                    data = {'browserName': 'Firefox', 'platform': 'Win32', 'postID': postID, 'action': action}
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Referer'] = url
                    html = client.request(url, post=data, headers=headers)
                    html = jsunpack.unpack(html).replace('\\', '')
                    sources = json.loads(re.findall('window\.srcs\s*=\s*([^;]+)', html, re.DOTALL)[0])
                    for src in sources:
                        r = requests.head(src['url'], headers={'User-Agent': client.agent()})
                        if r.status_code < 400:
                            movie_link = src['url']
                            count += 1
                            self.sources.append({'source': 'Googlelink', 'quality': res,
                                                 'scraper': self.name, 'url': movie_link, 'direct': True})
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources

#watch32().scrape_movie('Black Panther', '2018', 'tt1825683', False)
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        posts = client.parseDOM(r, 'div', attrs={'id': 'movie-\d+'})
        posts = [client.parseDOM(i, 'h4')[0] for i in posts if i]
        count = 0
        for item in posts:
            name = client.parseDOM(item, 'a')[0]
            link = client.parseDOM(item, 'a', ret='href')[0]
            if not clean_title(title) == clean_title(name):
                continue
            link = urlparse.urljoin(self.base_link, link)
            html = client.request(link)
            # episode rows sit in <div class="season" id="seasonN"> blocks
            sep_id = 'Season %s Serie %s' % (int(season), int(episode))
            seasons = client.parseDOM(html, 'div', attrs={'class': 'season'})
            seasons = [i for i in seasons if 'season %s' % int(season) in i.lower()][0]
            epis = re.findall('<h3>(.+?)</div>\s+</div>\s+</div>\s+</div>', seasons, re.DOTALL | re.MULTILINE)
            epis = [i for i in epis if sep_id in i][0]
            streams = client.parseDOM(epis, 'div', attrs={'class': 'linkTr'})
            for stream in streams:
                quality = client.parseDOM(stream, 'div', attrs={'class': 'linkQualityText'})[0]
                link = client.parseDOM(stream, 'div', attrs={'class': 'linkHidden linkHiddenUrl'})[0]
                if 'vidnode' in link:
                    continue
                quality = 'HD' if 'HD' in quality else 'SD'
                host = quality_tags._give_host(link)
                count += 1
                self.sources.append({'source': host, 'quality': quality, 'scraper': self.name,
                                     'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []

#vkflix().scrape_movie('Black Panther', '2018', '', False)
#vkflix().scrape_episode('Suits', '2011', '', '8', '5', '', '')
def get_url(scraper, title, show_year, year, season, episode, imdb, tvdb, type, cache_location,
            maximum_age, check_url=False, debrid=False):
    cache_enabled = xbmcaddon.Addon('script.module.universalscrapers').getSetting("cache_enabled") == 'true'
    try:
        dbcon = database.connect(cache_location)
        dbcur = dbcon.cursor()
        try:
            dbcur.execute("SELECT * FROM version")
            match = dbcur.fetchone()
        except:
            universalscrapers.clear_cache()
            dbcur.execute("CREATE TABLE version (version TEXT)")
            dbcur.execute("INSERT INTO version Values ('0.5.4')")
            dbcon.commit()
        dbcur.execute(
            "CREATE TABLE IF NOT EXISTS rel_src ("
            "scraper TEXT, title Text, show_year TEXT, year TEXT, season TEXT, episode TEXT, "
            "imdb_id TEXT, urls TEXT, added TEXT, "
            "UNIQUE(scraper, title, year, season, episode));")
    except:
        pass
    if cache_enabled:
        try:
            sources = []
            dbcur.execute(
                "SELECT * FROM rel_src WHERE scraper = '%s' AND title = '%s' AND show_year= '%s' AND year = '%s' AND season = '%s' AND episode = '%s'" % (
                    scraper.name, clean_title(title).upper(), show_year, year, season, episode))
            match = dbcur.fetchone()
            t1 = int(re.sub('[^0-9]', '', str(match[8])))
            t2 = int(datetime.datetime.now().strftime("%Y%m%d%H%M"))
            update = abs(t2 - t1) > maximum_age
            if update == False:
                sources = json.loads(match[7])
                return sources
        except:
            pass
    try:
        sources = []
        if type == "movie":
            sources = scraper.scrape_movie(title, year, imdb, debrid=debrid)
        elif type == "episode":
            sources = scraper.scrape_episode(title, show_year, year, season, episode, imdb, tvdb, debrid=debrid)
        if sources == None:
            sources = []
        elif cache_enabled:
            try:
                dbcur.execute(
                    "DELETE FROM rel_src WHERE scraper = '%s' AND title = '%s' AND show_year= '%s' AND year = '%s' AND season = '%s' AND episode = '%s'" % (
                        scraper.name, clean_title(title).upper(), show_year, year, season, episode))
                dbcur.execute("INSERT INTO rel_src Values (?, ?, ?, ?, ?, ?, ?, ?, ?)", (
                    scraper.name, clean_title(title).upper(), show_year, year, season, episode, imdb,
                    json.dumps(sources), datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
                dbcon.commit()
            except:
                pass
        if check_url:
            noresolver = False
            try:
                import resolveurl as urlresolver
            except:
                try:
                    import urlresolver as urlresolver
                except:
                    noresolver = True
            new_sources = []
            from common import check_playable
            for source in sources:
                if source["direct"]:
                    check = check_playable(source["url"])
                    if check:
                        new_sources.append(source)
                elif not noresolver:
                    try:
                        hmf = urlresolver.HostedMediaFile(url=source['url'], include_disabled=False,
                                                          include_universal=False)
                        if hmf.valid_url():
                            resolved_url = hmf.resolve()
                            check = check_playable(resolved_url)
                            if check:
                                new_sources.append(source)
                    except:
                        pass
                else:
                    new_sources.append(source)
            sources = new_sources
        return sources
    except:
        pass
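# A minimal sketch of how a caller might drive get_url(); written as comments
# because the scraper instance, cache path and values below are illustrative
# placeholders, not a confirmed API of this module:
#
#   scraper = SomeScraperClass()  # any class exposing .name / .scrape_movie / .scrape_episode
#   links = get_url(scraper, 'Black Panther', '', '2018', '', '',
#                   'tt1825683', '', 'movie', '/tmp/us_cache.db',
#                   maximum_age=180, check_url=False, debrid=False)
#
# With cache_enabled on, a fresh rel_src row short-circuits the scrape; an
# expired or missing row triggers a live scrape followed by DELETE + INSERT.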