def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = '%s %s' % (clean_search(title.lower()), year)
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        #xbmc.log('@#@DATA:%s' % html, xbmc.LOGNOTICE)
        data = client.parseDOM(html, 'div', attrs={'id': 'content_box'})[0]
        data = client.parseDOM(data, 'h2')  # returns a list with all search results
        data = [dom_parser.parse_dom(i, 'a', req=['href', 'title'])[0] for i in data if i]  # scraping url-title
        links = [(i.attrs['href'], i.attrs['title']) for i in data if i]  # list with link-title for each result
        #links = re.compile('<header>.+?href="(.+?)" title="(.+?)"', re.DOTALL).findall(html)
        #xbmc.log('@#@LINKS:%s' % links, xbmc.LOGNOTICE)
        for m_url, m_title in links:
            movie_year = re.findall("(\d{4})", re.sub('\d{3,4}p', '', m_title))[-1]
            movie_name = m_title.split(movie_year)[0]
            if not clean_title(title) == clean_title(movie_name):
                continue
            if not year in movie_year:
                continue
            url = m_url
            #error_log(self.name + ' Pass', url)
            self.get_source(url, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    if int(year) > 1980:
        return self.sources
    try:
        start_time = time.time()
        query = urllib.quote_plus(clean_search(title.lower()))
        start_url = urlparse.urljoin(self.base_link, self.search_link % query)
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        count = 0
        html = client.request(start_url, headers=headers)
        posts = client.parseDOM(html, 'div', attrs={'class': 'post'})
        posts = [dom_parser.parse_dom(i, 'a', req='href')[0] for i in posts if i]
        posts = [(i.attrs['href'], i.content) for i in posts if i]
        post = [i[0] for i in posts if clean_title(i[1]) == clean_title(title)][0]
        r = client.request(post, headers=headers)
        y = client.parseDOM(r, 'h1')[0]
        if not year in y:
            return self.sources
        links = client.parseDOM(r, 'source', ret='src')
        link = [i for i in links if i.endswith('mp4')][0]
        link += '|User-Agent=%s&Referer=%s' % (client.agent(), post)
        link = urllib.quote(link, ':/-_|&+=')
        count += 1
        self.sources.append({'source': 'bnw', 'quality': 'SD', 'scraper': self.name, 'url': link, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def parseDOM(html, name='', attrs=None, ret=False):
    if attrs:
        attrs = dict((key, re.compile(value + ('$' if value else ''))) for key, value in attrs.iteritems())
    results = dom_parser.parse_dom(html, name, attrs, ret)
    if ret:
        results = [result.attrs[ret.lower()] for result in results]
    else:
        results = [result.content for result in results]
    return results
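# Illustrative usage of the parseDOM wrapper above (a hedged sketch, not part of the
# original module): attrs values are compiled as regexes anchored with '$', and passing
# ret='href' (for example) returns that attribute instead of the tag content. The html
# snippet below is fabricated purely for demonstration.
#
#   html = '<div class="post"><a href="/movie/1" title="Example">Example</a></div>'
#   posts = parseDOM(html, 'div', attrs={'class': 'post'})   # -> ['<a href="/movie/1" title="Example">Example</a>']
#   hrefs = parseDOM(html, 'a', ret='href')                  # -> ['/movie/1']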
def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'name': 'hover'})
        for post in posts:
            data = dom.parse_dom(post, 'a', {'class': 'magnet'}, req=['href', 'title'])[0]
            url = data.attrs['href']
            name = data.attrs['title']
            t = name.split(self.hdlr)[0]
            # strip stray parentheses from the release name before comparing titles
            if not clean_title(re.sub(r'\(|\)', '', t)) == clean_title(self.title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            items.append((name, url, size))
        return items
    except BaseException:
        return items
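# A minimal sketch (not part of the original scraper) of the size-normalisation step used
# in _get_items above: pull a "<number> GiB/MiB/GB/MB" token out of a post and convert it
# to an "x.xx GB" string. The sample release names are fabricated for illustration.
#
#   size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
#                     'Some.Movie.2018.1080p.BluRay 8.5 GiB')[0]                        # -> '8.5 GiB'
#   div = 1 if size.endswith(('GB', 'GiB')) else 1024                                    # MiB/MB are divided by 1024
#   print '%.2f GB' % (float(re.sub(r'[^0-9.,]', '', size.replace(',', '.'))) / div)     # -> '8.50 GB'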
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        start_url = urlparse.urljoin(self.base_link, self.tvsearch_link % urllib.quote_plus(clean_search(title)))
        #xbmc.log('@#@START: %s' % start_url, xbmc.LOGNOTICE)
        html = client.request(start_url)
        posts = client.parseDOM(html, 'div', attrs={'class': 'index_item.+?'})
        posts = [dom.parse_dom(i, 'a', req='href')[0] for i in posts if i]
        post = [i.attrs['href'] for i in posts
                if clean_title(title) == clean_title(re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s|)(.+|)', '',
                                                            i.attrs['title'], flags=re.I))][0]
        r = client.request(post)
        r = client.parseDOM(r, 'div', attrs={'class': 'tv_episode_item'})
        urls = client.parseDOM(r, 'a', ret='href')
        epi_link = [i for i in urls if 'season-%s-episode-%s' % (int(season), int(episode)) in i][0]
        #xbmc.log('@#@EPI-LINK: %s' % epi_link, xbmc.LOGNOTICE)
        self.get_sources(epi_link, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        query = urllib.quote_plus(clean_search(title))
        query = self.search_url % query
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
                   'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        r = scraper.get(query, headers=headers).content
        posts = client.parseDOM(r, 'div', attrs={'class': 'col-xs-4 col-sm-4 col-md-3 col-lg-3'})
        posts = [dom.parse_dom(i, 'a', req='href')[0] for i in posts if show_year in i]
        post = [i.attrs['href'] for i in posts if clean_title(title) == clean_title(i.attrs['title'])][0]
        start_url = '%s?session=%01d&episode=%01d' % (post, int(season), int(episode))
        self.get_source(start_url, title, year, season, episode, start_time)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, 'Check Search')
    return self.sources
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = dom.parse_dom(post, 'a', req='href')[1]
            link = urlparse.urljoin(self.base_link, data.attrs['href'])
            name = data.content
            t = name.split(self.hdlr)[0]
            # strip stray parentheses from the release name before comparing titles
            if not clean_title(re.sub(r'\(|\)', '', t)) == clean_title(self.title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            self.items.append((name, link, size))
        return self.items
    except BaseException:
        return self.items
def get_source(self, item_url, title, year, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        OPEN = client.request(item_url, headers=headers)
        Endlinks = dom_parser.parse_dom(OPEN, 'a', req='player-data')
        Endlinks = [(i.attrs['player-data'], i.content) for i in Endlinks if i]
        if 'Season' in year:
            Endlinks = [(i[0], 'SD') for i in Endlinks if i[1] in year]
        else:
            Endlinks = [(i[0], i[1]) for i in Endlinks if i]
        for link, quality in Endlinks:
            qual = quality_tags.check_sd_url(quality)
            if 'vidcloud' in link:
                link = 'https:' + link if link.startswith('//') else link
                data = client.request(link, headers=headers)
                link = re.findall('''file\s*:\s*['"](.+?)['"].+?type['"]\s*:\s*['"](.+?)['"]''', data, re.DOTALL)[0]
                host = link[1]
                link = link[0] + '|User-Agent=%s&Referer=https://vidcloud.icu/' % client.agent()
                direct = True
            else:
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                direct = False
            count += 1
            self.sources.append({'source': host, 'quality': qual, 'scraper': self.name, 'url': link, 'direct': direct})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
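# Hedged sketch (not from the original module) of the vidcloud step in get_source above:
# the regex pulls the first file/type pair out of the embed page's player setup. The
# embed_page sample is fabricated; real pages may differ.
#
#   embed_page = 'player.setup({sources: [{file: "https://host.example/v.mp4", label: "SD", "type": "mp4"}]});'
#   stream, ext = re.findall(r'''file\s*:\s*['"](.+?)['"].+?type['"]\s*:\s*['"](.+?)['"]''', embed_page, re.DOTALL)[0]
#   # stream -> 'https://host.example/v.mp4', ext -> 'mp4' (stored as 'host' above)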
def get_source(self, item_url, title, year, start_time):
    try:
        print 'PASSEDURL >>>>>>' + item_url
        count = 0
        headers = {'User-Agent': client.agent()}
        OPEN = client.request(item_url, headers=headers)
        #print OPEN
        Endlinks = dom_parser.parse_dom(OPEN, 'a', req='player-data')
        Endlinks = [(i.attrs['player-data'], i.content) for i in Endlinks if i]
        if 'Season' in year:
            Endlinks = [(i[0], 'SD') for i in Endlinks if i[1] in year]
        else:
            Endlinks = [(i[0], i[1]) for i in Endlinks if i]
        #print 'series8 - scrape_movie - EndLinks: ' + str(Endlinks)
        for link, quality in Endlinks:
            qual = quality_tags.check_sd_url(quality)
            if 'vidcloud' in link:
                link = 'https:' + link if link.startswith('//') else link
                data = client.request(link, headers=headers)
                link = re.findall('''file\s*:\s*['"](.+?)['"].+?type['"]\s*:\s*['"](.+?)['"]''', data, re.DOTALL)[0]
                host = link[1]
                link = link[0] + '|User-Agent=%s&Referer=https://vidcloud.icu/' % client.agent()
                direct = True
            else:
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                direct = False
            count += 1
            self.sources.append({'source': host, 'quality': qual, 'scraper': self.name, 'url': link, 'direct': direct})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []

#seriesonline8().scrape_movie('Black Panther', '2018', 'tt1825683', False)
#seriesonline8().scrape_episode('Suits', '2011', '', '8', '5', '', '')
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote(clean_search(title)))
        #xbmc.log('@#@START: %s' % start_url, xbmc.LOGNOTICE)
        html = client.request(start_url)
        posts = client.parseDOM(html, 'ul', attrs={'class': 'list-group'})[0]
        posts = dom.parse_dom(posts, 'a')
        posts = [(i.attrs['href'], client.parseDOM(i, 'h3')[0]) for i in posts if show_year in i.content]
        post = [i[0] for i in posts if clean_title(title) == clean_title(i[1])][0]
        #xbmc.log('@#@POST: %s' % post, xbmc.LOGNOTICE)
        title = post[:-1].split('/')[-1]  # /tvshow/328724/young-sheldon/
        tvid = post[:-1].split('/')[-2]
        epi = 'episode/%s/%01d/%01d/%s' % (tvid, int(season), int(episode), title)
        epi_link = urlparse.urljoin(self.base_link, epi)
        #xbmc.log('@#@EPI_LINK: %s' % epi_link, xbmc.LOGNOTICE)
        self.get_sources(epi_link, title, year, season, episode, start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = '%s %s' % (clean_search(title.lower()), year)
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        #xbmc.log('@#@DATA:%s' % html, xbmc.LOGNOTICE)
        data = client.parseDOM(html, 'div', attrs={'id': 'content_box'})[0]
        data = client.parseDOM(data, 'h2')  # returns a list with all search results
        data = [dom_parser.parse_dom(i, 'a', req=['href', 'title'])[0] for i in data if i]  # scraping url-title
        links = [(i.attrs['href'], i.attrs['title']) for i in data if i]  # list with link-title for each result
        #xbmc.log('@#@LINKS:%s' % links, xbmc.LOGNOTICE)
        for m_url, m_title in links:
            movie_year = re.findall("(\d{4})", re.sub('\d{3,4}p', '', m_title))[-1]
            movie_name = m_title.split(movie_year)[0]
            if not clean_title(title) == clean_title(movie_name):
                continue
            if not year in movie_year:
                continue
            url = m_url
            #error_log(self.name + ' Pass', url)
            self.get_source(url, title, year, '', '', start_time)
        #print self.sources
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search?query=%s' % (self.base_link, urllib.quote_plus(search_id))
        #print 'scraperchk - scrape_movie - start_url: ' + start_url
        html = client.request(start_url)
        posts = client.parseDOM(html, 'div', attrs={'class': 'one_movie-item'})
        for post in posts:
            data = dom.parse_dom(post, 'a', req='href', attrs={'class': 'movie-title'})[0]
            if not clean_title(title) == clean_title(data.content):
                continue
            qual = client.parseDOM(post, 'span', attrs={'data-title': 'Quality'})[0]
            qual = client.replaceHTMLCodes(qual)
            item_url = urlparse.urljoin(self.base_link, data.attrs['href'])
            self.get_source(item_url, title, year, start_time, qual)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        start_url = urlparse.urljoin(self.base_link, self.moviesearch_link % urllib.quote_plus(clean_search(title)))
        html = client.request(start_url)
        posts = client.parseDOM(html, 'div', attrs={'class': 'index_item.+?'})
        posts = [dom.parse_dom(i, 'a', req='href')[0] for i in posts if i]
        post = [i.attrs['href'] for i in posts
                if clean_title(title) == clean_title(re.sub('(\.|\(|\[|\s)(\d{4})(\.|\)|\]|\s|)(.+|)', '',
                                                            i.attrs['title'], flags=re.I))][0]
        self.get_sources(post, title, year, '', '', start_time)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    links = []
    try:
        start_time = time.time()
        search_id = urllib.quote_plus('%s %s' % (clean_search(title), year))
        start_url = urlparse.urljoin(self.base_link, self.search_link % search_id)
        html = client.request(start_url)
        posts = client.parseDOM(html, 'div', attrs={'class': 'cell_container'})
        posts = [i for i in posts if year in i]
        posts = [dom.parse_dom(i, 'a', req='href')[1] for i in posts if i]
        post = [i.attrs['href'] for i in posts if clean_title(title) == clean_title(i.content)][0]
        mov_link = urlparse.urljoin(self.base_link, post)
        r = client.request(mov_link)
        res_chk = client.parseDOM(r, 'h1')[0]
        url = re.findall('''frame_url\s*=\s*["']([^']+)['"]\;''', r, re.DOTALL)[0]
        furl = url if url.startswith('http') else urlparse.urljoin('https://', url)
        try:
            r = client.request(furl)
            ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1'}
            id_view = client.request('https://vidlink.org/embed/update_views', headers=ua)
            id_view = json.loads(id_view)['id_view'].replace('\/', '/')
            postID = re.findall('''postID\s*=\s*['"]([^'"]+)['"]\;''', r)[0]
            try:
                plink = 'https://vidlink.org/streamdrive/info'
                data = {'browserName': 'Firefox', 'platform': 'Win32', 'postID': postID, 'id_view': id_view}
                headers = ua
                headers['X-Requested-With'] = 'XMLHttpRequest'
                headers['Referer'] = url
                ihtml = client.request(plink, post=data, headers=headers)
                linkcode = jsunpack.unpack(ihtml).replace('\\', '')
                linkcode = re.findall('window\.srcs\s*=\s*\[(.+?)\]\;', linkcode, re.DOTALL)[0]
                frames = json.loads(linkcode)
                link = frames['url']
                links.append(link)
            except:
                pass
            try:
                plink = 'https://vidlink.org/opl/info'
                post = 'postID=%s' % postID
                headers = ua
                headers['X-Requested-With'] = 'XMLHttpRequest'
                headers['Referer'] = url
                ihtml = client.request(plink, post=post, headers=headers)
                ihtml = json.loads(ihtml)['id']
                link = 'https://oload.icu/embed/%s' % ihtml
                links.append(link)
            except:
                pass
        except:
            pass
        count = 0
        #xbmc.log('@#@-LINKS:%s' % links, xbmc.LOGNOTICE)
        for link in links:
            if '1080' in res_chk:
                res = '1080p'
            elif '720' in res_chk:
                res = '720p'
            else:
                res = 'DVD'
            count += 1
            if 'google' in link:
                self.sources.append({'source': 'Googlelink', 'quality': res, 'scraper': self.name, 'url': link, 'direct': True})
            else:
                self.sources.append({'source': 'Openload', 'quality': res, 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)

#putlockerhd().scrape_movie('Black Panther', '2018', '')
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        results = client.parseDOM(html, 'div', attrs={'class': 'video_title'})
        items = []
        for item in results:
            try:
                data = dom_parser.parse_dom(item, 'a', req=['href', 'title'])[0]
                t = data.content
                y = re.findall('\((\d{4})\)', data.attrs['title'])[0]
                qual = data.attrs['title'].split('-')[1]
                link = data.attrs['href']
                if not clean_title(t) == clean_title(title):
                    continue
                if not y == year:
                    continue
                items += [(link, qual)]
            except:
                pass
        for item in items:
            count = 0
            try:
                url = item[0] if item[0].startswith('http') else urlparse.urljoin(self.base_link, item[0])
                r = client.request(url)
                qual = client.parseDOM(r, 'h1')[0]
                res = quality_tags.get_release_quality(item[1], qual)[0]
                url = re.findall('''frame_url\s*=\s*["']([^']+)['"]\;''', r, re.DOTALL)[0]
                url = url if url.startswith('http') else urlparse.urljoin('https://', url)
                if 'vidlink' in url:
                    html = client.request(url, headers=headers)
                    action = re.findall("action'\s*:\s*'([^']+)", html)[0]
                    postID = re.findall("postID\s*=\s*'([^']+)", html)[0]
                    url = 'https://vidlink.org' + re.findall("var\s*url\s*=\s*'([^']+)", html)[0]
                    data = {'browserName': 'Firefox', 'platform': 'Win32', 'postID': postID, 'action': action}
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Referer'] = url
                    html = client.request(url, post=data, headers=headers)
                    html = jsunpack.unpack(html).replace('\\', '')
                    sources = json.loads(re.findall('window\.srcs\s*=\s*([^;]+)', html, re.DOTALL)[0])
                    for src in sources:
                        r = requests.head(src['url'], headers={'User-Agent': client.agent()})
                        if r.status_code < 400:
                            movie_link = src['url']
                            count += 1
                            self.sources.append({'source': 'Googlelink', 'quality': res, 'scraper': self.name,
                                                 'url': movie_link, 'direct': True})
                        else:
                            continue
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        #print self.sources
        return self.sources
    except Exception, argument:
        print argument
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources

#watch32().scrape_movie('Black Panther', '2018', 'tt1825683', False)
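# Hedged sketch (not part of the original module) of the vidlink step used in the watch32
# scrape_movie above: after jsunpack.unpack() the page script exposes a window.srcs JSON
# array; the regex grabs everything up to the first ';' and json.loads turns it into a list
# of {'url': ...} dicts, each of which is then HEAD-checked with requests. Sample data is
# fabricated for illustration.
#
#   unpacked = 'var player; window.srcs = [{"url": "https://example.invalid/stream.mp4", "res": 720}]; player.setup();'
#   srcs = json.loads(re.findall('window\.srcs\s*=\s*([^;]+)', unpacked, re.DOTALL)[0])
#   for src in srcs:
#       print src['url']   # candidate stream url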
def get_sources(self, url, title, year, season, episode, start_time):
    try:
        if url is None:
            return self.sources
        count = 0
        url, hdlr = url[0], url[1]
        main = []
        try:
            headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
            scraper = cfscrape.create_scraper()
            data = scraper.get(url, headers=headers).content
            main = dom.parse_dom(data, 'div', {'class': 'postContent'})
            main = [i.content for i in main]
            comments = dom.parse_dom(data, 'div', {'class': re.compile('content')})
            main += [i.content for i in comments]
        except:
            pass
        for con in main:
            try:
                frames = client.parseDOM(con, 'a', ret='href')
                for link in frames:
                    if 'youtube' in link:
                        continue
                    if any(x in link for x in ['.rar', '.zip', '.iso']) or any(link.endswith(x) for x in ['.rar', '.zip', '.iso']):
                        continue
                    host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(link.strip().lower()).netloc)[0]
                    host = client.replaceHTMLCodes(host)
                    host = host.encode('utf-8')
                    if not hdlr.lower() in link.lower():
                        continue
                    quality, info = quality_tags.get_release_quality(link, link)
                    if link in str(self.sources):
                        continue
                    rd_domains = get_rd_domains()
                    if host in rd_domains:
                        count += 1
                        self.sources.append({'source': host, 'quality': quality, 'scraper': self.name,
                                             'url': link, 'direct': False, 'debridonly': True})
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        # xbmc.log('@#@SOURCES:%s' % self._sources, xbmc.LOGNOTICE)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources

#Releasebb().scrape_movie('Black Panther', '2018', '', True)