def _get_items(self, r): try: size = re.search(r'<size>([\d]+)</size>', r).groups()[0] seeders = re.search(r'<seeders>([\d]+)</seeders>', r).groups()[0] _hash = re.search(r'<info_hash>([a-zA-Z0-9]+)</info_hash>', r).groups()[0] name = re.search(r'<title>(.+?)</title>', r).groups()[0] url = 'magnet:?xt=urn:btih:%s&dn=%s' % (_hash.upper(), urllib.quote_plus(name)) t = name.split(self.hdlr)[0] try: y = re.findall(r'[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper() except BaseException: y = re.findall(r'[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper() try: div = 1000 ** 3 size = float(size) / div size = '%.2f GB' % size except BaseException: size = '0' quality, info = source_utils.get_release_quality(name, name) info.append(size) info = ' | '.join(info) if not seeders == '0': if cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get(self.title): if y == self.hdlr: self._sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True}) except BaseException: pass
def __search(self, titles, year): try: query = self.search_link % urllib.quote_plus( cleantitle.query(titles[0])) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query, XHR=True) if r and r.startswith('{'): r = '[%s]' % r r = json.loads(r) r = [(i['url'], i['name']) for i in r if 'name' in i and 'url' in i] r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})?\)*$', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] url = source_utils.strip_domain(r) url = url.replace('serien/', '') return url except: return
def search(self, localtitle, year, search_type): try: url = urlparse.urljoin(self.base_link, self.search_link) r = client.request(url, redirect=False, post={ 'q': cleantitle.query(localtitle), 'sb': '' }) r = client.parseDOM(r, 'div', attrs={'class': 'small-item'}) local_simple = cleantitle.get(localtitle) for row in r: name_found = client.parseDOM(row, 'a')[1] year_found = name_found[name_found.find("(") + 1:name_found.find(")")] url = client.parseDOM(row, 'a', ret='href')[1] if not search_type in url: continue if cleantitle.get( name_found) == local_simple and year_found == year: return url except: return
def __search(self, titles, year): try: url = urljoin(self.base_link, self.search_link) t = [cleantitle.get(i) for i in set(titles) if i] y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0'] post = {'story': titles[0], 'years_ot': str(int(year) - 1), 'years_do': str(int(year) + 1)} r = client.request(url, post=post, XHR=True) if len(r) < 1000: url = urljoin(self.base_link, self.search_old % quote_plus(titles[0])) r = client.request(url) r = r.decode('cp1251').encode('utf-8') r = dom_parser.parse_dom(r, 'article') r = dom_parser.parse_dom(r, 'div', attrs={'class': 'full'}) r = [(dom_parser.parse_dom(i, 'a', attrs={'itemprop': 'url'}, req='href'), dom_parser.parse_dom(i, 'h3', attrs={'class': 'name'}, req='content'), dom_parser.parse_dom(i, 'div', attrs={'class': 'origin-name'}, req='content'), dom_parser.parse_dom(i, 'div', attrs={'class': 'year'})) for i in r] r = [(i[0][0].attrs['href'], i[1][0].attrs['content'], i[2][0].attrs['content'], dom_parser.parse_dom(i[3], 'a', attrs={'itemprop': 'copyrightYear'})) for i in r if i[0] and i[1] and i[2]] r = [(i[0], i[1], i[2], i[3][0].content) for i in r if i[3]] r = [i[0] for i in r if (cleantitle.get(i[1]) in t or cleantitle.get(i[2]) in t) and i[3] in y][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year): try: t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(urljoin(self.base_link, self.search_link), post={'query': cleantitle.query(titles[0])}) r = dom_parser.parse_dom(r, 'li', attrs={'class': 'entTd'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 've-screen'}, req='title') r = [(dom_parser.parse_dom(i, 'a', req='href'), i.attrs['title'].split(' - ')[0]) for i in r] r = [(i[0][0].attrs['href'], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] return source_utils.strip_domain(r) except: return
def sources(self, url, hostDict, hostprDict): try: sources = [] if url is None: return sources if debrid.status() is False: raise Exception() data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) query = '%s %s' % (data['title'], data['year']) url = self.search_link % urllib.quote(query) url = urlparse.urljoin(self.base_link, url) html = client.request(url) try: results = client.parseDOM(html, 'div', attrs={'class': 'row'})[2] except Exception: return sources items = re.findall('class="browse-movie-bottom">(.+?)</div>\s</div>', results, re.DOTALL) if items is None: return sources for entry in items: try: try: link, name = \ re.findall('<a href="(.+?)" class="browse-movie-title">(.+?)</a>', entry, re.DOTALL)[0] name = client.replaceHTMLCodes(name) if not cleantitle.get(name) == cleantitle.get(data['title']): continue except Exception: continue y = entry[-4:] if not y == data['year']: continue response = client.request(link) try: entries = client.parseDOM(response, 'div', attrs={'class': 'modal-torrent'}) for torrent in entries: link, name = re.findall( 'href="magnet:(.+?)" class="magnet-download download-torrent magnet" title="(.+?)"', torrent, re.DOTALL)[0] link = 'magnet:%s' % link link = str(client.replaceHTMLCodes(link).split('&tr')[0]) quality, info = source_utils.get_release_quality(name, name) try: size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', torrent)[-1] div = 1 if size.endswith(('GB', 'GiB')) else 1024 size = float(re.sub('[^0-9|/.|/,]', '', size)) / div size = '%.2f GB' % size info.append(size) except Exception: pass info = ' | '.join(info) sources.append( {'source': 'Torrent', 'quality': quality, 'language': 'en', 'url': link, 'info': info, 'direct': False, 'debridonly': True}) except Exception: continue except Exception: continue return sources except Exception: return sources
def __search(self, titles, year): try: query = self.search_link % (cleantitle.getsearch(titles[0].replace( ' ', '%20'))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i][0] r = client.request(query) r = client.parseDOM( r, 'li', attrs={'class': 'item everyone-item over_online haveTooltip'}) for i in r: title = client.parseDOM(i, 'a', ret='title')[0] url = client.parseDOM(i, 'a', ret='href')[0] data = client.request(url) y = re.findall('<p><span>Año:</span>(\d{4})', data)[0] original_t = re.findall('movie-text">.+?h2.+?">\((.+?)\)</h2>', data, re.DOTALL)[0] original_t, title = cleantitle.get(original_t), cleantitle.get( title) if (t in title or t in original_t) and y == year: x = dom_parser.parse_dom(i, 'a', req='href') return source_utils.strip_domain(x[0][0]['href']) return except: return
def _get_items(self, url): items = [] try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'}) posts = [i for i in posts if not 'racker:' in i] for post in posts: data = client.parseDOM(post, 'a', ret='href') url = [i for i in data if 'magnet:' in i][0] name = client.parseDOM(post, 'a', ret='title')[0] t = name.split(self.hdlr)[0] if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get(self.title): continue try: y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper() except BaseException: y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper() if not y == self.hdlr: continue try: size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except BaseException: size = '0' items.append((name, url, size)) return items except BaseException: return items
def _get_items(self, url): try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) posts = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0] posts = client.parseDOM(posts, 'tr') for post in posts: data = dom_parser.parse_dom(post, 'a', req='href')[1] link = urlparse.urljoin(self.base_link, data.attrs['href']) name = data.content t = name.split(self.hdlr)[0] if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get(self.title): continue try: y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper() except BaseException: y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper() if not y == self.hdlr: continue try: size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except BaseException: size = '0' self.items.append((name, link, size)) return self.items except BaseException: return self.items
def search(self, localtitle, year, search_type): try: simply_name = cleantitle.get(localtitle) query = self.search_link % urllib.quote_plus( cleantitle.query(localtitle)) query = urlparse.urljoin(self.base_link, query) result = self.scraper.get(query).content result = client.parseDOM(result, 'div', attrs={'class': 'result-item'}) for x in result: correct_type = client.parseDOM(x, 'span', attrs={'class': search_type}) correct_year = client.parseDOM(x, 'span', attrs={'class': 'year'})[0] == year name = client.parseDOM(x, 'div', attrs={'class': 'title'})[0] url = client.parseDOM(name, 'a', ret='href')[0] name = cleantitle.get(client.parseDOM(name, 'a')[0]) if (correct_type and correct_year and name == simply_name): return url except: return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year): try: query = self.search_link % urllib.quote_plus( cleantitle.query(tvshowtitle)) # req page 3 times to workaround their BS random 404's # responses (legit & BS 404s) are actually very fast: timeout prob not important for i in range(4): result = client.request(query, timeout=3) if not result == None: break t = [tvshowtitle] + source_utils.aliases_to_array(aliases) t = [cleantitle.get(i) for i in set(t) if i] result = re.compile( 'itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)', re.DOTALL).findall(result) for i in result: if cleantitle.get(cleantitle.normalize( i[1])) in t and year in i[1]: url = i[0] url = url.encode('utf-8') #log_utils.log('\n\n~~~ outgoing tvshow() url') #log_utils.log(url) # returned 'url' format like: /serie/x_files return url except: return
def movie(self, imdb, title, localtitle, aliases, year): try: scrape = title.lower().replace(' ', '+').replace(':', '') start_url = self.search_link % (self.base_link, scrape) html = self.scraper.get(start_url, headers={ 'referer': start_url }).content results = re.compile('href="(.+?)"', re.DOTALL).findall(html) for url in results: if self.base_link in url: if 'webcache' in url: continue if 'rss' in url: continue if cleantitle.get(title) in cleantitle.get(url): chkhtml = self.scraper.get(url).content chktitle = re.compile('<title.+?>(.+?)</title>', re.DOTALL).findall(chkhtml)[0] if cleantitle.get(title) in cleantitle.get(chktitle): return url return except: return
def episode(self, url, imdb, tvdb, title, premiered, season, episode): try: if url is None: return url = urlparse.parse_qs(url) url = dict([(i, url[i][0]) if url[i] else (i, '') for i in url]) clean_title = cleantitle.geturl(url['tvshowtitle']) + '+s%02d' % int(season) search_url = urlparse.urljoin(self.base_link, (self.search_link % (clean_title.replace('-', '+'), url['year']))) search_results = self.scraper.get(search_url, headers={'referer': self.base_link}).content not_found = dom_parser.parse_dom(search_results, 'div', {'class': 'not-found'}) if len(not_found) > 0: return links = client.parseDOM(search_results, "a", ret="href", attrs={"class": "ml-mask jt"}) results = [] for link in links: if '%ss%02d' % (cleantitle.get(url['tvshowtitle']), int(season)) in cleantitle.get(link): link_results = self.scraper.get(link, headers={'referer': search_url}).content r2 = dom_parser.parse_dom(link_results, 'div', {'id': 'ip_episode'}) r3 = [dom_parser.parse_dom(i, 'a', req=['href']) for i in r2 if i] for i in r3[0]: if i.content == 'Episode %s' % episode: results.append(i.attrs['href']) return results except: return
def __search(self, title, season): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(title))) query = urlparse.urljoin(self.base_link, query) t = cleantitle.get(title) r = client.request(query) r = client.parseDOM(r, 'div', attrs={'class': 'moviefilm'}) r = client.parseDOM(r, 'div', attrs={'class': 'movief'}) r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a')) for i in r] r = [(i[0][0], i[1][0].lower()) for i in r if len(i[0]) > 0 and len(i[1]) > 0] r = [(i[0], i[1], re.findall('(.+?)\s+(?:saison)\s+(\d+)', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = [(i[0], i[1], re.findall('\((.+?)\)$', i[1]), i[2]) for i in r] r = [(i[0], i[2][0] if len(i[2]) > 0 else i[1], i[3]) for i in r] r = [ i[0] for i in r if t == cleantitle.get(i[1]) and int(i[2]) == int(season) ][0] url = re.findall('(?://.+?|)(/.+)', r)[0] url = client.replaceHTMLCodes(url) url = url.encode('utf-8') return url except: return
def episode(self, url, imdb, tvdb, title, premiered, season, episode): try: if url is None: return r = self.scraper.get(url, headers={'referer': self.base_link}).content r = client.parseDOM(r, 'li', attrs={'itemprop': 'episode'}) t = cleantitle.get(title) r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'span', attrs={'itemprop': 'name'}), re.compile('(\d{4}-\d{2}-\d{2})').findall(i)) for i in r] r = [(i[0], i[1][0].split(' ')[-1], i[2]) for i in r if i[1]] + [(i[0], None, i[2]) for i in r if not i[1]] r = [(i[0], i[1], i[2][0]) for i in r if i[2]] + [(i[0], i[1], None) for i in r if not i[2]] r = [(i[0][0], i[1], i[2]) for i in r if i[0]] url = [i for i in r if t == cleantitle.get(i[1]) and premiered == i[2]][:1] if not url: url = [i for i in r if t == cleantitle.get(i[1])] if len(url) > 1 or not url: url = [i for i in r if premiered == i[2]] if len(url) > 1 or not url: raise Exception() return url[0][0] except: return
def __search(self, titles, year): try: query = self.search_link % (quote_plus(cleantitle.query( titles[0]))) query = urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'id': 'main'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'panel-body'}) r = [ (dom_parser.parse_dom(i.content, 'h4', attrs={'class': 'title-list'}), dom_parser.parse_dom(i.content, 'a', attrs={'href': re.compile('.*/year/.*')})) for i in r ] r = [(dom_parser.parse_dom(i[0][0].content, 'a', req='href'), i[1][0].content if i[1] else '0') for i in r if i[0]] r = [(i[0][0].attrs['href'], i[0][0].content, re.sub('<.+?>|</.+?>', '', i[1])) for i in r if i[0] and i[1]] r = [(i[0], i[1], i[2].strip()) for i in r if i[2]] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [ i[0] for i in r if cleantitle.get(i[1]) in t and i[2] == year ][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year): try: t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(urlparse.urljoin(self.base_link, self.search_link), post={'query': titles[0]}, XHR=True) r = dom_parser.parse_dom(r, 'a', req='href') r = [(i.attrs['href'], i.content.split('<br')[0]) for i in r] r = [(i[0], re.sub('<.+?>|</.+?>', '', i[1])) for i in r] r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r] r = [(i[0], i[2][0][0] if i[2] else i[1], i[2][0][1] if i[2] else '0') for i in r] r = [(i[0], re.sub(u'\(с \d+ по \d+ сезон\)', '', i[1]), i[2]) for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] return source_utils.strip_domain(r) except: return
def __search(self, titles): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'nag'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item-video'}) r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'entry-title'}) r = dom_parser.parse_dom(r, 'a', req='href') for i in r: title = i[1] if re.search('\*(?:.*?)\*', title) is not None: title = re.sub('\*(?:.*?)\*', '', title) title = cleantitle.get(title) if title in t: return source_utils.strip_domain(i[0]['href']) else: return except: return
def __search(self, titles, year): try: query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0] + ' ' + year))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i][0] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'v_pict'}) for i in r: title = re.findall('alt="(.+?)"', i[1], re.DOTALL)[0] y = re.findall('(\d{4})', title, re.DOTALL)[0] title = re.sub('<\w+>|</\w+>', '', title) title = cleantitle.get(title) title = re.findall('(\w+)', cleantitle.get(title))[0] if title in t and year == y: url = re.findall('href="(.+?)"', i[1], re.DOTALL)[0] return source_utils.strip_domain(url) return except: return
def __search(self, titles, imdb, year): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query, XHR=True) r = json.loads(r) r = [(i.get('title'), i.get('custom_fields', {})) for i in r.get('posts', [])] r = [(i[0], i[1]) for i in r if i[0] and i[1]] r = [(i[0], i[1].get('Streaming', ['']), i[1].get('Jahr', ['0']), i[1].get('IMDb-Link', [''])) for i in r if i] r = [(i[0], i[1][0], i[2][0], re.findall('.+?(tt\d+).*?', i[3][0])) for i in r if i[0] and i[1] and i[2] and i[3]] r = [ i[1] for i in r if imdb in i[3] or (cleantitle.get(i[0]) in t and i[2] in y) ][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year): try: query = self.search_link % (urllib.quote_plus( cleantitle.getsearch(titles[0] + ' ' + year))) query = urlparse.urljoin(self.base_link, query) t = cleantitle.get(titles[0]) r = client.request(query) r = client.parseDOM(r, 'div', attrs={'class': 'card'}) r = client.parseDOM(r, 'h3') for i in r: data = re.findall( '<span.*?>(.+?)</span>.+?date">\s*\((\d{4}).*?</span>', i, re.DOTALL) for title, year in data: title = cleantitle.get(title) y = year if title in t and year == y: url = client.parseDOM(i, 'a', ret='href')[0] return source_utils.strip_domain(url) return except: return
def _get_sources(self, url): try: r = client.request(url) posts = client.parseDOM(r, 'tr') for post in posts: link = re.findall( 'a title="Download Torrent Magnet" href="(magnet:.+?)"', post, re.DOTALL) if link == []: continue for url in link: url = url.split('&tr')[0] name = url.split('&dn=')[1] name = urllib.unquote_plus(name).replace(' ', '.') if source_utils.remove_lang(name): continue t = name.split(self.hdlr)[0].replace( self.year, '').replace('(', '').replace(')', '').replace( '&', 'and').replace('.US.', '.').replace('.us.', '.') if cleantitle.get(t) != cleantitle.get(self.title): continue if self.hdlr not in url: continue quality, info = source_utils.get_release_quality(name, url) try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith(('GB', 'GiB')) else 1024 size = float(re.sub('[^0-9|/.|/,]', '', size)) / div size = '%.2f GB' % size info.insert(0, size) except: pass info = ' | '.join(info) self.sources.append({ 'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True }) except: source_utils.scraper_error('BTSCENE') pass
def __search(self, search_link, imdb, titles): try: query = search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'big-list'}) r = dom_parser.parse_dom(r, 'table', attrs={'class': 'row'}) r = dom_parser.parse_dom(r, 'td', attrs={'class': 'list-name'}) r = dom_parser.parse_dom(r, 'a', req='href') r = [ i.attrs['href'] for i in r if i and cleantitle.get(i.content) in t ][0] url = source_utils.strip_domain(r) r = client.request(urlparse.urljoin(self.base_link, url)) r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/tt\d+.*')}, req='href') r = [re.findall('.+?(tt\d+).*?', i.attrs['href']) for i in r] r = [i[0] for i in r if i] return url if imdb in r else None except: return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year): try: query = self.tvsearch_link % urllib.quote_plus( cleantitle.query(tvshowtitle)) query = urlparse.urljoin(self.base_link, query.lower()) result = client.request(query, referer=self.base_link) result = client.parseDOM(result, 'div', attrs={'class': 'index_item.+?'}) result = [(dom.parse_dom(i, 'a', req=['href', 'title'])[0]) for i in result if i] result = [( i.attrs['href'] ) for i in result if cleantitle.get(tvshowtitle) == cleantitle.get( re.sub( '(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '', i.attrs['title'], flags=re.I))][0] url = client.replaceHTMLCodes(result) url = url.encode('utf-8') return url except Exception: return
def movie(self, imdb, title, localtitle, aliases, year): try: clean_title = cleantitle.geturl(title) search_url = urlparse.urljoin( self.base_link, (self.search_link % (clean_title, year))) search_results = self.scraper.get(search_url, headers={ 'referer': self.base_link }).content not_found = dom_parser.parse_dom(search_results, 'div', {'class': 'not-found'}) if len(not_found) > 0: return links = client.parseDOM(search_results, "a", ret="href", attrs={"class": "ml-mask jt"}) results = [] for link in links: if '%s%s' % (cleantitle.get(title), year) in cleantitle.get(link): results.append(link) return results except: return
def get_sources(self, link): try: url = '%s%s' % (self.base_link, link) result = client.request(url) if result is None: return info_hash = re.findall('<kbd>(.+?)<', result, re.DOTALL)[0] url = '%s%s' % ('magnet:?xt=urn:btih:', info_hash) name = re.findall('<h3 class="card-title">(.+?)<', result, re.DOTALL)[0] name = urllib.unquote_plus(name).replace(' ', '.') url = '%s%s%s' % (url, '&dn=', str(name)) if source_utils.remove_lang(name): return if url in str(self.sources): return t = name.split(self.hdlr)[0].replace(self.year, '').replace( '(', '').replace(')', '').replace('&', 'and').replace( '.US.', '.').replace('.us.', '.') if cleantitle.get(t) != cleantitle.get(self.title): return if self.hdlr not in name: return size = re.findall( '<div class="col-3">File size:</div><div class="col">(.+?)<', result, re.DOTALL)[0] quality, info = source_utils.get_release_quality(name, url) try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', size)[0] dsize, isize = source_utils._size(size) info.insert(0, isize) except: isize = '0' dsize = 0 pass info = ' | '.join(info) self.sources.append({ 'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize }) except: source_utils.scraper_error('YOURBITTORRENT') pass
def episode(self, url, imdb, tvdb, title, premiered, season, episode): try: if url == None: return url = urlparse.parse_qs(url) url = dict([(i, url[i][0]) if url[i] else (i, '') for i in url]) url['premiered'], url['season'], url[ 'episode'] = premiered, season, episode try: clean_title = cleantitle.geturl( url['tvshowtitle']) + '-season-%d' % int(season) search_url = urlparse.urljoin( self.base_link, self.search_link % clean_title.replace('-', '+')) r = self.scraper.get(search_url).content r = client.parseDOM(r, 'div', {'id': 'movie-featured'}) r = [(client.parseDOM(i, 'a', ret='href'), re.findall('<b><i>(.+?)</i>', i)) for i in r] r = [(i[0][0], i[1][0]) for i in r if cleantitle.get(i[1][0]) == cleantitle.get(clean_title)] url = r[0][0] except: pass data = client.request(url) data = client.parseDOM(data, 'div', attrs={'id': 'details'}) data = zip(client.parseDOM(data, 'a'), client.parseDOM(data, 'a', ret='href')) url = [(i[0], i[1]) for i in data if i[0] == str(int(episode))] return url[0][1] except: return
def search(self, title, localtitle, year): try: import sys reload(sys) sys.setdefaultencoding('utf8') simply_name = cleantitle.get(localtitle) simply_name2 = cleantitle.get(title) simply_name = cleantitle.query(localtitle).split(' ') simply_name2 = cleantitle.query(title).split(' ') query = self.search_link % urllib.quote_plus( cleantitle.query(localtitle)) url = urlparse.urljoin(self.base_link, query) result = client.request(url) result = client.parseDOM(result, 'div', attrs={'class': 'row search-results'}) results = client.parseDOM( result, 'div', attrs={'class': 'item-detail-bigblock title title-bigblock'}) for result in results: movieneourl = client.parseDOM(result, 'a', ret='href')[0] result = client.parseDOM(result, 'a')[0] for word in simply_name: if word in result and year in result: return [ urlparse.urljoin(self.base_link, movieneourl), result ] continue except Exception, e: print str(e) return
def matchAlias(self, title, aliases): try: for alias in aliases: if cleantitle.get(title) == cleantitle.get(alias['title']): return True except: return False
def __search(self, titles, year, content): try: query = self.search_link % (urllib.quote_plus( cleantitle.getsearch(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i][0] r = client.request(query) r = client.parseDOM(r, 'div', attrs={'class': 'tab-content clearfix'}) if content == 'movies': r = client.parseDOM(r, 'div', attrs={'id': 'movies'}) else: r = client.parseDOM(r, 'div', attrs={'id': 'series'}) data = dom_parser.parse_dom(r, 'figcaption') for i in data: title = i[0]['title'] title = cleantitle.get(title) if title in t: x = dom_parser.parse_dom(i, 'a', req='href') return source_utils.strip_domain(x[0][0]['href']) else: url = dom_parser.parse_dom(i, 'a', req='href') data = client.request(url[0][0]['href']) data = re.findall( '<h3>Pelicula.+?">(.+?)\((\d{4})\).+?</a>', data, re.DOTALL)[0] if titles[0] in data[0] and year == data[1]: return source_utils.strip_domain(url[0][0]['href']) return except: return