def __search(self, search_link, imdb, title):
    try:
        query = search_link % (urllib.quote_plus(cleantitle.query(title)))
        query = urlparse.urljoin(self.base_link, query)

        t = cleantitle.get(title)
        tq = cleantitle.query(title)

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'big-list'})
        r = dom_parser.parse_dom(r, 'table', attrs={'class': 'row'})
        r = dom_parser.parse_dom(r, 'td', attrs={'class': 'list-name'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content) for i in r if i]

        url = [i[0] for i in r if t == cleantitle.get(i[1])]
        url = url[0] if len(url) > 0 else [i[0] for i in r if tq == cleantitle.query(i[1])][0]
        url = source_utils.strip_domain(url)

        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/tt\d+.*')}, req='href')
        r = [re.findall('.+?(tt\d+).*?', i.attrs['href']) for i in r]
        r = [i[0] for i in r if i]

        return url if imdb in r else None
    except:
        return
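# Hedged sketch: source_utils.strip_domain() is imported elsewhere in these
# scrapers and is not shown in this section. Judging by the inline regex the
# same modules use ('(?://.+?|)(/.+)'), it reduces an absolute URL to its
# path. A minimal stand-in under that assumption (name is illustrative):

def _strip_domain_sketch(url):
    """Return the path of url, e.g. 'http://host.to/a/b' -> '/a/b' (assumed behaviour)."""
    import re
    return re.findall('(?://.+?|)(/.+)', url)[0]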
def search(self, title, localtitle, year, search_type):
    try:
        url = self.do_search(cleantitle.query(title), title, localtitle, year, search_type)
        if not url:
            url = self.do_search(cleantitle.query(localtitle), title, localtitle, year, search_type)
        return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))

        # request the page up to 4 times to work around their random 404's
        # responses (legit & bogus 404s) are actually very fast: timeout prob not important
        for i in range(4):
            result = client.request(query, timeout=3)
            if not result == None: break

        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]

        result = re.compile('itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)', re.DOTALL).findall(result)
        for i in result:
            if cleantitle.get(cleantitle.normalize(i[1])) in t and year in i[1]:
                url = i[0]
                url = url.encode('utf-8')
                #log_utils.log('\n\n~~~ outgoing tvshow() url')
                #log_utils.log(url)

                # returned 'url' format like: /serie/x_files
                return url
    except:
        return
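# The retry loop above is a recurring pattern in these scrapers. A hedged,
# generic version of it (name and defaults are illustrative, not part of the
# addon API):

def _request_with_retries(url, attempts=4, timeout=3):
    """Re-request a page a few times to ride out intermittent 404s."""
    for _ in range(attempts):
        result = client.request(url, timeout=timeout)
        if result is not None:
            return result
    return None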
def __search(self, title, season):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(title)))
        query = urlparse.urljoin(self.base_link, query)

        t = cleantitle.get(title)

        r = client.request(query)

        r = client.parseDOM(r, 'div', attrs={'class': 'moviefilm'})
        r = client.parseDOM(r, 'div', attrs={'class': 'movief'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a')) for i in r]
        r = [(i[0][0], i[1][0].lower()) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [(i[0], i[1], re.findall('(.+?)\s+(?:saison)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = [(i[0], i[1], re.findall('\((.+?)\)$', i[1]), i[2]) for i in r]
        r = [(i[0], i[2][0] if len(i[2]) > 0 else i[1], i[3]) for i in r]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and int(i[2]) == int(season)][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        t = cleantitle.get(tvshowtitle)

        q = urllib.quote_plus(cleantitle.query(tvshowtitle))
        p = urllib.urlencode({'term': q})

        r = client.request(self.search_link, post=p, XHR=True)
        try: r = json.loads(r)
        except: r = None

        if r:
            r = [(i['seo_url'], i['value'], i['label']) for i in r if 'value' in i and 'label' in i and 'seo_url' in i]
        else:
            r = requests.get(self.search_link_2 % q, 'tv shows').text
            r = client.parseDOM(r, 'div', attrs={'valign': '.+?'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), client.parseDOM(i, 'a')) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if i[0] and i[1] and i[2]]

        r = [(i[0], i[1], re.findall('(\d{4})', i[2])) for i in r]
        r = [(i[0], i[1], i[2][-1]) for i in r if i[2]]
        r = [i for i in r if t == cleantitle.get(i[1]) and year == i[2]]

        url = r[0][0]
        url = proxy.parse(url)
        url = url.strip('/').split('/')[-1]
        url = url.encode('utf-8')
        return url
    except:
        return
def __search(self, titles, type, year, season=0, episode=False):
    try:
        years = [str(year), str(int(year) + 1), str(int(year) - 1)]
        years = ['&veroeffentlichung[]=%s' % i for i in years]

        query = self.search_link % (type, urllib.quote_plus(cleantitle.query(titles[0])))
        query += ''.join(years)
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = self.__proceed_search(query)
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and int(i[2]) == int(season)][0]

        url = source_utils.strip_domain(r)

        if episode:
            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'season-list'})
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [i.attrs['href'] for i in r if i and int(i.content) == int(episode)][0]

            url = source_utils.strip_domain(r)

        return url
    except:
        return
def movie(self, imdb, title, year):
    try:
        t = cleantitle.get(title)

        q = '/search/%s.html' % (urllib.quote_plus(cleantitle.query(title)))
        q = urlparse.urljoin(self.base_link, q)

        for i in range(3):
            r = client.request(q)
            if not r == None: break

        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
        r = [(i[0][0], i[1][0]) for i in r if i[0] and i[1]]
        r = [i[0] for i in r if t == cleantitle.get(i[1])][:2]
        r = [(i, re.findall('(\d+)', i)[-1]) for i in r]

        for i in r:
            try:
                y, q = cache.get(self.ymovies_info, 9000, i[1])
                if not y == year: raise Exception()
                return urlparse.urlparse(i[0]).path
            except:
                pass
    except:
        return
def __search(self, titles, year, imdb):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'movie_cell'})
        r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'bottom'}), dom_parser.parse_dom(i, 'div', attrs={'class': 'year'})) for i in r]
        r = [(dom_parser.parse_dom(i[0], 'a', req=['href', 'title']), re.findall('[(](\d{4})[)]', i[1][0].content)) for i in r if i[0] and i[1]]
        r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0]) for i in r if i[0] and i[1]]
        r = [(i[0], i[1].lower(), i[2]) for i in r if i[2] in y]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t]

        if len(r) > 1:
            for i in r:
                data = client.request(urlparse.urljoin(self.base_link, i))
                data = dom_parser.parse_dom(data, 'a', attrs={'name': re.compile('.*/tt\d+.*')}, req='name')
                data = [re.findall('.+?(tt\d+).*?', d.attrs['name']) for d in data]
                data = [d[0] for d in data if len(d) > 0 and d[0] == imdb]
                if len(data) >= 1:
                    url = i
        else:
            url = r[0]

        if url: return source_utils.strip_domain(url)
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, year):
    try:
        query = base64.b64decode(self.search_link) % urllib.quote_plus(cleantitle.query(tvshowtitle))
        result = self.request(query)

        tvshowtitle = cleantitle.get(tvshowtitle)
        years = ['%s' % str(year), '%s' % str(int(year)+1), '%s' % str(int(year)-1)]
        result = [i for i in result if any(x in str(i['year']) for x in years)]

        match = [i['href'] for i in result if tvshowtitle == cleantitle.get(i['name']) and str(year) == str(i['year'])]

        match2 = [i['href'] for i in result]
        match2 = [x for y, x in enumerate(match2) if x not in match2[:y]]
        if match2 == []: return

        for i in match2[:5]:
            try:
                if len(match) > 0:
                    url = match[0] ; break
                if imdb in str(self.request(i)[0]['imdb']):
                    url = i ; break
            except:
                pass

        url = '/' + url.split('/json/')[-1]
        url = url.encode('utf-8')
        return url
    except:
        return
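# The '[x for y, x in enumerate(match2) if x not in match2[:y]]' idiom used
# above (and in several scrapers below) is an order-preserving de-duplication.
# A hedged standalone illustration:

def _dedupe_preserving_order(items):
    """['a', 'b', 'a', 'c'] -> ['a', 'b', 'c'], keeping first occurrences."""
    return [x for y, x in enumerate(items) if x not in items[:y]]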
def search(self, title, localtitle, year):
    try:
        simply_name = cleantitle.get(localtitle)
        simply_name2 = cleantitle.get(title)

        query = self.search_link % urllib.quote_plus(cleantitle.query(localtitle))

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
            'Referer': 'https://segos.es/?page=login'
        }
        data = {'login': self.user_name, 'password': self.user_pass, 'loguj': ''}

        s = requests.Session()
        s.post('https://segos.es/?page=login', data=data, headers=headers)

        url = urlparse.urljoin(self.base_link, query)
        k = s.get(url)
        result = k.text

        results = client.parseDOM(result, 'div', attrs={'class': 'col-lg-12 col-md-12 col-xs-12'})
        for result in results:
            segosurl = client.parseDOM(result, 'a', ret='href')[0]
            result = client.parseDOM(result, 'a')
            segostitles = cleantitle.get(result[1]).split('/')
            for segostitle in segostitles:
                if simply_name == segostitle or simply_name2 == segostitle:
                    return urlparse.urljoin(self.base_link, segosurl)
    except Exception, e:
        print str(e)
        return
def __search(self, titles, year, season='0'):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'article', attrs={'class': 'shortstory'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 's_info'})
        r = dom_parser.parse_dom(r, 'h2')
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        r = [(i[0], re.sub('<.+?>|</.+?>', '', i[1]), re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = [(i[0], i[1], i[2], re.findall('(.+?)(\d+)\s+(?:staf+el|s)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

        return source_utils.strip_domain(r)
    except:
        return
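# Many of these scrapers build the same year window: the release year, one
# year either side, and '0' as a "no year on the page" wildcard. A hedged
# helper expressing that idiom (name is illustrative):

def _year_window(year):
    """'2016' -> ['2016', '2017', '2015', '0']."""
    return [str(year), str(int(year) + 1), str(int(year) - 1), '0']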
def __search(self, title, year):
    try:
        r = client.request(self.base_link)
        r = re.findall('sL10n\s*=\s*({.*?});', r)[0]
        r = json.loads(r)['nonce']

        query = self.search_link % (urllib.quote_plus(cleantitle.query(title)), r)
        query = urlparse.urljoin(self.base_link, query)

        t = cleantitle.get(title)
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)
        r = json.loads(r)
        r = [(i, r[i].get('url', ''), r[i].get('title', ''), r[i].get('extra', {}).get('names', ''), r[i].get('extra', {}).get('date', '0')) for i in r]
        r = [(i[0], i[1], client.replaceHTMLCodes(i[2]), client.replaceHTMLCodes(i[3]), i[4]) for i in r]
        r = sorted(r, key=lambda i: int(i[4]), reverse=True)  # with year > no year
        r = [i[1] for i in r if (t == cleantitle.get(i[2]) or t == cleantitle.get(i[3])) and i[4] in y][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, year):
    try:
        t = cleantitle.get(tvshowtitle)

        q = urllib.quote_plus(cleantitle.query(tvshowtitle))
        p = urllib.urlencode({'term': q})
        h = {'X-Requested-With': 'XMLHttpRequest'}

        r = client.request(self.search_link, post=p, headers=h)
        try: r = json.loads(r)
        except: r = None
        print ("WATCHSERIES RESULT", r)

        r = [(i['seo_url'], i['value'], i['label']) for i in r if 'value' in i and 'label' in i and 'seo_url' in i]
        r = [(i[0], i[1], re.findall('(\d{4})', i[2])) for i in r]
        r = [(i[0], i[1], i[2][-1]) for i in r if i[2]]
        r = [i for i in r if t == cleantitle.get(i[1]) and year == i[2]]
        print ("WATCHSERIES RESULT 4", r, year)

        url = r[0][0]
        print ("WATCHSERIES RESULT 5", r, url)

        try: url = urlparse.parse_qs(urlparse.urlparse(url).query)['u'][0]
        except: pass
        try: url = urlparse.parse_qs(urlparse.urlparse(url).query)['q'][0]
        except: pass

        url = url.strip('/').split('/')[-1]
        url = url.encode('utf-8')
        return url
    except:
        return
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        url = self.search_link % (cleantitle.geturl(title), year)

        q = urlparse.urljoin(self.base_link, url)

        r = proxy.geturl(q)
        if not r == None: return url

        t = cleantitle.get(title)

        q = self.search_link_2 % urllib.quote_plus(cleantitle.query(title))
        q = urlparse.urljoin(self.base_link, q)

        r = client.request(q)

        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a'))
        r = [(i[0], re.findall('(?:\'|\")(.+?)(?:\'|\")', i[1])) for i in r]
        r = [(i[0], [re.findall('(.+?)\((\d{4})', x) for x in i[1]]) for i in r]
        r = [(i[0], [x[0] for x in i[1] if x]) for i in r]
        r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if i[1]]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]]

        url = re.findall('(?://.+?|)(/.+)', r[0])[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def __search(self, titles, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0]) + ' ' + year))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i][0]

        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs={'class': 'karatula'})

        for i in r:
            title = client.parseDOM(i, 'a', ret='title')[0]
            y = re.findall('(\d{4})', title)[0]
            title = cleantitle.get_simple(title)

            if t in title and y == year:
                x = dom_parser.parse_dom(i, 'a', req='href')
                return source_utils.strip_domain(x[0][0]['href'])
        return
    except:
        return
def __search(self, titles, year, season='0'):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['products', 'row']})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': ['box-product', 'clearfix']})
        if int(season) > 0:
            r = [i for i in r if dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
        else:
            r = [i for i in r if not dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
        r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title-product'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:staf+el|s)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

        url = source_utils.strip_domain(r)
        url = url.replace('-info', '-stream')
        return url
    except:
        return
def __search(self, titles):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'nag'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item-video'})
        r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'entry-title'})
        r = dom_parser.parse_dom(r, 'a', req='href')

        for i in r:
            title = i[1]
            if re.search('\*(?:.*?)\*', title) is not None:
                title = re.sub('\*(?:.*?)\*', '', title)
            title = cleantitle.get(title)
            if title in t:
                return source_utils.strip_domain(i[0]['href'])
        return
    except:
        return
def __get_episode_url(self, data):
    try:
        path = self.search_link % urllib.quote_plus(cleantitle.query(data['tvshowtitle']))
        url = urlparse.urljoin(self.base_link, path)
        xbmc.log('__get_episode_url start url: ' + str(url))

        response = client.request(url)

        exp = 'href="([^"]+?)".+?videoHname.+?title="%s - Season %s"' % (data['tvshowtitle'], data['season'])
        get_season = re.findall(exp, response, flags=re.I)[0]

        url = urlparse.urljoin(self.base_link, get_season + '/season')
        xbmc.log('__get_episode_url season url: ' + str(url))

        response = client.request(url)

        exp = 'href="([^"]+?)" title="(.+?Episode (?:%02d|%s):.+?)".+?videoHname' % (int(data['episode']), data['episode'])
        episode = re.findall(exp, response)[0][0]

        url = urlparse.urljoin(self.base_link, episode)
        xbmc.log('__get_episode_url episode url: ' + str(url))
        return url
    except Exception:
        return
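# The episode expression above matches both zero-padded and bare numbering
# ('Episode 05' as well as 'Episode 5'). A hedged illustration of that trick:

import re

def _episode_pattern(episode):
    """Build a pattern matching 'Episode 05' and 'Episode 5' alike."""
    return re.compile('Episode (?:%02d|%s):' % (int(episode), episode))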
def __search(self, titles, year, content):
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]

        c = client.request(urlparse.urljoin(self.base_link, self.year_link % int(year)), output='cookie')

        p = urllib.urlencode({'search': cleantitle.query(titles[0])})
        c = client.request(urlparse.urljoin(self.base_link, self.search_link), cookie=c, post=p, output='cookie')
        r = client.request(urlparse.urljoin(self.base_link, self.type_link % content), cookie=c, post=p)

        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'content'})
        r = dom_parser.parse_dom(r, 'tr')
        r = [dom_parser.parse_dom(i, 'td') for i in r]
        r = [dom_parser.parse_dom(i, 'a', req='href') for i in r]
        r = [(i[0].attrs['href'], i[0].content, i[1].content) for i in r if i]

        x = []
        for i in r:
            if re.search('(?<=<i>\().*$', i[1]):
                x.append((i[0], re.search('(.*?)(?=\s<)', i[1]).group(), re.search('(?<=<i>\().*$', i[1]).group(), i[2]))
            else:
                x.append((i[0], i[1], i[1], i[2]))

        r = [i[0] for i in x if (cleantitle.get(i[1]) in t or cleantitle.get(i[2]) in t) and i[3] == year][0]

        return source_utils.strip_domain(r)
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))
        query = urlparse.urljoin(self.base_link, query)

        t = cleantitle.get(tvshowtitle)

        r = client.request(query).decode('iso-8859-1').encode('utf-8')

        r = client.parseDOM(r, 'div', attrs={'id': 'series'})[0]
        r = client.parseDOM(r, 'figcaption')
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), client.parseDOM(i, 'a')) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        r = [(i[0], re.findall('(?:^Watch |)(.+?)(?: Online|)$', i[1]), re.findall('(\d{4})', i[2])) for i in r]
        r = [(i[0], i[1][0], i[2][0]) for i in r if len(i[1]) > 0 and len(i[2]) > 0]
        r = [(i[0], i[1].replace(i[2], ''), i[2]) for i in r]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []

        if url == None: return sources

        if not str(url).startswith('http'):
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            if 'tvshowtitle' in data:
                url = '%s%s' % (self.search_link, cleantitle.getsearch(data['tvshowtitle']))
                url = urlparse.urljoin(self.base_link, url)
                r = client.request(url, timeout='10')
                t = cleantitle.query(data['tvshowtitle'])
                ref = client.parseDOM(r, 'a', ret='href', attrs={'title': t})[0]
                url = '%s/%s-ep-%01d/' % (ref, cleantitle.geturl(data['tvshowtitle']), int(data['episode']))
            else:
                url = '%s/movie/%s-engsub/%s-ep-1/' % (self.base_link, cleantitle.geturl(data['title']), cleantitle.geturl(data['title']))

            url = client.request(url, timeout='10', output='geturl')
            if url == None: raise Exception()
        else:
            url = urlparse.urljoin(self.base_link, url)

        r = client.request(url, timeout='10')
        r = client.parseDOM(r, 'iframe', ret='src')

        for i in r:
            if 'drama4u' in i or 'k-vid' in i:
                i = client.request(i, timeout='10')
                i = re.findall('(https:\W.redirector\..*?)[\'\"]', i)
                for g in i:
                    g = g.replace("\\", "")
                    try: sources.append({'source': 'gvideo', 'quality': directstream.googletag(g)[0]['quality'], 'language': 'ko', 'url': g, 'direct': True, 'debridonly': False})
                    except: pass
            elif 'ads' in i:
                pass
            else:
                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(i.strip().lower()).netloc)[0]
                if not host in hostDict: raise Exception()
                host = host.encode('utf-8')
                sources.append({'source': host, 'quality': 'SD', 'language': 'ko', 'url': i, 'direct': False, 'debridonly': False})

        return sources
    except:
        return sources
def movie(self, imdb, title, year):
    try:
        t = cleantitle.get(title)

        query = '%s %s' % (title, year)
        query = base64.b64decode(self.search_link) % urllib.quote_plus(query)

        result = client.source(query)
        result = json.loads(result)['results']

        result = [(i['url'], i['titleNoFormatting']) for i in result]
        result = [(i[0], re.findall('(?:^Ver |)(.+?)(?: HD |)\((\d{4})', i[1])) for i in result]
        result = [(i[0], i[1][0][0], i[1][0][1]) for i in result if len(i[1]) > 0]

        r = [i for i in result if t == cleantitle.get(i[1]) and year == i[2]]

        if len(r) == 0:
            # fall back to the Spanish title from IMDb
            t = 'http://www.imdb.com/title/%s' % imdb
            t = client.source(t, headers={'Accept-Language': 'es-ES'})
            t = client.parseDOM(t, 'title')[0]
            t = re.sub('(?:\(|\s)\d{4}.+', '', t).strip()
            t = cleantitle.get(t)
            r = [i for i in result if t == cleantitle.get(i[1]) and year == i[2]]

        try: url = re.findall('//.+?(/.+)', r[0][0])[0]
        except: url = r[0][0]
        try: url = re.findall('(/.+?/.+?/)', url)[0]
        except: pass

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        pass

    try:
        t = cleantitle.get(title)

        query = self.search3_link % urllib.quote_plus(cleantitle.query(title))
        query = urlparse.urljoin(self.base_link, query)

        result = cloudflare.source(query)
        result = re.sub(r'[^\x00-\x7F]+', '', result)

        r = result.split('<li class=')
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'i'), re.findall('\((\d{4})\)', i)) for i in r]
        r = [(i[0][0], re.sub('\(|\)', '', i[1][0]), i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

        try: url = re.findall('//.+?(/.+)', r)[0]
        except: url = r

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        pass
def tvshow(self, imdb, tvdb, tvshowtitle, year):
    try:
        key = urlparse.urljoin(self.base_link, self.key_link)
        key = proxy.request(key, 'searchform')
        key = client.parseDOM(key, 'input', ret='value', attrs={'name': 'key'})[0]

        query = self.tvsearch_link % (urllib.quote_plus(cleantitle.query(tvshowtitle)), key)
        query = urlparse.urljoin(self.base_link, query)

        result = str(proxy.request(query, 'index_item'))
        if 'page=2' in result or 'page%3D2' in result:
            result += str(proxy.request(query + '&page=2', 'index_item'))

        result = client.parseDOM(result, 'div', attrs={'class': 'index_item.+?'})

        tvshowtitle = 'watch' + cleantitle.get(tvshowtitle)
        years = ['(%s)' % str(year), '(%s)' % str(int(year)+1), '(%s)' % str(int(year)-1)]

        result = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in result]
        result = [(i[0][0], i[1][0]) for i in result if len(i[0]) > 0 and len(i[1]) > 0]
        result = [i for i in result if any(x in i[1] for x in years)]

        r = []
        for i in result:
            u = i[0]
            try: u = urlparse.parse_qs(urlparse.urlparse(u).query)['u'][0]
            except: pass
            try: u = urlparse.parse_qs(urlparse.urlparse(u).query)['q'][0]
            except: pass
            r += [(u, i[1])]

        match = [i[0] for i in r if tvshowtitle == cleantitle.get(i[1]) and '(%s)' % str(year) in i[1]]

        match2 = [i[0] for i in r]
        match2 = [x for y, x in enumerate(match2) if x not in match2[:y]]
        if match2 == []: return

        for i in match2[:5]:
            try:
                if len(match) > 0:
                    url = match[0] ; break
                r = proxy.request(urlparse.urljoin(self.base_link, i), 'tv_episode_item')
                if imdb in str(r):
                    url = i ; break
            except:
                pass

        url = re.findall('(?://.+?|)(/.+)', url)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def __search(self, titles, year, season=0, episode=False):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'container'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'ml-item-content'})

        f = []
        for i in r:
            _url = dom_parser.parse_dom(i, 'a', attrs={'class': 'ml-image'}, req='href')[0].attrs['href']

            _title = re.sub('<.+?>|</.+?>', '', dom_parser.parse_dom(i, 'h6')[0].content).strip()
            try: _title = re.search('(.*?)\s(?:staf+el|s)\s*(\d+)', _title, re.I).group(1)
            except: pass

            _season = '0'

            _year = re.findall('calendar.+?>.+?(\d{4})', ''.join([x.content for x in dom_parser.parse_dom(i, 'ul', attrs={'class': 'item-params'})]))
            _year = _year[0] if len(_year) > 0 else '0'

            if season > 0:
                s = dom_parser.parse_dom(i, 'span', attrs={'class': 'season-label'})
                s = dom_parser.parse_dom(s, 'span', attrs={'class': 'el-num'})
                if s: _season = s[0].content.strip()

            if cleantitle.get(_title) in t and _year in y and int(_season) == int(season):
                f.append((_url, _year))

        r = f
        r = sorted(r, key=lambda i: int(i[1]), reverse=True)  # with year > no year
        r = [i[0] for i in r if r[0]][0]

        url = source_utils.strip_domain(r)

        if episode:
            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'season-list'})
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content) for i in r]
            r = [i[0] for i in r if i[1] and int(i[1]) == int(episode)][0]

            url = source_utils.strip_domain(r)

        return url
    except:
        return
def search(self, title, localtitle, year):
    try:
        import sys
        reload(sys)
        sys.setdefaultencoding('utf8')

        simply_name = cleantitle.query(localtitle).split(' ')
        simply_name2 = cleantitle.query(title).split(' ')

        query = self.search_link % urllib.quote_plus(cleantitle.query(localtitle))
        url = urlparse.urljoin(self.base_link, query)

        result = client.request(url)
        result = client.parseDOM(result, 'div', attrs={'class': 'row search-results'})
        results = client.parseDOM(result, 'div', attrs={'class': 'item-detail-bigblock title title-bigblock'})

        for result in results:
            movieneourl = client.parseDOM(result, 'a', ret='href')[0]
            result = client.parseDOM(result, 'a')[0]
            for word in simply_name:
                if word in result and year in result:
                    return [urlparse.urljoin(self.base_link, movieneourl), result]
    except Exception, e:
        print str(e)
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))
        result = client.request(query)
        #tvshowtitle = cleantitle.get(tvshowtitle)

        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]

        result = re.compile('itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)', re.DOTALL).findall(result)
        for i in result:
            if cleantitle.get(cleantitle.normalize(i[1])) in t and year in i[1]:
                url = i[0]
                url = url.encode('utf-8')
                return url
    except:
        return
def get_filmweb_data(self, type_url, title, localtitle, year):
    try:
        from urllib2 import Request, urlopen

        if localtitle == 'Vikings': localtitle = 'Wikingowie'

        local_clean = cleantitle.get(localtitle)
        title_clean = cleantitle.get(title)

        titles = {localtitle, title}
        for item in titles:
            url = urlparse.urljoin(self.film_web, self.filmweb_search)
            url = url % (urllib.quote_plus(item), year, year)

            result = client.request(url)

            id = ''
            rows = client.parseDOM(result, 'div', attrs={'class': 'ad__page-wrapper'})
            if not rows: continue
            rows = client.parseDOM(rows, 'div', attrs={'id': 'searchResult'})
            try: id = client.parseDOM(rows, 'data', ret='data-id')[0]
            except: pass
            rows = client.parseDOM(rows, 'div', attrs={'class': 'filmPreview__card'})

            if not rows:
                # retry with the typed search url
                url = urlparse.urljoin(self.film_web, self.filmweb_search)
                url = url % (type_url, urllib.quote_plus(cleantitle.query(localtitle)), year, year)
                q = Request(url)
                a = urlopen(q)
                result = a.read()

                rows = client.parseDOM(result, 'div', attrs={'id': 'searchResult'})
                try: id = client.parseDOM(rows, 'data', ret='data-id')[0]
                except: pass
                rows = client.parseDOM(rows, 'div', attrs={'class': 'filmPreview__card'})

            for row in rows:
                row2 = client.parseDOM(row, 'div', attrs={'class': 'filmPreview__originalTitle'})
                if not row2:
                    row2 = client.parseDOM(row, 'h3', attrs={'class': 'filmPreview__title'})
                href = client.parseDOM(row, 'a', ret='href')[0]
                found_clean = cleantitle.get(row2[0])
                if title_clean == found_clean or local_clean == found_clean:
                    return {'href': href, 'id': id}

        # fall back to the last candidate seen
        return {'href': href, 'id': id}
    except Exception, e:
        print str(e)
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link)
        r = client.request(url, redirect=False, post={'szukaj': cleantitle.query(localtitle)})
        r = client.parseDOM(r, 'div', attrs={'class': 'video_info'})

        local_simple = cleantitle.get(localtitle)
        for row in r:
            name_found = client.parseDOM(row, 'h1')[0]
            year_found = name_found[name_found.find("(") + 1:name_found.find(")")]
            if cleantitle.get(name_found) == local_simple and year_found == year:
                url = client.parseDOM(row, 'a', ret='href')[0]
                return url
    except:
        return
def __search(self, titles):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)
        r = json.loads(r)
        r = [(i.get('id'), i.get('value')) for i in r]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t][0]

        return r
    except:
        return
def __search(self, titles):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query, XHR=True)
        r = json.loads(r)
        r = [(i.get('url'), i.get('name')) for i in r]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t][0]

        return source_utils.strip_domain(r)
    except:
        return
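# Hedged usage note: callers in this family of scrapers typically feed
# __search() the local title, the original title and any aliases, e.g.
# (illustrative, not verbatim from this addon):
#
#   url = self.__search([localtvshowtitle, tvshowtitle] + source_utils.aliases_to_array(aliases))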
def tvshow(self, imdb, tvdb, tvshowtitle, year):
    try:
        t = cleantitle.get(tvshowtitle)

        q = urllib.quote_plus(cleantitle.query(tvshowtitle))
        p = urllib.urlencode({'term': q})
        h = {'X-Requested-With': 'XMLHttpRequest'}

        r = client.request(self.search_link, post=p, headers=h)
        try: r = json.loads(r)
        except: r = None
        print("WATCHSERIES RESULT", r)

        r = [(i['seo_url'], i['value'], i['label']) for i in r if 'value' in i and 'label' in i and 'seo_url' in i]
        r = [(i[0], i[1], re.findall('(\d{4})', i[2])) for i in r]
        r = [(i[0], i[1], i[2][-1]) for i in r if i[2]]
        r = [i for i in r if t == cleantitle.get(i[1]) and year == i[2]]
        print("WATCHSERIES RESULT 4", r, year)

        url = r[0][0]
        print("WATCHSERIES RESULT 5", r, url)

        try: url = urlparse.parse_qs(urlparse.urlparse(url).query)['u'][0]
        except: pass
        try: url = urlparse.parse_qs(urlparse.urlparse(url).query)['q'][0]
        except: pass

        url = url.strip('/').split('/')[-1]
        url = url.encode('utf-8')
        return url
    except:
        return
def __search(self, titles, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query, headers={'Accept-Encoding': 'gzip'})

        r = client.parseDOM(r, 'div', attrs={'id': 'main'})
        r = client.parseDOM(r, 'div', attrs={'class': 'panel-body'})
        r = [(client.parseDOM(i, 'h4', attrs={'class': 'title-list'}), client.parseDOM(i, 'a', attrs={'href': '[^\'"]+/year/[^\'"]+'})) for i in r]
        r = [(client.parseDOM(i[0], 'a', ret='href'), client.parseDOM(i[0], 'a'), i[1][0] if len(i[1]) > 0 else '0') for i in r if len(i[0]) > 0]
        r = [(i[0][0], i[1][0], re.sub('<.+?>|</.+?>', '', i[2])) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [(i[0], i[1], i[2].strip()) for i in r if i[2]]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def __search(self, titles, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'main'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'panel-body'})
        r = [(dom_parser.parse_dom(i.content, 'h4', attrs={'class': 'title-list'}), dom_parser.parse_dom(i.content, 'a', attrs={'href': re.compile('.*/year/.*')})) for i in r]
        r = [(dom_parser.parse_dom(i[0][0].content, 'a', req='href'), i[1][0].content if i[1] else '0') for i in r if i[0]]
        r = [(i[0][0].attrs['href'], i[0][0].content, re.sub('<.+?>|</.+?>', '', i[1])) for i in r if i[0] and i[1]]
        r = [(i[0], i[1], i[2].strip()) for i in r if i[2]]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] == year][0]

        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year, season='0'):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        result = cache.get(client.request, 4, query)

        entry = dom_parser.parse_dom(result, 'div', attrs={'class': 'result-item'})
        entry = [(dom_parser.parse_dom(i, 'a')[0], dom_parser.parse_dom(i, 'span', attrs={'class': 'year'})[0]) for i in entry]
        entry = [(i[0].attrs['href'], dom_parser.parse_dom(i[0], 'img')[0].attrs['alt'], i[1].content) for i in entry]
        entry = [i[0] for i in entry if cleantitle.get(i[1]) in t and i[2] == year]

        if len(entry) > 0:
            return source_utils.strip_domain(entry[0])
        else:
            return
    except:
        try: source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except: return
        return
def tvshow(self, imdb, tvdb, tvshowtitle, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))
        query = urlparse.urljoin(self.base_link, query)

        t = cleantitle.get(tvshowtitle)

        r = client.request(query).decode('iso-8859-1').encode('utf-8')

        r = client.parseDOM(r, 'div', attrs={'id': 'series'})[0]
        r = client.parseDOM(r, 'figcaption')
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), client.parseDOM(i, 'a')) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        r = [(i[0], re.findall('(?:^Watch |)(.+?)(?: Online|)$', i[1]), re.findall('(\d{4})', i[2])) for i in r]
        r = [(i[0], i[1][0], i[2][0]) for i in r if len(i[1]) > 0 and len(i[2]) > 0]
        r = [(i[0], i[1].replace(i[2], ''), i[2]) for i in r]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def search(self, localtitle, year, search_type):
    try:
        simply_name = cleantitle.get(localtitle)

        query = self.search_link % urllib.quote_plus(cleantitle.query(localtitle))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query)
        result = client.parseDOM(result, 'div', attrs={'id': search_type})

        links = client.parseDOM(result, 'figcaption')
        names = client.parseDOM(result, 'figcaption', ret='title')

        urls = []
        for i in range(len(names)):
            name = cleantitle.get(names[i])
            url = client.parseDOM(links[i], 'a', ret='href')[0]
            if name == simply_name:
                urls.append(url)

        if len(urls) == 1:
            return urls[0]
        else:
            return self.findMatchByYear(year, urls)
    except:
        return
def __search(self, title, localtitle, year, content_type):
    try:
        t = cleantitle.get(title)
        tq = cleantitle.get(localtitle)
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        query = urlparse.urljoin(self.base_link, self.search_link)
        post = urllib.urlencode({'k': cleantitle.query(localtitle)})

        r = client.request(query, post=post)
        r = json.loads(r)

        r = [i.get('result') for i in r if i.get('type', '').encode('utf-8') == content_type]
        r = [(i.get('url'), i.get('originalTitle'), i.get('title'), i.get('anneeProduction', 0), i.get('dateStart', 0)) for i in r]
        r = [(i[0], re.sub('<.+?>|</.+?>', '', i[1] if i[1] else ''), re.sub('<.+?>|</.+?>', '', i[2] if i[2] else ''), i[3] if i[3] else re.findall('(\d{4})', i[4])[0]) for i in r if i[3] or i[4]]
        r = sorted(r, key=lambda i: int(i[3]), reverse=True)  # with year > no year
        r = [i[0] for i in r if i[3] in y and (t == cleantitle.get(i[1]) or tq == cleantitle.query(i[2]))][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def __search(self, titles):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'article')
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'title'})
        r = dom_parser.parse_dom(r, 'a', req='href')

        for i in r:
            title = client.replaceHTMLCodes(i[1])
            title = cleantitle.get(title)
            if title in t:
                return source_utils.strip_domain(i[0]['href'])
        return
    except:
        return
def search(self, localtitle, year):
    try:
        simply_name = cleantitle.get(localtitle)

        query = self.search_link % urllib.quote_plus(cleantitle.query(localtitle))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query)
        result = client.parseDOM(result, 'article')

        for row in result:
            a_href = client.parseDOM(row, 'h3')[0]
            url = client.parseDOM(a_href, 'a', ret='href')[0]
            name = client.parseDOM(a_href, 'a')[0]
            name = cleantitle.get(name)

            year_found = client.parseDOM(row, 'span', attrs={'class': 'dtyear'})
            if year_found:
                year_found = year_found[0]

            if name == simply_name and (not year_found or not year or year_found == year):
                return url
    except:
        return
def __search(self, titles, year, season='0'):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        titles = [cleantitle.get(i) for i in set(titles) if i]

        cache.cache_clear()
        searchResult = cache.get(self.scraper.get, 4, query).content

        results = re.findall(r'<div class="title-product">\n<a href="(.*?)">((?s).*?)</a>', searchResult)

        # find the result with a matching name and season
        for x in range(0, len(results)):
            title = cleantitle.get(results[x][1])
            if any(i in title for i in titles):
                if season == "0" or ("staffel" in title and ("0" + str(season) in title or str(season) in title)):
                    # we have the suspected link!
                    return source_utils.strip_domain(results[x][0])
        return
    except:
        try: source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except: return
        return
def movie(self, imdb, title, localtitle, year):
    try:
        t = cleantitle.get(title)

        p = self.post_link % urllib.quote_plus(cleantitle.query(title))
        q = urlparse.urljoin(self.base_link, self.search_link)

        r = proxy.request(q, 'playing top', post=p, XHR=True)

        r = client.parseDOM(r, 'li')
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a')) for i in r]
        r = [(i[0][0], i[1][0]) for i in r if i[0] and i[1]]
        r = [(i[0], re.findall('(.+?)\((\d{4})', i[1])) for i in r]
        r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if i[1]]
        r = [i for i in r if t == cleantitle.get(i[1]) and str(year) == i[2]]

        url = proxy.parse(r[0][0])
        url = re.findall('(?://.+?|)(/.+)', url)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        pass
def __search(self, titles):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'nag'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item-video'})
        r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'entry-title'})
        r = dom_parser.parse_dom(r, 'a', req='href')

        title = r[0][1]
        title = cleantitle.get(title)

        if title in t:
            return source_utils.strip_domain(r[0][0]['href'])
        else:
            return
    except:
        return
def do_search(self, title, year, video_type):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link)
        url = url % urllib.quote_plus(cleantitle.query(title))

        result = client.request(url)
        result = client.parseDOM(result, 'div', attrs={'class': 'item'})

        for row in result:
            row_type = client.parseDOM(row, 'div', attrs={'class': 'typepost'})[0]
            if row_type != video_type:
                continue

            names = client.parseDOM(row, 'span', attrs={'class': 'tt'})[0]
            names = names.split('/')

            year_found = client.parseDOM(row, 'span', attrs={'class': 'year'})

            if self.name_matches(names, title, year) and (len(year_found) == 0 or year_found[0] == year):
                url = client.parseDOM(row, 'a', ret='href')[0]
                return urlparse.urljoin(self.base_link, url)
    except:
        return
def __search(self, search_link, imdb, titles):
    try:
        query = search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'big-list'})
        r = dom_parser.parse_dom(r, 'table', attrs={'class': 'row'})
        r = dom_parser.parse_dom(r, 'td', attrs={'class': 'list-name'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [i.attrs['href'] for i in r if i and cleantitle.get(i.content) in t]

        if len(r) == 0: return None
        r = r[0]

        url = source_utils.strip_domain(r)

        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/tt\d+.*')}, req='href')
        r = [re.findall('.+?(tt\d+).*?', i.attrs['href']) for i in r]
        r = [i[0] for i in r if i]

        return url if imdb in r else None
    except:
        source_faultlog.logFault(__name__, source_faultlog.tagSearch)
        return
def __search(self, titles, year, content_type):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])), content_type)
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'search'})
        r = dom_parser.parse_dom(r, 'table')
        r = dom_parser.parse_dom(r, 'tr', attrs={'class': re.compile('entry\d+')})
        r = [(dom_parser.parse_dom(i, 'a'), dom_parser.parse_dom(i, 'img', attrs={'class': 'flag', 'alt': 'de'})) for i in r]
        r = [i[0] for i in r if i[0] and i[1]]
        r = [(i[0].attrs['href'], i[0].content) for i in r]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        return source_utils.strip_domain(r)
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))

        for i in range(4):
            result = client.request(query, timeout=3)
            if not result == None: break

        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]

        result = re.compile('itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)', re.DOTALL).findall(result)
        for i in result:
            if cleantitle.get(cleantitle.normalize(i[1])) in t and year in i[1]:
                url = i[0]
                url = url.encode('utf-8')
                return url
    except:
        return
def __search(self, titles):
    try:
        query = self.search_link % (urllib.quote_plus(urllib.quote_plus(cleantitle.query(titles[0]))))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)

        r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'coverBox'})
        r = dom_parser.parse_dom(r, 'li')
        r = dom_parser.parse_dom(r, 'span', attrs={'class': 'name'})
        r = dom_parser.parse_dom(r, 'a')

        title = r[0][1]
        title = cleantitle.get(title)

        if title in t:
            return source_utils.strip_domain(r[0][0]['href'])
        else:
            return
    except:
        return
def __search(self, titles, year):
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(urlparse.urljoin(self.base_link, self.search_link), post={'query': cleantitle.query(titles[0])})

        r = dom_parser.parse_dom(r, 'li', attrs={'class': 'entTd'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 've-screen'}, req='title')
        r = [(dom_parser.parse_dom(i, 'a', req='href'), i.attrs['title'].split(' - ')[0]) for i in r]
        r = [(i[0][0].attrs['href'], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y]

        if len(r) > 0:
            r = r[0]
        else:
            return

        return source_utils.strip_domain(r)
    except:
        source_faultlog.logFault(__name__, source_faultlog.tagSearch)
        return
def __search(self, titles, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query, XHR=True)
        if r and r.startswith('{'): r = '[%s]' % r
        r = json.loads(r)

        r = [(i['url'], i['name']) for i in r if 'name' in i and 'url' in i]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})?\)*$', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        url = source_utils.strip_domain(r)
        url = url.replace('serien/', '')
        return url
    except:
        return
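# The "wrap a bare JSON object in a list" fix-up above keeps json.loads()
# output uniform whether the endpoint returns one hit or many. A hedged
# standalone illustration:

import json

def _as_json_list(raw):
    """'{"a": 1}' -> [{'a': 1}]; '[{"a": 1}]' stays a list."""
    if raw and raw.startswith('{'):
        raw = '[%s]' % raw
    return json.loads(raw)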
def __search(self, titles, imdb, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = cache.get(client.request, 4, query, XHR=True)
        r = json.loads(r)

        r = [(i.get('title'), i.get('custom_fields', {})) for i in r.get('posts', [])]
        r = [(i[0], i[1]) for i in r if i[0] and i[1]]
        r = [(i[0], i[1].get('Streaming', ['']), i[1].get('Jahr', ['0']), i[1].get('IMDb-Link', [''])) for i in r if i]
        r = [(i[0], i[1][0], i[2][0], re.findall('.+?(tt\d+).*?', i[3][0])) for i in r if i[0] and i[1] and i[2] and i[3]]
        r = [i[1] for i in r if imdb in i[3] or (cleantitle.get(i[0]) in t and i[2] in y)]

        if len(r) > 0:
            return source_utils.strip_domain(r[0])
        return ""
    except:
        try: source_faultlog.logFault(__name__, source_faultlog.tagSearch)
        except: return
def movie(self, imdb, title, year):
    try:
        t = cleantitle.get(title)

        query = '%s %s' % (title, year)
        query = base64.b64decode(self.search_link) % urllib.quote_plus(query)

        result = client.request(query)
        result = json.loads(result)['results']

        result = [(i['url'], i['titleNoFormatting']) for i in result]
        result = [(i[0], re.findall('(?:^Ver |)(.+?)(?: HD |)\((\d{4})', i[1])) for i in result]
        result = [(i[0], i[1][0][0], i[1][0][1]) for i in result if len(i[1]) > 0]

        r = [i for i in result if t == cleantitle.get(i[1]) and year == i[2]]

        if len(r) == 0:
            t = 'http://www.imdb.com/title/%s' % imdb
            t = client.request(t, headers={'Accept-Language': 'es-ES'})
            t = client.parseDOM(t, 'title')[0]
            t = re.sub('(?:\(|\s)\d{4}.+', '', t).strip()
            t = cleantitle.get(t)
            r = [i for i in result if t == cleantitle.get(i[1]) and year == i[2]]

        try: url = re.findall('//.+?(/.+)', r[0][0])[0]
        except: url = r[0][0]
        try: url = re.findall('(/.+?/.+?/)', url)[0]
        except: pass

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        pass

    try:
        t = cleantitle.get(title)

        query = self.search3_link % urllib.quote_plus(cleantitle.query(title))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query)
        result = re.sub(r'[^\x00-\x7F]+', '', result)

        r = result.split('<li class=')
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'i'), re.findall('\((\d{4})\)', i)) for i in r]
        r = [(i[0][0], re.sub('\(|\)', '', i[1][0]), i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

        try: url = re.findall('//.+?(/.+)', r)[0]
        except: url = r

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        pass
def movie(self, imdb, title, localtitle, year):
    try:
        query = self.moviesearch_link % urllib.quote_plus(cleantitle.query(title))
        xbmc.log('[plugin.video.libra]::sources:movie:query:' + query, xbmc.LOGNOTICE)
        query = urlparse.urljoin(self.base_link, query)
        xbmc.log('[plugin.video.libra]::sources:movie:query:' + query, xbmc.LOGNOTICE)

        result = str(proxy.request(query, 'Sẻ chia bất tận'))
        # xbmc.log('[plugin.video.libra]::sources:movie:result:' + result, xbmc.LOGNOTICE)
        if 'page=2' in result or 'page%3D2' in result:
            result += str(proxy.request(query + '&page=2', 'free movies'))

        result = client.parseDOM(result, 'ul', attrs={'class': 'hfeed posts-default clearfix'})
        xbmc.log('[plugin.video.libra]::sources:movie:result::::' + str(result), xbmc.LOGNOTICE)
        result = client.parseDOM(result, 'h3', attrs={'class': 'entry-title'})

        title = cleantitle.get(title)
        years = ['(%s)' % str(year), '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]
        xbmc.log('[plugin.video.libra]::sources:movie:title:' + title, xbmc.LOGNOTICE)
        xbmc.log('[plugin.video.libra]::sources:movie:year:' + str(years), xbmc.LOGNOTICE)

        result = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a')) for i in result]
        xbmc.log('[plugin.video.libra]::sources:movie:result:' + str(result), xbmc.LOGNOTICE)
        result = [(i[0][0], i[1][0]) for i in result if len(i[0]) > 0 and len(i[1]) > 0]
        # xbmc.log('[plugin.video.libra]::sources:movie:resultmatch:' + str(result), xbmc.LOGNOTICE)
        result = [i for i in result if any(x in i[1] for x in years)]
        xbmc.log('[plugin.video.libra]::sources:movie:resultyears:' + str(result), xbmc.LOGNOTICE)

        r = [(proxy.parse(i[0]), i[1]) for i in result]
        xbmc.log('[plugin.video.libra]::sources:movie:r:' + str(r), xbmc.LOGNOTICE)

        # compare each candidate's parsed title against the cleaned title and year
        match = []
        for u, label in r:
            parsed_title = str(cleantitle.get(label)).split('<br/>')
            xbmc.log('[plugin.video.libra]::sources:movie:parsed_title:' + str(parsed_title), xbmc.LOGNOTICE)
            if len(parsed_title) > 1 and title == parsed_title[1] and '(%s)' % str(year) in label:
                match.append(u)
        xbmc.log('[plugin.video.libra]::sources:movie:match:' + str(match), xbmc.LOGNOTICE)

        match2 = [i[0] for i in r]
        match2 = [x for y, x in enumerate(match2) if x not in match2[:y]]
        xbmc.log('[plugin.video.libra]::sources:movie:match2:' + str(match2), xbmc.LOGNOTICE)
        if match2 == []: return

        for i in match2[:5]:
            try:
                if len(match) > 0:
                    url = match[0]
                    break
                r = proxy.request(urlparse.urljoin(self.base_link, i), 'Sẻ chia bất tận')
                r = re.findall('(tt\d+)', r)
                if imdb in r:
                    url = i
                    break
            except:
                pass

        url = re.findall('(?://.+?|)(/.+)', url)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        xbmc.log('[plugin.video.libra]::movie:url:' + url, xbmc.LOGNOTICE)
        return url
    except:
        return
def sources(self, url, hostDict, hostprDict):
    try:
        print '------------------------------- -------------------------------'
        sources = []

        print url

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        season = data['season'] if 'season' in data else False
        episode = data['episode'] if 'episode' in data else False

        print season, episode

        if season and episode:
            print 'TV'
            self.search_link = 'query=%s&submit=Submit+Query'
            aTitle = data['tvshowtitle']
        else:
            self.search_link = 'query=%s&submit=Submit+Query'
            aTitle = data['title']

        post = self.search_link % (urllib.quote_plus(cleantitle.query(aTitle)))
        url = 'http://www.filmenstreaminghd.com/recherche/'

        t = cleantitle.get(aTitle)

        r = client.request(url, XHR=True, referer=url, post=post)
        r = client.parseDOM(r, 'div', attrs={'class': 'film-k kutu-icerik kat'})

        if season and episode:
            t = t + 'saison0' + season

        r = client.parseDOM(r, 'div', attrs={'class': 'play fa fa-play-circle'})
        r = sorted(set(r))
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
        r = [(i[0][0], i[1][0].lower()) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [i[0] for i in r if t == cleantitle.get(i[1])][0]
        #r = sorted(set(r))

        url0 = '%s%s' % ('http://www.filmenstreaminghd.com', r)
        print url0
        url = client.replaceHTMLCodes(url0)
        url = url.encode('utf-8')

        r = client.request(url, XHR=True, referer=url)
        r = re.sub('(\n|\t)', '', r)

        langue = re.compile('<b class=\"fa fa-cc\"></b><span>(.+?)</span>', re.MULTILINE | re.DOTALL).findall(r)[0]
        if langue == 'VF': langue = 'FR'

        quality2 = re.compile('<div class=\"kalite\">(.+?)</div>', re.MULTILINE | re.DOTALL).findall(r)[0]
        quality2 = re.sub('-', '', quality2)

        if season and episode:
            unLien0a = client.parseDOM(r, 'div', attrs={'class': 'dizi-bolumleri'})[0]
            r = re.compile('Saison\s+0%s\s+\-\s+Episode\s+0%s(.+?)class=\"dropit-trigger\">' % (season, episode), re.MULTILINE | re.DOTALL).findall(unLien0a)[0]
            unLien0b = client.parseDOM(r, 'li', ret='id')
        else:
            r = client.parseDOM(r, 'div', attrs={'class': 'dizi-bolumleri film'})
            unLien0b = client.parseDOM(r, 'span', ret='id')

        for unLienUrl in unLien0b:
            if 'gf-' in unLienUrl: continue

            dataUrl = urllib.urlencode({'pid': unLienUrl[1:]})
            dataUrl = client.request(url0, post=dataUrl, XHR=True, referer=url0)

            try: url = client.parseDOM(dataUrl, 'iframe', ret='src')[1]
            except: url = client.parseDOM(dataUrl, 'iframe', ret='src')[0]

            if url.startswith('//'): url = url.replace('//', '', 1)

            host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(url.strip().lower()).netloc)[0]
            if not host in hostDict: continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')

            url = url.encode('utf-8')

            if '1080p' in quality2:
                quality = '1080p'
            elif '720p' in quality2 or 'bdrip' in quality2 or 'hdrip' in quality2:
                quality = 'HD'
            else:
                quality = 'SD'

            if 'dvdscr' in quality2 or 'r5' in quality2 or 'r6' in quality2:
                quality2 = 'SCR'
            elif 'camrip' in quality2 or 'tsrip' in quality2 or 'hdcam' in quality2 or 'hdts' in quality2 or 'dvdcam' in quality2 or 'dvdts' in quality2 or 'cam' in quality2 or 'telesync' in quality2 or 'ts' in quality2:
                quality2 = 'CAM'

            sources.append({'source': host, 'quality': quality, 'language': langue, 'url': url, 'direct': False, 'debridonly': False})

        print sources
        return sources
    except:
        return sources
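# The quality mapping in sources() above follows the usual release-tag
# heuristics. A hedged, condensed sketch of the same logic (name is
# illustrative):

def _map_quality(tag):
    """Map a release tag like '720p bdrip' to the addon's quality labels."""
    tag = tag.lower()
    if '1080p' in tag:
        return '1080p'
    if any(x in tag for x in ('720p', 'bdrip', 'hdrip')):
        return 'HD'
    return 'SD'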
def __search(self, titles, year):
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(urlparse.urljoin(self.base_link, self.search_link), post=urllib.urlencode({'val': cleantitle.query(titles[0])}), XHR=True)

        r = dom_parser.parse_dom(r, 'li')
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content, re.findall('\((\d{4})', i.content)) for i in r]
        r = [(i[0], i[1], i[2][0] if i[2] else '0') for i in r]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        return source_utils.strip_domain(r)
    except:
        return
def sources(self, url, hostDict, hostprDict):
    try:
        print '------------------------------- -------------------------------'
        sources = []

        print url

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        print data

        title = data['title']
        year = data['year']
        season = data['season'] if 'season' in data else False
        episode = data['episode'] if 'episode' in data else False
        localtitle = data['localtitle'] if 'localtitle' in data else False
        if season and episode:
            localtitle = data['localtvshowtitle'] if 'localtvshowtitle' in data else False

        t = cleantitle.get(title)
        tq = cleantitle.query(localtitle)
        tq2 = re.sub(' ', '', cleantitle.query(localtitle).lower())
        tq = re.sub(' ', '%20', tq)
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        query = 'http://www.cinemay.com'

        r = client.request('http://www.cinemay.com/?s=%s' % tq)
        print 'http://www.cinemay.com/?s=%s' % tq
        r = client.parseDOM(r, 'div', attrs={'class': 'unfilm'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
        r = [(i[0][0], re.sub('(film| en streaming vf| en streaming vostfr|’| )', '', i[1][0]).lower()) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:saison|s)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        r = [(i[0], re.sub(' \&\#[0-9]{4,6};', '', i[1]), i[2], i[3]) for i in r]
        r = [i[0] for i in r if tq2 == cleantitle.get(i[1])][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')

        r = client.request('http://www.cinemay.com' + url)
        print 'http://www.cinemay.com' + url
        r = client.parseDOM(r, 'div', attrs={'class': 'module-actionbar'})
        r = client.parseDOM(r, 'a', ret='href')

        for i in r:
            if i == '#': continue

            url = client.request('http://www.cinemay.com' + i)
            url = client.parseDOM(url, 'div', attrs={'class': 'wbox2 video dark'})
            url = client.parseDOM(url, 'iframe', ret='src')[0]

            host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(url.strip().lower()).netloc)[0]
            if not host in hostDict: continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')

            sources.append({'source': host, 'quality': 'SD', 'language': 'FR', 'url': url, 'direct': False, 'debridonly': False})

        return sources
    except:
        return sources
def __search(self, titles, year, season='0'):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = cache.get(client.request, 4, query, timeout='40')

        r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['products', 'row']})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': ['box-product', 'clearfix']})
        if int(season) > 0:
            r = [i for i in r if dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
        else:
            r = [i for i in r if not dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
        r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title-product'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:staf+el|s)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # entries with a year sort before those without
        r = [i[0] for i in r if any(a in cleantitle.get(i[1]) for a in t) and i[2] in y and int(i[3]) == int(season)]

        if len(r) > 0:
            r = r[0]
        else:
            return

        return source_utils.strip_domain(r)
    except:
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except:
            pass
        return
def __search(self, imdb, titles, year):
    try:
        q = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        q = urlparse.urljoin(self.base_link, q)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(q)

        r = dom_parser.parse_dom(r, 'tr', attrs={'id': re.compile('coverPreview.+?')})
        r = [(dom_parser.parse_dom(i, 'a', req='href'), dom_parser.parse_dom(i, 'div', attrs={'style': re.compile('.+?')}), dom_parser.parse_dom(i, 'img', req='src')) for i in r]
        r = [(i[0][0].attrs['href'].strip(), i[0][0].content.strip(), i[1], i[2]) for i in r if i[0] and i[2]]
        r = [(i[0], i[1], [x.content for x in i[2] if x.content.isdigit() and len(x.content) == 4], i[3]) for i in r]
        r = [(i[0], i[1], i[2][0] if i[2] else '0', i[3]) for i in r]
        r = [i for i in r if any('us_flag' in x.attrs['src'] for x in i[3])]
        r = [(i[0], i[1], i[2], [re.findall('(\d+)', x.attrs['src']) for x in i[3] if 'smileys' in x.attrs['src']]) for i in r]
        r = [(i[0], i[1], i[2], [x[0] for x in i[3] if x]) for i in r]
        r = [(i[0], i[1], i[2], int(i[3][0]) if i[3] else 0) for i in r]
        r = sorted(r, key=lambda x: x[3])[::-1]  # highest smiley rating first
        r = [(i[0], i[1], i[2], re.findall('\((.+?)\)$', i[1])) for i in r]
        r = [(i[0], i[1], i[2]) for i in r if not i[3]]
        r = [i for i in r if i[2] in y]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # entries with a year sort before those without
        r = [(client.replaceHTMLCodes(i[0]), i[1], i[2]) for i in r]

        match = [i[0] for i in r if cleantitle.get(i[1]) in t and year == i[2]]

        match2 = [i[0] for i in r]
        match2 = [x for y, x in enumerate(match2) if x not in match2[:y]]
        if match2 == []:
            return

        for i in match2[:5]:
            try:
                if match:
                    url = match[0]
                    break
                r = client.request(urlparse.urljoin(self.base_link, i))
                r = re.findall('(tt\d+)', r)
                if imdb in r:
                    url = i
                    break
            except:
                pass

        return source_utils.strip_domain(url)
    except:
        return
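# Stand-alone sketch of the two-stage candidate selection shared by
# __search() above and movie() below: take an exact title/year match if one
# exists, otherwise fetch up to five deduplicated candidates and accept the
# first page containing the target IMDb id. fetch is a hypothetical callable
# returning a page body for a link.
import re

def confirm_by_imdb(imdb, match, match2, fetch):
    for link in match2[:5]:
        try:
            if match:
                return match[0]
            if imdb in re.findall(r'(tt\d+)', fetch(link)):
                return link
        except Exception:
            pass
    return None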
def searchShow(self, title, season, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        t = cleantitle.get(title)

        # try '<title> S01' first, then '<title> Season 1', then '<title> <year>'
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.query('%s S%02d' % (title.replace('\'', '-'), int(season)))))
        sr = client.request(url, headers=headers, timeout='10')
        if sr:
            r = client.parseDOM(sr, 'h2', attrs={'class': 'tit'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
            r = [(i[0], re.findall('(.+?)\s+-\s+S(\d+)', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            r = [i[0] for i in r if t == cleantitle.get(i[1]) and int(season) == int(i[2])][0]
        else:
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.query('%s Season %01d' % (title.replace('\'', '-'), int(season)))))
            sr = client.request(url, headers=headers, timeout='10')
            if sr:
                r = client.parseDOM(sr, 'h2', attrs={'class': 'tit'})
                r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
                r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
                r = [(i[0], re.findall('(.+?)\s+-\s+Season\s+(\d+)', i[1])) for i in r]
                r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
                r = [i[0] for i in r if t == cleantitle.get(i[1]) and int(season) == int(i[2])][0]
            else:
                url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.query('%s %01d' % (title.replace('\'', '-'), int(year)))))
                sr = client.request(url, headers=headers, timeout='10')
                if sr:
                    r = client.parseDOM(sr, 'h2', attrs={'class': 'tit'})
                    r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
                    r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
                    r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
                    r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
                    r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        return url.encode('utf-8')
    except:
        return
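# Stand-alone sketch of the query-relaxation order in searchShow() above,
# assuming a hypothetical search(query) helper that returns None when the site
# answers with an empty page: each query form is tried only when the previous
# one returned nothing.
def find_show(search, title, season, year):
    for query in ('%s S%02d' % (title, int(season)),
                  '%s Season %d' % (title, int(season)),
                  '%s %d' % (title, int(year))):
        result = search(query)
        if result:
            return result
    return None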
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        key = urlparse.urljoin(self.base_link, self.key_link)
        key = proxy.request(key, 'main_body')
        key = client.parseDOM(key, 'input', ret='value', attrs={'name': 'key'})[0]

        query = self.moviesearch_link % (urllib.quote_plus(cleantitle.query(title)), key)
        query = urlparse.urljoin(self.base_link, query)

        result = str(proxy.request(query, 'main_body'))
        if 'page=2' in result or 'page%3D2' in result:
            result += str(proxy.request(query + '&page=2', 'main_body'))

        result = client.parseDOM(result, 'div', attrs={'class': 'index_item.+?'})

        title = 'watch' + cleantitle.get(title)
        years = ['(%s)' % str(year), '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]

        result = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in result]
        result = [(i[0][0], i[1][0]) for i in result if len(i[0]) > 0 and len(i[1]) > 0]
        result = [i for i in result if any(x in i[1] for x in years)]

        r = [(proxy.parse(i[0]), i[1]) for i in result]

        match = [i[0] for i in r if title == cleantitle.get(i[1]) and '(%s)' % str(year) in i[1]]

        match2 = [i[0] for i in r]
        match2 = [x for y, x in enumerate(match2) if x not in match2[:y]]
        if match2 == []:
            return

        for i in match2[:5]:
            try:
                if len(match) > 0:
                    url = match[0]
                    break
                r = proxy.request(urlparse.urljoin(self.base_link, i), 'main_body')
                r = re.findall('(tt\d+)', r)
                if imdb in r:
                    url = i
                    break
            except:
                pass

        url = re.findall('(?://.+?|)(/.+)', url)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle)))
        self.tvshowtitle = tvshowtitle
        return url
    except:
        return