def search(self, title, localtitle, year):
    try:
        import sys
        reload(sys)
        sys.setdefaultencoding('utf8')
        # Match on the individual words of the cleaned query titles.
        simply_name = cleantitle.query(localtitle).split(' ')
        simply_name2 = cleantitle.query(title).split(' ')

        query = self.search_link % urllib.quote_plus(cleantitle.query(localtitle))
        url = urlparse.urljoin(self.base_link, query)

        result = client.request(url)
        result = client.parseDOM(result, 'div', attrs={'class': 'row search-results'})
        results = client.parseDOM(result, 'div', attrs={'class': 'item-detail-bigblock title title-bigblock'})

        for result in results:
            movieneourl = client.parseDOM(result, 'a', ret='href')[0]
            result = client.parseDOM(result, 'a')[0]
            for word in simply_name:
                if word in result and year in result:
                    return [urlparse.urljoin(self.base_link, movieneourl), result]
    except Exception as e:
        print(str(e))
        return
def search(self, title, localtitle, year, search_type):
    try:
        url = self.do_search(cleantitle.query(title), title, localtitle, year, search_type)
        if not url:
            url = self.do_search(cleantitle.query(localtitle), title, localtitle, year, search_type)
        return url
    except:
        return
def __search(self, title, season):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(title)))
        query = urlparse.urljoin(self.base_link, query)

        t = cleantitle.get(title)

        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs={'class': 'moviefilm'})
        r = client.parseDOM(r, 'div', attrs={'class': 'movief'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a')) for i in r]
        r = [(i[0][0], i[1][0].lower()) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [(i[0], i[1], re.findall('(.+?)\s+(?:saison)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = [(i[0], i[1], re.findall('\((.+?)\)$', i[1]), i[2]) for i in r]
        r = [(i[0], i[2][0] if len(i[2]) > 0 else i[1], i[3]) for i in r]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and int(i[2]) == int(season)][0]

        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def search(self, title, localtitle, year, search_type):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))

        cookies = client.request(self.base_link, output='cookie')
        cache.cache_insert('alltube_cookie', cookies)

        for title in titles:
            r = client.request(urljoin(self.base_link, self.search_link),
                               post={'search': cleantitle.query(title)},
                               headers={'Cookie': cookies})
            r = self.get_rows(r, search_type)

            for row in r:
                url = client.parseDOM(row, 'a', ret='href')[0]
                names_found = client.parseDOM(row, 'h3')[0]
                if names_found.startswith('Zwiastun') and not title.startswith('Zwiastun'):
                    continue  # skip trailer entries unless a trailer was asked for
                names_found = names_found.encode('utf-8').split('/')
                names_found = [cleantitle.normalize(cleantitle.getsearch(i)) for i in names_found]
                for name in names_found:
                    # Collapse double spaces before the word-by-word comparison.
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    found_year = self.try_read_year(url)
                    if self.contains_all_words(name, words) and (not found_year or found_year == year):
                        return url
    except:
        return
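# Sketch (assumption): `contains_all_words` and `try_read_year` are called
# above but not defined in these sources. Plausible stand-alone versions,
# kept deliberately simple:
import re

def contains_all_words(name, words):
    # True if every query word occurs somewhere in the candidate name.
    name = name.lower()
    return all(w.lower() in name for w in words if w)

def try_read_year(url):
    # Pull a 4-digit year out of the URL, or None if there is none.
    m = re.search(r'(19|20)\d{2}', url)
    return m.group(0) if m else None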
def __search(self, titles, year, content_type):
    try:
        query = self.search_link % (quote_plus(cleantitle.query(titles[0])), content_type)
        query = urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'search'})
        r = dom_parser.parse_dom(r, 'table')
        r = dom_parser.parse_dom(r, 'tr', attrs={'class': re.compile('entry\d+')})
        r = [(dom_parser.parse_dom(i, 'a'), dom_parser.parse_dom(i, 'img', attrs={'class': 'flag', 'alt': 'de'})) for i in r]
        r = [i[0] for i in r if i[0] and i[1]]
        r = [(i[0].attrs['href'], i[0].content) for i in r]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        return source_utils.strip_domain(r)
    except:
        return
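# Sketch (assumption): several __search variants here repeat the same match
# rule: the normalized title must be in the wanted set and the year must fall
# in a +/-1 window, with dated entries preferred over undated ('0') ones. A
# hypothetical consolidation of that pattern; `normalize` stands in for
# cleantitle.get:
def pick_by_title_and_year(candidates, titles, year, normalize=lambda s: s.lower()):
    # candidates: iterable of (url, title, year_string) tuples.
    wanted = set(normalize(t) for t in titles if t)
    window = [str(year), str(int(year) + 1), str(int(year) - 1), '0']
    # Sort so entries carrying a real year are tried before the '0' fallback.
    for url, title, y in sorted(candidates, key=lambda c: int(c[2]), reverse=True):
        if normalize(title) in wanted and y in window:
            return url
    return None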
def __search(self, titles, year):
    try:
        n = cache.get(self.__get_nonce, 24)

        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])), n)
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)
        r = json.loads(r)
        r = [(r[i].get('url'), r[i].get('title'), r[i].get('extra').get('date')) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0] + ' ' + year)))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)
        r = dom_parser.parse_dom(r, 'figure', attrs={'class': 'pretty-figure'})
        r = dom_parser.parse_dom(r, 'figcaption')

        for i in r:
            title = client.replaceHTMLCodes(i[0]['title'])
            title = cleantitle.get(title)
            if title in t:
                x = dom_parser.parse_dom(i, 'a', req='href')
                return source_utils.strip_domain(x[0][0]['href'])
        return
    except:
        return
def __search(self, titles, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'details'})
        r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'title'}),
              dom_parser.parse_dom(i, 'span', attrs={'class': 'year'})) for i in r]
        r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), i[1][0].content) for i in r if i[0] and i[1]]
        r = [(i[0][0].attrs['href'], client.replaceHTMLCodes(i[0][0].content), i[1]) for i in r if i[0]]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        return source_utils.strip_domain(r)
    except:
        return
def search(self, localtitle, year, search_type):
    try:
        simply_name = cleantitle.get(localtitle)
        query = self.search_link % urllib.quote_plus(cleantitle.query(localtitle))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query)
        result = client.parseDOM(result, 'div', attrs={'id': search_type})
        links = client.parseDOM(result, 'figcaption')
        names = client.parseDOM(result, 'figcaption', ret='title')

        urls = []
        for i in range(len(names)):
            name = cleantitle.get(names[i])
            url = client.parseDOM(links[i], 'a', ret='href')[0]
            if name == simply_name:
                urls.append(url)

        if len(urls) == 1:
            return urls[0]
        return self.findMatchByYear(year, urls)
    except:
        return
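# Sketch (assumption): `findMatchByYear` is referenced above but not shown in
# these sources. A plausible version walks the ambiguous candidates and keeps
# the one whose page reports the wanted year; `read_year` is a hypothetical
# callback standing in for however the scraper fetches that year.
def find_match_by_year(year, urls, read_year):
    for url in urls:
        if read_year(url) == year:
            return url
    # Fall back to the first candidate rather than nothing.
    return urls[0] if urls else None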
def search(self, title, localtitle, year):
    try:
        simply_name = cleantitle.get(localtitle)
        simply_name2 = cleantitle.get(title)
        query = self.search_link % urllib.quote_plus(cleantitle.query(localtitle))

        # Log in first; search results are only served to an authenticated session.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
                   'Referer': 'https://segos.es/?page=login'}
        data = {'login': self.user_name, 'password': self.user_pass, 'loguj': ''}
        s = requests.Session()
        s.post('https://segos.es/?page=login', data=data, headers=headers)

        url = urlparse.urljoin(self.base_link, query)
        result = s.get(url).text

        results = client.parseDOM(result, 'div', attrs={'class': 'col-lg-12 col-md-12 col-xs-12'})
        for result in results:
            segosurl = client.parseDOM(result, 'a', ret='href')[0]
            result = client.parseDOM(result, 'a')
            segostitles = cleantitle.get(result[1]).split('/')
            for segostitle in segostitles:
                if simply_name == segostitle or simply_name2 == segostitle:
                    return urlparse.urljoin(self.base_link, segosurl)
    except Exception as e:
        print(str(e))
        return
def __search(self, titles, year, season='0'):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = self.scraper.get(query).content
        r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['products', 'row']})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': ['box-product', 'clearfix']})

        # An 'episode' div marks series results; keep or drop them depending on
        # whether a season was requested.
        if int(season) > 0:
            r = [i for i in r if dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
        else:
            r = [i for i in r if not dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]

        r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title-product'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:staf+el|s)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

        url = source_utils.strip_domain(r)
        url = url.replace('-info', '-stream')
        return url
    except:
        return
def __search(self, titles, year, season='0'):
    try:
        aj = cache.get(self.__get_ajax_object, 24)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(aj.get('ajax_url'),
                           post={'action': aj.get('search'), 'nonce': aj.get('snonce'),
                                 'query': cleantitle.query(titles[0])})

        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'search-result'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'search-item-content'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:staf+el|s)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

        return source_utils.strip_domain(r)
    except:
        return
def search(self, localtitle, year, search_type):
    try:
        simply_name = cleantitle.get(localtitle)
        query = self.search_link % urllib.quote_plus(cleantitle.query(localtitle))
        query = urlparse.urljoin(self.base_link, query)

        result = self.scraper.get(query).content
        result = client.parseDOM(result, 'div', attrs={'class': 'result-item'})
        for x in result:
            correct_type = client.parseDOM(x, 'span', attrs={'class': search_type})
            correct_year = client.parseDOM(x, 'span', attrs={'class': 'year'})[0] == year
            name = client.parseDOM(x, 'div', attrs={'class': 'title'})[0]
            url = client.parseDOM(name, 'a', ret='href')[0]
            name = cleantitle.get(client.parseDOM(name, 'a')[0])
            if correct_type and correct_year and name == simply_name:
                return url
    except:
        return
def sezonlukdizi_tvcache(self):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link)
        result = client.request(url, redirect=False)

        if not result:
            # The show index appears to live in a JS file; locate it via the homepage.
            r = client.request(self.base_link)
            r = dom_parser.parse_dom(r, 'script',
                                     attrs={'type': 'text/javascript', 'src': re.compile('.*/js/dizi.*')},
                                     req='src')[0]
            url = urlparse.urljoin(self.base_link, r.attrs['src'])
            result = client.request(url)

        result = re.compile('{(.+?)}').findall(result)
        result = [(re.findall('u\s*:\s*(?:\'|\")(.+?)(?:\'|\")', i),
                   re.findall('d\s*:\s*(?:\'|\")(.+?)(?:\',|\")', i)) for i in result]
        result = [(i[0][0], i[1][0]) for i in result if len(i[0]) > 0 and len(i[1]) > 0]
        result = [(re.compile('/diziler(/.+?)(?://|\.|$)').findall(i[0]), re.sub('&#\d*;', '', i[1])) for i in result]
        result = [(i[0][0] + '/', cleantitle.query(self.lat2asc(i[1]))) for i in result if len(i[0]) > 0]

        return result
    except BaseException:
        return []
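# Sketch (assumption): `lat2asc` is not included in these sources. Given the
# Turkish source site, a plausible version folds Turkish letters to their
# ASCII counterparts before title cleanup:
def lat2asc(text):
    table = {u'\u00e7': 'c', u'\u011f': 'g', u'\u0131': 'i', u'\u00f6': 'o', u'\u015f': 's', u'\u00fc': 'u',
             u'\u00c7': 'C', u'\u011e': 'G', u'\u0130': 'I', u'\u00d6': 'O', u'\u015e': 'S', u'\u00dc': 'U'}
    return ''.join(table.get(ch, ch) for ch in text)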
def search(self, localtitle, year, search_type):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link)
        r = client.request(url, redirect=False,
                           post={'q': cleantitle.query(localtitle), 'sb': ''})
        r = client.parseDOM(r, 'div', attrs={'class': 'small-item'})

        local_simple = cleantitle.get(localtitle)
        for row in r:
            name_found = client.parseDOM(row, 'a')[1]
            year_found = name_found[name_found.find("(") + 1:name_found.find(")")]
            url = client.parseDOM(row, 'a', ret='href')[1]
            if search_type not in url:
                continue
            if cleantitle.get(name_found) == local_simple and year_found == year:
                return url
    except:
        return
def __search(self, titles, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query, XHR=True)
        # The endpoint may return a bare JSON object for a single hit; wrap it
        # in a list so json.loads always yields a list.
        if r and r.startswith('{'):
            r = '[%s]' % r
        r = json.loads(r)
        r = [(i['url'], i['name']) for i in r if 'name' in i and 'url' in i]
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})?\)*$', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        url = source_utils.strip_domain(r)
        url = url.replace('serien/', '')
        return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))

        # Retry a few times; the site responds intermittently.
        for i in range(3):
            result = self.scraper.get(query).content
            if result is not None:
                break

        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]

        items = dom_parser.parse_dom(result, 'div', attrs={'class': 'result'})

        url = None
        for i in items:
            result = re.findall(r'href="([^"]+)">(.*)<', i.content)
            if re.sub('<[^<]+?>', '', cleantitle.get(cleantitle.normalize(result[0][1]))) in t and year in result[0][1]:
                url = result[0][0]
            if url is not None:
                break

        url = url.encode('utf-8')
        return url
    except:
        return
def __search(self, search_link, imdb, titles):
    try:
        query = search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'big-list'})
        r = dom_parser.parse_dom(r, 'table', attrs={'class': 'row'})
        r = dom_parser.parse_dom(r, 'td', attrs={'class': 'list-name'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [i.attrs['href'] for i in r if i and cleantitle.get(i.content) in t][0]

        url = source_utils.strip_domain(r)

        # Verify the match: the detail page must link to the expected IMDb id.
        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/tt\d+.*')}, req='href')
        r = [re.findall('.+?(tt\d+).*?', i.attrs['href']) for i in r]
        r = [i[0] for i in r if i]

        return url if imdb in r else None
    except:
        return
def __search(self, titles, year):
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(urljoin(self.base_link, self.search_link),
                           post={'query': cleantitle.query(titles[0])})

        r = dom_parser.parse_dom(r, 'li', attrs={'class': 'entTd'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 've-screen'}, req='title')
        r = [(dom_parser.parse_dom(i, 'a', req='href'), i.attrs['title'].split(' - ')[0]) for i in r]
        r = [(i[0][0].attrs['href'], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

        return source_utils.strip_domain(r)
    except:
        return
def do_search(self, title, local_title, year, video_type):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link)
        url = url % urllib.quote_plus(cleantitle.query(title))

        result = client.request(url)
        result = client.parseDOM(result, 'div', attrs={'class': 'item'})
        for row in result:
            row_type = client.parseDOM(row, 'div', attrs={'class': 'typepost'})[0]
            if row_type != video_type:
                continue
            names = client.parseDOM(row, 'span', attrs={'class': 'tt'})[0]
            names = names.split('/')
            year_found = client.parseDOM(row, 'span', attrs={'class': 'year'})

            titles = [cleantitle.get(i) for i in [title, local_title]]
            if self.name_matches(names, titles, year) and (len(year_found) == 0 or year_found[0] == year):
                url = client.parseDOM(row, 'a', ret='href')[0]
                return urlparse.urljoin(self.base_link, url)
    except:
        return
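# Sketch (assumption): `name_matches` is referenced above but not defined in
# these sources. A plausible version treats the row's '/'-separated names as
# candidates and matches any cleaned name against the wanted titles;
# `normalize` stands in for cleantitle.get.
def name_matches(names, titles, year, normalize=lambda s: s.lower().strip()):
    # `year` mirrors the call site's signature; the year comparison is already
    # handled separately by the caller, so this sketch only compares names.
    wanted = set(titles)
    return any(normalize(name) in wanted for name in names)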
def __search(self, titles, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0]) + ' ' + year))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i][0]

        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs={'class': 'karatula'})

        for i in r:
            title = client.parseDOM(i, 'a', ret='title')[0]
            y = re.findall('(\d{4})', title)[0]
            title = cleantitle.get_simple(title)
            if t in title and y == year:
                x = dom_parser.parse_dom(i, 'a', req='href')
                return source_utils.strip_domain(x[0][0]['href'])
        return
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.tvsearch_link % urllib.quote_plus(cleantitle.query(tvshowtitle))
        query = urlparse.urljoin(self.base_link, query.lower())

        result = client.request(query, referer=self.base_link)
        result = client.parseDOM(result, 'div', attrs={'class': 'index_item.+?'})
        result = [(dom.parse_dom(i, 'a', req=['href', 'title'])[0]) for i in result if i]

        # Strip year/season-episode/quality tags from the link title before comparing.
        result = [i.attrs['href'] for i in result
                  if cleantitle.get(tvshowtitle) == cleantitle.get(
                      re.sub('(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '',
                             i.attrs['title'], flags=re.I))][0]

        url = client.replaceHTMLCodes(result)
        url = url.encode('utf-8')
        return url
    except Exception:
        return
def __search(self, titles, imdb, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

        r = client.request(query, XHR=True)
        r = json.loads(r)
        r = [(i.get('title'), i.get('custom_fields', {})) for i in r.get('posts', [])]
        r = [(i[0], i[1]) for i in r if i[0] and i[1]]
        r = [(i[0], i[1].get('Streaming', ['']), i[1].get('Jahr', ['0']), i[1].get('IMDb-Link', [''])) for i in r if i]
        r = [(i[0], i[1][0], i[2][0], re.findall('.+?(tt\d+).*?', i[3][0])) for i in r if i[0] and i[1] and i[2] and i[3]]
        r = [i[1] for i in r if imdb in i[3] or (cleantitle.get(i[0]) in t and i[2] in y)][0]

        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'nag'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item-video'})
        r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'entry-title'})
        r = dom_parser.parse_dom(r, 'a', req='href')

        for i in r:
            title = i[1]
            if re.search('\*(?:.*?)\*', title) is not None:
                title = re.sub('\*(?:.*?)\*', '', title)
            title = cleantitle.get(title)
            if title in t:
                return source_utils.strip_domain(i[0]['href'])
        return
    except:
        return
def __search(self, titles, year):
    try:
        query = self.search_link % (quote_plus(cleantitle.query(titles[0])))
        query = urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'main'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'panel-body'})
        r = [(dom_parser.parse_dom(i.content, 'h4', attrs={'class': 'title-list'}),
              dom_parser.parse_dom(i.content, 'a', attrs={'href': re.compile('.*/year/.*')})) for i in r]
        r = [(dom_parser.parse_dom(i[0][0].content, 'a', req='href'), i[1][0].content if i[1] else '0') for i in r if i[0]]
        r = [(i[0][0].attrs['href'], i[0][0].content, re.sub('<.+?>|</.+?>', '', i[1])) for i in r if i[0] and i[1]]
        r = [(i[0], i[1], i[2].strip()) for i in r if i[2]]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] == year][0]

        return source_utils.strip_domain(r)
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))

        # Request the page up to four times to work around the site's random
        # 404s; responses (legit and bogus 404s) are fast, so the short
        # timeout is not a problem.
        for i in range(4):
            result = client.request(query, timeout=3)
            if result is not None:
                break

        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]

        result = re.compile(
            'itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)',
            re.DOTALL).findall(result)

        for i in result:
            if cleantitle.get(cleantitle.normalize(i[1])) in t and year in i[1]:
                url = i[0]
                url = url.encode('utf-8')
                # Returned 'url' looks like: /serie/x_files
                return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        url = urlparse.urljoin(self.base_link,
                               self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle)))
        self.tvshowtitle = tvshowtitle
        return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        q = cleantitle.query(tvshowtitle)
        r = self.scraper.get(self.search_link % q, headers={'referer': self.base_link}).content
        r = client.parseDOM(r, 'div', attrs={'valign': '.+?'})
        r = [(client.parseDOM(i, 'a', ret='href'),
              client.parseDOM(i, 'a', ret='title'),
              client.parseDOM(i, 'a')) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if i[0] and i[1] and i[2]]
        return r[0][0]
    except:
        return
def __search(self, titles):
    try:
        query = self.search_link % quote_plus(cleantitle.query(titles[0]))
        query = urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query, XHR=True)
        r = json.loads(r)
        r = [(i.get('url'), i.get('name')) for i in r]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t][0]

        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles):
    try:
        query = self.search_link % quote_plus(cleantitle.query(titles[0]))
        query = urljoin(self.base_link, query)

        t = [cleantitle.get(i) for i in set(titles) if i]

        r = client.request(query)
        r = json.loads(r)
        r = [(i.get('id'), i.get('value')) for i in r]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t][0]

        return r
    except:
        return