def search(self, title, localtitle, year, search_type):
    """Search alltube for `title`/`localtitle`; return the result URL or None.

    Tries the original and localized title in turn, filters out trailer
    rows, and accepts the first result whose title words all match and
    whose year (when readable from the URL) equals `year`.
    """
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            # Browser-like headers: the site rejects bare requests.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3435.0 Safari/537.36',
                'Origin': 'http://alltube.pl',
                'Referer': 'http://alltube.pl/szukaj'
            }
            r = client.request(
                urlparse.urljoin(self.base_link, self.search_link),
                post={'search': cleantitle.query(title)},
                headers=headers)
            r = self.get_rows(r, search_type)
            for row in r:
                url = client.parseDOM(row, 'a', ret='href')[0]
                names_found = client.parseDOM(row, 'h3')[0]
                # Skip trailer rows unless a trailer was explicitly searched.
                if names_found.startswith('Zwiastun') and not title.startswith('Zwiastun'):
                    continue
                names_found = names_found.split('/')
                names_found = [cleantitle.normalize(cleantitle.getsearch(i))
                               for i in names_found]
                for name in names_found:
                    # NOTE(review): both replace() arguments render as a plain
                    # space; upstream this likely normalized a non-breaking
                    # space -- confirm against the original file encoding.
                    name = name.replace(" ", " ")
                    title = title.replace(" ", " ")
                    words = title.split(" ")
                    found_year = self.try_read_year(url)
                    if self.contains_all_wors(name, words) and (
                            not found_year or found_year == year):
                        return url
    except Exception as e:
        # Best-effort scraper: log and fall through to an empty result.
        print(e)
    return
def search(self, title, localtitle, year, search_type):
    """Search the site for `title`/`localtitle`; return the result URL or None.

    Same matching logic as the header-sending variant: tries both titles,
    skips trailer rows, and requires all title words plus (when present)
    the year parsed from the result URL to match.
    """
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            r = client.request(
                urlparse.urljoin(self.base_link, self.search_link),
                post={'search': cleantitle.query(title)})
            r = self.get_rows(r, search_type)
            for row in r:
                url = client.parseDOM(row, 'a', ret='href')[0]
                names_found = client.parseDOM(row, 'h3')[0]
                # Skip trailer rows unless a trailer was explicitly searched.
                if names_found.startswith('Zwiastun') and not title.startswith('Zwiastun'):
                    continue
                names_found = names_found.split('/')
                names_found = [cleantitle.normalize(cleantitle.getsearch(i))
                               for i in names_found]
                for name in names_found:
                    # NOTE(review): both replace() arguments render as a plain
                    # space; presumably this normalized a non-breaking space.
                    name = name.replace(" ", " ")
                    title = title.replace(" ", " ")
                    words = title.split(" ")
                    found_year = self.try_read_year(url)
                    if self.contains_all_wors(name, words) and (
                            not found_year or found_year == year):
                        return url
    except Exception as e:
        # Best-effort scraper: log and fall through to an empty result.
        print(e)
    return
def search(self, title, localtitle, year, is_movie_search):
    """Find a video-clip page matching either title; return its link or None.

    A result matches when every word of the searched title appears in the
    found name and the name contains the year.
    """
    try:
        candidates = [
            cleantitle.normalize(cleantitle.getsearch(title)),
            cleantitle.normalize(cleantitle.getsearch(localtitle)),
        ]
        for title in candidates:
            url = urlparse.urljoin(self.base_link, self.search_link)
            url = url % urllib.quote(str(title).replace(" ", "_"))
            result = client.request(url)
            result = result.decode('utf-8')
            # Unescape HTML entities before parsing out the result tiles.
            result = HTMLParser().unescape(result)
            result = client.parseDOM(result, 'div',
                                     attrs={'class': 'video-clip-wrapper'})
            for item in result:
                link = str(client.parseDOM(item, 'a', ret='href')[0])
                nazwa = str(client.parseDOM(
                    item, 'a', attrs={'class': 'link-title-visit'})[0])
                name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                name = name.replace(" ", " ")
                title = title.replace(" ", " ")
                words = title.split(" ")
                if self.contains_all_wors(name, words) and str(year) in name:
                    return link
    except Exception as e:
        log_exception()
    return
def search(self, title, localtitle, year):
    """Search filmdom.pl for a movie; return the film page URL or None.

    Matches either the localized or original normalized title against each
    result's slash-separated title variants, requiring the listed year to
    equal `year`.
    """
    try:
        searchtitles = (localtitle, title)
        for searchtitle in searchtitles:
            simply_name = cleantitle.normalize(cleantitle.getsearch(localtitle))
            simply_name2 = cleantitle.normalize(cleantitle.getsearch(title))
            data = {
                's': str(cleantitle.getsearch(searchtitle)),
                'search-form': '5'
            }
            response = requests.post(self.base_link, data=data)
            result = response.text
            result = client.parseDOM(result, 'ul',
                                     attrs={'class': 'film-num-list'})
            titles = client.parseDOM(result, 'a')
            links = client.parseDOM(result, 'a', ret='href')
            rok = client.parseDOM(result, 'li')
            # FIX: the original advanced a manual counter only for rows that
            # passed the startswith() filter, so skipping a non-film row
            # desynchronized rok[] from titles/links; enumerate keeps the
            # year list aligned with the row index.
            for idx, item in enumerate(zip(titles, links)):
                # The result list mixes films and serials; keep films only.
                if not str(item[1]).startswith("http://filmdom.pl/films/"):
                    continue
                # The year is the last four characters of the <li> text.
                rok_film = str(rok[idx][-4:])
                t2 = item[0].split(" / ")
                for tytul in t2:
                    tytul = tytul.split(" (")
                    words = cleantitle.normalize(
                        cleantitle.getsearch(tytul[0])).split(" ")
                    if (self.contains_all_wors(simply_name, words)
                            or self.contains_all_wors(simply_name2, words)) \
                            and year == rok_film:
                        return item[1]
        return
    except Exception as e:
        print(str(e))
    return
def search_ep(self, titles, season, episode, year):
    """Search the results list for a series matching one of `titles`.

    Returns (title, id, year, orgtitle, "SERIAL", season, episode) on a
    match, otherwise None.
    """
    try:
        searchtitles = titles
        for searchtitle in searchtitles:
            response = requests.get(
                self.base_link + self.search_serial % searchtitle)
            result = response.content
            # Unescape HTML entities before DOM parsing.
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'ul',
                                     attrs={'class': 'resultsList hits'})
            items = client.parseDOM(result, 'li')
            # parseDOM sometimes yields stray anchor fragments; drop them.
            items = [x for x in items if not str(x).startswith("<a href")]
            orgtitles = []
            for content in items:
                try:
                    orgtitle = str(client.parseDOM(
                        content, 'div',
                        attrs={'class': 'filmPreview__originalTitle'})[0])
                except Exception:
                    # Row has no original-title element; keep lists aligned.
                    orgtitle = "0"
                orgtitles.append(orgtitle)
            ids = client.parseDOM(items, 'data', ret='data-id')
            titles = client.parseDOM(result, 'data', ret='data-title')
            years = client.parseDOM(result, 'span',
                                    attrs={'class': 'filmPreview__year'})
            for item in zip(titles, ids, years, orgtitles):
                f_title = str(item[0])
                f_id = str(item[1])
                f_year = str(item[2])
                f_orgtitle = str(item[3])
                teststring = cleantitle.normalize(
                    cleantitle.getsearch(searchtitle))
                words = cleantitle.normalize(
                    cleantitle.getsearch(f_title)).split(" ")
                if self.contains_all_wors(teststring, words) and year == f_year:
                    return (f_title, f_id, f_year, f_orgtitle,
                            "SERIAL", season, episode)
    except Exception as e:
        print(str(e))
    return
def __search(self, titles, year):
    """Search for titles[0]; return a stripped result path (or list) or None.

    Returns the first season-link path when an exact title+year match is
    found; otherwise returns the accumulated list of candidate paths.
    """
    try:
        query = self.search_link % (urllib.quote_plus(
            cleantitle.getsearch(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = cleantitle.get(titles[0])
        # cfscrape bypasses Cloudflare's anti-bot page.
        scraper = cfscrape.create_scraper()
        data = scraper.get(query).content
        data = client.parseDOM(data, 'div', attrs={'class': 'result-item'})
        r = dom_parser.parse_dom(data, 'div', attrs={'class': 'title'})
        r = zip(
            dom_parser.parse_dom(r, 'a'),
            dom_parser.parse_dom(data, 'span', attrs={'class': 'year'}))
        url = []
        for i in range(len(r)):
            title = cleantitle.get(r[i][0][1])
            # Strip quality/format noise before comparing titles.
            title = re.sub(r'(\d+p|4k|3d|hd|season\d+)', '', title)
            y = r[i][1][1]
            link = r[i][0][0]['href']
            if 'season' in title:
                continue
            if t == title and y == year:
                if 'season' in link:
                    # Exact match on a season link: return it immediately.
                    # (Leftover debug print removed here.)
                    url.append(source_utils.strip_domain(link))
                    return url[0]
                else:
                    url.append(source_utils.strip_domain(link))
        return url
    except Exception:
        return
def searchMovie(self, title, year, aliases, headers):
    """Find a movie page URL by alias match, preferring an exact year match.

    Raises IndexError (caught by the outer handler, returning None) when
    no result matches any alias at all.
    """
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        # Extract a "(YYYY" year from each result title, if present.
        results = [(i[0], i[1], re.findall(r'\((\d{4})', i[1])) for i in r]
        try:
            # First pass: require an alias match AND a matching year.
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [
                i[0] for i in r
                if self.matchAlias(i[1], aliases) and (year == i[2])
            ][0]
        except Exception:
            url = None
        if url is None:
            # Fallback: first alias match regardless of year.
            url = [
                i[0] for i in results if self.matchAlias(i[1], aliases)
            ][0]
        return url
    except Exception:
        return
def search_ep(self, titles, season, episode, year):
    """Search filmdom.pl serials; return an episode URL with season/episode
    query parameters, or None."""
    try:
        searchtitles = titles
        for searchtitle in searchtitles:
            simply_name = cleantitle.normalize(cleantitle.getsearch(searchtitle))
            data = {
                's': cleantitle.normalize(cleantitle.getsearch(searchtitle)),
                'search-form': '5'
            }
            response = requests.post(self.base_link, data=data)
            result = response.text
            result = client.parseDOM(result, 'ul',
                                     attrs={'class': 'film-num-list'})
            # When two lists come back, the second holds the serials.
            if len(result) == 2:
                result = result[1]
            # Renamed from `titles` to avoid shadowing the parameter.
            titles_found = client.parseDOM(result, 'a')
            links = client.parseDOM(result, 'a', ret='href')
            rok = client.parseDOM(result, 'li')
            # FIX: the original advanced a manual counter only for rows that
            # passed the startswith() filter, so skipping a film row
            # desynchronized rok[] from the titles/links; enumerate keeps
            # the year list aligned with the row index.
            for idx, item in enumerate(zip(titles_found, links)):
                # The result list mixes films and serials; keep serials only.
                if not str(item[1]).startswith("http://filmdom.pl/serials/"):
                    continue
                # The year is the last four characters of the <li> text.
                rok_film = str(rok[idx][-4:])
                t2 = item[0].split(" / ")
                for tytul in t2:
                    # HACK kept from original: the site lists a wrong year
                    # for Game of Thrones.
                    if searchtitle == "Gra o tron":
                        year = "2011"
                    tytul = tytul.split(" (")
                    words = cleantitle.normalize(
                        cleantitle.getsearch(tytul[0])).split(" ")
                    if self.contains_all_wors(simply_name, words) \
                            and year == rok_film:
                        return item[1] + "?sezon=%s&odcinek=%s" % (season, episode)
        return
    except Exception as e:
        print(str(e))
    return
def search(self, title, localtitle, year, is_movie_search):
    """Search naszekino for `title`/`localtitle`; return the result link or None.

    A result matches when every word of the searched title appears in one
    of the slash-separated name variants and the year appears in the link.
    """
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = urlparse.urljoin(self.base_link, self.search_link)
            url = url % urllib.quote(title)
            # Grab a session cookie first and cache it for later requests.
            cookies = client.request(self.base_link, output='cookie')
            cache.cache_insert('naszekino_cookie', cookies)
            result = client.request(url, cookie=cookies)
            result = result.decode('utf-8')
            result = client.parseDOM(result, 'div', attrs={'class': 'col-sm-4'})
            for item in result:
                link = str(client.parseDOM(item, 'a', ret='href')[0])
                nazwa = str(client.parseDOM(item, 'a', ret='title')[0])
                names_found = nazwa.split('/')
                names_found = [
                    cleantitle.normalize(cleantitle.getsearch(i))
                    for i in names_found
                ]
                for name in names_found:
                    # NOTE(review): both replace() arguments render as a plain
                    # space; presumably this normalized a non-breaking space.
                    name = name.replace(" ", " ")
                    title = title.replace(" ", " ")
                    words = title.split(" ")
                    if self.contains_all_wors(name, words) and str(year) in link:
                        return link
    except Exception as e:
        print(e)
    return
def sources(self, url, hostDict, hostprDict):
    """Scrape search results into a list of source dicts for the resolver."""
    # FIX: initialize before any network call; the original assigned this
    # after client.request(), so a request failure made the except-path
    # `return sources` raise NameError instead of returning a list.
    sources = []
    try:
        words = cleantitle.getsearch(url).split(' ')
        search_url = urlparse.urljoin(
            self.base_link, self.search_link) % urllib.quote_plus(url)
        result = client.request(search_url)
        result = client.parseDOM(result, 'div',
                                 attrs={'class': 'tile-container'})
        for el in result:
            main = client.parseDOM(el, 'h3')
            link = client.parseDOM(main, 'a', ret='href')[0]
            found_title = client.parseDOM(main, 'a')[0]
            if not self.contains_all_wors(found_title, words):
                continue
            quality = client.parseDOM(el, 'a',
                                      attrs={'class': 'qualityLink'})
            q = 'SD'
            if quality:
                if quality[0] == '720p':
                    q = 'HD'
                if quality[0] == '1080p':
                    q = '1080p'
            lang, info = self.get_lang_by_type(found_title)
            sources.append({
                'source': 'trt',
                'quality': q,
                'language': lang,
                'url': link,
                'info': info,
                'direct': False,
                'debridonly': False
            })
        return sources
    except Exception:
        return sources
def searchShow(self, title, season, aliases, headers):
    """Return the URL of the '<title>-season-<season>' movie page, or None.

    `aliases` and `headers` are accepted for interface parity with the
    other scrapers but are not used by this site.
    """
    try:
        title = cleantitle.normalize(title)
        search = '%s Season %s' % (title, season)
        url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(search)))
        r = client.request(url)
        # First anchor whose href matches the expected season page slug.
        url = re.findall(
            r'<a href=\"(.+?\/movie\/%s-season-%s-.+?\.html)\"'
            % (cleantitle.geturl(title), season), r)[0]
        return url
    except Exception:
        # No match (IndexError) or request failure: signal "not found".
        return
def searchShow(self, title, season, aliases, headers):
    """Resolve a show's season page URL by alias and season number, or None."""
    try:
        title = cleantitle.normalize(title)
        search = '%s Season %01d' % (title, int(season))
        url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(search)))
        log_utils.log('searchShow url: %s' % str(url), log_utils.LOGNOTICE)
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        # FIX: \d+ (was \d) so double-digit seasons can match; compare as
        # strings so an int `season` argument also works.
        r = [(i[0], i[1], re.findall(r'(.*?)\s+-\s+Season\s+(\d+)', i[1]))
             for i in r]
        r = [(i[0], i[1], i[2][0]) for i in r if len(i[2]) > 0]
        url = [
            i[0] for i in r
            if self.matchAlias(i[2][0], aliases) and i[2][1] == str(season)
        ][0]
        return url
    except Exception:
        return
def sources(self, url, hostDict, hostprDict):
    """Search szukajka for each title in `url` and collect source dicts.

    `url` is a (title, localtitle[, year]) tuple produced upstream.
    """
    try:
        sources = []
        titles = []
        titles.append(url[0])
        titles.append(url[1])
        try:
            year = url[2]
        except Exception:
            year = ''
        for url_single in titles:
            url_single = cleantitle.normalize(
                cleantitle.getsearch(url_single))
            words = url_single.split(' ')
            search_url = urlparse.urljoin(
                self.base_link, self.search_link) % (url_single + " " + year)
            # The site requires a verification cookie hash (tmvh) computed
            # from a challenge page before searching.
            cookies = client.request(self.base_link, output='cookie')
            verifyGet = client.request(self.verify, cookie=cookies)
            cookies = cookies + ";tmvh=" + self.crazy_cookie_hash(verifyGet)
            cache.cache_insert('szukajka_cookie', cookies)
            result = client.request(search_url, cookie=cookies)
            result = client.parseDOM(result, 'div', attrs={'class': 'element'})
            for el in result:
                found_title = str(client.parseDOM(
                    el, 'div', attrs={'class': 'title'})[0]).lower()
                # FIX: the original tested ("zwiastun" or "trailer") in
                # found_title, which short-circuits to "zwiastun" only, so
                # "trailer" entries were never skipped.
                if 'zwiastun' in found_title or 'trailer' in found_title:
                    continue
                # For long titles, a 3-digit run (e.g. an episode number)
                # relaxes the match to also accept a year hit.
                if len(words) >= 4:
                    regtest = re.findall(r"\d{3}[^1080p]", found_title)
                    if len(regtest) > 0:
                        if not self.contains_all_wors(found_title, words) \
                                and year not in found_title:
                            continue
                    else:
                        if not self.contains_all_wors(found_title, words):
                            continue
                else:
                    if not self.contains_all_wors(found_title, words) \
                            and year not in found_title:
                        continue
                q = 'SD'
                if self.contains_word(found_title, '1080p') \
                        or self.contains_word(found_title, 'FHD'):
                    q = '1080p'
                elif self.contains_word(found_title, '720p'):
                    q = 'HD'
                link = client.parseDOM(
                    el, 'a', attrs={'class': 'link'}, ret='href')[0]
                transl_type = client.parseDOM(
                    el, 'span', attrs={'class': 'version'})[0]
                # Last word of the version span is the translation type.
                transl_type = transl_type.split(' ')[-1]
                host = client.parseDOM(
                    el, 'span', attrs={'class': 'host'})[0]
                host = host.split(' ')[-1]
                lang, info = self.get_lang_by_type(transl_type)
                sources.append({
                    'source': host,
                    'quality': q,
                    'language': lang,
                    'url': link,
                    'info': info,
                    'direct': False,
                    'debridonly': False
                })
        return sources
    except Exception as e:
        print(e)
        return sources
def clean_serach(self, serach_str):
    """Normalize a search string: cleantitle cleanup, then squeeze runs of
    spaces to one and trim the ends.

    (Name misspelling kept: callers elsewhere use `clean_serach`.)
    """
    cleaned = cleantitle.getsearch(serach_str)
    collapsed = re.sub(' +', ' ', cleaned)
    return collapsed.strip()