def search(self, title, localtitle, year=''):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = self.search_link + str(title)
            result = self.session.get(url).content
            result = result.decode('utf-8')
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div', attrs={'class': 'item-detail-bigblock title title-bigblock'})
            for item in result:
                if 'trailer' in item.lower():
                    continue
                link = self.base_link + str(client.parseDOM(item, 'a', ret='href')[0])
                nazwa = str(client.parseDOM(item, 'a')[0])
                name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                name = name.replace("  ", " ")
                title = title.replace("  ", " ")
                words = title.split(" ")
                if self.contains_all_words(name, words) and str(year) in name:
                    return link
    except Exception as e:
        print(str(e))
        return
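# Note: these scraper methods are collected from different provider classes and rely on
# module-level imports not shown here (re, json, urllib, urlparse, requests, HTMLParser,
# plus the project's client, cleantitle, cache, dom_parser and source_utils helpers).
# Almost every method also calls self.contains_all_words(text, words), which is defined
# elsewhere in the codebase. A minimal sketch of the assumed behavior, inferred from the
# call sites (every word of the cleaned search title must occur in the candidate text);
# the body is an assumption, not the project's actual implementation:
def contains_all_words(self, text, words):
    # True only if each searched word appears as a substring of the candidate text.
    return all(word in text for word in words)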
def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = urlparse.urljoin(self.base_link, self.search_link)
            url = url % urllib.quote(str(title).replace(" ", "_"))
            result = client.request(url)
            result = client.parseDOM(result, 'div', attrs={'class': 'video-clip-wrapper'})
            linki = []
            for item in result:
                try:
                    link = str(client.parseDOM(item, 'a', ret='href')[0])
                    nazwa = str(client.parseDOM(item, 'a', attrs={'class': 'link-title-visit'})[0])
                    name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    if self.contains_all_words(name, words) and str(year) in name:
                        linki.append(link)
                except:
                    continue
            return linki
    except Exception as e:
        print(e)
        return
def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = urlparse.urljoin(self.base_link, self.search_link)
            url = url % urllib.quote(str(title).replace(" ", "+"))
            result = client.request(url)
            result = result.decode('utf-8')
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div', attrs={'class': 'col-sm-4'})
            for item in result:
                link = str(client.parseDOM(item, 'a', ret='href')[0])
                nazwa = str(client.parseDOM(item, 'a', ret='title')[0])
                name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                name = name.replace("  ", " ")
                title = title.replace("  ", " ")
                words = title.split(" ")
                if self.contains_all_words(name, words) and str(year) in link:
                    return link
    except Exception as e:
        log_exception()
        return
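# Several methods above and below call log_exception() in their except blocks; the real
# helper is imported elsewhere in the project. A minimal stand-in with the same call
# shape, assuming it only records the active traceback:
import traceback

def log_exception():
    # Print the current exception's traceback; the project helper may route to a logger.
    traceback.print_exc()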
def search(self, title, localtitle, year):
    try:
        titles = []
        title2 = title.split('.')[0]
        localtitle2 = localtitle.split('.')[0]
        titles.append(cleantitle.normalize(cleantitle.getsearch(title2)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle2)))
        titles.append(title2)
        titles.append(localtitle2)
        for title in titles:
            try:
                token = client.request("https://filmowakraina.tv/movies")
                token = re.findall("""token:.*'(.*?)'""", token)[0]
                url = self.search_link % (token, urllib.quote_plus(cleantitle.query(title)), 'movie')
                content = client.request(url)
                content = json.loads(content)
                for item in content[u'items']:
                    if year in item[u'release_date']:
                        return item[u'link']
            except:
                pass
    except Exception as e:
        print(str(e))
        return
def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = self.search_link + str(title)
            result = client.request(url)
            result = result.decode('utf-8')
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div', attrs={'class': 'row'})
            for item in result:
                try:
                    link = str(client.parseDOM(item, 'a', ret='href')[0])
                    if link.startswith('//'):
                        link = "https:" + link
                    nazwa = str(client.parseDOM(item, 'img', ret='alt')[0])
                    name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                    rok = link
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    if self.contains_all_words(name, words) and str(year) in rok:
                        return link
                except:
                    continue
    except Exception as e:
        log_exception()
        return
def search_ep(self, titles, season, episode, tvdb):
    try:
        odcinek = source_utils.absoluteNumber(tvdb, episode, season)
        for title in titles:
            title = cleantitle.normalize(cleantitle.getsearch(title)).replace(" ", "+").replace("shippuden", "shippuuden")
            r = self.session.get(self.search_link % title).content
            # links and descriptions
            result = client.parseDOM(r, 'div', attrs={'class': 'description pull-right'})
            linki = client.parseDOM(result, 'a', ret='href')
            nazwy = client.parseDOM(result, 'a')
            for row in zip(linki, nazwy):
                try:
                    tytul = re.findall("""<mark>(.*)</mark>""", row[1])[0]
                except:
                    continue
                tytul = cleantitle.normalize(cleantitle.getsearch(tytul)).replace("  ", " ")
                words = tytul.split(" ")
                if self.contains_all_words(title, words):
                    link = self.base_link + row[0].replace('odcinki', 'odcinek') + '/' + odcinek
                    return link
    except:
        return
def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = urlparse.urljoin(self.base_link, self.search_link)
            url = url % urllib.quote(title)
            cookies = client.request(self.base_link, output='cookie')
            cache.cache_insert('naszekino_cookie', cookies)
            result = client.request(url, cookie=cookies)
            result = result.decode('utf-8')
            result = client.parseDOM(result, 'div', attrs={'class': 'col-sm-4'})
            for item in result:
                link = str(client.parseDOM(item, 'a', ret='href')[0])
                nazwa = str(client.parseDOM(item, 'a', ret='title')[0])
                # rok = str(client.parseDOM(result, 'div', attrs={'class': 'year'})[0])
                names_found = nazwa.split('/')
                names_found = [cleantitle.normalize(cleantitle.getsearch(i)) for i in names_found]
                for name in names_found:
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    if self.contains_all_words(name, words) and str(year) in link:
                        return link
    except Exception as e:
        print(e)
        return
def search(self, title, localtitle, year):
    try:
        titles = []
        title2 = title.split('.')[0]
        localtitle2 = localtitle.split('.')[0]
        titles.append(cleantitle.normalize(cleantitle.getsearch(title2)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle2)))
        titles.append(title2)
        titles.append(localtitle2)
        cookies = client.request('http://segos.es', output='cookie')
        cache.cache_insert('segos_cookie', cookies)
        for title in titles:
            try:
                query = self.search_link % urllib.quote_plus(title.replace(" ", "+"))
                url = urlparse.urljoin(self.base_link, query)
                result = client.request(url, headers={'Cookie': cookies})
                results = client.parseDOM(result, 'div', attrs={'class': 'col-lg-12 col-md-12 col-xs-12'})
            except:
                continue
            for result in results:
                try:
                    segosurl = client.parseDOM(result, 'a', ret='href')[0]
                    result = client.parseDOM(result, 'a')
                    segostitles = cleantitle.normalize(cleantitle.getsearch(result[1])).split('/')
                    segostitles.append(result[1])
                    rok = str(result[1][-5:-1])
                except:
                    continue
                for segostitle in segostitles:
                    try:
                        segostitle = segostitle.replace("  ", " ")
                        simply_name = title.replace("  ", " ")
                        words = simply_name.split(" ")
                        if self.contains_all_words(segostitle, words) and year == rok:
                            link = urlparse.urljoin(self.base_link, segosurl)
                            return link
                    except:
                        continue
    except Exception as e:
        print(str(e))
        return
def search(self, title, localtitle, year=''):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        self.login()
        for title in titles:
            data = {'type': '1', 'search': title + ' ' + year + ' (avi|mkv|mp4)'}
            self.session.post('https://tb7.pl/mojekonto/szukaj', data=data)
            headers = {
                'Connection': 'keep-alive',
                'Cache-Control': 'max-age=0',
                'Origin': 'https://tb7.pl',
                'Upgrade-Insecure-Requests': '1',
                'DNT': '1',
                'Content-Type': 'application/x-www-form-urlencoded',
                'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.66 Mobile Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
                'Referer': 'https://tb7.pl/mojekonto/szukaj/1',
                'Accept-Language': 'pl-PL,pl;q=0.9,en-US;q=0.8,en;q=0.7',
            }
            data = {'sort': 'size'}
            self.session.post('https://tb7.pl/mojekonto/szukaj/1', headers=headers, data=data)
            r = self.session.post('https://tb7.pl/mojekonto/szukaj/1', headers=headers, data=data).content
            rows = client.parseDOM(r, 'tr')
            if rows:
                cookies = self.session.cookies
                cookies = "; ".join([str(x) + "=" + str(y) for x, y in cookies.items()])
                cache.cache_insert('tb7_cookie', cookies)
                return rows
    except Exception as e:
        log_exception()
        return
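# The tb7 search above flattens the requests session's cookie jar into a raw Cookie
# header string before caching it. Worked example of the idiom (hypothetical values):
#   cookies = {'PHPSESSID': 'abc123', 'uid': '42'}
#   "; ".join(str(k) + "=" + str(v) for k, v in cookies.items())
#   -> "PHPSESSID=abc123; uid=42"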
def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        titles.append(title)
        titles.append(localtitle)
        for title in titles:
            try:
                url = self.search_link + str(title)
                result = self.session.get(url).content
                result = result.decode('utf-8')
                h = HTMLParser()
                result = h.unescape(result)
                result = client.parseDOM(result, 'div', attrs={'class': 'card-body p-2'})
                for item in result:
                    try:
                        nazwa = re.findall("""Film online: (.*?)\"""", item)[0]
                        try:
                            nazwa = re.findall(""">(.*?)<""", nazwa)[0]
                        except:
                            pass
                        name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                        rok = re.findall("""Rok wydania filmu online\".*>(.*?)<""", item)[0]
                        item = str(item).replace("<span style='color:red'>", "").replace("</span>", "")
                        link = re.findall("""href=\"(.*?)\"""", item)[0]
                        if link.startswith('//'):
                            link = "https:" + link
                        name = name.replace("  ", " ")
                        title = title.replace("  ", " ")
                        words = name.split(" ")
                        if self.contains_all_words(title, words) and str(year) in rok:
                            return link
                    except:
                        continue
            except:
                continue
    except Exception as e:
        log_exception()
        return
def search_ep(self, titles, season, episode, year):
    try:
        searchtitles = titles
        for searchtitle in searchtitles:
            response = requests.get(self.base_link + self.search_serial % searchtitle)
            result = response.content
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'ul', attrs={'class': 'resultsList hits'})
            items = client.parseDOM(result, 'li')
            items = [x for x in items if not str(x).startswith("<a href")]
            orgtitles = []
            for content in items:
                try:
                    orgtitle = str(client.parseDOM(content, 'div', attrs={'class': 'filmPreview__originalTitle'})[0])
                except:
                    orgtitle = "0"
                orgtitles.append(orgtitle)
            ids = client.parseDOM(items, 'data', ret='data-id')
            titles = client.parseDOM(result, 'data', ret='data-title')
            years = client.parseDOM(result, 'span', attrs={'class': 'filmPreview__year'})
            for item in zip(titles, ids, years, orgtitles):
                f_title = str(item[0])
                f_id = str(item[1])
                f_year = str(item[2])
                f_orgtitle = str(item[3])
                teststring = cleantitle.normalize(cleantitle.getsearch(searchtitle))
                words = cleantitle.normalize(cleantitle.getsearch(f_title)).split(" ")
                if self.contains_all_words(teststring, words) and year == f_year:
                    return (f_title, f_id, f_year, f_orgtitle, "SERIAL", season, episode)
    except Exception as e:
        print(str(e))
        return
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        titles = []
        title2 = title.split('.')[0]
        localtitle2 = localtitle.split('.')[0]
        titles.append(cleantitle.normalize(cleantitle.getsearch(title2)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle2)))
        titles.append(title2)
        titles.append(localtitle2)
        for title in titles:
            headers = {
                'Connection': 'keep-alive',
                'Cache-Control': 'max-age=0',
                'Origin': 'https://www.boxfilm.pl',
                'Upgrade-Insecure-Requests': '1',
                'DNT': '1',
                'Content-Type': 'application/x-www-form-urlencoded',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3555.0 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                'Referer': 'https://www.boxfilm.pl/szukaj',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'pl-PL,pl;q=0.9,en-US;q=0.8,en;q=0.7',
            }
            data = {'szukaj': title}
            cookies = {'lektor': 'Wszystkie', 'cookies-accepted': '1'}
            r = requests.post('https://www.boxfilm.pl/szukaj', headers=headers, cookies=cookies, data=data).content
            r = client.parseDOM(r, 'div', attrs={'class': 'video_info'})
            local_simple = cleantitle.get(localtitle)
            for row in r:
                name_found = client.parseDOM(row, 'h1')[0]
                year_found = name_found[name_found.find("(") + 1:name_found.find(")")]
                if cleantitle.get(name_found) == local_simple and year_found == year:
                    url = client.parseDOM(row, 'a', ret='href')[0]
                    return url
    except:
        return
def search_ep(self, titles, season, episode, year):
    try:
        # data = {'login': self.user_name, 'password': self.user_pass}
        # result = self.session.post('https://zalukaj.com/account.php', headers={'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"}, data=data)
        headers = {
            'Cookie': '__cfduid=d61b42b729455a590ff291892cb688ea11546349293; PHPSESSID=7u6cbc5pagnhqfm84jgjhg9hc2; __PHPSESSIDS=de81fa674b436a948cb337b7f4d2fa3898bd308c'
        }
        query = 'S{:02d}E{:02d}'.format(int(season), int(episode))
        for title in titles:
            title = cleantitle.normalize(cleantitle.getsearch(title))
            result = self.session.get(self.base_link, headers=headers).content
            result = client.parseDOM(result, 'td', attrs={'class': 'wef32f'})
            for row in result:
                try:
                    tytul = client.parseDOM(row, 'a', ret='title')[0]
                except:
                    continue
                tytul = cleantitle.normalize(cleantitle.getsearch(tytul)).replace("  ", " ")
                words = title.split(" ")
                if self.contains_all_words(tytul, words):
                    link = self.base_link + client.parseDOM(row, 'a', ret='href')[0]
                    content = self.session.get(link, headers=headers).content
                    content = client.parseDOM(content, 'div', attrs={'id': 'sezony'})
                    for item in content:
                        if 'Sezon: %s' % str(season) in item:
                            link = self.base_link + client.parseDOM(item, 'a', ret='href')[0]
                            content = self.session.get(link, headers=headers).content
                            content = client.parseDOM(content, 'div', attrs={'class': 'sezony'})
                            for item in content:
                                if query in item:
                                    link = client.parseDOM(item, 'a', ret='href')[0]
                                    return link
    except:
        return
def get_movie(self, imdb, title, year):
    mytab = []
    try:
        title = cleantitle.getsearch(title)
        query = self.search_link % (urllib.quote_plus(title), year)
        query = urlparse.urljoin(self.base_link, query)
        r = client.request(query, headers=self.headers)
        r = client.parseDOM(r, 'div', attrs={'class': 'title'})
        r = [(client.parseDOM(i, 'a'), client.parseDOM(i, 'a', ret='onclick')) for i in r]
        r = [i[1][0] for i in r if cleantitle.get(title) == cleantitle.get(i[0][0])]
        murl = re.findall('alias=(.*?)\'', r[0])[0]
        murl = client.replaceHTMLCodes(murl)
        murl = murl.encode('utf-8')
        url = {'imdb': imdb, 'title': title, 'year': year, 'url': murl}
        url = urllib.urlencode(url)
        return url
    except:
        return
def __search(self, titles, year):
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = cleantitle.get(titles[0])
        data = client.request(query)
        # data = client.request(query, referer=self.base_link)
        data = client.parseDOM(data, 'div', attrs={'class': 'result-item'})
        r = dom_parser.parse_dom(data, 'div', attrs={'class': 'title'})
        r = zip(dom_parser.parse_dom(r, 'a'),
                dom_parser.parse_dom(data, 'span', attrs={'class': 'year'}))
        url = []
        for i in range(len(r)):
            title = cleantitle.get(r[i][0][1])
            title = re.sub('(\d+p|4k|3d|hd|season\d+)', '', title)
            y = r[i][1][1]
            link = r[i][0][0]['href']
            if 'season' in title:
                continue
            if t == title and y == year:
                if 'season' in link:
                    url.append(source_utils.strip_domain(link))
                    print(url[0])
                    return url[0]
                else:
                    url.append(source_utils.strip_domain(link))
        return url
    except:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            # prefer an exact year match when the title carries a "(YYYY)" suffix
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
        if url is None:
            # fall back to the first alias match regardless of year
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        tvshowtitle = cleantitle.getsearch(tvshowtitle)
        p = urllib.urlencode({'action': 'ajaxy_sf', 'sf_value': tvshowtitle, 'search': 'false'})
        r = urlparse.urljoin(self.base_link, self.search_link)
        result = client.request(r, post=p, XHR=True)
        if len(json.loads(result)) == 0:
            # retry with the local title when the original title returns nothing
            p = urllib.urlencode({'action': 'ajaxy_sf', 'sf_value': localtvshowtitle, 'search': 'false'})
            result = client.request(r, post=p, XHR=True)
        diziler = json.loads(result)['diziler'][0]['all']
        for i in diziler:
            if cleantitle.get(tvshowtitle) == cleantitle.get(i['post_title']):
                url = i['post_link']
                url = url.split('/')[4]
                url = url.encode('utf-8')
                return url
    except:
        return
def search(self, title, localtitle, year, search_type):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title + " 3d")))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle + " 3d")))
        cookies = client.request(self.base_link, output='cookie')
        cache.cache_insert('alltube_cookie', cookies)
        for title in titles:
            r = client.request(urlparse.urljoin(self.base_link, self.search_link),
                               post={'search': cleantitle.query(title)},
                               headers={'Cookie': cookies})
            r = self.get_rows(r, search_type)
            for row in r:
                url = client.parseDOM(row, 'a', ret='href')[0]
                names_found = client.parseDOM(row, 'h3')[0]
                if names_found.startswith('Zwiastun') and not title.startswith('Zwiastun'):
                    continue
                names_found = names_found.encode('utf-8').split('/')
                names_found = [cleantitle.normalize(cleantitle.getsearch(i)) for i in names_found]
                for name in names_found:
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    found_year = self.try_read_year(url)
                    if self.contains_all_words(name, words) and (not found_year or found_year == year):
                        return url
    except Exception as e:
        print(e)
        return
def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        # data = {'login': self.user_name, 'password': self.user_pass}
        # result = self.session.post('https://zalukaj.com/account.php', headers={'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"}, data=data)
        headers = {
            'Cookie': '__cfduid=d61b42b729455a590ff291892cb688ea11546349293; PHPSESSID=7u6cbc5pagnhqfm84jgjhg9hc2; __PHPSESSIDS=de81fa674b436a948cb337b7f4d2fa3898bd308c'
        }
        for title in titles:
            url = self.search_link % str(title).replace(" ", "+")
            result = self.session.get(url, headers=headers).content
            result = result.decode('utf-8')
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div', attrs={'class': 'row'})
            for item in result:
                try:
                    link = str(client.parseDOM(item, 'a', ret='href')[0])
                    if link.startswith('//'):
                        link = "https:" + link
                    elif link.startswith('/'):
                        link = self.base_link + link
                    nazwa = str(client.parseDOM(item, 'a', ret='title')[0])
                    name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    if self.contains_all_words(name, words) and str(year) in link:
                        return link
                except:
                    continue
    except Exception as e:
        log_exception()
        return
def search_ep(self, titles, season, episode, year):
    query = 'S{:02d}E{:02d}'.format(int(season), int(episode))
    try:
        titles_tv = []
        titles_tv.append(cleantitle.normalize(cleantitle.getsearch(titles[0])))
        titles_tv.append(cleantitle.normalize(cleantitle.getsearch(titles[1])))
        for title in titles_tv:
            url = self.search_link + str(title)
            result = client.request(url)
            result = result.decode('utf-8')
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div', attrs={'class': 'row'})
            result = client.parseDOM(result[5], 'div', attrs={'class': 'col-sm-4'})
            for item in result:
                try:
                    link = str(client.parseDOM(item, 'a', ret='href')[0])
                    if link.startswith('//'):
                        link = "https:" + link
                    nazwa = str(client.parseDOM(item, 'img', ret='alt')[0])
                    name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    if self.contains_all_words(name, words):
                        result = client.request(link)
                        result = client.parseDOM(result, 'ul', attrs={'id': "episode-list"})
                        result = client.parseDOM(result, 'li')
                        for episode in result:
                            nazwa = client.parseDOM(episode, 'a')[0]
                            link = str(client.parseDOM(episode, 'a', ret='href')[0])
                            if query.lower() in nazwa.lower():
                                return link
                except:
                    continue
    except Exception as e:
        log_exception()
        return
def search_ep(self, titles, season, episode):
    try:
        for title in titles:
            simply_name = cleantitle.normalize(cleantitle.getsearch(title))
            query = self.search_link % str(title).replace(" ", "+")
            url = urlparse.urljoin(self.base_link, query)
            cookies = client.request(self.base_link, output='cookie')
            cache.cache_insert('segos_cookie', cookies)
            result = client.request(url, headers={'Cookie': cookies})
            results = client.parseDOM(result, 'div', attrs={'class': 'col-lg-12 col-md-12 col-xs-12'})
            for result in results:
                try:
                    segosurl = client.parseDOM(result, 'a', ret='href')[0]
                    segosurl = segosurl + "&s=%s&o=%s" % (season, episode)
                    result = client.parseDOM(result, 'a')
                    segostitles = cleantitle.normalize(cleantitle.getsearch(result[1])).split('/')
                except:
                    continue
                for segostitle in segostitles:
                    try:
                        segostitle = segostitle.replace("  ", " ")
                        simply_name = simply_name.replace("  ", " ")
                        words = simply_name.split(" ")
                        if self.contains_all_words(segostitle, words):
                            if 'seriale' in segosurl:
                                link = urlparse.urljoin(self.base_link, segosurl)
                                return link
                    except:
                        continue
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        lowerTitle = tvshowtitle.lower()
        possibleTitles = set(
            (lowerTitle, cleantitle.getsearch(lowerTitle)) +
            tuple((alias['title'].lower() for alias in aliases) if aliases else ()))
        return self._getSearchData(lowerTitle, possibleTitles, year, self._createSession(), isMovie=False)
    except:
        self._logException()
        return None
def searchMovie(self, title, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.getsearch(title + ' ' + year))
        url = urlparse.urljoin(self.base_link, query)
        r = client.request(url)
        r = client.parseDOM(r, 'item')
        r = [(client.parseDOM(i, 'title')[0], i) for i in r if i]
        r = [i[1] for i in r if cleantitle.get(title) == cleantitle.get(i[0])]
        return r[0]
    except:
        return
def search(self, title, localtitle, year):
    try:
        titles = []
        title2 = title.split('.')[0]
        localtitle2 = localtitle.split('.')[0]
        titles.append(cleantitle.normalize(cleantitle.getsearch(title2)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle2)))
        titles.append(title2)
        titles.append(localtitle2)
        for title in titles:
            # keep the spaced title for word matching; use a "+"-joined copy in the URL
            query = title.replace(" ", "+")
            result = client.request(self.search_link % query)
            result = client.parseDOM(result, 'div', attrs={'class': 'col-xs-4'})
            for item in result:
                try:
                    rok = client.parseDOM(item, 'div', attrs={'class': 'col-sm-8'})
                    rok_nazwa = client.parseDOM(rok, 'p')[0].lower()
                    link = client.parseDOM(item, 'a', ret='href')[0]
                    link = self.base_link + link
                    words = title.lower().split(" ")
                    if self.contains_all_words(rok_nazwa, words) and year in rok_nazwa:
                        return link
                except:
                    continue
        return
    except Exception as e:
        print(e)
        return
def get_episode(self, url, imdb, tvdb, title, premiered, season, episode):
    try:
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = cleantitle.getsearch(title)
        cleanmovie = cleantitle.get(title)
        data['season'], data['episode'] = season, episode
        episodecheck = 'S%02dE%02d' % (int(data['season']), int(data['episode']))
        episodecheck = episodecheck.lower()
        query = 'S%02dE%02d' % (int(data['season']), int(data['episode']))
        ep = "%01d" % (int(data['episode']))
        full_check = 'season%01d' % (int(data['season']))
        full_check = cleanmovie + full_check
        query = self.search_link % (urllib.quote_plus(title), season)
        query = urlparse.urljoin(self.base_link, query)
        r = client.request(query, headers=self.headers)
        # print("BOBBYAPP", r)
        match = re.compile('alias=(.+?)\'">(.+?)</a>').findall(r)
        for id, name in match:
            name = cleantitle.get(name)
            # print("BOBBYAPP id name", id, name)
            if full_check == name:
                type = 'tv_episodes'
                ep = "%01d" % (int(data['episode']))
                print("BOBBYAPP PASSED", id, name)
                murl = id
                murl = client.replaceHTMLCodes(murl)
                murl = murl.encode('utf-8')
                url = {'imdb': imdb, 'title': title, 'episode': episode, 'url': murl}
                url = urllib.urlencode(url)
                return url
    except Exception as e:
        print("ERROR %s" % e)
        return
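# The full_check comparison in get_episode above flattens show title and season into a
# single token before comparing against each result's cleaned anchor text. Illustrative
# walk-through with a hypothetical title, assuming cleantitle.get lowercases and strips
# non-alphanumeric characters:
#   cleantitle.get("Breaking Bad")           -> "breakingbad"
#   full_check = "breakingbad" + "season1"   -> "breakingbadseason1"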
def searchShow(self, title, season):
    try:
        sea = '%s season %d' % (title, int(season))
        query = self.search_link % urllib.quote_plus(cleantitle.getsearch(sea))
        url = urlparse.urljoin(self.base_link, query)
        r = client.request(url)
        r = client.parseDOM(r, 'item')
        r = [(client.parseDOM(i, 'title')[0], i) for i in r if i]
        r = [i[1] for i in r if sea in i[0]]
        links = re.findall('''<h4>(EP\d+)</h4>.+?src=['"]([^'"]+)''', r[0], re.I | re.DOTALL)
        links = [(i[0], i[1]) for i in links if i]
        return links
    except:
        return
def search_ep(self, titles, season, episode, year):
    try:
        query = 'S{:02d}E{:02d}'.format(int(season), int(episode))
        for title in titles:
            url = self.search_link + str(title)
            result = self.session.get(url).content
            result = result.decode('utf-8')
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div', attrs={'class': 'card-body p-2'})
            for item in result:
                nazwa = re.findall("""Film online: (.*?)\"""", item)[0]
                name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                rok = re.findall("""Rok wydania filmu online\".*>(.*?)<""", item)[0]
                item = str(item).replace("<span style='color:red'>", "").replace("</span>", "")
                link = re.findall("""href=\"(.*?)\"""", item)[0]
                if link.startswith('//'):
                    link = "https:" + link
                name = name.replace("  ", " ")
                title = title.replace("  ", " ")
                words = title.split(" ")
                if self.contains_all_words(name, words) and str(year) in rok:
                    content = requests.get(link.replace('filmy', 'seriale')).content
                    content = client.parseDOM(content, 'div', attrs={'class': 'tabela_wiersz mb-1'})
                    for odcinek in content:
                        if query.lower() in odcinek.lower():
                            link = str(client.parseDOM(odcinek, 'a', ret='href')[0])
                            return self.base_link + link
    except Exception as e:
        log_exception()
        return
def searchShow(self, title, season, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        search = '%s Season %01d' % (title, int(season))
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(search)))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        r = [(i[0], i[1], re.findall('(.*?)\s+-\s+Season\s+(\d)', i[1])) for i in r]
        r = [(i[0], i[1], i[2][0]) for i in r if len(i[2]) > 0]
        url = [i[0] for i in r if self.matchAlias(i[2][0], aliases) and i[2][1] == season][0]
        return url
    except:
        return
def searchShow(self, title, season):
    try:
        title = cleantitle.normalize(title)
        search = '%s Season %s' % (title, int(season))
        url = urlparse.urljoin(self.base_link, self.search_link_2 % urllib.quote_plus(cleantitle.getsearch(search)))
        r = client.request(url, timeout='10')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        print(len(r))
        if len(r) == 0:
            # retry with hyphenated search terms when the "+"-joined query finds nothing
            url = url.replace('+', '-')
            r = client.request(url, timeout='10')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        url = [i[0] for i in r if cleantitle.get(search) == cleantitle.get(i[1])][0]
        return url
    except:
        return
def clean_search(self, search_str):
    result = cleantitle.getsearch(search_str)
    result = re.sub(' +', ' ', result)
    return result.strip()
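# Usage sketch for clean_search: collapse the space runs left behind by
# cleantitle.getsearch and trim the ends. Illustrative input/output only, assuming
# getsearch lowercases and replaces punctuation with spaces:
#   self.clean_search("The  Matrix:  Reloaded ")  ->  "the matrix reloaded"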
def get_episode(self, url, imdb, tvdb, title, date, season, episode):
    try:
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        t = cleantitle.get(data['tvshowtitle'])
        title = cleantitle.getsearch(data['tvshowtitle'])
        print('###', t, data['tvshowtitle'])
        year = re.findall('(\d{4})', date)[0]
        years = [str(year), str(int(year) + 1), str(int(year) - 1)]
        season = '%01d' % int(season)
        episode = '%01d' % int(episode)
        ep_id = '?episode=%01d' % int(episode)
        headers = {'User-Agent': client.agent()}
        query = (urllib.quote_plus(title)) + "+season+" + season
        q = self.search_link % (query)
        r = urlparse.urljoin(self.base_link, q)
        r = client.request(r, headers=headers)
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        print("1", r)
        r = [(client.parseDOM(i, 'a', ret='href')[0], client.parseDOM(i, 'a')[0]) for i in r]
        r = [(i[0], re.findall('-(\d+)/', i[0])[0], client.parseDOM(i[1], 'h2')[0]) for i in r]
        r = [i for i in r if cleantitle.get(title + "season" + season) == cleantitle.get(i[2])]
        for i in r:
            try:
                y, q = cache.get(self.muchmovies_info, 9000, i[1])
                # print("4", y, q)
                if not y == year:
                    raise Exception()
                return urlparse.urlparse(i[0]).path + ep_id
            except:
                pass
        r = client.request(url, post=query, headers=headers)
        print('>>>', r)
        r = zip(client.parseDOM(r, 'a', ret='href', attrs={'class': 'ss-title'}),
                client.parseDOM(r, 'a', attrs={'class': 'ss-title'}))
        r = [(i[0], re.findall('(.+?) - season (\d+)$', i[1].lower())) for i in r]
        r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
        r = [i for i in r if t == cleantitle.get(i[1])]
        r = [i[0] for i in r if season == '%01d' % int(i[2])][:2]
        r = [(i, re.findall('(\d+)', i)[-1]) for i in r]
        print('>>>', r)
        for i in r:
            try:
                y, q = cache.get(self.muchmovies_info, 9000, i[1])
                if y not in years:
                    raise Exception()
                return urlparse.urlparse(i[0]).path + '?episode=%01d' % int(episode)
            except:
                pass
    except:
        return