def get_movie(self, imdb, title, year):
    """Resolve a movie page URL on the site from its IMDB id/title/year.

    First tries to localize the title via the IMDB page (requested with
    an es-ES Accept-Language header), then searches the site and picks
    the entry whose year matches exactly.

    Returns the site-relative URL as a utf-8 encoded string, or None on
    any failure (best-effort scraper contract).
    """
    mytitle = title
    try:
        # Best-effort: fetch the localized title from the IMDB <title>
        # tag and strip the trailing "(YYYY) ..." suffix.
        t = 'http://www.imdb.com/title/%s' % imdb
        t = client.source(t, headers={'Accept-Language': 'es-ES'})
        t = client.parseDOM(t, 'title')[0]
        t = re.sub(r'(?:\(|\s)\d{4}.+', '', t).strip()
        mytitle = t
    except Exception:
        # Localization is optional; fall back to the original title.
        pass
    try:
        t = cleantitle.get(mytitle)

        query = self.search3_link % urllib.quote_plus(cleantitle.query2(mytitle))
        query = urlparse.urljoin(self.base_link, query)

        result = client2.http_get(query)
        # Drop non-ASCII bytes so the regexes below behave predictably.
        result = re.sub(r'[^\x00-\x7F]+', '', result)

        r = result.split('<li class=')
        r = [(client.parseDOM(i, 'a', ret='href'),
              client.parseDOM(i, 'i'),
              re.findall(r'\((\d{4})\)', i)) for i in r]
        # Keep only entries that have a link, a title and a year.
        r = [(i[0][0], re.sub(r'\(|\)', '', i[1][0]), i[2][0])
             for i in r
             if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        # Exact year match; IndexError (no match) is caught below.
        r = [i[0] for i in r if year == i[2]][0]

        # Strip scheme/host when the link is absolute.
        try: url = re.findall('//.+?(/.+)', r)[0]
        except Exception: url = r

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except Exception:
        # Search failed or no matching year: implicitly return None.
        pass
def get_show(self, imdb, tvdb, tvshowtitle, year):
    """Resolve a TV-show page URL from the site's search results.

    Matches entries by cleaned title and accepts year, year+1 or year-1
    (sites often disagree on premiere vs. production year).

    Returns the site-relative URL as a utf-8 encoded string, or None on
    any failure. (A stray debug ``print url`` was removed.)
    """
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query2(tvshowtitle)))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query)
        result = result.decode('utf-8-sig')  # page is served with a BOM

        result = client.parseDOM(result, 'ul', attrs={'id': 'resultList2'})[0]
        result = client.parseDOM(result, 'li')
        result = [(client.parseDOM(i, 'div', attrs={'class': 'title'}),
                   client.parseDOM(i, 'div', attrs={'class': 'info'}),
                   client.parseDOM(i, 'a', ret='href')[0]) for i in result]
        result = [(i[0][0], re.findall(r"(\d{4})", i[1][0])[0], i[2]) for i in result]

        # Allow one year of slack either way.
        years = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1)]
        result = [i for i in result
                  if cleantitle.movie(tvshowtitle) in cleantitle.movie(i[0])]
        # First candidate with an acceptable year; IndexError caught below.
        result = [i[2] for i in result if any(x in i[1] for x in years)][0]

        # Strip scheme/host when the link is absolute.
        try: url = re.compile('//.+?(/.+)').findall(result)[0]
        except Exception: url = result

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except Exception:
        return
def get_movie(self, imdb, title, year):
    """Find a movie URL path by POSTing a keyword search, then verifying
    each candidate's year via ``self.myesmovies_info``.

    Returns the URL path (urlparse .path) of the first candidate whose
    year matches, or None when nothing matches or the request fails.
    """
    try:
        t = cleantitle.query2(title)

        # The endpoint requires an md5 of the keyword alongside it.
        # (Renamed from ``hash``, which shadowed the builtin.)
        title_hash = hashlib.md5(title).hexdigest()
        query = urllib.urlencode({'keyword': title, 'hash': title_hash})
        url = urlparse.urljoin(self.base_link, self.search_link)

        r = client.request(url, post=query, headers=self.headers)
        r = json.loads(r)['content']
        r = zip(client.parseDOM(r, 'a', ret='href', attrs={'class': 'ss-title'}),
                client.parseDOM(r, 'a', attrs={'class': 'ss-title'}))

        # Exact cleaned-title matches only; cap at two candidates.
        r = [i[0] for i in r if cleantitle.get(t) == cleantitle.get(i[1])][:2]
        r = [(i, re.findall(r'(\d+)', i)[-1]) for i in r]

        for link, _ in r:
            try:
                # Verify the candidate's year on its own page.
                y, q = self.myesmovies_info(link)
                if not y == year: raise Exception()
                return urlparse.urlparse(link).path
            except Exception:
                pass  # year mismatch or fetch error: try next candidate

        return None
    except Exception as e:
        control.log('Error %s' % e)
        return
def get_movie(self, imdb, title, year):
    """Resolve a movie URL on AllTube via its JSON suggestion endpoint.

    Tries a quote_plus-encoded query first; on any failure retries with
    ``cleantitle.query_quote`` encoding. Accepts year, year+1 or year-1.

    Returns the site-relative URL as a utf-8 encoded string, or None.
    """
    try:
        query = self.moviesearch_link % urllib.quote_plus(cleantitle.query2(title))
        query = urlparse.urljoin(self.base_link, query)
        control.log('ALLTUBE T URL %s' % query)

        result = client.source(query)
        result = json.loads(result)
        result = [i for i in result['suggestions'] if len(i) > 0]

        # Allow one year of slack either way.
        years = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1)]
        result = [(i['data'].encode('utf8'), i['value'].encode('utf8')) for i in result]
        result = [i for i in result if cleantitle.movie(title) in cleantitle.movie(i[1])]
        result = [i[0] for i in result if any(x in i[1] for x in years)][0]

        # Strip scheme/host when the link is absolute.
        try: url = re.compile('//.+?(/.+)').findall(result)[0]
        except Exception: url = result

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        control.log('ALLTUBE URL %s' % url)
        return url
    except Exception:
        try:
            # BUG FIX: this branch referenced the undefined name
            # ``originaltitle``, so it always raised NameError and the
            # fallback could never succeed; use ``title`` instead.
            query = self.moviesearch_link % cleantitle.query_quote(title)
            query = urlparse.urljoin(self.base_link, query)
            control.log('ALLTUBE T URL %s' % query)

            result = client.source(query)
            result = json.loads(result)
            result = [i for i in result['suggestions'] if len(i) > 0]

            years = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1)]
            result = [(i['data'].encode('utf8'), i['value'].encode('utf8')) for i in result]
            result = [i for i in result if cleantitle.movie(title) in cleantitle.movie(i[1])]
            result = [i[0] for i in result if any(x in i[1] for x in years)][0]

            try: url = re.compile('//.+?(/.+)').findall(result)[0]
            except Exception: url = result

            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            control.log('ALLTUBE URL %s' % url)
            return url
        except Exception:
            return
def get_movie(self, imdb, title, year):
    """Search the site via an XHR POST (reusing headers from a priming
    GET) and return the URL path of the first candidate whose year
    matches, or None.

    Removed unused locals (``cookie2``, ``r1``), the builtin-shadowing
    ``hash`` name, and debug prints.
    """
    try:
        # Prime a session; 'extended' output is a tuple whose index 3
        # carries the response headers to replay on the XHR call.
        r = client.request(self.base_link, limit='0', output='extended')
        headers = r[3]

        t = cleantitle.query2(title)
        title_hash = hashlib.md5(title).hexdigest()  # endpoint wants md5 of keyword
        query = urllib.urlencode({'keyword': title, 'hash': title_hash})
        url = urlparse.urljoin(self.base_link, self.search_link)

        headers['X-Requested-With'] = 'XMLHttpRequest'
        r = client.request(url, post=query, headers=headers)

        r = json.loads(r)['content']
        r = zip(client.parseDOM(r, 'a', ret='href', attrs={'class': 'ss-title'}),
                client.parseDOM(r, 'a', attrs={'class': 'ss-title'}))

        # Exact cleaned-title matches only; cap at two candidates.
        r = [i[0] for i in r if cleantitle.get(t) == cleantitle.get(i[1])][:2]
        r = [(i, re.findall(r'(\d+)', i)[-1]) for i in r]

        for link, _ in r:
            try:
                # Verify the candidate's year on its own page.
                y, q = self.myesmovies_info(link)
                if not y == year: raise Exception()
                return urlparse.urlparse(link).path
            except Exception:
                pass  # year mismatch or fetch error: try next candidate

        return None
    except Exception as e:
        control.log('Error %s' % e)
        return
def get_movie(self, imdb, title, year):
    """Look up a movie on the site's search page.

    Filters result blocks by cleaned title and by year (allowing one
    year of slack either way) and returns the first matching link as a
    utf-8 encoded relative URL, or None on any failure.
    """
    try:
        search = self.search_link % (urllib.quote_plus(cleantitle.query2(title)))
        search = urlparse.urljoin(self.base_link, search)
        html = client.request(search)

        title = cleantitle.movie(title)

        # Collect (href, label, year) for every result block.
        entries = []
        for block in client.parseDOM(html, 'div', attrs={'class': 'well_2'}):
            href = client.parseDOM(block, 'a', ret='href')[0]
            label = client.parseDOM(block, 'a')[0]
            label_year = str(re.findall(r"(\d{4})", label)[0])
            entries.append((href, label, label_year))

        years = ['%s' % str(year), '%s' % str(int(year)+1), '%s' % str(int(year)-1)]

        titled = [e for e in entries if title in cleantitle.movie(e[1])]
        url = [e[0] for e in titled if any(x in e[2] for x in years)][0]

        # Strip scheme/host when the link is absolute.
        try: url = re.compile('//.+?(/.+)').findall(url)[0]
        except: pass

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        control.log('Segos URL %s' % url)
        return url
    except:
        return
def get_show(self, imdb, tvdb, tvshowtitle, year):
    """Resolve a TV-show page URL from the site's search results.

    Matches entries by cleaned title and accepts year, year+1 or year-1.
    Returns the site-relative URL as a utf-8 encoded string, or None on
    any failure. (A stray debug ``print url`` was removed.)
    """
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query2(tvshowtitle)))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query)
        result = result.decode('utf-8-sig')  # page is served with a BOM

        result = client.parseDOM(result, 'ul', attrs={'id': 'resultList2'})[0]
        result = client.parseDOM(result, 'li')
        result = [(client.parseDOM(i, 'div', attrs={'class': 'title'}),
                   client.parseDOM(i, 'div', attrs={'class': 'info'}),
                   client.parseDOM(i, 'a', ret='href')[0]) for i in result]
        result = [(i[0][0], re.findall(r"(\d{4})", i[1][0])[0], i[2]) for i in result]

        # Allow one year of slack either way.
        years = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1)]
        result = [i for i in result
                  if cleantitle.movie(tvshowtitle) in cleantitle.movie(i[0])]
        # First candidate with an acceptable year; IndexError caught below.
        result = [i[2] for i in result if any(x in i[1] for x in years)][0]

        # Strip scheme/host when the link is absolute.
        try: url = re.compile('//.+?(/.+)').findall(result)[0]
        except Exception: url = result

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except Exception:
        return
def get_movie(self, imdb, title, year):
    """Resolve a movie URL via the site's JSON suggestion endpoint.

    Tries a quote_plus-encoded query first; on any failure retries with
    ``cleantitle.query_quote`` encoding. Accepts year, year+1 or year-1.

    Returns the site-relative URL as a utf-8 encoded string, or None.
    """
    try:
        query = self.moviesearch_link % urllib.quote_plus(cleantitle.query2(title))
        query = urlparse.urljoin(self.base_link, query)

        result = client.source(query)
        result = json.loads(result)
        result = [i for i in result['suggestions'] if len(i) > 0]

        # Allow one year of slack either way.
        years = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1)]
        result = [(i['data'].encode('utf8'), i['value'].encode('utf8')) for i in result]
        result = [i for i in result if cleantitle.movie(title) in cleantitle.movie(i[1])]
        result = [i[0] for i in result if any(x in i[1] for x in years)][0]

        # Strip scheme/host when the link is absolute.
        try: url = re.compile('//.+?(/.+)').findall(result)[0]
        except Exception: url = result

        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except Exception:
        try:
            # BUG FIX: ``originaltitle`` was undefined here, so this
            # fallback always raised NameError and never succeeded;
            # use ``title`` instead.
            query = self.moviesearch_link % cleantitle.query_quote(title)
            query = urlparse.urljoin(self.base_link, query)

            result = client.source(query)
            result = json.loads(result)
            result = [i for i in result['suggestions'] if len(i) > 0]

            years = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1)]
            result = [(i['data'].encode('utf8'), i['value'].encode('utf8')) for i in result]
            result = [i for i in result if cleantitle.movie(title) in cleantitle.movie(i[1])]
            result = [i[0] for i in result if any(x in i[1] for x in years)][0]

            try: url = re.compile('//.+?(/.+)').findall(result)[0]
            except Exception: url = result

            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except Exception:
            return