def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)

        cleanedTitle = cleantitle.get(title)

        result = client.request(query)
        result = result.decode('iso-8859-1').encode('utf-8')

        items = client.parseDOM(result, 'item')
        for item in items:
            linkTitle = client.parseDOM(item, 'title')[0]
            try:
                # Titles are formatted as 'Name (YYYY) ...'; the year is
                # parsed but not compared here.
                parsedTitle, parsedYear = re.compile('(.+?) \((\d{4})\) ').findall(linkTitle)[0]
            except:
                parsedTitle = ''
            if cleanedTitle == cleantitle.get(parsedTitle):
                url = client.parseDOM(item, 'link')[0]
                return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        # Map IMDb primary_language codes to the site's language slugs.
        langMap = {'hi': 'hindi', 'ta': 'tamil', 'te': 'telugu', 'ml': 'malayalam',
                   'kn': 'kannada', 'bn': 'bengali', 'mr': 'marathi', 'pa': 'punjabi'}

        lang = 'http://www.imdb.com/title/%s/' % imdb
        lang = client.request(lang)
        lang = re.findall('href\s*=\s*[\'|\"](.+?)[\'|\"]', lang)
        lang = [i for i in lang if 'primary_language' in i]
        lang = [urlparse.parse_qs(urlparse.urlparse(i).query) for i in lang]
        lang = [i['primary_language'] for i in lang if 'primary_language' in i]
        lang = langMap[lang[0][0]]

        q = self.search_link % (lang, urllib.quote_plus(title))
        q = urlparse.urljoin(self.base_link, q)

        t = cleantitle.get(title)

        r = self.request(q)
        r = client.parseDOM(r, 'li')
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'h3'),
              client.parseDOM(i, 'div', attrs={'class': 'info'})) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if i[0] and i[1] and i[2]]
        r = [(re.findall('(\d+)', i[0]), i[1], re.findall('(\d{4})', i[2])) for i in r]
        r = [(i[0][0], i[1], i[2][0]) for i in r if i[0] and i[2]]
        # Keep the first entry whose cleaned title and year both match.
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

        url = str(r)
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
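# A minimal standalone sketch (Python 2) of the primary_language extraction
# above: IMDb title pages link to search URLs that carry the language as a
# query parameter, and urlparse.parse_qs returns a list per key. The sample
# href below is illustrative, not fetched.
import urlparse

def _demo_primary_language():
    href = '/search/title?primary_language=hi&sort=moviemeter,asc'
    qs = urlparse.parse_qs(urlparse.urlparse(href).query)
    return qs['primary_language'][0]  # -> 'hi', then mapped via langMap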
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = '%s %s' % (title, year)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query)
        result = result.decode('iso-8859-1').encode('utf-8')
        items = client.parseDOM(result, 'item')

        cleanedTitle = cleantitle.get(title)
        for item in items:
            linkTitle = client.parseDOM(item, 'title')[0]
            if cleanedTitle == cleantitle.get(linkTitle):
                url = client.parseDOM(item, 'link')[0]
                return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        t = cleantitle.get(title)

        query = '%s %s' % (title, year)
        query = base64.b64decode(self.search_link) % urllib.quote_plus(query)

        result = client.request(query)
        result = json.loads(result)['items']

        r = [(i['link'], i['title']) for i in result]
        # Capture the title ahead of the year token.
        r = [(i[0], re.compile('(.+?) [\d{4}|(\d{4})]').findall(i[1])) for i in r]
        r = [(i[0], i[1][0]) for i in r if len(i[1]) > 0]
        # Order-preserving de-duplication.
        r = [x for y, x in enumerate(r) if x not in r[:y]]
        r = [i for i in r if t == cleantitle.get(i[1])]
        # u = [i[0] for i in r][0]
        if not r:
            raise Exception()

        return self.sources(r)
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = '%s %s' % (title, year)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query, error=True)
        items = client.parseDOM(result, 'item')

        cleanedTitle = cleantitle.get(title)
        for item in items:
            linkTitle = client.parseDOM(item, 'title')[0]
            if cleanedTitle in cleantitle.get(linkTitle):
                url = client.parseDOM(item, 'a', attrs={'rel': 'nofollow'}, ret='href')[0]
                return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        query = '%s %s' % (title, episode)
        query = self.search_link % (urllib.quote_plus(query))

        result = client.request(self.base_link + query)
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')

        items = client.parseDOM(result, 'item')
        cleanedTitle = cleantitle.get('%s %s' % (title, episode))
        for item in items:
            linkTitle = client.parseDOM(item, 'title')[0]
            # The site suffixes every title with 'watch online episode hd'.
            linkTitle = cleantitle.get(linkTitle).replace('watchonlineepisodehd', '')
            if cleanedTitle == linkTitle:
                url = client.parseDOM(item, 'link')[0]
                return self.sources(client.replaceHTMLCodes(url))
    except:
        pass
    return self.srcs
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = self.moviesearch_link % urllib.quote_plus(cleantitle.query(title))
        query = urlparse.urljoin(self.base_link, query)

        result = str(proxy.request(query, 'item'))
        # Pull in a second page of results when one is advertised.
        if 'page=2' in result or 'page%3D2' in result:
            result += str(proxy.request(query + '&page=2', 'item'))
        result = client.parseDOM(result, 'div', attrs={'class': 'item'})

        title = 'watchputlocker' + cleantitle.get(title)
        # Accept the requested year plus one either side.
        years = ['(%s)' % str(year), '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]

        result = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in result]
        result = [(i[0][0], i[1][0]) for i in result if len(i[0]) > 0 and len(i[1]) > 0]
        result = [i for i in result if any(x in i[1] for x in years)]

        r = [(proxy.parse(i[0]), i[1]) for i in result]

        match = [i[0] for i in r if title == cleantitle.get(i[1]) and '(%s)' % str(year) in i[1]]

        match2 = [i[0] for i in r]
        match2 = [x for y, x in enumerate(match2) if x not in match2[:y]]
        if match2 == []:
            return []

        # Prefer an exact title/year match; otherwise probe up to five
        # candidates and confirm via the IMDb id on the detail page.
        for i in match2[:5]:
            try:
                if len(match) > 0:
                    url = match[0]
                    break
                r = proxy.request(urlparse.urljoin(self.base_link, i), 'link_ite')
                r = re.findall('(tt\d+)', r)
                if imdb in r:
                    url = i
                    break
            except:
                pass

        url = re.findall('(?://.+?|)(/.+)', url)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
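# The order-preserving de-duplication idiom used above, in isolation. It is
# O(n^2), which is fine for the handful of candidate links involved.
def _dedup_preserving_order(seq):
    return [x for y, x in enumerate(seq) if x not in seq[:y]]

# _dedup_preserving_order(['a', 'b', 'a', 'c']) -> ['a', 'b', 'c']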
def sources(self, url):
    try:
        logger.debug('SOURCES URL %s' % url, __name__)
        if url is None:
            return []

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        cleanedTitle = cleantitle.get(title)

        # 'S01E02' for episodes, the bare year for movies.
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        r = client.request(url)
        posts = client.parseDOM(r, 'item')

        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                post = post.replace('\n', '').replace('\t', '')
                post = re.compile('<span style="color: #ff0000">Single Link</b></span><br />(.+?)<span style="color: #ff0000">').findall(post)[0]
                u = re.findall('<a href="(http(?:s|)://.+?)">', post)
                items += [(t, i) for i in u]
            except:
                pass

        for item in items:
            try:
                name = client.replaceHTMLCodes(item[0])

                linkTitle = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if not cleanedTitle == cleantitle.get(linkTitle):
                    raise Exception()

                year = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not year == hdlr:
                    raise Exception()

                self.source(item)
            except:
                pass

        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except:
        return self.srcs
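# Hedged illustration of the hdlr token built in sources() above: episodes
# are matched on an 'S01E02'-style tag, movies on the bare year string. The
# sample data dicts are hypothetical.
def _demo_hdlr(data):
    if 'tvshowtitle' in data:
        return 'S%02dE%02d' % (int(data['season']), int(data['episode']))
    return data['year']

# _demo_hdlr({'tvshowtitle': 'Show', 'season': '1', 'episode': '2'}) -> 'S01E02'
# _demo_hdlr({'title': 'Movie', 'year': '2016'}) -> '2016'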
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        # Cached list of site movies (168-hour timeout).
        movies = cache.get(self.desiRulezCache, 168)
        url = [i['url'] for i in movies
               if cleantitle.get(i['title'].decode('UTF-8')) == cleantitle.get(title)][0]
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error(e)
    return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = cleantitle.get(title)
        query = '/watch?v=%s_%s' % (query.replace(' ', '_'), year)
        query = urlparse.urljoin(self.base_link, query)

        headers = {'User-Agent': self.userAgent}
        result = client.request(query, headers=headers)

        varid = re.compile('var frame_url = "(.+?)"', re.DOTALL).findall(result)[0].replace('/embed/', '/streamdrive/info/')
        res_chk = re.compile('class="title"><h1>(.+?)</h1>', re.DOTALL).findall(result)[0]
        varid = 'http:' + varid

        holder = client.request(varid, headers=headers).content
        links = re.compile('"src":"(.+?)"', re.DOTALL).findall(holder)
        for link in links:
            link = link.replace('\\/redirect?url=', '')
            link = urllib.unquote(link).decode('utf8')
            # Infer the quality from the page title.
            if '1080' in res_chk:
                res = '1080p'
            elif '720' in res_chk:
                res = '720p'
            else:
                res = 'DVD'
            self.srcs.append({'source': 'Googlelink', 'parts': '1', 'quality': res,
                              'scraper': self.name, 'url': link, 'direct': False})
        return self.srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
def get_muscic_url(scraper, title, artist, cache_location, maximum_age, debrid=False):
    cache_enabled = control.setting("cache_enabled") == 'true'
    try:
        dbcon = database.connect(cache_location)
        dbcur = dbcon.cursor()
        try:
            dbcur.execute("SELECT * FROM version")
            match = dbcur.fetchone()
        except:
            # No version table yet: wipe the cache and (re)create the schema.
            desiscrapers.clear_cache()
            dbcur.execute("CREATE TABLE version (version TEXT)")
            dbcur.execute("INSERT INTO version Values ('0.5.4')")
            dbcon.commit()
        dbcur.execute("CREATE TABLE IF NOT EXISTS rel_music_src ("
                      "scraper TEXT, "
                      "title TEXT, "
                      "artist TEXT, "
                      "urls TEXT, "
                      "added TEXT, "
                      "UNIQUE(scraper, title, artist)"
                      ");")
    except:
        pass

    if cache_enabled:
        try:
            sources = []
            dbcur.execute(
                "SELECT * FROM rel_music_src WHERE scraper = '%s' AND title = '%s' AND artist = '%s'"
                % (scraper.name, cleantitle.get(title).upper(), artist.upper()))
            match = dbcur.fetchone()
            # 'added' is stored as 'YYYY-MM-DD HH:MM'; compare digit strings.
            t1 = int(re.sub('[^0-9]', '', str(match[4])))
            t2 = int(datetime.datetime.now().strftime("%Y%m%d%H%M"))
            update = abs(t2 - t1) > maximum_age
            if update == False:
                sources = json.loads(match[3])
                return sources
        except:
            pass

    try:
        sources = scraper.scrape_music(title, artist, debrid=debrid)
        if sources is None:
            sources = []
        elif cache_enabled:
            dbcur.execute(
                "DELETE FROM rel_music_src WHERE scraper = '%s' AND title = '%s' AND artist = '%s'"
                % (scraper.name, cleantitle.get(title).upper(), artist.upper()))
            dbcur.execute(
                "INSERT INTO rel_music_src Values (?, ?, ?, ?, ?)",
                (scraper.name, cleantitle.get(title).upper(), artist.upper(),
                 json.dumps(sources), datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
            dbcon.commit()
        return sources
    except:
        pass
def get_url(scraper, title, show_year, year, season, episode, imdb, tvdb, type, cache_location, maximum_age, check_url=False, debrid=False):
    cache_enabled = control.setting("cache_enabled") == 'true'
    try:
        dbcon = database.connect(cache_location)
        dbcur = dbcon.cursor()
        try:
            dbcur.execute("SELECT * FROM version")
            match = dbcur.fetchone()
        except:
            # No version table yet: wipe the cache and (re)create the schema.
            desiscrapers.clear_cache()
            dbcur.execute("CREATE TABLE version (version TEXT)")
            dbcur.execute("INSERT INTO version Values ('0.5.4')")
            dbcon.commit()
        dbcur.execute("CREATE TABLE IF NOT EXISTS rel_src ("
                      "scraper TEXT, "
                      "title TEXT, show_year TEXT, year TEXT, "
                      "season TEXT, "
                      "episode TEXT, "
                      "imdb_id TEXT, "
                      "urls TEXT, "
                      "added TEXT, "
                      "UNIQUE(scraper, title, year, season, episode)"
                      ");")
    except:
        pass

    if cache_enabled:
        try:
            sources = []
            dbcur.execute(
                "SELECT * FROM rel_src WHERE scraper = '%s' AND title = '%s' AND show_year= '%s' AND year = '%s' AND season = '%s' AND episode = '%s'"
                % (scraper.name, cleantitle.get(title).upper(), show_year, year, season, episode))
            match = dbcur.fetchone()
            t1 = int(re.sub('[^0-9]', '', str(match[8])))
            t2 = int(datetime.datetime.now().strftime("%Y%m%d%H%M"))
            update = abs(t2 - t1) > maximum_age
            if update == False:
                sources = json.loads(match[7])
                return sources
        except:
            pass

    try:
        sources = []
        if type == "movie":
            sources = scraper.scrape_movie(title, year, imdb, debrid=debrid)
        elif type == "episode":
            sources = scraper.scrape_episode(title, show_year, year, season, episode, imdb, tvdb, debrid=debrid)
        if sources is None:
            sources = []
        elif cache_enabled:
            dbcur.execute(
                "DELETE FROM rel_src WHERE scraper = '%s' AND title = '%s' AND show_year= '%s' AND year = '%s' AND season = '%s' AND episode = '%s'"
                % (scraper.name, cleantitle.get(title).upper(), show_year, year, season, episode))
            dbcur.execute(
                "INSERT INTO rel_src Values (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (scraper.name, cleantitle.get(title).upper(), show_year, year, season, episode,
                 imdb, json.dumps(sources), datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
            dbcon.commit()

        if check_url:
            # Prefer urlresolver, fall back to urlresolver9, else skip resolving.
            noresolver = False
            try:
                import urlresolver
            except:
                try:
                    import urlresolver9 as urlresolver
                except:
                    noresolver = True

            new_sources = []
            from common import check_playable
            for source in sources:
                if source["direct"]:
                    check = check_playable(source["url"])
                    if check:
                        new_sources.append(source)
                elif not noresolver:
                    try:
                        hmf = urlresolver.HostedMediaFile(url=source['url'],
                                                          include_disabled=False,
                                                          include_universal=False)
                        if hmf.valid_url():
                            resolved_url = hmf.resolve()
                            check = check_playable(resolved_url)
                            if check:
                                new_sources.append(source)
                    except:
                        pass
                else:
                    # No resolver available; pass the source through unchecked.
                    new_sources.append(source)
            sources = new_sources
        return sources
    except:
        pass
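# Sketch of the staleness test shared by get_url and get_muscic_url above:
# the cached 'added' timestamp is reduced to its digits (YYYYMMDDHHMM) and
# compared as an integer against now. Note this is a digit-string difference,
# not a true time delta, so maximum_age is in those same synthetic units.
import datetime
import re

def _demo_is_stale(added, maximum_age):
    t1 = int(re.sub('[^0-9]', '', str(added)))  # '2017-01-01 10:30' -> 201701011030
    t2 = int(datetime.datetime.now().strftime("%Y%m%d%H%M"))
    return abs(t2 - t1) > maximum_age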
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = '%s %s' % (title, year)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)

        result = client.request(query)
        result = result.decode('iso-8859-1').encode('utf-8')
        posts = client.parseDOM(result, 'item')

        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                if 'trailer' in cleantitle.get(t):
                    raise Exception()
                try:
                    s = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)(?:GB|GiB|MB|MiB|mb|gb))', t)[0]
                except:
                    s = '0'
                i = client.parseDOM(post, 'link')[0]
                items += [{'name': t, 'url': i, 'size': s}]
            except:
                pass

        title = cleantitle.get(title)
        for item in items:
            try:
                name = item.get('name')

                # Strip the year/episode token and everything after it.
                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if cleantitle.get(title) == cleantitle.get(t):
                    y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                    if not y == year:
                        raise Exception()

                    # Tokenise the remainder of the release name.
                    fmt = re.sub('(.+)(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*)(\.|\)|\]|\s)', '', name.upper())
                    fmt = re.split('\.|\(|\)|\[|\]|\s|\-', fmt)
                    fmt = [i.lower() for i in fmt]

                    if any(i.endswith(('subs', 'sub', 'dubbed', 'dub')) for i in fmt):
                        raise Exception()
                    if any(i in ['extras'] for i in fmt):
                        raise Exception()

                    if '1080p' in fmt:
                        quality = '1080p'
                    elif '720p' in fmt:
                        quality = 'HD'
                    else:
                        quality = 'SD'
                    if any(i in ['dvdscr', 'r5', 'r6'] for i in fmt):
                        quality = 'SCR'
                    elif any(i in ['camrip', 'tsrip', 'hdcam', 'hdts', 'dvdcam', 'dvdts', 'cam', 'telesync', 'ts'] for i in fmt):
                        quality = 'CAM'

                    info = []
                    if '3d' in fmt:
                        info.append('3D')
                    try:
                        # Normalise the advertised size to GB.
                        size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)(?:GB|GiB|MB|MiB|mb|gb))', item.get('size'))[-1]
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass
                    if any(i in ['hevc', 'h265', 'x265'] for i in fmt):
                        info.append('HEVC')
                    info = ' | '.join(info)

                    movieurl = item.get('url')
                    result = client.request(movieurl)
                    result = result.decode('iso-8859-1').encode('utf-8')
                    result = result.replace('\n', '').replace('\t', '')
                    result = client.parseDOM(result, 'div', attrs={'class': 'entry'})[0]
                    links = client.parseDOM(result, 'a', attrs={'target': '_blank'}, ret='href')
                    for link in links:
                        if 'http' in link:
                            host = client.host(link)
                            self.srcs.append({'source': host, 'parts': '1', 'quality': quality,
                                              'scraper': self.name, 'url': link,
                                              'direct': False, 'info': info})
            except:
                pass

        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
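# Standalone check of the size normalisation above: MB/MiB figures are
# divided by 1024 so every source reports GB. The release name below is
# made up for illustration.
import re

def _demo_size_gb(name):
    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)(?:GB|GiB|MB|MiB|mb|gb))', name)[-1]
    div = 1 if size.endswith(('GB', 'GiB')) else 1024
    return '%.2f GB' % (float(re.sub('[^0-9|/.|/,]', '', size)) / div)

# _demo_size_gb('Movie.2016.720p.700MB') -> '0.68 GB'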
def sources(self, url):
    try:
        logger.debug('SOURCES URL %s' % url, __name__)
        if url is None:
            return self.srcs

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        cleanedTitle = cleantitle.get(title)

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        r = client.request(url)
        posts = client.parseDOM(r, 'item')
        link = client.parseDOM(posts[0], 'link')[0]

        items = []
        result = client.request(link)
        posts = client.parseDOM(result, 'div', attrs={'id': 'content'})
        for post in posts:
            try:
                items += zip(client.parseDOM(post, 'a', attrs={'target': '_blank'}),
                             client.parseDOM(post, 'a', ret='href', attrs={'target': '_blank'}))
            except:
                pass

        for item in items:
            try:
                name = client.replaceHTMLCodes(item[0])

                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if not cleanedTitle == cleantitle.get(t):
                    raise Exception()

                y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not y == hdlr:
                    raise Exception()

                self.source(item)
            except:
                pass

        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except:
        return self.srcs
def test():
    global movies, shows
    try:
        test_movies = []
        test_episodes = []
        profile_path = xbmc.translatePath(control.addonInfo('profile')).decode('utf-8')
        test_file = xbmcvfs.File(os.path.join(profile_path, "testings.xml"))
        xml = BeautifulStoneSoup(test_file.read())
        test_file.close()
        items = xml.findAll("item")
        for item in items:
            try:
                content = item.find("content")
                if content:
                    if "movie" in content.text:
                        meta = item.find("meta")
                        test_movies.append({
                            'title': meta.find("title").text,
                            'imdb': meta.find("imdb").text,
                            'year': meta.find("year").text,
                        })
                    elif "episode" in content.text:
                        meta = item.find("meta")
                        test_episodes.append({
                            'title': meta.find("tvshowtitle").text,
                            'show_year': int(meta.find("premiered").text[0:4]),
                            'year': meta.find("year").text,
                            'season': meta.find("season").text,
                            'episode': meta.find("episode").text,
                            'imdb': meta.find("imdb").text,
                        })
            except:
                pass
        movies = test_movies
        shows = test_episodes
    except:
        pass

    dialog = xbmcgui.Dialog()
    pDialog = xbmcgui.DialogProgress()
    if dialog.yesno("Desiscrapers Testing Mode", 'Clear cache?'):
        desiscrapers.clear_cache()

    try:
        dbcon = database.connect(control.cacheFile)
        dbcur = dbcon.cursor()
    except:
        dialog.ok("Desiscrapers Testing Mode", 'Error connecting to db')
        sys.exit()

    num_movies = len(movies)
    if num_movies > 0:
        pDialog.create('Desiscrapers Testing mode active', 'please wait')
        index = 0
        for movie in movies:
            index += 1
            title = movie['title']
            year = movie['year']
            imdb = movie['imdb']
            if pDialog.iscanceled():
                pDialog.close()
                break
            # Multiply before dividing so Python 2 floor division still
            # yields a usable percentage.
            pDialog.update((index * 100) / num_movies, "Scraping movie {} of {}".format(index, num_movies), title)
            links_scraper = desiscrapers.scrape_movie(title, year, imdb)
            links_scraper = links_scraper()
            for scraper_links in links_scraper:
                if pDialog.iscanceled():
                    break
                if scraper_links:
                    logger.debug(scraper_links, __name__)
                    random.shuffle(scraper_links)
        pDialog.close()

        dbcur.execute("SELECT COUNT(DISTINCT(scraper)) FROM rel_src where episode = ''")
        match = dbcur.fetchone()
        num_movie_scrapers = match[0]
        logger.debug('num_movie_scrapers %s' % num_movie_scrapers, __name__)

        dbcur.execute("SELECT scraper, count(distinct(urls)) FROM rel_src where episode = '' group by scraper")
        matches = dbcur.fetchall()
        failed = []
        for match in matches:
            if int(match[1]) <= 1:
                failed.append(match[0])
        if len(failed) > 0:
            failedstring = "Failed: {}".format(len(failed))
            for fail in failed:
                failedstring += "\n - {}".format(str(fail))
        else:
            failedstring = "Failed: {}".format(len(failed))
        logger.debug('failedString %s' % failedstring, __name__)

        dbcur.execute("SELECT title, count(distinct(urls)) FROM rel_src where episode = '' group by title")
        matches = dbcur.fetchall()
        failed_movies = []
        for match in matches:
            if int(match[1]) <= 1:
                if int(match[1]) == 1:
                    # A single distinct result may still be the empty list.
                    dbcur.execute(
                        "SELECT scraper, urls FROM rel_src where episode == '' and title == '{}' group by scraper".format(match[0]))
                    new_matches = dbcur.fetchall()
                    found = False
                    for new_match in new_matches:
                        if new_match[1] == "[]":
                            continue
                        else:
                            found = True
                    if not found:
                        failed_movies.append(match[0])
                else:
                    failed_movies.append(match[0])
        if len(failed_movies) > 0:
            failed_movie_string = "Failed movies: {}".format(len(failed_movies))
            for fail in failed_movies:
                for movie in movies:
                    if cleantitle.get(movie['title']).upper() == str(fail):
                        failed_movie_string += "\n - {}".format(movie["title"])
        else:
            failed_movie_string = ""

    num_shows = len(shows)
    if num_shows > 0:
        pDialog.create('Desiscrapers Testing mode active', 'please wait')
        index = 0
        for show in shows:
            index += 1
            title = show['title']
            show_year = show['show_year']
            year = show['year']
            season = show['season']
            episode = show['episode']
            imdb = show['imdb']
            tvdb = show.get('tvdb', '')
            if pDialog.iscanceled():
                pDialog.close()
                break
            pDialog.update((index * 100) / num_shows, "Scraping show {} of {}".format(index, num_shows), title)
            links_scraper = desiscrapers.scrape_episode(title, show_year, year, season, episode, imdb, tvdb)
            links_scraper = links_scraper()
            for scraper_links in links_scraper:
                if pDialog.iscanceled():
                    break
                if scraper_links:
                    random.shuffle(scraper_links)
        pDialog.close()

        dbcur.execute("SELECT COUNT(DISTINCT(scraper)) FROM rel_src where episode != ''")
        match = dbcur.fetchone()
        num_show_scrapers = match[0]

        dbcur.execute("SELECT scraper, count(distinct(urls)) FROM rel_src where episode != '' group by scraper")
        matches = dbcur.fetchall()
        failed = []
        for match in matches:
            if int(match[1]) <= 1:
                if int(match[1]) == 1:
                    dbcur.execute(
                        "SELECT scraper, urls FROM rel_src where episode != '' and scraper == '{}' group by scraper".format(match[0]))
                    match = dbcur.fetchone()
                    if match[1] == "[]":
                        failed.append(match[0])
                else:
                    failed.append(match[0])
        if len(failed) > 0:
            show_scraper_failedstring = "Failed: {}".format(len(failed))
            for fail in failed:
                show_scraper_failedstring += "\n - {}".format(str(fail))
        else:
            show_scraper_failedstring = ""

        dbcur.execute("SELECT title, count(distinct(urls)) FROM rel_src where episode != '' group by title")
        matches = dbcur.fetchall()
        failed_shows = []
        for match in matches:
            if int(match[1]) <= 1:
                if int(match[1]) == 1:
                    dbcur.execute(
                        "SELECT scraper, urls FROM rel_src where episode != '' and title == '{}' group by scraper".format(match[0]))
                    new_matches = dbcur.fetchall()
                    found = False
                    for new_match in new_matches:
                        if new_match[1] == "[]":
                            continue
                        else:
                            found = True
                    if not found:
                        failed_shows.append(match[0])
                else:
                    failed_shows.append(match[0])
        if len(failed_shows) > 0:
            failed_show_string = "Failed shows: {}".format(len(failed_shows))
            for fail in failed_shows:
                for show in shows:
                    if cleantitle.get(show['title']).upper() == str(fail):
                        failed_show_string += "\n - {} S{}-E{}".format(show["title"], show["season"], show["episode"])
        else:
            failed_show_string = ""

    resultstring = 'Results:\n'
    if num_movies > 0:
        resultstring = resultstring + \
            ' Movie Scrapers: {}\n' \
            ' {}\n' \
            ' {}\n'.format(num_movie_scrapers, failedstring, failed_movie_string)
    if num_shows > 0:
        resultstring = resultstring + \
            ' Episode Scrapers: {}\n' \
            ' {}\n' \
            ' {}\n'.format(num_show_scrapers, show_scraper_failedstring, failed_show_string)
    dialog.textviewer("Desiscrapers Testing Mode", resultstring)
def super_info(self, i):
    try:
        if self.list[i]['metacache'] == True:
            raise Exception()

        try:
            imdb = self.list[i]['imdb']
        except:
            imdb = '0'
        try:
            tvdb = self.list[i]['tvdb']
        except:
            tvdb = '0'
        self.list[i].update({"imdb": imdb, "tvdb": tvdb})

        title = self.list[i]['title']
        cleanedTitle = cleantitle.get(title)
        if 'season' in title.lower():
            title = title[:title.index('Season') - 1]
        else:
            # Strip a trailing ' 10' from the title.
            title = title.replace(' 10', '')
        logger.debug('Super_Info Title : %s' % title, __name__)

        url = self.burp_search_link % urllib.quote_plus(title.encode('utf-8'), safe=':/')

        result = client.request(url)
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        result = client.parseDOM(result, name="table", attrs={"class": "result"})[0]
        result = client.parseDOM(result, name="td", attrs={"class": "resultTitle"})

        showUrl = None
        for item in result:
            linkTitle = client.parseDOM(item, name="a", attrs={"class": "title"})[0]
            linkTitle = client.parseDOM(linkTitle, name="strong")[0]
            if cleanedTitle == cleantitle.get(linkTitle):
                showUrl = client.parseDOM(item, name="a", attrs={"class": "title"}, ret="href")[0]
                break
        if showUrl is None:
            raise Exception()

        result = client.request(showUrl)
        if 'No information available!' in result:
            raise Exception()
        result = result.decode('iso-8859-1').encode('utf-8')

        right = client.parseDOM(result, "div", attrs={"class": "Right"})[0]
        showDetails = client.parseDOM(result, "td", attrs={"class": "showDetails"})[0]

        try:
            genre = ''
            rows = client.parseDOM(showDetails, "tr")
            for item in rows:
                if "genre" in item.lower() or "show type" in item.lower():
                    genre = client.parseDOM(item, "td")[0]
                    genre = genre.replace(',', ' / ').strip()
        except Exception as e:
            logger.error(e)
            genre = ''
        if genre == '':
            genre = '0'
        genre = client.replaceHTMLCodes(genre)
        genre = genre.encode('utf-8')
        if not genre == '0':
            self.list[i].update({'genre': genre})

        try:
            poster = client.parseDOM(result, "td", attrs={"class": "showPics"})[0]
            poster = client.parseDOM(poster, "img", ret="src")[0]
        except:
            poster = ''
        if poster == '' or poster is None:
            poster = '0'
        poster = client.replaceHTMLCodes(poster)
        poster = poster.encode('utf-8')
        if not poster == '0':
            self.list[i].update({'poster': poster})

        try:
            plot = client.parseDOM(right, "div", attrs={"class": "synopsis"})[0].strip()
            try:
                plot += client.parseDOM(right, "span", attrs={"id": "morecontent"})[0].strip()
            except:
                pass
        except:
            plot = ''
        if plot == '':
            plot = '0'
        plot = client.replaceHTMLCodes(plot)
        plot = plot.encode('utf-8')
        if not plot == '0':
            self.list[i].update({'plot': plot})

        try:
            metaHTML = client.parseDOM(right, "table", attrs={"class": "meta"})[0]
        except:
            metaHTML = None

        # Defaults so the meta record below never references unbound names
        # when the meta table is missing.
        premiered = cast = None
        year = '0'
        if metaHTML:
            items = client.parseDOM(metaHTML, "tr")
            for item in items:
                if "release date" in item.lower():
                    premiered = client.parseDOM(item, "span", attrs={"itemprop": "name"})[0]
                    premiered = premiered.encode('utf-8')
                elif "Actor" in item:
                    cast = client.parseDOM(item, "span", attrs={"itemprop": "name"})[0]
                    cast = cast.split(',')
            if premiered is not None:
                try:
                    year = re.compile('(\d{4})').findall(premiered)[0]
                except:
                    year = ''
                if year == '':
                    year = '0'
                year = year.encode('utf-8')
                self.list[i].update({'year': year})
                self.list[i].update({'premiered': premiered})
            if cast is not None and len(cast) > 0:
                self.list[i].update({'cast': cast})

        imdb = cleantitle.get(title)
        tvdb = banner = fanart = studio = duration = rating = votes = mpaa = tmdb = '0'
        self.meta.append({
            'year': year,
            'tmdb': tmdb,
            'imdb': imdb,
            'tvdb': tvdb,
            'lang': self.info_lang,
            'item': {
                'code': imdb,
                'imdb': imdb,
                'tvdb': tvdb,
                'poster': poster,
                'banner': banner,
                'fanart': fanart,
                'premiered': premiered,
                'studio': studio,
                'genre': genre,
                'duration': duration,
                'rating': rating,
                'votes': votes,
                'mpaa': mpaa,
                'cast': cast,
                'plot': plot
            }
        })
    except:
        pass