def findTVEpisodesForShow(self, mediaObject):
    """Collect the episodes of every season of a show, using the object cache.

    The 'MediaScraper_TVShow' cache is consulted first, keyed by
    ``mediaObject.unique_id``. On a miss (nothing cached, or an empty cached
    value) seasons are requested from ``self.api`` one at a time, starting
    at season 1, until a season yields no episodes or its request raises.
    A non-empty combined result is stored in the cache.

    Returns:
        The list of episodes found (empty when nothing was found).
    """
    import time  # function-local import, as in the original, but done once

    searchKey = mediaObject.unique_id
    results = objectcache.searchCache('MediaScraper_TVShow', searchKey)
    if results is not None and len(results) > 0:
        return results

    results = []
    seasonNumber = 1
    while True:
        try:
            seasonEpisodes = self.api.findTVEpisodesForSeason(
                mediaObject, seasonNumber)
        except Exception as e:
            # A failed request ends the walk, exactly like an empty season.
            logging.error('Failed to get episodes for ' + str(mediaObject) +
                          ' season ' + str(seasonNumber) + ' error: ' + str(e))
            seasonEpisodes = []

        # Also covers an API that answers None instead of an empty list.
        if not seasonEpisodes:
            break

        results += seasonEpisodes
        seasonNumber += 1
        # Pause before asking for the next season.
        time.sleep(5)

    if len(results) > 0:
        objectcache.saveObject('MediaScraper_TVShow', searchKey, results)
    return results
def matchRatioWithCaption(self, caption, quickMatch=False):
    """Return a similarity score between this caption's text and *caption*'s.

    A fast ``difflib.SequenceMatcher`` ratio is computed first. When that
    ratio is inconclusive (strictly between 0.02 and 0.92) the
    'Caption_Compare' cache is checked under both orderings of the two text
    hashes. On a cache miss, and unless *quickMatch* is set, the slower
    ``string_match.matchRatio`` is run and its result cached.

    The comparison is best-effort: if either text is empty the score is 0.0,
    and if anything fails part-way the failure is logged and the best score
    obtained so far is returned.

    Args:
        caption: the other caption; must be truthy.
        quickMatch: when true, never run the slow comparison.
    """
    assert (caption)
    matchRatio = 0.0
    try:
        # Comparison text is computed lazily and memoised on each caption.
        if self.textCompare is None:
            self.textCompare = Caption._textForComparison(self.text)
        textA = self.textCompare
        if caption.textCompare is None:
            caption.textCompare = Caption._textForComparison(caption.text)
        textB = caption.textCompare

        # Explicit check instead of assert, so it still applies under -O.
        if len(textA) > 0 and len(textB) > 0:
            matchRatio = difflib.SequenceMatcher(None, textA, textB).ratio()
            if 0.02 < matchRatio < 0.92:
                # Build two keys from the text hashes, one per ordering, and
                # check the cache before performing a match operation.
                digests = []
                for text in (textA, textB):
                    # md5 needs bytes; encode text so non-ASCII captions
                    # do not abort the comparison.
                    if not isinstance(text, bytes):
                        text = text.encode('utf-8')
                    digests.append(hashlib.md5(text).hexdigest())
                keyA = digests[0] + '_' + digests[1]
                keyB = digests[1] + '_' + digests[0]

                cached = objectcache.searchCache('Caption_Compare', keyA)
                if cached is not None:
                    logging.debug('Found cached result(a): ' + str(cached))
                    matchRatio = cached
                else:
                    cached = objectcache.searchCache('Caption_Compare', keyB)
                    if cached is not None:
                        matchRatio = cached
                        logging.debug('Found cached result(b): ' + str(cached))
                    elif not quickMatch:
                        logging.info('Performing match compare. Please wait')
                        matchRatio = string_match.matchRatio(textA, textB)
                        logging.debug('Got compare result: ' + str(matchRatio))
                        objectcache.saveObject('Caption_Compare', keyA, matchRatio)
    except Exception:
        # Keep the best-effort contract, but leave a trace of what went wrong.
        logging.exception('Caption comparison failed; using ratio so far')
    logging.debug('Match ratio: ' + str(matchRatio))
    return matchRatio
def matchRatioWithCaption(self, caption, quickMatch=False):
    """Return a similarity score between this caption's text and *caption*'s.

    A fast ``difflib.SequenceMatcher`` ratio is computed first. When that
    ratio is inconclusive (strictly between 0.02 and 0.92) the
    'Caption_Compare' cache is checked under both orderings of the two text
    hashes. On a cache miss, and unless *quickMatch* is set, the slower
    ``string_match.matchRatio`` is run and its result cached.

    The comparison is best-effort: if either text is empty the score is 0.0,
    and if anything fails part-way the failure is logged and the best score
    obtained so far is returned.

    Args:
        caption: the other caption; must be truthy.
        quickMatch: when true, never run the slow comparison.
    """
    assert(caption)
    matchRatio = 0.0
    try:
        # Comparison text is computed lazily and memoised on each caption.
        if self.textCompare is None:
            self.textCompare = Caption._textForComparison(self.text)
        textA = self.textCompare
        if caption.textCompare is None:
            caption.textCompare = Caption._textForComparison(caption.text)
        textB = caption.textCompare

        # Explicit check instead of assert, so it still applies under -O.
        if len(textA) > 0 and len(textB) > 0:
            matchRatio = difflib.SequenceMatcher(None, textA, textB).ratio()
            if 0.02 < matchRatio < 0.92:
                # Build two keys from the text hashes, one per ordering, and
                # check the cache before performing a match operation.
                digests = []
                for text in (textA, textB):
                    # md5 needs bytes; encode text so non-ASCII captions
                    # do not abort the comparison.
                    if not isinstance(text, bytes):
                        text = text.encode('utf-8')
                    digests.append(hashlib.md5(text).hexdigest())
                keyA = digests[0] + '_' + digests[1]
                keyB = digests[1] + '_' + digests[0]

                cached = objectcache.searchCache('Caption_Compare', keyA)
                if cached is not None:
                    logging.debug('Found cached result(a): ' + str(cached))
                    matchRatio = cached
                else:
                    cached = objectcache.searchCache('Caption_Compare', keyB)
                    if cached is not None:
                        matchRatio = cached
                        logging.debug('Found cached result(b): ' + str(cached))
                    elif not quickMatch:
                        logging.info('Performing match compare. Please wait')
                        matchRatio = string_match.matchRatio(textA, textB)
                        logging.debug('Got compare result: ' + str(matchRatio))
                        objectcache.saveObject('Caption_Compare', keyA, matchRatio)
    except Exception:
        # Keep the best-effort contract, but leave a trace of what went wrong.
        logging.exception('Caption comparison failed; using ratio so far')
    logging.debug('Match ratio: ' + str(matchRatio))
    return matchRatio
def subtitlesForTVEpisode(self, episodeObject, language="eng"):
    """Return subtitles for a TV episode, consulting the object cache first.

    The "SubtitleScraper_TVEpisode" cache key combines the episode's
    ``unique_id``, the language code and the episode's ``scraper_source``.
    On a miss (nothing cached, or an empty cached value) the lookup is
    delegated to ``self._api().subtitlesForMovie``; whatever it returns is
    cached and returned.
    """
    key = str(episodeObject.unique_id) + "_" + language + "_" + str(episodeObject.scraper_source)
    cached = objectcache.searchCache("SubtitleScraper_TVEpisode", key)
    if cached is not None and len(cached) > 0:
        return cached

    # NOTE(review): the meaning of the literal 5 is defined by
    # subtitlesForMovie, which is not visible here -- confirm before changing.
    results = self._api().subtitlesForMovie(episodeObject, 5, language)
    objectcache.saveObject("SubtitleScraper_TVEpisode", key, results)
    return results
def subtitlesForTVEpisode(self, episodeObject, language='eng'):
    """Return subtitles for a TV episode, consulting the object cache first.

    The 'SubtitleScraper_TVEpisode' cache key combines the episode's
    ``unique_id``, the language code and the episode's ``scraper_source``.
    On a miss (nothing cached, or an empty cached value) the lookup is
    delegated to ``self._api().subtitlesForMovie``; whatever it returns is
    cached and returned.
    """
    key = str(episodeObject.unique_id) + '_' + language + '_' + str(
        episodeObject.scraper_source)
    cached = objectcache.searchCache('SubtitleScraper_TVEpisode', key)
    if cached is not None and len(cached) > 0:
        return cached

    # NOTE(review): the meaning of the literal 5 is defined by
    # subtitlesForMovie, which is not visible here -- confirm before changing.
    results = self._api().subtitlesForMovie(episodeObject, 5, language)
    objectcache.saveObject('SubtitleScraper_TVEpisode', key, results)
    return results
def findTVEpisode(self, mediaObject, seasonNumber, episodeNumber):
    """Look up a single episode of a show, consulting the object cache first.

    The "MediaScraper_TVEpisode" cache is keyed by
    ``<title>_S<seasonNumber>_E<episodeNumber>``. On a miss (nothing cached,
    or an empty cached value) the lookup is delegated to
    ``self.api.findTVEpisode``; whatever it returns is cached and returned.
    """
    searchKey = mediaObject.title + "_S" + str(seasonNumber) + "_E" + str(episodeNumber)
    cached = objectcache.searchCache("MediaScraper_TVEpisode", searchKey)
    if cached is not None and len(cached) > 0:
        return cached

    results = self.api.findTVEpisode(mediaObject, seasonNumber, episodeNumber)
    objectcache.saveObject("MediaScraper_TVEpisode", searchKey, results)
    return results
def findTVEpisode(self, mediaObject, seasonNumber, episodeNumber):
    """Look up a single episode of a show, consulting the object cache first.

    The 'MediaScraper_TVEpisode' cache is keyed by
    ``<title>_S<seasonNumber>_E<episodeNumber>``. On a miss (nothing cached,
    or an empty cached value) the lookup is delegated to
    ``self.api.findTVEpisode``; whatever it returns is cached and returned.
    """
    searchKey = mediaObject.title + '_S' + str(seasonNumber) + '_E' + str(
        episodeNumber)
    cached = objectcache.searchCache('MediaScraper_TVEpisode', searchKey)
    if cached is not None and len(cached) > 0:
        return cached

    results = self.api.findTVEpisode(mediaObject, seasonNumber, episodeNumber)
    objectcache.saveObject('MediaScraper_TVEpisode', searchKey, results)
    return results
def findTVEpisodesForSeason(self, mediaObject, seasonNumber):
    """Return the episodes of one season, ordered by episode number.

    The "MediaScraper_TVSeason" cache is keyed by ``<title>_S<seasonNumber>``.
    On a miss (nothing cached, or an empty cached value) the lookup is
    delegated to ``self.api.findTVEpisodesForSeason`` and its answer is
    cached. A non-empty result is sorted in place by the numeric value of
    each item's ``episode_number`` before being returned.
    """
    searchKey = mediaObject.title + "_S" + str(seasonNumber)
    results = objectcache.searchCache("MediaScraper_TVSeason", searchKey)
    if results is None or len(results) == 0:
        results = self.api.findTVEpisodesForSeason(mediaObject, seasonNumber)
        objectcache.saveObject("MediaScraper_TVSeason", searchKey, results)

    if results:
        # episode_number is converted with float() so ordering is numeric.
        results.sort(key=lambda episode: float(episode.episode_number))
    return results
def findTVEpisodesForSeason(self, mediaObject, seasonNumber):
    """Return the episodes of one season, ordered by episode number.

    The 'MediaScraper_TVSeason' cache is keyed by ``<title>_S<seasonNumber>``.
    On a miss (nothing cached, or an empty cached value) the lookup is
    delegated to ``self.api.findTVEpisodesForSeason`` and its answer is
    cached. A non-empty result is sorted in place by the numeric value of
    each item's ``episode_number`` before being returned.
    """
    searchKey = mediaObject.title + '_S' + str(seasonNumber)
    results = objectcache.searchCache('MediaScraper_TVSeason', searchKey)
    if results is None or len(results) == 0:
        results = self.api.findTVEpisodesForSeason(mediaObject, seasonNumber)
        objectcache.saveObject('MediaScraper_TVSeason', searchKey, results)

    if results:
        # episode_number is converted with float() so ordering is numeric.
        results.sort(key=lambda episode: float(episode.episode_number))
    return results
def findTVShow(self, tvshow, year=None):
    """Search for TV shows matching a raw name, most popular first.

    The name is cleaned before searching: the season marker is extracted and
    removed, disc markers (e.g. " d1", "_disc 2") and a trailing "-" are
    stripped, and the year is removed (and used as *year* when the caller
    did not supply one). The cleaned name is the 'MediaScraper_TVShow' cache
    key; on a cache miss ``self.api.findTVShow`` is queried and its answer
    cached. When no show is found and the name yields exactly one acronym
    candidate, the search is retried with that acronym.

    Args:
        tvshow: raw show name, e.g. taken from a disc or file name.
        year: release year, or None to extract it from the name.

    Returns:
        The matches sorted by descending ``popularity``.
    """
    tvshow = tvshow.strip()
    seasonNumber = MediaScraper._extractSeasonNumberFromName(tvshow)
    tvshow = MediaScraper._removeSeasonFromName(tvshow)
    # look for a ' '/'_' followed by 'd'/'disc'/'disk' followed by a number
    # and remove
    tvshow = re.sub(r'(?i)[_ ](d|disc|disk)[_| ]?\d{1,2}', '', tvshow)
    tvshow = tvshow.strip()
    # endswith() is safe when cleaning has left an empty name; indexing
    # tvshow[-1] would raise IndexError there.
    if tvshow.endswith('-'):
        tvshow = tvshow[:-1]
    tvshow = tvshow.strip()

    if year is None:
        year = MediaScraper._extractYearFromName(tvshow)
    tvshow = MediaScraper._removeYearFromName(tvshow)

    results = objectcache.searchCache('MediaScraper_TVShow', tvshow)
    if results is None:
        # Treat a None answer from the API as "no results" so the length
        # check below cannot fail.
        results = self.api.findTVShow(tvshow, seasonNumber, year) or []
        objectcache.saveObject('MediaScraper_TVShow', tvshow, results)

    if len(results) == 0:
        logging.info('No results found for ' + tvshow +
                     ', searching for acronyms')
        acronyms = MediaScraper._acronymsFromNameWithType(tvshow, 'tvshow')
        logging.debug('Found acronyms: ' + str(acronyms))
        # Only retry when the acronym is unambiguous.
        if len(acronyms) == 1:
            results = self.api.findTVShow(acronyms[0], seasonNumber, year)

    if results:
        # sort by most popular; kept as sort + reverse so the relative order
        # of equally popular entries is unchanged
        results.sort(key=lambda show: float(show.popularity))
        results.reverse()
    logging.debug('Returning TV shows: ' + str(results))
    return results
def findTVShow(self, tvshow, year=None):
    """Search for TV shows matching a raw name, most popular first.

    The name is cleaned before searching: the season marker is extracted and
    removed, disc markers (e.g. " d1", "_disc 2") and a trailing "-" are
    stripped, and the year is removed (and used as *year* when the caller
    did not supply one). The cleaned name is the "MediaScraper_TVShow" cache
    key; on a cache miss ``self.api.findTVShow`` is queried and its answer
    cached. When no show is found and the name yields exactly one acronym
    candidate, the search is retried with that acronym.

    Args:
        tvshow: raw show name, e.g. taken from a disc or file name.
        year: release year, or None to extract it from the name.

    Returns:
        The matches sorted by descending ``popularity``.
    """
    tvshow = tvshow.strip()
    seasonNumber = MediaScraper._extractSeasonNumberFromName(tvshow)
    tvshow = MediaScraper._removeSeasonFromName(tvshow)
    # look for a ' '/'_' followed by 'd'/'disc'/'disk' followed by a number
    # and remove
    tvshow = re.sub(r"(?i)[_ ](d|disc|disk)[_| ]?\d{1,2}", "", tvshow)
    tvshow = tvshow.strip()
    # endswith() is safe when cleaning has left an empty name; indexing
    # tvshow[-1] would raise IndexError there.
    if tvshow.endswith("-"):
        tvshow = tvshow[:-1]
    tvshow = tvshow.strip()

    if year is None:
        year = MediaScraper._extractYearFromName(tvshow)
    tvshow = MediaScraper._removeYearFromName(tvshow)

    results = objectcache.searchCache("MediaScraper_TVShow", tvshow)
    if results is None:
        # Treat a None answer from the API as "no results" so the length
        # check below cannot fail.
        results = self.api.findTVShow(tvshow, seasonNumber, year) or []
        objectcache.saveObject("MediaScraper_TVShow", tvshow, results)

    if len(results) == 0:
        logging.info("No results found for " + tvshow + ", searching for acronyms")
        acronyms = MediaScraper._acronymsFromNameWithType(tvshow, "tvshow")
        logging.debug("Found acronyms: " + str(acronyms))
        # Only retry when the acronym is unambiguous.
        if len(acronyms) == 1:
            results = self.api.findTVShow(acronyms[0], seasonNumber, year)

    if results:
        # sort by most popular; kept as sort + reverse so the relative order
        # of equally popular entries is unchanged
        results.sort(key=lambda show: float(show.popularity))
        results.reverse()
    logging.debug("Returning TV shows: " + str(results))
    return results
def findMovie(self, movie, year=None):
    """Search for movies matching a raw name, most popular first.

    The year is removed from the name (and used as *year* when the caller
    did not supply one). The cleaned name is the 'MediaScraper_Movie' cache
    key; on a cache miss every search candidate derived from the name is
    sent to ``self.api.findMovie`` and the combined matches are cached. When
    nothing is found and the name yields exactly one acronym candidate, the
    search is retried with that acronym.

    Args:
        movie: raw movie name, e.g. taken from a disc or file name.
        year: release year, or None to extract it from the name.

    Returns:
        The matches sorted by descending ``popularity``.
    """
    movie = movie.strip()
    if year is None:
        year = MediaScraper._extractYearFromName(movie)
    movie = MediaScraper._removeYearFromName(movie)

    results = objectcache.searchCache('MediaScraper_Movie', movie)
    if results is None:
        results = []
        for searchWord in MediaScraper._searchCandidatesFromName(movie):
            candidateMatches = self.api.findMovie(searchWord, year)
            if candidateMatches:
                results += candidateMatches
        objectcache.saveObject('MediaScraper_Movie', movie, results)

    if len(results) == 0:
        logging.debug('No results found for ' + movie +
                      ', searching for acronyms')
        acronyms = MediaScraper._acronymsFromNameWithType(movie, 'movie')
        logging.debug('Found acronyms: ' + str(acronyms))
        # Only retry when the acronym is unambiguous.
        if len(acronyms) == 1:
            results = self.api.findMovie(acronyms[0], year)

    if results:
        # sort by most popular; kept as sort + reverse so the relative order
        # of equally popular entries is unchanged
        results.sort(key=lambda film: float(film.popularity))
        results.reverse()
    logging.debug('Returning Movies: ' + str(results))
    return results
def findMovie(self, movie, year=None):
    """Search for movies matching a raw name, most popular first.

    The year is removed from the name (and used as *year* when the caller
    did not supply one). The cleaned name is the "MediaScraper_Movie" cache
    key; on a cache miss every search candidate derived from the name is
    sent to ``self.api.findMovie`` and the combined matches are cached. When
    nothing is found and the name yields exactly one acronym candidate, the
    search is retried with that acronym.

    Args:
        movie: raw movie name, e.g. taken from a disc or file name.
        year: release year, or None to extract it from the name.

    Returns:
        The matches sorted by descending ``popularity``.
    """
    movie = movie.strip()
    if year is None:
        year = MediaScraper._extractYearFromName(movie)
    movie = MediaScraper._removeYearFromName(movie)

    results = objectcache.searchCache("MediaScraper_Movie", movie)
    if results is None:
        results = []
        for searchWord in MediaScraper._searchCandidatesFromName(movie):
            candidateMatches = self.api.findMovie(searchWord, year)
            if candidateMatches:
                results += candidateMatches
        objectcache.saveObject("MediaScraper_Movie", movie, results)

    if len(results) == 0:
        logging.debug("No results found for " + movie + ", searching for acronyms")
        acronyms = MediaScraper._acronymsFromNameWithType(movie, "movie")
        logging.debug("Found acronyms: " + str(acronyms))
        # Only retry when the acronym is unambiguous.
        if len(acronyms) == 1:
            results = self.api.findMovie(acronyms[0], year)

    if results:
        # sort by most popular; kept as sort + reverse so the relative order
        # of equally popular entries is unchanged
        results.sort(key=lambda film: float(film.popularity))
        results.reverse()
    logging.debug("Returning Movies: " + str(results))
    return results
def findTVEpisodesForShow(self, mediaObject):
    """Collect the episodes of every season of a show, using the object cache.

    The "MediaScraper_TVShow" cache is consulted first, keyed by
    ``mediaObject.unique_id``. On a miss (nothing cached, or an empty cached
    value) seasons are requested from ``self.api`` one at a time, starting
    at season 1, until a season yields no episodes or its request raises.
    A non-empty combined result is stored in the cache.

    Returns:
        The list of episodes found (empty when nothing was found).
    """
    import time  # function-local import, as in the original, but done once

    searchKey = mediaObject.unique_id
    results = objectcache.searchCache("MediaScraper_TVShow", searchKey)
    if results is not None and len(results) > 0:
        return results

    results = []
    seasonNumber = 1
    while True:
        try:
            seasonEpisodes = self.api.findTVEpisodesForSeason(mediaObject, seasonNumber)
        except Exception as e:
            # A failed request ends the walk, exactly like an empty season.
            logging.error(
                "Failed to get episodes for "
                + str(mediaObject)
                + " season "
                + str(seasonNumber)
                + " error: "
                + str(e)
            )
            seasonEpisodes = []

        # Also covers an API that answers None instead of an empty list.
        if not seasonEpisodes:
            break

        results += seasonEpisodes
        seasonNumber += 1
        # Pause before asking for the next season.
        time.sleep(5)

    if len(results) > 0:
        objectcache.saveObject("MediaScraper_TVShow", searchKey, results)
    return results