def getHtml(url, cache=True):
    """Fetch the text behind *url* and return it with HTML entities decoded.

    url   -- address handed to getText(); also written to the error log.
    cache -- forwarded unchanged to getText() as its ``cache`` keyword.

    Returns the result of decode_htmlentities() applied to the fetched text,
    or None when getText() returned None or when any exception was raised
    while fetching or decoding (the exception is logged, not propagated).
    """
    # Initialised before the try block so the final return is always bound,
    # even when getText() itself raises.
    decodedHtml = None
    try:
        rawHtml = getText(url, cache=cache)
        if rawHtml is not None:
            decodedHtml = decode_htmlentities(rawHtml)
    except Exception as ex:
        # Best-effort fetch: log the failure together with the URL and fall
        # through to return None so callers can test the result for falsiness.
        printl("Exception (ef): " + str(ex), __name__, "E")
        printl("\tURL: " + str(Utf8.utf8ToLatin(url)), __name__, "E")
    return decodedHtml
def getMovieByTitle(self, mediaInfo):
    """Search by title and return mediaInfo filled in from the result page.

    mediaInfo -- object providing isTypeSerie(), SearchString and ImdbId.

    For a series the TV search URL (self.apiSearchTV, with its "<title>"
    placeholder substituted) is tried first; if that yields an ImdbId other
    than "tt0000000" the result is returned immediately. Otherwise the
    general search (self.apiSearch + title) is fetched and parsed either as
    a search result list or as a details page.

    Returns the mediaInfo produced by the parse helpers, or the mediaInfo
    held at that point whenever a page fetch returns nothing.
    """
    if mediaInfo.isTypeSerie():
        urlTitle = re.sub(" ", "+", mediaInfo.SearchString)
        # Literal substitution on purpose: with re.sub() the title would be
        # used as a replacement template, so a backslash or group reference
        # in a title would be interpreted (or rejected) by the re module.
        pageHtml = WebGrabber.getHtml(
            self.apiSearchTV.replace("<title>", urlTitle))
        if not pageHtml:
            return mediaInfo

        if re.search(r'Most Popular TV Series With Title Matching', pageHtml):
            mediaInfo = self.parseAdvancedSearchResultScreen(
                mediaInfo, pageHtml)
            mediaInfo = self.getMoviesByImdbId(mediaInfo)

        # "tt0000000" is presumably the "no id found" placeholder.
        # NOTE(review): confirm this check belongs to the series branch only.
        if mediaInfo.ImdbId != "tt0000000":
            return mediaInfo

    # Re-read SearchString here: mediaInfo may have been replaced by the
    # parse helpers above.
    urlTitle = re.sub(" ", "+", mediaInfo.SearchString)
    pageHtml = WebGrabber().grab(self.apiSearch + urlTitle)
    if not pageHtml:
        return mediaInfo

    pageHtml = decode_htmlentities(pageHtml)
    if re.search(r'<title>IMDb Title Search</title>', pageHtml):
        mediaInfo = self.parseSearchResultScreen(mediaInfo, pageHtml)
        mediaInfo = self.getMoviesByImdbId(mediaInfo)
    else:
        # Any page that is not the search result list is parsed as a details
        # page; an earlier title-with-year check guarding this branch is
        # disabled.
        mediaInfo = self.parseDetailsScreen(mediaInfo, pageHtml)
    return mediaInfo
def getMovieByTitle(self, mediaInfo):
    """Search by title and return mediaInfo filled in from the result page.

    mediaInfo -- object providing isTypeSerie(), SearchString and ImdbId.

    For a series the TV search URL (self.apiSearchTV, with its "<title>"
    placeholder substituted) is tried first; if that yields an ImdbId other
    than "tt0000000" the result is returned immediately. Otherwise the
    general search (self.apiSearch + title) is fetched and parsed either as
    a search result list or as a details page.

    Returns the mediaInfo produced by the parse helpers, or the mediaInfo
    held at that point whenever a page fetch returns nothing.
    """
    if mediaInfo.isTypeSerie():
        urlTitle = re.sub(" ", "+", mediaInfo.SearchString)
        # Literal substitution on purpose: with re.sub() the title would be
        # used as a replacement template, so a backslash or group reference
        # in a title would be interpreted (or rejected) by the re module.
        pageHtml = WebGrabber.getHtml(
            self.apiSearchTV.replace("<title>", urlTitle))
        if not pageHtml:
            return mediaInfo

        if re.search(r'Most Popular TV Series With Title Matching', pageHtml):
            mediaInfo = self.parseAdvancedSearchResultScreen(
                mediaInfo, pageHtml)
            mediaInfo = self.getMoviesByImdbId(mediaInfo)

        # "tt0000000" is presumably the "no id found" placeholder.
        # NOTE(review): confirm this check belongs to the series branch only.
        if mediaInfo.ImdbId != "tt0000000":
            return mediaInfo

    # Re-read SearchString here: mediaInfo may have been replaced by the
    # parse helpers above.
    urlTitle = re.sub(" ", "+", mediaInfo.SearchString)
    pageHtml = WebGrabber().grab(self.apiSearch + urlTitle)
    if not pageHtml:
        return mediaInfo

    pageHtml = decode_htmlentities(pageHtml)
    if re.search(r'<title>IMDb Title Search</title>', pageHtml):
        mediaInfo = self.parseSearchResultScreen(mediaInfo, pageHtml)
        mediaInfo = self.getMoviesByImdbId(mediaInfo)
    else:
        # Any page that is not the search result list is parsed as a details
        # page; an earlier title-with-year check guarding this branch is
        # disabled.
        mediaInfo = self.parseDetailsScreen(mediaInfo, pageHtml)
    return mediaInfo