コード例 #1
0
def getHtml(url, cache=True):
	"""Fetch the text behind *url* and return it with HTML entities decoded.

	Args:
		url: Address handed to ``getText``.
		cache: Forwarded unchanged to ``getText`` as its ``cache`` keyword.

	Returns:
		The result of ``decode_htmlentities`` applied to the fetched text, or
		``None`` when ``getText`` yields ``None`` or when any exception is
		raised while fetching or decoding.
	"""
	# Initialised before the try so the final return is valid on every path.
	decodedHtml = None
	try:
		rawHtml = getText(url, cache=cache)
		if rawHtml is not None:
			decodedHtml = decode_htmlentities(rawHtml)
	except Exception as ex:
		# Best effort: report the failure and fall through to return None.
		printl("Exception (ef): " + str(ex), __name__, "E")
		printl("\tURL: " + str(Utf8.utf8ToLatin(url)), __name__, "E")
	# Hand the decoded page back to the caller; callers test this value.
	return decodedHtml
コード例 #2
0
def getHtml(url, cache=True):
	"""Return the entity-decoded text fetched from *url*, or ``None``.

	Args:
		url: Address passed to ``getText``.
		cache: Passed through to ``getText`` as the ``cache`` keyword.

	Returns:
		Whatever ``decode_htmlentities`` produces for the fetched text.
		``None`` is returned if ``getText`` returned ``None`` or if fetching
		or decoding raised an exception (the exception is logged, not
		propagated).
	"""
	# Default result; stays None unless decoding succeeds below.
	decodedHtml = None
	try:
		rawHtml = getText(url, cache=cache)
		if rawHtml is not None:
			decodedHtml = decode_htmlentities(rawHtml)
	except Exception as ex:
		# Failures are only logged so that a bad page does not abort the caller.
		printl("Exception (ef): " + str(ex), __name__, "E")
		printl("\tURL: " + str(Utf8.utf8ToLatin(url)), __name__, "E")
	# Without this return the decoded page would be discarded.
	return decodedHtml
コード例 #3
0
    def getMovieByTitle(self, mediaInfo):
        """Search by ``mediaInfo.SearchString`` and fill in *mediaInfo*.

        When ``mediaInfo.isTypeSerie()`` is true the TV search URL
        (``self.apiSearchTV``) is tried first. If that does not lead to an
        ``ImdbId`` other than ``"tt0000000"``, the generic search
        (``self.apiSearch``) is used, as it is for every other media type.

        Args:
            mediaInfo: Object exposing ``SearchString``, ``ImdbId`` and
                ``isTypeSerie()``; it is handed to the parse helpers.

        Returns:
            The media info returned by the parse helpers, or the current
            media info unchanged when a fetched page is empty.
        """
        if mediaInfo.isTypeSerie():
            urlTitle = mediaInfo.SearchString.replace(" ", "+")

            # Literal substitution: with re.sub the title would be read as a
            # replacement template, so a backslash in it could raise or be
            # rewritten as an escape/group reference.
            pageHtml = WebGrabber.getHtml(
                self.apiSearchTV.replace("<title>", urlTitle))

            if not pageHtml:
                return mediaInfo

            if 'Most Popular TV Series With Title Matching' in pageHtml:
                mediaInfo = self.parseAdvancedSearchResultScreen(
                    mediaInfo, pageHtml)
                mediaInfo = self.getMoviesByImdbId(mediaInfo)
                # NOTE(review): "tt0000000" looks like the "no id found"
                # placeholder - confirm against the MediaInfo defaults.
                if mediaInfo.ImdbId != "tt0000000":
                    return mediaInfo

        # Re-read the search string: mediaInfo may have been replaced by the
        # helpers in the series branch above.
        urlTitle = mediaInfo.SearchString.replace(" ", "+")

        pageHtml = WebGrabber().grab(self.apiSearch + urlTitle)

        if not pageHtml:
            return mediaInfo

        pageHtml = decode_htmlentities(pageHtml)

        if '<title>IMDb Title Search</title>' in pageHtml:
            # A result list came back: pick an entry, then load its details.
            mediaInfo = self.parseSearchResultScreen(mediaInfo, pageHtml)
            mediaInfo = self.getMoviesByImdbId(mediaInfo)
        else:
            # Any other page is handed to the details parser without a
            # further title check.
            mediaInfo = self.parseDetailsScreen(mediaInfo, pageHtml)
        return mediaInfo
コード例 #4
0
	def getMovieByTitle(self, mediaInfo):
		"""Resolve *mediaInfo* through a title search on its ``SearchString``.

		For series (``mediaInfo.isTypeSerie()``) the TV search URL template
		``self.apiSearchTV`` is queried first; unless that produces an
		``ImdbId`` different from ``"tt0000000"`` the generic ``self.apiSearch``
		query is performed, which is also the path for all other media types.

		Args:
			mediaInfo: Object exposing ``SearchString``, ``ImdbId`` and
				``isTypeSerie()``; it is passed on to the parse helpers.

		Returns:
			The media info produced by the parse helpers, or the current media
			info unchanged when a fetched page is empty.
		"""
		if mediaInfo.isTypeSerie():
			urlTitle = mediaInfo.SearchString.replace(" ", "+")

			# Insert the title literally; re.sub would treat backslashes in it
			# as replacement escapes/group references and could raise.
			pageHtml = WebGrabber.getHtml(self.apiSearchTV.replace("<title>", urlTitle))

			if not pageHtml:
				return mediaInfo

			if 'Most Popular TV Series With Title Matching' in pageHtml:
				mediaInfo = self.parseAdvancedSearchResultScreen(mediaInfo, pageHtml)
				mediaInfo = self.getMoviesByImdbId(mediaInfo)
				# NOTE(review): "tt0000000" is presumably the "not found"
				# placeholder id - confirm against the MediaInfo defaults.
				if mediaInfo.ImdbId != "tt0000000":
					return mediaInfo

		# Recomputed on purpose: the helpers above may have returned a
		# different mediaInfo object.
		urlTitle = mediaInfo.SearchString.replace(" ", "+")

		pageHtml = WebGrabber().grab(self.apiSearch + urlTitle)

		if not pageHtml:
			return mediaInfo

		pageHtml = decode_htmlentities(pageHtml)

		if '<title>IMDb Title Search</title>' in pageHtml:
			# Result list page: choose an entry, then fetch it by IMDb id.
			mediaInfo = self.parseSearchResultScreen(mediaInfo, pageHtml)
			mediaInfo = self.getMoviesByImdbId(mediaInfo)
		else:
			# Every other page goes straight to the details parser; no extra
			# title check is performed.
			mediaInfo = self.parseDetailsScreen(mediaInfo, pageHtml)
		return mediaInfo