def GetMediaInfo(url, mediainfo, query_external=False):
    """
    Resolve the IMDb id for an item and optionally query a meta provider.

    If ``mediainfo.id`` is set and starts with "tt" followed by digits it is
    used as the IMDb id. Otherwise the page at ``ICEFILMS_URL + url`` is
    fetched and the id is extracted from the ``href`` of the element
    returned by ``soup.find('a', 'iframe')``.

    Args:
        url: Item path; appended to ``ICEFILMS_URL`` when the page has to be
            fetched.
        mediainfo: Object exposing ``id``. When ``query_external`` is true,
            ``type`` and ``season`` are read too, plus ``show_name`` and
            ``ep_num`` if the object has them.
        query_external: When true, the arguments collected from
            ``mediainfo`` are handed to the provider selected by
            ``mediainfo.type`` and the provider's result is returned.

    Returns:
        The provider's result when ``query_external`` is true; otherwise the
        passed-in ``mediainfo`` with ``id`` set to the IMDb id. ``None`` if
        any step raises (the exception is logged).
    """
    try:
        if mediainfo.id and re.match(r"tt\d+", mediainfo.id):
            imdb_id = mediainfo.id
        else:
            soup = BeautifulSoup(HTTP.Request(ICEFILMS_URL + url).content)
            # A missing link or an href without an id raises here and is
            # reported by the handler below.
            imdb_link = soup.find('a', 'iframe')['href']
            imdb_id = re.search(r"(tt\d+)", str(imdb_link)).group()

        if not query_external:
            mediainfo.id = imdb_id
            return mediainfo

        # Construct kwargs for the provider lookup.
        kwargs = {
            'imdb_id': imdb_id,
            'season': mediainfo.season,
        }
        # show_name / ep_num are optional on the media info object.
        if hasattr(mediainfo, 'show_name'):
            kwargs['show_name'] = mediainfo.show_name
        if hasattr(mediainfo, 'ep_num'):
            kwargs['ep_num'] = mediainfo.ep_num

        provider = DBProvider().GetProvider(mediainfo.type)
        return provider.RetrieveItemFromProvider(**kwargs)

    except Exception:
        # Boundary handler: callers get None instead of an exception.
        Log.Exception("Error whilst retrieveing media info")
        return None
def GetMediaInfo(url, mediainfo, query_external=False):
    """
    Resolve the IMDb id for an item and optionally query a meta provider.

    If ``mediainfo.id`` is set and starts with "tt" followed by digits it is
    used as the IMDb id. Otherwise the page at ``ICEFILMS_URL + url`` is
    fetched and the id is extracted from the ``href`` of the element
    returned by ``soup.find('a', 'NOiframe')``.

    Args:
        url: Item path; appended to ``ICEFILMS_URL`` when the page has to be
            fetched.
        mediainfo: Object exposing ``id``. When ``query_external`` is true,
            ``type`` and ``season`` are read too, plus ``show_name`` and
            ``ep_num`` if the object has them.
        query_external: When true, the arguments collected from
            ``mediainfo`` are handed to the provider selected by
            ``mediainfo.type`` and the provider's result is returned.

    Returns:
        The provider's result when ``query_external`` is true; otherwise the
        passed-in ``mediainfo`` with ``id`` set to the IMDb id. ``None`` if
        any step raises (the exception is logged).
    """
    try:
        has_imdb_id = mediainfo.id and re.match(r"tt\d+", mediainfo.id)
        if has_imdb_id:
            imdb_id = mediainfo.id
        else:
            page = HTTP.Request(ICEFILMS_URL + url).content
            soup = BeautifulSoup(page)
            # A missing link or an href without an id raises here and is
            # reported by the handler below.
            imdb_link = soup.find('a', 'NOiframe')['href']
            imdb_id = re.search(r"(tt\d+)", str(imdb_link)).group()

        if query_external:
            # Construct kwargs for the provider lookup.
            kwargs = {}
            kwargs['imdb_id'] = imdb_id
            kwargs['season'] = mediainfo.season

            # show_name / ep_num are optional on the media info object.
            for optional_attr in ('show_name', 'ep_num'):
                if hasattr(mediainfo, optional_attr):
                    kwargs[optional_attr] = getattr(mediainfo, optional_attr)

            mediainfo = DBProvider().GetProvider(
                mediainfo.type
            ).RetrieveItemFromProvider(**kwargs)
        else:
            mediainfo.id = imdb_id

        return mediainfo

    except Exception:
        # Boundary handler: callers get None instead of an exception.
        Log.Exception("Error whilst retrieveing media info")
        return None
def _lmwt_runtime_to_ms(text):
    # Convert a runtime in minutes (first run of digits in text) to milliseconds.
    minutes = int(re.search(r"(\d+)", text).group(0))
    millis = minutes * 60 * 1000
    # Anything at or above sys.maxint is reported as 0 instead.
    return millis if millis < sys.maxint else 0


def GetMediaInfo(url, mediainfo, query_external=False):
    """
    Retrieve meta data about the passed in LMWT item from a meta provider.

    Additionally, for any info not returned by the meta provider, try to
    collect the info directly from the LMWT item page.

    Args:
        url: Item path; appended to ``Dict['LMWT_URL']`` to fetch the page.
        mediainfo: Object exposing ``show_name`` and ``season``; ``ep_num``
            is used if present and ``type`` is read when ``query_external``
            is true.
        query_external: When true, start from the result of the provider
            selected by ``mediainfo.type``; otherwise start from a fresh
            ``MediaInfo`` that only carries the IMDb id.

    Returns:
        The media info object, with ``summary``, ``releasedate``,
        ``duration``, ``rating`` and ``title`` filled in from the page when
        they were not already set. ``None`` if processing the fetched page
        raises (the exception is logged). Errors raised while fetching or
        parsing the page itself are not caught here.
    """
    # The description meta header for some shows inserts random double quotes in the
    # content which breaks the parsing of the page. Work around that by simply
    # removing the head section in which the meta elements are contained.
    headMassage = [(re.compile('<head>(.*)</head>', re.S), lambda match: '')]
    soupMassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
    soupMassage.extend(headMassage)

    soup = BeautifulSoup(
        HTTP.Request(Dict['LMWT_URL'] + url).content,
        markupMassage=soupMassage
    )

    try:
        # The IMDb link is optional; without it the id simply stays None.
        imdb_id = None
        try:
            imdb_link = soup.find('div', 'mlink_imdb').a['href']
            imdb_id = re.search(r"(tt\d+)", str(imdb_link)).group()
        except (AttributeError, KeyError, TypeError):
            pass

        # Construct kwargs.
        kwargs = {
            'imdb_id': imdb_id,
            'show_name': mediainfo.show_name,
            'season': mediainfo.season,
        }
        if hasattr(mediainfo, 'ep_num'):
            kwargs['ep_num'] = mediainfo.ep_num

        if query_external:
            mediainfo_ret = DBProvider().GetProvider(
                mediainfo.type
            ).RetrieveItemFromProvider(**kwargs)
        else:
            mediainfo_ret = MediaInfo()
            mediainfo_ret.id = imdb_id

        # Also parse the LMWT page and extract out any info not set by the meta provider.
        info_div = soup.find('div', 'movie_info')

        info = {}
        # First, extract out description...
        info['Description:'] = info_div.find('td', {'colspan': '2'}).text
        # Then, ratings....
        info['Rating:'] = info_div.find('li', 'current-rating').text

        # Extract out any other info: plain two-cell rows are label / value pairs.
        for row in info_div.findAll('tr'):
            row_items = row.findAll('td')
            if len(row_items) != 2 or "colspan" in str(row_items[0]):
                continue
            info[row_items[0].text] = row_items[1].text

        # Map available extracted info back to the media info object.
        # Each entry maps a page label to the attribute it feeds and to a
        # function that turns the page text into a sane value.
        item_map = {
            'Description:': ['summary', Utils.decode_htmlentities],
            'Air Date:': [
                'releasedate',
                lambda x: datetime.strptime(x, '%B %d, %Y'),
            ],
            'Runtime:': ['duration', _lmwt_runtime_to_ms],
            # The page value is doubled before being stored as the rating.
            'Rating:': [
                'rating',
                lambda x: float(re.search(r"([\d\.]+)", x).group(0)) * 2,
            ],
            'Title:': ['title', Utils.decode_htmlentities],
        }

        # For each extracted item from LMWT...
        for lmwt_item, raw_value in info.items():
            # Look for matching entry in map...
            mi_item = item_map.get(lmwt_item)
            if mi_item is None:
                continue

            attr_name, convert = mi_item
            try:
                # Only fill in values the media info object does not have yet.
                if not getattr(mediainfo_ret, attr_name, None):
                    setattr(mediainfo_ret, attr_name, convert(raw_value))
            except Exception:
                # Best effort: a field that cannot be parsed is left unset.
                pass

        return mediainfo_ret

    except Exception:
        # Boundary handler: callers get None instead of an exception.
        Log.Exception("Error whilst retrieveing media info")
        return None
def GetMediaInfo(url, mediainfo, query_external=False): """ Retrieve meta data about the passed in LMWT item from a meta provider. Additionally, for any info not returned by the meta provider, try to collect the info directly from the LMWT item page. """ # The description meta header for some shows inserts random double quotes in the # content which breaks the parsing of the page. Work around that by simply # removing the head section in which the meta elements are contained. headMassage = [(re.compile('<head>(.*)</head>', re.S), lambda match: '')] soupMassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) soupMassage.extend(headMassage) soup = BeautifulSoup(HTTP.Request(Dict['LMWT_URL'] + url).content, markupMassage=soupMassage) try: imdb_id = None try: imdb_link = soup.find('div','mlink_imdb').a['href'] imdb_id = re.search("(tt\d+)", str(imdb_link)).group() except: pass # Construct kwargs. kwargs = {} kwargs['imdb_id'] = imdb_id kwargs['show_name'] = mediainfo.show_name kwargs['season'] = mediainfo.season if hasattr(mediainfo, 'ep_num'): kwargs['ep_num'] = mediainfo.ep_num if (query_external): #Log("Query-ing External Provider") mediainfo_ret = DBProvider().GetProvider(mediainfo.type).RetrieveItemFromProvider(**kwargs) #Log(str(mediainfo)) else: mediainfo_ret = MediaInfo() mediainfo_ret.id = imdb_id # Also parse the LMWT page and extract out any info not set by the meta provider. info_div = soup.find('div', 'movie_info') # First, extract out description... info = {} info['Description:'] = info_div.find('td', { 'colspan': '2' }).text # Then, ratings.... info['Rating:'] = info_div.find('li', 'current-rating').text # Extract out any other info. for row in info_div.findAll('tr'): row_items = row.findAll('td') if len(row_items) <> 2 or "colspan" in str(row_items[0]): continue info[row_items[0].text] = row_items[1].text # Map available extracted info back to the media info object. 
# First, define the mapping between LMWT items and media info and an additional function # to extract out sane info out of the LMWT data. item_map = { 'Description:' : ['summary', lambda x: Utils.decode_htmlentities(x)], 'Air Date:' : ['releasedate', lambda x: datetime.strptime(x, '%B %d, %Y')], 'Runtime:' : ['duration', lambda x: int(re.search("(\d*)", x).group(0)) * 60 * 1000 if int(re.search("(\d*)", x).group(0)) * 60 * 1000 < sys.maxint else 0], 'Rating:' : ['rating', lambda x: float(re.search("([\d\.]+)", x).group(0)) * 2], 'Title:': ['title', lambda x: decode_htmlentities(x)], } # For each extracted item from LMWT... for lmwt_item in info.keys(): #Log("Processing: " + lmwt_item) # Look for matching entry in map... if lmwt_item not in item_map.keys(): #Log("Not processing - no mapping") continue mi_item = item_map[lmwt_item] if (mi_item is None): #Log("Couldn't find a mi attr!") continue try: # And see if it's already set in the mediaInfo object. mi_val = getattr(mediainfo_ret, mi_item[0], None) #Log("Current mi value: " + str(mi_val)) # And set it if it's not already. if (not mi_val): #Log("Setting mi attr " + mi_item[0] + " to: " + str(mi_item[1](info[lmwt_item]))) setattr(mediainfo_ret, mi_item[0], mi_item[1](info[lmwt_item])) except Exception, ex: #Log.Exception("Error whilst reading in info from LMWT Page. Field " + lmwt_item) pass return mediainfo_ret