def searchForImdbTitles(mediaName, mediaYear, lang):
    """ Queries TMDB for movies matching the given media name and returns
        the candidates that were found.

        The media name is lower-cased and substituted into the TMDB_GETINFO
        URL template. Every //movies/movie element of the response is scored
        against the requested name and year with common.scoreMediaTitleMatch.

        Args:
          mediaName: title of the media to look for.
          mediaYear: year of the media; only passed on to the scoring helper.
          lang: not used by this function.

        Returns:
          A list of dicts with the keys 'id' (text of the imdb_id node),
          'name', 'year' and 'score', in the order the movies appear in the
          response. The list is empty when no page could be fetched.
    """
    mediaName = mediaName.lower()
    # NOTE(review): only spaces are escaped here; any other reserved URL
    # characters in the title are passed through unchanged - confirm whether
    # full URL quoting is needed for TMDB_GETINFO.
    page = common.getElementFromHttpRequest(
        TMDB_GETINFO % mediaName.replace(' ', '%20'), TMDB_PAGE_ENCODING)
    if page is None:
        Log.Warn('nothing was found on tmdb for media name "%s"' % mediaName)
        return []

    matches = []
    # The position of a movie within the response is handed to the scoring
    # helper as the item index.
    for itemIndex, movieElem in enumerate(page.xpath('//movies/movie')):
        imdbId = common.getXpathRequiredNode(movieElem, './imdb_id/text()')
        title = common.getXpathRequiredNode(movieElem, './name/text()')
        altTitle = common.getXpathOptionalNode(movieElem, './alternative_name/text()')
        releaseDate = common.getXpathOptionalNode(movieElem, './released/text()')
        year = common.getReOptionalGroup(MATCHER_RELEASED, releaseDate, 0)
        score = common.scoreMediaTitleMatch(mediaName, mediaYear, title, altTitle, year, itemIndex)
        matches.append({'id': imdbId, 'name': title, 'year': year, 'score': score})
    return matches
def search(self, results, media, lang, manual=False):
    """ Searches for matches on KinoPoisk using the title and year passed via
        the media object.

        Every film found on the search page is appended to results as a
        MetadataSearchResult carrying the KinoPoisk film id, title, year,
        language and a score computed by common.scoreMediaTitleMatch (how
        good we think the match is, described by the original author as
        being on the scale of 1 - 100). The results are then sorted by
        score in descending order.

        Args:
          results: container the matches are appended to and which is sorted
            in place.
          media: object providing name, year, guid and hash.
          lang: language stored on every MetadataSearchResult.
          manual: not used by this method.

        Returns:
          None. The outcome is delivered through results.
    """
    Log.Debug('SEARCH START <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
    mediaName = media.name
    mediaYear = media.year
    Log.Debug('searching for name="%s", year="%s", guid="%s", hash="%s"...' %
        (str(mediaName), str(mediaYear), str(media.guid), str(media.hash)))

    # Fetch the search results page.
    Log.Debug('quering kinopoisk...')
    quotedName = urllib.quote(mediaName.encode(ENCODING_KINOPOISK_PAGE))
    page = common.getElementFromHttpRequest(KINOPOISK_SEARCH % quotedName, ENCODING_KINOPOISK_PAGE)
    Log.Debug('Loading page "%s"' % quotedName)

    if page is None:
        Log.Warn('nothing was found on kinopoisk for media name "%s"' % mediaName)
    else:
        # The page was fetched, so collect the list of all film titles on it.
        Log.Debug('got a kinopoisk page to parse...')
        divInfoElems = page.xpath('//self::div[@class="info"]/p[@class="name"]/a[contains(@href,"/level/1/film/")]/..')
        if divInfoElems:
            Log.Debug('found %d results' % len(divInfoElems))
            # The index advances for every container, including those that
            # are skipped or fail to parse.
            for itemIndex, divInfoElem in enumerate(divInfoElems):
                try:
                    anchorFilmElem = divInfoElem.xpath('./a[contains(@href,"/level/1/film/")]/attribute::href')
                    if not anchorFilmElem:
                        Log.Warn('unable to find film anchor elements for title "%s"' % mediaName)
                        continue

                    # Parse kinopoisk movie title id, title and year.
                    match = re.search(r'\/film\/(.+?)\/', anchorFilmElem[0])
                    if match is None:
                        Log.Error('unable to parse movie title id')
                        continue

                    kinoPoiskId = match.group(1)
                    title = common.getXpathRequiredNode(divInfoElem, './/a[contains(@href,"/level/1/film/")]/text()')
                    year = common.getXpathOptionalNode(divInfoElem, './/span[@class="year"]/text()')

                    # Try to parse the alternative (original) title. Ignore failures.
                    # This is a <span> below the title <a> tag.
                    # Reset per result so that a failed lookup cannot leave the
                    # previous result's alternative title in place for scoring.
                    altTitle = None
                    try:
                        altTitle = common.getXpathOptionalNode(divInfoElem, '../span[1]/text()')
                        if altTitle is not None:
                            altTitle = altTitle.split(',')[0].strip()
                    except Exception:
                        Log.Debug('unable to parse alternative title for "%s"' % title)

                    score = common.scoreMediaTitleMatch(mediaName, mediaYear, title, altTitle, year, itemIndex)
                    results.Append(MetadataSearchResult(id=kinoPoiskId, name=title, year=year, lang=lang, score=score))
                except Exception:
                    common.logException('failed to parse div.info container')
        else:
            Log.Warn('nothing was found on kinopoisk for media name "%s"' % mediaName)
            # TODO(zhenya): investigate if we need this clause at all (haven't seen this happening).
            # If no title text was found there, the site gave us the film page right away (one hopes =)
            # try:
            #   title = page.xpath('//h1[@class="moviename-big"]/text()')[0].strip()
            #   kinoPoiskId = re.search('\/film\/(.+?)\/', page.xpath('.//link[contains(@href, "/film/")]/attribute::href')[0]).groups(0)[0]
            #   year = page.xpath('//a[contains(@href,"year")]/text()')[0].strip()
            #   score = common.scoreMediaTitleMatch(mediaName, mediaYear, title, altTitle, year, itemIndex)
            #   results.Append(MetadataSearchResult(id=kinoPoiskId, name=title, year=year, lang=lang, score=score))
            # except:
            #   common.logException('failed to parse a KinoPoisk page')

    # Sort results according to their score.
    results.Sort('score', descending=True)
    if IS_DEBUG:
        common.printSearchResults(results)
    Log.Debug('SEARCH END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')