Example #1
0
    def search(self, results, media, lang, manual=False):
        """Fill ``results`` with KinoPoisk matches for the show described by ``media``.

        The show title and year are read from ``media`` and handed to the
        class-level parser. Each entry it returns is turned into a
        MetadataSearchResult (id, name, year, score taken from positions
        0..3 of the entry) and appended to ``results``, which is finally
        sorted by descending score. According to the original notes the
        score expresses match quality on a scale of 1 - 100.

        Args:
            results: container exposing Append() and Sort(); filled in place.
            media: object providing ``show``, ``year``, ``guid`` and ``hash``.
            lang: language value stored on every appended result.
            manual: not used by this method.
        """
        LOGGER.Debug('SEARCH START <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
        showTitle, showYear = media.show, media.year
        requestInfo = (str(showTitle), str(showYear), str(media.guid), str(media.hash))
        LOGGER.Debug('searching for name="%s", year="%s", guid="%s", hash="%s"...' % requestInfo)

        # Ask the parser for candidates; each entry is expected to be laid out
        # as [kinoPoiskId, title, year, score].
        parser = KinoPoiskRuShowsAgent.parser
        for entry in parser.fetchAndParseSearchResults(showTitle, showYear):
            candidate = MetadataSearchResult(
                id=entry[0], name=entry[1], year=entry[2], lang=lang, score=entry[3])
            results.Append(candidate)

        # Best-scoring candidates go first.
        results.Sort('score', descending=True)
        if IS_DEBUG:
            common.printSearchResults(results)
        LOGGER.Debug('SEARCH END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
Example #2
0
  def search(self, results, media, lang, manual=False):
    """ Looks up the show from the media object on KinoPoisk.

        The title (media.show) and year (media.year) are passed to the
        class-level parser. Every entry it returns is wrapped in a
        MetadataSearchResult built from the entry's first four positions
        (id, name, year, score) and appended to results. The results are
        then ordered by score, highest first. Per the original notes the
        score rates the match on a scale of 1 - 100.

        results -- container with Append() and Sort(); modified in place.
        media   -- object exposing show, year, guid and hash.
        lang    -- language value attached to each result.
        manual  -- not used by this method.
    """
    LOGGER.Debug('SEARCH START <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
    wantedName = media.show
    wantedYear = media.year
    description = 'searching for name="%s", year="%s", guid="%s", hash="%s"...' % (
        str(wantedName), str(wantedYear), str(media.guid), str(media.hash))
    LOGGER.Debug(description)

    # Candidates come back from the parser as [kinoPoiskId, title, year, score] entries.
    candidates = KinoPoiskRuShowsAgent.parser.fetchAndParseSearchResults(wantedName, wantedYear)
    for candidate in candidates:
      searchResult = MetadataSearchResult(
          id=candidate[0],
          name=candidate[1],
          year=candidate[2],
          lang=lang,
          score=candidate[3])
      results.Append(searchResult)

    # Highest score first.
    results.Sort('score', descending=True)
    if IS_DEBUG:
      common.printSearchResults(results)
    LOGGER.Debug('SEARCH END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
Example #3
0
  def search(self, results, media, lang, manual=False):
    """ Searches KinoPoisk for films matching the title and year of media.

        The title (media.name) is encoded, URL-quoted and substituted into
        KINOPOISK_SEARCH; the returned page is scanned for film links. For
        each link we determine the KinoPoisk id, title, optional year and
        optional alternative (original) title, compute a score via
        common.scoreMediaTitleMatch and append a MetadataSearchResult to
        results. Per the original notes the score rates the match on a
        scale of 1 - 100. Finally results are sorted by descending score.

        A failure while parsing one entry is logged and does not stop the
        processing of the remaining entries.

        results -- container with Append() and Sort(); modified in place.
        media   -- object exposing name, year, guid and hash.
        lang    -- language value attached to each result.
        manual  -- not used by this method.
    """
    Log.Debug('SEARCH START <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
    mediaName = media.name
    mediaYear = media.year
    Log.Debug('searching for name="%s", year="%s", guid="%s", hash="%s"...' %
        (str(mediaName), str(mediaYear), str(media.guid), str(media.hash)))

    # Fetch the search page. The title is encoded and quoted once and the
    # same value is used for both the log line and the request.
    Log.Debug('quering kinopoisk...')
    quotedName = urllib.quote(mediaName.encode(ENCODING_KINOPOISK_PAGE))
    Log.Debug('Loading page "%s"' % quotedName)
    page = common.getElementFromHttpRequest(KINOPOISK_SEARCH % quotedName, ENCODING_KINOPOISK_PAGE)

    if page is None:
      Log.Warn('nothing was found on kinopoisk for media name "%s"' % mediaName)
    else:
      # The page was received: collect the containers of all film titles on it.
      Log.Debug('got a kinopoisk page to parse...')
      divInfoElems = page.xpath('//self::div[@class="info"]/p[@class="name"]/a[contains(@href,"/level/1/film/")]/..')
      if divInfoElems:
        Log.Debug('found %d results' % len(divInfoElems))
        # itemIndex is the position of the entry on the page; it advances
        # for every entry, including the ones that fail to parse.
        for itemIndex, divInfoElem in enumerate(divInfoElems):
          try:
            anchorFilmElem = divInfoElem.xpath('./a[contains(@href,"/level/1/film/")]/attribute::href')
            if not anchorFilmElem:
              Log.Warn('unable to find film anchor elements for title "%s"' % mediaName)
              continue

            # Parse kinopoisk movie title id, title and year.
            match = re.search(r'/film/(.+?)/', anchorFilmElem[0])
            if match is None:
              Log.Error('unable to parse movie title id')
              continue

            kinoPoiskId = match.group(1)
            title = common.getXpathRequiredNode(divInfoElem, './/a[contains(@href,"/level/1/film/")]/text()')
            year = common.getXpathOptionalNode(divInfoElem, './/span[@class="year"]/text()')

            # Try to parse the alternative (original) title. Ignore failures.
            # This is a <span> below the title <a> tag.
            # altTitle is reset for every entry so that a failure here can
            # never leak the previous entry's alternative title into scoring.
            altTitle = None
            try:
              altTitle = common.getXpathOptionalNode(divInfoElem, '../span[1]/text()')
              if altTitle is not None:
                altTitle = altTitle.split(',')[0].strip()
            except Exception:
              # Best effort only: score without an alternative title.
              altTitle = None

            score = common.scoreMediaTitleMatch(mediaName, mediaYear, title, altTitle, year, itemIndex)
            results.Append(MetadataSearchResult(id=kinoPoiskId, name=title, year=year, lang=lang, score=score))
          except Exception:
            common.logException('failed to parse div.info container')
      else:
        Log.Warn('nothing was found on kinopoisk for media name "%s"' % mediaName)
        # TODO(zhenya): investigate if we need this clause at all (haven't seen this happening).
        # If no title text was found there, the site presumably returned the
        # film page directly (one would like to believe so =)
        # try:
        #   title = page.xpath('//h1[@class="moviename-big"]/text()')[0].strip()
        #   kinoPoiskId = re.search('\/film\/(.+?)\/', page.xpath('.//link[contains(@href, "/film/")]/attribute::href')[0]).groups(0)[0]
        #   year = page.xpath('//a[contains(@href,"year")]/text()')[0].strip()
        #   score = common.scoreMediaTitleMatch(mediaName, mediaYear, title, altTitle, year, itemIndex)
        #   results.Append(MetadataSearchResult(id=kinoPoiskId, name=title, year=year, lang=lang, score=score))
        # except:
        #   common.logException('failed to parse a KinoPoisk page')

    # Sort results according to their score.
    results.Sort('score', descending=True)
    if IS_DEBUG:
      common.printSearchResults(results)
    Log.Debug('SEARCH END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')