Beispiel #1
0
 def _getitem(self, key):
     """Handle special keys.

     Resolve keys whose values are derived from ``self.data`` instead of
     being read from it verbatim.

     If ``self.data`` has an 'episode of' entry, these keys are handled:
     'long imdb episode title', 'series title', 'canonical series title',
     'episode title' and 'canonical episode title'.
     If ``self.data`` has a 'title' entry, these keys are handled:
     'title', 'long imdb title', 'canonical title' and
     'long imdb canonical title'.

     Returns the derived value, or None when the key is not a special
     key (or the data needed to build it is missing).
     """
     data = self.data
     # The "in" operator is used instead of dict.has_key(), which is
     # deprecated and no longer exists on Python 3 dictionaries.
     if 'episode of' in data:
         if key == 'long imdb episode title':
             return build_title(data, canonical=0)
         if key in ('series title', 'canonical series title'):
             series = data['episode of']
             # Prefer the canonical title of the series; fall back to
             # its plain title when the former is missing or empty.
             ser_title = series.get('canonical title') or series['title']
             if key == 'series title':
                 return normalizeTitle(ser_title)
             return ser_title
         if key == 'episode title':
             return normalizeTitle(data.get('title', u''))
         if key == 'canonical episode title':
             return data.get('title', u'')
     # Keys not matched above fall through to the generic title keys.
     if 'title' in data:
         if key == 'title':
             return normalizeTitle(data['title'])
         if key == 'long imdb title':
             return build_title(data, canonical=0)
         if key == 'canonical title':
             return data['title']
         if key == 'long imdb canonical title':
             return build_title(data, canonical=1)
     return None
Beispiel #2
0
 def _getitem(self, key):
     """Handle special keys.

     Compute the value of keys that are derived from ``self.data``
     rather than stored in it.

     With an 'episode of' entry in ``self.data`` the handled keys are
     'long imdb episode title', 'series title',
     'canonical series title', 'episode title' and
     'canonical episode title'; with a 'title' entry they are 'title',
     'long imdb title', 'canonical title' and
     'long imdb canonical title'.

     Returns the computed value, or None if the key is not one of the
     special keys that can be built from the available data.
     """
     data = self.data
     # dict.has_key() is deprecated (and absent in Python 3): membership
     # is tested with the "in" operator, which behaves the same way.
     if 'episode of' in data:
         if key == 'long imdb episode title':
             return build_title(data, canonical=0)
         if key in ('series title', 'canonical series title'):
             series = data['episode of']
             # Use the canonical series title when present and not
             # empty, the plain series title otherwise.
             ser_title = series.get('canonical title') or series['title']
             if key == 'series title':
                 return normalizeTitle(ser_title)
             return ser_title
         if key == 'episode title':
             return normalizeTitle(data.get('title', u''))
         if key == 'canonical episode title':
             return data.get('title', u'')
     # An episode can still be asked for the generic title keys below.
     if 'title' in data:
         if key == 'title':
             return normalizeTitle(data['title'])
         if key == 'long imdb title':
             return build_title(data, canonical=0)
         if key == 'canonical title':
             return data['title']
         if key == 'long imdb canonical title':
             return build_title(data, canonical=1)
     return None
Beispiel #3
0
def titleVariations(title, fromPtdf=0):
    """Build title variations useful for searches.

    If fromPtdf is true, the input is assumed to be in the plain text
    data files format.

    Returns a (title1, title2, title3) tuple:
      title1 -- the title taken from the parsed input (normalized for
                episodes), or canonicalTitle(title) when the input does
                not look like a long imdb name;
      title2 -- title1 with its trailing ", <article>" removed, or
                title1 unchanged; empty if title1 is empty;
      title3 -- the long imdb canonical name, or an empty string when
                the input was handled as a plain title.
    """
    long_name = u''
    if fromPtdf or re_year_index.search(title):
        # A (year[/imdbIndex]) indication appears to be present (or the
        # caller said so): treat the input as a long imdb canonical name.
        parsed = analyze_title(title, canonical=1)
        main_title = parsed['title']
        if parsed['kind'] == 'episode':
            main_title = normalizeTitle(main_title)
            long_name = build_title(parsed, canonical=1, ptdf=1)
        elif fromPtdf:
            # The input already is in the long canonical format.
            long_name = title
        else:
            long_name = build_title(parsed, canonical=1, ptdf=1)
    else:
        # Just a title: only its canonical form is available.
        main_title = canonicalTitle(title)
    # Drop the trailing article, if any, to build the second variation.
    without_article = u''
    if main_title:
        pieces = main_title.split(u', ')
        if pieces[-1].lower() in _unicodeArticles:
            without_article = u', '.join(pieces[:-1])
        else:
            without_article = main_title
    return main_title, without_article, long_name
Beispiel #4
0
def titleVariations(title, fromPtdf=0):
    """Build title variations useful for searches.

    If fromPtdf is true, the input is assumed to be in the plain text
    data files format.

    Returns a tuple of three strings:
      - the title extracted from the input (normalized when the input
        is an episode), or canonicalTitle(title) for a plain title;
      - the same title without its trailing ", <article>" part, or the
        title itself when it does not end with an article (empty when
        the first string is empty);
      - the long imdb canonical name, or an empty string when the input
        was handled as a plain title.
    """
    long_title = u''
    if not (fromPtdf or re_year_index.search(title)):
        # Just a title: no long canonical name can be built.
        base_title = canonicalTitle(title)
    else:
        # The input seems to carry a (year[/imdbIndex]) indication:
        # assume that a long imdb canonical name was provided.
        info = analyze_title(title, canonical=1)
        base_title = info['title']
        is_episode = info['kind'] == 'episode'
        if is_episode:
            base_title = normalizeTitle(base_title)
        if fromPtdf and not is_episode:
            # Already in the long canonical format: reuse it as it is.
            long_title = title
        else:
            long_title = build_title(info, canonical=1, ptdf=1)
    # Second variation: the base title stripped of its trailing article.
    stripped_title = u''
    if base_title:
        chunks = base_title.split(u', ')
        if chunks[-1].lower() in _articles:
            stripped_title = u', '.join(chunks[:-1])
        else:
            stripped_title = base_title
    return base_title, stripped_title, long_title
Beispiel #5
0
 def _search_movie(self, title, results, _episodes=False):
     """Search the titles key file for the given title.

     title -- the title to look for; surrounding whitespace is ignored
              and an empty title gives an empty list.
     results -- maximum number of entries to return; values not
                greater than zero leave the list untruncated.
     _episodes -- if true, only the normalized title is searched,
                  instead of the variations built by titleVariations().

     Returns the list produced by _scan_titles(), reduced to the second
     item of each match; unless self.doAdult is true, entries whose
     genres include 'Adult' are removed.
     """
     title = title.strip()
     if not title:
         return []
     # Search for these title variations.
     if _episodes:
         title1, title2, title3 = normalizeTitle(title), '', ''
     else:
         title1, title2, title3 = titleVariations(title, fromPtdf=1)
     keep_adult = self.doAdult
     # The scan is capped only when nothing is filtered out afterwards;
     # otherwise 0 is passed and the cap is applied after the filter.
     if keep_adult:
         scan_limit = results
     else:
         scan_limit = 0
     res = _scan_titles('%stitles.key' % self.__db, title1, title2, title3,
                        scan_limit, _episodes)
     if keep_adult:
         if results > 0:
             res[:] = res[:results]
         res[:] = [match[1] for match in res]
         return res
     res[:] = [match[1] for match in res]
     # Check for adult movies: keep only entries without the Adult genre.
     db = self.__db
     genres_data = '%s%s.data' % (db, 'genres')
     genres_index = '%s%s.index' % (db, 'genres')
     attr_index = '%sattributes.index' % db
     attr_key = '%sattributes.key' % db
     res[:] = [entry for entry in res
               if 'Adult' not in getMovieMisc(movieID=entry[0],
                                              dataF=genres_data,
                                              indexF=genres_index,
                                              attrIF=attr_index,
                                              attrKF=attr_key)]
     if results > 0:
         res[:] = res[:results]
     return res
Beispiel #6
0
 def _search_movie(self, title, results, _episodes=False):
     """Look up a title in the titles key file.

     title -- the title to search; it is stripped, and an empty title
              returns an empty list.
     results -- how many entries to return at most; the list is not
                truncated when this is zero or negative.
     _episodes -- when true, the search uses only the normalized title
                  and not the variations from titleVariations().

     Returns the matches found by _scan_titles(), each one reduced to
     its second item.  If self.doAdult is false, the entries whose
     genres contain 'Adult' are discarded before truncating the list.
     """
     title = title.strip()
     if not title:
         return []
     # Search for these title variations.
     title2 = title3 = ''
     if _episodes:
         title1 = normalizeTitle(title)
     else:
         title1, title2, title3 = titleVariations(title, fromPtdf=1)
     skip_adult = not self.doAdult
     # When adult titles have to be removed the scan is called with 0,
     # and the requested number of results is enforced after filtering.
     resultsST = results
     if skip_adult:
         resultsST = 0
     titles_key = '%stitles.key' % self.__db
     res = _scan_titles(titles_key, title1, title2, title3,
                        resultsST, _episodes)
     if not skip_adult and results > 0:
         res[:] = res[:results]
     res[:] = [item[1] for item in res]
     if skip_adult:
         # Check for adult movies, looking at the genres of every entry.
         genre_files = {
             'dataF': '%s%s.data' % (self.__db, 'genres'),
             'indexF': '%s%s.index' % (self.__db, 'genres'),
             'attrIF': '%sattributes.index' % self.__db,
             'attrKF': '%sattributes.key' % self.__db,
         }
         kept = []
         for entry in res:
             genres = getMovieMisc(movieID=entry[0], **genre_files)
             if 'Adult' in genres:
                 continue
             kept.append(entry)
         res[:] = kept
         if results > 0:
             res[:] = res[:results]
     return res
Beispiel #7
0
 def _search_movie(self, title, results, _episodes=False):
     """Search the titles key file, resolving AKAs to the real movie.

     title -- the title to search; it is stripped, and an empty title
              returns an empty list.
     results -- maximum number of entries to return; no truncation is
                done when this is zero or negative.
     _episodes -- if true, only the normalized title is searched,
                  instead of the variations built by titleVariations().

     Returns a new list of (movieID, data) tuples without duplicated
     movieIDs.  When a match refers to another movie (its real movieID
     differs) and the two titles differ, the entry carries the data of
     the real title, with the matched title stored under 'akas'.
     Unless self.doAdult is true, entries whose genres include 'Adult'
     are skipped.
     """
     title = title.strip()
     if not title:
         return []
     # Search for these title variations.
     if _episodes:
         title1, title2, title3 = normalizeTitle(title), '', ''
     else:
         title1, title2, title3 = titleVariations(title, fromPtdf=1)
     titles_key = '%stitles.key' % self.__db
     titles_index = '%stitles.index' % self.__db
     # XXX: only a guess: the results are shrunk afterwards (to exclude
     #      Adult titles and to remove duplicated entries), so three
     #      times the requested amount is asked for.
     res = _scan_titles(titles_key, title1, title2, title3,
                        results * 3, _episodes)
     res[:] = [match[1] for match in res]
     # Check for adult movies.
     if not self.doAdult:
         genres_data = '%s%s.data' % (self.__db, 'genres')
         genres_index = '%s%s.index' % (self.__db, 'genres')
         attr_index = '%sattributes.index' % self.__db
         attr_key = '%sattributes.key' % self.__db
         res[:] = [entry for entry in res
                   if 'Adult' not in getMovieMisc(movieID=entry[0],
                                                  dataF=genres_data,
                                                  indexF=genres_index,
                                                  attrIF=attr_index,
                                                  attrKF=attr_key)]
     # Get the real name, if this is an AKA.
     # XXX: duplicated code!
     new_res = []
     seen_MID = []
     for movieID, info in res:
         # Remove duplicates.
         # XXX: find a way to prefer titles with an AKA?  Or prefer
         #      the original title?
         if movieID in seen_MID:
             continue
         seen_MID.append(movieID)
         realMID = self._get_real_movieID(movieID)
         if movieID == realMID:
             # Not an AKA: the entry is kept as it is.
             picked = (movieID, info)
         else:
             if realMID in seen_MID:
                 continue
             seen_MID.append(realMID)
             aka_title = build_title(info, canonical=0)
             real_title = getLabel(realMID, titles_index, titles_key)
             if aka_title == real_title:
                 picked = (realMID, info)
             else:
                 # Return the data of the real title, keeping track of
                 # the title that was actually matched.
                 real_info = analyze_title(real_title, canonical=1)
                 real_info['akas'] = [aka_title]
                 picked = (realMID, real_info)
         new_res.append(picked)
     if results > 0:
         new_res[:] = new_res[:results]
     return new_res
Beispiel #8
0
    def _search_movie(self, title, results, _episodes=False):
        title = title.strip()
        if not title: return []
        title_dict = analyze_title(title, canonical=1)
        s_title = title_dict['title']
        if not s_title: return []
        episodeOf = title_dict.get('episode of')

        if not episodeOf:
            if not _episodes:
                s_title_split = s_title.split(', ')
                if len(s_title_split) > 1 and \
                        s_title_split[-1].lower() in _articles:
                    s_title_rebuilt = ', '.join(s_title_split[:-1])
                    if s_title_rebuilt:
                        s_title = s_title_rebuilt
        else:
            _episodes = False
            s_title = normalizeTitle(s_title)
        if isinstance(s_title, UnicodeType):
            s_title = s_title.encode('ascii', 'ignore')

        soundexCode = soundex(s_title)

        # XXX: improve the search restricting the kindID if the
        #      "kind" of the input differs from "movie"?
        condition = conditionAka = None
        if _episodes:
            condition = AND(Title.q.phoneticCode == soundexCode,
                            Title.q.kindID == self._kindRev['episode'])
            conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
                               AkaTitle.q.kindID == self._kindRev['episode'])
        elif title_dict['kind'] == 'episode' and episodeOf is not None:
            series_title = build_title(episodeOf, canonical=1)
            # XXX: is it safe to get "results" results?
            #      Too many?  Too few?
            serRes = results
            if serRes < 3 or serRes > 10:
                serRes = 10
            searchSeries = self._search_movie(series_title, serRes)
            seriesIDs = [result[0] for result in searchSeries]
            if seriesIDs:
                condition = AND(Title.q.phoneticCode == soundexCode,
                                IN(Title.q.episodeOfID, seriesIDs),
                                Title.q.kindID == self._kindRev['episode'])
                conditionAka = AND(
                    AkaTitle.q.phoneticCode == soundexCode,
                    IN(AkaTitle.q.episodeOfID, seriesIDs),
                    AkaTitle.q.kindID == self._kindRev['episode'])
            else:
                # XXX: bad situation: we have found no matching series;
                #      try searching everything (both episodes and
                #      non-episodes) for the title.
                condition = AND(Title.q.phoneticCode == soundexCode,
                                IN(Title.q.episodeOfID, seriesIDs))
                conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
                                   IN(AkaTitle.q.episodeOfID, seriesIDs))
        if condition is None:
            # XXX: excludes episodes?
            condition = AND(Title.q.kindID != self._kindRev['episode'],
                            Title.q.phoneticCode == soundexCode)
            conditionAka = AND(AkaTitle.q.kindID != self._kindRev['episode'],
                               AkaTitle.q.phoneticCode == soundexCode)

        # Up to 3 variations of the title are searched, plus the
        # long imdb canonical title, if provided.
        if not _episodes:
            title1, title2, title3 = titleVariations(title)
        else:
            title1 = title
            title2 = ''
            title3 = ''
        try:
            qr = [(q.id, get_movie_data(q.id, self._kind))
                  for q in Title.select(condition)]
            q2 = [(q.movieID, get_movie_data(q.id, self._kind, fromAka=1))
                  for q in AkaTitle.select(conditionAka)]
            qr += q2
        except NotFoundError, e:
            raise IMDbDataAccessError, \
                    'unable to search the database: "%s"' % str(e)