Example #1
0
 def _getitem(self, key):
     """Handle special keys."""
     if self.data.has_key('episode of'):
         if key == 'long imdb episode title':
             return build_title(self.data)
         elif key == 'series title':
             return self.data['episode of']['title']
         elif key == 'canonical series title':
             ser_title = self.data['episode of']['title']
             return canonicalTitle(ser_title)
         elif key == 'smart canonical series title':
             ser_title = self.data['episode of']['title']
             return self.smartCanonicalTitle(ser_title)
         elif key == 'episode title':
             return self.data.get('title', u'')
         elif key == 'canonical episode title':
             return canonicalTitle(self.data.get('title', u''))
         elif key == 'smart canonical episode title':
             return self.smartCanonicalTitle(self.data.get('title', u''))
     if self.data.has_key('title'):
         if key == 'title':
             return self.data['title']
         elif key == 'long imdb title':
             return build_title(self.data)
         elif key == 'canonical title':
             return canonicalTitle(self.data['title'])
         elif key == 'smart canonical title':
             return self.smartCanonicalTitle(self.data['title'])
         elif key == 'long imdb canonical title':
             return build_title(self.data, canonical=1)
         elif key == 'smart long imdb canonical title':
             return build_title(self.data, canonical=1,
                                 lang=self.guessLanguage())
     return None
Example #2
0
 def _getitem(self, key):
     """Handle special keys."""
     if 'episode of' in self.data:
         if key == 'long imdb episode title':
             return build_title(self.data)
         elif key == 'series title':
             return self._getSeriesTitle(self.data['episode of'])
         elif key == 'canonical series title':
             ser_title = self._getSeriesTitle(self.data['episode of'])
             return canonicalTitle(ser_title)
         elif key == 'smart canonical series title':
             ser_title = self._getSeriesTitle(self.data['episode of'])
             return self.smartCanonicalTitle(ser_title)
         elif key == 'episode title':
             return self.data.get('title', '')
         elif key == 'canonical episode title':
             return canonicalTitle(self.data.get('title', ''))
         elif key == 'smart canonical episode title':
             return self.smartCanonicalTitle(self.data.get('title', ''))
     if 'title' in self.data:
         if key == 'title':
             return self.data['title']
         elif key == 'long imdb title':
             return build_title(self.data)
         elif key == 'canonical title':
             return canonicalTitle(self.data['title'])
         elif key == 'smart canonical title':
             return self.smartCanonicalTitle(self.data['title'])
         elif key == 'long imdb canonical title':
             return build_title(self.data, canonical=True)
         elif key == 'smart long imdb canonical title':
             return build_title(self.data, canonical=True, lang=self.guessLanguage())
     if key == 'full-size cover url':
         return self.get_fullsizeURL()
     return None
Example #3
0
 def _getitem(self, key):
     """Handle special keys."""
     if self.data.has_key('episode of'):
         if key == 'long imdb episode title':
             return build_title(self.data)
         elif key == 'series title':
             return self.data['episode of']['title']
         elif key == 'canonical series title':
             ser_title = self.data['episode of']['title']
             return canonicalTitle(ser_title)
         elif key == 'smart canonical series title':
             ser_title = self.data['episode of']['title']
             return self.smartCanonicalTitle(ser_title)
         elif key == 'episode title':
             return self.data.get('title', u'')
         elif key == 'canonical episode title':
             return canonicalTitle(self.data.get('title', u''))
         elif key == 'smart canonical episode title':
             return self.smartCanonicalTitle(self.data.get('title', u''))
     if self.data.has_key('title'):
         if key == 'title':
             return self.data['title']
         elif key == 'long imdb title':
             return build_title(self.data)
         elif key == 'canonical title':
             return canonicalTitle(self.data['title'])
         elif key == 'smart canonical title':
             return self.smartCanonicalTitle(self.data['title'])
         elif key == 'long imdb canonical title':
             return build_title(self.data, canonical=1)
         elif key == 'smart long imdb canonical title':
             return build_title(self.data,
                                canonical=1,
                                lang=self.guessLanguage())
     return None
Example #4
0
 def _getitem(self, key):
     """Handle special keys."""
     if 'episode of' in self.data:
         if key == 'long imdb episode title':
             return build_title(self.data)
         elif key == 'series title':
             return self._getSeriesTitle(self.data['episode of'])
         elif key == 'canonical series title':
             ser_title = self._getSeriesTitle(self.data['episode of'])
             return canonicalTitle(ser_title)
         elif key == 'smart canonical series title':
             ser_title = self._getSeriesTitle(self.data['episode of'])
             return self.smartCanonicalTitle(ser_title)
         elif key == 'episode title':
             return self.data.get('title', '')
         elif key == 'canonical episode title':
             return canonicalTitle(self.data.get('title', ''))
         elif key == 'smart canonical episode title':
             return self.smartCanonicalTitle(self.data.get('title', ''))
     if 'title' in self.data:
         if key == 'title':
             return self.data['title']
         elif key == 'long imdb title':
             return build_title(self.data)
         elif key == 'canonical title':
             return canonicalTitle(self.data['title'])
         elif key == 'smart canonical title':
             return self.smartCanonicalTitle(self.data['title'])
         elif key == 'long imdb canonical title':
             return build_title(self.data, canonical=True)
         elif key == 'smart long imdb canonical title':
             return build_title(self.data, canonical=True, lang=self.guessLanguage())
     if key == 'full-size cover url':
         return self.get_fullsizeURL()
     return None
Example #5
0
def titleVariations(title, fromPtdf=0):
    """Build the three title variations used for searches.

    If fromPtdf is true, the input is assumed to be in the plain text
    data files format.

    Returns a (title1, title2, title3) tuple: title1 is the canonical
    title, title2 is title1 without its trailing article (or title1
    unchanged) and title3 is the long imdb canonical name, or an empty
    string when the input was just a title.
    """
    if fromPtdf or re_year_index.search(title):
        # A (year[/imdbIndex]) indication is taken to mean that a long
        # imdb canonical name was provided.
        parsed = analyze_title(title, canonical=1)
        title1 = parsed['title']
        if parsed['kind'] == 'episode':
            title1 = normalizeTitle(title1)
            title3 = build_title(parsed, canonical=1, ptdf=1)
        elif fromPtdf:
            # The input already is the long imdb canonical name.
            title3 = title
        else:
            title3 = build_title(parsed, canonical=1, ptdf=1)
    else:
        # Just a title: only the canonical form can be derived.
        title1 = canonicalTitle(title)
        title3 = u''
    title2 = u''
    if title1:
        pieces = title1.split(u', ')
        if pieces[-1].lower() in _unicodeArticles:
            title2 = u', '.join(pieces[:-1])
        else:
            title2 = title1
    return title1, title2, title3
Example #6
0
def titleVariations(title, fromPtdf=0):
    """Build the three title variations used for searches.

    If fromPtdf is true, the input is assumed to be in the plain text
    data files format.

    Returns a (title1, title2, title3) tuple: title1 is the canonical
    title, title2 is title1 without its trailing article (or title1
    unchanged) and title3 is the long imdb canonical name, or an empty
    string when the input was just a title.
    """
    if fromPtdf or re_year_index.search(title):
        # A (year[/imdbIndex]) indication is taken to mean that a long
        # imdb canonical name was provided.
        parsed = analyze_title(title, canonical=1)
        title1 = parsed['title']
        if parsed['kind'] == 'episode':
            title1 = normalizeTitle(title1)
            title3 = build_title(parsed, canonical=1, ptdf=1)
        elif fromPtdf:
            # The input already is the long imdb canonical name.
            title3 = title
        else:
            title3 = build_title(parsed, canonical=1, ptdf=1)
    else:
        # Just a title: only the canonical form can be derived.
        title1 = canonicalTitle(title)
        title3 = u''
    title2 = u''
    if title1:
        pieces = title1.split(u', ')
        if pieces[-1].lower() in _articles:
            title2 = u', '.join(pieces[:-1])
        else:
            title2 = title1
    return title1, title2, title3
Example #7
0
 def smartCanonicalTitle(self, title=None, lang=None):
     """Return the canonical title, guessing its language.

     The title can be forced with the 'title' argument (internally
     used); when omitted, the 'title' entry of ``self.data`` is used
     (an empty string if missing).  The language can be forced with the
     'lang' argument, otherwise it is auto-detected through
     ``self.guessLanguage()``.
     """
     chosen_title = self.data.get('title', '') if title is None else title
     chosen_lang = self.guessLanguage() if lang is None else lang
     return canonicalTitle(chosen_title, lang=chosen_lang)
Example #8
0
 def smartCanonicalTitle(self, title=None, lang=None):
     """Return the canonical title, guessing its language.

     The title can be forced with the 'title' argument (internally
     used); when omitted, the 'title' entry of ``self.data`` is used
     (an empty string if missing).  The language can be forced with the
     'lang' argument, otherwise it is auto-detected through
     ``self.guessLanguage()``.
     """
     chosen_title = self.data.get('title', u'') if title is None else title
     chosen_lang = self.guessLanguage() if lang is None else lang
     return canonicalTitle(chosen_title, lang=chosen_lang)
Example #9
0
def title_soundex(title):
    """Compute the soundex code of a title, leaving its article out.

    :param title: movie title
    :type title: str
    :returns: soundex of the title (without the article, if any), or
        None when the title is empty
    :rtype: str
    """
    if not title:
        return None
    canonical = canonicalTitle(title)
    # Only the chunk after the last ', ' is checked against the articles.
    leading, _sep, trailing = canonical.rpartition(', ')
    if trailing.lower() in _unicodeArticles:
        canonical = leading
    return soundex(canonical)
Example #10
0
def title_soundex(title):
    """Compute the soundex code of a title, leaving its article out.

    :param title: movie title
    :type title: str
    :returns: soundex of the title (without the article, if any), or
        None when the title is empty
    :rtype: str
    """
    if not title:
        return None
    canonical = canonicalTitle(title)
    # Only the chunk after the last ', ' is checked against the articles.
    leading, _sep, trailing = canonical.rpartition(', ')
    if trailing.lower() in _unicodeArticles:
        canonical = leading
    return soundex(canonical)
Example #11
0
 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings."""
     if isinstance(o, (unicode, str)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=0)
             rtitle = build_title(a_title, ptdf=1)
             if trefs.has_key(rtitle): continue
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle,
                       movieID=movieID,
                       accessSystem=self.accessSystem)
             trefs[rtitle] = m
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if nrefs.has_key(rname): continue
             personID = self._getNameID(rname)
             if personID is None:
                 personID = self._getNameID(name)
             if personID is None: continue
             p = Person(name=rname,
                        personID=personID,
                        accessSystem=self.accessSystem)
             nrefs[rname] = p
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (list, tuple)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, dict):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)
Example #12
0
 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings."""
     if isinstance(o, (UnicodeType, StringType)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=1)
             rtitle = build_title(a_title, canonical=1, ptdf=1)
             if trefs.has_key(rtitle): continue
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle, movieID=movieID,
                         accessSystem=self.accessSystem)
             trefs[rtitle] = m
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if nrefs.has_key(rname): continue
             personID = self._getNameID(rname)
             if personID is None:
                 personID = self._getNameID(name)
             if personID is None: continue
             p = Person(name=rname, personID=personID,
                         accessSystem=self.accessSystem)
             nrefs[rname] = p
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (ListType, TupleType)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, DictType):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)
Example #13
0
def scan_titles(titles_list,
                title1,
                title2,
                title3,
                results=0,
                searchingEpisode=0,
                onlyEpisodes=0,
                ro_thresold=None):
    """Scan a list of titles, searching for best matches against
    the given variations.

    :param titles_list: iterable of ``(id, title_data)`` pairs; every
        ``title_data`` is read through ``['title']`` and ``.get('kind')``.
    :param title1: searched title, compared with every candidate.
    :param title2: variation of title1; when the two differ the searched
        title is treated as carrying an article.
    :param title3: optional long title, compared with the candidate as
        rebuilt by ``build_title``; a trailing '}' turns the scan into an
        episode search.
    :param results: when greater than zero, maximum number of matches.
    :param searchingEpisode: if true only candidates of kind 'episode'
        are scored, otherwise those candidates are skipped.
    :param onlyEpisodes: if true only candidates of kind 'episode' are
        scored, comparing title1 with the candidate title stripped of a
        trailing parenthesised part.
    :param ro_thresold: minimum ratio of a match (0.6 when None).
    :returns: list of ``(ratio, (id, title_data))`` tuples, best first.
    """
    if ro_thresold is not None:
        RO_THRESHOLD = ro_thresold
    else:
        RO_THRESHOLD = 0.6
    sm1 = SequenceMatcher()
    sm2 = SequenceMatcher()
    sm3 = SequenceMatcher()
    sm1.set_seq1(title1.lower())
    # Primed through set_seq1 like sm1 and sm3 (it used to be set_seq2):
    # the three matchers are all handed to ratcliff() in the same way.
    sm2.set_seq1(title2.lower())
    if title3:
        sm3.set_seq1(title3.lower())
        if title3[-1] == '}':
            searchingEpisode = 1
    hasArt = title2 != title1
    resd = {}
    for i, t_data in titles_list:
        if onlyEpisodes:
            if t_data.get('kind') != 'episode':
                continue
            til = t_data['title']
            # The emptiness guard keeps til[-1] from raising IndexError.
            if til and til[-1] == ')':
                dateIdx = til.rfind('(')
                if dateIdx != -1:
                    til = til[:dateIdx].rstrip()
            if not til:
                continue
            ratio = ratcliff(title1, til, sm1)
            if ratio >= RO_THRESHOLD:
                resd[i] = (ratio, (i, t_data))
            continue
        if searchingEpisode:
            if t_data.get('kind') != 'episode':
                continue
        elif t_data.get('kind') == 'episode':
            continue
        til = t_data['title']
        # XXX: on Symbian, here we get a str; not sure this is the
        #      right place to fix it.
        # ``bytes`` is ``str`` on Python 2, so this is the same decoding
        # that unicode(til, 'latin1', 'ignore') performed, without the
        # Python 2-only ``unicode`` builtin.
        if isinstance(til, bytes):
            til = til.decode('latin1', 'ignore')
        # Distance with the canonical title (with or without article).
        #   titleS      -> titleR
        #   titleS, the -> titleR, the
        if not searchingEpisode:
            til = canonicalTitle(til)
            ratios = [ratcliff(title1, til, sm1) + 0.05]
            # til2 is til without the article, if present.
            til2 = til
            tils = til2.split(', ')
            matchHasArt = 0
            if tils[-1].lower() in _unicodeArticles:
                til2 = ', '.join(tils[:-1])
                matchHasArt = 1
            if hasArt and not matchHasArt:
                #   titleS[, the]  -> titleR
                ratios.append(ratcliff(title2, til, sm2))
            elif matchHasArt and not hasArt:
                #   titleS  -> titleR[, the]
                ratios.append(ratcliff(title1, til2, sm1))
        else:
            ratios = [0.0]
        if title3:
            # Distance with the long imdb canonical title.
            ratios.append(
                ratcliff(title3, build_title(t_data, canonical=1, ptdf=1), sm3)
                + 0.1)
        ratio = max(ratios)
        # Keep only the best ratio seen for each id; ``in`` replaces
        # dict.has_key(), removed in Python 3.
        if ratio >= RO_THRESHOLD:
            if i not in resd or ratio > resd[i][0]:
                resd[i] = (ratio, (i, t_data))
    # sorted() also works on Python 3, where dict.values() is a view
    # without a sort() method.
    res = sorted(resd.values(), reverse=True)
    if results > 0:
        res = res[:results]
    return res
Example #14
0
def strip_article(title):
    """Return the canonical form of a title without its trailing article.

    The title goes through ``canonicalTitle``; if the chunk following
    the last ', ' is a known article, everything before that separator
    is returned, otherwise the canonical title is returned unchanged.
    """
    canonical = canonicalTitle(title)
    leading, _sep, trailing = canonical.rpartition(', ')
    if trailing.lower() in _unicodeArticles:
        return leading
    return canonical
Example #15
0
def strip_article(title):
    """Return the canonical form of a title without its trailing article.

    The title goes through ``canonicalTitle``; if the chunk following
    the last ', ' is a known article, everything before that separator
    is returned, otherwise the canonical title is returned unchanged.
    """
    canonical = canonicalTitle(title)
    leading, _sep, trailing = canonical.rpartition(', ')
    if trailing.lower() in _unicodeArticles:
        return leading
    return canonical
Example #16
0
def scan_titles(titles_list, title1, title2, title3, results=0,
                searchingEpisode=0, onlyEpisodes=0, ro_thresold=None):
    """Scan a list of titles, searching for best matches against
    the given variations.

    :param titles_list: iterable of ``(id, title_data)`` pairs; every
        ``title_data`` is read through ``['title']`` and ``.get('kind')``.
    :param title1: searched title, compared with every candidate.
    :param title2: variation of title1; when the two differ the searched
        title is treated as carrying an article.
    :param title3: optional long title, compared with the candidate as
        rebuilt by ``build_title``; a trailing '}' turns the scan into an
        episode search.
    :param results: when greater than zero, maximum number of matches.
    :param searchingEpisode: if true only candidates of kind 'episode'
        are scored, otherwise those candidates are skipped.
    :param onlyEpisodes: if true only candidates of kind 'episode' are
        scored, comparing title1 with the candidate title stripped of a
        trailing parenthesised part.
    :param ro_thresold: minimum ratio of a match (0.6 when None).
    :returns: list of ``(ratio, (id, title_data))`` tuples, best first.
    """
    if ro_thresold is not None:
        RO_THRESHOLD = ro_thresold
    else:
        RO_THRESHOLD = 0.6
    sm1 = SequenceMatcher()
    sm2 = SequenceMatcher()
    sm3 = SequenceMatcher()
    sm1.set_seq1(title1.lower())
    # Primed through set_seq1 like sm1 and sm3 (it used to be set_seq2):
    # the three matchers are all handed to ratcliff() in the same way.
    sm2.set_seq1(title2.lower())
    if title3:
        sm3.set_seq1(title3.lower())
        if title3[-1] == '}':
            searchingEpisode = 1
    hasArt = title2 != title1
    resd = {}
    for i, t_data in titles_list:
        if onlyEpisodes:
            if t_data.get('kind') != 'episode':
                continue
            til = t_data['title']
            # The emptiness guard keeps til[-1] from raising IndexError.
            if til and til[-1] == ')':
                dateIdx = til.rfind('(')
                if dateIdx != -1:
                    til = til[:dateIdx].rstrip()
            if not til:
                continue
            ratio = ratcliff(title1, til, sm1)
            if ratio >= RO_THRESHOLD:
                resd[i] = (ratio, (i, t_data))
            continue
        if searchingEpisode:
            if t_data.get('kind') != 'episode':
                continue
        elif t_data.get('kind') == 'episode':
            continue
        til = t_data['title']
        # XXX: on Symbian, here we get a str; not sure this is the
        #      right place to fix it.
        # ``bytes`` is ``str`` on Python 2, so this is the same decoding
        # that unicode(til, 'latin1', 'ignore') performed, without the
        # Python 2-only ``unicode`` builtin.
        if isinstance(til, bytes):
            til = til.decode('latin1', 'ignore')
        # Distance with the canonical title (with or without article).
        #   titleS      -> titleR
        #   titleS, the -> titleR, the
        if not searchingEpisode:
            til = canonicalTitle(til)
            ratios = [ratcliff(title1, til, sm1) + 0.05]
            # til2 is til without the article, if present.
            til2 = til
            tils = til2.split(', ')
            matchHasArt = 0
            if tils[-1].lower() in _unicodeArticles:
                til2 = ', '.join(tils[:-1])
                matchHasArt = 1
            if hasArt and not matchHasArt:
                #   titleS[, the]  -> titleR
                ratios.append(ratcliff(title2, til, sm2))
            elif matchHasArt and not hasArt:
                #   titleS  -> titleR[, the]
                ratios.append(ratcliff(title1, til2, sm1))
        else:
            ratios = [0.0]
        if title3:
            # Distance with the long imdb canonical title.
            ratios.append(ratcliff(title3,
                          build_title(t_data, canonical=1, ptdf=1), sm3) + 0.1)
        ratio = max(ratios)
        # Keep only the best ratio seen for each id; ``in`` replaces
        # dict.has_key(), removed in Python 3.
        if ratio >= RO_THRESHOLD:
            if i not in resd or ratio > resd[i][0]:
                resd[i] = (ratio, (i, t_data))
    # sorted() also works on Python 3, where dict.values() is a view
    # without a sort() method.
    res = sorted(resd.values(), reverse=True)
    if results > 0:
        res = res[:results]
    return res