コード例 #1
0
ファイル: __init__.py プロジェクト: djace/imdbpy
 def get_imdbID(self, mop):
     """Return the imdbID for the given Movie, Person, Character or Company
     object.

     The lookup is delegated to ``self`` when ``mop`` uses the same access
     system, otherwise to a new ``IMDb(mop.accessSystem)`` instance.  When
     the object carries an ID of its own, that ID is translated with the
     matching ``get_imdb*ID`` method; otherwise the object's title/name is
     converted with the matching ``*2imdbID`` method.

     Raises IMDbError if ``mop`` is none of the four supported types.
     """
     if mop.accessSystem == self.accessSystem:
         aSystem = self
     else:
         aSystem = IMDb(mop.accessSystem)
     if isinstance(mop, Movie.Movie):
         if mop.movieID is not None:
             return aSystem.get_imdbMovieID(mop.movieID)
         title = build_title(mop, canonical=0, ptdf=0, appendKind=False)
         return aSystem.title2imdbID(title, mop['kind'])
     if isinstance(mop, Person.Person):
         if mop.personID is not None:
             return aSystem.get_imdbPersonID(mop.personID)
         return aSystem.name2imdbID(build_name(mop, canonical=1))
     if isinstance(mop, Character.Character):
         if mop.characterID is not None:
             return aSystem.get_imdbCharacterID(mop.characterID)
         # XXX: open question kept from the original code: canonical=0 ?
         return aSystem.character2imdbID(build_name(mop, canonical=1))
     if isinstance(mop, Company.Company):
         if mop.companyID is not None:
             return aSystem.get_imdbCompanyID(mop.companyID)
         return aSystem.company2imdbID(build_company_name(mop))
     # Company objects are accepted above, so the message lists them too.
     raise IMDbError('object ' + repr(mop) +
                     ' is not a Movie, Person, Character or Company instance')
コード例 #2
0
ファイル: __init__.py プロジェクト: jsynowiec/imdbpy-legacy
 def get_imdbID(self, mop):
     """Return the imdbID for the given Movie, Person, Character or Company
     object.

     ``self`` performs the lookup when ``mop.accessSystem`` equals
     ``self.accessSystem``; otherwise a new ``IMDb(mop.accessSystem)``
     instance is used.  An object that has its own ID gets it translated
     by the matching ``get_imdb*ID`` method, while an object without one
     is resolved from its title/name by the matching ``*2imdbID`` method.

     Raises IMDbError if ``mop`` is none of the four supported types.
     """
     if mop.accessSystem == self.accessSystem:
         aSystem = self
     else:
         aSystem = IMDb(mop.accessSystem)
     if isinstance(mop, Movie.Movie):
         if mop.movieID is not None:
             return aSystem.get_imdbMovieID(mop.movieID)
         title = build_title(mop, canonical=0, ptdf=0, appendKind=False)
         return aSystem.title2imdbID(title, mop['kind'])
     if isinstance(mop, Person.Person):
         if mop.personID is not None:
             return aSystem.get_imdbPersonID(mop.personID)
         return aSystem.name2imdbID(build_name(mop, canonical=1))
     if isinstance(mop, Character.Character):
         if mop.characterID is not None:
             return aSystem.get_imdbCharacterID(mop.characterID)
         # XXX: open question kept from the original code: canonical=0 ?
         return aSystem.character2imdbID(build_name(mop, canonical=1))
     if isinstance(mop, Company.Company):
         if mop.companyID is not None:
             return aSystem.get_imdbCompanyID(mop.companyID)
         return aSystem.company2imdbID(build_company_name(mop))
     # The message used to omit Company although it is handled above.
     raise IMDbError('object ' + repr(mop) +
                     ' is not a Movie, Person, Character or Company instance')
コード例 #3
0
ファイル: Person.py プロジェクト: Black0wL/webtechproject
 def _getitem(self, key):
     """Handle special keys."""
     if self.data.has_key('name'):
         if key == 'name':
             return normalizeName(self.data['name'])
         elif key == 'canonical name':
             return self.data['name']
         elif key == 'long imdb name':
             return build_name(self.data, canonical=0)
         elif key == 'long imdb canonical name':
             return build_name(self.data)
     return None
コード例 #4
0
 def _getitem(self, key):
     """Handle special keys."""
     if self.data.has_key('name'):
         if key == 'name':
             return normalizeName(self.data['name'])
         elif key == 'canonical name':
             return self.data['name']
         elif key == 'long imdb name':
             return build_name(self.data, canonical=0)
         elif key == 'long imdb canonical name':
             return build_name(self.data)
     return None
コード例 #5
0
ファイル: Character.py プロジェクト: alberanid/imdbpy
 def isSameName(self, other):
     """Tell whether ``other`` represents the same character as ``self``.

     True is returned when both objects store a name and the names built
     with ``build_name(..., canonical=False)`` are equal, or when both use
     the same access system and share a characterID that is not None.
     Objects that are not instances of this class never match.
     """
     if not isinstance(other, self.__class__):
         return False
     names_match = (
         'name' in self.data and
         'name' in other.data and
         build_name(self.data, canonical=False) ==
         build_name(other.data, canonical=False)
     )
     if names_match:
         return True
     ids_match = (
         self.accessSystem == other.accessSystem and
         self.characterID is not None and
         self.characterID == other.characterID
     )
     return True if ids_match else False
コード例 #6
0
 def isSameName(self, other):
     """Check whether ``self`` and ``other`` are the same character.

     Two characters match if both have a name and their
     ``build_name(..., canonical=False)`` results are equal, or if they
     come from the same access system and have the same, non-None
     characterID.  Anything that is not an instance of this class is
     never a match.
     """
     if not isinstance(other, self.__class__):
         return False
     same_name = (
         'name' in self.data and
         'name' in other.data and
         build_name(self.data, canonical=False) ==
         build_name(other.data, canonical=False)
     )
     if same_name:
         return True
     same_id = (
         self.accessSystem == other.accessSystem and
         self.characterID is not None and
         self.characterID == other.characterID
     )
     return True if same_id else False
コード例 #7
0
 def _getitem(self, key):
     """Handle special keys."""
     if 'name' in self.data:
         if key == 'name':
             return normalizeName(self.data['name'])
         elif key == 'canonical name':
             return canonicalName(self.data['name'])
         elif key == 'long imdb name':
             return build_name(self.data, canonical=False)
         elif key == 'long imdb canonical name':
             return build_name(self.data, canonical=True)
     if key == 'full-size headshot':
         return self.get_fullsizeURL()
     return None
コード例 #8
0
ファイル: Person.py プロジェクト: Elettronik/SickRage
 def _getitem(self, key):
     """Handle special keys."""
     if self.data.has_key('name'):
         if key == 'name':
             return normalizeName(self.data['name'])
         elif key == 'canonical name':
             return self.data['name']
         elif key == 'long imdb name':
             return build_name(self.data, canonical=0)
         elif key == 'long imdb canonical name':
             return build_name(self.data)
     if key == 'full-size headshot' and self.data.has_key('headshot'):
         return self._re_fullsizeURL.sub('', self.data.get('headshot', ''))
     return None
コード例 #9
0
 def _getitem(self, key):
     """Handle special keys."""
     if 'name' in self.data:
         if key == 'name':
             return normalizeName(self.data['name'])
         elif key == 'canonical name':
             return self.data['name']
         elif key == 'long imdb name':
             return build_name(self.data, canonical=False)
         elif key == 'long imdb canonical name':
             return build_name(self.data)
     if key == 'full-size headshot' and 'headshot' in self.data:
         return self._re_fullsizeURL.sub('', self.data.get('headshot', ''))
     return None
コード例 #10
0
ファイル: Person.py プロジェクト: GodZZila/SickRage
 def _getitem(self, key):
     """Handle special keys."""
     if self.data.has_key("name"):
         if key == "name":
             return normalizeName(self.data["name"])
         elif key == "canonical name":
             return self.data["name"]
         elif key == "long imdb name":
             return build_name(self.data, canonical=0)
         elif key == "long imdb canonical name":
             return build_name(self.data)
     if key == "full-size headshot" and self.data.has_key("headshot"):
         return self._re_fullsizeURL.sub("", self.data.get("headshot", ""))
     return None
コード例 #11
0
ファイル: Person.py プロジェクト: alberanid/imdbpy
 def _getitem(self, key):
     """Handle special keys."""
     if 'name' in self.data:
         if key == 'name':
             return normalizeName(self.data['name'])
         elif key == 'canonical name':
             return self.data['name']
         elif key == 'long imdb name':
             return build_name(self.data, canonical=False)
         elif key == 'long imdb canonical name':
             return build_name(self.data)
     if key == 'full-size headshot':
         return self.get_fullsizeURL()
     return None
コード例 #12
0
ファイル: Person.py プロジェクト: Black0wL/webtechproject
 def isSameName(self, other):
     """Return true if two persons have the same name and imdbIndex
     and/or personID.

     Returns 1 when both objects store a name and their
     ``build_name(..., canonical=1)`` results are equal, or when both use
     the same access system and share a true personID; returns 0
     otherwise, including when ``other`` is not an instance of this class.
     """
     if not isinstance(other, self.__class__):
         return 0
     # "in" replaces dict.has_key(), which was removed in Python 3.
     if ('name' in self.data and 'name' in other.data and
             build_name(self.data, canonical=1) ==
             build_name(other.data, canonical=1)):
         return 1
     if (self.accessSystem == other.accessSystem and
             self.personID and self.personID == other.personID):
         return 1
     return 0
コード例 #13
0
ファイル: Character.py プロジェクト: bahar/movie-collection
 def isSameName(self, other):
     """Return true if two character have the same name
     and/or characterID.

     Returns 1 when both objects store a name and their
     ``build_name(..., canonical=0)`` results are equal, or when both use
     the same access system and share a characterID that is not None;
     returns 0 otherwise, including when ``other`` is not an instance of
     this class.
     """
     if not isinstance(other, self.__class__):
         return 0
     # "in" replaces dict.has_key(), which was removed in Python 3.
     if ('name' in self.data and 'name' in other.data and
             build_name(self.data, canonical=0) ==
             build_name(other.data, canonical=0)):
         return 1
     if (self.accessSystem == other.accessSystem and
             self.characterID is not None and
             self.characterID == other.characterID):
         return 1
     return 0
コード例 #14
0
 def isSameName(self, other):
     """Tell whether ``other`` represents the same person as ``self``.

     True is returned when both objects store a name and the names built
     with ``build_name(..., canonical=True)`` are equal, or when both use
     the same access system and share a true personID.  Objects that are
     not instances of this class never match.
     """
     if not isinstance(other, self.__class__):
         return False
     names_match = (
         'name' in self.data and
         'name' in other.data and
         build_name(self.data, canonical=True) ==
         build_name(other.data, canonical=True)
     )
     if names_match:
         return True
     ids_match = (
         self.accessSystem == other.accessSystem and
         self.personID and
         self.personID == other.personID
     )
     return True if ids_match else False
コード例 #15
0
ファイル: Person.py プロジェクト: alberanid/imdbpy
 def isSameName(self, other):
     """Check whether ``self`` and ``other`` are the same person.

     Two persons match if both have a name and their
     ``build_name(..., canonical=True)`` results are equal, or if they
     come from the same access system and have the same, true personID.
     Anything that is not an instance of this class is never a match.
     """
     if not isinstance(other, self.__class__):
         return False
     same_name = (
         'name' in self.data and
         'name' in other.data and
         build_name(self.data, canonical=True) ==
         build_name(other.data, canonical=True)
     )
     if same_name:
         return True
     same_id = (
         self.accessSystem == other.accessSystem and
         self.personID and
         self.personID == other.personID
     )
     return True if same_id else False
コード例 #16
0
ファイル: Character.py プロジェクト: Elettronik/SickRage
 def isSameName(self, other):
     """Return true if two character have the same name
     and/or characterID.

     Returns 1 when both objects store a name and their
     ``build_name(..., canonical=0)`` results are equal, or when both use
     the same access system and share a characterID that is not None;
     returns 0 otherwise, including when ``other`` is not an instance of
     this class.
     """
     if not isinstance(other, self.__class__):
         return 0
     # dict.has_key() does not exist on Python 3; "in" works everywhere.
     if ('name' in self.data and 'name' in other.data and
             build_name(self.data, canonical=0) ==
             build_name(other.data, canonical=0)):
         return 1
     if (self.accessSystem == other.accessSystem and
             self.characterID is not None and
             self.characterID == other.characterID):
         return 1
     return 0
コード例 #17
0
ファイル: Person.py プロジェクト: imran2140/imdb-extractors
 def isSameName(self, other):
     """Return true if two persons have the same name and imdbIndex
     and/or personID.

     Returns 1 when both objects store a name and their
     ``build_name(..., canonical=1)`` results are equal, or when both use
     the same access system and share a true personID; returns 0
     otherwise, including when ``other`` is not an instance of this class.
     """
     if not isinstance(other, self.__class__):
         return 0
     # dict.has_key() does not exist on Python 3; "in" works everywhere.
     if ('name' in self.data and 'name' in other.data and
             build_name(self.data, canonical=1) ==
             build_name(other.data, canonical=1)):
         return 1
     if (self.accessSystem == other.accessSystem and
             self.personID and self.personID == other.personID):
         return 1
     return 0
コード例 #18
0
class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
    """Search-result parser configured for '/character/ch' links.

    Only configuration is defined here; the parsing machinery comes from
    DOMHTMLSearchMovieParser.
    """
    _BaseParser = DOMBasicCharacterParser
    _notDirectHitTitle = '<title>find - imdb'
    _linkPrefix = '/character/ch'

    def _titleBuilder(self, x):
        """Build the name of a result with build_name(canonical=False)."""
        return build_name(x, canonical=False)

    # Each matched node yields an (analyze_imdbid(link), {'name': ...}) pair.
    _attrs = [
        Attribute(
            key='data',
            multi=True,
            path={'link': "./a[1]/@href", 'name': "./a[1]/text()"},
            postprocess=lambda found: (
                analyze_imdbid(found.get('link') or ''),
                {'name': found.get('name')}
            )
        )
    ]

    extractors = [
        Extractor(label='search',
                  path="//td[@class='result_text']/a[starts-with(@href, '/character/ch')]/..",
                  attrs=_attrs)
    ]
コード例 #19
0
class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
    """Parser for the html page returned by the IMDb web server when the
    "new search system" is used to look for persons."""
    _linkPrefix = '/name/nm'

    def _titleBuilder(self, x):
        """Build the name of a result with build_name(canonical=True)."""
        return build_name(x, canonical=True)

    # Each matched node yields an (imdbID, analyzed name, akas) triple; the
    # text found in 'index' is appended to the name before analyze_name().
    _attrs = [
        Attribute(
            key='data',
            multi=True,
            path={
                'link': "./a[1]/@href",
                'name': "./a[1]/text()",
                'index': "./text()[1]",
                'akas': ".//div[@class='_imdbpyAKA']/text()"
            },
            postprocess=lambda found: (
                analyze_imdbid(found.get('link') or ''),
                analyze_name(
                    (found.get('name') or '') + (found.get('index') or ''),
                    canonical=1),
                found.get('akas')
            )
        )
    ]

    extractors = [
        Extractor(label='search',
                  path="//td[@class='result_text']/a[starts-with(@href, '/name/nm')]/..",
                  attrs=_attrs)
    ]
コード例 #20
0
ファイル: Character.py プロジェクト: alberanid/imdbpy
 def _getitem(self, key):
     """Handle special keys."""
     # XXX: can a character have an imdbIndex?
     if 'name' in self.data:
         if key == 'long imdb name':
             return build_name(self.data)
     return None
コード例 #21
0
ファイル: Character.py プロジェクト: Neprincessa/PrPythonAtom
 def _getitem(self, key):
     """Handle special keys."""
     # XXX: can a character have an imdbIndex?
     if 'name' in self.data:
         if key == 'long imdb name':
             return build_name(self.data)
     return None
コード例 #22
0
class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
    """Parser for the html page returned by the IMDb web server when the
    "new search system" is used to look for persons."""
    _BaseParser = DOMBasicPersonParser
    _notDirectHitTitle = '<title>imdb name'
    _linkPrefix = '/name/nm'

    def _titleBuilder(self, x):
        """Build the name of a result with build_name(canonical=True)."""
        return build_name(x, canonical=True)

    # Each matched node yields an (imdbID, analyzed name, akas) triple; the
    # text found in 'index' is appended to the name before analyze_name().
    _attrs = [
        Attribute(
            key='data',
            multi=True,
            path={
                'link': "./a[1]/@href",
                'name': "./a[1]/text()",
                'index': "./text()[1]",
                'akas': ".//div[@class='_imdbpyAKA']/text()"
            },
            postprocess=lambda found: (
                analyze_imdbid(found.get('link') or u''),
                analyze_name(
                    (found.get('name') or u'') + (found.get('index') or u''),
                    canonical=1),
                found.get('akas')
            )
        )
    ]

    extractors = [
        Extractor(
            label='search',
            path="//td[3]/a[starts-with(@href, '/name/nm')]/..",
            attrs=_attrs
        )
    ]

    def preprocess_string(self, html_string):
        """Wrap AKA fragments in marker <div>s, then defer to the base class.

        The rewrite with _reAKASp is applied only when the lower-cased
        first 1024 characters of the page contain _notDirectHitTitle.
        """
        page_head = html_string[:1024].lower()
        if self._notDirectHitTitle in page_head:
            html_string = _reAKASp.sub(
                r'\1<div class="_imdbpyAKA">\2::</div>\3', html_string)
        return DOMHTMLSearchMovieParser.preprocess_string(self, html_string)
コード例 #23
0
def scan_names(name_list,
               name1,
               name2,
               name3,
               results=0,
               ro_thresold=None,
               _scan_character=False):
    """Scan a list of names, searching for best matches against
    the given variations.

    name_list is an iterable of (key, data) pairs, where data['name'] is
    the name to compare; name1 is always used, while name2 and name3 are
    used only when they are not empty.  Every candidate gets the highest
    of its ratcliff() ratios and is kept when that ratio reaches the
    threshold (ro_thresold, or 0.6 when it is None).  For a key that
    appears more than once, only the entry with the best ratio is kept.

    Returns a list of (ratio, (key, data)) tuples sorted in descending
    order, truncated to the first `results` items when results > 0.
    """
    RO_THRESHOLD = 0.6 if ro_thresold is None else ro_thresold
    sm1 = SequenceMatcher()
    sm2 = SequenceMatcher()
    sm3 = SequenceMatcher()
    sm1.set_seq1(name1.lower())
    if name2:
        sm2.set_seq1(name2.lower())
    if name3:
        sm3.set_seq1(name3.lower())
    resd = {}
    for i, n_data in name_list:
        nil = n_data['name']
        # XXX: on Symbian, here we get a byte string; not sure this is the
        #      right place to fix it.  bytes.decode() is the same as the
        #      former unicode(...) call on Python 2 and also works on
        #      Python 3, where unicode() does not exist.
        if isinstance(nil, bytes):
            nil = nil.decode('latin1', 'ignore')
        # Distance with the canonical name.
        ratios = [ratcliff(name1, nil, sm1) + 0.05]
        namesurname = u''
        if not _scan_character:
            nils = nil.split(', ', 1)
            surname = nils[0]
            if len(nils) == 2:
                namesurname = '%s %s' % (nils[1], surname)
        else:
            nils = nil.split(' ', 1)
            surname = nils[-1]
            namesurname = nil
        if surname != nil:
            # Distance with the "Surname" in the database.
            ratios.append(ratcliff(name1, surname, sm1))
            if not _scan_character:
                ratios.append(ratcliff(name1, namesurname, sm1))
            if name2:
                ratios.append(ratcliff(name2, surname, sm2))
                # Distance with the "Name Surname" in the database.
                if namesurname:
                    ratios.append(ratcliff(name2, namesurname, sm2))
        if name3:
            # Distance with the long imdb canonical name.
            ratios.append(
                ratcliff(name3, build_name(n_data, canonical=1), sm3) + 0.1)
        ratio = max(ratios)
        # Keep only the best-scoring entry for each key.
        if ratio >= RO_THRESHOLD and (i not in resd or ratio > resd[i][0]):
            resd[i] = (ratio, (i, n_data))
    # sorted() also accepts the dict view that values() returns on Python 3.
    res = sorted(resd.values(), reverse=True)
    if results > 0:
        res = res[:results]
    return res
コード例 #24
0
ファイル: Character.py プロジェクト: bahar/movie-collection
 def _getitem(self, key):
     """Handle special keys."""
     ## XXX: can a character have an imdbIndex?
     if self.data.has_key('name'):
         if key == 'long imdb name':
             return build_name(self.data)
     if key == 'full-size headshot' and self.data.has_key('headshot'):
         return self._re_fullsizeURL.sub('', self.data.get('headshot', ''))
     return None
コード例 #25
0
ファイル: Character.py プロジェクト: Elettronik/SickRage
 def _getitem(self, key):
     """Handle special keys."""
     ## XXX: can a character have an imdbIndex?
     if self.data.has_key('name'):
         if key == 'long imdb name':
             return build_name(self.data)
     if key == 'full-size headshot' and self.data.has_key('headshot'):
         return self._re_fullsizeURL.sub('', self.data.get('headshot', ''))
     return None
コード例 #26
0
ファイル: locsql.py プロジェクト: conwetlab/ezweb-gadgets
def scan_names(name_list, name1, name2, name3, results=0, ro_thresold=None,
                _scan_character=False):
    """Scan a list of names, searching for best matches against
    the given variations.

    name_list is an iterable of (key, data) pairs, where data['name'] is
    the name to compare; name1 is always used, while name2 and name3 are
    used only when they are not empty.  Every candidate gets the highest
    of its ratcliff() ratios and is kept when that ratio reaches the
    threshold (ro_thresold, or 0.6 when it is None).  For a key that
    appears more than once, only the entry with the best ratio is kept.

    Returns a list of (ratio, (key, data)) tuples sorted in descending
    order, truncated to the first `results` items when results > 0.
    """
    RO_THRESHOLD = 0.6 if ro_thresold is None else ro_thresold
    sm1 = SequenceMatcher()
    sm2 = SequenceMatcher()
    sm3 = SequenceMatcher()
    sm1.set_seq1(name1.lower())
    if name2:
        sm2.set_seq1(name2.lower())
    if name3:
        sm3.set_seq1(name3.lower())
    resd = {}
    for i, n_data in name_list:
        nil = n_data['name']
        # XXX: on Symbian, here we get a byte string; not sure this is the
        #      right place to fix it.  bytes.decode() is the same as the
        #      former unicode(...) call on Python 2 and also works on
        #      Python 3, where unicode() does not exist.
        if isinstance(nil, bytes):
            nil = nil.decode('latin1', 'ignore')
        # Distance with the canonical name.
        ratios = [ratcliff(name1, nil, sm1) + 0.05]
        namesurname = u''
        if not _scan_character:
            nils = nil.split(', ', 1)
            surname = nils[0]
            if len(nils) == 2:
                namesurname = '%s %s' % (nils[1], surname)
        else:
            nils = nil.split(' ', 1)
            surname = nils[-1]
            namesurname = nil
        if surname != nil:
            # Distance with the "Surname" in the database.
            ratios.append(ratcliff(name1, surname, sm1))
            if not _scan_character:
                ratios.append(ratcliff(name1, namesurname, sm1))
            if name2:
                ratios.append(ratcliff(name2, surname, sm2))
                # Distance with the "Name Surname" in the database.
                if namesurname:
                    ratios.append(ratcliff(name2, namesurname, sm2))
        if name3:
            # Distance with the long imdb canonical name.
            ratios.append(ratcliff(name3,
                        build_name(n_data, canonical=1), sm3) + 0.1)
        ratio = max(ratios)
        # Keep only the best-scoring entry for each key.
        if ratio >= RO_THRESHOLD and (i not in resd or ratio > resd[i][0]):
            resd[i] = (ratio, (i, n_data))
    # sorted() also accepts the dict view that values() returns on Python 3.
    res = sorted(resd.values(), reverse=True)
    if results > 0:
        res = res[:results]
    return res
コード例 #27
0
 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings.

     `o` may be a string, a list/tuple or a dictionary; lists, tuples and
     dictionary values are scanned recursively, and any other type is
     ignored.  Every reference that can be resolved to an ID is stored in
     `trefs` (Movie objects) or `nrefs` (Person objects), under each of
     the spellings handled below.  Both dictionaries are modified in
     place and returned as the tuple (trefs, nrefs).
     """
     if isinstance(o, (unicode, str)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=0)
             rtitle = build_title(a_title, ptdf=1)
             # "in" replaces dict.has_key(), removed in Python 3.
             if rtitle in trefs:
                 continue
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 # Fall back to the title exactly as it was written.
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle,
                       movieID=movieID,
                       accessSystem=self.accessSystem)
             trefs[rtitle] = m
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if rname in nrefs:
                 continue
             personID = self._getNameID(rname)
             if personID is None:
                 # Fall back to the name exactly as it was written.
                 personID = self._getNameID(name)
             if personID is None:
                 continue
             p = Person(name=rname,
                        personID=personID,
                        accessSystem=self.accessSystem)
             nrefs[rname] = p
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (list, tuple)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, dict):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)
コード例 #28
0
ファイル: locsql.py プロジェクト: conwetlab/ezweb-gadgets
 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings.

     `o` may be a string, a list/tuple or a dictionary; lists, tuples and
     dictionary values are scanned recursively, and any other type is
     ignored.  Every reference that can be resolved to an ID is stored in
     `trefs` (Movie objects) or `nrefs` (Person objects), under each of
     the spellings handled below.  Both dictionaries are modified in
     place and returned as the tuple (trefs, nrefs).
     """
     if isinstance(o, (UnicodeType, StringType)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=1)
             rtitle = build_title(a_title, canonical=1, ptdf=1)
             # "in" replaces dict.has_key(), removed in Python 3.
             if rtitle in trefs:
                 continue
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 # Fall back to the title exactly as it was written.
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle, movieID=movieID,
                         accessSystem=self.accessSystem)
             trefs[rtitle] = m
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if rname in nrefs:
                 continue
             personID = self._getNameID(rname)
             if personID is None:
                 # Fall back to the name exactly as it was written.
                 personID = self._getNameID(name)
             if personID is None:
                 continue
             p = Person(name=rname, personID=personID,
                         accessSystem=self.accessSystem)
             nrefs[rname] = p
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (ListType, TupleType)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, DictType):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)
コード例 #29
0
ファイル: __init__.py プロジェクト: conwetlab/ezweb-gadgets
 def get_imdbCharacterID(self, characterID):
     """Translate a characterID in an imdbID.
     If not in the database, try an Exact Primary Name search on IMDb;
     return None if it's unable to get the imdbID.

     An imdbID already stored for the character is returned as a
     zero-padded, seven-digit string.  Otherwise the value returned by
     character2imdbID() is passed through, after trying to store it on
     the character row.
     """
     try:
         character = CharName.get(characterID)
     except NotFoundError:
         return None
     imdbID = character.imdbID
     if imdbID is not None:
         return '%07d' % imdbID
     n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
     namline = build_name(n_dict, canonical=1)
     imdbID = self.character2imdbID(namline)
     if imdbID is not None:
         try:
             character.imdbID = int(imdbID)
         except Exception:
             # Storing the ID is best-effort: a failure must not prevent
             # returning it.  "except Exception" (instead of a bare
             # except) lets KeyboardInterrupt and SystemExit propagate.
             pass
     return imdbID
コード例 #30
0
 def get_imdbPersonID(self, personID):
     """Translate a personID in an imdbID.
     If not in the database, try an Exact Primary Name search on IMDb;
     return None if it's unable to get the imdbID.

     An imdbID already stored for the person is returned as a
     zero-padded, seven-digit string.  Otherwise the value returned by
     name2imdbID() is passed through, after trying to store it on the
     person row.
     """
     try:
         person = Name.get(personID)
     except SQLObjectNotFound:
         return None
     imdbID = person.imdbID
     if imdbID is not None:
         return '%07d' % imdbID
     n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
     namline = build_name(n_dict, canonical=1)
     imdbID = self.name2imdbID(namline)
     if imdbID is not None:
         try:
             person.imdbID = int(imdbID)
         except Exception:
             # Storing the ID is best-effort: a failure must not prevent
             # returning it.  "except Exception" (instead of a bare
             # except) lets KeyboardInterrupt and SystemExit propagate.
             pass
     return imdbID
コード例 #31
0
ファイル: locsql.py プロジェクト: conwetlab/ezweb-gadgets
def nameVariations(name, fromPtdf=0):
    """Build name variations useful for searches; if fromPtdf is true,
    the input is assumed to be in the plain text data files format.

    Returns the tuple (name1, name2, name3):
      - name1 is the name in the canonical format;
      - name2 is the name in the normal format, or an empty string when
        it is equal to name1;
      - name3 is the canonical name with the imdbIndex, or an empty
        string when it is not available.
    """
    name3 = u''
    if fromPtdf or re_nameIndex.search(name):
        # We've a name with an (imdbIndex)
        namedict = analyze_name(name, canonical=1)
        name1 = namedict['name']
        if not fromPtdf:
            name3 = build_name(namedict, canonical=1)
        elif 'imdbIndex' in namedict:
            # "in" replaces dict.has_key(), removed in Python 3.  The
            # input is used as it is.
            name3 = name
    else:
        name1 = canonicalName(name)
    name2 = normalizeName(name1)
    if name1 == name2:
        name2 = u''
    return name1, name2, name3
コード例 #32
0
def nameVariations(name, fromPtdf=0):
    """Build name variations useful for searches; if fromPtdf is true,
    the input is assumed to be in the plain text data files format.

    Returns the tuple (name1, name2, name3):
      - name1 is the name in the canonical format;
      - name2 is the name in the normal format, or an empty string when
        it is equal to name1;
      - name3 is the canonical name with the imdbIndex, or an empty
        string when it is not available.
    """
    name3 = u''
    if fromPtdf or re_nameIndex.search(name):
        # We've a name with an (imdbIndex)
        namedict = analyze_name(name, canonical=1)
        name1 = namedict['name']
        if not fromPtdf:
            name3 = build_name(namedict, canonical=1)
        elif 'imdbIndex' in namedict:
            # dict.has_key() does not exist on Python 3, hence "in".  The
            # input is used as it is.
            name3 = name
    else:
        name1 = canonicalName(name)
    name2 = normalizeName(name1)
    if name1 == name2:
        name2 = u''
    return name1, name2, name3
コード例 #33
0
 def _search_person(self, name, results):
     name = name.strip()
     if not name: return []
     name1, name2, name3 = nameVariations(name)
     res =  _scan_names('%snames.key' % self.__db,
                         name1, name2, name3, results)
     res[:] = [x[1] for x in res]
     new_res = []
     seen_PID = []
     for idx, (personID, r) in enumerate(res):
         # Remove duplicates.
         # XXX: find a way to prefer names with an AKA?  Or prefer
         #      the original name?
         if personID in seen_PID:
             continue
         else:
             seen_PID.append(personID)
         realPID = self._get_real_personID(personID)
         if personID == realPID:
             new_res.append((personID, r))
             continue
         if realPID in seen_PID:
             continue
         else:
             seen_PID.append(realPID)
         aka_name = build_name(r, canonical=1)
         real_name = getLabel(realPID, '%snames.index' % self.__db,
                             '%snames.key' % self.__db)
         if aka_name == real_name:
             new_res.append((realPID, r))
             continue
         new_r = analyze_name(real_name, canonical=1)
         new_r['akas'] = [aka_name]
         new_res.append((realPID, new_r))
     if results > 0: new_res[:] = new_res[:results]
     return new_res
コード例 #34
0
ファイル: personParser.py プロジェクト: 070499/repo-scripts
def _parseBiography(biol):
    """Parse the biographies.data file.

    `biol` is iterated as a sequence of strings, each one starting with
    a short tag such as 'DB: ' or 'SP: * ' (presumably the lines of the
    entry of a single person - confirm against the caller).  Returns a
    dictionary with the collected information; a key is present only
    when a value was found for it.
    """
    res = {}
    # The joined 'BG' lines once computed here were overwritten right away
    # by the line below, so that dead computation has been dropped.
    bio = _parseBioBy(biol)
    if bio:
        res['mini biography'] = bio

    # Tags carrying a date with optional notes: (date key, notes key).
    date_tags = {
        'DB: ': ('birth date', 'birth notes'),
        'DD: ': ('death date', 'death notes'),
    }
    # Tags whose stripped value is appended to a list.
    list_tags = {
        'SP: * ': 'spouse',
        'AT: * ': 'articles',
        'PT: * ': 'pictorials',
        'CV: * ': 'magazine covers',
        'PI: * ': 'portrayed',
    }
    for x in biol:
        x4 = x[:4]
        x6 = x[:6]
        if x4 in date_tags:
            date_key, notes_key = date_tags[x4]
            date, notes = date_and_notes(x[4:])
            if date:
                res[date_key] = date
            if notes:
                res[notes_key] = notes
        elif x6 in list_tags:
            res.setdefault(list_tags[x6], []).append(x[6:].strip())
        elif x4 == 'RN: ':
            n = x[4:].strip()
            if not n:
                continue
            res['birth name'] = build_name(analyze_name(n, canonical=1),
                                           canonical=1)
        elif x4 == 'HT: ':
            res['height'] = x[4:].strip()
        elif x4 == 'NK: ':
            res.setdefault('nick names', []).append(normalizeName(x[4:]))
        elif x6 == 'SA: * ':
            sal = x[6:].strip().replace(' -> ', '::')
            res.setdefault('salary history', []).append(sal)

    # Sections read with _parseList(); the original order is preserved.
    for tag, key in (('TR', 'trivia'), ('QU', 'quotes'),
                     ('OW', 'other works'), ('BO', 'books'),
                     ('AG', 'agent address')):
        values = _parseList(biol, tag)
        if values:
            res[key] = values
    wherenow = _parseList(biol, 'WN')
    if wherenow:
        # Only the first item is kept.
        res['where now'] = wherenow[0]
    biomovies = _parseList(biol, 'BT')
    if biomovies:
        res['biographical movies'] = biomovies
    guestapp = _buildGuests([x[6:].strip() for x in biol if x[:6] == 'GA: * '])
    if guestapp:
        res['notable tv guest appearances'] = guestapp
    tm = _parseList(biol, 'TM')
    if tm:
        res['trademarks'] = tm
    interv = _parseList(biol, 'IT')
    if interv:
        res['interviews'] = interv
    return res
コード例 #35
0
ファイル: __init__.py プロジェクト: conwetlab/ezweb-gadgets
 pinfo = _groupListBy(pinfo, 0)
 for group in pinfo:
     sect = group[0][0]
     for pdata in group:
         data = pdata[1]
         if pdata[2]: data += '::%s' % pdata[2]
         res.setdefault(sect, []).append(data)
 # AKA names.
 akan = [(an.name, an.imdbIndex)
         for an in AkaName.select(AkaName.q.personID == personID)]
 if akan:
     res['akas'] = []
     for n in akan:
         nd = {'name': n[0]}
         if n[1]: nd['imdbIndex'] = n[1]
         nt = build_name(nd, canonical=1)
         res['akas'].append(nt)
 # Do some transformation to preserve consistency with other
 # data access systems.
 for key in ('birth date', 'birth notes', 'death date', 'death notes',
             'birth name', 'height'):
     if res.has_key(key):
         res[key] = res[key][0]
 #if res.has_key('mini biography'):
 #    nl = []
 #    for i in res['mini biography']:
 #        if i[-1] == ')':
 #            sauth = i.rfind('::(author: ')
 #            if sauth != -1:
 #                nl.append(i[sauth+11:-1] + '::' + i[:sauth])
 #            else: nl.append(i)
コード例 #36
0
# 'XX: ' tags carrying a date; the value is the prefix of the result keys
# ('<prefix> date' and '<prefix> notes').
_BIO_DATE_TAGS = {'DB: ': 'birth', 'DD: ': 'death'}

# 'XX: * ' bullet tags whose stripped payload is appended, unchanged, to the
# list stored under the mapped result key.
_BIO_BULLET_TAGS = {
    'SP: * ': 'spouse',
    'AT: * ': 'articles',
    'PT: * ': 'pictorials',
    'CV: * ': 'magazine covers',
    'PI: * ': 'portrayed',
}


def _parseBiography(biol):
    """Parse the biographies.data file.

    *biol* is a sequence of strings (it is traversed several times, so it
    must be re-iterable); each line is recognised by its leading tag
    ('DB: ', 'SP: * ', ...).

    Return a dictionary containing only the keys for which some data was
    found:

    - 'mini biography': whatever _parseBioBy() returns, when truthy;
    - 'birth date', 'birth notes', 'death date', 'death notes': from the
      'DB: ' and 'DD: ' lines, split by date_and_notes();
    - 'birth name' ('RN: ', in canonical form) and 'height' ('HT: ');
    - 'spouse', 'articles', 'pictorials', 'magazine covers', 'portrayed',
      'nick names', 'salary history': lists with one item per matching line;
    - 'trivia', 'quotes', 'other works', 'books', 'agent address',
      'biographical movies', 'trademarks', 'interviews': the lists built by
      _parseList() for the corresponding tag;
    - 'where now': only the first item of the 'WN' list;
    - 'notable tv guest appearances': built by _buildGuests() from the
      'GA: * ' lines.
    """
    res = {}
    bio = _parseBioBy(biol)
    if bio:
        res['mini biography'] = bio

    for line in biol:
        tag4 = line[:4]
        tag6 = line[:6]
        if tag4 in _BIO_DATE_TAGS:
            event = _BIO_DATE_TAGS[tag4]
            date, notes = date_and_notes(line[4:])
            if date:
                res[event + ' date'] = date
            if notes:
                res[event + ' notes'] = notes
        elif tag6 in _BIO_BULLET_TAGS:
            key = _BIO_BULLET_TAGS[tag6]
            res.setdefault(key, []).append(line[6:].strip())
        elif tag4 == 'RN: ':
            name = line[4:].strip()
            # An empty 'RN: ' line carries no name: leave 'birth name' alone.
            if name:
                res['birth name'] = build_name(
                    analyze_name(name, canonical=1), canonical=1)
        elif tag4 == 'HT: ':
            res['height'] = line[4:].strip()
        elif tag4 == 'NK: ':
            # The payload is handed to normalizeName() without stripping.
            res.setdefault('nick names', []).append(normalizeName(line[4:]))
        elif tag6 == 'SA: * ':
            # 'title -> amount' is stored as 'title::amount'.
            salary = line[6:].strip().replace(' -> ', '::')
            res.setdefault('salary history', []).append(salary)

    # Sections collected by _parseList(); each one is stored as-is,
    # and only when non-empty.
    for tag, key in (('TR', 'trivia'), ('QU', 'quotes'),
                     ('OW', 'other works'), ('BO', 'books'),
                     ('AG', 'agent address')):
        items = _parseList(biol, tag)
        if items:
            res[key] = items
    wherenow = _parseList(biol, 'WN')
    if wherenow:
        # Only the first 'WN' entry is kept.
        res['where now'] = wherenow[0]
    biomovies = _parseList(biol, 'BT')
    if biomovies:
        res['biographical movies'] = biomovies
    guestapp = _buildGuests([line[6:].strip() for line in biol
                             if line[:6] == 'GA: * '])
    if guestapp:
        res['notable tv guest appearances'] = guestapp
    for tag, key in (('TM', 'trademarks'), ('IT', 'interviews')):
        items = _parseList(biol, tag)
        if items:
            res[key] = items
    return res