Python normalizeName 예제들, imdb.utils.normalizeName Python 예제들

예제 #1

0

파일 보기

파일: personParser.py 프로젝트: conwetlab/ezweb-gadgets

 def _add_items(self):
     """Store the pending biography section and reset the section state.

     When the parser is inside the content area and both a section name
     (``self._sect_name``) and section text (``self._sect_data``) are
     pending, the name is lower-cased, stripped of one trailing colon and
     mapped onto the key used in ``self._bio_data``.  The text is split on
     ``'::'`` into stripped, non-empty items.  Birth and death dates are
     stored as separate date/notes entries (only the first item is read);
     every other section is stored under its normalized name.

     ``self._sect_name``, ``self._sect_data`` and ``self._in_sect`` are
     reset on every call, whether or not something was stored.
     """
     # Add a new section in the biography.
     if self._in_content and self._sect_name and self._sect_data:
         sect = self._sect_name.strip().lower()
         # XXX: to get rid of the last colons and normalize section names.
         # endswith() rather than sect[-1]: a whitespace-only name is empty
         # after strip() and indexing it would raise IndexError.
         if sect.endswith(':'):
             sect = sect[:-1]
         sect = {
             'salary': 'salary history',
             'nickname': 'nick names',
             'where are they now': 'where now',
             'personal quotes': 'quotes',
             'date of birth': 'birth date',
             'date of death': 'death date',
         }.get(sect, sect)
         pieces = [x.strip() for x in self._sect_data.strip().split('::')]
         d_split = [x for x in pieces if x]
         # Without any usable item there is nothing to store, and the
         # branches below that read d_split[0] would raise IndexError.
         if d_split:
             # Do some transformation on some special cases.
             if sect == 'salary history':
                 newdata = []
                 for j in d_split:
                     parts = [p.strip() for p in j.split('@@@@')]
                     newdata.append('::'.join([p for p in parts if p]))
                 d_split = newdata
             elif sect == 'nick names':
                 d_split = [normalizeName(x) for x in d_split]
             elif sect == 'birth name':
                 # Single value, not a list.
                 d_split = canonicalName(d_split[0])
             elif sect == 'height':
                 # Single value, not a list.
                 d_split = d_split[0]
             elif sect == 'spouse':
                 d_split = [
                     x.replace(' (', '::(', 1).replace(' ::', '::')
                     for x in d_split
                 ]
             # Birth/death date are in both maindetails and bio pages;
             # it's safe to collect both of them.
             if sect in ('birth date', 'death date'):
                 date, notes = date_and_notes(d_split[0])
                 if date:
                     self._bio_data[sect] = date
                 if notes:
                     # 'birth notes' or 'death notes'.
                     self._bio_data[sect.split()[0] + ' notes'] = notes
             elif d_split:
                 # Multiple items are added separately (e.g.: 'trivia' is
                 # a list of strings).
                 self._bio_data[sect] = d_split
     self._sect_name = u''
     self._sect_data = u''
     self._in_sect = 0

예제 #2

0

파일 보기

파일: imdb.py 프로젝트: tijlk/qmdb

 def person_to_dict(person):
     """Convert a person object into a plain dictionary.

     Reads ``person.data['name']`` and ``person.personID`` and returns a
     dict with the keys 'canonical_name', 'name' and 'person_id' (the ID
     converted with int()).  Returns None if anything in that process
     raises an ordinary exception (missing name, non-numeric ID, ...).
     """
     try:
         raw_name = person.data['name']
         return {
             'canonical_name': utils.canonicalName(raw_name),
             'name': utils.normalizeName(raw_name),
             'person_id': int(person.personID),
         }
     except Exception:
         # Best effort: unusable records become None.  "except Exception"
         # rather than a bare "except" so that KeyboardInterrupt and
         # SystemExit are not silently swallowed.
         return None

예제 #3

0

파일 보기

 def _getitem(self, key):
     """Handle special keys.

     When ``self.data`` holds a 'name' entry, these keys are resolved:

     - 'name': the stored name passed through normalizeName();
     - 'canonical name': the stored name as is;
     - 'long imdb name': build_name(self.data, canonical=0);
     - 'long imdb canonical name': build_name(self.data).

     Any other key, or a missing 'name' entry, yields None.
     """
     data = self.data
     # Membership test instead of dict.has_key(): has_key() was removed in
     # Python 3, while ``in`` behaves the same on every version.
     if 'name' not in data:
         return None
     if key == 'name':
         return normalizeName(data['name'])
     if key == 'canonical name':
         return data['name']
     if key == 'long imdb name':
         return build_name(data, canonical=0)
     if key == 'long imdb canonical name':
         return build_name(data)
     return None

예제 #4

0

파일 보기

파일: Person.py 프로젝트: Black0wL/webtechproject

 def _getitem(self, key):
     """Handle special keys.

     The keys below are only resolved when ``self.data`` contains a
     'name' entry:

     - 'name': normalizeName() applied to the stored name;
     - 'canonical name': the stored name, unchanged;
     - 'long imdb name': build_name(self.data, canonical=0);
     - 'long imdb canonical name': build_name(self.data).

     Returns None for every other key and when no name is stored.
     """
     data = self.data
     # ``in`` replaces dict.has_key(), which does not exist in Python 3;
     # the membership test gives the same answer on Python 2.
     if 'name' in data:
         if key == 'name':
             return normalizeName(data['name'])
         if key == 'canonical name':
             return data['name']
         if key == 'long imdb name':
             return build_name(data, canonical=0)
         if key == 'long imdb canonical name':
             return build_name(data)
     return None

예제 #5

0

파일 보기

 def _getitem(self, key):
     """Resolve the special, computed keys of this object.

     Name-related keys ('name', 'canonical name', 'long imdb name',
     'long imdb canonical name') are answered only when ``self.data``
     has a 'name' entry.  'full-size headshot' is answered only when a
     'headshot' entry exists; it is the headshot value with every match
     of ``self._re_fullsizeURL`` removed.  Everything else gives None.
     """
     details = self.data
     named = 'name' in details
     if named and key == 'name':
         return normalizeName(details['name'])
     if named and key == 'canonical name':
         return details['name']
     if named and key == 'long imdb name':
         return build_name(details, canonical=False)
     if named and key == 'long imdb canonical name':
         return build_name(details)
     if key == 'full-size headshot' and 'headshot' in details:
         thumb = details.get('headshot', '')
         return self._re_fullsizeURL.sub('', thumb)
     return None

예제 #6

0

파일 보기

 def _getitem(self, key):
     """Resolve the special, computed keys of this object.

     With a 'name' entry in ``self.data``: 'name' goes through
     normalizeName(), 'canonical name' through canonicalName(), and the
     two 'long imdb ...' keys through build_name() with ``canonical``
     set to False or True respectively.  'full-size headshot' always
     delegates to ``self.get_fullsizeURL()``.  Other keys give None.
     """
     record = self.data
     if 'name' in record:
         if key == 'name':
             return normalizeName(record['name'])
         if key == 'canonical name':
             return canonicalName(record['name'])
         if key == 'long imdb name':
             return build_name(record, canonical=False)
         if key == 'long imdb canonical name':
             return build_name(record, canonical=True)
     if key != 'full-size headshot':
         return None
     return self.get_fullsizeURL()

예제 #7

0

파일 보기

파일: __init__.py 프로젝트: 070499/repo-scripts

 def _search_character(self, name, results):
     """Search the characters key file for ``name``.

     A blank ``name`` gives an empty list.  Otherwise the name extracted
     by analyze_name() is normalized, a "last word first" variation is
     built when it differs from the normalized name, and both are handed
     to _scan_names() together with ``results``.  Only the second
     element of every entry returned by _scan_names() is kept.
     """
     query = name.strip()
     if not query:
         return []
     s_name = normalizeName(analyze_name(query)['name'])
     words = s_name.split()
     # Variation with the last word moved to the front; left empty when
     # there is a single word or the variation equals the name itself.
     swapped = u''
     if len(words) > 1:
         candidate = '%s %s' % (words[-1], ' '.join(words[:-1]))
         if candidate != s_name:
             swapped = candidate
     key_file = '%scharacters.key' % self.__db
     matches = _scan_names(key_file, s_name, swapped, u'', results,
                           _scan_character=1)
     # Replace the entries in place, keeping the same list object.
     matches[:] = [entry[1] for entry in matches]
     return matches

예제 #8

0

파일 보기

파일: Person.py 프로젝트: GodZZila/SickRage

 def _getitem(self, key):
     """Handle special keys.

     With a "name" entry in ``self.data``:

     - "name": the stored name passed through normalizeName();
     - "canonical name": the stored name as is;
     - "long imdb name": build_name(self.data, canonical=0);
     - "long imdb canonical name": build_name(self.data).

     With a "headshot" entry, "full-size headshot" is the headshot value
     with every match of ``self._re_fullsizeURL`` removed.

     Returns None in every other case.
     """
     data = self.data
     # Membership tests instead of dict.has_key(): has_key() was removed
     # in Python 3, while ``in`` behaves the same on every version.
     if "name" in data:
         if key == "name":
             return normalizeName(data["name"])
         if key == "canonical name":
             return data["name"]
         if key == "long imdb name":
             return build_name(data, canonical=0)
         if key == "long imdb canonical name":
             return build_name(data)
     if key == "full-size headshot" and "headshot" in data:
         return self._re_fullsizeURL.sub("", data.get("headshot", ""))
     return None

예제 #9

0

파일 보기

파일: Person.py 프로젝트: alberanid/imdbpy

 def _getitem(self, key):
     """Resolve the special, computed keys of this object.

     With a 'name' entry in ``self.data``: 'name' goes through
     normalizeName(), 'canonical name' is the stored value itself, and
     the two 'long imdb ...' keys go through build_name() (with
     ``canonical=False`` for the non-canonical form).
     'full-size headshot' always delegates to ``self.get_fullsizeURL()``.
     Other keys give None.
     """
     info = self.data
     named = 'name' in info
     if named and key == 'name':
         return normalizeName(info['name'])
     if named and key == 'canonical name':
         return info['name']
     if named and key == 'long imdb name':
         return build_name(info, canonical=False)
     if named and key == 'long imdb canonical name':
         return build_name(info)
     if key != 'full-size headshot':
         return None
     return self.get_fullsizeURL()

예제 #10

0

파일 보기

파일: Person.py 프로젝트: Elettronik/SickRage

 def _getitem(self, key):
     """Handle special keys.

     Name-related keys are resolved only when ``self.data`` contains a
     'name' entry:

     - 'name': normalizeName() applied to the stored name;
     - 'canonical name': the stored name, unchanged;
     - 'long imdb name': build_name(self.data, canonical=0);
     - 'long imdb canonical name': build_name(self.data).

     'full-size headshot' is resolved only when a 'headshot' entry
     exists: the headshot value with every match of
     ``self._re_fullsizeURL`` removed.

     Returns None otherwise.
     """
     data = self.data
     # ``in`` replaces dict.has_key(), which does not exist in Python 3;
     # the membership test gives the same answer on Python 2.
     has_name = 'name' in data
     if has_name and key == 'name':
         return normalizeName(data['name'])
     if has_name and key == 'canonical name':
         return data['name']
     if has_name and key == 'long imdb name':
         return build_name(data, canonical=0)
     if has_name and key == 'long imdb canonical name':
         return build_name(data)
     if key == 'full-size headshot' and 'headshot' in data:
         return self._re_fullsizeURL.sub('', data.get('headshot', ''))
     return None

예제 #11

0

파일 보기

 def _search_character(self, name, results):
     """Search the characters key file for ``name``.

     Returns an empty list for a blank ``name``.  Otherwise normalizes
     the name extracted by analyze_name(), derives a variation with the
     last word moved to the front (dropped when identical to the
     normalized name), and passes both to _scan_names() along with
     ``results``.  The returned list keeps only element 1 of each entry
     produced by _scan_names().
     """
     cleaned = name.strip()
     if not cleaned:
         return []
     s_name = normalizeName(analyze_name(cleaned)['name'])
     tokens = s_name.split()
     if len(tokens) > 1:
         name2 = '%s %s' % (tokens[-1], ' '.join(tokens[:-1]))
     else:
         name2 = u''
     if name2 and name2 == s_name:
         # The variation adds nothing when it equals the name itself.
         name2 = u''
     found = _scan_names('%scharacters.key' % self.__db,
                         s_name, name2, u'', results, _scan_character=1)
     # Strip each entry down to its second element, in place.
     found[:] = [item[1] for item in found]
     return found

예제 #12

0

파일 보기

 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings.

     ``o`` may be a string, a list/tuple or a dict; containers are
     walked recursively (dicts through their values).  Every match of
     ``re_titleRef`` / ``re_nameRef`` found in a string whose ID can be
     resolved is turned into a Movie / Person object and registered in
     ``trefs`` / ``nrefs`` under the rebuilt title or name, plus the
     alternative spellings computed below when they differ.

     Both mappings are updated in place and returned as the tuple
     ``(trefs, nrefs)``.
     """
     if isinstance(o, (unicode, str)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=0)
             rtitle = build_title(a_title, ptdf=1)
             # ``in`` instead of dict.has_key(): same membership test,
             # but has_key() no longer exists in Python 3.
             if rtitle in trefs:
                 continue
             # Try the rebuilt title first, then the text as matched.
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle,
                       movieID=movieID,
                       accessSystem=self.accessSystem)
             trefs[rtitle] = m
             # Also register the other spellings of the same title.
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if rname in nrefs:
                 continue
             # Try the rebuilt name first, then the text as matched.
             personID = self._getNameID(rname)
             if personID is None:
                 personID = self._getNameID(name)
             if personID is None:
                 continue
             p = Person(name=rname,
                        personID=personID,
                        accessSystem=self.accessSystem)
             nrefs[rname] = p
             # Also register the other spellings of the same name.
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (list, tuple)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, dict):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)

예제 #13

0

파일 보기

파일: locsql.py 프로젝트: conwetlab/ezweb-gadgets

 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings.

     ``o`` may be a string, a list/tuple or a dict; containers are
     walked recursively (dicts through their values).  For every match
     of ``re_titleRef`` / ``re_nameRef`` in a string whose ID can be
     resolved, a Movie / Person object is created and stored in
     ``trefs`` / ``nrefs`` under the rebuilt canonical title or name,
     and under the alternative spellings computed below when they
     differ.

     Both mappings are modified in place and returned as the tuple
     ``(trefs, nrefs)``.
     """
     if isinstance(o, (UnicodeType, StringType)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=1)
             rtitle = build_title(a_title, canonical=1, ptdf=1)
             # ``in`` instead of dict.has_key(): same membership test,
             # but has_key() no longer exists in Python 3.
             if rtitle in trefs:
                 continue
             # Look up the rebuilt title first, then the matched text.
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle, movieID=movieID,
                         accessSystem=self.accessSystem)
             trefs[rtitle] = m
             # Register the remaining spellings of the same title.
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if rname in nrefs:
                 continue
             # Look up the rebuilt name first, then the matched text.
             personID = self._getNameID(rname)
             if personID is None:
                 personID = self._getNameID(name)
             if personID is None:
                 continue
             p = Person(name=rname, personID=personID,
                         accessSystem=self.accessSystem)
             nrefs[rname] = p
             # Register the remaining spellings of the same name.
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (ListType, TupleType)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, DictType):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)

예제 #14

0

파일 보기

파일: __init__.py 프로젝트: conwetlab/ezweb-gadgets

 def _search_character(self, name, results):
     """Select character names whose soundex codes match ``name``.

     Returns an empty list when ``name`` is blank or when analyze_name()
     extracts an empty name from it.  Otherwise a query condition is
     built from the soundex codes of the normalized name, of its last
     word and of the "last word first" variation, and the matching
     CharName rows are collected as
     ``(id, {'name': ..., 'imdbIndex': ...})`` pairs.

     Raises IMDbDataAccessError when the select raises NotFoundError.

     NOTE(review): in the lines shown, ``results`` is never used and
     nothing is returned after ``qr`` is built -- this listing looks
     truncated; confirm against the full source.
     """
     name = name.strip()
     if not name: return []
     s_name = analyze_name(name)['name']
     if not s_name: return []
     # Unicode input is reduced to ASCII (characters that cannot be
     # encoded are dropped) before normalizing and computing soundex.
     if isinstance(s_name, UnicodeType):
         s_name = s_name.encode('ascii', 'ignore')
     s_name = normalizeName(s_name)
     soundexCode = soundex(s_name)
     # The surname is taken to be the last space-separated word.
     surname = s_name.split(' ')[-1]
     surnameSoundex = soundex(surname)
     # name2 is the name with its last word moved to the front; it is
     # left empty (and soundexName2 None) when that changes nothing.
     name2 = ''
     soundexName2 = None
     nsplit = s_name.split()
     if len(nsplit) > 1:
         name2 = '%s %s' % (nsplit[-1], ' '.join(nsplit[:-1]))
         if s_name == name2:
             name2 = ''
         else:
             soundexName2 = soundex(name2)
     # If the soundex is None, compare only with the first
     # phoneticCode column.
     if soundexCode is not None:
         if soundexName2 is not None:
             condition = OR(
                 surnameSoundex == CharName.q.surnamePcode,
                 IN(CharName.q.namePcodeNf, [soundexCode, soundexName2]),
                 IN(CharName.q.surnamePcode, [soundexCode, soundexName2]))
         else:
             condition = OR(
                 surnameSoundex == CharName.q.surnamePcode,
                 IN(soundexCode,
                    [CharName.q.namePcodeNf, CharName.q.surnamePcode]))
     else:
         # NOTE(review): this branch references Name while every other
         # branch uses CharName -- possibly a copy-paste slip; confirm.
         condition = ISNULL(Name.q.namePcodeNf)
     try:
         qr = [(q.id, {
             'name': q.name,
             'imdbIndex': q.imdbIndex
         }) for q in CharName.select(condition)]
     except NotFoundError, e:
         # Re-raise as the data-access error, keeping the original text.
         raise IMDbDataAccessError, \
                 'unable to search the database: "%s"' % str(e)

예제 #15

0

파일 보기

파일: locsql.py 프로젝트: conwetlab/ezweb-gadgets

def nameVariations(name, fromPtdf=0):
    """Build name variations useful for searches; if fromPtdf is true,
    the input is assumed to be in the plain text data files format.

    Returns the tuple ``(name1, name2, name3)``:

    - name1: the name in the canonical format;
    - name2: normalizeName(name1), or an empty string when it equals
      name1;
    - name3: the canonical name with the imdbIndex, or an empty string
      when it is not available.
    """
    name3 = u''
    if fromPtdf or re_nameIndex.search(name):
        # We've a name with an (imdbIndex)
        namedict = analyze_name(name, canonical=1)
        # name1 is the name in the canonical format.
        name1 = namedict['name']
        # name3 is the canonical name with the imdbIndex.
        if not fromPtdf:
            name3 = build_name(namedict, canonical=1)
        elif 'imdbIndex' in namedict:
            # ``in`` instead of dict.has_key(), which was removed in
            # Python 3; the input is used as is in this case.
            name3 = name
    else:
        # name1 is the name in the canonical format.
        name1 = canonicalName(name)
    # name2 is the name in the normal format, if it differs from name1.
    name2 = normalizeName(name1)
    if name1 == name2:
        name2 = u''
    return name1, name2, name3

예제 #16

0

파일 보기

def nameVariations(name, fromPtdf=0):
    """Build name variations useful for searches; if fromPtdf is true,
    the input is assumed to be in the plain text data files format.

    The result is a 3-tuple:

    - name1: the name in the canonical format;
    - name2: the name in the normal format (normalizeName(name1)), or
      an empty string when it does not differ from name1;
    - name3: the canonical name with the imdbIndex, or an empty string
      when there is none.
    """
    if fromPtdf or re_nameIndex.search(name):
        # We've a name with an (imdbIndex)
        namedict = analyze_name(name, canonical=1)
        # name1 is the name in the canonical format.
        name1 = namedict['name']
        # name3 is the canonical name with the imdbIndex.
        if fromPtdf:
            # Membership test instead of dict.has_key(), which does not
            # exist in Python 3.
            name3 = name if 'imdbIndex' in namedict else u''
        else:
            name3 = build_name(namedict, canonical=1)
    else:
        # name1 is the name in the canonical format.
        name1 = canonicalName(name)
        name3 = u''
    # name2 is the name in the normal format, if it differs from name1.
    name2 = normalizeName(name1)
    if name1 == name2:
        name2 = u''
    return name1, name2, name3

예제 #17

0

파일 보기

파일: personParser.py 프로젝트: 070499/repo-scripts

def _parseBiography(biol):
    """Parse the biographies.data file.

    ``biol`` is iterated as a sequence of lines, each starting with a
    short tag ('DB: ', 'SP: * ', ...).  Returns a dictionary holding the
    entries found: single values for the dates, their notes, the birth
    name, the height and 'where now'; lists for the repeatable entries.
    Keys for which nothing was found are left out.
    """
    res = {}
    # The result of the former "' '.join(_parseList(biol, 'BG', mline=0))"
    # was overwritten right away by this call, so that dead computation
    # has been dropped.
    bio = _parseBioBy(biol)
    if bio:
        res['mini biography'] = bio

    # Tags whose (stripped) payload is simply appended to a list.
    list_tags = {
        'SP: * ': 'spouse',
        'AT: * ': 'articles',
        'PT: * ': 'pictorials',
        'CV: * ': 'magazine covers',
        'PI: * ': 'portrayed',
    }
    # Tags carrying a date, optionally followed by notes.
    date_tags = {'DB: ': 'birth', 'DD: ': 'death'}

    for x in biol:
        x4 = x[:4]
        x6 = x[:6]
        if x4 in date_tags:
            date, notes = date_and_notes(x[4:])
            if date:
                res[date_tags[x4] + ' date'] = date
            if notes:
                res[date_tags[x4] + ' notes'] = notes
        elif x6 in list_tags:
            res.setdefault(list_tags[x6], []).append(x[6:].strip())
        elif x4 == 'RN: ':
            n = x[4:].strip()
            if n:
                res['birth name'] = build_name(
                    analyze_name(n, canonical=1), canonical=1)
        elif x4 == 'HT: ':
            res['height'] = x[4:].strip()
        elif x4 == 'NK: ':
            res.setdefault('nick names', []).append(normalizeName(x[4:]))
        elif x6 == 'SA: * ':
            # ' -> ' separators are stored as '::'.
            sal = x[6:].strip().replace(' -> ', '::')
            res.setdefault('salary history', []).append(sal)

    trl = _parseList(biol, 'TR')
    if trl:
        res['trivia'] = trl
    quotes = _parseList(biol, 'QU')
    if quotes:
        res['quotes'] = quotes
    otherworks = _parseList(biol, 'OW')
    if otherworks:
        res['other works'] = otherworks
    books = _parseList(biol, 'BO')
    if books:
        res['books'] = books
    agent = _parseList(biol, 'AG')
    if agent:
        res['agent address'] = agent
    wherenow = _parseList(biol, 'WN')
    if wherenow:
        # Only the first entry is kept.
        res['where now'] = wherenow[0]
    biomovies = _parseList(biol, 'BT')
    if biomovies:
        res['biographical movies'] = biomovies
    guestapp = _buildGuests([x[6:].strip() for x in biol if x[:6] == 'GA: * '])
    if guestapp:
        res['notable tv guest appearances'] = guestapp
    tm = _parseList(biol, 'TM')
    if tm:
        res['trademarks'] = tm
    interv = _parseList(biol, 'IT')
    if interv:
        res['interviews'] = interv
    return res

예제 #18

0

파일 보기

파일: personParser.py 프로젝트: conwetlab/ezweb-gadgets

 def _add_items(self):
     """Store the pending biography section and reset the section state.

     Nothing is stored unless the parser is inside the content area and
     both ``self._sect_name`` and ``self._sect_data`` are set.  The
     section name is lower-cased, stripped of one trailing colon and
     mapped onto the key used in ``self._bio_data``; the section text is
     split on ``'::'`` into stripped, non-empty items.  Birth and death
     dates end up as separate date/notes entries (only the first item is
     read); all other sections are stored under their normalized name.

     ``self._sect_name``, ``self._sect_data`` and ``self._in_sect`` are
     reset on every call.
     """
     # Add a new section in the biography.
     if self._in_content and self._sect_name and self._sect_data:
         sect = self._sect_name.strip().lower()
         # XXX: to get rid of the last colons and normalize section names.
         # endswith() is safe on an empty string, whereas sect[-1] raised
         # IndexError for a name made only of whitespace.
         if sect.endswith(':'):
             sect = sect[:-1]
         renamed = {
             'salary': 'salary history',
             'nickname': 'nick names',
             'where are they now': 'where now',
             'personal quotes': 'quotes',
             'date of birth': 'birth date',
             'date of death': 'death date',
         }
         sect = renamed.get(sect, sect)
         stripped = [x.strip() for x in self._sect_data.strip().split('::')]
         d_split = [x for x in stripped if x]
         # No usable item: nothing to store, and reading d_split[0] in
         # the branches below would raise IndexError.
         if d_split:
             # Do some transformation on some special cases.
             if sect == 'salary history':
                 newdata = []
                 for j in d_split:
                     fields = [f.strip() for f in j.split('@@@@')]
                     newdata.append('::'.join([f for f in fields if f]))
                 d_split = newdata
             elif sect == 'nick names':
                 d_split = [normalizeName(x) for x in d_split]
             elif sect == 'birth name':
                 # Single value, not a list.
                 d_split = canonicalName(d_split[0])
             elif sect == 'height':
                 # Single value, not a list.
                 d_split = d_split[0]
             elif sect == 'spouse':
                 d_split = [
                     x.replace(' (', '::(', 1).replace(' ::', '::')
                     for x in d_split
                 ]
             # Birth/death date are in both maindetails and bio pages;
             # it's safe to collect both of them.
             if sect == 'birth date' or sect == 'death date':
                 date, notes = date_and_notes(d_split[0])
                 if date:
                     self._bio_data[sect] = date
                 if notes:
                     # 'birth notes' or 'death notes'.
                     self._bio_data[sect[:5] + ' notes'] = notes
             elif d_split:
                 # Multiple items are added separately (e.g.: 'trivia' is
                 # a list of strings).
                 self._bio_data[sect] = d_split
     self._sect_name = u''
     self._sect_data = u''
     self._in_sect = 0

예제 #19

0

파일 보기

def _parseBiography(biol):
    """Parse the biographies.data file.

    ``biol`` is iterated as a sequence of tagged lines ('DB: ',
    'SP: * ', ...).  The returned dictionary contains one entry per kind
    of information found: single values for the birth/death dates and
    notes, the birth name, the height and 'where now'; lists for the
    repeatable entries.  Keys with no data are omitted.
    """
    res = {}
    # An earlier "' '.join(_parseList(biol, 'BG', mline=0))" assignment
    # was immediately overwritten by this one; the dead computation and
    # the commented-out legacy date parsing have been removed.
    bio = _parseBioBy(biol)
    if bio:
        res['mini biography'] = bio

    for x in biol:
        x4 = x[:4]
        x6 = x[:6]
        if x4 == 'DB: ':
            date, notes = date_and_notes(x[4:])
            if date:
                res['birth date'] = date
            if notes:
                res['birth notes'] = notes
        elif x4 == 'DD: ':
            date, notes = date_and_notes(x[4:])
            if date:
                res['death date'] = date
            if notes:
                res['death notes'] = notes
        elif x6 == 'SP: * ':
            res.setdefault('spouse', []).append(x[6:].strip())
        elif x4 == 'RN: ':
            n = x[4:].strip()
            if not n:
                continue
            rn = build_name(analyze_name(n, canonical=1), canonical=1)
            res['birth name'] = rn
        elif x6 == 'AT: * ':
            res.setdefault('articles', []).append(x[6:].strip())
        elif x4 == 'HT: ':
            res['height'] = x[4:].strip()
        elif x6 == 'PT: * ':
            res.setdefault('pictorials', []).append(x[6:].strip())
        elif x6 == 'CV: * ':
            res.setdefault('magazine covers', []).append(x[6:].strip())
        elif x4 == 'NK: ':
            res.setdefault('nick names', []).append(normalizeName(x[4:]))
        elif x6 == 'PI: * ':
            res.setdefault('portrayed', []).append(x[6:].strip())
        elif x6 == 'SA: * ':
            # ' -> ' separators are stored as '::'.
            sal = x[6:].strip().replace(' -> ', '::')
            res.setdefault('salary history', []).append(sal)

    trl = _parseList(biol, 'TR')
    if trl:
        res['trivia'] = trl
    quotes = _parseList(biol, 'QU')
    if quotes:
        res['quotes'] = quotes
    otherworks = _parseList(biol, 'OW')
    if otherworks:
        res['other works'] = otherworks
    books = _parseList(biol, 'BO')
    if books:
        res['books'] = books
    agent = _parseList(biol, 'AG')
    if agent:
        res['agent address'] = agent
    wherenow = _parseList(biol, 'WN')
    if wherenow:
        # Only the first entry is kept.
        res['where now'] = wherenow[0]
    biomovies = _parseList(biol, 'BT')
    if biomovies:
        res['biographical movies'] = biomovies
    guestapp = _buildGuests([x[6:].strip() for x in biol if x[:6] == 'GA: * '])
    if guestapp:
        res['notable tv guest appearances'] = guestapp
    tm = _parseList(biol, 'TM')
    if tm:
        res['trademarks'] = tm
    interv = _parseList(biol, 'IT')
    if interv:
        res['interviews'] = interv
    return res