def _add_items(self): # Add a new section in the biography. if self._in_content and self._sect_name and self._sect_data: sect = self._sect_name.strip().lower() # XXX: to get rid of the last colons and normalize section names. if sect[-1] == ':': sect = sect[:-1] if sect == 'salary': sect = 'salary history' elif sect == 'nickname': sect = 'nick names' elif sect == 'where are they now': sect = 'where now' elif sect == 'personal quotes': sect = 'quotes' elif sect == 'date of birth': sect = 'birth date' elif sect == 'date of death': sect = 'death date' data = self._sect_data.strip() d_split = data.split('::') d_split[:] = filter(None, [x.strip() for x in d_split]) # Do some transformation on some special cases. if sect == 'salary history': newdata = [] for j in d_split: j = filter(None, [x.strip() for x in j.split('@@@@')]) newdata.append('::'.join(j)) d_split[:] = newdata elif sect == 'nick names': d_split[:] = [normalizeName(x) for x in d_split] elif sect == 'birth name': d_split = canonicalName(d_split[0]) elif sect == 'height': d_split = d_split[0] elif sect == 'spouse': d_split[:] = [x.replace(' (', '::(', 1).replace(' ::', '::') for x in d_split] # Birth/death date are in both maindetails and bio pages; # it's safe to collect both of them. if sect == 'birth date': date, notes = date_and_notes(d_split[0]) if date: self._bio_data['birth date'] = date if notes: self._bio_data['birth notes'] = notes elif sect == 'death date': date, notes = date_and_notes(d_split[0]) if date: self._bio_data['death date'] = date if notes: self._bio_data['death notes'] = notes elif d_split: # Multiple items are added separately (e.g.: 'trivia' is # a list of strings). self._bio_data[sect] = d_split self._sect_name = u'' self._sect_data = u'' self._in_sect = 0
def person_to_dict(person):
    """Convert a person object into a plain dictionary.

    Reads ``person.data['name']`` and ``person.personID`` and returns a
    dict with the keys ``'canonical_name'``, ``'name'`` and ``'person_id'``
    (the latter converted with ``int``).

    Returns ``None`` when the conversion fails for any ordinary error
    (missing attribute or key, non-numeric ID, ...).  Unlike a bare
    ``except``, interpreter-level signals such as ``KeyboardInterrupt``
    and ``SystemExit`` are not swallowed.
    """
    try:
        full_name = person.data['name']
        return {
            'canonical_name': utils.canonicalName(full_name),
            'name': utils.normalizeName(full_name),
            'person_id': int(person.personID),
        }
    except Exception:
        # Best-effort conversion: callers get None for unusable persons.
        return None
def _getitem(self, key): """Handle special keys.""" if self.data.has_key('name'): if key == 'name': return normalizeName(self.data['name']) elif key == 'canonical name': return self.data['name'] elif key == 'long imdb name': return build_name(self.data, canonical=0) elif key == 'long imdb canonical name': return build_name(self.data) return None
def _getitem(self, key): """Handle special keys.""" if 'name' in self.data: if key == 'name': return normalizeName(self.data['name']) elif key == 'canonical name': return self.data['name'] elif key == 'long imdb name': return build_name(self.data, canonical=False) elif key == 'long imdb canonical name': return build_name(self.data) if key == 'full-size headshot' and 'headshot' in self.data: return self._re_fullsizeURL.sub('', self.data.get('headshot', '')) return None
def _getitem(self, key): """Handle special keys.""" if 'name' in self.data: if key == 'name': return normalizeName(self.data['name']) elif key == 'canonical name': return canonicalName(self.data['name']) elif key == 'long imdb name': return build_name(self.data, canonical=False) elif key == 'long imdb canonical name': return build_name(self.data, canonical=True) if key == 'full-size headshot': return self.get_fullsizeURL() return None
def _search_character(self, name, results): name = name.strip() if not name: return [] s_name = normalizeName(analyze_name(name)['name']) nsplit = s_name.split() name2 = u'' if len(nsplit) > 1: name2 = '%s %s' % (nsplit[-1], ' '.join(nsplit[:-1])) if s_name == name2: name2 = u'' res = _scan_names('%scharacters.key' % self.__db, s_name, name2, u'', results, _scan_character=1) res[:] = [x[1] for x in res] return res
def _getitem(self, key): """Handle special keys.""" if self.data.has_key("name"): if key == "name": return normalizeName(self.data["name"]) elif key == "canonical name": return self.data["name"] elif key == "long imdb name": return build_name(self.data, canonical=0) elif key == "long imdb canonical name": return build_name(self.data) if key == "full-size headshot" and self.data.has_key("headshot"): return self._re_fullsizeURL.sub("", self.data.get("headshot", "")) return None
def _getitem(self, key): """Handle special keys.""" if 'name' in self.data: if key == 'name': return normalizeName(self.data['name']) elif key == 'canonical name': return self.data['name'] elif key == 'long imdb name': return build_name(self.data, canonical=False) elif key == 'long imdb canonical name': return build_name(self.data) if key == 'full-size headshot': return self.get_fullsizeURL() return None
def _getitem(self, key): """Handle special keys.""" if self.data.has_key('name'): if key == 'name': return normalizeName(self.data['name']) elif key == 'canonical name': return self.data['name'] elif key == 'long imdb name': return build_name(self.data, canonical=0) elif key == 'long imdb canonical name': return build_name(self.data) if key == 'full-size headshot' and self.data.has_key('headshot'): return self._re_fullsizeURL.sub('', self.data.get('headshot', '')) return None
def _findRefs(self, o, trefs, nrefs): """Find titles or names references in strings.""" if isinstance(o, (unicode, str)): for title in re_titleRef.findall(o): a_title = analyze_title(title, canonical=0) rtitle = build_title(a_title, ptdf=1) if trefs.has_key(rtitle): continue movieID = self._getTitleID(rtitle) if movieID is None: movieID = self._getTitleID(title) if movieID is None: continue m = Movie(title=rtitle, movieID=movieID, accessSystem=self.accessSystem) trefs[rtitle] = m rtitle2 = canonicalTitle(a_title.get('title', u'')) if rtitle2 and rtitle2 != rtitle and rtitle2 != title: trefs[rtitle2] = m if title != rtitle: trefs[title] = m for name in re_nameRef.findall(o): a_name = analyze_name(name, canonical=1) rname = build_name(a_name, canonical=1) if nrefs.has_key(rname): continue personID = self._getNameID(rname) if personID is None: personID = self._getNameID(name) if personID is None: continue p = Person(name=rname, personID=personID, accessSystem=self.accessSystem) nrefs[rname] = p rname2 = normalizeName(a_name.get('name', u'')) if rname2 and rname2 != rname: nrefs[rname2] = p if name != rname and name != rname2: nrefs[name] = p elif isinstance(o, (list, tuple)): for item in o: self._findRefs(item, trefs, nrefs) elif isinstance(o, dict): for value in o.values(): self._findRefs(value, trefs, nrefs) return (trefs, nrefs)
def _findRefs(self, o, trefs, nrefs): """Find titles or names references in strings.""" if isinstance(o, (UnicodeType, StringType)): for title in re_titleRef.findall(o): a_title = analyze_title(title, canonical=1) rtitle = build_title(a_title, canonical=1, ptdf=1) if trefs.has_key(rtitle): continue movieID = self._getTitleID(rtitle) if movieID is None: movieID = self._getTitleID(title) if movieID is None: continue m = Movie(title=rtitle, movieID=movieID, accessSystem=self.accessSystem) trefs[rtitle] = m rtitle2 = canonicalTitle(a_title.get('title', u'')) if rtitle2 and rtitle2 != rtitle and rtitle2 != title: trefs[rtitle2] = m if title != rtitle: trefs[title] = m for name in re_nameRef.findall(o): a_name = analyze_name(name, canonical=1) rname = build_name(a_name, canonical=1) if nrefs.has_key(rname): continue personID = self._getNameID(rname) if personID is None: personID = self._getNameID(name) if personID is None: continue p = Person(name=rname, personID=personID, accessSystem=self.accessSystem) nrefs[rname] = p rname2 = normalizeName(a_name.get('name', u'')) if rname2 and rname2 != rname: nrefs[rname2] = p if name != rname and name != rname2: nrefs[name] = p elif isinstance(o, (ListType, TupleType)): for item in o: self._findRefs(item, trefs, nrefs) elif isinstance(o, DictType): for value in o.values(): self._findRefs(value, trefs, nrefs) return (trefs, nrefs)
def _search_character(self, name, results): name = name.strip() if not name: return [] s_name = analyze_name(name)['name'] if not s_name: return [] if isinstance(s_name, UnicodeType): s_name = s_name.encode('ascii', 'ignore') s_name = normalizeName(s_name) soundexCode = soundex(s_name) surname = s_name.split(' ')[-1] surnameSoundex = soundex(surname) name2 = '' soundexName2 = None nsplit = s_name.split() if len(nsplit) > 1: name2 = '%s %s' % (nsplit[-1], ' '.join(nsplit[:-1])) if s_name == name2: name2 = '' else: soundexName2 = soundex(name2) # If the soundex is None, compare only with the first # phoneticCode column. if soundexCode is not None: if soundexName2 is not None: condition = OR( surnameSoundex == CharName.q.surnamePcode, IN(CharName.q.namePcodeNf, [soundexCode, soundexName2]), IN(CharName.q.surnamePcode, [soundexCode, soundexName2])) else: condition = OR( surnameSoundex == CharName.q.surnamePcode, IN(soundexCode, [CharName.q.namePcodeNf, CharName.q.surnamePcode])) else: condition = ISNULL(Name.q.namePcodeNf) try: qr = [(q.id, { 'name': q.name, 'imdbIndex': q.imdbIndex }) for q in CharName.select(condition)] except NotFoundError, e: raise IMDbDataAccessError, \ 'unable to search the database: "%s"' % str(e)
def nameVariations(name, fromPtdf=0):
    """Build name variations useful for searches; if fromPtdf is true,
    the input is assumed to be in the plain text data files format.

    Returns the tuple ``(name1, name2, name3)``:

    - name1: the name in the canonical format;
    - name2: normalizeName(name1), or an empty string when it equals
      name1;
    - name3: the canonical name with the imdbIndex, or an empty string
      when no index is involved.
    """
    name1 = name2 = name3 = u''
    if fromPtdf or re_nameIndex.search(name):
        # We've a name with an (imdbIndex)
        namedict = analyze_name(name, canonical=1)
        # name1 is the name in the canonical format.
        name1 = namedict['name']
        # name3 is the canonical name with the imdbIndex.
        # NOTE(review): the 'else' is assumed to pair with
        # 'if fromPtdf'; the original layout was lost.
        if fromPtdf:
            # The input is used unchanged when it carries an index.
            if 'imdbIndex' in namedict:
                name3 = name
        else:
            name3 = build_name(namedict, canonical=1)
    else:
        # name1 is the name in the canonical format; name3 keeps its
        # initial empty value.
        name1 = canonicalName(name)
    # name2 is the name in the normal format, if it differs from name1.
    name2 = normalizeName(name1)
    if name1 == name2:
        name2 = u''
    return name1, name2, name3
def _parseBiography(biol):
    """Parse the biographies.data file.

    ``biol`` is the sequence of lines of one biography entry; each line
    starts with a two-letter code followed by ``': '`` (or ``': * '`` for
    list items).  Returns a dictionary with the collected information;
    keys are only present when the corresponding data was found.
    """
    res = {}
    # The mini biography comes from _parseBioBy alone; the former
    # ' '.join(_parseList(biol, 'BG', mline=0)) result was overwritten
    # before being used and has been dropped.
    bio = _parseBioBy(biol)
    if bio:
        res['mini biography'] = bio
    # Four-character prefixes carrying a date with optional notes.
    date_prefixes = {'DB: ': 'birth', 'DD: ': 'death'}
    # Six-character prefixes whose stripped payload is appended to a list.
    list_prefixes = {
        'SP: * ': 'spouse',
        'AT: * ': 'articles',
        'PT: * ': 'pictorials',
        'CV: * ': 'magazine covers',
        'PI: * ': 'portrayed',
    }
    for x in biol:
        x4 = x[:4]
        x6 = x[:6]
        if x4 in date_prefixes:
            event = date_prefixes[x4]
            date, notes = date_and_notes(x[4:])
            if date:
                res['%s date' % event] = date
            if notes:
                res['%s notes' % event] = notes
        elif x6 in list_prefixes:
            res.setdefault(list_prefixes[x6], []).append(x[6:].strip())
        elif x4 == 'RN: ':
            n = x[4:].strip()
            if not n:
                continue
            rn = build_name(analyze_name(n, canonical=1), canonical=1)
            res['birth name'] = rn
        elif x4 == 'HT: ':
            res['height'] = x[4:].strip()
        elif x4 == 'NK: ':
            res.setdefault('nick names', []).append(normalizeName(x[4:]))
        elif x6 == 'SA: * ':
            # ' -> ' separates the fields of a salary entry.
            sal = x[6:].strip().replace(' -> ', '::')
            res.setdefault('salary history', []).append(sal)
    trl = _parseList(biol, 'TR')
    if trl:
        res['trivia'] = trl
    quotes = _parseList(biol, 'QU')
    if quotes:
        res['quotes'] = quotes
    otherworks = _parseList(biol, 'OW')
    if otherworks:
        res['other works'] = otherworks
    books = _parseList(biol, 'BO')
    if books:
        res['books'] = books
    agent = _parseList(biol, 'AG')
    if agent:
        res['agent address'] = agent
    wherenow = _parseList(biol, 'WN')
    if wherenow:
        # Only the first entry is kept.
        res['where now'] = wherenow[0]
    biomovies = _parseList(biol, 'BT')
    if biomovies:
        res['biographical movies'] = biomovies
    guestapp = _buildGuests([x[6:].strip() for x in biol
                             if x[:6] == 'GA: * '])
    if guestapp:
        res['notable tv guest appearances'] = guestapp
    tm = _parseList(biol, 'TM')
    if tm:
        res['trademarks'] = tm
    interv = _parseList(biol, 'IT')
    if interv:
        res['interviews'] = interv
    return res
def _add_items(self): # Add a new section in the biography. if self._in_content and self._sect_name and self._sect_data: sect = self._sect_name.strip().lower() # XXX: to get rid of the last colons and normalize section names. if sect[-1] == ':': sect = sect[:-1] if sect == 'salary': sect = 'salary history' elif sect == 'nickname': sect = 'nick names' elif sect == 'where are they now': sect = 'where now' elif sect == 'personal quotes': sect = 'quotes' elif sect == 'date of birth': sect = 'birth date' elif sect == 'date of death': sect = 'death date' data = self._sect_data.strip() d_split = data.split('::') d_split[:] = filter(None, [x.strip() for x in d_split]) # Do some transformation on some special cases. if sect == 'salary history': newdata = [] for j in d_split: j = filter(None, [x.strip() for x in j.split('@@@@')]) newdata.append('::'.join(j)) d_split[:] = newdata elif sect == 'nick names': d_split[:] = [normalizeName(x) for x in d_split] elif sect == 'birth name': d_split = canonicalName(d_split[0]) elif sect == 'height': d_split = d_split[0] elif sect == 'spouse': d_split[:] = [ x.replace(' (', '::(', 1).replace(' ::', '::') for x in d_split ] # Birth/death date are in both maindetails and bio pages; # it's safe to collect both of them. if sect == 'birth date': date, notes = date_and_notes(d_split[0]) if date: self._bio_data['birth date'] = date if notes: self._bio_data['birth notes'] = notes elif sect == 'death date': date, notes = date_and_notes(d_split[0]) if date: self._bio_data['death date'] = date if notes: self._bio_data['death notes'] = notes elif d_split: # Multiple items are added separately (e.g.: 'trivia' is # a list of strings). self._bio_data[sect] = d_split self._sect_name = u'' self._sect_data = u'' self._in_sect = 0
def _parseBiography(biol):
    """Parse the biographies.data file.

    ``biol`` is the sequence of lines of one biography entry; each line
    starts with a two-letter code followed by ``': '`` (or ``': * '`` for
    list items).  Returns a dictionary with the collected information;
    keys are only present when the corresponding data was found.
    """
    res = {}
    # The mini biography comes from _parseBioBy alone; the former
    # ' '.join(_parseList(biol, 'BG', mline=0)) result was overwritten
    # before being used and has been dropped.
    bio = _parseBioBy(biol)
    if bio:
        res['mini biography'] = bio
    # Six-character prefixes whose stripped payload is appended to a list.
    list_prefixes = {
        'SP: * ': 'spouse',
        'AT: * ': 'articles',
        'PT: * ': 'pictorials',
        'CV: * ': 'magazine covers',
        'PI: * ': 'portrayed',
    }
    for x in biol:
        x4 = x[:4]
        x6 = x[:6]
        if x4 == 'DB: ':
            # date_and_notes splits the line into the date and its notes.
            date, notes = date_and_notes(x[4:])
            if date:
                res['birth date'] = date
            if notes:
                res['birth notes'] = notes
        elif x4 == 'DD: ':
            date, notes = date_and_notes(x[4:])
            if date:
                res['death date'] = date
            if notes:
                res['death notes'] = notes
        elif x6 in list_prefixes:
            res.setdefault(list_prefixes[x6], []).append(x[6:].strip())
        elif x4 == 'RN: ':
            n = x[4:].strip()
            if not n:
                continue
            rn = build_name(analyze_name(n, canonical=1), canonical=1)
            res['birth name'] = rn
        elif x4 == 'HT: ':
            res['height'] = x[4:].strip()
        elif x4 == 'NK: ':
            res.setdefault('nick names', []).append(normalizeName(x[4:]))
        elif x6 == 'SA: * ':
            # ' -> ' separates the fields of a salary entry.
            sal = x[6:].strip().replace(' -> ', '::')
            res.setdefault('salary history', []).append(sal)
    trl = _parseList(biol, 'TR')
    if trl:
        res['trivia'] = trl
    quotes = _parseList(biol, 'QU')
    if quotes:
        res['quotes'] = quotes
    otherworks = _parseList(biol, 'OW')
    if otherworks:
        res['other works'] = otherworks
    books = _parseList(biol, 'BO')
    if books:
        res['books'] = books
    agent = _parseList(biol, 'AG')
    if agent:
        res['agent address'] = agent
    wherenow = _parseList(biol, 'WN')
    if wherenow:
        # Only the first entry is kept.
        res['where now'] = wherenow[0]
    biomovies = _parseList(biol, 'BT')
    if biomovies:
        res['biographical movies'] = biomovies
    guestapp = _buildGuests([x[6:].strip() for x in biol
                             if x[:6] == 'GA: * '])
    if guestapp:
        res['notable tv guest appearances'] = guestapp
    tm = _parseList(biol, 'TM')
    if tm:
        res['trademarks'] = tm
    interv = _parseList(biol, 'IT')
    if interv:
        res['interviews'] = interv
    return res