def _getitem(self, key): """Handle special keys.""" if self.data.has_key('episode of'): if key == 'long imdb episode title': return build_title(self.data, canonical=0) elif key == 'series title': ser_title = self.data['episode of'].get('canonical title') or \ self.data['episode of']['title'] return normalizeTitle(ser_title) elif key == 'canonical series title': ser_title = self.data['episode of'].get('canonical title') or \ self.data['episode of']['title'] return ser_title elif key == 'episode title': return normalizeTitle(self.data.get('title', u'')) elif key == 'canonical episode title': return self.data.get('title', u'') if self.data.has_key('title'): if key == 'title': return normalizeTitle(self.data['title']) elif key == 'long imdb title': return build_title(self.data, canonical=0) elif key == 'canonical title': return self.data['title'] elif key == 'long imdb canonical title': return build_title(self.data, canonical=1) return None
def titleVariations(title, fromPtdf=0):
    """Build title variations useful for searches.

    If fromPtdf is true, the input is assumed to be in the plain text
    data files format.

    Returns a (title1, title2, title3) tuple:
      title1 -- the canonical title (normalized for episodes);
      title2 -- title1 without a trailing article, or title1 unchanged
                (empty when title1 is empty);
      title3 -- the long imdb canonical title, or an empty string when
                the input was just a plain title.
    """
    # A (year[/imdbIndex]) indication, or input coming from the plain
    # text data files, means a long imdb canonical name was provided.
    if not (fromPtdf or re_year_index.search(title)):
        # Just a title.
        canonical = canonicalTitle(title)
        long_canonical = u''
    else:
        parsed = analyze_title(title, canonical=1)
        canonical = parsed['title']
        if parsed['kind'] == 'episode':
            canonical = normalizeTitle(canonical)
            long_canonical = build_title(parsed, canonical=1, ptdf=1)
        elif fromPtdf:
            # The input already is the long imdb canonical name.
            long_canonical = title
        else:
            long_canonical = build_title(parsed, canonical=1, ptdf=1)
    without_article = u''
    if canonical:
        pieces = canonical.split(u', ')
        if pieces[-1].lower() in _unicodeArticles:
            without_article = u', '.join(pieces[:-1])
        else:
            without_article = canonical
    return canonical, without_article, long_canonical
def titleVariations(title, fromPtdf=0):
    """Build title variations useful for searches.

    If fromPtdf is true, the input is assumed to be in the plain text
    data files format.

    The result is a tuple of three strings: the canonical title
    (normalized for episodes), the same title stripped of a trailing
    article (or unchanged if there is none; empty if the canonical title
    is empty), and the long imdb canonical title (empty when the input
    was just a plain title).
    """
    has_long_form = fromPtdf or re_year_index.search(title)
    if has_long_form:
        # A (year[/imdbIndex]) indication, or plain text data files
        # input: assume a long imdb canonical name was provided.
        info = analyze_title(title, canonical=1)
        base = info['title']
        is_episode = info['kind'] == 'episode'
        if is_episode:
            base = normalizeTitle(base)
        if fromPtdf and not is_episode:
            # The input itself is the long imdb canonical name.
            long_form = title
        else:
            long_form = build_title(info, canonical=1, ptdf=1)
    else:
        # Just a title.
        base = canonicalTitle(title)
        long_form = u''
    stripped = u''
    if base:
        stripped = base
        chunks = base.split(u', ')
        if chunks[-1].lower() in _articles:
            stripped = u', '.join(chunks[:-1])
    return base, stripped, long_form
def _search_movie(self, title, results, _episodes=False): title = title.strip() if not title: return [] # Search for these title variations. if not _episodes: title1, title2, title3 = titleVariations(title, fromPtdf=1) else: title1 = normalizeTitle(title) title2 = '' title3 = '' resultsST = results if not self.doAdult: resultsST = 0 res = _scan_titles('%stitles.key' % self.__db, title1, title2, title3, resultsST, _episodes) if self.doAdult and results > 0: res[:] = res[:results] res[:] = [x[1] for x in res] # Check for adult movies. if not self.doAdult: newlist = [] for entry in res: genres = getMovieMisc( movieID=entry[0], dataF='%s%s.data' % (self.__db, 'genres'), indexF='%s%s.index' % (self.__db, 'genres'), attrIF='%sattributes.index' % self.__db, attrKF='%sattributes.key' % self.__db) if 'Adult' not in genres: newlist.append(entry) res[:] = newlist if results > 0: res[:] = res[:results] return res
def _search_movie(self, title, results, _episodes=False): title = title.strip() if not title: return [] # Search for these title variations. if not _episodes: title1, title2, title3 = titleVariations(title, fromPtdf=1) else: title1 = normalizeTitle(title) title2 = '' title3 = '' resultsST = results if not self.doAdult: resultsST = 0 res = _scan_titles('%stitles.key' % self.__db, title1, title2, title3, resultsST, _episodes) if self.doAdult and results > 0: res[:] = res[:results] res[:] = [x[1] for x in res] # Check for adult movies. if not self.doAdult: newlist = [] for entry in res: genres = getMovieMisc(movieID=entry[0], dataF='%s%s.data' % (self.__db, 'genres'), indexF='%s%s.index' % (self.__db, 'genres'), attrIF='%sattributes.index' % self.__db, attrKF='%sattributes.key' % self.__db) if 'Adult' not in genres: newlist.append(entry) res[:] = newlist if results > 0: res[:] = res[:results] return res
def _search_movie(self, title, results, _episodes=False): title = title.strip() if not title: return [] # Search for these title variations. if not _episodes: title1, title2, title3 = titleVariations(title, fromPtdf=1) else: title1 = normalizeTitle(title) title2 = '' title3 = '' # XXX: only a guess: results are shrinked, to exclude Adult # titles and to remove duplicated entries. resultsST = results * 3 res = _scan_titles('%stitles.key' % self.__db, title1, title2, title3, resultsST, _episodes) res[:] = [x[1] for x in res] # Check for adult movies. if not self.doAdult: newlist = [] for entry in res: genres = getMovieMisc(movieID=entry[0], dataF='%s%s.data' % (self.__db, 'genres'), indexF='%s%s.index' % (self.__db, 'genres'), attrIF='%sattributes.index' % self.__db, attrKF='%sattributes.key' % self.__db) if 'Adult' not in genres: newlist.append(entry) res[:] = newlist # Get the real name, if this is an AKA. # XXX: duplicated code! new_res = [] seen_MID = [] for idx, (movieID, r) in enumerate(res): # Remove duplicates. # XXX: find a way to prefer titles with an AKA? Or prefer # the original title? if movieID in seen_MID: continue else: seen_MID.append(movieID) realMID = self._get_real_movieID(movieID) if movieID == realMID: new_res.append((movieID, r)) continue if realMID in seen_MID: continue else: seen_MID.append(realMID) aka_title = build_title(r, canonical=0) real_title = getLabel(realMID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) if aka_title == real_title: new_res.append((realMID, r)) continue new_r = analyze_title(real_title, canonical=1) new_r['akas'] = [aka_title] new_res.append((realMID, new_r)) if results > 0: new_res[:] = new_res[:results] return new_res
def _search_movie(self, title, results, _episodes=False): title = title.strip() if not title: return [] title_dict = analyze_title(title, canonical=1) s_title = title_dict['title'] if not s_title: return [] episodeOf = title_dict.get('episode of') if not episodeOf: if not _episodes: s_title_split = s_title.split(', ') if len(s_title_split) > 1 and \ s_title_split[-1].lower() in _articles: s_title_rebuilt = ', '.join(s_title_split[:-1]) if s_title_rebuilt: s_title = s_title_rebuilt else: _episodes = False s_title = normalizeTitle(s_title) if isinstance(s_title, UnicodeType): s_title = s_title.encode('ascii', 'ignore') soundexCode = soundex(s_title) # XXX: improve the search restricting the kindID if the # "kind" of the input differs from "movie"? condition = conditionAka = None if _episodes: condition = AND(Title.q.phoneticCode == soundexCode, Title.q.kindID == self._kindRev['episode']) conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode, AkaTitle.q.kindID == self._kindRev['episode']) elif title_dict['kind'] == 'episode' and episodeOf is not None: series_title = build_title(episodeOf, canonical=1) # XXX: is it safe to get "results" results? # Too many? Too few? serRes = results if serRes < 3 or serRes > 10: serRes = 10 searchSeries = self._search_movie(series_title, serRes) seriesIDs = [result[0] for result in searchSeries] if seriesIDs: condition = AND(Title.q.phoneticCode == soundexCode, IN(Title.q.episodeOfID, seriesIDs), Title.q.kindID == self._kindRev['episode']) conditionAka = AND( AkaTitle.q.phoneticCode == soundexCode, IN(AkaTitle.q.episodeOfID, seriesIDs), AkaTitle.q.kindID == self._kindRev['episode']) else: # XXX: bad situation: we have found no matching series; # try searching everything (both episodes and # non-episodes) for the title. 
condition = AND(Title.q.phoneticCode == soundexCode, IN(Title.q.episodeOfID, seriesIDs)) conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode, IN(AkaTitle.q.episodeOfID, seriesIDs)) if condition is None: # XXX: excludes episodes? condition = AND(Title.q.kindID != self._kindRev['episode'], Title.q.phoneticCode == soundexCode) conditionAka = AND(AkaTitle.q.kindID != self._kindRev['episode'], AkaTitle.q.phoneticCode == soundexCode) # Up to 3 variations of the title are searched, plus the # long imdb canonical title, if provided. if not _episodes: title1, title2, title3 = titleVariations(title) else: title1 = title title2 = '' title3 = '' try: qr = [(q.id, get_movie_data(q.id, self._kind)) for q in Title.select(condition)] q2 = [(q.movieID, get_movie_data(q.id, self._kind, fromAka=1)) for q in AkaTitle.select(conditionAka)] qr += q2 except NotFoundError, e: raise IMDbDataAccessError, \ 'unable to search the database: "%s"' % str(e)