def __init__(self, dbDirectory, adultSearch=1, *arguments, **keywords):
    """Initialize the access system.

    dbDirectory -- directory with the local data files; environment
                   variables and a leading "~" are expanded and the
                   path is normalized before it is stored.
    adultSearch -- value handed to do_adult_search().
    Every other positional/keyword argument is forwarded to
    IMDbLocalAndSqlAccessSystem.__init__.

    Raise IMDbDataAccessError if the resulting path is not a directory.
    """
    IMDbLocalAndSqlAccessSystem.__init__(self, *arguments, **keywords)
    db = os.path.expanduser(os.path.expandvars(dbDirectory))
    # realpath is used only when os.path provides it.
    if hasattr(os.path, 'realpath'):
        db = os.path.realpath(db)
    db = os.path.normpath(db)
    # File names are built as '%sname.ext' % self.__db, so the stored
    # path must end with a separator.  normpath() already leaves one on
    # a root directory: do not double it.
    sep = getattr(os.path, 'sep', '/')
    if not db.endswith(sep):
        db = db + sep
    self.__db = os.path.normcase(db)
    if not os.path.isdir(self.__db):
        raise IMDbDataAccessError('"%s" is not a directory' % self.__db)
    # These indices are used to quickly get the mopID
    # for a given title/name.
    self.__namesScan = KeyFScan('%snames.key' % self.__db)
    self.__titlesScan = KeyFScan('%stitles.key' % self.__db)
    self.do_adult_search(adultSearch)
class IMDbLocalAccessSystem(IMDbLocalAndSqlAccessSystem): """The class used to access IMDb's data through a local installation.""" accessSystem = 'local' def __init__(self, dbDirectory, adultSearch=1, *arguments, **keywords): """Initialize the access system. The directory with the files must be supplied. """ IMDbLocalAndSqlAccessSystem.__init__(self, *arguments, **keywords) self.__db = os.path.expandvars(dbDirectory) self.__db = os.path.expanduser(self.__db) if hasattr(os.path, 'realpath'): self.__db = os.path.realpath(self.__db) self.__db = os.path.normpath(self.__db) self.__db = self.__db + getattr(os.path, 'sep', '/') self.__db = os.path.normcase(self.__db) if not os.path.isdir(self.__db): raise IMDbDataAccessError, '"%s" is not a directory' % self.__db # These indices are used to quickly get the mopID # for a given title/name. self.__namesScan = KeyFScan('%snames.key' % self.__db) self.__titlesScan = KeyFScan('%stitles.key' % self.__db) self.do_adult_search(adultSearch) def _getTitleID(self, title): return self.__titlesScan.getID(title) def _getNameID(self, name): return self.__namesScan.getID(name) def _get_lastID(self, indexF): fsize = os.stat(indexF)[ST_SIZE] return (fsize / 4) - 1 def get_lastMovieID(self): """Return the last movieID""" return self._get_lastID('%stitles.index' % self.__db) def get_lastPersonID(self): """Return the last personID""" return self._get_lastID('%snames.index' % self.__db) def _normalize_movieID(self, movieID): """Normalize the given movieID.""" try: return int(movieID) except (ValueError, OverflowError): raise IMDbError, 'movieID "%s" can\'t be converted to integer' % \ movieID def _normalize_personID(self, personID): """Normalize the given personID.""" try: return int(personID) except (ValueError, OverflowError): raise IMDbError, 'personID "%s" can\'t be converted to integer' % \ personID def _normalize_characterID(self, characterID): """Normalize the given characterID.""" try: return int(characterID) except (ValueError, 
OverflowError): raise IMDbError, 'characterID "%s" can\'t be converted to integer' \ % characterID def _normalize_companyID(self, companyID): """Normalize the given companyID.""" try: return int(companyID) except (ValueError, OverflowError): raise IMDbError, 'companyID "%s" can\'t be converted to integer' \ % companyID def _get_real_movieID(self, movieID): """Handle title aliases.""" rid = getFullIndex('%saka-titles.index' % self.__db, movieID, kind='akatidx') if rid is not None: return rid return movieID def _get_real_personID(self, personID): """Handle name aliases.""" rid = getFullIndex('%saka-names.index' % self.__db, personID, kind='akanidx') if rid is not None: return rid return personID def get_imdbMovieID(self, movieID): """Translate a movieID in an imdbID. Try an Exact Primary Title search on IMDb; return None if it's unable to get the imdbID. """ titline = getLabel(movieID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) if titline is None: return None return self.title2imdbID(titline) def get_imdbPersonID(self, personID): """Translate a personID in an imdbID. Try an Exact Primary Name search on IMDb; return None if it's unable to get the imdbID. """ name = getLabel(personID, '%snames.index' % self.__db, '%snames.key' % self.__db) if name is None: return None return self.name2imdbID(name) def get_imdbCharacterID(self, characterID): """Translate a characterID in an imdbID. Try an Exact Primary Name search on IMDb; return None if it's unable to get the imdbID. """ name = getCharacterName(characterID, '%scharacters.index' % self.__db, '%scharacters.data' % self.__db) if not name: return None return self.character2imdbID(name) def get_imdbCompanyID(self, companyID): """Translate a companyID in an imdbID. Try an Exact Primary Name search on IMDb; return None if it's unable to get the imdbID. 
""" name = getCompanyName(companyID, '%scompanies.index' % self.__db, '%scompanies.data' % self.__db) if not name: return None return self.company2imdbID(name) def do_adult_search(self, doAdult): """If set to 0 or False, movies in the Adult category are not shown in the results of a search.""" self.doAdult = doAdult def _search_movie(self, title, results): title = title.strip() if not title: return [] # Search for these title variations. title1, title2, title3 = titleVariations(title) resultsST = results if not self.doAdult: resultsST = 0 res = _scan_titles('%stitles.key' % self.__db, title1, title2, title3, resultsST) if self.doAdult and results > 0: res[:] = res[:results] res[:] = [x[1] for x in res] # Check for adult movies. if not self.doAdult: newlist = [] for entry in res: genres = getMovieMisc(movieID=entry[0], dataF='%s%s.data' % (self.__db, 'genres'), indexF='%s%s.index' % (self.__db, 'genres'), attrIF='%sattributes.index' % self.__db, attrKF='%sattributes.key' % self.__db) if 'Adult' not in genres: newlist.append(entry) res[:] = newlist if results > 0: res[:] = res[:results] return res def get_movie_main(self, movieID): # Information sets provided by this method. infosets = ('main', 'vote details') tl = getLabel(movieID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) # No title, no party. if tl is None: raise IMDbDataAccessError, 'unable to get movieID "%s"' % movieID res = analyze_title(tl) # Build the cast list. 
actl = [] for castG in ('actors', 'actresses'): midx = getFullIndex('%s%s.titles' % (self.__db, castG), movieID, multi=1) if midx is not None: params = {'movieID': movieID, 'dataF': '%s%s.data' % (self.__db, castG), 'indexF': '%snames.index' % self.__db, 'keyF': '%snames.key' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db, 'charNF': '%scharacter2id.index' % self.__db, 'offsList': midx, 'doCast': 1} actl += getMovieCast(**params) if actl: actl.sort() res['cast'] = actl # List of other workers. works = ('writer', 'cinematographer', 'composer', 'costume-designer', 'director', 'editor', 'miscellaneou', 'producer', 'production-designer', 'cinematographer') for i in works: index = getFullIndex('%s%ss.titles' % (self.__db, i), movieID, multi=1) if index is not None: params = {'movieID': movieID, 'dataF': '%s%s.data' % (self.__db, i), 'indexF': '%snames.index' % self.__db, 'keyF': '%snames.key' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db, 'offsList': index} name = key = i if '-' in name: name = name.replace('-', ' ') elif name == 'miscellaneou': name = 'miscellaneous crew' key = 'miscellaneou' elif name == 'writer': params['doWriters'] = 1 params['dataF'] = '%s%ss.data' % (self.__db, key) data = getMovieCast(**params) if name == 'writer': data.sort() res[name] = data # Rating. rt = self.get_movie_vote_details(movieID)['data'] if rt: res.update(rt) # Various information. 
miscInfo = (('runtimes', 'running-times'), ('color info', 'color-info'), ('genres', 'genres'), ('distributors', 'distributors'), ('languages', 'language'), ('certificates', 'certificates'), ('special effects companies', 'special-effects-companies'), ('sound mix', 'sound-mix'), ('tech info', 'technical'), ('production companies', 'production-companies'), ('countries', 'countries')) for name, fname in miscInfo: params = {'movieID': movieID, 'dataF': '%s%s.data' % (self.__db, fname), 'indexF': '%s%s.index' % (self.__db, fname), 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} data = getMovieMisc(**params) if name in ('distributors', 'special effects companies', 'production companies'): for nitem in xrange(len(data)): n, notes = split_company_name_notes(data[nitem]) company = Company(name=n, companyID=getCompanyID(n, '%scompany2id.index' % self.__db), notes=notes, accessSystem=self.accessSystem) data[nitem] = company if data: res[name] = data if res.has_key('runtimes') and len(res['runtimes']) > 0: rt = res['runtimes'][0] episodes = re_episodes.findall(rt) if episodes: res['runtimes'][0] = re_episodes.sub('', rt) res['number of episodes'] = episodes[0] # AKA titles. akas = getAkaTitles(movieID, '%saka-titles.data' % self.__db, '%stitles.index' % self.__db, '%stitles.key' % self.__db, '%sattributes.index' % self.__db, '%sattributes.key' % self.__db) if akas: # normalize encoding. for i in xrange(len(akas)): ts = akas[i].split('::') if len(ts) != 2: continue t = ts[0] n = ts[1] nt = self._changeAKAencoding(n, t) if nt is not None: akas[i] = '%s::%s' % (nt, n) res['akas'] = akas if res.get('kind') == 'episode': # Things to do if this is a tv series episode. 
episodeOf = res.get('episode of') if episodeOf is not None: parentSeries = Movie(data=res['episode of'], accessSystem='local') seriesID = self._getTitleID(parentSeries.get( 'long imdb canonical title')) parentSeries.movieID = seriesID res['episode of'] = parentSeries if not res.get('year'): year = getFullIndex('%smovies.data' % self.__db, movieID, kind='moviedata', rindex=1) if year: res['year'] = year # MPAA info. mpaa = getMPAA(movieID, '%smpaa-ratings-reasons.index' % self.__db, '%smpaa-ratings-reasons.data' % self.__db) if mpaa: res.update(mpaa) return {'data': res, 'info sets': infosets} def get_movie_plot(self, movieID): pl = getPlot(movieID, '%splot.index' % self.__db, '%splot.data' % self.__db) trefs, nrefs = self._extractRefs(pl) if pl: return {'data': {'plot': pl}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_taglines(self, movieID): tg = getTaglines(movieID, '%staglines.index' % self.__db, '%staglines.data' % self.__db) if tg: return {'data': {'taglines': tg}} return {'data': {}} def get_movie_keywords(self, movieID): params = {'movieID': movieID, 'dataF': '%skeywords.data' % self.__db, 'indexF': '%skeywords.index' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} kwds = getMovieMisc(**params) if kwds: return {'data': {'keywords': kwds}} return {'data': {}} def get_movie_alternate_versions(self, movieID): av = parseMinusList(movieID, '%salternate-versions.data' % self.__db, '%salternate-versions.index' % self.__db) trefs, nrefs = self._extractRefs(av) if av: return {'data': {'alternate versions': av}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_crazy_credits(self, movieID): cc = parseMinusList(movieID, '%scrazy-credits.data' % self.__db, '%scrazy-credits.index' % self.__db) trefs, nrefs = self._extractRefs(cc) if cc: return {'data': {'crazy credits': cc}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_goofs(self, 
movieID): goo = parseMinusList(movieID, '%sgoofs.data' % self.__db, '%sgoofs.index' % self.__db) trefs, nrefs = self._extractRefs(goo) if goo: return {'data': {'goofs': goo}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_soundtrack(self, movieID): goo = parseMinusList(movieID, '%ssoundtracks.data' % self.__db, '%ssoundtracks.index' % self.__db) trefs, nrefs = self._extractRefs(goo) if goo: return {'data': {'soundtrack': goo}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_quotes(self, movieID): mq = getQuotes(movieID, '%squotes.data' % self.__db, '%squotes.index' % self.__db) trefs, nrefs = self._extractRefs(mq) if mq: return {'data': {'quotes': mq}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_release_dates(self, movieID): params = {'movieID': movieID, 'dataF': '%srelease-dates.data' % self.__db, 'indexF': '%srelease-dates.index' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} data = getMovieMisc(**params) if data: return {'data': {'release dates': data}} return {'data': {}} def get_movie_miscellaneous_companies(self, movieID): params = {'movieID': movieID, 'dataF': '%smiscellaneous-companies.data' % self.__db, 'indexF': '%smiscellaneous-companies.index' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} try: data = getMovieMisc(**params) except IMDbDataAccessError: import warnings warnings.warn('miscellaneous-companies files not found; ' 'run the misc-companies4local.py script.') return {'data': {}} for nitem in xrange(len(data)): n, notes = split_company_name_notes(data[nitem]) company = Company(name=n, companyID=getCompanyID(n, '%scompany2id.index' % self.__db), notes=notes, accessSystem=self.accessSystem) data[nitem] = company if data: return {'data': {'miscellaneous companies': data}} return {'data': {}} def get_movie_vote_details(self, movieID): data = getRatingData(movieID, 
'%sratings.data' % self.__db) return {'data': data} def get_movie_trivia(self, movieID): triv = parseMinusList(movieID, '%strivia.data' % self.__db, '%strivia.index' % self.__db) trefs, nrefs = self._extractRefs(triv) if triv: return {'data': {'trivia': triv}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_locations(self, movieID): params = {'movieID': movieID, 'dataF': '%slocations.data' % self.__db, 'indexF': '%slocations.index' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} data = getMovieMisc(**params) if data: return {'data': {'locations': data}} return {'data': {}} def get_movie_connections(self, movieID): mc = getMovieLinks(movieID, '%smovie-links.data' % self.__db, '%stitles.index' % self.__db, '%stitles.key' % self.__db) if mc: return {'data': {'connections': mc}} return {'data': {}} def get_movie_business(self, movieID): mb = getBusiness(movieID, '%sbusiness.index' % self.__db, '%sbusiness.data' % self.__db) trefs, nrefs = self._extractRefs(mb) if mb: return {'data': {'business': mb}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_literature(self, movieID): ml = getLiterature(movieID, '%sliterature.index' % self.__db, '%sliterature.data' % self.__db) if ml: return {'data': {'literature': ml}} return {'data': {}} def get_movie_laserdisc(self, movieID): ml = getLaserdisc(movieID, '%slaserdisc.index' % self.__db, '%slaserdisc.data' % self.__db) trefs, nrefs = self._extractRefs(ml) if ml: return {'data': {'laserdisc': ml}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def _buildEpisodes(self, eps_list, parentID): episodes = {} parentTitle = getLabel(parentID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) parentSeries = Movie(title=parentTitle, movieID=parentID, accessSystem='local') for episodeID, episodeTitle in eps_list: episodeTitle = unicode(episodeTitle, 'latin_1', 'replace') data = analyze_title(episodeTitle, 
canonical=1) m = Movie(data=data, movieID=episodeID, accessSystem='local') m['episode of'] = parentSeries if data.get('year') is None: year = getFullIndex('%smovies.data' % self.__db, key=episodeID, kind='moviedata', rindex=1) if year: m['year'] = year season = data.get('season', 'UNKNOWN') if not episodes.has_key(season): episodes[season] = {} ep_number = data.get('episode') if ep_number is None: ep_number = max((episodes[season].keys() or [0])) + 1 episodes[season][ep_number] = m return episodes def get_movie_episodes(self, movieID): try: me = get_episodes(movieID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) except IOError, e: raise IMDbDataAccessError, str(e) if me: episodes = self._buildEpisodes(me, movieID) data = {'episodes': episodes} data['number of episodes'] = sum([len(x) for x in episodes.values()]) data['number of seasons'] = len(episodes.keys()) return {'data': data} return {'data': {}}
class IMDbLocalAccessSystem(IMDbLocalAndSqlAccessSystem): """The class used to access IMDb's data through a local installation.""" accessSystem = 'local' def __init__(self, dbDirectory, adultSearch=1, *arguments, **keywords): """Initialize the access system. The directory with the files must be supplied. """ IMDbLocalAndSqlAccessSystem.__init__(self, *arguments, **keywords) self.__db = os.path.expandvars(dbDirectory) self.__db = os.path.expanduser(self.__db) if hasattr(os.path, 'realpath'): self.__db = os.path.realpath(self.__db) self.__db = os.path.normpath(self.__db) self.__db = self.__db + getattr(os.path, 'sep', '/') self.__db = os.path.normcase(self.__db) if not os.path.isdir(self.__db): raise IMDbDataAccessError, '"%s" is not a directory' % self.__db # These indices are used to quickly get the mopID # for a given title/name. self.__namesScan = KeyFScan('%snames.key' % self.__db) self.__titlesScan = KeyFScan('%stitles.key' % self.__db) self.do_adult_search(adultSearch) def _getTitleID(self, title): return self.__titlesScan.getID(title) def _getNameID(self, name): return self.__namesScan.getID(name) def _get_lastID(self, indexF): fsize = os.stat(indexF)[ST_SIZE] return (fsize / 4) - 1 def get_lastMovieID(self): """Return the last movieID""" return self._get_lastID('%stitles.index' % self.__db) def get_lastPersonID(self): """Return the last personID""" return self._get_lastID('%snames.index' % self.__db) def _normalize_movieID(self, movieID): """Normalize the given movieID.""" try: return int(movieID) except (ValueError, OverflowError): raise IMDbError, 'movieID "%s" can\'t be converted to integer' % \ movieID def _normalize_personID(self, personID): """Normalize the given personID.""" try: return int(personID) except (ValueError, OverflowError): raise IMDbError, 'personID "%s" can\'t be converted to integer' % \ personID def _normalize_characterID(self, characterID): """Normalize the given characterID.""" try: return int(characterID) except (ValueError, 
OverflowError): raise IMDbError, 'characterID "%s" can\'t be converted to integer' \ % characterID def _normalize_companyID(self, companyID): """Normalize the given companyID.""" try: return int(companyID) except (ValueError, OverflowError): raise IMDbError, 'companyID "%s" can\'t be converted to integer' \ % companyID def _get_real_movieID(self, movieID): """Handle title aliases.""" rid = getFullIndex('%saka-titles.index' % self.__db, movieID, kind='akatidx') if rid is not None: return rid return movieID def _get_real_personID(self, personID): """Handle name aliases.""" rid = getFullIndex('%saka-names.index' % self.__db, personID, kind='akanidx') if rid is not None: return rid return personID def get_imdbMovieID(self, movieID): """Translate a movieID in an imdbID. Try an Exact Primary Title search on IMDb; return None if it's unable to get the imdbID. """ titline = getLabel(movieID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) if titline is None: return None return self.title2imdbID(titline) def get_imdbPersonID(self, personID): """Translate a personID in an imdbID. Try an Exact Primary Name search on IMDb; return None if it's unable to get the imdbID. """ name = getLabel(personID, '%snames.index' % self.__db, '%snames.key' % self.__db) if name is None: return None return self.name2imdbID(name) def get_imdbCharacterID(self, characterID): """Translate a characterID in an imdbID. Try an Exact Primary Name search on IMDb; return None if it's unable to get the imdbID. """ name = getCharacterName(characterID, '%scharacters.index' % self.__db, '%scharacters.data' % self.__db) if not name: return None return self.character2imdbID(name) def get_imdbCompanyID(self, companyID): """Translate a companyID in an imdbID. Try an Exact Primary Name search on IMDb; return None if it's unable to get the imdbID. 
""" name = getCompanyName(companyID, '%scompanies.index' % self.__db, '%scompanies.data' % self.__db) if not name: return None return self.company2imdbID(name) def do_adult_search(self, doAdult): """If set to 0 or False, movies in the Adult category are not shown in the results of a search.""" self.doAdult = doAdult def _search_movie(self, title, results, _episodes=False): title = title.strip() if not title: return [] # Search for these title variations. if not _episodes: title1, title2, title3 = titleVariations(title, fromPtdf=1) else: title1 = normalizeTitle(title) title2 = '' title3 = '' # XXX: only a guess: results are shrinked, to exclude Adult # titles and to remove duplicated entries. resultsST = results * 3 res = _scan_titles('%stitles.key' % self.__db, title1, title2, title3, resultsST, _episodes) res[:] = [x[1] for x in res] # Check for adult movies. if not self.doAdult: newlist = [] for entry in res: genres = getMovieMisc(movieID=entry[0], dataF='%s%s.data' % (self.__db, 'genres'), indexF='%s%s.index' % (self.__db, 'genres'), attrIF='%sattributes.index' % self.__db, attrKF='%sattributes.key' % self.__db) if 'Adult' not in genres: newlist.append(entry) res[:] = newlist # Get the real name, if this is an AKA. # XXX: duplicated code! new_res = [] seen_MID = [] for idx, (movieID, r) in enumerate(res): # Remove duplicates. # XXX: find a way to prefer titles with an AKA? Or prefer # the original title? 
if movieID in seen_MID: continue else: seen_MID.append(movieID) realMID = self._get_real_movieID(movieID) if movieID == realMID: new_res.append((movieID, r)) continue if realMID in seen_MID: continue else: seen_MID.append(realMID) aka_title = build_title(r, canonical=0) real_title = getLabel(realMID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) if aka_title == real_title: new_res.append((realMID, r)) continue new_r = analyze_title(real_title, canonical=1) new_r['akas'] = [aka_title] new_res.append((realMID, new_r)) if results > 0: new_res[:] = new_res[:results] return new_res def _search_episode(self, title, results): title = title.strip() if not title: return _episodes = True if analyze_title(title)['kind'] == 'episode': _episodes = False return self._search_movie(title, results, _episodes=_episodes) def get_movie_main(self, movieID): # Information sets provided by this method. infosets = ('main', 'vote details') tl = getLabel(movieID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) # No title, no party. if tl is None: raise IMDbDataAccessError, 'unable to get movieID "%s"' % movieID res = analyze_title(tl) # Build the cast list. actl = [] for castG in ('actors', 'actresses'): midx = getFullIndex('%s%s.titles' % (self.__db, castG), movieID, multi=1) if midx is not None: params = {'movieID': movieID, 'dataF': '%s%s.data' % (self.__db, castG), 'indexF': '%snames.index' % self.__db, 'keyF': '%snames.key' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db, 'charNF': '%scharacter2id.index' % self.__db, 'offsList': midx, 'doCast': 1} actl += getMovieCast(**params) if actl: actl.sort() res['cast'] = actl # List of other workers. 
works = ('writer', 'cinematographer', 'composer', 'costume-designer', 'director', 'editor', 'miscellaneou', 'producer', 'production-designer', 'cinematographer') for i in works: index = getFullIndex('%s%ss.titles' % (self.__db, i), movieID, multi=1) if index is not None: params = {'movieID': movieID, 'dataF': '%s%s.data' % (self.__db, i), 'indexF': '%snames.index' % self.__db, 'keyF': '%snames.key' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db, 'offsList': index} name = key = i if '-' in name: name = name.replace('-', ' ') elif name == 'miscellaneou': name = 'miscellaneous crew' key = 'miscellaneou' elif name == 'writer': params['doWriters'] = 1 params['dataF'] = '%s%ss.data' % (self.__db, key) data = getMovieCast(**params) if name == 'writer': data.sort() res[name] = data # Rating. rt = self.get_movie_vote_details(movieID)['data'] if rt: res.update(rt) # Various information. miscInfo = (('runtimes', 'running-times'), ('color info', 'color-info'), ('genres', 'genres'), ('distributors', 'distributors'), ('languages', 'language'), ('certificates', 'certificates'), ('special effects companies', 'special-effects-companies'), ('sound mix', 'sound-mix'), ('tech info', 'technical'), ('production companies', 'production-companies'), ('countries', 'countries')) for name, fname in miscInfo: params = {'movieID': movieID, 'dataF': '%s%s.data' % (self.__db, fname), 'indexF': '%s%s.index' % (self.__db, fname), 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} data = getMovieMisc(**params) if name in ('distributors', 'special effects companies', 'production companies'): for nitem in xrange(len(data)): n, notes = split_company_name_notes(data[nitem]) company = Company(name=n, companyID=getCompanyID(n, '%scompany2id.index' % self.__db), notes=notes, accessSystem=self.accessSystem) data[nitem] = company if data: res[name] = data if res.has_key('runtimes') and len(res['runtimes']) > 0: rt = 
res['runtimes'][0] episodes = re_episodes.findall(rt) if episodes: res['runtimes'][0] = re_episodes.sub('', rt) res['number of episodes'] = episodes[0] # AKA titles. akas = getAkaTitles(movieID, '%saka-titles.data' % self.__db, '%stitles.index' % self.__db, '%stitles.key' % self.__db, '%sattributes.index' % self.__db, '%sattributes.key' % self.__db) if akas: # normalize encoding. for i in xrange(len(akas)): ts = akas[i].split('::') if len(ts) != 2: continue t = ts[0] n = ts[1] nt = self._changeAKAencoding(n, t) if nt is not None: akas[i] = '%s::%s' % (nt, n) res['akas'] = akas if res.get('kind') == 'episode': # Things to do if this is a tv series episode. episodeOf = res.get('episode of') if episodeOf is not None: parentSeries = Movie(data=res['episode of'], accessSystem='local') seriesID = self._getTitleID(parentSeries.get( 'long imdb canonical title')) parentSeries.movieID = seriesID res['episode of'] = parentSeries if not res.get('year'): year = getFullIndex('%smovies.data' % self.__db, movieID, kind='moviedata', rindex=1) if year: res['year'] = year # MPAA info. 
mpaa = getMPAA(movieID, '%smpaa-ratings-reasons.index' % self.__db, '%smpaa-ratings-reasons.data' % self.__db) if mpaa: res.update(mpaa) return {'data': res, 'info sets': infosets} def get_movie_plot(self, movieID): pl = getPlot(movieID, '%splot.index' % self.__db, '%splot.data' % self.__db) trefs, nrefs = self._extractRefs(pl) if pl: return {'data': {'plot': pl}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_taglines(self, movieID): tg = getTaglines(movieID, '%staglines.index' % self.__db, '%staglines.data' % self.__db) if tg: return {'data': {'taglines': tg}} return {'data': {}} def get_movie_keywords(self, movieID): params = {'movieID': movieID, 'dataF': '%skeywords.data' % self.__db, 'indexF': '%skeywords.index' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} kwds = getMovieMisc(**params) if kwds: return {'data': {'keywords': kwds}} return {'data': {}} def get_movie_alternate_versions(self, movieID): av = parseMinusList(movieID, '%salternate-versions.data' % self.__db, '%salternate-versions.index' % self.__db) trefs, nrefs = self._extractRefs(av) if av: return {'data': {'alternate versions': av}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_crazy_credits(self, movieID): cc = parseMinusList(movieID, '%scrazy-credits.data' % self.__db, '%scrazy-credits.index' % self.__db) trefs, nrefs = self._extractRefs(cc) if cc: return {'data': {'crazy credits': cc}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_goofs(self, movieID): goo = parseMinusList(movieID, '%sgoofs.data' % self.__db, '%sgoofs.index' % self.__db) trefs, nrefs = self._extractRefs(goo) if goo: return {'data': {'goofs': goo}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_soundtrack(self, movieID): goo = parseMinusList(movieID, '%ssoundtracks.data' % self.__db, '%ssoundtracks.index' % self.__db) trefs, nrefs = self._extractRefs(goo) if goo: 
return {'data': {'soundtrack': goo}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_quotes(self, movieID): mq = getQuotes(movieID, '%squotes.data' % self.__db, '%squotes.index' % self.__db) trefs, nrefs = self._extractRefs(mq) for idx, quote in enumerate(mq): mq[idx] = quote.split('::') if mq: return {'data': {'quotes': mq}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_release_dates(self, movieID): params = {'movieID': movieID, 'dataF': '%srelease-dates.data' % self.__db, 'indexF': '%srelease-dates.index' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} data = getMovieMisc(**params) if data: return {'data': {'release dates': data}} return {'data': {}} def get_movie_miscellaneous_companies(self, movieID): params = {'movieID': movieID, 'dataF': '%smiscellaneous-companies.data' % self.__db, 'indexF': '%smiscellaneous-companies.index' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} try: data = getMovieMisc(**params) except IMDbDataAccessError: import warnings warnings.warn('miscellaneous-companies files not found; ' 'run the misc-companies4local.py script.') return {'data': {}} for nitem in xrange(len(data)): n, notes = split_company_name_notes(data[nitem]) company = Company(name=n, companyID=getCompanyID(n, '%scompany2id.index' % self.__db), notes=notes, accessSystem=self.accessSystem) data[nitem] = company if data: return {'data': {'miscellaneous companies': data}} return {'data': {}} def get_movie_vote_details(self, movieID): data = getRatingData(movieID, '%sratings.data' % self.__db) return {'data': data} def get_movie_trivia(self, movieID): triv = parseMinusList(movieID, '%strivia.data' % self.__db, '%strivia.index' % self.__db) trefs, nrefs = self._extractRefs(triv) if triv: return {'data': {'trivia': triv}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_locations(self, movieID): 
params = {'movieID': movieID, 'dataF': '%slocations.data' % self.__db, 'indexF': '%slocations.index' % self.__db, 'attrIF': '%sattributes.index' % self.__db, 'attrKF': '%sattributes.key' % self.__db} data = getMovieMisc(**params) if data: return {'data': {'locations': data}} return {'data': {}} def get_movie_connections(self, movieID): mc = getMovieLinks(movieID, '%smovie-links.data' % self.__db, '%stitles.index' % self.__db, '%stitles.key' % self.__db) if mc: return {'data': {'connections': mc}} return {'data': {}} def get_movie_business(self, movieID): mb = getBusiness(movieID, '%sbusiness.index' % self.__db, '%sbusiness.data' % self.__db) trefs, nrefs = self._extractRefs(mb) if mb: return {'data': {'business': mb}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def get_movie_literature(self, movieID): ml = getLiterature(movieID, '%sliterature.index' % self.__db, '%sliterature.data' % self.__db) if ml: return {'data': {'literature': ml}} return {'data': {}} def get_movie_laserdisc(self, movieID): ml = getLaserdisc(movieID, '%slaserdisc.index' % self.__db, '%slaserdisc.data' % self.__db) trefs, nrefs = self._extractRefs(ml) if ml: return {'data': {'laserdisc': ml}, 'titlesRefs': trefs, 'namesRefs': nrefs} return {'data': {}} def _buildEpisodes(self, eps_list, parentID): episodes = {} parentTitle = getLabel(parentID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) parentSeries = Movie(title=parentTitle, movieID=parentID, accessSystem='local') for episodeID, episodeTitle in eps_list: episodeTitle = unicode(episodeTitle, 'latin_1', 'replace') data = analyze_title(episodeTitle, canonical=1) m = Movie(data=data, movieID=episodeID, accessSystem='local') m['episode of'] = parentSeries if data.get('year') is None: year = getFullIndex('%smovies.data' % self.__db, key=episodeID, kind='moviedata', rindex=1) if year: m['year'] = year season = data.get('season', 'UNKNOWN') if not episodes.has_key(season): episodes[season] = {} ep_number = 
data.get('episode') if ep_number is None: ep_number = max((episodes[season].keys() or [0])) + 1 episodes[season][ep_number] = m return episodes def get_movie_episodes(self, movieID): try: me = get_episodes(movieID, '%stitles.index' % self.__db, '%stitles.key' % self.__db) except IOError, e: raise IMDbDataAccessError, str(e) if me: episodes = self._buildEpisodes(me, movieID) data = {'episodes': episodes} data['number of episodes'] = sum([len(x) for x in episodes.values()]) data['number of seasons'] = len(episodes.keys()) return {'data': data} return {'data': {}}