def getQuotes(movieID, dataF, indexF):
    """Return a list of quotes.

    The offset of the movie's section inside dataF is looked up in indexF
    with getFullIndex; when no offset is found an empty list is returned.

    Within the section, blank lines separate one quote from the next and a
    line starting with '# ' ends the section.  The lines of a quote are
    joined with '::'; an indented line is appended (after a single space)
    to the previous line of the quote, unless that line ends with '::'.

    Raises IMDbDataAccessError if dataF cannot be opened.
    """
    index = getFullIndex(indexF, movieID)
    qtL = []
    if index is None:
        return qtL
    try:
        qtf = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        qtf.seek(index)
        # Skip the line found at the indexed offset.
        qtf.readline()
        # Lines of the quote currently being collected.
        qttl = []
        while 1:
            raw = qtf.readline()
            if not raw:
                # readline() returns an empty string only at end of file:
                # flush the pending quote and stop instead of looping on.
                if qttl:
                    qtL.append('::'.join(qttl))
                break
            line = latin2utf(raw).rstrip()
            if not line:
                # A blank line closes the current quote, if any.
                if qttl:
                    qtL.append('::'.join(qttl))
                    qttl[:] = []
                continue
            # "qttl and" guards against an indented line showing up
            # before any quote line has been collected.
            if line.startswith(' ') and qttl and qttl[-1] and \
                    not qttl[-1].endswith('::'):
                # Continuation of the previous line of the quote.
                line = line.lstrip()
                if line:
                    qttl[-1] += ' %s' % line
            elif line.startswith('# '):
                # Beginning of the next section: we are done.
                if qttl:
                    qtL.append('::'.join(qttl))
                break
            else:
                line = line.lstrip()
                if line:
                    qttl.append(line)
    finally:
        # Release the file handle even if reading/decoding fails.
        qtf.close()
    return qtL
def _get_real_personID(self, personID):
    """Handle name aliases.

    Looks personID up (kind='akanidx') in the 'aka-names.index' path
    built from self.__db.  Returns the value found there, or personID
    itself when the lookup yields None.
    """
    aliasIndex = '%saka-names.index' % self.__db
    realID = getFullIndex(aliasIndex, personID, kind='akanidx')
    if realID is None:
        return personID
    return realID
def _get_real_movieID(self, movieID):
    """Handle title aliases.

    Looks movieID up (kind='akatidx') in the 'aka-titles.index' path
    built from self.__db.  Returns the value found there, or movieID
    itself when the lookup yields None.
    """
    aliasIndex = '%saka-titles.index' % self.__db
    realID = getFullIndex(aliasIndex, movieID, kind='akatidx')
    if realID is None:
        return movieID
    return realID
def _buildEpisodes(self, eps_list, parentID):
    """Group the episodes of a series by season and episode number.

    eps_list is iterated as (episodeID, episodeTitle) pairs; parentID is
    the movieID used for the parent series.  The result is a dictionary
    of dictionaries, result[season][episode number] = Movie instance.
    Titles without a season are filed under 'UNKNOWN'; an episode without
    a number gets the highest number already used in its season plus one.
    """
    db = self.__db
    seriesTitle = getLabel(parentID, '%stitles.index' % db,
                           '%stitles.key' % db)
    series = Movie(title=seriesTitle, movieID=parentID,
                   accessSystem='local')
    bySeason = {}
    for epID, rawTitle in eps_list:
        title = unicode(rawTitle, 'latin_1', 'replace')
        info = analyze_title(title, canonical=1)
        episode = Movie(data=info, movieID=epID, accessSystem='local')
        episode['episode of'] = series
        if info.get('year') is None:
            # No year in the parsed title: try the movies.data file.
            year = getFullIndex('%smovies.data' % db, key=epID,
                                kind='moviedata', rindex=1)
            if year:
                episode['year'] = year
        seasonEps = bySeason.setdefault(info.get('season', 'UNKNOWN'), {})
        number = info.get('episode')
        if number is None:
            # Unnumbered episode: take the next free number in the season.
            number = max(seasonEps.keys() or [0]) + 1
        seasonEps[number] = episode
    return bySeason
def parseMinusList(movieID, dataF, indexF):
    """Parser for lists like goofs.data, crazy-credits.data and so on.

    The offset for movieID is looked up in indexF with getFullIndex; an
    empty list is returned when no offset is found.  Otherwise dataF is
    opened in text mode, and an IOError raised while opening it is
    re-raised as IMDbDataAccessError.

    NOTE(review): only the beginning of this function is visible here;
    the code that reads from the opened file is not shown.
    """
    offset = getFullIndex(indexF, movieID)
    # Nothing indexed for this movie.
    if offset is None:
        return []
    try:
        fdata = open(dataF, 'rt')
    except IOError, e:
        raise IMDbDataAccessError, str(e)
def getBio(personID, indexF, dataF):
    """Get biography information for the given person.

    The position for personID is looked up in indexF with getFullIndex;
    an empty dictionary is returned when nothing is found.  Otherwise
    dataF is opened for reading, and an IOError raised while opening it
    is re-raised as IMDbDataAccessError.

    NOTE(review): only the beginning of this function is visible here;
    the code that parses the opened file is not shown.
    """
    bioidx = getFullIndex(indexF, personID)
    # No biography indexed for this person.
    if bioidx is None:
        return {}
    try:
        fbio = open(dataF, 'r')
    except IOError, e:
        raise IMDbDataAccessError, str(e)
def _parseColonList(movieID, indexF, dataF, stopKey, replaceKeys):
    """Parser for lists with "COMMA: value" strings.

    The position for movieID is looked up in indexF with getFullIndex
    (kind='idx2idx'); an empty dictionary is returned when nothing is
    found.  Otherwise dataF is opened in text mode, and an IOError raised
    while opening it is re-raised as IMDbDataAccessError.

    NOTE(review): only the beginning of this function is visible here;
    how stopKey and replaceKeys are used is not shown.
    """
    index = getFullIndex(indexF, movieID, kind='idx2idx')
    out = {}
    # Nothing indexed for this movie: return the empty dictionary.
    if index is None:
        return out
    try:
        fd = open(dataF, 'rt')
    except IOError, e:
        raise IMDbDataAccessError, str(e)
def getAkaNames(personID, akaDF, namesIF, namesKF):
    """Return a list of aka names.

    Every entry found for personID in akaDF (kind='akandb') has its
    second item resolved to a label through namesIF/namesKF; entries
    whose label is empty are left out.
    """
    entries = getFullIndex(akaDF, personID, kind='akandb',
                           rindex=None, multi=1, default=[])
    labels = [getLabel(entry[1], namesIF, namesKF) for entry in entries]
    return [label for label in labels if label]
def getRatingData(movieID, ratingDF):
    """Return a dictionary with rating information.

    The record for movieID is fetched from ratingDF (kind='rating').
    When there is none an empty dictionary is returned; otherwise the
    result has the keys 'votes distribution', 'votes' and 'rating', the
    latter being the stored value divided by 10.0.
    """
    fields = getFullIndex(ratingDF, movieID, kind='rating', rindex=None)
    if fields is None:
        return {}
    # Drop the leading item in place, so that the remaining ones are
    # distribution, votes and rating.
    del fields[:1]
    fields[2] = fields[2] / 10.0
    return {'votes distribution': fields[0],
            'votes': fields[1],
            'rating': fields[2]}
def getPlot(movieID, plotIF, plotDF):
    """Return a list of plot strings.

    The position for movieID is looked up in plotIF with getFullIndex
    (passing 'plot' as third argument); an empty list is returned when
    nothing is found.  Otherwise plotDF is opened in text mode, and an
    IOError raised while opening it is re-raised as IMDbDataAccessError.

    NOTE(review): only the beginning of this function is visible here;
    the code that fills plotl/plotltmp from the file is not shown.
    """
    idx = getFullIndex(plotIF, movieID, 'plot')
    # No plot indexed for this movie.
    if idx is None:
        return []
    plotl = []
    plotltmp = []
    try:
        dataf = open(plotDF, 'rt')
    except IOError, e:
        raise IMDbDataAccessError, str(e)
def getMovieMisc(movieID, dataF, indexF, attrIF, attrKF):
    """Return information from files like production-companies.data,
    keywords.data and so on.

    The position for movieID is looked up in indexF with getFullIndex
    (kind='idx2idx'); an empty list is returned when nothing is found.
    Otherwise dataF is opened in binary mode, and an IOError raised while
    opening it is re-raised as IMDbDataAccessError.

    NOTE(review): only the beginning of this function is visible here;
    how attrIF and attrKF are used is not shown.
    """
    index = getFullIndex(indexF, movieID, kind='idx2idx')
    # Nothing indexed for this movie.
    if index is None:
        return []
    result = []
    try:
        fdata = open(dataF, 'rb')
    except IOError, e:
        raise IMDbDataAccessError, str(e)
def getAkaTitles(movieID, akaDF, titlesIF, titlesKF, attrIF, attrKF):
    """Return a list of aka titles.

    Every entry found for movieID in akaDF (kind='akatdb') has its second
    item resolved to a title through titlesIF/titlesKF; entries without a
    title are skipped.  When the third item resolves to a non-empty label
    through attrIF/attrKF, it is appended to the title after a '::'
    separator.
    """
    entries = getFullIndex(akaDF, movieID, kind='akatdb',
                           rindex=None, multi=1, default=[])
    titles = []
    for entry in entries:
        title = getLabel(entry[1], titlesIF, titlesKF)
        if title:
            note = getLabel(entry[2], attrIF, attrKF)
            if note:
                title += '::%s' % note
            titles.append(title)
    return titles
def getMovieLinks(movieID, dataF, movieTitlIF, movieTitlKF):
    """Return a dictionary with movie connections.

    Every entry found for movieID in dataF (kind='mlinks') is turned into
    a Movie built from the title its third item resolves to; the second
    item is mapped to a section name through _links_sect.  Entries with
    no title or no known section are ignored.  The result maps each
    section name to the list of its Movie instances.
    """
    entries = getFullIndex(dataF, movieID, kind='mlinks',
                           rindex=None, multi=1, default=[])
    links = {}
    for entry in entries:
        linkedID = entry[2]
        linkedTitle = getLabel(linkedID, movieTitlIF, movieTitlKF)
        if linkedTitle:
            movie = Movie(title=linkedTitle, movieID=linkedID,
                          accessSystem='local')
            section = _links_sect.get(entry[1])
            if section:
                links.setdefault(section, []).append(movie)
    return links
def getTaglines(movieID, indexF, dataF):
    """Return a list of taglines.

    The offset of the movie's section inside dataF is looked up in indexF
    with getFullIndex; when no offset is found an empty list is returned.
    Starting from the line after that offset, every line is stripped and
    collected until an empty line (or the end of the file) is reached.

    Raises IMDbDataAccessError if dataF cannot be opened.
    """
    index = getFullIndex(indexF, movieID)
    tgL = []
    if index is None:
        return tgL
    try:
        tgf = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        tgf.seek(index)
        # Skip the line found at the indexed offset.
        tgf.readline()
        while 1:
            line = latin2utf(tgf.readline().strip())
            if not line:
                break
            tgL.append(line)
    finally:
        # Release the file handle even if reading/decoding fails.
        tgf.close()
    return tgL
def get_movie_main(self, movieID):
    """Collect the 'main' and 'vote details' information of a movie.

    Returns a dictionary with two keys: 'data', the dictionary of the
    collected information (parsed title, cast, other crew categories,
    vote details, miscellaneous lists, AKA titles, series information for
    episodes and MPAA data), and 'info sets', the tuple of the
    information sets provided.

    Raises IMDbDataAccessError when no title is found for movieID.
    """
    # Information sets provided by this method.
    infosets = ('main', 'vote details')
    db = self.__db
    titlesIF = '%stitles.index' % db
    titlesKF = '%stitles.key' % db
    namesIF = '%snames.index' % db
    namesKF = '%snames.key' % db
    attrIF = '%sattributes.index' % db
    attrKF = '%sattributes.key' % db
    tl = getLabel(movieID, titlesIF, titlesKF)
    # No title, no party.
    if tl is None:
        raise IMDbDataAccessError('unable to get movieID "%s"' % movieID)
    res = analyze_title(tl)
    # Build the cast list.
    actl = []
    for castG in ('actors', 'actresses'):
        midx = getFullIndex('%s%s.titles' % (db, castG), movieID, multi=1)
        if midx is not None:
            params = {'movieID': movieID,
                      'dataF': '%s%s.data' % (db, castG),
                      'indexF': namesIF,
                      'keyF': namesKF,
                      'attrIF': attrIF,
                      'attrKF': attrKF,
                      'charNF': '%scharacter2id.index' % db,
                      'offsList': midx,
                      'doCast': 1}
            actl += getMovieCast(**params)
    if actl:
        actl.sort()
        res['cast'] = actl
    # List of other workers.  Each entry, with an 's' appended, is the
    # base name of the related .titles and .data files; every category is
    # listed once, so that no data file is parsed twice.
    works = ('writer', 'cinematographer', 'composer', 'costume-designer',
             'director', 'editor', 'miscellaneou', 'producer',
             'production-designer')
    for work in works:
        index = getFullIndex('%s%ss.titles' % (db, work), movieID, multi=1)
        if index is None:
            continue
        params = {'movieID': movieID,
                  'dataF': '%s%ss.data' % (db, work),
                  'indexF': namesIF,
                  'keyF': namesKF,
                  'attrIF': attrIF,
                  'attrKF': attrKF,
                  'offsList': index}
        # Key under which the category is stored in the result.
        if '-' in work:
            name = work.replace('-', ' ')
        elif work == 'miscellaneou':
            name = 'miscellaneous crew'
        else:
            name = work
        if work == 'writer':
            params['doWriters'] = 1
        data = getMovieCast(**params)
        if name == 'writer':
            data.sort()
        res[name] = data
    # Rating.
    rt = self.get_movie_vote_details(movieID)['data']
    if rt:
        res.update(rt)
    # Various information: (key in the result, base name of the files).
    miscInfo = (('runtimes', 'running-times'),
                ('color info', 'color-info'),
                ('genres', 'genres'),
                ('distributors', 'distributors'),
                ('languages', 'language'),
                ('certificates', 'certificates'),
                ('special effects companies', 'special-effects-companies'),
                ('sound mix', 'sound-mix'),
                ('tech info', 'technical'),
                ('production companies', 'production-companies'),
                ('countries', 'countries'))
    companyKeys = ('distributors', 'special effects companies',
                   'production companies')
    for name, fname in miscInfo:
        params = {'movieID': movieID,
                  'dataF': '%s%s.data' % (db, fname),
                  'indexF': '%s%s.index' % (db, fname),
                  'attrIF': attrIF,
                  'attrKF': attrKF}
        data = getMovieMisc(**params)
        if name in companyKeys:
            # Replace, in place, every company string with a Company
            # instance.
            for nitem, rawCompany in enumerate(data):
                n, notes = split_company_name_notes(rawCompany)
                data[nitem] = Company(
                    name=n,
                    companyID=getCompanyID(n, '%scompany2id.index' % db),
                    notes=notes,
                    accessSystem=self.accessSystem)
        if data:
            res[name] = data
    if 'runtimes' in res and len(res['runtimes']) > 0:
        # Move what re_episodes matches in the first runtime to the
        # 'number of episodes' key.
        runtime = res['runtimes'][0]
        episodes = re_episodes.findall(runtime)
        if episodes:
            res['runtimes'][0] = re_episodes.sub('', runtime)
            res['number of episodes'] = episodes[0]
    # AKA titles.
    akas = getAkaTitles(movieID, '%saka-titles.data' % db,
                        titlesIF, titlesKF, attrIF, attrKF)
    if akas:
        # normalize encoding.
        for idx, aka in enumerate(akas):
            ts = aka.split('::')
            if len(ts) != 2:
                continue
            t, n = ts
            nt = self._changeAKAencoding(n, t)
            if nt is not None:
                akas[idx] = '%s::%s' % (nt, n)
        res['akas'] = akas
    if res.get('kind') == 'episode':
        # Things to do if this is a tv series episode.
        episodeOf = res.get('episode of')
        if episodeOf is not None:
            parentSeries = Movie(data=res['episode of'],
                                 accessSystem='local')
            seriesID = self._getTitleID(
                parentSeries.get('long imdb canonical title'))
            parentSeries.movieID = seriesID
            res['episode of'] = parentSeries
        if not res.get('year'):
            # No year in the parsed title: try the movies.data file.
            year = getFullIndex('%smovies.data' % db, movieID,
                                kind='moviedata', rindex=1)
            if year:
                res['year'] = year
    # MPAA info.
    mpaa = getMPAA(movieID, '%smpaa-ratings-reasons.index' % db,
                   '%smpaa-ratings-reasons.data' % db)
    if mpaa:
        res.update(mpaa)
    return {'data': res, 'info sets': infosets}
def get_person_filmography(self, personID):
    """Collect the 'filmography' and 'episodes' information of a person.

    Returns a dictionary with two keys: 'data' and 'info sets'.  'data'
    maps every job category having at least one non-episode title to the
    sorted list of those titles; when episodes are found, its 'episodes'
    key maps each series (a Movie instance) to the list of the person's
    episodes, sorted and then reversed.  For episodes without a current
    role in categories other than 'actor' and 'actress', the notes are
    prefixed with the category name in square brackets.
    """
    infosets = ('filmography', 'episodes')
    db = self.__db
    # Each entry, with an 's' appended, is the base name of the related
    # .names and .data files.
    roles = ('actor', 'actresse', 'producer', 'writer',
             'cinematographer', 'composer', 'costume-designer',
             'director', 'editor', 'miscellaneou',
             'production-designer')
    # Categories stored under a key that is not simply the role with
    # dashes turned into spaces.
    sectionNames = {'actresse': 'actress',
                    'miscellaneou': 'miscellaneous crew'}
    # Extra flag handed to getFilmography for some roles.
    roleFlags = {'actor': 'doCast', 'actresse': 'doCast',
                 'writer': 'doWriters'}
    filmography = {}
    bySeries = {}
    for role in roles:
        offset = getFullIndex('%s%ss.names' % (db, role), personID)
        if offset is None:
            continue
        section = sectionNames.get(role, role.replace('-', ' '))
        kwds = {'offset': offset,
                'indexF': '%stitles.index' % db,
                'keyF': '%stitles.key' % db,
                'attrIF': '%sattributes.index' % db,
                'attrKF': '%sattributes.key' % db,
                'charNF': '%scharacter2id.index' % db,
                'dataF': '%s%ss.data' % (db, role)}
        if role in roleFlags:
            kwds[roleFlags[role]] = 1
        # Split normal titles from episodes.
        movies = []
        eps = []
        for title in getFilmography(**kwds):
            if title.get('kind') != 'episode':
                movies.append(title)
            else:
                eps.append(title)
        movies.sort()
        if movies:
            filmography[section] = movies
        for ep in eps:
            series = Movie(data=ep['episode of'], accessSystem='local')
            series.movieID = self._getTitleID(
                series.get('long imdb canonical title'))
            if not ep.get('year'):
                # No year in the episode data: try the movies.data file.
                year = getFullIndex('%smovies.data' % db, ep.movieID,
                                    kind='moviedata', rindex=1)
                if year:
                    ep['year'] = year
            if not ep.currentRole and section not in ('actor', 'actress'):
                # Put the category name in front of the notes.
                if ep.notes:
                    ep.notes = ' %s' % ep.notes
                ep.notes = '[%s]%s' % (section, ep.notes)
            bySeries.setdefault(series, []).append(ep)
    if bySeries:
        for epList in bySeries.values():
            epList.sort()
            epList.reverse()
        filmography['episodes'] = bySeries
    return {'data': filmography, 'info sets': tuple(infosets)}