Exemple #1
0
def getFilmography(dataF,
                   indexF,
                   keyF,
                   attrIF,
                   attrKF,
                   offset,
                   charNF=None,
                   doCast=0,
                   doWriters=0):
    """Gather information from the given files about the
    person entry found at offset; return a list of Movie objects,
    with the relevant attributes.

    dataF, offset, doCast and doWriters are handed to getRawData;
    indexF/keyF are used to resolve movie titles and attrIF/attrKF to
    resolve attribute labels (both through getLabel).  When charNF is
    given, non-empty roles are resolved through getCharactersIDs.
    Entries whose title cannot be resolved are skipped."""
    # The person name returned by getRawData is not needed here.
    _, res = getRawData(dataF, offset, doCast, doWriters)
    resList = []
    for movie in res:
        movieID = movie['movieID']
        title = getLabel(movieID, indexF, keyF)
        if not title:
            continue
        curRole = movie.get('currentRole', u'')
        roleID = None
        if curRole and charNF:
            curRole, roleID = getCharactersIDs(curRole, charNF)
        m = Movie(title=title,
                  movieID=movieID,
                  currentRole=curRole,
                  roleID=roleID,
                  accessSystem='local')
        # "in" replaces dict.has_key(), which no longer exists in Python 3.
        if 'attributeID' in movie:
            attr = getLabel(movie['attributeID'], attrIF, attrKF)
            if attr:
                m.notes = attr
        resList.append(m)
    return resList
Exemple #2
0
 def _buildEpisodes(self, eps_list, parentID):
     """Group the episodes of a series by season and episode number.

     eps_list is an iterable of (episodeID, episodeTitle) pairs whose
     titles are decoded from latin_1; parentID is the movieID of the
     series.  Return a dictionary of the form
     {season: {episode_number: Movie}}; episodes whose title carries no
     season are filed under the 'UNKNOWN' key."""
     episodes = {}
     parentTitle = getLabel(parentID, '%stitles.index' % self.__db,
                            '%stitles.key' % self.__db)
     parentSeries = Movie(title=parentTitle,
                          movieID=parentID,
                          accessSystem='local')
     for episodeID, episodeTitle in eps_list:
         episodeTitle = unicode(episodeTitle, 'latin_1', 'replace')
         data = analyze_title(episodeTitle, canonical=1)
         m = Movie(data=data, movieID=episodeID, accessSystem='local')
         m['episode of'] = parentSeries
         if data.get('year') is None:
             # The title carries no year: look it up in the movies data.
             year = getFullIndex('%smovies.data' % self.__db,
                                 key=episodeID,
                                 kind='moviedata',
                                 rindex=1)
             if year:
                 m['year'] = year
         # setdefault replaces the has_key()-then-assign sequence
         # (dict.has_key() no longer exists in Python 3).
         season_eps = episodes.setdefault(data.get('season', 'UNKNOWN'), {})
         ep_number = data.get('episode')
         if ep_number is None:
             # No episode number: use the next free one in this season
             # (1 when the season is still empty).
             if season_eps:
                 ep_number = max(season_eps) + 1
             else:
                 ep_number = 1
         season_eps[ep_number] = m
     return episodes
Exemple #3
0
def _buildGuests(gl):
    """Return a list of Movie objects from a list of GA lines."""
    rl = []
    rlapp = rl.append
    for g in gl:
        # When used by the imdbpy2sql.py script, latin_1 strings are passed.
        if not isinstance(g, UnicodeType):
            g = unicode(g, 'latin_1', 'replace')
        titl = re_titleRef.findall(g)
        if len(titl) != 1: continue
        note = u''
        if g[-1] == ')':
            opi = g.rfind('(episode')
            if opi == -1: opi = g.rfind('(')
            if opi != -1:
                note = g[opi:].replace('_', '"').strip()
                g = g[:opi].strip()
        cr = u''
        cri = g.find('_ (qv), as ')
        if cri != -1:
            cr = g[cri + 11:].replace('[unknown]', u'').strip()
            if cr and cr[-1] == ')':
                opi = cr.rfind('(')
                if opi != -1:
                    if note: note += ' '
                    note += cr[opi:]
                    cr = cr[:opi].strip()
        # As you can see, we've no notion of the movieID, here.
        m = Movie(title=titl[0],
                  currentRole=cr,
                  notes=note,
                  accessSystem='local')
        rlapp(m)
    return rl
Exemple #4
0
 def postprocess_data(self, data):
     """Turn the collected (text, link) pairs into reference objects.

     For each of the 'names refs', 'titles refs' and 'characters refs'
     keys, return a dictionary mapping the stripped text to a Person,
     Movie or Character instance.  Pairs with an empty text or link, or
     whose link does not end with '/', are discarded."""
     result = {}
     for kind in ('names refs', 'titles refs', 'characters refs'):
         refs = {}
         result[kind] = refs
         for text, link in data.get(kind, []):
             text = text.strip()
             link = link.strip()
             if not text or not link:
                 continue
             if not link.endswith('/'):
                 continue
             imdbID = analyze_imdbid(link)
             if kind == 'names refs':
                 ref = Person(personID=imdbID,
                              name=text,
                              accessSystem=self._as,
                              modFunct=self._modFunct)
             elif kind == 'titles refs':
                 ref = Movie(movieID=imdbID,
                             title=text,
                             accessSystem=self._as,
                             modFunct=self._modFunct)
             else:
                 ref = Character(characterID=imdbID,
                                 name=text,
                                 accessSystem=self._as,
                                 modFunct=self._modFunct)
             # XXX: companies aren't handled: are they ever found in text,
             #      as links to their page?
             refs[text] = ref
     return result
Exemple #5
0
 def _base_person_info(self,
                       personID,
                       movies_cache=None,
                       persons_cache=None):
     if movies_cache is None:
         movies_cache = {}
     if persons_cache is None:
         persons_cache = {}
     if personID in persons_cache:
         return persons_cache[personID]
     nb = self.T['name_basics']
     person = nb.select(nb.c.nconst == personID).execute().fetchone() or {}
     data = self._rename('name_basics', dict(person))
     movies = []
     for movieID in split_array(data.get('known for') or ''):
         if not movieID:
             continue
         movieID = int(movieID)
         movie_data = self._base_title_info(movieID,
                                            movies_cache=movies_cache,
                                            persons_cache=persons_cache)
         movie = Movie(movieID=movieID,
                       data=movie_data,
                       accessSystem=self.accessSystem)
         movies.append(movie)
     data['known for'] = movies
     self._clean(data,
                 ('ns_soundex', 'sn_soundex', 's_soundex', 'personID'))
     persons_cache[personID] = data
     return data
Exemple #6
0
def get_movie_data(movieID, kindDict, fromAka=0):
    """Return a dictionary containing data about the given movieID;
    if fromAka is true, the AkaTitle table is searched.

    kindDict maps kind identifiers to their names.  Keys whose value is
    None are left out of the result; 'year' is also dropped when it
    cannot be converted to an integer, while 'season' and 'episode'
    keep their raw value in that case.  For episodes, 'episode of' is
    set to a Movie object built (recursively) from the parent series."""
    if not fromAka:
        Table = Title
    else:
        Table = AkaTitle
    m = Table.get(movieID)
    mdict = {
        'title': m.title,
        'kind': kindDict[m.kindID],
        'year': m.productionYear,
        'imdbIndex': m.imdbIndex,
        'season': m.seasonNr,
        'episode': m.episodeNr
    }
    if not fromAka:
        if m.seriesYears is not None:
            mdict['series years'] = unicode(m.seriesYears)
    if mdict['imdbIndex'] is None:
        del mdict['imdbIndex']
    if mdict['year'] is None:
        del mdict['year']
    else:
        try:
            mdict['year'] = int(mdict['year'])
        except (TypeError, ValueError):
            del mdict['year']
    for key in ('season', 'episode'):
        if mdict[key] is None:
            del mdict[key]
            continue
        try:
            mdict[key] = int(mdict[key])
        except (TypeError, ValueError):
            # Deliberate best effort: a non-numeric value is kept as it
            # is.  Only conversion errors are caught, so that unrelated
            # failures are no longer silenced.
            pass
    episodeOfID = m.episodeOfID
    if episodeOfID is not None:
        ser_dict = get_movie_data(episodeOfID, kindDict, fromAka)
        mdict['episode of'] = Movie(data=ser_dict,
                                    movieID=episodeOfID,
                                    accessSystem='sql')
        if fromAka:
            ser_note = AkaTitle.get(episodeOfID).note
            if ser_note:
                mdict['episode of'].notes = ser_note
    return mdict
 def end_li(self):
     """Handle the end of a 'li' element.

     While inside the episodes section, and when both an episode title
     and an episode ID were collected, build a Movie for the episode
     (air date, year, role and notes are taken from the collected misc
     info) and append it to the list kept in self._episodes for the
     current series.  The per-item state is reset afterwards."""
     self._in_li = 0
     if self._in_episodes:
         title = self._cur_episode_title.strip()
         info = self._misc_info.strip()
         if title and self._episode_id:
             eps_data = analyze_title(title, canonical=1)
             eps_data['kind'] = u'episode'
             episode = Movie(movieID=str(self._episode_id),
                             data=eps_data,
                             accessSystem=self._as,
                             modFunct=self._modFunct)
             episode['episode of'] = self._cur_series
             # A leading "(...)" chunk holds the original air date.
             closing = -1
             if info.startswith('('):
                 closing = info.find(')')
             if closing != -1:
                 air_date = info[1:closing]
                 if air_date != '????':
                     episode['original air date'] = air_date
                     if eps_data.get('year', '????') == '????':
                         # The title has no year: use the air date's one.
                         last_word = air_date.split()[-1]
                         if last_word.isdigit():
                             episode['year'] = last_word
             separator = info.find(' - ')
             if separator != -1:
                 tail = info[separator + 3:].strip()
                 if self._got_i_info:
                     words = tail.split()
                     notes = '[%s]' % words[0]
                     notes += ' '.join(words[1:])
                     episode.notes = notes
                 else:
                     # A parenthesis closing the role is moved to the notes.
                     notes = u''
                     paren = tail.rfind('(')
                     if paren != -1 and tail.endswith(')'):
                         notes = tail[paren:]
                         tail = tail[:paren].strip()
                     episode.notes = notes
                     episode.currentRole = tail
             self._episodes.setdefault(self._cur_series, []).append(episode)
         self._cur_episode_title = u''
         self._episode_id = None
     self._in_misc_info = 0
     self._misc_info = u''
 def end_li(self):
     """Handle the end of a 'li' element.

     Inside the episodes section, an item for which both a title and an
     ID were collected is turned into a Movie (kind 'episode', linked to
     the current series) and appended to the list stored in
     self._episodes under the current series; the collected misc info
     provides air date, year, role and notes.  The per-item state is
     then cleared."""
     self._in_li = 0
     if self._in_episodes:
         ep_title = self._cur_episode_title.strip()
         misc = self._misc_info.strip()
         if ep_title and self._episode_id:
             ep_data = analyze_title(ep_title, canonical=1)
             ep_data['kind'] = u'episode'
             ep = Movie(movieID=str(self._episode_id), data=ep_data,
                        accessSystem=self._as, modFunct=self._modFunct)
             ep['episode of'] = self._cur_series
             if misc.startswith('('):
                 end = misc.find(')')
                 # Text between the parentheses is the original air date.
                 if end != -1 and misc[1:end] != '????':
                     aired = misc[1:end]
                     ep['original air date'] = aired
                     year_missing = ep_data.get('year', '????') == '????'
                     if year_missing:
                         candidate = aired.split()[-1]
                         if candidate.isdigit():
                             ep['year'] = candidate
             dash = misc.find(' - ')
             if dash != -1:
                 if not self._got_i_info:
                     role = misc[dash+3:].strip()
                     paren = role.rfind('(')
                     if paren != -1 and role.endswith(')'):
                         # Split a parenthesis closing the role off it.
                         ep.notes = role[paren:]
                         role = role[:paren].strip()
                     else:
                         ep.notes = u''
                     ep.currentRole = role
                 else:
                     parts = misc[dash+3:].strip().split()
                     ep.notes = ('[%s]' % parts[0]) + ' '.join(parts[1:])
             self._episodes.setdefault(self._cur_series, []).append(ep)
         self._cur_episode_title = u''
         self._episode_id = None
     self._in_misc_info = 0
     self._misc_info = u''
Exemple #9
0
 def postprocess_data(self, data):
     """Convert the collected quotes to their final form.

     data maps a title to a (movieID, quotes) pair.  Return
     {'quotes': {key: [...]}} where key is a Movie object (or the bare
     title when movieID is None) and every quote is split on '::'.
     Return an empty dictionary when there is no data."""
     if not data:
         return {}
     quotes_by_movie = {}
     for title, (movieID, quotes) in data.items():
         key = title
         if movieID is not None:
             key = Movie(title=title, movieID=movieID,
                         accessSystem=self._as, modFunct=self._modFunct)
         split_quotes = []
         for quote in quotes:
             split_quotes.append(quote.split('::'))
         quotes_by_movie[key] = split_quotes
     return {'quotes': quotes_by_movie}
 def _add_items(self):
     self._quotes = [x.replace(':: ', '::').replace(' ::', '::').rstrip(':')
                     for x in self._quotes]
     self._quotes = [x.replace('   ', ' ').replace('  ', ' ').strip()
                     for x in self._quotes]
     self._quotes = filter(None, self._quotes)
     if not (self._cur_title and self._cur_titleID and self._quotes):
         self._quotes = [u'']
         return
     movie = Movie(title=self._cur_title, movieID=self._cur_titleID,
                     accessSystem=self._as, modFunct=self._modFunct)
     self._tot_quotes[movie] = self._quotes[:]
     self._quotes = [u'']
Exemple #11
0
 def test_search(self):
     """Print the TV series found for "good place" and the episodes of
     title 4955642, season by season.

     NOTE(review): presumably needs network access through IMDb() and
     makes no assertion -- confirm it is meant as a manual check."""
     ia = IMDb()
     serials = ia.search_movie("good place")
     for serial in serials:
         if serial.data["kind"] == "tv series":
             print(serial.movieID, serial.data["title"], serial.data["kind"])
     print(serials)
     result = ia.get_movie_episodes("4955642")
     print(result)
     seasons = result["data"]["episodes"]
     for season in seasons:
         for index in seasons[season]:
             # Index with the current season: the previous hard-coded
             # season 4 paired every season's indexes with season 4's
             # episodes.
             item = Movie(seasons[season][index])
             print(f"{season}x{index}", item.myID)
Exemple #12
0
def getFilmography(dataF, indexF, keyF, attrIF, attrKF, offset,
                    charNF=None, doCast=0, doWriters=0):
    """Gather information from the given files about the
    person entry found at offset; return a list of Movie objects,
    with the relevant attributes.

    The raw entries come from getRawData(dataF, offset, doCast,
    doWriters).  Titles are resolved with indexF/keyF and attribute
    labels with attrIF/attrKF; roles are resolved through
    getCharactersIDs only when charNF is given.  Entries without a
    resolvable title are left out."""
    # Only the filmography is used; the returned name is discarded.
    res = getRawData(dataF, offset, doCast, doWriters)[1]
    resList = []
    for movie in res:
        title = getLabel(movie['movieID'], indexF, keyF)
        if not title:
            continue
        curRole = movie.get('currentRole', u'')
        roleID = None
        if curRole and charNF:
            curRole, roleID = getCharactersIDs(curRole, charNF)
        m = Movie(title=title, movieID=movie['movieID'],
                  currentRole=curRole, roleID=roleID,
                  accessSystem='local')
        # dict.has_key() was removed in Python 3: use "in" instead.
        if 'attributeID' in movie:
            attr = getLabel(movie['attributeID'], attrIF, attrKF)
            if attr:
                m.notes = attr
        resList.append(m)
    return resList
 def end_a(self):
     """Handle the end of an 'a' element.

     After an episode title, switch to collecting misc info.  After a
     series title, build the Movie for the series (when a non-empty
     title and a series ID are available) and store it in
     self._cur_series."""
     if self._in_episode_title:
         self._in_episode_title = 0
         self._in_misc_info = 1
         return
     if not self._in_series_title:
         return
     self._in_series_title = 0
     series_title = self._cur_series_title.strip()
     if not series_title or self._series_id is None:
         return
     series_data = analyze_title(series_title, canonical=1)
     self._cur_series = Movie(movieID=str(self._series_id),
                              data=series_data,
                              accessSystem=self._as,
                              modFunct=self._modFunct)
Exemple #14
0
def getMovieLinks(movieID, dataF, movieTitlIF, movieTitlKF):
    """Return a dictionary with movie connections.

    The dictionary maps a section name (looked up in _links_sect) to
    the list of linked Movie objects.  Entries whose title cannot be
    resolved, or whose link type has no section, are ignored."""
    connections = {}
    links = getFullIndex(dataF, movieID, kind='mlinks',
                         rindex=None, multi=1, default=[])
    for link in links:
        linkedID = link[2]
        title = getLabel(linkedID, movieTitlIF, movieTitlKF)
        if title:
            linked = Movie(title=title, movieID=linkedID,
                           accessSystem='local')
            section = _links_sect.get(link[1])
            if section:
                connections.setdefault(section, []).append(linked)
    return connections
Exemple #15
0
 def _add_ref(self, kind):
     """Add a reference entry to the names and titles dictionaries."""
     if kind == 'tt':
         if self._titleRefCID and self._titleCN:
             if not self._titlesRefs.has_key(self._titleCN):
                 try:
                     movie = Movie(movieID=str(self._titleRefCID),
                                   title=self._titleCN,
                                   accessSystem=self._as,
                                   modFunct=self._modFunct)
                     self._titlesRefs[self._titleCN] = movie
                 except IMDbParserError:
                     pass
             self._titleRefCID = u''
             self._titleCN = u''
             self._inTTRef = 0
             self._inLinkTTRef = 0
     elif kind == 'nm' and self._nameRefCID and self._nameCN:
         # XXX: 'Neo' and 'Keanu Reeves' are two separated
         #      entry in the dictionary.  Check the ID value instead
         #      of the key?
         if not self._namesRefs.has_key(self._nameCN):
             try:
                 person = Person(name=self._nameCN,
                                 personID=str(self._nameRefCID),
                                 accessSystem=self._as,
                                 modFunct=self._modFunct)
                 self._namesRefs[self._nameCN] = person
             except IMDbParserError:
                 pass
         self._nameRefCID = u''
         self._nameCN = u''
         self._inNMRef = 0
     elif kind == 'ch' and self._characterRefCID and self._characterCN:
         if not self._charactersRefs.has_key(self._characterCN):
             try:
                 character = Character(name=self._characterCN,
                                       characterID=str(
                                           self._characterRefCID),
                                       accessSystem='http')
                 self._charactersRefs[self._characterCN] = character
             except IMDbParserError:
                 pass
         self._characterRefCID = u''
         self._characterCN = u''
         self._inCHRef = 0
Exemple #16
0
 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings.

     o may be a string, a list/tuple or a dictionary; lists, tuples and
     dictionary values are scanned recursively.  The references found
     are added to the trefs (titles) and nrefs (names) dictionaries,
     which are modified in place and returned as a (trefs, nrefs)
     tuple.  References for which no ID can be found are skipped."""
     if isinstance(o, (unicode, str)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=0)
             rtitle = build_title(a_title, ptdf=1)
             # "in" replaces dict.has_key(), removed in Python 3.
             if rtitle in trefs:
                 continue
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle,
                       movieID=movieID,
                       accessSystem=self.accessSystem)
             # Register the same object under every spelling of the title.
             trefs[rtitle] = m
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if rname in nrefs:
                 continue
             personID = self._getNameID(rname)
             if personID is None:
                 personID = self._getNameID(name)
             if personID is None:
                 continue
             p = Person(name=rname,
                        personID=personID,
                        accessSystem=self.accessSystem)
             # Register the same object under every spelling of the name.
             nrefs[rname] = p
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (list, tuple)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, dict):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)
Exemple #17
0
 def postprocess_data(self, data):
     """Group the collected episodes by series.

     Every key of data is parsed with get_dom; the first link found in
     it provides the ID and the title of the series.  Return
     {'episodes': {series: [episode, ...]}}, where each episode gets
     its 'episode of' entry set to the series Movie object; return an
     empty dictionary when data is empty."""
     if len(data) == 0:
         return {}
     by_series = {}
     for markup, episodes in data.items():
         dom = self.get_dom(markup)
         href = self.xpath(dom, "//a/@href")[0]
         # The first and the last character of the link text are dropped.
         text = self.xpath(dom, "//a/text()")[0][1:-1]
         series = Movie(movieID=analyze_imdbid(href),
                        data=analyze_title(text),
                        accessSystem=self._as, modFunct=self._modFunct)
         linked = []
         by_series[series] = linked
         for episode in episodes:
             # XXX: should we create a copy of 'series', to avoid
             #      circular references?
             episode['episode of'] = series
             linked.append(episode)
     return {'episodes': by_series}
 def postprocess_data(self, data):
     """Turn the collected (text, link) pairs into reference objects.

     Return a dictionary with the 'names refs' and 'titles refs' keys,
     each mapping the stripped text to a Person or Movie instance; pairs
     with an empty text or an empty link are discarded."""
     result = {}
     for kind in ('names refs', 'titles refs'):
         refs = {}
         result[kind] = refs
         for text, link in data.get(kind, []):
             text = text.strip()
             link = link.strip()
             if text and link:
                 imdbID = analyze_imdbid(link)
                 if kind == 'names refs':
                     refs[text] = Person(personID=imdbID, name=text,
                                         accessSystem=self._as,
                                         modFunct=self._modFunct)
                 else:
                     refs[text] = Movie(movieID=imdbID, title=text,
                                        accessSystem=self._as,
                                        modFunct=self._modFunct)
     return result
Exemple #19
0
 def __get_serial(imdb_id: str, title: str, year: str) -> Serial:
     """Build a Serial holding the seasons and episodes of a title.

     The episodes are fetched with IMDb().get_movie_episodes(imdb_id);
     title and year are stored in the Serial as given.  Seasons are
     returned in numeric order.  On any error the exception is printed
     and None is returned."""

     def season_order(index):
         # Numeric seasons first, in numeric order; anything else after
         # them, compared as text, so that mixed key types never clash.
         if isinstance(index, int):
             return (0, index, "")
         return (1, 0, str(index))

     try:
         ia = IMDb()
         info = ia.get_movie_episodes(imdb_id)
         by_season = info["data"]["episodes"]
         seasons = []
         # Sorting the season titles as text put "Season 10" before
         # "Season 2"; sort on the season index instead.
         for season_index in sorted(by_season, key=season_order):
             episodes = []
             for episode_index in by_season[season_index]:
                 movie = Movie(by_season[season_index][episode_index])
                 episode = Episode(episode_index, movie.myID.data["title"], movie.myID.movieID)
                 episodes.append(episode)
             seasons.append(Season(f"Season {season_index}", episodes))
         return Serial(0, title, year, imdb_id, "", seasons)
     except Exception as ex:
         # Best effort, kept from the original: report and give up.
         print("Exception", ex)
         return None
Exemple #20
0
def _build_episode(link, title, minfo, role, roleA, roleAID):
    """Build a Movie object for a given episode of a series.

    link provides the ID of the episode and title its title.  minfo is
    the accompanying text: a leading "(...)" chunk is taken as the
    original air date, and whatever follows a ' -' separator as a role
    (used only when neither role nor roleA is given), with a closing
    parenthesis moved to the notes.  roleA is the fallback for role and
    roleAID its ID, which is dropped when no role is available."""
    episode_id = analyze_imdbid(link)
    notes = ''
    minidx = minfo.find(' -')
    # Sometimes, for some unknown reason, the role is left in minfo.
    if minidx != -1:
        # Skip just the two characters of the separator and let
        # lstrip() remove the optional space after it: skipping three
        # characters ate the first letter of the role whenever the dash
        # was not followed by a space.
        slfRole = minfo[minidx + 2:].lstrip()
        minfo = minfo[:minidx].rstrip()
        if slfRole.endswith(')'):
            commidx = slfRole.rfind('(')
            if commidx != -1:
                notes = slfRole[commidx:]
                slfRole = slfRole[:commidx]
        if slfRole and role is None and roleA is None:
            role = slfRole
    eps_data = analyze_title(title)
    eps_data['kind'] = 'episode'
    # FIXME: it's wrong for multiple characters (very rare on tv series?).
    if role is None:
        role = roleA  # At worse, it's None.
    if role is None:
        roleAID = None
    if roleAID is not None:
        roleAID = analyze_imdbid(roleAID)
    e = Movie(movieID=episode_id,
              data=eps_data,
              currentRole=role,
              roleID=roleAID,
              notes=notes)
    # XXX: are we missing some notes?
    # XXX: does it parse things as "Episode dated 12 May 2005 (12 May 2005)"?
    if minfo.startswith('('):
        pe = minfo.find(')')
        if pe != -1:
            date = minfo[1:pe]
            if date != '????':
                e['original air date'] = date
                if eps_data.get('year', '????') == '????':
                    # No year in the title: use the one of the air date.
                    syear = date.split()[-1]
                    if syear.isdigit():
                        e['year'] = int(syear)
    return e
 def do_br(self, attrs):
     """Handle a 'br' element; attrs is not used.

     After a series title: mark the result as an episode and store the
     parent series (a Movie) under 'episode of'.  After the series
     info: extract original air date, season and episode with
     self.re_airdate and store them in self._result.  Season and
     episode are converted to integers when possible and are stored
     when non-empty or numeric (so that 0 is kept)."""
     if self._in_series_title:
         self._in_series_title = 0
         st = self._series_title.strip()
         if st and self.__seriesID:
             d_title = analyze_title(st, canonical=1)
             m = Movie(movieID=str(self.__seriesID),
                       data=d_title,
                       accessSystem=self._as,
                       modFunct=self._modFunct)
             self._result['kind'] = u'episode'
             self._result['episode of'] = m
         self._series_title = u''
     elif self._in_series_info:
         self._in_series_info = 0
         # Collapse every run of whitespace into a single space.
         si = ' '.join(self._series_info.split())
         if si:
             aid = self.re_airdate.findall(si)
             if aid and len(aid[0]) == 3:
                 date, season, episode = aid[0]
                 date = date.strip()
                 try:
                     season = int(season)
                 except (TypeError, ValueError):
                     # Not a number: keep the text as found.
                     pass
                 try:
                     episode = int(episode)
                 except (TypeError, ValueError):
                     pass
                 if date and date != '????':
                     self._result['original air date'] = date
                 # Handle also "episode 0".
                 if season or isinstance(season, int):
                     self._result['season'] = season
                 # Test the episode itself here: the season was checked
                 # by mistake, so episode 0 could be lost and an empty
                 # episode stored.
                 if episode or isinstance(episode, int):
                     self._result['episode'] = episode
         self._series_info = u''
        return None
    length = convBin(dfptr.read(2), 'longlength')
    # Skip character name.
    latin2utf(dfptr.read(length))
    nrItems = convBin(dfptr.read(3), 'nrCharacterItems')
    if limit is not None and nrItems/2 > limit:
        nrItems = limit*2
    filmography = []
    for i in xrange(nrItems/2):
        personID = convBin(dfptr.read(3), 'personID')
        name = getLabel(personID, personIF, personKF)
        movieID = convBin(dfptr.read(3), 'movieID')
        title = getLabel(movieID, movieIF, movieKF)
        # XXX: notes are not retrieved: they can be found scanning
        # actors.list and actresses.list, but it will slow down everything.
        m = Movie(title=title, movieID=movieID, currentRole=name,
                    roleID=personID, roleIsPerson=True, accessSystem='local')
        filmography.append(m)
    dfptr.close()
    return filmography


def _convChID(characterID):
    """Return a numeric value for the given string, or None."""
    if characterID is None:
        return None
    return convBin(characterID, 'characterID')


def getCharactersIDs(names_string, charNF):
    """Returns a tuple (name, roleID) if the supplied string contains
    only one character, otherwise returns a tuple of lists:
Exemple #23
0
 # Complete cast/crew.
 compcast = [
     (self._compcast[cc.subjectID], self._compcast[cc.statusID])
     for cc in CompleteCast.select(CompleteCast.q.movieID == movieID)
 ]
 if compcast:
     for entry in compcast:
         val = unicode(entry[1])
         res[u'complete %s' % entry[0]] = val
 # Movie connections.
 mlinks = [[ml.linkedMovieID, self._link[ml.linkTypeID]]
           for ml in MovieLink.select(MovieLink.q.movieID == movieID)]
 if mlinks:
     for ml in mlinks:
         lmovieData = get_movie_data(ml[0], self._kind)
         m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
         ml[0] = m
     res['connections'] = {}
     mlinks[:] = _groupListBy(mlinks, 1)
     for group in mlinks:
         lt = group[0][1]
         res['connections'][lt] = [i[0] for i in group]
 # Episodes.
 episodes = {}
 eps_list = list(Title.select(Title.q.episodeOfID == movieID))
 eps_list.sort()
 if eps_list:
     ps_data = {
         'title': res['title'],
         'kind': res['kind'],
         'year': res.get('year'),
Exemple #24
0
 def get_movie_main(self, movieID):
     """Collect the main information about a movie from the local files.

     The title, cast, other crew, vote details, miscellaneous
     information, AKA titles, series data (for episodes) and MPAA info
     of movieID are gathered in a single dictionary.

     Return {'data': <collected data>, 'info sets': <sets provided>}.
     Raise IMDbDataAccessError when no title is found for movieID."""
     # Information sets provided by this method.
     infosets = ('main', 'vote details')
     tl = getLabel(movieID, '%stitles.index' % self.__db,
                   '%stitles.key' % self.__db)
     # No title, no party.
     if tl is None:
         # Call form of raise: valid on both Python 2 and Python 3.
         raise IMDbDataAccessError('unable to get movieID "%s"' % movieID)
     res = analyze_title(tl)
     # Build the cast list.
     actl = []
     for castG in ('actors', 'actresses'):
         midx = getFullIndex('%s%s.titles' % (self.__db, castG),
                             movieID, multi=1)
         if midx is not None:
             params = {'movieID': movieID,
                       'dataF': '%s%s.data' % (self.__db, castG),
                       'indexF': '%snames.index' % self.__db,
                       'keyF': '%snames.key' % self.__db,
                       'attrIF': '%sattributes.index' % self.__db,
                       'attrKF': '%sattributes.key' % self.__db,
                       'charNF': '%scharacter2id.index' % self.__db,
                       'offsList': midx, 'doCast': 1}
             actl += getMovieCast(**params)
     if actl:
         actl.sort()
         res['cast'] = actl
     # List of other workers; an "s" is appended to build the file names.
     # 'cinematographer' used to be listed twice, which only repeated the
     # same lookup and stored the same value again.
     works = ('writer', 'cinematographer', 'composer',
              'costume-designer', 'director', 'editor', 'miscellaneou',
              'producer', 'production-designer')
     for i in works:
         index = getFullIndex('%s%ss.titles' % (self.__db, i),
                              movieID, multi=1)
         if index is not None:
             params = {'movieID': movieID,
                       'dataF': '%s%s.data' % (self.__db, i),
                       'indexF': '%snames.index' % self.__db,
                       'keyF': '%snames.key' % self.__db,
                       'attrIF': '%sattributes.index' % self.__db,
                       'attrKF': '%sattributes.key' % self.__db,
                       'offsList': index}
             name = key = i
             if '-' in name:
                 name = name.replace('-', ' ')
             elif name == 'miscellaneou':
                 name = 'miscellaneous crew'
                 key = 'miscellaneou'
             elif name == 'writer':
                 params['doWriters'] = 1
             params['dataF'] = '%s%ss.data' % (self.__db, key)
             data = getMovieCast(**params)
             if name == 'writer':
                 data.sort()
             res[name] = data
     # Rating.
     rt = self.get_movie_vote_details(movieID)['data']
     if rt:
         res.update(rt)
     # Various information.
     miscInfo = (('runtimes', 'running-times'), ('color info', 'color-info'),
                 ('genres', 'genres'), ('distributors', 'distributors'),
                 ('languages', 'language'), ('certificates', 'certificates'),
                 ('special effects companies', 'special-effects-companies'),
                 ('sound mix', 'sound-mix'), ('tech info', 'technical'),
                 ('production companies', 'production-companies'),
                 ('countries', 'countries'))
     for name, fname in miscInfo:
         params = {'movieID': movieID,
                   'dataF': '%s%s.data' % (self.__db, fname),
                   'indexF': '%s%s.index' % (self.__db, fname),
                   'attrIF': '%sattributes.index' % self.__db,
                   'attrKF': '%sattributes.key' % self.__db}
         data = getMovieMisc(**params)
         if name in ('distributors', 'special effects companies',
                     'production companies'):
             # Replace every company string with a Company object.
             for nitem, item in enumerate(data):
                 n, notes = split_company_name_notes(item)
                 company = Company(name=n,
                                   companyID=getCompanyID(
                                       n, '%scompany2id.index' % self.__db),
                                   notes=notes,
                                   accessSystem=self.accessSystem)
                 data[nitem] = company
         if data:
             res[name] = data
     # "in" replaces dict.has_key(), removed in Python 3.
     if 'runtimes' in res and len(res['runtimes']) > 0:
         rt = res['runtimes'][0]
         episodes = re_episodes.findall(rt)
         if episodes:
             # Move what re_episodes matched out of the first runtime.
             res['runtimes'][0] = re_episodes.sub('', rt)
             res['number of episodes'] = episodes[0]
     # AKA titles.
     akas = getAkaTitles(movieID,
                         '%saka-titles.data' % self.__db,
                         '%stitles.index' % self.__db,
                         '%stitles.key' % self.__db,
                         '%sattributes.index' % self.__db,
                         '%sattributes.key' % self.__db)
     if akas:
         # normalize encoding.
         for i, aka in enumerate(akas):
             ts = aka.split('::')
             if len(ts) != 2:
                 continue
             t = ts[0]
             n = ts[1]
             nt = self._changeAKAencoding(n, t)
             if nt is not None:
                 akas[i] = '%s::%s' % (nt, n)
         res['akas'] = akas
     if res.get('kind') == 'episode':
         # Things to do if this is a tv series episode.
         episodeOf = res.get('episode of')
         if episodeOf is not None:
             parentSeries = Movie(data=res['episode of'],
                                  accessSystem='local')
             seriesID = self._getTitleID(parentSeries.get(
                                         'long imdb canonical title'))
             parentSeries.movieID = seriesID
             res['episode of'] = parentSeries
         if not res.get('year'):
             year = getFullIndex('%smovies.data' % self.__db,
                                 movieID, kind='moviedata', rindex=1)
             if year:
                 res['year'] = year
     # MPAA info.
     mpaa = getMPAA(movieID, '%smpaa-ratings-reasons.index' % self.__db,
                    '%smpaa-ratings-reasons.data' % self.__db)
     if mpaa:
         res.update(mpaa)
     return {'data': res, 'info sets': infosets}
Exemple #25
0
def build_movie(txt,
                movieID=None,
                roleID=None,
                status=None,
                accessSystem='http',
                modFunct=None,
                _parsingCharacter=False,
                _parsingCompany=False,
                year=None,
                chrRoles=None,
                rolesNoChar=None,
                additionalNotes=None):
    """Given a string as normally seen on the "categorized" page of
    a person on the IMDb's web site, returns a Movie instance.

    txt -- the raw text; whitespace is normalized, then it is split on
           the separator into a title half and a role half.
    movieID -- converted to a string when not None; an error is logged
               if it is None or if the resulting title is empty.
    roleID -- a falsy value becomes None; a one-item value is unwrapped;
              a list is aligned with the '/'-separated roles.
    status -- if true, stored in the returned Movie under 'status'.
    accessSystem, modFunct -- passed unchanged to Movie.
    _parsingCharacter -- use ' Played by ' as separator; also passed to
                         Movie as roleIsPerson.
    _parsingCompany -- use ' ... ' as separator (' .... ' otherwise).
    year -- if given, the title-end detection is skipped, a trailing
            parenthesized group is moved to the notes and ' (year)' is
            appended to the title.
    chrRoles -- '@@'-separated role names, used only when txt carries no
                role and roleID is a string (IDs are then extracted from
                it with _re_chrIDs).
    rolesNoChar -- '/'-separated roles appended to the role list.
    additionalNotes -- whitespace-normalized and appended to the notes.
    """
    # FIXME: Oook, lets face it: build_movie and build_person are now
    # two horrible sets of patches to support the new IMDb design.  They
    # must be rewritten from scratch.
    if _parsingCharacter:
        _defSep = ' Played by '
    elif _parsingCompany:
        _defSep = ' ... '
    else:
        _defSep = ' .... '
    title = re_spaces.sub(' ', txt).strip()
    # Split the role/notes from the movie title.
    tsplit = title.split(_defSep, 1)
    role = u''
    notes = u''
    roleNotes = []
    if len(tsplit) == 2:
        title = tsplit[0].rstrip()
        role = tsplit[1].lstrip()
    if title[-9:] == 'TV Series':
        title = title[:-9].rstrip()
    elif title[-14:] == 'TV mini-series':
        title = title[:-14] + ' (mini)'
    if title and title.endswith(_defSep.rstrip()):
        # The title ends with the separator minus its trailing space:
        # cut exactly that many characters.
        title = title[:-len(_defSep) + 1]
    # Try to understand where the movie title ends.
    while True:
        if year:
            break
        if title[-1:] != ')':
            # Ignore the silly "TV Series" notice.
            if title[-9:] == 'TV Series':
                title = title[:-9].rstrip()
                continue
            else:
                # Just a title: stop here.
                break
        # Try to match paired parentheses; yes: sometimes there are
        # parentheses inside comments...
        nidx = title.rfind('(')
        while (nidx != -1 and \
                    title[nidx:].count('(') != title[nidx:].count(')')):
            nidx = title[:nidx].rfind('(')
        # Unbalanced parentheses: stop here.
        if nidx == -1: break
        # The last item in parentheses seems to be a year: stop here.
        first4 = title[nidx + 1:nidx + 5]
        if (first4.isdigit() or first4 == '????') and \
                title[nidx+5:nidx+6] in (')', '/'):
            break
        # The last item in parentheses is a known kind: stop here.
        if title[nidx + 1:-1] in ('TV', 'V', 'mini', 'VG'): break
        # Else, in parentheses there are some notes.
        # XXX: should the notes in the role half be kept separated
        #      from the notes in the movie title half?
        if notes: notes = '%s %s' % (title[nidx:], notes)
        else: notes = title[nidx:]
        title = title[:nidx].rstrip()
    if year:
        year = year.strip()
        # Slice instead of indexing: the title may be empty here.
        if title[-1:] == ')':
            fpIdx = title.rfind('(')
            if fpIdx != -1:
                if notes: notes = '%s %s' % (title[fpIdx:], notes)
                else: notes = title[fpIdx:]
                title = title[:fpIdx].rstrip()
        title = u'%s (%s)' % (title, year)
    if _parsingCharacter and roleID and not role:
        roleID = None
    if not roleID:
        roleID = None
    elif len(roleID) == 1:
        roleID = roleID[0]
    if not role and chrRoles and isinstance(roleID, (str, unicode)):
        roleID = _re_chrIDs.findall(roleID)
        role = ' / '.join(filter(None, chrRoles.split('@@')))
    # Manages multiple roleIDs.
    if isinstance(roleID, list):
        tmprole = role.split('/')
        role = []
        for r in tmprole:
            nidx = r.find('(')
            if nidx != -1:
                role.append(r[:nidx].rstrip())
                roleNotes.append(r[nidx:])
            else:
                role.append(r)
                roleNotes.append(None)
        lr = len(role)
        lrid = len(roleID)
        if lr > lrid:
            # More roles than IDs: pad with None so that every role has
            # a matching (possibly unknown) ID.  A new list is built so
            # that the list passed by the caller is not extended.
            roleID = roleID + [None] * (lr - lrid)
        elif lr < lrid:
            roleID = roleID[:lr]
        for i, rid in enumerate(roleID):
            if rid is not None:
                roleID[i] = str(rid)
        if lr == 1:
            role = role[0]
            roleID = roleID[0]
    elif roleID is not None:
        roleID = str(roleID)
    if movieID is not None:
        movieID = str(movieID)
    if (not title) or (movieID is None):
        _b_m_logger.error('empty title or movieID for "%s"', txt)
    if rolesNoChar:
        rolesNoChar = filter(None, [x.strip() for x in rolesNoChar.split('/')])
        if not role:
            role = []
        elif not isinstance(role, list):
            role = [role]
        role += rolesNoChar
    notes = notes.strip()
    if additionalNotes:
        additionalNotes = re_spaces.sub(' ', additionalNotes).strip()
        if notes:
            notes += u' '
        notes += additionalNotes
    m = Movie(title=title,
              movieID=movieID,
              notes=notes,
              currentRole=role,
              roleID=roleID,
              roleIsPerson=_parsingCharacter,
              modFunct=modFunct,
              accessSystem=accessSystem)
    # roleID may be None here (single role with unknown ID): len() would
    # fail on it, so check for that first.
    if roleNotes and roleID is not None and len(roleNotes) == len(roleID):
        for idx, role in enumerate(m.currentRole):
            try:
                if roleNotes[idx]:
                    role.notes = roleNotes[idx]
            except IndexError:
                break
    # Status can't be checked here, and must be detected by the parser.
    if status:
        m['status'] = status
    return m
Exemple #26
0
 def get_person_filmography(self, personID):
     """Build the filmography of a person from the local data files.

     For every kind of job the person's offset is looked up in the
     matching '*.names' file; the titles found are stored under the
     job name, while episodes are grouped by their series under the
     'episodes' key.  Returns a dictionary with the 'data' and
     'info sets' keys.
     """
     infosets = ('filmography', 'episodes')
     res = {}
     episodes = {}
     # File name stems: an 's' is appended when building the file names.
     jobs = ('actor', 'actresse', 'producer', 'writer',
             'cinematographer', 'composer', 'costume-designer',
             'director', 'editor', 'miscellaneou', 'production-designer')
     # Stems whose public name is not simply the stem itself.
     renamed = {'actresse': 'actress',
                'miscellaneou': 'miscellaneous crew'}
     for job in jobs:
         offset = getFullIndex('%s%ss.names' % (self.__db, job), personID)
         if offset is None:
             continue
         name = renamed.get(job, job.replace('-', ' '))
         params = {'offset': offset,
                   'indexF': '%stitles.index' % self.__db,
                   'keyF': '%stitles.key' % self.__db,
                   'attrIF': '%sattributes.index' % self.__db,
                   'attrKF': '%sattributes.key' % self.__db,
                   'charNF': '%scharacter2id.index' % self.__db,
                   'dataF': '%s%ss.data' % (self.__db, job)}
         if job in ('actor', 'actresse'):
             params['doCast'] = 1
         elif job == 'writer':
             params['doWriters'] = 1
         # Split normal titles from episodes.
         movies = []
         eps = []
         for item in getFilmography(**params):
             if item.get('kind') == 'episode':
                 eps.append(item)
             else:
                 movies.append(item)
         movies.sort()
         if movies:
             res[name] = movies
         for ep in eps:
             series = Movie(data=ep['episode of'], accessSystem='local')
             series.movieID = self._getTitleID(
                                 series.get('long imdb canonical title'))
             if not ep.get('year'):
                 year = getFullIndex('%smovies.data' % self.__db,
                                     ep.movieID, kind='moviedata',
                                     rindex=1)
                 if year:
                     ep['year'] = year
             # Without a role, prefix the notes with the kind of job
             # (not done for actors and actresses).
             if name not in ('actor', 'actress') and not ep.currentRole:
                 if ep.notes:
                     ep.notes = ' %s' % ep.notes
                 ep.notes = '[%s]%s' % (name, ep.notes)
             episodes.setdefault(series, []).append(ep)
     if episodes:
         for series in episodes:
             eps_of_series = episodes[series]
             eps_of_series.sort()
             eps_of_series.reverse()
         res['episodes'] = episodes
     return {'data': res, 'info sets': tuple(infosets)}
Exemple #27
0
def build_movie(txt,
                movieID=None,
                roleID=None,
                status=None,
                accessSystem='http',
                modFunct=None,
                _parsingCharacter=False,
                _parsingCompany=False):
    """Given a string as normally seen on the "categorized" page of
    a person on the IMDb's web site, returns a Movie instance.

    txt -- the raw text; whitespace is normalized, then it is split on
           the separator into a title half and a role half.
    movieID -- converted to a string when not None; an error is logged
               if it is None or if the resulting title is empty.
    roleID -- a falsy value becomes None; a one-item value is unwrapped;
              a list is aligned with the '/'-separated roles.
    status -- if true, stored in the returned Movie under 'status'.
    accessSystem, modFunct -- passed unchanged to Movie.
    _parsingCharacter -- use ' Played by ' as separator; also passed to
                         Movie as roleIsPerson.
    _parsingCompany -- use ' ... ' as separator (' .... ' otherwise).
    """
    if _parsingCharacter:
        _defSep = ' Played by '
    elif _parsingCompany:
        _defSep = ' ... '
    else:
        _defSep = ' .... '
    title = re_spaces.sub(' ', txt).strip()
    # Split the role/notes from the movie title.
    tsplit = title.split(_defSep, 1)
    role = u''
    notes = u''
    roleNotes = []
    if len(tsplit) == 2:
        title = tsplit[0].rstrip()
        role = tsplit[1].lstrip()
    if title[-9:] == 'TV Series':
        title = title[:-9].rstrip()
    elif title[-14:] == 'TV mini-series':
        title = title[:-14] + ' (mini)'
    # Try to understand where the movie title ends.
    while True:
        if title[-1:] != ')':
            # Ignore the silly "TV Series" notice.
            if title[-9:] == 'TV Series':
                title = title[:-9].rstrip()
                continue
            else:
                # Just a title: stop here.
                break
        # Try to match paired parentheses; yes: sometimes there are
        # parentheses inside comments...
        nidx = title.rfind('(')
        while (nidx != -1 and \
                    title[nidx:].count('(') != title[nidx:].count(')')):
            nidx = title[:nidx].rfind('(')
        # Unbalanced parentheses: stop here.
        if nidx == -1: break
        # The last item in parentheses seems to be a year: stop here.
        first4 = title[nidx + 1:nidx + 5]
        if (first4.isdigit() or first4 == '????') and \
                title[nidx+5:nidx+6] in (')', '/'):
            break
        # The last item in parentheses is a known kind: stop here.
        if title[nidx + 1:-1] in ('TV', 'V', 'mini', 'VG'): break
        # Else, in parentheses there are some notes.
        # XXX: should the notes in the role half be kept separated
        #      from the notes in the movie title half?
        if notes: notes = '%s %s' % (title[nidx:], notes)
        else: notes = title[nidx:]
        title = title[:nidx].rstrip()
    if _parsingCharacter and roleID and not role:
        roleID = None
    if not roleID:
        roleID = None
    elif len(roleID) == 1:
        roleID = roleID[0]
    # Manages multiple roleIDs.
    if isinstance(roleID, list):
        tmprole = role.split('/')
        role = []
        for r in tmprole:
            nidx = r.find('(')
            if nidx != -1:
                role.append(r[:nidx].rstrip())
                roleNotes.append(r[nidx:])
            else:
                role.append(r)
                roleNotes.append(None)
        lr = len(role)
        lrid = len(roleID)
        if lr > lrid:
            # More roles than IDs: pad with None so that every role has
            # a matching (possibly unknown) ID.  A new list is built so
            # that the list passed by the caller is not extended.
            roleID = roleID + [None] * (lr - lrid)
        elif lr < lrid:
            roleID = roleID[:lr]
        for i, rid in enumerate(roleID):
            if rid is not None:
                roleID[i] = str(rid)
        if lr == 1:
            role = role[0]
            roleID = roleID[0]
    elif roleID is not None:
        roleID = str(roleID)
    if movieID is not None:
        movieID = str(movieID)
    if (not title) or (movieID is None):
        _b_m_logger.error('empty title or movieID for "%s"', txt)
    m = Movie(title=title,
              movieID=movieID,
              notes=notes,
              currentRole=role,
              roleID=roleID,
              roleIsPerson=_parsingCharacter,
              modFunct=modFunct,
              accessSystem=accessSystem)
    # roleID may be None here (single role with unknown ID): len() would
    # fail on it, so check for that first.
    if roleNotes and roleID is not None and len(roleNotes) == len(roleID):
        for idx, role in enumerate(m.currentRole):
            if roleNotes[idx]:
                role.notes = roleNotes[idx]
    # Status can't be checked here, and must be detected by the parser.
    if status:
        m['status'] = status
    return m
Exemple #28
0
 def get_movie_main(self, movieID):
     """Collect the main information about a movie from the local files.

     The title is read with getLabel and parsed with analyze_title; cast,
     crew, vote details, miscellaneous information, AKA titles and MPAA
     information are then added to the resulting dictionary.

     Returns a dictionary with the 'data' and 'info sets' keys.
     Raises IMDbDataAccessError if no title is found for movieID.
     """
     # Information sets provided by this method.
     infosets = ('main', 'vote details')
     tl = getLabel(movieID, '%stitles.index' % self.__db,
                     '%stitles.key' % self.__db)
     # No title, no party.
     if tl is None:
         raise IMDbDataAccessError('unable to get movieID "%s"' % movieID)
     res = analyze_title(tl)
     # Build the cast list.
     actl = []
     for castG in ('actors', 'actresses'):
         midx = getFullIndex('%s%s.titles' % (self.__db, castG),
                         movieID, multi=1)
         if midx is not None:
             params = {'movieID': movieID,
                         'dataF': '%s%s.data' % (self.__db, castG),
                         'indexF': '%snames.index' % self.__db,
                         'keyF': '%snames.key' % self.__db,
                         'attrIF': '%sattributes.index' % self.__db,
                         'attrKF': '%sattributes.key' % self.__db,
                         'charNF': '%scharacter2id.index' % self.__db,
                         'offsList': midx, 'doCast': 1}
             actl += getMovieCast(**params)
     if actl:
         actl.sort()
         res['cast'] = actl
     # List of other workers; these are file name stems, an 's' is
     # appended when building the file names.
     works = ('writer', 'cinematographer', 'composer',
             'costume-designer', 'director', 'editor', 'miscellaneou',
             'producer', 'production-designer')
     for i in works:
         index = getFullIndex('%s%ss.titles' % (self.__db, i),
                                 movieID, multi=1)
         if index is not None:
             params = {'movieID': movieID,
                         'dataF': '%s%ss.data' % (self.__db, i),
                         'indexF': '%snames.index' % self.__db,
                         'keyF': '%snames.key' % self.__db,
                         'attrIF': '%sattributes.index' % self.__db,
                         'attrKF': '%sattributes.key' % self.__db,
                         'offsList': index}
             # Key under which the data is stored in the result.
             name = i
             if '-' in name:
                 name = name.replace('-', ' ')
             elif name == 'miscellaneou':
                 name = 'miscellaneous crew'
             elif name == 'writer':
                 params['doWriters'] = 1
             data = getMovieCast(**params)
             if name == 'writer': data.sort()
             res[name] = data
     # Rating.
     rt = self.get_movie_vote_details(movieID)['data']
     if rt: res.update(rt)
     # Various information: (key in the result, file name stem).
     miscInfo = (('runtimes', 'running-times'), ('color info', 'color-info'),
                 ('genres', 'genres'), ('distributors', 'distributors'),
                 ('languages', 'language'), ('certificates', 'certificates'),
                 ('special effects companies', 'special-effects-companies'),
                 ('sound mix', 'sound-mix'), ('tech info', 'technical'),
                 ('production companies', 'production-companies'),
                 ('countries', 'countries'))
     for name, fname in miscInfo:
         params = {'movieID': movieID,
             'dataF': '%s%s.data' % (self.__db, fname),
             'indexF': '%s%s.index' % (self.__db, fname),
             'attrIF': '%sattributes.index' % self.__db,
             'attrKF': '%sattributes.key' % self.__db}
         data = getMovieMisc(**params)
         if name in ('distributors', 'special effects companies',
                     'production companies'):
             # Replace every company string with a Company object.
             for nitem, item in enumerate(data):
                 n, notes = split_company_name_notes(item)
                 company = Company(name=n, companyID=getCompanyID(n,
                                     '%scompany2id.index' % self.__db),
                                     notes=notes,
                                     accessSystem=self.accessSystem)
                 data[nitem] = company
         if data: res[name] = data
     if 'runtimes' in res and len(res['runtimes']) > 0:
         # The first runtime may carry the number of episodes: move it
         # to its own key.
         rt = res['runtimes'][0]
         episodes = re_episodes.findall(rt)
         if episodes:
             res['runtimes'][0] = re_episodes.sub('', rt)
             res['number of episodes'] = episodes[0]
     # AKA titles.
     akas = getAkaTitles(movieID,
                 '%saka-titles.data' % self.__db,
                 '%stitles.index' % self.__db,
                 '%stitles.key' % self.__db,
                 '%sattributes.index' % self.__db,
                 '%sattributes.key' % self.__db)
     if akas:
         # normalize encoding.
         for i, aka in enumerate(akas):
             ts = aka.split('::')
             if len(ts) != 2: continue
             t = ts[0]
             n = ts[1]
             nt = self._changeAKAencoding(n, t)
             if nt is not None: akas[i] = '%s::%s' % (nt, n)
         res['akas'] = akas
     if res.get('kind') == 'episode':
         # Things to do if this is a tv series episode.
         episodeOf = res.get('episode of')
         if episodeOf is not None:
             parentSeries = Movie(data=episodeOf,
                                         accessSystem='local')
             seriesID = self._getTitleID(parentSeries.get(
                                         'long imdb canonical title'))
             parentSeries.movieID = seriesID
             res['episode of'] = parentSeries
         if not res.get('year'):
             year = getFullIndex('%smovies.data' % self.__db,
                                 movieID, kind='moviedata', rindex=1)
             if year: res['year'] = year
     # MPAA info.
     mpaa = getMPAA(movieID, '%smpaa-ratings-reasons.index' % self.__db,
                     '%smpaa-ratings-reasons.data' % self.__db)
     if mpaa: res.update(mpaa)
     return {'data': res, 'info sets': infosets}
Exemple #29
0
 def get_person_filmography(self, personID):
     """Build the filmography of a person from the local data files.

     For every kind of job the person's offset is looked up in the
     matching '*.names' file; the titles found are stored under the
     job name, while episodes are grouped by their series under the
     'episodes' key.  Returns a dictionary with the 'data' and
     'info sets' keys.
     """
     infosets = ('filmography', 'episodes')
     res = {}
     episodes = {}
     # File name stems: an 's' is appended when building the file names.
     jobs = ('actor', 'actresse', 'producer', 'writer',
             'cinematographer', 'composer', 'costume-designer',
             'director', 'editor', 'miscellaneou', 'production-designer')
     # Stems whose public name is not simply the stem itself.
     renamed = {'actresse': 'actress',
                'miscellaneou': 'miscellaneous crew'}
     for job in jobs:
         offset = getFullIndex('%s%ss.names' % (self.__db, job), personID)
         if offset is None:
             continue
         name = renamed.get(job, job.replace('-', ' '))
         params = {'offset': offset,
                   'indexF': '%stitles.index' % self.__db,
                   'keyF': '%stitles.key' % self.__db,
                   'attrIF': '%sattributes.index' % self.__db,
                   'attrKF': '%sattributes.key' % self.__db,
                   'charNF': '%scharacter2id.index' % self.__db,
                   'dataF': '%s%ss.data' % (self.__db, job)}
         if job in ('actor', 'actresse'):
             params['doCast'] = 1
         elif job == 'writer':
             params['doWriters'] = 1
         # Split normal titles from episodes.
         movies = []
         eps = []
         for item in getFilmography(**params):
             if item.get('kind') == 'episode':
                 eps.append(item)
             else:
                 movies.append(item)
         movies.sort()
         if movies:
             res[name] = movies
         for ep in eps:
             series = Movie(data=ep['episode of'], accessSystem='local')
             series.movieID = self._getTitleID(
                                 series.get('long imdb canonical title'))
             if not ep.get('year'):
                 year = getFullIndex('%smovies.data' % self.__db,
                                     ep.movieID, kind='moviedata',
                                     rindex=1)
                 if year:
                     ep['year'] = year
             # Without a role, prefix the notes with the kind of job
             # (not done for actors and actresses).
             if name not in ('actor', 'actress') and not ep.currentRole:
                 if ep.notes:
                     ep.notes = ' %s' % ep.notes
                 ep.notes = '[%s]%s' % (name, ep.notes)
             episodes.setdefault(series, []).append(ep)
     if episodes:
         for series in episodes:
             eps_of_series = episodes[series]
             eps_of_series.sort()
             eps_of_series.reverse()
         res['episodes'] = episodes
     return {'data': res, 'info sets': tuple(infosets)}
Exemple #30
0
    # Yes: kindID values are hard-coded in the companies4local.py script.
    _kinds = {
        0: 'distributors',
        1: 'production companies',
        2: 'special effect companies',
        3: 'miscellaneous companies'
    }
    for i in xrange(nrItems):
        kind = _kinds.get(ord(dfptr.read(1)))
        if kind is None:
            import warnings
            warnings.warn('Unidentified kindID for a company.')
            break
        movieID = convBin(dfptr.read(3), 'movieID')
        title = getLabel(movieID, movieIF, movieKF)
        m = Movie(title=title, movieID=movieID, accessSystem='local')
        filmography.setdefault(kind, []).append(m)
    dfptr.close()
    return filmography


def _convChID(companyID):
    """Return a numeric value for the given string, or None."""
    if companyID is None:
        return None
    return convBin(companyID, 'companyID')


def getCompanyID(name, compNF):
    """Return a companyID for a name."""
    try:
 def get_movie_main(self, movieID):
     """Retrieve the 'maindetails' page of a movie and extract its data.

     The page is fetched with self._mretrieve and scanned with plain
     string searches (_findBetween / _getTagsWith); whatever is found is
     stored in a dictionary built from the parsed title.

     Returns a dictionary with the 'data' key.
     Raises IMDbDataAccessError if the page has no <title> tag.
     """
     cont = self._mretrieve(self.urls['movie_main'] % movieID + 'maindetails')
     title = _findBetween(cont, '<title>', '</title>', maxRes=1)
     if not title:
         raise IMDbDataAccessError('unable to get movieID "%s"' % movieID)
     title = _unHtml(title[0])
     if title.endswith(' - IMDb'):
         title = title[:-7]
     if cont.find('<span class="tv-extra">TV mini-series</span>') != -1:
         title += ' (mini)'
     d = analyze_title(title)
     kind = d.get('kind')
     # A "TV Series:" section with a link means this is an episode.
     tv_series = _findBetween(cont, 'TV Series:</h5>', '</a>', maxRes=1)
     if tv_series: mid = re_imdbID.findall(tv_series[0])
     else: mid = None
     if tv_series and mid:
         s_title = _unHtml(tv_series[0])
         s_data = analyze_title(s_title)
         m = Movie(movieID=str(mid[0]), data=s_data,
                     accessSystem=self.accessSystem,
                     modFunct=self._defModFunct)
         d['kind'] = kind = u'episode'
         d['episode of'] = m
     if kind in ('tv series', 'tv mini series'):
         years = _findBetween(cont, '<h1>', '</h1>', maxRes=1)
         if years:
             years[:] = _findBetween(years[0], 'TV series', '</span>',
                                     maxRes=1)
             if years:
                 d['series years'] = years[0].strip()
     air_date = _findBetween(cont, 'Original Air Date:</h5>', '</div>',
                             maxRes=1)
     if air_date:
         air_date = air_date[0]
         vi = air_date.find('(')
         if vi != -1:
             date = _unHtml(air_date[:vi]).strip()
             if date != '????':
                 d['original air date'] = date
             air_date = air_date[vi:]
             season = _findBetween(air_date, 'Season', ',', maxRes=1)
             if season:
                 season = season[0].strip()
                 # Keep the string as it is, if it's not a number.
                 try: season = int(season)
                 except ValueError: pass
                 # The type check keeps a season numbered 0.
                 if season or type(season) is _inttype:
                     d['season'] = season
             episode = _findBetween(air_date, 'Episode', ')', maxRes=1)
             if episode:
                 episode = episode[0].strip()
                 # Keep the string as it is, if it's not a number.
                 try: episode = int(episode)
                 except ValueError: pass
                 # The type check keeps an episode numbered 0; it must
                 # look at the episode itself, not at the season.
                 if episode or type(episode) is _inttype:
                     d['episode'] = episode
     direct = _findBetween(cont, '<h5>Director', ('</div>', '<br/> <br/>'),
                             maxRes=1)
     if direct:
         direct = direct[0]
         # Skip the rest of the header tag.
         h5idx = direct.find('/h5>')
         if h5idx != -1:
             direct = direct[h5idx+4:]
         direct = self._getPersons(direct)
         if direct: d['director'] = direct
     if kind in ('tv series', 'tv mini series', 'episode'):
         if kind != 'episode':
             seasons = _findBetween(cont, 'Seasons:</h5>', '</div>',
                                     maxRes=1)
             if seasons:
                 d['number of seasons'] = seasons[0].count('|') + 1
         creator = _findBetween(cont, 'Created by</h5>', ('class="tn15more"',
                                                         '</div>',
                                                         '<br/> <br/>'),
                                                         maxRes=1)
         if not creator:
             # They change 'Created by' to 'Creator' and viceversa
             # from time to time...
             # XXX: is 'Creators' also used?
             creator = _findBetween(cont, 'Creator:</h5>',
                                     ('class="tn15more"', '</div>',
                                     '<br/> <br/>'), maxRes=1)
         if creator:
             creator = creator[0]
             # NOTE(review): str.find returns -1 (a true value) when the
             # text is missing, so this test is false only if 'tn15more'
             # is at the very beginning; left as it is, since changing
             # it may alter when the '>' is appended -- confirm the intent.
             if creator.find('tn15more'): creator = '%s>' % creator
             creator = self._getPersons(creator)
             if creator: d['creator'] = creator
     writers = _findBetween(cont, '<h5>Writer', ('</div>', '<br/> <br/>'),
                             maxRes=1)
     if writers:
         writers = writers[0]
         # Skip the rest of the header tag.
         h5idx = writers.find('/h5>')
         if h5idx != -1:
             writers = writers[h5idx+4:]
         writers = self._getPersons(writers)
         if writers: d['writer'] = writers
     cvurl = _getTagsWith(cont, 'name="poster"', toClosure=True, maxRes=1)
     if cvurl:
         cvurl = _findBetween(cvurl[0], 'src="', '"', maxRes=1)
         if cvurl: d['cover url'] = cvurl[0]
     genres = _findBetween(cont, 'href="/genre/', '"')
     if genres:
         # Remove duplicates.
         d['genres'] = list(set(genres))
     ur = _findBetween(cont, 'id="star-bar-user-rate">', '</div>',
                         maxRes=1)
     if ur:
         rat = _findBetween(ur[0], '<b>', '</b>', maxRes=1)
         if rat:
             d['rating'] = rat[0].strip()
         vi = ur[0].rfind('href="ratings"')
         if vi != -1 and ur[0][vi+10:].find('await') == -1:
             try:
                 votes = _findBetween(ur[0][vi:], "title='",
                                     " IMDb", maxRes=1)
                 votes = int(votes[0].replace(',', ''))
                 d['votes'] = votes
             except (ValueError, IndexError):
                 self._mobile_logger.warn('wrong votes: %s', ur)
     top250 = _findBetween(cont, 'href="/chart/top?', '</a>', maxRes=1)
     if top250:
         fn = top250[0].rfind('#')
         if fn != -1:
             try:
                 td = int(top250[0][fn+1:])
                 d['top 250 rank'] = td
             except ValueError:
                 self._mobile_logger.warn('wrong top250: %s', top250)
     # The cast table can be introduced by different headers: the first
     # one found is used.
     castdata = _findBetween(cont, 'Cast overview', '</table>', maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Credited cast', '</table>', maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Complete credited cast', '</table>',
                                 maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Series Cast Summary', '</table>',
                                 maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Episode Credited cast', '</table>',
                                 maxRes=1)
     if castdata:
         castdata = castdata[0]
         # Reintegrate the first tag.
         fl = castdata.find('href=')
         if fl != -1: castdata = '<a ' + castdata[fl:]
         # Exclude the 'rest of cast listed alphabetically' row.
         smib = castdata.find('<tr><td align="center" colspan="4"><small>')
         if smib != -1:
             smie = castdata.rfind('</small></td></tr>')
             if smie != -1:
                 castdata = castdata[:smib].strip() + \
                             castdata[smie+18:].strip()
         castdata = castdata.replace('/tr> <tr', '/tr><tr')
         cast = self._getPersons(castdata, sep='</tr><tr')
         if cast: d['cast'] = cast
     akas = _findBetween(cont, 'Also Known As:</h5>', '</div>', maxRes=1)
     if akas:
         # For some reason, here <br> is still used in place of <br/>.
         akas[:] = [x for x in akas[0].split('<br>') if x.strip()]
         akas = [_unHtml(x).replace('" - ','::', 1).lstrip('"').strip()
                 for x in akas]
         if 'See more' in akas: akas.remove('See more')
         akas[:] = [x for x in akas if x]
         if akas:
             d['akas'] = akas
     mpaa = _findBetween(cont, 'MPAA</a>:', '</div>', maxRes=1)
     if mpaa: d['mpaa'] = _unHtml(mpaa[0])
     runtimes = _findBetween(cont, 'Runtime:</h5>', '</div>', maxRes=1)
     if runtimes:
         runtimes = runtimes[0]
         runtimes = [x.strip().replace(' min', '').replace(' (', '::(', 1)
                 for x in runtimes.split('|')]
         d['runtimes'] = [_unHtml(x).strip() for x in runtimes]
     if kind == 'episode':
         # number of episodes.
         epsn = _findBetween(cont, 'title="Full Episode List">', '</a>',
                             maxRes=1)
         if epsn:
             epsn = epsn[0].replace(' Episodes', '').strip()
             if epsn:
                 # If it's not a number, the string is stored anyway.
                 try:
                     epsn = int(epsn)
                 except ValueError:
                     self._mobile_logger.warn('wrong episodes #: %s', epsn)
                 d['number of episodes'] = epsn
     # The following sections are lists of ' | '-separated items; the
     # text in <i> tags is kept as a '::'-separated note.
     country = _findBetween(cont, 'Country:</h5>', '</div>', maxRes=1)
     if country:
         country[:] = country[0].split(' | ')
         country[:] = ['<a %s' % x for x in country if x]
         country[:] = [_unHtml(x.replace(' <i>', '::')) for x in country]
         if country: d['countries'] = country
     lang = _findBetween(cont, 'Language:</h5>', '</div>', maxRes=1)
     if lang:
         lang[:] = lang[0].split(' | ')
         lang[:] = ['<a %s' % x for x in lang if x]
         lang[:] = [_unHtml(x.replace(' <i>', '::')) for x in lang]
         if lang: d['languages'] = lang
     col = _findBetween(cont, '"/search/title?colors=', '</div>')
     if col:
         col[:] = col[0].split(' | ')
         col[:] = ['<a %s' % x for x in col if x]
         col[:] = [_unHtml(x.replace(' <i>', '::')) for x in col]
         if col: d['color info'] = col
     sm = _findBetween(cont, '/search/title?sound_mixes=', '</div>',
                         maxRes=1)
     if sm:
         sm[:] = sm[0].split(' | ')
         sm[:] = ['<a %s' % x for x in sm if x]
         sm[:] = [_unHtml(x.replace(' <i>', '::')) for x in sm]
         if sm: d['sound mix'] = sm
     cert = _findBetween(cont, 'Certification:</h5>', '</div>', maxRes=1)
     if cert:
         cert[:] = cert[0].split(' | ')
         cert[:] = [_unHtml(x.replace(' <i>', '::')) for x in cert]
         if cert: d['certificates'] = cert
     plotoutline = _findBetween(cont, 'Plot:</h5>', ['<a ', '</div>'],
                                 maxRes=1)
     if plotoutline:
         plotoutline = plotoutline[0].strip()
         plotoutline = plotoutline.rstrip('|').rstrip()
         if plotoutline: d['plot outline'] = _unHtml(plotoutline)
     aratio = _findBetween(cont, 'Aspect Ratio:</h5>', ['<a ', '</div>'],
                         maxRes=1)
     if aratio:
         aratio = aratio[0].strip().replace(' (', '::(', 1)
         if aratio:
             d['aspect ratio'] = _unHtml(aratio)
     return {'data': d}