コード例 #1
0
def getMovieCast(dataF, movieID, indexF, keyF, attrIF, attrKF, offsList=[],
                charNF=None, doCast=0, doWriters=0):
    """Read the specified files and return a list of Person objects for
    the people in offsList who are listed in the given movie.

    dataF, doCast and doWriters are passed through to getRawData together
    with every offset; indexF/keyF are used to resolve a person's name and
    attrIF/attrKF to resolve the optional attribute (stored as notes).
    When charNF is given, a non-empty role is resolved with
    getCharactersIDs.  offsList is only read, never modified.

    A person can yield more than one Person object, when listed more
    than once for the same movie.
    """
    resList = []
    seenOffsets = set()
    for offset in offsList:
        # One round per person is enough.
        if offset in seenOffsets:
            continue
        seenOffsets.add(offset)
        personID, movies = getRawData(dataF, offset, doCast, doWriters)
        # Consider only the current movie.
        # XXX: a person can be listed more than one time for a single movie:
        #      think about directors of TV series.
        # XXX: here, 'movie' is a dictionary as returned by the getRawData
        #      function, not a Movie class instance.
        movielist = [x for x in movies if x.get('movieID') == movieID]
        if not movielist:
            continue
        # The name depends only on the person: look it up once, and only
        # when the person is actually listed in this movie.
        name = getLabel(personID, indexF, keyF)
        if not name:
            continue
        for movie in movielist:
            curRole = movie.get('currentRole', u'')
            roleID = None
            if curRole and charNF:
                curRole, roleID = getCharactersIDs(curRole, charNF)
            p = Person(name=name, personID=personID,
                        currentRole=curRole, roleID=roleID,
                        accessSystem='local')
            if 'attributeID' in movie:
                attr = getLabel(movie['attributeID'], attrIF, attrKF)
                if attr:
                    p.notes = attr
            # Used to sort cast.
            if 'position' in movie:
                p.billingPos = movie['position'] or None
            resList.append(p)
    return resList
コード例 #2
0
 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings.

     'o' can be a string, a list/tuple or a dictionary; lists, tuples and
     dictionary values are searched recursively, any other type is
     ignored.  Every reference found in a string is resolved to an ID
     (first with its canonical form, then with the text as found); the
     ones that can't be resolved are skipped.

     trefs and nrefs are updated in place, mapping the different spellings
     of a title/name to the same Movie/Person object, and are also
     returned as the tuple (trefs, nrefs).
     """
     if isinstance(o, (UnicodeType, StringType)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=1)
             rtitle = build_title(a_title, canonical=1, ptdf=1)
             # Already resolved: no need to look it up again.
             if rtitle in trefs:
                 continue
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 # Retry with the title exactly as found in the text.
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle,
                       movieID=movieID,
                       accessSystem=self.accessSystem)
             trefs[rtitle] = m
             # Register the other spellings too, all pointing to the
             # same Movie object.
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             # Already resolved: no need to look it up again.
             if rname in nrefs:
                 continue
             personID = self._getNameID(rname)
             if personID is None:
                 # Retry with the name exactly as found in the text.
                 personID = self._getNameID(name)
             if personID is None:
                 continue
             p = Person(name=rname,
                        personID=personID,
                        accessSystem=self.accessSystem)
             nrefs[rname] = p
             # Register the other spellings too, all pointing to the
             # same Person object.
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (ListType, TupleType)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, DictType):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)
コード例 #3
0
 def postprocess_data(self, data):
     """Turn the collected (text, link) pairs into Person/Movie objects.

     Reads the 'names refs' and 'titles refs' entries of data (each an
     iterable of pairs, empty when missing) and returns a dictionary with
     the same two keys, each one mapping the stripped text to a Person
     or a Movie built with the ID extracted from the link.  Pairs whose
     text or link is empty once stripped are ignored.
     """
     refs = {}
     for section in ('names refs', 'titles refs'):
         bucket = refs[section] = {}
         for label, link in data.get(section, []):
             label, link = label.strip(), link.strip()
             if not label or not link:
                 continue
             refID = analyze_imdbid(link)
             if section == 'names refs':
                 ref = Person(personID=refID, name=label,
                              accessSystem=self._as,
                              modFunct=self._modFunct)
             elif section == 'titles refs':
                 ref = Movie(movieID=refID, title=label,
                             accessSystem=self._as,
                             modFunct=self._modFunct)
             bucket[label] = ref
     return refs
コード例 #4
0
 def postprocess_data(self, data):
     """Turn the collected (text, link) pairs into reference objects.

     Reads the 'names refs', 'titles refs' and 'characters refs' entries
     of data (each an iterable of pairs, empty when missing) and returns
     a dictionary with the same three keys, each one mapping the text to
     a Person, Movie or Character built with the ID extracted from the
     link.  Links that don't end with a slash are ignored.
     """
     refs = {}
     for section in ('names refs', 'titles refs', 'characters refs'):
         found = {}
         refs[section] = found
         for text, link in data.get(section, []):
             if link.endswith('/'):
                 refID = analyze_imdbid(link)
                 if section == 'names refs':
                     found[text] = Person(personID=refID, name=text,
                                          accessSystem=self._as,
                                          modFunct=self._modFunct)
                 elif section == 'titles refs':
                     found[text] = Movie(movieID=refID, title=text,
                                         accessSystem=self._as,
                                         modFunct=self._modFunct)
                 else:
                     # XXX: companies aren't handled: are they ever found
                     #      in text, as links to their page?
                     found[text] = Character(characterID=refID, name=text,
                                             accessSystem=self._as,
                                             modFunct=self._modFunct)
     return refs
コード例 #5
0
    def get_movie_main(self, movieID):
        """Collect the main information about a movie from the tables in self.T.

        movieID is converted to an integer.  The basic title information
        is merged, in this order, with the data found in the title_crew,
        title_episode, title_principals, title_ratings and title_akas
        tables; later updates overwrite keys set by the earlier ones.

        Return a dictionary with the collected information under 'data'
        and the value of self.get_movie_infoset() under 'info sets'.
        """
        movieID = int(movieID)
        data = self._base_title_info(movieID)
        # Caches handed to every _base_person_info call made below.
        _movies_cache = {movieID: data}
        _persons_cache = {}

        # Directors and writers, from the title_crew table.
        tc = self.T['title_crew']
        # A missing row falls back to an empty dictionary.
        movie = tc.select(tc.c.tconst == movieID).execute().fetchone() or {}
        tc_data = self._rename('title_crew', dict(movie))
        writers = []
        directors = []
        for key, target in (('director', directors), ('writer', writers)):
            for personID in split_array(tc_data.get(key) or ''):
                if not personID:
                    continue
                personID = int(personID)
                person_data = self._base_person_info(
                    personID,
                    movies_cache=_movies_cache,
                    persons_cache=_persons_cache)
                person = Person(personID=personID,
                                data=person_data,
                                accessSystem=self.accessSystem)
                target.append(person)
        # Replace the raw values with the lists of Person objects.
        tc_data['director'] = directors
        tc_data['writer'] = writers
        data.update(tc_data)

        # Episode information, from the title_episode table.
        te = self.T['title_episode']
        movie = te.select(te.c.tconst == movieID).execute().fetchone() or {}
        te_data = self._rename('title_episode', dict(movie))
        if 'parentTconst' in te_data:
            # Attach the basic information of the parent title.
            te_data['episodes of'] = self._base_title_info(
                te_data['parentTconst'])
        # NOTE(review): presumably drops the listed keys - confirm
        # against the _clean implementation.
        self._clean(te_data, ('parentTconst', ))
        data.update(te_data)

        # People listed in the title_principals table, grouped by category.
        tp = self.T['title_principals']
        movie_rows = tp.select(
            tp.c.tconst == movieID).execute().fetchall() or {}
        roles = {}
        for movie_row in movie_rows:
            movie_row = dict(movie_row)
            # The renamed copy is used only to read the category: the
            # row stored in 'roles' is the one with the original keys.
            tp_data = self._rename('title_principals', dict(movie_row))
            category = tp_data.get('category')
            if not category:
                continue
            # Actors, actresses and 'self' appearances all end up in 'cast'.
            if category in ('actor', 'actress', 'self'):
                category = 'cast'
            roles.setdefault(category, []).append(movie_row)
        for role in roles:
            # Keep the order given by the 'ordering' column.
            roles[role].sort(key=itemgetter('ordering'))
            persons = []
            for person_info in roles[role]:
                personID = person_info.get('nconst')
                if not personID:
                    continue
                person_data = self._base_person_info(
                    personID,
                    movies_cache=_movies_cache,
                    persons_cache=_persons_cache)
                person = Person(personID=personID,
                                data=person_data,
                                billingPos=person_info.get('ordering'),
                                currentRole=person_info.get('characters'),
                                notes=person_info.get('job'),
                                accessSystem=self.accessSystem)
                persons.append(person)
            # Overwrites any value already stored under the same key.
            data[role] = persons

        # Rating information, from the title_ratings table.
        tr = self.T['title_ratings']
        movie = tr.select(tr.c.tconst == movieID).execute().fetchone() or {}
        tr_data = self._rename('title_ratings', dict(movie))
        data.update(tr_data)

        # Alternative titles, from the title_akas table.
        ta = self.T['title_akas']
        akas = ta.select(ta.c.titleId == movieID).execute()
        akas_list = []
        for aka in akas:
            ta_data = self._rename('title_akas', dict(aka)) or {}
            # Drop the empty values; iterate over a copy of the keys,
            # since the dictionary is modified in the loop.
            for key in list(ta_data.keys()):
                if not ta_data[key]:
                    del ta_data[key]
            for key in 't_soundex', 'movieID':
                if key in ta_data:
                    del ta_data[key]
            # These fields are expanded with split_array.
            for key in 'types', 'attributes':
                if key not in ta_data:
                    continue
                ta_data[key] = split_array(ta_data[key])
            akas_list.append(ta_data)
        if akas_list:
            data['akas'] = akas_list

        self._clean(data, ('movieID', 't_soundex'))
        return {'data': data, 'info sets': self.get_movie_infoset()}
コード例 #6
0
ファイル: __init__.py プロジェクト: conwetlab/ezweb-gadgets
     if p[4] in ('actor', 'actress'):
         p[4] = 'cast'
 # Regroup by role/duty (cast, writer, director, ...)
 castdata[:] = _groupListBy(castdata, 4)
 for group in castdata:
     duty = group[0][4]
     for pdata in group:
         curRole = pdata[1]
         curRoleID = None
         if curRole is not None:
             robj = CharName.get(curRole)
             curRole = robj.name
             curRoleID = robj.id
         p = Person(personID=pdata[0],
                    name=pdata[5],
                    currentRole=curRole or u'',
                    roleID=curRoleID,
                    notes=pdata[2] or u'',
                    accessSystem='sql')
         if pdata[6]: p['imdbIndex'] = pdata[6]
         p.billingPos = pdata[3]
         res.setdefault(duty, []).append(p)
     if duty == 'cast':
         res[duty] = merge_roles(res[duty])
     res[duty].sort()
 # Info about the movie.
 minfo = [(self._info[m.infoTypeID], m.info, m.note)
          for m in MovieInfo.select(MovieInfo.q.movieID == movieID)]
 minfo = _groupListBy(minfo, 0)
 for group in minfo:
     sect = group[0][0]
     for mdata in group:
コード例 #7
0
def build_person(txt,
                 personID=None,
                 billingPos=None,
                 roleID=None,
                 accessSystem='http',
                 modFunct=None):
    """Return a Person instance from the typical <tr>...</tr> strings
    found in the IMDb's web site.

    txt is split in a name and an optional role/notes part, separated
    by '....' or '...'; when no separator is present, the first open
    parenthesis marks the beginning of the notes.

    roleID selects how the second part is read:
      - None: there's no role, the whole text is stored as notes;
      - a list: the text is a '/'-separated list of roles, each one with
        its own optional notes; the IDs are padded with None or truncated
        to match the number of roles (the list passed by the caller is
        not modified);
      - anything else: a single role, optionally followed by notes
        starting at the first open parenthesis.

    personID and the role IDs that are not None are converted to strings;
    billingPos, accessSystem and modFunct are passed to Person unchanged.
    """
    notes = u''
    role = u''
    # Search the (optional) separator between name and role/notes.
    if '....' in txt:
        sep = '....'
    elif '...' in txt:
        sep = '...'
    else:
        sep = '...'
        # Replace the first parenthesis, assuming there are only
        # notes, after.
        # Rationale: no imdbIndex is (ever?) showed on the web site.
        txt = txt.replace('(', '...(', 1)
    txt_split = txt.split(sep, 1)
    name = txt_split[0].strip()
    if len(txt_split) == 2:
        role_comment = txt_split[1].strip()
        # Strip common endings.
        if role_comment.endswith(' and'):
            role_comment = role_comment[:-4].rstrip()
        elif role_comment.endswith(' &'):
            role_comment = role_comment[:-2].rstrip()
        elif role_comment.endswith('& ....'):
            role_comment = role_comment[:-6].rstrip()
        # Get the notes.
        if roleID is None:
            # We're managing something that doesn't have a 'role', so
            # everything are notes.
            notes = role_comment
        elif isinstance(roleID, list):
            # Multiple roles: notes are split role by role, below.
            role = role_comment
        else:
            cmt_idx = role_comment.find('(')
            if cmt_idx != -1:
                role = role_comment[:cmt_idx].rstrip()
                notes = role_comment[cmt_idx:]
            else:
                # Just a role, without notes.
                role = role_comment
    if role == '....':
        role = u''
    roleNotes = []
    # Manages multiple roleIDs.
    if isinstance(roleID, list):
        role = []
        for r in txt_role_split(role) if False else []:
            pass
    return None
コード例 #8
0
from imdb import IMDb
from imdb.Person import Person
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
import time

# IMDb access object used for the searches below.
ia = IMDb()
# NOTE(review): this empty Person instance is not used in the visible part
# of the script.
per = Person()

# Names of the people to look up.
names = [
    'Deepika Padukone', 'Priyanka Chopra', 'Alia Bhatt',
    'Aishwarya Rai Bachchan', 'Anushka Sharma', 'Kareena Kapoor',
    'Kangana Ranaut', 'Katrina Kaif', 'Shraddha Kapoor', 'Karisma Kapoor',
    'Kajol', 'Madhuri Dixit', 'Kriti Sanon', 'Vidya Balan', 'Sonam K Ahuja',
    'Sridevi', 'Rani Mukerji', 'Parineeti Chopra', 'Jacqueline Fernandez',
    'Disha Patani', 'Rekha', 'Kiara Advani', 'Taapsee Pannu',
    'Sharmila Tagore', 'Ileana DCruz', 'Bipasha Basu', 'Babita Shivdasani',
    'Preity Zinta', 'Hema Malini', 'Yami Gautam', 'Sonakshi Sinha',
    'Sunny Leone', 'Tabu', 'Smita Patil', 'Madhubala', 'Juhi Chawla', 'Nargis'
]
# Remove duplicated names; np.unique returns the unique values as a
# sorted array.
names = np.unique(names)

# NOTE(review): these accumulators are not updated in the visible part of
# the script; presumably they are filled by the rest of the loop - confirm.
c = 0
average_movies = np.zeros(100)
years = []
spike_point = []
for name in names:
    name = name.strip()
    # Search IMDb for the people matching this name.
    person = ia.search_person(name)
コード例 #9
0
ファイル: __init__.py プロジェクト: Neprincessa/PrPythonAtom
    def get_movie_main(self, movieID):
        """Collect the main information about a movie from the tables in self.T.

        movieID is converted to an integer.  The basic title information
        is merged, in this order, with the data found in the title_crew,
        title_episode, title_principals, title_ratings and title_akas
        tables; later updates overwrite keys set by the earlier ones.

        Return a dictionary with the collected information under 'data'
        and the value of self.get_movie_infoset() under 'info sets'.
        """
        movieID = int(movieID)
        data = self._base_title_info(movieID)
        # Caches handed to every _base_person_info call made below.
        _movies_cache = {movieID: data}
        _persons_cache = {}

        # Directors and writers, from the title_crew table.
        tc = self.T['title_crew']
        # A missing row falls back to an empty dictionary.
        movie = tc.select(tc.c.tconst == movieID).execute().fetchone() or {}
        tc_data = self._rename('title_crew', dict(movie))
        writers = []
        directors = []
        for key, target in (('director', directors), ('writer', writers)):
            for personID in split_array(tc_data.get(key) or ''):
                if not personID:
                    continue
                personID = int(personID)
                person_data = self._base_person_info(
                    personID,
                    movies_cache=_movies_cache,
                    persons_cache=_persons_cache)
                person = Person(personID=personID,
                                data=person_data,
                                accessSystem=self.accessSystem)
                target.append(person)
        # Replace the raw values with the lists of Person objects.
        tc_data['director'] = directors
        tc_data['writer'] = writers
        data.update(tc_data)

        # Episode information, from the title_episode table.
        te = self.T['title_episode']
        # The row must be selected from title_episode itself: the filter
        # is on a title_episode column, so selecting from title_crew
        # would not return the episode row.
        movie = te.select(te.c.tconst == movieID).execute().fetchone() or {}
        te_data = self._rename('title_episode', dict(movie))
        if 'parentTconst' in te_data:
            # Attach the basic information of the parent title.
            te_data['episodes of'] = self._base_title_info(
                te_data['parentTconst'])
        # NOTE(review): presumably drops the listed keys - confirm
        # against the _clean implementation.
        self._clean(te_data, ('parentTconst', ))
        data.update(te_data)

        # Cast, from the title_principals table.
        tp = self.T['title_principals']
        movie = tp.select(tp.c.tconst == movieID).execute().fetchone() or {}
        tp_data = self._rename('title_principals', dict(movie))
        cast = []
        for personID in split_array(tp_data.get('cast') or ''):
            if not personID:
                continue
            personID = int(personID)
            person_data = self._base_person_info(personID,
                                                 movies_cache=_movies_cache,
                                                 persons_cache=_persons_cache)
            person = Person(personID=personID,
                            data=person_data,
                            accessSystem=self.accessSystem)
            cast.append(person)
        # Replace the raw value with the list of Person objects.
        tp_data['cast'] = cast
        data.update(tp_data)

        # Rating information, from the title_ratings table.
        tr = self.T['title_ratings']
        movie = tr.select(tr.c.tconst == movieID).execute().fetchone() or {}
        tr_data = self._rename('title_ratings', dict(movie))
        data.update(tr_data)

        # Alternative titles, from the title_akas table.
        ta = self.T['title_akas']
        akas = ta.select(ta.c.titleId == movieID).execute()
        akas_list = []
        for aka in akas:
            ta_data = self._rename('title_akas', dict(aka)) or {}
            # Drop the empty values; iterate over a copy of the keys,
            # since the dictionary is modified in the loop.
            for key in list(ta_data.keys()):
                if not ta_data[key]:
                    del ta_data[key]
            for key in 't_soundex', 'movieID':
                if key in ta_data:
                    del ta_data[key]
            # These fields are expanded with split_array.
            for key in 'types', 'attributes':
                if key not in ta_data:
                    continue
                ta_data[key] = split_array(ta_data[key])
            akas_list.append(ta_data)
        if akas_list:
            data['akas'] = akas_list

        self._clean(data, ('movieID', 't_soundex'))
        return {'data': data, 'info sets': self.get_movie_infoset()}
コード例 #10
0
def build_person(txt,
                 personID=None,
                 billingPos=None,
                 roleID=None,
                 accessSystem='http',
                 modFunct=None):
    """Return a Person instance from the typical <tr>...</tr> strings
    found in the IMDb's web site.

    txt is split in a name and an optional role/notes part, separated
    by '....' or '...'; when no separator is present, the first open
    parenthesis marks the beginning of the notes.  In the second part,
    everything from the first open parenthesis on is stored as notes,
    the rest is the role.

    When roleID is a list, the role is split on ' / ' and the IDs are
    padded with None or truncated to match the number of roles (the list
    passed by the caller is not modified); a single role and its ID are
    passed to Person as plain values instead of one-element lists.

    personID, billingPos, accessSystem and modFunct are passed to Person
    unchanged.
    """
    notes = u''
    role = u''
    # Search the (optional) separator between name and role/notes.
    if '....' in txt:
        sep = '....'
    elif '...' in txt:
        sep = '...'
    else:
        sep = '...'
        # Replace the first parenthesis, assuming there are only
        # notes, after.
        # Rationale: no imdbIndex is (ever?) showed on the web site.
        txt = txt.replace('(', '...(', 1)
    txt_split = txt.split(sep, 1)
    name = txt_split[0].strip()
    if len(txt_split) == 2:
        role_comment = txt_split[1].strip()
        # Strip common endings.
        if role_comment.endswith(' and'):
            role_comment = role_comment[:-4].rstrip()
        elif role_comment.endswith(' &'):
            role_comment = role_comment[:-2].rstrip()
        elif role_comment.endswith('& ....'):
            role_comment = role_comment[:-6].rstrip()
        # Get the notes.
        cmt_idx = role_comment.find('(')
        if cmt_idx != -1:
            role = role_comment[:cmt_idx].rstrip()
            notes = role_comment[cmt_idx:]
        else:
            # Just a role, without notes.
            role = role_comment
    if role == '....':
        role = u''
    # Manages multiple roleIDs.
    if isinstance(roleID, list):
        # Work on a copy: the caller's list must not be modified.
        roleID = list(roleID)
        role = role.split(' / ')
        lr = len(role)
        lrid = len(roleID)
        if lr > lrid:
            # More roles than IDs: pad with None.  The count must be
            # (lr - lrid): a negative count would add nothing.
            roleID += [None] * (lr - lrid)
        elif lr < lrid:
            roleID = roleID[:lr]
        if lr == 1:
            role = role[0]
            roleID = roleID[0]
    # XXX: return None if something strange is detected?
    return Person(name=name,
                  personID=personID,
                  currentRole=role,
                  roleID=roleID,
                  notes=notes,
                  billingPos=billingPos,
                  modFunct=modFunct,
                  accessSystem=accessSystem)