Ejemplo n.º 1
0
    def iter_persons(self, role_filter=None):
        """Yield a Person for every cast and crew link found in the page.

        The "cast_list" table is only read when role_filter is None or
        'actor'. Every crew section is then visited; a section is kept when
        role_filter is None or equals the section anchor's name with its
        trailing 's' characters removed.
        """
        if role_filter is None or role_filter == 'actor':
            cast_tables = self.doc.xpath('//table[has-class("cast_list")]')
            if cast_tables:
                # Only the first matching table is read.
                cells = cast_tables[0].xpath('.//td[has-class("itemprop")]')
                for cell in cells:
                    link = cell.find('a')
                    href_parts = link.attrib.get('href',
                                                 '').strip('/').split('/')
                    person_id = href_parts[1]
                    person_name = link.text
                    # The character cell is searched from the cell's parent.
                    character = cell.xpath(
                        '..//td[has-class("character")]')[0].text_content()
                    person = Person(person_id, person_name)
                    person.short_description = character
                    for field in ('real_name', 'birth_place', 'birth_date',
                                  'death_date', 'gender', 'nationality',
                                  'short_biography', 'roles',
                                  'thumbnail_url'):
                        setattr(person, field, NotLoaded)
                    yield person

        for anchor in self.doc.xpath('//table[@cellspacing="1"]//h5//a'):
            role = anchor.attrib.get('name', '').rstrip('s')
            if role_filter is not None and role != role_filter:
                continue
            # The rows are searched from the node four ancestors above the
            # heading link.
            section = anchor
            for depth in range(4):
                section = section.getparent()
            # The first row of the section is skipped.
            for row in section.xpath('.//tr')[1:]:
                for link in row.xpath('.//a'):
                    href = link.attrib.get('href', '')
                    # person_id / person_name are only refreshed by person
                    # links; any other link reuses the previous values.
                    if '/name/nm' in href:
                        person_id = href.strip('/').split('/')[-1]
                        person_name = link.text
                    if 'glossary' in href:
                        detail = link.text
                    else:
                        detail = NotAvailable
                    person = Person(person_id, person_name)
                    person.short_description = detail
                    yield person
Ejemplo n.º 2
0
    def get_person(self, id):
        """Build a Person from the overview section of the current document.

        Values that are not found keep the NotAvailable marker. Gender,
        nationality and short_description are never extracted here, and the
        roles come from self.get_roles().
        """
        select = self.parser.select
        overview = select(self.document.getroot(), 'td#overview-top', 1)

        short_biography = NotAvailable
        descriptions = select(overview, 'span[itemprop=description]')
        if descriptions:
            short_biography = unicode(descriptions[0].text)

        real_name = NotAvailable
        birth_place = NotAvailable
        headings = select(overview, 'div.txt-block h4.inline')
        # Only a first heading containing "born" is followed.
        if headings and "born" in headings[0].text.lower():
            for link in select(headings[0].getparent(), 'a'):
                target = link.attrib.get('href', '').strip()
                if target == 'bio':
                    real_name = unicode(link.text.strip())
                elif 'birth_place' in target:
                    birth_place = unicode(link.text.lower().strip())

        name = NotAvailable
        name_nodes = select(overview, 'h1 span[itemprop=name]')
        if name_nodes:
            name = unicode(name_nodes[0].text.strip())

        def read_date(selector):
            # The date is taken from the 'datetime' attribute and accepted
            # only as three dash-separated fields with a year >= 1900.
            nodes = select(overview, selector)
            if nodes:
                parts = nodes[0].attrib.get('datetime', '').split('-')
                if len(parts) == 3 and int(parts[0]) >= 1900:
                    return datetime(int(parts[0]), int(parts[1]),
                                    int(parts[2]))
            return NotAvailable

        birth_date = read_date('time[itemprop=birthDate]')
        death_date = read_date('time[itemprop=deathDate]')

        thumbnail_url = NotAvailable
        images = select(self.document.getroot(), 'td#img_primary img')
        if images:
            thumbnail_url = unicode(images[0].attrib.get('src', ''))

        roles = self.get_roles()

        person = Person(id, name)
        for attribute, value in (('real_name', real_name),
                                 ('birth_date', birth_date),
                                 ('death_date', death_date),
                                 ('birth_place', birth_place),
                                 ('gender', NotAvailable),
                                 ('nationality', NotAvailable),
                                 ('short_biography', short_biography),
                                 ('short_description', NotAvailable),
                                 ('roles', roles),
                                 ('thumbnail_url', thumbnail_url)):
            setattr(person, attribute, value)
        return person
Ejemplo n.º 3
0
    def get_person(self, id):
        """Return a Person filled from the 'td#overview-top' cell.

        Anything missing from the document is left as NotAvailable; gender,
        nationality and short_description always are. Roles are obtained
        from self.get_roles().
        """
        find = self.parser.select
        top = find(self.document.getroot(),
                   'td#overview-top', 1)

        bio_nodes = find(top, 'span[itemprop=description]')
        if bio_nodes:
            short_biography = unicode(bio_nodes[0].text)
        else:
            short_biography = NotAvailable

        real_name = birth_place = NotAvailable
        born_headers = find(top,
                            'div.txt-block h4.inline')
        if born_headers and "born" in born_headers[0].text.lower():
            # Links next to the heading carry the real name and birth place.
            for anchor in find(born_headers[0].getparent(), 'a'):
                url = anchor.attrib.get('href', '').strip()
                if url == 'bio':
                    real_name = unicode(anchor.text.strip())
                elif 'birth_place' in url:
                    birth_place = unicode(anchor.text.lower().strip())

        title_nodes = find(top, 'h1 span[itemprop=name]')
        if title_nodes:
            name = unicode(title_nodes[0].text.strip())
        else:
            name = NotAvailable

        # Both dates follow the same rule: three dash-separated fields in
        # the 'datetime' attribute and a year of at least 1900.
        found_dates = {}
        for label, selector in (('birth', 'time[itemprop=birthDate]'),
                                ('death', 'time[itemprop=deathDate]')):
            found_dates[label] = NotAvailable
            stamps = find(top, selector)
            if not stamps:
                continue
            fields = stamps[0].attrib.get('datetime', '').split('-')
            if len(fields) == 3 and int(fields[0]) >= 1900:
                found_dates[label] = datetime(int(fields[0]), int(fields[1]),
                                              int(fields[2]))

        pictures = find(self.document.getroot(),
                        'td#img_primary img')
        if pictures:
            thumbnail_url = unicode(pictures[0].attrib.get('src', ''))
        else:
            thumbnail_url = NotAvailable

        roles = self.get_roles()

        person = Person(id, name)
        person.real_name = real_name
        person.birth_date = found_dates['birth']
        person.death_date = found_dates['death']
        person.birth_place = birth_place
        person.gender = NotAvailable
        person.nationality = NotAvailable
        person.short_biography = short_biography
        person.short_description = NotAvailable
        person.roles = roles
        person.thumbnail_url = thumbnail_url
        return person
Ejemplo n.º 4
0
 def iter_movie_persons(self, movie_id, role_filter):
     """Yield a Person for each 'castMember' entry of a movie.

     Nothing is yielded when the request returns None, when the decoded
     response has no 'movie' entry, or when the movie has no 'castMember'
     list. When role_filter is not None, only members whose activity
     matches it (case-insensitively, ignoring surrounding whitespace) are
     kept.
     """
     res = self.readurl(
             'http://api.allocine.fr/rest/v3/movie?partner=YW5kcm9pZC12M3M&code=%s&profile=large&mediafmt=mp4-lc&format=json&filter=movie&striptags=synopsis,synopsisshort' % movie_id)
     if res is None:
         return
     payload = json.loads(res)
     if 'movie' not in payload:
         return
     movie = payload['movie']
     if 'castMember' not in movie:
         return

     for member in movie['castMember']:
         if role_filter is not None:
             activity = member['activity']['$'].lower().strip()
             if activity != role_filter.lower().strip():
                 continue
         person_id = member['person']['code']
         person_name = unicode(member['person']['name'])
         # Description is the activity, followed by the role when present.
         description = unicode(member['activity']['$'])
         if 'role' in member:
             description += ', %s' % member['role']
         if 'picture' in member:
             picture = unicode(member['picture']['href'])
         else:
             picture = NotAvailable
         person = Person(person_id, person_name)
         person.short_description = description
         for field in ('real_name', 'birth_place', 'birth_date',
                       'death_date', 'gender', 'nationality',
                       'short_biography', 'roles'):
             setattr(person, field, NotLoaded)
         person.thumbnail_url = picture
         yield person
Ejemplo n.º 5
0
    def get_person(self, id):
        """Request a person record and convert it into a Person object.

        Returns None when the request yields no result or the decoded
        response has no 'person' entry. Fields absent from the response keep
        the NotAvailable marker; short_description is never filled here.
        Dates that are not a complete, valid 'year-month-day' value are
        reported as NotAvailable instead of raising.
        """
        params = [('partner', self.PARTNER_KEY),
                  ('code', id),
                  ('profile', 'large'),
                  ('mediafmt', 'mp4-lc'),
                  ('filter', 'movie'),
                  ('striptags', 'biography,biographyshort'),
                  ('format', 'json')]

        res = self.__do_request('person', params)
        if res is None:
            return None
        jres = json.loads(res)
        if 'person' not in jres:
            return None
        jres = jres['person']

        def parse_date(raw):
            # Partial dates (fewer than three fields) and out-of-range or
            # non-numeric fields used to crash the whole lookup.
            parts = raw.split('-')
            if len(parts) != 3:
                return NotAvailable
            try:
                return datetime(int(parts[0]), int(parts[1]), int(parts[2]))
            except ValueError:
                return NotAvailable

        name = NotAvailable
        short_biography = NotAvailable
        biography = NotAvailable
        short_description = NotAvailable
        birth_place = NotAvailable
        birth_date = NotAvailable
        death_date = NotAvailable
        real_name = NotAvailable
        gender = NotAvailable
        thumbnail_url = NotAvailable
        roles = {}
        nationality = NotAvailable

        if 'name' in jres:
            # Join only the parts that exist so a missing given name does
            # not leave a leading space in front of the family name.
            name_parts = [jres['name'][key] for key in ('given', 'family')
                          if key in jres['name'] and jres['name'][key]]
            name = u' '.join(name_parts)
        if 'biographyShort' in jres:
            short_biography = unicode(jres['biographyShort'])
        if 'birthPlace' in jres:
            birth_place = unicode(jres['birthPlace'])
        if 'birthDate' in jres:
            birth_date = parse_date(jres['birthDate'])
        if 'deathDate' in jres:
            death_date = parse_date(jres['deathDate'])
        if 'realName' in jres:
            real_name = unicode(jres['realName'])
        if 'gender' in jres:
            # Any code other than '1' is reported as Female.
            if jres['gender'] == '1':
                gender = u'Male'
            else:
                gender = u'Female'
        if 'picture' in jres:
            thumbnail_url = unicode(jres['picture']['href'])
        if 'nationality' in jres:
            nationality = u', '.join(
                '%s' % n['$'] for n in jres['nationality']).strip(', ')
        if 'biography' in jres:
            biography = unicode(jres['biography'])
        if 'participation' in jres:
            # Group the movie labels by activity name.
            for m in jres['participation']:
                pyear = '????'
                if 'productionYear' in m['movie']:
                    pyear = m['movie']['productionYear']
                roles.setdefault(m['activity']['$'], []).append(
                    u'(%s) %s' % (pyear, m['movie']['originalTitle']))

        person = Person(id, name)
        person.real_name = real_name
        person.birth_date = birth_date
        person.death_date = death_date
        person.birth_place = birth_place
        person.gender = gender
        person.nationality = nationality
        person.short_biography = short_biography
        person.biography = biography
        person.short_description = short_description
        person.roles = roles
        person.thumbnail_url = thumbnail_url
        return person
Ejemplo n.º 6
0
    def iter_persons(self, pattern):
        """Yield a Person for each entry of a 'search' request for persons.

        Nothing is yielded when the request returns None or the decoded
        'feed' has no 'person' entry. The short description is built from
        the birth date (in parentheses) and the activities, comma-separated.
        """
        request_params = [
            ('partner', self.PARTNER_KEY),
            ('q', pattern.encode('utf-8')),
            ('format', 'json'),
            ('filter', 'person'),
        ]
        res = self.__do_request('search', request_params)
        if res is None:
            return

        feed = json.loads(res)['feed']
        if 'person' not in feed:
            return

        for entry in feed['person']:
            if 'picture' in entry:
                picture = unicode(entry['picture']['href'])
            else:
                picture = NotAvailable
            person = Person(entry['code'], unicode(entry['name']))

            pieces = []
            if 'birthDate' in entry:
                pieces.append('(%s)' % entry['birthDate'])
            if 'activity' in entry:
                for activity in entry['activity']:
                    pieces.append('%s' % activity['$'])
            description = u', '.join(pieces).strip(', ')

            for field in ('real_name', 'birth_place', 'birth_date',
                          'death_date', 'gender', 'nationality',
                          'short_biography'):
                setattr(person, field, NotLoaded)
            person.short_description = description
            person.roles = NotLoaded
            person.thumbnail_url = picture
            yield person
Ejemplo n.º 7
0
    def iter_persons(self, pattern):
        """Yield a lightly-filled Person for each person in a search result.

        The generator ends immediately when the "search" request returns
        None or when the decoded "feed" holds no "person" entry. Detailed
        fields are set to NotLoaded.
        """
        search_params = [
            ("partner", self.PARTNER_KEY),
            ("q", pattern),
            ("format", "json"),
            ("filter", "person"),
        ]

        res = self.__do_request("search", search_params)
        if res is None:
            return
        results = json.loads(res)["feed"]
        if "person" not in results:
            return

        lazy_fields = (
            "real_name",
            "birth_place",
            "birth_date",
            "death_date",
            "gender",
            "nationality",
            "short_biography",
        )
        for found in results["person"]:
            thumb = unicode(found["picture"]["href"]) if "picture" in found else NotAvailable
            person = Person(found["code"], unicode(found["name"]))

            # Birth date in parentheses first, then every activity label.
            chunks = ["(%s)" % found["birthDate"]] if "birthDate" in found else []
            if "activity" in found:
                chunks.extend("%s" % act["$"] for act in found["activity"])
            summary = u", ".join(chunks).strip(", ")

            for field in lazy_fields:
                setattr(person, field, NotLoaded)
            person.short_description = summary
            person.roles = NotLoaded
            person.thumbnail_url = thumb
            yield person
Ejemplo n.º 8
0
    def iter_movie_persons(self, movie_id, role_filter):
        """Yield a Person for each 'castMember' entry of a movie.

        The result of the 'movie' request is used directly as a mapping.
        Nothing is yielded when it is None, has no 'movie' entry, or the
        movie has no 'castMember' list. When role_filter is not None, only
        members whose activity matches it (case-insensitively, ignoring
        surrounding whitespace) are kept.
        """
        params = [
            ('partner', self.PARTNER_KEY),
            ('code', movie_id),
            ('profile', 'large'),
            ('mediafmt', 'mp4-lc'),
            ('filter', 'movie'),
            ('striptags', 'synopsis,synopsisshort'),
            ('format', 'json'),
        ]

        jres = self.__do_request('movie', params)
        if jres is None or 'movie' not in jres:
            return
        movie = jres['movie']
        if 'castMember' not in movie:
            return

        for member in movie['castMember']:
            if role_filter is not None:
                activity = member['activity']['$'].lower().strip()
                if activity != role_filter.lower().strip():
                    continue
            person_id = member['person']['code']
            person_name = unicode(member['person']['name'])
            # Description is the activity, followed by the role if present.
            description = unicode(member['activity']['$'])
            if 'role' in member:
                description += ', %s' % member['role']
            if 'picture' in member:
                picture = unicode(member['picture']['href'])
            else:
                picture = NotAvailable
            person = Person(person_id, person_name)
            person.short_description = description
            for field in ('real_name', 'birth_place', 'birth_date',
                          'death_date', 'gender', 'nationality',
                          'short_biography', 'roles'):
                setattr(person, field, NotLoaded)
            person.thumbnail_url = picture
            yield person
Ejemplo n.º 9
0
    def get_person(self, id):
        """Request a person record and convert it into a Person object.

        Returns None when the request yields no result or the decoded
        response has no "person" entry. Fields absent from the response keep
        the NotAvailable marker; short_description is never filled here.
        Each role maps to a list of (movie code, "(year) title") tuples.
        Dates that are not a complete, valid "year-month-day" value are
        reported as NotAvailable instead of raising.
        """
        params = [
            ("partner", self.PARTNER_KEY),
            ("code", id),
            ("profile", "large"),
            ("mediafmt", "mp4-lc"),
            ("filter", "movie"),
            ("striptags", "biography,biographyshort"),
            ("format", "json"),
        ]

        res = self.__do_request("person", params)
        if res is None:
            return None
        jres = json.loads(res)
        if "person" not in jres:
            return None
        jres = jres["person"]

        def parse_date(raw):
            # Partial dates (fewer than three fields) and out-of-range or
            # non-numeric fields used to crash the whole lookup.
            parts = raw.split("-")
            if len(parts) != 3:
                return NotAvailable
            try:
                return datetime(int(parts[0]), int(parts[1]), int(parts[2]))
            except ValueError:
                return NotAvailable

        name = NotAvailable
        short_biography = NotAvailable
        biography = NotAvailable
        short_description = NotAvailable
        birth_place = NotAvailable
        birth_date = NotAvailable
        death_date = NotAvailable
        real_name = NotAvailable
        gender = NotAvailable
        thumbnail_url = NotAvailable
        roles = {}
        nationality = NotAvailable

        if "name" in jres:
            # Join only the parts that exist so a missing given name does
            # not leave a leading space in front of the family name.
            name_parts = [
                jres["name"][key] for key in ("given", "family") if key in jres["name"] and jres["name"][key]
            ]
            name = u" ".join(name_parts)
        if "biographyShort" in jres:
            short_biography = unicode(jres["biographyShort"])
        if "birthPlace" in jres:
            birth_place = unicode(jres["birthPlace"])
        if "birthDate" in jres:
            birth_date = parse_date(jres["birthDate"])
        if "deathDate" in jres:
            death_date = parse_date(jres["deathDate"])
        if "realName" in jres:
            real_name = unicode(jres["realName"])
        if "gender" in jres:
            # Any code other than "1" is reported as Female.
            if jres["gender"] == "1":
                gender = u"Male"
            else:
                gender = u"Female"
        if "picture" in jres:
            thumbnail_url = unicode(jres["picture"]["href"])
        if "nationality" in jres:
            nationality = u", ".join("%s" % n["$"] for n in jres["nationality"]).strip(", ")
        if "biography" in jres:
            biography = unicode(jres["biography"])
        if "participation" in jres:
            # Group (movie code, label) pairs by activity name.
            for m in jres["participation"]:
                pyear = "????"
                if "productionYear" in m["movie"]:
                    pyear = m["movie"]["productionYear"]
                movie_to_append = (u"%s" % (m["movie"]["code"]), u"(%s) %s" % (pyear, m["movie"]["originalTitle"]))
                roles.setdefault(m["activity"]["$"], []).append(movie_to_append)

        person = Person(id, name)
        person.real_name = real_name
        person.birth_date = birth_date
        person.death_date = death_date
        person.birth_place = birth_place
        person.gender = gender
        person.nationality = nationality
        person.short_biography = short_biography
        person.biography = biography
        person.short_description = short_description
        person.roles = roles
        person.thumbnail_url = thumbnail_url
        return person
Ejemplo n.º 10
0
    def iter_movie_persons(self, movie_id, role_filter):
        """Yield a Person for each "castMember" entry of a movie.

        The generator ends immediately when the "movie" request returns
        None, when the decoded response has no "movie" entry, or when the
        movie has no "castMember" list. A non-None role_filter keeps only
        members whose activity equals it once both are lowercased and
        stripped.
        """
        request_params = [
            ("partner", self.PARTNER_KEY),
            ("code", movie_id),
            ("profile", "large"),
            ("mediafmt", "mp4-lc"),
            ("filter", "movie"),
            ("striptags", "synopsis,synopsisshort"),
            ("format", "json"),
        ]

        res = self.__do_request("movie", request_params)
        if res is None:
            return
        decoded = json.loads(res)
        if "movie" not in decoded:
            return
        movie = decoded["movie"]
        if "castMember" not in movie:
            return

        lazy_fields = (
            "real_name",
            "birth_place",
            "birth_date",
            "death_date",
            "gender",
            "nationality",
            "short_biography",
            "roles",
        )
        for member in movie["castMember"]:
            if role_filter is not None:
                member_role = member["activity"]["$"].lower().strip()
                if member_role != role_filter.lower().strip():
                    continue
            person_id = member["person"]["code"]
            person_name = unicode(member["person"]["name"])
            summary = unicode(member["activity"]["$"])
            if "role" in member:
                summary += ", %s" % member["role"]
            thumb = unicode(member["picture"]["href"]) if "picture" in member else NotAvailable
            person = Person(person_id, person_name)
            person.short_description = summary
            for field in lazy_fields:
                setattr(person, field, NotLoaded)
            person.thumbnail_url = thumb
            yield person
Ejemplo n.º 11
0
    def iter_persons(self, role_filter=None):
        """Yield Person objects for the cast table and the crew sections.

        The "cast_list" table is read only when role_filter is None or
        'actor'. Crew sections are always scanned; one is kept when
        role_filter is None or equals the section anchor's name stripped of
        trailing 's' characters.
        """
        lazy_fields = ('real_name', 'birth_place', 'birth_date', 'death_date', 'gender',
                       'nationality', 'short_biography', 'roles', 'thumbnail_url')

        if role_filter is None or role_filter == 'actor':
            cast_tables = self.doc.xpath('//table[has-class("cast_list")]')
            if cast_tables:
                # Only the first matching table is read.
                for cell in cast_tables[0].xpath('.//td[has-class("itemprop")]'):
                    anchor = cell.find('a')
                    person_id = anchor.attrib.get('href', '').strip('/').split('/')[1]
                    person_name = anchor.text
                    # The character cell is searched from the cell's parent.
                    character_cells = cell.xpath('..//td[has-class("character")]')
                    character = character_cells[0].text_content()
                    person = Person(person_id, person_name)
                    person.short_description = character
                    for field in lazy_fields:
                        setattr(person, field, NotLoaded)
                    yield person

        for heading in self.doc.xpath('//table[@cellspacing="1"]//h5//a'):
            role = heading.attrib.get('name', '').rstrip('s')
            if role_filter is not None and role != role_filter:
                continue
            # Rows are searched from the node four ancestors above the heading link.
            container = heading
            for depth in range(4):
                container = container.getparent()
            # The first row of the section is skipped.
            for row in container.xpath('.//tr')[1:]:
                for anchor in row.xpath('.//a'):
                    target = anchor.attrib.get('href', '')
                    # person_id / person_name only change on person links;
                    # other links reuse the values seen last.
                    if '/name/nm' in target:
                        person_id = target.strip('/').split('/')[-1]
                        person_name = anchor.text
                    detail = anchor.text if 'glossary' in target else NotAvailable
                    person = Person(person_id, person_name)
                    person.short_description = detail
                    yield person
Ejemplo n.º 12
0
    def get_person(self, id):
        """Build a Person from the overview section of the current document.

        Values that are not found keep the NotAvailable marker; gender,
        nationality and short_description are never extracted here. Roles
        come from self.get_roles(). Raises IndexError when the document has
        no td with id "overview-top".
        """
        name = NotAvailable
        short_biography = NotAvailable
        short_description = NotAvailable
        birth_place = NotAvailable
        birth_date = NotAvailable
        death_date = NotAvailable
        real_name = NotAvailable
        gender = NotAvailable
        thumbnail_url = NotAvailable
        nationality = NotAvailable
        td_overview = self.doc.xpath('//td[@id="overview-top"]')[0]
        descs = td_overview.xpath('.//span[@itemprop="description"]')
        if len(descs) > 0:
            short_biography = descs[0].text
        rname_block = td_overview.xpath('.//div[has-class("txt-block")]//h4[has-class("inline")]')
        # Only a first heading containing "born" is followed.
        if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
            links = rname_block[0].xpath('..//a')
            for a in links:
                href = a.attrib.get('href', '').strip()
                if href == 'bio':
                    real_name = a.text.strip()
                elif 'birth_place' in href:
                    birth_place = a.text.lower().strip()
        names = td_overview.xpath('.//h1//span[@itemprop="name"]')
        if len(names) > 0:
            name = names[0].text.strip()
        # Dates are accepted only as three dash-separated fields with a
        # year of at least 1900.
        times = td_overview.xpath('.//time[@itemprop="birthDate"]')
        if len(times) > 0:
            time = times[0].attrib.get('datetime', '').split('-')
            if len(time) == 3 and int(time[0]) >= 1900:
                birth_date = datetime(int(time[0]), int(time[1]), int(time[2]))
        dtimes = td_overview.xpath('.//time[@itemprop="deathDate"]')
        if len(dtimes) > 0:
            dtime = dtimes[0].attrib.get('datetime', '').split('-')
            if len(dtime) == 3 and int(dtime[0]) >= 1900:
                death_date = datetime(int(dtime[0]), int(dtime[1]), int(dtime[2]))
        # The previous expression compared the id attribute with the literal
        # "img_primary img" and so never matched; select the img elements
        # below the td whose id is "img_primary" instead.
        img_thumbnail = self.doc.xpath('//td[@id="img_primary"]//img')
        if len(img_thumbnail) > 0:
            thumbnail_url = img_thumbnail[0].attrib.get('src', '')

        roles = self.get_roles()

        person = Person(id, name)
        person.real_name = real_name
        person.birth_date = birth_date
        person.death_date = death_date
        person.birth_place = birth_place
        person.gender = gender
        person.nationality = nationality
        person.short_biography = short_biography
        person.short_description = short_description
        person.roles = roles
        person.thumbnail_url = thumbnail_url
        return person
Ejemplo n.º 13
0
 def iter_persons(self, pattern):
     """Yield a lightly-filled Person for each person in a search result.

     Nothing is yielded when the request returns None or when the decoded
     'feed' has no 'person' entry. Detailed fields are set to NotLoaded;
     the short description is built from the birth date (in parentheses)
     and the activities, comma-separated.
     """
     res = self.readurl('http://api.allocine.fr/rest/v3/search?partner=YW5kcm9pZC12M3M&filter=person&q=%s&format=json' % pattern.encode('utf-8'))
     # A None response would make json.loads raise TypeError; treat it as
     # an empty result, as the other readurl-based methods do.
     if res is None:
         return
     jres = json.loads(res)
     if 'person' not in jres['feed']:
         return
     for p in jres['feed']['person']:
         thumbnail_url = NotAvailable
         if 'picture' in p:
             thumbnail_url = unicode(p['picture']['href'])
         person = Person(p['code'], unicode(p['name']))
         desc = u''
         if 'birthDate' in p:
             desc += '(%s), ' % p['birthDate']
         if 'activity' in p:
             for a in p['activity']:
                 desc += '%s, ' % a['$']
         person.real_name = NotLoaded
         person.birth_place = NotLoaded
         person.birth_date = NotLoaded
         person.death_date = NotLoaded
         person.gender = NotLoaded
         person.nationality = NotLoaded
         person.short_biography = NotLoaded
         # strip(', ') removes the separator left after the last item.
         person.short_description = desc.strip(', ')
         person.roles = NotLoaded
         person.thumbnail_url = thumbnail_url
         yield person
Ejemplo n.º 14
0
    def iter_persons(self, role_filter=None):
        """Yield Person objects for the cast table and the crew sections.

        The 'table.cast_list' table is read only when role_filter is None
        or 'actor'. Crew sections are always scanned; one is kept when
        role_filter is None or equals the section anchor's name stripped of
        trailing 's' characters.
        """
        select = self.parser.select
        lazy_fields = ('real_name', 'birth_place', 'birth_date', 'death_date', 'gender',
                       'nationality', 'short_biography', 'roles', 'thumbnail_url')

        if role_filter is None or role_filter == 'actor':
            cast_tables = select(self.document.getroot(), 'table.cast_list')
            if cast_tables:
                # Only the first matching table is read.
                for cell in select(cast_tables[0], 'td.itemprop'):
                    anchor = cell.find('a')
                    person_id = anchor.attrib.get('href', '').strip('/').split('/')[1]
                    person_name = unicode(anchor.text)
                    # Exactly one character cell is requested from the cell's parent.
                    character_cell = select(cell.getparent(), 'td.character', 1)
                    character = unicode(character_cell.text_content())
                    person = Person(person_id, person_name)
                    person.short_description = character
                    for field in lazy_fields:
                        setattr(person, field, NotLoaded)
                    yield person

        for heading in select(self.document.getroot(), 'table[cellspacing="1"] h5 a'):
            role = heading.attrib.get('name', '').rstrip('s')
            if role_filter is not None and role != role_filter:
                continue
            # Rows are searched from the node four ancestors above the heading link.
            container = heading
            for depth in range(4):
                container = container.getparent()
            # The first row of the section is skipped.
            for row in select(container, 'tr')[1:]:
                for anchor in select(row, 'a'):
                    target = anchor.attrib.get('href', '')
                    # person_id / person_name only change on person links;
                    # other links reuse the values seen last.
                    if '/name/nm' in target:
                        person_id = target.strip('/').split('/')[-1]
                        person_name = unicode(anchor.text)
                    if 'glossary' in target:
                        detail = unicode(anchor.text)
                    else:
                        detail = NotAvailable
                    person = Person(person_id, person_name)
                    person.short_description = detail
                    yield person
Ejemplo n.º 15
0
    def get_person(self, id):
        """Fetch a person record by URL and convert it into a Person object.

        Returns None when the request yields no result or the decoded
        response has no 'person' entry. Fields absent from the response keep
        the NotAvailable marker; short_description is never filled here.
        Dates that are not a complete, valid 'year-month-day' value are
        reported as NotAvailable instead of raising.
        """
        res = self.readurl(
            'http://api.allocine.fr/rest/v3/person?partner=YW5kcm9pZC12M3M&profile=large&code=%s&mediafmt=mp4-lc&filter=movie&format=json&striptags=biography'
            % id)
        if res is None:
            return None
        jres = json.loads(res)
        if 'person' not in jres:
            return None
        jres = jres['person']

        def parse_date(raw):
            # Partial dates (fewer than three fields) and out-of-range or
            # non-numeric fields used to crash the whole lookup.
            parts = raw.split('-')
            if len(parts) != 3:
                return NotAvailable
            try:
                return datetime(int(parts[0]), int(parts[1]), int(parts[2]))
            except ValueError:
                return NotAvailable

        name = NotAvailable
        short_biography = NotAvailable
        biography = NotAvailable
        short_description = NotAvailable
        birth_place = NotAvailable
        birth_date = NotAvailable
        death_date = NotAvailable
        real_name = NotAvailable
        gender = NotAvailable
        thumbnail_url = NotAvailable
        roles = {}
        nationality = NotAvailable

        if 'name' in jres:
            # Join only the parts that exist so a missing given name does
            # not leave a leading space in front of the family name.
            name_parts = [jres['name'][key] for key in ('given', 'family')
                          if key in jres['name'] and jres['name'][key]]
            name = u' '.join(name_parts)
        if 'biographyShort' in jres:
            short_biography = unicode(jres['biographyShort'])
        if 'birthPlace' in jres:
            birth_place = unicode(jres['birthPlace'])
        if 'birthDate' in jres:
            birth_date = parse_date(jres['birthDate'])
        if 'deathDate' in jres:
            death_date = parse_date(jres['deathDate'])
        if 'realName' in jres:
            real_name = unicode(jres['realName'])
        if 'gender' in jres:
            # Any code other than '1' is reported as Female.
            if jres['gender'] == '1':
                gender = u'Male'
            else:
                gender = u'Female'
        if 'picture' in jres:
            thumbnail_url = unicode(jres['picture']['href'])
        if 'nationality' in jres:
            nationality = u', '.join(
                '%s' % n['$'] for n in jres['nationality']).strip(', ')
        if 'biography' in jres:
            biography = unicode(jres['biography'])
        if 'participation' in jres:
            # Group the movie labels by activity name.
            for m in jres['participation']:
                pyear = '????'
                if 'productionYear' in m['movie']:
                    pyear = m['movie']['productionYear']
                roles.setdefault(m['activity']['$'], []).append(
                    u'(%s) %s' % (pyear, m['movie']['originalTitle']))

        person = Person(id, name)
        person.real_name = real_name
        person.birth_date = birth_date
        person.death_date = death_date
        person.birth_place = birth_place
        person.gender = gender
        person.nationality = nationality
        person.short_biography = short_biography
        person.biography = biography
        person.short_description = short_description
        person.roles = roles
        person.thumbnail_url = thumbnail_url
        return person
Ejemplo n.º 16
0
    def get_person(self, id):
        """
        Request a person through __do_request('person', ...) and build a
        Person from the answer.

        :param id: person code, sent as the 'code' request parameter
        :return: a Person filled from the answer, or None when the request
                 returns None or the answer has no 'person' key
        """
        def parse_date(value):
            # Dates are expected as 'YYYY-MM-DD'. A partial or malformed
            # value yields NotAvailable instead of aborting the whole lookup.
            try:
                parts = value.split('-')
                return datetime(int(parts[0]), int(parts[1]), int(parts[2]))
            except (AttributeError, IndexError, ValueError):
                return NotAvailable

        def text_field(key):
            # Every text field is optional in the answer.
            if key in jres:
                return unicode(jres[key])
            return NotAvailable

        params = [('partner', self.PARTNER_KEY), ('code', id),
                  ('profile', 'large'), ('mediafmt', 'mp4-lc'),
                  ('filter', 'movie'),
                  ('striptags', 'biography,biographyshort'),
                  ('format', 'json')]

        jres = self.__do_request('person', params)
        if jres is None or 'person' not in jres:
            return None
        jres = jres['person']

        name = NotAvailable
        if 'name' in jres:
            # Join only the parts that are present so that a missing given
            # name does not leave a leading space in front of the family name.
            name = u' '.join([jres['name'][key]
                              for key in ('given', 'family')
                              if jres['name'].get(key)])

        birth_date = NotAvailable
        if 'birthDate' in jres:
            birth_date = parse_date(jres['birthDate'])
        death_date = NotAvailable
        if 'deathDate' in jres:
            death_date = parse_date(jres['deathDate'])

        gender = NotAvailable
        if 'gender' in jres:
            # Accept the code '1' whether the JSON carries it as a string
            # or as a number; any other value maps to Female as before.
            if jres['gender'] in (1, '1'):
                gender = u'Male'
            else:
                gender = u'Female'

        thumbnail_url = NotAvailable
        if 'picture' in jres:
            thumbnail_url = unicode(jres['picture']['href'])

        nationality = NotAvailable
        if 'nationality' in jres:
            nationality = u', '.join(
                ['%s' % n['$'] for n in jres['nationality']]).strip(', ')

        # roles maps an activity label to a list of
        # (movie code, '(year) title') tuples.
        roles = {}
        if 'participation' in jres:
            for m in jres['participation']:
                movie = m['movie']
                pyear = '????'
                if 'productionYear' in movie:
                    pyear = movie['productionYear']
                movie_to_append = (u'%s' % (movie['code']), u'(%s) %s' %
                                   (pyear, movie['originalTitle']))
                roles.setdefault(m['activity']['$'], []).append(
                    movie_to_append)

        person = Person(id, name)
        person.real_name = text_field('realName')
        person.birth_date = birth_date
        person.death_date = death_date
        person.birth_place = text_field('birthPlace')
        person.gender = gender
        person.nationality = nationality
        person.short_biography = text_field('biographyShort')
        person.biography = text_field('biography')
        # The answer is never used to fill the short description.
        person.short_description = NotAvailable
        person.roles = roles
        person.thumbnail_url = thumbnail_url
        return person
Ejemplo n.º 17
0
 def iter_movie_persons(self, movie_id, role_filter):
     """
     Yield a Person for each cast member of a movie fetched from Allocine.

     :param movie_id: movie code, interpolated into the request URL
     :param role_filter: None to keep everybody, otherwise only the cast
                         members whose activity equals this value (compared
                         lower-cased and stripped) are yielded
     :return: generator of Person; nothing is yielded when readurl()
              returns None or the answer has no 'movie' key
     """
     raw = self.readurl(
         'http://api.allocine.fr/rest/v3/movie?partner=YW5kcm9pZC12M3M&code=%s&profile=large&mediafmt=mp4-lc&format=json&filter=movie&striptags=synopsis,synopsisshort'
         % movie_id)
     if raw is None:
         return
     answer = json.loads(raw)
     if 'movie' not in answer:
         return
     movie = answer['movie']
     if 'castMember' not in movie:
         return

     # Fields that this listing does not provide.
     unloaded = ('real_name', 'birth_place', 'birth_date', 'death_date',
                 'gender', 'nationality', 'short_biography', 'roles')

     for cast in movie['castMember']:
         if role_filter is not None:
             activity = cast['activity']['$'].lower().strip()
             if activity != role_filter.lower().strip():
                 continue

         code = cast['person']['code']
         full_name = unicode(cast['person']['name'])
         # Activity label, followed by the played role when there is one.
         description = unicode(cast['activity']['$'])
         if 'role' in cast:
             description += ', %s' % cast['role']
         if 'picture' in cast:
             picture = unicode(cast['picture']['href'])
         else:
             picture = NotAvailable

         person = Person(code, full_name)
         person.short_description = description
         for field in unloaded:
             setattr(person, field, NotLoaded)
         person.thumbnail_url = picture
         yield person
Ejemplo n.º 18
0
    def iter_persons(self, pattern):
        """
        Search persons through __do_request('search', ...) and yield them.

        :param pattern: search text, sent as the 'q' request parameter
        :return: generator of Person carrying a short description (birth
                 date and activities) and a thumbnail URL; nothing is
                 yielded when the request returns None or the feed has no
                 'person' entry
        """
        params = [('partner', self.PARTNER_KEY), ('q', pattern),
                  ('format', 'json'), ('filter', 'person')]

        answer = self.__do_request('search', params)
        if answer is None or 'person' not in answer['feed']:
            return

        # Fields that the search listing does not provide.
        unloaded = ('real_name', 'birth_place', 'birth_date', 'death_date',
                    'gender', 'nationality', 'short_biography')

        for entry in answer['feed']['person']:
            if 'picture' in entry:
                picture = unicode(entry['picture']['href'])
            else:
                picture = NotAvailable
            person = Person(entry['code'], unicode(entry['name']))

            # '(birth date), activity, activity, ...'
            pieces = []
            if 'birthDate' in entry:
                pieces.append('(%s)' % entry['birthDate'])
            if 'activity' in entry:
                for activity in entry['activity']:
                    pieces.append('%s' % activity['$'])
            description = u', '.join(pieces)

            for field in unloaded:
                setattr(person, field, NotLoaded)
            person.short_description = description.strip(', ')
            person.roles = NotLoaded
            person.thumbnail_url = picture
            yield person
Ejemplo n.º 19
0
 def iter_persons(self, pattern):
     """
     Search Allocine persons matching a pattern and yield them.

     :param pattern: search text; it is UTF-8 encoded and URL-quoted before
                     being placed in the query string
     :return: generator of Person carrying a short description (birth date
              and activities) and a thumbnail URL; nothing is yielded when
              readurl() returns None or the feed has no 'person' entry
     """
     # Imported here so the method works on both Python 2 and Python 3
     # without touching the module-level imports.
     try:
         from urllib import quote_plus
     except ImportError:
         from urllib.parse import quote_plus

     # Quote the pattern: characters such as '&', '#' or '+' would
     # otherwise alter the query string.
     res = self.readurl(
         'http://api.allocine.fr/rest/v3/search?partner=YW5kcm9pZC12M3M&filter=person&q=%s&format=json'
         % quote_plus(pattern.encode('utf-8')))
     if res is None:
         return
     jres = json.loads(res)
     if 'person' not in jres.get('feed', {}):
         return
     for p in jres['feed']['person']:
         thumbnail_url = NotAvailable
         if 'picture' in p:
             thumbnail_url = unicode(p['picture']['href'])
         person = Person(p['code'], unicode(p['name']))
         # '(birth date), activity, activity, ...'
         desc = u''
         if 'birthDate' in p:
             desc += '(%s), ' % p['birthDate']
         if 'activity' in p:
             for a in p['activity']:
                 desc += '%s, ' % a['$']
         # The search listing does not provide the fields below.
         person.real_name = NotLoaded
         person.birth_place = NotLoaded
         person.birth_date = NotLoaded
         person.death_date = NotLoaded
         person.gender = NotLoaded
         person.nationality = NotLoaded
         person.short_biography = NotLoaded
         person.short_description = desc.strip(', ')
         person.roles = NotLoaded
         person.thumbnail_url = thumbnail_url
         yield person
Ejemplo n.º 20
0
    def iter_movie_persons(self, movie_id, role_filter):
        """
        Yield a Person for each cast member of a movie requested through
        __do_request('movie', ...).

        :param movie_id: movie code, sent as the 'code' request parameter
        :param role_filter: None to keep everybody, otherwise only the cast
                            members whose activity equals this value
                            (compared lower-cased and stripped) are yielded
        :return: generator of Person; nothing is yielded when the request
                 returns None or the decoded answer has no 'movie' key
        """
        params = [
            ('partner', self.PARTNER_KEY),
            ('code', movie_id),
            ('profile', 'large'),
            ('mediafmt', 'mp4-lc'),
            ('filter', 'movie'),
            ('striptags', 'synopsis,synopsisshort'),
            ('format', 'json'),
        ]

        raw = self.__do_request('movie', params)
        if raw is None:
            return
        # The request result is decoded here as JSON text.
        answer = json.loads(raw)
        if 'movie' not in answer:
            return
        movie = answer['movie']
        if 'castMember' not in movie:
            return

        # Fields that this listing does not provide.
        unloaded = ('real_name', 'birth_place', 'birth_date', 'death_date',
                    'gender', 'nationality', 'short_biography', 'roles')

        for cast in movie['castMember']:
            if role_filter is not None:
                activity = cast['activity']['$'].lower().strip()
                if activity != role_filter.lower().strip():
                    continue

            code = cast['person']['code']
            full_name = unicode(cast['person']['name'])
            # Activity label, followed by the played role when there is one.
            description = unicode(cast['activity']['$'])
            if 'role' in cast:
                description += ', %s' % cast['role']
            if 'picture' in cast:
                picture = unicode(cast['picture']['href'])
            else:
                picture = NotAvailable

            person = Person(code, full_name)
            person.short_description = description
            for field in unloaded:
                setattr(person, field, NotLoaded)
            person.thumbnail_url = picture
            yield person
Ejemplo n.º 21
0
 def iter_persons(self, pattern):
     """
     Search IMDb persons matching a pattern and yield them.

     :param pattern: search text; it is UTF-8 encoded and URL-quoted before
                     being placed in the query string
     :return: generator of Person built from the 'name_popular',
              'name_exact' and 'name_approx' lists of the answer, in that
              order; nothing is yielded when readurl() returns None
     """
     # Imported here so the method works on both Python 2 and Python 3
     # without touching the module-level imports.
     try:
         from urllib import quote_plus
     except ImportError:
         from urllib.parse import quote_plus

     # Quote the pattern: characters such as '&', '#' or '+' would
     # otherwise alter the query string.
     res = self.readurl(
         'http://www.imdb.com/xml/find?json=1&nr=1&nm=on&q=%s'
         % quote_plus(pattern.encode('utf-8')))
     if res is None:
         return
     jres = json.loads(res)
     htmlparser = HTMLParser()
     for cat in ['name_popular', 'name_exact', 'name_approx']:
         if cat not in jres:
             continue
         for p in jres[cat]:
             # Names and descriptions are passed through unescape() to
             # resolve HTML entities.
             person = Person(p['id'],
                             htmlparser.unescape(unicode(p['name'])))
             # The search listing does not provide the fields below.
             person.real_name = NotLoaded
             person.birth_place = NotLoaded
             person.birth_date = NotLoaded
             person.death_date = NotLoaded
             person.gender = NotLoaded
             person.nationality = NotLoaded
             person.short_biography = NotLoaded
             person.short_description = htmlparser.unescape(
                 p['description'])
             person.roles = NotLoaded
             person.thumbnail_url = NotLoaded
             yield person
Ejemplo n.º 22
0
 def iter_persons(self, pattern):
     """
     Search IMDb persons matching a pattern and yield them.

     :param pattern: search text, passed to open() as the 'q' parameter
     :return: generator of Person built from the 'name_popular',
              'name_exact' and 'name_approx' lists of the JSON answer, in
              that order
     """
     response = self.open('http://www.imdb.com/xml/find?json=1&nr=1&nm=on',
                          params={'q': pattern})
     found = response.json()
     # Names and descriptions are passed through unescape() to resolve
     # HTML entities.
     unescape = HTMLParser().unescape

     # Fields that the search listing does not provide.
     unloaded = ('real_name', 'birth_place', 'birth_date', 'death_date',
                 'gender', 'nationality', 'short_biography')

     for category in ('name_popular', 'name_exact', 'name_approx'):
         if category not in found:
             continue
         for entry in found[category]:
             display_name = unescape(unicode(entry['name']))
             person = Person(entry['id'], display_name)
             for field in unloaded:
                 setattr(person, field, NotLoaded)
             person.short_description = unescape(entry['description'])
             person.roles = NotLoaded
             person.thumbnail_url = NotLoaded
             yield person
Ejemplo n.º 23
0
    def get_person(self, id):
        """
        Build a Person from the person page held in self.doc.

        :param id: identifier stored on the returned Person
        :return: Person filled from the page. gender, nationality and
                 short_description are not read from the page and stay
                 NotAvailable; roles come from self.get_roles()
        :raises IndexError: when the page has no td with id "overview-top"
        """
        name = NotAvailable
        short_biography = NotAvailable
        birth_place = NotAvailable
        birth_date = NotAvailable
        death_date = NotAvailable
        real_name = NotAvailable
        thumbnail_url = NotAvailable

        td_overview = self.doc.xpath('//td[@id="overview-top"]')[0]

        descs = td_overview.xpath('.//span[@itemprop="description"]')
        if descs:
            short_biography = descs[0].text

        # The "born" block links to the real name ('bio') and birth place.
        rname_block = td_overview.xpath(
            './/div[has-class("txt-block")]//h4[has-class("inline")]')
        if rname_block and "born" in rname_block[0].text.lower():
            for a in rname_block[0].xpath('..//a'):
                href = a.attrib.get('href', '').strip()
                if href == 'bio':
                    real_name = a.text.strip()
                elif 'birth_place' in href:
                    birth_place = a.text.lower().strip()

        names = td_overview.xpath('.//h1//span[@itemprop="name"]')
        if names:
            name = names[0].text.strip()

        # Dates are only accepted as 'YYYY-MM-DD' with a year >= 1900.
        times = td_overview.xpath('.//time[@itemprop="birthDate"]')
        if times:
            birth_parts = times[0].attrib.get('datetime', '').split('-')
            if len(birth_parts) == 3 and int(birth_parts[0]) >= 1900:
                birth_date = datetime(int(birth_parts[0]),
                                      int(birth_parts[1]),
                                      int(birth_parts[2]))
        dtimes = td_overview.xpath('.//time[@itemprop="deathDate"]')
        if dtimes:
            death_parts = dtimes[0].attrib.get('datetime', '').split('-')
            if len(death_parts) == 3 and int(death_parts[0]) >= 1900:
                death_date = datetime(int(death_parts[0]),
                                      int(death_parts[1]),
                                      int(death_parts[2]))

        # Select the <img> inside the td whose id is "img_primary". The
        # previous expression compared the id with the literal
        # "img_primary img", which no element can match, so the thumbnail
        # was never found.
        img_thumbnail = self.doc.xpath('//td[@id="img_primary"]//img')
        if img_thumbnail:
            thumbnail_url = img_thumbnail[0].attrib.get('src', '')

        person = Person(id, name)
        person.real_name = real_name
        person.birth_date = birth_date
        person.death_date = death_date
        person.birth_place = birth_place
        person.gender = NotAvailable
        person.nationality = NotAvailable
        person.short_biography = short_biography
        person.short_description = NotAvailable
        person.roles = self.get_roles()
        person.thumbnail_url = thumbnail_url
        return person
Ejemplo n.º 24
0
    def iter_persons(self, role_filter=None):
        """
        Yield the persons listed on the credits page held in self.document.

        Actors are read from the first 'table.cast_list' table, with the
        character name as short description. Other crew members are read
        from the tables introduced by an 'h5 a' anchor; the anchor's 'name'
        attribute, without trailing 's', is the role of that table.

        :param role_filter: None to yield everybody, 'actor' for the cast
                            list, or a role name matching a crew table
        :return: generator of Person
        """
        root = self.document.getroot()

        if role_filter is None or role_filter == 'actor':
            tables = self.parser.select(root, 'table.cast_list')
            if len(tables) > 0:
                for td in self.parser.select(tables[0], 'td.itemprop'):
                    link = td.find('a')
                    person_id = link.attrib.get(
                        'href', '').strip('/').split('/')[1]
                    name = unicode(link.text)
                    char_name = unicode(
                        self.parser.select(td.getparent(), 'td.character',
                                           1).text_content())
                    person = Person(person_id, name)
                    person.short_description = char_name
                    person.real_name = NotLoaded
                    person.birth_place = NotLoaded
                    person.birth_date = NotLoaded
                    person.death_date = NotLoaded
                    person.gender = NotLoaded
                    person.nationality = NotLoaded
                    person.short_biography = NotLoaded
                    person.roles = NotLoaded
                    person.thumbnail_url = NotLoaded
                    yield person

        for gloss_link in self.parser.select(root,
                                             'table[cellspacing="1"] h5 a'):
            role = gloss_link.attrib.get('name', '').rstrip('s')
            if role_filter is not None and role != role_filter:
                continue
            # Four levels up from the anchor is the element whose rows are
            # scanned; its first row is skipped.
            tbody = gloss_link.getparent().getparent().getparent(
            ).getparent()
            for line in self.parser.select(tbody, 'tr')[1:]:
                # Gather the whole row before yielding. Yielding on every
                # link duplicated the person for rows that also carry a
                # glossary link, and reused a stale (or unbound) id/name
                # for rows whose first link is not a name link.
                found = []
                role_detail = NotAvailable
                for a in self.parser.select(line, 'a'):
                    href = a.attrib.get('href', '')
                    if '/name/nm' in href:
                        found.append((href.strip('/').split('/')[-1],
                                      unicode(a.text)))
                    if 'glossary' in href:
                        role_detail = unicode(a.text)
                for person_id, name in found:
                    person = Person(person_id, name)
                    person.short_description = role_detail
                    yield person
Ejemplo n.º 25
0
    def get_person(self, id):
        """
        Fetch a person from the Allocine REST API and build a Person from it.

        :param id: person code, interpolated into the request URL
        :return: a Person filled from the JSON answer, or None when
                 readurl() returns None or the answer has no 'person' key
        """
        def parse_date(value):
            # Dates are expected as 'YYYY-MM-DD'. A partial or malformed
            # value yields NotAvailable instead of aborting the whole lookup.
            try:
                parts = value.split('-')
                return datetime(int(parts[0]), int(parts[1]), int(parts[2]))
            except (AttributeError, IndexError, ValueError):
                return NotAvailable

        res = self.readurl(
                'http://api.allocine.fr/rest/v3/person?partner=YW5kcm9pZC12M3M&profile=large&code=%s&mediafmt=mp4-lc&filter=movie&format=json&striptags=biography' % id)
        if res is None:
            return None
        jres = json.loads(res)
        if 'person' not in jres:
            return None
        jres = jres['person']

        name = NotAvailable
        short_biography = NotAvailable
        biography = NotAvailable
        birth_place = NotAvailable
        birth_date = NotAvailable
        death_date = NotAvailable
        real_name = NotAvailable
        gender = NotAvailable
        thumbnail_url = NotAvailable
        nationality = NotAvailable
        # roles maps an activity label to a list of '(year) title' strings.
        roles = {}

        if 'name' in jres:
            # Join only the parts that are present so that a missing given
            # name does not leave a leading space in front of the family name.
            name = u' '.join([jres['name'][key]
                              for key in ('given', 'family')
                              if jres['name'].get(key)])
        if 'biographyShort' in jres:
            short_biography = unicode(jres['biographyShort'])
        if 'birthPlace' in jres:
            birth_place = unicode(jres['birthPlace'])
        if 'birthDate' in jres:
            birth_date = parse_date(jres['birthDate'])
        if 'deathDate' in jres:
            death_date = parse_date(jres['deathDate'])
        if 'realName' in jres:
            real_name = unicode(jres['realName'])
        if 'gender' in jres:
            # Accept the code '1' whether the JSON carries it as a string
            # or as a number; any other value maps to Female as before.
            if jres['gender'] in (1, '1'):
                gender = u'Male'
            else:
                gender = u'Female'
        if 'picture' in jres:
            thumbnail_url = unicode(jres['picture']['href'])
        if 'nationality' in jres:
            nationality = u', '.join(
                ['%s' % n['$'] for n in jres['nationality']]).strip(', ')
        if 'biography' in jres:
            biography = unicode(jres['biography'])
        if 'participation' in jres:
            for m in jres['participation']:
                movie = m['movie']
                pyear = '????'
                if 'productionYear' in movie:
                    pyear = movie['productionYear']
                roles.setdefault(m['activity']['$'], []).append(
                    u'(%s) %s' % (pyear, movie['originalTitle']))

        person = Person(id, name)
        person.real_name = real_name
        person.birth_date = birth_date
        person.death_date = death_date
        person.birth_place = birth_place
        person.gender = gender
        person.nationality = nationality
        person.short_biography = short_biography
        person.biography = biography
        # The answer is never used to fill the short description.
        person.short_description = NotAvailable
        person.roles = roles
        person.thumbnail_url = thumbnail_url
        return person