# https://github.com/richardasaurus/imdb-pie

from imdbpie import Imdb

# Demo of the basic imdbpie calls.  A single client is enough; the
# ``anonymize=True`` flag asks the library to proxy its requests.
imdb = Imdb(anonymize=True)

print(imdb.search_for_title("The Dark Knight"))
print()
print(imdb.search_for_person("Christian Bale"))
print()
print(imdb.get_episodes('tt0096697'))

top250 = imdb.top_250()

# Print every entry followed by an empty line.
for entry in top250:
    print(entry)
    print()

title = imdb.get_title_by_id("tt1210166")
for person in title.credits:
    # A credit whose token is 'writers' is reported as a writer.
    if person.token == 'writers':
        verdict = ' is a writer'
    else:
        verdict = ' is not a writer'
    print(person.name + verdict)
def _first_datetime(doc, xpath):
    """Return the ``datetime`` attribute of the first node matching *xpath*.

    Falls back to ``''`` when nothing matches or the attribute is absent.
    """
    nodes = doc.xpath(xpath)
    if not nodes:
        return ''
    return nodes[0].attrib.get('datetime', '')


def _count_children(cells):
    """Add up the "<N> child..." counts found in the text items of *cells*.

    Returns the integer total, or ``''`` when a matched count cannot be
    converted to an integer.
    """
    # Compiled once instead of once per cell; group 2 is the token that
    # directly precedes " child".
    pattern = re.compile('(.*) (.*) child')
    total = 0
    try:
        for cell in cells:
            # Pad "(" so the count in "(2 children)" becomes its own token.
            # The text is matched as-is: encoding it to bytes first breaks
            # ``replace`` with text arguments on Python 3.
            match = pattern.search(cell.replace('(', '( ').strip())
            if match is not None:
                total += int(match.group(2))
    except ValueError:
        return ''
    return total


def imdb_content(twitter_id, twitter_name):
    """Scrape IMDb data for *twitter_name* and pass it to ``write_imdb_data``.

    The first hit of ``Imdb.search_for_person`` is used as the candidate.
    Its name page is fetched, and only when the name found on that page
    equals *twitter_name* (ignoring case) are the name page and the bio
    page scraped.  The result is handed to ``write_imdb_data`` as two JSON
    strings: the basic info record and the bio content record.

    Args:
        twitter_id: Value stored under the ``'id'`` key of both records.
        twitter_name: Name to search for and to compare the page name with.

    Returns:
        None.  Nothing is written when the search yields no hit or the
        name on the page does not match.
    """
    imdb = Imdb()  # use Imdb(anonymize=True) to proxy requests
    hits = imdb.search_for_person(twitter_name)
    if not hits:
        # No candidate at all: treated like a name mismatch.
        return

    person_url = "http://www.imdb.com/name/" + str(hits[0]['imdb_id'])

    # NOTE(review): requests.get is called without a timeout.
    hxs = lxml.html.document_fromstring(requests.get(person_url).content)

    try:
        name = hxs.xpath('//*[@id="overview-top"]/h1/span/text()')[0].strip()
    except IndexError:
        # The page has no name at the expected location.
        name = ''

    if name.lower() != twitter_name.lower():
        return

    # xpath() yields an empty list when nothing matches, so the list-valued
    # fields below need no exception handling.
    occupation = [
        job.strip() for job in hxs.xpath(
            '//*[@id="name-job-categories"]/a[*]/span/text()')
    ]
    birthday = _first_datetime(hxs, '//*[@id="name-born-info"]/time')
    death = _first_datetime(hxs, '//*[@id="name-death-info"]/time')

    hxs_bio = lxml.html.document_fromstring(
        requests.get(person_url + "/bio").content)

    content = ''.join(
        hxs_bio.xpath('//*[@id="bio_content"]/div[2]/p[1]/text()'))
    spouse = hxs_bio.xpath('//*[@id="tableSpouses"]//tr/td[1]/a/text()')
    children = _count_children(
        hxs_bio.xpath('//*[@id="tableSpouses"]//tr/td[2]/text()'))

    basic_info = {
        'id': twitter_id,
        'name': name,
        'occupation': occupation,
        'birthday': birthday,
        'death': death,
        'spouse': spouse,
        'children': str(children),
    }
    content_info = {'id': twitter_id, 'content': content.strip()}

    write_imdb_data(json.dumps(basic_info), json.dumps(content_info))
# Example no. 3
0
# Read the spreadsheet and take the values of its first column as the
# actor names (explicit positional lookup instead of row[0] on each row).
names = pd.read_excel('Actor_names.xlsx')
actor_names = names.iloc[:, 0].tolist()

# IMDb ids, with 'nm' removed, of every name that could be looked up.
actor_ids = []

print(len(actor_names))
for name in actor_names:
    try:
        actor_name = imdb.search_for_person(str(name))
    except Exception:
        # Best effort: skip names whose lookup fails.  Unlike a bare
        # ``except``, this still lets Ctrl-C stop the loop.
        continue
    if not actor_name:
        # The search returned no hit for this name.
        continue
    actor_id = actor_name[0]['imdb_id'].replace('nm', '')
    print('Converting ' + str(name) + ' to IMDB id:  ' + str(actor_id))
    actor_ids.append(actor_id)
    print('Total actor count: ' + str(len(actor_ids)))

no_pic = []

ids = pd.DataFrame(actor_ids)

ac_ids = {}
ac_ages = {}
ac_names = []
ac_pred_age = []
# Example no. 4
0
# Read the spreadsheet and take the values of its first column as the
# actor names (explicit positional lookup instead of row[0] on each row).
names = pd.read_excel('Actor_names.xlsx')
actor_names = names.iloc[:, 0].tolist()

# IMDb ids, with 'nm' removed, of every name that could be looked up.
actor_ids = []

print(len(actor_names))
for name in actor_names:
    try:
        actor_name = imdb.search_for_person(str(name))
    except Exception:
        # Best effort: skip names whose lookup fails.  Unlike a bare
        # ``except``, this still lets Ctrl-C stop the loop.
        continue
    if not actor_name:
        # The search returned no hit for this name.
        continue
    actor_id = actor_name[0]['imdb_id'].replace('nm', '')
    print('Converting ' + str(name) + ' to IMDB id:  ' + str(actor_id))
    actor_ids.append(actor_id)
    print('Total actor count: ' + str(len(actor_ids)))

no_pic = []

ids = pd.DataFrame(actor_ids)

ac_ids = {}
ac_ages = {}
ac_names = []
ac_pred_age = []