Example #1
def imdb_import(number):
    """
    Helper method to import large quantities of movies from IMDB
    as sample data.
    """

    reset_database()

    imdb = Imdb(cache=True)
    top = imdb.top_250()

    movies = []
    count = 0
    for x in top:
        if count >= int(number):
            break

        m = Movie()

        im = imdb.get_title_by_id(x['tconst'])

        m.name = im.title
        m.year = im.year
        m.imdb_id = im.imdb_id
        m.save()
        movies.append(m)

        # adding director and actors
        for person in im.credits:
            if person.token == "directors":
                m.director = Person.objects.create_or_find_imdb(person)

            elif person.token == "cast":
                m.actors.add(Person.objects.create_or_find_imdb(person))

        m.save()
        for i in range(random.randrange(3)):
            mc = MovieCopy()
            mc.movie = m
            mc.save()

        count = count+1

    # imdb.get_title_images("tt0468569")
    # imdb.get_person_images("nm0000033")

    return {
        'number_imported': number,
        'kind': 'movies',
        'movies': movies,
    }
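
A minimal usage sketch for the helper above, assuming the surrounding Django project provides the Movie, Person and MovieCopy models and the reset_database() helper that imdb_import relies on:

# hypothetical call site, e.g. a management command or a seed script
result = imdb_import(25)          # import the first 25 titles of the Top 250
print(result['kind'])             # 'movies'
print(result['number_imported'])  # 25
print(len(result['movies']))      # number of Movie instances actually saved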
Example #2
    def save_top_posters(cls):
        imdb = Imdb()
        top_250 = imdb.top_250()
        # delete all the posters first
        TopPoster.objects.all().delete()
        for movie in top_250:
            title = movie['title']
            url = movie['image']['url']
            num_votes = movie['num_votes']
            TopPoster.objects.create(
                title=title,
                url=url,
                num_votes=num_votes
            )
            print('Saved poster for ', title)
Example #3
class IMDB(Miner):

    def __init__(self):

        self.handle = Imdb()
        super(IMDB, self).__init__()

    def top_list(self, number):
        pop_movies = self.handle.top_250()
        return pop_movies

    def get_movie_id(self, index):
        return "tt" + index  # formatting to IMDB_ID

    def get_movie_by_id(self, movie_id):
        return self.handle.get_title_images(movie_id), self.handle.get_title(movie_id)
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-k", "--clusters", required=True, type=int, help="Number of cluters")
    args = vars(parser.parse_args())

    k = args["clusters"]
    make_output_dirs(k)

    if os.listdir("posters") == []:
        imdb = Imdb(anonymize=True)
        top = imdb.top_250()
        write_posters(top)

    qp_dir = "quantized_posters_" + str(k) + "/"
    cb_dir = "color_bars_" + str(k) + "/"
    if (os.listdir(qp_dir) == []) and (os.listdir(cb_dir) == []):
        posters = os.listdir("posters")
        for poster in posters:
            process_poster("posters/" + poster, k)
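
make_output_dirs(), write_posters() and process_poster() are not shown in this excerpt. A minimal sketch of what write_posters() could look like, assuming each top_250() entry carries a poster URL under item['image']['url'] as in Example #2:

import os
import urllib.request

def write_posters(top, out_dir="posters"):
    """Download the poster image of every Top-250 entry into out_dir (sketch)."""
    os.makedirs(out_dir, exist_ok=True)
    for item in top:
        url = item['image']['url']
        # name each file after its IMDb id and keep the original extension
        filename = item['tconst'] + os.path.splitext(url)[1]
        urllib.request.urlretrieve(url, os.path.join(out_dir, filename))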
Example #5
import redis
from imdb import IMDb

r = redis.StrictRedis(host='localhost', port=6379, db=0)
pipe = r.pipeline()

from imdbpie import Imdb
imdb1 = Imdb()
imdb1 = Imdb(anonymize=True) # to proxy requests

# Creating an instance with caching enabled
# Note that the cached responses expire every 2 hours or so.
# The API response itself dictates the expiry time.
imdb1 = Imdb(cache=True)

top250 = imdb1.top_250()
dict_top250 = {}
for i in range(len(top250)):
    tconst = top250[i]['tconst'].encode('utf-8')
    title = top250[i]['title'].encode('utf-8')
    # strip the leading "tt" so the numeric id can be passed to IMDbPY's get_movie()
    dict_top250[tconst[2:]] = title


for movId, title in dict_top250.iteritems():
    cast_list = []
    cast_dict = {}
#     print movId
    imdb2 = IMDb()
    my = imdb2.get_movie(movId)
#     pipe.execute()
    for castMember in my['cast'][0:10]:
#         cast_dict[castMember['name'].encode('utf-8')] = castMember.getID();
        cast_list.append(castMember.getID())
Example #6
class Quiz:
    movies_type = ''
    imdb = ''
    movie = None

    def __init__(self, session):
        self.session = session
        self.imdb = Imdb()
        self.imdb = Imdb(cache=True)

    def set_level(self, level):
        pass

    def rand_movie(self, rand_type=None):
        movie_id = ''
        while self.movie is None:
            if rand_type == "pop":
                pop_movies = self.imdb.top_250()
                number = randrange(0, len(pop_movies) - 1)
                movie_id = pop_movies[number]['tconst']

            if rand_type is None:
                number = str(randrange(1, 99999))
                if len(number) < 7:
                    number = '0' * (7 - len(number)) + number
                movie_id = "tt" + number  # formatting to IMDB_ID

            self.movie = self.imdb.get_title_by_id(movie_id)

            if self.movie is not None:
                if len(self.movie.trailer_image_urls) < 1:
                    self.movie = None

    def get_movie_photo(self):
        try:
            return choice(self.movie.trailer_image_urls)
        except ValueError as e:
            raise e

    def get_question(self, rand_type=None):
        try:
            self.rand_movie(rand_type)
            return self.get_movie_photo()
        except ValueError:
            raise ValueError(_("not_possible_find_movie"))

    def show(self, update, rand_type):
        chat_id = update.message.chat_id
        movie_img = self.get_question(rand_type)
        self.session.messenger.send_msg(chat_id, "CINEMONSTER", "title")
        self.session.messenger.send_photo(chat_id, movie_img, caption=_("question_which_movie"))
        self.session.update_counter()
        self.session.status = "running"

    def check_resps(self, update):
        chat_id = update.message.chat_id
        if str.lower(self.movie.title) == str.lower(update.message.text):
            player = Player(update.message.from_user.id)
            player.name = update.message.from_user.first_name+" "+update.message.from_user.last_name
            try:
                self.session.player_add(player)
            except ValueError as e:
                pass
            self.session.players[update.message.from_user.id].add_points(1)
            self.session.messenger.send_msg(chat_id,
                                            msg=(player.name, _("correct_answer")),
                                            type_msg='bold')
            self.movie = None

    def check_expiration(self):
        try:
            self.session.update_timer()
        except ValueError as e:
            pass
        if self.session.status == "timed_out":
            self.session.messenger.send_msg(chat_id=self.session.chat_id,
                                            msg=(_("times_up"), self.movie.title),
                                            type_msg='bold')
            self.session.status = "stop"
            self.movie = None
Example #8
import psycopg2
from imdbpie import Imdb
import random
imdb = Imdb()
imdb = Imdb(anonymize=True)
variable = imdb.search_for_title("The Dark Knight")[0]
# conn = psycopg2.connect()
# cur = conn.cursor()
title = imdb.get_title_by_id("tt0468569")
print (title.title)
print (title.rating)
print (title.runtime)
x = 0
listOfPopularMovies = imdb.top_250()
while x<15:
    temp = random.randint(1, 249)
    t = listOfPopularMovies[temp]
    tid = t["tconst"]
    print (tid)
    print (t["title"] + " is the " + str(temp) +"th rated movie")
    print ("It's score is: " + str(t["rating"]))

    x = x + 1
Example #11
from imdbpie import Imdb
import pandas as pd
from halo import Halo

imdb = Imdb(anonymize=True)

movies = imdb.top_250()

cols = [
    "Title", "Actors", "Director", "Genres", "Rating", "Running Time", "Year",
    "Certification", "Writers"
]
df = pd.DataFrame(columns=cols)

spinner = Halo(text='Loading', spinner='dots')

spinner.start()

for j, el in enumerate(movies):
    movie = imdb.get_title_by_id(el["tconst"])
    title = movie.title
    actors = ', '.join(i.name for i in movie.cast_summary)
    director = movie.directors_summary[0].name
    genres = ', '.join(i for i in movie.genres)
    rating = movie.rating
    rtime = movie.runtime
    year = movie.year
    cert = movie.certification
    writers = ', '.join(i.name for i in movie.writers_summary)
    spinner.text = "Running - " + str((j + 1) / 2.5) + "%"
    df.loc[j] = [
Example #12
import datetime
import json
import os

from imdbpie import Imdb


class ImdbNewly:
	_imdb = None
	_newTop = None
	_oldTop = None
	_newlyAdded = None
	_storedTopFile = "top.json"

	def __init__(self):
		self._imdb = Imdb()
		self._oldTop = self._get_stored_data()
		self._oldTopList = self._generate_oldTop_id_list()
		self._newTop = self._fetch_data()
		self._newTopList = self._generate_newTop_id_list()
		self._newlyAdded = self._find_newly_added()

	def _fetch_data(self):
		today = datetime.datetime.now()

		ret = {
			"top" : self._imdb.top_250(),
			"info" : {
				"date" : today.ctime()
			}
		}

		return ret

	def save_top_data(self):
		top = self._fetch_data()

		f = open(self._storedTopFile, 'w')
		f.write(json.dumps(top))
		f.close()

	def _generate_oldTop_id_list(self):
		return [item["tconst"] for item in self._oldTop["top"]]

	def _generate_newTop_id_list(self):
		return [item["tconst"] for item in self._newTop["top"]]

	def _get_stored_data(self):
		if not os.path.isfile(self._storedTopFile):
			self.save_top_data()

		f = open(self._storedTopFile, 'r')
		jsonTop = f.read()
		f.close()

		return json.loads(jsonTop)

	def _search_newTop_data(self, id):
		return next((item for item in self._newTop["top"] if item["tconst"] == id), None)

	def _find_newly_added(self):
		return set(self._newTopList) - set(self._oldTopList)

	def get_newly_added(self):
		return [self._search_newTop_data(itemId) for itemId in self._newlyAdded]

	def get_newTop_date(self):
		return self._newTop["info"]["date"]

	def get_oldTop_date(self):
		return self._oldTop["info"]["date"]
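
A short usage sketch for the class above; the method names and the top.json cache file come straight from the class definition:

if __name__ == "__main__":
    newly = ImdbNewly()
    print("stored Top 250 from:", newly.get_oldTop_date())
    print("fresh Top 250 from:", newly.get_newTop_date())
    for item in newly.get_newly_added():
        print(item["tconst"], item["title"])
    # persist the fresh list so the next run diffs against it
    newly.save_top_data()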
Example #13
import psycopg2
from sqlalchemy import create_engine
import requests
from imdbpie import Imdb
import nltk
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#########################################
# part 1
# importing the top 250 movies from the IMDb API into a dataframe
imdb = Imdb()
imdb = Imdb(anonymize=True)
top_250 = pd.DataFrame(imdb.top_250())

# sorting values by rating and selecting only the top 100 movies
top_250 = top_250.sort_values(by='rating', ascending=False)
top_100 = top_250[0:100]

# limiting columns according to starter code
mask = ['num_votes', 'rating', 'tconst', 'title', 'year']
top_100 = top_100[mask]

# getting genre/runtime from OMDB
top_100
movie_list = top_100['tconst']

def get_genre_runtime(b):
    genres = []
Example #14
import urllib
from bs4 import BeautifulSoup
import nltk
import pandas as pd
from imdbpie import Imdb

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


#1. Connect to the imdbpie API
imdb = Imdb()
imdb = Imdb(anonymize=True)

#2. Query the top 250 rated movies in the database
imdb.top_250()

#3. Put the information into a dataframe, then keep only relevant columns
data = pd.DataFrame(imdb.top_250())
data.head()

data.drop('can_rate', axis=1, inplace=True)
data.drop('image', axis=1, inplace=True)
data.drop('type', axis=1, inplace=True)

#4. Select only the top 100 movies
data = data.iloc[0:100]

#change the column name tconst to movie_id
data.rename(columns={'tconst': 'movie_id'}, inplace=True)
data.head()
Example #15
from alchemyapi import AlchemyAPI
alchemyapi = AlchemyAPI()
from imdbpie import Imdb
imdb = Imdb()
imdb = Imdb(anonymize=True)  # to proxy requests

# Creating an instance with caching enabled
# Note that the cached responses expire every 2 hours or so.
# The API response itself dictates the expiry time.
imdb = Imdb(cache=True)
top_mov = imdb.top_250()

rating = []
title = []
id = []
votes = []
prod_year = []
for i in range(len(top_mov)):
    rating.append(top_mov[i]['rating'])
    title.append(top_mov[i]['title'])
    id.append(top_mov[i]['tconst'])
    votes.append(top_mov[i]['num_votes'])
    prod_year.append(top_mov[i]['year'])

#print rating

reviews = {}
reviewScore = {}
num = 15
for item in id[201:250]:
    reviews[item] = []
Example #16
def single_quote(s):
    # Double up single quotes so the value can be embedded in a SQL string literal.
    if s is None:  # guard assumed for this sketch; the excerpt begins mid-function
        return 'None'
    if s.find('\'') != -1:
        ss = s.split("\'")
        new = ''
        for x in ss:
            new = new + "\'" + "\'" + x
        return new[2:]
    else:
        return s


imdb = Imdb()
imdb = Imdb(anonymize=True)  # to proxy requests

top250 = []
top250 = imdb.top_250()
for item in top250:
    try:
        title = imdb.get_title_by_id(item['tconst'])
        if len(title.trailers) > 0:
            trailer_url = title.trailers[0]['url']
        else:
            trailer_url = 'None'
        new_movie = (
            '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''.format(
                item['tconst'],
                single_quote(str(item['title'])),
                item['year'],
                title.release_date,
                item['rating'],
                single_quote(item['image']['url']),
Example #17
#     - Name - first and last name
#     - Known for - url to a movie that they are best known for
#     - Birth date (month/year/day)
#     
# ##### Additional movie data to be retrieved with the API
# Using the "tconst" field returned in the Topp 250 list to retrieve data on individual movies. Data fields that will be retrieved for each movie include:
#   * 
# 
# Each dataset will be initially loaded into Pandas dataframes and then saved as Postgres tables for analysis

# ##### Load the Top 250 Movies of all time into dataframe 'top_250' and drop unwanted columns

# In[ ]:

imdb = Imdb()
imdb_top = imdb.top_250()
#imdb.search_for_title("The Dark Knight")
imdb_top
top_250 = pd.DataFrame(imdb_top, columns=['can_rate', 'image', 'num_votes', 'rating', 'tconst', 'title', 'type', 'year'])
top_250.drop(['can_rate', 'image', 'title', 'type'],inplace=True,axis=1)


# ##### Import the Top 100 Actors and drop unwanted columns

# In[ ]:

top_actors = pd.read_csv("top_100_actors.csv")
top_actors.drop(['created', 'modified'],inplace=True,axis=1)


# ##### Pull selected movie information and add columns to top_250 dataframe
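
The snippet is cut off here; a sketch of what this step could look like, reusing the get_title_by_id(), runtime and genres attributes that appear in Example #11:

# In[ ]:

runtimes = []
genres = []
for tconst in top_250['tconst']:
    movie = imdb.get_title_by_id(tconst)
    runtimes.append(movie.runtime)
    genres.append(', '.join(movie.genres))

top_250['runtime'] = runtimes
top_250['genre'] = genres
top_250.head()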
Example #18
from imdbpie import Imdb
imdb = Imdb()
imdb = Imdb(anonymize=True)
var1 = imdb.top_250()
print(var1)
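# Each element of var1 is a plain dict; the keys used across the examples on
# this page are 'tconst', 'title', 'year', 'rating', 'num_votes',
# 'image' (a nested dict with a 'url' entry), 'can_rate' and 'type'.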
Example #19
    etree.SubElement(movie_xml, "genre").text = str(movie.genres[0])
    etree.SubElement(movie_xml, "title").text = movie.title
    etree.SubElement(movie_xml, "year").text = str(movie.year)
    etree.SubElement(movie_xml, "description").text = movie.plot_outline
    etree.SubElement(movie_xml, "price").text = str(random.randint(1, 8))

    return movie_xml


if len(sys.argv) < 2:
    print 'usage: imdb_downloader.py [movie name | top50 | top250]'
    sys.exit(1)

if sys.argv[1] == "top50":
    print "Retrieving Top 50 movies"
    top50 = imdb.top_250()[0:50]

    for m in top50:
        movies_xml.getroot().append(get_movie(m['tconst']))

elif sys.argv[1] == "top250":
    print "Retrieving Top 250 movies"
    top50 = imdb.top_250()

    for m in top50:
        movies_xml.getroot().append(get_movie(m['tconst']))

else:
    movie = imdb.find_by_title(sys.argv[1])[0]

    if movie is None: