def load_movies(session):
    """Load movies from ``seed_data/u.item`` and commit them.

    Each line of the file is pipe-delimited with this layout:

        movie id | movie title | release date | video release date |
        IMDb URL | unknown | Action | Adventure | Animation |
        Children's | Comedy | Crime | Documentary | Drama | Fantasy |
        Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi |
        Thriller | War | Western

    Only the id, title, release date and IMDb URL columns are read.
    Everything from the first "(" onward is dropped from the title, and
    the title is decoded from Latin-1 so accented characters survive.
    A row with an empty release date is stored without ``released_at``.

    Args:
        session: object whose ``add`` and ``commit`` methods are used to
            persist the ``model.Movie`` instances.

    Raises:
        ValueError: if a non-empty release date does not parse as
            day / abbreviated month / year.
    """
    # The context manager closes the file even when a row fails to parse.
    with open("seed_data/u.item") as f:
        for line in f:
            movie_data = line.strip().split("|")
            movie_id, title, released, imdb = (
                movie_data[0], movie_data[1], movie_data[2], movie_data[4])

            # Keep only the text before the first "(" and decode it to
            # unicode so accent marks in titles are preserved.
            title = title.split("(")[0].strip().decode("latin-1")

            fields = dict(id=movie_id, name=title, imdb_url=imdb)

            # Dates look like "01-Jan-1995"; the dashes are removed before
            # parsing. An empty date is simply not passed to the model.
            released = released.replace("-", "")
            if released:
                fields["released_at"] = datetime.strptime(released, "%d%b%Y")

            session.add(model.Movie(**fields))
    session.commit()
def test_get_movie_uid_fails():
    """fetcher.get_movie_uid must raise AssertionError for both bad paths."""
    rejected_paths = (
        "/title/first_part/second_part",
        "/not_title/some_id/",
    )
    for path in rejected_paths:
        # The movie is built outside the raises-block so that only the
        # get_movie_uid call is allowed to trigger the AssertionError.
        broken_movie = model.Movie(path, "Movie name")
        with pytest.raises(AssertionError):
            fetcher.get_movie_uid(broken_movie)
def add_movie_to_watchlist(username, user_id, movie_name, movie_imdb_id, movie_rating):
    """Add a movie to a user's watchlist and to the learning agent history.

    The user is looked up by matching ``username`` against the ``email``
    column; ``user_id`` is only forwarded to the learning agent client.

    Returns:
        "Fail: Cannot find user!" when no user matches,
        "Movie already present in watchlist!" when the user already has a
        movie with the same name, otherwise "Success".
    """
    account = model.User.query.filter_by(email=username).first()
    if not account:
        return "Fail: Cannot find user!"

    candidate = model.Movie(
        name=movie_name,
        movie_imdb_id=movie_imdb_id,
        user_rating=movie_rating,
    )

    # Duplicates are detected by movie name only.
    if any(existing.name == movie_name for existing in account.movies):
        return "Movie already present in watchlist!"

    account.movies.append(candidate)
    model.session.commit()

    # Learning Agent watchlist.
    client = LearningAgentClient()
    history_entry = {
        'user_id': user_id,
        'movie_imdb_id': movie_imdb_id,
        'user_rating': movie_rating,
        'timestamp': int(time.time()),
    }
    client.add_movie_to_user_history(history_entry)
    return "Success"
def run(n_movies, json_file):
    """Run the movie recommender and print the recommendations.

    Loads users and movies via ``get_data(json_file)``, builds the
    user/movie and movie/movie matrices, fills in pairwise similarities,
    then prints the names of the ``n_movies`` top picks for every user.
    """
    data = get_data(json_file)
    users = data['users']
    movies = []
    for raw_id, payload in data['movies'].items():
        movies.append(model.Movie(int(raw_id), payload))

    usermoviematrix = model.UsersMoviesMatrix(users, movies)
    moviemoviematrix = model.MoviesMatrix(movies)

    # Register relation between movies
    for movie in movies:
        register_relations(movie, usermoviematrix, moviemoviematrix)

    # Compute cosine similarity between movies. The matrix is addressed
    # with ``id - 1`` for the first movie and the raw loop value for the
    # second, exactly as the matrix API is called here.
    for first in movies:
        row = first.id - 1
        for col in range(first.id, len(movies)):
            similarity = moviemoviematrix.compute_similarity(row, col)
            moviemoviematrix.insert_value(col, row, similarity)

    # Find which movies should be recommended
    predictions = [
        find_prediction_by(user['user_id'], usermoviematrix, moviemoviematrix)
        for user in users
    ]

    for prediction in predictions:
        top_picks = prediction.get_n_greatest(n_movies)
        print('# The recommended movie for the user: %s' % prediction.user.id)
        for movie_id, _score in top_picks.iteritems():
            # First movie whose id matches the recommended key.
            print([m for m in movies if m.id == movie_id][0].name)
def load_movies(session):
    """Import file ``seed/u.item`` into the database session.

    The file is read as pipe-delimited CSV. For every row the movie id is
    converted to ``int``, the title is decoded from Latin-1 to unicode and
    the release date is parsed from ``day-month-year`` (e.g.
    ``01-Jan-1995``) into a datetime. A row with an empty release date is
    stored with ``released_at=None``.

    This function only adds movies to ``session``; it does not commit.

    Args:
        session: object whose ``add`` method receives each ``model.Movie``.

    Raises:
        ValueError: if the id is not an integer or a non-empty release
            date does not match ``%d-%b-%Y``.
    """
    with open("seed/u.item") as f:
        reader = csv.reader(f, delimiter="|")
        for row in reader:
            movie_id = int(row[0])
            title = row[1].decode("latin-1")

            released_at = row[2]
            if released_at:
                formatted_date = datetime.datetime.strptime(
                    released_at, "%d-%b-%Y")
            else:
                # Reset explicitly: without this the previous row's date
                # would leak into a row that has no release date.
                formatted_date = None

            url = row[4]
            movie = model.Movie(id=movie_id,
                                title=title,
                                released_at=formatted_date,
                                url=url)
            session.add(movie)
def load_movies(session):
    """Load movies from ``seed_data/u.item`` and commit them.

    Rows whose release-date field is shorter than three characters are
    skipped. Every parsed release date is printed as it is read.
    """
    with open("seed_data/u.item") as fin:
        for fields in csv.reader(fin, delimiter="|"):
            movie = model.Movie()

            raw_date = fields[2]
            if len(raw_date) < 3:
                # Date not long enough to parse; leave this movie out.
                continue

            parsed_date = datetime.strptime(raw_date, "%d-%b-%Y")
            print(parsed_date)

            movie.id = fields[0]
            # Titles are Latin-1 encoded in the file.
            movie.title = fields[1].decode("latin-1")
            movie.release_date = parsed_date
            movie.url = fields[4]
            session.add(movie)
    session.commit()
def load_movies(session):
    """Load movies from ``seed_data/u.item`` and commit them.

    Each pipe-delimited line supplies the id (column 0), title (column 1),
    release date (column 2) and IMDb URL (column 4). Everything from the
    first "(" onward is removed from the title before it is decoded from
    Latin-1. Release dates such as ``01-Jan-1995`` become ``datetime.date``
    objects; a row with an empty release date is stored without
    ``released_at`` being set.

    Args:
        session: object whose ``add`` and ``commit`` methods are used.

    Raises:
        ValueError: if a non-empty release date does not have exactly
            three dash-separated parts or its day/year are not integers.
        KeyError: if the month abbreviation is not one of Jan..Dec.
    """
    # Built once instead of once per row.
    month_numbers = {
        "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4,
        "May": 5, "Jun": 6, "Jul": 7, "Aug": 8,
        "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
    }

    # The context manager closes the file even if a row fails to parse.
    with open("seed_data/u.item", "r") as movies_table:
        for line in movies_table:
            aline = line.split("|")
            movie_id = aline[0]
            release_date = aline[2]
            imdb_url = aline[4]

            # Remove the "(year)" part of the title column, then convert
            # to unicode.
            movie_title = aline[1].split("(")[0].strip().decode("latin-1")

            movie = model.Movie()
            movie.id = movie_id
            movie.name = movie_title
            if release_date:
                # Unpacking an empty field would raise, so rows without a
                # date skip this step.
                day, month, year = release_date.split("-")
                movie.released_at = datetime.date(
                    int(year), month_numbers[month], int(day))
            movie.imdb_url = imdb_url

            session.add(movie)

    session.commit()
def load_movies(session):
    """Load movies from ``seed_data/u.item`` and commit them.

    The file is read as pipe-delimited CSV. Column 0 is the id, column 1
    the Latin-1 encoded title, column 2 the release date in
    ``%d-%b-%Y`` form and column 4 the IMDb URL. A row with an empty
    release date is stored with ``release_date=None``.

    Args:
        session: object whose ``add`` and ``commit`` methods are used.

    Raises:
        ValueError: if a non-empty release date does not match
            ``%d-%b-%Y``.
    """
    with open('seed_data/u.item', 'rb') as f:
        reader = csv.reader(f, delimiter='|')
        for row in reader:
            # Start from None on every row so a missing date never reuses
            # the date parsed for the previous movie.
            release_date = None
            if row[2]:
                release_date = datetime.strptime(row[2], "%d-%b-%Y").date()

            movie = model.Movie(id=row[0],
                                title=row[1].decode("latin-1"),
                                release_date=release_date,
                                imdb_url=row[4])
            session.add(movie)
    session.commit()
def load_movies(session):
    """Load movies from ``../data/ml-10M100K/movies.dat`` and commit them.

    Lines are ``::``-delimited, for example::

        1::Toy Story (1995)::Adventure|Animation|Children|Comedy|Fantasy

    The first field is the file's movie id, which is translated through
    ``movie_id_to_index`` (a mapping defined outside this function) before
    being passed to ``model.Movie``. Title and genre are decoded from
    Latin-1.

    Args:
        session: object whose ``add`` and ``commit`` methods are used.

    Raises:
        KeyError: if a movie id is missing from ``movie_id_to_index``.
        IndexError: if a line has fewer than three ``::``-separated fields.
    """
    with open("../data/ml-10M100K/movies.dat") as itemfile:
        for line in itemfile:
            # Drop the line ending first; otherwise it ends up stored as
            # part of the last field (the genre).
            fields = line.rstrip("\r\n").split('::')
            movie_id = fields[0]
            title = fields[1].decode("latin-1")
            genre = fields[2].decode("latin-1")
            item = model.Movie(movie_id_to_index[movie_id], title, genre)
            session.add(item)
    session.commit()
def load_movies(session):
    """Load movies from ``./seed_data/u.item`` and commit them.

    The file is read as pipe-delimited CSV with quoting disabled. Trailing
    digits and parentheses (the "(year)" suffix) are removed from the
    title, which is then decoded from Latin-1. A row with an empty release
    date is stored with ``released_date=None``.

    Args:
        session: object whose ``add`` and ``commit`` methods are used.

    Raises:
        ValueError: if a non-empty release date does not match
            ``%d-%b-%Y``.
    """
    with open("./seed_data/u.item", "rb") as f:
        reader = csv.reader(f, delimiter="|", quoting=csv.QUOTE_NONE)
        for row in reader:
            # Only trim on the right: trimming both ends would also eat
            # leading digits of titles that start with a number.
            name = row[1].rstrip("()1234567890")
            name = name.decode("latin-1")

            # Reset per row so a missing date never reuses the date parsed
            # for the previous movie.
            released = None
            if row[2]:
                released = datetime.strptime(row[2], "%d-%b-%Y")

            entry = model.Movie(id=row[0],
                                name=name,
                                released_date=released,
                                imdb_url=row[4])
            session.add(entry)
    session.commit()
def load_movies(session, filename):
    """Load movies from the pipe-delimited file ``filename`` and commit.

    Column 0 is the movie id, column 1 the title, column 2 the release
    date in ``%d-%b-%Y`` form and column 4 the IMDb URL; every field is
    stripped of surrounding whitespace. A trailing "(YYYY)" is removed
    from the title when present, and the title is decoded from Latin-1.
    A row with an empty release date leaves ``release_date`` unset.

    Args:
        session: object whose ``add`` and ``commit`` methods are used.
        filename: path of the u.item-style file to read.

    Raises:
        ValueError: if a non-empty release date does not match
            ``%d-%b-%Y``.
    """
    with open(filename, 'rb') as csvfile:
        lines = csv.reader(csvfile, delimiter='|')
        for line in lines:
            movie = model.Movie()
            movie.movie_id = line[0].strip()

            title = line[1].strip()
            # Cut the six-character "(YYYY)" suffix only when it is really
            # there, so titles without a year are not truncated.
            has_year_suffix = (
                len(title) >= 6
                and title[-6] == "("
                and title[-1] == ")"
                and title[-5:-1].isdigit()
            )
            if has_year_suffix:
                title = title[:-6]
            movie.title = title.decode("latin-1").strip()

            released = line[2].strip()
            if released:
                # Parsing an empty string would raise, so rows without a
                # date skip this step.
                movie.release_date = datetime.strptime(released, "%d-%b-%Y")

            movie.imdb = line[4].strip()
            session.add(movie)
    session.commit()
def load_movies(session):
    """Load movies from ``seed_data/u.item`` and commit them.

    The file is read as pipe-delimited CSV. Rows whose release-date field
    is empty or equal to "0" are skipped entirely. For the remaining rows
    the title is decoded from Latin-1 and its last six characters are
    dropped (assumes every dated title ends in "(YYYY)" - TODO confirm
    against the data file), and the date is parsed from ``%d-%b-%Y``.

    Args:
        session: object whose ``add`` and ``commit`` methods are used.

    Raises:
        ValueError: if a release date other than "" or "0" does not match
            ``%d-%b-%Y``.
    """
    with open("seed_data/u.item") as m:
        reader = csv.reader(m, delimiter="|")
        for line in reader:
            str_time = line[2]
            # "0" was already treated as "no date"; an empty field must be
            # skipped as well because it cannot be parsed.
            if not str_time or str_time == "0":
                continue

            movie_title = line[1].decode("latin-1")
            movie_title = movie_title[:-6].strip()
            release_datetime = datetime.strptime(str_time, "%d-%b-%Y")

            new_movie = model.Movie(id=line[0],
                                    movie_title=movie_title,
                                    release_date=release_datetime,
                                    IMDB=line[4])
            session.add(new_movie)

    # Commit all movies added to the session.
    session.commit()
def test_get_movie_uid():
    """fetcher.get_movie_uid returns "title_id" for "/title/title_id/"."""
    sample_movie = model.Movie("/title/title_id/", "Movie name")
    extracted_uid = fetcher.get_movie_uid(sample_movie)
    assert extracted_uid == "title_id"
def test_get_full_credits():
    """fetcher.get_fullcredits_page builds the IMDb full-credits URL."""
    expected_url = "https://www.imdb.com/title/title_id/fullcredits/"
    sample_movie = model.Movie("/title/title_id/", "Movie name")
    assert fetcher.get_fullcredits_page(sample_movie) == expected_url
#!/usr/bin/python import model from model import db from io import open from csv import DictReader db.drop_all() db.create_all() with open('data/movies.csv', 'r', encoding='utf-8-sig') as movies_file: reader = DictReader(movies_file) for row in reader: new_movie = model.Movie(name=row['name'], year=row['year']) actors = row['actors'].split(';') for actor in actors: print(actor) existing_actor = model.Actor.query.filter_by(name=actor).first() if (existing_actor): existing_actor.movies.append(new_movie) new_movie.actors.append(existing_actor) else: new_actor = model.Actor(name=actor) new_actor.movies.append(new_movie) new_movie.actors.append(new_actor) db.session.add(new_actor) db.session.add(new_movie) with open('data/songs.csv', 'r', encoding='utf-8-sig') as songs_file: