예제 #1
0
def load_movies(session):
    """Load movies from ``seed_data/u.item`` into the database.

    Each line of the file is ``|``-delimited:

        movie id | movie title | release date | video release date |
        IMDb URL | unknown | Action | Adventure | Animation |
        Children's | Comedy | Crime | Documentary | Drama | Fantasy |
        Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi |
        Thriller | War | Western |

    Only the id, title, release date and IMDb URL columns are used.
    Everything from the first "(" of the title onwards is dropped, and the
    title is decoded from Latin-1 so accented characters survive. Movies
    with an empty release date are stored without ``released_at``.

    Args:
        session: database session exposing ``add()`` and ``commit()``.
    """
    # The context manager closes the file even when a row fails to parse.
    with open("seed_data/u.item") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            movie_data = line.split("|")
            movie_id, title, released, imdb = (
                movie_data[0], movie_data[1], movie_data[2], movie_data[4])

            # Cut the title at the first "(" and decode the bytes to unicode.
            title = title.split("(")[0].strip().decode("latin-1")

            movie_fields = {"id": movie_id, "name": title, "imdb_url": imdb}

            # "01-Jan-1995" -> "01Jan1995", matching the "%d%b%Y" format.
            released = released.replace("-", "")
            if released:
                # Only set released_at when the file actually has a date;
                # strptime would raise ValueError on an empty string.
                movie_fields["released_at"] = datetime.strptime(
                    released, "%d%b%Y")

            session.add(model.Movie(**movie_fields))

    session.commit()
예제 #2
0
def test_get_movie_uid_fails():
    """get_movie_uid must raise AssertionError for each rejected movie path."""
    rejected_paths = (
        "/title/first_part/second_part",
        "/not_title/some_id/",
    )
    # Each path is checked in turn; the first one that does not raise
    # fails the test.
    for path in rejected_paths:
        movie = model.Movie(path, "Movie name")
        with pytest.raises(AssertionError):
            fetcher.get_movie_uid(movie)
def add_movie_to_watchlist(username, user_id, movie_name, movie_imdb_id,
                           movie_rating):
    """Add a movie to a user's watchlist and to the learning agent history.

    The user is looked up by email (``username``). A movie whose name is
    already in the user's watchlist is not added again.

    Returns:
        A status string: "Fail: Cannot find user!" when no user matches,
        "Movie already present in watchlist!" for a duplicate name,
        otherwise "Success".
    """
    account = model.User.query.filter_by(email=username).first()
    if not account:
        return "Fail: Cannot find user!"

    watchlist_entry = model.Movie(name=movie_name,
                                  movie_imdb_id=movie_imdb_id,
                                  user_rating=movie_rating)

    # Duplicates are detected by movie name only.
    if any(existing.name == movie_name for existing in account.movies):
        return "Movie already present in watchlist!"

    account.movies.append(watchlist_entry)
    model.session.commit()

    # Mirror the new entry into the learning agent's per-user history.
    agent = LearningAgentClient()
    history_record = {
        'user_id': user_id,
        'movie_imdb_id': movie_imdb_id,
        'user_rating': movie_rating,
        'timestamp': int(time.time())
    }
    agent.add_movie_to_user_history(history_record)
    return "Success"
예제 #4
0
def run(n_movies, json_file):
    """Run the movie recommender and print recommendations for every user.

    Args:
        n_movies: how many recommendations to request per user (passed to
            ``get_n_greatest``).
        json_file: passed to ``get_data``; the loaded data must provide a
            ``'users'`` sequence and a ``'movies'`` mapping of id -> value.
    """
    # Named ``data`` rather than ``json`` so the local does not shadow the
    # standard-library module name.
    data = get_data(json_file)
    users = data['users']
    movies = [model.Movie(int(k), v) for k, v in data['movies'].items()]
    usermoviematrix = model.UsersMoviesMatrix(users, movies)
    moviemoviematrix = model.MoviesMatrix(movies)

    # Register relation between movies
    for movie in movies:
        register_relations(movie, usermoviematrix, moviemoviematrix)

    # Compute cosine similarity between movies.
    # NOTE(review): ``moviea.id - 1`` is used as a matrix index, so movie
    # ids are presumably 1-based and contiguous -- confirm against the data.
    for moviea in movies:
        for movieb_id in range(moviea.id, len(movies)):
            sim = moviemoviematrix.compute_similarity(moviea.id - 1, movieb_id)
            moviemoviematrix.insert_value(movieb_id, moviea.id - 1, sim)

    # Index movies by id once so each recommended id is a dict lookup
    # instead of a linear scan. ``setdefault`` keeps the first movie seen
    # for an id, matching the previous "first match" behaviour.
    movies_by_id = {}
    for movie in movies:
        movies_by_id.setdefault(movie.id, movie)

    # Find which movies should be recommended
    predictions = [
        find_prediction_by(user['user_id'], usermoviematrix, moviemoviematrix)
        for user in users
    ]
    for pred in predictions:
        recommendation = pred.get_n_greatest(n_movies)
        # Single-argument print(...) behaves the same as the print statement.
        print('# The recommended movie for the user: %s' % pred.user.id)
        for key, _ in recommendation.iteritems():
            print(movies_by_id[key].name)
예제 #5
0
def load_movies(session):
    """Import the ``|``-delimited file ``seed/u.item`` into the session.

    For every row:
      * column 0 is the movie id (converted to ``int``),
      * column 1 is the title, decoded from Latin-1 bytes to unicode,
      * column 2 is the release date in ``%d-%b-%Y`` form (day, abbreviated
        month name, four-digit year); it may be empty,
      * column 4 is the URL.

    Rows with an empty release date are stored with ``released_at=None``.

    This function only adds objects to ``session``; it does not commit.

    Args:
        session: database session exposing ``add()``.
    """
    with open("seed/u.item") as f:
        reader = csv.reader(f, delimiter="|")

        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue

            movie_id = int(row[0])
            # Decode the Latin-1 encoded bytes into a unicode title.
            title = row[1].decode("latin-1")

            released_at = row[2]
            if released_at:
                formatted_date = datetime.datetime.strptime(
                    released_at, "%d-%b-%Y")
            else:
                # Reset explicitly for every row: otherwise a missing date
                # would reuse the previous movie's date (or raise NameError
                # on the very first row).
                formatted_date = None

            url = row[4]

            movie = model.Movie(id=movie_id,
                                title=title,
                                released_at=formatted_date,
                                url=url)
            session.add(movie)
예제 #6
0
def load_movies(session):
    """Load movies from ``seed_data/u.item`` and commit them.

    Rows whose release-date field is shorter than three characters are
    skipped. Each parsed release date is printed as the row is processed.
    """
    with open("seed_data/u.item") as source:
        for fields in csv.reader(source, delimiter="|"):
            record = model.Movie()

            date_text = fields[2]
            if len(date_text) >= 3:
                # Dates look like "01-Jan-1995".
                parsed_date = datetime.strptime(date_text, "%d-%b-%Y")
                print(parsed_date)

                # Titles are Latin-1 encoded bytes; store them as unicode.
                title = fields[1].decode("latin-1")

                record.id = fields[0]
                record.title = title
                record.release_date = parsed_date
                record.url = fields[4]
                session.add(record)

    session.commit()
예제 #7
0
def load_movies(session):
    """Read ``seed_data/u.item`` and store every movie in the database.

    The file is ``|``-delimited; columns 0, 1, 2 and 4 hold the movie id,
    the title, the release date ("day-Mon-year", e.g. "01-Jan-1995") and
    the IMDb URL. Everything from the first "(" of the title onwards is
    dropped and the title is decoded from Latin-1 to unicode. Movies with
    an empty release date are stored without ``released_at``.

    Args:
        session: database session exposing ``add()`` and ``commit()``.
    """
    # Month abbreviation -> month number. Built once instead of per row.
    months = {
        "Jan": 1,
        "Feb": 2,
        "Mar": 3,
        "Apr": 4,
        "May": 5,
        "Jun": 6,
        "Jul": 7,
        "Aug": 8,
        "Sep": 9,
        "Oct": 10,
        "Nov": 11,
        "Dec": 12
    }

    # The context manager closes the file even if a row fails to parse.
    with open("seed_data/u.item", "r") as movies_table:
        for line in movies_table:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            aline = line.split("|")
            movie_id = aline[0]
            release_date = aline[2]
            imdb_url = aline[4]

            # Cut the title at the first "(" and convert it to unicode.
            new_movie_title = aline[1].split("(")[0].strip().decode("latin-1")

            movie = model.Movie()
            movie.id = movie_id
            movie.name = new_movie_title
            movie.imdb_url = imdb_url

            # An empty date field cannot be unpacked into day/month/year,
            # so released_at is only set when a date is present.
            if release_date:
                day, month, year = release_date.split("-")
                movie.released_at = datetime.date(
                    int(year), months[month], int(day))

            session.add(movie)

    session.commit()
예제 #8
0
def load_movies(session):
    """Load movies from the ``|``-delimited ``seed_data/u.item`` and commit.

    Columns 0, 1, 2 and 4 are used as id, title (decoded from Latin-1),
    release date (``%d-%b-%Y``) and IMDb URL. Rows with an empty release
    date are stored with ``release_date=None``.

    Args:
        session: database session exposing ``add()`` and ``commit()``.
    """
    with open('seed_data/u.item', 'rb') as f:
        reader = csv.reader(f, delimiter='|')
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue

            # Reset per row so a missing date never inherits the previous
            # row's value (or raises NameError on the first row).
            release_date = None
            if row[2]:
                release_date = datetime.strptime(row[2], "%d-%b-%Y").date()

            movie = model.Movie(id=row[0],
                                title=row[1].decode("latin-1"),
                                release_date=release_date,
                                imdb_url=row[4])
            session.add(movie)

    session.commit()
def load_movies(session):
    """Load movies from ``../data/ml-10M100K/movies.dat`` and commit them.

    Each line has three ``::``-separated fields, for example::

        1::Toy Story (1995)::Adventure|Animation|Children|Comedy|Fantasy

    The movie id is translated through the module-level
    ``movie_id_to_index`` mapping before being passed to ``model.Movie``;
    title and genre are decoded from Latin-1.

    Args:
        session: database session exposing ``add()`` and ``commit()``.
    """
    with open("../data/ml-10M100K/movies.dat") as itemfile:
        for line in itemfile:
            # Drop the line terminator first; otherwise it ends up stored
            # at the end of the genre field.
            line = line.rstrip("\r\n")
            if not line:
                continue

            fields = line.split('::')
            movie_id = fields[0]
            title = fields[1].decode("latin-1")
            genre = fields[2].decode("latin-1")

            item = model.Movie(movie_id_to_index[movie_id], title, genre)
            session.add(item)

    session.commit()
예제 #10
0
파일: seed.py 프로젝트: zardra/Ratings
def load_movies(session):
    """Load movies from the ``|``-delimited ``./seed_data/u.item`` and commit.

    Columns 0, 1, 2 and 4 are used as id, title, release date
    (``%d-%b-%Y``) and IMDb URL. A trailing "(YYYY)" is removed from the
    title, which is then decoded from Latin-1. Movies with an empty release
    date are stored without ``released_date``.

    Args:
        session: database session exposing ``add()`` and ``commit()``.
    """
    import re

    # Matches only a trailing "(YYYY)" plus surrounding whitespace. Unlike
    # str.strip("()1234567890") it never removes digits that belong to the
    # title itself (e.g. a title starting with a number).
    year_suffix = re.compile(r"\s*\(\d{4}\)\s*$")

    with open("./seed_data/u.item", "rb") as f:
        reader = csv.reader(f, delimiter="|", quoting=csv.QUOTE_NONE)
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue

            name = year_suffix.sub("", row[1]).strip().decode("latin-1")

            fields = {"id": row[0], "name": name, "imdb_url": row[4]}
            if row[2]:
                fields["released_date"] = datetime.strptime(
                    row[2], "%d-%b-%Y")

            # Build and add a fresh entry for every row, dated or not, so a
            # dateless row can never re-add the previous row's entry.
            session.add(model.Movie(**fields))

    session.commit()
예제 #11
0
def load_movies(session, filename):
    """Load movies from a ``|``-delimited u.item style file and commit them.

    Columns 0, 1, 2 and 4 are used as movie id, title, release date
    (``%d-%b-%Y``) and IMDb URL; every field is stripped of surrounding
    whitespace. A trailing "(YYYY)" is removed from the title, which is
    then decoded from Latin-1. Movies with an empty release date are stored
    without ``release_date``.

    Args:
        session: database session exposing ``add()`` and ``commit()``.
        filename: path of the file to read.
    """
    import re

    # Remove the year only when it is really there; a blind [:-6] slice
    # truncates titles that carry no "(YYYY)" suffix.
    year_suffix = re.compile(r"\s*\(\d{4}\)$")

    with open(filename, 'rb') as csvfile:
        lines = csv.reader(csvfile, delimiter='|')

        for line in lines:
            if not line:
                # csv.reader yields an empty list for a blank line.
                continue

            movie = model.Movie()
            movie.movie_id = line[0].strip()

            title = year_suffix.sub("", line[1].strip())
            movie.title = title.decode("latin-1").strip()

            # strptime raises ValueError on an empty string, so the date is
            # only parsed when the field is filled in.
            release_date = line[2].strip()
            if release_date:
                movie.release_date = datetime.strptime(
                    release_date, "%d-%b-%Y")

            movie.imdb = line[4].strip()
            session.add(movie)

    session.commit()
예제 #12
0
def load_movies(session):
    """Load movies from the ``|``-delimited ``seed_data/u.item`` and commit.

    Columns 0, 1, 2 and 4 are used as id, title, release date
    (``%d-%b-%Y``) and IMDb URL. The last six characters of the decoded
    title are dropped and the remainder is stripped. Rows without a usable
    release date (empty or "0") are skipped.

    Args:
        session: database session exposing ``add()`` and ``commit()``.
    """
    with open("seed_data/u.item") as m:
        reader = csv.reader(m, delimiter="|")
        for line in reader:
            if not line:
                # csv.reader yields an empty list for a blank line.
                continue

            str_time = line[2]
            # Skip rows with no date. An empty field has to be caught here
            # as well as "0", because strptime raises ValueError on "".
            if not str_time or str_time == "0":
                continue

            # Titles are Latin-1 bytes; the slice removes the last six
            # characters (the "(YYYY)" year suffix in this file's titles).
            movie_title = line[1].decode("latin-1")[:-6].strip()

            release_datetime = datetime.strptime(str_time, "%d-%b-%Y")
            new_movie = model.Movie(id=line[0], movie_title=movie_title,
                                    release_date=release_datetime,
                                    IMDB=line[4])
            session.add(new_movie)

    # Commit all movies added to the session.
    session.commit()
예제 #13
0
def test_get_movie_uid():
    """get_movie_uid returns "title_id" for the path "/title/title_id/"."""
    sample_movie = model.Movie("/title/title_id/", "Movie name")
    extracted_uid = fetcher.get_movie_uid(sample_movie)
    assert extracted_uid == "title_id"
예제 #14
0
def test_get_full_credits():
    """get_fullcredits_page builds the IMDb full-credits URL for a movie."""
    expected_url = "https://www.imdb.com/title/title_id/fullcredits/"
    sample_movie = model.Movie("/title/title_id/", "Movie name")
    assert fetcher.get_fullcredits_page(sample_movie) == expected_url
예제 #15
0
#!/usr/bin/python

import model
from model import db
from io import open
from csv import DictReader

# Reset the database before seeding: drop_all() followed by create_all().
db.drop_all()
db.create_all()

# Seed movies and their actors from data/movies.csv.
# The "utf-8-sig" codec skips a leading byte-order mark if the file has one.
# NOTE(review): no db.session.commit() is visible in this section --
# presumably it happens later in the script; confirm.
with open('data/movies.csv', 'r', encoding='utf-8-sig') as movies_file:
    # DictReader maps each row to a dict; the columns read below are
    # 'name', 'year' and 'actors'.
    reader = DictReader(movies_file)
    for row in reader:
        new_movie = model.Movie(name=row['name'], year=row['year'])

        # The 'actors' column holds ';'-separated actor names (not stripped).
        actors = row['actors'].split(';')
        for actor in actors:
            print(actor)
            # Reuse an Actor with this exact name if the query finds one;
            # otherwise create a new Actor and add it to the session.
            existing_actor = model.Actor.query.filter_by(name=actor).first()
            if (existing_actor):
                # NOTE(review): both sides of the relationship are appended
                # by hand. If Actor.movies / Movie.actors are linked via
                # backref or back_populates, one append would be enough and
                # the second may produce a duplicate -- confirm in model.
                existing_actor.movies.append(new_movie)
                new_movie.actors.append(existing_actor)
            else:
                new_actor = model.Actor(name=actor)
                new_actor.movies.append(new_movie)
                new_movie.actors.append(new_actor)
                db.session.add(new_actor)

        db.session.add(new_movie)

with open('data/songs.csv', 'r', encoding='utf-8-sig') as songs_file: