def test_for_extend_movie_metascore():
    """Check that metascore starts as None, stays None after -10 and 101, and stores 99."""
    film = Movie("a", 2016)
    assert film.metascore is None

    # Each of these assignments is expected to leave the score unset.
    for rejected_score in (-10, 101):
        film.metascore = rejected_score
        assert film.metascore is None

    film.metascore = 99
    assert film.metascore == 99
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` and fill the movie/director/actor/genre lists.

    Every row yields one ``Movie`` appended to ``self.__movies``. Directors,
    actors and genres are appended to their lists only when an equal object is
    not already present.
    """
    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for record in csv.DictReader(csvfile):
            film = Movie(record['Title'], int(record['Year']))
            # NOTE(review): votes, metascore and rating are stored as the raw
            # CSV text — confirm the Movie setters expect strings.
            film.votes = record['Votes']
            film.metascore = record['Metascore']
            film.rating = record['Rating']
            film.description = record['Description']
            self.__movies.append(film)

            filmmaker = Director(record['Director'])
            if filmmaker not in self.__directors:
                self.__directors.append(filmmaker)

            # Actor names are separated by "," with an optional following space.
            for actor_name in re.split(", |,", record["Actors"]):
                performer = Actor(actor_name)
                if performer not in self.__actors:
                    self.__actors.append(performer)

            for genre_name in record['Genre'].split(","):
                category = Genre(genre_name)
                if category not in self.__genres:
                    self.__genres.append(category)
def read_csv_file(self):
    """Load every row of the CSV at ``self.__file_name`` into this reader.

    For each row a ``Movie`` is built and populated, its actors and genres are
    handed to ``self.load_actors`` / ``self.load_genres``, the director is
    added to ``self.__directors`` and the movie is appended to
    ``self.__movies`` unless an equal movie is already there.

    Numeric columns that cannot be parsed fall back to ``None`` (rank, year,
    runtime, revenue, metascore) or ``0`` (rating, votes).
    """

    def parse(text, cast, fallback=None):
        # Convert one CSV cell, substituting the fallback for unparseable text.
        try:
            return cast(text)
        except ValueError:
            return fallback

    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for row in csv.DictReader(csvfile):
            director = Director(row['Director'])

            mov = Movie(row['Title'], parse(row['Year'], int))
            mov.director = director
            mov.description = row['Description']
            mov.metascore = parse(row['Metascore'], int)
            mov.rank = parse(row['Rank'], int)
            mov.runtime_minutes = parse(row['Runtime (Minutes)'], int)
            # int() raises ValueError for decimal text such as "8.1", which
            # turned every fractional rating into 0; float() accepts both
            # whole and decimal values.
            mov.rating = parse(row['Rating'], float, 0)
            mov.votes = parse(row['Votes'], int, 0)
            mov.revenue = parse(row['Revenue (Millions)'], float)

            self.load_actors(mov, row['Actors'].split(','))
            if director not in self.__directors:
                self.__directors.add(director)
            self.load_genres(mov, row['Genre'].split(','))
            if mov not in self.__movies:
                self.__movies.append(mov)
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` and populate the four dataset lists.

    Each row becomes a ``Movie`` appended to ``self.__dataset_of_movies``.
    Its director, actors and genres are attached to the movie and appended to
    the matching dataset list when no equal object is already present.

    Title, year and runtime are required: a non-integer year or runtime raises
    ``ValueError``. Rating, votes, revenue and metascore are best-effort: text
    that cannot be converted leaves the attribute untouched.
    """
    # (attribute on Movie, CSV column, converter) for the best-effort fields.
    optional_fields = (
        ("external_rating", "Rating", float),
        ("rating_votes", "Votes", int),
        ("revenue_in_millions", "Revenue (Millions)", float),
        ("metascore", "Metascore", int),
    )

    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for row in csv.DictReader(csvfile):
            movie = Movie(row["Title"], int(row["Year"]))
            movie.description = row["Description"]

            # Attach the same Director object that is stored in the dataset,
            # matching how actors and genres are attached as objects below
            # (previously the raw name string was assigned to the movie).
            director = Director(row["Director"])
            movie.director = director
            if director not in self.__dataset_of_directors:
                self.__dataset_of_directors.append(director)

            for actor_name in row["Actors"].split(","):
                actor = Actor(actor_name)
                movie.add_actor(actor)
                if actor not in self.__dataset_of_actors:
                    self.__dataset_of_actors.append(actor)

            for genre_name in row["Genre"].split(","):
                genre = Genre(genre_name)
                movie.add_genre(genre)
                if genre not in self.__dataset_of_genres:
                    self.__dataset_of_genres.append(genre)

            movie.runtime_minutes = int(row["Runtime (Minutes)"])

            for attribute, column, convert in optional_fields:
                try:
                    setattr(movie, attribute, convert(row[column]))
                except ValueError:
                    # Deliberately best-effort: unparseable text keeps the
                    # attribute at whatever Movie initialised it to.
                    continue

            self.__dataset_of_movies.append(movie)
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` into the movie, actor, director and genre datasets.

    A ``Movie`` is created per row and appended to ``self.__dataset_of_movies``
    when no equal movie is present. Actors, directors and genres are added to
    their datasets when absent. Rank and rating are assigned only when their
    numeric value passes the range check; metascore is assigned when it is in
    0..100 or when it cannot be parsed as a number at all.
    """
    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for record in csv.DictReader(csvfile):
            film = Movie(record['Title'], int(record['Year']))
            cast_text = record['Actors']
            director_name = record['Director']
            genre_text = record['Genre']

            if film not in self.__dataset_of_movies:
                self.__dataset_of_movies.append(film)

            for actor_name in cast_text.split(','):
                performer = Actor(actor_name)
                if performer not in self.__dataset_of_actors:
                    self.__dataset_of_actors.add(performer)

            filmmaker = Director(director_name)
            if filmmaker not in self.__dataset_of_directors:
                self.__dataset_of_directors.add(filmmaker)

            for genre_name in genre_text.split(','):
                film.genres.append(Genre(genre_name))
                category = Genre(genre_name)
                if category not in self.__dataset_of_genres:
                    self.__dataset_of_genres.add(category)

            # NOTE(review): rank, rating, revenue and metascore are stored as
            # the raw CSV text, not the parsed number — confirm the Movie
            # setters expect strings.
            rank_text = record['Rank']
            if int(rank_text) > 0:
                film.rank = rank_text

            rating_text = record['Rating']
            if 0 <= float(rating_text) <= 10:
                film.rating = rating_text

            film.votes = int(record['Votes'])
            film.revenue_millions = record['Revenue (Millions)']

            metascore_text = record['Metascore']
            try:
                if 0 <= float(metascore_text) <= 100:
                    film.metascore = metascore_text
            except ValueError:
                # Non-numeric text is stored unchanged.
                film.metascore = metascore_text
def _read_row(self, row: _ROW) -> Movie:
    """
    Helper method to construct a Movie from a row.

    Genres, the director and the actors are resolved through
    ``self._get_genre``, ``self._get_director`` and ``self._get_actor``.
    Revenue and metascore are optional: the literal text ``'N/A'`` leaves the
    corresponding Movie attribute unassigned.

    Args:
        row: mapping from CSV column name to the cell text.

    Returns:
        The populated Movie.

    Raises:
        ValueError: unable to parse row: {row}
    """
    try:
        title = row['Title']
        genres = [
            self._get_genre(Genre(name)) for name in row['Genre'].split(',')
        ]
        description = row['Description']
        director = self._get_director(Director(row['Director']))
        actors = [
            self._get_actor(Actor(name)) for name in row['Actors'].split(',')
        ]
        release_year = int(row['Year'])
        runtime_minutes = int(row['Runtime (Minutes)'])
        rating = float(row['Rating'])
        votes = int(row['Votes'])

        revenue_text = row['Revenue (Millions)']
        revenue_millions = float(revenue_text) if revenue_text != 'N/A' else None

        metascore_text = row['Metascore']
        metascore = int(metascore_text) if metascore_text != 'N/A' else None
    except (KeyError, ValueError) as err:
        # A missing column and an unparseable number are reported identically;
        # the original exception is chained for debugging.
        raise ValueError(f'unable to parse row: {row}') from err

    movie = Movie(title, release_year)
    movie.genres = genres
    movie.description = description
    movie.director = director
    movie.actors = actors
    movie.runtime_minutes = runtime_minutes
    movie.rating = rating
    movie.votes = votes

    # Compare against None rather than truthiness so that a parsed value of
    # zero is still stored.
    if revenue_millions is not None:
        movie.revenue_millions = revenue_millions
    if metascore is not None:
        movie.metascore = metascore

    return movie
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` into the dataset lists, reusing equal objects.

    When a director, genre or actor equal to the one just parsed already sits
    in its dataset list, the stored object is attached to the movie instead of
    the freshly built one; otherwise the new object is appended to the list.
    A movie is appended to ``self.dataset_of_movies`` only when no equal movie
    is present. Metascore and revenue are skipped when the cell is ``'N/A'``.
    """

    def stored_or_added(pool, candidate):
        # Return the equal object already in pool, or append and return candidate.
        try:
            return pool[pool.index(candidate)]
        except ValueError:
            pool.append(candidate)
            return candidate

    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for record in csv.DictReader(csvfile):
            film = Movie(record['Title'], int(record['Year']))
            film.description = record['Description']

            film.director = stored_or_added(
                self.dataset_of_directors, Director(record['Director'])
            )

            film.votes = int(record['Votes'])
            film.rating = float(record['Rating'])
            film.runtime_minutes = int(record['Runtime (Minutes)'])

            metascore_text = record['Metascore']
            if metascore_text != 'N/A':
                film.metascore = float(metascore_text)

            revenue_text = record['Revenue (Millions)']
            if revenue_text != 'N/A':
                film.revenue = float(revenue_text)

            for genre_name in record['Genre'].split(","):
                film.add_genre(
                    stored_or_added(self.dataset_of_genres, Genre(genre_name))
                )

            for actor_name in record['Actors'].split(","):
                film.add_actor(
                    stored_or_added(self.dataset_of_actors, Actor(actor_name))
                )

            if film not in self.dataset_of_movies:
                self.dataset_of_movies.append(film)
def populated_movies(genres, directors, actors):
    """Return ten sample movies, ``Movie0`` .. ``Movie9``, released 2000 .. 2009.

    Movie ``n`` takes ``genres[n]``, ``directors[n]`` and ``actors[n]``, so each
    argument must be indexable for 0..9. Revenue is set only for even ``n`` and
    metascore only for multiples of four.
    """

    def build(n):
        film = Movie(f'Movie{n}', 2000 + n)
        film.genres = [genres[n]]
        film.description = f'Description{n}'
        film.director = directors[n]
        film.actors = [actors[n]]
        film.runtime_minutes = n + 1
        film.rating = float(n)
        film.votes = n
        if not n % 2:
            film.revenue_millions = float(n + 1)
        if not n % 4:
            film.metascore = n * 10
        return film

    return [build(n) for n in range(10)]
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` into the movie, director, actor and genre sets.

    Each row becomes a ``Movie`` that is given its director, actors and genres
    and then added to ``self.dataset_of_movies``. The same objects are added
    to ``self._directors``, ``self._actors`` and ``self._genres``.

    Rating, votes, revenue and metascore are assigned ``None`` when the cell
    holds the literal text ``"N/A"``. Year and runtime are required integers
    and raise ``ValueError`` otherwise.
    """

    def parse_optional(text, cast):
        # "N/A" marks a missing value in the optional numeric columns.
        return cast(text) if text != "N/A" else None

    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for row in csv.DictReader(csvfile):
            movie = Movie(row['Title'], int(row['Year']))
            movie.description = row['Description']
            movie.runtime_minutes = int(row['Runtime (Minutes)'])
            movie.external_rating = parse_optional(row['Rating'], float)
            movie.rating_votes = parse_optional(row['Votes'], int)
            movie.revenue = parse_optional(row['Revenue (Millions)'], float)
            movie.metascore = parse_optional(row['Metascore'], int)

            director = Director(row['Director'])
            movie.director = director
            self._directors.add(director)

            for actor_name in row['Actors'].split(","):
                actor = Actor(actor_name)
                movie.add_actor(actor)
                self._actors.add(actor)

            for genre_name in row['Genre'].split(","):
                genre = Genre(genre_name)
                movie.add_genre(genre)
                self._genres.add(genre)

            self.dataset_of_movies.add(movie)
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` and append one Movie per data row.

    The first row is treated as a header and skipped. Columns are read by
    position: 0 rank, 1 title, 2 genres, 3 description, 4 director, 5 actors,
    6 release year, 7 runtime, 8 rating, 11 metascore. Each movie is appended
    to ``self.movie_full_details``.
    """
    # The context manager closes the file even if a row fails to parse; the
    # previous inline open() was never closed explicitly.
    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        csv_file_contents = csv.reader(csvfile)
        # Skip the header row; the default keeps an empty file from raising.
        next(csv_file_contents, None)

        for row in csv_file_contents:
            temp_movie = Movie("", "")
            # Ids are zero-based while the first column counts from one.
            temp_movie.id = int(row[0]) - 1
            temp_movie.title = row[1]
            temp_movie.genres = [Genre(genre) for genre in row[2].split(",")]
            temp_movie.description = row[3]
            temp_movie.director = Director(row[4])
            temp_movie.actors = [Actor(actor) for actor in row[5].split(",")]
            temp_movie.releaseDate = int(row[6])
            temp_movie.runtime_minutes = int(row[7])
            temp_movie.rating = float(row[8])
            # NOTE(review): metascore is stored as raw text, unlike the other
            # numeric columns — confirm this is intended.
            temp_movie.metascore = row[11]
            self.movie_full_details.append(temp_movie)
def test_movie_metascore():
    """Check that 43.3 is stored as the metascore and a later -1 does not replace it."""
    accepted_score = 43.3
    film = Movie("Star Wars The Third Gathers: Backstroke of the West", 2000)

    film.metascore = accepted_score
    assert film.metascore == accepted_score

    # The negative value is expected to be ignored, keeping the earlier score.
    film.metascore = -1
    assert film.metascore == accepted_score
def load_movies_and_actors_genres_director(data_path: str, repo: MemoryRepository):
    """Load ``Data1000Movies.csv`` from ``data_path`` into ``repo``.

    The file is processed in two passes. The first creates a ``Movie`` per row
    (keyed by the integer in column 0, also stored as its rank), adds it to
    the repository and records which movie keys belong to each genre, actor
    and director name. The second creates one ``Genre`` / ``Actor`` /
    ``Director`` per distinct name, links it to all of its movies in both
    directions and adds it to the repository.

    Args:
        data_path: directory containing ``Data1000Movies.csv``.
        repo: repository receiving the movies, genres, actors and directors.
    """

    def names_in(cell):
        # Strip surrounding whitespace so "A, B" and "B,A" name the same "B";
        # otherwise " B" and "B" become separate keys and separate objects.
        return [name.strip() for name in cell.split(",")]

    movie_keys_by_genre = {}
    movie_keys_by_actor = {}
    movie_keys_by_director = {}

    for data_row in read_csv_file(os.path.join(data_path, 'Data1000Movies.csv')):
        movie_key = int(data_row[0])

        for genre_name in names_in(data_row[2]):
            movie_keys_by_genre.setdefault(genre_name, []).append(movie_key)
        for actor_name in names_in(data_row[5]):
            movie_keys_by_actor.setdefault(actor_name, []).append(movie_key)
        movie_keys_by_director.setdefault(data_row[4].strip(), []).append(movie_key)

        movie = Movie(data_row[1], int(data_row[6]))
        movie.rank = movie_key
        movie.description = data_row[3]
        movie.runtime_minutes = int(data_row[7])
        # "N/A" marks a missing value; the attribute is then left unassigned.
        if data_row[8] != "N/A":
            movie.rating = float(data_row[8])
        if data_row[9] != "N/A":
            movie.votes = int(data_row[9])
        if data_row[10] != "N/A":
            movie.revenue = float(data_row[10])
        if data_row[11] != "N/A":
            movie.metascore = float(data_row[11])
        repo.add_movie(movie)

    for genre_name, movie_keys in movie_keys_by_genre.items():
        genre = Genre(genre_name)
        for movie_key in movie_keys:
            movie = repo.get_movie(movie_key)
            genre.add_movie(movie)
            movie.add_genre(genre)
        repo.add_genre(genre)

    for actor_name, movie_keys in movie_keys_by_actor.items():
        actor = Actor(actor_name)
        for movie_key in movie_keys:
            movie = repo.get_movie(movie_key)
            actor.add_movie(movie)
            movie.add_actor(actor)
        repo.add_actor(actor)

    for director_name, movie_keys in movie_keys_by_director.items():
        director = Director(director_name)
        for movie_key in movie_keys:
            movie = repo.get_movie(movie_key)
            director.add_movie(movie)
            movie.director = director
        repo.add_director(director)
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` and rebuild the four dataset lists.

    The movie, actor, director and genre lists are reset to empty once the
    file has been opened, then filled row by row. Every row is appended as a
    movie (each row is assumed to be a unique movie). Actors, directors and
    genres are appended only when no equal object is already in their list.

    Title, year, runtime, rating and votes are required; a non-numeric value
    in the numeric ones raises ``ValueError``. Revenue and metascore are
    optional: text that cannot be converted leaves the Movie attribute
    unassigned.
    """
    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        movie_file_reader = csv.DictReader(csvfile)

        self.__dataset_of_movies = list()
        self.__dataset_of_actors = list()
        self.__dataset_of_directors = list()
        self.__dataset_of_genres = list()

        for row in movie_file_reader:
            title = row['Title'].strip()  # strip to be safe
            release_year = int(row['Year'])
            actor_full_name_list = get_list_from_comma_string(row['Actors'])
            director_full_name = row['Director'].strip()  # strip to be safe
            genre_name_list = get_list_from_comma_string(row['Genre'])

            # - other things
            description = row['Description']
            runtime_minutes = int(row['Runtime (Minutes)'])
            external_rating = float(row['Rating'])
            external_rating_votes = int(row['Votes'])

            # Only unparseable text (ValueError) is tolerated here. The column
            # key previously lacked its space and a bare except hid the
            # resulting KeyError, so revenue was never loaded.
            try:
                revenue = float(row['Revenue (Millions)'])
            except ValueError:
                revenue = None
            try:
                metascore = int(row['Metascore'])
            except ValueError:
                metascore = None

            # -------- Make Actor, Director and Genre objects
            actor_object_list = [
                Actor(actor_full_name)
                for actor_full_name in actor_full_name_list
            ]
            director_object = Director(director_full_name)
            genre_object_list = [
                Genre(genre_name) for genre_name in genre_name_list
            ]

            # -------- Create and set up the Movie object
            movie_object = Movie(title, release_year)
            movie_object.actors = actor_object_list
            # NOTE(review): the movie receives the director's name while the
            # dataset stores director_object — confirm the Movie.director
            # setter accepts a plain string.
            movie_object.director = director_full_name
            movie_object.genres = genre_object_list
            movie_object.release_year = release_year
            # - other things
            movie_object.description = description
            movie_object.runtime_minutes = runtime_minutes
            movie_object.external_rating = external_rating
            movie_object.external_rating_votes = external_rating_votes
            if revenue is not None:
                movie_object.revenue = revenue
            if metascore is not None:
                movie_object.metascore = metascore

            # -------- Add everything gathered above to the datasets
            self.__dataset_of_movies.append(movie_object)
            for actor_object in actor_object_list:
                if actor_object not in self.__dataset_of_actors:
                    self.__dataset_of_actors.append(actor_object)
            if director_object not in self.__dataset_of_directors:
                self.__dataset_of_directors.append(director_object)
            for genre_object in genre_object_list:
                if genre_object not in self.__dataset_of_genres:
                    self.__dataset_of_genres.append(genre_object)


# To test my assignment, run the following lines of code (I have copied and pasted these into my test_all.py file too)
#movie_file_csv_reader_object = MovieFileCSVReader(r"C:\Users\Nathan Longhurst\OneDrive - The University of Auckland\b Comp235\Assignment\GitHub Clone (Current)\CS235FlixSkeleton\datafiles\Data1000Movies.csv")
#unclean_dict = movie_file_csv_reader_object.check_csv_file()
#movie_file_csv_reader_object.clean_csv_file(unclean_dict)
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` into the movie, actor, director and genre lists.

    One ``Movie`` per row is appended to ``self.dataset_of_movies``. Runtime,
    rating, votes, revenue and metascore are assigned only when the cell is
    not the literal ``"N/A"``. Actors, directors and genres are appended to
    their lists when no equal object is already present.
    """
    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for record in csv.DictReader(csvfile):
            # Pull the row into domain objects first.
            title = record['Title']
            genre_objects = [Genre(name) for name in record['Genre'].split(",")]
            year = int(record['Year'])
            description = record['Description']
            director = Director(record['Director'])
            actor_objects = [Actor(name) for name in record['Actors'].split(",")]
            runtime_text = record['Runtime (Minutes)']
            rating_text = record['Rating']
            votes_text = record['Votes']
            revenue_text = record['Revenue (Millions)']
            metascore_text = record['Metascore']

            # Build and populate the movie.
            film = Movie(title, year)
            for genre in genre_objects:
                film.add_genre(genre)
            film.description = description
            film.director = director
            for actor in actor_objects:
                film.add_actor(actor)
            if runtime_text != "N/A":
                film.runtime_minutes = int(runtime_text)
            if rating_text != "N/A":
                film.rating = float(rating_text)
            if votes_text != "N/A":
                film.votes = int(votes_text)
            if revenue_text != "N/A":
                film.revenue = float(revenue_text)
            if metascore_text != "N/A":
                film.metascore = int(metascore_text)

            # Record everything in the datasets.
            self.dataset_of_movies.append(film)
            for actor in actor_objects:
                if actor not in self.dataset_of_actors:
                    self.dataset_of_actors.append(actor)
            if director not in self.dataset_of_directors:
                self.dataset_of_directors.append(director)
            for genre in genre_objects:
                if genre not in self.dataset_of_genres:
                    self.dataset_of_genres.append(genre)
def read_csv_file(self):
    """Read the CSV at ``self.__file_name`` into the movie list and the actor/director/genre sets.

    Each row's ``Movie`` is appended to ``self.__dataset_of_movies`` and then
    populated. Actor and genre names are stripped of surrounding whitespace
    before the objects are created. Rank, rating, votes, revenue and metascore
    are assigned exactly as read from the file.
    """
    # (attribute on Movie, CSV column) pairs copied across without conversion.
    # NOTE(review): these are stored as raw CSV text — confirm the Movie
    # setters expect strings.
    raw_columns = (
        ("rank", "Rank"),
        ("rating", "Rating"),
        ("votes", "Votes"),
        ("revenue", "Revenue (Millions)"),
        ("metascore", "Metascore"),
    )

    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for record in csv.DictReader(csvfile):
            film = Movie(record['Title'], int(record['Year']))
            self.__dataset_of_movies.append(film)

            film.description = record["Description"]
            film.runtime_minutes = int(record["Runtime (Minutes)"])

            for actor_name in (part.strip() for part in record['Actors'].split(",")):
                film.add_actor(Actor(actor_name))
                self.__dataset_of_actors.add(Actor(actor_name))

            director_name = record['Director']
            film.director = Director(director_name)
            self.__dataset_of_directors.add(Director(director_name))

            for genre_name in (part.strip() for part in record['Genre'].split(",")):
                film.add_genre(Genre(genre_name))
                self.__dataset_of_genres.add(Genre(genre_name))

            for attribute, column in raw_columns:
                setattr(film, attribute, record[column])