def read_csv_file(self):
    """Load every row of the CSV file into the reader's datasets.

    Opens ``self.__file_name`` as UTF-8 (``utf-8-sig``, so a leading BOM is
    skipped), builds one ``Movie`` per row and appends it to
    ``self.__dataset_of_movies``. Every director, actor and genre is
    appended to its dataset list the first time it is seen.

    The optional numeric columns ``Rating``, ``Votes``,
    ``Revenue (Millions)`` and ``Metascore`` are simply not assigned on the
    movie when converting their text raises ``ValueError``.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a required column is missing from a row.
        ValueError: if ``Year`` or ``Runtime (Minutes)`` is not an integer.
    """
    # Optional numeric columns: (movie attribute, CSV column, converter).
    optional_columns = (
        ("external_rating", "Rating", float),
        ("rating_votes", "Votes", int),
        ("revenue_in_millions", "Revenue (Millions)", float),
        ("metascore", "Metascore", int),
    )

    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for row in csv.DictReader(csvfile):
            movie = Movie(row["Title"], int(row["Year"]))
            movie.description = row["Description"]

            director_name = row["Director"]
            # NOTE(review): the movie receives the raw name string while the
            # dataset stores Director objects -- confirm that the Movie's
            # director setter accepts a string.
            movie.director = director_name
            director = Director(director_name)
            if director not in self.__dataset_of_directors:
                self.__dataset_of_directors.append(director)

            # Names are comma separated; strip the whitespace that follows
            # each comma so the same person is not stored under two spellings.
            for actor_name in row["Actors"].split(","):
                actor = Actor(actor_name.strip())
                movie.add_actor(actor)
                if actor not in self.__dataset_of_actors:
                    self.__dataset_of_actors.append(actor)

            for genre_name in row["Genre"].split(","):
                genre = Genre(genre_name.strip())
                movie.add_genre(genre)
                if genre not in self.__dataset_of_genres:
                    self.__dataset_of_genres.append(genre)

            movie.runtime_minutes = int(row["Runtime (Minutes)"])

            for attribute, column, convert in optional_columns:
                try:
                    setattr(movie, attribute, convert(row[column]))
                except ValueError:
                    # Deliberate best effort: a non-numeric cell leaves the
                    # attribute at whatever the Movie initialised it to.
                    pass

            self.__dataset_of_movies.append(movie)
def read_csv_file(self):
    """Load every row of the CSV file into the reader's collections.

    Opens ``self.__file_name`` as UTF-8 (``utf-8-sig``, so a leading BOM is
    skipped) and, for each row, builds a ``Movie`` with its description,
    runtime, director, actors and genres. The movie is added to
    ``self.dataset_of_movies``; its director, actors and genres are added to
    ``self._directors``, ``self._actors`` and ``self._genres``.

    The ``Rating``, ``Votes``, ``Revenue (Millions)`` and ``Metascore``
    columns may hold the placeholder ``"N/A"``, in which case the matching
    movie attribute is set to ``None``.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a required column is missing from a row.
        ValueError: if a numeric column holds text other than a number
            (or, for the optional columns, other than ``"N/A"``).
    """

    def parse_optional(text, convert):
        """Return ``convert(text)``, or ``None`` for the "N/A" placeholder."""
        return convert(text) if text != "N/A" else None

    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        for row in csv.DictReader(csvfile):
            movie = Movie(row['Title'], int(row['Year']))
            movie.description = row['Description']
            movie.runtime_minutes = int(row['Runtime (Minutes)'])
            movie.external_rating = parse_optional(row['Rating'], float)
            movie.rating_votes = parse_optional(row['Votes'], int)
            movie.revenue = parse_optional(row['Revenue (Millions)'], float)
            movie.metascore = parse_optional(row['Metascore'], int)

            director = Director(row['Director'])
            movie.director = director
            self._directors.add(director)

            # Names are comma separated; strip the whitespace that follows
            # each comma so the same person is not stored under two spellings.
            for actor_name in row['Actors'].split(","):
                actor = Actor(actor_name.strip())
                movie.add_actor(actor)
                self._actors.add(actor)

            for genre_name in row['Genre'].split(","):
                genre = Genre(genre_name.strip())
                movie.add_genre(genre)
                self._genres.add(genre)

            self.dataset_of_movies.add(movie)
def read_csv_file(self):
    """Rebuild all datasets from the CSV file.

    Opens ``self.__file_name`` as UTF-8 (``utf-8-sig``, so a leading BOM is
    skipped), resets the movie, actor, director and genre lists to empty,
    then creates one ``Movie`` per row. Each row is assumed to describe a
    unique movie, so movies are appended without a duplicate check; actors,
    directors and genres are appended only the first time they are seen.

    ``Revenue (Millions)`` and ``Metascore`` are optional: when the cell is
    missing or not numeric the movie attribute is left unassigned.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a required column is missing from a row.
        ValueError: if ``Year``, ``Runtime (Minutes)``, ``Rating`` or
            ``Votes`` is not numeric.
    """
    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        movie_file_reader = csv.DictReader(csvfile)

        # Start from empty datasets so a second read does not accumulate
        # entries on top of the first.
        self.__dataset_of_movies = []
        self.__dataset_of_actors = []
        self.__dataset_of_directors = []
        self.__dataset_of_genres = []

        for row in movie_file_reader:
            title = row['Title'].strip()  # strip to be safe
            release_year = int(row['Year'])
            # get_list_from_comma_string is defined elsewhere; presumably it
            # splits on commas and trims each item -- verify against helper.
            actor_full_name_list = get_list_from_comma_string(row['Actors'])
            director_full_name = row['Director'].strip()  # strip to be safe
            genre_name_list = get_list_from_comma_string(row['Genre'])

            description = row['Description']
            runtime_minutes = int(row['Runtime (Minutes)'])
            external_rating = float(row['Rating'])
            external_rating_votes = int(row['Votes'])

            # Some cells are not numeric (e.g. "N/A"). Catch only conversion
            # failures so a wrong column name surfaces as KeyError instead of
            # silently producing None for every row.
            try:
                revenue = float(row['Revenue (Millions)'])
            except (TypeError, ValueError):
                revenue = None
            try:
                metascore = int(row['Metascore'])
            except (TypeError, ValueError):
                metascore = None

            # -------- Make Actor, Director and Genre objects
            actor_object_list = [
                Actor(actor_full_name)
                for actor_full_name in actor_full_name_list
            ]
            director_object = Director(director_full_name)
            genre_object_list = [
                Genre(genre_name) for genre_name in genre_name_list
            ]

            # -------- Create and set up the Movie object
            movie_object = Movie(title, release_year)
            movie_object.actors = actor_object_list
            # NOTE(review): the movie receives the name string while the
            # dataset stores director_object -- confirm that the Movie's
            # director setter accepts a string.
            movie_object.director = director_full_name
            movie_object.genres = genre_object_list
            movie_object.release_year = release_year
            movie_object.description = description
            movie_object.runtime_minutes = runtime_minutes
            movie_object.external_rating = external_rating
            movie_object.external_rating_votes = external_rating_votes
            if revenue is not None:
                movie_object.revenue = revenue
            if metascore is not None:
                movie_object.metascore = metascore

            # -------- Add everything from this row to the datasets
            # The assumption is that each row in the CSV is a unique movie.
            self.__dataset_of_movies.append(movie_object)

            for actor_object in actor_object_list:
                if actor_object not in self.__dataset_of_actors:
                    self.__dataset_of_actors.append(actor_object)

            if director_object not in self.__dataset_of_directors:
                self.__dataset_of_directors.append(director_object)

            for genre_object in genre_object_list:
                if genre_object not in self.__dataset_of_genres:
                    self.__dataset_of_genres.append(genre_object)

# To test my assignment, run the following lines of code (I have copied and pasted these into my test_all.py file too)
#movie_file_csv_reader_object = MovieFileCSVReader(r"C:\Users\Nathan Longhurst\OneDrive - The University of Auckland\b Comp235\Assignment\GitHub Clone (Current)\CS235FlixSkeleton\datafiles\Data1000Movies.csv")
#unclean_dict = movie_file_csv_reader_object.check_csv_file()
#movie_file_csv_reader_object.clean_csv_file(unclean_dict)