def handle(self, *args, **options):
    """Populate the ``Movie`` table from ``./movies.csv``.

    Does nothing except print a notice (followed by
    ``ALREADY_LOADED_ERROR_MESSAGE``) when at least one ``Movie`` row
    already exists.  Otherwise one ``Movie`` is created and saved per CSV
    row, copying the ``Title``, ``Release_date``, ``Director``, ``Rating``,
    ``Genre`` and ``Description`` columns verbatim.

    The signature matches Django's ``BaseCommand.handle``; ``args`` and
    ``options`` are accepted but not used.

    Raises:
        OSError: if ``./movies.csv`` cannot be opened.
        KeyError: if a row lacks one of the expected columns.
    """
    if Movie.objects.exists():
        print('Movie data already loaded...exiting.')
        print(ALREADY_LOADED_ERROR_MESSAGE)
        return

    print("Creating movie data")
    # ``with`` guarantees the file handle is closed (the previous version
    # leaked it); ``newline=''`` is what the csv module requires so quoted
    # fields containing line breaks are parsed correctly.
    with open('./movies.csv', newline='') as csv_file:
        for row in DictReader(csv_file):
            movie = Movie()
            movie.title = row['Title']
            # NOTE: movie_poster_image is intentionally not populated here.
            movie.release_date = row['Release_date']
            movie.director = row['Director']
            movie.rating = row['Rating']
            movie.genre = row['Genre']
            movie.description = row['Description']
            movie.save()
def movies(filename):
    """Create one ``Movie`` per row read from *filename* and link its genres.

    Rows come from ``readfile(filename, '|', encode='ISO-8859-1')``.  For
    every row the first five fields are stored as ``movie_id``, ``title``,
    ``release_date``, ``video_release_date`` and ``imdb_url`` (both dates
    are re-formatted to ``YYYY-MM-DD``).  Every remaining field is treated
    as a flag: when ``int(flag) == 1`` the genre returned by
    ``get_genre_object(position)`` is added to the movie.

    Both slices of each row are printed before the record is saved.
    """
    rows = readfile(filename, '|', encode='ISO-8859-1')
    from dateutil.parser import parse

    def as_iso_date(text):
        # Normalise whatever date format the row uses to YYYY-MM-DD.
        return parse(text).strftime('%Y-%m-%d')

    for fields in rows:
        print(fields[0:5])
        genre_flags = fields[5:]
        print(genre_flags)

        record = Movie()
        record.movie_id = fields[0]
        record.title = fields[1]
        record.release_date = as_iso_date(fields[2])
        record.video_release_date = as_iso_date(fields[3])
        record.imdb_url = fields[4]
        # Must be saved before genres can be attached to it.
        record.save()

        for position, flag in enumerate(genre_flags):
            if int(flag) != 1:
                continue
            record.genre.add(get_genre_object(position))
def store_movies(year):
    """Replace the stored movies of *year* with ``./data/movie_{year}.json``.

    Steps:
      1. Load the JSON list of movie dicts for the year.
      2. Delete every ``Movie`` whose ``release_date`` falls in that year.
      3. Bulk-insert the movies in batches of 100, printing the fraction
         of batches already written before each batch.
      4. Bulk-insert one ``MG`` row per (movie id, genre id) pair.

    Args:
        year: value interpolated into the file name and used for the
            ``release_date__year`` filter.

    Raises:
        OSError: if the JSON file cannot be opened.
        KeyError: if a movie dict lacks an expected key.
        ValueError: if a ``release_date`` is not in ``%Y-%m-%d`` format.
    """
    print(f'storing-----------{year}')
    with open(f'./data/movie_{year}.json', 'r') as fp:
        movies = json.load(fp)

    # delete stored movies
    Movie.objects.filter(release_date__year=year).delete()

    # using bulk
    to_db_movies = []
    for mv in tqdm(movies):
        movie = Movie()
        movie.pk = mv['id']
        movie.title_en = mv['original_title']
        movie.title_ko = mv['title']
        movie.rate = mv['vote_average']
        movie.rate_people_count = mv['vote_count']
        movie.poster_path = mv['poster_path']
        movie.description = mv['overview']
        movie.release_date = datetime.datetime.strptime(
            mv['release_date'], '%Y-%m-%d')
        to_db_movies.append(movie)

    # Stepping by the batch size never yields an empty trailing batch
    # (the old ``len // div + 1`` arithmetic did whenever the count was a
    # multiple of 100, including zero).
    div = 100
    to_db_movies_batch = [
        to_db_movies[start:start + div]
        for start in range(0, len(to_db_movies), div)
    ]
    for idx, batch in enumerate(to_db_movies_batch):
        print(round(idx / len(to_db_movies_batch), 2), end='|')
        Movie.objects.bulk_create(batch)

    # add genre
    mgs = []
    for mv in movies:
        for gid in mv['genre_ids']:
            mg = MG()
            mg.movie_id = mv['id']
            mg.genre_id = gid
            mgs.append(mg)
    MG.objects.bulk_create(mgs)
def handle(self, *args, **options):
    """Load movies from ``./movies_metadata.csv`` and people from ``./credits.csv``.

    Nothing is loaded (only a notice and ``ALREADY_LOADED_ERROR_MESSAGE``
    are printed) when any ``Movie``, ``Actor``, ``Director`` or ``Writer``
    row already exists.

    Movies: one ``Movie`` per metadata row.  The ``genres``,
    ``production_companies`` and ``production_countries`` columns hold
    Python-literal lists of dicts and are reduced to a comma-joined string
    of their ``name`` values.  Empty numeric columns are stored as ``0``.
    The poster path is fetched from the TMDB API (key read from the
    ``API_KEY`` environment variable); on any failure, or when the API
    returns ``None``, the CSV's own ``poster_path`` is used instead.

    Credits: for every credits row the first five cast members, the first
    crew member whose job is ``Director`` and the first other crew member
    whose department is ``Writing`` are stored.  A person that already
    exists gets the movie id appended to their comma-separated
    ``movie_ids``; otherwise a new record is created.

    The signature matches Django's ``BaseCommand.handle``; ``args`` and
    ``options`` are accepted but not used.

    Raises:
        OSError: if either CSV file cannot be opened.
        KeyError: if an expected column or dict key is missing.
        ValueError / SyntaxError: if a list column is not a valid Python
            literal.
    """
    # Local import so this block does not depend on the file's import header.
    import ast

    if (Movie.objects.exists() or Actor.objects.exists()
            or Director.objects.exists() or Writer.objects.exists()):
        print('Movie data already loaded...exiting.')
        print(ALREADY_LOADED_ERROR_MESSAGE)
        return

    SEX_CHOICES = {1: 'F', 2: 'M', 0: ''}

    def joined_names(raw):
        # ast.literal_eval replaces eval(): the column is a literal list of
        # dicts, and eval() would execute any expression found in the CSV.
        return ','.join(item['name'] for item in ast.literal_eval(raw))

    def zero_if_blank(value):
        # Empty numeric cells are stored as 0.
        return 0 if value == '' else value

    def get_or_build(model, id_field, person, movie_id):
        # Return (record, created): an existing record with movie_id appended
        # to movie_ids, or a fresh unsaved record for this person.
        try:
            record = model.objects.get(**{id_field: person['id']})
        except model.DoesNotExist:
            record = model()
            setattr(record, id_field, person['id'])
            record.name = person['name']
            # Unmapped gender codes are stored like "unspecified" instead of
            # aborting the whole import with a KeyError.
            record.gender = SEX_CHOICES.get(person['gender'], '')
            record.movie_ids = movie_id
            return record, True
        record.movie_ids = record.movie_ids + ',' + movie_id
        return record, False

    print(
        "\nLoading Movie data for Movies available in movies_metadata.csv")
    with open('./movies_metadata.csv', newline='') as metadata_file:
        for i, row in enumerate(DictReader(metadata_file), start=1):
            print(i, end=', ')
            movie = Movie()
            movie.id = row['id']
            movie.title = row['title']
            movie.budget = zero_if_blank(row['budget'])
            movie.genres = joined_names(row['genres'])
            movie.language = row['original_language']
            movie.overview = row['overview']
            movie.companies = joined_names(row['production_companies'])
            movie.countries = joined_names(row['production_countries'])
            movie.release_date = row['release_date']
            movie.revenue = zero_if_blank(row['revenue'])
            movie.runtime = zero_if_blank(row['runtime'])
            movie.vote_average = zero_if_blank(row['vote_average'])
            movie.vote_count = zero_if_blank(row['vote_count'])

            # get the right picture for movie
            poster_url = ("https://api.themoviedb.org/3/movie/"
                          + str(row['id']) + "?api_key="
                          + str(os.getenv('API_KEY')) + "&language=en-US")
            try:
                poster_path = requests.get(poster_url).json()['poster_path']
            except Exception:
                # Deliberately best-effort: network errors, bad JSON or a
                # missing key all fall back to the CSV value.  Unlike the
                # former bare ``except:`` this no longer traps Ctrl-C.
                poster_path = None
            if poster_path is None:
                print('failed at loading poster path from: ' + poster_url)
                movie.poster = row['poster_path']
            else:
                movie.poster = str(poster_path)
            movie.save()

    print(
        "\nLoading Actor, Director, Writer data for Credits available in credits.csv"
    )
    with open('./credits.csv', newline='') as credits_file:
        for i, row in enumerate(DictReader(credits_file), start=1):
            print(i, end=', ')

            # import Actors (only the first five cast entries)
            for each_actor in ast.literal_eval(row['cast'])[:5]:
                actor, created = get_or_build(
                    Actor, 'actor_id', each_actor, row['id'])
                if created and each_actor['profile_path']:
                    actor.pic = each_actor['profile_path']
                actor.save()

            # import Directors, Writers from crew: at most one of each
            have_writer = False
            have_director = False
            for crew in ast.literal_eval(row['crew']):
                if not have_director and crew['job'] == 'Director':
                    director, _ = get_or_build(
                        Director, 'director_id', crew, row['id'])
                    director.save()
                    have_director = True
                elif not have_writer and crew['department'] == 'Writing':
                    writer, _ = get_or_build(
                        Writer, 'writer_id', crew, row['id'])
                    writer.save()
                    have_writer = True
                if have_writer and have_director:
                    break
def update_dataset(request):
    """Fetch genres and movies from the remote API, store them, and render the titles.

    1. GET ``GENRE_URL`` and save one ``Genre`` per entry of its
       ``genres`` list.
    2. For each page number in ``range(1, 25)``, GET ``MOVIE_URL`` with
       ``&page=<n>`` appended, save one ``Movie`` per entry of
       ``results`` and attach it to every ``Genre`` listed in the entry's
       ``genre_ids``.
    3. Render ``make_dataset/update_dataset.html`` with the collected
       titles under the ``movietitles`` context key.

    Args:
        request: the request object handed on to ``render``.

    Returns:
        The response produced by ``render``.

    Raises:
        KeyError: if a response lacks an expected key.
        Genre.DoesNotExist: if a movie references a genre id that was not
            stored in step 1.
    """
    movietitles = []

    # Request the genre list
    response_gens = requests.get(GENRE_URL)
    response_gens = response_gens.json()
    print(GENRE_URL)
    # print(response_gens, '####')
    for gen in response_gens['genres']:
        print(gen)
        genre_instance = Genre()
        genre_instance.genre_id = gen['id']
        genre_instance.name = gen['name']
        genre_instance.save()

    # Send the page requests.
    # NOTE(review): the original comment said "pages 1 to 25", but
    # range(1, 25) stops at page 24 -- confirm which was intended.
    for page in range(1, 25):
        REQ_URL = MOVIE_URL + f'&page={page}'
        print(REQ_URL)
        response = requests.get(REQ_URL)
        response = response.json()

        # Iterate over whatever the page actually returned instead of
        # indexing a fixed 0..19, which raised IndexError on a short page.
        for result in response['results']:
            # Build a Movie instance and save it to the DB
            movie = Movie()
            movie.movie_id = result['id']
            movie.popularity = result['popularity']
            movie.vote_count = result['vote_count']
            movie.poster_path = result['poster_path']
            movie.backdrop_path = result['backdrop_path']
            movie.original_language = result['original_language']
            movie.original_title = result['original_title']
            movie.title = result['title']
            movie.vote_average = result['vote_average']
            movie.overview = result['overview']
            movie.release_date = result['release_date']

            # # movie detail API request and storage (takes a long time)
            # movie_id = movie.movie_id
            # DETAIL_URL = BASE_URL + f'movie/{movie_id}?' + API_KEY + '&language=ko-KR'
            # detail_res = requests.get(DETAIL_URL).json()

            # # store the runtime data
            # movie.runtime = detail_res['runtime']
            movie.save()

            # # store production country data in the DB
            # for country_data in detail_res['production_countries']:
            #     country = Country()
            #     country.iso = country_data['iso_3166_1']
            #     country.name = country_data['name']
            #     country.save()

            # store the M2M relation in the join table (movie-country)
            # for country_data in detail_res['production_countries']:
            #     iso = country_data['iso_3166_1']
            #     country = Country.objects.get(iso=iso)
            #     country.movies.add(movie)

            # store the M2M relation in the join table (movie-genre)
            for genid in result['genre_ids']:
                # print(genid)
                genre = Genre.objects.get(genre_id=genid)
                genre.movies.add(movie)

            # Check that the data was received properly.
            print(movie.title)

            # Collect the title so the template can display it.
            movietitles.append(movie.title)

    # Print a success message when finished
    print('success')

    context = {
        'movietitles': movietitles,
    }
    return render(request, 'make_dataset/update_dataset.html', context)