예제 #1
0
 def handle(self, *args, **options):
     """Load every row of ``./movies.csv`` into the ``Movie`` table.

     If at least one ``Movie`` already exists, a notice and
     ``ALREADY_LOADED_ERROR_MESSAGE`` are printed and nothing is imported.
     Otherwise one ``Movie`` is saved per CSV row, using the columns
     ``Title``, ``Release_date``, ``Director``, ``Rating``, ``Genre`` and
     ``Description``.

     Args:
         *args: Unused positional arguments.
         **options: Unused keyword options.
     """
     if Movie.objects.exists():
         print('Movie data already loaded...exiting.')
         print(ALREADY_LOADED_ERROR_MESSAGE)
         return
     print("Creating movie data")
     # The context manager guarantees the CSV handle is closed, even when a
     # row fails to save part-way through the import.
     with open('./movies.csv') as csv_file:
         for row in DictReader(csv_file):
             movie = Movie()
             movie.title = row['Title']
             # NOTE: movie_poster_image is not populated by this loader.
             movie.release_date = row['Release_date']
             movie.director = row['Director']
             movie.rating = row['Rating']
             movie.genre = row['Genre']
             movie.description = row['Description']
             movie.save()
예제 #2
0
 def movies(filename):
     """Create one ``Movie`` per record returned by ``readfile``.

     ``readfile`` is called with ``'|'`` and ``encode='ISO-8859-1'``
     (presumably a pipe-delimited parser; confirm against its definition).
     For every record, positions 0-4 are stored as movie id, title, release
     date, video release date and IMDb URL; both dates are re-formatted as
     ``YYYY-MM-DD``. Every remaining position whose integer value is 1 adds
     ``get_genre_object(position)`` to the movie's ``genre`` relation.

     Args:
         filename: Passed straight through to ``readfile``.
     """
     records = readfile(filename, '|', encode='ISO-8859-1')
     from dateutil.parser import parse as parse_date

     iso_day = '%Y-%m-%d'
     for fields in records:
         # Debug output: the movie columns first, then the genre flags.
         head = fields[0:5]
         print(head)
         genre_flags = fields[5:]
         print(genre_flags)

         entry = Movie()
         entry.movie_id = fields[0]
         entry.title = fields[1]
         entry.release_date = parse_date(fields[2]).strftime(iso_day)
         entry.video_release_date = parse_date(fields[3]).strftime(iso_day)
         entry.imdb_url = fields[4]
         # The row has to be saved before related genres can be attached.
         entry.save()

         for position, flag in enumerate(genre_flags):
             if int(flag) != 1:
                 continue
             entry.genre.add(get_genre_object(position))
예제 #3
0
def store_movies(year):
    """Replace the stored movies of ``year`` with ``./data/movie_{year}.json``.

    Steps, in order:
      1. Load the JSON list of movie dicts for the year.
      2. Delete every ``Movie`` whose ``release_date`` falls in that year.
      3. Bulk-insert the loaded movies in slices of 100, printing the
         fraction of slices already processed before each insert.
      4. Bulk-insert one ``MG`` row per (movie id, genre id) pair.

    Args:
        year: Year used both in the JSON file name and in the delete filter.
    """
    print(f'storing-----------{year}')
    with open(f'./data/movie_{year}.json', 'r') as source:
        payload = json.load(source)

    # Clear out whatever is currently stored for this release year.
    Movie.objects.filter(release_date__year=year).delete()

    # Build unsaved instances first so they can be written in bulk.
    pending = []
    for entry in tqdm(payload):
        record = Movie()
        record.pk = entry['id']
        record.title_en = entry['original_title']
        record.title_ko = entry['title']
        record.rate = entry['vote_average']
        record.rate_people_count = entry['vote_count']
        record.poster_path = entry['poster_path']
        record.description = entry['overview']
        record.release_date = datetime.datetime.strptime(
            entry['release_date'], '%Y-%m-%d')
        pending.append(record)

    # Slice into chunks of 100; the "+ 1" yields a trailing (possibly empty)
    # chunk that covers the remainder.
    chunk_size = 100
    chunk_total = len(pending) // chunk_size + 1
    chunks = [
        pending[number * chunk_size:(number + 1) * chunk_size]
        for number in range(chunk_total)
    ]

    for position, chunk in enumerate(chunks):
        print(round(position / len(chunks), 2), end='|')
        Movie.objects.bulk_create(chunk)

    # Link each movie to its genres through the MG model.
    links = []
    for entry in payload:
        for genre_id in entry['genre_ids']:
            link = MG()
            link.movie_id = entry['id']
            link.genre_id = genre_id
            links.append(link)
    MG.objects.bulk_create(links)
예제 #4
0
 def handle(self, *args, **options):
     """Import movies and their main credits from the local CSV dumps.

     The import is skipped (after printing a notice and
     ``ALREADY_LOADED_ERROR_MESSAGE``) when any ``Movie``, ``Actor``,
     ``Director`` or ``Writer`` row already exists.

     Phase 1 reads ``./movies_metadata.csv`` and saves one ``Movie`` per row.
     Genre, company and country cells hold Python-literal lists of dicts and
     are flattened to comma-separated names. The poster path is requested
     from the TMDB API; on any failure the CSV's own ``poster_path`` is used.

     Phase 2 reads ``./credits.csv`` and, per row, stores the first five cast
     members as ``Actor`` plus the first crew member whose job is
     ``Director`` and the first other crew member in the ``Writing``
     department. People that already exist get the row id appended to their
     comma-separated ``movie_ids``.

     Args:
         *args: Unused positional arguments.
         **options: Unused keyword options.

     Raises:
         Exception: Lookup errors other than ``DoesNotExist`` (for example
             ``MultipleObjectsReturned`` or a database error) now propagate
             instead of silently creating a duplicate person row.
     """
     if (Movie.objects.exists() or Actor.objects.exists()
             or Director.objects.exists() or Writer.objects.exists()):
         print('Movie data already loaded...exiting.')
         print(ALREADY_LOADED_ERROR_MESSAGE)
         return
     print(
         "\nLoading Movie data for Movies available in movies_metadata.csv")
     # NOTE(security): the eval() calls below execute whatever expression is
     # stored in the CSV cells. Only run this loader on trusted dumps;
     # ast.literal_eval is the safe alternative for pure literal data.
     with open('./movies_metadata.csv') as metadata_file:
         for i, row in enumerate(DictReader(metadata_file), start=1):
             print(i, end=', ')
             movie = Movie()
             movie.id = row['id']
             movie.title = row['title']
             movie.budget = 0 if row['budget'] == '' else row['budget']
             # genres is a list of dictionaries
             movie.genres = ','.join(
                 genre['name'] for genre in eval(row['genres']))
             movie.language = row['original_language']
             movie.overview = row['overview']
             # companies is a list of dictionaries
             movie.companies = ','.join(
                 company['name']
                 for company in eval(row['production_companies']))
             # countries is a list of dictionaries
             movie.countries = ','.join(
                 country['name']
                 for country in eval(row['production_countries']))
             movie.release_date = row['release_date']
             movie.revenue = 0 if row['revenue'] == '' else row['revenue']
             movie.runtime = 0 if row['runtime'] == '' else row['runtime']
             movie.vote_average = (0 if row['vote_average'] == '' else
                                   row['vote_average'])
             movie.vote_count = (0 if row['vote_count'] == '' else
                                 row['vote_count'])
             # Built once so the request and the failure message agree.
             # NOTE(review): this URL embeds API_KEY and is printed on
             # failure, so the key ends up in the command output.
             poster_url = ("https://api.themoviedb.org/3/movie/" +
                           str(row['id']) + "?api_key=" +
                           str(os.getenv('API_KEY')) + "&language=en-US")
             try:
                 # get the right picture for movie
                 poster_path = requests.get(poster_url).json()['poster_path']
                 if poster_path is None:
                     raise ValueError('poster_path is null')
                 movie.poster = str(poster_path)
             except Exception:
                 # Best effort: any failure falls back to the CSV value.
                 # "Exception" (not a bare except) lets Ctrl-C abort the run.
                 print('failed at loading poster path from: ' + poster_url)
                 movie.poster = row['poster_path']
             movie.save()
     print(
         "\nLoading Actor, Director, Writer data for Credits available in credits.csv"
     )
     SEX_CHOICES = {1: 'F', 2: 'M', 0: ''}
     with open('./credits.csv') as credits_file:
         for i, row in enumerate(DictReader(credits_file), start=1):
             print(i, end=', ')
             # import Actors (only the first five cast entries)
             for each_actor in eval(row['cast'])[:5]:
                 try:
                     actor = Actor.objects.get(actor_id=each_actor['id'])
                 except Actor.DoesNotExist:
                     actor = Actor()
                     actor.actor_id = each_actor['id']
                     actor.name = each_actor['name']
                     actor.gender = SEX_CHOICES[each_actor['gender']]
                     actor.movie_ids = row['id']
                     if each_actor['profile_path']:
                         actor.pic = each_actor['profile_path']
                 else:
                     # if we have the actor just add movie id to it
                     actor.movie_ids = actor.movie_ids + ',' + row['id']
                 actor.save()
             # import Directors, Writers from crew
             # get only 1 director and only 1 writer per row
             has_director = False
             has_writer = False
             for crew in eval(row['crew']):
                 # import Director
                 if not has_director and crew['job'] == 'Director':
                     try:
                         director = Director.objects.get(
                             director_id=crew['id'])
                     except Director.DoesNotExist:
                         director = Director()
                         director.director_id = crew['id']
                         director.name = crew['name']
                         director.gender = SEX_CHOICES[crew['gender']]
                         director.movie_ids = row['id']
                     else:
                         director.movie_ids = (director.movie_ids + ',' +
                                               row['id'])
                     director.save()
                     has_director = True
                 # import Writer
                 elif not has_writer and crew['department'] == 'Writing':
                     try:
                         writer = Writer.objects.get(writer_id=crew['id'])
                     except Writer.DoesNotExist:
                         writer = Writer()
                         writer.writer_id = crew['id']
                         writer.name = crew['name']
                         writer.gender = SEX_CHOICES[crew['gender']]
                         writer.movie_ids = row['id']
                     else:
                         writer.movie_ids = writer.movie_ids + ',' + row['id']
                     writer.save()
                     has_writer = True
                 # Stop scanning the crew once both roles are filled.
                 if has_writer and has_director:
                     break
예제 #5
0
def update_dataset(request):
    """Fetch genres and movies from the remote API, store them, and render.

    First every entry under ``'genres'`` in the ``GENRE_URL`` response is
    saved as a ``Genre``. Then pages 1 through 24 of ``MOVIE_URL`` are
    requested; up to 20 results per page are saved as ``Movie`` rows and
    linked to their genres through ``Genre.movies``.

    Args:
        request: The incoming request, forwarded to ``render``.

    Returns:
        The rendered ``make_dataset/update_dataset.html`` template, with the
        titles of all saved movies in the context as ``movietitles``.
    """
    movietitles = []
    # Request the genre list.
    response_gens = requests.get(GENRE_URL).json()
    print(GENRE_URL)

    for gen in response_gens['genres']:
        print(gen)
        genre_instance = Genre()
        genre_instance.genre_id = gen['id']
        genre_instance.name = gen['name']
        genre_instance.save()

    # Request the movie pages. range(1, 25) covers pages 1..24.
    # NOTE(review): the original comment mentioned page 25; confirm whether
    # page 25 should be included.
    for page in range(1, 25):
        req_url = MOVIE_URL + f'&page={page}'
        print(req_url)
        response = requests.get(req_url).json()

        # Take at most 20 entries per page. Slicing, unlike indexing 0..19,
        # does not raise IndexError when a page holds fewer than 20 results.
        for result in response['results'][:20]:
            # Build the Movie instance and store it in the DB.
            movie = Movie()
            movie.movie_id = result['id']
            movie.popularity = result['popularity']
            movie.vote_count = result['vote_count']
            movie.poster_path = result['poster_path']
            movie.backdrop_path = result['backdrop_path']
            movie.original_language = result['original_language']
            movie.original_title = result['original_title']
            movie.title = result['title']
            movie.vote_average = result['vote_average']
            movie.overview = result['overview']
            movie.release_date = result['release_date']
            # NOTE: the per-movie detail request (runtime, production
            # countries and the movie-country M2M links) is disabled here
            # because it was slow.
            movie.save()

            # Store the movie-genre M2M links.
            for genid in result['genre_ids']:
                genre = Genre.objects.get(genre_id=genid)
                genre.movies.add(movie)

            # Print the title to confirm the movie was received correctly.
            print(movie.title)
            # Collect the title so the template can list it.
            movietitles.append(movie.title)

    # Print a success message when everything is done.
    print('success')

    context = {
        'movietitles': movietitles,
    }
    return render(request, 'make_dataset/update_dataset.html', context)