def _load_or_build_pickle(file_name, build):
    """Return the object cached in ``pickled_files/<file_name>``, building it on a miss.

    :param file_name: name of the pickle file inside the ``pickled_files`` directory
    :param build: zero-argument callable that produces the object when no cache
                  file exists
    :return: the unpickled object on a cache hit; otherwise the freshly built
             object, which is also written to the cache file
    """
    cache_path = "pickled_files/" + file_name

    if check_if_file_exists("pickled_files", file_name):
        # NOTE: unpickling can execute arbitrary code; these files must only
        # ever be caches written by this module.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    built = build()
    with open(cache_path, "wb") as f:
        pickle.dump(built, f)
    return built


def get_movie_similarity_dfs(connection):
    """Return the per-film feature tables used for movie similarity.

    Each of the five results is cached in its own pickle file under
    ``pickled_files``. The ``film_list`` table is queried lazily: at most once
    per call, and only when at least one cache file is missing.

    :param connection: database connection passed to ``ad.get_data_from_query``
                       and ``actors_df_categorical``
    :return: tuple ``(movie_genre, encoded_actor_df, movie_price, movie_length,
             fid_list)`` where

             * ``movie_genre`` is ``pd.get_dummies`` applied to the FID/category
               columns with the prefix ``category``,
             * ``encoded_actor_df`` is the result of ``actors_df_categorical``
               for the FID/actors columns,
             * ``movie_price`` and ``movie_length`` are the FID/price and
               FID/length columns,
             * ``fid_list`` is the list of all FIDs.
    """
    query = """select * from film_list;"""
    fetched = []  # holds the query result once it has been fetched

    def film_list():
        # Hit the database only when a cache miss actually needs the data,
        # and reuse the same result for every further miss in this call.
        if not fetched:
            fetched.append(ad.get_data_from_query(connection, query))
        return fetched[0]

    movie_genre = _load_or_build_pickle(
        "movie_genre.pkl",
        lambda: pd.get_dummies(film_list()[['FID', 'category']],
                               prefix=['category']),
    )
    encoded_actor_df = _load_or_build_pickle(
        "encoded_actor_df.pkl",
        lambda: actors_df_categorical(connection,
                                      film_list()[['FID', 'actors']]),
    )
    movie_price = _load_or_build_pickle(
        "movie_price.pkl",
        lambda: film_list()[['FID', 'price']],
    )
    movie_length = _load_or_build_pickle(
        "movie_length.pkl",
        lambda: film_list()[['FID', 'length']],
    )
    # The fid_list is used by the function "create_df_with_cos"
    fid_list = _load_or_build_pickle(
        "fid_list.pkl",
        lambda: list(film_list()['FID']),
    )

    return movie_genre, encoded_actor_df, movie_price, movie_length, fid_list
def get_actor_list(connection):
    """Return the full name of every row in the ``actor`` table.

    The list is cached in ``pickled_files/actor_list.pkl``. When that file
    exists it is loaded and returned without touching the database; otherwise
    the names are queried, stored in the cache file and returned.

    :param connection: database connection passed to ``ad.get_data_from_query``
    :return: list built from the ``Full_Name`` column (first and last name
             joined by a space)
    """
    cache_path = "pickled_files/actor_list.pkl"

    # Cache hit: reuse the list stored by an earlier call.
    if check_if_file_exists("pickled_files", "actor_list.pkl"):
        with open(cache_path, "rb") as cache_file:
            return pickle.load(cache_file)

    # Cache miss: read the names from the database and remember them.
    sql = """select CONCAT(first_name, " " ,last_name) as Full_Name from actor"""
    names_df = ad.get_data_from_query(connection, sql)
    full_names = list(names_df['Full_Name'])

    with open(cache_path, "wb") as cache_file:
        pickle.dump(full_names, cache_file)

    return full_names
def get_movie_details(connection, movie_list):
    """Fetch FID, title and category of the given films from ``film_list``.

    :param connection: database connection passed to ``ad.get_data_from_query``
    :param movie_list: iterable of film ids (FIDs); each one is converted with
                       ``str`` and placed unquoted inside the ``IN (...)`` list.
                       May be empty, in which case the query matches no rows.
    :return: whatever ``ad.get_data_from_query`` returns for the query
    """
    # NOTE(review): the ids are interpolated into the SQL text, so they must
    # come from a trusted source (e.g. ids previously read from the database).
    id_csv = ",".join(str(each_movie) for each_movie in movie_list)

    # "IN ()" is a syntax error in MySQL and most other SQL dialects, whereas
    # "IN (NULL)" is valid and never matches, so an empty movie_list yields an
    # empty result instead of a failing query.
    in_clause = f"({id_csv})" if id_csv else "(NULL)"

    query = f"""select FID, title, category from film_list
                where FID in {in_clause};"""
    return ad.get_data_from_query(connection, query)
def find_recent_purchase(customer_id):
    """Return the rentals of one customer, ordered by rental date descending.

    The result is cached per customer in
    ``pickled_files/recent_purchase_<customer_id>.pkl``. A single shared cache
    file would hand the first customer's rentals to every later customer, so
    the customer id is part of the file name.

    :param customer_id: id of the customer; coerced with ``int`` because it is
                        embedded in both the cache file name and the SQL text
    :return: the query result from ``ad.get_data_from_query`` (columns
             customer_id, FID, title, category, rental_date), or the previously
             pickled result for this customer
    :raises ValueError: if ``customer_id`` cannot be converted to an integer
    :raises TypeError: if ``customer_id`` is of a type ``int`` does not accept
    """
    # NOTE(review): assumes customer ids are integers (the value is placed
    # unquoted into the query) - confirm against callers.
    customer_id = int(customer_id)

    cache_name = f"recent_purchase_{customer_id}.pkl"
    cache_path = f"pickled_files/{cache_name}"

    if check_if_file_exists("pickled_files", cache_name):
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    query = f"""select rental.customer_id, film_list.FID, film_list.title, film_list.category, rental.rental_date
                from rental
                join inventory on rental.inventory_id = inventory.inventory_id
                join film_list on inventory.film_id = film_list.FID
                where rental.customer_id = {customer_id}
                order by rental.customer_id, rental.rental_date desc;"""

    recent_purchase_df = ad.get_data_from_query(ad.db_connection, query)

    with open(cache_path, "wb") as f:
        pickle.dump(recent_purchase_df, f)

    return recent_purchase_df
def select_top_genre():
    """
    Build a lookup from customer id to that customer's first three categories.

    The query counts rentals per customer and category and orders the rows by
    full name and then by count descending; for each customer the first three
    ``Category`` values of the indexed frame are taken (fewer if the customer
    has fewer rows). The finished dictionary is cached in
    ``pickled_files/dictionary.pkl`` and loaded from there when the file exists.

    :return: a dictionary with key as customer_id and values as
             [FULL_NAME, (up to 3 category names)]
    """
    sql = """select rental.customer_id, CONCAT(customer.first_name," ",customer.last_name) as FULL_NAME,
                category.name as Category, count(*) as COUNT_RENTED_MOVIES
                from rental
                join inventory on rental.inventory_id = inventory.inventory_id
                join film_category on inventory.film_id = film_category.film_id
                join category on film_category.category_id = category.category_id
                join customer on customer.customer_id = rental.customer_id
                group by rental.customer_id, category.category_id
                order by FULL_NAME, COUNT_RENTED_MOVIES desc;

            """
    cache_path = "pickled_files/dictionary.pkl"

    # Cache hit: the dictionary was already computed by an earlier call.
    if check_if_file_exists("pickled_files", "dictionary.pkl"):
        with open(cache_path, "rb") as cache_file:
            return pickle.load(cache_file)

    rentals = ad.get_data_from_query(ad.db_connection, sql, pd_df=True)
    ad.set_multi_index(rentals, ['customer_id', 'FULL_NAME'], inplace=True)

    genres_by_customer = {}
    # One index entry per distinct (customer_id, FULL_NAME) pair.
    for index_key in rentals.index.drop_duplicates(keep='first'):
        customer_id = index_key[0]
        full_name = index_key[1]
        leading_categories = rentals.loc[customer_id]['Category'].head(3).to_list()
        genres_by_customer[customer_id] = [full_name, tuple(leading_categories)]

    with open(cache_path, "wb") as cache_file:
        pickle.dump(genres_by_customer, cache_file)

    return genres_by_customer
def get_all_customer_ids():
    """Return the distinct customer ids found in the ``rental`` table.

    The query runs on ``ad.db_connection``.

    :return: list built from the ``customer_id`` column of the query result
    """
    sql = """select distinct customer_id from rental"""
    rental_customers = ad.get_data_from_query(ad.db_connection, sql)
    return list(rental_customers['customer_id'])