예제 #1
0
def main():
    """Render the Streamlit app: a sidebar page selector and the chosen page.

    The "Recommender System" page lets the user pick an algorithm and three
    favourite movies, then lists the titles returned by ``content_model`` or
    ``collab_model`` (called with ``top_n=10``).  The "Solution Overview"
    page shows placeholder text.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Recommender System", "Solution Overview"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        # (named ``algorithm`` rather than ``sys`` to avoid shadowing the
        # standard-library module name)
        algorithm = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('First Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation.  Both algorithms
        # share the same button, spinner and rendering; only the model
        # function differs.
        if st.button("Recommend"):
            try:
                with st.spinner('Crunching the numbers...'):
                    if algorithm == 'Content Based Filtering':
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    else:
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                st.title("We think you'll like:")
                for rank, title in enumerate(top_recommendations, start=1):
                    st.subheader(str(rank) + '. ' + title)
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit and other BaseException-derived
            # control-flow signals are not swallowed.
            except Exception:
                st.error("Oops! Looks like this algorithm doesn't work. "
                         "We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("Describe your winning approach on this page")
예제 #2
0
def collab_model(movie_list, top_n=10):
    """Performs Collaborative filtering based upon a list of movies supplied
       by the app user.

    For every supplied title the movie id is looked up and the users
    returned by ``get_top_rated_users_for_movie`` are collected.  The
    predictions returned by ``get_top_predictions_for_users`` for those
    users are then joined to ``movies_df`` to obtain titles.  When no users
    are found for any of the movies, the call falls back to
    ``content_model``.

    Parameters
    ----------
    movie_list : list (str)
        Favorite movies chosen by the app user.
    top_n : int
        Number of top recommendations to return to the user.

    Returns
    -------
    list (str)
        Titles of the top-n movie recommendations to the user.

    """
    # Collect the users associated with each of the input movies
    user_ids = []

    for movie in movie_list:
        movie_id = get_movie_id_from_title(movie)
        top_users = get_top_rated_users_for_movie(movie_id)

        if top_users:
            user_ids.extend(top_users)

    # Nothing to collaborate on: fall back to the content-based model
    if not user_ids:
        return content_model(movie_list, top_n)

    filtered_predictions = get_top_predictions_for_users(user_ids)

    prediction_output = pd.merge(filtered_predictions,
                                 movies_df,
                                 on="movieId",
                                 how="left")

    # A left merge leaves a NaN title for any movieId that is missing from
    # movies_df; drop those so that only real titles are returned.
    titles = prediction_output["title"].dropna()

    return list(titles.iloc[:top_n])
예제 #3
0
def main():
    """Render the Streamlit app: a sidebar page selector and the chosen page.

    Pages:

    * "Recommender System" - pick an algorithm and three favourite movies,
      then list the titles returned by ``content_model`` or ``collab_model``
      (called with ``top_n=10``).
    * "Solution Overview" - sidebar buttons reveal sample data frames and
      saved EDA images.
    * "About Us" - sidebar buttons reveal team, mission and problem text.
    * "Contact Us" - a contact form (inputs are displayed only; nothing in
      this function processes the submitted values) and contact details.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Recommender System","Solution Overview", "About Us",
                   "Contact Us"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        # (named ``algorithm`` rather than ``sys`` to avoid shadowing the
        # standard-library module name)
        algorithm = st.radio("Select an algorithm",
                             ('Content Based Filtering',
                              'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('First Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation.  Both algorithms
        # share the same button, spinner and rendering; only the model
        # function differs.
        if st.button("Recommend"):
            try:
                with st.spinner('Crunching the numbers...'):
                    if algorithm == 'Content Based Filtering':
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    else:
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                st.title("We think you'll like:")
                for rank, title in enumerate(top_recommendations, start=1):
                    st.subheader(str(rank)+'. '+title)
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit and other BaseException-derived
            # control-flow signals are not swallowed.
            except Exception:
                st.error("Oops! Looks like this algorithm doesn't work. "
                         "We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("This page describes the winning approach.")

        def datasets():
            """Show small hard-coded samples of the train and test data."""
            st.markdown("### **_Datasets_**")
            st.write("""
            A snapshot of the data sets used in training and testing the model.
                    """)
            st.markdown("#### **_Train Data_**")
            st.write(pd.DataFrame({"userId": [5163, 106343, 146790],
                                  "movieId": [57669, 5, 5459],
                                  "rating": [4.0, 4.5, 5.0],
                                  "timestamp": [1518349992, 1206238739, 1076215539]}))

            st.markdown("#### **_Test Data_**")
            st.write(pd.DataFrame({"userId": [1, 1, 1],
                                  "movieId": [2011, 4144, 5767]}))

        def eda():
            """Show the saved EDA charts with their captions."""
            st.markdown("### **_Exploratory Data Analysis_**")
            st.write("""
            A summary of the insights in the movies data set.
                    """)

            # Reads in saved graphs
            bar = """The bar chart describes the number of ratings in millions for each 
            movie in the database scaled from a rating of 0.5 to 5.0. The lowest 
            rated movies are given a 0.5 rating and the highest rated movies are
            given a 4.0. On average the ratings are 3.0 and above indicating
            that the users enjoy the movies in the data set."""

            st.image('resources/imgs/bar.png', caption = bar,  use_column_width = True)

            fig = """
            The line chart describes a count of the number of ratings for each movie.
            The shape of the chart indicates a negative relationship between the count
            ratings and the number of movies. This means that a few movies have a 
            higher count of ratings, and as the number of movies increase the count of
            ratings decreases."""
            st.image('resources/imgs/fig.png', caption = fig, use_column_width = True)

        # Each sidebar button renders its section only on the run in which
        # it was clicked.
        if st.sidebar.button("Datasets"):
            datasets()
        if st.sidebar.button("Exploratory Data Analysis"):
            eda()

    if page_selection == "About Us":
        st.title("About Us Tendai")

        def team():
            """Show team photos followed by a short line about each member."""
            st.markdown("### **_ Our Data Scientists_**")

            siya = Image.open('resources/imgs/siya.jpg')
            viwe = Image.open('resources/imgs/viwe.jpeg')
            mj = Image.open('resources/imgs/mj.jpg')
            tendi = Image.open('resources/imgs/tendi.jpg')
            b = Image.open('resources/imgs/b.png')

            st.image([siya, mj, tendi],
                     caption = ["Siyasanga", "MJ", "Tendani"],
                     width = 150)
            st.image([viwe, b], caption = ["Siviwe", "Bongani"],
                     width = 150)

            st.markdown("### **_ Learn more about Team_3_CPT_**")
            st.write("""Like mystery, Siyasanga creates wonder and wonder is the basis of 
                     her desire to understand.""")
            st.write("""Free spirited and adaptable are the truest words that describe 
                     Siviwe. A little something for rainbows and sunny skies to envy.""")
            st.write("Optimism and hardwork is what drives MJ to become a data scientist.")
            st.write("The most friendliest person alive... a simple guy and a simple motto.")
            st.write(""""All I want is to be a real boy in front of a computer begging
                    my program to run." - Bongani""")

        def mission():
            """Show the team's mission statement."""
            st.markdown("### **_Our Mission_**")
            st.write("""
            We are a creative and passionate group of data scientists who are
            on a mission to make a difference in our community by bringing
            innovative programs and projects that promote ingenuity, inclusivity
            and integrity. We want to make our community a better place by 
            giving people tools and information to make better decisions.""")

        def statement():
            """Show the problem statement."""
            st.markdown("### **_Problem Statement_**")
            st.write("""
            To construct a recommendation algorithm based on content or 
            collaborative filtering, capable of accurately predicting how
            a user will rate a movie they have not yet viewed based on their 
            historical preferences.""")

        def landscape():
            """Show the problem-landscape image and text."""
            st.markdown("### **_Problem Landscape_**")

            st.image('resources/imgs/cpt.jpg', use_column_width = True)
            st.write("""
            Providing an accurate and robust solution to this challenge has 
            immense economic potential, with users of the system being 
            exposed to content they would like to view or purchase - 
            generating revenue and platform affinity.""")

        def motivation():
            """Show the motivation bullet points."""
            st.markdown("### **_Motivation_**")
            st.write("""Improve customer experience by exposing users to 
            content that matches their taste.""")
            st.write("""Increase sales for service providers.""")
            st.write("""Reduce transaction costs of finding and selecting 
            relevant content in an online platform.""")

        if st.sidebar.button("Team"):
            team()
        if st.sidebar.button("Our Mission"):
            mission()
        if st.sidebar.button("Problem Statement"):
            statement()
        if st.sidebar.button("Problem Landscape"):
            landscape()
        if st.sidebar.button("Motivation"):
            motivation()

    if page_selection == "Contact Us":
        st.title("Contact Us")
        st.markdown("### **_Have any questions? We would love to hear from you._**")

        # The widgets' return values are not used: the form is display-only.
        st.text_input("""Name:""")
        st.text_input("""Surname:""")
        st.text_input("""Email:""")
        st.text_area("""Comment:""")
        st.button("""Submit""")

        st.markdown("### **_Contact Info_**")
        st.write("""Tel: 012 589 4856""")
        st.write("""Fax: 012 589 4800""")
        st.write("""Email: [email protected]""")
        st.write("""Postal Address""")
        st.write("""Private Bag X756, Observatory, Western Cape,
                 South Africa""")

        st.markdown("### **_Social Media_**")
        st.write("""LinkedIn: Team_3_CPT""")
        st.write("""Facebook: Team_3_CPT""")
        st.write("""Instagram: @team_3_cpt""")
        st.write("""Twitter: @team_3_cpt""")
예제 #4
0
def main():
    """Render the Streamlit app: a sidebar page selector and the chosen page.

    Pages:

    * "Home" - welcome text and image.
    * "Recommender System" - pick an algorithm and three favourite movies.
      Content-based filtering delegates to ``content_model``; collaborative
      filtering is computed inline from the ratings frame ``m`` using a
      Pearson correlation matrix between its columns.
    * "Info", "Solution Overview", "Insights" - static text and images.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    st.write('----------------------------------------------------------')
    st.title('Movie Recommendation Engine')
    st.write('----------------------------------------------------------')
    page_options = ["Home", "Recommender System", "Info", "Solution Overview", "Insights"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)

    if page_selection == "Home":
        st.title("Home")
        st.subheader("Welcome!")
        st.markdown("This web app recommends movies based on similar or related to movies a user selects.")
        st.image('resources/imgs/netflix_img.jpg',width=500)
        st.info('See ** Recommender Systems ** page to run the engine')
        st.subheader("Why recommender systems")
        st.markdown("Any streaming platforms is built around lessening one’s time trying to decide which movie to watch. We supply users with relative content to watch taking into consideration their values and ideals. We would like to determine how people perceive streaming services and whether or not there is an issue that would be better rectified. This would help companies add to their market research efforts in gauging how their service recommendations may be improved.")


    if page_selection == "Recommender System":
        # Header contents
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        st.info("See **Solution Overview** and **Insights** pages for more information on modelling and analysis")
        # Recommender System algorithm selection
        # (named ``algorithm`` rather than ``sys`` to avoid shadowing the
        # standard-library module name)
        algorithm = st.radio("Select an algorithm",
                             ('Content Based Filtering',
                              'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')

        # Perform top-10 movie recommendation generation
        if algorithm == 'Content Based Filtering':
            # Content Based options
            movie_1 = st.selectbox('First Option',title_list[14930:15200])
            movie_2 = st.selectbox('Second Option',title_list[25055:25255])
            movie_3 = st.selectbox('Third Option',title_list[21100:21200])
            fav_movies = [movie_1, movie_2, movie_3]

            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for rank, title in enumerate(top_recommendations, start=1):
                        st.subheader(str(rank)+'. '+title)
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt/SystemExit and other
                # BaseException-derived signals are not swallowed.
                except Exception:
                    st.error("Oops! Looks like this algorithm doesn't work. "
                             "We'll need to fix it!")


        if algorithm == 'Collaborative Based Filtering':
            # Collaborative Based Options
            movie_1_colab = st.selectbox('First Option',title_list[:1000])
            movie_2_colab = st.selectbox('Second Option',title_list[1:1000])
            movie_3_colab = st.selectbox('Third Option',title_list[2:1000])
            # Each pick is paired with an assumed rating of 5.
            fav_movies_colab = [(movie_1_colab,5),(movie_2_colab,5),(movie_3_colab,5)]

            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        # Keep columns with at least 10 non-missing values,
                        # then treat the remaining gaps as 0.
                        userRatings = m.dropna(thresh=10, axis=1).fillna(0,axis=1)
                        corrMatrix = userRatings.corr(method='pearson')

                        # One weighted similarity column per pick; the weight
                        # (rating - 2.5) is positive for ratings above 2.5
                        # and negative below.
                        scores = [corrMatrix[movie] * (rating - 2.5)
                                  for movie, rating in fav_movies_colab]
                        # pd.concat aligns the columns on their labels;
                        # summing across them gives one total per label
                        # (replaces DataFrame.append, removed in pandas 2.0).
                        totals = pd.concat(scores, axis=1,
                                           ignore_index=True).sum(axis=1)

                        # Exclude the user's own picks by label instead of
                        # assuming they always occupy the top three ranks.
                        picked = [movie for movie, _ in fav_movies_colab]
                        recc_movies = (totals.drop(labels=picked, errors='ignore')
                                             .sort_values(ascending=False)
                                             .head(10))

                    st.markdown('## Top 10 Recommendations based on your movie picks:')
                    for count, title in enumerate(recc_movies.index, start=1):
                        st.info(str(count) + '. ' + str(title))
                # Same user-facing failure message as the content-based
                # branch (e.g. a pick that was filtered out of corrMatrix
                # raises KeyError).
                except Exception:
                    st.error("Oops! Looks like this algorithm doesn't work. "
                             "We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------

    if page_selection == "Info":
        st.title("Info")
        st.markdown("* ** Home Screen **  - Landing page, gives brief discription of the App")
        st.markdown("* ** Info ** - Explains how the app works, from navigating on pages, to how efficient the model used is.")
        st.markdown("* ** Recommender Systems **  - Main page that includes the movie recommender engine")
        st.markdown("* ** Insights **          - Exploratory data analysis shows how we analysing the data sets to summarize their main characteristics, using visuals. EDA is for displaying what the data can tell us beyond the formal modeling.")
        st.markdown("* ** Solution Overview ** - An overview on how the two different algorithims work")
        st.subheader("App Usage")
        st.markdown("Select the type of algorithm you want to use then select three farvorite movies from the drop down list, hit the recommend button and wait for the movie recommendations.")
        st.subheader("Model Performance Evaluation")
        st.markdown("Model evaluation aims to estimate the generalization accuracy of a model on future (unseen) data. Methods for evaluating a model's performance use a test set (i.e data not seen by the model) to evaluate model performance. This evaluation shows total efficiency as scores.")
        st.subheader("The Team:")
        st.markdown(" * Buhle Ntushelo ")
        st.markdown(" * Khanyisa Galela ")
        st.markdown(" * Keane Byrne ")
        st.markdown(" * Olwethu Mkhuhlane ")
        st.markdown(" * Londiwe cele ")


    if page_selection == "Solution Overview":
        st.title("Solution Overview")

        st.title("Content-based Filtering")
        st.image('resources/imgs/content_based.png', width=350)
        st.info("Content-based filtering, also referred to as cognitive filtering, recommends items based on a comparison between the content of the compared items, in this case its movie content, and a user profile. The content of each movie is represented as a set of descriptors or terms. Generally, Content-based filtering, makes recommendations based on user preferences for product features")

        st.title("Collaborative-based Filtering")
        st.image('resources/imgs/collab_based.jpg', width=350)
        st.info("Collaborative filtering filters information by using the interactions and data collected by the system from other users. It's based on the idea that people who agreed in their evaluation of certain items are likely to agree again in the future. Collaborative filtering basically mimics user-to-user recommendations.")

    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.
    if page_selection == "Insights":
        st.title('Our Insights and EDA')
        st.image('resources/imgs/image_insightpage.jpg', width=500)
        st.markdown("## Introduction")
        st.info("""Recommendation sysytems are an integral part to any online user based service platform. In short an alogrithim is 
                    created that will reccomened you items (eg: movies) based on your history with past items.
                    For this challenge we are tasked to create a movie recommendation system, There were two paths that we could take. 
                    Content Based Filtering and Collaborative Filtering, We chose to try out both in which we will get further into 
                    this notebook""")
        st.markdown('## Insteresting insights and EDA')
        st.write('**Top 5 Movies with the Highest Ratings:**')
        st.markdown("""
                    1. Final Recourse (2013)\n
                    2. Lady and the Tramp (2019)\n
                    3. Virasat (1997)\n
                    4. Committed (2014)\n
                    5. Sole Proprietor (2016)\n  """)
        st.write("\n**Distribution for Number of Ratings**")
        st.image('resources/imgs/rating_distibution.PNG', width=400)
        st.image('resources/imgs/distribution_graph.PNG', width=400)
        st.info("""From this we can see that people are more likely to give a movie an * Average * rating more than a great or awful rating.
        This makes sense because most movies are average and not every movie is great or awful. This could also lead us to belive that there may be certain biasy around our data.""")

        st.image('resources/imgs/director_ratings.PNG', width=200)
        st.info("**These are the top 10 directors who have movies with the highest number of ratings Jonathan Nolan has the highest rating. this concludes that his movies are being watched and enjoyed by the users**")

        st.write("**Average of Movie Ratings per Year**")
        st.image('resources/imgs/rating_year.PNG', width=400)

        st.write("**Average Movie Rating per Week**")
        st.image('resources/imgs/ratings_week.PNG', width=400)

        st.write("**Number of ratings per year**")
        st.image('resources/imgs/number_ratings.PNG', width=400)

        st.write("**The Elbow Method**")
        st.image('resources/imgs/elbow.PNG', width=400)
        st.info("We use this visualization in order to obtain the optimal clusters for the data, we look for the * bend point * in the data, as we try to optimize the computational power while still having enough data to accuratly build a model from the data. ")

        st.write("**Dendrogram**")
        st.image('resources/imgs/dendogram.PNG', width=400)
        st.info("This is another way to visualize the data to obtain the optimal amount of clusters similar to the * Elbow Method *, we use this to pick the clusters that will maximize our efficientcy in the modelling process.")
def main():

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["About the app","Data visualisation","Recommender System","About Recommender Systems","Solution Overview"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "About the app":
        st.title("About The App")
        st.markdown("The application is about recomender systems. It recomends movies to a user based on two types of recomender system. Which are Collaborative-filtering and Content-Based filtering. Below is a brief explanation of the recomender systems")		
        st.subheader("Recommender Systems")
        st.image("resources/imgs/image.png")
        st.markdown("**Collaborative-filtering:** Collaborative-filtering items are recommended, for example movies, based on how similar your user profile is to other users’, it finds the users that are most similar to you and then recommends items that they have shown a preference for. This method suffers from the so-called cold-start problem: If new data becomes available, such as the release of a movie, which no-one would seen and subsequently rated, the system will not recommened this movie despite it possibly being a movie that would an apt fit.")
        st.markdown("**Content-based filtering:** This method uses distinguishing attributes of the data, such as genre, plot or cast in this example, as the basis of its recommendation. It does not suffer from a cold-start problem because it works through attributes or tags of the content, such as actors, genres or directors therefore, new movies can be recommended right away.")        
		
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("Describe your winning approach on this page")
        st.markdown(open('resources/Winningsolution.md').read())
        
    if page_selection== "Data visualisation":
        st.title("How Our Movie Data Behaves")
        from PIL import Image
        img4 = Image.open('halloween.jpg')
        st.image(img4, width=500)
        st.subheader('Distribution of Users and Ratings')
        eda = st.radio('Distribution of Data:',('Distribution of Ratings', 'Distribution of Top Users'))
        if eda == ('Distribution of Ratings'):
            from PIL import Image
            img5 = Image.open('rating distribution.jpg')
            st.image(img5, width=500)
            st.write('Ratings above three occur more frequently indicting that users who rate the films are either generous or that users are more likely to rate the films if they found if satisfactory or good')
        if eda == ('Distribution of Top Users'):
            img6 = Image.open('user distribution.jpg')
            st.image(img6, width=500)
            st.write('This data respresents the number of users who rated over 2000 films. Very few users rated many films, the films rated by many users represented the distribution of popular films')
        st.subheader('A Look At The Titles Of Popular Or Influential Films')
        wordcloud = st.radio('Highly Rated Films:',('Highly Rated Films', 'Films With A Low Rating' , 'Films Rated by The Greatest Number Of People'))
        if wordcloud == 'Highly Rated Films':
            img7 = Image.open('highly rated films wcb.jpg')
            st.image(img7,width=700)
        if wordcloud == 'Films With A Low Rating':
            img8 = Image.open('Low rated films wcb.jpg')
            st.image(img8,width=700)
        if wordcloud == 'Films Rated by The Greatest Number Of People':
            img9 = Image.open('rated by most people wcb.jpg')
            st.image(img9, width=700)
    if page_selection=="About Recommender Systems":
            st.title('The Recommender System')
            from PIL import Image
            img = Image.open('learn.jpeg')
            st.image(img, width = 500, caption = 'Data segmentation')
            st.subheader('A Closer Look At Recommender System Models')
            system = st.radio('Choose A Recommender Systems:',('SVD', 'Neural Networks', 'KNN'))
            if system == ('SVD'):
                st.write('Singular Value Decomposition(SVD), is a linear algerbra concept')
                st.write('SVD is a type of matrix decomposition and describes a matrix using its constituent elements. This method is popularly used for compressing, denoising and data reduction.')
                st.write('A = U . Sigma . V^T')
                st.write('A : real matrix m * n')
                st.write('U: matrix m * m')
                st.write('V^T transposed n * n matrix')
                st.write('The Sigma matrix are the singular values of  matrix A. The columns of the U matrix are the left singular vectors; columns of V are the right singular vectors of A.')
                st.write('Iterative numerical methods are used to calculate SVD and every rectangular matrix has a singular value decomposition. SVD is used in the calculation of other matrix operations such as matrix inverse and data reduction, least squares linear regression, image compression and data denoising. The function can be called using the svd() function, which takes in a matrix and returns U, Sigma(as a vector), V^T(transposed matrix)')
                st.write('The svd function is also popularly used for dimensionality reduction where the number of features are greater than the number of observations, the function reduces the data to a smaller subset that are more relevant to the prediction. This is done by selecting the largest singular values in Sigma for the columns and the V^T for the rows. ')
                img1 = Image.open('svd.true.jpg')
                st.image(img1, width = 500)
            if system == ('Neural Networks'):
                st.write(' Neural networks were first proposed in 1944 by Warren McCullough and Walter Pitts and are loosely modelled on the human brain. A Neural network consists of thousands or million of simple processing nodes that are densely interconnected.')
                st.write('Neural networks consists of nodes that feed forward (data moves in one direction). Each node is likely connected to several nodes in the layer beneath it, to which it sends data. For each incoming connection, a node will assign a number known as a weight. Once the node is active it will receive data, the data will be multiplied by the weight, it will then add the resulting products together yielding a single number.')
                st.write('If the data exceeds the threshold value, the node fires, and the adjusted data is sent to the next node in the line.')
                st.write('The weights and thresholds are initially set to random values, the training begins at the bottom layer and works its way through the network until it arrives transformed at the output layer. The weights and threshold are continuously adjusted until the training data reaches an optimal output.')
                st.write('One of the common methods used for activation is called ReLU. ReLU takes a real number as input and will return the maximum between 0 and the number. The ReLU function basically just "turns off" a neuron if its input is less than 0, and is linear if its input is greater than zero. ')
                st.write('Neural networks is an effective algorithm for both supervised and unsupervised learning. Meaning, it can be used on both structured and unstructured data. It has however been incredibly successful on unstructured data. The neural network model is highly effective as the amount of data increases.')
                img2 = Image.open('NN.jpg')
                st.image(img2, width = 500)
            if system == ('KNN'):
                st.write('K-nearest neighbours or KNN, is a non-parametric algorithm, this means that the data distribution contains unspecified parameters, in other words it does not make any assumptions on the underlying data distribution.')
                st.write('This algorithm will use a database in which the data points are separated into several classes to predict the classification of a new sample point. KNN does not require training data points to do its generalisation work.')
                st.write('KNN uses feature similarity to categorise the data outputting a class membership. An object is classified by a majority vote of its neighbours, with the object being assigned to the class most common among its k nearest neighbours.')
                st.write('This method can also be used on a regression model by outputting the average or median of the values of its k nearest neighbours. This method requires k to be specified before the calculations are made.')
                img3 = Image.open('u1.jpg')
                st.image(img3, width = 500)
def main():
    """Render the Streamlit movie recommender app for the selected page.

    A sidebar select box picks one of six pages; on every script run only
    the branch for the chosen page draws widgets:

    * "Recommender System" - the protected template section (left exactly
      as supplied): three favourite movies in, top-10 recommendations out
      via ``content_model`` or ``collab_model``.
    * "About the app" / "Content vs Collaborative" - static markdown read
      from files under ``resources/``.
    * "Data Insights" - pre-rendered charts toggled by checkboxes.
    * "Because You Watched ..." - free-text title lookup through
      ``get_content_based_recommendations`` and ``movie_finder``.
    * "Blockbusters" - ``top_rated`` movies for a chosen genre and year.

    Takes no arguments and returns ``None``; all output goes to Streamlit.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["About the app", "Content vs Collaborative", "Data Insights", "Recommender System", "Because You Watched ...", "Blockbusters"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('First Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------

    # Title banner shared by every custom page. Defined once so the markup
    # cannot drift between pages; the heading is now closed with </h1> to
    # match its opening tag (it used to be closed with </h2>).
    banner_html = """
        <div style="background-color:black;padding:10px">
        <h1 style="color:red;text-align:center;">Movie Recommender App </h1>
        </div>
        """

    def show_banner(caption):
        """Draw the shared title banner followed by an info box with *caption*."""
        st.markdown(banner_html, unsafe_allow_html=True)
        st.info(caption)

    def read_text(path):
        """Return the contents of the text file at *path*, closing it afterwards."""
        with open(path) as handle:
            return handle.read()

    if page_selection == "About the app":
        show_banner("General Information about the app")
        st.markdown(read_text('resources/info.md'))

    # ------------------------------------------------------------------

    if page_selection == "Data Insights":
        show_banner("Insights on the movies")

        st.markdown("The following page contains visuals related to the movies dataset.")

        st.subheader("View visuals:")

        # (checkbox label, image shown while that checkbox is ticked)
        insight_images = (
            ('Distribution of Genres', 'resources/imgs/Genre.PNG'),
            ('Decade of Release of Movies', 'resources/imgs/Decades.PNG'),
            ('Distribution of Ratings', 'resources/imgs/Ratings.PNG'),
            ('Highest Rated Movies (Bayesian Average)',
             'resources/imgs/Highest_Rated.PNG'),
            ('Lowest Rated Movies (Bayesian Average)',
             'resources/imgs/Lowest_Rated.PNG'),
        )
        for label, image_path in insight_images:
            if st.checkbox(label):
                st.image(image_path, use_column_width=True)

    # ------------------------------------------------------------------

    if page_selection == "Because You Watched ...":
        show_banner("Movie Recommender")

        movie_text = st.text_area("Enter your favourite movie:", "Type Here")

        if st.button("Get 10 Recommendations"):
            try:
                with st.spinner('Crunching the numbers...'):
                    reco = get_content_based_recommendations(movie_text)
                    string = '\n'.join(reco)
                    movie_name = movie_finder(movie_text)
                    st.success(f'### BECAUSE YOU WATCHED {movie_name.upper()}:')
                    st.text(string)
            except Exception:
                # Any lookup failure is reported as "movie not found".
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                st.error("Oops! Looks like we cannot find your movie!")

    # -------------------------------------------------------------------

    if page_selection == "Content vs Collaborative":
        show_banner("Explanation on Content-based filtering and Collaborative-based filtering")
        st.markdown(read_text('resources/filtering_info.md'))

    # --------------------------------------------------------------------

    if page_selection == "Blockbusters":
        show_banner("Getting the best 10 movies by year of release and genre")

        # Selectable release years 1980..2019, as strings for the select box.
        year_options = [str(year) for year in range(1980, 2020)]

        st.write('### Select the genre')

        gen = st.selectbox('Genre', genres())

        st.write('### Select the year')

        yen = st.selectbox('Year', year_options)

        if st.button("Get Movies"):
            with st.spinner('Crunching the numbers...'):
                reco = top_rated(yen, gen)
                # An empty string from top_rated is treated as "no matches".
                if reco != "":
                    st.success(f'### THE BEST {gen.upper()} MOVIES OF {yen} ARE:')
                    st.text(reco)
                else:
                    st.error("Oops! Looks like there aren't any movies that fit this description!")
예제 #7
0
def main():
    """Render the Streamlit movie recommender app for the selected page.

    A sidebar radio group picks one of five pages; on every script run only
    the branch for the chosen page draws widgets:

    * "Recommender System" - the protected template section (left exactly
      as supplied): three favourite movies in, top-10 recommendations out
      via ``content_model`` or ``collab_model``.
    * "Information" - project overview, problem statement and usage steps.
    * "EDA and Insights" - embedded Power BI reports toggled by checkboxes.
    * "Prediction Background Information" - how the two recommenders work.
    * "Conclusion" - closing business statement.

    Takes no arguments and returns ``None``; all output goes to Streamlit.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.

    # Creating sidebar
    # you can create multiple pages this way
    st.sidebar.title("Menu")

    page_selection = st.sidebar.radio(label="",
                                      options=[
                                          "Recommender System", "Information",
                                          "EDA and Insights",
                                          "Prediction Background Information",
                                          "Conclusion"
                                      ])

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------

    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------

    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.

    if page_selection == "Information":
        st.title("Information")

        st.image('resources/imgs/Information.png', use_column_width=True)
        st.title("Project Overview")

        # Text corrected: the two learning types are supervised and
        # *unsupervised*, and it is the unsupervised kind that works on
        # unlabelled data (the original said "supervised" in both places).
        st.markdown(
            "Machine Learning (ML) is a subset of Artificial Intelligence (AI), dating as far back as 1959, where computers are trained to pick up patterns and make decisions with little to no human interference. There are two main types, supervised and unsupervised learning. Unsupervised ML algorithms are far more flexible as the datasets used do not provide label values, the computer tries to make sense of the data to compute results. They can be used to build recommender systems."
        )
        st.markdown(
            "A recommender system is an engine or platform that can predict certain choices based on responses provided by users. A great example would be a system on a streaming platform that recommends a movie or show for a user to watch found on their previous viewings or the viewings of other users that have watching habits similar to them. With the increasing use of web services such as Netflix, Showmax, YouTube amongst a few, there is an unfathomable amount of content. It would be a tedious task for a user to search through it all for things that they would enjoy watching. They are also used in other services such as online shoppping stores and networking spaces like LinkedIn."
        )
        st.markdown(
            "A recommender system enhances a user's experience as the luxury of recommendations will save the user the time and effort of having to search through a large catalogue for movies that they would enjoy. This allows for the user to also be exposed to new content, creating an opportunity for further streaming because they are giving an option of content that is meaningful and desireable to them. In fact, most companies make a bulk of their revenue from recommendations. The rating functionality also assists in collecting data that can help the streaming platform establish trends and gather insights from what their users are consuming. This can assist in better content selection and marketing."
        )
        st.title("Problem Statement")
        st.markdown(
            "Build a recommendation algorithm that will use a user's historical preferences to accuartely predict the rating that they will give a movie that they haven't watched."
        )
        st.subheader("how to use this app")
        st.subheader("***************************************************")

        st.markdown("Get started by:")
        usage_steps = (
            "1. Navigating to the sidebar at the top left of this page",
            "2. Choose an option by clicking next to the desired label",
            "3. Select the option you wish to view",
            "4. Get your desired data",
        )
        for step in usage_steps:
            st.markdown(step)
        if st.checkbox('Choose to Preview Example'):
            st.subheader("would you like a recomendation?")

        st.subheader("how to get a recomendation")

    if page_selection == "EDA and Insights":

        st.title("Exploratory Data Analysis")

        st.markdown(
            "Exploratory Data Analysis refers to the critical process of performing initial investigations on data so as to discover patterns,to spot anomalies,to test hypothesis and to check assumptions with the help of summary statistics and diagramatic representations"
        )
        st.text('Below are several ways you can view your data')

        # (checkbox label, embedded Power BI report shown while it is ticked)
        # NOTE(review): 'ratings per movie' and 'all data' embed the same
        # report URL - possibly a copy/paste slip; confirm the intended
        # report for 'all data'.
        report_views = (
            ('popularity and ratings',
             '''<iframe width="600" height="373.5" src="https://app.powerbi.com/view?r=eyJrIjoiYTZlYjU0ZjYtOTVlNy00YzNhLTkzMDktYjAwZDBhZDNjOTI4IiwidCI6ImIzN2MxY2IzLTRjNmQtNDBjNi05NTljLTFhOGRmN2RjNTVlMCJ9" frameborder="0" allowFullScreen="true"></iframe>'''),
            ('ratings per movie',
             '''<iframe width="600" height="486" src="https://app.powerbi.com/view?r=eyJrIjoiNzY2ZDdiZWMtNzc0Yy00YTI4LWJiZTktYmRjZTkwOGNlZjU5IiwidCI6ImIzN2MxY2IzLTRjNmQtNDBjNi05NTljLTFhOGRmN2RjNTVlMCJ9" frameborder="0" allowFullScreen="true"></iframe>'''),
            ('ratings per titles',
             """<iframe width="600" height="373.5" src="https://app.powerbi.com/view?r=eyJrIjoiMjlmNmNkNGYtOWU3MS00YzkyLWIyNGItN2Y3Njg3YWI1MmM1IiwidCI6ImIzN2MxY2IzLTRjNmQtNDBjNi05NTljLTFhOGRmN2RjNTVlMCJ9&pageName=ReportSection430ef572e305c1e59141" frameborder="0" allowFullScreen="true"></iframe>"""),
            ('genres and years',
             """<iframe width="600" height="373.5" src="https://app.powerbi.com/view?r=eyJrIjoiNmFjMTFiMWQtMTJhMC00ZmI5LThjNjYtOWI2Yzc3NDY0YjQzIiwidCI6ImIzN2MxY2IzLTRjNmQtNDBjNi05NTljLTFhOGRmN2RjNTVlMCJ9" frameborder="0" allowFullScreen="true"></iframe>"""),
            ('popularity and word cloud',
             """<iframe width="600" height="373.5" src="https://app.powerbi.com/view?r=eyJrIjoiNmRjOTM5MjItNTUyMy00YjNiLWE1MTMtODEzNjA2MTMwYWZjIiwidCI6ImIzN2MxY2IzLTRjNmQtNDBjNi05NTljLTFhOGRmN2RjNTVlMCJ9" frameborder="0" allowFullScreen="true"></iframe>"""),
            ('all data',
             '''<iframe width="600" height="486" src="https://app.powerbi.com/view?r=eyJrIjoiNzY2ZDdiZWMtNzc0Yy00YTI4LWJiZTktYmRjZTkwOGNlZjU5IiwidCI6ImIzN2MxY2IzLTRjNmQtNDBjNi05NTljLTFhOGRmN2RjNTVlMCJ9" frameborder="0" allowFullScreen="true"></iframe>'''),
        )
        # Each checkbox is evaluated independently. The previous if/elif
        # chain stopped calling st.checkbox for every option after the
        # first ticked one, so those checkboxes were not drawn on the next
        # rerun.
        for label, iframe_html in report_views:
            if st.checkbox(label):
                st.markdown(iframe_html, unsafe_allow_html=True)

    if page_selection == "Prediction Background Information":
        st.title("Actual Model ")
        st.image('resources/imgs/actua.png', use_column_width=True)
        st.markdown(
            "As seen in the recommender system page, the movie recommendations were based on both content based and collaborative based algorithms."
        )
        st.markdown(
            'Content based algorithms are focused on the metadata of the movies, e.g. genre, cast, directors and genome tags. It works by calculating similarity score between a movie of interest and movies that a user has watched and recommends to the user, movies with a high score'
        )
        # Text corrected: the original read "The higher lower the score",
        # which contradicted itself; a higher cosine similarity means the
        # two vectors are more alike.
        st.markdown(
            'There are many different measures to use as a similarity score, but in the development of this App, the cosine similarity was used for both content based and collaborative based recommenders. Cosine similarity measures the cosine of the angle between two vectors (which are feature columns of the movies being compared in our case) and returns the how close they are based on the angle between them. The higher the score the higher the similarity and thus the more chances that a user will like/rate the two movies the same.'
        )
        st.markdown(
            'Collaborative based algorithms work by comparing a user of interest with other users who have the same movie preferences as that of the user of interest and recommends a new movie to the user based on what the said users have watched.'
        )

    if page_selection == "Conclusion":
        st.title("Conclusion")
        st.image('resources/imgs/conclusion.png', use_column_width=True)
        st.write(
            "Our recommender system is an effective and cost saving tool as just like text classification, it will allow business to have different advertising and marketing campaigns for different clients instead of having a one size fits all campaign"
        )
def main():
    """Render the Streamlit movie recommender app for the selected page.

    The sidebar shows a picture, a header and a select box that picks one of
    six pages; on every script run only the branch for the chosen page draws
    widgets:

    * "Recommender System" - the protected template section (left exactly
      as supplied): three favourite movies in, top-10 recommendations out
      via ``content_model`` or ``collab_model``.
    * "Solution Overview" / "About this app" - images and markdown files
      from ``resources/``.
    * "Did you know?" - pre-rendered charts with short commentary.
    * "Find a movie" - charts by genre (``build_chart``), by year
      (``popular_per_year``) or by count (``popularity_number``), plus a
      random pick from ``title_list``.
    * "Profile" - a simple details form.

    Takes no arguments and returns ``None``; all output goes to Streamlit.
    """

    def read_text(path):
        """Return the contents of the text file at *path*, closing it afterwards."""
        with open(path) as handle:
            return handle.read()

    def parse_whole_number(raw_text):
        """Return *raw_text* as an int, or None when it is not a whole number."""
        try:
            return int(raw_text)
        except ValueError:
            return None

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    st.sidebar.image('resources/imgs/Movie_pic.jpg', use_column_width=True)
    st.sidebar.header('Movie Recommender Engine')
    page_options = [
        "Recommender System", "Find a movie", "Solution Overview",
        "Did you know?", "About this app", "Profile"
    ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------

    # Solution overview page: images interleaved with markdown sections.
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.image('resources/imgs/first.png', use_column_width=True)
        st.image('resources/imgs/second.png', use_column_width=True)
        st.markdown(read_text('resources/About_solution.md'))
        st.image('resources/imgs/third.jpg', use_column_width=True)
        st.markdown(read_text('resources/About2.md'))
        st.image('resources/imgs/fourth.png', use_column_width=True)
        st.markdown(read_text('resources/About3.md'))
        st.image('resources/imgs/fifth.png', use_column_width=True)
        st.markdown(read_text('resources/About4.md'))

    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.

    # "Did you know?" page: static insight charts with commentary.
    if page_selection == "Did you know?":
        st.title("Did you know?")
        st.markdown(
            'This section contains insights from the Movie Lens dataset which was used to build the Movie recommender system.'
        )
        # top 3 most watched movies
        st.markdown(
            "<h2 style='text-align: left;color: #000000;'>Top 3 most watched movies</h2>",
            unsafe_allow_html=True)
        st.image('resources/imgs/top3.png', use_column_width=True)
        trailer_links = (
            'Watch **[The Shawshank Redemption](https://www.youtube.com/watch?v=6hB3S9bIaco)**',
            'Watch **[Forrest Gump](https://www.youtube.com/watch?v=bLvqoHBptjg)**',
            'Watch **[Pulp Fiction](https://www.youtube.com/watch?v=s7EdQ4FqbhY)**',
        )
        for link in trailer_links:
            st.markdown(link)

        # bar graph most watched
        st.image('resources/imgs/top_10.png', use_column_width=True)
        st.info(
            "The 1990's were a great decade for films, we can see that 9 out of the top 10 movies were made then."
        )

        # word cloud for most common genre
        st.image('resources/imgs/genres.png', use_column_width=True)
        st.info(
            "Above, we can see the most common film genres. It clear that Sci-fi, Comedy-Drama and Action-Adventure are popular."
        )

        # word cloud for most common tags
        st.image('resources/imgs/Tags.png', use_column_width=True)
        st.info(
            "'based-on'(a true story), 'sci-fi', and 'twist ending' are commonly occuring tags."
        )

        # word cloud for most common plot_key_words
        st.image('resources/imgs/plot_key_words.png', use_column_width=True)
        st.info(
            'In the common plot key words, again we see "based-on", suggesting that movies based on true stories or books are popular. We also see "female protagonist" and "front nudity" which supports the idea that sex sells.'
        )

        # word cloud for most common directors
        st.image('resources/imgs/Director.png', use_column_width=True)
        st.info(
            'The most occuring directors include Michael Crichton (authored and directed Jurassic Park), Quentin Tarantino (directed Pulp Fiction) and Lilly Wachowski (directed the Matrix). '
        )

        # word cloud for movies that were watched once
        st.image('resources/imgs/watched_once.png', use_column_width=True)
        st.info('Some movies were only watched once :cry:')

        # rating distribution
        st.image('resources/imgs/rating_hist.png', use_column_width=True)
        st.info(
            'From the plot above it can be observed that the integer values have taller bars than the floating values since most of the users assign rating as integer value i.e. 1, 2, 3, 4 or 5. Furthermore, it is evident that the data has a weak normal distribution with the mean of around 3.5 .'
        )

        # joint plot
        st.image('resources/imgs/jointplot.png', use_column_width=True)
        st.info(
            'The graph shows that, in general, movies with higher average ratings actually have more number of ratings, compared with movies that have lower average ratings.'
        )

        # average rating per year
        st.image('resources/imgs/ave_ratings.png', use_column_width=True)

        # number of ratings per ratings
        st.image('resources/imgs/number_ratings.png', use_column_width=True)
        st.info(
            'It seems that a dip in average ratings corresponds with a peak in ratings count.'
        )

    # "Find a movie" page
    if page_selection == "Find a movie":
        st.image('resources/imgs/suprise.png', use_column_width=True)
        # The movies CSV used to be loaded here into a local that was never
        # read; that per-rerun file read has been dropped.
        st.subheader('Find me a movie')
        select_opt = ['Genre', 'Year', 'Number of most popular titles']
        select_opt_select = st.selectbox('Select an option', select_opt)
        if select_opt_select == 'Genre':
            genre_list = [
                'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
                'Crime', 'Drama', 'Fantasy', 'Horror', 'IMAX', 'Romance',
                'Musical', 'Mystery', 'Sci-Fi', 'Thriller', 'War'
            ]
            genre_select = st.radio('Top 10 movies per genre', genre_list)
            # The radio value is always one of genre_list, and every genre
            # was charted by passing its own name, so pass it straight on.
            st.write(build_chart(genre_select))
        if select_opt_select == 'Number of most popular titles':
            pop_no_select = st.text_input(
                "Enter the number of titles you'd like to see", "Type Here")
            if st.button("Search titles"):
                # The placeholder text (or any non-numeric entry) used to
                # reach int() and raise ValueError; report it instead.
                title_count = parse_whole_number(pop_no_select)
                if title_count is None:
                    st.error("Please enter the number of titles as a whole number.")
                else:
                    st.write(popularity_number(title_count))

        if select_opt_select == 'Year':
            year = st.text_input("Enter the year", "Type Here")
            if st.button("Search for the top movies from this year"):
                release_year = parse_whole_number(year)
                if release_year is None:
                    st.error("Please enter the year as a whole number, e.g. 1995.")
                else:
                    st.write(popular_per_year(release_year))

        st.subheader('Here, a movie will be randomly selected for you')
        if st.button('Suprise me'):
            st.write(random.choice(title_list))
            st.balloons()

    if page_selection == 'About this app':
        st.subheader('About this App')
        st.markdown(read_text('resources/background.md'))

    if page_selection == 'Profile':
        name = st.text_input("Name", "")
        # email and password are collected but not read anywhere in this
        # function; only the name is echoed back.
        email = st.text_input("Email", "")
        password = st.text_input("Password", "", type='password')
        if st.button("Confirm details"):
            st.write(f'Thanks {name} your details are confirmed')

    # side bar description of app (drawn on every page)
    st.sidebar.info(
        'This app has been developed by SS4_JHB_Unsupervised team. For more info see **About this app** :movie_camera:'
    )
    st.sidebar.image('resources/imgs/EDSA_logo.png', use_column_width=True)
예제 #9
0
def main():
    """Render the Streamlit movie-recommender app for the sidebar-selected page.

    The sidebar offers "Home Page", "Movie Recommenders" and "Meet the team".
    The recommender section under the do-not-alter banner is kept exactly as
    it was written.

    NOTE(review): the algorithm radio also offers 'Popularity Based', but no
    branch handles that choice, so selecting it shows no "Recommend" button.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Home Page", "Movie Recommenders", "Meet the team"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == 'Home Page':
        st.markdown(HOME_PAGE, unsafe_allow_html=True)
        #st.markdown(HOME_2,unsafe_allow_html=True)
    if page_selection == "Movie Recommenders":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering', 'Popularity Based'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('First Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    # NOTE(review): "Statistics and insights" and "Solution Overview" are not
    # listed in page_options, so the two branches below cannot be reached
    # from the sidebar as the app stands. Add them to page_options if these
    # pages are meant to be visible.
    if page_selection == "Statistics and insights":
        insight_selection = st.selectbox(
            'Data Exploration',
            ['Raw Data', 'Distribution plot for ratings', 'Top 15 Genres',
             'Ratings over time (1995 - 2019)',
             'Popular words in movie descriptive data'])
        # Only the "Raw Data" choice has content; the others render nothing.
        if insight_selection == "Raw Data":
            # RAW_DATA is a template whose '$$' and '&&' markers are filled in
            # below, in that order.
            heading = 'The data set use for training'
            # The list is closed with </ul>; the original ended with a second
            # opening <ul>, leaving the list unterminated. The userId bullet
            # previously repeated the movie description.
            column_notes = (
                "<ul>This is the training dataset it contains the following values"
                "<li><b>moviesId</b> - the id values given to the movie</li>"
                "<li><b>userId</b> - the id values given to the user</li></ul>")
            bootstrap_block_1 = RAW_DATA.replace('$$', heading)
            bootstrap_block_1 = bootstrap_block_1.replace('&&', column_notes)
            st.markdown(bootstrap_block_1, unsafe_allow_html=True)
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("Describe your winning approach on this page")

    if page_selection == 'Meet the team':
        st.markdown(ABOUT, unsafe_allow_html=True)
예제 #10
0
# (name, role, photo URL) for every card on the "Meet The Rollicks" page.
_ROLLICKS_TEAM = (
    ("Ritshidze Nethenzheni", "Data Scientist/Project Lead",
     "https://ca.slack-edge.com/TSHE6M7T9-USM78CA85-16e0da239ced-512"),
    ("Mandla Solomon", "ML Engineer",
     "https://media-exp1.licdn.com/dms/image/C4D03AQGMOiDJlhjN-A/profile-displayphoto-shrink_200_200/0?e=1600905600&v=beta&t=iWkS-BElbv8USxyWkGveRTZFJzRdWGpH1pgwUSNetvI"),
    ("Bongani Msimanaga", "Data Engineer",
     "https://media-exp1.licdn.com/dms/image/C5603AQEEzz8gjEkK1w/profile-displayphoto-shrink_200_200/0?e=1600905600&v=beta&t=k9xAqxxsU9qQ2JUHZB6TH6HdK9duyUgi7FCWX6CfYUc"),
    ("Chris Mahlangu", "Data Scientist",
     "https://media-exp1.licdn.com/dms/image/C4E03AQGLQHxMNcVgLQ/profile-displayphoto-shrink_200_200/0?e=1600905600&v=beta&t=sIG5IeSFmZgFcI2KLlBFjrSQn62Zsb4i_YBcKu_0fbY"),
    ("Evans Marema", "Data Scientist",
     "https://ca.slack-edge.com/TSHE6M7T9-USB324Y81-fbaf5dc6b1b0-512"),
)

# The three social icons shown on every card (all links are "#" placeholders).
# Each <li> stays on its own line so the inline icons keep a space between them.
_ROLLICKS_SOCIAL_LINKS_HTML = """<ul>
<li style="display:inline;"><a href="#"><img border="0" alt="Twitter" src="https://image.flaticon.com/icons/svg/1384/1384017.svg" width="25" height="25"></a></li>
<li style="display:inline;"><a href="#"><img border="0" alt="Linkein" src="https://image.flaticon.com/icons/svg/1384/1384014.svg" width="25" height="25"></a></li>
<li style="display:inline;"><a href="#"><img border="0" alt="Github" src="https://image.flaticon.com/icons/svg/25/25231.svg" width="25" height="25"></a></li>
</ul>"""

# One team card. The font name is single-quoted: the original nested double
# quotes inside the double-quoted style attribute, which ended the attribute
# early and dropped the font-family, font-size and margin-top rules.
# NOTE(review): the hand-written cards differed slightly (one used
# "position: sticky", three used "position: relative" on the icon wrapper);
# they are unified here, which is believed to be render-neutral - confirm
# visually.
_ROLLICKS_CARD_TEMPLATE = """<div style="background: #f0f2f6;border-radius: 5%;margin: 5px;margin-bottom: 50px;width: 300px;padding: 20px;line-height: 20px;color: #8e8b8b;position: relative;">
<div style="position: absolute;top: -50px;left: 50%;transform: translateX(-50%);width: 100px;height: 100px;border-radius: 50%;background: #acaeb0;">
<img src="{photo_url}" alt="Team_image" style="width: 100px;height: 100px;padding: 5px;border-radius: 50%">
</div>
<h3 style="color: black;font-family: 'Comic Sans MS', cursive, sans-serif;font-size: 26px;margin-top: 50px;">{name}</h3>
<p style="color: #6770c2;margin: 12px 0;font-size: 17px;text-transform: uppercase;">{role}</p>
<div style="justify-content: center;margin-left: auto;margin-right: auto;">
{social_links}
</div>
</div>"""


def _rollicks_team_page_html():
    """Return the HTML for the "Meet The Rollicks" page: heading plus one card per member.

    The markup contains no blank lines and starts with a <div>, so Markdown
    treats the whole string as a single raw-HTML block.
    """
    cards = "\n".join(
        _ROLLICKS_CARD_TEMPLATE.format(
            name=name,
            role=role,
            photo_url=photo_url,
            social_links=_ROLLICKS_SOCIAL_LINKS_HTML)
        for name, role, photo_url in _ROLLICKS_TEAM)
    return (
        '<div style="margin-top: 50px;">\n'
        '<h1 style="font-size: 30px;margin-bottom: 60px;text-align: center;">Meet The Rollicks</h1>\n'
        '<div style="  display:flex;justify-content: center;width: auto;text-align: center;flex-wrap: wrap;">\n'
        + cards + '\n'
        '</div>\n'
        '</div>')


def main():
    """Render the Rollick Streamlit app for the page chosen in the sidebar.

    Pages: "Welcome", "Recommender System", "View EDA", "Solution Overview"
    and "Meet The Rollicks". The recommender section under the do-not-alter
    banner is kept exactly as it was written.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Welcome","Recommender System","View EDA",
                    "Solution Overview", "Meet The Rollicks"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation

        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------------

    st.sidebar.info('Click dropdown above to begin :popcorn:')

    # Solution Overview: centred heading, then two write-ups with an image each.
    if page_selection == "Solution Overview":
        html_temp = """
        <h1 style="font-size: 30px;margin-bottom: 10px;text-align: center;">Solution Overview</h1>
        <div style="background-color:;padding:10px">
        </div>"""
        st.markdown(html_temp, unsafe_allow_html=True)

        st.markdown("""
            ### Why a moive recommendation engine?
            + Users often struggle to find suitable movies due to the increasing amount of movie variation. As a result, recommender systems are useful for helping customers choose their preferred movies with the existing features. Recommender systems are an essential feature in our digital world, as users are often overwhelmed by choice and need help finding what they're looking for and are amongst the most popular applications of unsupervised learning. This following is an unsupervised machine learning project which seeks to predict the rating that a user will rate for a movie they have not yet viewed based on historical preferences.
            ### Model Evaluation
            + To verify the quality of the recommender system, we adopted the root of mean squared error (RMSE) as our evaluation metric. RSME is used to measure the differences between the model predicted values and the test dataset observed values. Technically it's the square root of the average of the squares of the errors. The lower it is, the better the model will be.
            ### Collaborative Based Filtering: Singular Value Decomposition (SVD)
            + Most collaborative recommender systems perform poorly when dimensions in data increases this is often referred to as the “curse of dimensionality”. There are many dimensionality reduction algorithms such as principal component analysis (PCA) and linear discriminant analysis (LDA), but in this project, SVD algorithm was used. SVD is a well-known matrix factorization method. At a high level, SVD is an algorithm that decomposes a matrix 𝐴A into the best lower rank (i.e. smaller/simpler) approximation of the original matrix 𝐴A. For more information on SVD in recommender systems. Mathematically, it decomposes A into a two unitary matrices and a diagonal matrix.
            """)
        st.image('resources/imgs/collaborative.png', use_column_width=True)
        st.markdown("""
            ### Content Based Filtering
            + Content here refers to the content or attributes of the products or item of interest. So, the idea in content-based filtering is to tag products using certain keywords, understand what the user likes, look up those keywords in the database and recommend different products with the same attributes.
            + However in this notebook what we do is to try and figure if a certain user is going to like a certain movie, and whether or not they like it is gauged on the rating the would give the movie from 0 (dislike the movie) to 5 (highly liking the movie) based on movie meta-data data like cast, director and keywords.
            + So We altermately want to predict rating of a movie based on its contents, basically appraoching this like we would a classification problem. With that in mind the idea we had is to extract all meta-data from the dataset and and merge everything to to data-frames, one containing movieId, megered meta-data and weighted-rating for each movie in the the train dataset and the other movieId and merged meta-data for each movie in the test dataset.
            """)
        st.image('resources/imgs/1_O_GU8xLVlFx8WweIzKNCNw.png',
                 use_column_width=True)

    # Landing Page: logo, mascot, then the welcome blurb.
    if page_selection == "Welcome":
        st.image('resources/imgs/our_logo.png', use_column_width=True)
        html_temp = """
        <div style="background-color:;padding:10px">
        <h3 style="color:#16284c;text-align:center;">Welcome to Rollick, A Machine-Learning Movie Recommender Engine. Our platform helps you find movies you will like using a recommendation ML model through rated movies to build a custom taste profile, then recommends other movies for you to watch based on preselections.</h3>
        </div>"""
        st.image('resources/imgs/rollick_mascot.png', use_column_width=True)
        st.markdown(html_temp, unsafe_allow_html=True)

    # Exploratory Data Analysis Page
    if page_selection == "View EDA":
        html_temp = """
        <div style="background-color:;padding:10px">
        <h1 style="font-size: 30px;margin-bottom: 10px;text-align: center;">Exploratory Data Analysis</h1>
        </div>"""
        st.markdown(html_temp, unsafe_allow_html=True)

        st.write("### Training data for the model and visualisations to obtain insights")
        st.write('')

        @st.cache(persist=True)
        def explore_data(dataset):
            """Read the CSV at *dataset* into a DataFrame (cached by Streamlit)."""
            return pd.read_csv(dataset)

        tab = ['Visual Data & Observations', 'View Raw Data']
        selection_info = st.radio(label="Select Below", options=tab)

        if selection_info == "Visual Data & Observations":
            # The if/elif chain is kept as written: a checkbox is only drawn
            # while every checkbox before it is unticked, so ticking one hides
            # the ones after it.
            if st.checkbox("User Ratings"):
                st.markdown("""
            ### Observations
            + Movie that was rated the most by users is "Great Performances" Cats (1998) with the rating of 2.0, this can also tells us that the movie is being watched by most of the users as they have given it a rating.
            + Having that in mind we can draw some insights that the movie is most prefered compared to Female Pleasure (2018) which is rated 4.0 by only a single user.
            + Some movies are rated high but only by a single user.
            + The joint plot shows that one user may give a high single rating for that movie by looking at number of rating.
            """)
                st.image('resources/imgs/Webp.net-resizeimage.png',
                         use_column_width=False)
                st.image('resources/imgs/Webp.net-resizeimage (2).png',
                         use_column_width=False)

            elif st.checkbox("Ratings Per Genre"):
                st.markdown("""
            ### Observations
            + The top 3 most popular movie genres in terms of ratings are Drama, Comedy and Action respectively, with documentary being the least popular genre.
            + We can also tell that on our genre dataset some movie genres were not recorded,of which it maybe due to while rating the user forgot to pick the genre of that movie.
            """)
                st.image('resources/imgs/genre.png', use_column_width=True)

                st.markdown("""
            ### Observations
            + The top 3 genre are Drama ,Comedy and Thriller when looking at keyword occurance.Which tell us that these are the most prefered genres as those words still include the most rated genre.
            + Word like Romance and Action still looked to be bold or emphasised which shows that these genres are still amoungst the top viewed genres taking into account some movie genres are not liststed.Which shows that users still prefer to watch such movies.
            + The least viewed genre is Western and War ,which is displayed by the size of each word.
            """)
                st.image('resources/imgs/erwwsf.png', use_column_width=True)

            elif st.checkbox("Movie Release Distribution"):
                # NOTE(review): no image accompanies these observations.
                st.markdown("""
            ### Observations
            + Although the train dataset does not represent the entire ccollection of movies released since the making of movies, it gives an indication of how the movies were released.
            + The gradual increase in movie releases from the early 1900s onwards with a sharp rise from early 2000s.
            """)

            elif st.checkbox("Top 10 Most Rated Movies"):
                st.markdown("""
            ### Observations
            + Although the train dataset does not represent the entire collection of movies released since the making of movies, it gives an indication of how the movies were released. There has been a gradual increase in movie releases from the early 1900s onwards with a sharp rise from early 2000s.
            """)
                st.image('resources/imgs/Movie.png', use_column_width=True)

        if selection_info == "View Raw Data":
            st.markdown("""
            ### Train Data Set
            Below is the data we used to train our model in a csv file format.

            """)
            # Loaded here rather than on every EDA render: this is the only
            # tab that uses the frame.
            st.dataframe(explore_data(rating))

    # Meet the Team.
    if page_selection == "Meet The Rollicks":
        # Kept from the original: an empty (whitespace-only) sidebar element.
        st.sidebar.markdown("\n\t\t\t")
        st.markdown(_rollicks_team_page_html(), unsafe_allow_html=True)
def main():
    """Render the Streamlit app: recommender, solution overview and data analysis.

    The recommender section under the do-not-alter banner is kept exactly as
    it was written. The "Data Analysis" page reads the file-level frames
    ``df_movies`` and ``df_merged``.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Recommender System", "Solution Overview", "Data Analysis"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("Describe your winning approach on this page")

    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.

    # -------------------------------------------------------------------

    if page_selection == "Data Analysis":
        st.title("Exploratory Data Analysis")

        # One row per (movie, genre) pair: the pipe-separated genres string of
        # each movie is split and flattened. Built directly from the two
        # columns so no intermediate frame is modified along the way.
        genre_rows = [
            (movie_id, genre)
            for movie_id, genres in zip(df_movies['movieId'],
                                        df_movies['genres'])
            for genre in genres.split('|')
        ]
        movies_genres = pd.DataFrame(genre_rows, columns=['movieId', 'genres'])

        # Genres from most common to least common.
        genre_fig = plt.figure(figsize=(15, 10))
        plt.title('Most common genres\n', fontsize=20)
        genre_order = movies_genres['genres'].value_counts(
            ascending=False).index
        sns.countplot(y="genres",
                      data=movies_genres,
                      order=genre_order,
                      palette='Blues_r')
        st.pyplot(genre_fig)
        # Close the figure once rendered; otherwise pyplot keeps every figure
        # alive and they pile up across Streamlit reruns.
        plt.close(genre_fig)

        # Mean rating for each distinct value of df_merged['genres'].
        rating_fig = plt.figure(figsize=(15, 10))
        plt.title('Average rating per genre\n', fontsize=20)
        df_merged.groupby(['genres'])['rating'].mean().plot(kind="bar")
        st.pyplot(rating_fig)
        plt.close(rating_fig)

        st.write("Top 10 users with the most ratings")
        # value_counts() is already sorted by descending count. Naming the
        # column explicitly avoids depending on the name pandas gives the
        # resulting Series ('userId' before pandas 2.0, 'count' from 2.0 on),
        # which the previous column-reindexing approach relied on.
        top_df = (df_merged['userId'].value_counts()
                  .head(10)
                  .rename_axis('userId')
                  .to_frame('No of ratings'))
        st.table(top_df)
예제 #12
0
def main():
    """Render the Streamlit app for the page chosen in the sidebar.

    Pages: "Recommender System", "Solution Overview" (with exploratory data
    analysis widgets), "Search Your Movie" and "About App". CSV files are
    read through the file-level ``explore_data`` helper.
    """

    page_options = [
        "Recommender System", "Solution Overview", "Search Your Movie",
        "About App"
    ]
    movies_csv = 'resources/data/movies.csv'
    ratings_csv = 'resources/data/ratings.csv'

    page_selection = st.sidebar.selectbox("Choose Option", page_options)

    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        algorithm = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('First Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation. Both algorithms
        # share one button and one rendering path; only the model differs.
        if st.button("Recommend"):
            try:
                with st.spinner('Crunching the numbers...'):
                    if algorithm == 'Content Based Filtering':
                        recommend = content_model
                    else:
                        recommend = collab_model
                    top_recommendations = recommend(movie_list=fav_movies,
                                                    top_n=10)
                st.title("We think you'll like:")
                for rank, title in enumerate(top_recommendations, start=1):
                    st.subheader(str(rank) + '. ' + title)
            except Exception:
                # `except Exception` rather than a bare `except`, so that
                # BaseException-derived signals are not reported as a model
                # failure. The message is written as adjacent literals: the
                # earlier backslash continuation embedded the next line's
                # indentation in the text.
                st.error("Oops! Looks like this algorithm does't work. "
                         "We'll need to fix it!")

    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("Describe your winning approach on this page")
        st.header("Exploratory Data Analysis")
        st.subheader("Dataset")

        if st.checkbox("Preview DataFrame"):
            data = explore_data(movies_csv)
            if st.button("Head"):
                st.write(data.head())
            if st.button("Tail"):
                st.write(data.tail())

        # Dimensions
        if st.checkbox("Show Dimensions"):
            if st.button("Rows"):
                data = explore_data(movies_csv)
                st.text("Length of Rows")
                st.write(len(data))
            if st.button("Columns"):
                data = explore_data(movies_csv)
                st.text("Length of Columns")
                st.write(data.shape[1])

        # Bar Plot
        if st.checkbox("Bar Plot"):
            if st.button("Ratings"):
                data = explore_data(ratings_csv)
                st.bar_chart(data.rating.value_counts())
            if st.button("Genres"):
                data = explore_data(movies_csv)
                # Count every genre across all pipe-separated genre strings.
                genre_counts = Counter(genre
                                       for genres in data['genres']
                                       for genre in genres.split("|"))
                ranked = genre_counts.most_common()  # descending by count
                x_val = [genre for genre, _ in ranked]
                y_val = [count for _, count in ranked]

                # Draw on an explicit figure and hand it to Streamlit,
                # instead of plt.show() plus the implicit global figure.
                fig, ax = plt.subplots()
                ax.barh(x_val, y_val)
                st.pyplot(fig)
                plt.close(fig)

        # Plot Hist: number of ratings per hour of day.
        if st.checkbox("Histogram"):
            # The previous code read data[''] (no such column) from a `data`
            # variable that was unset unless another widget had run first.
            ratings = explore_data(ratings_csv)
            if 'timestamp' in ratings.columns:
                # NOTE(review): assumes 'timestamp' holds Unix epoch seconds
                # (the MovieLens convention) - confirm against the data file.
                rated_at = pd.to_datetime(ratings['timestamp'], unit='s')
                hist_values = np.histogram(rated_at.dt.hour,
                                           bins=24,
                                           range=(0, 24))[0]
                st.bar_chart(hist_values)
            else:
                st.warning("The ratings file has no 'timestamp' column, "
                           "so the hourly histogram cannot be drawn.")

        # Top 10 Movies per User
        if st.checkbox("Highest Rated Movies Per User"):
            train = explore_data('resources/data/train.csv')
            movies = explore_data(movies_csv)
            user_input = st.number_input("Insert UserId")
            if user_input:
                merged = pd.merge(train, movies, on='movieId')
                # Move the last two columns of the merged frame to the front.
                cols = merged.columns.tolist()
                merged = merged[cols[-2:] + cols[:-2]]
                user_ratings = merged[merged['userId'] == user_input]
                st.write(
                    user_ratings.sort_values(by='rating',
                                             ascending=False).head(10))

    # About
    if page_selection == "About App":
        st.subheader("Movie Recommender")
        st.text("Built with Streamlit")
        st.text('Group SY5')

    if page_selection == "Search Your Movie":
        data = explore_data(movies_csv)
        # A radio keeps its value across reruns. The earlier st.button guards
        # are True only for the single rerun after the click, so the text
        # inputs nested under them vanished as soon as the user typed.
        search_mode = st.radio("Search", ('Based On Title', 'Based On Genre'))
        if search_mode == 'Based On Title':
            user_input = st.text_input("Movie Title", "Toy Story (1995)")
            st.write(data[data['title'] == str(user_input)])
        else:
            user_input = st.text_input("Movie Genre")
            if user_input:
                # Genres as space-separated text, held in a local Series so
                # the frame returned by explore_data is not modified.
                genre_text = data['genres'].str.replace('|', ' ', regex=False)
                # Literal, case-insensitive match: user text is not a regex.
                matches = genre_text.str.contains(user_input,
                                                  case=False,
                                                  na=False,
                                                  regex=False)
                st.write(data[matches])
def main():
    """Render the Streamlit movie-recommender app.

    A sidebar select box chooses one of five pages: a static home image,
    the recommender (content-based or collaborative), an interactive
    year/genre filter with a YouTube search form, a page of pre-rendered
    data-analysis images, and a contact page. Everything is drawn as a
    side effect through the ``st`` API; nothing is returned.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = [
        "Home", "Recommender System", "Interactive Movie Recommender",
        "Data Analysis & Insights", "Contact Us"
    ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)

    if page_selection == "Home":
        st.image('resources/imgs/Home.png', width=900)

    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        #movie_1 = st.selectbox('Fisrt Option',title_list[1:300])
        #movie_2 = st.selectbox('Second Option',title_list[550:555])
        #movie_3 = st.selectbox('Third Option',title_list[311:540])

        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------
    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Data Analysis & Insights":
        # This page only shows pre-rendered images with commentary; no
        # plotting library is needed at runtime.
        st.title("Data Analysis & Insights")
        st.subheader('Understanding the data')
        st.write(
            "Below we explore the data through visualisation to better understand the industry it represents and how it has changed over the years"
        )

        st.warning(
            'This dataset contains over 48 000 unique movies with release dates all the way from 1874, around the time when motion pictures were first invented, to 2019 when Avengers: Infinity War part II rocked the big screen'
        )

        # Rating distribution
        st.write(
            'Although ratings were only created/collected from 1995 to 2019, almost every movie in the dataset has numerous user ratings. The graph below illustrated the trend of high average ratings in general with an overall average rating of 3.5'
        )
        st.image('resources/imgs/ratings_dist.png', width=900)

        # Number of ratings vs ratings scatterplot - more ratings = higher average rating (trend)
        st.write(
            'The plot below illustrates a key trend in the film industry: The more people that view a movie the higher probability it has of having a high average rating. This observation is a key driving point behind marketing strategies in the film industry worldwide'
        )
        st.image('resources/imgs/ratings_scatter.png')

        # Lineplot of movies release from 1874 - 2019
        st.write(
            'The following line graph visualises the trend in number of movies released per year since the first motion pictures were released around 1874. It is important to note that this dataset does not contain every movie ever released (and heavily favours US-released movies), meaning that even though the trend it exposes exists one cannot read off values for any specific year(s) accurately.'
        )
        st.write(
            "The dataset also doesn't contain many movies after 2016, which should not be be interpreted as a reduction in releases around that time."
        )
        st.image('resources/imgs/movies_per_year.png', width=1000)

        # Wordcloud of most prominent release years
        st.write(
            'This word-cloud depicts the years with singularly the most recorded releases in this dataset.'
        )
        st.image('resources/imgs/wordcloud_year.png', width=700)

        # Genre wordcloud
        st.write(
            "This wordcloud similarly depicts the most prominent genres represented in this dataset. Comedy, Drama, Romance, and Sci-Fi dominate with Action, Thriller, Crime, and Drama following closely."
        )
        st.image('resources/imgs/wordcloud_genre.png', width=700)

        # Ratings by Day of Week
        st.markdown(
            "The following graph shows the trends in day-of-week rating of movies in this dataset. Sundays and Saturdays are understandably most prominent, though the step-wise decrease from Sunday to Friday indicates that the overall rating trend here might simply be an artefact of the manner in which the dataset was constructed (everybody knows Thursdays and Fridays are prime movie nights :wink:)."
        )
        st.image('resources/imgs/ratings_DOW.png', width=700)

        # Movie duration distribution
        st.write(
            "The violin-plot below visualises the distribution in movie duration in the dataset, clearly showing that, with few outliers, movie duration is on average around 100 minutes."
        )
        st.image('resources/imgs/movie_duration.png', width=600)

        st.subheader(
            "That wraps up our dive into the dataset! For more in-dept analytics get in touch with us though the 'Contact Us' page."
        )

    if page_selection == "Interactive Movie Recommender":

        # Setting up background image:
        st.image('resources/imgs/background.jpg', width=800)

        # Page title & intro
        st.title("Interactive Movie Recommender")
        st.subheader("Don't trust the system! Find your own movies here.")
        st.info(
            "Below is a more interactive movie recommender. Set your release date (year) preference with the two year sliders and select any combination of your preferred genres. On the table that displays your recommendations, click on the 'year' column title to sort the recommendations by release year. Copy and paste the title of any films that catch your eye into the search bar below the table to search youtube for its trailer"
        )

        # Container for year/genre filtered DF
        # NOTE(review): `st.beta_container` is the pre-1.0 Streamlit name;
        # newer releases expose it as `st.container` - confirm against the
        # Streamlit version this app is pinned to before renaming.
        with st.beta_container():
            # Load movies.csv dataframe:
            movies_df = pd.read_csv('resources/data/movies.csv',
                                    index_col='movieId')

            # Year selection; the end slider starts at the chosen start
            # year so the range can never be inverted.
            st.subheader('Enter your preferred release-year range')
            start_year = st.slider("Start Year", 1874, 2019)
            end_year = st.slider('End Year', start_year, 2019)
            movies_df = movies_df[(movies_df['year'] >= start_year)
                                  & (movies_df['year'] <= end_year)]

            # Genre selection
            st.subheader('Enter your preferred genres')
            genres_list = [
                'Documentary', 'Animation', 'Film-Noir', 'Romance',
                'Adventure', 'Western', 'Children', 'Sci-Fi', 'Drama',
                'Thriller', 'Mystery', 'War', 'Comedy', 'Action', 'IMAX',
                'Musical', 'Fantasy', 'Horror', 'Crime'
            ]
            wanted_genres = set(st.multiselect('Select genres', genres_list))

            # Keep only movies that carry every selected genre. With no
            # selection every movie qualifies, so the scan is skipped.
            if wanted_genres:
                # `.split()` splits on whitespace, so this assumes the
                # 'genres' column is space-separated - TODO confirm (raw
                # MovieLens files use '|').
                # A positional boolean mask is used instead of dropping by
                # index label so a repeated movieId can never remove rows
                # that actually match; the explicit bool dtype keeps the
                # mask valid when the year filter left no rows.
                keep = pd.Series(
                    [wanted_genres.issubset(entry.split())
                     for entry in movies_df['genres']],
                    index=movies_df.index,
                    dtype=bool,
                )
                movies_df = movies_df[keep]

            # Display filtered DF
            st.dataframe(movies_df)

        with st.beta_container():
            st.image('resources/imgs/Youtube_logo.png', width=300)
            st.subheader('Search YouTube')
            st.info(
                'Copy and paste movie titles that catch your fancy below to search youtube!'
            )
            # The form targets https so the query is not submitted in
            # clear text (and browsers do not flag an insecure form).
            st.components.v1.html("""
                <form action="https://www.youtube.com/results" method="get" target="_blank" >
                <input name="search_query" type="text" maxlength="128" />
                <select name="search_type">
                <option value="">Videos</option>
                <option value="search_users">Channels</option>
                </select>
                <input type="submit" value="Search" />
                </form>
                """)

    #Building out the Contact Page
    if page_selection == "Contact Us":

        st.subheader(
            "We are Team 2, a group of young data scientists from the Explore Data Science Academy, Johannesburg."
        )

        with st.beta_container():
            st.subheader("Samuel Aina")
            st.text('*****@*****.**')

            st.subheader("Jacques Carstens")
            st.text('*****@*****.**')

            st.subheader("Mokgadi Maake")
            st.text('*****@*****.**')

        st.info("Get in touch with us for all your ML needs")
        # NOTE: the values typed into these widgets are not stored or sent
        # anywhere by this function; the form is display-only.
        st.text_input("Enter your Name")
        st.text_input("Enter your Last Name")
        st.text_input("Enter your contact details here")
        st.text_area("Tell us about your company's Data Science needs")

        if st.button("Submit"):
            st.success("Thank you, we'll be in touch!")
def main():

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Recommender System","Solution Overview","Exploratory Data Analysis","Hybrid Recommender System","New Movie Release","Search for a Movie","About Machine Learning App","Instruction of use"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Solution Overview":
        st.image('resources/imgs/Avengers_Header.png',width = 600)
        st.title("Solution Overview")
        st.markdown("")
        st.markdown('On this application you will find three methods to recommend movies to users, namely Content-Based filtering , Collaborative filtering and hybrid approach. A Hybrid approach is a combination of Content-Based filtering and collaborative filtering which the reason it works better than the individual approaches.\
                     Both Content-Based filtering and Collaborative filtering suffer from what we call a Cold start problem. A Cold start is a potential problem in computer-based information systems which involve a degree of automated data modelling. Specifically, it concerns the issue that the system cannot draw any inferences for users or items about which it has not yet gathered sufficient information.\
                     The item cold-start problem refers to when items added to the catalogue have either none or very little interactions. This constitutes a problem mainly for collaborative filtering algorithms due to the fact that they rely on the item\'s interactions to make recommendations.', unsafe_allow_html=True)
        st.markdown("If no interactions are available then a pure collaborative algorithm cannot recommend the item.\
                     In case only a few interactions are available, although a collaborative algorithm will be able to recommend it, the quality of those recommendations will be poor.\
                     Content-based algorithms relying on user provided features suffer from the cold-start item problem as well, since for new items if no (or very few) interactions exist, also no (or very few) user reviews and tags will be available.")             
        st.markdown('When a new user enrolls in the system and for a certain period of time the recommender has to provide recommendation without relying on the user\'s past interactions, since none has occurred yet.\
                    This problem is of particular importance when the recommender is part of the service offered to users, since a user who is faced with recommendations of poor quality might soon decide to stop using the system before providing enough interaction to allow the recommender to understand his/her interests.\
                    The main strategy in dealing with new users is to ask them to provide some preferences to build an initial user profile. A threshold has to be found between the length of the user registration process, which if too long might indice too many users to abandon it, and the amount of initial data required for the recommender to work properly.', unsafe_allow_html=True)        
        st.markdown('The main approach is to rely on hybrid recommenders, in order to mitigate the disadvantages of one category or model by combining it with another.', unsafe_allow_html=True) 
        st.markdown('We use the RMSE to measure how accurate our model is when recommending movies for users.')     
        st.markdown('Root Mean Square Error (RMSE) is the standard deviation of the residuals (prediction errors). Residuals are a measure of how far from the regression line data points are; RMSE is a measure of how spread out these residuals are. In other words, it tells you how concentrated the data is around the line of best fit.', unsafe_allow_html=True)  
        st.markdown('The Hybrid Recommender Sytem gives the following RMSE for a subset of movies as when trying to use the whole dataframe gives memoryError')
        st.markdown('<strong>RMSE:</strong> 0.9462',unsafe_allow_html=True)
        st.markdown('The calculation of the RMSE can be found on the notebook in this Repository. The notebook name is Solution Overview.')
        st.image('resources/imgs/Marvel_head.png',width = 800)
    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.
    #Search for a Movie page
    if page_selection  =="Search for a Movie":
        st.image('resources/imgs/52115.png',width = 600)
        st.title("Search for Movies")
        st.markdown('Please Refer to the About Machine Learning Page to learn more about the techniques used to recommend movies. If you decide not to use the recommender systems you can use this page to filter movies based on the rating of the movie , the year in which the movie was released and the genre of the movies. After you change the filter you will be left with movies that are specific to that filter used. Then when you scroll down you will see the movie name and the link to a youtube trailer of that movie. When you click the link ,you will see a page on youtube for that specific movie and you can watch the trailer and see if you like it. This is an alternative method to you if you are not satisfied with the recommender engine . Enjoy! ', unsafe_allow_html=True)
        # Movies
        df = pd.read_csv('resources/data/movies.csv')

        def explode(df, lst_cols, fill_value='', preserve_index=False):
            """Expand list-valued columns into one row per list element.

            Parameters
            ----------
            df : pandas.DataFrame
                Frame containing the list-valued column(s).
            lst_cols : str or list-like of str
                Name(s) of the columns whose cells hold lists. A bare name
                is wrapped in a list. Row lengths are taken from the first
                column named.
            fill_value : scalar, default ''
                Used to fill missing values in the result when at least one
                row had an empty list (such rows are kept as a single row).
            preserve_index : bool, default False
                If True the original index labels are kept (repeated once
                per element); otherwise the result is renumbered from 0.

            Returns
            -------
            pandas.DataFrame
                The non-list columns (in ``Index.difference`` order)
                followed by the exploded columns, ordered by original index.
            """
            import numpy as np

            # make sure `lst_cols` is list-alike
            if (lst_cols is not None
                    and len(lst_cols) > 0
                    and not isinstance(lst_cols, (list, tuple, np.ndarray, pd.Series))):
                lst_cols = [lst_cols]
            # all columns except `lst_cols`
            idx_cols = df.columns.difference(lst_cols)
            # calculate lengths of lists
            lens = df[lst_cols[0]].str.len()
            has_items = lens > 0
            is_empty = lens == 0
            # each original index label repeated once per list element
            idx = np.repeat(df.index.values, lens)

            def _flatten(col):
                # np.concatenate raises on an empty sequence, so return an
                # empty column when no row has any element to contribute.
                if not has_items.any():
                    return np.array([], dtype=object)
                return np.concatenate(df.loc[has_items, col].values)

            # create "exploded" DF
            res = pd.DataFrame(
                {col: np.repeat(df[col].values, lens) for col in idx_cols},
                index=idx,
            ).assign(**{col: _flatten(col) for col in lst_cols})

            # re-attach the rows whose list was empty
            if is_empty.any():
                # DataFrame.append was removed in pandas 2.0; concat is the
                # supported equivalent.
                res = pd.concat(
                    [res, df.loc[is_empty, idx_cols]], sort=False
                ).fillna(fill_value)

            # restore the original row order; a stable sort keeps the
            # elements of each list in their original sequence
            res = res.sort_index(kind='mergesort')
            # reset index if requested
            if not preserve_index:
                res = res.reset_index(drop=True)
            return res

        movie_data = pd.merge(rating_m, df, on='movieId')
        movie_data['year'] = movie_data.title.str.extract('(\(\d\d\d\d\))',expand=False)
        #Removing the parentheses
        movie_data['year'] = movie_data.year.str.extract('(\d\d\d\d)',expand=False)

        movie_data.genres = movie_data.genres.str.split('|')
        movie_rating = st.sidebar.number_input("Pick a rating ",0.5,5.0, step=0.5)

        movie_data = explode(movie_data, ['genres'])
        movie_title = movie_data['genres'].unique()
        title = st.selectbox('Genre', movie_title)
        movie_data['year'].dropna(inplace = True)
        movie_data = movie_data.drop(['movieId','timestamp','userId'], axis = 1)
        year_of_movie_release = movie_data['year'].sort_values(ascending=False).unique()
        release_year = st.selectbox('Year', year_of_movie_release)

        movie = movie_data[(movie_data.rating == movie_rating)&(movie_data.genres == title)&(movie_data.year == release_year)]
        df = movie.drop_duplicates(subset = ["title"])
        if len(df) !=0:
            st.write(df)
        if len(df) ==0:
            st.write('We have no movies for that rating!')        
        def youtube_link(title):
            """Return a YouTube search-results URL for a movie title.

            The title is URL-encoded (spaces become ``+``; characters such
            as ``&``, ``#`` and ``%`` are percent-escaped) so the whole
            title stays inside the ``search_query`` parameter.

            INPUT: 'The Little Mermaid'
            -----------

            OUTPUT: https://www.youtube.com/results?search_query=The+Little+Mermaid&page=1
            ----------
            """
            from urllib.parse import quote_plus

            base = "https://www.youtube.com/results?search_query="
            page = "&page=1"
            return base + quote_plus(title) + page
        if len(df) !=0:           
            for _, row in df.iterrows():
                st.write(row['title'])
                st.write(youtube_link(title = row['title']))

        st.image('resources/imgs/horror.jpg',width = 600)                
    # Building out the EDA page
    if page_selection == "Exploratory Data Analysis":
        st.image('resources/imgs/philmovieheader.jpg',width = 600)
        st.title("Insights on how people rate movies")       
        if st.checkbox('Show Rating graph'):
            rating_m.groupby('rating')['userId'].count().plot(kind = 'bar', color = 'g',figsize = (8,7))
            plt.xticks(rotation=85, fontsize = 14)
            plt.yticks(fontsize = 14)
            plt.xlabel('Ratings (scale: 0.5 - 5.0)', fontsize=16)
            plt.ylabel('No. of Ratings', fontsize=16)
            plt.title('Distribution of User Ratings ',bbox={'facecolor':'k', 'pad':5},color='w',fontsize = 18)
            st.pyplot()
            st.markdown("This is a bar graph showing the rating of movie by people who have watched them.")
            st.markdown("The number of ratings is the total number of rating for each scale from 0.5 upto 5.0 rated by people who watched the movies.")
        if st.checkbox('Show Pie chart for ratings'):
            # Calculate and categorise ratings proportions
            a = len(rating_m.loc[rating_m['rating']== 0.5]) / len(rating_m)
            b = len(rating_m.loc[rating_m['rating']==1.0]) / len(rating_m)
            c = len(rating_m.loc[rating_m['rating']==1.5]) / len(rating_m)
            d = len(rating_m.loc[rating_m['rating']==2.0]) / len(rating_m)
            low_ratings= a+b+c+d
            e = len(rating_m.loc[rating_m['rating']==2.5]) / len(rating_m)
            f = len(rating_m.loc[rating_m['rating']== 3.0]) / len(rating_m)
            g = len(rating_m.loc[rating_m['rating']==3.5]) / len(rating_m)
            medium_ratings= e+f+g
            h = len(rating_m.loc[rating_m['rating']==4.0]) / len(rating_m)
            i = len(rating_m.loc[rating_m['rating']==4.5]) / len(rating_m)
            j = len(rating_m.loc[rating_m['rating']==5.0]) / len(rating_m)
            high_ratings= h+i+j 
            # To view proportions of ratings categories, it is best practice to use pie charts
            # Where the slices will be ordered and plotted clockwise:
            labels = 'Low Ratings', 'Medium Ratings', 'High Ratings'
            sizes = [low_ratings, medium_ratings,  high_ratings]
            explode = (0, 0, 0.1)  # Only "explore" the 3rd slice (i.e. 'Anti')

            # Create pie chart with the above labels and calculated class proportions as inputs
            fig1, ax1 = plt.subplots()
            ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
                    shadow=True, startangle=270)#,textprops={'rotation': 65}
            ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
            plt.title('Categorised Proportions of User Ratings ',bbox={'facecolor':'k', 'pad':5},color='w',fontsize = 18)
            st.pyplot()
            st.markdown("This is a pie chart showing the rating of movies by people who have watched them.")
            st.markdown("Low Ratings (scale: 0.5 - 2.0)")
            st.markdown("Medium Ratings (scale: 2.5 - 3.5)")
            st.markdown("High Ratings (scale: 4.0 - 5.0)")

        if st.checkbox('Show WordCloud of directors'):   
            imdb["title_cast"] = imdb["title_cast"].astype('str')
            imdb["director"] = imdb["director"].astype('str')
            imdb["plot_keywords"] = imdb["plot_keywords"].astype('str')
            imdb["plot_keywords"] = imdb["plot_keywords"].apply(lambda x: x.replace('|',' '))
            imdb["title_cast"] = imdb["title_cast"].apply(lambda x: x.replace(' ',''))
            imdb["title_cast"] = imdb["title_cast"].apply(lambda x: x.replace('|',' '))
            imdb["director"] = imdb["director"].apply(lambda x: x.replace(' ',''))
            imdb["director"] = imdb["director"].apply(lambda x: x.replace('Seefullsummary',''))
            imdb["director"] = imdb["director"].apply(lambda x: x.replace('nan',''))
            imdb["title_cast"] = imdb["title_cast"].apply(lambda x: x.replace('nan',''))
            imdb["plot_keywords"] = imdb["plot_keywords"].apply(lambda x: x.replace('nan',''))  

            directors = ' '.join([text for text in imdb["director"]])

            # Word cloud for the overall data checking out which words do people use more often
            wordcloud = WordCloud(width=1000, height=800).generate(directors)

            #ploting the word cloud
            plt.figure(figsize=(16,12))
            plt.imshow(wordcloud)
            plt.axis('off')
            st.pyplot() 
            st.markdown("This is a wordcloud of the directors of movies in this Application.")
            st.markdown("This wordcloud shows the most popular directors on the movies.")
        if st.checkbox('Show WordCloud of Actors/Actresses'):
            imdb["title_cast"] = imdb["title_cast"].astype('str')
            imdb["director"] = imdb["director"].astype('str')
            imdb["plot_keywords"] = imdb["plot_keywords"].astype('str')
            imdb["plot_keywords"] = imdb["plot_keywords"].apply(lambda x: x.replace('|',' '))
            imdb["title_cast"] = imdb["title_cast"].apply(lambda x: x.replace(' ',''))
            imdb["title_cast"] = imdb["title_cast"].apply(lambda x: x.replace('|',' '))
            imdb["director"] = imdb["director"].apply(lambda x: x.replace(' ',''))
            imdb["director"] = imdb["director"].apply(lambda x: x.replace('Seefullsummary',''))
            imdb["director"] = imdb["director"].apply(lambda x: x.replace('nan',''))
            imdb["title_cast"] = imdb["title_cast"].apply(lambda x: x.replace('nan',''))
            imdb["plot_keywords"] = imdb["plot_keywords"].apply(lambda x: x.replace('nan',''))   

            title_cast= ' '.join([text for text in imdb["title_cast"]])

            # Word cloud for the overall data checking out which words do people use more often
            wordcloud = WordCloud(width=1000, height=800).generate(title_cast)

            #ploting the word cloud
            plt.figure(figsize=(16,12))
            plt.imshow(wordcloud)
            plt.axis('off')
            st.pyplot()  
            st.markdown("This is a wordcloud for Actors/Actresses on the movies on this Application.")
            st.markdown("This wordcloud shows the most popular Actors/Actresses on the movies.")
        if st.checkbox("Show wordcloud of different genres"):    
            movies = pd.read_csv('resources/data/movies.csv')
            #here we  make census of the genres:
            genre_labels = set()
            for s in movies['genres'].str.split('|').values:
                genre_labels = genre_labels.union(set(s))  

            #counting how many times each of genres occur:
            keyword_occurences, dum = count_word(movies, 'genres', genre_labels)
            #Finally, the result is shown as a wordcloud:
            words = dict()
            trunc_occurences = keyword_occurences[0:50]
            for s in trunc_occurences:
                words[s[0]] = s[1]
            tone = 100 # define the color of the words
            f, ax = plt.subplots(figsize=(14, 6))
            wordcloud = WordCloud(width=1000,height=800, background_color='white', 
                                max_words=1628,relative_scaling=0.7,
                                color_func = random_color_func,
                                normalize_plurals=False)
            wordcloud.generate_from_frequencies(words)
            plt.figure(figsize=(16,12))
            plt.imshow(wordcloud, interpolation="bilinear")
            plt.axis('off')
            st.pyplot()
            st.markdown("This is a wordcloud for all the different genres in this Application.")
        st.image('resources/imgs/genre.jpg',width = 600)

    if page_selection == "Hybrid Recommender System":
        st.title('Hybrid Recommender System')
        st.image('resources/imgs/Image_header.png',use_column_width=True)

        title_list1 = load_movie_titles('recommenders/hybrid_movies.csv')
        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option',title_list1[200:470])
        movie_2 = st.selectbox('Second Option',title_list1[700:900])
        movie_3 = st.selectbox('Third Option',title_list1[2000:2100])
        fav_movies = [movie_1,movie_2,movie_3]
        #st.write(movie_1)
        #fav_movies = [movie_1]
        if st.button("Recommend"):
            #try:
                with st.spinner('Crunching the numbers...'):
                    top_recommendations = recommendation(fav_movies, top_n = 10)

                st.title("We think you'll like:")
                for i,j in enumerate(top_recommendations):
                    st.subheader(str(i+1)+'. '+j)
                 

    if page_selection == "Instruction of use":
        st.image('resources/imgs/joker-header.jpg',width = 600)
        st.title("Instructions")
        st.markdown('When the application opens the first page you will see is the Recommender System Page. Here you will see two algorithms you can choose from.')
        st.image('recommenders/images/page1.png', width = 600)
        st.markdown('Then you will have three options to choose from. On the select box you will choose your three favourite movie and then press the Recommend button.')
        st.image(['recommenders/images/page2.png','recommenders/images/page3.png'],width=600)
        st.markdown('After pressing the Recommend button then the recommended movies will be shown to you.')
        st.image('recommenders/images/page4.png',width = 600)
        st.markdown('Then you can choose the next algorithm to do the same task.')
        st.image('recommenders/images/page16.png',width = 600)
        st.markdown('On the left you will see a side bar that has all of the pages on this App.')
        st.image('recommenders/images/page5.png',width = 600)
        st.markdown('We have another page where we show you Actors , directors and how other people that watched the movies rate them.')
        st.image('recommenders/images/page18.png',width = 600 )
        st.markdown('This word cloud show the most popular Actors/Actresses on this movie App.')
        st.image('recommenders/images/page7.png',width = 600)
        st.markdown('This word cloud shows the different genres you can find on this App.')
        st.image('recommenders/images/page8.png',width = 600)
        st.markdown('The Search for a Movie page is an alternative to search for movies using filters instead of using Recommender Systems . The filters you can use are the release year of a movie, the rating of a movie and the genre of a movie and this will allow you to play around and find a specific movie using filters.')
        st.image(['recommenders/images/page10.png','recommenders/images/page11.png'],width = 600)
        st.image(['recommenders/images/page12.png','recommenders/images/page13.png'],width = 600)
        st.markdown('Whe you scroll down this page you will see that as you change the movie name and a link . The link is a youtube movie trailer for that movie.')
        st.image('recommenders/images/page14.png',width = 600)
        st.markdown('Then when you click the link it will take you to the youtube page for the trailer of that movie.')
        st.image('recommenders/images/page15.png',width = 600)
        st.markdown('We have Another page where you can see new movie releases!')
        st.image('recommenders/images/page19.png',width = 600)

    if page_selection == "New Movie Release":
        st.image('resources/imgs/ws_The_Avengers_Silhouettes_1680x1050.jpg',width = 600)
        st.title("New Movie Release ")
        st.markdown('You will find all new movie releases here . Enjoy!')
        st.subheader("The Old Guard")
        st.video('https://www.youtube.com/watch?v=aK-X2d0lJ_s')
        st.markdown("Directed by :	Gina Prince-Bythewood")
        st.markdown("Starring : Charlize Theron, KiKi Layne, Marwan Kenzari, Luca Marinelli ,Harry Melling ")
        st.markdown("Plot : Led by a warrior named Andy (Charlize Theron), a covert group of tight-knit mercenaries with a mysterious inability to die have fought to protect the mortal world for centuries. But when the team is recruited to take on an emergency mission and their extraordinary abilities are suddenly exposed, it's up to Andy and Nile (Kiki Layne), the newest soldier to join their ranks, to help the group eliminate the threat of those who seek to replicate and monetize their power by any means necessary.")
        st.markdown("")
        st.subheader("Bad Boys For Life (2020)")
        st.video('https://www.youtube.com/watch?v=jKCj3XuPG8M')
        st.markdown('Directed by :	Adil & Bilall')
        st.markdown('Starring : Will Smith, Martin Lawrence, Paola Núñez, Vanessa Hudgens, Alexander Ludwig, Charles Melton, Jacob Scipio')
        st.markdown('Plot : Marcus and Mike have to confront new issues (career changes and midlife crises), as they join the newly created elite team AMMO of the Miami police department to take down the ruthless Armando Armas, the vicious leader of a Miami drug cartel')
        st.subheader("Bloodshot (2020)")
        st.video('https://www.youtube.com/watch?v=vOUVVDWdXbo')
        st.markdown('Directed by : David S. F. Wilson ')
        st.markdown('Starring : Vin Diesel, Eiza González ,Sam Heughan, Toby Kebbell')
        st.markdown("Plot : After he and his wife are murdered, marine Ray Garrison is resurrected by a team of scientists. Enhanced with nanotechnology, he becomes a superhuman, biotech killing machine-'Bloodshot'. As Ray first trains with fellow super-soldiers, he cannot recall anything from his former life. But when his memories flood back and he remembers the man that killed both him and his wife, he breaks out of the facility to get revenge, only to discover that there's more to the conspiracy than he thought. ")
        st.markdown("")
        st.markdown("You can watch new movies in the following sites, Enjoy!")
        st.image('resources/imgs/download1.jpg',width = 200)
        st.markdown('<p><a href="https://www.netflix.com/za/">Netflix</a></p>', unsafe_allow_html=True)	
        st.image('resources/imgs/unnamed.jpg',width = 200)
        st.markdown('<p><a href="https://www.showmax.com/eng/browse?type=movie">Showmax</a></p>', unsafe_allow_html=True)
        st.image('resources/imgs/Disney_plus.jpg',width = 200)	
        st.markdown('<p><a href="https://preview.disneyplus.com/za">Disney plus</a></p>', unsafe_allow_html=True)	
        st.markdown('<p>This application is sponsored by <a href="https://explore-datascience.net/">Explore Data Science Academy</a> </p>', unsafe_allow_html=True)
        st.image('resources/imgs/EDSA_logo.png', width = 800)
    # Building out the About Machine Learning App page
    if page_selection == "About Machine Learning App":
        st.image('resources/imgs/star-wars-rise-of-skywalker-header.jpg',width = 600)
        st.title("Welcome to the Recommender System Machine Learning App")
        st.subheader('Machine Learning')
        st.markdown('<p>Machine learning (ML) is the study of computer algorithms that improve automatically through experience.It is seen as a subset of artificial intelligence. Machine learning algorithms build a mathematical model based on sample data, known as "training data", in order to make predictions or decisions without being explicitly programmed to do so.Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision, where it is difficult or infeasible to develop conventional algorithms to perform the needed tasks. </p>', unsafe_allow_html=True)       
        st.subheader('Machine Learning Algorithms')
        st.markdown('<p>A machine learning (ML) algorithm is essentially a process or sets of procedures that helps a model adapt to the data given an objective. An ML algorithm normally specifies the way the data is transformed from input to output and how the model learns the appropriate mapping from input to output. </p>', unsafe_allow_html=True)
        st.subheader('Recommendation System')
        st.markdown('<p>A recommendation system  is a subclass of information filtering system that seeks to predict the "rating" or "preference" a user would give to an item. They are primarily used in commercial applications.Recommender systems are utilized in a variety of areas and are most commonly recognized as playlist generators for video and music services like Netflix, YouTube and Spotify, product recommenders for services such as Amazon, or content recommenders for social media platforms such as Facebook and Twitter. These systems can operate using a single input, like music, or multiple inputs within and across platforms like news, books, and search queries. Recommender systems usually make use of either or both collaborative filtering and content-based filtering.<p/>', unsafe_allow_html=True)
        st.subheader('Collaborative filtering')
        st.markdown('<p> Collaborative filtering approaches build a model from a user\'s past behavior (items previously purchased or selected and/or numerical ratings given to those items) as well as similar decisions made by other users. This model is then used to predict items (or ratings for items) that the user may have an interest in.<p/>', unsafe_allow_html=True)
        st.subheader('Content-based filtering')
        st.markdown('<p>Content-based filtering approaches utilize a series of discrete, pre-tagged characteristics of an item in order to recommend additional items with similar properties. Current recommender systems typically combine one or more approaches into a hybrid system. </p>', unsafe_allow_html=True)
        st.subheader('Hybrid filtering')
        st.markdown(' Hybrid filtering technique is a combination of multiple recommendation techniques like, merging collaborative filtering with content-based filtering or vice-versa.')
        st.markdown(' Most recommender systems now use a hybrid approach, combining collaborative filtering, content-based filtering, and other approaches . There is no reason why several different techniques of the same type could not be hybridized. Hybrid approaches can be implemented in several ways: by making content-based and collaborative-based predictions separately and then combining them; by adding content-based capabilities to a collaborative-based approach (and vice versa); or by unifying the approaches into one model .')
        st.markdown(' For more information about building data Apps Please go to :<a href="https://www.streamlit.io/">streamlit site</a></p>', unsafe_allow_html=True)	
        st.markdown('<p> </p>', unsafe_allow_html=True)	
        st.image('resources/imgs/Freaks.jpeg',width = 600)
def _read_text_file(path):
    """Return the entire contents of the text file at ``path``.

    The file is opened inside a ``with`` block so the handle is closed as
    soon as the content has been read, rather than being left open until the
    garbage collector happens to reclaim it.
    """
    with open(path) as handle:
        return handle.read()


def main():
    """Render the Streamlit movie recommender app for the selected page.

    A sidebar select box chooses one of the pages listed in ``page_options``
    and the matching ``if`` branch below draws that page.  The function relies
    on names that are defined elsewhere in the file: ``st``, ``pd``,
    ``Image``, ``title_list``, ``content_model`` and ``collab_model``.

    The region between the "MUST NOT BE ALTERED" banners is reproduced
    unchanged on purpose; only the pages after it were touched.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Welcome","About the App", "Recommender System","Solution Overview", "Movie Insights","Contact Us" ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "About the App":
        st.title("About the App")
        st.markdown("Are you a movie lover? Are you tired of wasting your time watching tons of trailers and ending up not watching their movies? Are you tired of finishing your popcorns before you find the right movie? Not anymore!!")
        st.image(["images/tired1.jpg", "images/tired22.jpg"],width=200)
        st.markdown("You have come to the right app.")
        # The usage instructions live in a markdown file next to the data.
        st.markdown(_read_text_file('resources/data/information.md'))

    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.markdown("The app uses recommender systems to produce your recommendation in any of the two ways –")
        st.markdown("**Content-based filtering**: which uses a series of discrete characteristics of your selected movies in order to recommend additional movies with similar properties.")
        st.markdown("**Collaborative filtering**: which builds a model from your past behavior (i.e. movies watched or selected by the you) as well as similar decisions made by other users.")
        st.markdown('For this app, the best filtering method is the collaborative filter. This recommender uses user ratings to find similarity between the movies users select and those that they have not watched yet. It allows users discover new interests. In isolation, a Machine Learning model may not know the user is interested in a given movie, but this model might still recommend it because similar movie was rated the same by other users. The collaborative filtering model runs faster, preventing users from running out of patience and data.')
        st.markdown("Below is a video explaining more about recommender systems, how they work and why the recommender systems are important.")
        if st.checkbox('View video'):  # the video is only rendered while the box is ticked
            st.video('https://www.youtube.com/watch?v=aCRN67RAMco')

    if page_selection == "Welcome":
        st.markdown("![Alt Text](https://github.com/Clarencia/unsupervised-predict-streamlit-template/blob/master/images/welcome.gif?raw=true)")
        st.markdown("![Alt Text](https://cdn.clipart.email/11d8b5822102da1a7c7a2d015a569485_animated-popcorn-clipart-gif_350-350.gif)")

    if page_selection == "Contact Us":
        st.title("Connect with us")
        st.markdown('''<span style="color:green"> **Please help improve the app by rating it and telling us what you think could be changed to make your experience better.** </span>''', unsafe_allow_html=True)

        # NOTE(review): the cached, mutable list appears to be used as an
        # in-memory store so that submissions survive script reruns; confirm
        # against the Streamlit version in use.
        @st.cache(allow_output_mutation=True)
        def get_data():
            return []

        name = st.text_input("User name")
        inputs = st.text_input("How can we make your experience better?")
        rate = st.slider("Rate us", 0, 5)
        if st.button("Submit"):
            get_data().append({"User name": name, "Suggestion": inputs,"rating":rate})
        st.markdown('''<span style="color:green"> **What other users said:** </span>''', unsafe_allow_html=True)
        st.write(pd.DataFrame(get_data()))
        st.markdown('''<span style="color:green"> **For any questions contact us here:** </span>''', unsafe_allow_html=True)
        st.image(["images/contact.PNG"],width=800)

    if page_selection ==  "Movie Insights":
        st.title("Movie Insights")
        st.markdown('This page gives you all the insights you must have about movies from the IMDb and The Movie DB sites. The visuals will be updated everytime a new movie is uploaded into on of the mentioned websites.')
        insights= st.radio("Select a visual you would like to see",('Graph displaying a count of ratings', 'Number of movies in each genre', 'Proportion of genres per year', 'Genre performance per year', 'Top 50 words in movie titles','Distribution of movie runtime', 'Change in movies runtime over the years',"View all visuals"))
        # Single-visual branches show the image first and its commentary second.
        if insights=='Graph displaying a count of ratings':
            st.image(Image.open("images/ratingss.PNG"))
            st.markdown('The bar plot gives an indication that the ratings were positive. More than 60% of the ratings were above 3.5 which indicates that some users rate the movies that they have watched/seen as positive.')
        elif insights =='Number of movies in each genre':
            st.image(Image.open("images/genre_dist_vs.PNG"))
            st.markdown("The Drama genre has  more movies than any other genre in the movies. This makes sense as this genre is about a representation of real life experiences, which you are most likely to find in movies. Comedy, Romance and Thriller also appear to be common genre, supporting that most movies are about love and violence as seen in the title wordcloud.")
        elif insights== 'Proportion of genres per year':
            st.image(Image.open("images/genre_dist_year_vs.PNG"))
            st.markdown(_read_text_file('resources/data/rate.md'))
        elif insights== 'Genre performance per year':
            st.image(Image.open("images/incre_genre_vs.PNG"))
            st.markdown('Just like in "Propotion of genres per year", this graph shows that the proportion of drama films have been falling since 2005 while other genres fluctuated a little.')
        elif insights== 'Top 50 words in movie titles':
            st.image(Image.open("images/wordcloud_titles_vs.PNG"))
            st.markdown(_read_text_file('resources/data/word.md'))
        elif insights=='Distribution of movie runtime':
            st.image(Image.open("images/run_time_vs.PNG"))
            st.markdown('This shows that most of the movies are having a runtime of around 100 minutes')
        elif insights=="Change in movies runtime over the years":
            st.image(Image.open("images/run_timeyear_vs.PNG"))
            st.markdown(_read_text_file('resources/data/running.md'))
        elif insights=="View all visuals":
            # The combined view shows a caption followed by its image, in this
            # fixed order (which differs from the radio-button order above).
            all_visuals = (
                ('Graph displaying a count of ratings', "images/ratingss.PNG"),
                ('Top 50 words in movie titles', "images/wordcloud_titles_vs.PNG"),
                ('Number of movies in each genre', "images/genre_dist_vs.PNG"),
                ('Proportion of genres per year', "images/genre_dist_year_vs.PNG"),
                ('Genre performance per year', "images/incre_genre_vs.PNG"),
                ('Distribution of movie runtime', "images/run_time_vs.PNG"),
                ('Change in movies runtime over the years', "images/run_timeyear_vs.PNG"),
            )
            for caption, image_path in all_visuals:
                st.markdown(caption)
                st.image(Image.open(image_path))
def main():
    """Render the Streamlit movie recommender app for the selected page.

    A sidebar select box chooses between the "Recommender System", "EDA",
    "Solution Overview" and "About Us" pages; the matching ``if`` branch draws
    the page.  A sidebar "About" block is emitted on every page.

    The function relies on names defined elsewhere in the file: ``st``,
    ``plt``, ``px``, ``Image``, ``title_list``, ``content_model``,
    ``collab_model``, ``df_merge1``, ``df_merge3``, ``ratings_df``,
    ``genre_df``, ``make_bar_chart`` and ``make_histogram``.

    The region between the "MUST NOT BE ALTERED" banners is reproduced
    unchanged on purpose; only the pages after it were touched.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Recommender System","EDA", "Solution Overview", "About Us"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "EDA":
        st.header("Movie Recommender System Datasets explorer")
        st.markdown(""" This dataset shown below was extracted from set of data available from a kaggle competition obtained from the link https://www.kaggle.com/c/edsa-recommender-system-predict/data""")
        st.sidebar.header("Configuration")
        st.sidebar.subheader("Available Visuals obtained from the sections below:")

        if st.sidebar.checkbox("Data preview", True):

            st.subheader("Data preview")
            st.markdown(f"Shape of dataset : {df_merge3.shape[0]} rows, {df_merge3.shape[1]} columns")
            if st.checkbox("Data types"):
                st.dataframe(df_merge3.dtypes)
            if st.checkbox("Pandas Summary"):
                st.write(df_merge1.describe())
            # The gradient is applied to df_merge3, so only offer columns that
            # exist in that frame and are numeric; anything else would make
            # the Styler fail when the selection is used as ``subset``.
            numeric_cols = df_merge3.select_dtypes(include="number").columns.tolist()
            cols_to_style = st.multiselect(
                "Choose numeric columns to apply BG gradient", numeric_cols
                )
            st.dataframe(df_merge3.head(50).style.background_gradient(subset=cols_to_style, cmap="BuGn"))
            st.markdown("---")
        st.markdown("""The Data Visualisation done on this page was extracted from the a kaggle notebook which can be found from the link:
            https://www.kaggle.com/kundaninetshiongolwe/team-5-recommenders""")

        if st.sidebar.checkbox("Visuals on Ratings"):
            if st.checkbox("Ratings count by year"):
                fig, ax = plt.subplots(1, 1, figsize = (12, 6))
                # Draw on the axes created above instead of relying on the
                # implicit "current axes".
                df_merge1.groupby('rating_year')['rating'].count().plot(
                    kind='bar', title='Ratings by year', ax=ax)
                st.write(fig)
                # Release the figure so reruns do not pile up open figures.
                plt.close(fig)

            if st.checkbox("How ratings are distributed: histogram"):
                f = px.histogram(df_merge1["rating"], x="rating", nbins=10, title="The Distribution of the Movie Ratings")
                f.update_xaxes(title="Ratings")
                f.update_yaxes(title="Number of Movies per rating")
                st.plotly_chart(f)
            if st.checkbox("How ratings are distributed: scatter plot"):
                fig, ax = plt.subplots(figsize=(14, 7))
                ax.spines['top'].set_visible(False)
                ax.spines['right'].set_visible(False)
                ax.set_title('Rating vs. Number of Ratings', fontsize=24, pad=20)
                ax.set_xlabel('Rating', fontsize=16, labelpad=20)
                ax.set_ylabel('Number of Ratings', fontsize=16, labelpad=20)

                ax.scatter(ratings_df['Mean_Rating'], ratings_df['Num_Ratings'], alpha=0.5, color='green')
                st.pyplot(fig)
                # Release the figure so reruns do not pile up open figures.
                plt.close(fig)

        if st.sidebar.checkbox("Visuals on Genres"):
            st.info("The number of movie per genre")
            fig=make_bar_chart(genre_df, 'Genre', title='Most Popular Movie Genres', xlab='Genre', ylab='Counts')
            st.pyplot(fig)

        if st.sidebar.checkbox("Movie published"):
            st.info("Movies published by year")
            st.pyplot(make_histogram(df_merge3, 'moviePubYear', title='Movies Published per Year', xlab='Year', ylab='Counts'))


    # Sidebar credits: emitted for every page, after any EDA sidebar widgets.
    st.sidebar.header("About")
    st.sidebar.text("Team name : Team_5_EDSAJuly2020")
    st.sidebar.text(
        "Code : https://github.com/Thami-ex/unsupervised-predict-streamlit-template"
    )




    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("Describe your winning approach on this page")

        st.markdown("""A `Recommender System (RS)` is no doubt one of the most obvious ways in which companies are enhancing the user experience
        in the platform that they provide their customers services. Companies Like Facebook, Netflix, Amazon, and Youtube are using RS to do so.
        More likely, these companies and other companies that are implementing the RS are doing so in introducing machine learning into these
        companies. It is therefore important for aspiring Data Scientists to develop skills in such areas. At `Explore Data Science Academy (EDSA)`,
        this team was given a task to build a RS. There are 3 available approaches to building a recommender system. As part of this project the
        team explored two of these which were the `Content Based Filtering (CBF)` and `Collaborative Filtering (CF)` algorithm.

            """)

        st.subheader("**Collaborative Filtering (CF)**")
        st.markdown("""This recommender engine was easy to implement in this work as it provides us with the recommendation of the 10 movies easily
         as compared to the other approach. On the other hand, the CF is one of the most popular implemented recommender engines and it is based on
         the assumption that the people were in agreement in the past and there is a high chance that they are in agreement in the future. An example
          indicating what is meant by the statement about agreement is considering that a friend and the other friend have probably liked an identical
          range of books in the past. Because the friend has now read new books that the other has not read there is a high chance that the other friend
          will enjoy and probably like those same books. This logic describes what is known as `user-based` collaborative filtering which was implemented
          in this application. """)

        st.subheader("**Building the Recommender Sytem**")
        st.markdown("""The recommender system application was built mainly for consumers to have an experience of watching movies that they are
        likely to enjoy based on the three movies they have selected. Figure below shows a recommender engine from Netflix showing new release
         movies. Ideally, more recommender systems look like the one from the figure below, however, the approach to building this one was somehow
         different. """)


        image=Image.open("./images/rs.jpg")
        st.image(image, use_column_width=True)

        st.markdown("""In building this web application, a couple of steps were followed starting with forking the repository from Github given by EDSA,
         using the dataset provided and obtained from the repository to build the recommender system. Following that was working with the script of the
         collaborative filtering algorithm by editing the code to obtain a movie prediction when using the main script run with streamlit. The `about
         us` has a link to the Github repo for if the intention is to attain better grasp on how the code works using python code.
            """)

        st.markdown("""This recommender engine is considered to be user friendly and one can easily use it to get movies that others have enjoyed and are
         related to the movies that they enjoy. This is done by only selecting three movies and press `Recommend` and 10 movies will be suggested. """ )

    if page_selection == "About Us":
        image = Image.open("./images/about_us3.jpeg")
        st.image(image, use_column_width=True)
def _plot_genre_counts(genre_flags):
    """Render a horizontal count plot of how many movies carry each genre.

    `genre_flags` is a dataframe with one 0/1 indicator column per genre;
    only the cells equal to 1 are counted.
    """
    melted = pd.melt(genre_flags)
    plt.figure(figsize=(10, 8))
    sns.countplot(data=melted.loc[melted['value'] == 1],
                  y='variable',
                  palette='viridis')
    plt.title('* Some movies are labelled with multiple genres')
    plt.suptitle('Number of movies belonging to each category',
                 fontsize=15)
    plt.xlabel('Count')
    plt.ylabel('')
    st.pyplot()


def _genre_rating_totals(exclude=()):
    """Return per-rating sums of the genre indicator columns.

    Reads the module-level `train`, `df_genres` and `df` frames (defined
    elsewhere in this file). The genre frame is copied via `assign` instead
    of being given a `movieId` column in place, so the shared `df_genres`
    object is left untouched.

    `exclude` lists extra column names to drop before aggregating.
    """
    genres_with_id = df_genres.assign(movieId=df['movieId'])
    genre_ratings = pd.merge(left=train, right=genres_with_id, on='movieId')
    genre_ratings = genre_ratings.drop(
        columns=['userId', 'movieId', 'timestamp', *exclude])
    return genre_ratings.groupby(by=['rating']).sum()


def _plot_genre_rating_pies(genre_ratings,
                            legend_loc,
                            legend_anchor,
                            max_charts=20):
    """Render one pie chart per genre column on a 4x5 grid of subplots.

    Each pie shows how the summed values of a genre column are split across
    the index labels (ratings) of `genre_ratings`. At most `max_charts`
    columns are drawn (the grid only has 20 cells). The legend is attached
    to the last subplot drawn and positioned with `legend_loc` /
    `legend_anchor`.
    """
    names = list(genre_ratings.columns)[:max_charts]
    labels = list(genre_ratings.index)
    colours = sns.color_palette(palette='viridis',
                                n_colors=len(labels),
                                desat=None)

    fig = plt.figure()
    fig.subplots_adjust(hspace=1, wspace=1)
    fig.set_size_inches(20, 16)
    for position, name in enumerate(names, start=1):
        plt.subplot(4, 5, position)
        plt.pie(genre_ratings[name],
                colors=colours,
                radius=2,
                autopct='%0.1f%%',
                pctdistance=1.2)
        plt.title(name, pad=58, fontsize=14)
    plt.legend(labels,
               title='Rating',
               fancybox=True,
               loc=legend_loc,
               bbox_to_anchor=legend_anchor)
    st.pyplot()


def _plot_top20_bar(top20, column, title, xlabel, ylabel, outlined=True):
    """Render a large bar chart of `top20[column]` keyed by the frame index.

    `outlined` adds a thick edge in the bar colour, matching the styling
    used by the rating-sum and relevance charts.
    """
    colours = [
        'forestgreen', 'burlywood', 'gold', 'azure', 'magenta',
        'cyan', 'aqua', 'navy', 'lightblue', 'khaki'
    ]
    bar_style = {'color': colours}
    if outlined:
        bar_style.update(linewidth=3, edgecolor=colours)

    plt.figure(figsize=(30, 30))
    plt.title(title, fontsize=40)
    plt.xlabel(xlabel, fontsize=30)
    plt.ylabel(ylabel, fontsize=30)
    # Tick styling is applied both before and after drawing the bars, in the
    # same order as the original inline plotting code.
    plt.xticks(fontsize=25, rotation=90)
    plt.yticks(fontsize=25)
    plt.bar(top20.index, top20[column], **bar_style)
    plt.subplots_adjust(bottom=0.7)
    plt.xticks(rotation=60, ha='right')
    st.pyplot()


def main():
    """Render the Streamlit movie-recommender app.

    A sidebar radio selects one of the pages "Information",
    "EDA and Insights", "Recommender System", "Solution Overview" or
    "Business Pitch"; the matching page body is then drawn. The sidebar
    always ends with an "About" panel.

    Relies on module-level names defined elsewhere in this file
    (`title_list`, `content_model`, `collab_model`, `movies`, `train`,
    `df`, `df_genres`, `g_scores`, `imdb`).
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    st.sidebar.title("Pages")
    page_selection = st.sidebar.radio(label="",
                                      options=[
                                          "Information", "EDA and Insights",
                                          "Recommender System",
                                          "Solution Overview", "Business Pitch"
                                      ])

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------

    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('First Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------

    # NOTE: this page name is not offered by the sidebar radio above, so the
    # branch is currently unreachable. It must not bind the name `df_genres`:
    # doing so would make `df_genres` local to main() and every other branch
    # that reads the module-level frame would raise UnboundLocalError.
    if page_selection == "Exploratory Data Analysis":
        st.title("Exploratory Data Analysis")
        # Ensure the genres column contains strings and split into a list of genres
        genre_lists = movies['genres'].apply(str).apply(
            lambda x: x.split('|'))

        # Create a label binarizer class
        mlb = MultiLabelBinarizer()

        # Create a new dataframe with the binarized genres
        genre_flags = pd.DataFrame(mlb.fit_transform(genre_lists),
                                   columns=mlb.classes_)
        _plot_genre_counts(genre_flags)

    if page_selection == "Solution Overview":
        st.title("Solution Overview")

        st.write("### Content-Based Recommender system")
        st.write(
            "A Content-based recommender system tries to recommend items to users, based on their profile. The user’s profile revolves around the user’s preferences and tastes, or based on the user ratings. Furthermore; contents of the movie such as cast or keyword are used to find similarities with other movies. Then the movies that are most likely to be similar are recommended."
        )
        st.write(
            "Term Frequency-Inverse Document Frequency (TF-IDF) vectors were computed to find a matrix where each column represents a word in the genre vocabulary. A cosine similarity matrix was then computed to build a model that would estimate user ratings of new movies"
        )

        st.write("### Collaborative Filtering")
        st.write("#### The SVD Model from SurPRISE")
        st.write(
            "Surprise is a Python scikit for building and analysing recommender systems that deals with rating data (Nicolas Hug, https://surprise.readthedocs.io/en/stable/getting_started.html). Within this library is a collection of algorithms for prediction experiments. For this competition, we have evaluated three of the algorithms (SVD, SVDpp and KNNWithMeans) in our collaborative-based models. We will discuss the SVD model in further detail as it proved to be the best scoring algorithm overall with an RMSE of 0.78675."
        )
        st.write(
            "A training dataset is given, it comprises of movie Id, user IDs and the rating (1-5) that the user gave the respective movie. This data can be visualised as a sparse matrix with users as row indexes, movies as column name and ratings as values. It is assumed that every movie can be described in terms of attributes such as genre or cast; and every user's likes and dislikes can be described from their historical ratings. Therefore, it is possible to describe these patterns using fewer numbers than is present in the full matrix, for example, a single value representing comedy may be sufficient to explain why many comedy-lovers rated a particular movie highly. Essentially, we are aiming to decompose the original sparse matrix into two low-rank matrices that represent user factors and item factors. The SVD model uses matrix factorisation to compute these parameters accordingly (Simon Funk, https://sifter.org/~simon/journal/20061211.html)."
        )

    # Build information page
    if page_selection == "Information":
        st.title('Recommender Systems')
        st.info(
            "A recommender system is a subclass of information filtering system that seeks to predict the rating or preference a user would give to an item"
        )
        st.image('resources/imgs/Image_header.png', use_column_width=True)

        if st.button("How does the app work"):
            # Context manager closes the file handle once the text is read
            with open("resources/info.md") as info_file:
                app_info = info_file.read()
            st.markdown(app_info, unsafe_allow_html=True)

        if st.button("Data description"):
            with open("resources/data_description.md") as description_file:
                data_descript = description_file.read()
            st.markdown(data_descript, unsafe_allow_html=True)

        raw_data = pd.merge(left=train, right=movies, on='movieId')
        st.subheader("Raw movies data")
        if st.checkbox('Show data'):  # data is hidden if box is unchecked
            st.write(raw_data.head())  # will write the df to the page

    # Build EDA page
    if page_selection == "EDA and Insights":
        st.title('Exploratory Data Analysis and Insights')
        st.info(
            "The main characteristics of the data are summarized and insights are drawn."
        )
        st.write(
            '###  Use the sidebar to view visuals and insights for particular variables'
        )

        # Adding to sidebar
        variable_selection = st.sidebar.radio(label="Select variable(s):",
                                              options=[
                                                  "Genres", "Ratings",
                                                  "Genres and Ratings",
                                                  'Runtime', "Movies",
                                                  "Directors"
                                              ])

        if variable_selection == "Genres":
            _plot_genre_counts(df_genres)

            st.markdown(
                "This graphs shows the number of movies in each genre, some movies are labelled with multiple genres. It is quite clear that drama is the most popular genre, with comedy falling second. Film-Noir and IMAX genres are the least popular genres.<br><br>Film noir is a style of filmmaking characterized by such elements as cynical heroes, stark lighting effects, frequent use of flashbacks, intricate plots, and an underlying existentialist philosophy.The genre was prevalent mostly in American crime dramas of the post-World War II era. This shows that Film noir is associated with some Western and war genres, therefore might share half of the movies in the western and war genres. Western and war genres seem to have a small number of movies, this shows why film noir has an even lower number.",
                unsafe_allow_html=True)

        if variable_selection == "Genres and Ratings":
            # Number of ratings per genre, then how each genre is
            # historically rated by users (one pie per genre)
            _plot_genre_rating_pies(_genre_rating_totals(),
                                    legend_loc=6,
                                    legend_anchor=(1.7, 6.8),
                                    max_charts=20)

            st.markdown(
                "The pie charts show the ratings associated with each genre. Based on the number of ratings attained by each genre, it can be seen that the rating of 4 has the bigger piece of the pie for all of the genres. This supports the findings on the ratings distribution graph which showed that a vast majority of the movies are rated 4. <br><br>It is also evident that the lower ratings have small percentages for all the genres.",
                unsafe_allow_html=True)

        if variable_selection == "Ratings":
            # Examine movie ratings from all users
            plt.figure(figsize=(6, 4))
            # Pass the series as `x` explicitly: newer seaborn releases treat
            # the first positional argument as `data`, not `x`.
            sns.countplot(x=train['rating'], palette='viridis')
            plt.title('Distribution of ratings from all users')
            plt.xlabel('Rating')
            plt.ylabel('Count')
            st.pyplot()

            st.markdown(
                "From the plot and table above it is evident that majority of users gave the movies a rating of 4, 26.53% of them to be precise, while the lowest rating 0.5 accounts for only 1.58% of the users.<br><br> It can also be seen that the ratings are left skewed, whiich suggests that most of the movies have high ratings and also that the mean is lower than the mode.",
                unsafe_allow_html=True)

            # Five number summary
            st.write("#### Five number summary and boxplot")

            summary = train['rating'].describe(include='all')
            st.write(summary)

            # Box plot
            plt.boxplot(train['rating'])
            plt.ylabel("Rating")
            plt.xlabel("movies")
            st.pyplot()

            st.markdown(
                'On average a user is most likely to give a movie a rating of 3.5257. The lowest rating given to a movie is 0.5, which is visible from the boxplot and that it is an outlier, meaning that it is an event that is less likely to occur. The standard deviation of the data is low which indicates that the ratings that the users make are usually closer to the mean.<br><br> The boxplot is also confirming that the ratings are left skewed, as it is visible that the mean value is lower than the mode which is 4, as seen on the distribution plot.',
                unsafe_allow_html=True)

        if variable_selection == "Movies":

            # Preview the movies dataframe
            st.write("Preview movies dataframe:")
            st.write(movies.head(3))

            st.write("#### Use the selectbox below to navigate the visuals")

            options = [
                'Top 20 movies with highest rating',
                'Top 20 most rated movies',
                'Top 20 movies with highest relevance'
            ]
            selection = st.selectbox("Choose Option", options)

            # Merge dataframes for rating analysis
            movies_train_df = pd.merge(train, movies, how='left', on='movieId')
            # Strip the "(YYYY)" release year from titles. Raw string avoids
            # invalid-escape warnings; regex=True is explicit because the
            # pandas default for this flag differs between versions.
            movies_train_df['title'] = movies_train_df['title'].str.replace(
                r'(\(\d\d\d\d\))', '', regex=True)

            if selection == 'Top 20 movies with highest rating':

                # Sum of all ratings per title
                rating_grouped = movies_train_df.groupby(['title'
                                                          ])[['rating']].sum()
                high_rated = rating_grouped.nlargest(20, 'rating')

                _plot_top20_bar(high_rated,
                                'rating',
                                title='Top 20 movies with highest rating',
                                xlabel='movies title',
                                ylabel='ratings')

                st.markdown(
                    'This graph shows the top 20 highest rated movies. The(1994) Shawshank Redemption is the highest rated movie taking the number 1 spot. The Shawshank Redemption is a 1994 American drama film written and directed by Frank Darabont, based on the 1982 Stephen King novella Rita Hayworth and Shawshank. Following at number 2 is the pulp fiction.The ratings are grouped based on the title the calculate the sum of the ratings to get a total.',
                    unsafe_allow_html=True)

            if selection == 'Top 20 most rated movies':

                # Number of ratings per title
                no_ratings_df = movies_train_df.groupby('title')[['rating'
                                                                  ]].count()
                rating_count_20 = no_ratings_df.nlargest(20, 'rating')

                # plot movies with the highest number of ratings
                _plot_top20_bar(
                    rating_count_20,
                    'rating',
                    title='Top 20 movies with highest number of ratings',
                    xlabel='movies title',
                    ylabel='ratings',
                    outlined=False)

                st.markdown(
                    'This graph shows most rated movies. The(1994) Shawshank Redemption is the most rated movie in the dataset. If we combine these findings with the ones on the "highest rated movies" plot we can see that it is not only the most rated movie but the users are rating it very high, which can lead to the conlusion that it a satisfying movie.',
                    unsafe_allow_html=True)

            if selection == 'Top 20 movies with highest relevance':

                # Create a merged dataframe with g_scores and movies
                genome_movies_df = pd.merge(g_scores,
                                            movies,
                                            how='left',
                                            on='movieId')
                genome_train_grouped = genome_movies_df.groupby(
                    ['title'])[['relevance']].sum()
                high_relevance = genome_train_grouped.nlargest(20, 'relevance')

                _plot_top20_bar(high_relevance,
                                'relevance',
                                title='Top 20 movies with highest relevance',
                                xlabel='Movies title',
                                ylabel='Relevance')

                st.markdown(
                    'The above graph shows the top 20 most relevant movies. These are the movies that can connect to people and can also be recommended to new users that do not have a history in a platform',
                    unsafe_allow_html=True)

        if variable_selection == 'Runtime':

            plt.figure(figsize=(6, 4))
            plt.hist(imdb['runtime'],
                     color='skyblue',
                     edgecolor='black',
                     bins=int(100 / 5))
            plt.xlim(0, 250)

            # seaborn histogram (drawn on the same axes as the one above)
            sns.distplot(imdb['runtime'],
                         hist=True,
                         kde=False,
                         bins=int(100 / 5),
                         color='green',
                         hist_kws={'edgecolor': 'black'})
            # Add labels
            plt.title('Distribution of Movie Runtimes')
            plt.xlabel('Runtime')
            plt.ylabel('Movies')
            st.pyplot()

            # Getting the five number summary and boxplot of runtimes
            st.write("#### Five number summary and boxplot")

            summary = imdb['runtime'].describe(include='all')
            st.write(summary)

            st.markdown(
                'The average runtime for a movie is 102.72 minutes, it can also be seen from the graph that there is a huge spike of frequency at the 100 minutes runtime. The shortest movie runs for 1 minute and the longest movie runs for 750 minutes, which suggests an anomaly with the value.',
                unsafe_allow_html=True)

        if variable_selection == "Directors":
            # Number of rows per director. The explicit copy keeps the new
            # 'count' column from being written onto a slice of `df`.
            directors_movies = df[['director']].copy()
            directors_movies['count'] = 1
            directors_movies = directors_movies.groupby(
                'director').sum().sort_values(by='count', ascending=False)

            # Mean rating per director
            directors_rating = df[['director', 'movieId']]
            directors_rating = pd.merge(left=directors_rating,
                                        right=train,
                                        left_on='movieId',
                                        right_on='movieId')
            directors_rating.drop(['movieId', 'userId', 'timestamp'],
                                  axis=1,
                                  inplace=True)
            directors_rating = directors_rating.groupby(
                'director').mean().sort_values(by='rating', ascending=False)

            directors = pd.merge(left=directors_rating,
                                 right=directors_movies,
                                 left_index=True,
                                 right_index=True)

            # Directors ordered from most to fewest movies
            directors_count = directors.sort_values(by=['count'],
                                                    ascending=False)

            st.write("#### Use the selectbox below to navigate the visuals")

            options = [
                'Highest ranking directors',
                "Highest number of movies a director worked on",
                "Lowest number of movies a director worked on"
            ]
            selection = st.selectbox("Choose Option", options)

            if selection == 'Highest ranking directors':

                # Examine performance of directors
                # Because each director has directed different number of movies, we will calculate a weighted score for each using their mean movie rating and number of movies directed
                director_scores = df[['director', 'movieId']]
                director_scores = pd.merge(left=director_scores,
                                           right=train,
                                           left_on='movieId',
                                           right_on='movieId')
                director_scores.drop(['userId', 'timestamp'],
                                     axis=1,
                                     inplace=True)
                director_scores = director_scores.groupby(
                    'director', as_index=False).agg({
                        'movieId': 'count',
                        'rating': 'mean'
                    })
                # Turn the per-director row count into a percentage share,
                # then weight the mean rating by that share
                all_movies = director_scores['movieId'].sum()
                director_scores['movieId'] = (director_scores['movieId'] /
                                              all_movies * 100)
                director_scores['score'] = (director_scores['movieId'] *
                                            director_scores['rating'])
                director_scores = director_scores.sort_values(
                    'score', ascending=False)
                director_scores = director_scores.set_index('director')

                # Examine top 10 rated directors
                fig, ax = plt.subplots(figsize=(12, 9))  #(15, 10)

                people = director_scores.index[:10]
                y_pos = np.arange(len(people))

                performance = director_scores['score'][:10]

                ax.barh(y_pos,
                        performance,
                        align='center',
                        color=sns.color_palette(palette='viridis',
                                                n_colors=10))
                ax.set_yticks(y_pos)
                ax.set_yticklabels(people)
                ax.invert_yaxis()  # labels read top-to-bottom
                ax.set_xlabel('Score (weighted mean rating)', fontsize=13)
                ax.set_title('Highest ranking directors', pad=20, fontsize=30)
                st.pyplot()

                st.markdown(
                    'Here we examined the best directors by calculating a weighted score comprising the number of movies directed and the rating of each movie. The results indicate the Quentin Tarantino has directed the most, high rating movies in the database. This is followed by a number of authors and directors suggesting that there may be an error in the data.',
                    unsafe_allow_html=True)

            if selection == "Highest number of movies a director worked on":

                plt.xticks(rotation=90, fontsize=7)
                plt.subplots_adjust(bottom=0.3)
                plt.bar(directors_count.index[0:20],
                        height=directors_count['count'][0:20],
                        color=sns.color_palette(palette='viridis',
                                                n_colors=20))
                plt.title("Highest number of movies a director worked on",
                          fontsize=12,
                          pad=20)
                plt.ylabel("Number of movies directed", fontsize=8)
                plt.xlabel("Director", fontsize=8)
                st.pyplot()

                st.markdown(
                    'The plot above shows us the directors that worked on the most projects compared to others. Woody Allen and Luc Besson seem to be the busiest directors with 23 and 22 projects they have respectively worked on. Looking the top two directors and the third one, the difference in the number of projects is not that big, but when you consider the context of data and the scale of making a movie, one could argue that it is a big deal to for a director to surpass another by even one project.',
                    unsafe_allow_html=True)

            if selection == "Lowest number of movies a director worked on":

                # Take the tail of the count-sorted frame so the bars really
                # are the directors with the fewest movies (the unsorted
                # merge result is ordered by rating, not by count).
                plt.bar(directors_count.index[-20:],
                        height=directors_count['count'][-20:],
                        color=sns.color_palette(palette='viridis',
                                                n_colors=20))
                plt.xticks(rotation=90, fontsize=7)
                plt.subplots_adjust(bottom=0.3)
                plt.title("Lowest number of movies a director worked on",
                          fontsize=12,
                          pad=20)
                plt.ylabel("Number of movies directed", fontsize=8)
                plt.xlabel("Director", fontsize=8)
                st.pyplot()

                st.markdown(
                    'From the plot it can be seen that the lowest number of movies a director has worked on is 1, this coud be because of a lot of factors i.e. new directors, failed careers etc.',
                    unsafe_allow_html=True)

    if page_selection == "Business Pitch":
        st.title('Business Proposal')
        st.write(
            "Looking at the current and increased demand of precise and accurate movie recommender models. We have developed an application that evaluates the appetite of viewers and utilizes aggregates that would be able to satisfy your viewers. Considering that the structure of viewership from a television channel is vastly different from that of online movie hosts, in regards that the online movie viewers have the liberty to choose the film of their choice at any given time whereas on television there are restriction on choice and prefered time to watch the film."
        )
        st.write(
            "Given that a high population of the world uses the internet and television to view movies,  we have structured our web app so that it would be able to render solutions and insights for both the platforms. We will observe the TV platform then followed by the online platform."
        )

        st.write('### 1. Television Platform')
        st.write(
            'The limitation of choice on TV has led us to use world aggregates to determine the top movies that would mesmerize the clients. First we will observe the top rated genres world wide: below are pie charts showing each genre and their respective ratings.'
        )

        # Number of ratings per genre (ignoring movies with no genre), then
        # how each genre is historically rated by users
        _plot_genre_rating_pies(
            _genre_rating_totals(exclude=['(no genres listed)']),
            legend_loc=4,
            legend_anchor=(3, 0),
            max_charts=19)

        st.write(
            'Now that we have the top genres we could filter out movies and get the top movies of the top genres and thus base our movie playlist from that perspective.'
        )

        st.write('### 2. Online Movie Platform')
        st.write(
            'When approaching the internet platform we will apply some of the most popular and proven recommender algorithms to make catered recommendations for each individual. The application is based primarily on the concept shown on the Recommender System page, where a content and a collaborative model were used to make movie predictions. The following is an example of results produced for a specific user in our dataset. Below you can see a table showing the top 10 movies recommended for user no. 777.'
        )
        userx = pd.read_csv('resources/data/userec.csv')
        st.write(userx['title'][:10])

    st.sidebar.title("About")
    st.sidebar.info(""" 
        This app is maintained by EDSA students.
        It serves as a project for an unsupervised machine learning sprint.
        
    
        **Authors:**\n
        Caryn Pialat\n
        Kganedi Magolo\n
        Lesego Bhebe\n
        Nombulelo Msibi\n
        Tshokelo Tumelo Mokubi\n
        Tuduetso Mmokwa\n
    
    """)
예제 #18
0
def main():
    """Render the Streamlit app: recommender, home and visuals pages.

    The page to draw is chosen from a sidebar selectbox. ``st``, ``codecs``,
    ``title_list``, ``content_model`` and ``collab_model`` are names defined
    outside this function.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Home", "Visuals", "Recommender System"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('first Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Home":
        st.write('# Project Building Overview')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Read the HTML fragments inside ``with`` blocks so the file handles
        # are closed as soon as the text has been loaded.
        with codecs.open("resources/HTML/info_page.html", 'r',
                         'utf-8') as html_file:
            page = html_file.read()
        st.markdown(page, unsafe_allow_html=True)
        st.write(" ")
        st.write("# Learn more about the recommenders")
        st.image('resources/imgs/filters.png', use_column_width=True)
        st.write(" ")
        with codecs.open("resources/HTML/filter_explanation.html", 'r',
                         'utf-8') as html_file:
            page = html_file.read()
        st.markdown(page, unsafe_allow_html=True)

        st.write("# Approach to the winning solution")
        st.markdown("""
            For the content based filter, we applied the cosine similarity method to find
            similarities between genres and keywords for selected movie titles.

            For the collaborative based filter, we use the Singular Value Decomposition(SVD)
            algorithm to find similar users. We then recommend movies that the predicted users have
            already watched.

        """)

        st.write('# Meet the team')
        st.markdown(
            """| Team member                             | Primary Duty                                                       |
        | :---------------------                | :--------------------                                             |
        | Marcus Moeng                 | Kaggle                           |
        | Thabo Mahlangu | Recommender system model                |
        | Stanley Kobo       | Simple implementation of content-based filtering.                 |
        | Pinky Maredi                    | Trello |
        | Claude Monareng                   | Folder to store model and data binaries if produced.              |
        | Mukovhe Mukwevho                      | Streamlit |""")

    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.

    #--------------------------------------------------------------------
    if page_selection == "Visuals":
        # Two-level navigation: a category selectbox, then a radio group
        # listing the visuals available in that category.
        visual = st.sidebar.selectbox("Visual category", ('Movies', 'Genres'))

        if visual == 'Movies':
            movie_visual = st.sidebar.radio(
                "select movie visual",
                ('Top 5 Movies', 'Top 5 Actors', 'Movie title wordcloud'))

            if movie_visual == 'Movie title wordcloud':
                st.write('# Words used in movie titles')
                st.image('resources/imgs/word_cloud.png')

            elif movie_visual == 'Top 5 Actors':
                st.write("# Top 5 Actors in our Database")
                st.write('## 1. Samuel L. Jackson')
                # No explicit width: use_column_width=True takes precedence
                # over a width argument, so passing both is contradictory.
                st.image('resources/imgs/samuel_jackson.jpg',
                         use_column_width=True)
                st.write('more...')

                st.write('## 2. Steve Buscemi')
                st.image('resources/imgs/steve_B.jpeg', use_column_width=True)

                st.write("## 3. Robert De Niro")
                st.image('resources/imgs/robert.jpg', use_column_width=True)

                st.write("## 4. Nicolas Cage")
                st.image('resources/imgs/cage.jpg', use_column_width=True)

                st.write("## 5. Gerard Depardieu")
                st.image('resources/imgs/gerard.jpg', use_column_width=True)

            elif movie_visual == 'Top 5 Movies':
                st.write("# The best 5 Movies")
                st.write('## 1. Interstellar 2010')
                st.image('resources/imgs/interstellar.jpg',
                         use_column_width=True)

                st.write('## 2. Django Unchained 2012')
                st.image('resources/imgs/django.jpg', use_column_width=True)

                st.write("## 3. Dark Knight Rises 2012")
                st.image('resources/imgs/dark.jpg', use_column_width=True)

                st.write("## 4. Avengers 2012")
                st.image('resources/imgs/avengers.jpg', use_column_width=True)

                st.write("## 5. Guardians of the Galaxy 2014")
                st.image('resources/imgs/galaxy.jpeg', use_column_width=True)

        if visual == "Genres":
            genre_visual = st.sidebar.radio(
                "select genre visual", ('Popularity', 'Runtime', 'Budget'))

            if genre_visual == 'Popularity':
                st.write('# Genre popularity')
                st.image('resources/imgs/genres.png', use_column_width=True)

                st.image('resources/imgs/tree_map.png', use_column_width=True)

            elif genre_visual == 'Runtime':
                st.image('resources/imgs/genre_runtime.png',
                         use_column_width=True)

            else:
                # Remaining radio option: 'Budget'.
                st.image('resources/imgs/genre_budget.png',
                         use_column_width=True)
def main():
    """Draw the Streamlit app for the page picked in the sidebar.

    Pages: "Solution Overview", "EDA", "Recommender System" and "About Us".
    ``st``, ``title_tag``, ``subheading``, ``title_list``, ``content_model``
    and ``collab_model`` are names defined outside this function.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = [
        "Solution Overview", "EDA", "Recommender System", "About Us"
    ]

    # Sidebar styling is injected as raw HTML before any widget is created.
    sidebar_css = """
		<style>
		.sidebar .sidebar-content{
		background-image: linear-gradient(white, red);
		font color: white;
		}
		</style>
		"""
    st.markdown(sidebar_css, unsafe_allow_html=True)

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        title_tag('Movie Recommender Engine')
        subheading('EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
							  We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
							  We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    # The remaining pages are mutually exclusive, so a single chain suffices.
    if page_selection == "Solution Overview":
        title_tag("Solution Overview")
        subheading("Describe your winning approach on this page")

    elif page_selection == "About Us":
        team_blurb = "TEAM 4 is a group of six members from EDSA comprising of Lesedi, Chuene, Kgomotso, Thabisile, Charles and Tumelo"
        title_tag(team_blurb)
        subheading("Visit our Contact Page and lets get in touch!")

    elif page_selection == "EDA":
        title_tag("Our Exploratory Data Analysis")

        # Each selectable visual maps to (sub-heading text, image path).
        eda_visuals = {
            "The top 15 movies": (
                'Top 15 movies by number of Ratings',
                'resources/imgs/top_15_titles.png',
            ),
            "Genres with the most number movies": (
                'Genres with the most number movies',
                'resources/imgs/Genres.png',
            ),
            "A count of films by directors": (
                'A count of films by directors',
                'resources/imgs/director.png',
            ),
            "Wordcloud": (
                'Most Popular Movie Keywords',
                'resources/imgs/wordcloud.png',
            ),
        }
        visual_selection = st.selectbox(
            "Choose Exploratory Data Analaysis Visuas Option",
            list(eda_visuals))

        chosen_visual = eda_visuals.get(visual_selection)
        if chosen_visual is not None:
            heading_text, image_path = chosen_visual
            subheading(heading_text)
            st.image(image_path, use_column_width=True)
예제 #20
0
def main():
    """Render the Streamlit movie-recommender app.

    Draws a sidebar page menu and then one of four pages: the recommender,
    a data description, an exploratory data analysis, or an explanation of
    how recommender systems work. An "About" box is always added to the
    sidebar at the end.

    ``st``, ``pd``, ``np``, ``plt``, ``sns``, ``px``, ``Image``, ``markdown``,
    ``s3_path``, ``title_list``, ``content_model`` and ``collab_model`` are
    names defined outside this function.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    st.sidebar.title('Movie Recommender App')
    st.sidebar.markdown('''
        ## **Pages**\n
        * Recommender System\n
        * Data Description\n
        * Exploratory Data Analysis\n
        * How a Recommender System Works\n
        ## Choose a page in the selectbox below:
        ''')
    page_options = [
        "Recommender System", "Data Description", "Exploratory Data Analysis",
        "How a Recommender System Works"
    ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------------

    # -------------- Data Description Page ------------------------------
    if page_selection == "Data Description":
        st.title("Data Description")
        st.subheader(
            "This recommender makes use of data from the MovieLens recommendation service"
        )

        # Read inside a ``with`` block so the file handle is closed promptly.
        with open('resources/md_files/movielens_data_descrip.md') as md_file:
            data_descrip = markdown(md_file.read())
        st.markdown(data_descrip, unsafe_allow_html=True)

    # -------------- EDA PAGE -------------------------------------------
    if page_selection == "Exploratory Data Analysis":
        st.title("Exploratory Data Aanalysis")
        st.info(
            "On this page we will Explore the data and relay any insights we have gained from it"
        )

        m_df = pd.read_csv(s3_path / 'movies.csv')
        r_df = pd.read_csv(s3_path / 'train.csv')

        st.markdown('### Movie information Data')
        st.write(m_df.head())
        st.write('The Movie Data has {} rows and {} columns'.format(
            len(m_df.axes[0]), len(m_df.axes[1])))

        st.markdown('### Rating Data')
        st.write(r_df.head())
        st.write('The Rating Data has {} rows and {} columns'.format(
            len(r_df.axes[0]), len(r_df.axes[1])))

        movie_rate_df = pd.merge(r_df, m_df, on="movieId")

        st.markdown('### Correlation between Rating Data')
        corr = r_df.corr()

        # Mask the upper triangle (diagonal included) so each pair of
        # columns is shown only once.
        mask = np.zeros_like(corr, dtype=bool)
        mask[np.triu_indices_from(mask)] = True
        fig, ax = plt.subplots(figsize=(10, 5))
        # Draw on the axes created above and hand that exact figure to
        # Streamlit instead of relying on pyplot's implicit global figure.
        sns.heatmap(corr, mask=mask, vmax=0.3, cmap="viridis", ax=ax)
        ax.set_title("Correlation between features")
        st.pyplot(fig)

        st.markdown(
            'We can see a strong correlation between _timestamp_ and _movieId_.<br> It appears that movies with the lowest ratings last for around 1.5 hours, which implies that the rating users give a film can be dependant on the length of the film.',
            unsafe_allow_html=True)

        # Rating distribution: number of ratings per rating value.
        st.markdown('### Distribution of Ratings')
        rate_dist = movie_rate_df.groupby('rating').size()
        rate_dist_df = rate_dist.reset_index()
        fig = px.bar(rate_dist_df,
                     y=0,
                     x='rating',
                     labels={
                         'rating': "Rating",
                         '0': 'Count'
                     },
                     color=0)
        st.plotly_chart(fig)

        # This commentary is rendered once only.
        st.markdown(
            'Looking at the rating distribution we can see that most Users are generous when rating a Film, with the majority of ratings 3 or more stars'
        )

        # Ten titles with the largest number of ratings.
        most_rate = movie_rate_df.groupby('title').size().sort_values(
            ascending=False)[:10]
        most_rate_df = most_rate.reset_index()
        fig = px.bar(most_rate_df,
                     y='title',
                     x=0,
                     labels={
                         'title': "Movie Title",
                         0: 'Count'
                     },
                     color=0)
        st.plotly_chart(fig)

        st.markdown("### Pairwise plot of Rating Data")
        pairplot = Image.open('resources/imgs/pairplot.jpg')
        st.image(pairplot, use_column_width=True)
        st.markdown(
            'Most plots here do not show anything of value except the final bottom right one.<br> This shows a very clear correlation between the movie length and ratings.',
            unsafe_allow_html=True)
        st.markdown(
            'Movies that are longer than average seem to get lower reviews than shorter movies, which can give some insight into how most people view movie length.'
        )
    # -------------- HOW IT WORKS PAGE ----------------------------------
    if page_selection == "How a Recommender System Works":

        rec_image = Image.open("resources/imgs/rec_eng_img.jpg.jpeg")
        st.image(rec_image, use_column_width=True)

        st.title("How a Recommender System Works")
        st.info(
            "Here you wil find some simple explanations on how a recommender system works."
        )

        st.markdown("## What is a Recommender System?")
        st.markdown(
            ">Put simply, a recommender system is used to allow a service provider to build a catalogue of items or suggestions that they want to present to a user.<br> This allows them to offer relevant service to their users without overloading them with information that they may not want to see or sift through themselves.<br> In this era of technology and bountifull information it is very important that a user is given relevant information, but also in manageable amounts, about content, as there is too much for a user to give attention to individually.",
            unsafe_allow_html=True)
        st.markdown(
            "A Recommender System/Engine can suggest items or actions of interest, or in our case, movie recommendations to a user, based on their similarity to other users.<br> By similarity one means how similar one user is to another, based on their likes and dislikes, their demographic information, their preferred genre's, or the rating that they give items.",
            unsafe_allow_html=True)

        # The detailed comparison is only shown after the button is pressed.
        if st.button('Recommender Types'):
            st.markdown(
                "We chose to mainly focuss on a **Collaborative-Based** Recommender.<br> The recommender system we created is one that will provide movie recommendations to a user(user1), by having them choose 3 movies that they like from a list, and from that choice we calculate their similarity to other users(user2,5 and 6) who also rated those movie's highly.<br> We then see which other movies Users 2, 5 and 6 have rated highly that User 1 has not seen yet, and recommend those to User 1.<br> <img src='https://miro.medium.com/max/2728/1*x8gTiprhLs7zflmEn1UjAQ.png' alt='colab' width='550' height='450'/>",
                unsafe_allow_html=True)

            st.markdown(
                "There is also a **Content-Based** Recommender system, which instead of the user ratings, takes into account the content of the films, and how similar that content is to the content of other films, such as: Genre, duration, actors, release year, director, demographics and more.<br> <img src='https://miro.medium.com/max/1642/1*BME1JjIlBEAI9BV5pOO5Mg.png' alt='content' width='400' height='500'/>",
                unsafe_allow_html=True)

            st.markdown(
                "The drawback to this method is that it does not always take into account the _'Humanity'_ aspect, where users are likely to belong to more than one 'demographic' into which a Content-Based System creates it's similarities."
            )

        st.markdown("## Our approach")
        st.markdown("""
            >We chose _Singular Value Decomposition_ (SVD) as our base model (this algorithm was made famous when it was used to win a Netflix Recommender challenge with Collaborative Filtering)\n
>SVD turns our very sparse matrix into a low ranking matrix (reducing the dimensionality) with userID and ItemID or simply known as factors.\n
>We then use this matrix and the SVD Algorithm to predict any missing parts of the matrix, so that each user has a corresponding rating for an Item.\n
>After this, to actually make recommendations to a user we ask them to choose 3 of their favorite movies from a list. Usingtheir choices we then calculate how similar they are to other users who also rated those movies highly. (To accomplish this we used Cosine Similarity to create a similarity matrix and calculated their similar users from this)\n
>We then take the top rated movies from each of the similar users and calculate which top 10 we want to recommend, and return that as our recommendation to the user.

            """)
        st.markdown("## Final thoughts")
        st.markdown("""
        >Why not content based filtering?\n
>Besides the fact that content based filtering is more resource heavy and having large amounts of data to process therefore taking far longer.\n
>Humans in general would prefer to watch a movie someone else has seen and with a similar taste to theirs (this being collaborative).\n
>Consider this, we might all like action movies , but not all action movies are great!\n
>This is where content based filtering fails, It might recommend a movie because you last watched something similar but only to find you hate that movie or worse you might like both horror and comedy , but content based filtering will most likely only recommend one genre over the other.\n
>This means that our chosen algorithm might need a larger dataset, however it is more varied, and willl most likely give a range of recommendation.\n
>As powerful as machine learning is , We learned that in our task of making prediction, what we were actaully doing was predictiong emotions based on past behaviuor of what a user would rate a certain movie.\n
>That is a very complex task and will never be 100% correct all the time. Humans are complex, and their tastes vary widely, not always conforming into one _Category_.\n
>This recommender system we built is not only useful in just predictiong movie ratings but a recommender system can be usedin a number of other tasks aswell, like music or book recommendations, Online purchasing websites recommending products for sale or any number of other methods.
>        """)

    st.sidebar.title('About')
    st.sidebar.info("""
            This App is maintained by EDSA students. It serves as a project for
            an unsupervised learning sprint, by deploying a recommender engine.

            **Authors:**\n
            Heinrich de Klerk\n
            Michael Ilic\n
            Rolivhuwa Malise\n
            Rirhandzu Mahlaule\n
            Nceba Mandlana\n
            Siyabonga Mtshemla\n

    """)
예제 #21
0
def main():
    """Render the Streamlit movie-recommender app.

    A sidebar selectbox picks the page. The "Trending" page ranks movies by
    a weighted rating computed from ``movies_df`` and ``train_df``; that
    computation only runs when the page is actually selected.

    ``st``, ``pd``, ``movies_df``, ``train_df``, ``title_list``,
    ``content_model`` and ``collab_model`` are names defined outside this
    function.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    # NOTE(review): "EDA" is offered in the menu but this function has no
    # branch that renders it.
    page_options = [
        "Recommender System", "Trending", "Solution Overview", "EDA"
    ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("Describe your winning approach on this page")

    if page_selection == "Trending":
        # ----- Build the trending tables (only needed on this page) -----
        # Merge train and movies tables
        merged_df = pd.merge(movies_df, train_df, on='movieId')
        # Get average rating for each movie
        average_rating = pd.DataFrame(
            merged_df.groupby('title')['rating'].mean().reset_index())
        # Get number of votes for each movie. Naming the index and the value
        # column explicitly avoids depending on the default column names
        # that value_counts().reset_index() produces.
        vote_counts = (merged_df['title'].value_counts()
                       .rename_axis('title')
                       .reset_index(name='vote_count'))
        # Create dataframe with movies, vote counts, average ratings
        movies_with_scores = movies_df.copy()
        movies_with_scores = pd.merge(movies_with_scores,
                                      vote_counts,
                                      on='title')
        movies_with_scores = pd.merge(movies_with_scores,
                                      average_rating,
                                      on='title')

        # Mean of the per-movie average ratings (C in the formula below)
        mean_rating = movies_with_scores['rating'].mean()
        # Minimum votes required to be listed in the chart - 90th percentile
        min_votes = movies_with_scores['vote_count'].quantile(0.9)
        # Filter first, then copy, so the 'score' column added below is
        # written to an independent frame.
        qual_movies = movies_with_scores.loc[
            movies_with_scores['vote_count'] >= min_votes].copy()

        # Weighted score based on the IMDB formula:
        #   v / (v + m) * R + m / (m + v) * C
        votes = qual_movies['vote_count']
        ratings = qual_movies['rating']
        qual_movies['score'] = (votes / (votes + min_votes) * ratings +
                                min_votes / (min_votes + votes) * mean_rating)
        # Sort movies based on score
        qual_movies = qual_movies.sort_values('score', ascending=False)

        # Split pipe-separated genres into one row per (movie, genre)
        split_genres = pd.DataFrame(
            qual_movies.genres.str.split('|').tolist(),
            index=qual_movies.movieId).stack()
        split_genres = split_genres.reset_index([0, 'movieId'])
        split_genres.columns = ['movieId', 'genres']
        # Merge on movie ID. Both frames carry a 'genres' column, so the
        # per-genre value from the split frame is read back as 'genres_x'.
        split_genres_merge = pd.merge(split_genres, qual_movies, on='movieId')
        split_genres_merge = split_genres_merge[['title', 'genres_x', 'score']]
        # List of genres for dropdown
        genres_list = split_genres_merge['genres_x'].unique().tolist()

        # ----- Page content -----
        st.title("Trending")
        st.write(
            "These are the movies that are the most popular among all users.")
        st.write(
            "The score is calculated using IMDb's weighted rating formula that "
            +
            "considers both the ratings of the movies and the number of votes that "
            + "a movie has. This formula provides a 'true Bayesian estimate'" +
            ", resulting in a more accurate vote average.")
        st.write("IMDb's weighted rating formula is as follows: ")
        st.write("![](https://image.ibb.co/jYWZp9/wr.png)")
        st.write("where, \n* v = number of votes given for movie" +
                 "\n* m = minimum votes required to be listed in the dataset" +
                 "\n* R = average rating of a movie" +
                 "\n* C = mean vote across all movies")
        st.write("## All genres")
        # Keyword arguments keep the default inside [min_value, max_value];
        # distinct keys keep the two same-labelled sliders apart.
        trending_num = st.slider("Number of movies to be shown",
                                 min_value=1,
                                 max_value=100,
                                 value=10,
                                 key="trending_all_genres_count")
        st.write(qual_movies[['title', 'score']].head(trending_num))
        st.write("## Highest rated movies per genre")
        trending_genre = st.selectbox("Select genre", genres_list)
        trending_genre_num = st.slider("Number of movies to be shown",
                                       min_value=1,
                                       max_value=30,
                                       value=10,
                                       key="trending_per_genre_count")
        st.write(split_genres_merge[
            split_genres_merge['genres_x'] == trending_genre][[
                'title', 'score'
            ]].sort_values('score', ascending=False).head(trending_genre_num))
예제 #22
0
def main():
    """Render the Streamlit movie recommender app.

    A sidebar select box switches between three pages:

    * "Recommender System" - the user picks three favourite movies and an
      algorithm; pressing "Recommend" lists the top-10 titles returned by
      ``content_model`` or ``collab_model``.
    * "Solution Overview" - placeholder notes on the preprocessing steps.
    * "Contributors" - links to the team members' GitHub profiles.

    The function takes no arguments and returns nothing; all output goes
    through ``st`` calls.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    # "Solution Overview" must be listed here, otherwise the page that is
    # implemented further down can never be selected from the sidebar.
    page_options = ["Recommender System", "Solution Overview", "Contributors"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.markdown("""
            
            """)
        options = [
            'Scraping IMDb Movie Data', 'Exploratory Data Analysis',
            'Building Content Column'
        ]
        option = st.selectbox('Select Preprocessing To View', options)

        # Exactly one preprocessing topic is shown at a time.
        if option == 'Scraping IMDb Movie Data':
            st.subheader('Scrape')
            st.markdown("""
                #### Description
                - Whoops.

                """)
        elif option == 'Exploratory Data Analysis':
            st.write('EDA')
        elif option == 'Building Content Column':
            st.write('Content')

    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.

    # ------------------------------------------------------------------------

    # ------------------------------- NEW PAGE -------------------------------

    if page_selection == "Contributors":

        st.title("Team EN1 JHB Unsupervised")
        st.markdown("""

            \n
            - [Sibusiso Luthuli](https://github.com/SibusisoTL)
            - [Nondumiso Magudulela](https://github.com/Ndumi-ndumi)
            - [Kgaogelo Mamadi](https://github.com/mrmamadi)
            - [Hlamulo Mavasa](https://github.com/Hlamulo-Mav)
            - [Gavin Pillay](https://github.com/GHP012)
            - [Callin Reeby](https://github.com/CallinR)

            """)
예제 #23
0
def main():
    """Render the Streamlit movie recommender app.

    A sidebar select box switches between six pages: "Solution Overview",
    "Insights", "Recommender System", "Why Choose Us?", "About Us" and
    "Contact Us". The recommender page takes three favourite movies and,
    on "Recommend", lists the top-10 titles returned by ``content_model``
    or ``collab_model``; the other pages show static text and images.

    The function takes no arguments and returns nothing; all output goes
    through ``st`` calls.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = [
        "Solution Overview", "Insights", "Recommender System",
        "Why Choose Us?", "About Us", "Contact Us"
    ]
    # NOTE(review): `font-color` is not a CSS property (the property is
    # `color`), so that declaration has no effect. It is left untouched
    # because switching it on would put white text on the white end of the
    # gradient.
    st.markdown(
        """
		<style>
		.sidebar .sidebar-content{
		background-image: linear-gradient(white, red);
		font-color: white;
		}
		</style>
		""",
        unsafe_allow_html=True,
    )

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
							  We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
							  We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    def add_spacing(breaks):
        """Emit `breaks` HTML <br> tags as vertical spacing on the page."""
        # Builds exactly the markup the pages used to spell out by hand
        # (one tab-indented <br> per line).
        st.markdown("\n" + "\t\t\t\t<br>\n" * breaks + "\t\t\t\t",
                    unsafe_allow_html=True)

    # Solution Overview Page
    if page_selection == "Solution Overview":
        # Each overview topic shares one layout: title, spacing, body text,
        # spacing, illustration. The topic name doubles as the page title.
        overview_pages = {
            "Problem Statement": (
                'Accurately predict unseen movie ratings gathered from thousands of users based on their historic preferences. The objective of this App is to construct a recommendation algorithm based on content and collaborative filtering, capable of accurately predicting how a user will rate a movie they have not watched yet based on their historical preference.',
                # NOTE(review): file name looks misspelled - confirm it
                # matches the image shipped in resources/imgs.
                "resources/imgs/researcjh.png",
            ),
            "Introduction": (
                'Recommender systems are systems that are designed to recommend things to the user based on many different factors. These systems predict the most likely product that the user is most likely to purchase and are of interest. Companies like Netflix and Amazon use recommender systems to help their users to identify the correct product or movies for them. Recommender systems are an important class of machine learning algorithms that offer relevant suggestions to users. The suggested items are as relevant to the user as possible so that the user can engage with those items: YouTube videos, news articles, online products, movie and series recommendation. Items are ranked according to their relevancy, and the most relevant ones are shown to the user. The relevance is determined by the recommender system, mainly based on historical data. For example, If you have recently watched YouTube videos about elephants, then YouTube is going to start showing you many elephant videos with similar titles and themes. Recommender systems are generally divided into two main categories: collaborative filtering and content-based systems. Both users and service providers have benefited from these kinds of systems. Intelligent algorithms can help viewers find great titles from tens of thousands of options. This notebook will construct a recommendation algorithm based on content and collaborative filtering, capable of accurately predicting how a user will rate a movie they have not yet viewed based on their historic preferences. Providing an accurate and robust solution will have immense economic potential, with users of the system being exposed to content they would like to view or purchase - generating revenue and platform affinity.',
                "resources/imgs/data.png",
            ),
            "Conclusion": (
                "Facebook, YouTube, LinkedIn are among the most used websites on the internet today that use recommender systems. Facebook suggests us to make more friends using the 'People You May Know' section. Similarly, LinkedIn recommends you connect with people you may know, and YouTube suggests relevant videos based on your previous browsing history. All of these are recommender systems in action. While most of the people are aware of these features, only a few know that the algorithms used behind these features are known as 'Recommender Systems'. They 'recommend' personalised content based on user's past / current preference to improve the user experience. We were tasked with accurately predicting unseen movie ratings gathered from thousands of users based on their historic preferences. Broadly, there are two types of recommendation systems: Content-Based and Collaborative filtering based as mention. In the notebook, we observation algorithms of both content-based and collaborative filtering. When we used the linear regression model (content-based) on the test data, it produced an RMSE score of 0.82565. However, the Singular Value Decomposition (collaborative-filtering) performed better on the test data with an RMSE score of 0.80773, which is our final score on the Kaggle leaderboard.",
                "resources/imgs/dota.png",
            ),
        }
        overview_selection = st.selectbox(
            "Please select an option to get an overview of the project",
            list(overview_pages))

        overview_page = overview_pages.get(overview_selection)
        if overview_page is not None:
            overview_text, overview_image = overview_page
            st.title(overview_selection)
            add_spacing(2)
            st.write(overview_text)
            add_spacing(3)
            st.image(overview_image)

    # About Us page
    if page_selection == "About Us":
        title = """
				<h2 style="color:black;text-align:center;">TEAM Four is a group of data scientists from EDSA</h2>
				"""
        st.markdown(title, unsafe_allow_html=True)

        st.image("resources/imgs/team.png", use_column_width=True)

    if page_selection == "Insights":
        # Selectbox label -> (sub-heading text, image path). Dict order is
        # the order in which the options appear in the select box.
        insight_visuals = {
            "The top 15 movies":
            ('Top 15 movies by number of Ratings',
             'resources/imgs/top_15_titles.png'),
            "Genres with the most number movies":
            ('Genres with the most number movies',
             'resources/imgs/Genres.png'),
            "A count of films by directors":
            ('A count of films by directors', 'resources/imgs/director.png'),
            "Top 10 ratings":
            ('Top 10 Movie Ratings', 'resources/imgs/top_10_ratings.png'),
            "Genre distribution":
            ('Number of times a genre appears', 'resources/imgs/Genres_2.png'),
            "Wordcloud":
            ('Most Popular Movie Keywords', 'resources/imgs/wordcloud.png'),
            "Wordcloud analysis":
            ('Most Popular Movie Keywords analysis',
             'resources/imgs/wordcloud_2.png'),
            "Model accuracy":
            ('Model accuracy by means of RMSE score',
             'resources/imgs/model_accuracy.png'),
        }
        visual_selection = st.selectbox(
            "Choose Exploratory Data Analaysis Visuas Option",
            list(insight_visuals))

        visual = insight_visuals.get(visual_selection)
        if visual is not None:
            heading, image_path = visual
            # `subheading` is not defined in this function; presumably a
            # helper defined elsewhere in the file - verify it is in scope.
            subheading(heading)
            st.image(image_path, use_column_width=True)

    # Building out the business pitch
    if page_selection == "Why Choose Us?":
        st.title("Why Choose Us?")

        add_spacing(2)

        st.write(
            "Our team can help your organisation find interesting patterns and inisghts from available data. One such data science project we proud on working on is a recommender system. Recommender systems can mean big business for your organisation. Research shows that recommender systems can increase an organisation's turnover by up to 30%"
        )

        add_spacing(1)

        st.image('resources/imgs/data-science.png')

    # Building out the Contact Page
    if page_selection == "Contact Us":
        # The wrapping <div> is now closed explicitly instead of relying on
        # the browser to repair the markup.
        title = """
		<div style="background-color:#464e5f00;padding:5px;border-radius:10px;margin:10px;">
		<h3 style="color:black;text-align:center;">Lets get in touch for all your Machine Learning needs!</h3>
		</div>
		"""

        st.markdown(title, unsafe_allow_html=True)
        # The name and contact fields are rendered for the visitor, but
        # their values are not used anywhere: only the message is echoed.
        st.text_input("Enter your Name", "Type Here Please...")
        st.text_input("Enter your last Name", "Type Here Please..")
        st.text_input("Enter your contact details here",
                      "Type Here Please...")
        message = st.text_area(
            "What is company trying to achieve through data",
            "Type here Please..")

        if st.button("Submit"):
            result = message.title()
            st.success(result)
def main():
    """Render the Streamlit movie recommender app.

    A sidebar select box switches between five pages: "Recommender System",
    "Solution Overview", "Data analysis and plots", "Meet the team" and
    "Pitch". The recommender page takes three favourite movies and, on
    "Recommend", lists the top-10 titles returned by ``content_model`` or
    ``collab_model``; the remaining pages show static text, images and HTML.

    The function takes no arguments and returns nothing; all output goes
    through ``st`` calls.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = [
        "Recommender System", "Solution Overview", "Data analysis and plots",
        "Meet the team", "Pitch"
    ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('First Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    # Static explanation of the two recommendation approaches, each followed
    # by a diagram.
    # NOTE: the backslash line continuations inside the string literals on
    # these pages keep the following line's leading spaces as part of the
    # string, so re-indenting those lines changes the text passed to
    # Streamlit.
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write(
            "For the recommendations made in this app we used two approaches: \
                user similarity (collaborative based filtering) and movie similarity (content based filtering).\
                The following is an overview of how they work.")
        st.title("Content based")
        st.write(
            "Here we made recommendations based on how similar the properties\
                or features of a movie are to other movies. We used movies with similar\
                themes (in the same genre) or have similar actors to predict what \
                the user will like. If you make your movie selection based on \
                actors you love the recommendations will reflect that, and \
                if you select movies based on a genre you prefer on a \
                specific day the recommendations will reflect that.\
                This algorithm takes a lot more into consideration than just ratings from other users. "
        )
        st.image("resources/imgs/plots/content_model.jpg", width=450)
        st.title("Collaborative based")
        st.write(
            "This algorithm is based on past interactions between users and the\
                movies in the database. We make recommendations based on movies that those users rated.\
                This method takes into account movies that users with similar tastes liked and makes a \
                recommendation on that basis and no additional information.")
        st.image("resources/imgs/plots/collab_model.jpg", width=650)

    # EDA page: two independent checkboxes, each revealing a group of
    # pre-rendered plot images with commentary. Both checkboxes are always
    # drawn; either, both or neither section may be open.
    if page_selection == "Data analysis and plots":
        st.title("Data analysis")
        if st.checkbox("Ratings insights"):
            st.subheader(
                "These plots give insights about the ratings given for the movies"
            )
            st.write(
                "This is a count of movies that have been given a certain rating with 5 being \
                    the highest rating that a movie can get. Most of the movies have been given a \
                    rating of 4 which means that the majority of people enjoy most of the movies in the database \
                    The poorly rated movies have ratings from 0.5-2.5 and they all have a low number of movies. \
                    The reason could be that a poorly rated movie is less likely to be watched by a lot of people."
            )
            st.image("resources/imgs/plots/ratings_distribution.png",
                     width=650)
            st.write(
                "These are the 20 most rated movies. In the top 10 we only have movies from \
                    the 90s, with 1994 taking the top 3 spots.")
            st.image("resources/imgs/plots/highest_rated_movies.png",
                     width=650)
        if st.checkbox("Movie insights"):
            st.subheader(
                "A number of factors influence movie choices and below we take a look at \
                    some of those factors such as popular themes, actors, directors and era"
            )
            st.write(
                "The average movie runtime is 116.1 minutes which equates to ~1.9 hours."
            )
            st.image("resources/imgs/plots/runtime.png", width=650)
            st.write(
                "Drama holds the most number of movies in the database followed by comedy and action."
            )
            st.image("resources/imgs/plots/number_of_movies_by_genre2.png",
                     width=600)
            st.write(
                "The graph below shows the distribution on movies in the dataset. At first glance, \
                it is clear that the 2010s have the highest number of movies released in one decade."
            )
            st.image("resources/imgs/plots/movies_per_era.png", width=650)
            st.write(
                "These are the most popular themes. The keywords are a reflection of the top 3 genres \
                    in the database (drama, comedy and action). If you watch movies in these genres it is \
                    likely that the movie will have these keywords and that is why these movies have high age \
                    restictions. The keywords also show what themes people enjoy watching."
            )
            st.image("resources/imgs/plots/wordcloud2.png", width=650)
            # NOTE(review): the director plot is shown without any
            # accompanying text, and the paragraph after it ("The graph
            # above ...") talks about actors while the actors plot is only
            # drawn after that paragraph - confirm the intended order.
            st.image("resources/imgs/plots/director_movies.png", width=650)
            st.write(
                "The graph above shows the number of times movies with specific actors in the dataset \
                    have been rated. Tom Hanks takes the lead with more than 195000 movie ratings to his name.\
                    In second place is Samuel L.Jackson followed by Morgan Freeman in third place. It makes sense \
                    that the top 3 actors with the most ratings associated with their names also star in the top \
                    3 most rated movies (refer to 'most rated movies' section). It is important\
                    to note that most of the movies in this database are American based and therefore \
                    the most popular actors are American.")
            st.image("resources/imgs/plots/frequent_actors.png", width=650)

    # `slides` is not defined in this function; presumably a module-level
    # HTML string - verify. It is rendered as raw HTML.
    if page_selection == "Pitch":
        st.title("Pitch slide deck")
        st.markdown(slides, unsafe_allow_html=True)

    # `team` and `local_css` are likewise defined outside this function.
    # The team markup is rendered first, then local_css is called with the
    # stylesheet path.
    if page_selection == "Meet the team":
        st.title("Meet the data science team")
        st.markdown(team, unsafe_allow_html=True)
        local_css('resources/pages/html_style.css')
예제 #25
0
def main():

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = ["Welcome","About The App","EDA","Recommender System","Search for a movie","Solution Overview","Contact Us","About Us"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.markdown("<h1 style='text-align: left; color: black;'>Content-based filtering: uses item features to recommend other items similar to what the user likes, based on their previous actions or explicit feedback</h1>", unsafe_allow_html=True)
        st.image(('resources/imgs/content-based.png'), use_column_width=True)
        st.markdown("<h1 style='text-align: left; color: black;'>Collaborative filtering: builds a model from your past behavior (i.e. movies watched or selected by the you) as well as similar decisions made by other users</h1>", unsafe_allow_html=True)
        st.image(('resources/imgs/collaborative filtering.png'), use_column_width=True)
        st.write("Describe your winning approach on this page")

    if page_selection == "Search for a movie":
        st.title("Search for Movies")
        st.image(('resources/imgs/franchises.jpg'), use_column_width=True)
        st.markdown('Please Refer to the About Machine Learning Page to learn more about the techniques used to recommend movies. If you decide not to use the recommender systems you can use this page to filter movies based on the rating of the movie , the year in which the movie was released and the genre of the movies. After you change the filter you will be left with movies that are specific to that filter used. Then when you scroll down you will see the movie name and the link to a youtube trailer of that movie. When you click the link ,you will see a page on youtube for that specific movie and you can watch the trailer and see if you like it. This is an alternative method to you if you are not satisfied with the recommender engine . Enjoy! ', unsafe_allow_html=True)
        # Movies
        df = pd.read_csv('resources/data/movies.csv')

        
        def explode(df, lst_cols, fill_value='', preserve_index=False):
            """Expand list-valued columns so every list element gets its own row.

            Parameters
            ----------
            df : pandas.DataFrame
                Frame holding one or more columns whose cells are lists.
            lst_cols : str or list-like of str
                Name(s) of the list-valued column(s). Row lengths are taken
                from the first one; the others must have matching lengths.
            fill_value : scalar, default ''
                Value written into the list column(s) for rows whose list
                was empty (such rows are kept, not dropped).
            preserve_index : bool, default False
                If True, exploded rows keep the index label of the row they
                came from; otherwise the result gets a fresh 0..n-1 index.

            Returns
            -------
            pandas.DataFrame
                The non-list columns repeated once per list element, plus
                the flattened list column(s), ordered by original row.
            """
            import numpy as np

            # make sure `lst_cols` is list-alike
            if (lst_cols is not None
                    and len(lst_cols) > 0
                    and not isinstance(lst_cols, (list, tuple, np.ndarray, pd.Series))):
                lst_cols = [lst_cols]
            # all columns except `lst_cols`
            idx_cols = df.columns.difference(lst_cols)
            # calculate lengths of lists
            lens = df[lst_cols[0]].str.len()
            has_items = lens > 0
            # preserve original index values
            idx = np.repeat(df.index.values, lens)

            # np.concatenate refuses an empty sequence, so fall back to an
            # empty column when no row has any list element at all.
            flattened = {
                col: (np.concatenate(df.loc[has_items, col].values)
                      if has_items.any() else np.array([], dtype=object))
                for col in lst_cols
            }
            # create "exploded" DF
            res = (pd.DataFrame({
                        col: np.repeat(df[col].values, lens)
                        for col in idx_cols},
                        index=idx)
                    .assign(**flattened))
            # append those rows that have empty lists
            if (lens == 0).any():
                # DataFrame.append was removed in pandas 2.0; pd.concat is
                # the equivalent that works on old and new versions alike.
                res = (pd.concat([res, df.loc[lens == 0, idx_cols]], sort=False)
                            .fillna(fill_value))
            # revert the original index order; a stable sort keeps the
            # elements of one list in their original sequence
            res = res.sort_index(kind='mergesort')
            # reset index if requested
            if not preserve_index:
                res = res.reset_index(drop=True)
            return res
        movie_data = pd.merge(rating_m, df, on='movieId')
        movie_data['year'] = movie_data.title.str.extract('(\(\d\d\d\d\))',expand=False)
        #Removing the parentheses
        movie_data['year'] = movie_data.year.str.extract('(\d\d\d\d)',expand=False)

        movie_data.genres = movie_data.genres.str.split('|')
        movie_rating = st.sidebar.number_input("Pick a rating ",0.5,5.0, step=0.5)

        movie_data = explode(movie_data, ['genres'])
        movie_title = movie_data['genres'].unique()
        title = st.selectbox('Genre', movie_title)
        movie_data['year'].dropna(inplace = True)
        movie_data = movie_data.drop(['movieId','timestamp','userId'], axis = 1)
        year_of_movie_release = movie_data['year'].sort_values(ascending=False).unique()
        release_year = st.selectbox('Year', year_of_movie_release)

        movie = movie_data[(movie_data.rating == movie_rating)&(movie_data.genres == title)&(movie_data.year == release_year)]
        df = movie.drop_duplicates(subset = ["title"])
        if len(df) !=0:
            st.write(df)
        if len(df) ==0:
            st.write('We have no movies for that rating!')        
        def youtube_link(title):
            """Build a YouTube search-results URL for a movie title.

            The title is percent-encoded as a query-string value (spaces
            become ``+``), so characters such as ``&``, ``#`` or ``+`` that
            appear in a title cannot truncate or corrupt the search query.

            INPUT: ('The little Mermaid')
            -----------

            OUTPUT: https://www.youtube.com/results?search_query=The+little+Mermaid&page=1
            ----------
            """
            # Imported locally so the helper does not depend on the file's
            # top-level import block.
            from urllib.parse import quote_plus

            base = "https://www.youtube.com/results?search_query="
            page = "&page=1"
            return base + quote_plus(title) + page
        if len(df) !=0:           
            for _, row in df.iterrows():
                st.write(row['title'])
                st.write(youtube_link(title = row['title']))



    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.
    if page_selection == "Welcome":
        st.subheader("==========================================================")
        st.markdown('''<span style="color:black"> **Welcome To Our Movie Review App.** </span>''', unsafe_allow_html=True)
        st.subheader("==========================================================")
        st.image('resources/imgs/giphy.gif', use_column_width=True)
        st.image('resources/imgs/Movie-Show-GIF-960.gif', use_column_width=True)

    if page_selection == "About The App":
        st.title("About the App")
        st.markdown("Are you a movie lover? Are you tired of wasting your time watching tons of trailers and ending up not watching their movies? Are you tired of finishing your popcorns before you find the right movie? That has come to an end!!")
        st.image(["resources/imgs/Tired 1.1.gif", "resources/imgs/Tired 1.2.jpg"],use_column_width=True)
        st.markdown("Then we have got the right App for you.")
        st.subheader("How The App Works")
        st.markdown("The Movie Recommender App filters or predicts your preferences based on your favourite or watched movie selections. With just a few clicks, you will select three of your most favourite movies from thousands of movies on the app and you will get top 10 movies you are most likely to enjoy. You have an option to view some data visualizations including word clouds that show the most popular words that appear in movie titles and plots on the most popular genres. The app also contains a contact page, where users of the app can rate our app and give feedback and suggestions. Links to movie sites are also included, so the user has quick and easy to access the recommended movies.")
        st.subheader("Data Description")
        st.markdown("The dataset used for the movie recommender app consists of several million 5-star ratings obtained from users of the online MovieLens movie recommendation service. The data for the MovieLens dataset is maintained by the GroupLens research group in the Department of Computer Science and Engineering at the University of Minnesota. Additional movie content data was legally scraped from IMDB.")

    if page_selection == "EDA":
        st.title("Exploratory Data Analysis")
        st.subheader("All Time Popular Movies By Ratings Insights")
        st.markdown("The graph shows all the movies that have been rated the most for all movies in the dataset. The most popular can be seen as a 1994 movie Shawshank Redemption with a rating count of more than 30 thousand.,")
        st.image(('resources/imgs/all time popular movies by ratings.png'), use_column_width=True)
        st.subheader("Released Movies Per Year Insights")
        st.markdown("The graph shows the number of movies that have been released each year from 1971 to 2017. It is safe to note that there has been an exponential increase in the number of movies released each year. we can go as far as to say that movies released in the 2010s are about 4 times the number of those that were released in the 1070s.")
        st.image(('resources/imgs/total movies released per year.png'), use_column_width=True)
        st.subheader("Popular Genres By Rating Insights")
        st.markdown("The treemap depicts the genres that are most popular to the least popular in terms of ratings. From the treemap it can be seen that the most rated genre happens to be Drama, followed by Comedy with IMAX the least rated.")
        st.image(('resources/imgs/popular genres.png'), use_column_width=True)
        st.subheader("Percentage Of Users Per Ratings Insights")
        st.markdown("The graphs shows the total number of user percentage based on their ratings. Most users rated movies with a rating of 4.0(26.53%)")
        st.image(('resources/imgs/percentage of users per rating.png'), use_column_width=True)
        st.subheader("Popular Actors/Actresses Insights")
        st.markdown("The graph shows who the most popular actors/actresses appearing in the movies are and the number of movies they appear in. The actor Samuel L Jackson is the most popular, appearing in more than 80 movies. Actress Julianne Moore is the most popular amoung the actresses.")
        st.image(('resources/imgs/popular actors.png'), use_column_width=True)
        st.subheader("Rating Distribution")
        st.markdown("This graph shows how ratings are distributed. Just like in, Percentage of Users Per Ratings Insights, most movies have a rating of 4.0")
        st.image(('resources/imgs/rating distribution.png'), use_column_width=True)
        st.subheader("Movie Runtime Ditribution")
        st.markdown("Below we can a see a distribution of movie runtime. Majority of the movies have a 100 minutes runtime.")
        st.image(('resources/imgs/runtime distribution.png'), use_column_width=True)
        st.subheader("Popular Movies Wordcloud")
        st.markdown("The wordcloud below shows the letters appearing the most in the movie titles. Love, Girl, Man and Night are words that appear the biggest. This means that more movies have such words in their title. It makes sense that these words appear the most as we have more movies in the drama, comedy and romance genre.")
        st.image(('resources/imgs/most popular movies wordcloud.png'), use_column_width=True)
        st.subheader("Popular Movie Directors")
        st.markdown("The graph below shows the most popular movie directors. It makes sense for Woody Allen to be the most popular director as the first movie he directed was in 1965 and since then he has directed about 50 movies with the latest released in 2020. Woody has also been acting since 1965 to date, he has directed 3 short films and directed about 12 tv shows.")
        st.image(('resources/imgs/popular movie directors.png'), use_column_width=True)
        st.subheader("Average Budget Per Genre")
        st.markdown("The graph below shows the average budget used for movies in each genre. The War genre seems to have the highest average budget, with the Documentary genre having the least budget.")
        st.image(('resources/imgs/average budget per genre.png'), use_column_width=True)
        st.subheader("Average Runtime Per Genre")
        st.markdown("Western genre has the highest average movie runtime at about 120 minutes. Animation genre has the least average runtime at about 76-77 minutes.")
        st.image(('resources/imgs/average runtime per genre.png'), use_column_width=True)
        st.subheader("Top 20 Popular Movies From 2010")
        st.markdown("The 2014 movie Intersteller had the highest ratings. The movie only had a budget of $165 million but went to make $696.3 million from the box office. The movie also bagged 6 awards including an Academy award for best visual effects.")
        st.image(('resources/imgs/top 20 popular movies by ratings from 2010.png'), use_column_width=True)
        

    if page_selection == "Contact Us":
        st.title("Get in touch with us")
        st.markdown('''<span style="color:blue"> **Help us improve this app by rating it. Tell us how to give you a better user experience.** </span>''', unsafe_allow_html=True)
        @st.cache(allow_output_mutation=True)
        def get_data():
            """Return the list that collects submitted feedback entries.

            NOTE(review): the function is wrapped in
            ``st.cache(allow_output_mutation=True)``; presumably this makes
            later calls hand back the same list object so appended entries
            are kept - confirm against the Streamlit caching docs.
            """
            return list()
        name = st.text_input("User name")
        inputs = st.text_input("Let us improve your user experience!!!")
        rate = st.slider("Rate us", 0, 5)
        if st.button("Submit"):
            get_data().append({"User name": name, "Suggestion": inputs,"rating":rate})
        st.markdown('''<span style="color:blue"> **What other users said:** </span>''', unsafe_allow_html=True)
        st.write(pd.DataFrame(get_data()))
        st.markdown('''<span style="color:blue"> **For any questions contact us here:** </span>''', unsafe_allow_html=True)
        st.image(('resources/imgs/our contact2.PNG'), use_column_width=True)
            #pass
       
    if page_selection == "About Us":
        st.markdown("<h1 style='text-align: center; color: blue;'>About Us</h1>", unsafe_allow_html=True)
        st.markdown("")
        st.info("1. Palesa Hlungwani")
        st.image(('resources/imgs/Palesa.jpg'), use_column_width=True)
        st.markdown("* Github account:PTStace")
        st.markdown("* Kaggle account:palesa_hlungwani")
        st.markdown("* email:[email protected]")
        st.markdown("")

        st.info("2. Orline Sorelle Ketcha")
        st.image(('resources/imgs/Orline.jpg'), use_column_width=True)
        st.markdown("* Github account:OrlineSorel")
        st.markdown("* Kaggle account:Sorelle94")
        st.markdown("* email:[email protected]")
        st.markdown("")

        st.info("3. Thiyasizwe Kubeka")
        st.image(('resources/imgs/Thiya.jpg'), use_column_width=True)
        st.markdown("* Github account:thiyasizwe_kubeka")
        st.markdown("* Kaggle account:Thiyasizwa_kubeka")
        st.markdown("* email:[email protected]")
        st.markdown("")

        st.info("4. Katleho Mokhele")
        st.image(('resources/imgs/Katleho.jpg'), use_column_width=True)
        st.markdown("* Github account:Katness AI")
        st.markdown("* Kaggle account:Katleho Mokhele")
        st.markdown("* email:[email protected]")
        st.markdown("")

        st.info("5. Mfumo Baloyi")
        st.image(('resources/imgs/Mfumo.jpg'), use_column_width=True)
        st.markdown("* Github account:MfumoB")
        st.markdown("* Kaggle account:Mfumoe")
        st.markdown("* email:[email protected]")
예제 #26
0
def main():
    """Render the Streamlit movie recommender app.

    A sidebar select box chooses the page. Each ``if page_selection == ...``
    block below draws one page; the "Recommender System" page is template
    code that must stay as provided.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    # NOTE(review): "About Recommender Systems" is offered here but no page
    # block below handles it, so selecting it renders an empty page.
    page_options = ["About the app","Data visualisation","Recommender System","About Recommender Systems","Solution Overview"]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png',use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio("Select an algorithm",
                       ('Content Based Filtering',
                        'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option',title_list[14930:15200])
        movie_2 = st.selectbox('Second Option',title_list[25055:25255])
        movie_3 = st.selectbox('Third Option',title_list[21100:21200])
        fav_movies = [movie_1,movie_2,movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(movie_list=fav_movies,
                                                            top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(movie_list=fav_movies,
                                                           top_n=10)
                    st.title("We think you'll like:")
                    for i,j in enumerate(top_recommendations):
                        st.subheader(str(i+1)+'. '+j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")


    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    if page_selection == "About the app":
        st.title("About The App")
        st.markdown("The application is about recomender systems. It recomends movies to a user based on two types of recomender system. Which are Collaborative-filtering and Content-Based filtering. Below is a brief explanation of the recomender systems")
        st.subheader("Recommender Systems")
        st.image("resources/imgs/image.png")
        st.markdown("**Collaborative-filtering:** In collaborative-filtering items are recommended, for example movies, based on how similar your user profile is to other users’, finds the users that are most similar to you and then recommends items that they have shown a preference for. This method suffers from the so-called cold-start problem: If there is a new movie, no-one else would’ve yet liked or watched it, so you’re not going to have this in your list of recommended movies, even if you’d love it.")
        st.markdown("**Content-based filtering:** This method uses attributes of the content to recommend similar content. It doesn’t have a cold-start problem because it works through attributes or tags of the content, such as actors, genres or directors, so that new movies can be recommended right away.")

    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("Describe your winning approach on this page")

    if page_selection == "Data visualisation":
        # Imported once, up front, so every branch of this page can open
        # images regardless of which radio option is selected.
        from PIL import Image

        st.title("How Our Movie Data Behaves")
        img4 = Image.open('halloween.jpg')
        st.image(img4, width=500)

        # First radio group: rating / user distributions.
        st.subheader('Distribution of Users and Ratings')
        eda = st.radio('Distribution of Data:',('Distribution of Ratings', 'Distribution of Top Users'))
        if eda == 'Distribution of Ratings':
            img5 = Image.open('rating distribution.jpg')
            st.image(img5, width=500)
            st.write('Ratings above three occur more frequently indicting that users who rate the films are either generous or that users are more likely to rate the films if they found if satisfactory or good')
        if eda == 'Distribution of Top Users':
            img6 = Image.open('user distribution.jpg')
            st.image(img6, width=500)
            st.write('This data respresents the number of users who rated over 2000 films. Very few users rated many films, the films rated by many users represented the distribution of popular films')

        # Second radio group: word clouds of film titles.
        st.subheader('A Look At The Titles Of Popular Or Influential Films')
        wordcloud = st.radio('Highly Rated Films:',('Highly Rated Films', 'Films With A Low Rating' , 'Films Rated by The Greatest Number Of People'))
        if wordcloud == 'Highly Rated Films':
            img7 = Image.open('highly rated films wcb.jpg')
            st.image(img7,width=700)
        if wordcloud == 'Films With A Low Rating':
            img8 = Image.open('Low rated films wcb.jpg')
            st.image(img8,width=700)
        if wordcloud == 'Films Rated by The Greatest Number Of People':
            img9 = Image.open('rated by most people wcb.jpg')
            st.image(img9, width=700)
def main():
    """Render the Streamlit app: recommender, data reports, EDA and overview.

    The sidebar select box picks one page per run; each page is drawn by its
    own ``if page_selection == ...`` block. The "Recommender System" page is
    template code and is kept exactly as supplied.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = [
        "Recommender System", "Pandas profiling", "Sweetviz", "EDA",
        "Solution Overview", "Slides"
    ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.markdown(rec_header, unsafe_allow_html=True)
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # ----------------------- SAFE FOR ALTERING/EXTENSION -----------------

    # Page: pandas-profiling report of the chosen CSV.
    if page_selection == "Pandas profiling":
        st.markdown(prof, unsafe_allow_html=True)
        source_choice = st.radio("choose the data sorce",
                                 ("movies data", "ratings data"))
        csv_path = ('resources/data/movies.csv'
                    if source_choice == "movies data"
                    else 'resources/data/ratings.csv')
        frame = pd.read_csv(csv_path)
        st.dataframe(frame.head())
        st_profile_report(ProfileReport(frame))

    # Page: Sweetviz report of the chosen CSV, generated on button press.
    if page_selection == "Sweetviz":
        st.markdown(sweet, unsafe_allow_html=True)
        source_choice = st.radio("choose the data sorce",
                                 ("movies data", "ratings data"))
        csv_path = ('resources/data/movies.csv'
                    if source_choice == "movies data"
                    else 'resources/data/ratings.csv')
        frame = pd.read_csv(csv_path)
        st.dataframe(frame.head())
        if st.button("Genwrate Sweetviz Report"):
            report = sv.analyze(frame)
            report.show_html()
            st_display_sweetviz("SWEETVIZ_REPORT.html")

    # Page: hand-written EDA. Every entry is one checkbox; ticking it shows
    # the listed images (in order) followed by the commentary text.
    if page_selection == "EDA":
        st.markdown(eda_header, unsafe_allow_html=True)

        eda_sections = (
            (
                "Ratings by year",
                ("resources/imgs/ratings_by_year.PNG",),
                "The ratings for the movies span a period of 25 years, from 1995 all the way to 2019, with the last 5 years accumalatively having had the most ratings in comparison to any othe other 5 year interval. From 2006 to 2014 there is decline in user engagement when it comes to rating movies. Prior to 2006 there are 3 good years with ratings above 500000 ratings for the year, 3 more years at 400000 ratings and 3 below 300000 ratings for the year. it would be of interest to the spending behaviour of users for each of these years, as that would tell the complete story.",
            ),
            (
                "Now for a look at the release year of each of the movies",
                ("resources/imgs/Movie_release_over_T.PNG",),
                "29906 movies have been released in total since 1995 up to 2019, in comparison to the 17937 that have released up to 1994 going all the way back to 1874 (this is not including movies that had no release date specified, which amount to 370 movies). The number of released movies per year is increases slowly at the beginning, then increasing faster leading up to 1995, where the is a substantial rise in the number of movies released. So much so that roughly on average a 153 movies have been released a year from 1874 to 1994, while that number increases substantially for the years between 1995 to 2019, averaging roughly 1246 movies per year. This has given users an overwheliming amount of options to choose from, with regards to the movies they can watch, catering to the needs of everyone, which makes the case for a recommendation system compelling.",
            ),
            (
                "Distributins of user ratings for movies in the past 25 years",
                ("resources/imgs/dst.PNG",
                 "resources/imgs/dst1.PNG",
                 "resources/imgs/dst2.png"),
                "For the entire 25 year period, a rating of 4.0 is the most abundant rating given by users to movies,\
              followed by a rating of 3.0. Ratings of 5.0, 3.5 and 4.5 are next most numerous ratings users give to movies.\
              When the the 25 years are divided into 5, 5 year periods, the first 5 year period between 1995 and 1999,\
              there is an anomaly in the ratings, quite different from th other periods,\
              but still with ratings of 4.0 and 3.0 being the most abundant, followed by a 5.0 rating.\
              The last 3 intervals there is a contant pattern that has emerged with the distributions of the ratings",
            ),
            (
                "Ratings distributions across movie genres",
                ("resources/imgs/ratings_by_year.png",),
                "Movies with between 1 to 4 genres have the most number of ratings for the 25 years, with an average rating for these of roughly bewteen 3.5 to 3.6. Movies with 2 and 3 genres movies get the lions share of the ratings.",
            ),
            (
                "Decade movies were released",
                ("resources/imgs/dc.png",),
                "This is an interesting insight in the data with potentially huge implications, that will be eloborated upon later, in the conclusion when recommendations are put forward. For now however what emerges is the following; movies from before the 1970s have little to no ratings associated with them. Movies released from 1980s have the most ratings. The average rating for movies released for each decade from 1910 onwards is between 3.5 and 4.0., with that average for movies released in later decades carries more weight, since they have more ratings. With movies released in earlier decades having a lower average, perhaps to do with the quality of the movies.",
            ),
        )
        for checkbox_label, image_paths, commentary in eda_sections:
            if st.checkbox(checkbox_label):
                for image_path in image_paths:
                    st.image(image_path, format='PNG')
                st.write(commentary)

    # Page: embedded slide deck.
    if page_selection == "Slides":
        st.markdown(slides, unsafe_allow_html=True)

    # Page: solution overview, rendered from two HTML snippets.
    if page_selection == "Solution Overview":
        for snippet in (html_temp, html_overview):
            st.markdown(snippet, unsafe_allow_html=True)
def main():
    """Render the Team 1 Streamlit app.

    Pages (chosen from the sidebar): Home, Recommender System, About,
    Exploratory Data Analysis and Solution Overview. The "Recommender
    System" page is kept exactly as it stands.
    """

    # Banner shown at the top of the Home page.
    html_template = """
    <div style="background-color:black;padding:10px;border-radius:10px;margin:10px;">
    <h1 style="color:green;text-align:center;">EDSA Movie Recommendation Challenge</h1>
    <h2 style="color:white;text-align:center;">UNSUPERVISED LEARNING PREDICT - TEAM1</h2>
    </div>
    """

    # Team title card; defined here but not rendered by any page below.
    title_template = """
    <div style="background-color:#464e5f;padding:10px;border-radius:10px;margin:20px;">
    <h1 style="color:white;text-align:center;">UNSUPERVISED LEARNING PREDICT</h1>
    <h2 style="color:white;text-align:center;">TEAM 1</h2>
    <h3 style="color:white;text-align:center;">Malibongwe Xulu</h3>
    <h3 style="color:white;text-align:center;">Nthabiseng Moela</h3>
    <h3 style="color:white;text-align:center;">Simangele Maphanga</h3>
    <h3 style="color:white;text-align:center;">Kgauhelo Mokgawa</h3>
    <h3 style="color:white;text-align:center;">Manko Mofokeng</h3>
    <h2 style="color:white;text-align:center;">14 December 2020</h2>
    </div>
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    page_options = [
        "Home", "Recommender System", "About", "Exploratory Data Analysis",
        "Solution Overview"
    ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('First Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                with st.spinner('Crunching the numbers...'):
                    top_recommendations = content_model(movie_list=fav_movies,
                                                        top_n=10)
                st.title("We think you'll like:")
                for i, j in enumerate(top_recommendations):
                    st.subheader(str(i + 1) + '. ' + j)

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                with st.spinner('Crunching the numbers...'):
                    top_recommendations = collab_model(movie_list=fav_movies,
                                                       top_n=10)
                st.title("We think you'll like:")
                for i, j in enumerate(top_recommendations):
                    st.subheader(str(i + 1) + '. ' + j)

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------

    # Page: Home - banner followed by the landing image.
    if page_selection == "Home":
        banner_html = html_template.format('royalblue', 'white')
        st.markdown(banner_html, unsafe_allow_html=True)
        st.image('resources/imgs/Home.PNG', use_column_width=True)

    # Page: About - challenge introduction, problem statement and data notes.
    if page_selection == "About":
        st.write(
            "### Oveview: Flex your Unsupervised Learning skills to generate movie recommendations"
        )

        st.subheader("Introduction to Unsupervised Learning Predict")
        intro_paragraphs = (
            """In today’s technology driven world, recommender systems are socially and economically critical for ensuring that individuals can make appropriate choices surrounding the content they engage with on a daily basis. One application where this is especially true surrounds movie content recommendations; where intelligent algorithms can help viewers find great titles from tens of thousands of options.""",
            """With this context, EDSA is challenging you to construct a recommendation algorithm based on content or collaborative filtering, capable of accurately predicting how a user will rate a movie they have not yet viewed based on their historical preferences.""",
            """Providing an accurate and robust solution to this challenge has immense economic potential, with users of the system being exposed to content they would like to view or purchase - generating revenue and platform affinity.""",
        )
        for paragraph in intro_paragraphs:
            st.write(paragraph)

        st.subheader("Problem Statement of the Unsupervised Learning Predict")
        st.write("Build recommender systems to recommend a movie")

        st.subheader("Data Overview")
        data_paragraphs = (
            """This dataset consists of several million 5-star ratings obtained from users of the online MovieLens movie recommendation service. The MovieLens dataset has long been used by industry and academic researchers to improve the performance of explicitly-based recommender systems, and now you get to as well!""",
            """For this Predict, we'll be using a special version of the MovieLens dataset which has enriched with additional data, and resampled for fair evaluation purposes.""",
        )
        for paragraph in data_paragraphs:
            st.write(paragraph)

        st.write("""### Source:""")
        st.write(
            """The data for the MovieLens dataset is maintained by the GroupLens research group in the Department of Computer Science and Engineering at the University of Minnesota. Additional movie content data was legally scraped from IMDB"""
        )

        # The indentation inside this literal is part of the rendered text.
        supplied_files = """### Supplied Files:
        genome_scores.csv - a score mapping the strength between movies and tag-related properties. Read more here

        genome_tags.csv - user assigned tags for genome-related scores

        imdb_data.csv - Additional movie metadata scraped from IMDB using the links.csv file.

        links.csv - File providing a mapping between a MovieLens ID and associated IMDB and TMDB IDs.

        sample_submission.csv - Sample of the submission format for the hackathon.

        tags.csv - User assigned for the movies within the dataset.

        test.csv - The test split of the dataset. Contains user and movie IDs with no rating data.

        train.csv - The training split of the dataset. Contains user and movie IDs with associated rating data."""
        st.write(supplied_files)

    # Page: EDA - one checkbox per chart; ticking it shows heading + image.
    if page_selection == "Exploratory Data Analysis":
        st.title('Exploratory Data Analysis')

        eda_charts = (
            ("ratings", "Movie ratings", 'resources/imgs/rating.PNG'),
            ("genre wordcloud", "Top Genres",
             'resources/imgs/genre_wordcloud.png'),
            ("genres", "Top Genres", 'resources/imgs/top_genres.PNG'),
            ("tags", "Top tags", 'resources/imgs/top_tags.PNG'),
            ("cast", "Popular cast", 'resources/imgs/cast.PNG'),
        )
        for checkbox_label, heading, image_path in eda_charts:
            if st.checkbox(checkbox_label):
                st.subheader(heading)
                st.image(image_path, use_column_width=True)

    # Page: Solution Overview - model performance table image.
    if page_selection == "Solution Overview":
        st.title("Solution Overview")
        st.write("RMSE of the recommendation models to show their performance")
        st.image('resources/imgs/performance_df.PNG', use_column_width=True)
예제 #29
0
def main():
    """Render the Streamlit movie recommender app.

    A sidebar select box chooses one of six pages ("Home",
    "Exploratory Data Analysis(EDA)", "Recommender System",
    "Solution Overview", "Business Pitch", "About") and only the widgets
    of the chosen page are drawn.

    The "Recommender System" page is reproduced verbatim because the
    project template marks it as code that must not be altered; it relies
    on the module-level names ``title_list``, ``content_model`` and
    ``collab_model``.  All other pages display static images, HTML
    snippets and explanatory text.
    """

    # DO NOT REMOVE the 'Recommender System' option below, however,
    # you are welcome to add more options to enrich your app.
    st.sidebar.title("Pages")
    page_options = [
        "Home", "Exploratory Data Analysis(EDA)", "Recommender System",
        "Solution Overview", "Business Pitch", "About"
    ]

    # -------------------------------------------------------------------
    # ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
    # -------------------------------------------------------------------
    page_selection = st.sidebar.selectbox("Choose Option", page_options)
    if page_selection == "Recommender System":
        # Header contents
        st.write('# Movie Recommender Engine')
        st.write('### EXPLORE Data Science Academy Unsupervised Predict')
        st.image('resources/imgs/Image_header.png', use_column_width=True)
        # Recommender System algorithm selection
        sys = st.radio(
            "Select an algorithm",
            ('Content Based Filtering', 'Collaborative Based Filtering'))

        # User-based preferences
        st.write('### Enter Your Three Favorite Movies')
        movie_1 = st.selectbox('Fisrt Option', title_list[14930:15200])
        movie_2 = st.selectbox('Second Option', title_list[25055:25255])
        movie_3 = st.selectbox('Third Option', title_list[21100:21200])
        fav_movies = [movie_1, movie_2, movie_3]

        # Perform top-10 movie recommendation generation
        if sys == 'Content Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = content_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

        if sys == 'Collaborative Based Filtering':
            if st.button("Recommend"):
                try:
                    with st.spinner('Crunching the numbers...'):
                        top_recommendations = collab_model(
                            movie_list=fav_movies, top_n=10)
                    st.title("We think you'll like:")
                    for i, j in enumerate(top_recommendations):
                        st.subheader(str(i + 1) + '. ' + j)
                except:
                    st.error("Oops! Looks like this algorithm does't work.\
                              We'll need to fix it!")

    # -------------------------------------------------------------------

    # ------------- SAFE FOR ALTERING/EXTENSION -------------------
    # Solution Overview: boxed HTML title, banner image, then a written
    # walk-through of the datasets, cleaning steps and models.
    if page_selection == "Solution Overview":
        title_SO = """
	    <div style="background-color:#464e5f00;padding:10px;border-radius:10px;margin:10px;border-style:solid; border-color:#000000; padding: 1em;">
	    <h1 style="color:black;text-align:center;">Solution Overview</h1>
        """
        st.markdown(title_SO, unsafe_allow_html=True)
        st.image('resources/imgs/Sol.jpeg', use_column_width=True)
        st.write("Describe your winning approach on this page")
        st.write(
            "Our objective was to construct a recommendation algorithm based on the content or collaborative filtering, capable of accurately predicting how a user will rate a movie they have not yet viewed based on their historical preferences. We used a special version of the MovieLens dataset. Below is a description of the dataset we used"
        )
        st.write(
            "genome_scores - a score mapping the strength between movies and tag-related properties"
        )
        st.write("genome_tags - user assigned tags for genome-related scores")
        st.write(
            "imdb_data - Additional movie metadata scraped from IMDB using the links.csv file"
        )
        st.write(
            "links - File providing a mapping between a MovieLens ID and associated IMDB and TMDB IDs"
        )
        st.write("tags - User assigned for the movies within the dataset")
        st.write(
            "test - The test split of the dataset. Contains user and movie IDs with no rating data"
        )
        st.write(
            "train - The training split of the dataset. Contains user and movie IDs with associated rating data"
        )
        st.write(
            "The initial step was the data preprocessing and we looked for missing values. We discovered that there are missing values in three of the eight datasets we have."
        )
        st.write(
            "After data preprocessing we started building our based model. We built five different collaborative base models, namely SVD, Normal Predictor, CoClustering, KNN Baseline, and lastly NMF. Their performances were compared using a statistical measure known as the root mean square error (RMSE), which determines the average squared difference between the estimated values and the actual value. A low RMSE value indicates a high model accuracy. The best performing base models were SVD and KNN Baseline.\n\nWe performed hyperparameter tuning on SVD and it gave us the best result of Kaggle."
        )

        imdb = """
	    <div style="background-color:#464e5f00;padding:10px;border-radius:10px;margin:10px;">
	    <h3 style="color:black;text-align:left;">Cleaning the imdb_data dataset</h3>
        """
        st.markdown(imdb, unsafe_allow_html=True)
        st.write(
            'We imputed the runtime with the mean runtime\n\nCreated a list plot keywords for each movie.\n\nCreated a list of title casts for each movie.'
        )

        movies = """
	    <div style="background-color:#464e5f00;padding:10px;border-radius:10px;margin:10px;">
	    <h3 style="color:black;text-align:left;">Cleaning the movies dataset</h3>
        """
        st.markdown(movies, unsafe_allow_html=True)
        st.write(
            'Created a list of genres in every movie in the movies column\n\nAdded the releasea_year column.'
        )

        st.write('After cleaning the data, we then merged the data')
        st.write(
            'We proceeded to the second step, the EDA. We constructed various plots using our data and gathered insights from our data, these are well documented on our Exploratory Data Analysis(EDA) page.'
        )

    # You may want to add more sections here for aspects such as an EDA,
    # or to provide your business pitch.

    # Home: logo plus two boxed HTML banners.
    if page_selection == "Home":
        st.image('resources/imgs/EDSA_logo.png', use_column_width=True)

        # The two `{}` placeholders are the background and text colours,
        # filled in by the .format() call below.
        html_temp = """
	    <div style="background-color:{};padding:10px;border-radius:10px;margin:10px;border:3px; border-style:solid; border-color:#000000; padding: 1em;">
	    <h1 style="color:{};text-align:center;">UNSUPERVISED PREDICT</h1>
	    </div>
	    """

        # The date heading is closed with </h2> so it matches its opening tag.
        title_temp = """
	    <div style="background-color:#464e5f00;padding:10px;border-radius:10px;margin:10px;border-style:solid; border-color:#000000; padding: 1em;">
	    <h1 style="color:black;text-align:center;">Recommender System</h1>
	    <h2 style="color:black;text-align:center;">Team:3</h2>
	    <h2 style="color:black;text-align:center;">July 2020</h2>
	    </div>
	    """
        st.markdown(html_temp.format('#D2691E00', 'black'),
                    unsafe_allow_html=True)
        st.markdown(title_temp, unsafe_allow_html=True)

    # EDA: a first radio picks the section, a nested radio picks the chart
    # (and accompanying commentary) inside that section.
    if page_selection == "Exploratory Data Analysis(EDA)":
        title_eda = """
	    <div style="background-color:#464e5f00;padding:10px;border-radius:10px;margin:10px;border-style:solid; border-color:#000000; padding: 1em;">
	    <h1 style="color:black;text-align:center;">Exploratory Data Analysis(EDA)</h1>
        """
        st.markdown(title_eda, unsafe_allow_html=True)
        st.image('resources/imgs/EDA6.png', use_column_width=True)

        sys_eda = st.radio(
            "Choose an EDA section",
            ('Ratings', 'Movies', 'Directors', 'Genres', 'Title Cast'))

        if sys_eda == "Ratings":
            op_ratings = st.radio(
                "Choose an option under ratings",
                ("Top 10 users by number of ratings",
                 "Top 10 users by number of ratings(No outlier)",
                 "Rating distribution"))

            if op_ratings == "Top 10 users by number of ratings":
                st.image('resources/imgs/Top 10 users by number of rating.png',
                         use_column_width=True)
                st.write(
                    'visualizing the top 10 users by number of ratins we see that user 72315 is an outlier in the sense he/she has rated an extreme number of movies compared to the rest of the users, there being a difference of 9272 ratings betwen user 12952 and user 80974 , This means that our system "better knows" user 72315 and his/her preferences and as such it would be easy for our recommeder system to recommend movies they would enjoy.'
                )
            if op_ratings == "Top 10 users by number of ratings(No outlier)":
                st.image('resources/imgs/rating_no_outlier.png',
                         use_column_width=True)
                st.write(
                    "Removing the outlier user 72315 we see that the rest of users have not rated an extreme number of movies comapred to each other.Now that we've looked into the number of ratings for each user, we can now investigate the distribution of ratings"
                )
                st.write(
                    "Most review sites use a 1 to 5 star rating system, with")
                st.write(
                    "5 star : Excellent\n\n4.0 – 5.0 stars : Positive Reviews\n\n3.0 - 3.9 stars : Neutral Reviews\n\n1.0 - 2.9 star : Negative Reviews"
                )
            if op_ratings == "Rating distribution":
                st.image('resources/imgs/rating_dist.png',
                         use_column_width=True)
                st.write(
                    "Checking the distribution of ratings we see that rating of 4.0 is the most popular rating accounting for 27% of the ratings which suggests that most users have found most movies to be good but not not excellent, then again no movie can truly be excellent, the second most popular rating is 3.0 which suggests that quite a number of users found the movies they've seen to be neutral."
                )
                st.write(
                    "An Interesting note here is that the ratings are left skewed as see more ratings on the right side of the bargraph, this could be the result of the behaviour that people only tend to rate movies they like werease if they don't like a movie they would watch it till the end, let alone rate it"
                )
                st.write(
                    "we see that average movie rating is 3.5 which suggest that we have more neutral and positive reviews seen by the skewed distribution"
                )

        if sys_eda == "Movies":
            op_movies = st.radio(
                "Choose an option under movies",
                ("Top 25 most rated movies of all time",
                 "25  most rated movies of the 21st century",
                 "Top 10 best and worst rated movies with over 10000 ratings",
                 "Total movies released per year"))

            if op_movies == "Top 25 most rated movies of all time":
                st.image('resources/imgs/25_most_1.png', use_column_width=True)
                st.write(
                    "Unsurprisingly the most rated movie of all time is Shawshank Redemption which is a 1994 American drama film written and directed by Frank Darabont, based on the 1982 Stephen King novella Rita Hayworth and Shawshank Redemption, we also see timeless classics like The Matrix which didn't only win 41 awards but also shaped the making of action movies in the 21st century.\n\nThe Matrix changed the way action sequences were handled by Hollywood, popularising the special effect known as bullet time. The most iconic scene in the film uses this technique, leaving us stunned as Neo dodges an enemy's gunfire, weaving his body around the bullets.\n\nAn interesting point to note here is that 21 of the 25 or 84% of 25 most rated movies of all time were released before the year 2000, could this mean people don't rate movies anymore or could it this be attributed to the fact these movies were released a long time ago and rating counts have accumulated over the years?\n\nSeing that 84% of the movies in our 25 most rated movies were released before 2000, sparks the curiosty to find the most rated movies of the 21st century"
                )
            if op_movies == "25  most rated movies of the 21st century":
                st.image('resources/imgs/25_most_2.png', use_column_width=True)
                # Kept as ONE literal: a raw line break inside a
                # double-quoted string is a SyntaxError.  The stray
                # backslash that used to precede "From" (an invalid "\F"
                # escape) has been dropped.
                st.write(
                    "Looking at 25 most rated movies of the 21st century we see that another timeless classic the Lord of the rings trilogy tops the chart.\n\nThe Lord of the Rings is a film series of three epic fantasy adventure films directed by Peter Jackson, based on the novel written by J. R. R. Tolkien. This film series is about a young hobbit, Frodo, who has found the One Ring that belongs to the Dark Lord Sauron, begins his journey with eight companions to Mount Doom, the only place where it can be destroyed.\n\nIt is said that the reason this trilogy was so succesful is that the writter included so much detail and crafted such a wonderful and interesting story that it was still well loved when released in cinemas 50 years later. And there's no reason to think anything will change in another 50 years. The story features a number of diversions and twists, If you haven't seen it before and you love adventure films i'm sure our recommendation model which we will build later on will lead you to this trilogy🙂\n\nWe also see some of the best movies based DC comic books(my personal favourite kind of literature) two of which are from the Dark Knight trilogy which are The Dark Knight and Batman begins, we also see Spiderman which was written by the Legendary Marvel comic book superheros creator Stan Lee.\n\nSomething worth noting here again is the trend we saw ealier that as most recent movies don't really have that higher number of ratings as we see that 25 most rated movies of the 21st centure were all released before 2010.\n\n\n\nNow that we have have seen which movies were most rated we can now investigate the best and worst rated movies on average, one thing we have to be aware of here is the is that when calculating the average best and worst rated movies, if a movie is rated by one user and that user gave it 5 star then it will top the chart which would be misleading as such to avoid this we will develop a threshold for number of ratings that a movie should have before we can include its average rating.\n\nFrom the two bar plots above we see that alot of movies have recieve over 10 000 ratings and as such we will make this our threshold"
                )
            if op_movies == "Top 10 best and worst rated movies with over 10000 ratings":
                st.image('resources/imgs/10_best_3.png', use_column_width=True)
                st.image('resources/imgs/10_worst_4.png',
                         use_column_width=True)
                st.write(
                    "Unsurprisingly the most rated movie, Shawkshank Redemption is also the best rated movie with over 10000 ratings with an average rating of 4.4176, with this, would it be totally unjustified to say Shawshank Redemption is the best movie of all time? Shawshank Redemption is followed by another popularly known and loved movie The Godfather which is a 1972 American crime film directed by Francis Ford Coppola who co-wrote the screenplay with Mario Puzo, based on Puzo's best-selling 1969 novel of the same name, the sequel of The Godfather, The Godfather: Part II also made it to the top 10 best rated movies, coming in at position 4. The Godfather and its sequel revieved an average rating of4.3114 and 4.2741 respectively, Bottom of the list is Pulp Fiction with 4.1951 average rating.\n\n\n\nan interesting note here is that even though we have ##### movies released in the 21st century that have over 10 000 ratings, none of them made it to the top 10 best rated movies of all time since.\n\n\n\nNow turning our attention to worst rated movies with over 10000 ratings, it it worth noting that these are not at all lowest rated movies of all time but, they are however the lowest rated movies with over 10000 ratings, we do have lowest rated movies of all time but for the context of this notebook we will focus on those that have more that 10000 ratings.\n\nLooking at the lowest rated movies we see Waterworld coming in last with an average rating of 2.8830, we also see popular movies by Jim Carrey like Dumb and Dumber and Mask, as also see Home Alone which is a very popular christmas movie."
                )
            if op_movies == "Total movies released per year":
                st.image('resources/imgs/Total_5.png', use_column_width=True)
                st.write(
                    "We see that the year 2015 saw the highest number of movies released topping the chart with 2513 movies released in that year and the year 2016 coming in second with 2488 released movies.\n\n\n\nThere has been a steep increase in the movies of movies release per year in the 21st century"
                )

        if sys_eda == "Directors":
            st.info(
                "We start off with directors, A film director controls a film's artistic and dramatic aspects and visualizes the screenplay (or script) while guiding the technical crew and actors in the fulfilment of that vision. The director has a key role in choosing the cast members, production design and all the creative aspects of filmmaking\n\n\n\nEven though most people don't into finding our who director what movie to decide whether its going to be a good watch or not, there is a proportion of people that either watch the credits at the end of the movie or do research of each movie before they watch it, for these people director of a movie plays an import role in decided whether or not to watch a movie, for me personally I watch mroe series's than movies and but I know that if a series is directed by Chuck Lorre than I will definately love it.\n\nlet's start by finding our which directors have recieved the most number of ratings for their collective movies"
            )

            op_director = st.radio(
                "Choose an option under directors",
                ("Top 25 most rated directors",
                 "Top 25 directors with most number of movies",
                 "10 highest rated director with over 10000 ratings",
                 "10 worst rated directors with over 10000 ratings"))

            if op_director == "Top 25 most rated directors":
                st.image('resources/imgs/top_25_most_D1.png',
                         use_column_width=True)
                st.write(
                    "Topping the chart bar far we see Quentin Tarantino who has directed a total of 10 movies is an American film director, screenwriter, producer, and actor. His films are characterized by nonlinear storylines, aestheticization of violence, extended scenes of dialogue, ensemble casts, references to popular culture and a wide variety of other films, soundtracks primarily containing songs and score pieces from the 1960s to the 1980s, alternate history, and features of neo-noir film, One of Quentin Tarantino's highest rated movie Pulp fiction appreard in the top 10 best rated movies we saw ealier.\n\nwe also see familiar names like Stephen King who is also an author of horror, supernatural fiction, suspense, crime, science-fiction, and fantasy novels and directed a total of 23 movies among these movies is the movie we ponded a question of whether we can consider it as the best movie of all time, since it appeared top of the chart on both the Top 25 most rated movies of all time and Top 10 best rated movies of all time, Shawshank Redemption was based on Stephen King's novel.\n\n\n\nAfter seein the total number of ratings for each director its only natural that one wonders how many movies did each of these directors release, as this would contribute to the total number of ratings they have recieved, so lets find out which directors have released the most number of movies."
                )
            if op_director == "Top 25 directors with most number of movies":
                st.image('resources/imgs/Top_25_directors_D2.png',
                         use_column_width=True)
                st.write(
                    "We see a tie for the number spot between Luc Besson and Woody Allen, each having released an equal number of 26 movies. followed Stephen King This time coming at number 2 with a total of 23 movies. we also see some popularly names like that of William Shakespeare who was an English playwright, poet, and actor, widely regarded as the greatest writer in the English language and the world's greatest dramatist. as well Tyler Perry, a world-renowned producer, director, actor, screenwriter, playwright, author, songwriter, entrepreneur, and philanthropist, whos most successful movies series is the the Madea series which he doesnt only direct but also plays 3 roles in.\n\n\n\nKey obseravtion. Most of the movies that were produced by the directors in the about bar plot have the genres Drama and Romance or a mixture of those two gernes popularly known as romantic comedies, whether or not these two genres are the most succesful generes of highest rated genres is still to be investigated.\n\n\n\nWe have seen some of the most rated directors and dicectors with the most number of movies, now we turn our attention to finding out which directors have recieved the bests rating on average, this can help us guage if whether a movie will be worth watching or not, since we can check the average rating the director of that movie\n\n\n\ncontinuing with the trend we will only consider directors that have atleast 10000 ratings"
                )
            # The radio offers the highest-rated and worst-rated charts as
            # two separate options.  The previous single comparison against
            # a combined "highest and worst" label could never match, so
            # neither option rendered anything.  Each option now shows its
            # own chart and the matching part of the commentary.
            if op_director == "10 highest rated director with over 10000 ratings":
                st.image('resources/imgs/10_highest_rated_D3.png',
                         use_column_width=True)
                st.write(
                    "Toping the chart of the best rated directors is Chuck Palahniuk, the director of Fight Club that recieved an average rating of 4.22 which had Action, Crime, Drama and thriller genres. The second spot is held by Christopher McQuarrie recieving an average rating of 4.19 for three movies he has directed namely Usual suspects, Way of the gun and Edge of Tomorrow with mix of genres Action, Crime and Thriller, this this shares some light on the question we posed earlier of whether people the most succesful genres were a mix of Drama, Romance or Comedy, as we see that our two best rated directors create blockbusters with mix of genres action and thriller. We will investigated these genres thoroughly at a later stage."
                )
            if op_director == "10 worst rated directors with over 10000 ratings":
                st.image('resources/imgs/10_worst_directors_D4.png',
                         use_column_width=True)
                st.write(
                    "Looking at the worst rated directed we see that the lowest rated director is Jack Bernstein with an average rating of 2.84\n\n\n\nWe now move to the next factor that influences the perfrance of of viewers that is the genre of the movie.\n\n"
                )

        if sys_eda == "Genres":
            op_genre = st.radio("Choose an option under Genres",
                                ("Treemap of movie genres",
                                 "Genre average rating over the years",
                                 "Word cloud of movie genres"))

            if op_genre == "Treemap of movie genres":
                st.image('resources/imgs/Treemap_G1.png',
                         use_column_width=True)
                st.write(
                    "The genre treemap shows that Drama is the most popular genre with a genre count of 25606 followed by comedy with a count of 16870 as we initially suspected, We also see that Thriller and Romance follow suit. IMAX is by far the least popular genres with a count of 195 with Film-Noir following with a count of 353.\n\n\n\nWe have now seen the the most popular and least popular genres, lets now dig a little deeper into the genres and find out if whether the genre preference has changed throughout the years, to investigate this let's created an animated bar plot."
                )
            if op_genre == "Genre average rating over the years":
                st.video('resources/imgs/download.mp4')
                st.write(
                    "Right off the bat of the bet, the bar charr race shows us that there has been a change in genre preferences over the years"
                )
                st.write(
                    "Stangely Animation was the best rated genre in 1995.\n\n\n\nIn 1996 Animation dropped to the 8th position and the Documentary became the most rated genre\n\n\n\n1997 Animation toped the char again and the following year Documentaty took over, seems between those 4 years the most prefered genres where Animation and Documentary, Strange times indeed...\n\n\n\nIn 1999 Crime movies started being popular and became the highest rated genre that year\n\n\n\nDrame took over the top spot in the year 2000\n\n\n\n2001 We see Fantasy, Crime and Drama taking the 1st. 2nd and 3rd spots respectively and we see these genres taking turns over the next couple of years until 2013 when Romance takes the lead and Documentaries become more popular and toping the chart in 2015."
                )
            if op_genre == "Word cloud of movie genres":
                st.image('resources/imgs/Wordcloud_G3.png',
                         use_column_width=True)

        if sys_eda == "Title Cast":
            st.image('resources/imgs/title_cast_1.png', use_column_width=True)
            st.write(
                "The likes of Samuel L. Jackson ,steve Buscemi ans Keith David are the most popular cast members according to the graph above."
            )

    # About: three HTML cards (team, mission, contributors).
    if page_selection == "About":
        title_about = """
	    <div style="background-color:#464e5f00;padding:10px;border-radius:10px;margin:10px;">
	    <h1 style="color:black;text-align:center;"> - The Team -</h1>
        <h3 style="color:black;text-align:right;">We are a team of data science students from Explore Data Science Academy. This is our project for the 2020 July unsupervised sprint.</h3>
        """
        mission = """
	    <div style="background-color:#464e5f00;padding:10px;border-radius:10px;margin:10px;">
	    <h1 style="color:black;text-align:center;"> - Our Mission - </h1>
        <h3 style="color:black;text-align:center;">To keep you entertained by helping you find movies you're most likely to enjoy&#128515</h3>
        """

        contributors = """
        <div style="background-color:#464e5f00;padding:10px;border-radius:10px;margin:10px;">
	    <h1 style="color:black;text-align:center;"> - Contributors -</h1>
        <h3 style="color:black;text-align:center;">Thapelo Mojela</h3>
        <h3 style="color:black;text-align:center;">Presca Mashamaite</h3>
        <h3 style="color:black;text-align:center;">Mpho Mokhokane</h3>
        <h3 style="color:black;text-align:center;">Josias Sekhebesa</h3>
        <h3 style="color:black;text-align:center;">Bukelwa Mqhamane</h3>
        """
        st.markdown(title_about, unsafe_allow_html=True)
        st.markdown(mission, unsafe_allow_html=True)
        st.markdown(contributors, unsafe_allow_html=True)

    # Business Pitch: alternating images and paragraphs.
    if page_selection == "Business Pitch":
        st.image('resources/imgs/BV_1.jpg', use_column_width=True)
        st.write(
            "Some of the biggest companies in the world invested in streaming entertainment in the 21st century. The investment in streaming entertainment gave us platforms such as Netflix, Apple TV,, Disney Plus, Amazon prime and many more. These platforms are racking up millions of subscribers as the entire world is now streaming more than ever."
        )
        st.write(
            "You may be wondering why these streaming platforms are attracting millions of subscribers, there are several reasons why people are leaning more towards streaming platforms. Streaming platforms have a lot of diverse content that can be consumed anywhere, anytime, and the subscribers are in total control of the rate at which they consume the content."
        )
        st.image('resources/imgs/BV_2.jpg', use_column_width=True)
        st.write(
            "Another thing that is a major contributor in the rise and success of streaming platforms is their ability to recommend content that their users are most likely to watch and enjoy. They achieve this through the use of recommender algorithms. These algorithms ensure that each user is exposed to what they like."
        )
        st.image('resources/imgs/increasing.jpg', use_column_width=True)
        st.write(
            "When doing exploratory data analysis we saw that the number of movies released increases exponentially each year. The exponential increase in the number of movies released means that streaming platforms need an excellent recommender algorithm to ensure that the movies reach the right audience."
        )
        st.image('resources/imgs/BV_L.jpg', use_column_width=True)
        st.write(
            "This is where our recommender algorithm comes in. Our recommender algorithm will help with user retention by making tailored recommendations for each user. The user retention will ultimately result in a growth of the platform."
        )