예제 #1
0
def get_meta(title, m_id):
    """Collect the display metadata for a single movie.

    Args:
        title: Movie title. Matched exactly against ``original_title`` in the
            movies table and against ``title`` in the credits CSV.
        m_id: TMDB id used to resolve the IMDb id. May be ``None`` (or any
            value ``int()`` rejects); the IMDb search URL is returned then.

    Returns:
        A 4-tuple ``(df_title, cast, crew, imdb_link)``:
        ``df_title`` is the first matching movie row restricted to the
        selected attributes, ``cast`` holds the names of the first five cast
        entries, ``crew`` holds the names of crew entries whose job is
        ``"Director"``, and ``imdb_link`` is either a direct IMDb title URL
        or an IMDb title-search URL.

    Raises:
        IndexError: If ``title`` has no match in the movies table or in the
            credits CSV.
    """
    from urllib.parse import quote_plus

    rec = Recommendation()
    rec.filter_genres()
    rec.filter_productions()
    df_movies = rec.md
    df_credits = pd.read_csv(PATH_CREDITS)
    df_imdb_link = pd.read_csv(PATH_MOVIELENS_TO_TMDB)
    attributes = [
        "id", "original_title", "genres", "homepage", "overview",
        "release_date", "production_companies", "runtime", "tagline",
        "vote_average", "vote_count",
    ]

    # Filter first and then take the first hit positionally. Feeding an index
    # label to ``.iloc`` is only correct when the index is a default
    # RangeIndex, which is not guaranteed once rows have been filtered out.
    movie_row = df_movies[df_movies["original_title"] == title].iloc[0]
    df_title = movie_row[attributes]
    df_crew = df_credits[df_credits["title"] == title].iloc[0][["cast",
                                                                "crew"]]

    cast = [member["name"] for member in literal_eval(df_crew["cast"])[:5]]
    crew = [
        member["name"] for member in literal_eval(df_crew["crew"])
        if member["job"] == "Director"
    ]

    try:
        matches = df_imdb_link.loc[df_imdb_link["tmdbId"] == int(m_id),
                                   "imdbId"]
        # int() normalises the id whether the column was parsed as int or
        # float, and rejects NaN instead of producing a garbage link.
        imdb_id = str(int(matches.iloc[0]))
        imdb_link = ("https://www.imdb.com/title/tt" +
                     imdb_id.zfill(IMDB_ID_LEN))
    except (TypeError, ValueError, IndexError, KeyError):
        # No usable id (m_id missing/invalid, no mapping row, NaN id):
        # fall back to a search link, escaping the title for the query string.
        imdb_link = ("https://www.imdb.com/search/title/?title=" +
                     quote_plus(title))

    return df_title, cast, crew, imdb_link
예제 #2
0
def main():
    """Run the pipeline end to end.

    When the last command-line argument is ``--collect``, the POI and city
    extraction steps run first. Afterwards the function profiles the raw CSV,
    runs the preprocessing step, profiles ``preprocessed.csv``, runs the
    recommendation steps and finally writes the ontology.
    """
    collect_requested = sys.argv[-1] == '--collect'
    if collect_requested:
        url_pois = 'https://pt.foursquare.com/explore?mode=url&ne=-29.358988%2C-50.837817&q=Sele%C3%A7%C3%B5es%20principais&sw=-29.41889%2C-50.887942'
        url_city = 'http://www.dataviva.info/pt/location/5rs020102'

        extractor = Extraction(url_pois, url_city)
        extractor.poi_data_extraction()
        extractor.city_data_extraction()

    # Generate the report for the raw dataset
    dataset_path = 'foursquare_data.csv'
    raw_df = pd.read_csv(dataset_path, parse_dates=True, encoding='UTF-8')
    raw_profile = pandas_profiling.ProfileReport(raw_df)
    raw_profile.to_file(outputfile='dataset_report.html')

    processor = Process(dataset_path)
    processed_df = processor.method()

    # Profile the preprocessed data as read back from disk
    report_df = pd.read_csv('preprocessed.csv',
                            parse_dates=True,
                            encoding='UTF-8')
    processed_profile = pandas_profiling.ProfileReport(report_df)
    processed_profile.to_file(outputfile='preprocessed_dataset_report.html')

    recommender = Recommendation(processed_df)
    recommender.pattern_recommendation()
    recommender.new_recommendation()
    recommender.compare()
    # recommender.test_rec()

    ontology = Ontology()
    ontology.write_owl()
    def test_recommendation(self):
        """Check the order returned by ``get_recommendations`` for "book1".

        Forwards "book1" to several targets a varying number of times, then
        asserts both the plain result list and the ``with_time=True`` result
        (pairs of target and count).
        """
        engine = Recommendation(self.client)

        # Targets in call order: book2 x3, book3 x2, book4 x1, book5 x1,
        # book6 x2.
        forwarded_targets = (
            "book2", "book2", "book2",
            "book3", "book3",
            "book4",
            "book5",
            "book6", "book6",
        )
        for target in forwarded_targets:
            engine.forward_to("book1", target)

        plain_result = engine.get_recommendations("book1", 1, 5)
        assert plain_result == [
            "book2",
            "book6",
            "book3",
            "book5",
            "book4",
        ]

        counted_result = engine.get_recommendations("book1",
                                                    1,
                                                    5,
                                                    with_time=True)
        assert counted_result == [
            ("book2", 3),
            ("book6", 2),
            ("book3", 2),
            ("book5", 1),
            ("book4", 1),
        ]
예제 #4
0
def home():
    """Render the home page, title-based recommendations, or a genre chart.

    Dispatches on the query string:

    * ``recommend=<title>``: content-based recommendations for the title.
      Any failure while building the page is logged and answered with 404.
    * ``genres=<genre>`` with an optional ``offset``: a page of top movies for
      the genre (``"All"`` means no genre filter). A missing or non-integer
      ``offset`` is treated as 0.
    * neither: the plain home page.

    Returns:
        The rendered template for the selected branch.
    """
    import logging

    if "recommend" in request.args:
        try:
            title = request.args["recommend"]
            rec = ContentBased()
            df = rec.recommend(title,
                               DEFAULT_LIMIT,
                               full_search=True,
                               keywords_and_desc=False,
                               critics=False)
            poster_paths = get_poster_paths(df["id"].tolist(),
                                            df["original_title"].tolist())
            # A non-empty changed_title that differs from the query means the
            # recommender substituted another title for the one requested.
            did_you_mean = (rec.changed_title != title
                            and rec.changed_title != "")
            if not did_you_mean:
                rec.changed_title = title
            rec_title_meta = get_meta(rec.changed_title, None)
            rec_id = rec_title_meta[0]["id"]

            return render_template('recommendations.html',
                                   titles=df["original_title"].tolist(),
                                   images=poster_paths,
                                   votes=df["vote_average"].tolist(),
                                   m_id=df["id"].tolist(),
                                   rec_title=rec.changed_title,
                                   rec_id=rec_id,
                                   did_you_mean=did_you_mean)
        except Exception:
            # Still answer 404 as before, but keep the traceback and stop
            # intercepting KeyboardInterrupt/SystemExit.
            logging.getLogger(__name__).exception(
                "Failed to build recommendations page")
            abort(404)
    elif "genres" in request.args:
        genre = request.args["genres"]
        if genre == "All":
            genre = None
        # Tolerate a missing or malformed offset instead of erroring out.
        offset = request.args.get("offset", default=0, type=int)

        gen_rec = Recommendation()
        gen_rec.filter_genres()
        df = gen_rec.top_movies(gen_rec.md,
                                percentile=0.85,
                                limit=DEFAULT_LIMIT,
                                offset=offset,
                                genre=genre)
        poster_paths = get_poster_paths(df["id"].tolist(),
                                        df["original_title"].tolist())

        return render_template('recommendations.html',
                               titles=df["original_title"].tolist(),
                               images=poster_paths,
                               votes=df["vote_average"].tolist(),
                               m_id=df["id"].tolist(),
                               rec_title=request.args["genres"],
                               offset=offset,
                               next_offset=offset + DEFAULT_LIMIT,
                               prev_offset=offset - DEFAULT_LIMIT,
                               rec_id=None,
                               did_you_mean=None)
    else:
        return render_template('homepage.html')
예제 #5
0
def menu():
    """Prompt for movie names in a loop and print recommendations for each.

    The banner and prompt repeat until the user submits an empty line, which
    ends the loop without requesting a recommendation.
    """
    engine = Recommendation()
    while True:
        print("#################################")
        print("RECOMMENDATION CONTENT BASED IMDB")
        name_movie = input("Enter name movie: ")
        if name_movie == "":
            break
        engine.recommendation_movies(name_movie)
def compute_recommendation(user):
    """Compute the recommendation for ``user``.

    Builds a fresh ``Recommendation``, runs its three preparation steps in
    order, and returns ``get_features_event`` applied to the events
    recommended for the user.
    """
    engine = Recommendation()
    engine.init_basic_matrix()
    engine.init_frequency_matrix()
    engine.compute_matrix()

    recommended_events = engine.compute_recommended_events(user)
    return get_features_event(recommended_events)
예제 #7
0
 def __init__(self):
     """Create the ``Recommendation`` instance kept as ``self.recommendation``."""
     self.recommendation = Recommendation()
예제 #8
0
 def recommend(data):
     """Return ``allRecommendations()`` of a ``Recommendation`` built as
     ``Recommendation(data, 5, 5)``."""
     return Recommendation(data, 5, 5).allRecommendations()
    def setUp(self):
        """Reset stored data and seed the fixtures used by the tests.

        Creates one category, one weight, four users, four features and six
        events, then saves a fixed set of ratings and event-feature links and
        finally instantiates ``Recommendation`` as ``self.recommendation``.
        """
        clean()

        category = Category(name="C", external_id="1")
        weight = Weight(name="t", weight=1.0)
        weight.save()
        category.save()

        for n in range(4):
            tag = str(n)
            User(name="U" + tag, external_id=tag, token="t" + tag).save()
        users = User.objects.all()

        for n in range(4):
            Feature(name="F" + str(n)).save()
        features = Feature.objects.all()

        for n in range(6):
            tag = str(n)
            Event(category=category,
                  external_id=tag,
                  name='E' + tag,
                  website='',
                  description='').save()
        events = Event.objects.all()

        # (event index, user index, rating), saved in this order.
        rating_rows = (
            (1, 0, 4),
            (4, 0, 5),
            (1, 1, 3),
            (3, 1, 4),
            (5, 2, 4),
            (0, 3, 5),
            (2, 3, 3),
        )
        for event_idx, user_idx, score in rating_rows:
            Rating(event=events[event_idx],
                   user=users[user_idx],
                   rating=score).save()

        # (event index, feature index), saved in this order; every link uses
        # tf_idf=1 and the single weight created above.
        feature_links = (
            (0, 1),
            (1, 0),
            (1, 1),
            (2, 1),
            (2, 2),
            (3, 1),
            (4, 0),
            (4, 1),
            (4, 2),
            (5, 3),
        )
        for event_idx, feature_idx in feature_links:
            EventFeature(event=events[event_idx],
                         feature=features[feature_idx],
                         tf_idf=1,
                         weight=weight).save()

        # Imported here, after the fixtures exist, as in the original setup.
        from recommendation import Recommendation
        self.recommendation = Recommendation()
예제 #10
0
    def recommend(self,
                  title,
                  limit,
                  critics=False,
                  full_search=False,
                  use_pickle=True,
                  keywords_and_desc=False):
        """Return movies similar to ``title``.

        Args:
            title: Movie title (as in the TMDB dataset).
            limit: Number of movies to return.
            critics: When True, the closest matches are ranked through
                ``Recommendation.top_movies`` (percentile 0.50) instead of
                being returned in similarity order. Defaults to False.
            full_search: When True, search using cast, crew, keywords and
                genre as metadata; when False, use overview and tagline.
                Ignored if ``keywords_and_desc`` is True. Defaults to False.
            use_pickle: When True, reuse pickled metadata frames if they
                exist; when False, or when a pickle is missing, the frame is
                rebuilt and the pickle is rewritten. Defaults to True.
            keywords_and_desc: When True, sum the keyword-based and the
                description-based matrices. Defaults to False.

        Returns:
            With ``critics=False``, a pandas DataFrame with the columns
            original_title, id, vote_average, vote_count, popularity and
            release_date. With ``critics=True``, whatever
            ``Recommendation.top_movies`` returns.
        """
        rec = Recommendation()
        rec.filter_genres()
        title_index = self.verify_title(rec.md, title)

        def load_or_build(path, builder):
            # Reuse the cached frame when allowed and present; otherwise
            # rebuild it from the movie table and refresh the cache.
            # NOTE(review): read_pickle can execute code embedded in the
            # file, so these cache paths must only hold trusted files.
            if use_pickle and isfile(path):
                return pd.read_pickle(path)
            frame = builder(rec.md)
            frame.to_pickle(path)
            return frame

        if keywords_and_desc:
            # Previously this branch assigned nothing when a pickle was
            # missing or use_pickle was False, which crashed further down
            # with UnboundLocalError. Build whichever frame is unavailable.
            df_keywords = load_or_build(PATH_PICKLE_KEYWORDS,
                                        self.make_keywords)
            df_desc = load_or_build(PATH_PICKLE_DESC, self.make_desc)
            rec_matrix_keywords = self.countvectorize(df_keywords)
            rec_matrix_desc = self.tfidf(df_desc)
            rec_matrix = rec_matrix_keywords + rec_matrix_desc
            df = df_keywords
        elif full_search:
            df = load_or_build(PATH_PICKLE_KEYWORDS, self.make_keywords)
            rec_matrix = self.countvectorize(df)
        else:
            df = load_or_build(PATH_PICKLE_DESC, self.make_desc)
            rec_matrix = self.tfidf(df)

        rec_movie = rec_matrix[title_index]
        # Highest scores first; position 0 is skipped (expected to be the
        # queried movie itself) and at most SCAN_SIZE candidates are kept.
        ids = rec_movie.argsort()[::-1][1:SCAN_SIZE + 1]

        if critics:
            return rec.top_movies(df.iloc[ids],
                                  percentile=0.50,
                                  limit=limit,
                                  offset=0)

        return df.iloc[ids[:limit]][[
            "original_title",
            "id",
            "vote_average",
            "vote_count",
            "popularity",
            "release_date",
        ]]