Beispiel #1
0
def get_score_user_similarity_model(movie_data):
    """Score the user-similarity predictions and print a summary.

    ``movie_data.ratings_df`` is split with ``train_test_split`` for each of
    ``n_iter`` rounds (currently a single round). Predictions for both parts
    come from ``get_y_pred_user_similarity_model`` and are scored with
    ``get_scores``. The mean and standard deviation of the collected train
    and test scores are printed; nothing is returned.
    """
    all_ratings = movie_data.ratings_df
    n_iter = 1

    train_scores, test_scores = [], []
    for _ in range(n_iter):
        train_part, test_part = train_test_split(all_ratings)

        x_train, y_train = get_xy(train_part)
        x_test, y_test = get_xy(test_part)

        train_pred = get_y_pred_user_similarity_model(movie_data, x_train)
        test_pred = get_y_pred_user_similarity_model(movie_data, x_test)

        # get_scores takes the test pair first, but its result is unpacked
        # as (train, test).
        round_train, round_test = get_scores(y_test, test_pred,
                                             y_train, train_pred)
        train_scores.append(round_train)
        test_scores.append(round_test)

    train_summary = (np.mean(train_scores), np.std(train_scores))
    print('mean train score: %.4f, std: %.4f' % train_summary)
    test_summary = (np.mean(test_scores), np.std(test_scores))
    print('mean test score: %.4f, std: %.4f' % test_summary)
def build_model(ratings_df):
    """Factor the training rating matrix and print train/test scores.

    The ratings are split with ``train_test_split``. A ``MovieMatrixData``
    is built from the training part and its ``rating_matrix`` is passed to
    ``factor``. The product of the two returned factors, clipped to the
    ``min_rating``/``max_rating`` of the matrix data, is used as the
    prediction matrix for ``get_y_pred``. The shape and value range of the
    unclipped product and the final scores are printed; nothing is returned.
    """
    train_ratings_df, test_ratings_df = train_test_split(ratings_df)

    x_train, y_train = get_xy(train_ratings_df)
    x_test, y_test = get_xy(test_ratings_df)

    matrix_data = MovieMatrixData(train_ratings_df)
    explore_rating_matrix(matrix_data)

    p_matrix, q_matrix = factor(matrix_data.rating_matrix)
    predicted = p_matrix.dot(q_matrix)

    # Report the raw product before it is clipped to the rating range.
    print(predicted.shape)
    print('%s %s' % (np.amin(predicted), np.amax(predicted)))

    predicted = np.clip(predicted,
                        a_min=matrix_data.min_rating,
                        a_max=matrix_data.max_rating)

    train_pred = get_y_pred(matrix_data, predicted, x_train)
    test_pred = get_y_pred(matrix_data, predicted, x_test)

    # get_scores takes the test pair first, but its result is unpacked as
    # (train, test).
    scores = get_scores(y_test, test_pred, y_train, train_pred)
    train_score, test_score = scores

    print('train: %.3f, test: %.3f' % (train_score, test_score))
    def fit(self, ratings_df):
        """Fit every base model, then the blending regression on top of them.

        Each model in ``self.models`` is fitted on ``ratings_df``. The result
        of ``self.get_blend_predictions`` for the ``x`` part returned by
        ``get_xy`` is then used to fit ``self.regression`` against the ``y``
        part. The fitted regression's coefficients and intercept are printed.

        Args:
            ratings_df: ratings data passed unchanged to every base model's
                ``fit`` and to ``get_xy``.

        Returns:
            ``self``, so that ``model = model.fit(ratings_df)`` keeps working
            the same way it does for the other models' ``fit`` methods.
        """
        with elapsed_time('total fit'):
            for model in self.models:
                model.fit(ratings_df)

            x, y = get_xy(ratings_df)

            with elapsed_time('get blend predictions'):
                blend_predictions = self.get_blend_predictions(x)

            self.regression.fit(blend_predictions, y)

            print('linear regression coefficients: %s, intercept: %.3f' % (
                self.regression.coef_, self.regression.intercept_))

        return self
def build_model(ratings_df, model=None, n_iter=1):
    """Fit a model on random train/test splits and print R^2 and RMSE stats.

    Args:
        ratings_df: ratings data; split anew with ``train_test_split`` on
            every iteration.
        model: object providing ``fit(ratings_df)`` and ``predict(x)``.
            Defaults to ``UserSimilarityModel(movie_lambda=5.0,
            user_lambda=20.0)``. The same instance is re-fitted on each
            iteration.
        n_iter: number of split/fit/score rounds to run. Defaults to 1.

    Prints the mean and standard deviation of the train and test R^2 scores
    and RMSE values over all rounds; nothing is returned.
    """
    if model is None:
        # Alternatives tried previously: BaselineTotalMeanModel(),
        # BaselineMeansModel(user_weight=0.5),
        # BaselineEffectsModel(movie_lambda=5.0, user_lambda=20.0).
        model = UserSimilarityModel(movie_lambda=5.0, user_lambda=20.0)

    train_scores = []
    test_scores = []
    train_rmse_scores = []
    test_rmse_scores = []

    for _ in range(n_iter):
        train_ratings_df, test_ratings_df = train_test_split(ratings_df)

        # Some fit() implementations return self, others return nothing.
        # Only rebind when something was returned, so a None result does not
        # replace the model and break predict() below.
        fitted = model.fit(train_ratings_df)
        if fitted is not None:
            model = fitted

        x_train, y_train = get_xy(train_ratings_df)
        x_test, y_test = get_xy(test_ratings_df)

        with elapsed_time('scoring'):
            y_train_pred = model.predict(x_train)
            y_test_pred = model.predict(x_test)

            train_score = r2_score(y_train, y_train_pred)
            test_score = r2_score(y_test, y_test_pred)

            train_rmse = root_mean_squared_error(y_train, y_train_pred)
            test_rmse = root_mean_squared_error(y_test, y_test_pred)

        train_scores.append(train_score)
        test_scores.append(test_score)

        train_rmse_scores.append(train_rmse)
        test_rmse_scores.append(test_rmse)

    print('mean train score: %.4f, std: %.4f' % (np.mean(train_scores),
                                                 np.std(train_scores)))
    print('mean test score: %.4f, std: %.4f' % (np.mean(test_scores),
                                                np.std(test_scores)))
    print('')
    print('mean train rmse: %.4f, std: %.4f' % (np.mean(train_rmse_scores),
                                                np.std(train_rmse_scores)))
    print('mean test rmse: %.4f, std: %.4f' % (np.mean(test_rmse_scores),
                                               np.std(test_rmse_scores)))
Beispiel #5
0
    def fit(self, ratings_df):
        """Compute the overall mean plus per-movie and per-user effects.

        Stores the mean of the ``y`` part returned by ``get_xy`` as
        ``self.y_mean``, keeps the ratings grouped by ``userId`` as
        ``self.user_groups``, and fills ``self.movie_effects`` and
        ``self.user_effects`` from the corresponding ``calculate_*`` methods.

        Returns:
            ``self``.
        """
        with elapsed_time('effects init'):
            _, targets = get_xy(ratings_df)
            self.y_mean = targets.mean()

            # Only the 'rating' column is needed for the per-movie effects;
            # the per-user grouping is kept whole and stored on the instance.
            ratings_by_movie = ratings_df.groupby('movieId')['rating']
            self.user_groups = ratings_df.groupby('userId')

            self.movie_effects = self.calculate_movie_effects(
                ratings_by_movie)
            self.user_effects = self.calculate_user_effects(
                self.user_groups)

        return self
def score_models(ratings_df, model_records):
    """Fit each model on one shared train/test split and print its scores.

    Args:
        ratings_df: ratings data; split once with ``train_test_split`` so
            that every model is fitted and scored on the same partition.
        model_records: iterable of ``(model_name, model)`` pairs. Each model
            is used through ``fit(ratings_df)`` and ``predict(x)``.

    For every model the name, train/test R^2 and train/test RMSE are
    printed; nothing is returned.
    """
    train_ratings_df, test_ratings_df = train_test_split(ratings_df)
    x_train, y_train = get_xy(train_ratings_df)
    x_test, y_test = get_xy(test_ratings_df)

    # Blank line before the per-model reports.
    print('')

    for model_name, model in model_records:
        model.fit(train_ratings_df)

        train_pred = model.predict(x_train)
        test_pred = model.predict(x_test)

        r2_pair = (r2_score(y_train, train_pred),
                   r2_score(y_test, test_pred))
        rmse_pair = (root_mean_squared_error(y_train, train_pred),
                     root_mean_squared_error(y_test, test_pred))

        print('%s' % model_name)
        print('train r2 score: %.4f, test r2 score: %.4f' % r2_pair)
        print('train rmse: %.4f, test rmse: %.4f\n' % rmse_pair)
Beispiel #7
0
 def fit(self, ratings_df):
     """Store the mean of the ``y`` part of ``get_xy(ratings_df)``.

     The mean is kept as ``self.y_mean``.

     Returns:
         ``self``.
     """
     _, targets = get_xy(ratings_df)
     overall_mean = targets.mean()
     self.y_mean = overall_mean
     return self