Python Dataset.load_from_dfの例

プログラミング言語: Python

名前空間/パッケージ名: surprise.dataset

クラス/型: Dataset

メソッド/関数: load_from_df

hotexamples.comのコード掲載数: 6

Python Dataset.load_from_df - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsurprise.dataset.Dataset.load_from_dfの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

load_from_df(6)

Dataset(3)

construct_trainset(3)

load_builtin(2)

__init__(1)

construct_testset(1)

コード例 #1

ファイルを表示

ファイル: svd_ratings.py プロジェクト: dickensc/FairRecommender

def svd_ratings_predicate(observed_ratings_df,
                          truth_ratings_df,
                          fold='0',
                          phase='eval'):
    """
    Fit an SVD model on the observed ratings and write its predictions.

    The model is trained on the 'userId', 'movieId' and 'rating' columns
    obtained after resetting the index of ``observed_ratings_df``. One
    estimate is then produced for every index entry of
    ``truth_ratings_df`` and the resulting frame is passed to ``write``
    under the name 'svd_rating_obs'.

    :param observed_ratings_df: frame providing the training ratings.
    :param truth_ratings_df: frame with a 'rating' column whose index
        entries are read as (user id, item id) pairs to predict.
    :param fold: forwarded unchanged to ``write``.
    :param phase: forwarded unchanged to ``write``.
    """
    print("SVD predicates")
    svd_model = SVD()
    rating_reader = Reader(rating_scale=(0.2, 1))

    # Training data: user, item and rating columns, in that order.
    training_frame = observed_ratings_df.reset_index().loc[
        :, ['userId', 'movieId', 'rating']]
    train_dataset = Dataset.load_from_df(df=training_frame,
                                         reader=rating_reader)
    full_trainset = train_dataset.build_full_trainset()
    svd_model.fit(full_trainset)

    # Empty result frame sharing the index of the truth ratings.
    predictions = pd.DataFrame(index=truth_ratings_df.index,
                               columns=['rating'])

    # Only the index of each row is used; the row values are ignored.
    for index_key, _ in truth_ratings_df.loc[:, ['rating']].iterrows():
        uid, iid = index_key[0], index_key[1]
        estimate = svd_model.predict(uid, iid).est
        predictions.loc[(uid, iid), 'rating'] = estimate

    write(predictions, 'svd_rating_obs', fold, phase)

コード例 #2

ファイルを表示

ファイル: random_recommender.py プロジェクト: erremesse/SAME

    def estimate_preference(self, user_id, item_id):
        """
        Estimate the preference of a specific user for a specific item.

        The model is fitted on the ratings held by the rating data model
        on every call, before the estimate is requested.

        :param user_id: Id of the user to recommend.
        :param item_id: Id of the item to recommend.
        :return: The preference estimated by this recommender, as a float.
        """
        data_model = self.rating_data_model
        ratings_frame = data_model.df_ratings

        # A reader is still needed, but only its rating_scale is required.
        rating_scale = (data_model.get_min_preference(),
                        data_model.get_max_preference())
        reader = Reader(rating_scale=rating_scale)
        train_data = Dataset(reader=reader)

        # Column order matters: user id, item id, rating.
        ordered_ratings = ratings_frame[['user_id', 'item_id', 'rating']]
        loaded_dataset = train_data.load_from_df(ordered_ratings, reader)
        trainset = train_data.construct_trainset(loaded_dataset.raw_ratings)

        # Train the recommendation model, then query it.
        self.model.fit(trainset)
        estimate = self.model.estimate(u=user_id, i=item_id)
        return float(estimate[0])

コード例 #3

ファイルを表示

mtarix_toGO['Norm_Tot_Amnt']= (mtarix_toGO['Mean_amount'] -min_amt)/max_amt
#lower_bound = min(mtarix_toGO['Log_Mean_Amount'])
#upper_bound = max(mtarix_toGO['Log_Mean_Amount'])
#print lower_bound
#print upper_bound
# Remove the outliers
dfx=mtarix_toGO[mtarix_toGO['Norm_Tot_Amnt'] <= 0.4]
lower_bound = min(dfx['Norm_Tot_Amnt'])
upper_bound = max(dfx['Norm_Tot_Amnt'])
print 'Lower Bound normalized spending =',lower_bound
print 'Upper Bound normalized spending =',upper_bound
print 'Number of Transactions remaining after removing Outliers::',mtarix_toGO.shape[0]

#define the reader  with  upper and lower bounds , also now we are predicting Normalized Total Amount column
reader_x = Reader(rating_scale = (lower_bound,upper_bound))
data = Dataset.load_from_df(df=dfx[['CustomerID','StockCode','Norm_Tot_Amnt']],reader=reader_x)


#for i in range(9):
#    print (data.raw_ratings[0][2] - data.df['Log_Mean_amount'][0])

print 'difference in processed and pre-processed dataset = ',(data.raw_ratings[0][2] - data.df['Norm_Tot_Amnt'][0])

import time
start_time = time.time()


#param_grid = {'n_factors':[2,5,10,50],'n_epochs': [10,50,100], 'lr_bu': [0.1,0.01,0.001,0.0001],'lr_bi': [0.1,0.01,0.001,0.0001],'reg_bi': [0.1,0.01,0.001,0.0001],'reg_bu': [0.1,0.01,0.001,0.0001],'reg_qi': [0.1,0.01,0.001,0.0001],'reg_pu': [0.1,0.01,0.001,0.0001]}
param_grid = {'n_factors':[5,10,50,100],'n_epochs': [5,10,20,50,100], 'lr_all': [0.1,0.01,0.001],'reg_all': [0.1,0.01,0.001}
grid_search = GridSearchCV(SVDpp, param_grid, measures=['rmse', 'mae'], cv=3, n_jobs=1)

コード例 #4

ファイルを表示

ファイル: Test.py プロジェクト: kadabra45/Projet_final

# Remove the 'geo_point_2d' column, then join the two frames.
del df4['geo_point_2d']
df_f = df4.join(df5)

# Keep the rows whose ARRONDISSEMENT contains 'paris', then drop the row
# labelled 204726.
is_paris = df_f["ARRONDISSEMENT"].str.contains('paris') == True
df_i = df_f.loc[is_paris]
df_i = df_i.drop(204726)

# Re-encode the three columns one after another with the shared encoder.
for encoded_column in ('ARRONDISSEMENT', 'LIEU/ADRESSE', 'STADE'):
    df_i[encoded_column] = encoder.fit_transform(df_i[encoded_column])

# Restrict to the rows where ALLERGIE equals 1.
has_allergy = df_i["ALLERGIE"] == 1
df_a = df_i.loc[has_allergy]

# Load LATITUDE / LONGITUDE / GENRE as the three dataset columns.
reader = Reader(rating_scale=(1, 164151))
rating_columns = ['LATITUDE', 'LONGITUDE', 'GENRE']
df_etude_2 = Dataset.load_from_df(df_a[rating_columns], reader)

# Density-based clustering on the standardised frame.
X = StandardScaler().fit_transform(df_a)
algo5 = DBSCAN(eps=0.3, min_samples=7).fit(X)
labels = algo5.labels_
# The label -1 is excluded from the cluster count and counted separately.
noise_present = -1 in labels
n_clusters_ = len(set(labels)) - int(noise_present)
n_noise_ = list(labels).count(-1)

# Hold out 25% of the dataset and evaluate an SVD model on it.
train_2, test_2 = train_test_split(df_etude_2, test_size=.25)
algo = SVD()
predictions_2 = algo.fit(train_2).test(test_2)

lat, lng = [], []

for i in predictions_2:

コード例 #5

ファイルを表示

                           head(10)

#%% Most active users -- Check correlation of numbers with rating/time?
# Ten users with the highest number of 'Recipe' entries.
(
    ratings
    .groupby('User')['Recipe']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

#%% Distribution of Ratings
rating_values = ratings.Rating
print(rating_values.describe())
print(set(rating_values))

#%% Build train - test split
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings, reader)

# Shuffle the raw ratings in place with a fixed seed, then cut at 75%.
random.seed(42)
random.shuffle(data.raw_ratings)
shuffled_ratings = data.raw_ratings
cut_off = int(len(shuffled_ratings) * 0.75)

train_ratings, test_ratings = (shuffled_ratings[:cut_off],
                               shuffled_ratings[cut_off:])

# From here on the dataset only exposes the training part.
data.raw_ratings = train_ratings

#%% Evaluate baseline on all, bias and test error
def evaluator(algo, df, cv_method, verbose = False):
    """
    wrapper to streamline evaluation
    """

コード例 #6

ファイルを表示

ファイル: Collaborative Filtering_3.py プロジェクト: Hashmeet229/Recommendation-System

from surprise import KNNBasic
from surprise import KNNWithMeans
from surprise import KNNWithZScore
from surprise import KNNBaseline
from surprise import SVD
from surprise import BaselineOnly
from surprise import SVDpp
from surprise import NMF
from surprise import SlopeOne
from surprise import CoClustering
from surprise.accuracy import rmse
from surprise.model_selection import train_test_split
from surprise import accuracy

# Ratings are declared on a 1-7 scale; the three selected columns are passed
# in user / item / rating order.
rating_columns = ['Smart Card_', 'Class.1_', 'freq']
reader = Reader(rating_scale=(1, 7))
data = Dataset.load_from_df(df_c1[rating_columns], reader)

# getting the most effective Algorithm for Recommendation System
benchmark = []
for algorithm in [
        SVD(),
        NMF(),
        SVDpp(),
        SlopeOne(),
        NormalPredictor(),
        KNNBaseline(),
        KNNBasic(),
        KNNWithMeans(),
        KNNWithZScore(),
        BaselineOnly(),
        CoClustering()