def prepare_training_fastai(train):
    """Build a fastai ``CollabDataBunch`` for training (no validation split).

    The user and item columns are cast to ``str`` because fastai treats
    them as categorical labels.
    """
    frame = train.copy()
    for col in (DEFAULT_USER_COL, DEFAULT_ITEM_COL):
        frame[col] = frame[col].astype('str')
    return CollabDataBunch.from_df(
        frame,
        user_name=DEFAULT_USER_COL,
        item_name=DEFAULT_ITEM_COL,
        rating_name=DEFAULT_RATING_COL,
        valid_pct=0,
    )
Ejemplo n.º 2
0
    def model_training(self, train, seed):
        """Train a neural collaborative-filtering model with fastai.

        Builds a ``CollabDataBunch`` from *train*, fits a two-layer
        (256, 128) embedding network for 40 one-cycle epochs, saves the
        model, and stores min-max-scaled user/business embedding matrices
        on ``self.user_df`` / ``self.business_df`` for downstream use.

        Parameters
        ----------
        train : pandas.DataFrame
            Ratings with 'user_id', 'business_id' and 'stars_review' columns.
        seed : int
            Seed forwarded to fastai's train/validation split.
        """
        user_emb = self.get_user_emb_size()
        buss_emb = self.get_buss_emb_size()

        data = CollabDataBunch.from_df(train,
                                       seed=seed,
                                       user_name='user_id',
                                       item_name='business_id',
                                       rating_name='stars_review')
        learn = collab_learner(data,
                               use_nn=True,
                               emb_szs={
                                   'user_id': user_emb,
                                   'business_id': buss_emb
                               },
                               layers=[256, 128],
                               y_range=(0., 5.))

        learn.lr_find()
        learn.recorder.plot()
        learn.fit_one_cycle(40, 1e-2)
        learn.save(self.get_model_name())

        learn.show_results(rows=10)
        pd.set_option('display.max_columns', None)

        # Row 0 of each embedding is fastai's padding/"#na#" slot; skip it
        # so embedding rows align with the class labels extracted below.
        business_w = learn.model.embeds[1].weight[1:]
        buss_narray = business_w.cpu().data.numpy()
        mms_buss = MinMaxScaler()
        buss_narray = mms_buss.fit_transform(buss_narray)
        buss_ids = list(learn.data.train_ds.x.classes['business_id'][1:])

        self.business_df = pd.DataFrame(buss_narray, index=buss_ids)

        user_w = learn.model.embeds[0].weight[1:]
        user_narray = user_w.cpu().data.numpy()
        mms_user = MinMaxScaler()
        user_narray = mms_user.fit_transform(user_narray)
        user_ids = list(learn.data.train_ds.x.classes['user_id'][1:])

        # BUG FIX: this was previously built from buss_narray/buss_ids,
        # silently duplicating the business frame instead of storing the
        # user embeddings.
        self.user_df = pd.DataFrame(user_narray, index=user_ids)

        self.learn = learn
        return
    def model_training(self, train, seed, factors):
        """Train a fastai ``EmbeddingDotBias`` collaborative-filtering model.

        Builds a ``CollabDataBunch`` from *train*, fits a dot-product
        model with *factors* latent factors for 40 one-cycle epochs,
        saves and exports the learner, and returns it.

        Parameters
        ----------
        train : pandas.DataFrame
            Ratings with 'user_id', 'business_id' and 'stars_review' columns.
        seed : int
            Seed forwarded to fastai's train/validation split.
        factors : int
            Number of latent factors for the embeddings.

        Returns
        -------
        Learner
            The trained fastai learner.
        """
        print(" Training EmbeddingDotBias Model , it might take a while ..")

        data = CollabDataBunch.from_df(train,
                                       seed=seed,
                                       user_name='user_id',
                                       item_name='business_id',
                                       rating_name='stars_review')
        learn1 = collab_learner(data,
                                n_factors=factors,
                                y_range=(0., 5.),
                                wd=1e-1)

        print("Finding the learning rate.")
        print("\n")

        learn1.lr_find()
        learn1.recorder.plot()
        learn1.fit_one_cycle(40, 3e-4)
        learn1.save(self.get_model_name())

        print(" Exporting the model.")
        print("\n")

        learn1.export()

        # Typo fix: message previously read " Visalizing the results."
        print(" Visualizing the results.")
        print("\n")

        learn1.show_results(rows=10)
        pd.set_option('display.max_columns', None)

        print("Completed ...")
        print("\n")

        return learn1
Ejemplo n.º 4
0
        # Summarize the dataset before training: row count plus distinct
        # users and items.
        print(
            "Total number of ratings are\t{}".format(data.shape[0]),
            "Total number of users are\t{}".format(data[USER].nunique()),
            "Total number of items are\t{}".format(data[ITEM].nunique()),
            sep="\n"
        )

        #st.subheader("data loaded")

        # 70/30 random train/test split.
        data_train, data_test = python_random_split(data, ratio=0.7)

        # NOTE(review): despite the "Splitting_Ratio" label, 'split' is the
        # (train_rows, test_rows) size tuple, not a ratio.
        split =data_train.shape[0], data_test.shape[0]
        st.write("Splitting_Ratio:",split)

        # fastai databunch over the training portion; 10% held out for
        # validation during fitting.
        data = CollabDataBunch.from_df(data_train, seed=42, valid_pct=0.1)

        # Rating range widened slightly past the actual 1-5 scale so the
        # sigmoid output layer can reach the extreme ratings.
        y_range = [0.5,5.5]
        st.write(y_range)

        factor=N_FACTORS
        st.write("No. of factors:",factor)

        # EmbeddingDotBias learner (fastai's collab default) with weight decay.
        learn = collab_learner(data, n_factors=factor, y_range=y_range, wd=1e-1)

        learn.model

        #st.subheader("data loaded")

        # Train for 5 one-cycle epochs and display the fit record in Streamlit.
        fit_onecycle=learn.fit_one_cycle(5, 3e-4)
        st.write(fit_onecycle)
"""

!git clone https://github.com/SonaliDasgupta/goodbooks-10k

import pandas as pd

!pip install fastai pytorch

ratings_df = pd.read_csv('goodbooks-10k/ratings.csv')

from fastai.collab import CollabDataBunch
from fastai.collab import collab_learner

import torch.optim as optim
from fastai.metrics import exp_rmspe
data = CollabDataBunch.from_df(ratings_df) #no test dataset here
wd = 1e-3
m = collab_learner(data, n_factors = 50, y_range = (1,5), metrics = [exp_rmspe])
#m.opt_fn = optim.Adam(params = m.parameters, lr = 0.5)
#choosing 50 factors as of now, might try with half the size of dataset later
#ratings go from 1 to 5 hence y_range

from fastai.train import lr_find
lr_find(m)
m.recorder.plot_metrics

m.recorder.plot()

m.opt = optim.Adam(params = m.model.parameters(), lr = 0.03)
m.opt.mom = 0.9
m.fit(3, lr = 0.03, wd = 1e-5) #PULL CODE IN SPYDER  AND SEE OPTIM WRAPPER USAGE , TRY WITH ADAM LATER AND ALSO WEIGHT DECAY AND MOMENTUM
Ejemplo n.º 6
0
def prepare_training_fastai(train):
    """Return a training-only fastai ``CollabDataBunch`` built from *train*.

    Casts the user and item ID columns to strings so fastai encodes them
    as categories; ``valid_pct=0`` keeps every row in the training set.
    """
    df = train.copy()
    df[DEFAULT_USER_COL] = df[DEFAULT_USER_COL].astype('str')
    df[DEFAULT_ITEM_COL] = df[DEFAULT_ITEM_COL].astype('str')
    bunch = CollabDataBunch.from_df(
        df,
        user_name=DEFAULT_USER_COL,
        item_name=DEFAULT_ITEM_COL,
        rating_name=DEFAULT_RATING_COL,
        valid_pct=0,
    )
    return bunch
Ejemplo n.º 7
0
# Hyperparameters for the EmbeddingDotBias collaborative-filtering model.
N_FACTORS = 40
EPOCHS = 5

# BUG FIX: the frame was loaded into 'ratings' but consumed below as
# 'ratings_df', which raised NameError; use one name throughout.
ratings_df = pd.read_csv('./data/ml-100k/ratings.csv')

# split the dataset — stratified by item so every item contributes rows
# to both the train/valid and test splits.
train_valid_df, test_df = python_stratified_split(ratings_df,
                                                  ratio=0.75,
                                                  min_rating=1,
                                                  filter_by="item",
                                                  col_user=USER,
                                                  col_item=ITEM)

# Training databunch only; valid_pct=0 keeps all rows for training.
data = CollabDataBunch.from_df(train_valid_df,
                               user_name=USER,
                               item_name=ITEM,
                               rating_name=RATING,
                               valid_pct=0)
# data.show_batch()
"""Now we will create a `collab_learner` for the data, which by default uses 
the `EmbeddingDotBias` model. We will be using 40 latent factors. This will 
create an embedding for the users and the items that will map each of these 
to 40 floats as can be seen below. Note that the embedding parameters are not 
predefined, but are learned by the model.

Although ratings can only range from 1-5, we are setting the range of possible 
ratings to a range from 0 to 5.5 -- that will allow the model to predict values 
around 1 and 5, which improves accuracy. Lastly, we set a value for weight-decay 
for regularization."""

learn = collab_learner(data, n_factors=N_FACTORS, y_range=[0, 5.5], wd=1e-1)