def prepare_training_fastai(train):
    """Build a fastai ``CollabDataBunch`` from a ratings DataFrame.

    The user and item id columns are cast to strings first, since fastai
    treats them as categorical labels. ``valid_pct=0`` keeps every row in
    the training split (no validation hold-out).
    """
    frame = train.copy()
    # Cast both id columns in one pass; fastai expects categorical labels.
    for col in (DEFAULT_USER_COL, DEFAULT_ITEM_COL):
        frame[col] = frame[col].astype('str')
    return CollabDataBunch.from_df(
        frame,
        user_name=DEFAULT_USER_COL,
        item_name=DEFAULT_ITEM_COL,
        rating_name=DEFAULT_RATING_COL,
        valid_pct=0,
    )
def model_training(self, train, seed):
    """Train a fastai neural collab-filtering model and cache its embeddings.

    Fits a two-layer neural ``collab_learner`` on ``train`` (columns
    'user_id', 'business_id', 'stars_review'), saves the model, then extracts
    the learned user and business embedding matrices, min-max scales them,
    and stores them as ``self.user_df`` / ``self.business_df`` (indexed by the
    corresponding ids). The fitted learner is kept on ``self.learn``.

    Parameters
    ----------
    train : DataFrame with 'user_id', 'business_id', 'stars_review' columns.
    seed : int, rng seed for fastai's train/validation split.
    """
    user_emb = self.get_user_emb_size()
    buss_emb = self.get_buss_emb_size()
    data = CollabDataBunch.from_df(train, seed=seed, user_name='user_id', item_name='business_id', rating_name='stars_review')
    learn = collab_learner(data, use_nn=True, emb_szs={'user_id': user_emb, 'business_id': buss_emb}, layers=[256, 128], y_range=(0., 5.))
    learn.lr_find()
    learn.recorder.plot()
    learn.fit_one_cycle(40, 1e-2)
    learn.save(self.get_model_name())
    learn.show_results(rows=10)
    pd.set_option('display.max_columns', None)
    # Business embeddings: embeds[1] is the item table; row 0 is fastai's
    # padding/unknown token, hence the [1:] slice on weights and classes.
    business_w = learn.model.embeds[1].weight[1:]
    buss_narray = business_w.cpu().data.numpy()
    mms_buss = MinMaxScaler()
    buss_narray = mms_buss.fit_transform(buss_narray)
    buss_ids = list(learn.data.train_ds.x.classes['business_id'][1:])
    self.business_df = pd.DataFrame(buss_narray, index=buss_ids)
    # User embeddings: embeds[0] is the user table, same [1:] convention.
    user_w = learn.model.embeds[0].weight[1:]
    user_narray = user_w.cpu().data.numpy()
    mms_user = MinMaxScaler()
    user_narray = mms_user.fit_transform(user_narray)
    user_ids = list(learn.data.train_ds.x.classes['user_id'][1:])
    # BUG FIX: original stored pd.DataFrame(buss_narray, index=buss_ids) here,
    # so user_df silently duplicated the business embeddings and the computed
    # user_narray/user_ids were discarded.
    self.user_df = pd.DataFrame(user_narray, index=user_ids)
    self.learn = learn
    return
def model_training(self, train, seed, factors):
    """Train a fastai ``EmbeddingDotBias`` collaborative-filtering model.

    Builds a ``CollabDataBunch`` from ``train`` (columns 'user_id',
    'business_id', 'stars_review'), fits a matrix-factorization learner for
    40 one-cycle epochs, saves and exports it, and displays sample results.

    Parameters
    ----------
    train : DataFrame with 'user_id', 'business_id', 'stars_review' columns.
    seed : int, rng seed for fastai's train/validation split.
    factors : int, number of latent factors per embedding.

    Returns
    -------
    The fitted fastai learner (also persisted via save()/export()).
    """
    print(" Training EmbeddingDotBias Model , it might take a while ..")
    data = CollabDataBunch.from_df(train, seed=seed, user_name='user_id', item_name='business_id', rating_name='stars_review')
    learn1 = collab_learner(data, n_factors=factors, y_range=(0., 5.), wd=1e-1)
    print("Finding the learning rate.")
    print("\n")
    learn1.lr_find()
    learn1.recorder.plot()
    learn1.fit_one_cycle(40, 3e-4)
    learn1.save(self.get_model_name())
    print(" Exporting the model.")
    print("\n")
    learn1.export()
    # FIX: message typo — was " Visalizing the results."
    print(" Visualizing the results.")
    print("\n")
    learn1.show_results(rows=10)
    pd.set_option('display.max_columns', None)
    print("Completed ...")
    print("\n")
    return learn1
print( "Total number of ratings are\t{}".format(data.shape[0]), "Total number of users are\t{}".format(data[USER].nunique()), "Total number of items are\t{}".format(data[ITEM].nunique()), sep="\n" ) #st.subheader("data loaded") data_train, data_test = python_random_split(data, ratio=0.7) split =data_train.shape[0], data_test.shape[0] st.write("Splitting_Ratio:",split) data = CollabDataBunch.from_df(data_train, seed=42, valid_pct=0.1) y_range = [0.5,5.5] st.write(y_range) factor=N_FACTORS st.write("No. of factors:",factor) learn = collab_learner(data, n_factors=factor, y_range=y_range, wd=1e-1) learn.model #st.subheader("data loaded") fit_onecycle=learn.fit_one_cycle(5, 3e-4) st.write(fit_onecycle)
# NOTE(review): notebook-export residue. It opens a triple-quoted string that
# never closes in this chunk, and the `!git clone` / `!pip install` lines are
# IPython shell magics — this cannot run as plain Python. Kept byte-for-byte
# (reformatting could change the string's content if the literal closes
# elsewhere); it appears to train a fastai collab_learner on the
# goodbooks-10k ratings with a hand-set Adam optimizer. Decide whether to
# restore it as a real notebook or delete it.
""" !git clone https://github.com/SonaliDasgupta/goodbooks-10k import pandas as pd !pip install fastai pytorch ratings_df = pd.read_csv('goodbooks-10k/ratings.csv') from fastai.collab import CollabDataBunch from fastai.collab import collab_learner import torch.optim as optim from fastai.metrics import exp_rmspe data = CollabDataBunch.from_df(ratings_df) #no test dataset here wd = 1e-3 m = collab_learner(data, n_factors = 50, y_range = (1,5), metrics = [exp_rmspe]) #m.opt_fn = optim.Adam(params = m.parameters, lr = 0.5) #choosing 50 factors as of now, might try with half the size of dataset later #ratings go from 1 to 5 hence y_range from fastai.train import lr_find lr_find(m) m.recorder.plot_metrics m.recorder.plot() m.opt = optim.Adam(params = m.model.parameters(), lr = 0.03) m.opt.mom = 0.9 m.fit(3, lr = 0.03, wd = 1e-5) #PULL CODE IN SPYDER AND SEE OPTIM WRAPPER USAGE , TRY WITH ADAM LATER AND ALSO WEIGHT DECAY AND MOMENTUM
N_FACTORS = 40
EPOCHS = 5

# MovieLens-100k ratings.
ratings = pd.read_csv('./data/ml-100k/ratings.csv')

# split the dataset
# BUG FIX: original called python_stratified_split(ratings_df, ...) but
# `ratings_df` is never defined in this script — the frame loaded above is
# bound to `ratings`, which would have raised a NameError at runtime.
train_valid_df, test_df = python_stratified_split(ratings,
                                                  ratio=0.75,
                                                  min_rating=1,
                                                  filter_by="item",
                                                  col_user=USER,
                                                  col_item=ITEM)

# All rows go to training (valid_pct=0); evaluation happens on test_df later.
data = CollabDataBunch.from_df(train_valid_df,
                               user_name=USER,
                               item_name=ITEM,
                               rating_name=RATING,
                               valid_pct=0)
# data.show_batch()

"""Now we will create a `collab_learner` for the data, which by default uses the `EmbeddingDotBias` model. We will be using 40 latent factors. This will create an embedding for the users and the items that will map each of these to 40 floats as can be seen below. Note that the embedding parameters are not predefined, but are learned by the model. Although ratings can only range from 1-5, we are setting the range of possible ratings to a range from 0 to 5.5 -- that will allow the model to predict values around 1 and 5, which improves accuracy. Lastly, we set a value for weight-decay for regularization."""

learn = collab_learner(data, n_factors=N_FACTORS, y_range=[0, 5.5], wd=1e-1)