def evaluate_fm(model_, te_, tr_, items_features=None, users_features=None):
    if tr_.multiply(te_).nnz != 0:
        print('train and test interactions are not fully disjoint')
    # Compute and print the AUC score
    train_auc = auc_score(model_, tr_, item_features=items_features,
                          user_features=users_features,
                          num_threads=NUM_THREADS).mean()
    print('Collaborative filtering train AUC: %s' % train_auc)
    test_auc = auc_score(model_, te_, train_interactions=tr_,
                         item_features=items_features,
                         user_features=users_features,
                         num_threads=NUM_THREADS).mean()
    print('Collaborative filtering test AUC: %s' % test_auc)
    p_at_k_train = precision_at_k(model_, tr_, item_features=items_features,
                                  user_features=users_features, k=5,
                                  num_threads=NUM_THREADS).mean()
    p_at_k_test = precision_at_k(model_, te_, train_interactions=tr_,
                                 item_features=items_features,
                                 user_features=users_features, k=5,
                                 num_threads=NUM_THREADS).mean()
    print("Train precision: %.2f" % p_at_k_train)
    print("Test precision: %.2f" % p_at_k_test)
def auc():
    """Evaluates models on the ROC AUC metric.

    Measures the ROC AUC metric for a model: the probability that a randomly
    chosen positive example has a higher score than a randomly chosen
    negative example. A perfect score is 1.0.
    """
    auc = auc_score(model=model, train_interactions=train,
                    test_interactions=test,
                    item_features=item_features).mean()
    logger.info(model_name + ' AUC: %s' % auc)
    train_auc = auc_score(model, train, item_features=item_features,
                          num_threads=NUM_THREADS).mean()
    logger.info(model_name + ' training set AUC: %s' % train_auc)
    test_auc = auc_score(model, test, train_interactions=train,
                         item_features=item_features,
                         num_threads=NUM_THREADS).mean()
    logger.info(model_name + ' test set AUC: %s' % test_auc)
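As a cross-check on the docstring's definition of ROC AUC, the metric can be computed by brute force: score every (positive, negative) pair and count how often the positive wins. A minimal sketch in plain NumPy; pairwise_auc and the score values are illustrative, not part of any snippet here.

import numpy as np

def pairwise_auc(pos_scores, neg_scores):
    # Fraction of (positive, negative) pairs where the positive is ranked
    # higher; ties count as half a win, matching the usual ROC AUC convention.
    pos = np.asarray(pos_scores)[:, None]
    neg = np.asarray(neg_scores)[None, :]
    return np.mean(pos > neg) + 0.5 * np.mean(pos == neg)

# A perfect model scores every positive above every negative: AUC == 1.0.
print(pairwise_auc([2.0, 1.5], [0.3, -0.1, 0.0]))  # 1.0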
def train_model(train, test, user_features, item_features):
    log.info("Initializing model")
    model = LightFM(loss="warp", item_alpha=ITEM_ALPHA,
                    no_components=NUM_COMPONENTS)
    log.info("Training model")
    model = model.fit(train, user_features=user_features,
                      item_features=item_features, epochs=NUM_EPOCHS,
                      num_threads=NUM_THREADS)
    log.info("Scoring")
    train_auc = auc_score(model, train, user_features=user_features,
                          item_features=item_features,
                          num_threads=NUM_THREADS).mean()
    log.info(f"Training set AUC: {train_auc}")
    test_auc = auc_score(model, test, train_interactions=train,
                         user_features=user_features,
                         item_features=item_features,
                         num_threads=NUM_THREADS).mean()
    log.info(f"Test set AUC: {test_auc}")
    return model
def main():
    movielens = fetch_movielens()
    train = movielens['train']
    test = movielens['test']
    print(train.shape)
    print(test.shape)
    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=5)
    k = 10
    train_recall = recall_at_k(model, train, k=k).mean()
    test_recall = recall_at_k(model, test, k=k).mean()
    print(f'recall_at_{k}(train): {train_recall}')
    print(f'recall_at_{k}(test) : {test_recall}')
    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()
    print(f'auc_score(train): {train_auc}')
    print(f'auc_score(test) : {test_auc}')
    # dcg_score expects scores where higher is better, so rank with raw model
    # scores rather than the output of predict_rank (where rank 0 = best).
    n_users, n_items = train.shape
    user_ids = np.repeat(np.arange(n_users), n_items)
    item_ids = np.tile(np.arange(n_items), n_users)
    scores = model.predict(user_ids, item_ids).reshape(n_users, n_items)
    train_dcg = dcg_score(train.toarray(), scores)
    test_dcg = dcg_score(test.toarray(), scores)
    print(f'dcg_score(train): {train_dcg}')
    print(f'dcg_score(test) : {test_dcg}')
    print('DONE')
    return 0
def train(data, user_features=None, item_features=None, use_features=False):
    loss_type = "warp"  # alternatively "bpr"
    model = LightFM(learning_rate=0.05, loss=loss_type, max_sampled=100)
    if use_features:
        model.fit_partial(data, epochs=20, user_features=user_features,
                          item_features=item_features)
        train_precision = precision_at_k(model, data, k=10,
                                         user_features=user_features,
                                         item_features=item_features).mean()
        train_auc = auc_score(model, data, user_features=user_features,
                              item_features=item_features).mean()
    else:
        model.fit_partial(data, epochs=20)
        train_precision = precision_at_k(model, data, k=10).mean()
        train_auc = auc_score(model, data).mean()
    print(f'Precision: train {train_precision:.2f}')
    print(f'AUC: train {train_auc:.2f}')
    return model
def evaluate(model, train, test, hybrid=False, features=None):
    # Pass item features only in the hybrid case.
    kwargs = {'item_features': features} if hybrid else {}
    auc_train = np.mean(auc_score(model, train, **kwargs))
    pre_train = np.mean(precision_at_k(model, train, **kwargs))
    mrr_train = np.mean(reciprocal_rank(model, train, **kwargs))
    auc_test = np.mean(auc_score(model, test, **kwargs))
    pre_test = np.mean(precision_at_k(model, test, **kwargs))
    mrr_test = np.mean(reciprocal_rank(model, test, **kwargs))
    res_dict = {'auc_train': auc_train, 'pre_train': pre_train,
                'mrr_train': mrr_train, 'auc_test': auc_test,
                'pre_test': pre_test, 'mrr_test': mrr_test}
    print('AUC score (train/test): ', auc_train, ' / ', auc_test)
    print('Mean precision at k (train/test): ', pre_train, ' / ', pre_test)
    print('Mean reciprocal rank (train/test): ', mrr_train, ' / ', mrr_test)
    print('_________________________________________________________')
    return res_dict
def test_auc_score():
    no_users, no_items = (10, 100)
    train, test = _generate_data(no_users, no_items)
    model = LightFM(loss='bpr')
    model.fit_partial(train)
    auc = evaluation.auc_score(model, test, num_threads=2)
    expected_auc = np.array(_auc(model, test))
    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
    assert len(auc) == (test.getnnz(axis=1) > 0).sum()
    assert len(evaluation.auc_score(model, train,
                                    preserve_rows=True)) == test.shape[0]
    # With omitting train interactions
    auc = evaluation.auc_score(model, test, train_interactions=train,
                               num_threads=2)
    expected_auc = np.array(_auc(model, test, train))
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
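The _auc reference helper used by this test is not shown here. A plausible reconstruction, hedged as an assumption: it brute-forces per-user AUC from model.predict, skips users without test positives, and drops train positives from the candidate set when a train matrix is supplied.

import numpy as np

def _auc(model, ground_truth, train=None):
    ground_truth = ground_truth.tocsr()
    train = train.tocsr() if train is not None else None
    no_users, no_items = ground_truth.shape
    item_ids = np.arange(no_items)
    aucs = []
    for user_id in range(no_users):
        positives = ground_truth[user_id].indices
        if len(positives) == 0:
            continue
        excluded = set(positives)
        if train is not None:
            excluded |= set(train[user_id].indices)
        negatives = np.array([i for i in item_ids if i not in excluded])
        if len(negatives) == 0:
            continue
        scores = model.predict(user_id, item_ids)
        # Count positive/negative pairs where the positive wins.
        wins = (scores[positives][:, None] > scores[negatives][None, :]).mean()
        aucs.append(wins)
    return aucs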
def lightfm_model(data, prec_at_k=100, train_split=0.8, epochs=10):
    """
    Code to evaluate a LightFM model.

    Data is a scipy sparse matrix.
    https://arxiv.org/abs/1507.08439
    """
    model = LightFM(learning_rate=0.05, loss='logistic')
    train, test = random_train_test_split(data,
                                          test_percentage=1 - train_split)
    model.fit(train, epochs=epochs)  # num_threads=1
    train_precision = precision_at_k(model, train, k=prec_at_k)
    test_precision = precision_at_k(model, test, k=prec_at_k,
                                    train_interactions=train)
    train_auc = auc_score(model, train)
    test_auc = auc_score(model, test, train_interactions=train)
    print('Performance of LightFM Model \n')
    print(f'Precision \t Train: {train_precision.mean():.2f} \t '
          f'Test: {test_precision.mean():.2f}')
    print(f'AUC \t\t Train: {train_auc.mean():.2f} \t '
          f'Test: {test_auc.mean():.2f}')
    return (train_auc, test_auc, train_precision, test_precision, prec_at_k)
def main():
    movielens = fetch_movielens()
    train = movielens['train']
    print(type(train))
    print(train.toarray()[:5, :])
    test = movielens['test']
    print(type(test))
    print(test.toarray()[:5, :])
    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=10)
    train_precision = precision_at_k(model, train, k=10).mean()
    test_precision = precision_at_k(model, test, k=10,
                                    train_interactions=train).mean()
    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test, train_interactions=train).mean()
    print(f'train precision: {train_precision}')
    print(f'test precision: {test_precision}')
    print(f'train auc: {train_auc}')
    print(f'test auc: {test_auc}')
    print('DONE')
def evaluate(self, model, train, test, k=10):
    train_precision = precision_at_k(model, train, k=k).mean()
    test_precision = precision_at_k(model, test, k=k).mean()
    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()
    return train_precision, test_precision, train_auc, test_auc
def validate_item_features(ctx, data_home):
    data = fetch_stackexchange('crossvalidated',
                               test_set_fraction=0.1,
                               indicator_features=False,
                               tag_features=True,
                               data_home=data_home)
    train = data['train']
    test = data['test']

    # Model hyperparameters.
    NUM_COMPONENTS = 30
    NUM_EPOCHS = 3
    ITEM_ALPHA = 1e-6

    # Let's fit a WARP model: these generally have the best performance.
    model = LightFM(loss='warp',
                    item_alpha=ITEM_ALPHA,
                    no_components=NUM_COMPONENTS)
    # Run NUM_EPOCHS epochs.
    model = model.fit(train, epochs=NUM_EPOCHS)

    train_auc = auc_score(model, train).mean()
    print('Collaborative filtering train AUC: %s' % train_auc)
    test_auc = auc_score(model, test, train_interactions=train).mean()
    print('Collaborative filtering test AUC: %s' % test_auc)

    # Set biases to zero
    model.item_biases *= 0.0
    test_auc = auc_score(model, test, train_interactions=train).mean()
    print('Collaborative filtering test AUC (zeroed biases): %s' % test_auc)

    item_features = data['item_features']
    tag_labels = data['item_feature_labels']
    print('There are %s distinct tags, with values like %s.'
          % (item_features.shape[1], tag_labels[:3].tolist()))

    # Define a new model instance
    model = LightFM(loss='warp',
                    item_alpha=ITEM_ALPHA,
                    no_components=NUM_COMPONENTS)
    # Fit the hybrid model. Note that this time, we pass
    # in the item features matrix.
    model = model.fit(train, item_features=item_features, epochs=NUM_EPOCHS)

    # Don't forget to pass in the item features again!
    train_auc = auc_score(model, train, item_features=item_features).mean()
    print('Hybrid training set AUC: %s' % train_auc)
    test_auc = auc_score(model, test, train_interactions=train,
                         item_features=item_features).mean()
    print('Hybrid test set AUC: %s' % test_auc)
def lightfm_model(data, prec_at_k=10, train_split=0.8):
    """
    Code to evaluate a LightFM model.

    Data is a scipy sparse matrix.
    https://arxiv.org/abs/1507.08439
    """
    model = LightFM(learning_rate=0.05, loss='bpr')
    train, test = random_train_test_split(data,
                                          test_percentage=1 - train_split)
    model.fit(train, epochs=10)
    train_precision = precision_at_k(model, train, k=prec_at_k)
    test_precision = precision_at_k(model, test, k=prec_at_k,
                                    train_interactions=train)
    train_auc = auc_score(model, train)
    test_auc = auc_score(model, test, train_interactions=train)
    print('Performance of LightFM Model \n')
    print(f'Precision \t Train: {train_precision.mean():.2f} \t '
          f'Test: {test_precision.mean():.2f}')
    print(f'AUC \t\t Train: {train_auc.mean():.2f} \t '
          f'Test: {test_auc.mean():.2f}')

    fig, ax = plt.subplots(2, 2, figsize=(15, 10))
    ax[0, 0].hist(train_auc, bins='auto')
    ax[0, 0].title.set_text('Distribution of Train AUC score over users')
    ax[0, 0].set_ylabel('Count')
    ax[0, 0].set_xlabel('AUC Score')
    ax[0, 1].hist(test_auc, bins='auto')
    ax[0, 1].title.set_text('Distribution of Test AUC score over users')
    ax[0, 1].set_ylabel('Count')
    ax[0, 1].set_xlabel('AUC Score')
    ax[1, 0].hist(train_precision, bins='auto')
    ax[1, 0].title.set_text(
        f'Distribution of Train Precision @ {prec_at_k} for all users')
    ax[1, 0].set_ylabel('Count')
    ax[1, 0].set_xlabel(f'Precision @ {prec_at_k}')
    ax[1, 1].hist(test_precision, bins='auto')
    ax[1, 1].title.set_text(
        f'Distribution of Test Precision @ {prec_at_k} for all users')
    ax[1, 1].set_ylabel('Count')
    ax[1, 1].set_xlabel(f'Precision @ {prec_at_k}')
    plt.show()
    print('\n')
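A minimal smoke test for lightfm_model above, assuming its dependencies (LightFM, random_train_test_split, the evaluation metrics, and matplotlib) are in scope; the matrix shape and density are arbitrary.

import numpy as np
import scipy.sparse as sp

# Synthetic 0/1 interactions: 500 users x 2000 items, ~1% dense.
rng = np.random.RandomState(42)
data = sp.random(500, 2000, density=0.01, format='coo', random_state=rng)
data.data = np.ones_like(data.data)

lightfm_model(data, prec_at_k=10, train_split=0.8)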
def collab_filtering():
    """
    Implements the collaborative filtering version by using only the rating
    data from the movielens dataset.
    :return:
    """
    data = fetch_movielens()
    for key, value in data.items():
        print(key, type(value), value.shape)
    train = data['train']
    test = data['test']
    print('The dataset has %s users and %s items, '
          'with %s interactions in the test and %s interactions in the '
          'training set.'
          % (train.shape[0], train.shape[1], test.getnnz(), train.getnnz()))
    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=50, num_threads=5)
    train_precision = precision_at_k(model, train, k=10).mean()
    test_precision = precision_at_k(model, test, k=10).mean()
    train_recall = recall_at_k(model, train, k=10).mean()
    test_recall = recall_at_k(model, test, k=10).mean()
    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()
    print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
    print('Recall: train %.2f, test %.2f.' % (train_recall, test_recall))
    print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

    # Train a fresh WARP model; note that fit_partial on a new instance
    # starts from scratch rather than resuming the BPR model's state.
    model = LightFM(learning_rate=0.05, loss='warp')
    model.fit_partial(train, epochs=50, num_threads=5)
    train_precision = precision_at_k(model, train, k=10).mean()
    test_precision = precision_at_k(model, test, k=10).mean()
    train_recall = recall_at_k(model, train, k=10).mean()
    test_recall = recall_at_k(model, test, k=10).mean()
    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()
    print("*****************")
    print("After re-training")
    print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
    print('Recall: train %.2f, test %.2f.' % (train_recall, test_recall))
    print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

    # check sample recommendations
    sample_recommendation(model, data, [3, 25, 450])
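sample_recommendation is not defined in this snippet; a sketch in the spirit of LightFM's quickstart example, assuming the movielens data dict with its item_labels key:

import numpy as np

def sample_recommendation(model, data, user_ids):
    # For each user, print a few known positives and the model's top picks.
    n_users, n_items = data['train'].shape
    for user_id in user_ids:
        known_positives = data['item_labels'][
            data['train'].tocsr()[user_id].indices]
        scores = model.predict(user_id, np.arange(n_items))
        top_items = data['item_labels'][np.argsort(-scores)]
        print('User %s' % user_id)
        print('  Known positives:', ', '.join(known_positives[:3]))
        print('  Recommended:', ', '.join(top_items[:3]))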
def eval(model, train, val):
    # auc
    print("Train auc: %.2f" % auc_score(model, train).mean())
    print("Val auc: %.2f" % auc_score(model, val).mean())
    # precision_at_k
    print("Train precision: %.2f" % precision_at_k(model, train, k=5).mean())
    print("Val precision: %.2f" % precision_at_k(model, val, k=5).mean())
    # recall_at_k
    print("Train recall: %.2f" % recall_at_k(model, train, k=5).mean())
    print("Val recall: %.2f" % recall_at_k(model, val, k=5).mean())
def evaluate_model(model, train, test, item_features=None, user_features=None,
                   num_threads=1):
    train_precision = precision_at_k(model, train, k=10,
                                     user_features=user_features,
                                     item_features=item_features,
                                     num_threads=num_threads).mean()
    test_precision = precision_at_k(model, test, train_interactions=train,
                                    k=10, user_features=user_features,
                                    item_features=item_features,
                                    num_threads=num_threads).mean()
    train_auc = auc_score(model, train, user_features=user_features,
                          item_features=item_features,
                          num_threads=num_threads).mean()
    test_auc = auc_score(model, test, train_interactions=train,
                         user_features=user_features,
                         item_features=item_features,
                         num_threads=num_threads).mean()
    print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
    print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))
    return train_precision, test_precision, train_auc, test_auc
def patk_learning_curve(model, train, test, iterarray, user_features=None,
                        item_features=None, k=5, **fit_params):
    old_epoch = 0
    train_patk = []
    test_patk = []
    warp_duration = []
    # bpr_duration = []
    train_warp_auc = []
    test_warp_auc = []
    # bpr_auc = []
    headers = ['Epoch', 'train p@5', 'train_auc', 'test p@5', 'test_auc']
    print_log(headers, header=True)
    for epoch in iterarray:
        # Train only the additional epochs since the last checkpoint.
        more = epoch - old_epoch
        start = time.time()
        model.fit_partial(train, user_features=user_features, epochs=more,
                          item_features=item_features, **fit_params)
        warp_duration.append(time.time() - start)
        train_warp_auc.append(
            auc_score(model, train, item_features=item_features).mean())
        test_warp_auc.append(
            auc_score(model, test, item_features=item_features,
                      train_interactions=train).mean())
        this_test = precision_at_k(model, test, train_interactions=train,
                                   item_features=item_features, k=k)
        this_train = precision_at_k(model, train, train_interactions=None,
                                    item_features=item_features, k=k)
        train_patk.append(np.mean(this_train))
        test_patk.append(np.mean(this_test))
        row = [epoch, train_patk[-1], train_warp_auc[-1],
               test_patk[-1], test_warp_auc[-1]]
        print_log(row)
        old_epoch = epoch
    return (model, train_patk, test_patk, warp_duration,
            train_warp_auc, test_warp_auc)
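print_log is assumed to be defined elsewhere; a minimal stand-in consistent with the calls above could look like this (the column width is a guess):

def print_log(row, header=False, width=12):
    # Render a list of values as one fixed-width table row;
    # print a separator underneath when it is a header row.
    line = ''.join(str(col)[:width].ljust(width) for col in row)
    print(line)
    if header:
        print('-' * len(line))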
def model_train_test(spark, train, val, test, no_components=20,
                     learning_rate=0.01, epochs=15, k=10):
    '''
    Function to train a LightFM collaborative filtering recommender system
    and test it on validation data and testing data.

    Parameters
    ----------
    spark: spark session object
    train: sparse matrix: processed training data
    val: sparse matrix: processed validation data
    test: sparse matrix: processed testing data
    no_components: int: latent factors
    learning_rate: float: learning rate
    epochs: int: iterations
    k: int: top-k predictions for every user

    Return
    ----------
    None
    '''
    start_time = time.time()
    # Create LightFM object
    model = LightFM(no_components=no_components, learning_rate=learning_rate,
                    loss='warp')
    # Fit model
    model.fit(train, epochs=epochs, num_threads=1)
    # Record time (fit time)
    fit_time = time.time()
    # Calculate AUC values
    auc_val = auc_score(model, val).mean()
    score_calc_time = time.time()
    auc_test = auc_score(model, test).mean()
    auc_train = auc_score(model, train).mean()
    # Calculate precision_at_k
    P_at_K = precision_at_k(model, test, k=k)
    precision_value = np.mean(P_at_K)
    print("For no_components = {}, learning_rate = {} ".format(
        no_components, learning_rate))
    print("Train AUC Score: {}".format(auc_train))
    print("Val AUC Score: {}".format(auc_val))
    print("Test AUC Score: {}".format(auc_test))
    print("Precision at k={} Score: {}".format(k, precision_value))
    # time.time() differences are in seconds; convert to minutes.
    print("--- Fit time: {} mins ---".format((fit_time - start_time) / 60))
    print("--- Score time: {} mins ---".format(
        (score_calc_time - fit_time) / 60))
def evaluate_model(df, user_id_col='user_id', item_id_col='business_id',
                   stratify=None):
    """
    Model evaluation.

    Args:
        df: the input dataframe.
        user_id_col: user id column.
        item_id_col: item id column.
        stratify: if use stratification.

    Returns:
        train_auc: training set auc score.
        test_auc: testing set auc score.
    """
    # model evaluation
    # create test and train datasets
    print('model evaluation')
    train, test = train_test_split(df, test_size=0.2, stratify=stratify)
    ds = Dataset()
    # we call fit to supply user ids, item ids and user/item features
    ds.fit(
        df[user_id_col].unique(),  # all the users
        df[item_id_col].unique(),  # all the items
    )
    # plugging in the interactions
    (train_interactions, train_weights) = ds.build_interactions(
        [(x[0], x[1], x[2]) for x in train.values])
    (test_interactions, _) = ds.build_interactions(
        [(x[0], x[1], x[2]) for x in test.values])
    # model
    model = LightFM(no_components=100, learning_rate=0.05, loss='warp',
                    max_sampled=50)
    model.fit(train_interactions, sample_weight=train_weights, epochs=10,
              num_threads=10)
    # auc-roc
    train_auc = auc_score(model, train_interactions, num_threads=20).mean()
    print('Training set AUC: %s' % train_auc)
    test_auc = auc_score(model, test_interactions, num_threads=20).mean()
    print('Testing set AUC: %s' % test_auc)
    return train_auc, test_auc
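A small synthetic smoke test for the function above, assuming a DataFrame whose columns are ordered (user, item, weight) as build_interactions consumes them, and that sklearn's train_test_split and lightfm's Dataset are in scope as in the function body:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame({
    'user_id': rng.randint(0, 50, size=2000),
    'business_id': rng.randint(0, 200, size=2000),
    'weight': np.ones(2000),
})
# Column order matters: build_interactions reads (user, item, weight)
# positionally from each row.
train_auc, test_auc = evaluate_model(df)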
def measure_accuracies(model, data):
    print("\nMeasuring accuracies of the model...")
    # evaluate the precision@k metric
    training_precision = precision_at_k(model, data["train"],
                                        k=PRECISION_K).mean()
    test_precision = precision_at_k(model, data["test"], k=PRECISION_K).mean()
    # evaluate the AUROC metric
    training_auc = auc_score(model, data["train"]).mean()
    test_auc = auc_score(model, data["test"]).mean()
    # print them out
    print("Precision@k: training %.2f, test %.2f"
          % (training_precision, test_precision))
    print("AUC: training %.2f, test %.2f" % (training_auc, test_auc))
def _get_metrics(model, train_set, test_set):
    train_set = train_set.tocsr()
    test_set = test_set.tocsr()
    # Drop negative interactions before scoring.
    train_set.data[train_set.data < 0] = 0.0
    test_set.data[test_set.data < 0] = 0.0
    train_set.eliminate_zeros()
    test_set.eliminate_zeros()
    return (precision_at_k(model, train_set).mean(),
            precision_at_k(model, test_set).mean(),
            auc_score(model, train_set).mean(),
            auc_score(model, test_set).mean())
def evaluate_model(model, metric, test, train):
    """
    Evaluate a trained model on the test set, using one of the three
    available accuracy metrics.

    AUC: the probability that a randomly chosen positive example has a higher
    score than a randomly chosen negative example.
    Precision: the fraction of known positives in the first k positions of
    the ranked list of results.
    Recall: the number of positive items in the first k positions of the
    ranked list of results divided by the number of positive items in the
    test period.

    :param model: (LightFM, required) - model to be evaluated
    :param metric: (string, required) - accuracy metric to be used, one of
        ['auc', 'precision', 'recall']
    :param test: (COO matrix, required) - known positives used to test the model
    :param train: (COO matrix, required) - training set; these interactions
        will be omitted from the score calculations to avoid re-recommending
        known positives.
    :return: test_score (float) - score computed on the test set
    """
    try:
        # make sure the metric is correct
        assert metric in ['auc', 'precision', 'recall']
        if metric == 'auc':
            test_score = auc_score(model, test, train).mean()
        elif metric == 'precision':
            test_score = precision_at_k(model, test, train, k=5).mean()
        else:
            test_score = recall_at_k(model, test, train, k=5).mean()
        return test_score
    except AssertionError:
        print('The metric provided is not correct or available!')
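Example use of evaluate_model above, shown with fetch_movielens and a freshly fitted WARP model; the epoch count is arbitrary.

from lightfm import LightFM
from lightfm.datasets import fetch_movielens

data = fetch_movielens()
train, test = data['train'], data['test']
model = LightFM(loss='warp').fit(train, epochs=10)

for metric in ('auc', 'precision', 'recall'):
    print(metric, evaluate_model(model, metric, test, train))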
def Model_fit_part1(train, test):
    # initialising model with warp loss function
    model_without_features = LightFM(loss="warp")
    start = time.time()
    # ===================
    model_without_features.fit(train,
                               user_features=None,
                               item_features=None,
                               sample_weight=None,
                               epochs=1,
                               num_threads=4,
                               verbose=False)
    # ===================
    end = time.time()
    print("time taken = {0:.{1}f} seconds".format(end - start, 2))
    print("checking accuracy and results with test data")
    # auc metric score (ranging from 0 to 1)
    start = time.time()
    # ===================
    auc_without_features = auc_score(model=model_without_features,
                                     test_interactions=test,
                                     num_threads=4,
                                     check_intersections=False)
    print(auc_without_features)
    # ===================
    end = time.time()
    print("accuracy model time taken = {0:.{1}f} seconds".format(
        end - start, 2))
def Model_fit_part2(train, prod_features, test):
    # initialising model with warp loss function
    from lightfm import LightFM
    from lightfm.evaluation import auc_score
    model_with_features = LightFM(loss="warp")
    # fitting the model with hybrid collaborative filtering +
    # content based (product + features)
    start = time.time()
    # ===================
    model_with_features.fit(train,
                            user_features=None,
                            item_features=prod_features,
                            sample_weight=None,
                            epochs=1,
                            num_threads=4,
                            verbose=False)
    auc_with_features = auc_score(model=model_with_features,
                                  test_interactions=test,
                                  train_interactions=train,
                                  item_features=prod_features,
                                  num_threads=4,
                                  check_intersections=False)
    # ===================
    end = time.time()
    print("time taken = {0:.{1}f} seconds".format(end - start, 2))
    print("average AUC with item-feature interactions = {0:.{1}f}".format(
        auc_with_features.mean(), 2))
def testAUC(self):
    self.test_auc = auc_score(self.model,
                              self.item_user,
                              item_features=self.item,
                              user_features=self.user,
                              num_threads=self.num_threads).mean()
    print('Hybrid test set AUC: %s' % self.test_auc)
def test_LightFM_model(model, test_interactions, train_interactions,
                       user_features, movie_features, k=5):
    test_precision = precision_at_k(model, test_interactions,
                                    train_interactions, k=k,
                                    user_features=user_features,
                                    item_features=movie_features,
                                    num_threads=2).mean()
    test_recall = recall_at_k(model, test_interactions, train_interactions,
                              k=k, user_features=user_features,
                              item_features=movie_features,
                              num_threads=2).mean()
    test_auc = auc_score(model, test_interactions, train_interactions,
                         user_features=user_features,
                         item_features=movie_features,
                         num_threads=2).mean()
    print('Model')
    print('Precision at k=', str(k), ': ', round(test_precision, 3), sep='')
    print('Recall at k=', str(k) + ': ', round(test_recall, 3), sep='')
    print('AUC: ', round(test_auc, 3), sep='')
    return ({'precision': round(test_precision, 3),
             'recall': round(test_recall, 3),
             'auc': round(test_auc, 3)})
def runMF(product_type, num_samples=20, num_threads=2):
    """
    Builds a cosmetics recommendation model for heavy users, using LightFM.

    Hyperparameters drawn by the sample_hyperparameters generator are applied
    to each candidate model.
    -------
    interactions: sparse matrix built from user and cosmetics data
    num_samples: caps how many random hyperparameter draws are evaluated
    """
    interactions = pickle.load(
        open("./pickle_data/" + product_type + "/interactions.p", "rb"))
    user_features = pickle.load(open("./pickle_data/user_features.p", "rb"))
    x = sparse.csr_matrix(interactions.values)
    train, test = random_train_test_split(x, test_percentage=0.2,
                                          random_state=RandomState(523))
    # itertools.islice limits the infinite hyperparameter generator to
    # num_samples draws.
    for hyperparams in itertools.islice(sample_hyperparameters(), num_samples):
        num_epochs = hyperparams.pop("num_epochs")
        model = LightFM(**hyperparams)
        model.fit(train, user_features=user_features, epochs=num_epochs,
                  num_threads=num_threads, verbose=True)
        auc = auc_score(model, test, train_interactions=train,
                        num_threads=num_threads,
                        user_features=user_features).mean()
        hyperparams["num_epochs"] = num_epochs
        yield (auc, model)
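sample_hyperparameters is imported from elsewhere; LightFM's own random-search example defines it roughly like this (the ranges are indicative, not prescriptive):

import numpy as np

def sample_hyperparameters():
    # Yield an endless stream of random hyperparameter draws;
    # the caller bounds it with itertools.islice.
    while True:
        yield {
            "no_components": np.random.randint(16, 64),
            "learning_schedule": np.random.choice(["adagrad", "adadelta"]),
            "loss": np.random.choice(["bpr", "warp", "warp-kos"]),
            "learning_rate": np.random.exponential(0.05),
            "item_alpha": np.random.exponential(1e-8),
            "user_alpha": np.random.exponential(1e-8),
            "max_sampled": np.random.randint(5, 15),
            "num_epochs": np.random.randint(5, 50),
        }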
def run(self):
    self.random = RandomState(self.random_seed)
    orders_path = self.requires()['orders'].output().path
    _, features, interactions = self._generate_matrices(orders_path)
    train_features, val_features, train_interactions, val_interactions = \
        train_test_split(features, interactions, test_size=0.1,
                         random_state=self.random)
    model = LightFM(loss='logistic', no_components=self.no_components,
                    random_state=self.random)
    wait = 0
    best_val_auc = None
    for epoch in range(1, self.epochs + 1):
        # Train one epoch at a time so that early stopping can kick in.
        model.fit_partial(train_interactions, user_features=train_features,
                          epochs=1, num_threads=self.num_threads)
        auc_scores = auc_score(model, val_interactions,
                               user_features=val_features,
                               num_threads=self.num_threads)
        current_val_auc = np.nan_to_num(auc_scores).mean()
        if best_val_auc is None or current_val_auc > best_val_auc:
            joblib.dump(model, self.output().path)
            best_val_auc = current_val_auc
            wait = 0
        else:
            wait += 1
            if wait == self.patience:
                break
        print('Epoch {}/{} - AUC: {:.6g}'.format(epoch, self.epochs,
                                                 best_val_auc))
def resultados_colaborativo(self):
    """
    resultados_colaborativo method.

    Computes the results of the collaborative model.
    This method is only used in the text interface.
    """
    global train, test, modelo
    # Compute the results
    precision = precision_at_k(modelo, test, train_interactions=train, k=10,
                               num_threads=self.CPU_THREADS).mean()
    auc = auc_score(modelo, test, train_interactions=train,
                    num_threads=self.CPU_THREADS).mean()
    recall = recall_at_k(modelo, test, train_interactions=train, k=10,
                         num_threads=self.CPU_THREADS).mean()
    reciprocal = reciprocal_rank(modelo, test, train_interactions=train,
                                 num_threads=self.CPU_THREADS).mean()
    # Print the results
    imprimir_resultados_clasico(precision, auc, recall, reciprocal)
def best_reccomendation():
    # define variables
    best = 0.0
    best_model = None
    for model in models:
        score = 0.0
        pak_score = evaluation.precision_at_k(model, data2['test'])
        score += np.mean(pak_score)
        rak_score = evaluation.recall_at_k(model, data2['test'])
        score += np.mean(rak_score)
        # avoid shadowing lightfm.evaluation.auc_score
        auc_val = evaluation.auc_score(model, data2['test'])
        score += np.mean(auc_val)
        rr_score = evaluation.reciprocal_rank(model, data2['test'])
        score += np.mean(rr_score)
        print(score)
        if score >= best:
            best = score
            best_model = model
    return best_model
def plot_roc(data):
    alpha = 1e-05
    epochs = 50
    num_components = 32
    warp_model = LightFM(no_components=num_components,
                         loss='warp',
                         learning_schedule='adagrad',
                         max_sampled=3,
                         user_alpha=alpha,
                         item_alpha=alpha)
    bpr_model = LightFM(no_components=num_components,
                        loss='bpr',
                        learning_schedule='adagrad',
                        user_alpha=alpha,
                        item_alpha=alpha)
    logistic_model = LightFM(no_components=num_components,
                             loss='logistic',
                             learning_schedule='adagrad',
                             user_alpha=alpha,
                             item_alpha=alpha)
    warp_auc = []
    bpr_auc = []
    logistic_auc = []
    for epoch in range(epochs):
        warp_model.fit_partial(data['matrix'], epochs=5)
        warp_auc.append(auc_score(warp_model, data['matrix']).mean())
    for epoch in range(epochs):
        bpr_model.fit_partial(data['matrix'], epochs=5)
        bpr_auc.append(auc_score(bpr_model, data['matrix']).mean())
    for epoch in range(epochs):
        logistic_model.fit_partial(data['matrix'], epochs=5)
        logistic_auc.append(auc_score(logistic_model, data['matrix']).mean())
    x = np.arange(epochs)
    plt.plot(x, np.array(warp_auc))
    plt.plot(x, np.array(bpr_auc))
    plt.plot(x, np.array(logistic_auc))
    plt.legend(['WARP AUC', 'BPR AUC', 'LOGISTIC AUC'], loc='upper right')
    return plt.show(block=False)
def _get_metrics(model, train_set, test_set):
    train_set = train_set.tocsr()
    test_set = test_set.tocsr()
    train_set.data[train_set.data < 0] = 0.0
    test_set.data[test_set.data < 0] = 0.0
    train_set.eliminate_zeros()
    test_set.eliminate_zeros()
    # Only score users that have at least one interaction; preserve_rows=True
    # keeps one row per user so the boolean masks line up.
    train_users = train_set.getnnz(axis=1) > 0
    test_users = test_set.getnnz(axis=1) > 0
    return (precision_at_k(model, train_set,
                           preserve_rows=True)[train_users].mean(),
            precision_at_k(model, test_set,
                           preserve_rows=True)[test_users].mean(),
            auc_score(model, train_set,
                      preserve_rows=True)[train_users].mean(),
            auc_score(model, test_set,
                      preserve_rows=True)[test_users].mean())
def test_intersections_check():
    no_users, no_items = (10, 100)
    train, test = _generate_data(no_users, no_items)
    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # check error is raised when train and test have interactions in common
    with pytest.raises(ValueError):
        evaluation.auc_score(model, train, train_interactions=train,
                             check_intersections=True)
    with pytest.raises(ValueError):
        evaluation.recall_at_k(model, train, train_interactions=train,
                               check_intersections=True)
    with pytest.raises(ValueError):
        evaluation.precision_at_k(model, train, train_interactions=train,
                                  check_intersections=True)
    with pytest.raises(ValueError):
        evaluation.reciprocal_rank(model, train, train_interactions=train,
                                   check_intersections=True)

    # check no errors raised when train and test have no interactions in common
    evaluation.auc_score(model, test, train_interactions=train,
                         check_intersections=True)
    evaluation.recall_at_k(model, test, train_interactions=train,
                           check_intersections=True)
    evaluation.precision_at_k(model, test, train_interactions=train,
                              check_intersections=True)
    evaluation.reciprocal_rank(model, test, train_interactions=train,
                               check_intersections=True)

    # check no error is raised when there are intersections but flag is False
    evaluation.auc_score(model, train, train_interactions=train,
                         check_intersections=False)
    evaluation.recall_at_k(model, train, train_interactions=train,
                           check_intersections=False)
    evaluation.precision_at_k(model, train, train_interactions=train,
                              check_intersections=False)
    evaluation.reciprocal_rank(model, train, train_interactions=train,
                               check_intersections=False)
def test_auc_score():
    no_users, no_items = (10, 100)
    train = sp.rand(no_users, no_items, format='coo')
    train.data = np.ones_like(train.data)
    model = LightFM(loss='bpr')
    model.fit_partial(train)
    # preserve_rows=True keeps one row per user so the boolean mask
    # over all users lines up with the returned array.
    auc = evaluation.auc_score(model, train, num_threads=2,
                               preserve_rows=True)[train.getnnz(axis=1) > 0]
    expected_auc = np.array(_auc(model, train))
    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
NUM_EPOCHS = 3
ITEM_ALPHA = 1e-6

# Let's fit a WARP model: these generally have the best performance.
model = LightFM(loss='warp',
                item_alpha=ITEM_ALPHA,
                no_components=NUM_COMPONENTS)

# Run 3 epochs and time it.
model = model.fit(train, epochs=NUM_EPOCHS, num_threads=NUM_THREADS)

# Import the evaluation routines
from lightfm.evaluation import auc_score

# Compute and print the AUC score
train_auc = auc_score(model, train, num_threads=NUM_THREADS).mean()
print('Collaborative filtering train AUC: %s' % train_auc)

# We pass in the train interactions to exclude them from predictions.
# This is to simulate a recommender system where we do not
# re-recommend things the user has already interacted with in the train
# set.
test_auc = auc_score(model, test, train_interactions=train,
                     num_threads=NUM_THREADS).mean()
print('Collaborative filtering test AUC: %s' % test_auc)

# Set biases to zero
model.item_biases *= 0.0

test_auc = auc_score(model, test, train_interactions=train,
                     num_threads=NUM_THREADS).mean()
def do_fiber_training(visualization=False):
    if (not os.path.isfile(rc.RECOMMENDER_TRAINING)
            or not os.path.isfile(rc.RECOMMENDER_MODEL)):
        yarn_data_matrix = pickle.load(open(rc.YARN_DATA_MATRIX, "rb"))
        yarn_data_train = sps.coo_matrix(
            yarn_data_matrix[:int(len(yarn_data_matrix) * 0.5)]) > 0
        yarn_data_test = sps.coo_matrix(
            yarn_data_matrix[int(len(yarn_data_matrix) * 0.5):]) > 0
        if visualization:
            print(yarn_data_train.shape[0], yarn_data_test.shape[0],
                  len(yarn_data_matrix))
        # Taken from: https://github.com/lyst/lightfm/blob/master/examples/stackexchange/hybrid_crossvalidated.ipynb
        # Set the number of threads; you can increase this
        # if you have more physical cores available.
        NUM_THREADS = 2
        NUM_COMPONENTS = 30
        NUM_EPOCHS = 3
        ITEM_ALPHA = 1e-6
        # Let's fit a WARP model: these generally have the best performance.
        model = LightFM(loss='warp',
                        item_alpha=ITEM_ALPHA,
                        no_components=NUM_COMPONENTS)
        # Run 3 epochs and time it.
        model = model.fit(yarn_data_train, epochs=NUM_EPOCHS,
                          num_threads=NUM_THREADS)
        # Compute and print the AUC score
        train_auc = auc_score(model, yarn_data_train,
                              num_threads=NUM_THREADS).mean()
        print('Collaborative filtering train AUC: %s' % train_auc)
        # We pass in the train interactions to exclude them from predictions.
        # This is to simulate a recommender system where we do not
        # re-recommend things the user has already interacted with in the
        # train set.
        test_auc = auc_score(model, yarn_data_test,
                             train_interactions=yarn_data_train,
                             num_threads=NUM_THREADS).mean()
        print('Collaborative filtering test AUC: %s' % test_auc)
        pickle.dump(yarn_data_matrix, open(rc.RECOMMENDER_TRAINING, 'wb'))
        pickle.dump(model, open(rc.RECOMMENDER_MODEL, 'wb'))
    else:
        yarn_data_matrix = pickle.load(open(rc.RECOMMENDER_TRAINING, 'rb'))
        model = pickle.load(open(rc.RECOMMENDER_MODEL, 'rb'))
    translation_dict = pickle.load(open(rc.YARN_TRANSLATION_DATA, 'rb'))
    print(len(yarn_data_matrix))
    for matrix_id in range(len(yarn_data_matrix)):
        print(matrix_id)
        predictions = model.predict(matrix_id, yarn_data_matrix[matrix_id])
        matches = []
        predictions += abs(np.min(predictions))  # make non-negative
        _max = np.max(predictions)  # find max for normalization
        predictions /= _max  # normalize predictions
        for prediction in range(len(predictions)):
            if predictions[prediction] > 0.9:
                matches.append([translation_dict[prediction], prediction,
                                predictions[prediction]])
        print(translation_dict[matrix_id], matches)