import click
from termcolor import colored  # plain colour names; the original mixed termcolor with colorama's Fore constants

# `data` and `model` below are the project's preprocessing and modelling
# modules, imported at the top of the original file (not shown here).


def dialogue():
    """Interactive CLI that walks the user through the available recommenders."""
    click.echo('Welcome to AutoRec! Let me help you with the product recommendations.')
    name = click.prompt(colored('Enter your name', 'magenta'))
    csv = click.prompt(colored('Enter the path for your sample data', 'magenta'))
    click.clear()

    data_subset = data.get_data(csv)
    click.echo('Data being preprocessed')

    # Run each recommender only when the user answers 'Y'. The original code
    # passed the raw 'Y'/'N' answer to model.all_models() instead of a model name.
    if click.prompt(colored('Do you want to recommend items for users to purchase? [Y/N]', 'magenta'),
                    type=click.Choice(['Y', 'N'])) == 'Y':
        click.clear()
        results_user = model.all_models(csv, 'user_user')
        click.echo('Data being modeled')

    if click.prompt(colored('Do you want to recommend similar items for users to purchase? [Y/N]', 'magenta'),
                    type=click.Choice(['Y', 'N'])) == 'Y':
        click.clear()
        results_item = model.all_models(csv, 'item_item')

    if click.prompt(colored('Recommendations using matrix factorization? [Y/N]', 'magenta'),
                    type=click.Choice(['Y', 'N'])) == 'Y':
        results_matrix = model.all_models(csv, 'matrix')
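# --- A minimal sketch of how dialogue() might be wired up as a console entry
# point. The @click.command wrapper and the __main__ guard are assumptions; the
# original file only shows the dialogue() function itself.
@click.command()
def cli():
    dialogue()


if __name__ == '__main__':
    cli()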
import os
import sys

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

from data_preprocessing import get_data
from model import CNN

BATCH = 128

if __name__ == "__main__":
    print("\nLoading data...", flush=True)
    # sys.argv[1]: data directory, sys.argv[2]: path to the trained weights.
    data_set = get_data(os.path.join(sys.argv[1], "validation"), range(3430))
    data_loader = DataLoader(data_set, batch_size=BATCH, shuffle=False)

    print("\nLoading model...", flush=True)
    model = CNN().cuda()
    model.load_state_dict(torch.load(sys.argv[2]))

    print("\nStart predicting...", flush=True)
    model.eval()
    loss = nn.CrossEntropyLoss()  # softmax + cross entropy
    y = []
    pred_y = []
    with torch.no_grad():
        for i, data in enumerate(data_loader):
            pred_y += np.argmax(model(data[0].cuda()).cpu().data.numpy(), axis=1).tolist()
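    # --- The confusion_matrix / seaborn imports above are unused in the fragment
    # shown, which suggests an evaluation plot follows. A minimal sketch, assuming
    # the loop above also collects the ground-truth labels, e.g.
    #     y += data[1].numpy().tolist()
    cm = confusion_matrix(y, pred_y)
    sns.heatmap(cm, cmap="Blues")
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.savefig("confusion_matrix.png")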
    obj.backward()
    optimizer.step()

    # Record x after the gradient-ascent iterations; detach it from the graph
    # (gradients discarded) and squeeze out the dimensions equal to 1.
    filter_visualization = x.detach().cpu().squeeze()

    # The hook must be removed, otherwise it keeps firing on every forward pass
    # through that conv layer.
    hook_handle.remove()

    return filter_activations, filter_visualization


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print("\nLoading data...", flush=True)
    train_set = get_data(os.path.join(sys.argv[1], "training"), IMG_ID)
    x = []
    y = []
    for i in range(len(IMG_ID)):
        x_tmp, y_tmp = train_set.__getitem__(i)
        x.append(x_tmp)
        y.append(y_tmp)

    print("\nLoading model...", flush=True)
    model = CNN().to(device)
    model.load_state_dict(torch.load(sys.argv[2]))

    print("\nStart explaining...", flush=True)
    for cf in [[0, 25], [0, 55], [4, 64], [4, 120], [8, 45], [8, 85]]:  # presumably [conv layer id, filter id]
        print("\n{}".format(cf[1]))
        filter_activations, filter_visualization = filter_explaination(
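# --- A self-contained sketch of what the front half of filter_explaination()
# presumably does, inferred from the tail shown above: a forward hook captures
# one conv layer's activations, then gradient ascent on the input produces an
# image that maximally excites the chosen filter. The `model.cnn[cnnid]`
# indexing, iteration count and learning rate are assumptions, not the
# author's exact settings.
def filter_explaination_sketch(x, model, cnnid, filterid, iteration=100, lr=1.0):
    model.eval()
    layer_activations = None

    def hook(module, inputs, outputs):
        nonlocal layer_activations
        layer_activations = outputs

    # Capture the activations of the chosen conv layer during forward passes.
    hook_handle = model.cnn[cnnid].register_forward_hook(hook)

    # 1) Activations of the chosen filter for the given images.
    model(x.cuda())
    filter_activations = layer_activations[:, filterid, :, :].detach().cpu()

    # 2) Gradient ascent on the input image.
    x = x.cuda()
    x.requires_grad_()
    optimizer = torch.optim.Adam([x], lr=lr)
    for _ in range(iteration):
        optimizer.zero_grad()
        model(x)
        obj = -layer_activations[:, filterid, :, :].sum()  # maximise the activation
        obj.backward()
        optimizer.step()

    filter_visualization = x.detach().cpu().squeeze()
    hook_handle.remove()
    return filter_activations, filter_visualization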
def all_models(_file_path, modelname):
    data = ds.get_data(_file_path)
    data_surprise = data[['customer_id', 'product_id', 'star_rating']]. \
        rename(columns={'customer_id': 'userID',
                        'product_id': 'itemID',
                        'star_rating': 'rating'})
    reader = Reader(rating_scale=(1.0, 5.0))
    df_loaded = Dataset.load_from_df(data_surprise, reader)
    results_list = []

    # dataset-level features
    reviews = data.shape[0]
    n_users = data.customer_id.nunique()
    n_products = data.product_id.nunique()
    mean_rating = data.star_rating.mean()
    rating_std = data.star_rating.std()
    sparsity = reviews * 100 / (n_users * n_products)

    # Cross-validate the selected algorithm on RMSE / MAE.
    results = model_selection.cross_validate(
        select_model(df_loaded, model_selection=modelname), df_loaded,
        measures=['RMSE', 'MAE'], cv=5, verbose=False)

    # Single hold-out split for the ranking metrics and the recommendations.
    trainset, testset = train_test_split(df_loaded, test_size=.25)
    algo = select_model(df_loaded, model_selection=modelname)
    algo.fit(trainset)
    predictions = algo.test(testset)

    # Top-30 recommendations per user.
    top_n = rec.get_top_n(predictions, n=30)
    print('Recommendations for the user')
    dfo = pd.DataFrame(columns=['UserId', 'ItemId'])
    for i, (uid, user_ratings) in enumerate(top_n.items()):
        dfo.loc[i] = [uid, user_ratings]

    precisions, recalls = metrics.precision_recall_at_k(predictions, k=5, threshold=4)
    merge = dfo.merge(data, left_on='UserId', right_on='customer_id')
    print(merge[['UserId', 'product_title']])

    # Only one split is used here, so precision/recall are reported as-is
    # (the original divided by 5 as if averaging over five folds).
    map_k, mar_k = precisions, recalls

    # Collect results and metadata for this algorithm.
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series(map_k, index=['map_k']))
    tmp = tmp.append(pd.Series(mar_k, index=['mar_k']))
    tmp = tmp.append(pd.Series([str(_file_path)], index=['data']))
    tmp = tmp.append(pd.Series([str(modelname)], index=['Algorithm']))
    tmp = tmp.append(pd.Series(reviews, index=['reviews']))
    tmp = tmp.append(pd.Series(n_users, index=['n_users']))
    tmp = tmp.append(pd.Series(n_products, index=['n_products']))
    tmp = tmp.append(pd.Series(mean_rating, index=['mean_rating']))
    tmp = tmp.append(pd.Series(rating_std, index=['std_rating']))
    tmp = tmp.append(pd.Series(sparsity, index=['sparsity']))
    results_list.append(tmp)

    results_df = pd.DataFrame(results_list)
    return results_df
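# --- select_model() is referenced above but not shown. A minimal sketch of what
# it presumably does, assuming the model names map to standard Surprise
# algorithms (KNNBasic for the neighbourhood models, SVD for matrix
# factorisation); the similarity options are illustrative, not the author's.
from surprise import KNNBasic, SVD


def select_model_sketch(df_loaded, model_selection='user_user'):
    # df_loaded is accepted only to mirror the call sites above.
    if model_selection == 'user_user':
        return KNNBasic(sim_options={'name': 'cosine', 'user_based': True})
    if model_selection == 'item_item':
        return KNNBasic(sim_options={'name': 'cosine', 'user_based': False})
    # 'matrix' / 'matrix_fact'
    return SVD()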
def all_models(_file_path, _save_path):
    data = ds.get_data(_file_path, 'data/data_subset.csv', 0.99)
    data_surprise = data[['customer_id', 'product_id', 'star_rating']]. \
        rename(columns={'customer_id': 'userID',
                        'product_id': 'itemID',
                        'star_rating': 'rating'})
    reader = Reader(rating_scale=(1.0, 5.0))
    df_loaded = Dataset.load_from_df(data_surprise, reader)
    results_list = []

    # features
    reviews = data.shape[0]
    n_users = data.customer_id.nunique()
    n_products = data.product_id.nunique()
    mean_rating = data.star_rating.mean()
    rating_std = data.star_rating.std()
    sparsity = reviews * 100 / (n_users * n_products)

    for model in ['user_user', 'item_item', 'matrix_fact']:
        # Perform cross validation
        results = model_selection.cross_validate(
            select_model(df_loaded, model_selection=model), df_loaded,
            measures=['RMSE', 'MAE'], cv=5, verbose=False)

        # Precision and recall are added over all the splits, then averaged below
        kf = KFold(n_splits=5)
        map_k, mar_k = 0, 0
        algo = select_model(df_loaded, model_selection=model)
        for trainset, testset in kf.split(df_loaded):
            algo.fit(trainset)
            predictions = algo.test(testset)
            precisions, recalls = metrics.precision_recall_at_k(predictions, k=5, threshold=4)
            map_k += precisions
            mar_k += recalls

        # Get results & append algorithm name
        tmp = pd.DataFrame.from_dict(results).mean(axis=0)
        tmp = tmp.append(pd.Series(map_k / 5, index=['map_k']))
        tmp = tmp.append(pd.Series(mar_k / 5, index=['mar_k']))
        tmp = tmp.append(pd.Series([str(_file_path)], index=['data']))
        tmp = tmp.append(pd.Series([str(model)], index=['Algorithm']))
        # features
        tmp = tmp.append(pd.Series(reviews, index=['reviews']))
        tmp = tmp.append(pd.Series(n_users, index=['n_users']))
        tmp = tmp.append(pd.Series(n_products, index=['n_products']))
        tmp = tmp.append(pd.Series(mean_rating, index=['mean_rating']))
        tmp = tmp.append(pd.Series(rating_std, index=['std_rating']))
        tmp = tmp.append(pd.Series(sparsity, index=['sparsity']))
        results_list.append(tmp)

    print(results_list)
    results_df = pd.DataFrame(results_list)

    # saving the results file to folder
    if _save_path:
        results_df.to_csv(_save_path, mode='a', index=False)
    return results_df
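# --- A minimal usage sketch. 'sample_us.tsv' is the dataset name used elsewhere
# in the project; 'results/benchmark.csv' is an illustrative output path, not a
# file shipped with the repository.
if __name__ == '__main__':
    results_df = all_models('sample_us.tsv', 'results/benchmark.csv')
    print(results_df[['Algorithm', 'test_rmse', 'test_mae', 'map_k', 'mar_k']])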
import os
import sys

# Rotate sys.path so the script's own directory is searched last
# (keeps locally named modules from shadowing installed packages).
sys.path = sys.path[1:] + [sys.path[0]]

import numpy as np
import torch
import shap
import matplotlib.pyplot as plt

from data_preprocessing import get_data
from model import CNN

TOTAL_ID = range(9866)
B_SIZE = 32                         # background batch for the explainer
IMG_ID = [7741, 7096, 7290, 1658]   # images to explain

if __name__ == "__main__":
    index = np.random.permutation(TOTAL_ID)

    print("\nLoading data...", flush=True)
    train_set = get_data(os.path.join(sys.argv[1], "training"),
                         index[:B_SIZE].tolist() + IMG_ID)
    x = []
    y = []
    for i in range(B_SIZE + len(IMG_ID)):
        x_tmp, y_tmp = train_set.__getitem__(i)
        x.append(x_tmp)
        y.append(y_tmp)
    x = torch.stack(x).cuda()

    print("\nLoading model...", flush=True)
    model = CNN().cuda()
    model.load_state_dict(torch.load(sys.argv[2]))

    print("\nComputing...", flush=True)
    # Random images form the background distribution, the listed images are explained.
    e = shap.DeepExplainer(model, x[:B_SIZE])
    shap_values = e.shap_values(x[B_SIZE:])
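    # --- A minimal sketch of visualising the attributions; shap.image_plot()
    # expects channel-last arrays, so both the SHAP values (one array per class)
    # and the explained images are transposed from NCHW to NHWC first.
    shap_numpy = [np.transpose(s, (0, 2, 3, 1)) for s in shap_values]
    test_numpy = np.transpose(x[B_SIZE:].cpu().numpy(), (0, 2, 3, 1))
    shap.image_plot(shap_numpy, test_numpy, show=False)
    plt.savefig("shap_explanations.png")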
import os
import sys

import numpy as np
import torch
from skimage.segmentation import slic

from data_preprocessing import get_data
from model import CNN

# IMG_ID (the indices of the images to explain) is defined at the top of the
# original file; it is not part of this fragment.


def predict(inputs):
    # Convert numpy (b, h, w, c) images to the PyTorch (b, c, h, w) layout.
    model.eval()
    inputs = torch.FloatTensor(inputs).permute(0, 3, 1, 2)
    output = model(inputs.cuda())
    return output.detach().cpu().numpy()


def segmentation(inputs):
    # Superpixel segmentation used to perturb the image region by region.
    return slic(inputs, n_segments=100, compactness=1, sigma=1)


if __name__ == "__main__":
    print("\nLoading data...", flush=True)
    train_set = get_data(os.path.join(sys.argv[1], "validation"), IMG_ID)
    x = []
    y = []
    for i in range(len(IMG_ID)):
        x_tmp, y_tmp = train_set.__getitem__(i)
        x.append(x_tmp)
        y.append(y_tmp)
    x = torch.stack(x).cuda()
    y = torch.stack(y)

    print("\nLoading model...", flush=True)
    model = CNN().cuda()
    model.load_state_dict(torch.load(sys.argv[2]))
    model.eval()
    pred_y = np.argmax(model(x).cpu().data.numpy(), axis=1).tolist()
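    # --- A minimal sketch of feeding predict() and segmentation() to LIME,
    # assuming the lime package is available and the dataset yields channel-first
    # tensors scaled to [0, 1] (hence the permute back to HWC for LIME).
    import matplotlib.pyplot as plt
    from lime import lime_image
    from skimage.segmentation import mark_boundaries

    explainer = lime_image.LimeImageExplainer()
    for idx, image in enumerate(x):
        np_image = image.cpu().permute(1, 2, 0).numpy().astype(np.double)
        explanation = explainer.explain_instance(
            np_image, classifier_fn=predict, segmentation_fn=segmentation,
            top_labels=5, num_samples=1000)
        lime_img, mask = explanation.get_image_and_mask(
            pred_y[idx], positive_only=False, num_features=11, hide_rest=False)
        plt.imsave("lime_{}.png".format(idx), mark_boundaries(lime_img, mask))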
    return algo


import os
import io

# --- Streamlit front end ---
st.title('Welcome to RecServe!')
st.header('Let me help you with the product recommendations')

option1 = st.selectbox('Select the dataset file', ['sample_us.tsv'])
data = ds.get_data(option1)
st.write('The data is loaded')
st.write(data)

data_surprise = data[['customer_id', 'product_id', 'star_rating']]. \
    rename(columns={'customer_id': 'userID',
                    'product_id': 'itemID',
                    'star_rating': 'rating'})
reader = Reader(rating_scale=(1.0, 5.0))
df_loaded = Dataset.load_from_df(data_surprise, reader)
results_list = []
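# --- A minimal sketch of how the app might surface recommendations from here,
# reusing select_model() and rec.get_top_n() from the modelling module. The
# algorithm labels and widget layout are assumptions, not the shipped UI.
from surprise.model_selection import train_test_split

algo_name = st.selectbox('Choose a recommender',
                         ['user_user', 'item_item', 'matrix_fact'])
if st.button('Recommend'):
    trainset, testset = train_test_split(df_loaded, test_size=.25)
    algo = select_model(df_loaded, model_selection=algo_name)
    algo.fit(trainset)
    top_n = rec.get_top_n(algo.test(testset), n=10)
    # Show the first few users and their recommended item ids.
    st.write({uid: [iid for iid, _ in items]
              for uid, items in list(top_n.items())[:20]})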
def run(experiment):
    save_path = "checkpoints/" + experiment.name
    log_path = "tensorboard/train/" + experiment.name

    # create or clean directory
    for path in [save_path, log_path]:
        if not os.path.exists(path):
            os.makedirs(path)
        else:
            shutil.rmtree(path)
            os.makedirs(path)
    save_path += "/dev"

    # log git commit hash
    repo = git.Repo(search_parent_directories=True)
    sha = repo.head.object.hexsha
    file = open(log_path + "/git_commit_" + sha, 'w')
    file.close()

    (epochs, input_batch_size, rnn_size, num_layers, encoding_embedding_size,
     decoding_embedding_size, learning_rate, keep_probability, num_samples,
     reward) = map(experiment.hyperparams.get,
                   ('epochs', 'input_batch_size', 'rnn_size', 'num_layers',
                    'encoding_embedding_size', 'decoding_embedding_size',
                    'learning_rate', 'keep_probability', 'num_samples', 'reward'))

    ### prepare data ###
    (train_source_int_text, train_target_int_text), \
        (valid_source_int_text, valid_target_int_text), \
        (source_vocab_to_int, target_vocab_to_int), \
        (source_int_to_vocab, target_int_to_vocab) = data_preprocessing.get_data(
            experiment.data["dataset"], experiment.data["folder"],
            experiment.data["train_source_file"], experiment.data["train_target_file"],
            experiment.data["dev_source_file"], experiment.data["dev_target_file"],
            experiment.tokenization)

    max_source_sentence_length = max([len(sentence) for sentence in train_source_int_text])

    train_source = train_source_int_text
    train_target = train_target_int_text
    valid_source = valid_source_int_text
    valid_target = valid_target_int_text

    # shuffle
    rnd = random.Random(1234)
    train_combined = list(zip(train_source, train_target))
    rnd.shuffle(train_combined)
    train_source, train_target = zip(*train_combined)
    valid_combined = list(zip(valid_source, valid_target))
    rnd.shuffle(valid_combined)
    valid_source, valid_target = zip(*valid_combined)

    # set reward function
    if reward == "levenshtein":
        reward_func = lambda ref_hyp: -textdistance.levenshtein(ref_hyp[0], ref_hyp[1])
    elif reward == "jaro-winkler":
        reward_func = lambda ref_hyp: textdistance.JaroWinkler()(ref_hyp[0], ref_hyp[1])
    elif reward == "hamming":
        reward_func = lambda ref_hyp: -textdistance.hamming(ref_hyp[0], ref_hyp[1])

    if experiment.train_method == 'MLE':
        graph_batch_size = input_batch_size
    elif experiment.train_method == 'reinforce' or experiment.train_method == 'reinforce_test':
        graph_batch_size = num_samples

    ### prepare model ###
    tf.reset_default_graph()  # maybe need?
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        model = rnn_model.RNN(graph_batch_size, max_source_sentence_length,
                              source_vocab_to_int, target_vocab_to_int,
                              encoding_embedding_size, decoding_embedding_size,
                              rnn_size, num_layers)

    eval_batch_size = 128
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        eval_model = rnn_model.RNN(eval_batch_size, max_source_sentence_length,
                                   source_vocab_to_int, target_vocab_to_int,
                                   encoding_embedding_size, decoding_embedding_size,
                                   rnn_size, num_layers, False)

    early_stopping = True

    ### train model ###
    if experiment.train_method == 'reinforce_test':
        train.reinforce_test(model, experiment.start_checkpoint, source_vocab_to_int,
                             learning_rate, keep_probability, graph_batch_size,
                             target_int_to_vocab, source_int_to_vocab,
                             valid_source, valid_target)
    else:
        train.train(experiment.name, experiment.train_method, model, epochs,
                    input_batch_size, train_source, train_target, valid_source,
                    valid_target, learning_rate, keep_probability, save_path,
                    experiment.start_checkpoint, target_int_to_vocab,
                    source_int_to_vocab, source_vocab_to_int, log_path,
                    graph_batch_size, experiment.max_hours, eval_model,
                    eval_batch_size, reward_func, early_stopping)
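# --- The reward functions in run() follow the "higher is better" convention, so
# the edit-distance metrics are negated while Jaro-Winkler (already a
# similarity) is used directly. A small, self-contained illustration using
# textdistance (the strings are examples, not project data):
import textdistance

ref, hyp = "kitten", "sitting"
print(-textdistance.levenshtein(ref, hyp))          # -3, three edits apart
print(textdistance.JaroWinkler()(ref, hyp))         # ~0.746, a similarity in [0, 1]
print(-textdistance.hamming("karolin", "kathrin"))  # -3, for same-length strings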
import sys
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from data_preprocessing import get_data
from A1 import A1
from A2 import A2
from B1 import B1
from B2 import B2

# ======================================================================================================================
# Data preprocessing
(smile_X_train, smile_y_train, smile_X_test, smile_y_test,
 gender_X_train, gender_y_train, gender_X_test, gender_y_test,
 eye_X_train, eye_y_train, eye_X_test, eye_y_test,
 face_X_train, face_y_train, face_X_test, face_y_test) = get_data()

# ======================================================================================================================
# Task A1
model_A1 = A1.create_model()  # Build the model object.
acc_A1_train, model_A1_trained = A1.train_model(
    model_A1, gender_X_train, gender_y_train)  # Train on the training set (fine-tune on a validation split).
acc_A1_test = A1.test_model(model_A1_trained, gender_X_test, gender_y_test)  # Evaluate on the test set.
# Clean up memory/GPU etc. here if necessary.

# ======================================================================================================================
# Task A2
model_A2 = A2.create_model()
acc_A2_train, model_A2_trained = A2.train_model(model_A2, smile_X_train, smile_y_train)
acc_A2_test = A2.test_model(model_A2_trained, smile_X_test, smile_y_test)
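# ======================================================================================================================
# A sketch of how Tasks B1 and B2 presumably continue the same
# create/train/test pattern. Which task uses the face_* split and which uses
# the eye_* split is an assumption, as is the final summary print.
model_B1 = B1.create_model()
acc_B1_train, model_B1_trained = B1.train_model(model_B1, face_X_train, face_y_train)
acc_B1_test = B1.test_model(model_B1_trained, face_X_test, face_y_test)

model_B2 = B2.create_model()
acc_B2_train, model_B2_trained = B2.train_model(model_B2, eye_X_train, eye_y_train)
acc_B2_test = B2.test_model(model_B2_trained, eye_X_test, eye_y_test)

print('TA1:{},{};TA2:{},{};TB1:{},{};TB2:{},{};'.format(
    acc_A1_train, acc_A1_test, acc_A2_train, acc_A2_test,
    acc_B1_train, acc_B1_test, acc_B2_train, acc_B2_test))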