def item_mean_test(ratings, min_num_ratings, verbose=False, p_test=0.1):
    """
    Split the data set into train and test sets and compute the RMSE obtained
    when every test rating is predicted by the train mean of its item.

    Items (rows) that have no rating in the train set are skipped: they
    contribute nothing to the accumulated squared error, although the final
    normalisation still divides by the total number of test ratings.

    :param ratings: initial data set (sparse matrix of size nxp, n items and p users)
    :param min_num_ratings: all users and items must have at least
        min_num_ratings per user and per item to be kept
    :param verbose: True if user wants details to be printed
    :param p_test: share of the data set to be dedicated to the test set
    :return: RMSE value of the prediction using item means as predictions
    """
    _, train, test = split_data(ratings,
                                min_num_ratings,
                                verbose=verbose,
                                p_test=p_test)

    squared_error_total = 0
    for row in range(train.shape[0]):
        # Non-zero train ratings of the current item.
        train_row = train[row]
        rated_in_train = train_row[train_row.nonzero()]

        # Without any train rating there is no mean to predict with.
        if rated_in_train.shape[1] == 0:
            continue
        item_mean = rated_in_train.mean()

        # Squared error of that mean against the item's non-zero test ratings.
        test_row = test[row]
        rated_in_test = test_row[test_row.nonzero()].todense()
        squared_error_total += calculate_mse(rated_in_test, item_mean)

    return np.sqrt(float(squared_error_total) / test.nnz)
def finding_weighted_average(test, predictions_a, predictions_b):
    """
    Compute the weighted average of two predictions A and B and find the
    weight that minimizes the RMSE on the observed (non-zero) test entries.

    The candidate weights are the 101 evenly spaced values in [0, 1]; for a
    weight ``a`` the blended prediction is ``a * A + (1 - a) * B``. The best
    RMSE found is printed before returning.

    :param test: test dataset of shape (num_items, num_users)
    :param predictions_a: matrix prediction A
    :param predictions_b: matrix prediction B
    :return: the weight for A that minimizes the RMSE (0 if no candidate
        produced a comparable RMSE, e.g. every RMSE is NaN)
    """
    # The observed test entries do not depend on the weight, so they are
    # looked up once instead of on each of the 101 iterations.
    x, y = test.nonzero()
    observed = test[x, y]
    num_observed = test.nnz

    # Start from +inf rather than an arbitrary cap so that the true minimum is
    # always found, even when every candidate RMSE is large.
    rmse_min = np.inf
    a_min = 0
    for value in np.linspace(0, 1.0, num=101):
        # Compute the weighted average of the two predictions
        prediction_from_two = (np.multiply(predictions_a, value) +
                               np.multiply(predictions_b, 1 - value))

        # Calculate the RMSE of the prediction
        rmse = np.sqrt(
            calculate_mse(observed, prediction_from_two[x, y]).sum() /
            num_observed)

        # Check whether it is the optimal weight
        if rmse_min > rmse:
            rmse_min = rmse
            a_min = value

    print("RMSE={}".format(rmse_min))
    return a_min
def baseline_global_mean(train, test):
    """
    Baseline method: predict every rating with the global mean of the
    non-zero train ratings.

    :param train: train data array of shape (num_items, num_users)
    :param test: test data array of shape (num_items, num_users)
    :return: tuple (global mean of the train ratings, RMSE on the train set,
        RMSE on the test set)
    """
    # Sum of all ratings divided by the number of stored (non-zero) ratings.
    mean_rating = train.sum() / train.nnz

    def _mse_against_mean(data):
        # Mean squared error of the constant prediction over the non-zero
        # entries of ``data``.
        nz_indices = data.nonzero()
        return 1 / data.nnz * calculate_mse(data[nz_indices].toarray()[0],
                                            mean_rating)

    test_mse = _mse_against_mean(test)
    train_mse = _mse_against_mean(train)
    return mean_rating, np.sqrt(train_mse), np.sqrt(test_mse)
def baseline_item_mean(train, test):
    """
    Baseline method: predict every rating with the train mean of its item.

    :param train: train data array of shape (num_items, num_users)
    :param test: test data array of shape (num_items, num_users)
    :return: tuple (array of item means with shape = (num_items,), RMSE on
        the train set, RMSE on the test set)
    """
    # Per-item mean: row sums divided by the number of stored ratings per row.
    # An item without any train rating divides by zero here.
    row_sums = train.sum(axis=1).T
    row_counts = train.getnnz(axis=1)
    means = np.array(row_sums / row_counts)[0]

    def _mse_against_item_means(data):
        # Each non-zero entry is compared with the mean of its row (item).
        nz_idx = data.nonzero()
        return 1 / len(nz_idx[0]) * calculate_mse(data[nz_idx].toarray()[0],
                                                  means[nz_idx[0]])

    test_mse = _mse_against_item_means(test)
    train_mse = _mse_against_item_means(train)
    return means, np.sqrt(train_mse), np.sqrt(test_mse)
def compute_rmse_global_mean(train, test):
    """
    Compute the RMSE obtained when the value returned by ``global_mean(train)``
    is used as the prediction for every non-zero entry of test.

    :param train: training data set (sparse matrix of size nxp, n items and p users)
    :param test: testing data set (sparse matrix of size nxp, n items and p users)
    :return: RMSE value of the prediction
    """
    train_mean = global_mean(train)
    observed_test = test[test.nonzero()].todense()
    squared_error = calculate_mse(observed_test, train_mean)
    # Normalise by the number of stored test ratings before taking the root.
    return np.sqrt(float(squared_error) / test.nnz)
def test(inputbatch, targetbatch, encoder, decoder, num_layers, criterion,
         SOS_TOKEN, teacher_forcing_ratio, logger, sliding_attention):
    """
    Run the encoder/decoder pair over one batch without any parameter update
    and collect the loss, per-step errors, predictions and attention vectors.

    @param: inputbatch: (batchsize X sequence_length X input_size) Variable which represents data fed into the encoder.
    @param: targetbatch: (batchsize X sequence_length X input_size) Variable which represents data fed into the decoder
            (during teacher forcing) or data that is not fed.
    @param: encoder: The encoder network object being tested.
    @param: decoder: The decoder network object being tested.
    @param: num_layers: Forwarded to encoder.initHidden as `numlayers`.
    @param: criterion: Used only during validation to record the validation loss. But for testing as well, we can just
            supply a criterion. May be None, in which case no loss is accumulated and 0.0 is returned as the loss.
    @param: SOS_TOKEN: Scalar used to fill the first decoder input.
    @param: teacher_forcing_ratio: Probability with which the target sequence (instead of the decoder's own output)
            is fed back as the next decoder input.
    @param: logger: Object exposing info() and debug().
    @param: sliding_attention: If truthy, after every decoder step the oldest entry of the hidden-state window given
            to the decoder is dropped and the decoder's last-layer hidden state is appended.
    @return: tuple (loss as a Python number, result of calculate_mse_tensor on predictions vs. targets,
             anomaly threshold = 1.1 * max of the per-step calculate_mse values, decoder predictions tensor,
             stacked attention vectors).

    NOTE(review): every tensor fed to the networks is moved with .cuda(), so this function requires a CUDA device.
    """
    logger.info(
        "inside test(), inputbatch.size() = {}, targetbatch.size() = {}, SOS_TOKEN = {}"
        .format(inputbatch.size(), targetbatch.size(), SOS_TOKEN))

    batch_size = inputbatch.size(0)
    encoder_hidden = encoder.initHidden(numlayers=num_layers,
                                        batchsize=batch_size)

    # The original implementation sampled the teacher-forcing decision twice
    # and only the second draw was ever used. The first draw is kept (result
    # discarded) so seeded runs consume the global RNG exactly as before.
    random.random()

    if encoder.rnnobject == "LSTM":
        encoder_cellstate = encoder.initHidden(numlayers=num_layers,
                                               batchsize=batch_size)

    sequence_length = inputbatch.size(1)  # sequence length of the input tensor
    target_sequence_length = targetbatch.size(1)  # sequence length of the target tensor
    hidden_size = encoder_hidden.size(2)
    loss = 0

    input_tensor = inputbatch  # batch_size X seq_size X input_size
    target_tensor = targetbatch  # batch_size X seq_size X input_size

    # Last-layer encoder hidden state for every input time step.
    hidden_states = torch.zeros(input_tensor.size(0), sequence_length,
                                hidden_size)
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    # Encoder pass: feed the input one time step at a time.
    for ei in range(sequence_length):
        inp_t = input_tensor[:, ei, :].contiguous().view(
            input_tensor.size(0), 1, inputbatch.size(2))
        inp_t = inp_t.cuda()  ## CUDA
        logger.debug("input_t.size() = {}".format(inp_t.size()))
        encoder_hidden = encoder_hidden.cuda()  ## CUDA
        logger.debug("encoder_hidden.size() = {}".format(
            encoder_hidden.size()))
        if encoder.rnnobject == "LSTM":
            encoder_cellstate = encoder_cellstate.cuda()  ## CUDA
            encoder_output, (encoder_hidden, encoder_cellstate) = encoder(
                inp_t, encoder_hidden, encoder_cellstate)
        else:
            encoder_output, encoder_hidden = encoder(inp_t, encoder_hidden)
        hidden_states[:, ei, :] = encoder_hidden[-1, :, :].unsqueeze(dim=0)

    # First input to the decoder. Size: batch_size,1,input_size
    decoder_input = Variable(torch.FloatTensor(
        [[SOS_TOKEN] * inputbatch.size(2)] * inputbatch.size(0)),
                             requires_grad=True)
    decoder_input = decoder_input.view(decoder_input.size(0), 1,
                                       decoder_input.size(1))

    # The decoder starts from the final encoder state.
    decoder_hidden = encoder_hidden
    if decoder.rnnobject == "LSTM":
        decoder_cellstate = encoder_cellstate

    # Predictions are stored here for plotting and analysis.
    decoder_predictions = torch.zeros(target_tensor.size(0),
                                      target_tensor.size(1),
                                      target_tensor.size(2))

    prev_hidden_states = hidden_states.cuda()
    mse_predictions = list()
    attention_vectors = list()

    # Decoder pass: one step per target time step.
    # NOTE(review): an older comment claimed there is no teacher forcing for
    # validation/testing, but use_teacher_forcing is honoured below.
    for di in range(target_sequence_length):
        decoder_input = decoder_input.cuda()  ## CUDA
        logger.debug("decoder_input.size() = {}".format(decoder_input.size()))
        if decoder.rnnobject == "LSTM":
            decoder_output, (
                decoder_hidden, decoder_cellstate
            ), attentional_hidden_state, attention_vector = decoder(
                decoder_input, prev_hidden_states, decoder_hidden,
                decoder_cellstate)
        else:
            decoder_output, decoder_hidden, attentional_hidden_state, attention_vector = decoder(
                decoder_input, prev_hidden_states, decoder_hidden)
        attention_vectors.append(attention_vector)

        # Choose the next decoder input before decoder_output is reshaped, so
        # the fed-back output keeps its 3-D shape.
        if use_teacher_forcing:
            decoder_input = target_tensor[:, di, :].contiguous().view(
                target_tensor.size(0), 1,
                target_tensor.size(2))  # Teacher forcing
        else:
            decoder_input = decoder_output

        # change size from m X n X k to m X k
        decoder_output = decoder_output.view(decoder_output.size(0),
                                             decoder_output.size(2))
        if criterion is not None:
            loss += criterion(decoder_output, target_tensor[:, di, :])

        logger.debug(
            "Input to calculate_mse function = decoder_output_size = {}, target_tensor_size = {}"
            .format(decoder_output.size(), target_tensor[:, di, :].size()))
        # Project-specific squared-error helper, tracked per step in addition
        # to (and independently of) the criterion.
        _mse = calculate_mse(decoder_output, target_tensor[:, di, :])
        mse_predictions.append(_mse)

        # predictions of the decoder for sequence step `di`
        decoder_predictions[:, di, :] = decoder_output

        logger.debug(
            "prev_hidden_state.size before cat = {},decoder_hidden = {}".
            format(prev_hidden_states.size(), decoder_hidden.size()))
        if sliding_attention:
            # Slide the window: drop the oldest step, append the decoder's
            # last-layer hidden state (the attentional hidden state could be
            # used instead if required later).
            prev_hidden_states = torch.cat(
                (prev_hidden_states[:, 1:, :],
                 decoder_hidden[-1, :, :].unsqueeze(dim=0).permute(1, 0, 2)),
                dim=1)
            logger.debug("prev_hidden_state.size = {}".format(
                prev_hidden_states.size()))

    attention_tensor = torch.stack(attention_vectors)
    logger.debug(
        "Attention Tensor Size = {}, Attention List Length = {}, Single Attention Vector Size = {}"
        .format(attention_tensor.size(), len(attention_vectors),
                attention_vectors[0].size()))

    # Anomaly threshold: 10% above the largest per-step error seen here.
    anomaly_threshold = 1.1 * np.max(mse_predictions)

    # Mean-squared error per timestep, as computed by calculate_mse_tensor.
    mse_per_timestep = calculate_mse_tensor(
        decoder_predictions.detach().numpy(),
        target_tensor.cpu().detach().numpy())

    # Without a criterion `loss` is still the plain number 0, which has no
    # .item(); fall back to a float so callers always get a Python number.
    loss_value = loss.item() if hasattr(loss, "item") else float(loss)
    return loss_value, mse_per_timestep, anomaly_threshold, decoder_predictions, attention_tensor