def item_mean_test(ratings, min_num_ratings, verbose=False, p_test=0.1):
    """
    Splits the data set in train and test and compute the RMSE using as prediction the item mean.
    :param ratings: initial data set (sparse matrix of size nxp, n items and p users)
    :param min_num_ratings: all users and items must have at least min_num_ratings per user and per item to be kept
    :param verbose: True if user wants details to be printed
    :param p_test share of the data set to be dedicated to test set
    :return: RMSE value of the prediction using item means as a predictions
b    """
    _, train, test = split_data(ratings,
                                min_num_ratings,
                                verbose=verbose,
                                p_test=p_test)
    cumulated_rmse = 0

    # accumulate the squared error contributed by each item
    for item in range(train.shape[0]):

        # compute the mean of the non-zero ratings of the current item
        current_train_ratings = train[item]
        current_non_zero_train_ratings = current_train_ratings[
            current_train_ratings.nonzero()]

        if current_non_zero_train_ratings.shape[1] != 0:
            mean = current_non_zero_train_ratings.mean()
            # accumulate the squared error over the non-zero test ratings of the current item
            current_test_ratings = test[item]
            current_non_zero_test_ratings = current_test_ratings[
                current_test_ratings.nonzero()].todense()
            cumulated_rmse += calculate_mse(current_non_zero_test_ratings,
                                            mean)

    cumulated_rmse = np.sqrt(float(cumulated_rmse) / test.nnz)

    return cumulated_rmse
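
# The helpers split_data and calculate_mse are assumed to be defined elsewhere in
# the project. Below is a minimal sketch of calculate_mse, consistent with how the
# recommender baselines in this listing use it (despite its name it returns the
# *sum* of squared errors; callers divide by the number of ratings before taking
# the square root):
def calculate_mse(real_labels, predictions):
    """Return the sum of squared differences between real_labels and predictions."""
    t = real_labels - predictions
    return 1.0 * t.dot(t.T)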
def finding_weighted_average(test, predictions_a, predictions_b):
    """ Compute the weighted average of two predictions A and B and find the weight that minimizes the RMSE

    :param test: test dataset of shape (num_items, num_users)
    :param predictions_a: matrix prediction A
    :param predictions_b: matrix prediction B
    :return: the weight applied to predictions A that minimizes the RMSE
    """
    # Initialization: candidate weights for A on a grid from 0 to 1 with step 0.01
    a = np.linspace(0, 1.0, num=101)
    rmse_min = np.inf
    a_min = 0

    for value in a:
        # Compute the weighted average of the two predictions
        prediction_from_two = np.multiply(predictions_a, value) + np.multiply(
            predictions_b, 1 - value)
        x, y = test.nonzero()

        # Calculate the RMSE of the prediction
        rmse = np.sqrt(
            calculate_mse(test[x, y], prediction_from_two[x, y]).sum() /
            (test.nnz))

        # Check whether it is the optimal weight
        if rmse_min > rmse:
            rmse_min = rmse
            a_min = value

    print("RMSE={}".format(rmse_min))
    return a_min
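
# Hypothetical usage sketch (the data below is synthetic and only illustrates the
# expected shapes): blend two dense prediction matrices on a small sparse test set.
import numpy as np
import scipy.sparse as sp

test_small = sp.lil_matrix((3, 4))  # 3 items, 4 users
test_small[0, 1] = 4
test_small[2, 3] = 5
pred_a = np.full((3, 4), 3.5)       # e.g. predictions from one model
pred_b = np.full((3, 4), 4.0)       # e.g. predictions from another model
best_weight = finding_weighted_average(test_small, pred_a, pred_b)
print("optimal weight for prediction A: {}".format(best_weight))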
def baseline_global_mean(train, test):
    """ Baseline method: use the global mean.

    :param train: train data array of shape (num_items, num_users)
    :param test: test data array of shape (num_items, num_users)
    :return: global_mean (the average of all non-zero ratings), the RMSE on the train set, and the RMSE on the test set
    """
    # Compute the global mean
    global_mean = train.sum() / train.nnz

    # Compute the RMSE for test and train
    tst_nz_indices = test.nonzero()
    mse_test = 1 / test.nnz * calculate_mse(test[tst_nz_indices].toarray()[0],
                                            global_mean)
    tr_nz_indices = train.nonzero()
    mse_train = 1 / train.nnz * calculate_mse(
        train[tr_nz_indices].toarray()[0], global_mean)
    return global_mean, np.sqrt(mse_train), np.sqrt(mse_test)
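
# Hypothetical usage sketch with tiny synthetic lil matrices; in the real pipeline
# train and test would come from split_data on the full ratings matrix.
import scipy.sparse as sp

train_toy = sp.lil_matrix((2, 3))
train_toy[0, 0], train_toy[1, 2] = 4, 2
test_toy = sp.lil_matrix((2, 3))
test_toy[0, 1] = 5
g_mean, rmse_tr, rmse_te = baseline_global_mean(train_toy, test_toy)
print("global mean = {:.2f}, train RMSE = {:.4f}, test RMSE = {:.4f}".format(
    g_mean, rmse_tr, rmse_te))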
def baseline_item_mean(train, test):
    """ Baseline method: use item means as the prediction.

    :param train: train data array of shape (num_items, num_users)
    :param test: test data array of shape (num_items, num_users)
    :return: array of item means with shape (num_items,), the RMSE on the train set, and the RMSE on the test set
    """
    # Compute the mean of the non-zero ratings of every item (axis 1 runs over users)
    means = np.array(train.sum(axis=1).T / train.getnnz(axis=1))[0]

    # Compute the RMSE for test and train
    tst_nz_idx = test.nonzero()
    mse_test = 1 / len(tst_nz_idx[0]) * calculate_mse(
        test[tst_nz_idx].toarray()[0], means[tst_nz_idx[0]])
    tr_nz_idx = train.nonzero()
    mse_train = 1 / len(tr_nz_idx[0]) * calculate_mse(
        train[tr_nz_idx].toarray()[0], means[tr_nz_idx[0]])
    return means, np.sqrt(mse_train), np.sqrt(mse_test)
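
# Hypothetical usage sketch on the same kind of tiny synthetic data (items on the
# rows, users on the columns); real data would again come from split_data.
import scipy.sparse as sp

train_toy = sp.lil_matrix((2, 3))
train_toy[0, 0], train_toy[0, 1], train_toy[1, 2] = 4, 2, 5
test_toy = sp.lil_matrix((2, 3))
test_toy[0, 2], test_toy[1, 0] = 3, 4
item_means, rmse_tr, rmse_te = baseline_item_mean(train_toy, test_toy)
print("item means = {}, train RMSE = {:.4f}, test RMSE = {:.4f}".format(
    item_means, rmse_tr, rmse_te))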
def compute_rmse_global_mean(train, test):
    """
    Compute the RMSE obtained by using the global mean of the non-zero values of train to predict the values of test.
    :param train: training data set (sparse matrix of size n x p, n items and p users)
    :param test: testing data set (sparse matrix of size n x p, n items and p users)
    :return: RMSE value of the prediction
    """
    mean = global_mean(train)
    mse = calculate_mse(test[test.nonzero()].todense(), mean)
    rmse = np.sqrt(float(mse) / test.nnz)
    return rmse
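
# global_mean is assumed to be defined elsewhere in the project; a minimal sketch
# consistent with how it is used above (mean over the non-zero entries of the
# sparse train matrix):
def global_mean(train):
    """Return the mean of the non-zero ratings of a sparse matrix."""
    return train.sum() / train.nnz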
def test(inputbatch, targetbatch, encoder, decoder, num_layers, criterion,
         SOS_TOKEN, teacher_forcing_ratio, logger, sliding_attention):
    """
		@param: inputbatch: (batchsize X sequence_length X input_size) Variable which represents data fed into the encoder.
		@param: targetbatch: (batchsize X sequence_length X input_size) Variable which represents data fed into the decoder (during teacher forcing) or data that is not fed.
		@param: encoder: The encoder network object being tested.
		@param: decoder:  The decoder network object being tested.
		@param: criterion: Used only during validation to record the validation loss. But for testing as well, we can just supply a criterion.
	"""

    logger.info(
        "inside test(), inputbatch.size() = {}, targetbatch.size() = {}, SOS_TOKEN = {}"
        .format(inputbatch.size(), targetbatch.size(), SOS_TOKEN))
    encoder_hidden = encoder.initHidden(numlayers=num_layers,
                                        batchsize=inputbatch.size(0))

    if encoder.rnnobject == "LSTM":
        encoder_cellstate = encoder.initHidden(numlayers=num_layers,
                                               batchsize=inputbatch.size(0))

    sequence_length = inputbatch.size(1)  # sequence length of the input tensor
    target_sequence_length = targetbatch.size(1)  # sequence length of the target tensor
    hidden_size = encoder_hidden.size(2)
    loss = 0

    # Alias the input/target batches; both are batch_size X seq_size X input_size.
    input_tensor = inputbatch
    target_tensor = targetbatch
    hidden_states = torch.zeros(input_tensor.size(0), sequence_length,
                                hidden_size)

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    # Iterate over each timestep of the input tensor, i.e. over the sequence dimension.
    for ei in range(sequence_length):
        inp_t = input_tensor[:, ei, :].contiguous().view(
            input_tensor.size(0), 1, inputbatch.size(2))
        inp_t = inp_t.cuda()  ## CUDA
        logger.debug("input_t.size() = {}".format(inp_t.size()))
        encoder_hidden = encoder_hidden.cuda()  ## CUDA
        logger.debug("encoder_hidden.size() = {}".format(
            encoder_hidden.size()))
        if encoder.rnnobject == "LSTM":
            encoder_cellstate = encoder_cellstate.cuda()  ## CUDA
            encoder_output, (encoder_hidden, encoder_cellstate) = encoder(
                inp_t, encoder_hidden, encoder_cellstate)
        else:
            encoder_output, encoder_hidden = encoder(inp_t, encoder_hidden)

        hidden_states[:, ei, :] = encoder_hidden[-1, :, :].unsqueeze(dim=0)

    #First input to the decoder. Size: batch_size,1,input_size
    decoder_input = Variable(torch.FloatTensor(
        [[SOS_TOKEN] * inputbatch.size(2)] * inputbatch.size(0)),
                             requires_grad=True)

    #print("Size of decoder input before reshape",decoder_input.size())
    decoder_input = decoder_input.view(decoder_input.size(0), 1,
                                       decoder_input.size(1))
    decoder_hidden = encoder_hidden  #First hidden input to the decoder is the last hidden state of the encoder.
    if decoder.rnnobject == "LSTM":
        decoder_cellstate = encoder_cellstate  #First Cell State of the decoder is the last cell state of the encoder.

    # Iterate over the target sequence to obtain the decoder outputs; teacher forcing is applied with probability teacher_forcing_ratio, as sampled above.
    decoder_predictions = torch.zeros(
        target_tensor.size(0), target_tensor.size(1), target_tensor.size(2)
    )  #we will store the validation predictions in this tensor for plotting and analysis.
    prev_hidden_states = hidden_states.cuda()

    mse_predictions = list()
    mse_per_timestep = list()

    attention_vectors = list()
    contextvector = None
    for di in range(target_sequence_length):
        decoder_input = decoder_input.cuda()  ## CUDA
        logger.debug("decoder_input.size() = {}".format(decoder_input.size()))
        if decoder.rnnobject == "LSTM":
            decoder_output, (
                decoder_hidden, decoder_cellstate
            ), attentional_hidden_state, attention_vector = decoder(
                decoder_input, prev_hidden_states, decoder_hidden,
                decoder_cellstate)

        else:
            decoder_output, decoder_hidden, attentional_hidden_state, attention_vector = decoder(
                decoder_input, prev_hidden_states, decoder_hidden)
        attention_vectors.append(attention_vector)
        if use_teacher_forcing:
            decoder_input = target_tensor[:, di, :].contiguous().view(
                target_tensor.size(0), 1,
                target_tensor.size(2))  # Teacher forcing
        else:
            decoder_input = decoder_output

        decoder_output = decoder_output.view(
            decoder_output.size(0),
            decoder_output.size(2))  #change size from m X n X k to m X k
        if criterion is not None:
            loss += criterion(decoder_output, target_tensor[:, di, :])

        logger.debug(
            "Input to calculate_mse function = decoder_output_size = {}, target_tensor_size = {}"
            .format(decoder_output.size(), target_tensor[:, di, :].size()))
        _mse = calculate_mse(
            decoder_output, target_tensor[:, di, :]
        )  #inputs: decoder_output.size() =  target_tensor[:,di,:] =  batch_size X num_columns;
        mse_predictions.append(
            _mse
        )  #The MSELoss criterion doesn't for some reason seem to be giving the correct results so we use our own mean squared error criterion per batch.
        decoder_predictions[:,
                            di, :] = decoder_output  #predictions of the decoder for sequence step `di` on the validation data.
        logger.debug(
            "prev_hidden_state.size before cat = {},decoder_hidden = {}".
            format(prev_hidden_states.size(), decoder_hidden.size()))
        if sliding_attention:
            prev_hidden_states = torch.cat(
                (prev_hidden_states[:, 1:, :],
                 decoder_hidden[-1, :, :].unsqueeze(dim=0).permute(1, 0, 2)),
                dim=1
            )  #for now just use decoder hidden state we can also use attentional_hidden state if required later.

        logger.debug("prev_hidden_state.size = {}".format(
            prev_hidden_states.size()))

    attention_tensor = torch.stack(attention_vectors)
    logger.debug(
        "Attention Tensor Size = {}, Attention List Length = {}, Single Attention Vector Size = {}"
        .format(attention_tensor.size(), len(attention_vectors),
                attention_vectors[0].size()))

    # Anomaly threshold: 1.1 times the largest per-step batch MSE observed above.
    anomaly_threshold = 1.1 * np.max(mse_predictions)
    # Mean squared error per timestep, computed on the full prediction tensor.
    mse_per_timestep = calculate_mse_tensor(
        decoder_predictions.detach().numpy(),
        target_tensor.cpu().detach().numpy())
    loss_value = loss.item() if criterion is not None else 0.0
    return loss_value, mse_per_timestep, anomaly_threshold, decoder_predictions, attention_tensor
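
# calculate_mse_tensor is assumed to be defined elsewhere in the project. A minimal
# sketch consistent with how it is called above: both arguments are numpy arrays of
# shape (batch_size, seq_len, num_features) and the result is one MSE per timestep.
import numpy as np

def calculate_mse_tensor(predictions, targets):
    """Return a list with the mean squared error of each timestep."""
    seq_len = predictions.shape[1]
    return [float(np.mean((predictions[:, t, :] - targets[:, t, :]) ** 2))
            for t in range(seq_len)]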