def rate(df_train_data: pd.DataFrame, U: np.ndarray, M: np.ndarray, mu: float,
         bu: np.ndarray, bi: np.ndarray):

    rmses: np.ndarray = np.zeros(paths.gen_size)
    best: float = float('inf')
    ind = 0

    for i in range(0, paths.gen_size):
        prediction_matrix = make_predictions(U[i], M[i], mu, bu[i], bi[i])
        rmses[i] = svd_base.calc_rmse(df_train_data, prediction_matrix)
        if rmses[i] < best:
            ind = i
            best = rmses[i]

    return rmses, best, ind
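
# make_predictions is not defined in this section. A minimal sketch of what the
# five-argument variant used above plausibly computes -- the standard biased
# matrix-factorization prediction mu + b_u + b_i + U.M -- assuming U is
# (num_users, k), M is (k, num_movies), bu is (num_users, 1) and bi is
# (1, num_movies), so the bias vectors broadcast over the full matrix. (A
# four-argument variant appears later in this section and presumably folds the
# global mean mu into the biases.)
def make_predictions(U: np.ndarray, M: np.ndarray, mu: float,
                     bu: np.ndarray, bi: np.ndarray) -> np.ndarray:
    # r_hat[u, m] = mu + bu[u] + bi[m] + U[u, :] . M[:, m]
    return mu + bu + bi + np.dot(U, M)
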
def train(k: int, df_train_data: pd.DataFrame, df_test_data: pd.DataFrame):
    """ Main function running genetic algorithm
    """
    logger = logging.getLogger('sLogger')
    logger.info("Training for K = {0}".format(k))

    print("Initializing first generation")
    # Initialize the first generation at random
    U, M, bu, bi = init_random_baseline(k)
    mu: float = df_train_data['Prediction'].mean()
    prev_rmse: float = 0.0
    overallBest: float = 4.0  # sentinel larger than any expected RMSE
    bestU: np.ndarray = np.zeros([paths.num_users, k])
    bestM: np.ndarray = np.zeros([k, paths.num_movies])
    bestbu: np.ndarray = np.zeros([paths.num_users, 1])
    bestbi: np.ndarray = np.zeros([1, paths.num_movies])

    tic = time()

    for i in range(0, paths.num_generations):
        new_U = np.copy(U)
        new_M = np.copy(M)
        new_bu = np.copy(bu)
        new_bi = np.copy(bi)
        ratings, best_rmse, bestInd = rate(df_train_data, U, M, mu, bu, bi)

        if overallBest > best_rmse:
            bestU = np.copy(U[bestInd])
            bestM = np.copy(M[bestInd])
            bestbu = np.copy(bu[bestInd])
            bestbi = np.copy(bi[bestInd])
            overallBest = best_rmse

        for j in range(0, paths.gen_size):
            p1 = selection(ratings)
            p2 = selection(ratings)
            new_U[j], new_M[j], new_bu[j], new_bi[j] = crossover(
                U[p1], M[p1], bu[p1], bi[p1], U[p2], M[p2], bu[p2], bi[p2], k)

        U = np.copy(new_U)
        M = np.copy(new_M)
        bu = np.copy(new_bu)
        bi = np.copy(new_bi)

        toc = time()
        iter_time = (toc - tic) / (i + 1)
        logger.info(
            'Iteration: %d, Misfit: %.8f, Improvement: %.8f, Time: %.3f' %
            (i + 1, best_rmse, prev_rmse - best_rmse, iter_time))

        prev_rmse = best_rmse

    # normalize best result
    prediction_matrix = make_predictions(bestU, bestM, mu, bestbu, bestbi)
    prediction_matrix = normalize_predictions(prediction_matrix)

    rmse = svd_base.calc_rmse(df_train_data, prediction_matrix)
    logger.info("Final RMSE for K = {0} is {1}".format(k, rmse))

    # save data
    assert (prediction_matrix.shape == (paths.num_users, paths.num_movies))
    dh.write_submission(prediction_matrix)
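
# selection and crossover are defined elsewhere. A sketch of one common choice,
# assuming roulette-wheel selection on inverse RMSE (lower RMSE means a fitter
# individual) and a uniform crossover that inherits each latent dimension from
# one parent or the other; the actual operators may differ, and a mutation
# step, not visible at this call site, would typically complete the loop:
def selection(rmses: np.ndarray) -> int:
    fitness = 1.0 / rmses                     # invert so low RMSE scores high
    probs = fitness / fitness.sum()
    return int(np.random.choice(len(rmses), p=probs))

def crossover(U1, M1, bu1, bi1, U2, M2, bu2, bi2, k: int):
    child_U, child_M = np.copy(U1), np.copy(M1)
    child_bu, child_bi = np.copy(bu1), np.copy(bi1)
    for dim in range(k):                      # mix latent dimensions per parent
        if np.random.rand() < 0.5:
            child_U[:, dim] = U2[:, dim]
            child_M[dim, :] = M2[dim, :]
    if np.random.rand() < 0.5:                # inherit the bias pair together
        child_bu, child_bi = np.copy(bu2), np.copy(bi2)
    return child_U, child_M, child_bu, child_bi
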
def train(df_train_data: pd.DataFrame, df_test_data: pd.DataFrame):
    """ Main function running the simple SGD approach
    """
    logger = logging.getLogger('sLogger')

    dh.log(logger, "Initializing state of approximation matrices", False)
    # Initialize the starting matrices using SVD
    k = 5
    U, M = init_svd_baseline(df_train_data, k)

    # Calculate the initial loss on the test split; prev_rmse is compared
    # against the test RMSE inside the training loop below
    prediction_matrix: np.ndarray = np.dot(U, M)
    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    dh.log(logger, "Initial loss: {0}".format(rmse), False)

    # initialize other variables needed for training
    train_samples: np.ndarray = dh.df_as_array(df_train_data)
    alpha: float = paths.learning_rate
    lambda_term: float = paths.lambda_term

    # initialize variables used for backtracking the solution
    prev_U: np.ndarray = np.copy(U)
    prev_M: np.ndarray = np.copy(M)
    prev_rmse: float = rmse

    i_iter = 1
    tic = time()
    num_useless_iter = 0

    dh.log(logger, "Starting SGD algorithm", False)
    while (i_iter <= paths.sgd_max_iteration):
        # perform update steps
        U, M = sgd_update(train_samples, U, M, alpha, lambda_term)

        prediction_matrix = np.dot(U, M)
        rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)

        # stop SGD when we see little to no improvement for 10 consecutive iterations
        if (rmse > prev_rmse - 1e-7):
            num_useless_iter += 1
            dh.log(logger, "Useless iteration - {0}".format(num_useless_iter),
                   True)
        else:
            num_useless_iter = 0
        if (num_useless_iter == 10):
            break

        # keep the best matrices seen so far; if this iteration made the test
        # RMSE worse, revert to them and decay the learning rate
        if (rmse < prev_rmse):
            prev_U = np.copy(U)
            prev_M = np.copy(M)
            prev_rmse = rmse
        else:
            U = np.copy(prev_U)
            M = np.copy(prev_M)

            dh.log(logger, "Reverting to previous matrices", True)
            # decay the learning rate so we don't overshoot the minimum
            alpha /= 1.5

        toc = time()
        dh.log(logger, 'Iteration: %d, Misfit: %.6f' % (i_iter, rmse), False)
        dh.log(logger,
               'Average time per iteration: %.4f' % ((toc - tic) / i_iter),
               True)
        i_iter += 1

    # clip the final predictions into the valid rating range
    prediction_matrix = np.clip(prediction_matrix, paths.min_rating,
                                paths.max_rating)

    assert (prediction_matrix.shape == (paths.num_users, paths.num_movies))
    dh.write_submission(prediction_matrix)
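
# sgd_update for this unbiased variant is not shown here. A minimal sketch,
# assuming train_samples is an array of (user, movie, rating) rows (as
# produced by dh.df_as_array) and the usual L2-regularized per-sample
# matrix-factorization update:
def sgd_update(train_samples: np.ndarray, U: np.ndarray, M: np.ndarray,
               alpha: float, lambda_term: float):
    np.random.shuffle(train_samples)          # visit samples in random order
    for u, m, r in train_samples:
        u, m = int(u), int(m)
        err = r - np.dot(U[u, :], M[:, m])    # residual for this sample
        u_row = np.copy(U[u, :])              # snapshot for M's update below
        U[u, :] += alpha * (err * M[:, m] - lambda_term * U[u, :])
        M[:, m] += alpha * (err * u_row - lambda_term * M[:, m])
    return U, M
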
def train(k: int, df_train_data: pd.DataFrame, df_test_data: pd.DataFrame):
    """ Main function running the simple SGD approach
    """
    logger = logging.getLogger('sLogger')
    logger.info("Training for K = {0}".format(k))

    print("Initializing state of approximation matrices")
    # Initialize the starting matrices using SVD
    U, M = init_svd_baseline(df_train_data, k)
    bu: np.ndarray = np.zeros([paths.num_users, 1])
    bi: np.ndarray = np.zeros([1, paths.num_movies])
    mu: float = df_train_data['Prediction'].mean()
    alpha: float = paths.learning_rate

    # Calculate the initial loss
    prediction_matrix = make_predictions(U, M, mu, bu, bi)
    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    logger.info("Initial loss: {0}".format(rmse))

    # initialize other variables needed for training
    train_samples: np.ndarray = dh.df_as_array(df_train_data)

    # initialize variables used for backtracking the solution
    prev_U: np.ndarray = np.copy(U)
    prev_M: np.ndarray = np.copy(M)
    prev_bu: np.ndarray = np.copy(bu)
    prev_bi: np.ndarray = np.copy(bi)
    prev_rmse: float = rmse

    # buffers for the previous update deltas; sgd_update reuses them,
    # presumably as momentum terms
    prev_delta_U: np.ndarray = np.zeros(U.shape)
    prev_delta_M: np.ndarray = np.zeros(M.shape)
    prev_delta_bu: np.ndarray = np.zeros(bu.shape)
    prev_delta_bi: np.ndarray = np.zeros(bi.shape)

    i_iter = 1
    tic = time()
    num_useless_iter = 0
    uphill_iter = 0

    print("Starting SGD algorithm")
    while (i_iter <= paths.sgd_max_iteration):
        # perform update steps
        U, M, bu, bi, prev_delta_U, prev_delta_M, prev_delta_bu, prev_delta_bi = \
            sgd_update(train_samples, U, M, alpha, mu, bu, bi, prev_delta_U,
                       prev_delta_M, prev_delta_bu, prev_delta_bi)

        prediction_matrix = make_predictions(U, M, mu, bu, bi)
        rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)

        toc = time()
        iter_time = (toc - tic) / i_iter
        logger.info(
            'Iteration: %d, Misfit: %.8f, Improvement: %.8f, Time: %.3f' %
            (i_iter, rmse, prev_rmse - rmse, iter_time))

        # stop SGD when we see little to no improvement for 10 consecutive iterations
        if (rmse > prev_rmse - 1e-7):
            num_useless_iter += 1
            logger.info("Useless iteration {0}".format(num_useless_iter))
        else:
            num_useless_iter = 0
        if (num_useless_iter == 10):
            break

        # remember the best matrices seen so far; revert to them if RMSE
        # keeps getting worse (see below)
        if (rmse < prev_rmse):
            prev_U = np.copy(U)
            prev_M = np.copy(M)
            prev_bu = np.copy(bu)
            prev_bi = np.copy(bi)
            prev_rmse = rmse
            uphill_iter = 0
        else:
            uphill_iter += 1
            logger.info("Went uphill: {0}".format(uphill_iter))
            # if RMSE has gotten worse for 5 consecutive iterations,
            # revert to the previous best result
            if (uphill_iter >= 5):
                logger.info("Reverting to previous best")
                U = np.copy(prev_U)
                M = np.copy(prev_M)
                bu = np.copy(prev_bu)
                bi = np.copy(prev_bi)

                # decay the learning rate so we don't overshoot the minimum again
                alpha /= 1.5
                uphill_iter = 0

        i_iter += 1

    # normalize best result
    prediction_matrix = make_predictions(prev_U, prev_M, mu, prev_bu, prev_bi)
    prediction_matrix = normalize_predictions(prediction_matrix)

    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    logger.info("Final RMSE for K = {0} is {1}".format(k, rmse))

    # save data
    assert (prediction_matrix.shape == (paths.num_users, paths.num_movies))
    dh.write_submission(prediction_matrix)
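
# This variant of sgd_update threads momentum buffers (prev_delta_*) through
# each call but is not shown here. A sketch assuming classical heavy-ball
# momentum over epochs with a hypothetical coefficient BETA: start from the
# momentum step, apply per-sample biased-MF updates, and return the epoch's
# total deltas for the next call. The real update rule may differ.
BETA = 0.9  # hypothetical momentum coefficient

def sgd_update(train_samples, U, M, alpha, mu, bu, bi,
               prev_delta_U, prev_delta_M, prev_delta_bu, prev_delta_bi):
    U_old, M_old = np.copy(U), np.copy(M)
    bu_old, bi_old = np.copy(bu), np.copy(bi)
    # momentum step: reapply a fraction of the previous epoch's movement
    U = U + BETA * prev_delta_U
    M = M + BETA * prev_delta_M
    bu = bu + BETA * prev_delta_bu
    bi = bi + BETA * prev_delta_bi
    for u, m, r in train_samples:
        u, m = int(u), int(m)
        err = r - (mu + bu[u, 0] + bi[0, m] + np.dot(U[u, :], M[:, m]))
        u_row = np.copy(U[u, :])              # snapshot for M's update below
        U[u, :] += alpha * err * M[:, m]
        M[:, m] += alpha * err * u_row
        bu[u, 0] += alpha * err
        bi[0, m] += alpha * err
    # the deltas over this epoch seed the next call's momentum term
    return U, M, bu, bi, U - U_old, M - M_old, bu - bu_old, bi - bi_old
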
def train(k: int, train_samples: np.ndarray, df_test_data: pd.DataFrame,
          U: np.ndarray, M: np.ndarray, bu: np.ndarray, bi: np.ndarray):
    """ Main function running the SGD approach on pre-initialized factor matrices
    """
    logger = logging.getLogger('sLogger')
    logger.info("Training for K = {0}".format(k))

    # Calculate the initial loss
    prediction_matrix = make_predictions(U, M, bu, bi)
    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    logger.info("Initial loss: {0}".format(rmse))

    # initialize other variables needed for training
    alpha: float = paths.learning_rate
    lambda_term: float = paths.lambda_term

    # initialize variables used for backtracking the solution
    prev_U: np.ndarray = np.copy(U)
    prev_M: np.ndarray = np.copy(M)
    prev_bu: np.ndarray = np.copy(bu)
    prev_bi: np.ndarray = np.copy(bi)
    prev_rmse: float = rmse

    i_iter = 1
    tic = time()
    num_useless_iter = 0
    uphill_iter = 0

    print("Starting SGD algorithm")
    while (i_iter <= paths.sgd_max_iteration):
        # perform update steps
        U, M, bu, bi = sgd_update(train_samples, U, M, bu, bi, alpha, lambda_term)

        prediction_matrix = make_predictions(U, M, bu, bi)
        rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)

        # bookkeeping
        toc = time()
        iter_time = (toc - tic) / i_iter
        logger.info('Iteration: %d, Misfit: %.8f, Improvement: %.8f, Time: %.3f'
                    % (i_iter, rmse, prev_rmse - rmse, iter_time))

        # stop SGD when we see little to no improvement for 10 consecutive iterations
        if (rmse > prev_rmse - 1e-7):
            num_useless_iter += 1
            logger.info("Useless iteration {0}".format(num_useless_iter))
        else:
            num_useless_iter = 0
        if (num_useless_iter == 10):
            break

        # remember the best matrices seen so far; revert to them if RMSE
        # keeps getting worse (see below)
        if (rmse < prev_rmse):
            prev_U = np.copy(U)
            prev_M = np.copy(M)
            prev_bu = np.copy(bu)
            prev_bi = np.copy(bi)
            prev_rmse = rmse
            uphill_iter = 0
        else:
            uphill_iter += 1
            logger.info("Went uphill: {0}".format(uphill_iter))
            # if RMSE has gotten worse for 5 consecutive iterations,
            # revert to the previous best result
            if (uphill_iter >= 5):
                logger.info("Reverting to previous best")
                U = np.copy(prev_U)
                M = np.copy(prev_M)
                bu = np.copy(prev_bu)
                bi = np.copy(prev_bi)

                # decay the learning rate so we don't overshoot the minimum again
                alpha /= 1.5
                uphill_iter = 0

        i_iter += 1

    # normalize best result
    prediction_matrix = make_predictions(prev_U, prev_M, prev_bu, prev_bi)
    prediction_matrix = normalize_predictions(prediction_matrix)

    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    logger.info("Final RMSE for K = {0} is {1}".format(k, rmse))

    return prediction_matrix, rmse
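
# normalize_predictions is called by every variant above but defined elsewhere.
# Given the explicit clipping in the unbiased SGD version, it presumably clips
# predictions into the valid rating range; a one-line sketch:
def normalize_predictions(prediction_matrix: np.ndarray) -> np.ndarray:
    # clip every predicted rating into [min_rating, max_rating]
    return np.clip(prediction_matrix, paths.min_rating, paths.max_rating)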