import logging
from time import time

import numpy as np
import pandas as pd

# Project-local modules assumed importable in this repo: `paths` (configuration
# constants), `dh` (data helpers), and `svd_base` (shared SVD utilities).


def rate(df_train_data: pd.DataFrame, U: np.ndarray, M: np.ndarray, mu: float,
         bu: np.ndarray, bi: np.ndarray):
    """ Score every individual of the generation by RMSE and track the best one """
    rmses: np.ndarray = np.zeros(paths.gen_size)
    # start from +inf so that the individual at index 0 is also considered
    # (the original `best = 0.0` with an `i == 1` special case skipped it)
    best: float = np.inf
    ind = 0
    for i in range(paths.gen_size):
        prediction_matrix = make_predictions(U[i], M[i], mu, bu[i], bi[i])
        rmses[i] = svd_base.calc_rmse(df_train_data, prediction_matrix)
        if rmses[i] < best:
            ind = i
            best = rmses[i]
    return rmses, best, ind
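
# `make_predictions` is defined elsewhere in the repo; this is a minimal sketch,
# assuming the standard biased factorization r_hat = mu + bu + bi + U @ M. The
# bias shapes used throughout ([num_users, 1] and [1, num_movies]) broadcast
# over the full prediction matrix. The four-argument variant called further
# below presumably folds mu into the bias terms.
def make_predictions(U: np.ndarray, M: np.ndarray, mu: float,
                     bu: np.ndarray, bi: np.ndarray) -> np.ndarray:
    return mu + bu + bi + np.dot(U, M)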
def train(k: int, df_train_data: pd.DataFrame, df_test_data: pd.DataFrame):
    """ Main function running the genetic algorithm """
    logger = logging.getLogger('sLogger')
    logger.info("Training for K = {0}".format(k))
    print("Initializing first generation")

    # Initialize the first generation at random
    U, M, bu, bi = init_random_baseline(k)
    mu: float = df_train_data['Prediction'].mean()

    prev_rmse: float = 0.0
    # best individual seen across all generations; 4.0 is an upper bound on
    # the RMSE of any reasonable prediction on a 1-5 rating scale
    overall_best: float = 4.0
    best_U: np.ndarray = np.zeros([paths.num_users, k])
    best_M: np.ndarray = np.zeros([k, paths.num_movies])
    best_bu: np.ndarray = np.zeros([paths.num_users, 1])
    best_bi: np.ndarray = np.zeros([1, paths.num_movies])

    tic = time()
    for i in range(paths.num_generations):
        new_U = np.copy(U)
        new_M = np.copy(M)
        new_bu = np.copy(bu)
        new_bi = np.copy(bi)

        # score the current generation and remember the fittest individual
        ratings, best_rmse, best_ind = rate(df_train_data, U, M, mu, bu, bi)
        if best_rmse < overall_best:
            best_U = np.copy(U[best_ind])
            best_M = np.copy(M[best_ind])
            best_bu = np.copy(bu[best_ind])
            best_bi = np.copy(bi[best_ind])
            overall_best = best_rmse

        # breed the next generation from pairs of selected parents
        for j in range(paths.gen_size):
            p1 = selection(ratings)
            p2 = selection(ratings)
            new_U[j], new_M[j], new_bu[j], new_bi[j] = \
                crossover(U[p1], M[p1], bu[p1], bi[p1],
                          U[p2], M[p2], bu[p2], bi[p2], k)

        U = np.copy(new_U)
        M = np.copy(new_M)
        bu = np.copy(new_bu)
        bi = np.copy(new_bi)

        toc = time()
        iter_time = (toc - tic) / (i + 1)
        logger.info('Iteration: %d, Misfit: %.8f, Improvement: %.8f, Time: %.3f'
                    % (i + 1, best_rmse, prev_rmse - best_rmse, iter_time))
        prev_rmse = best_rmse

    # normalize the best result into the valid rating range
    prediction_matrix = make_predictions(best_U, best_M, mu, best_bu, best_bi)
    prediction_matrix = normalize_predictions(prediction_matrix)
    rmse = svd_base.calc_rmse(df_train_data, prediction_matrix)
    logger.info("Final RMSE for K = {0} is {1}".format(k, rmse))

    # save data
    assert prediction_matrix.shape == (paths.num_users, paths.num_movies)
    dh.write_submission(prediction_matrix)
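
# `selection` and `crossover` are defined elsewhere in the repo; these are
# minimal sketches of one common choice, assuming fitness-proportional parent
# selection (lower RMSE = fitter) and uniform crossover over the k latent
# factors with averaged bias terms. The actual operators may differ.
def selection(ratings: np.ndarray) -> int:
    # invert the RMSEs so fitter individuals get larger weights, then draw
    # one parent index proportionally to its weight
    fitness = 1.0 / ratings
    probs = fitness / fitness.sum()
    return np.random.choice(len(ratings), p=probs)


def crossover(U1, M1, bu1, bi1, U2, M2, bu2, bi2, k: int):
    # pick each latent factor (column of U, matching row of M) from one
    # parent at random
    mask = np.random.rand(k) < 0.5
    U = np.where(mask[np.newaxis, :], U1, U2)
    M = np.where(mask[:, np.newaxis], M1, M2)
    # average the bias terms of the two parents
    bu = 0.5 * (bu1 + bu2)
    bi = 0.5 * (bi1 + bi2)
    return U, M, bu, bi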
def train(df_train_data: pd.DataFrame, df_test_data: pd.DataFrame):
    """ Main function running the simple SGD approach """
    logger = logging.getLogger('sLogger')
    dh.log(logger, "Initializing state of approximation matrices", False)

    # Initialize the starting matrices using SVD
    k = 5
    U, M = init_svd_baseline(df_train_data, k)

    # Calculate the initial loss on the test set, so the early-stopping
    # comparison below starts from a consistent baseline (the original
    # seeded it with the training RMSE while the loop validates on test)
    prediction_matrix: np.ndarray = np.dot(U, M)
    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    dh.log(logger, "Initial loss: {0}".format(rmse), False)

    # initialize other variables needed for training
    train_samples: np.ndarray = dh.df_as_array(df_train_data)
    alpha: float = paths.learning_rate
    lambda_term: float = paths.lambda_term

    # initialize variables used for backtracking the solution
    prev_U: np.ndarray = np.copy(U)
    prev_M: np.ndarray = np.copy(M)
    prev_rmse: float = rmse

    i_iter = 1
    tic = time()
    num_useless_iter = 0
    dh.log(logger, "Starting SGD algorithm", False)
    while i_iter <= paths.sgd_max_iteration:
        # perform update steps
        U, M = sgd_update(train_samples, U, M, alpha, lambda_term)
        prediction_matrix = np.dot(U, M)
        rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)

        # stop SGD after 10 consecutive iterations with little to no improvement
        if rmse > prev_rmse - 1e-7:
            num_useless_iter += 1
            dh.log(logger, "Useless iteration - {0}".format(num_useless_iter), True)
        else:
            num_useless_iter = 0
        if num_useless_iter == 10:
            break

        # keep the best matrices seen so far; whenever the RMSE gets worse,
        # revert to them and shrink the learning rate so we don't overshoot
        # the minimum
        if rmse < prev_rmse:
            prev_U = np.copy(U)
            prev_M = np.copy(M)
            prev_rmse = rmse
        else:
            U = np.copy(prev_U)
            M = np.copy(prev_M)
            dh.log(logger, "Revert iterations", True)
            alpha /= 1.5

        toc = time()
        dh.log(logger, 'Iteration: %d, Misfit: %.6f' % (i_iter, rmse), False)
        dh.log(logger, 'Average time per iteration: %.4f' % ((toc - tic) / i_iter), True)
        i_iter += 1

    # normalize the best result by clipping into the valid rating range
    prediction_matrix[prediction_matrix > paths.max_rating] = paths.max_rating
    prediction_matrix[prediction_matrix < paths.min_rating] = paths.min_rating
    assert prediction_matrix.shape == (paths.num_users, paths.num_movies)
    dh.write_submission(prediction_matrix)
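
# The unbiased `sgd_update` is defined elsewhere; a minimal sketch of one full
# pass over the training samples, assuming each row of `train_samples` is
# (user_index, movie_index, rating) and standard L2-regularized updates:
#     e = r - u . m
#     u += alpha * (e * m - lambda * u)
#     m += alpha * (e * u - lambda * m)
def sgd_update(train_samples: np.ndarray, U: np.ndarray, M: np.ndarray,
               alpha: float, lambda_term: float):
    for user, movie, rating in train_samples:
        user, movie = int(user), int(movie)
        error = rating - np.dot(U[user, :], M[:, movie])
        # snapshot the user row so both updates use the pre-step values
        u_row = np.copy(U[user, :])
        U[user, :] += alpha * (error * M[:, movie] - lambda_term * U[user, :])
        M[:, movie] += alpha * (error * u_row - lambda_term * M[:, movie])
    return U, M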
def train(k: int, df_train_data: pd.DataFrame, df_test_data: pd.DataFrame):
    """ Main function running the SGD approach with biases and momentum """
    logger = logging.getLogger('sLogger')
    logger.info("Training for K = {0}".format(k))
    print("Initializing state of approximation matrices")

    # Initialize the starting matrices using SVD
    U, M = init_svd_baseline(df_train_data, k)
    bu: np.ndarray = np.zeros([paths.num_users, 1])
    bi: np.ndarray = np.zeros([1, paths.num_movies])
    mu: float = df_train_data['Prediction'].mean()
    alpha: float = paths.learning_rate

    # Calculate the initial loss
    prediction_matrix = make_predictions(U, M, mu, bu, bi)
    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    logger.info("Initial loss: {0}".format(rmse))

    # initialize other variables needed for training
    train_samples: np.ndarray = dh.df_as_array(df_train_data)

    # initialize variables used for backtracking the solution; copies are
    # taken so in-place updates of U, M, bu, bi cannot silently change the
    # backups (the original aliased them with plain assignment)
    prev_U: np.ndarray = np.copy(U)
    prev_M: np.ndarray = np.copy(M)
    prev_bu: np.ndarray = np.copy(bu)
    prev_bi: np.ndarray = np.copy(bi)
    prev_rmse: float = rmse

    # momentum terms: the previous update step of every parameter group
    prev_delta_U: np.ndarray = np.zeros(U.shape)
    prev_delta_M: np.ndarray = np.zeros(M.shape)
    prev_delta_bu: np.ndarray = np.zeros(bu.shape)
    prev_delta_bi: np.ndarray = np.zeros(bi.shape)

    i_iter = 1
    tic = time()
    num_useless_iter = 0
    uphill_iter = 0
    print("Starting SGD algorithm")
    while i_iter <= paths.sgd_max_iteration:
        # perform update steps
        U, M, bu, bi, prev_delta_U, prev_delta_M, prev_delta_bu, prev_delta_bi = \
            sgd_update(train_samples, U, M, alpha, mu, bu, bi,
                       prev_delta_U, prev_delta_M, prev_delta_bu, prev_delta_bi)
        prediction_matrix = make_predictions(U, M, mu, bu, bi)
        rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)

        toc = time()
        iter_time = (toc - tic) / i_iter
        logger.info('Iteration: %d, Misfit: %.8f, Improvement: %.8f, Time: %.3f'
                    % (i_iter, rmse, prev_rmse - rmse, iter_time))

        # stop SGD after 10 consecutive iterations with little to no improvement
        if rmse > prev_rmse - 1e-7:
            num_useless_iter += 1
            logger.info("Useless iteration {0}".format(num_useless_iter))
        else:
            num_useless_iter = 0
        if num_useless_iter == 10:
            break

        # keep the best parameters seen so far
        if rmse < prev_rmse:
            prev_U = np.copy(U)
            prev_M = np.copy(M)
            prev_bu = np.copy(bu)
            prev_bi = np.copy(bi)
            prev_rmse = rmse
            uphill_iter = 0
        else:
            uphill_iter += 1
            logger.info("Went uphill: {0}".format(uphill_iter))

        # if the RMSE keeps getting worse for 5 iterations, revert to the
        # previous best result
        if uphill_iter >= 5:
            logger.info("Revert iterations")
            U = np.copy(prev_U)
            M = np.copy(prev_M)
            bu = np.copy(prev_bu)
            bi = np.copy(prev_bi)
            # update learning rate so we don't miss the minimum again
            alpha /= 1.5
            uphill_iter = 0

        i_iter += 1

    # normalize the best result into the valid rating range
    prediction_matrix = make_predictions(prev_U, prev_M, mu, prev_bu, prev_bi)
    prediction_matrix = normalize_predictions(prediction_matrix)
    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    logger.info("Final RMSE for K = {0} is {1}".format(k, rmse))

    # save data
    assert prediction_matrix.shape == (paths.num_users, paths.num_movies)
    dh.write_submission(prediction_matrix)
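
# The biased `sgd_update` with momentum is defined elsewhere; a minimal sketch,
# assuming the model r_hat = mu + bu[u] + bi[i] + U[u] . M[:, i] and classical
# momentum, delta = beta * prev_delta + alpha * gradient. `beta` and
# `lambda_term` here are hypothetical placeholders for whatever constants the
# repo keeps in `paths`.
def sgd_update(train_samples, U, M, alpha, mu, bu, bi,
               prev_delta_U, prev_delta_M, prev_delta_bu, prev_delta_bi,
               beta: float = 0.9, lambda_term: float = 0.02):
    for user, movie, rating in train_samples:
        user, movie = int(user), int(movie)
        error = rating - (mu + bu[user, 0] + bi[0, movie]
                          + np.dot(U[user, :], M[:, movie]))
        # momentum-smoothed gradient step for every parameter group,
        # computed from the pre-step parameter values
        prev_delta_U[user, :] = beta * prev_delta_U[user, :] \
            + alpha * (error * M[:, movie] - lambda_term * U[user, :])
        prev_delta_M[:, movie] = beta * prev_delta_M[:, movie] \
            + alpha * (error * U[user, :] - lambda_term * M[:, movie])
        prev_delta_bu[user, 0] = beta * prev_delta_bu[user, 0] \
            + alpha * (error - lambda_term * bu[user, 0])
        prev_delta_bi[0, movie] = beta * prev_delta_bi[0, movie] \
            + alpha * (error - lambda_term * bi[0, movie])
        U[user, :] += prev_delta_U[user, :]
        M[:, movie] += prev_delta_M[:, movie]
        bu[user, 0] += prev_delta_bu[user, 0]
        bi[0, movie] += prev_delta_bi[0, movie]
    return U, M, bu, bi, prev_delta_U, prev_delta_M, prev_delta_bu, prev_delta_bi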
def train(k: int, train_samples, df_test_data, U, M, bu, bi):
    """ Main function running the simple SGD approach """
    logger = logging.getLogger('sLogger')
    logger.info("Training for K = {0}".format(k))

    # Calculate the initial loss
    prediction_matrix = make_predictions(U, M, bu, bi)
    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    logger.info("Initial loss: {0}".format(rmse))

    # initialize other variables needed for training
    alpha: float = paths.learning_rate
    lambda_term: float = paths.lambda_term

    # initialize variables used for backtracking the solution; copies keep
    # the backups safe from in-place updates of the working parameters
    prev_U: np.ndarray = np.copy(U)
    prev_M: np.ndarray = np.copy(M)
    prev_bu: np.ndarray = np.copy(bu)
    prev_bi: np.ndarray = np.copy(bi)
    prev_rmse: float = rmse

    i_iter = 1
    tic = time()
    num_useless_iter = 0
    uphill_iter = 0
    print("Starting SGD algorithm")
    while i_iter <= paths.sgd_max_iteration:
        # perform update steps
        U, M, bu, bi = sgd_update(train_samples, U, M, bu, bi, alpha, lambda_term)
        prediction_matrix = make_predictions(U, M, bu, bi)
        rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)

        # bookkeeping
        toc = time()
        iter_time = (toc - tic) / i_iter
        logger.info('Iteration: %d, Misfit: %.8f, Improvement: %.8f, Time: %.3f'
                    % (i_iter, rmse, prev_rmse - rmse, iter_time))

        # stop SGD after 10 consecutive iterations with little to no improvement
        if rmse > prev_rmse - 1e-7:
            num_useless_iter += 1
            logger.info("Useless iteration {0}".format(num_useless_iter))
        else:
            num_useless_iter = 0
        if num_useless_iter == 10:
            break

        # keep the best parameters seen so far
        if rmse < prev_rmse:
            prev_U = np.copy(U)
            prev_M = np.copy(M)
            prev_bu = np.copy(bu)
            prev_bi = np.copy(bi)
            prev_rmse = rmse
            uphill_iter = 0
        else:
            uphill_iter += 1
            logger.info("Went uphill: {0}".format(uphill_iter))

        # if the RMSE keeps getting worse for 5 iterations, revert to the
        # previous best result
        if uphill_iter >= 5:
            logger.info("Revert iterations")
            U = np.copy(prev_U)
            M = np.copy(prev_M)
            bu = np.copy(prev_bu)
            bi = np.copy(prev_bi)
            # update learning rate so we don't miss the minimum again
            alpha /= 1.5
            uphill_iter = 0

        i_iter += 1

    # normalize the best result into the valid rating range
    prediction_matrix = make_predictions(prev_U, prev_M, prev_bu, prev_bi)
    prediction_matrix = normalize_predictions(prediction_matrix)
    rmse = svd_base.calc_rmse(df_test_data, prediction_matrix)
    logger.info("Final RMSE for K = {0} is {1}".format(k, rmse))
    return prediction_matrix, rmse
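
# `normalize_predictions` is defined elsewhere; judging from the inline
# clipping in the simple SGD variant above, it most likely clips predictions
# into the valid rating range. A minimal sketch:
def normalize_predictions(prediction_matrix: np.ndarray) -> np.ndarray:
    return np.clip(prediction_matrix, paths.min_rating, paths.max_rating)


# Hypothetical usage of the factored-out `train` above: sweep several ranks k,
# keep the prediction matrix with the lowest validation RMSE, and submit it.
# `df_train` and `df_test` are placeholder DataFrames loaded by the caller.
def run_sweep(df_train: pd.DataFrame, df_test: pd.DataFrame):
    best_rmse, best_predictions = np.inf, None
    train_samples = dh.df_as_array(df_train)
    for k in (5, 10, 20):
        U, M = init_svd_baseline(df_train, k)
        bu = np.zeros([paths.num_users, 1])
        bi = np.zeros([1, paths.num_movies])
        predictions, rmse = train(k, train_samples, df_test, U, M, bu, bi)
        if rmse < best_rmse:
            best_rmse, best_predictions = rmse, predictions
    dh.write_submission(best_predictions)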