def pso_plus_mae():
    # Settings
    metric_type = "MAE"      # MSE, RMSE, MAE, R2
    optimizer_type = "PSO"   # PSO, BGD

    # Step 1: Load data
    data = load_data()

    # Step 2: Preprocess the data
    train_data, train_labels, test_data, test_labels, train_data_full, test_data_full = data_preprocess(data)

    # Step 3: Learning start
    theta = np.array([0.0, 0.0])          # Initialize model parameters
    start_time = datetime.datetime.now()  # Track learning starting time
    thetas, losses = learn(train_labels.values, train_data.values, theta,
                           max_iters, alpha, optimizer_type, metric_type)
    end_time = datetime.datetime.now()    # Track learning ending time
    execution_time = (end_time - start_time).total_seconds()  # Track execution time

    # Step 4: Results presentation
    print("Learn: execution time={t:.3f} seconds".format(t=execution_time))

    # Report test-set metrics for the baseline model
    print("R2:", -compute_loss(test_labels.values, test_data.values, thetas[-1], "R2"))  # R2 should be maximized
    print("MSE:", compute_loss(test_labels.values, test_data.values, thetas[-1], "MSE"))
    print("RMSE:", compute_loss(test_labels.values, test_data.values, thetas[-1], "RMSE"))
    print("MAE:", compute_loss(test_labels.values, test_data.values, thetas[-1], "MAE"))
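# The learn() helper called above is not defined in this section. Below is a minimal
# dispatcher sketch, assuming learn() simply routes on optimizer_type; the pop_size and
# mini_batch_size defaults are placeholder assumptions, not part of the original code.
def learn(y, x, theta, max_iters, alpha, optimizer_type, metric_type):
    if optimizer_type == "PSO":
        # PSO ignores the step size; pop_size=100 is an assumed default
        return pso(y, x, theta, max_iters, pop_size=100, metric_type=metric_type)
    elif optimizer_type == "BGD":
        return gradient_descent(y, x, theta, max_iters, alpha, metric_type)
    elif optimizer_type == "MiniBGD":
        # mini_batch_size=32 is an assumed default
        return mini_batch_gradient_descent(y, x, theta, max_iters, alpha, metric_type, mini_batch_size=32)
    else:
        raise ValueError("Unknown optimizer_type: {}".format(optimizer_type))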
def mini_batch_gradient_descent(y, x, theta, max_iters, alpha, metric_type, mini_batch_size):
    """
    Mini-Batch Gradient Descent

    :param y: ground truth
    :param x: input data (feature matrix)
    :param theta: model parameters (w and b)
    :param max_iters: max iterations
    :param alpha: step size
    :param metric_type: metric type
    :param mini_batch_size: mini batch size
    :return: thetas
                all tracked updated model parameters
             losses
                all tracked losses during the learning course
    """
    losses = []
    thetas = []

    for i in range(max_iters):
        # Shuffle x and y with the same permutation so feature/label pairs stay aligned
        indx = np.random.permutation(len(y))
        x, y = x[indx], y[indx]
        for batch in get_batches(x, y, batch_size=mini_batch_size):
            # Gradient of the MSE loss on the current mini-batch
            # (normalize by the actual batch length; the last batch may be smaller)
            gradient = -2 * batch['x'].T.dot(batch['y'] - batch['x'].dot(theta)) / len(batch['y'])
            theta = theta - alpha * gradient
            loss = compute_loss(batch['y'], batch['x'], theta, metric_type)

            # Track losses and thetas
            thetas.append(theta)
            losses.append(loss)
            print("MBGD({bi}/{ti}): loss={l}, w={w}, b={b}".format(
                bi=i, ti=max_iters - 1, l=loss, w=theta[0], b=theta[1]))

    return thetas, losses
def gradient_descent(y, x, theta, max_iters, alpha, metric_type):
    """
    Batch Gradient Descent

    :param y: ground truth
    :param x: input data (feature matrix)
    :param theta: model parameters (w and b)
    :param max_iters: max iterations
    :param alpha: step size
    :param metric_type: metric type
    :return: thetas
                all tracked updated model parameters
             losses
                all tracked losses during the learning course
    """
    losses = []
    thetas = []
    num_of_samples = len(x)

    for i in range(max_iters):
        # This is for MSE loss only
        gradient = -2 * x.T.dot(y - x.dot(theta)) / num_of_samples
        theta = theta - alpha * gradient
        loss = compute_loss(y, x, theta, metric_type)

        # Track losses and thetas
        thetas.append(theta)
        losses.append(loss)
        print("BGD({bi}/{ti}): loss={l}, w={w}, b={b}".format(
            bi=i, ti=max_iters - 1, l=loss, w=theta[0], b=theta[1]))

    return thetas, losses
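# compute_loss() is called throughout this section but not defined in it. The following is
# a minimal sketch, assuming predictions are x.dot(theta) and that every metric is returned
# so that smaller is better; R2 is therefore negated, which matches the -compute_loss(..., "R2")
# calls in the result-reporting code.
import numpy as np

def compute_loss(y, x, theta, metric_type):
    pred = x.dot(theta)
    error = y - pred
    if metric_type == "MSE":
        return np.mean(error ** 2)
    elif metric_type == "RMSE":
        return np.sqrt(np.mean(error ** 2))
    elif metric_type == "MAE":
        return np.mean(np.abs(error))
    elif metric_type == "R2":
        # Negated so that minimizing this value maximizes R2
        ss_res = np.sum(error ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        return -(1 - ss_res / ss_tot)
    raise ValueError("Unknown metric_type: {}".format(metric_type))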
def mini_batch_gradient_descent(y, x, theta, max_iters, alpha, metric_type, mini_batch_size):
    """
    Mini-Batch Gradient Descent

    :param y: ground truth
    :param x: input data (feature matrix)
    :param theta: model parameters (w and b)
    :param max_iters: max iterations
    :param alpha: step size
    :param metric_type: metric type
    :param mini_batch_size: mini batch size
    :return: thetas
                all tracked updated model parameters
             losses
                all tracked losses during the learning course
    """
    losses = []
    thetas = []

    for i in range(max_iters):
        # get_batches is expected to return parallel lists of feature and label mini-batches
        batchX, batchY = get_batches(x, y, mini_batch_size)
        for n, batch in enumerate(batchX):
            # This is for MSE loss only; normalize by the batch length, not the full data set
            gradient = -2 * batch.T.dot(batchY[n] - batch.dot(theta)) / len(batch)
            theta = theta - alpha * gradient
            loss = compute_loss(batchY[n], batch, theta, metric_type)

            # Track losses and thetas
            thetas.append(theta)
            losses.append(loss)
            print("MiniBGD({bi}/{ti}): loss={l}, w={w}, b={b}".format(
                bi=i, ti=max_iters - 1, l=loss, w=theta[0], b=theta[1]))

    return thetas, losses
def mini_batch_gradient_descent(y, x, theta, max_iters, alpha, metric_type, mini_batch_size):
    """
    Mini-Batch Gradient Descent

    :param y: ground truth
    :param x: input data (feature matrix)
    :param theta: model parameters (w and b)
    :param max_iters: max iterations
    :param alpha: step size
    :param metric_type: metric type
    :param mini_batch_size: mini batch size
    :return: thetas
                all tracked updated model parameters
             losses
                all tracked losses during the learning course
    """
    losses = []
    thetas = []

    for i in range(max_iters):  # one pass over the mini-batches per iteration (epoch)
        for batch_x, batch_y in get_batches(x, y, mini_batch_size):
            # This is for MSE loss only
            gradient = -2 * batch_x.T.dot(batch_y - batch_x.dot(theta)) / mini_batch_size
            theta = theta - alpha * gradient
            loss = compute_loss(batch_y, batch_x, theta, metric_type)

            # Track losses and thetas
            thetas.append(theta)
            losses.append(loss)
            print("MiniBGD({bi}/{ti}): loss={l}, w={w}, b={b}".format(
                bi=i, ti=max_iters - 1, l=loss, w=theta[0], b=theta[1]))

    return thetas, losses
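# get_batches() is used by the mini-batch variants but not defined in this section, and the
# variants disagree on its return type (dict batches, parallel lists, or (x, y) tuples).
# A minimal generator sketch matching the tuple-based variant above; shuffling inside the
# generator is an assumption, since that variant does not shuffle beforehand.
def get_batches(x, y, batch_size):
    # Shuffle once per call, then yield (batch_x, batch_y) tuples.
    # The last batch may be smaller than batch_size.
    indx = np.random.permutation(len(y))
    x, y = x[indx], y[indx]
    for start in range(0, len(y), batch_size):
        yield x[start:start + batch_size], y[start:start + batch_size]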
def mini_batch_gradient_descent(y, x, theta, max_iters, alpha, metric_type, mini_batch_size):
    """
    Mini-Batch Gradient Descent

    :param y: ground truth
    :param x: input data (feature matrix)
    :param theta: model parameters (w and b)
    :param max_iters: max iterations
    :param alpha: step size
    :param metric_type: metric type
    :param mini_batch_size: mini batch size
    :return: thetas
                all tracked updated model parameters
             losses
                all tracked losses during the learning course
    """
    losses = []
    thetas = []
    num_of_samples = len(x)

    for it in range(max_iters):
        # Shuffle x and y with the same permutation so feature/label pairs stay aligned
        indx = np.random.permutation(num_of_samples)
        x, y = x[indx], y[indx]
        for i in range(0, num_of_samples, mini_batch_size):
            batch_x = x[i:i + mini_batch_size]
            batch_y = y[i:i + mini_batch_size]
            # Gradient of the MSE loss on the current mini-batch
            gradient = -2 * batch_x.T.dot(batch_y - batch_x.dot(theta)) / len(batch_x)
            theta = theta - alpha * gradient
            loss = compute_loss(batch_y, batch_x, theta, metric_type)

            # Track losses and thetas
            thetas.append(theta)
            losses.append(loss)
            print("MiniBGD({bi}/{ti}): loss={l}, w={w}, b={b}".format(
                bi=it, ti=max_iters - 1, l=loss, w=theta[0], b=theta[1]))

    return thetas, losses
def compute_z_loss(y, x, thetas):
    """
    Compute z-axis values

    :param y: train labels
    :param x: train data
    :param thetas: model parameters
    :return: z_losses
                value (loss) for z-axis
    """
    thetas = np.array(thetas)
    w = thetas[:, 0]
    b = thetas[:, 1]

    # Evaluate the MSE loss on the grid spanned by the tracked w and b values
    z_losses = np.zeros((len(w), len(b)))
    for ind_row, row in enumerate(w):
        for ind_col, col in enumerate(b):
            theta = np.array([row, col])
            z_losses[ind_row, ind_col] = compute_loss(y, x, theta, "MSE")

    return z_losses
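# A hedged usage sketch for compute_z_loss(): it is presumably consumed by the visualization
# helpers to draw the MSE loss surface over the visited (w, b) values. The helper below is
# illustrative only (plot_loss_surface is not part of the original code); the visited values
# are sorted so the contour grid is monotone.
import matplotlib.pyplot as plt

def plot_loss_surface(y, x, thetas):
    thetas = np.array(thetas)
    # Build a monotone grid from the visited w and b values
    grid_thetas = np.column_stack([np.sort(thetas[:, 0]), np.sort(thetas[:, 1])])
    z_losses = compute_z_loss(y, x, grid_thetas)          # shape (n_w, n_b)
    W, B = np.meshgrid(grid_thetas[:, 0], grid_thetas[:, 1])  # shape (n_b, n_w)
    plt.contourf(W, B, z_losses.T, levels=30, cmap="viridis")
    plt.colorbar(label="MSE loss")
    plt.xlabel("w")
    plt.ylabel("b")
    plt.plot(thetas[:, 0], thetas[:, 1], "r.-", markersize=3)  # optimizer trajectory
    plt.show()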
def mini_batch_gradient_descent(y, x, theta, max_iters, alpha, metric_type, mini_batch_size):
    """
    Mini-Batch Gradient Descent

    :param y: ground truth
    :param x: input data (feature matrix)
    :param theta: model parameters (w and b)
    :param max_iters: max iterations
    :param alpha: step size
    :param metric_type: metric type
    :param mini_batch_size: mini batch size
    :return: thetas
                all tracked updated model parameters
             losses
                all tracked losses during the learning course
    """
    from sklearn.utils import shuffle  # imported once, outside the training loop

    losses = []
    thetas = []
    n = len(x)

    for i in range(max_iters):
        # Shuffle x and y together before slicing mini-batches
        x, y = shuffle(x, y)
        for j in range(0, n, mini_batch_size):
            batch_x = x[j:j + mini_batch_size]
            batch_y = y[j:j + mini_batch_size]
            # Normalize by the batch length (do not overwrite n, which bounds the loop above)
            batch_n = len(batch_x)
            gradient = -2 * batch_x.T.dot(batch_y - batch_x.dot(theta)) / batch_n
            theta = theta - alpha * gradient
            loss = compute_loss(batch_y, batch_x, theta, metric_type)

            # Track losses and thetas
            thetas.append(theta)
            losses.append(loss)
            print("MiniBGD({bi}/{ti}): loss={l}, w={w}, b={b}".format(
                bi=i, ti=max_iters - 1, l=loss, w=theta[0], b=theta[1]))

    return thetas, losses
def pso(y, x, theta, max_iters, pop_size, metric_type):
    """
    Particle Swarm Optimization

    :param y: train labels
    :param x: train data
    :param theta: model parameters
    :param max_iters: max iterations
    :param pop_size: population size
    :param metric_type: metric type (MSE, RMSE, R2, MAE)
    :return: best_thetas
                all tracked best model parameters for each generation
             losses
                all tracked losses of the best model in each generation
    """
    # Init settings
    w = 0.729844    # Inertia weight to prevent velocities becoming too large
    c_p = 1.496180  # Scaling coefficient on the cognitive (personal best) component
    c_g = 1.496180  # Scaling coefficient on the social (global best) component
    terminate = False
    g_best = theta
    lower_bound = -100
    upper_bound = 100

    velocity = []
    thetas = []
    p_best = []

    # Track results
    best_thetas = []
    losses = []

    # Initialization: random positions, personal/global bests, and random velocities
    for i in range(pop_size):
        theta = np.random.uniform(lower_bound, upper_bound, len(theta))
        thetas.append(theta)
        p_best.append(theta)
        if compute_loss(y, x, theta, metric_type) < compute_loss(y, x, g_best, metric_type):
            g_best = theta.copy()
        velocity.append(
            np.random.uniform(-np.abs(upper_bound - lower_bound),
                              np.abs(upper_bound - lower_bound), len(theta)))

    # Evolution
    count = 0
    while not terminate:
        for i in range(pop_size):
            rand_p = np.random.uniform(0, 1, size=len(theta))
            rand_g = np.random.uniform(0, 1, size=len(theta))
            velocity[i] = (w * velocity[i]
                           + c_p * rand_p * (p_best[i] - thetas[i])
                           + c_g * rand_g * (g_best - thetas[i]))
            thetas[i] = thetas[i] + velocity[i]
            if compute_loss(y, x, thetas[i], metric_type) < compute_loss(y, x, p_best[i], metric_type):
                p_best[i] = thetas[i]
            if compute_loss(y, x, p_best[i], metric_type) < compute_loss(y, x, g_best, metric_type):
                g_best = p_best[i]

        best_thetas.append(g_best)
        current_loss = compute_loss(y, x, g_best, metric_type)
        losses.append(current_loss)
        print("PSO({bi}/{ti}): loss={l}, w={w}, b={b}".format(
            bi=count, ti=max_iters - 1, l=current_loss, w=g_best[0], b=g_best[1]))

        count += 1
        if count >= max_iters:
            terminate = True

    return best_thetas, losses
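# A small, self-contained usage sketch for pso() on synthetic data. The data layout is an
# assumption consistent with theta = [w, b] above: x carries a bias column of ones so that
# x.dot(theta) = w * feature + b. Names and values here are illustrative, not from the
# original code.
def _pso_demo():
    rng = np.random.default_rng(0)
    n = 200
    feature = rng.uniform(0, 10, n)
    x_demo = np.column_stack([feature, np.ones(n)])        # columns: [feature, bias]
    y_demo = 2.0 * feature + 1.0 + rng.normal(0, 0.5, n)   # true w = 2.0, b = 1.0

    best_thetas, losses = pso(y_demo, x_demo, np.zeros(2),
                              max_iters=50, pop_size=30, metric_type="MAE")
    print("Recovered theta:", best_thetas[-1])             # should move toward roughly [2.0, 1.0]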
# Step 3: Learning start
theta = np.array([0.0, 0.0])          # Initialize model parameters
start_time = datetime.datetime.now()  # Track learning starting time
thetas, losses = learn(train_labels.values, train_data.values, theta,
                       max_iters, alpha, optimizer_type, metric_type)
end_time = datetime.datetime.now()    # Track learning ending time
execution_time = (end_time - start_time).total_seconds()  # Track execution time

# Step 4: Results presentation
print("Learn: execution time={t:.3f} seconds".format(t=execution_time))
print("R2:", -compute_loss(test_labels.values, test_data.values, thetas[-1], "R2"))  # R2 should be maximized
print("MSE:", compute_loss(test_labels.values, test_data.values, thetas[-1], "MSE"))
print("RMSE:", compute_loss(test_labels.values, test_data.values, thetas[-1], "RMSE"))
print("MAE:", compute_loss(test_labels.values, test_data.values, thetas[-1], "MAE"))
print("List:", thetas)

niter = max_iters
# visualize_train(train_data_full, train_labels, train_data, thetas, losses, niter)
visualize_test(test_data_full, test_data, thetas)
plt.show()