import numpy as np
import pandas as pd
from copy import deepcopy


def fit(self, data):
    # we use the same lambda for each task
    best_perf = np.inf
    best_lambda = None
    check_val_error = []
    for lambda_par in self.lambda_par_range:
        # compute the average test error on the validation tasks
        all_validation_errors = []
        for task_val in data.val_task_indexes:
            x_tr = data.features_tr[task_val]
            y_tr = data.labels_tr[task_val]
            x_ts = data.features_ts[task_val]
            y_ts = data.labels_ts[task_val]
            curr_weights, average_weights = inner_algorithm(
                x_tr, y_tr, lambda_par, self.fixed_meta_parameter, self.loss_name)
            validation_error = loss(x_ts, y_ts, average_weights, self.loss_name)
            all_validation_errors.append(validation_error)
        average_validation_error = np.mean(all_validation_errors)
        check_val_error.append(average_validation_error)
        if average_validation_error < best_perf:
            best_perf = average_validation_error
            best_lambda = lambda_par

    # rerun the inner algorithm with the selected lambda and evaluate on the test tasks
    all_test_errors = []
    for task_ts in data.test_task_indexes:
        x_tr = data.features_tr[task_ts]
        y_tr = data.labels_tr[task_ts]
        x_ts = data.features_ts[task_ts]
        y_ts = data.labels_ts[task_ts]
        curr_weights, average_weights = inner_algorithm(
            x_tr, y_tr, best_lambda, self.fixed_meta_parameter, self.loss_name)
        test_error = loss(x_ts, y_ts, average_weights, self.loss_name)
        all_test_errors.append(test_error)
    average_test_error = np.mean(all_test_errors)
    all_best_performances = average_test_error * np.ones(len(data.tr_task_indexes))
    print(f'best lambda: {best_lambda}')
    print(f'best test error: {all_best_performances[-1]}')
    return all_best_performances
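# `inner_algorithm` is called throughout this section but defined elsewhere in
# the repository. A minimal sketch of what it could look like, assuming online
# subgradient descent on the biased-regularized objective
# (1/n) * sum_t loss(<w, x_t>, y_t) + (lambda/2) * ||w - h||^2 around the
# meta-parameter h, returning both the last and the averaged iterate. The
# 1/(lambda * t) step size is an illustrative choice, not the repository's.
def inner_algorithm(x, y, lambda_par, meta_parameter, loss_name):
    n_points, n_dims = x.shape
    curr_weights = meta_parameter.copy()
    all_weights = []
    for t in range(n_points):
        step_size = 1 / (lambda_par * (t + 1))
        # subgradient of the loss plus gradient of the biased regularizer
        subgrad = subgradient(x[t, :], y[t], curr_weights, loss_name)
        full_gradient = subgrad * x[t, :] + lambda_par * (curr_weights - meta_parameter)
        curr_weights = curr_weights - step_size * full_gradient
        all_weights.append(curr_weights)
    return curr_weights, np.mean(all_weights, axis=0)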
def fit(self, data, task_indexes):
    performance = []
    for task in getattr(data, task_indexes):
        x = data.features_tr[task]
        y = data.labels_tr[task]
        n_points, n_dims = x.shape
        best_perf = np.inf
        for step_size in self.step_size_range:
            curr_untranslated_weights = np.zeros(n_dims)
            curr_weights = curr_untranslated_weights + self.fixed_bias
            all_weight_vectors = []
            all_losses = []
            shuffled_indexes = list(range(n_points))
            np.random.shuffle(shuffled_indexes)
            for curr_point_idx in shuffled_indexes:
                prev_untranslated_weights = curr_untranslated_weights
                prev_weights = curr_weights
                # receive a new datapoint
                curr_x = x[curr_point_idx, :]
                curr_y = y[curr_point_idx]
                # compute the gradient
                subgrad = subgradient(curr_x, curr_y, prev_weights, loss_name='absolute')
                full_gradient = subgrad * curr_x
                # update the weight vector
                curr_untranslated_weights = prev_untranslated_weights - step_size * full_gradient
                curr_weights = curr_untranslated_weights + self.fixed_bias
                all_weight_vectors.append(curr_weights)
                # track the training loss of the running average of the iterates
                if len(all_weight_vectors) < 2:
                    final_w = curr_weights
                else:
                    final_w = np.mean(all_weight_vectors, axis=0)
                curr_loss = loss(x, y, final_w, loss_name='absolute')
                all_losses.append(curr_loss)
            curr_perf = loss(data.features_ts[task], data.labels_ts[task], final_w, loss_name='absolute')
            if curr_perf < best_perf:
                best_perf = curr_perf
                best_step = step_size
        performance.append(best_perf)
    print(performance)
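# `loss` and `subgradient` are shared helpers that are not shown in this
# section. A minimal sketch for the absolute loss used above, assuming `loss`
# accepts either a single example or a full (n x d) matrix, and that
# `subgradient` returns the scalar derivative of |<w, x> - y| with respect to
# the prediction (the callers multiply it by x to get the full gradient):
def loss(x, y, w, loss_name='absolute'):
    # mean absolute error of the linear predictions (scalar-safe via np.mean)
    predictions = x @ w
    return np.mean(np.abs(predictions - y))


def subgradient(x, y, w, loss_name='absolute'):
    # subgradient of |p - y| at the prediction p = <w, x>
    return np.sign(x @ w - y)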
def fit(self, data):
    curr_meta_magnitude_betting_fraction = self.meta_magnitude_betting_fraction
    curr_meta_magnitude_wealth = self.meta_magnitude_wealth
    curr_meta_magnitude = curr_meta_magnitude_betting_fraction * curr_meta_magnitude_wealth
    curr_meta_direction = np.zeros(data.features_tr[0].shape[1])
    all_individual_cum_errors = []
    best_mtl_performances = []
    total_iter = 0
    all_meta_parameters = []
    all_final_weight_vectors = []
    for task_iteration, task in enumerate(data.tr_task_indexes):
        x = data.features_tr[task]
        y = data.labels_tr[task]
        task_iteration = task_iteration + 1
        prev_meta_direction = curr_meta_direction
        prev_meta_magnitude_betting_fraction = curr_meta_magnitude_betting_fraction
        prev_meta_magnitude_wealth = curr_meta_magnitude_wealth
        prev_meta_magnitude = curr_meta_magnitude
        # update the meta-parameter
        meta_parameter = prev_meta_magnitude * prev_meta_direction
        all_meta_parameters.append(meta_parameter)
        # initialize the inner parameters
        n_points, n_dims = x.shape
        curr_inner_magnitude_betting_fraction = self.inner_magnitude_betting_fraction
        curr_inner_magnitude_wealth = self.inner_magnitude_wealth
        curr_inner_magnitude = curr_inner_magnitude_betting_fraction * curr_inner_magnitude_wealth
        curr_inner_direction = np.zeros(n_dims)
        temp_weight_vectors = []
        all_gradients = []
        shuffled_indexes = list(range(n_points))
        # np.random.shuffle(shuffled_indexes)
        for inner_iteration, curr_point_idx in enumerate(shuffled_indexes):
            inner_iteration = inner_iteration + 1
            prev_inner_direction = curr_inner_direction
            prev_inner_magnitude_betting_fraction = curr_inner_magnitude_betting_fraction
            prev_inner_magnitude_wealth = curr_inner_magnitude_wealth
            prev_inner_magnitude = curr_inner_magnitude
            # update the inner weight vector
            weight_vector = prev_inner_magnitude * prev_inner_direction + meta_parameter
            temp_weight_vectors.append(weight_vector)
            # receive a new datapoint
            curr_x = x[curr_point_idx, :]
            curr_y = y[curr_point_idx]
            all_individual_cum_errors.append(loss(curr_x, curr_y, weight_vector, loss_name='absolute'))
            # compute the gradient
            subgrad = subgradient(curr_x, curr_y, weight_vector, loss_name='absolute')
            full_gradient = subgrad * curr_x
            all_gradients.append(full_gradient)
            # define the inner step size
            inner_step_size = (1 / (self.L * self.R)) * np.sqrt(2 / inner_iteration)
            # update the inner direction
            curr_inner_direction = l2_unit_ball_projection(prev_inner_direction - inner_step_size * full_gradient)
            # update the inner magnitude wealth
            curr_inner_magnitude_wealth = prev_inner_magnitude_wealth - (1 / (self.R * self.L)) * (full_gradient @ prev_inner_direction) * prev_inner_magnitude
            # update the inner magnitude betting fraction
            curr_inner_magnitude_betting_fraction = (1 / inner_iteration) * ((inner_iteration - 1) * prev_inner_magnitude_betting_fraction - (1 / (self.L * self.R)) * (full_gradient @ prev_inner_direction))
            # update the inner magnitude
            curr_inner_magnitude = curr_inner_magnitude_betting_fraction * curr_inner_magnitude_wealth
        # update the total iteration count
        total_iter = total_iter + n_points
        # compute the meta-gradient
        meta_gradient = np.sum(all_gradients, axis=0)
        # define the meta step size
        meta_step_size = (1 / (self.L * self.R * n_points)) * np.sqrt(2 / task_iteration)
        # update the meta-direction
        curr_meta_direction = l2_unit_ball_projection(prev_meta_direction - meta_step_size * meta_gradient)
        # update the meta-magnitude wealth
        curr_meta_magnitude_wealth = prev_meta_magnitude_wealth - (1 / (self.R * self.L * n_points)) * (meta_gradient @ prev_meta_direction) * prev_meta_magnitude
        # update the meta-magnitude betting fraction
        curr_meta_magnitude_betting_fraction = (1 / task_iteration) * ((task_iteration - 1) * prev_meta_magnitude_betting_fraction - (1 / (self.L * self.R * n_points)) * (meta_gradient @ prev_meta_direction))
        # update the meta-magnitude
        curr_meta_magnitude = curr_meta_magnitude_betting_fraction * curr_meta_magnitude_wealth
        all_final_weight_vectors.append(np.mean(temp_weight_vectors, axis=0))
        all_test_errors = []
        for idx, curr_test_task in enumerate(data.tr_task_indexes[:task_iteration]):
            all_test_errors.append(loss(data.features_ts[curr_test_task], data.labels_ts[curr_test_task], all_final_weight_vectors[idx], loss_name='absolute'))
        best_mtl_performances.append(np.nanmean(all_test_errors))
    self.all_meta_parameters = all_meta_parameters
    return best_mtl_performances, pd.DataFrame(all_individual_cum_errors).rolling(window=10 ** 10, min_periods=1).mean().values.ravel()
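# `l2_unit_ball_projection` keeps the direction iterates inside the unit L2
# ball; it is used above but defined elsewhere in the repository. The standard
# projection is a one-liner, sketched here:
def l2_unit_ball_projection(vector):
    # return the vector unchanged if it is already inside the ball,
    # otherwise rescale it onto the boundary
    norm = np.linalg.norm(vector, ord=2)
    return vector if norm <= 1 else vector / norm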
def fit(self, data):
    best_perf = np.inf
    best_mtl_performances = None
    best_average = None
    for inner_step_size in self.inner_step_size_range:
        for meta_step_size in self.meta_step_size_range:
            all_individual_cum_errors = []
            all_mtl_performances = []
            all_final_weight_vectors = []
            curr_metaparameter = np.zeros(data.features_tr[0].shape[1])
            for task_iteration, task in enumerate(data.tr_task_indexes):
                x = data.features_tr[task]
                y = data.labels_tr[task]
                task_iteration = task_iteration + 1
                # initialize the inner parameters
                n_points, n_dims = x.shape
                curr_untranslated_weights = np.zeros(n_dims)
                temp_weight_vectors = []
                shuffled_indexes = list(range(n_points))
                # np.random.shuffle(shuffled_indexes)
                for curr_point_idx in shuffled_indexes:
                    prev_untranslated_weights = curr_untranslated_weights
                    prev_metaparameter = curr_metaparameter
                    # update the inner weight vector
                    curr_weights = curr_untranslated_weights + curr_metaparameter
                    temp_weight_vectors.append(curr_weights)
                    # receive a new datapoint
                    curr_x = x[curr_point_idx, :]
                    curr_y = y[curr_point_idx]
                    all_individual_cum_errors.append(loss(curr_x, curr_y, curr_weights, loss_name='absolute'))
                    # compute the gradient
                    subgrad = subgradient(curr_x, curr_y, curr_weights, loss_name='absolute')
                    full_gradient = subgrad * curr_x
                    # update the meta-parameter after every datapoint
                    curr_metaparameter = prev_metaparameter - meta_step_size * full_gradient
                    # update the untranslated weights
                    curr_untranslated_weights = prev_untranslated_weights - inner_step_size * full_gradient
                all_final_weight_vectors.append(np.mean(temp_weight_vectors, axis=0))
                all_test_errors = []
                for idx, curr_test_task in enumerate(data.tr_task_indexes[:task_iteration]):
                    all_test_errors.append(loss(data.features_ts[curr_test_task], data.labels_ts[curr_test_task], all_final_weight_vectors[idx], loss_name='absolute'))
                all_mtl_performances.append(np.nanmean(all_test_errors))
            # running mean of the online errors across all tasks and points
            average_cum_errors = pd.DataFrame(all_individual_cum_errors).rolling(window=10 ** 10, min_periods=1).mean().values.ravel()
            if average_cum_errors[-1] < best_perf:
                best_perf = average_cum_errors[-1]
                best_mtl_performances = all_mtl_performances
                best_average = average_cum_errors
                # print('inner step: %8e | meta step: %8e | perf: %10.3f' % (inner_step_size, meta_step_size, np.nanmean(all_individual_cum_errors)))
    return best_mtl_performances, best_average
def fit(self, data):
    best_perf = np.inf
    best_mtl_performances = None
    best_average = None
    for inner_step_size in self.inner_step_size_range:
        for meta_step_size in self.meta_step_size_range:
            all_individual_cum_errors = []
            all_mtl_performances = []
            all_final_weight_vectors = []
            curr_metaparameter = np.zeros(data.features_tr[0].shape[1])
            for task_iteration, task in enumerate(data.tr_task_indexes):
                x = data.features_tr[task]
                y = data.labels_tr[task]
                prev_metaparameter = curr_metaparameter
                temp_weight_vectors = []
                all_gradients = []
                # initialize the inner parameters
                n_points, n_dims = x.shape
                curr_untranslated_weights = np.zeros(n_dims)
                shuffled_indexes = list(range(n_points))
                # np.random.shuffle(shuffled_indexes)
                for curr_point_idx in shuffled_indexes:
                    prev_untranslated_weights = curr_untranslated_weights
                    # update the inner weight vector
                    curr_weights = curr_untranslated_weights + curr_metaparameter
                    temp_weight_vectors.append(curr_weights)
                    # receive a new datapoint
                    curr_x = x[curr_point_idx, :]
                    curr_y = y[curr_point_idx]
                    all_individual_cum_errors.append(loss(curr_x, curr_y, curr_weights, loss_name='absolute'))
                    # compute the gradient
                    subgrad = subgradient(curr_x, curr_y, curr_weights, loss_name='absolute')
                    full_gradient = subgrad * curr_x
                    all_gradients.append(full_gradient)
                    # update the untranslated weights
                    curr_untranslated_weights = prev_untranslated_weights - inner_step_size * full_gradient
                # update the meta-parameter once per task, using the summed gradients
                curr_metaparameter = prev_metaparameter - meta_step_size * np.sum(all_gradients, axis=0)
                all_final_weight_vectors.append(np.mean(temp_weight_vectors, axis=0))
                all_test_errors = []
                # evaluate on all tasks seen so far (task_iteration + 1, since the
                # enumerate index is 0-based; the original slice dropped the current task)
                for idx, curr_test_task in enumerate(data.tr_task_indexes[:task_iteration + 1]):
                    all_test_errors.append(loss(data.features_ts[curr_test_task], data.labels_ts[curr_test_task], all_final_weight_vectors[idx], loss_name='absolute'))
                all_mtl_performances.append(np.nanmean(all_test_errors))
            # running mean of the online errors across all tasks and points
            average_cum_errors = pd.DataFrame(all_individual_cum_errors).rolling(window=10 ** 10, min_periods=1).mean().values.ravel()
            if average_cum_errors[-1] < best_perf:
                best_perf = average_cum_errors[-1]
                best_inner = inner_step_size
                best_meta = meta_step_size
                best_average = average_cum_errors
                best_mtl_performances = all_mtl_performances
                # plt.plot(average_cum_errors)
                # plt.title('best inner step ' + str(best_inner) + ' | ' + 'best meta step ' + str(best_meta))
                # plt.ylim(top=12, bottom=0)
                # plt.pause(0.1)
    return best_mtl_performances, best_average
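# A minimal sketch of the data container these fit methods expect: per-task
# train/test splits plus lists of task indexes. Everything here (names,
# shapes, noise levels, the train/val/test split) is illustrative, not part
# of the repository.
from types import SimpleNamespace


def make_synthetic_data(n_tasks=4, n_points=50, n_dims=10, seed=0):
    rng = np.random.default_rng(seed)
    common_bias = rng.normal(size=n_dims)  # tasks share a common weight vector
    features_tr, labels_tr, features_ts, labels_ts = [], [], [], []
    for _ in range(n_tasks):
        w = common_bias + 0.1 * rng.normal(size=n_dims)  # small per-task shift
        x_tr = rng.normal(size=(n_points, n_dims))
        x_ts = rng.normal(size=(n_points, n_dims))
        features_tr.append(x_tr)
        labels_tr.append(x_tr @ w + 0.01 * rng.normal(size=n_points))
        features_ts.append(x_ts)
        labels_ts.append(x_ts @ w + 0.01 * rng.normal(size=n_points))
    tasks = list(range(n_tasks))
    return SimpleNamespace(features_tr=features_tr, labels_tr=labels_tr,
                           features_ts=features_ts, labels_ts=labels_ts,
                           tr_task_indexes=tasks[:-2],
                           val_task_indexes=tasks[-2:-1],
                           test_task_indexes=tasks[-1:])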
def fit(self, data):
    meta_wealth_range = [1]
    inner_wealth_range = [1]
    best_cumsum_perf = np.inf
    best_cumsum_performances = None
    best_mtl_performances = None
    for meta_wealth in meta_wealth_range:
        for inner_wealth in inner_wealth_range:
            all_individual_cum_errors = []
            curr_meta_fraction = self.meta_magnitude_betting_fraction
            curr_meta_wealth = meta_wealth
            curr_meta_magnitude = curr_meta_fraction * curr_meta_wealth
            curr_meta_direction = np.zeros(data.features_tr[0].shape[1])
            all_final_weight_vectors = []
            all_h_meta = []
            all_mtl_performances = []
            all_meta_parameters = []
            for task_iteration, task in enumerate(data.tr_task_indexes):
                x = data.features_tr[task]
                y = data.labels_tr[task]
                task_iteration = task_iteration + 1
                # initialize the inner parameters
                n_points, n_dims = x.shape
                curr_inner_fraction = self.inner_magnitude_betting_fraction
                curr_inner_wealth = inner_wealth
                curr_inner_magnitude = curr_inner_fraction * curr_inner_wealth
                curr_inner_direction = np.zeros(n_dims)
                all_h_inner = []
                temp_weight_vectors = []
                shuffled_indexes = list(range(n_points))
                # np.random.shuffle(shuffled_indexes)
                for inner_iteration, curr_point_idx in enumerate(shuffled_indexes):
                    inner_iteration = inner_iteration + 1
                    prev_meta_direction = curr_meta_direction
                    prev_meta_fraction = curr_meta_fraction
                    prev_meta_wealth = curr_meta_wealth
                    prev_meta_magnitude = curr_meta_magnitude
                    prev_inner_direction = curr_inner_direction
                    prev_inner_fraction = curr_inner_fraction
                    prev_inner_wealth = curr_inner_wealth
                    prev_inner_magnitude = curr_inner_magnitude
                    # define the total iteration count
                    total_iter = self.__general_iteration(task_iteration, n_points, inner_iteration)
                    # update the meta-parameter
                    meta_parameter = prev_meta_magnitude * prev_meta_direction
                    all_meta_parameters.append(meta_parameter)
                    # update the inner weight vector
                    weight_vector = prev_inner_magnitude * prev_inner_direction + meta_parameter
                    temp_weight_vectors.append(weight_vector)
                    # receive a new datapoint
                    curr_x = x[curr_point_idx, :]
                    curr_y = y[curr_point_idx]
                    all_individual_cum_errors.append(loss(curr_x, curr_y, weight_vector, loss_name='absolute'))
                    # compute the gradient
                    subgrad = subgradient(curr_x, curr_y, weight_vector, loss_name='absolute')
                    full_gradient = subgrad * curr_x
                    # define the meta step size
                    meta_step_size = (1 / (self.L * self.R)) * np.sqrt(2 / total_iter)
                    # update the meta-direction
                    curr_meta_direction = l2_unit_ball_projection(prev_meta_direction - meta_step_size * full_gradient)
                    # define the inner step size
                    inner_step_size = (1 / (self.L * self.R)) * np.sqrt(2 / inner_iteration)
                    # update the inner direction
                    curr_inner_direction = l2_unit_ball_projection(prev_inner_direction - inner_step_size * full_gradient)
                    # update the meta-magnitude wealth
                    curr_meta_wealth = prev_meta_wealth - (1 / (self.R * self.L)) * (full_gradient @ prev_meta_direction) * prev_meta_magnitude
                    # ONS correction term for the meta betting fraction
                    h_meta = (1 / (self.R * self.L)) * (full_gradient @ prev_meta_direction) * (1 / (1 - (1 / (self.R * self.L)) * (full_gradient @ prev_meta_direction) * prev_meta_fraction))
                    all_h_meta.append(h_meta)
                    a_meta = 1 + np.sum([curr_h ** 2 for curr_h in all_h_meta])
                    # update the meta-magnitude betting fraction
                    curr_meta_fraction = np.max([np.min([prev_meta_fraction - (2 / (2 - np.log(3))) * (h_meta / a_meta), 1 / 2]), -1 / 2])
                    # update the meta-magnitude
                    curr_meta_magnitude = curr_meta_fraction * curr_meta_wealth
                    # update the inner magnitude wealth
                    curr_inner_wealth = prev_inner_wealth - (1 / (self.R * self.L)) * (full_gradient @ prev_inner_direction) * prev_inner_magnitude
                    # ONS correction term for the inner betting fraction
                    h_inner = (1 / (self.R * self.L)) * (full_gradient @ prev_inner_direction) * (1 / (1 - (1 / (self.R * self.L)) * (full_gradient @ prev_inner_direction) * prev_inner_fraction))
                    all_h_inner.append(h_inner)
                    a_inner = 1 + np.sum([curr_h ** 2 for curr_h in all_h_inner])
                    # update the inner magnitude betting fraction
                    curr_inner_fraction = np.max([np.min([prev_inner_fraction - (2 / (2 - np.log(3))) * (h_inner / a_inner), 1 / 2]), -1 / 2])
                    # update the inner magnitude
                    curr_inner_magnitude = curr_inner_fraction * curr_inner_wealth
                all_final_weight_vectors.append(np.mean(temp_weight_vectors, axis=0))
                all_test_errors = []
                for idx, curr_test_task in enumerate(data.tr_task_indexes[:task_iteration]):
                    all_test_errors.append(loss(data.features_ts[curr_test_task], data.labels_ts[curr_test_task], all_final_weight_vectors[idx], loss_name='absolute'))
                all_mtl_performances.append(np.nanmean(all_test_errors))
            if np.nanmean(all_individual_cum_errors) < best_cumsum_perf:
                best_cumsum_perf = np.nanmean(all_individual_cum_errors)
                best_cumsum_performances = all_individual_cum_errors
                best_mtl_performances = all_mtl_performances
            print('inner wealth: %8.2f | meta wealth: %8.2f | perf: %10.3f' % (inner_wealth, meta_wealth, np.nanmean(all_individual_cum_errors)))
    return best_mtl_performances, pd.DataFrame(best_cumsum_performances).rolling(window=10 ** 10, min_periods=1).mean().values.ravel()
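# A summary of the magnitude updates computed above, restated as equations.
# At round t the learner bets a fraction beta_t of its wealth W_t on the
# current direction d_t, playing magnitude p_t = beta_t * W_t, and the wealth
# evolves as
#
#     W_{t+1} = W_t - c_t * p_t,    c_t = <g_t, d_t> / (R * L),
#
# where c_t is the normalized "coin outcome" built from the gradient g_t. The
# betting fraction is then refined with an online-Newton-step rule through
# h_t = c_t / (1 - c_t * beta_t) and A_t = 1 + sum_{s<=t} h_s^2, clipped to
# [-1/2, 1/2]. This is the coin-betting construction from the parameter-free
# online learning literature; the comment only restates what the code does.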
def fit(self, data, task_indexes):
    all_mtl_performances = []
    all_errors = []
    total_points = 0
    for task_idx, task in enumerate(getattr(data, task_indexes)):
        x = data.features_tr[task]
        y = data.labels_tr[task]
        n_points, n_dims = x.shape
        total_points = total_points + n_points
        wealth_range = [1]
        for wealth in wealth_range:
            curr_bet_fraction = self.magnitude_betting_fraction
            curr_wealth = wealth  # self.magnitude_wealth
            curr_magnitude = curr_bet_fraction * curr_wealth
            curr_direction = np.random.randn(n_dims)
            all_weight_vectors = []
            all_h = []
            shuffled_indexes = list(range(n_points))
            np.random.shuffle(shuffled_indexes)
            for iteration, curr_point_idx in enumerate(shuffled_indexes):
                iteration = iteration + 1
                prev_direction = curr_direction
                prev_bet_fraction = curr_bet_fraction
                prev_wealth = curr_wealth
                prev_magnitude = curr_magnitude
                # update the weight vector
                weight_vector = prev_magnitude * prev_direction + self.fixed_bias
                all_weight_vectors.append(weight_vector)
                # receive a new datapoint
                curr_x = x[curr_point_idx, :]
                curr_y = y[curr_point_idx]
                all_errors.append(loss(curr_x, curr_y, weight_vector, loss_name='absolute'))
                # compute the gradient
                subgrad = subgradient(curr_x, curr_y, weight_vector, loss_name='absolute')
                full_gradient = subgrad * curr_x
                # define the step size
                step_size = (1 / (self.L * self.R)) * np.sqrt(2 / iteration)
                # update the direction
                curr_direction = l2_unit_ball_projection(prev_direction - step_size * full_gradient)
                # update the magnitude wealth
                curr_wealth = prev_wealth - (1 / (self.R * self.L)) * (full_gradient @ prev_direction) * prev_magnitude
                # ONS correction term for the betting fraction
                h = (1 / (self.R * self.L)) * (full_gradient @ prev_direction) * (1 / (1 - (1 / (self.R * self.L)) * (full_gradient @ prev_direction) * prev_bet_fraction))
                all_h.append(h)
                a_term = 1 + np.sum([curr_h ** 2 for curr_h in all_h])
                # update the magnitude betting fraction
                curr_bet_fraction = np.max([np.min([prev_bet_fraction - (2 / (2 - np.log(3))) * (h / a_term), 1 / 2]), -1 / 2])
                # update the magnitude
                curr_magnitude = curr_bet_fraction * curr_wealth
            if len(all_weight_vectors) < 2:
                final_w = weight_vector
            else:
                final_w = np.mean(all_weight_vectors, axis=0)
            curr_test_perf = loss(data.features_ts[task], data.labels_ts[task], final_w, loss_name='absolute')
            all_mtl_performances.append(curr_test_perf)
    return (task_idx + 1) * [np.nanmean(all_mtl_performances)], total_points * [np.nanmean(all_errors)]
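# Note on the signature above: `task_indexes` is the *name* of an attribute on
# the data object (the method resolves it with getattr), so a call looks like
#
#     mtl_curve, error_curve = model.fit(data, 'tr_task_indexes')
#
# where `model` is an instance of the class this method belongs to (the name
# is assumed here) and `data` follows the container layout sketched earlier.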
def fit(self, data):
    all_individual_cum_errors = []
    best_metaparameter = np.zeros(data.features_tr[0].shape[1])
    for task_iteration, task in enumerate(data.tr_task_indexes):
        x = data.features_tr[task]
        y = data.labels_tr[task]
        # initialize the inner parameters
        n_points, n_dims = x.shape
        curr_metaparameter = best_metaparameter
        best_perf = np.inf
        for meta_idx, meta_step_size in enumerate(self.meta_step_size_range):
            for inner_idx, inner_step_size in enumerate(self.inner_step_size_range):
                # every hyperparameter pair restarts from the meta-parameter
                # selected on the previous task
                if meta_idx == 0 and inner_idx == 0:
                    og_metaparameter = deepcopy(curr_metaparameter)
                else:
                    curr_metaparameter = og_metaparameter
                temp_cum_errors = []
                curr_untranslated_weights = np.zeros(n_dims)
                shuffled_indexes = list(range(n_points))
                # np.random.shuffle(shuffled_indexes)
                for curr_point_idx in shuffled_indexes:
                    prev_untranslated_weights = curr_untranslated_weights
                    prev_metaparameter = curr_metaparameter
                    # update the inner weight vector
                    curr_weights = curr_untranslated_weights + prev_metaparameter
                    # receive a new datapoint
                    curr_x = x[curr_point_idx, :]
                    curr_y = y[curr_point_idx]
                    temp_cum_errors.append(loss(curr_x, curr_y, curr_weights, loss_name='absolute'))
                    # compute the gradient
                    subgrad = subgradient(curr_x, curr_y, curr_weights, loss_name='absolute')
                    full_gradient = subgrad * curr_x
                    # update the meta-parameter
                    curr_metaparameter = prev_metaparameter - meta_step_size * full_gradient
                    # update the untranslated weights
                    curr_untranslated_weights = prev_untranslated_weights - inner_step_size * full_gradient
                curr_cum_sum = pd.DataFrame(temp_cum_errors).rolling(window=10 ** 10, min_periods=1).mean().values.ravel()
                if curr_cum_sum[-1] < best_perf:
                    best_perf = curr_cum_sum[-1]
                    best_cum_errors = temp_cum_errors
                    best_metaparameter = deepcopy(curr_metaparameter)
                    print('%3d | best params: %10.5f | %10.5f | error: %5.3f' % (task_iteration, inner_step_size, meta_step_size, best_perf))
        all_individual_cum_errors = all_individual_cum_errors + best_cum_errors
    return None, pd.DataFrame(all_individual_cum_errors).rolling(window=10 ** 10, min_periods=1).mean().values.ravel()
def fit(self, data):
    best_perf = np.inf
    counter_val = 0
    for gamma_par in self.gamma_par_range:
        for lambda_par in self.lambda_par_range:
            counter_val = counter_val + 1
            # print('val:', counter_val, 'on', len(self.lambda_par_range) * len(self.gamma_par_range))
            all_meta_parameters_temp = []
            all_average_val_errors_temp = []   # temporary memory for the best validation error curve
            all_average_test_errors_temp = []  # temporary memory for the best test error curve
            # initialize the meta-parameter
            meta_parameter = np.zeros(data.features_tr[0].shape[1])
            for task_tr_index, task_tr in enumerate(data.tr_task_indexes):
                # print('TRAINING task', task_tr_index + 1)
                x = data.features_tr[task_tr]
                y = data.labels_tr[task_tr]
                curr_weights, average_weights = inner_algorithm(x, y, lambda_par, meta_parameter, self.loss_name)
                # compute the meta-gradient
                meta_gradient = -lambda_par * (curr_weights - meta_parameter)
                # update the meta-parameter
                meta_parameter = meta_parameter - gamma_par * meta_gradient
                all_meta_parameters_temp.append(meta_parameter)
                average_meta_parameter = np.mean(all_meta_parameters_temp, axis=0)
                # compute the error on the validation and test tasks with average_meta_parameter
                all_val_errors_temp = []
                for task_val in data.val_task_indexes:
                    x_tr = data.features_tr[task_val]
                    y_tr = data.labels_tr[task_val]
                    x_ts = data.features_ts[task_val]
                    y_ts = data.labels_ts[task_val]
                    curr_weights, average_weights = inner_algorithm(x_tr, y_tr, lambda_par, average_meta_parameter, self.loss_name)
                    val_error = loss(x_ts, y_ts, average_weights, self.loss_name)
                    all_val_errors_temp.append(val_error)
                average_val_error = np.mean(all_val_errors_temp)
                all_average_val_errors_temp.append(average_val_error)
                all_test_errors_temp = []
                for task_ts in data.test_task_indexes:
                    x_tr = data.features_tr[task_ts]
                    y_tr = data.labels_tr[task_ts]
                    x_ts = data.features_ts[task_ts]
                    y_ts = data.labels_ts[task_ts]
                    curr_weights, average_weights = inner_algorithm(x_tr, y_tr, lambda_par, average_meta_parameter, self.loss_name)
                    test_error = loss(x_ts, y_ts, average_weights, self.loss_name)
                    all_test_errors_temp.append(test_error)
                average_test_error = np.mean(all_test_errors_temp)
                all_average_test_errors_temp.append(average_test_error)
            # select the hyperparameters for which the last training task's average validation error is minimized
            if average_val_error < best_perf:
                best_perf = average_val_error
                best_lambda_par = lambda_par
                best_gamma_par = gamma_par
                all_best_performances = all_average_test_errors_temp
    print(f'best lambda: {best_lambda_par} | best gamma: {best_gamma_par}')
    print(f'best test error: {all_best_performances[-1]}')
    return all_best_performances
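# The meta-gradient used above, -lambda * (w_lambda - h), is the standard
# expression for the gradient with respect to the bias h of the inner
# regularized objective
#
#     min_w (1/n) * sum_i loss_i(w) + (lambda / 2) * ||w - h||^2
#
# evaluated at its minimizer w_lambda: by the envelope theorem only the
# explicit dependence on h contributes, giving lambda * (h - w_lambda) =
# -lambda * (w_lambda - h). The code plugs in the last iterate of the inner
# algorithm as an approximation of w_lambda.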
def fit(self, data):
    best_perf = np.inf
    counter_val = 0
    for gamma_par in self.gamma_par_range:
        for lambda_par in self.lambda_par_range:
            counter_val = counter_val + 1
            # print('val:', counter_val, 'on', len(self.lambda_par_range) * len(self.gamma_par_range))
            all_meta_parameters_temp = []
            all_average_val_errors_temp = []   # temporary memory for the best validation error curve
            all_average_test_errors_temp = []  # temporary memory for the best test error curve
            # initialize the meta-parameters b and M; on the 'circle' dataset the
            # feature map is computed on the side information instead of the inputs
            n_dims = data.features_tr[0].shape[1]
            curr_b = np.zeros(n_dims)
            sum_b = np.zeros(n_dims)
            avg_b = np.zeros(n_dims)
            if self.dataset == 'circle':
                test_for_shape = feature_map(data.all_side_info[0], data.labels_tr[0], self.feature_map_name, self.r, self.W)
            else:
                test_for_shape = feature_map(data.features_tr[0], data.labels_tr[0], self.feature_map_name, self.r, self.W)
            curr_M = np.zeros([n_dims, test_for_shape.shape[0]])
            sum_M = np.zeros([n_dims, test_for_shape.shape[0]])
            avg_M = np.zeros([n_dims, test_for_shape.shape[0]])
            idx_avg = 1
            for task_tr_index, task_tr in enumerate(data.tr_task_indexes):
                # print('TRAINING task', task_tr_index + 1)
                x = data.features_tr[task_tr]
                y = data.labels_tr[task_tr]
                if self.dataset == 'circle':
                    s = data.all_side_info[task_tr]
                    x_trasf_feature = feature_map(s, y, self.feature_map_name, self.r, self.W)
                else:
                    x_trasf_feature = feature_map(x, y, self.feature_map_name, self.r, self.W)
                # update the conditional meta-parameter
                curr_meta_parameter = avg_M @ x_trasf_feature + avg_b
                curr_weights, average_weights = inner_algorithm(x, y, lambda_par, curr_meta_parameter, self.loss_name)
                # compute the meta-gradients
                meta_gradient_b = -lambda_par * (curr_weights - curr_meta_parameter)
                meta_gradient_M = np.tensordot(meta_gradient_b, x_trasf_feature, 0)
                # update the meta-parameters and their running averages
                curr_b = curr_b - gamma_par * meta_gradient_b
                curr_M = curr_M - gamma_par * meta_gradient_M
                sum_M = sum_M + curr_M
                avg_M = sum_M / idx_avg
                sum_b = sum_b + curr_b
                avg_b = sum_b / idx_avg
                idx_avg = idx_avg + 1
                # compute the error on the validation and test tasks with the averaged meta-parameters
                all_val_errors_temp = []
                for task_val in data.val_task_indexes:
                    x_tr = data.features_tr[task_val]
                    y_tr = data.labels_tr[task_val]
                    x_ts = data.features_ts[task_val]
                    y_ts = data.labels_ts[task_val]
                    if self.dataset == 'circle':
                        s = data.all_side_info[task_val]
                        x_trasf_feature = feature_map(s, y_tr, self.feature_map_name, self.r, self.W)
                    else:
                        x_trasf_feature = feature_map(x_tr, y_tr, self.feature_map_name, self.r, self.W)
                    curr_meta_parameter = avg_M @ x_trasf_feature + avg_b
                    curr_weights, average_weights = inner_algorithm(x_tr, y_tr, lambda_par, curr_meta_parameter, self.loss_name)
                    val_error = loss(x_ts, y_ts, average_weights, self.loss_name)
                    all_val_errors_temp.append(val_error)
                average_val_error = np.mean(all_val_errors_temp)
                all_average_val_errors_temp.append(average_val_error)
                all_test_errors_temp = []
                for task_ts in data.test_task_indexes:
                    x_tr = data.features_tr[task_ts]
                    y_tr = data.labels_tr[task_ts]
                    x_ts = data.features_ts[task_ts]
                    y_ts = data.labels_ts[task_ts]
                    if self.dataset == 'circle':
                        s = data.all_side_info[task_ts]
                        x_trasf_feature = feature_map(s, y_tr, self.feature_map_name, self.r, self.W)
                    else:
                        x_trasf_feature = feature_map(x_tr, y_tr, self.feature_map_name, self.r, self.W)
                    curr_meta_parameter = avg_M @ x_trasf_feature + avg_b
                    curr_weights, average_weights = inner_algorithm(x_tr, y_tr, lambda_par, curr_meta_parameter, self.loss_name)
                    test_error = loss(x_ts, y_ts, average_weights, self.loss_name)
                    all_test_errors_temp.append(test_error)
                average_test_error = np.mean(all_test_errors_temp)
                all_average_test_errors_temp.append(average_test_error)
            # select the hyperparameters for which the average validation error is minimized
            if average_val_error < best_perf:
                best_perf = average_val_error
                best_lambda_par = lambda_par
                best_gamma_par = gamma_par
                all_best_performances = all_average_test_errors_temp
    print(f'best lambda: {best_lambda_par} | best gamma: {best_gamma_par}')
    print(f'best test error: {all_best_performances[-1]}')
    return all_best_performances
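# `feature_map` (and its parameters `r` and `W`) is defined elsewhere in the
# repository; only its signature is visible here. A hypothetical example with
# the same signature, purely illustrative and not the repository's actual map:
# it summarizes a task's dataset (or side information) as a fixed-length
# vector, so that avg_M @ feature + avg_b can condition the meta-parameter on
# the task. `y` is unused in this illustrative variant.
def feature_map(x, y, feature_map_name, r, W):
    # W: assumed (r x n_dims) random projection matrix; average a
    # random-feature embedding over the task's points to get an
    # r-dimensional task representation
    return np.mean(np.cos(x @ W.T), axis=0) / np.sqrt(r)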