Example #1
    def fit(self, data):

        # we use the same lambda for each task
        best_perf = np.inf
        check_val_error = []  # validation error curve across lambda values

        for _, lambda_par in enumerate(self.lambda_par_range):

            # computing the average test error on the validation tasks
            all_validation_errors = []

            for _, task_val in enumerate(data.val_task_indexes):

                x_tr = data.features_tr[task_val]
                y_tr = data.labels_tr[task_val]
                x_ts = data.features_ts[task_val]
                y_ts = data.labels_ts[task_val]

                curr_weights, average_weights = inner_algorithm(
                    x_tr, y_tr, lambda_par, self.fixed_meta_parameter,
                    self.loss_name)
                validation_error = loss(x_ts, y_ts, average_weights,
                                        self.loss_name)
                all_validation_errors.append(validation_error)

            average_validation_error = np.mean(all_validation_errors)
            check_val_error.append(average_validation_error)

            if average_validation_error < best_perf:
                best_perf = average_validation_error
                best_lambda = lambda_par

        all_test_errors = []

        for _, task_ts in enumerate(data.test_task_indexes):

            x_tr = data.features_tr[task_ts]
            y_tr = data.labels_tr[task_ts]
            x_ts = data.features_ts[task_ts]
            y_ts = data.labels_ts[task_ts]

            curr_weights, average_weights = inner_algorithm(
                x_tr, y_tr, best_lambda, self.fixed_meta_parameter,
                self.loss_name)
            test_error = loss(x_ts, y_ts, average_weights, self.loss_name)
            all_test_errors.append(test_error)

        average_test_error = np.mean(all_test_errors)
        all_best_performances = average_test_error * np.ones(
            len(data.tr_task_indexes))

        print(f'best lambda: {best_lambda}')
        print(f'best test error: {all_best_performances[-1]}')

        return all_best_performances
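The examples in this section lean on helpers defined elsewhere in the codebase (`inner_algorithm`, `loss`, `subgradient`, `l2_unit_ball_projection`, `feature_map`). As a reference point, here is a minimal sketch of what `inner_algorithm` plausibly computes, assuming it runs online subgradient descent on the lambda-regularized objective centered at the meta-parameter and returns both the last and the averaged iterate; the absolute-loss subgradient and the step-size schedule are assumptions, not taken from this code:

import numpy as np

def inner_algorithm(x, y, lambda_par, meta_parameter, loss_name):
    # Hypothetical sketch: online subgradient descent on
    # (1/n) sum_i loss(<x_i, w>, y_i) + (lambda_par/2) ||w - meta_parameter||^2,
    # returning (last iterate, average iterate).
    n_points, _ = x.shape
    curr_weights = meta_parameter.copy()
    all_weights = []
    for t in range(n_points):
        subgrad = np.sign(x[t, :] @ curr_weights - y[t])  # absolute loss
        full_gradient = subgrad * x[t, :] + lambda_par * (curr_weights - meta_parameter)
        step_size = 1 / (lambda_par * (t + 1))  # assumed schedule
        curr_weights = curr_weights - step_size * full_gradient
        all_weights.append(curr_weights)
    return curr_weights, np.mean(all_weights, axis=0)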
Example #2
    def fit(self, data, task_indexes):

        performance = []
        for task_idx, task in enumerate(getattr(data, task_indexes)):
            x = data.features_tr[task]
            y = data.labels_tr[task]
            n_points, n_dims = x.shape

            best_perf = np.inf
            for step_idx, step_size in enumerate(self.step_size_range):
                curr_untranslated_weights = np.zeros(n_dims)
                curr_weights = curr_untranslated_weights + self.fixed_bias
                all_weight_vectors = []
                all_losses = []
                shuffled_indexes = list(range(n_points))
                np.random.shuffle(shuffled_indexes)
                for iteration, curr_point_idx in enumerate(shuffled_indexes):
                    prev_untranslated_weights = curr_untranslated_weights
                    prev_weights = curr_weights

                    # receive a new datapoint
                    curr_x = x[curr_point_idx, :]
                    curr_y = y[curr_point_idx]

                    # compute the gradient
                    subgrad = subgradient(curr_x,
                                          curr_y,
                                          prev_weights,
                                          loss_name='absolute')
                    full_gradient = subgrad * curr_x

                    # update weight vector
                    curr_untranslated_weights = prev_untranslated_weights - step_size * full_gradient
                    curr_weights = curr_untranslated_weights + self.fixed_bias
                    all_weight_vectors.append(curr_weights)

                    if len(all_weight_vectors) < 2:
                        final_w = curr_weights
                    else:
                        final_w = np.mean(all_weight_vectors, axis=0)
                    curr_train_loss = loss(x, y, final_w, loss_name='absolute')
                    all_losses.append(curr_train_loss)

                curr_perf = loss(data.features_ts[task],
                                 data.labels_ts[task],
                                 final_w,
                                 loss_name='absolute')
                if curr_perf < best_perf:
                    best_perf = curr_perf
                    best_step = step_size
            performance.append(best_perf)
            print(performance)

        return performance
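`subgradient` is called on a single point and the result is multiplied by `curr_x`, so it presumably returns the scalar (sub)derivative of the loss at the prediction. A minimal sketch of `subgradient` and `loss` under that reading, for the absolute loss used throughout:

import numpy as np

def subgradient(x, y, weights, loss_name):
    # Hypothetical sketch: scalar subgradient of the loss at the
    # prediction <x, weights>; callers multiply it by x themselves.
    if loss_name == 'absolute':
        return np.sign(x @ weights - y)
    raise ValueError('unknown loss: ' + loss_name)

def loss(x, y, weights, loss_name):
    # Hypothetical sketch: average loss of the linear predictor,
    # for a single point or a batch of points.
    predictions = np.atleast_2d(x) @ weights
    if loss_name == 'absolute':
        return np.mean(np.abs(predictions - np.atleast_1d(y)))
    raise ValueError('unknown loss: ' + loss_name)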
Example #3
    def fit(self, data):
        curr_meta_magnitude_betting_fraction = self.meta_magnitude_betting_fraction
        curr_meta_magnitude_wealth = self.meta_magnitude_wealth
        curr_meta_magnitude = curr_meta_magnitude_betting_fraction * curr_meta_magnitude_wealth
        curr_meta_direction = np.zeros(data.features_tr[0].shape[1])

        all_individual_cum_errors = []
        best_mtl_performances = []

        total_iter = 0
        all_meta_parameters = []
        all_final_weight_vectors = []
        for task_iteration, task in enumerate(data.tr_task_indexes):
            x = data.features_tr[task]
            y = data.labels_tr[task]

            task_iteration = task_iteration + 1
            prev_meta_direction = curr_meta_direction
            prev_meta_magnitude_betting_fraction = curr_meta_magnitude_betting_fraction
            prev_meta_magnitude_wealth = curr_meta_magnitude_wealth
            prev_meta_magnitude = curr_meta_magnitude

            # update meta-parameter
            meta_parameter = prev_meta_magnitude * prev_meta_direction
            all_meta_parameters.append(meta_parameter)

            # initialize the inner parameters
            n_points, n_dims = x.shape
            curr_inner_magnitude_betting_fraction = self.inner_magnitude_betting_fraction
            curr_inner_magnitude_wealth = self.inner_magnitude_wealth
            curr_inner_magnitude = curr_inner_magnitude_betting_fraction * curr_inner_magnitude_wealth
            curr_inner_direction = np.zeros(x.shape[1])

            temp_weight_vectors = []
            all_gradients = []
            shuffled_indexes = list(range(n_points))
            # np.random.shuffle(shuffled_indexes)
            for inner_iteration, curr_point_idx in enumerate(shuffled_indexes):
                inner_iteration = inner_iteration + 1
                prev_inner_direction = curr_inner_direction
                prev_inner_magnitude_betting_fraction = curr_inner_magnitude_betting_fraction
                prev_inner_magnitude_wealth = curr_inner_magnitude_wealth
                prev_inner_magnitude = curr_inner_magnitude

                # update inner weight vector
                weight_vector = prev_inner_magnitude * prev_inner_direction + meta_parameter
                temp_weight_vectors.append(weight_vector)

                # receive a new datapoint
                curr_x = x[curr_point_idx, :]
                curr_y = y[curr_point_idx]

                all_individual_cum_errors.append(loss(curr_x, curr_y, weight_vector, loss_name='absolute'))

                # compute the gradient
                subgrad = subgradient(curr_x, curr_y, weight_vector, loss_name='absolute')
                full_gradient = subgrad * curr_x
                all_gradients.append(full_gradient)

                # define inner step size
                inner_step_size = (1 / (self.L * self.R)) * np.sqrt(2 / inner_iteration)

                # update inner direction
                curr_inner_direction = l2_unit_ball_projection(prev_inner_direction - inner_step_size * full_gradient)

                # update inner magnitude_wealth
                curr_inner_magnitude_wealth = prev_inner_magnitude_wealth - 1 / (self.R * self.L) * full_gradient @ prev_inner_direction * prev_inner_magnitude

                # update magnitude_betting_fraction
                curr_inner_magnitude_betting_fraction = (1/inner_iteration) * ((inner_iteration-1) * prev_inner_magnitude_betting_fraction - (1/(self.L*self.R))*(full_gradient @ prev_inner_direction))

                # update magnitude
                curr_inner_magnitude = curr_inner_magnitude_betting_fraction * curr_inner_magnitude_wealth

            # define total iteration
            total_iter = total_iter + n_points

            # compute meta-gradient
            meta_gradient = np.sum(all_gradients, axis=0)

            # define meta step size
            meta_step_size = (1 / (self.L * self.R * n_points)) * np.sqrt(2 / task_iteration)

            # update meta-direction
            curr_meta_direction = l2_unit_ball_projection(prev_meta_direction - meta_step_size * meta_gradient)

            # update meta-magnitude_wealth
            curr_meta_magnitude_wealth = prev_meta_magnitude_wealth - (1 / (self.R * self.L * n_points)) * meta_gradient @ prev_meta_direction * prev_meta_magnitude

            # update meta-magnitude_betting_fraction
            curr_meta_magnitude_betting_fraction = (1/task_iteration) * ((task_iteration-1) * prev_meta_magnitude_betting_fraction - (1 / (self.L * self.R * n_points)) * (meta_gradient @ prev_meta_direction))

            # update meta-magnitude
            curr_meta_magnitude = curr_meta_magnitude_betting_fraction * curr_meta_magnitude_wealth

            all_final_weight_vectors.append(np.mean(temp_weight_vectors, axis=0))
            all_test_errors = []
            for idx, curr_test_task in enumerate(data.tr_task_indexes[:task_iteration]):
                all_test_errors.append(loss(data.features_ts[curr_test_task], data.labels_ts[curr_test_task], all_final_weight_vectors[idx], loss_name='absolute'))
            best_mtl_performances.append(np.nanmean(all_test_errors))

        self.all_meta_parameters = all_meta_parameters
        cumulative_avg_errors = pd.DataFrame(all_individual_cum_errors).rolling(
            window=10 ** 10, min_periods=1).mean().values.ravel()
        return best_mtl_performances, cumulative_avg_errors
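The direction updates above project onto the L2 unit ball. `l2_unit_ball_projection` is not shown in these snippets; the standard projection is:

import numpy as np

def l2_unit_ball_projection(vector):
    # Project onto the unit L2 ball: rescale only when the norm exceeds 1.
    norm = np.linalg.norm(vector)
    return vector if norm <= 1 else vector / norm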
Example #4
    def fit(self, data):

        best_perf = np.inf
        for _, inner_step_size in enumerate(self.inner_step_size_range):
            for _, meta_step_size in enumerate(self.meta_step_size_range):
                all_individual_cum_errors = []
                all_mtl_performances = []
                all_final_weight_vectors = []

                curr_metaparameter = np.zeros(data.features_tr[0].shape[1])
                for task_iteration, task in enumerate(data.tr_task_indexes):
                    x = data.features_tr[task]
                    y = data.labels_tr[task]

                    task_iteration = task_iteration + 1

                    # initialize the inner parameters
                    n_points, n_dims = x.shape

                    curr_untranslated_weights = np.zeros(n_dims)
                    temp_weight_vectors = []
                    shuffled_indexes = list(range(n_points))
                    # np.random.shuffle(shuffled_indexes)
                    for inner_iteration, curr_point_idx in enumerate(
                            shuffled_indexes):
                        prev_untranslated_weights = curr_untranslated_weights
                        prev_metaparameter = curr_metaparameter

                        # update inner weight vector
                        curr_weights = curr_untranslated_weights + curr_metaparameter
                        temp_weight_vectors.append(curr_weights)

                        # receive a new datapoint
                        curr_x = x[curr_point_idx, :]
                        curr_y = y[curr_point_idx]

                        all_individual_cum_errors.append(
                            loss(curr_x,
                                 curr_y,
                                 curr_weights,
                                 loss_name='absolute'))

                        # compute the gradient
                        subgrad = subgradient(curr_x,
                                              curr_y,
                                              curr_weights,
                                              loss_name='absolute')
                        full_gradient = subgrad * curr_x

                        # update metaparameters
                        curr_metaparameter = prev_metaparameter - meta_step_size * full_gradient

                        # update the untranslated weights
                        curr_untranslated_weights = prev_untranslated_weights - inner_step_size * full_gradient

                    all_final_weight_vectors.append(
                        np.mean(temp_weight_vectors, axis=0))
                    all_test_errors = []
                    for idx, curr_test_task in enumerate(
                            data.tr_task_indexes[:task_iteration]):
                        all_test_errors.append(
                            loss(data.features_ts[curr_test_task],
                                 data.labels_ts[curr_test_task],
                                 all_final_weight_vectors[idx],
                                 loss_name='absolute'))
                    all_mtl_performances.append(np.nanmean(all_test_errors))

                # running average of the per-point errors (the window is
                # larger than the sequence, so this is the cumulative mean)
                average_cum_errors = pd.DataFrame(
                    all_individual_cum_errors).rolling(
                        window=10**10, min_periods=1).mean().values.ravel()
                if average_cum_errors[-1] < best_perf:
                    best_perf = average_cum_errors[-1]
                    best_mtl_performances = all_mtl_performances
                    best_average = average_cum_errors
        return best_mtl_performances, best_average
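A note on the recurring `rolling(window=10**10, min_periods=1).mean()` idiom: the window is far larger than any error sequence used here, so the expression is just the running (cumulative) mean of the per-point errors. A small check of that equivalence:

import numpy as np
import pandas as pd

errors = [3.0, 1.0, 2.0, 4.0]
rolling_mean = pd.DataFrame(errors).rolling(
    window=10**10, min_periods=1).mean().values.ravel()
running_mean = np.cumsum(errors) / np.arange(1, len(errors) + 1)
assert np.allclose(rolling_mean, running_mean)  # both are [3.0, 2.0, 2.0, 2.5]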
Example #5
    def fit(self, data):

        best_perf = np.inf
        for _, inner_step_size in enumerate(self.inner_step_size_range):
            for _, meta_step_size in enumerate(self.meta_step_size_range):
                all_individual_cum_errors = []
                all_mtl_performances = []
                all_final_weight_vectors = []

                curr_metaparameter = np.zeros(data.features_tr[0].shape[1])
                for task_iteration, task in enumerate(data.tr_task_indexes):
                    x = data.features_tr[task]
                    y = data.labels_tr[task]

                    # one-based task counter, as in the other examples; without
                    # it the test-error slice below would skip the current task
                    task_iteration = task_iteration + 1

                    prev_metaparameter = curr_metaparameter
                    temp_weight_vectors = []
                    all_gradients = []
                    # initialize the inner parameters
                    n_points, n_dims = x.shape

                    curr_untranslated_weights = np.zeros(n_dims)

                    shuffled_indexes = list(range(n_points))
                    # np.random.shuffle(shuffled_indexes)
                    for inner_iteration, curr_point_idx in enumerate(
                            shuffled_indexes):

                        prev_untranslated_weights = curr_untranslated_weights

                        # update inner weight vector
                        curr_weights = curr_untranslated_weights + curr_metaparameter
                        temp_weight_vectors.append(curr_weights)

                        # receive a new datapoint
                        curr_x = x[curr_point_idx, :]
                        curr_y = y[curr_point_idx]

                        all_individual_cum_errors.append(
                            loss(curr_x,
                                 curr_y,
                                 curr_weights,
                                 loss_name='absolute'))

                        # compute the gradient
                        subgrad = subgradient(curr_x,
                                              curr_y,
                                              curr_weights,
                                              loss_name='absolute')
                        full_gradient = subgrad * curr_x
                        all_gradients.append(full_gradient)

                        # update the untranslated weights
                        curr_untranslated_weights = prev_untranslated_weights - inner_step_size * full_gradient

                    # update metaparameters
                    curr_metaparameter = prev_metaparameter - meta_step_size * np.sum(
                        all_gradients, axis=0)

                    all_final_weight_vectors.append(
                        np.mean(temp_weight_vectors, axis=0))
                    all_test_errors = []
                    for idx, curr_test_task in enumerate(
                            data.tr_task_indexes[:task_iteration]):
                        all_test_errors.append(
                            loss(data.features_ts[curr_test_task],
                                 data.labels_ts[curr_test_task],
                                 all_final_weight_vectors[idx],
                                 loss_name='absolute'))
                    all_mtl_performances.append(np.nanmean(all_test_errors))

                # running average of the per-point errors (the window is
                # larger than the sequence, so this is the cumulative mean)
                average_cum_errors = pd.DataFrame(
                    all_individual_cum_errors).rolling(
                        window=10**10, min_periods=1).mean().values.ravel()
                if average_cum_errors[-1] < best_perf:
                    best_perf = average_cum_errors[-1]
                    best_inner = inner_step_size
                    best_meta = meta_step_size
                    best_average = average_cum_errors
                    best_mtl_performances = all_mtl_performances

        return best_mtl_performances, best_average
Example #6
    def fit(self, data):
        meta_wealth_range = [1]
        inner_wealth_range = [1]

        best_cumsum_perf = np.inf
        for meta_idx, meta_wealth in enumerate(meta_wealth_range):
            for inner_idx, inner_wealth in enumerate(inner_wealth_range):
                all_individual_cum_errors = []

                curr_meta_fraction = self.meta_magnitude_betting_fraction
                curr_meta_wealth = meta_wealth
                curr_meta_magnitude = curr_meta_fraction * curr_meta_wealth
                curr_meta_direction = np.zeros(data.features_tr[0].shape[1])

                all_final_weight_vectors = []
                all_h_meta = []
                all_mtl_performances = []
                all_meta_parameters = []
                for task_iteration, task in enumerate(data.tr_task_indexes):
                    x = data.features_tr[task]
                    y = data.labels_tr[task]

                    task_iteration = task_iteration + 1

                    # initialize the inner parameters
                    n_points, n_dims = x.shape
                    curr_inner_fraction = self.inner_magnitude_betting_fraction
                    curr_inner_wealth = inner_wealth
                    curr_inner_magnitude = curr_inner_fraction * curr_inner_wealth
                    curr_inner_direction = np.zeros(n_dims)

                    all_h_inner = []

                    temp_weight_vectors = []
                    shuffled_indexes = list(range(n_points))
                    # np.random.shuffle(shuffled_indexes)
                    for inner_iteration, curr_point_idx in enumerate(shuffled_indexes):
                        inner_iteration = inner_iteration + 1

                        prev_meta_direction = curr_meta_direction
                        prev_meta_fraction = curr_meta_fraction
                        prev_meta_wealth = curr_meta_wealth
                        prev_meta_magnitude = curr_meta_magnitude

                        prev_inner_direction = curr_inner_direction
                        prev_inner_fraction = curr_inner_fraction
                        prev_inner_wealth = curr_inner_wealth
                        prev_inner_magnitude = curr_inner_magnitude

                        # define total iteration
                        total_iter = self.__general_iteration(task_iteration, n_points, inner_iteration)

                        # update meta-parameter
                        meta_parameter = prev_meta_magnitude * prev_meta_direction
                        all_meta_parameters.append(meta_parameter)

                        # update inner weight vector
                        weight_vector = prev_inner_magnitude * prev_inner_direction + meta_parameter
                        temp_weight_vectors.append(weight_vector)

                        # receive a new datapoint
                        curr_x = x[curr_point_idx, :]
                        curr_y = y[curr_point_idx]

                        all_individual_cum_errors.append(loss(curr_x, curr_y, weight_vector, loss_name='absolute'))

                        # compute the gradient
                        subgrad = subgradient(curr_x, curr_y, weight_vector, loss_name='absolute')
                        full_gradient = subgrad * curr_x

                        # define meta step size
                        meta_step_size = (1 / (self.L * self.R)) * np.sqrt(2 / total_iter)

                        # update meta-direction
                        curr_meta_direction = l2_unit_ball_projection(prev_meta_direction - meta_step_size * full_gradient)

                        # define inner step size
                        inner_step_size = (1 / (self.L * self.R)) * np.sqrt(2 / inner_iteration)

                        # update inner direction
                        curr_inner_direction = l2_unit_ball_projection(prev_inner_direction - inner_step_size * full_gradient)

                        # update meta-magnitude_wealth
                        curr_meta_wealth = prev_meta_wealth - (1 / (self.R * self.L)) * full_gradient @ prev_meta_direction * prev_meta_magnitude

                        # meta h thing
                        h_meta = (1 / (self.R * self.L)) * (full_gradient @ prev_meta_direction) * (1 / (1 - (1 / (self.R * self.L)) * (full_gradient @ prev_meta_direction) * prev_meta_fraction))
                        all_h_meta.append(h_meta)
                        a_thing_meta = 1 + np.sum([curr_h ** 2 for curr_h in all_h_meta])

                        # update meta-magnitude_betting_fraction
                        curr_meta_fraction = np.max([np.min([prev_meta_fraction - (2 / (2 - np.log(3))) * (h_meta / a_thing_meta), 1 / 2]), -1 / 2])

                        # update meta-magnitude
                        curr_meta_magnitude = curr_meta_fraction * curr_meta_wealth

                        # update inner magnitude_wealth
                        curr_inner_wealth = prev_inner_wealth - 1 / (self.R * self.L) * full_gradient @ prev_inner_direction * prev_inner_magnitude

                        # update magnitude_betting_fraction
                        h_inner = (1 / (self.R * self.L)) * full_gradient @ prev_inner_direction * (1 / (1 - (1 / (self.R * self.L)) * full_gradient @ prev_inner_direction * prev_inner_fraction))
                        all_h_inner.append(h_inner)
                        a_thing_inner = 1 + np.sum([curr_h**2 for curr_h in all_h_inner])
                        # update magnitude_betting_fraction
                        curr_inner_fraction = np.max([np.min([prev_inner_fraction - (2 / (2 - np.log(3))) * (h_inner / a_thing_inner), 1/2]), -1/2])

                        # update magnitude
                        curr_inner_magnitude = curr_inner_fraction * curr_inner_wealth

                    all_final_weight_vectors.append(np.mean(temp_weight_vectors, axis=0))
                    all_test_errors = []
                    for idx, curr_test_task in enumerate(data.tr_task_indexes[:task_iteration]):
                        all_test_errors.append(loss(data.features_ts[curr_test_task], data.labels_ts[curr_test_task], all_final_weight_vectors[idx], loss_name='absolute'))
                    all_mtl_performances.append(np.nanmean(all_test_errors))

                if np.nanmean(all_individual_cum_errors) < best_cumsum_perf:
                    best_cumsum_perf = np.nanmean(all_individual_cum_errors)
                    best_cumsum_performances = all_individual_cum_errors
                    best_mtl_performances = all_mtl_performances
                    print('inner wealth: %8.2f | meta wealth: %8.2f |       perf: %10.3f' % (inner_wealth, meta_wealth, np.nanmean(all_individual_cum_errors)))
                else:
                    print('inner wealth: %8.2f | meta wealth: %8.2f | perf: %10.3f' % (inner_wealth, meta_wealth, np.nanmean(all_individual_cum_errors)))

        best_running_avg_errors = pd.DataFrame(best_cumsum_performances).rolling(
            window=10 ** 10, min_periods=1).mean().values.ravel()
        return best_mtl_performances, best_running_avg_errors
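The `h` / `a_thing` updates above (and again in Example #7) follow the online-Newton-step betting-fraction update from coin-betting style parameter-free online learning: the gradient signal is normalized by `R * L`, passed through the ONS correction, and the fraction is clipped to [-1/2, 1/2]. Isolated, with hypothetical names, one step looks like:

import numpy as np

def betting_fraction_step(prev_fraction, normalized_grad, all_h):
    # Hypothetical isolation of the update used above; normalized_grad
    # stands for (full_gradient @ prev_direction) / (R * L).
    h = normalized_grad / (1 - normalized_grad * prev_fraction)
    all_h.append(h)
    a_thing = 1 + sum(curr_h ** 2 for curr_h in all_h)
    new_fraction = prev_fraction - (2 / (2 - np.log(3))) * (h / a_thing)
    return np.clip(new_fraction, -1 / 2, 1 / 2)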
Example #7
    def fit(self, data, task_indexes):

        all_mtl_performances = []
        all_errors = []
        total_points = 0
        for task_idx, task in enumerate(getattr(data, task_indexes)):
            x = data.features_tr[task]
            y = data.labels_tr[task]
            n_points, n_dims = x.shape
            total_points = total_points + n_points

            wealth_range = [1]
            for idx, wealth in enumerate(wealth_range):
                curr_bet_fraction = self.magnitude_betting_fraction

                curr_wealth = wealth  # self.magnitude_wealth
                curr_magnitude = curr_bet_fraction * curr_wealth
                curr_direction = np.random.randn(n_dims)

                all_weight_vectors = []
                all_h = []

                shuffled_indexes = list(range(n_points))
                np.random.shuffle(shuffled_indexes)
                for iteration, curr_point_idx in enumerate(shuffled_indexes):
                    iteration = iteration + 1
                    prev_direction = curr_direction
                    prev_bet_fraction = curr_bet_fraction
                    prev_wealth = curr_wealth
                    prev_magnitude = curr_magnitude

                    # update weight vector
                    weight_vector = prev_magnitude * prev_direction + self.fixed_bias
                    all_weight_vectors.append(weight_vector)

                    # receive a new datapoint
                    curr_x = x[curr_point_idx, :]
                    curr_y = y[curr_point_idx]

                    all_errors.append(
                        loss(curr_x,
                             curr_y,
                             weight_vector,
                             loss_name='absolute'))

                    # compute the gradient
                    subgrad = subgradient(curr_x,
                                          curr_y,
                                          weight_vector,
                                          loss_name='absolute')
                    full_gradient = subgrad * curr_x

                    # define step size
                    step_size = 1 / (self.L * self.R) * np.sqrt(2 / iteration)

                    # update direction
                    curr_direction = l2_unit_ball_projection(prev_direction -
                                                             step_size *
                                                             full_gradient)

                    # update magnitude_wealth
                    curr_wealth = prev_wealth - (
                        1 / (self.R * self.L)
                    ) * full_gradient @ prev_direction * prev_magnitude

                    # h_thing
                    h = (
                        1 / (self.R * self.L)
                    ) * full_gradient @ prev_direction * (
                        1 /
                        (1 - (1 / (self.R * self.L)) *
                         (full_gradient @ prev_direction) * prev_bet_fraction))
                    all_h.append(h)
                    a_thing = 1 + np.sum([curr_h**2 for curr_h in all_h])

                    # update magnitude_betting_fraction
                    curr_bet_fraction = np.max([
                        np.min([
                            prev_bet_fraction - (2 / (2 - np.log(3))) *
                            (h / a_thing), 1 / 2
                        ]), -1 / 2
                    ])

                    # update magnitude
                    curr_magnitude = curr_bet_fraction * curr_wealth

                    if len(all_weight_vectors) < 2:
                        final_w = weight_vector
                    else:
                        final_w = np.mean(all_weight_vectors, axis=0)

                curr_test_perf = loss(data.features_ts[task],
                                      data.labels_ts[task],
                                      final_w,
                                      loss_name='absolute')

                all_mtl_performances.append(curr_test_perf)

        return ((task_idx + 1) * [np.nanmean(all_mtl_performances)],
                total_points * [np.nanmean(all_errors)])
Example #8
    def fit(self, data):

        all_individual_cum_errors = []

        best_metaparameter = np.zeros(data.features_tr[0].shape[1])
        for task_iteration, task in enumerate(data.tr_task_indexes):
            x = data.features_tr[task]
            y = data.labels_tr[task]

            # initialize the inner parameters
            n_points, n_dims = x.shape

            curr_metaparameter = best_metaparameter

            best_perf = np.inf
            for meta_idx, meta_step_size in enumerate(
                    self.meta_step_size_range):
                for inner_idx, inner_step_size in enumerate(
                        self.inner_step_size_range):

                    # every (meta, inner) step-size pair restarts from the
                    # meta-parameter this task started with
                    if meta_idx == 0 and inner_idx == 0:
                        initial_metaparameter = deepcopy(curr_metaparameter)
                    else:
                        curr_metaparameter = initial_metaparameter

                    temp_cum_errors = []
                    curr_untranslated_weights = np.zeros(n_dims)
                    shuffled_indexes = list(range(n_points))
                    # np.random.shuffle(shuffled_indexes)
                    for inner_iteration, curr_point_idx in enumerate(
                            shuffled_indexes):
                        prev_untranslated_weights = curr_untranslated_weights
                        prev_metaparameter = curr_metaparameter

                        # update inner weight vector
                        curr_weights = curr_untranslated_weights + prev_metaparameter

                        # receive a new datapoint
                        curr_x = x[curr_point_idx, :]
                        curr_y = y[curr_point_idx]

                        temp_cum_errors.append(
                            loss(curr_x,
                                 curr_y,
                                 curr_weights,
                                 loss_name='absolute'))

                        # compute the gradient
                        subgrad = subgradient(curr_x,
                                              curr_y,
                                              curr_weights,
                                              loss_name='absolute')
                        full_gradient = subgrad * curr_x

                        # update metaparameters
                        curr_metaparameter = prev_metaparameter - meta_step_size * full_gradient

                        # update the untranslated weights
                        curr_untranslated_weights = prev_untranslated_weights - inner_step_size * full_gradient

                    curr_cum_sum = pd.DataFrame(temp_cum_errors).rolling(
                        window=10**10, min_periods=1).mean().values.ravel()
                    if curr_cum_sum[-1] < best_perf:
                        best_perf = curr_cum_sum[-1]
                        best_cum_errors = temp_cum_errors
                        best_metaparameter = deepcopy(curr_metaparameter)
                        print(
                            '%3d | best params: %10.5f | %10.5f | error: %5.3f'
                            % (task_iteration, inner_step_size, meta_step_size,
                               best_perf))
            all_individual_cum_errors = all_individual_cum_errors + best_cum_errors
        return None, pd.DataFrame(all_individual_cum_errors).rolling(
            window=10**10, min_periods=1).mean().values.ravel()
Example #9
    def fit(self, data):

        best_perf = np.inf

        counter_val = 0

        for _, gamma_par in enumerate(self.gamma_par_range):
            for _, lambda_par in enumerate(self.lambda_par_range):

                counter_val = counter_val + 1
                # print(f'val: ', counter_val, ' on ', len(self.lambda_par_range) * len(self.gamma_par_range))

                all_meta_parameters_temp = []
                # temporary memory for the best val and test error curves
                all_average_val_errors_temp = []
                all_average_test_errors_temp = []

                # initialize meta-parameter
                meta_parameter = np.zeros(data.features_tr[0].shape[1])

                for task_tr_index, task_tr in enumerate(data.tr_task_indexes):

                    # print(f'TRAINING task', task_tr_index + 1)

                    x = data.features_tr[task_tr]
                    y = data.labels_tr[task_tr]

                    curr_weights, average_weights = inner_algorithm(
                        x, y, lambda_par, meta_parameter, self.loss_name)

                    # compute the meta-gradient
                    meta_gradient = -lambda_par * (curr_weights -
                                                   meta_parameter)

                    # update the meta_parameter
                    meta_parameter = meta_parameter - gamma_par * meta_gradient
                    all_meta_parameters_temp.append(meta_parameter)
                    average_meta_parameter = np.mean(all_meta_parameters_temp,
                                                     axis=0)

                    # compute the error on the validation and test tasks with average_meta_parameter
                    all_val_errors_temp = []
                    for _, task_val in enumerate(data.val_task_indexes):
                        x_tr = data.features_tr[task_val]
                        y_tr = data.labels_tr[task_val]
                        x_ts = data.features_ts[task_val]
                        y_ts = data.labels_ts[task_val]
                        curr_weights, average_weights = inner_algorithm(
                            x_tr, y_tr, lambda_par, average_meta_parameter,
                            self.loss_name)
                        val_error = loss(x_ts, y_ts, average_weights,
                                         self.loss_name)
                        all_val_errors_temp.append(val_error)
                    average_val_error = np.mean(all_val_errors_temp)
                    all_average_val_errors_temp.append(average_val_error)

                    all_test_errors_temp = []
                    for _, task_ts in enumerate(data.test_task_indexes):
                        x_tr = data.features_tr[task_ts]
                        y_tr = data.labels_tr[task_ts]
                        x_ts = data.features_ts[task_ts]
                        y_ts = data.labels_ts[task_ts]
                        curr_weights, average_weights = inner_algorithm(
                            x_tr, y_tr, lambda_par, average_meta_parameter,
                            self.loss_name)
                        test_error = loss(x_ts, y_ts, average_weights,
                                          self.loss_name)
                        all_test_errors_temp.append(test_error)
                    average_test_error = np.mean(all_test_errors_temp)
                    all_average_test_errors_temp.append(average_test_error)

                # select the hyper-parameters for which the last training task's average validation error is minimized
                if average_val_error < best_perf:
                    best_perf = average_val_error
                    best_lambda_par = lambda_par
                    best_gamma_par = gamma_par
                    all_best_performances = all_average_test_errors_temp

        print(f'best lambda: {best_lambda_par}  best gamma: {best_gamma_par}')
        print(f'best test error: {all_best_performances[-1]}')

        return all_best_performances
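The meta-gradient `-lambda_par * (curr_weights - meta_parameter)` is consistent with reading the inner problem as a lambda-regularized objective centered at the meta-parameter; under that assumption (not stated in this code), differentiating at the inner solution w(\theta) gives, in LaTeX:

F(\theta) = \min_{w} \; \frac{1}{n}\sum_{i=1}^{n} \ell(\langle x_i, w\rangle, y_i)
            + \frac{\lambda}{2}\,\lVert w - \theta\rVert^{2},
\qquad
\nabla_{\theta} F(\theta) = -\lambda\,\bigl(w(\theta) - \theta\bigr).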
Example #10
    def fit(self, data):

        best_perf = np.inf
        counter_val = 0

        for _, gamma_par in enumerate(self.gamma_par_range):
            for _, lambda_par in enumerate(self.lambda_par_range):

                counter_val = counter_val + 1
                # print(f'val: ', counter_val, ' on ', len(self.lambda_par_range) * len(self.gamma_par_range))

                all_meta_parameters_temp = []
                # temporary memory for the best val and test error curves
                all_average_val_errors_temp = []
                all_average_test_errors_temp = []

                # initialize meta-parameter; for the 'circle' dataset the
                # feature map acts on the side information instead of the features
                curr_b = np.zeros(data.features_tr[0].shape[1])
                sum_b = np.zeros(data.features_tr[0].shape[1])
                avg_b = np.zeros(data.features_tr[0].shape[1])
                if self.dataset == 'circle':
                    first_input = data.all_side_info[0]
                else:
                    first_input = data.features_tr[0]
                test_for_shape = feature_map(first_input, data.labels_tr[0],
                                             self.feature_map_name, self.r,
                                             self.W)
                curr_M = np.zeros([
                    data.features_tr[0].shape[1], test_for_shape.shape[0]
                ])
                sum_M = np.zeros([
                    data.features_tr[0].shape[1], test_for_shape.shape[0]
                ])
                avg_M = np.zeros([
                    data.features_tr[0].shape[1], test_for_shape.shape[0]
                ])

                idx_avg = 1
                for task_tr_index, task_tr in enumerate(data.tr_task_indexes):

                    # print(f'TRAINING task', task_tr_index + 1)

                    x = data.features_tr[task_tr]
                    y = data.labels_tr[task_tr]
                    if self.dataset == 'circle':
                        s = data.all_side_info[task_tr]
                        x_trasf_feature = feature_map(s, y,
                                                      self.feature_map_name,
                                                      self.r, self.W)
                    else:
                        x_trasf_feature = feature_map(x, y,
                                                      self.feature_map_name,
                                                      self.r, self.W)

                    # update the meta-parameter
                    curr_meta_parameter = avg_M @ x_trasf_feature + avg_b

                    curr_weights, average_weights = inner_algorithm(
                        x, y, lambda_par, curr_meta_parameter, self.loss_name)

                    # compute the meta-gradient
                    meta_gradient_b = -lambda_par * (curr_weights -
                                                     curr_meta_parameter)
                    meta_gradient_M = np.tensordot(meta_gradient_b,
                                                   x_trasf_feature, 0)

                    # update the meta_parameter
                    curr_b = curr_b - gamma_par * meta_gradient_b
                    curr_M = curr_M - gamma_par * meta_gradient_M

                    sum_M = sum_M + curr_M
                    avg_M = sum_M / idx_avg
                    sum_b = sum_b + curr_b
                    avg_b = sum_b / idx_avg

                    idx_avg = idx_avg + 1

                    # compute the error on the validation and test tasks with average_meta_parameter
                    all_val_errors_temp = []
                    for _, task_val in enumerate(data.val_task_indexes):
                        x_tr = data.features_tr[task_val]
                        y_tr = data.labels_tr[task_val]
                        x_ts = data.features_ts[task_val]
                        y_ts = data.labels_ts[task_val]
                        if self.dataset == 'circle':
                            s = data.all_side_info[task_val]
                            x_trasf_feature = feature_map(
                                s, y_tr, self.feature_map_name, self.r, self.W)
                        else:
                            x_trasf_feature = feature_map(
                                x_tr, y_tr, self.feature_map_name, self.r,
                                self.W)

                        curr_meta_parameter = avg_M @ x_trasf_feature + avg_b

                        curr_weights, average_weights = inner_algorithm(
                            x_tr, y_tr, lambda_par, curr_meta_parameter,
                            self.loss_name)
                        val_error = loss(x_ts, y_ts, average_weights,
                                         self.loss_name)
                        all_val_errors_temp.append(val_error)
                    average_val_error = np.mean(all_val_errors_temp)
                    all_average_val_errors_temp.append(average_val_error)

                    all_test_errors_temp = []
                    for _, task_ts in enumerate(data.test_task_indexes):
                        x_tr = data.features_tr[task_ts]
                        y_tr = data.labels_tr[task_ts]
                        x_ts = data.features_ts[task_ts]
                        y_ts = data.labels_ts[task_ts]
                        if self.dataset == 'circle':
                            s = data.all_side_info[task_ts]
                            x_trasf_feature = feature_map(
                                s, y_tr, self.feature_map_name, self.r, self.W)
                        else:
                            x_trasf_feature = feature_map(
                                x_tr, y_tr, self.feature_map_name, self.r,
                                self.W)

                        curr_meta_parameter = avg_M @ x_trasf_feature + avg_b

                        curr_weights, average_weights = inner_algorithm(
                            x_tr, y_tr, lambda_par, curr_meta_parameter,
                            self.loss_name)
                        test_error = loss(x_ts, y_ts, average_weights,
                                          self.loss_name)
                        all_test_errors_temp.append(test_error)
                    average_test_error = np.mean(all_test_errors_temp)
                    all_average_test_errors_temp.append(average_test_error)

                # select the hyper-parameters for which the average validation error is minimized
                if average_val_error < best_perf:
                    best_perf = average_val_error
                    best_lambda_par = lambda_par
                    best_gamma_par = gamma_par
                    all_best_performances = all_average_test_errors_temp

        print(f'best lambda: {best_lambda_par}  best gamma: {best_gamma_par}')
        print(f'best test error: {all_best_performances[-1]}')

        return all_best_performances
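`feature_map(x, y, feature_map_name, r, W)` turns a task's raw data (or its side information) into a fixed-length descriptor that conditions the meta-parameter through `avg_M @ x_trasf_feature + avg_b`. The real implementation is not shown; in the deliberately hypothetical stub below, pooling by mean, the use of `W` as projection weights, and `r` as an output scale are all assumptions:

import numpy as np

def feature_map(x, y, feature_map_name, r, W):
    # Hypothetical stub: summarize the task, project with W, and map
    # through a random-Fourier-style nonlinearity scaled by r.
    # y is accepted for signature compatibility and ignored here.
    task_summary = np.mean(np.atleast_2d(x), axis=0)
    if feature_map_name == 'fourier':
        projected = W @ task_summary
        return r * np.concatenate([np.cos(projected), np.sin(projected)])
    raise ValueError('unknown feature map: ' + feature_map_name)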