Code example #1
    def _test_forward_automatic_d_dynamic_d_hyper(self, method, optimizer_hypers=None,
                                                  **opt_kwargs):
        iris, x, y, model, w, out, error, accuracy = iris_logistic_regression(
            method.get_augmentation_multiplier())

        shape_w = w.get_shape()

        scalar_hyper = tf.Variable(1., name='scalar_hyper')
        vector_hyper = tf.Variable(tf.ones([3]), name='vector_hyper')

        tr_err = tf.identity(error + tf.reduce_sum(w ** 2) * scalar_hyper
                             + vector_hyper * tf.stack([tf.reduce_sum(w.tensor[:5]),
                                                        tf.reduce_sum(w.tensor[5:10]),
                                                        tf.reduce_sum(w.tensor[10:])]),
                             name='training_error')
        optimizer = method.create(w, loss=tr_err, **opt_kwargs)

        d_phi_d_scalar_hyper = optimizer.auto_d_dynamics_d_hyper(scalar_hyper)
        self.assertIsNotNone(d_phi_d_scalar_hyper.tensor)
        self.assertListEqual(d_phi_d_scalar_hyper.get_shape().as_list(), [shape_w[0].value, 1])

        d_phi_d_vector_hyper = optimizer.auto_d_dynamics_d_hyper(vector_hyper)
        self.assertIsNotNone(d_phi_d_vector_hyper.tensor)
        self.assertListEqual(d_phi_d_vector_hyper.get_shape().as_list(),
                             [shape_w[0].value, vector_hyper.get_shape()[0].value])

        if optimizer_hypers:
            [self.assertIsNotNone(optimizer.auto_d_dynamics_d_hyper(hyp)) for hyp in as_list(optimizer_hypers)]
Code example #2
    def __init__(self,
                 forward_hyper_grad,
                 hyperparameter_optimizers,
                 hyper_projections=None,
                 hyper_step=None):
        """
        Helper class to perform Real Time Hyperparameter optimization.
        See section 3.3 of Forward and Reverse Gradient-Based Hyperparameter Optimization
        (https://arxiv.org/abs/1703.01785)

        :param forward_hyper_grad:          instance of `ForwardHyperGradient`. Used to compute hyper-gradients
        :param hyperparameter_optimizers:   single or list of Optimizer for the hyper-parameter descent procedure
        :param hyper_projections:           (optional) list of assign ops that perform projections
                                            onto a convex subset of the hyperparameter space.
        :param hyper_step:                  (optional) instance of `GlobalStep` class that keeps track of the number
                                            of hyper-batches performed so far.
        """
        assert isinstance(forward_hyper_grad, ForwardHyperGradient)
        self.direct_doh = forward_hyper_grad

        assert isinstance(hyperparameter_optimizers, (list, Optimizer)), "hyper_opt_dicts should be a single " \
                                                                         "Optimizer or a list of Optimizer. Instead " \
                                                                         "is %s" % hyperparameter_optimizers
        self.hyper_opt_dicts = as_list(hyperparameter_optimizers)

        self.hyper_projections = hyper_projections or []

        self.hyper_step = hyper_step or GlobalStep()
Code example #3
def positivity(hyper_list):
    """
    Simple positivity constraints for a list of hyperparameters

    :param hyper_list: single variable or list of variables (hyperparameters)
    :return: single or list of assign ops, one for each variable in `hyper_list`
    """
    lst = [
        hyp.assign(tf.maximum(hyp, tf.zeros_like(hyp)))
        for hyp in as_list(hyper_list)
    ]
    return lst if len(lst) > 1 else lst[0]
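A minimal usage sketch (not part of the example above; it assumes TensorFlow 1.x and that `positivity` and the library's `as_list` helper are in scope): two hypothetical hyperparameter variables are projected onto the non-negative orthant.

import tensorflow as tf

lmbda = tf.Variable(-0.3, name='lmbda')          # hypothetical scalar hyperparameter
gamma = tf.Variable([-1., 0.5], name='gamma')    # hypothetical vector hyperparameter

project = positivity([lmbda, gamma])             # list of assign ops, one per variable

with tf.Session() as ss:
    ss.run(tf.global_variables_initializer())
    ss.run(project)                              # negative entries are clipped to 0
    print(ss.run([lmbda, gamma]))                # -> [0.0, array([0. , 0.5], dtype=float32)]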
Code example #4
    def __init__(self, fig, plot_streams, prefix='', delay=60, additional_operations=None, start_from=0, stop_at=10000):
        super(ReadSaveDictThread, self).__init__(daemon=True)
        self._fig = fig
        self._stop = threading.Event()
        self._plot_streams = as_list(plot_streams)
        self._prefix = prefix
        self._delay = delay
        self._additional_operations = additional_operations or []
        self._start_from = start_from
        self._stop_at = stop_at
        self.read_count = start_from

        self.exc = []
Code example #5
def continuous_plot(fig, plot_streams, prefix='', delay=120, additional_operations=None, start_from=0):
    plot_streams = as_list(plot_streams)
    additional_operations = additional_operations or []
    read_count = start_from
    while threading.current_thread().is_alive():
        [op.run() for op in additional_operations]
        updates = read_stream(prefix=prefix, start=read_count)
        read_count += len(updates)
        print(read_count)
        for upd in updates:
            [pls.process_save_dict(upd) for pls in plot_streams]
        [pls.plot() for pls in plot_streams]
        fig.canvas.draw()
        time.sleep(delay)
Code example #6
def generate_setting_dict(local_variables, excluded=None):
    """
    Generates a dictionary of (name, value) pairs from local variables (typically obtained with vars()) that
    can be saved at the beginning of the experiment. Furthermore, if an object obj in local_variables implements
    the method setting(), the result of obj.setting() is stored as its value in the dictionary.

    :param local_variables: dictionary of local variables, typically the result of vars()
    :param excluded: (optional, default []) variable or list of variables to be excluded.
    :return: A dictionary of settings (the current datetime is added under the key 'datetime')
    """
    excluded = as_list(excluded) or []
    setting_dict = {
        k: v.setting() if hasattr(v, 'setting') else v
        for k, v in local_variables.items() if v not in excluded
    }
    import datetime
    setting_dict['datetime'] = str(datetime.datetime.now())
    return setting_dict
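A minimal usage sketch (not in the source; it assumes `generate_setting_dict` above and the library's `as_list` helper are in scope): the arguments of a hypothetical experiment function are recorded while one throw-away local object is excluded.

def run_experiment(lr=0.01, batch_size=128, seed=0):
    scratch = object()                            # hypothetical local we do not want to log
    setting = generate_setting_dict(vars(), excluded=scratch)
    # -> {'lr': 0.01, 'batch_size': 128, 'seed': 0, 'datetime': '...'}
    return setting

print(run_experiment())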
Code example #7
    def run(self,
            T,
            train_feed_dict_supplier=None,
            val_feed_dict_suppliers=None,
            hyper_constraints_ops=None,
            _debug_no_hyper_update=False):  # TODO add session parameter
        """

        :param _debug_no_hyper_update: 
        :param T: number of steps
        :param train_feed_dict_supplier:
        :param val_feed_dict_suppliers:
        :param hyper_constraints_ops: (list of) either callable (no parameters) or tensorflow ops
        :return:
        """
        # idea: if steps == T then do full reverse (or forward) mode, otherwise do trho and rtho;
        # after all, the main difference is that in the full version the method `initialize()` is called
        # after the gradient has been computed.

        self.hyper_gradients.run_all(
            T,
            train_feed_dict_supplier=train_feed_dict_supplier,
            val_feed_dict_suppliers=val_feed_dict_suppliers,
            hyper_batch_step=self.hyper_batch_step.eval())
        if not _debug_no_hyper_update:
            [
                tf.get_default_session().run(hod.assign_ops)
                for hod in self.hyper_optimizers
            ]
            if hyper_constraints_ops:
                [
                    op() if callable(op) else op.eval()
                    for op in as_list(hyper_constraints_ops)
                ]

            self.hyper_batch_step.increase.eval()
Code example #8
File: datasets.py  Project: prithv1/RFHO
def redivide_data(datasets, partition_proportions=None, shuffle=False, filters=None, maps=None, balance_classes=False):
    """
    Function that redivides datasets. Can be use also to shuffle or filter or map examples.

    :param datasets: original datasets, instances of class Dataset (works with get_data and get_targets for
    compatibility with mnist datasets
    :param partition_proportions: (optional, default None)  list of fractions that can either sum up to 1 or less
    then one, in which case one additional partition is created with proportion 1 - sum(partition proportions).
    If None it will retain the same proportion of samples found in datasets
    :param shuffle: (optional, default False) if True shuffles the examples
    :param filters: (optional, default None) filter or list of filters: functions with signature
    (data, target, index) -> boolean (accept or reject the sample)
    :param maps: (optional, default None) map or list of maps: functions with signature
    (data, target, index) ->  (new_data, new_target) (maps the old sample to a new one, possibly also to more
    than one sample, for data augmentation)
    :return: a list of datasets of length equal to the (possibly augmented) partition_proportion
    """

    all_data = vstack([get_data(d) for d in datasets])
    all_labels = stack_or_concat([get_targets(d) for d in datasets])

    all_infos = np.concatenate([d.sample_info for d in datasets])

    N = all_data.shape[0]

    if partition_proportions:  # argument check
        partition_proportions = list([partition_proportions] if isinstance(partition_proportions, float)
                                     else partition_proportions)
        sum_proportions = sum(partition_proportions)
        assert sum_proportions <= 1, "partition proportions must sum up to at most one: %s" % sum_proportions
        if sum_proportions < 1.: partition_proportions += [1. - sum_proportions]
    else:
        partition_proportions = [1. * get_data(d).shape[0] / N for d in datasets]

    if shuffle:
        if sp and isinstance(all_data, sp.sparse.csr.csr_matrix): raise NotImplementedError()
        # if sk_shuffle:  # TODO this does not work!!! find a way to shuffle these matrices while
        # keeping compatibility with tensorflow!
        #     all_data, all_labels, all_infos = sk_shuffle(all_data, all_labels, all_infos)
        # else:
        permutation = np.arange(all_data.shape[0])
        np.random.shuffle(permutation)

        all_data = all_data[permutation]
        all_labels = np.array(all_labels[permutation])
        all_infos = np.array(all_infos[permutation])

    if filters:
        if sp and isinstance(all_data, sp.sparse.csr.csr_matrix): raise NotImplementedError()
        filters = as_list(filters)
        data_triple = [(x, y, d) for x, y, d in zip(all_data, all_labels, all_infos)]
        for fiat in filters:
            data_triple = [xy for i, xy in enumerate(data_triple) if fiat(xy[0], xy[1], xy[2], i)]
        all_data = np.vstack([e[0] for e in data_triple])
        all_labels = np.vstack([e[1] for e in data_triple])
        all_infos = np.vstack([e[2] for e in data_triple])

    if maps:
        if sp and isinstance(all_data, sp.sparse.csr.csr_matrix): raise NotImplementedError()
        maps = as_list(maps)
        data_triple = [(x, y, d) for x, y, d in zip(all_data, all_labels, all_infos)]
        for _map in maps:
            data_triple = [_map(xy[0], xy[1], xy[2], i) for i, xy in enumerate(data_triple)]
        all_data = np.vstack([e[0] for e in data_triple])
        all_labels = np.vstack([e[1] for e in data_triple])
        all_infos = np.vstack([e[2] for e in data_triple])

    N = all_data.shape[0]
    assert N == all_labels.shape[0]

    calculated_partitions = reduce(
        lambda v1, v2: v1 + [sum(v1) + v2],
        [int(N * prp) for prp in partition_proportions],
        [0]
    )
    calculated_partitions[-1] = N

    print('datasets.redivide_data: computed partition numbers -',
          calculated_partitions, 'len all', N, end=' ')

    new_general_info_dict = {}
    for data in datasets:
        new_general_info_dict = {**new_general_info_dict, **data.info}

    if balance_classes:
        new_datasets = []
        forbidden_indices = np.empty(0, dtype=np.int64)
        for d1, d2 in zip(calculated_partitions[:-1], calculated_partitions[1:-1]):
            indices = np.array(get_indices_balanced_classes(d2 - d1, all_labels, forbidden_indices))
            dataset = Dataset(data=all_data[indices], target=all_labels[indices],
                              sample_info=all_infos[indices],
                              info=new_general_info_dict)
            new_datasets.append(dataset)
            forbidden_indices = np.append(forbidden_indices, indices)
            test_if_balanced(dataset)
        remaining_indices = np.array(list(set(list(range(N))) - set(forbidden_indices)))
        new_datasets.append(Dataset(data=all_data[remaining_indices], target=all_labels[remaining_indices],
                                    sample_info=all_infos[remaining_indices],
                                    info=new_general_info_dict))
    else:
        new_datasets = [
            Dataset(data=all_data[d1:d2], target=all_labels[d1:d2], sample_info=all_infos[d1:d2],
                    info=new_general_info_dict)
            for d1, d2 in zip(calculated_partitions, calculated_partitions[1:])
            ]

    print('DONE')

    return new_datasets
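The boundary computation above can be checked in isolation. A small pure-Python sketch (no Dataset dependency; N and the proportions are made up) reproduces the `reduce` call: an extra partition absorbs the remainder when the proportions sum to less than one, and the last boundary is snapped to N.

from functools import reduce

N = 1000
partition_proportions = [0.7, 0.2]                           # e.g. train / validation
partition_proportions += [1. - sum(partition_proportions)]   # implicit remainder partition

calculated_partitions = reduce(
    lambda v1, v2: v1 + [sum(v1) + v2],
    [int(N * prp) for prp in partition_proportions],
    [0]
)
calculated_partitions[-1] = N                                 # last boundary snapped to N

print(calculated_partitions)                                  # [0, 700, 900, 1000]
print(list(zip(calculated_partitions, calculated_partitions[1:])))  # slice bounds of each split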
Code example #9
    def __init__(self, optimizer, hyper_dict, global_step=None):
        """
        Creates a new object that computes the hyper-gradient of validation errors in forward mode.
        See section 3.2 of Forward and Reverse Gradient-Based Hyperparameter Optimization
        (https://arxiv.org/abs/1703.01785)
        Note that this class only computes the hyper-gradient and does not perform hyperparameter optimization.

        :param optimizer: instance of Optimizer class, which represents the dynamics with which the model parameters
                            are updated
        :param hyper_dict: A dictionary of `{validation_error: hyper_pairs_list}` where
                            `validation_error` is a scalar tensor and `hyper_pairs_list` is a single pair or a list of
                            pairs (hyperparameter, derivative_of_dynamics_w.r.t. hyperparameter)
                            (matrix B_t in the paper). Unfortunately tensorflow does not compute Jacobians
                            efficiently yet (suggestions or pointers are welcome)
        :param global_step: (optional) instance of `GlobalStep` to keep track of the optimization step
        """
        assert isinstance(optimizer, Optimizer)

        self.w = optimizer.raw_w  # might be variable or MergedVariable (never tested on Variables actually) ...
        self.w_t = MergedVariable.get_tensor(self.w)  # this is always a tensor

        self.tr_dynamics = optimizer.dynamics

        assert isinstance(hyper_dict, dict), '%s is not an allowed type. Should be a dict of ' \
                                             '{tf.Tensor: list[(hyper-parameter, d_dynamics_d_hyper-parameter)]}' % hyper_dict

        self.hyper_list = []  # more comfortable to use
        self.d_dynamics_d_hypers = []
        self.hyper_dict = {}  # standardizes hyper_dict parameter
        for k, v in hyper_dict.items():
            list_v = as_list(v)
            assert isinstance(list_v[0], tuple), "Something's wrong in hyper_dict %s, at least in entry %s. Check!"\
                                                 % (hyper_dict, list_v[0])
            self.hyper_dict[k] = list_v  # be sure values are lists!
            self.hyper_list += [pair[0] for pair in list_v]
            self.d_dynamics_d_hypers += [pair[1] for pair in list_v]

        self.val_errors = []  # will follow the same order as hyper_list
        for hyp in self.hyper_list:  # find the right validation error for hyp!
            for k, v in hyper_dict.items():
                all_hypers = [pair[0] for pair in as_list(v)]
                if hyp in all_hypers:
                    self.val_errors.append(k)
                    break

        for i, der in enumerate(
                self.d_dynamics_d_hypers
        ):  # this automatic casting at the moment works only for SGD
            if not isinstance(der, ZMergedMatrix):
                print('Try casting d_dynamics_d_hyper to ZMergedMatrix')
                self.d_dynamics_d_hypers[i] = ZMergedMatrix(der)
                print('Successful')

        with self.w_t.graph.as_default():
            # global step
            self.global_step = global_step or GlobalStep()

            self.fw_ops = self.w.assign(
                self.tr_dynamics)  # TODO add here when hypers are sequence

            with tf.name_scope('direct_HO'):
                '''
                Creates one z per hyper-parameter and assumes that each hyper-parameter is a vector
                '''
                self.zs = [self._create_z(hyp) for hyp in self.hyper_list]

                self.zs_dynamics = [
                    optimizer.jac_z(z) + dd_dh
                    for z, dd_dh in zip(self.zs, self.d_dynamics_d_hypers)
                ]

                print('z dynamics', self.zs_dynamics[0])
                print('z', self.zs[0])

                self.zs_assigns = [
                    z.assign(z_dyn)
                    for z, z_dyn in zip(self.zs, self.zs_dynamics)
                ]

                self.grad_val_err = [
                    tf.gradients(v_e, self.w_t)[0] for v_e in self.val_errors
                ]
                assert all([
                    g is not None for g in self.grad_val_err
                ]), 'Some gradient of the validation error is None!'

                self.grad_wrt_hypers = [
                    dot(gve, z.tensor)
                    for z, gve in zip(self.zs, self.grad_val_err)
                ]

                with tf.name_scope(
                        'hyper_gradients'
                ):  # ADDED 28/3/17 keeps track of hyper-gradients as tf.Variable
                    self.hyper_gradient_vars = [
                        tf.Variable(tf.zeros_like(hyp), name=simple_name(hyp))
                        for hyp in self.hyper_list
                    ]
                    self.hyper_gradients_dict = {
                        hyp: hgv
                        for hyp, hgv  # redundant.. just for comfort ..
                        in zip(self.hyper_list, self.hyper_gradient_vars)
                    }
                    self._hyper_assign_ops = [
                        v.assign(ght) for v, ght in zip(
                            self.hyper_gradient_vars, self.grad_wrt_hypers)
                    ]
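What the class above builds symbolically can be checked numerically on a toy problem. The following pure-Python sketch (not RFHO code; all constants are made up) runs the forward recursion z_{t+1} = A_t z_t + B_t for gradient descent on a 1-D ridge objective 0.5*(w-a)^2 + 0.5*lmbda*w^2, forms the hyper-gradient as grad_val_err(w_T) * z_T with validation error 0.5*(w-b)^2, and compares it with a finite-difference estimate.

eta, a, b, lmbda, T = 0.1, 2.0, 1.5, 0.3, 50

def run(lmbda):
    w, z = 0.0, 0.0
    for _ in range(T):
        A = 1.0 - eta * (1.0 + lmbda)          # d Phi / d w   (Jacobian of the dynamics)
        B = -eta * w                           # d Phi / d lmbda (matrix B_t in the paper)
        z = A * z + B                          # forward accumulation: one z per hyperparameter
        w = w - eta * ((w - a) + lmbda * w)    # parameter update Phi(w, lmbda)
    return w, z

w_T, z_T = run(lmbda)
hyper_grad = (w_T - b) * z_T                   # dot(gradient of validation error, z_T)

eps = 1e-5                                     # finite-difference check of the hyper-gradient
num = (0.5 * (run(lmbda + eps)[0] - b) ** 2
       - 0.5 * (run(lmbda - eps)[0] - b) ** 2) / (2 * eps)
print(hyper_grad, num)                         # the two values should agree closely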
Code example #10
    def __init__(self,
                 optimizer,
                 hyper_dict,
                 state_history=None,
                 global_step=None):
        """
        Creates a new object that computes the hyper-gradient of validation errors in reverse mode.
        See section 3.1 of Forward and Reverse Gradient-Based Hyperparameter Optimization
        (https://arxiv.org/abs/1703.01785)
        Note that this class only computes the hyper-gradient and does not perform hyperparameter optimization.

        :param optimizer: instance of Optimizer class, which contains the dynamics with which the model parameters are
                            updated
        :param hyper_dict: A dictionary of `{validation_error: hyperparameter or list_of_hyperparameters}` where
                            `validation_error` is a scalar tensor and `list_of_hyperparameters` is a list
                            of tensorflow variables that represents the hyperparameters
        :param state_history: (default: empty list) state history manager:
                                should implement methods `clear`, `append`, `__getitem__`
        :param global_step: optional instance of GlobalStep class
        """
        assert isinstance(optimizer, Optimizer)

        self.w = optimizer.raw_w  # might be variable or MergedVariable
        #  TODO check if it works also with w as simple Variable
        self.w_t = MergedVariable.get_tensor(self.w)  # this is always a tensor

        self.tr_dynamics = optimizer.dynamics
        assert isinstance(hyper_dict, dict), '%s not allowed type. Should be a dict of ' \
                                             '(tf.Tensor, hyperparameters)' % hyper_dict
        self.val_error_dict = hyper_dict

        self.hyper_list = []
        for k, v in hyper_dict.items():
            self.hyper_list += as_list(v)
            self.val_error_dict[k] = as_list(v)  # be sure that are all lists

        self.w_hist = state_history or []

        with self.w_t.graph.as_default():
            # global step
            self.global_step = global_step or GlobalStep()

            self._fw_ops = optimizer.assign_ops  # TODO add here when hyper-parameters are sequence

            # backward assign ops
            with tf.name_scope('backward'):
                # equation (9)
                p_T = {
                    ve: tf.gradients(ve, self.w_t)[0]
                    for ve, hyp_list in self.val_error_dict.items()
                }  # deltaE(s_t)

                self.p_dict = {
                    ve: tf.Variable(pt, name='p')
                    for ve, pt in p_T.items()
                }

                # for nullity check
                self._abs_sum_p = tf.reduce_sum(
                    tf.stack([
                        tf.reduce_sum(tf.abs(p), name='l1_p')
                        for p in self.p_dict.values()
                    ]))

                # build Lagrangian function
                with tf.name_scope('lagrangian'):
                    self.lagrangians_dict = {
                        ve: dot(p, self.tr_dynamics)
                        for ve, p in self.p_dict.items()
                    }

                # TODO read below
                '''
                In the following {if else} block there are two ways of computing the dynamics of the update
                of the Lagrange multipliers. The procedures SHOULD produce the same result;
                however, for some strange reason, if w is indeed a state variable that contains auxiliary components
                (e.g. velocity in the Momentum algorithm, ...) there is a difference between the two methods and
                the right one is the first one. This is possibly due to the order in which the derivatives are
                taken by tensorflow, but further investigation is necessary.
                '''
                # detects if some auxiliary variables are used.
                if isinstance(self.w, MergedVariable) and \
                        any([isinstance(v, MergedVariable) for v in self.w.var_list(Vl_Mode.RAW)]):
                    state_components = self.w.var_list(Vl_Mode.TENSOR)

                    # equation (8)
                    self.p_dynamics = {
                        ve:
                        tf.concat(tf.gradients(lagrangian, state_components),
                                  0)
                        for ve, lagrangian in self.lagrangians_dict.items()
                    }
                else:
                    # equation (8)
                    self.p_dynamics = {
                        ve: tf.gradients(lagrangian, self.w_t)[0]
                        for ve, lagrangian in self.lagrangians_dict.items()
                    }  # equation (7)

                self._bk_ops = [
                    self.p_dict[ve].assign(self.p_dynamics[ve])
                    for ve in self.val_error_dict
                ]  # TODO add here when hp are sequ.

            with tf.name_scope('w_history_ops'):
                self._w_placeholder = tf.placeholder(self.w_t.dtype)

                self._back_hist_op = self.w.assign(self._w_placeholder)

            with tf.name_scope('hyper_derivatives'):
                # equation (10) without summation.
                self.hyper_derivatives = [
                    (self.val_error_dict[ve],
                     tf.gradients(lagrangian, self.val_error_dict[ve]))
                    for ve, lagrangian in self.lagrangians_dict.items()
                ]  # list of couples (hyper_list, list of symbolic hyper_gradients)  (lists are unhashable!)

            with tf.name_scope(
                    'hyper_gradients'
            ):  # ADDED 28/3/17 keeps track of hyper-gradients as tf.Variable
                self._grad_wrt_hypers_placeholder = tf.placeholder(
                    tf.float32, name='placeholder')
                # TODO this placeholder is not really necessary... just added to minimize the changes needed
                # (merge with RICCARDO)

                self.hyper_gradient_vars = [
                    tf.Variable(tf.zeros_like(hyp), name=simple_name(hyp))
                    for hyp in self.hyper_list
                ]
                self.hyper_gradients_dict = {
                    hyp: hgv
                    for hyp, hgv  # redundant.. just for comfort ..
                    in zip(self.hyper_list, self.hyper_gradient_vars)
                }

                self._hyper_assign_ops = {
                    h: v.assign(self._grad_wrt_hypers_placeholder)
                    for h, v in self.hyper_gradients_dict.items()
                }
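The reverse scheme above admits the same kind of toy check (a hedged sketch, not RFHO code; constants as in the forward-mode sketch): run the dynamics forward while storing the states, set p to the gradient of the validation error at w_T (equation (9)), then propagate p backwards (equation (8)) while accumulating the hyper-gradient contributions (equation (10)).

eta, a, b, lmbda, T = 0.1, 2.0, 1.5, 0.3, 50

w, w_hist = 0.0, []
for _ in range(T):                             # forward pass, storing the state history
    w_hist.append(w)
    w = w - eta * ((w - a) + lmbda * w)        # parameter update Phi(w, lmbda)

p = w - b                                      # p_T = gradient of the validation error at w_T
hyper_grad = 0.0
for w_t in reversed(w_hist):                   # backward pass
    hyper_grad += p * (-eta * w_t)             # p_{t+1} * d Phi / d lmbda
    p = p * (1.0 - eta * (1.0 + lmbda))        # p_t = p_{t+1} * d Phi / d w

print(hyper_grad)                              # matches the forward-mode sketch above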
Code example #11
def redivide_data(datasets, partition_proportions=None, shuffle=False, filters=None, maps=None):
    """
    Function that redivides datasets. Can be use also to shuffle or filter or map examples.

    :param datasets: original datasets, instances of class Dataset (works with get_data and get_targets for
    compatibility with mnist datasets
    :param partition_proportions: (optional, default None)  list of fractions that can either sum up to 1 or less
    then one, in which case one additional partition is created with proportion 1 - sum(partition proportions).
    If None it will retain the same proportion of samples found in datasets
    :param shuffle: (optional, default False) if True shuffles the examples
    :param filters: (optional, default None) filter or list of filters: functions with signature
    (data, target, index) -> boolean (accept or reject the sample)
    :param maps: (optional, default None) map or list of maps: functions with signature
    (data, target, index) ->  (new_data, new_target) (maps the old sample to a new one, possibly also to more
    than one sample, for data augmentation)
    :return: a list of datasets of length equal to the (possibly augmented) partition_proportion
    """
    all_data = np.vstack([get_data(d) for d in datasets])
    all_labels = np.vstack([get_targets(d) for d in datasets])

    all_infos = np.concatenate([d.sample_info_dicts for d in datasets])

    N = len(all_data)

    if partition_proportions:  # argument check
        partition_proportions = list([partition_proportions] if isinstance(partition_proportions, float)
                                     else partition_proportions)
        sum_proportions = sum(partition_proportions)
        assert sum_proportions <= 1, "partition proportions must sum up to at most one: %s" % sum_proportions
        if sum_proportions < 1.: partition_proportions += [1. - sum_proportions]
    else:
        partition_proportions = [1. * len(get_data(d)) / N for d in datasets]

    if shuffle:
        permutation = list(range(N))
        np.random.shuffle(permutation)

        all_data = np.array(all_data[permutation])
        all_labels = np.array(all_labels[permutation])
        all_infos = np.array(all_infos[permutation])

    if filters:
        filters = as_list(filters)
        data_triple = [(x, y, d) for x, y, d in zip(all_data, all_labels, all_infos)]
        for fiat in filters:
            data_triple = [xy for i, xy in enumerate(data_triple) if fiat(xy[0], xy[1], xy[2], i)]
        all_data = np.vstack([e[0] for e in data_triple])
        all_labels = np.vstack([e[1] for e in data_triple])
        all_infos = np.vstack([e[2] for e in data_triple])

    if maps:
        maps = as_list(maps)
        data_triple = [(x, y, d) for x, y, d in zip(all_data, all_labels, all_infos)]
        for _map in maps:
            data_triple = [_map(xy[0], xy[1], xy[2], i) for i, xy in enumerate(data_triple)]
        all_data = np.vstack([e[0] for e in data_triple])
        all_labels = np.vstack([e[1] for e in data_triple])
        all_infos = np.vstack([e[2] for e in data_triple])

    N = len(all_data)
    assert N == len(all_labels)

    calculated_partitions = reduce(
        lambda v1, v2: v1 + [sum(v1) + v2],
        [int(N * prp) for prp in partition_proportions],
        [0]
    )
    calculated_partitions[-1] = N

    print('datasets.redivide_data: computed partition numbers -',
          calculated_partitions, 'len all', len(all_data), end=' ')

    new_general_info_dict = {}
    for data in datasets:
        new_general_info_dict = {**new_general_info_dict, **data.general_info_dict}

    new_datasets = [
        Dataset(data=all_data[d1:d2], target=all_labels[d1:d2], sample_info_dicts=all_infos[d1:d2],
                general_info_dict=new_general_info_dict)
        for d1, d2 in zip(calculated_partitions, calculated_partitions[1:])
        ]

    print('DONE')

    return new_datasets
Code example #12
    def __init__(self, optimizer, hyper_dict, global_step=None, devices=None):
        """
        Creates a new object that computes the hyper-gradient of validation errors in forward mode.
        See section 3.2 of Forward and Reverse Gradient-Based Hyperparameter Optimization
        (https://arxiv.org/abs/1703.01785)
        Note that this class only computes the hyper-gradient and does not perform hyperparameter optimization.

        :param optimizer: instance of Optimizer class, which represents the dynamics with which the model parameters
                            are updated
        :param hyper_dict: A dictionary of `{validation_error: hyper_pairs_list}` where
                            `validation_error` is a scalar tensor and `hyper_pairs_list` is a single entry or a list
                            of entries, each either a pair (hyperparameter, derivative_of_dynamics_w.r.t.
                            hyperparameter) (matrix B_t in the paper) or just a hyperparameter, in which case the
                            derivative is computed automatically with `optimizer.auto_d_dynamics_d_hyper`.
                            Unfortunately tensorflow does not compute Jacobians efficiently yet
                            (suggestions or pointers are welcome)
        :param global_step: (optional) instance of `GlobalStep` to keep track of the optimization step
        :param devices: (optional) single device or list of devices over which the z variables and the related ops
                            are distributed in round-robin fashion
        """
        assert isinstance(optimizer, Optimizer)

        self.w = optimizer.raw_w  # might be variable or MergedVariable (never tested on Variables actually) ...
        self.w_t = self.w  # MergedVariable.get_tensor(self.w)  # this is always a tensor

        self.tr_dynamics = optimizer.dynamics

        assert isinstance(hyper_dict, dict), '%s is not an allowed type. Should be a dict of ' \
                                             '{tf.Tensor: list[(hyper-parameter, d_dynamics_d_hyper-parameter)]}' % hyper_dict

        self.hyper_list = []  # more comfortable to use
        self.d_dynamics_d_hypers = []
        self.hyper_dict = {}  # standardizes hyper_dict parameter
        self._inverse_hyper_dict = {}  # hyperparameter-validation error pairs
        for k, v in hyper_dict.items():
            list_v = as_list(v)
            # assert isinstance(list_v[0], tuple), "Something's wrong in hyper_dict %s, at least in entry%s. Check!"\
            #                                      % (hyper_dict, list_v[0])
            self.hyper_dict[k] = list_v  # be sure values are lists!
            self._inverse_hyper_dict = {
                **self._inverse_hyper_dict,
                **{hyp: k
                   for hyp in list_v}
            }
            self.hyper_list += [
                pair[0] if isinstance(pair, (tuple, list)) else pair
                for pair in list_v
            ]
            self.d_dynamics_d_hypers += [
                pair[1] if isinstance(pair, (tuple, list)) else
                optimizer.auto_d_dynamics_d_hyper(
                    pair)  # try to compute it automatically
                for pair in list_v
            ]

        self.val_errors = []  # will follow the same order as hyper_list
        for hyp in self.hyper_list:  # find the right validation error for hyp!
            for k, v in hyper_dict.items():
                all_hypers = [
                    pair[0] if isinstance(pair, (list, tuple)) else pair
                    for pair in as_list(v)
                ]
                if hyp in all_hypers:
                    self.val_errors.append(k)
                    break

        for i, der in enumerate(
                self.d_dynamics_d_hypers
        ):  # this automatic casting at the moment works only for SGD
            if not isinstance(der, ZMergedMatrix):
                print('Try casting d_dynamics_d_hyper to ZMergedMatrix')
                self.d_dynamics_d_hypers[i] = ZMergedMatrix(der)
                print('Successful')

        devices = as_list(devices)  # at most will be [None]

        with self.w_t.graph.as_default():
            # global step
            self.global_step = global_step or GlobalStep()

            self.fw_ops = optimizer.assign_ops  # add here when hypers are sequence (...)

            with tf.name_scope('ForwardHG'):
                '''
                Creates one z per hyper-parameter and assumes that each hyper-parameter is a vector
                '''
                self.grad_wrt_hypers, self.zs, self.zs_dynamics, self._zs_assigns = [], [], [], []
                self.hyper_gradient_vars, self._hyper_assign_ops = [], []

                self.grad_val_err = {
                    ve:
                    tf.identity(tf.gradients(ve, self.w_t)[0],
                                name='grad_val_err_%s' % simple_name(ve.name))
                    for ve in self.hyper_dict.keys()
                }
                self._gve_inv_dict = {
                    hyp: self.grad_val_err[ve]
                    for hyp, ve in self._inverse_hyper_dict.items()
                }

                for k, hyp in enumerate(self.hyper_list):
                    with tf.device(devices[k % len(devices)]):
                        self.zs.append(self._create_z(hyp))

                        with tf.name_scope('Z_dynamics'):
                            self.zs_dynamics.append(
                                optimizer.jac_z(self.zs[k]) +
                                self.d_dynamics_d_hypers[k])
                            self._zs_assigns.append(self.zs[k].assign(
                                self.zs_dynamics[k]))

                        self.grad_wrt_hypers.append(
                            dot(self._gve_inv_dict[hyp],
                                self.zs[k],
                                name='hyper_grad_wrt_h'))

                        with tf.name_scope('hyper_gradients'):
                            self.hyper_gradient_vars.append(
                                tf.Variable(tf.zeros_like(hyp),
                                            name=simple_name(hyp)))
                            self._hyper_assign_ops.append(
                                self.hyper_gradient_vars[k].assign(
                                    self.grad_wrt_hypers[k]))

                # final operations
                self.hyper_gradients_dict = {
                    hyp: hgv
                    for hyp, hgv  # redundant.. just for comfort ..
                    in zip(self.hyper_list, self.hyper_gradient_vars)
                }
                # hyper-gradient check
                assert all([g is not None for g in self.grad_val_err.values()]), \
                    'Some gradient of the validation error is None!'