Example #1
    def forward(self, data):
        if self._mode not in self._fn_forward:
            symb_in = tensors_for_ndarrays(data, 'X')
            symb_out = self(symb_in)
            extra_out = self.get_extra_outputs()
            fn = self._fn_forward[self._mode] = df.th.function(
                inputs=flatten(symb_in),
                outputs=flatten(symb_out) + flatten(extra_out)
            )
            fn._df2_extra = extra_out

        fn = self._fn_forward[self._mode]
        outs = fn(*flatten(data))
        return self._collect_extra_outputs(fn, outs)
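
The method above compiles a Theano function lazily and caches it per mode, so the expensive compilation happens at most once per mode. A minimal sketch of the same memoization idiom in plain Python, with the compilation step stubbed out (the class and names here are illustrative, not part of the library):

class CachedForward:
    def __init__(self):
        self._mode = 'train'   # e.g. 'train' or 'eval'
        self._fn_forward = {}  # mode -> compiled function

    def forward(self, x):
        if self._mode not in self._fn_forward:
            # Stand-in for the expensive df.th.function compilation above.
            self._fn_forward[self._mode] = lambda v: v * 2
        return self._fn_forward[self._mode](x)

net = CachedForward()
assert net.forward(3) == 6   # first call compiles and caches
assert net.forward(4) == 8   # later calls reuse the cached function
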
Example #2
    def forward(self, num_input, num_target, with_penalties=True, per_sample=False):
        # NOTE: Using the GPU for computations as trivial as most costs is
        # actually somewhat slower (e.g. for RMSE: GPU 1.2ms vs. CPU 0.2ms).
        # So ideally we'd compile a CPU-only version here, but I don't know how!
        if (with_penalties, per_sample) not in self._fn_forward:
            symb_in = tensors_for_ndarrays(num_input, 'Y')
            symb_tgt = tensors_for_ndarrays(num_target, 'T')
            symb_out = self(symb_in, symb_tgt, with_penalties)
            self._fn_forward[with_penalties, per_sample] = df.th.function(
                inputs=flatten(symb_in) + flatten(symb_tgt),
                outputs=symb_out if not per_sample else self._per_sample_cost
            )

        return self._fn_forward[with_penalties, per_sample](*(flatten(num_input) + flatten(num_target)))
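
To make the per_sample flag concrete: per_sample=True returns one cost value per sample instead of a single aggregate. A small numpy illustration using RMSE (chosen because the comment above mentions it; this is not the library's implementation):

import numpy as np

preds   = np.array([[1.0, 2.0], [3.0, 4.0]])
targets = np.array([[1.0, 0.0], [3.0, 8.0]])

per_sample = np.sqrt(np.mean((preds - targets)**2, axis=1))  # one value per row
aggregate = per_sample.mean()                                # single scalar cost

print(per_sample)  # [1.4142... 2.8284...]
print(aggregate)   # 2.1213...
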
Example #3
    def __init__(self, n_features, eps=1e-5):
        """
        - `n_features` may be an integer (#features, #feature-maps for images) or a tuple.
            - If a single integer, it indicates the size of the 1-axis, i.e. first feature-axis.
              This is the only axis that will be normalized using statistics across all other axes.
            - If a tuple, it indicates the sizes of multiple axes (starting at 1) which are
              considered feature-axes and will consequently be normalized over statistics across all other axes.
        - `eps` is a small number which is added to the variance in order to
          avoid computing sqrt(0) for features with zero variance.
        """
        df.Module.__init__(self)

        self.ndim = len(flatten(n_features))

        self.W = self._addparam(n_features, df.init.const(1), name='W_BN_{}'.format(n_features))
        self.b = self._addparam(n_features, df.init.const(0), name='b_BN_{}'.format(n_features), decay=False)

        self.Winf = self._addparam(n_features, df.init.const(1), name='W_BN_{}_inf'.format(n_features), learn=False)
        self.binf = self._addparam(n_features, df.init.const(0), name='b_BN_{}_inf'.format(n_features), learn=False)

        # These are buffers for collecting the minibatch statistics.
        self.buf_var = df.th.shared(_np.full(n_features, 1, df.floatX), name='BN_var_{}'.format(n_features))
        self.buf_mean = df.th.shared(_np.full(n_features, 0, df.floatX), name='BN_mean_{}'.format(n_features))
        self.buf_count = df.th.shared(_np.asarray(0, dtype=df.floatX), name='BN_count_{}'.format(n_features))

        self.eps = eps or 1e-5

        self.batch_mean = None
        self.batch_var = None
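
A hedged construction sketch for the docstring above; the class name BatchNormalization is an assumption (it does not appear in this snippet), and only the n_features semantics come from the docstring:

# Vector data of shape (batch, 128): one feature axis of size 128.
bn_vec = BatchNormalization(128)

# Image data of shape (batch, 64, H, W): axis 1 holds 64 feature maps,
# and statistics are computed across batch, H and W.
bn_img = BatchNormalization(64)

# Two feature axes starting at axis 1, of sizes 64 and 8; all remaining
# axes contribute to the normalization statistics.
bn_multi = BatchNormalization((64, 8), eps=1e-5)
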
Example #4
    def accumulate_statistics(self, data_in):
        if self._mode not in self._fn_accum_stats:
            symb_in = tensors_for_ndarrays(data_in, 'X')

            # Call forward once so the module can compute the variables it
            # will actually use when collecting the stat updates.
            self(symb_in)

            stat_updates = self.get_stat_updates()
            if not stat_updates:
                # If there's no layer collecting statistics, we don't need to
                # compile and call a function. This prevents theano errors.
                return

            # Need to make sure there's only one update per variable for the
            # case where we've got the same module instance at multiple places
            # within the graph.
            # Also warn about it, since it's not obvious whether just dropping
            # one of them is the right thing to do in general.
            todo = set(upd[0] for upd in stat_updates)
            if len(todo) < len(stat_updates):
                uniq_updates = []
                for upd in stat_updates:
                    if upd[0] in todo:
                        uniq_updates.append(upd)
                        todo.remove(upd[0])
                    else:
                        print("WARNING: Dropped the following stat-update because that variable got multiple updates: {}".format(upd[0]))
                stat_updates = uniq_updates

            self._fn_accum_stats[self._mode] = df.th.function(
                inputs=flatten(symb_in),
                updates=stat_updates
            )

        self._fn_accum_stats[self._mode](*flatten(data_in))
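
The de-duplication step above can be exercised in isolation. A minimal sketch with tuples standing in for Theano's (shared variable, update expression) pairs:

def dedup_updates(stat_updates):
    # Keep only the first update per variable, mirroring the loop above.
    todo = set(upd[0] for upd in stat_updates)
    if len(todo) == len(stat_updates):
        return stat_updates
    uniq_updates = []
    for upd in stat_updates:
        if upd[0] in todo:
            uniq_updates.append(upd)
            todo.remove(upd[0])
        else:
            print("WARNING: dropped duplicate update for {}".format(upd[0]))
    return uniq_updates

updates = [('mean', 1), ('var', 2), ('mean', 3)]
assert dedup_updates(updates) == [('mean', 1), ('var', 2)]
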
Example #5
    def accumulate_gradients(self, data_in, data_tgt, crit):
        if (self._mode, id(crit)) not in self._fn_accum_grads:
            symb_in = tensors_for_ndarrays(data_in, 'X')
            symb_tgt = tensors_for_ndarrays(data_tgt, 'T')
            symb_out = self(symb_in)
            symb_cost = crit(symb_out, symb_tgt)
            extra_out = self.get_extra_outputs() + crit.get_extra_outputs()

            params = self.parameters(learnable_only=True)
            symb_grads = df.th.grad(cost=symb_cost, wrt=[p.param for p in params])
            grads_updates = [(p.grad, p.grad + symb_grad) for p, symb_grad in zip(params, symb_grads)]

            fn = self._fn_accum_grads[self._mode, id(crit)] = df.th.function(
                inputs=flatten(symb_in) + flatten(symb_tgt),
                outputs=flatten(symb_cost) + flatten(extra_out),
                updates=grads_updates
            )
            fn._df2_extra = extra_out

        fn = self._fn_accum_grads[self._mode, id(crit)]
        args = flatten(data_in) + flatten(data_tgt)
        outs = fn(*args)
        return self._collect_extra_outputs(fn, outs)
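
Note that the updates (p.grad, p.grad + symb_grad) add to the gradient buffers rather than overwrite them, so calling accumulate_gradients on several minibatches sums their gradients until the buffers are reset. A plain-numpy sketch of that accumulation idiom (names illustrative):

import numpy as np

grad_buf = np.zeros(3)  # plays the role of one p.grad shared variable

def accumulate(minibatch_grad):
    # Mirrors the update pair (p.grad, p.grad + symb_grad) above.
    global grad_buf
    grad_buf = grad_buf + minibatch_grad

accumulate(np.array([1.0, 0.0, 2.0]))
accumulate(np.array([0.5, 1.0, 0.0]))
print(grad_buf)    # [1.5 1.  2. ] -- summed over both calls
grad_buf[:] = 0.0  # an optimizer step would typically reset the buffer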