def forward(self, data):
    if self._mode not in self._fn_forward:
        # First call in this mode: build the symbolic graph and compile
        # (and cache) a theano function for it.
        symb_in = tensors_for_ndarrays(data, 'X')
        symb_out = self(symb_in)
        extra_out = self.get_extra_outputs()
        fn = self._fn_forward[self._mode] = df.th.function(
            inputs=flatten(symb_in),
            outputs=flatten(symb_out) + flatten(extra_out)
        )
        fn._df2_extra = extra_out

    fn = self._fn_forward[self._mode]
    outs = fn(*flatten(data))
    return self._collect_extra_outputs(fn, outs)
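# Usage sketch (illustrative, not part of the library): `net` and `batch` are
# hypothetical names for any df.Module (or container of modules) and a numpy
# array, or nested structure of arrays, matching its input. The caching above
# compiles one theano function per `_mode`, so repeated calls in the same mode
# pay the compilation cost only once.
def _example_forward(net, batch):
    preds = net.forward(batch)  # first call in this mode: compiles the function
    preds = net.forward(batch)  # later calls: reuse the cached compiled function
    return preds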
def forward(self, num_input, num_target, with_penalties=True, per_sample=False):
    # NOTE: using the GPU for such trivial computations as most costs
    # is actually somewhat slower (e.g. for RMSE: GPU 1.2ms vs. CPU 0.2ms).
    # So ideally, we'd like to compile a CPU-version here, but I don't know how!
    if (with_penalties, per_sample) not in self._fn_forward:
        symb_in = tensors_for_ndarrays(num_input, 'Y')
        symb_tgt = tensors_for_ndarrays(num_target, 'T')
        symb_out = self(symb_in, symb_tgt, with_penalties)
        self._fn_forward[with_penalties, per_sample] = df.th.function(
            inputs=flatten(symb_in) + flatten(symb_tgt),
            outputs=symb_out if not per_sample else self._per_sample_cost
        )

    return self._fn_forward[with_penalties, per_sample](*(flatten(num_input) + flatten(num_target)))
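# Usage sketch (illustrative, not part of the library): `crit`, `preds` and
# `targets` are hypothetical placeholders for a criterion instance and numeric
# predictions/targets. Each (with_penalties, per_sample) combination compiles
# and caches its own theano function, so each of the three calls below may
# trigger a compilation on its first use.
def _example_cost(crit, preds, targets):
    total = crit.forward(preds, targets)                      # cost including penalty terms
    raw = crit.forward(preds, targets, with_penalties=False)  # cost without penalties
    per_n = crit.forward(preds, targets, per_sample=True)     # one cost value per sample
    return total, raw, per_n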
def __init__(self, n_features, eps=1e-5):
    """
    - `n_features` may be an integer (#features, or #feature-maps for
      images) or a tuple.
    - If a single integer, it indicates the size of axis 1, i.e. the first
      feature-axis. This is the only axis that will be normalized, using
      statistics computed across all other axes.
    - If a tuple, it indicates the sizes of multiple axes (starting at
      axis 1) which are considered feature-axes and will consequently be
      normalized using statistics computed across all other axes.
    - `eps` is a small number which is added to the variance in order to
      avoid computing sqrt(0) for features with zero variance.
    """
    df.Module.__init__(self)

    self.ndim = len(flatten(n_features))
    self.W = self._addparam(n_features, df.init.const(1), name='W_BN_{}'.format(n_features))
    self.b = self._addparam(n_features, df.init.const(0), name='b_BN_{}'.format(n_features), decay=False)
    self.Winf = self._addparam(n_features, df.init.const(1), name='W_BN_{}_inf'.format(n_features), learn=False)
    self.binf = self._addparam(n_features, df.init.const(0), name='b_BN_{}_inf'.format(n_features), learn=False)

    # These are buffers for collecting the minibatch statistics.
    self.buf_var = df.th.shared(_np.full(n_features, 1, df.floatX), name='BN_var_{}'.format(n_features))
    self.buf_mean = df.th.shared(_np.full(n_features, 0, df.floatX), name='BN_mean_{}'.format(n_features))
    self.buf_count = df.th.shared(_np.asarray(0, dtype=df.floatX), name='BN_count_{}'.format(n_features))

    self.eps = eps or 1e-5

    self.batch_mean = None
    self.batch_var = None
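# Usage sketch (illustrative, not part of the library): assuming this __init__
# belongs to a batch-normalization module (called `BatchNormalization` here for
# illustration, as the BN-style parameter names suggest), these are the two
# forms of `n_features` described in the docstring:
bn_vec = BatchNormalization(64)       # axis 1 holds 64 features; stats over all other axes
bn_two = BatchNormalization((8, 16))  # axes 1 and 2 are feature-axes of sizes 8 and 16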
def accumulate_statistics(self, data_in):
    if self._mode not in self._fn_accum_stats:
        symb_in = tensors_for_ndarrays(data_in, 'X')

        # Call forward once so it can compute some variables it'll actually
        # use in the stat-updates collection.
        self(symb_in)

        stat_updates = self.get_stat_updates()
        if not stat_updates:
            # If there's no layer collecting statistics, we don't need to
            # compile and call a function. This prevents theano errors.
            return

        # Make sure there's only one update per variable, for the case
        # where the same module instance appears at multiple places within
        # the graph. Also warn about it, since it's not obvious whether
        # just dropping the extra updates is the right thing to do in general.
        todo = set(upd[0] for upd in stat_updates)
        if len(todo) < len(stat_updates):
            uniq_updates = []
            for upd in stat_updates:
                if upd[0] in todo:
                    uniq_updates.append(upd)
                    todo.remove(upd[0])
                else:
                    print("WARNING: Dropped the following stat-update because that variable got multiple updates: {}".format(upd[0]))
            stat_updates = uniq_updates

        self._fn_accum_stats[self._mode] = df.th.function(
            inputs=flatten(symb_in),
            updates=stat_updates
        )

    self._fn_accum_stats[self._mode](*flatten(data_in))
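# Usage sketch (illustrative, not part of the library): `net` and `batches`
# are hypothetical. Feeding every minibatch once lets modules that collect
# running statistics (e.g. the batch-normalization buffers above) accumulate
# them over the whole dataset, presumably for later use at inference time.
def _example_accum_stats(net, batches):
    for batch in batches:
        net.accumulate_statistics(batch)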
def accumulate_gradients(self, data_in, data_tgt, crit):
    if (self._mode, id(crit)) not in self._fn_accum_grads:
        symb_in = tensors_for_ndarrays(data_in, 'X')
        symb_tgt = tensors_for_ndarrays(data_tgt, 'T')
        symb_out = self(symb_in)
        symb_cost = crit(symb_out, symb_tgt)
        extra_out = self.get_extra_outputs() + crit.get_extra_outputs()

        params = self.parameters(learnable_only=True)
        symb_grads = df.th.grad(cost=symb_cost, wrt=[p.param for p in params])

        # Accumulate: each call adds the fresh gradient onto the existing
        # gradient buffer instead of overwriting it.
        grads_updates = [(p.grad, p.grad + symb_grad) for p, symb_grad in zip(params, symb_grads)]
        fn = self._fn_accum_grads[self._mode, id(crit)] = df.th.function(
            inputs=flatten(symb_in) + flatten(symb_tgt),
            outputs=flatten(symb_cost) + flatten(extra_out),
            updates=grads_updates
        )
        fn._df2_extra = extra_out

    fn = self._fn_accum_grads[self._mode, id(crit)]
    args = flatten(data_in) + flatten(data_tgt)
    outs = fn(*args)
    return self._collect_extra_outputs(fn, outs)
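# Usage sketch (illustrative, not part of the library): `net`, `crit`,
# `inputs` and `targets` are hypothetical. Because the compiled function
# *adds* each fresh gradient onto `p.grad` rather than overwriting it,
# gradients from several calls accumulate until some external step clears
# the buffers and applies them; that step is outside this snippet.
def _example_accum_grads(net, crit, inputs, targets):
    cost = net.accumulate_gradients(inputs, targets, crit)  # returns the cost (plus any extra outputs)
    return cost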