import numpy as np

from chainer import cuda

# backward_var_iter_nodup, dynamic_scale and dynamic_unscale are
# project-local helpers.


def act_names(self):
    # Return the cached list if a previous call already computed it.
    if hasattr(self, '_activations_cached') and self._activations_cached is not None:
        return self._activations_cached
    # Dummy forward pass to build the computational graph.
    x = self.xp.zeros((64, 3, 32, 32)).astype('f')
    loss = self(x)
    variables = [loss] + [v for _, _, v in backward_var_iter_nodup(loss)]
    self._activations_cached = [v.name for v in variables if v.data is not None]
    return self._activations_cached
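# Hypothetical sketch: backward_var_iter_nodup is a project helper whose
# source is not shown here. A plausible implementation walks the graph
# backwards from the loss, yielding (rank, creator_function, variable)
# once per node, which is how the methods in this file enumerate
# activations.
def backward_var_iter_nodup(loss):
    seen = set()
    stack = [(0, loss.creator)]
    while stack:
        rank, func = stack.pop()
        if func is None or id(func) in seen:
            continue
        seen.add(id(func))
        for var in func.inputs:
            if id(var) not in seen:
                seen.add(id(var))
                yield rank, func, var
            if var.creator is not None:
                stack.append((rank + 1, var.creator))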
def act_names(self):
    if hasattr(self, '_activations_cached'):
        return self._activations_cached
    # Dummy forward pass to build the computational graph.
    x = self.xp.random.randn(1, 3, 32, 32).astype('f')
    loss = self(x)
    variables = [loss] + [v for _, _, v in backward_var_iter_nodup(loss)]
    # Remove duplicates, keeping each name's last occurrence
    # (dedup "from the bottom" of the traversal).
    a = [v.name for v in variables if v.data is not None]
    nodup = sorted(set(a), key=list(reversed(a)).index, reverse=True)
    self._activations_cached = nodup
    return self._activations_cached
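# Illustration only (not from the original source): the one-liner above
# keeps one entry per name, ordered by each name's *last* occurrence in
# the input list. `_dedup_keep_last` is a hypothetical name.
def _dedup_keep_last(a):
    # ['conv1', 'bn1', 'conv1', 'fc'] -> ['bn1', 'conv1', 'fc']
    return sorted(set(a), key=list(reversed(a)).index, reverse=True)


assert _dedup_keep_last(['conv1', 'bn1', 'conv1', 'fc']) == ['bn1', 'conv1', 'fc']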
def update(self, lossfun=None, *args, **kwds):
    '''Modified from class GradientMethod(Optimizer).'''
    if lossfun is not None:
        use_cleargrads = getattr(self, '_use_cleargrads', True)
        loss = lossfun(*args, **kwds)
        if use_cleargrads:
            self.target.cleargrads()
        else:
            self.target.zerograds()
        if hasattr(self, 'call_loss_hooks'):
            self.call_loss_hooks(self.target, loss)

        # SCRAMBLE IT! Set up every distinct scrambler once, then walk the
        # graph and scramble each stored activation in place.
        for scrambler in set(self.scrambler_map.values()):
            if scrambler is not None:
                scrambler.scramble_setup()
        for rank, func, var in backward_var_iter_nodup(loss):
            if var.data is None:
                continue
            scrambler = self.scrambler_map[var.name]
            if scrambler is not None:
                if self.dynamic_rescale:
                    sf = dynamic_scale(var, self.dynamic_rescale)
                scrambler.scramble(rank, func, var)
                if self.dynamic_rescale:
                    dynamic_unscale(var, sf)
                if self.compress_x_hat and func.label == 'BatchNormalizationFlexFunc':
                    if var.name and var.name.endswith('-c'):
                        scrambler.ascramble(func.x_hat)
                        # Poison the data so any accidental use is obvious:
                        # this variable *SHOULD NOT* be used after this point.
                        var.data *= cuda.numpy.NaN
        self.target.predictor.post_scramble_callback(loss, self.scrambler_map)

        loss.backward()
        del loss

    self.reallocate_cleared_grads()
    self.call_hooks()
    self.t += 1
    for param in self.target.params():
        param.update()
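# Hypothetical sketch: dynamic_scale/dynamic_unscale are project helpers
# whose source is not shown. One plausible reading, assuming
# `self.dynamic_rescale` is a target peak magnitude: boost the activation
# so its largest value reaches the target before the lossy scramble, then
# undo the scaling afterwards so downstream computation is unaffected.
def dynamic_scale(var, target):
    xp = cuda.get_array_module(var.data)  # numpy or cupy
    peak = float(xp.abs(var.data).max())
    sf = target / peak if peak > 0 else 1.0
    var.data *= sf
    return sf


def dynamic_unscale(var, sf):
    var.data /= sf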
def act_shapes(self):
    # Dummy forward pass to build the computational graph.
    x = self.xp.random.randn(1, 3, 32, 32).astype('f')
    loss = self(x)
    variables = [loss] + [v for _, _, v in backward_var_iter_nodup(loss)]
    # Remove duplicates, keeping each name's last occurrence
    # (dedup "from the bottom" of the traversal).
    a = [v.name for v in variables if v.data is not None]
    shape_map = dict((v.name, v.shape) for v in variables if v.data is not None)
    nodup = sorted(set(a), key=list(reversed(a)).index, reverse=True)
    total = 0
    shapes = []
    for name in nodup:
        shape = shape_map[name]
        shapes.append((name, shape))
        total += np.prod(shape)
    return shapes, total
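# Usage sketch (illustration only; `MyNet` is a hypothetical chain that
# defines the methods above). `total` counts activation elements, so
# float32 storage is roughly 4 * total bytes.
#
#     model = MyNet()
#     shapes, total = model.act_shapes()
#     for name, shape in shapes:
#         print(name, shape)
#     print('activation elements:', total, '(~%d bytes as float32)' % (4 * total))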