from collections import OrderedDict

import numpy as np

from chainer import Variable, cuda, optimizers
import chainer.functions as F

# NOTE: ReconstructionLoss, Adam (the learner-side optimizer wrapper), and
# MetaLearner are repo-local components of meta_st; the import paths below are
# assumptions and should be adjusted to the actual package layout.
from meta_st.losses import ReconstructionLoss
from meta_st.optimizers import Adam
from meta_st.meta_learners import MetaLearner


class Experiment002(object):
    """
    - Stochastic Regularization
    - Resnet x 5
    - Objective of meta-learner is T instead of one
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_000 import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()

        self.setup_meta_learners()

    def setup_meta_learners(self):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking the gradient in the batch dimension
            ml = MetaLearner(np.prod(v.shape))
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Train learner and meta-learner
        self._train(x_l0, x_l1, y_l)
        self._train_meta_learner(x_l0, x_l1, y_l, x_u0, x_u1)

    def _train(self, x0, x1, y):
        # Cross Entropy Loss
        y_pred0 = self.model(x0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y)
        self.cleargrads()
        loss_ce.backward()
        self.optimizer.update(self.model_params)

    def _train_meta_learner(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Stochastic Regularization (i.e., Consistency Loss)
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update learner using loss_rec and the meta-learner
        self.update_parameter_by_meta_learner(
            self.model_params, loss_rec, x_l0, x_l1, y_l)

    def update_parameter_by_meta_learner(
            self, model_params, loss, x_l0, x_l1, y_l):
        # Forward meta-learner
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                x = p.grad
                grad = xp.reshape(x, (np.prod(shape), ))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

        # Train meta-learner with the main objective
        y_pred = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y_l)
        self.cleargrads()  # need to clear W's grad left by loss_rec.backward
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_ce.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()
        loss_ce.unchain_backward()  #TODO: is this the proper place to unchain?

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self):
        for k, v in self.model_params.items():
            v.cleargrad()
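# ---------------------------------------------------------------------------
# Usage sketch (not part of the repo): a minimal, hypothetical driver for
# Experiment002 with dummy CIFAR-10-shaped batches. The two-view convention
# (x_*0 / x_*1 as two stochastic augmentations of the same images) and the
# ability to feed raw float32/int32 NumPy arrays directly are assumptions.

import numpy as np
import chainer.functions as F

def _demo_experiment002():
    exp = Experiment002(device=None, learning_rate=1e-3, act=F.relu, T=3)
    bs = 32
    x_l0 = np.random.rand(bs, 3, 32, 32).astype(np.float32)  # labeled, view 0
    x_l1 = np.random.rand(bs, 3, 32, 32).astype(np.float32)  # labeled, view 1
    y_l = np.random.randint(0, 10, size=bs).astype(np.int32)
    x_u0 = np.random.rand(bs, 3, 32, 32).astype(np.float32)  # unlabeled, view 0
    x_u1 = np.random.rand(bs, 3, 32, 32).astype(np.float32)  # unlabeled, view 1

    exp.train(x_l0, x_l1, y_l, x_u0, x_u1)  # one learner + meta-learner step
    print(exp.test(x_l0, y_l).data)         # accuracy on the labeled batch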
class Experiment000(object):
    """
    - Stochastic Regularization
    - Resnet x 5
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_000 import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()

        self.setup_meta_learners()

    def setup_meta_learners(self):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for _ in self.model_params:
            # meta-learner taking the gradient in the batch dimension
            ml = MetaLearner(4, 2, 1)
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def update_parameter_by_meta_learner(self, model_params, loss):
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                # normalize grad (log/sign preprocessing)
                x = p.grad
                p_val = 10
                grad0 = xp.where(
                    xp.absolute(x) > xp.exp(-p_val),
                    xp.log(xp.absolute(x)) / p_val, -1)
                grad1 = xp.where(
                    xp.absolute(x) > xp.exp(-p_val),
                    xp.sign(x), xp.exp(p_val) * x)
                grad0 = xp.reshape(grad0, (np.prod(shape), ))
                grad1 = xp.reshape(grad1, (np.prod(shape), ))
                grad0 = xp.expand_dims(grad0, axis=1)
                grad1 = xp.expand_dims(grad1, axis=1)
                input_grad = xp.concatenate((grad0, grad1), axis=1)

                # normalize loss (same preprocessing)
                x = loss.data
                loss0 = xp.where(
                    xp.absolute(x) > xp.exp(-p_val),
                    xp.log(xp.absolute(x)) / p_val, -1)
                loss1 = xp.where(
                    xp.absolute(x) > xp.exp(-p_val),
                    xp.sign(x), xp.exp(p_val) * x)
                loss0 = xp.expand_dims(loss0, axis=0)
                loss1 = xp.expand_dims(loss1, axis=0)
                input_loss = xp.concatenate((loss0, loss1))
                input_loss = xp.broadcast_to(input_loss,
                                             (input_grad.shape[0], 2))

                # input to the meta-learner
                input_ = xp.concatenate((input_grad, input_loss), axis=1)
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(input_.astype(xp.float32)))  # forward
                p.data -= g.data.reshape(shape)

                # Set the parameter as a Variable to be backproped through
                if self.t == self.T:
                    w = p - F.reshape(g, shape)
                    self.model_params[k] = w

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        self.t += 1

        # Train meta-learner
        if self.t > self.T:
            self._train_meta_learner(x_l0, y_l)
            self.t = 0
            return

        # Train learner
        self._train(x_l0, x_l1, y_l)
        self._train(x_u0, x_u1, None)

    def _train_meta_learner(self, x, y):
        # Cross Entropy Loss
        y_pred = self.model(x, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y)
        self.cleargrads()
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_ce.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()
        loss_ce.unchain_backward()

    def _train(self, x0, x1, y=None):
        # Cross Entropy Loss
        y_pred0 = self.model(x0, self.model_params)
        if y is not None:
            loss_ce = F.softmax_cross_entropy(y_pred0, y)
            self.cleargrads()
            loss_ce.backward()

            # update learner using loss_ce
            self.optimizer.update(self.model_params)
            return

        # Stochastic Regularization (i.e., Consistency Loss)
        y_pred1 = self.model(x1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update learner using loss_rec and the meta-learner
        self.update_parameter_by_meta_learner(self.model_params, loss_rec)

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self):
        for k, v in self.model_params.items():
            v.cleargrad()
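# ---------------------------------------------------------------------------
# The grad/loss "normalize" branches above implement the log/sign gradient
# preprocessing of Andrychowicz et al. (2016), with p = 10. A self-contained
# NumPy sketch of the same transform (illustration only, not the repo's code):

import numpy as np

def log_sign_preprocess(x, p=10.0):
    """Return the two preprocessed channels fed to the meta-learner.

    For |x| >= exp(-p): (log|x| / p, sign(x)); otherwise: (-1, exp(p) * x).
    """
    x = np.asarray(x, dtype=np.float32)
    big = np.absolute(x) > np.exp(-p)
    safe_abs = np.where(big, np.absolute(x), 1.0)  # avoid log(0) warnings
    first = np.where(big, np.log(safe_abs) / p, -1.0)
    second = np.where(big, np.sign(x), np.exp(p) * x)
    return np.stack([first, second], axis=-1)

# e.g. log_sign_preprocess([1e-1, -1e-3, 1e-12]) keeps magnitude and sign
# information on a comparable scale for large and tiny gradients alike.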
class Experiment000(object):
    """
    - Stochastic Regularization
    - FCCN
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.leaky_relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0
        self.loss_ml = 0

        # Loss
        self.rc_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)  #TODO: is Adam appropriate?
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()

        self.setup_meta_learners()

    def setup_meta_learners(self):
        self.meta_learners = []
        self.ml_optimizers = []

        # Meta-learner
        for _ in self.model_params:
            # meta-learner taking the gradient in the batch dimension
            ml = MetaLearner(inmap=1, midmap=1, outmap=1)
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.ml_optimizers.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        self._train_for_primary_task(x_l0, y_l)
        self._train_for_auxiliary_task(x_l0, x_l1, y_l, x_u0, x_u1)

        self.t += 1
        if self.t == self.T:
            self._train_meta_learners()
            self.t = 0

    def _train_for_primary_task(self, x_l0, y_l):
        y_pred = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y_l)
        self._cleargrads()
        loss_ce.backward(retain_grad=True)
        self.optimizer.update(self.model_params)

    def _train_for_auxiliary_task(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Compute gradients
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rc = self.rc_loss(y_pred0, y_pred1)
        self._cleargrads()
        loss_rc.backward(retain_grad=True)

        # Update optimizee parameters by the meta-learner
        model_params = self.model_params
        for i, elm in enumerate(model_params.items()):
            name, w = elm
            meta_learner = self.meta_learners[i]
            ml_optimizer = self.ml_optimizers[i]

            shape = w.shape
            with cuda.get_device_from_id(self.device):
                xp = cuda.get_array_module(w.data)
                g_old = w.grad  # no need to deep-copy the gradient
                grad_data = xp.reshape(g_old, (np.prod(shape), 1))

                # refine grad, update w, and replace the parameter
                grad = Variable(grad_data)
                g = meta_learner(grad)  #TODO: use either h or c
                w -= F.reshape(g, shape)
                model_params[name] = w

        # Forward the primary task for training the meta-learners
        #TODO: use the same labeled data?
        y_pred = self.model(x_l0, self.model_params)
        self.loss_ml += F.softmax_cross_entropy(y_pred, y_l)

    def _train_meta_learners(self):
        self._cleargrads()
        self.loss_ml.backward(retain_grad=True)
        for opt in self.ml_optimizers:
            opt.update()
        self.loss_ml.unchain_backward()
        self.loss_ml = 0

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def _cleargrads(self):
        for k, v in self.model_params.items():
            v.cleargrad()
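# ---------------------------------------------------------------------------
# ReconstructionLoss (rc_loss above) is defined elsewhere in the repo; a common
# choice for this kind of consistency objective is the mean squared error
# between the two predictions. The stand-in below is an assumption for
# illustration, not necessarily the repo's definition:

import chainer.functions as F

class SquaredConsistencyLoss(object):
    """Hypothetical stand-in for ReconstructionLoss: mean squared error
    between two predictions of the same, differently perturbed, input."""

    def __call__(self, y0, y1):
        return F.mean_squared_error(y0, y1)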
class Experiment000(object):
    """
    - Stochastic Regularization
    - Resnet x 5
    - Objective of the meta-learner is accumulated over T steps instead of one
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0
        self.loss_ml = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()

        self.setup_meta_learners()

    def setup_meta_learners(self):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking the gradient in the batch dimension
            ml = MetaLearner(np.prod(v.shape))
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Train learner and meta-learner
        self._train(x_l0, x_l1, y_l)
        self._train_meta_learner(x_l0, x_l1, y_l, x_u0, x_u1)

    def _train(self, x0, x1, y):
        # Cross Entropy Loss
        y_pred0 = self.model(x0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y)
        self.cleargrads()
        loss_ce.backward()
        self.optimizer.update(self.model_params)

    def _train_meta_learner(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Stochastic Regularization (i.e., Consistency Loss)
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update learner using loss_rec and the meta-learner
        self.update_parameter_by_meta_learner(self.model_params, loss_rec,
                                              x_l0, x_l1, y_l)
        self.t += 1
        if self.t == self.T:
            self.train_meta_learner()
            self.t = 0  # reset so the meta-learner keeps training every T steps

    def update_parameter_by_meta_learner(self, model_params, loss,
                                         x_l0, x_l1, y_l):
        # Forward meta-learner
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                x = p.grad
                grad = xp.reshape(x, (np.prod(shape), ))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

        # Accumulate the main objective for training the meta-learner
        y_pred = self.model(x_l0, self.model_params)
        self.loss_ml += F.softmax_cross_entropy(y_pred, y_l)

    def train_meta_learner(self):
        self.cleargrads()  # need to clear W's grad left by loss_rec.backward
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        self.loss_ml.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()
        self.loss_ml.unchain_backward()  #TODO: is this the proper place to unchain?
        self.loss_ml = 0

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self):
        for k, v in self.model_params.items():
            v.cleargrad()
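# ---------------------------------------------------------------------------
# In update_parameter_by_meta_learner above, w = p - F.reshape(g, shape) keeps
# the parameter update inside the computation graph, so loss_ml.backward()
# later reaches the meta-learners. A minimal, self-contained Chainer sketch of
# that idea with toy values (not the repo's model):

import numpy as np
from chainer import Variable
import chainer.functions as F

w = Variable(np.array([[1.0, 2.0]], dtype=np.float32))       # current parameter
g_raw = Variable(np.array([[0.1, -0.2]], dtype=np.float32))  # raw gradient
scale = Variable(np.array([[0.5, 0.5]], dtype=np.float32))   # stand-in for a meta-learner output

g = scale * g_raw            # "refined" update produced by the meta-learner
w_new = w - g                # differentiable parameter update
loss = F.sum(w_new * w_new)  # downstream (primary-task) objective
loss.backward()
print(scale.grad)            # non-zero: the objective backpropagates into the meta-learner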
class Experiment000(object):
    """
    FCNN model
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001_small import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer for the model
        self.optimizer = Adam()
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()

        # Optimizer, or Meta-Learner (ML)
        self.setup_meta_learners()

        # Initialize the meta-learners' input as zero
        #self.zerograds()

    def setup_meta_learners(self):
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking the gradient in the batch dimension
            ml = MetaLearner()
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Supervised loss
        ## Forward of the CE loss
        self.forward_meta_learners()  #TODO: initialize the ML weights as 0?
        y_pred0 = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y_l)

        ## Backward of the CE loss
        self.cleargrad_meta_learners()
        self.cleargrads()
        loss_ce.backward(retain_grad=True)
        loss_ce.unchain_backward()

        ## Optimizer update
        self.optimizer.update(self.model_params)
        self.update_meta_learners()

        # Semi-supervised loss
        self.forward_meta_learners()
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))

        ## Backward of the SR loss
        self.cleargrad_meta_learners()
        self.cleargrads()
        loss_rec.backward(retain_grad=True)
        loss_rec.unchain_backward()

        ## Optimizer update
        self.optimizer.update(self.model_params)
        self.update_meta_learners()

    def forward_meta_learners(self):
        # Forward of the meta-learner, i.e., parameter update
        for i, name_param in enumerate(self.model_params.items()):
            k, p = name_param
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)
                w_data = p.data

                # the meta-learner is a gated recurrent unit over W, not over G
                w_data = xp.reshape(w_data, (1, 1, np.prod(shape)))
                meta_learner = self.meta_learners[i]
                w_accum = meta_learner(Variable(w_data))  # forward
                w_accum = F.reshape(w_accum, shape)
                self.model_params[k] = w_accum

    def cleargrad_meta_learners(self):
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()

    def update_meta_learners(self):
        for opt in self.opt_meta_learners:
            opt.update()

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self):
        """For initialization of the meta-learner forward."""
        for k, v in self.model_params.items():
            v.cleargrad()  # creates the gradient region for W

    def zerograds(self):
        """For initialization of the meta-learner forward."""
        for k, v in self.model_params.items():
            v.zerograd()  # creates the gradient region for W
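# ---------------------------------------------------------------------------
# forward_meta_learners above re-binds each entry of self.model_params to the
# meta-learner's output, so the next self.model(x, self.model_params) call uses
# the updated, graph-connected weights. A toy sketch of that "parameters passed
# as a dict to the forward" pattern (the real Model's forward is assumed to
# work this way):

import numpy as np
from collections import OrderedDict
from chainer import Variable
import chainer.functions as F

params = OrderedDict()
params['/fc/W'] = Variable(np.random.randn(10, 4).astype(np.float32))

def toy_forward(x, params):
    # reads its weight from the dict instead of owning it
    return F.linear(x, params['/fc/W'])

x = np.random.randn(2, 4).astype(np.float32)
y0 = toy_forward(x, params)

# Re-binding the entry to a graph-connected Variable (e.g. a meta-learner
# output) changes what the next forward uses, and gradients of any later loss
# flow back through this replacement.
params['/fc/W'] = params['/fc/W'] * 0.9
y1 = toy_forward(x, params)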