def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
    # Settings
    self.device = device
    self.act = act
    self.learning_rate = learning_rate
    self.T = T
    self.t = 0

    # Loss
    self.recon_loss = ReconstructionLoss()

    # Model
    from meta_st.cifar10.cnn_model_001 import Model
    self.model = Model(device, act)
    if device is not None:
        self.model.to_gpu(device)
    self.model_params = OrderedDict([x for x in self.model.namedparams()])

    # Optimizer
    self.setup_meta_learners()

def __init__(self, device=None, learning_rate=1e-3, act=F.leaky_relu, T=3):
    # Settings
    self.device = device
    self.act = act
    self.learning_rate = learning_rate
    self.T = T
    self.t = 0
    self.loss_ml = 0

    # Loss
    self.rc_loss = ReconstructionLoss()

    # Model
    from meta_st.cifar10.cnn_model_001 import Model
    self.model = Model(device, act)
    if device is not None:
        self.model.to_gpu(device)
    self.model_params = OrderedDict([x for x in self.model.namedparams()])

    # Optimizer
    self.optimizer = Adam(learning_rate)  #TODO: is Adam appropriate?
    self.optimizer.setup(self.model)
    self.optimizer.use_cleargrads()

    self.setup_meta_learners()

from collections import OrderedDict

import numpy as np
import chainer.functions as F
from chainer import Variable

from meta_st.cifar10.cnn_model_001 import Model


def test_forward():
    device = None
    act = F.leaky_relu
    model = Model(device, act)
    model_params = OrderedDict([x for x in model.namedparams()])

    x_data = np.random.rand(4, 3, 32, 32).astype(np.float32)
    y_data = np.random.randint(0, 10, 4).astype(np.int32)
    x = Variable(x_data)
    y = Variable(y_data)

    # forward
    y_pred = model(x, model_params, test=False)
    l = F.softmax_cross_entropy(y_pred, y)

    # backward
    model.cleargrads()
    l.backward(retain_grad=True)

    # change the variables held in model_params
    for k, v in model_params.items():
        w = Variable(np.copy(v.grad))
        w_ = F.dropout(w)
        model_params[k] = w_

    # forward again with the replaced parameters
    y_pred = model(x, model_params, test=False)
    l = F.softmax_cross_entropy(y_pred, y)

    # backward
    model.cleargrads()
    l.backward(retain_grad=True)

    # check which parameters received gradients
    print("after backward")
    for k, v in model_params.items():
        if v.grad is not None:
            print(k)

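# Hypothetical driver (not in the original file): runs the smoke test above
# directly. It only prints which parameter slots still receive gradients after
# the entries of model_params are replaced by fresh Variables.
if __name__ == '__main__':
    test_forward()
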
class Experiment000(object):
    """
    FCNN model
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.setup_meta_learners()

    def setup_meta_learners(self):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking the gradient in the batch dimension
            #ml = MetaLearner(np.prod(v.shape))
            ml = MetaLearner(1)
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Forward/backward of the learner w.r.t. cross-entropy
        y_pred0 = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y_l)
        self.cleargrads()
        loss_ce.backward(retain_grad=True)
        loss_ce.unchain_backward()

        # Forward of the meta-learner, i.e., parameter update
        for i, elm in enumerate(self.model_params.items()):
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)
                x = p.grad
                #grad = xp.reshape(x, (np.prod(shape), ))
                grad = xp.reshape(x, (np.prod(shape), 1))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w  # parameter update
                #self.model_params[k] = F.reshape(g, shape)

        # Forward/backward of the learner w.r.t. stochastic regularization
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        # clear meta-learner grads before backward so opt.update() uses only
        # the gradients coming from loss_rec
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_rec.backward()

        # Update meta-learner
        for opt in self.opt_meta_learners:
            opt.update()

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self):
        for k, v in self.model_params.items():
            v.cleargrad()

class Experiment000(object):
    """
    - Stochastic Regularization
    - FCNN
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.leaky_relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0
        self.loss_ml = 0

        # Loss
        self.rc_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)  #TODO: is Adam appropriate?
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()

        self.setup_meta_learners()

    def setup_meta_learners(self):
        self.meta_learners = []
        self.ml_optimizers = []

        # Meta-learner
        for _ in self.model_params:
            # meta-learner taking the gradient in the batch dimension
            ml = MetaLearner(inmap=1, midmap=1, outmap=1)
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.ml_optimizers.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        self._train_for_primary_task(x_l0, y_l)
        self._train_for_auxiliary_task(x_l0, x_l1, y_l, x_u0, x_u1)

        self.t += 1
        if self.t == self.T:
            self._train_meta_learners()
            self.t = 0

    def _train_for_primary_task(self, x_l0, y_l):
        y_pred = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y_l)
        self._cleargrads()
        loss_ce.backward(retain_grad=True)
        self.optimizer.update(self.model_params)

    def _train_for_auxiliary_task(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Compute gradients
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rc = self.rc_loss(y_pred0, y_pred1)
        self._cleargrads()
        loss_rc.backward(retain_grad=True)

        # Update optimizee parameters by the meta-learner
        model_params = self.model_params
        for i, elm in enumerate(model_params.items()):
            name, w = elm
            meta_learner = self.meta_learners[i]
            ml_optimizer = self.ml_optimizers[i]
            shape = w.shape
            with cuda.get_device_from_id(self.device):
                xp = cuda.get_array_module(w.data)
                g_old = w.grad  # no need to deep copy
                grad_data = xp.reshape(g_old, (np.prod(shape), 1))

                # refine grad, update w, and replace
                grad = Variable(grad_data)
                g = meta_learner(grad)  #TODO: use either h or c
                w -= F.reshape(g, shape)
                model_params[name] = w

        # Forward the primary task for training the meta-learners
        #TODO: use the same labeled data?
        y_pred = self.model(x_l0, self.model_params)
        self.loss_ml += F.softmax_cross_entropy(y_pred, y_l)

    def _train_meta_learners(self):
        self._cleargrads()
        # clear meta-learner grads accumulated so far before backward
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        self.loss_ml.backward(retain_grad=True)
        for opt in self.ml_optimizers:
            opt.update()
        self.loss_ml.unchain_backward()
        self.loss_ml = 0

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def _cleargrads(self):
        for k, v in self.model_params.items():
            v.cleargrad()

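# A minimal usage sketch (not part of the repo), assuming the module-level
# imports used by Experiment000 above (Model, MetaLearner, ReconstructionLoss,
# Adam, chainer) are available. It drives the class with randomly generated
# CIFAR-10-shaped batches to show the intended call pattern: train() takes two
# labeled views (x_l0, x_l1) with labels y_l and two unlabeled views
# (x_u0, x_u1), and the meta-learners are updated once every T calls.
if __name__ == '__main__':
    import numpy as np
    from chainer import Variable

    def random_batch(n=8):
        # random stand-in for a batch of (augmented) CIFAR-10 images
        return Variable(np.random.rand(n, 3, 32, 32).astype(np.float32))

    exp = Experiment000(device=None, learning_rate=1e-3, T=3)

    for _ in range(2 * exp.T):  # two full T-step cycles
        y_l = Variable(np.random.randint(0, 10, 8).astype(np.int32))
        exp.train(random_batch(), random_batch(), y_l,
                  random_batch(), random_batch())

    y_v = Variable(np.random.randint(0, 10, 8).astype(np.int32))
    acc = exp.test(random_batch(), y_v)
    print(acc.data)
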
class Experiment000(object):
    """
    - Stochastic Regularization
    - Resnet x 5
    - Objective of the meta-learner is accumulated over T steps instead of one
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0
        self.loss_ml = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()

        self.setup_meta_learners()

    def setup_meta_learners(self):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking the gradient in the batch dimension
            ml = MetaLearner(np.prod(v.shape))
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Train learner and meta-learner
        self._train(x_l0, x_l1, y_l)
        self._train_meta_learner(x_l0, x_l1, y_l, x_u0, x_u1)

    def _train(self, x0, x1, y):
        # Cross-entropy loss
        y_pred0 = self.model(x0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y)
        self.cleargrads()
        loss_ce.backward()
        self.optimizer.update(self.model_params)

    def _train_meta_learner(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Stochastic regularization (i.e., consistency loss)
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update the learner using loss_rec and the meta-learner
        self.update_parameter_by_meta_learner(self.model_params, loss_rec,
                                              x_l0, x_l1, y_l)
        self.t += 1
        if self.t == self.T:
            self.train_meta_learner()
            self.t = 0  # reset the step counter after each meta-update

    def update_parameter_by_meta_learner(self, model_params, loss,
                                         x_l0, x_l1, y_l):
        # Forward meta-learner
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)
                x = p.grad
                grad = xp.reshape(x, (np.prod(shape), ))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

        # Accumulate the main objective for training the meta-learner
        y_pred = self.model(x_l0, self.model_params)
        self.loss_ml += F.softmax_cross_entropy(y_pred, y_l)

    def train_meta_learner(self):
        self.cleargrads()  # need to clear W's grads filled by loss_rec.backward
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        self.loss_ml.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()
        self.loss_ml.unchain_backward()  #TODO: is this the proper place to unchain?
        self.loss_ml = 0

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self):
        for k, v in self.model_params.items():
            v.cleargrad()

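# A hypothetical, minimal stand-in for the repo's MetaLearner (the real class
# is defined elsewhere in meta_st; it is presumably LSTM-based in the spirit of
# learned optimizers). This is NOT the repo's implementation; it only sketches
# the interface assumed by the MetaLearner(np.prod(v.shape)) variant above: a
# callable Chain mapping a flattened gradient to an update of the same size, so
# that `w = p - F.reshape(g, shape)` is well-defined.
import chainer
import chainer.functions as F
import chainer.links as L


class MetaLearnerSketch(chainer.Chain):

    def __init__(self, inmap):
        inmap = int(inmap)
        super(MetaLearnerSketch, self).__init__(
            linear=L.Linear(inmap, inmap),
        )

    def __call__(self, grad):
        # grad: 1-D Variable holding a flattened parameter gradient
        h = self.linear(F.reshape(grad, (1, -1)))  # (1, inmap)
        return F.reshape(h, grad.shape)            # back to (inmap,)
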
class Experiment000(object):
    """
    FCNN model
    """

    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        if device is not None:
            self.model.to_gpu(device)
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()

        self.setup_meta_learners()

    def setup_meta_learners(self):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking the gradient in the batch dimension
            ml = MetaLearner(np.prod(v.shape))
            if self.device is not None:
                ml.to_gpu(self.device)
            self.meta_learners.append(ml)

            # optimizer of the meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Train learner and meta-learner
        self._train(x_l0, x_l1, y_l)
        self._train_meta_learner(x_l0, x_l1, y_l, x_u0, x_u1)

    def _train(self, x0, x1, y):
        # Cross-entropy loss
        y_pred0 = self.model(x0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y)
        self.cleargrads()
        loss_ce.backward()
        self.optimizer.update(self.model_params)

    def _train_meta_learner(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Stochastic regularization (i.e., consistency loss)
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        #TODO: better not to use softmax?
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update the learner using loss_rec and the meta-learner
        self.update_parameter_by_meta_learner(
            self.model_params, loss_rec, x_l0, x_l1, y_l)
        self.train_meta_learner(x_l1, y_l)

    def update_parameter_by_meta_learner(
            self, model_params, loss, x_l0, x_l1, y_l):
        # Forward meta-learner
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)
                x = p.grad
                grad = xp.reshape(x, (np.prod(shape), ))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

    def train_meta_learner(self, x_l1, y_l):
        # Train meta-learner with the main objective
        y_pred = self.model(x_l1, self.model_params)
        #TODO: recurrent training ends in a memory leak
        loss_ml = F.softmax_cross_entropy(y_pred, y_l)
        self.cleargrads()  # need to clear W's grads filled by loss_rec.backward
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_ml.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()
        loss_ml.unchain_backward()  #TODO: is this the proper place to unchain?

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self):
        for k, v in self.model_params.items():
            v.cleargrad()