Example no. 1
    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        self.setup_meta_learners()
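
The snippet above wires together a loss, a model, an optimizer, and a set of meta-learners, but ReconstructionLoss itself (imported from meta_st) is not shown. A minimal sketch, assuming it is a mean-squared-error consistency loss between two prediction Variables (later examples call it on two softmax outputs of the same unlabeled batch); the real implementation may differ:

import chainer
import chainer.functions as F

class ReconstructionLoss(chainer.Chain):
    """Hypothetical consistency loss: mean squared error between two predictions."""
    def __call__(self, y0, y1):
        return F.mean_squared_error(y0, y1)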
Example no. 2
    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_000 import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])
        
        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        self.setup_meta_learners()
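
The Model classes (meta_st.cifar10.cnn_model_000/001) are not shown either. From the call sites they are invoked as model(x, model_params) during training and model(x, model_params, test=True) at test time, i.e. the forward pass reads its parameters from the externally held OrderedDict so that the experiments can substitute updated Variables for the weights. A minimal stand-in under that assumption (a small linear net, not the actual CIFAR-10 CNN):

import chainer
import chainer.links as L
import chainer.functions as F

class Model(chainer.Chain):
    """Hypothetical stand-in for meta_st.cifar10.cnn_model_*.Model."""
    def __init__(self, device=None, act=F.relu):
        super(Model, self).__init__(
            l0=L.Linear(3 * 32 * 32, 100),
            l1=L.Linear(100, 10),
        )
        self.act = act

    def __call__(self, x, model_params, test=False):
        # Forward with the externally supplied parameter Variables; namedparams()
        # on this chain yields keys such as '/l0/W' and '/l0/b'.
        h = F.reshape(x, (x.shape[0], -1))
        h = self.act(F.linear(h, model_params['/l0/W'], model_params['/l0/b']))
        return F.linear(h, model_params['/l1/W'], model_params['/l1/b'])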
Example no. 3
class Experiment002(object):
    """
    - Stochastic Regularization
    - Resnet x 5
    - Objective of meta-learner is T instead of one
    """
    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_000 import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])
        
        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        self.setup_meta_learners()

    def setup_meta_learners(self, ):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking gradient in batch dimension
            ml = MetaLearner(np.prod(v.shape))
            ml.to_gpu(self.device) if self.device is not None else None
            self.meta_learners.append(ml)

            # optimizer of meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)        
                
    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Train learner and meta-learner
        self._train(x_l0, x_l1, y_l)
        self._train_meta_learner(x_l0, x_l1, y_l, x_u0, x_u1)

    def _train(self, x0, x1, y):
        # Cross Entropy Loss
        y_pred0 = self.model(x0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y)
        self.cleargrads()
        loss_ce.backward()

        self.optimizer.update(self.model_params)

    def _train_meta_learner(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Stochastic Regularization (i.e., Consistency Loss)
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update learner using loss_rec and meta-learner
        self.update_parameter_by_meta_learner(
            self.model_params, loss_rec, 
            x_l0, x_l1, y_l)

    def update_parameter_by_meta_learner(
            self, model_params, loss, 
            x_l0, x_l1, y_l):

        # Forward meta-learner
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                x = p.grad
                grad = xp.reshape(x, (np.prod(shape), ))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

        # Train meta-learner with main objective
        y_pred = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y_l)
        
        self.cleargrads()  # need to clear W's grad due to loss_rec.backward()
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_ce.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()

        loss_ce.unchain_backward()  #TODO: is here the proper place to unchain?

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self, ):
        for k, v in self.model_params.items():
            v.cleargrad()
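
A possible driver for Experiment002 (n_epochs and train_iter are assumed names; each batch carries two augmented views of the labeled data, x_l0/x_l1 with labels y_l, and two views of the unlabeled data, x_u0/x_u1):

exp = Experiment002(device=0, learning_rate=1e-3, act=F.relu, T=3)
for epoch in range(n_epochs):
    for x_l0, x_l1, y_l, x_u0, x_u1 in train_iter:
        # one cross-entropy update of the learner, then one meta-learner step
        exp.train(x_l0, x_l1, y_l, x_u0, x_u1)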
Example no. 4
class Experiment000(object):
    """
    - Stochastic Regularization
    - Resnet x 5
    """
    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_000 import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])
        
        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        self.setup_meta_learners()

    def setup_meta_learners(self, ):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for _ in self.model_params:
            # meta-learner taking gradient in batch dimension
            ml = MetaLearner(4, 2, 1)
            ml.to_gpu(self.device) if self.device is not None else None
            self.meta_learners.append(ml)

            # optimizer of meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def update_parameter_by_meta_learner(self, model_params, loss):
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                # normalize grad
                x = p.grad
                p_val = 10
                grad0 = xp.where(xp.absolute(x) > xp.exp(-p_val), 
                                   xp.log(xp.absolute(x))/p_val, -1)
                grad1 = xp.where(xp.absolute(x) > xp.exp(-p_val), 
                                   xp.sign(x), xp.exp(p_val)*x)
                grad0 = xp.reshape(grad0, (np.prod(shape), ))
                grad1 = xp.reshape(grad1, (np.prod(shape), ))
                grad0 = xp.expand_dims(grad0, axis=1)
                grad1 = xp.expand_dims(grad1, axis=1)
                input_grad = xp.concatenate((grad0, grad1), axis=1)

                # normalize loss
                x = loss.data
                loss0 = xp.where(xp.absolute(x) > xp.exp(-p_val), 
                                   xp.log(xp.absolute(x))/p_val, -1)
                loss1 = xp.where(xp.absolute(x) > xp.exp(-p_val), 
                                   xp.sign(x), xp.exp(p_val)*x)
                loss0 = xp.expand_dims(loss0, axis=0)
                loss1 = xp.expand_dims(loss1, axis=0)
                input_loss = xp.concatenate((loss0, loss1))
                input_loss = xp.broadcast_to(input_loss, 
                                             (input_grad.shape[0], 2))

                # input
                input_ = xp.concatenate((input_grad, input_loss), axis=1)
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(input_.astype(xp.float32))) # forward
                p.data -= g.data.reshape(shape)

            # Set parameter as variable to be backpropagated
            if self.t == self.T:
                w = p - F.reshape(g, shape)
                self.model_params[k] = w
                
    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        self.t += 1

        # Train meta-learner
        if self.t > self.T:
            self._train_meta_learner(x_l0, y_l)
            self.t = 0
            return
        
        # Train learner
        self._train(x_l0, x_l1, y_l)
        self._train(x_u0, x_u1, None)

    def _train_meta_learner(self, x, y):
        # Cross Entropy Loss
        y_pred = self.model(x, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y)

        self.cleargrads()
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_ce.backward(retain_grad=True)

        for opt in self.opt_meta_learners:
            opt.update()

        loss_ce.unchain_backward()

    def _train(self, x0, x1, y=None):
        # Cross Entropy Loss
        y_pred0 = self.model(x0, self.model_params)

        if y is not None:
            loss_ce = F.softmax_cross_entropy(y_pred0, y)
            self.cleargrads()
            loss_ce.backward()

            # update learner using loss_ce
            self.optimizer.update(self.model_params)
            return
        
        # Stochastic Regularization (i.e., Consistency Loss)
        y_pred1 = self.model(x1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update learner using loss_rec and meta-learner
        self.update_parameter_by_meta_learner(self.model_params, loss_rec)
                        
    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self, ):
        for k, v in self.model_params.items():
            v.cleargrad()
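
Here the meta-learner input is not the raw gradient but a preprocessed (gradient, loss) pair. With p_val = 10, the xp.where expressions map a value x to (log|x|/p, sign(x)) when |x| > exp(-p) and to (-1, exp(p)*x) otherwise, the usual log/sign preprocessing that keeps inputs of very different magnitudes in a comparable range. A NumPy sketch of the same transform:

import numpy as np

def preprocess(x, p=10.0):
    """Two-channel (log-magnitude, sign) encoding of x, mirroring the xp.where logic above."""
    big = np.absolute(x) > np.exp(-p)
    # np.where evaluates both branches; the unused branch values are simply discarded
    c0 = np.where(big, np.log(np.absolute(x)) / p, -1.0)
    c1 = np.where(big, np.sign(x), np.exp(p) * x)
    return np.stack((c0, c1), axis=-1)

preprocess(np.array([1e-3, -0.5, 1e-12]))
# approx [[-0.69, 1.0], [-0.069, -1.0], [-1.0, 2.2e-08]]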
Example no. 5
class Experiment000(object):
    """
    - Stochastic Regularization
    - FCNN
    """
    def __init__(self, device=None, learning_rate=1e-3, act=F.leaky_relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0
        self.loss_ml = 0

        # Loss
        self.rc_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)  #TODO: is Adam appropriate?
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        self.setup_meta_learners()

    def setup_meta_learners(self, ):
        self.meta_learners = []
        self.ml_optimizers = []

        # Meta-learner
        for _ in self.model_params:
            # meta-learner taking gradient in batch dimension
            ml = MetaLearner(inmap=1, midmap=1, outmap=1)
            ml.to_gpu(self.device) if self.device is not None else None
            self.meta_learners.append(ml)

            # optimizer of meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.ml_optimizers.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        self._train_for_primary_task(x_l0, y_l)
        self._train_for_auxiliary_task(x_l0, x_l1, y_l, x_u0, x_u1)

        self.t += 1
        if self.t == self.T:
            self._train_meta_learners()
            self.t = 0

    def _train_for_primary_task(self, x_l0, y_l):
        y_pred = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y_l)
        self._cleargrads()
        loss_ce.backward(retain_grad=True)
        self.optimizer.update(self.model_params)

    def _train_for_auxiliary_task(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Compute gradients
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rc = self.rc_loss(y_pred0, y_pred1)
        self._cleargrads()
        loss_rc.backward(retain_grad=True)

        # Update optimizee parameters by meta-learner
        model_params = self.model_params
        for i, elm in enumerate(model_params.items()):
            name, w = elm
            meta_learner = self.meta_learners[i]
            ml_optimizer = self.ml_optimizers[i]
            shape = w.shape
            with cuda.get_device_from_id(self.device):
                xp = cuda.get_array_module(w.data)
                g_old = w.grad  # no need to deep copy
                grad_data = xp.reshape(g_old, (np.prod(shape), 1))

                # refine grad, update w, and replace
                grad = Variable(grad_data)
                g = meta_learner(grad)  #TODO: use either h or c
                w -= F.reshape(g, shape)
            model_params[name] = w

        # Forward primary task for training meta-learners
        #TODO: use the same labeled data?
        y_pred = self.model(x_l0, self.model_params)
        self.loss_ml += F.softmax_cross_entropy(y_pred, y_l)

    def _train_meta_learners(self, ):
        self._cleargrads()
        self.loss_ml.backward(retain_grad=True)
        for opt in self.ml_optimizers:
            opt.update()
        self.loss_ml.unchain_backward()
        self.loss_ml = 0

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def _cleargrads(self, ):
        for k, v in self.model_params.items():
            v.cleargrad()
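
MetaLearner is imported from meta_st and is constructed with different signatures across these examples (MetaLearner(np.prod(v.shape)), MetaLearner(4, 2, 1), MetaLearner(inmap=1, midmap=1, outmap=1), MetaLearner()). From the call sites, and the TODO about "use either h or c", it appears to be a small recurrent network applied coordinate-wise to (preprocessed) gradients. A minimal sketch under that assumption, not the actual implementation:

import chainer
import chainer.links as L

class MetaLearner(chainer.Chain):
    """Hypothetical coordinate-wise meta-learner: a stateful LSTM with a linear readout."""
    def __init__(self, inmap=1, midmap=1, outmap=1):
        super(MetaLearner, self).__init__(
            lstm=L.LSTM(inmap, midmap),  # h and c persist across calls
            out=L.Linear(midmap, outmap),
        )

    def __call__(self, g):
        # g: (n_coordinates, inmap) gradient features -> one update per coordinate
        h = self.lstm(g)
        return self.out(h)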
Example no. 6
class Experiment000(object):
    """
    - Stochastic Regularization
    - Resnet x 5
    - Objective of meta-learner is T instead of one
    """
    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0
        self.loss_ml = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        self.setup_meta_learners()

    def setup_meta_learners(self, ):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking gradient in batch dimension
            ml = MetaLearner(np.prod(v.shape))
            ml.to_gpu(self.device) if self.device is not None else None
            self.meta_learners.append(ml)

            # optimizer of meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Train learner and meta-learner
        self._train(x_l0, x_l1, y_l)
        self._train_meta_learner(x_l0, x_l1, y_l, x_u0, x_u1)

    def _train(self, x0, x1, y):
        # Cross Entropy Loss
        y_pred0 = self.model(x0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y)
        self.cleargrads()
        loss_ce.backward()

        self.optimizer.update(self.model_params)

    def _train_meta_learner(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Stochastic Regularization (i.e., Consistency Loss)
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update learner using loss_rec and meta-learner
        self.update_parameter_by_meta_learner(self.model_params, loss_rec,
                                              x_l0, x_l1, y_l)

        self.t += 1
        if self.t == self.T:
            self.train_meta_learner()
            self.t = 0

    def update_parameter_by_meta_learner(self, model_params, loss, x_l0, x_l1,
                                         y_l):

        # Forward meta-learner
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                x = p.grad
                grad = xp.reshape(x, (np.prod(shape), ))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

        # Train meta-learner with main objective
        y_pred = self.model(x_l0, self.model_params)
        self.loss_ml += F.softmax_cross_entropy(y_pred, y_l)

    def train_meta_learner(self, ):
        self.cleargrads()  # need to clear W's grad due to loss_rec.backward()
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        self.loss_ml.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()
        self.loss_ml.unchain_backward()  #TODO: is here the proper place to unchain?
        self.loss_ml = 0

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self, ):
        for k, v in self.model_params.items():
            v.cleargrad()
Example no. 7
class Experiment000(object):
    """
    FCNN model
    """
    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001_small import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer for model
        self.optimizer = Adam()
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        
        # Optimizer, or Meta-Learner (ML)
        self.setup_meta_learners()
        
        # Initialize meta-learners' input as zero
        #self.zerograds()
        
    def setup_meta_learners(self, ):
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for k, v in self.model_params.items():
            # meta-learner taking gradient in batch dimension
            ml = MetaLearner()
            ml.to_gpu(self.device) if self.device is not None else None
            self.meta_learners.append(ml)

            # optimizer of meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)
                
    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Supervised loss
        ## Forward of CE loss
        self.forward_meta_learners()  #TODO: init ML's W as 0?
        y_pred0 = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred0, y_l)

        ## Backward of CE loss
        self.cleargrad_meta_learners()
        self.cleargrads()
        loss_ce.backward(retain_grad=True)
        loss_ce.unchain_backward()
        
        ## Optimizer update
        self.optimizer.update(self.model_params)
        self.update_meta_learners()

        # Semi-supervised loss
        self.forward_meta_learners()
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0),  F.softmax(y_pred1))

        ## Backward of SR loss
        self.cleargrad_meta_learners()
        self.cleargrads()
        loss_rec.backward(retain_grad=True)
        loss_rec.unchain_backward()

        ## Optimizer update
        self.optimizer.update(self.model_params)
        self.update_meta_learners()

    def forward_meta_learners(self, ):
        # Forward of meta-learner, i.e., parameter update
        for i, name_param in enumerate(self.model_params.items()):
            k, p = name_param
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                w_data = p.data  # meta-learner is a gated recurrent unit over W, not over G
                w_data = xp.reshape(w_data, (1, 1, np.prod(shape)))
                meta_learner = self.meta_learners[i]
                w_accum = meta_learner(Variable(w_data))  # forward
                w_accum = F.reshape(w_accum, shape)
                self.model_params[k] = w_accum

    def cleargrad_meta_learners(self, ):
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()

    def update_meta_learners(self, ):
        for opt in self.opt_meta_learners:
            opt.update()
            
    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self, ):
        """For initialization of Meta-learner forward
        """
        for k, v in self.model_params.items():
            v.cleargrad()  # creates the gradient region for W
        
    def zerograds(self, ):
        """For initialization of Meta-learner forward
        """
        for k, v in self.model_params.items():
            v.zerograd()  # creates the gradient region for W
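
Note that the learner optimizer is the project's own Adam (constructed as Adam(learning_rate) or Adam()), not the chainer.optimizers.Adam used for the meta-learners: it is driven as optimizer.update(self.model_params), i.e. it applies the update directly from the gradients already stored on the parameter Variables in the dict. Its source is not included here; a minimal dict-based sketch consistent with that call pattern (hyper-parameter defaults assumed):

from chainer import cuda

class Adam(object):
    """Hypothetical dict-based Adam: updates each Variable's .data from its .grad."""
    def __init__(self, alpha=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        self.alpha, self.beta1, self.beta2, self.eps = alpha, beta1, beta2, eps
        self.t = 0
        self.state = {}

    def setup(self, model):
        pass  # kept only for interface compatibility

    def use_cleargrads(self):
        pass  # kept only for interface compatibility

    def update(self, model_params):
        self.t += 1
        for name, p in model_params.items():
            xp = cuda.get_array_module(p.data)
            if name not in self.state:
                self.state[name] = (xp.zeros_like(p.data), xp.zeros_like(p.data))
            m, v = self.state[name]
            g = p.grad
            m = self.beta1 * m + (1 - self.beta1) * g
            v = self.beta2 * v + (1 - self.beta2) * g * g
            self.state[name] = (m, v)
            m_hat = m / (1 - self.beta1 ** self.t)
            v_hat = v / (1 - self.beta2 ** self.t)
            p.data -= self.alpha * m_hat / (xp.sqrt(v_hat) + self.eps)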
Example no. 8
class Experiment000(object):
    """
    - Stochastic Regularization
    - FCNN
    """
    def __init__(self, device=None, learning_rate=1e-3, act=F.leaky_relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0
        self.loss_ml = 0

        # Loss
        self.rc_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_001 import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])
        
        # Optimizer
        self.optimizer = Adam(learning_rate)  #TODO: is Adam appropriate?
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        self.setup_meta_learners()

    def setup_meta_learners(self, ):
        self.meta_learners = []
        self.ml_optimizers = []

        # Meta-learner
        for _ in self.model_params:
            # meta-learner taking gradient in batch dimension
            ml = MetaLearner(inmap=1, midmap=1, outmap=1)
            ml.to_gpu(self.device) if self.device is not None else None
            self.meta_learners.append(ml)

            # optimizer of meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.ml_optimizers.append(opt)        

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        self._train_for_primary_task(x_l0, y_l)
        self._train_for_auxiliary_task(x_l0, x_l1, y_l, x_u0, x_u1)
        
        self.t += 1
        if self.t == self.T:
            self._train_meta_learners()
            self.t = 0

    def _train_for_primary_task(self, x_l0, y_l):
        y_pred = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y_l)
        self._cleargrads()
        loss_ce.backward(retain_grad=True)
        self.optimizer.update(self.model_params)
        
    def _train_for_auxiliary_task(self, x_l0, x_l1, y_l, x_u0, x_u1):
        # Compute gradients
        y_pred0 = self.model(x_u0, self.model_params)
        y_pred1 = self.model(x_u1, self.model_params)
        loss_rc = self.rc_loss(y_pred0, y_pred1)
        self._cleargrads()
        loss_rc.backward(retain_grad=True)

        # Update optimizee parameters by meta-learner
        model_params = self.model_params
        for i, elm in enumerate(model_params.items()):
            name, w = elm
            meta_learner = self.meta_learners[i]
            ml_optimizer = self.ml_optimizers[i]
            shape = w.shape
            with cuda.get_device_from_id(self.device):
                xp = cuda.get_array_module(w.data)
                g_old = w.grad  # no need to deep copy
                grad_data = xp.reshape(g_old, (np.prod(shape), 1))
            
                # refine grad, update w, and replace
                grad = Variable(grad_data)
                g = meta_learner(grad)  #TODO: use either h or c
                w -= F.reshape(g, shape)
            model_params[name] = w
                
        # Forward primary task for training meta-learners
        #TODO: use the same labeled data?
        y_pred = self.model(x_l0, self.model_params)
        self.loss_ml += F.softmax_cross_entropy(y_pred, y_l)

    def _train_meta_learners(self, ):
        self._cleargrads()
        self.loss_ml.backward(retain_grad=True)
        for opt in self.ml_optimizers:
            opt.update()
        self.loss_ml.unchain_backward()
        self.loss_ml = 0
            
    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def _cleargrads(self, ):
        for k, v in self.model_params.items():
            v.cleargrad()
Example no. 9
class Experiment000(object):
    """
    - Stochastic Regularization
    - Resnet x 5
    """
    def __init__(self, device=None, learning_rate=1e-3, act=F.relu, T=3):
        # Settings
        self.device = device
        self.act = act
        self.learning_rate = learning_rate
        self.T = T
        self.t = 0

        # Loss
        self.recon_loss = ReconstructionLoss()

        # Model
        from meta_st.cifar10.cnn_model_000 import Model
        self.model = Model(device, act)
        self.model.to_gpu(device) if device is not None else None
        self.model_params = OrderedDict([x for x in self.model.namedparams()])

        # Optimizer
        self.optimizer = Adam(learning_rate)
        self.optimizer.setup(self.model)
        self.optimizer.use_cleargrads()
        self.setup_meta_learners()

    def setup_meta_learners(self, ):
        #TODO: multiple layers, modification of inputs
        self.meta_learners = []
        self.opt_meta_learners = []

        # Meta-learner
        for _ in self.model_params:
            # meta-learner taking gradient in batch dimension
            ml = MetaLearner(4, 2, 1)
            ml.to_gpu(self.device) if self.device is not None else None
            self.meta_learners.append(ml)

            # optimizer of meta-learner
            opt = optimizers.Adam(1e-3)
            opt.setup(ml)
            opt.use_cleargrads()
            self.opt_meta_learners.append(opt)

    def update_parameter_by_meta_learner(self, model_params, loss):
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                # normalize grad
                x = p.grad
                p_val = 10
                grad0 = xp.where(
                    xp.absolute(x) > xp.exp(-p_val),
                    xp.log(xp.absolute(x)) / p_val, -1)
                grad1 = xp.where(
                    xp.absolute(x) > xp.exp(-p_val), xp.sign(x),
                    xp.exp(p_val) * x)
                grad0 = xp.reshape(grad0, (np.prod(shape), ))
                grad1 = xp.reshape(grad1, (np.prod(shape), ))
                grad0 = xp.expand_dims(grad0, axis=1)
                grad1 = xp.expand_dims(grad1, axis=1)
                input_grad = xp.concatenate((grad0, grad1), axis=1)

                # normalize loss
                x = loss.data
                loss0 = xp.where(
                    xp.absolute(x) > xp.exp(-p_val),
                    xp.log(xp.absolute(x)) / p_val, -1)
                loss1 = xp.where(
                    xp.absolute(x) > xp.exp(-p_val), xp.sign(x),
                    xp.exp(p_val) * x)
                loss0 = xp.expand_dims(loss0, axis=0)
                loss1 = xp.expand_dims(loss1, axis=0)
                input_loss = xp.concatenate((loss0, loss1))
                input_loss = xp.broadcast_to(input_loss,
                                             (input_grad.shape[0], 2))

                # input
                input_ = xp.concatenate((input_grad, input_loss), axis=1)
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(input_.astype(
                    xp.float32)))  # forward
                p.data -= g.data.reshape(shape)

            # Set parameter as variable to be backpropagated
            if self.t == self.T:
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

    def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
        self.t += 1

        # Train meta-learner
        if self.t > self.T:
            self._train_meta_learner(x_l0, y_l)
            self.t = 0
            return

        # Train learner
        self._train(x_l0, x_l1, y_l)
        self._train(x_u0, x_u1, None)

    def _train_meta_learner(self, x, y):
        # Cross Entropy Loss
        y_pred = self.model(x, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y)

        self.cleargrads()
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_ce.backward(retain_grad=True)

        for opt in self.opt_meta_learners:
            opt.update()

        loss_ce.unchain_backward()

    def _train(self, x0, x1, y=None):
        # Cross Entropy Loss
        y_pred0 = self.model(x0, self.model_params)

        if y is not None:
            loss_ce = F.softmax_cross_entropy(y_pred0, y)
            self.cleargrads()
            loss_ce.backward()

            # update learner using loss_ce
            self.optimizer.update(self.model_params)
            return

        # Stochastic Regularization (i.e., Consistency Loss)
        y_pred1 = self.model(x1, self.model_params)
        loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1))
        self.cleargrads()
        loss_rec.backward()

        # update learner using loss_rec and meta-learner
        self.update_parameter_by_meta_learner(self.model_params, loss_rec)

    def test(self, x, y):
        y_pred = self.model(x, self.model_params, test=True)
        acc = F.accuracy(y_pred, y)
        return acc

    def cleargrads(self, ):
        for k, v in self.model_params.items():
            v.cleargrad()
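
In every variant, evaluation goes through test(), which returns an F.accuracy Variable for one batch. A hypothetical evaluation loop (test_iter and the exp instance from the earlier training sketch are assumed):

from chainer import cuda

accs = []
for x, y in test_iter:
    acc = exp.test(x, y)  # Variable holding the batch accuracy
    accs.append(float(cuda.to_cpu(acc.data)))
print("test accuracy: {:.4f}".format(sum(accs) / len(accs)))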