def train(self, t, train, valid, args):
        # self.model=deepcopy(self.initial_model) # Restart model: isolate

        if t == 0: which_types = ['mcl']
        else: which_types = ['ac', 'mcl']

        for which_type in which_types:

            print('Training Type: ', which_type)

            best_loss = np.inf
            best_model = utils.get_model(self.model)
            lr = self.lr
            patience = self.lr_patience
            self.optimizer = self._get_optimizer(lr, which_type)

            # Loop epochs
            for e in range(self.nepochs):
                # Train
                clock0 = time.time()
                iter_bar = tqdm(train, desc='Train Iter (loss=X.XXX)')
                self.train_epoch(t, train, iter_bar, which_type)
                clock1 = time.time()
                train_loss, train_acc = self.eval(t, train, which_type)
                clock2 = time.time()
                print(
                    '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                    .format(
                        e + 1,
                        1000 * self.sbatch * (clock1 - clock0) / len(train),
                        1000 * self.sbatch * (clock2 - clock1) / len(train),
                        train_loss, 100 * train_acc),
                    end='')
                # Valid
                valid_loss, valid_acc = self.eval(t, valid, which_type)
                print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                    valid_loss, 100 * valid_acc),
                      end='')
                # Adapt lr
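                # Early-stopping-style schedule: keep the model with the best validation
                # loss; after lr_patience epochs without improvement, divide the learning
                # rate by lr_factor and stop once it drops below lr_min.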
                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_model = utils.get_model(self.model)
                    patience = self.lr_patience
                    print(' *', end='')
                else:
                    patience -= 1
                    if patience <= 0:
                        lr /= self.lr_factor
                        print(' lr={:.1e}'.format(lr), end='')
                        if lr < self.lr_min:
                            print()
                            break
                        patience = self.lr_patience
                        self.optimizer = self._get_optimizer(lr, which_type)
                print()

            # Restore best
            utils.set_model_(self.model, best_model)

        return
Example #2
    def train(self, t, xtrain, ytrain, xvalid, yvalid):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience

        # train only the column for the current task
        self.model.unfreeze_column(t)

        # the optimizer trains solely the params for the current task
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            clock2 = time.time()
            print(
                "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |".format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss,
                    100 * train_acc,
                ),
                end="",
            )
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(
                " Valid: loss={:.3f}, acc={:5.1f}% |".format(
                    valid_loss, 100 * valid_acc
                ),
                end="",
            )
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(" *", end="")
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(" lr={:.1e}".format(lr), end="")
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        return
Example #3
    def train(self, t, train_data, valid_data, device='cuda'):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr

        # train only the column for the current task
        self.model.unfreeze_column(t)
        # 1 define the optimizer and scheduler
        self.optimizer = self._get_optimizer(lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, self.epochs)
        # 2 define the dataloader
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=self.batch,
                                                   shuffle=True,
                                                   num_workers=4,
                                                   pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(valid_data,
                                                   batch_size=self.batch,
                                                   shuffle=False,
                                                   num_workers=4,
                                                   pin_memory=True)

        # Loop epochs
        for e in range(self.epochs):
            # Train
            self.train_epoch(t, train_loader, device=device)
            train_loss, train_acc = self.eval(t,
                                              train_loader,
                                              mode='train',
                                              device=device)
            # Valid
            valid_loss, valid_acc = self.eval(t,
                                              valid_loader,
                                              mode='train',
                                              device=device)
            print(
                '| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'
                .format(e, train_loss, 100 * train_acc, valid_loss,
                        100 * valid_acc))
            self.writer.add_scalars('Train_Loss/Task: {}'.format(t), {
                'train_loss': train_loss,
                'valid_loss': valid_loss
            },
                                    global_step=e)
            self.writer.add_scalars('Train_Accuracy/Task: {}'.format(t), {
                'train_acc': train_acc * 100,
                'valid_acc': valid_acc * 100
            },
                                    global_step=e)
            # Adapt lr
            scheduler.step()
            # update the best model
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)

        utils.set_model_(self.model, best_model)

        return
Example #4
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Synaptic-Intelligence-style bookkeeping (consumed by update_omega below):
        # W accumulates each parameter's contribution to the loss change along the
        # optimization path, and p_old keeps the parameter values from the previous step.
        self.W = {}
        self.p_old = {}
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                n = n.replace('.', '__')
                self.W[n] = p.data.clone().zero_()
                self.p_old[n] = p.data.clone()

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            num_batch = xtrain.size(0)

            self.train_epoch(t,xtrain,ytrain)

            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/num_batch,1000*self.sbatch*(clock2-clock1)/num_batch,train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            print()
            #save log for current task & old tasks at every epoch

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')

            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()


        # Restore best
        utils.set_model_(self.model, best_model)

        self.update_omega(self.W, self.epsilon)
        self.model_old = deepcopy(self.model)
        utils.freeze_model(self.model_old) # Freeze the weights

        return
Example #5
    def train(self,t,xtrain,ytrain,xvalid,yvalid):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model,best_model)

        # Update old
        self.model_old=deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old) # Freeze the weights

        # Fisher ops
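        # Keep only one diagonal Fisher estimate in memory: the Fisher computed on the
        # current task is folded into a running average over tasks,
        # F <- (F_new + t * F_old) / (t + 1), as done in the merge loop below.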
        if t>0:
            fisher_old={}
            for n,_ in self.model.named_parameters():
                fisher_old[n]=self.fisher[n].clone()
        self.fisher=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
        if t>0:
            # Watch out! We do not want to keep t models (or fisher diagonals) in memory, therefore we have to merge fisher diagonals
            for n,_ in self.model.named_parameters():
                self.fisher[n]=(self.fisher[n]+fisher_old[n]*t)/(t+1)       # Checked: it is better than the other option
                #self.fisher[n]=0.5*(self.fisher[n]+fisher_old[n])
        torch.save(self.model.state_dict(),'pretrain_ewc.pth')
        return
Example #6
    def train(self, t, xtrain, ytrain, xvalid, yvalid):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            clock2 = time.time()
            print(
                "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |"
                .format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss,
                    100 * train_acc,
                ),
                end="",
            )
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(
                " Valid: loss={:.3f}, acc={:5.1f}% |".format(
                    valid_loss, 100 * valid_acc),
                end="",
            )
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(" *", end="")
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(" lr={:.1e}".format(lr), end="")
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best & freeze
        utils.set_model_(self.model, best_model)
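        # Freeze everything except the task-specific output heads (parameters whose name
        # starts with "last"), so later tasks reuse the shared features and only train
        # their own head.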
        for n, p in self.model.named_parameters():
            if not n.startswith("last"):
                p.requires_grad = False

        return
Example #7
    def train(self, tasks, xtrain, ytrain, xvalid, yvalid):
        self.model = deepcopy(self.initial_model)  # Restart model

        task_t, task_v = tasks
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        try:
            for e in range(self.nepochs):
                # Train
                clock0 = time.time()
                self.train_epoch(task_t, xtrain, ytrain)
                clock1 = time.time()
                train_loss = self.eval_validation(task_t, xtrain, ytrain)
                clock2 = time.time()
                print(
                    "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f} |"
                    .format(
                        e + 1,
                        1000 * self.sbatch * (clock1 - clock0) /
                        xtrain.size(0),
                        1000 * self.sbatch * (clock2 - clock1) /
                        xtrain.size(0),
                        train_loss,
                    ),
                    end="",
                )
                # Valid
                valid_loss = self.eval_validation(task_v, xvalid, yvalid)
                print(" Valid: loss={:.3f} |".format(valid_loss), end="")
                # Adapt lr
                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_model = utils.get_model(self.model)
                    patience = self.lr_patience
                    print(" *", end="")
                else:
                    patience -= 1
                    if patience <= 0:
                        lr /= self.lr_factor
                        print(" lr={:.1e}".format(lr), end="")
                        if lr < self.lr_min:
                            print()
                            break
                        patience = self.lr_patience
                        self.optimizer = self._get_optimizer(lr)
                print()
        except KeyboardInterrupt:
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        return
Example #8
    def train(self, t, xtrain, ytrain, xvalid, yvalid, args, ac_pre_mask,
              pre_mask_back, pre_mask_pre, pre_mask, from_t):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)
        self.mask_pre = pre_mask_pre
        self.mask_back = pre_mask_back
        self.ac_pre_mask = ac_pre_mask
        self.pre_mask = pre_mask
        self.from_t = from_t
        # Loop epochs
        try:
            for e in range(self.nepochs):
                # Train
                clock0 = time.time()
                self.train_epoch(t, xtrain, ytrain, args=args)
                clock1 = time.time()
                train_loss, train_acc = self.eval(t, xtrain, ytrain, args=args)
                clock2 = time.time()
                print(
                    '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                    .format(
                        e + 1, 1000 * self.sbatch * (clock1 - clock0) /
                        xtrain.size(0), 1000 * self.sbatch *
                        (clock2 - clock1) / xtrain.size(0), train_loss,
                        100 * train_acc),
                    end='')
                # Valid
                valid_loss, valid_acc = self.eval(t, xvalid, yvalid, args=args)
                print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                    valid_loss, 100 * valid_acc),
                      end='')
                # Adapt lr
                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_model = utils.get_model(self.model)
                    patience = self.lr_patience
                    print(' *', end='')
                else:
                    patience -= 1
                    if patience <= 0:
                        lr /= self.lr_factor
                        print(' lr={:.1e}'.format(lr), end='')
                        if lr < self.lr_min:
                            print()
                            break
                        patience = self.lr_patience
                        self.optimizer = self._get_optimizer(lr)
                print()
        except KeyboardInterrupt:
            print()

        # Restore best validation model
        utils.set_model_(self.model, best_model)

        return
Example #9
    def train(self,t,xtrain,ytrain,xvalid,yvalid):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model,best_model)

        # Model update
        if t==0:
            self.fisher=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
        else:
            fisher_new=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
            # Fisher-weighted (mode-IMM-style) merge of current and previous parameters;
            # operate on p.data so the assignment actually modifies the model in place.
            for (n,p),(_,p_old) in zip(self.model.named_parameters(),self.model_old.named_parameters()):
                p.data=fisher_new[n]*p.data+self.fisher[n]*p_old.data
                self.fisher[n]+=fisher_new[n]
                p.data/=(self.fisher[n]==0).float()+self.fisher[n]

        # Old model save
        self.model_old=deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)

        return
Example #10
    def train(self,t,xtrain,ytrain,xvalid,yvalid,data):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            
            #save log for current task & old tasks at every epoch
            self.logger.add(epoch=(t*self.nepochs)+e, task_num=t+1, valid_loss=valid_loss, valid_acc=valid_acc)
            for task in range(t): 
                xvalid_t=data[task]['valid']['x'].cuda()
                yvalid_t=data[task]['valid']['y'].cuda()
                valid_loss_t,valid_acc_t=self.eval(task,xvalid_t,yvalid_t)
                self.logger.add(epoch=(t*self.nepochs)+e, task_num=task+1, valid_loss=valid_loss_t, valid_acc=valid_acc_t)
            
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best and save model as old
        utils.set_model_(self.model,best_model)
        self.model_old = Net([1, 28, 28], [(0, 10), (1, 10), (2, 10), (3, 10), (4, 10), (5, 10), (6, 10), (7, 10), (8, 10), (9, 10)]).cuda()
        self.model_old.load_state_dict(self.model.state_dict())
        self.model_old.eval()
        
        utils.freeze_model(self.model_old)
        self.logger.save()
        return
Example #11
    def train(self, t, xtrain, ytrain, xvalid, yvalid, args):  #N-CL
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        print('before: ', self.model.fc1.weight)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain, args)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss, 100 * train_acc),
                end='')
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid, args)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        print('after: ', self.model.fc1.weight)

        return
Example #12
    def train(self,t,xtrain,ytrain,xvalid,yvalid):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best, save model as old
        utils.set_model_(self.model,best_model)
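        # For t > 0, replace the weights with a running average over tasks,
        # w <- (w_new + t * w_old) / (t + 1), before storing the result as the new old model.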
        if t>0:
            model_state = utils.get_model(self.model)
            model_old_state = utils.get_model(self.model_old)
            for name, param in self.model.named_parameters():
                #model_state[name]=(1-self.alpha)*model_old_state[name]+self.alpha*model_state[name]
                model_state[name]=(model_state[name]+model_old_state[name]*t)/(t+1)
            utils.set_model_(self.model,model_state)

        self.model_old=deepcopy(self.model)
        utils.freeze_model(self.model_old)
        self.model_old.eval()


        return
Example #13
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data):
        best_loss = np.inf
        best_acc = 0
        best_model = utils.get_model(self.model)
        lr = self.lr
        # patience = self.lr_patience
        self.optimizer = self._get_optimizer(t, lr)
        nepochs = self.nepochs
        test_max = 0
        # Loop epochs
        try:
            for e in range(nepochs):
                # Train

                self.train_epoch(xtrain, ytrain, cur_epoch=e, nepoch=nepochs)
                train_loss, train_acc = self.eval(xtrain, ytrain)
                print(
                    '| [{:d}/5], Epoch {:d}/{:d}, | Train: loss={:.3f}, acc={:2.2f}% |'
                    .format(t + 1, e + 1, nepochs, train_loss,
                            100 * train_acc),
                    end='')
                # # Valid
                valid_loss, valid_acc = self.eval(xvalid, yvalid)
                print(' Valid: loss={:.3f}, acc={:5.2f}% |'.format(
                    valid_loss, 100 * valid_acc),
                      end='')
                print()

                xtest = data[5]['test']['x'].cuda()
                ytest = data[5]['test']['y'].cuda()

                _, test_acc = self.eval(xtest, ytest)
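                # Checkpoint selection: keep the model with the highest accuracy on the
                # test split of task index 5 in `data` (tracked in self.test_max), rather
                # than the commented-out validation-based criteria.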

                # # Adapt lr
                # if valid_loss < best_loss:
                #     best_loss = min(best_loss,valid_loss)

                # if valid_acc > best_acc:
                #     best_acc = max(best_acc, valid_acc)
                if test_acc > self.test_max:
                    self.test_max = max(self.test_max, test_acc)
                    best_model = utils.get_model(self.model)

                print(
                    '>>> Test on All Task:->>> Max_acc : {:2.2f}%  Curr_acc : {:2.2f}%<<<'
                    .format(100 * self.test_max, 100 * test_acc))

        except KeyboardInterrupt:
            print()

        # Restore best validation model
        utils.set_model_(self.model, best_model)
        return
Example #14
    def search_network(self, t, train_data, valid_data, batch_size, epochs, device='cuda'):
        # 0 prepare
        print("Search Stage")
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr_a = self.o_lr_a
        lr = self.o_lr
        # 1 define optimizers
        self.optimizer_oa = self._get_optimizer_oa(lr_a)
        self.optimizer_o = self._get_optimizer_o(lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer_o, epochs)
        # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer_o, patience=self.lr_patience,
        #                                                        factor=self.lr_factor)
        num_train = len(train_data)
        indices = list(range(num_train))
        split = int(np.floor(0.5 * num_train))
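        # Split the task's training set 50/50: presumably one half updates the network
        # weights and the other half the architecture parameters inside search_epoch
        # (the usual bi-level, DARTS-style arrangement).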
        train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
            num_workers=4, pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(
            train_data, batch_size=batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
            num_workers=4, pin_memory=True)

        # 3 training the model
        for e in range(epochs):
            # 3.1 search
            self.search_epoch(t, train_loader, valid_loader, device)
            # 3.2 compute training loss
            train_loss, train_acc = self.eval(t, train_loader, mode='search', device=device)
            # 3.3 compute valid loss
            valid_loss, valid_acc = self.eval(t, valid_loader, mode='search', device=device)
            # 3.4 logging
            print('| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                e, train_loss, 100 * train_acc, valid_loss, 100 * valid_acc))
            self.writer.add_scalars('Search_Loss/Task: {}'.format(t),
                                    {'train_loss': train_loss, 'valid_loss': valid_loss},
                                    global_step=e)
            self.writer.add_scalars('Search_Accuracy/Task: {}'.format(t),
                                    {'train_acc': train_acc * 100, 'valid_acc': valid_acc * 100},
                                    global_step=e)
            # 3.5 Adapt lr
            scheduler.step()

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)

        # 4 Restore best model
        utils.set_model_(self.model, best_model)
Example #15
    def post_train(self, t, xtrain, ytrain, xvalid, yvalid):
        # Restore best, save model as old
        if t > 0:
            model_state = utils.get_model(self.model)
            model_old_state = utils.get_model(self.model_old)
            for name, param in self.model.named_parameters():
                #model_state[name]=(1-self.alpha)*model_old_state[name]+self.alpha*model_state[name]
                model_state[name] = (model_state[name] +
                                     model_old_state[name] * t) / (t + 1)
            utils.set_model_(self.model, model_state)

        self.model_old = deepcopy(self.model)
        utils.freeze_model(self.model_old)
        self.model_old.eval()

        return
Example #16
    def train(self,t,xtrain,ytrain,xvalid,yvalid):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best and save model as old
        utils.set_model_(self.model,best_model)
        self.model_old=deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)
        
        torch.save(self.model.state_dict(),'pretrain_lwf.pth')
        return
Example #17
    def train_network(self, t, train_data, valid_data, batch_size, epochs, device='cuda'):
        # 0 prepare
        print("Training Begin")
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        # 1 define the optimizer and scheduler
        self.optimizer = self._get_optimizer(lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, epochs)
        # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=self.lr_patience,
        #                                                        factor=self.lr_factor)
        # 2 define the dataloader
        train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(
            valid_data, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

        # 3 training the model
        for e in range(epochs):
            # 3.1 train
            self.train_epoch(t, train_loader, device=device)
            # 3.2 compute training loss
            train_loss, train_acc = self.eval(t, train_loader, mode='train', device=device)
            # 3.3 compute valid loss
            valid_loss, valid_acc = self.eval(t, valid_loader, mode='train', device=device)
            # 3.4 logging
            print('| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                e, train_loss, 100 * train_acc, valid_loss, 100 * valid_acc))
            self.writer.add_scalars('Train_Loss/Task: {}'.format(t),
                                    {'train_loss': train_loss, 'valid_loss': valid_loss},
                                    global_step=e)
            self.writer.add_scalars('Train_Accuracy/Task: {}'.format(t),
                                    {'train_acc': train_acc * 100, 'valid_acc': valid_acc * 100},
                                    global_step=e)

            # 3.5 Adapt learning rate
            scheduler.step()
            # 3.6 update the best model
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)

        # 4 Restore best model
        utils.set_model_(self.model, best_model)
Example #18
    def train(self, t, xtrain, ytrain, xvalid, yvalid):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        try:
            for e in range(self.nepochs):
                # Train
                clock0 = time.time()
                self.train_epoch(t, xtrain, ytrain)
                clock1 = time.time()
                train_loss, train_acc = self.eval(t, xtrain, ytrain)
                clock2 = time.time()
                print(
                    '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                    .format(
                        e + 1, 1000 * self.sbatch * (clock1 - clock0) /
                        xtrain.size(0), 1000 * self.sbatch *
                        (clock2 - clock1) / xtrain.size(0), train_loss,
                        100 * train_acc),
                    end='')
                # Valid
                valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
                print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                    valid_loss, 100 * valid_acc),
                      end='')
                # Adapt lr
                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_model = utils.get_model(self.model)
                    patience = self.lr_patience
                    print(' *', end='')
                else:
                    patience -= 1
                    if patience <= 0:
                        lr /= self.lr_factor
                        print(' lr={:.1e}'.format(lr), end='')
                        if lr < self.lr_min:
                            print()
                            break
                        patience = self.lr_patience
                        self.optimizer = self._get_optimizer(lr)
                print()
        except KeyboardInterrupt:
            print()

        # Restore best validation model
        utils.set_model_(self.model, best_model)

        # Activations mask
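        # mask_pre accumulates the per-task unit masks with an element-wise max, so it
        # marks every unit already claimed by any task seen so far (HAT-style hard attention).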
        task = torch.autograd.Variable(torch.LongTensor([t]).cuda(),
                                       volatile=False)
        mask = self.model.mask(task, s=self.smax)
        for i in range(len(mask)):
            mask[i] = torch.autograd.Variable(mask[i].data.clone(),
                                              requires_grad=False)
        if t == 0:
            self.mask_pre = mask
        else:
            for i in range(len(self.mask_pre)):
                self.mask_pre[i] = torch.max(self.mask_pre[i], mask[i])

        # Weights mask
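        # mask_back[n] = 1 - (mask_pre expanded to the shape of parameter n); in HAT this
        # factor later scales the gradients so weights used by earlier tasks stay protected.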
        self.mask_back = {}
        for n, _ in self.model.named_parameters():
            vals = self.model.get_view_for(n, self.mask_pre)
            if vals is not None:
                self.mask_back[n] = 1 - vals

        return
Example #19
    def train(self, t, train_data_loader, test_data_loader, val_data_loader):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        task = torch.autograd.Variable(
            torch.LongTensor([t]).cuda(), volatile=False
        ) if torch.cuda.is_available() else torch.autograd.Variable(
            torch.LongTensor([t]), volatile=False)
        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()

            self.train_epochewc(t, train_data_loader)

            clock1 = time.time()

            train_loss, train_acc, train_recall, train_f1 = self.eval_withregx(
                t, test_data_loader)
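            # Note: the 'Train:' metrics printed below are computed on test_data_loader,
            # not on the training loader.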

            clock2 = time.time()

            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1, 1000 * self.sbatch * (clock1 - clock0) /
                    train_data_loader.__len__(), 1000 * self.sbatch *
                    (clock2 - clock1) / train_data_loader.__len__(),
                    train_loss, 100 * train_acc),
                end='')

            # Valid
            valid_loss, valid_acc, valid_recall, valid_f1 = self.eval_withregx(
                t, val_data_loader)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # Update old
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)  # Freeze the weights

        # Fisher ops
        if t > 0:
            fisher_old = {}

            startDateTimeOldLast = datetime.now()
            for n, _ in self.model.named_parameters():

                fisher_old[n] = self.fisher[n].clone()

            print('DataTime OldLast', datetime.now() - startDateTimeOldLast)
            print("Analysis compute memory waste in Old Task")

        # self.fisher=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
        self.fisher = utils.fisher_matrix_diag_nlp(t,
                                                   train_data_loader,
                                                   self.model,
                                                   self.criterion,
                                                   opt=self.opt)
        if t > 0:
            # Watch out! We do not want to keep t models (or fisher diagonals) in memory, therefore we have to merge fisher diagonals
            startDateTime = datetime.now()
            for n, _ in self.model.named_parameters():

                self.fisher[n] = (self.fisher[n] + fisher_old[n] * t) / (
                    t + 1)  # Checked: it is better than the other option
                #self.fisher[n]=0.5*(self.fisher[n]+fisher_old[n])
            print("Analysis compute memory waste")
            print('DataTime OldLast', datetime.now() - startDateTime)
        return
Example #20
    def train(self, t, train_data_loader, test_data_loader, val_data_loader):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        task = torch.autograd.Variable(
            torch.LongTensor([t]).cuda(), volatile=False
        ) if torch.cuda.is_available() else torch.autograd.Variable(
            torch.LongTensor([t]), volatile=False)
        # Loop epochs
        print("Size of account ===> " + str(self.nepochs))

        for e in range(self.nepochs):
            # Train
            clock0 = time.time()

            self.train_epochlwf(t, train_data_loader)

            clock1 = time.time()

            train_loss, train_acc, train_recall, train_f1 = self.evallwf(
                t, test_data_loader)

            clock2 = time.time()

            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1, 1000 * self.sbatch * (clock1 - clock0) /
                    train_data_loader.__len__(), 1000 * self.sbatch *
                    (clock2 - clock1) / train_data_loader.__len__(),
                    train_loss, 100 * train_acc),
                end='')

            # Valid
            valid_loss, valid_acc, valid_recall, valid_f1 = self.evallwf(
                t, val_data_loader)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # Update old
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)  # Freeze the weights

        return
Example #21
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size,
              taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        lr_rho = self.lr_rho
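        # Separate learning rate for the rho parameters, presumably the log-variance
        # terms of the Bayesian weights (assumption based on the lr/lr_rho pairing below).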
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr, lr_rho)

        # Loop epochs
        for e in range(self.nepochs):
            self.epoch = self.epoch + 1
            # Train
            clock0 = time.time()

            num_batch = xtrain.size(0)

            self.train_epoch(t, xtrain, ytrain)

            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)

            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(e + 1,
                        1000 * self.sbatch * (clock1 - clock0) / num_batch,
                        1000 * self.sbatch * (clock2 - clock1) / num_batch,
                        train_loss, 100 * train_acc),
                end='')
            # Valid

            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            # save log for current task & old tasks at every epoch
            self.logger.add(epoch=(t * self.nepochs) + e,
                            task_num=t + 1,
                            valid_loss=valid_loss,
                            valid_acc=valid_acc)
            for task in range(t):
                xvalid_t = data[task]['valid']['x'].cuda()
                yvalid_t = data[task]['valid']['y'].cuda()

                valid_loss_t, valid_acc_t = self.eval(task, xvalid_t, yvalid_t)
                self.logger.add(epoch=(t * self.nepochs) + e,
                                task_num=task + 1,
                                valid_loss=valid_loss_t,
                                valid_acc=valid_acc_t)

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    lr_rho /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr, lr_rho)
            print()

            utils.freeze_model(self.model_old)  # Freeze the weights

        # Restore best
        utils.set_model_(self.model, best_model)
        self.model_old = deepcopy(self.model)
        self.saved = 1

        self.logger.save()

        return
Example #22
    def train(self, t, xtrain, ytrain, xvalid, yvalid, phase, args):  #N-CL
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr, phase)

        if phase == 'mcl':
            print('before: ', self.model.mcl.fc1.weight)
            task = torch.autograd.Variable(torch.LongTensor([t]).cuda(),
                                           volatile=False)
            print(
                'before: ',
                self.model.mask(task, phase=phase, smax=self.smax,
                                args=args)[0])

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain, phase=phase, args=args)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t,
                                              xtrain,
                                              ytrain,
                                              phase=phase,
                                              args=args)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss, 100 * train_acc),
                end='')
            # Valid
            valid_loss, valid_acc = self.eval(t,
                                              xvalid,
                                              yvalid,
                                              phase=phase,
                                              args=args)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr, phase)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        if phase == 'mcl':
            print('after: ', self.model.mcl.fc1.weight)
            task = torch.autograd.Variable(torch.LongTensor([t]).cuda(),
                                           volatile=False)
            print(
                'after: ',
                self.model.mask(task, phase=phase, smax=self.smax,
                                args=args)[0])
        return
Example #23
    def train(self, tasks, xtrain, ytrain, xvalid, yvalid, args):
        self.model = deepcopy(self.initial_model)  # Restart model

        task_t, task_v = tasks
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)
        miles = []
        for i in range(3):
            miles.append(int(self.nepochs * 0.9**(i + 1)))
        print(sorted(miles))
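        # MultiStepLR decays the learning rate by gamma=0.2 at the milestones computed
        # above, i.e. at roughly 73%, 81%, and 90% of the total number of epochs.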
        train_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=sorted(miles),
            gamma=0.2)  #learning rate

        # Loop epochs
        try:
            for e in range(self.nepochs):
                #print("newnewnewnewnewnew")
                # Train
                clock0 = time.time()
                self.train_epoch(task_t, xtrain, ytrain)
                clock1 = time.time()
                train_loss, train_acc = self.eval_validation(
                    task_t, xtrain, ytrain)
                clock2 = time.time()
                print(
                    '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}%|'
                    .format(
                        e + 1, 1000 * self.sbatch * (clock1 - clock0) /
                        xtrain.size(0), 1000 * self.sbatch *
                        (clock2 - clock1) / xtrain.size(0), train_loss,
                        100 * train_acc),
                    end='')
                # Valid
                #print(xtrain.size(0))
                valid_loss, valid_acc = self.eval_validation(
                    task_v, xvalid, yvalid)
                print(' Valid: loss={:.3f}, acc={:5.1f}%|'.format(
                    valid_loss, valid_acc * 100),
                      end='')
                # Adapt lr
                train_scheduler.step(e)
                print(' lr={:.1e}'.format(self.optimizer.param_groups[0]['lr']), end='')
                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_model = utils.get_model(self.model)
                #     patience=self.lr_patience
                #     print(' *',end='')
                #     print(' lr={:.1e}'.format(lr),end='')
                # else:
                #     patience-=1
                #     if patience<=0:
                #         lr/=self.lr_factor
                #         print(' lr={:.1e}'.format(lr),end='')
                #         if lr<self.lr_min:
                #             print()
                #             break
                #         patience=self.lr_patience
                #         self.optimizer=self._get_optimizer(lr)
                print()
        except KeyboardInterrupt:
            print()

        # Restore best
        utils.set_model_(self.model, best_model)
        torch.save(self.model.state_dict(),
                   'pretrain_cifar100_joint_lr0.1_without-clip.pth')
        return
Example #24
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            clock2 = time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e + 1, 1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                train_loss, 100 * train_acc), end='')
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')

            # Save log for the current task & all old tasks at every epoch
            self.logger.add(epoch=(t * self.nepochs) + e, task_num=t + 1,
                            valid_loss=valid_loss, valid_acc=valid_acc)
            for task in range(t):
                xvalid_t = data[task]['valid']['x'].cuda()
                yvalid_t = data[task]['valid']['y'].cuda()
                valid_loss_t, valid_acc_t = self.eval(task, xvalid_t, yvalid_t)
                self.logger.add(epoch=(t * self.nepochs) + e, task_num=task + 1,
                                valid_loss=valid_loss_t, valid_acc=valid_acc_t)
            
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        self.logger.save()
        
        # Update old
        self.model_old = Net(input_size, taskcla).cuda()
        self.model_old.load_state_dict(self.model.state_dict())
        self.model_old.eval()
        utils.freeze_model(self.model_old) # Freeze the weights

        # Fisher ops
        if t > 0:
            fisher_old = {}
            for n, _ in self.model.named_parameters():
                fisher_old[n] = self.fisher[n].clone()
        self.fisher = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model, self.criterion)
        if t > 0:
            # Watch out! We do not want to keep t models (or Fisher diagonals) in memory,
            # so the new diagonal is merged into a single running average
            # (checked: this weighted merge works better than a plain 0.5/0.5 average).
            for n, _ in self.model.named_parameters():
                self.fisher[n] = (self.fisher[n] + fisher_old[n] * t) / (t + 1)

        return
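The Fisher merge above keeps a single running diagonal instead of t per-task copies. A minimal sketch of that weighted average; the dict-of-tensors layout mirrors what utils.fisher_matrix_diag is assumed to return.

import torch

def merge_fisher(fisher_new, fisher_old, t):
    # fisher_new / fisher_old: dicts mapping parameter name -> Fisher diagonal.
    # The new task weighs 1; the accumulated diagonal weighs the t tasks it summarizes.
    return {n: (fisher_new[n] + fisher_old[n] * t) / (t + 1) for n in fisher_new}

# toy usage with made-up diagonals for a single parameter
f_old = {'w': torch.tensor([1.0, 2.0])}
f_new = {'w': torch.tensor([3.0, 4.0])}
print(merge_fisher(f_new, f_old, t=2)['w'])  # tensor([1.6667, 2.6667])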
Beispiel #25
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size,
              taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience  # initialize patience (was missing)
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()

            num_batch = xtrain.size(0)

            self.train_epoch(t, xtrain, ytrain)

            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(e + 1,
                        1000 * self.sbatch * (clock1 - clock0) / num_batch,
                        1000 * self.sbatch * (clock2 - clock1) / num_batch,
                        train_loss, 100 * train_acc),
                end='')
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')
            print(' lr : {:.6f}'.format(self.optimizer.param_groups[0]['lr']))

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')

            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # Update old
        self.model_old = deepcopy(self.model)
        utils.freeze_model(self.model_old)  # Freeze the weights
        self.omega_update(t, xtrain)

        return
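A minimal sketch, assuming the usual behaviour of utils.freeze_model, of the "update old" step that ends this train(): a frozen, eval-mode copy of the network is kept as the anchor for the next task's regularization.

from copy import deepcopy

import torch

def freeze_copy(model):
    old = deepcopy(model)
    old.eval()                   # fix batch-norm/dropout behaviour
    for p in old.parameters():
        p.requires_grad = False  # the anchor never needs gradients
    return old

model = torch.nn.Linear(4, 2)
model_old = freeze_copy(model)
print(all(not p.requires_grad for p in model_old.parameters()))  # True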
Beispiel #26
    def train(self, xtrain, ytrain, xvalid, yvalid, xtest, ytest):
        best_loss = np.inf
        best_acc = -np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()

            num_batch = xtrain.size(0)

            self.train_epoch(xtrain, ytrain)

            clock1 = time.time()
            # train_loss,train_acc=self.eval(xtrain,ytrain)
            # clock2=time.time()
            # print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
            #     e+1,1000*self.sbatch*(clock1-clock0)/num_batch,
            #     1000*self.sbatch*(clock2-clock1)/num_batch,train_loss,100*train_acc),end='')
            # Valid
            valid_loss, valid_acc = self.eval(xvalid, yvalid, test=False)
            clock2 = time.time()
            print(
                'Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms, Valid: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1, 1000 * self.sbatch * (clock1 - clock0) / num_batch,
                    1000 * self.sbatch * (clock2 - clock1) / xvalid.size(0),
                    valid_loss, 100 * valid_acc),
                end='')
            # test_loss,test_acc=self.eval(xtest,ytest)
            # print(' Test: acc={:5.1f}% |'.format(100*test_acc),end='')
            self.valid_rs.append((valid_loss, valid_acc))
            # Adapt lr
            # if valid_acc > best_acc:
            #     best_model = utils.get_model(self.model)
            #     best_acc = valid_acc

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')

            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        lr = self.lr_min
                        print()
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # self.logger.save()
        return
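The patience-based learning-rate schedule repeated in these examples can be isolated as below. The factor, patience, and floor values are placeholders; like the example above, this variant clamps at lr_min rather than stopping early.

def step_lr(valid_loss, state, lr_factor=3.0, lr_patience=5, lr_min=1e-4):
    # state: dict with keys 'best_loss', 'patience', 'lr'
    if valid_loss < state['best_loss']:
        state['best_loss'] = valid_loss
        state['patience'] = lr_patience                         # reset on improvement
    else:
        state['patience'] -= 1
        if state['patience'] <= 0:
            state['lr'] = max(state['lr'] / lr_factor, lr_min)  # decay, clamped at the floor
            state['patience'] = lr_patience
    return state['lr']

state = {'best_loss': float('inf'), 'patience': 5, 'lr': 0.05}
for loss in [1.0, 0.9, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0]:
    step_lr(loss, state)
print(state['lr'])  # ~0.0167: decayed once after 5 non-improving epochs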
Beispiel #27
    def train(self, t, xtrain, ytrain, xvalid, yvalid):

        if t > 0:  # reset modules not in bestPath back to their initial (randomly initialized) weights, as in the PathNet paper
            layers = ['conv1', 'conv2', 'conv3', 'fc1', 'fc2']
            for (n, p), (m, q) in zip(self.model.named_parameters(),
                                      self.initial_model.named_parameters()):
                if n == m:
                    layer, module, par = n.split(".")
                    module = int(module)
                    if layer in layers:
                        if module not in self.model.bestPath[
                                0:t, layers.index(layer)]:
                            p.data = deepcopy(q.data)

        #init path for this task
        Path = np.random.randint(0, self.M - 1, size=(self.P, self.L, self.N))
        guesses = list(range(self.M))
        lr = []
        patience = []
        best_loss = []
        for p in range(self.P):
            lr.append(self.lr)
            patience.append(self.lr_patience)
            best_loss.append(np.inf)
            for j in range(self.L):
                np.random.shuffle(guesses)
                Path[p, j, :] = guesses[:self.N]  #do not repeat modules

        winner = 0
        best_path_model = utils.get_model(self.model)
        best_loss_overall = np.inf

        try:
            for g in range(self.generations):
                if np.max(lr) < self.lr_min: break

                for p in range(self.P):
                    if lr[p] < self.lr_min: continue

                    # train only the modules in the current path, minus the ones in the model.bestPath
                    self.model.unfreeze_path(t, Path[p])

                    # the optimizer trains solely the params for the current task
                    self.optimizer = self._get_optimizer(lr[p])

                    # Loop epochs
                    for e in range(self.nepochs):
                        # Train
                        clock0 = time.time()
                        self.train_epoch(t, xtrain, ytrain, Path[p])
                        clock1 = time.time()
                        train_loss, train_acc, _ = self.eval(
                            t, xtrain, ytrain, Path[p])
                        clock2 = time.time()
                        print(
                            '| Generation {:3d} | Path {:3d} | Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                            .format(
                                g + 1, p + 1, e + 1, 1000 * self.sbatch *
                                (clock1 - clock0) / xtrain.size(0),
                                1000 * self.sbatch * (clock2 - clock1) /
                                xtrain.size(0), train_loss, 100 * train_acc),
                            end='')
                        # Valid
                        valid_loss, valid_acc, _ = self.eval(
                            t, xvalid, yvalid, Path[p])
                        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                            valid_loss, 100 * valid_acc),
                              end='')

                        # Save the winner
                        if valid_loss < best_loss_overall:
                            best_loss_overall = valid_loss
                            best_path_model = utils.get_model(self.model)
                            winner = p
                            print(' B', end='')

                        # Adapt lr
                        if valid_loss < best_loss[p]:
                            best_loss[p] = valid_loss
                            patience[p] = self.lr_patience
                            print(' *', end='')
                        else:
                            patience[p] -= 1
                            if patience[p] <= 0:
                                lr[p] /= self.lr_factor
                                print(' lr={:.1e}'.format(lr[p]), end='')
                                if lr[p] < self.lr_min:
                                    print()
                                    break
                                patience[p] = self.lr_patience
                        print()

                # Restore winner model
                utils.set_model_(self.model, best_path_model)
                print('| Winning path: {:3d} | Best loss: {:.3f} |'.format(
                    winner + 1, best_loss_overall))

                # Keep the winner and mutate it
                print('Mutating')
                probability = 1 / (self.N * self.L)  #probability to mutate
                for p in range(self.P):
                    if p != winner:
                        best_loss[p] = np.inf
                        lr[p] = lr[winner]
                        patience[p] = self.lr_patience
                        for j in range(self.L):
                            for k in range(self.N):
                                Path[p, j, k] = Path[winner, j, k]
                                if np.random.rand() < probability:
                                    Path[p, j, k] = (
                                        Path[p, j, k] +
                                        np.random.randint(-2, 3)
                                    ) % self.M  # add an int in [-2, 2] to the module index; the mutation range is effectively another hyperparameter

        except KeyboardInterrupt:
            print()

        #save the best path into the model
        self.model.bestPath[t] = Path[winner]
        print(self.model.bestPath[t])

        return
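A self-contained sketch of the winner-copy-and-mutate step from the generation loop above. The array layout mirrors Path[p, j, k]; the toy sizes are placeholders.

import numpy as np

def mutate_from_winner(path, winner, N, L, M):
    # non-winning paths copy the winner; each module index then mutates with
    # probability 1/(N*L) by adding an integer in [-2, 2] modulo M
    prob = 1.0 / (N * L)
    for p in range(path.shape[0]):
        if p == winner:
            continue
        path[p] = path[winner]
        mask = np.random.rand(L, N) < prob
        path[p][mask] = (path[p][mask] + np.random.randint(-2, 3, size=int(mask.sum()))) % M
    return path

# toy usage: 3 candidate paths, L=2 layers, N=2 active modules, M=10 modules per layer
paths = np.random.randint(0, 10, size=(3, 2, 2))
print(mutate_from_winner(paths, winner=0, N=2, L=2, M=10))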
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size,
              taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):

            # Train
            clock0 = time.time()

            # self.model.variance_init()  # initialize the trainer net's variance to a large value

            # 1. train trainer_net, using saver_net's information for regularization

            self.train_epoch(xtrain, ytrain)

            clock1 = time.time()
            train_loss, train_acc = self.eval(xtrain, ytrain, self.sample)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss, 100 * train_acc),
                end='')
            # Valid
            valid_loss, valid_acc = self.eval(xvalid, yvalid, self.sample)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            # save log for current task & old tasks at every epoch
            self.logger.add(epoch=(t * self.nepochs) + e,
                            task_num=t + 1,
                            valid_loss=valid_loss,
                            valid_acc=valid_acc)
            for task in range(t):
                xvalid_t = data[task]['valid']['x'].cuda()
                yvalid_t = data[task]['valid']['y'].cuda()
                valid_loss_t, valid_acc_t = self.eval(xvalid_t, yvalid_t,
                                                      self.sample)
                self.logger.add(epoch=(t * self.nepochs) + e,
                                task_num=task + 1,
                                valid_loss=valid_loss_t,
                                valid_acc=valid_acc_t)

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

            #self.model_old = deepcopy(self.model)
            utils.freeze_model(self.model_old)  # Freeze the weights

            #self.print_log(e)

            # for n, m in self.model.named_children():
            #     print(n, m.weight.sigma.min())

        # Restore best
        utils.set_model_(self.model, best_model)

        self.logger.save()

        return
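The per-epoch logging loop above re-evaluates every earlier task so forgetting can be tracked across training. A minimal sketch, with a stand-in eval function and a plain list in place of the project's logger.

def log_all_tasks(t, epoch, nepochs, data, eval_fn, log):
    # evaluate the current task and every earlier one, one record per task
    for task in range(t + 1):
        xv, yv = data[task]['valid']['x'], data[task]['valid']['y']
        loss, acc = eval_fn(task, xv, yv)
        log.append(dict(epoch=t * nepochs + epoch, task_num=task + 1,
                        valid_loss=loss, valid_acc=acc))

# toy usage: two tasks, a dummy evaluator that returns fixed (loss, acc)
data = {0: {'valid': {'x': [0], 'y': [0]}}, 1: {'valid': {'x': [1], 'y': [1]}}}
log = []
log_all_tasks(t=1, epoch=0, nepochs=10, data=data,
              eval_fn=lambda task, x, y: (0.5, 0.9), log=log)
print(len(log))  # 2: the current task plus the one before it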
Beispiel #29
    def train(self,t,xtrain,ytrain,xvalid,yvalid):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        #log
        losses_train = []
        losses_valid = []
        acc_train = []
        acc_valid = []
        reg_train = []
        reg_valid = []
        self.logs['mask'][t]={}
        self.logs['mask_pre'][t]={}
        task=torch.autograd.Variable(torch.LongTensor([t]).cuda(),volatile=False)
        bmask=self.model.mask(task,s=self.smax)
        for i in range(len(bmask)):
            bmask[i]=torch.autograd.Variable(bmask[i].data.clone(),requires_grad=False)
            self.logs['mask'][t][i]={}
            self.logs['mask'][t][i][-1]=deepcopy(bmask[i].data.cpu().numpy().astype(np.float32))
            if t==0:
                self.logs['mask_pre'][t][i]=deepcopy((0*bmask[i]).data.cpu().numpy().astype(np.float32))
            else:
                self.logs['mask_pre'][t][i]=deepcopy(self.mask_pre[i].data.cpu().numpy().astype(np.float32))

        if not self.single_task or (self.single_task and t==0):
            # Loop epochs
            try:
                for e in range(self.nepochs):
                    # Train
                    clock0=time.time()
                    self.train_epoch(t,xtrain,ytrain)
                    clock1=time.time()
                    train_loss,train_acc,train_reg=self.eval_withreg(t,xtrain,ytrain)
                    clock2=time.time()
                    print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(e+1,
                        1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
                    # Valid
                    valid_loss,valid_acc,valid_reg=self.eval_withreg(t,xvalid,yvalid)
                    print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')

                    #log
                    losses_train.append(train_loss)
                    acc_train.append(train_acc)
                    reg_train.append(train_reg)
                    losses_valid.append(valid_loss)
                    acc_valid.append(valid_acc)
                    reg_valid.append(valid_reg)

                    # Adapt lr
                    if valid_loss<best_loss:
                        best_loss=valid_loss
                        best_model=utils.get_model(self.model)
                        patience=self.lr_patience
                        print(' *',end='')
                    else:
                        patience-=1
                        if patience<=0:
                            lr/=self.lr_factor
                            print(' lr={:.1e}'.format(lr),end='')
                            if lr<self.lr_min:
                                print()
                                break
                            patience=self.lr_patience
                            self.optimizer=self._get_optimizer(lr)
                    print()

                    # Log activations mask
                    task=torch.autograd.Variable(torch.LongTensor([t]).cuda(),volatile=False)
                    bmask=self.model.mask(task,s=self.smax)
                    for i in range(len(bmask)):
                        self.logs['mask'][t][i][e] = deepcopy(bmask[i].data.cpu().numpy().astype(np.float32))

                # Log losses
                if self.logs is not None:
                    self.logs['train_loss'][t] = np.array(losses_train)
                    self.logs['train_acc'][t] = np.array(acc_train)
                    self.logs['train_reg'][t] = np.array(reg_train)
                    self.logs['valid_loss'][t] = np.array(losses_valid)
                    self.logs['valid_acc'][t] = np.array(acc_valid)
                    self.logs['valid_reg'][t] = np.array(reg_valid)
            except KeyboardInterrupt:
                print()

        # Restore best validation model
        utils.set_model_(self.model,best_model)

        # Activations mask
        task=torch.autograd.Variable(torch.LongTensor([t]).cuda(),volatile=False)
        mask=self.model.mask(task,s=self.smax)
        for i in range(len(mask)):
            mask[i]=torch.autograd.Variable(mask[i].data.clone(),requires_grad=False)
        if t==0:
            self.mask_pre=mask
        else:
            for i in range(len(self.mask_pre)):
                self.mask_pre[i]=torch.max(self.mask_pre[i],mask[i])

        # Weights mask
        self.mask_back={}
        for n,_ in self.model.named_parameters():
            vals=self.model.get_view_for(n,self.mask_pre)
            if vals is not None:
                self.mask_back[n]=1-vals

        return
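A short sketch of the mask bookkeeping that closes the train() above: per-task attention masks accumulate into mask_pre via an element-wise max, and the gradient gate for a unit already claimed by earlier tasks is 1 minus that cumulative mask.

import torch

mask_task0 = [torch.tensor([1.0, 0.0, 1.0])]   # units used by task 0
mask_task1 = [torch.tensor([0.0, 1.0, 1.0])]   # units used by task 1

mask_pre = [m.clone() for m in mask_task0]
for i in range(len(mask_pre)):
    mask_pre[i] = torch.max(mask_pre[i], mask_task1[i])  # cumulative usage
print(mask_pre[0])            # tensor([1., 1., 1.])

mask_back = 1 - mask_pre[0]   # gradient multiplier: 0 where any old task used the unit
print(mask_back)              # tensor([0., 0., 0.])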
Beispiel #30
    def train(self, t, xtrain, ytrain, xvalid, yvalid):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Update old
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)  # Freeze the weights

        # reset importance omega
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                self.omega[n] = p.data.clone().zero_()
                self.DELTA[n] = p.data.clone().zero_()
                self.p_old[n] = p.data.clone()

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain, e)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss, 100 * train_acc),
                end='')
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            self.logger.log_scalar(str(t) + "_train acc", train_acc, e)
            self.logger.log_scalar(str(t) + "_valid acc", valid_acc, e)
            self.logger.log_scalar(str(t) + "_train loss", train_loss, e)
            self.logger.log_scalar(str(t) + "_valid loss", valid_loss, e)

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # Update task regularization OMEGA
        for (n, param), (_, param_old) in zip(self.model.named_parameters(),
                                              self.model_old.named_parameters()):
            if param.requires_grad:
                #change = param.detach().clone() - param_old
                #o = torch.nn.functional.relu(self.omega[n])/(change.pow(2) + self.xi)
                # importance: relu(omega) normalized by the squared total change (plus damping xi)
                o = torch.nn.functional.relu(
                    self.omega[n]) / (self.DELTA[n].pow(2) + self.xi)
                # exponential moving average of the running importance
                self.OMEGA[n] = self.OMEGA[n] * self.decay + o * (
                    1 - self.decay)  # alternative: self.OMEGA[n] + o

        return
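A worked sketch of the importance update that ends this train(): per parameter, o = relu(omega) / (DELTA^2 + xi), folded into the running OMEGA by an exponential moving average with factor decay. The tensor values and hyperparameters below are illustrative.

import torch

omega = torch.tensor([0.4, -0.1])   # per-parameter credit accumulated during the task
DELTA = torch.tensor([0.2, 0.5])    # total parameter change over the task
OMEGA = torch.tensor([1.0, 1.0])    # running importance from earlier tasks
xi, decay = 0.1, 0.9

o = torch.nn.functional.relu(omega) / (DELTA.pow(2) + xi)
OMEGA = OMEGA * decay + o * (1 - decay)
print(o)      # tensor([2.8571, 0.0000])
print(OMEGA)  # tensor([1.1857, 0.9000])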