Example #1
    def __init__(self, multimodal_feat_size=256):
        super(ImageEncoder, self).__init__()
        self.inception = custom_inception_v3()
        freeze_model(self.inception)

        self.map_global = nn.Linear(2048, multimodal_feat_size)
        self.map_local = nn.Linear(768, multimodal_feat_size)
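
A minimal sketch of the freeze_model helper that all of these snippets rely on, assuming it simply disables gradients on a PyTorch module (the exact implementation in each project may differ):

import torch.nn as nn

def freeze_model(model: nn.Module) -> nn.Module:
    # Disable gradient computation for every parameter so the optimizer and
    # autograd leave the module untouched; eval() also fixes batch-norm and
    # dropout behaviour for the frozen copy.
    for param in model.parameters():
        param.requires_grad = False
    model.eval()
    return model
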
Example #2
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        self.W = {}
        self.p_old = {}
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                n = n.replace('.', '__')
                self.W[n] = p.data.clone().zero_()
                self.p_old[n] = p.data.clone()

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            num_batch = xtrain.size(0)

            self.train_epoch(t,xtrain,ytrain)

            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/num_batch,1000*self.sbatch*(clock2-clock1)/num_batch,train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            print()
            #save log for current task & old tasks at every epoch

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')

            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()


        # Restore best
        utils.set_model_(self.model, best_model)

        self.update_omega(self.W, self.epsilon)
        self.model_old = deepcopy(self.model)
        utils.freeze_model(self.model_old) # Freeze the weights

        return
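
The W and p_old buffers initialized above are the running statistics of Synaptic Intelligence; a hedged sketch of how they are typically updated after each optimizer step (the helper name accumulate_w is hypothetical, update_omega is the snippet's own method):

def accumulate_w(model, W, p_old):
    # Hypothetical per-step bookkeeping: accumulate the path integral
    # W[n] += -grad * (p_new - p_old), then refresh the stored p_old.
    for n, p in model.named_parameters():
        if p.requires_grad:
            n = n.replace('.', '__')
            if p.grad is not None:
                W[n].add_(-p.grad * (p.detach() - p_old[n]))
            p_old[n] = p.detach().clone()
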
Example #3
def main(args):
    print('Dataset: {}, Normal Label: {}, LR: {}'.format(args.dataset, args.label, args.lr))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    model_type = args.model
    if model_type == 'resnet':
        model = utils.get_resnet_model(resnet_type=args.resnet_type)
        if args.dataset in ['rsna3D']:
            model = ResNet3D(model)
    elif model_type == 'timesformer':
        model = utils.get_timesformer_model(mode=args.timesformer_mode)
    model = model.to(device)

    ewc_loss = None

    # Freezing Pre-trained model for EWC
    if args.ewc:
        frozen_model = deepcopy(model).to(device)
        frozen_model.eval()
        utils.freeze_model(frozen_model)
        fisher = torch.load(args.diag_path)
        ewc_loss = EWCLoss(frozen_model, fisher)

    utils.freeze_parameters(model)

    sorted_train_loader, shuffled_train_loader, test_loader = utils.get_loaders(dataset=args.dataset, label_class=args.label,
                                                  batch_size=args.batch_size,
                                                  lookup_tables_paths=(args.train_lookup_table, args.test_lookup_table))
    train_model(model, sorted_train_loader, shuffled_train_loader, test_loader, device, args, ewc_loss)
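
EWCLoss is built from the frozen copy of the pre-trained network and a Fisher diagonal loaded from disk; a minimal sketch of what such a penalty usually looks like (the interface is an assumption, not this project's exact code):

import torch

class EWCLoss(torch.nn.Module):
    # Hypothetical sketch of the quadratic EWC penalty
    # sum_i F_i * (theta_i - theta_i*)^2 against the frozen reference weights.
    def __init__(self, frozen_model, fisher, lambda_ewc=1.0):
        super().__init__()
        self.reference = {n: p.detach().clone()
                          for n, p in frozen_model.named_parameters()}
        self.fisher = fisher
        self.lambda_ewc = lambda_ewc

    def forward(self, model):
        penalty = 0.0
        for n, p in model.named_parameters():
            if n in self.fisher:
                penalty = penalty + (self.fisher[n] * (p - self.reference[n]) ** 2).sum()
        return self.lambda_ewc * penalty
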
Example #4
    def post_train(self, t, xtrain, ytrain, xvalid, yvalid):
        # store the old model (and freeze it for gradients)
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)  # Freeze the weights

        # NOTE: other option is to save models to disk and reload them after each training session (slower but more accurate?)

        # deep copy the values from the old fisher matrix (previous models)
        if t > 0:
            fisher_old = {}
            for n, _ in self.model.named_parameters():
                fisher_old[n] = self.fisher[n].clone()

        # compute the fisher matrix for the current model
        # NOTE: shouldn't it be recomputed for all outputs?
        self.fisher = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model,
                                               self._fw_pass)

        # combine the fisher matrices
        if t > 0:
            # Watch out! We do not want to keep t models (or fisher diagonals) in memory, therefore we have to merge fisher diagonals
            # NOTE: is that equivalent?
            for n, _ in self.model.named_parameters():
                # count the old fisher matrix t times for the number of pervious tasks
                self.fisher[n] = (self.fisher[n] + fisher_old[n] * t) / (
                    t + 1)  # Checked: it is better than the other option
                #self.fisher[n]=0.5*(self.fisher[n]+fisher_old[n])

        return
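
The merge rule above, fisher = (fisher_new + fisher_old * t) / (t + 1), keeps an exact running mean of the per-task Fisher diagonals without storing them all; a toy check of that identity:

fishers = [1.0, 3.0, 5.0]            # per-task diagonal entries (toy values)
merged = fishers[0]
for t, f_new in enumerate(fishers[1:], start=1):
    merged = (f_new + merged * t) / (t + 1)
assert merged == sum(fishers) / len(fishers)   # running mean equals the overall mean
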
Example #5
    def train(self,t,xtrain,ytrain,xvalid,yvalid):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model,best_model)

        # Update old
        self.model_old=deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old) # Freeze the weights

        # Fisher ops
        if t>0:
            fisher_old={}
            for n,_ in self.model.named_parameters():
                fisher_old[n]=self.fisher[n].clone()
        self.fisher=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
        if t>0:
            # Watch out! We do not want to keep t models (or fisher diagonals) in memory, therefore we have to merge fisher diagonals
            for n,_ in self.model.named_parameters():
                self.fisher[n]=(self.fisher[n]+fisher_old[n]*t)/(t+1)       # Checked: it is better than the other option
                #self.fisher[n]=0.5*(self.fisher[n]+fisher_old[n])
        torch.save(self.model.state_dict(),'pretrain_ewc.pth')
        return
Example #6
def Xnet(backbone_name='densenet121',
         input_shape=(None, None, 3),
         input_tensor=None,
         encoder_weights='imagenet',
         freeze_encoder=False,
         skip_connections='default',
         decoder_block_type='upsampling',
         decoder_filters=(256, 128, 64, 32, 16),
         decoder_use_batchnorm=True,
         n_upsample_blocks=5,
         upsample_rates=(2, 2, 2, 2, 2),
         classes=1,
         activation='sigmoid'):
    """
    Args:
        backbone_name: (str) look at list of available backbones.
        input_shape:  (tuple) dimensions of input data (H, W, C)
        input_tensor: keras tensor
        encoder_weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            'dof' (pre-training on DoF)
        freeze_encoder: (bool) Set encoder layers weights as non-trainable. Useful for fine-tuning
        skip_connections: if 'default' is used take default skip connections,
            else provide a list of layer numbers or names starting from top of model
        decoder_block_type: (str) one of 'upsampling' and 'transpose' (look at blocks.py)
        decoder_filters: (int) number of convolution layer filters in decoder blocks
        decoder_use_batchnorm: (bool) if True add batch normalisation layer between `Conv2D` ad `Activation` layers
        n_upsample_blocks: (int) a number of upsampling blocks
        upsample_rates: (tuple of int) upsampling rates decoder blocks
        classes: (int) a number of classes for output
        activation: (str) one of keras activations for last model layer
    Returns:
        keras.models.Model instance
    """

    backbone = tf.keras.applications.DenseNet121(input_shape=input_shape,
                                                 input_tensor=input_tensor,
                                                 weights=encoder_weights,
                                                 include_top=False)

    if skip_connections == 'default':
        skip_connections = (311, 139, 51, 4)
    # n_upsample_blocks = len(skip_connections)

    model = build_xnet(backbone,
                       classes,
                       skip_connections,
                       decoder_filters=decoder_filters,
                       block_type=decoder_block_type,
                       activation=activation,
                       n_upsample_blocks=n_upsample_blocks,
                       upsample_rates=upsample_rates,
                       use_batchnorm=decoder_use_batchnorm)

    # lock encoder weights for fine-tuning
    if freeze_encoder:
        freeze_model(backbone)

    return model
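
Given the docstring above, a typical call might look like the following (argument values and the compile step are illustrative, not taken from the project):

model = Xnet(backbone_name='densenet121',
             input_shape=(256, 256, 3),
             encoder_weights='imagenet',
             freeze_encoder=True,
             classes=1,
             activation='sigmoid')
model.compile(optimizer='adam', loss='binary_crossentropy')
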
Example #7
File: lwf.py  Project: dandelin/hat
    def train(self, t, xtrain, ytrain, xvalid, yvalid):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            clock2 = time.time()
            print(
                "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |".format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss,
                    100 * train_acc,
                ),
                end="",
            )
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(
                " Valid: loss={:.3f}, acc={:5.1f}% |".format(
                    valid_loss, 100 * valid_acc
                ),
                end="",
            )
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(" *", end="")
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(" lr={:.1e}".format(lr), end="")
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best and save model as old
        utils.set_model_(self.model, best_model)
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)

        return
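
The LwF criterion itself is not shown in this snippet; it typically adds a distillation term against the frozen self.model_old to the task loss. A hedged sketch (the function name, temperature T and weight lamb are assumptions):

import torch.nn.functional as F

def lwf_criterion(outputs, targets, outputs_old=None, T=2.0, lamb=1.0):
    # Cross-entropy on the current task plus knowledge distillation
    # against the old model's logits, as in Learning without Forgetting.
    loss = F.cross_entropy(outputs, targets)
    if outputs_old is not None:
        p_old = F.softmax(outputs_old / T, dim=1)
        log_p_new = F.log_softmax(outputs / T, dim=1)
        loss = loss + lamb * (T * T) * F.kl_div(log_p_new, p_old, reduction='batchmean')
    return loss
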
Example #8
    def train(self,t,xtrain,ytrain,xvalid,yvalid):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model,best_model)

        # Model update
        if t==0:
            self.fisher=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
        else:
            fisher_new=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
            for (n,p),(_,p_old) in zip(self.model.named_parameters(),self.model_old.named_parameters()):
                p=fisher_new[n]*p+self.fisher[n]*p_old
                self.fisher[n]+=fisher_new[n]
                p/=(self.fisher[n]==0).float()+self.fisher[n]

        # Old model save
        self.model_old=deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)

        return
Example #9
    def train(self,t,xtrain,ytrain,xvalid,yvalid,data):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            
            #save log for current task & old tasks at every epoch
            self.logger.add(epoch=(t*self.nepochs)+e, task_num=t+1, valid_loss=valid_loss, valid_acc=valid_acc)
            for task in range(t): 
                xvalid_t=data[task]['valid']['x'].cuda()
                yvalid_t=data[task]['valid']['y'].cuda()
                valid_loss_t,valid_acc_t=self.eval(task,xvalid_t,yvalid_t)
                self.logger.add(epoch=(t*self.nepochs)+e, task_num=task+1, valid_loss=valid_loss_t, valid_acc=valid_acc_t)
            
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best and save model as old
        utils.set_model_(self.model,best_model)
        self.model_old = Net([1, 28, 28], [(0, 10), (1, 10), (2, 10), (3, 10), (4, 10), (5, 10), (6, 10), (7, 10), (8, 10), (9, 10)]).cuda()
        self.model_old.load_state_dict(self.model.state_dict())
        self.model_old.eval()
        
        utils.freeze_model(self.model_old)
        self.logger.save()
        return
Example #10
    def train(self,t,xtrain,ytrain,xvalid,yvalid):
        best_loss=np.inf
        best_model=utils.get_model(self.model)
        lr=self.lr
        patience=self.lr_patience
        self.optimizer=self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            # Adapt lr
            if valid_loss<best_loss:
                best_loss=valid_loss
                best_model=utils.get_model(self.model)
                patience=self.lr_patience
                print(' *',end='')
            else:
                patience-=1
                if patience<=0:
                    lr/=self.lr_factor
                    print(' lr={:.1e}'.format(lr),end='')
                    if lr<self.lr_min:
                        print()
                        break
                    patience=self.lr_patience
                    self.optimizer=self._get_optimizer(lr)
            print()

        # Restore best, save model as old
        utils.set_model_(self.model,best_model)
        if t>0:
            model_state = utils.get_model(self.model)
            model_old_state = utils.get_model(self.model_old)
            for name, param in self.model.named_parameters():
                #model_state[name]=(1-self.alpha)*model_old_state[name]+self.alpha*model_state[name]
                model_state[name]=(model_state[name]+model_old_state[name]*t)/(t+1)
            utils.set_model_(self.model,model_state)

        self.model_old=deepcopy(self.model)
        utils.freeze_model(self.model_old)
        self.model_old.eval()


        return
Example #11
    def post_train(self, t, xtrain, ytrain, xvalid, yvalid):
        # Restore best, save model as old
        if t > 0:
            model_state = utils.get_model(self.model)
            model_old_state = utils.get_model(self.model_old)
            for name, param in self.model.named_parameters():
                #model_state[name]=(1-self.alpha)*model_old_state[name]+self.alpha*model_state[name]
                model_state[name] = (model_state[name] +
                                     model_old_state[name] * t) / (t + 1)
            utils.set_model_(self.model, model_state)

        self.model_old = deepcopy(self.model)
        utils.freeze_model(self.model_old)
        self.model_old.eval()

        return
Example #12
    def load_model_ckpt(self, path):
        # Load
        weights = torch.load(path)
        self.image_encoder.load_state_dict(weights['img_enc'])
        self.text_encoder.load_state_dict(weights['txt_enc'])
        # Freeze parameters
        freeze_model(self.image_encoder)
        freeze_model(self.text_encoder)

        self.image_encoder.eval()
        self.text_encoder.eval()

        for i, d in enumerate(self.discriminators):
            d.load_state_dict(weights['discriminator'][i])
        self.generator.load_state_dict(weights['generator'])

        return weights['epoch']
Example #13
def main():
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(f"Training on {device}")

    cnn_model = models.resnet18(pretrained = True)
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    transformer_model = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased"
    )

    if FREEZE:
        freeze_model(cnn_model)
        freeze_model(transformer_model)

    image_model = ImageModel(cnn_model, num_out=H)
    text_model = TextModel(transformer_model, num_out=H)

    if PRETRAINED_MODEL:
        model = torch.load(PRETRAINED_MODEL)
    else:
        model = MegaModel(image_model, text_model, num_hidden=H)
        # model = MegaModelAggregator(image_model, text_model, num_hidden=2*H)
    model.to(device)

    train_dataloader, test_dataloader = load_dataloaders(
        path=PATH,
        image_folder=IMAGE_FOLDER,
        descriptor=DESCRIPTOR,
        batch_size=BATCH_SIZE
    )

    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')

    train_optim(
        model=model,
        tokenizer=tokenizer,
        train_dataloader=train_dataloader,
        test_dataloader=train_dataloader,
        loss_fn=loss_fn,
        epochs=EPOCHS,
        log_frequency=LOG_FREQUENCY,
        device=device,
        save_file=SAVE_INFO,
        save_dir=SAVE_MODEL,
        learning_rate=LEARNING_RATE 
    )
Example #14
    def __init__(self, config):
        self.config = config
        self.augmentation = None

        self.CLIP, clip_preprocess = clip.load("ViT-B/32",
                                               device=self.config.device)
        self.CLIP = self.CLIP.eval()
        freeze_model(self.CLIP)
        self.model = self.config.model(config).to(self.config.device).eval()
        freeze_model(self.model)

        if config.task == "txt2img":
            self.tokens = clip.tokenize([self.config.target
                                         ]).to(self.config.device)
            self.text_features = self.CLIP.encode_text(self.tokens).detach()
        if config.task == "img2txt":
            image = clip_preprocess(Image.open(
                self.config.target)).unsqueeze(0).to(self.config.device)
            self.image_features = self.CLIP.encode_image(image)
Example #15
    def solve(self, t, Tasks):
        task = Tasks[t]
        train_loader = task['train_loader']
        val_loader = task['test_loader']
        class_num = task['class_num']
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         self.lr,
                                         momentum=self.momentum,
                                         weight_decay=self.weight_decay)
        criterion = self.criterion

        world_size = dist.get_world_size()
        rank = dist.get_rank()

        best_model = utils.get_model(self.model)
        best_accu = 0

        train_sampler = task['train_sampler']
        test_sampler = task['test_sampler']

        for epoch in range(self.epochs):
            train_sampler.set_epoch(epoch)
            self.adjust_learning_rate(self.optimizer, epoch)
            # train for one epoch
            self.train(t, train_loader, self.model, self.model_old,
                       self.optimizer, epoch, Tasks)
            # evaluate on validation set
            accu = self.validate(t, self.model, epoch, Tasks)

            # remember best prec@1 and save checkpoint
            if accu > best_accu:
                best_accu = accu
                #best_model = utils.get_model(self.model) ???

        # if rank == 0:
        #     print('Best accuracy: ', best_accu)
        # Restore best and save model as old
        #utils.set_model_(self.model, best_model)
        self.model_old = deepcopy(self.model)
        utils.freeze_model(self.model_old)

        return best_accu
Example #16
    def post_train(self, t, xtrain, ytrain, xvalid, yvalid):
        # Model update
        if t == 0:
            self.fisher = utils.fisher_matrix_diag(t, xtrain, ytrain,
                                                   self.model, self._fw_pass)
        else:
            fisher_new = utils.fisher_matrix_diag(t, xtrain, ytrain,
                                                  self.model, self._fw_pass)
            for (n, p), (_, p_old) in zip(self.model.named_parameters(),
                                          self.model_old.named_parameters()):
                p = fisher_new[n] * p + self.fisher[n] * p_old
                self.fisher[n] += fisher_new[n]
                p /= (self.fisher[n] == 0).float() + self.fisher[n]

        # Old model save
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)

        return
Example #17
    def train(self, t, train_data, valid_data, device='cuda'):
        # train network for task t
        # 1 search the best model for task 1:n
        if t > 0:
            # 1.2.1 expand
            self.model.expand(t, device)
            # 1.2.2 freeze the model
            utils.freeze_model(self.model)
            # 1.2.3 search the best expand action
            self.search_network(t, train_data, valid_data, self.o_batch, self.o_epochs, device=device)
            # 1.2.4 select the best action
            best_archi = self.model.select(t)
            self.archis.append(best_archi)
            # 1.2.5 unfreeze the part of the model that needs to be trained
            utils.freeze_model(self.model)
            self.model.modify_param(self.model.model_to_train, True)
            # 1.2.6 look up the super model
            print(best_archi)
            utils.print_model_report(self.model)

        # 2 training
        self.train_network(t, train_data, valid_data, self.batch, self.epochs, device)
Example #18
def main(args):
    print('Dataset: {}, Normal Label: {}, LR: {}'.format(
        args.dataset, args.label, args.lr))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    model = utils.get_resnet_model(resnet_type=args.resnet_type)
    model = model.to(device)

    ewc_loss = None

    # Freezing Pre-trained model for EWC
    if args.ewc:
        frozen_model = deepcopy(model).to(device)
        frozen_model.eval()
        utils.freeze_model(frozen_model)
        fisher = torch.load(args.diag_path)
        ewc_loss = EWCLoss(frozen_model, fisher)

    utils.freeze_parameters(model)
    train_loader, test_loader = utils.get_loaders(dataset=args.dataset,
                                                  label_class=args.label,
                                                  batch_size=args.batch_size)
    train_model(model, train_loader, test_loader, device, args, ewc_loss)
Example #19
    def __init__(self, args):
        super(ARDM, self).__init__()
        self.args = args

        # define the two language models
        self.model_A = GPT2SimpleLM(UnifiedGPT2SmallConfig)
        self.model_B = GPT2SimpleLM(UnifiedGPT2SmallConfig)
        # language model KL
        self.language_model = GPT2SimpleLM(UnifiedGPT2SmallConfig)
        # load weights
        self.model_A.load_state_dict(get_pretrained("unified-gpt2-small"))
        self.model_B.load_state_dict(get_pretrained("unified-gpt2-small"))
        self.language_model.load_state_dict(
            get_pretrained("unified-gpt2-small")
        )
        # freeze weights
        utils.freeze_model(self.language_model)

        self.criterion = sequence_ce_lm_loss
        self.lm_coef = 0.1
        self.lm_coef_decay = 0.9999
        self.discount_factor = 0.95
        self.lm_stream = torch.cuda.Stream()
Example #20
    def load_model(self, ckpt, text_enc, gen):
        if ckpt is None and text_enc is None and gen is None:
            raise FileNotFoundError("Set path to load the model")

        if ckpt is not None and (text_enc is not None or gen is not None):
            raise ValueError("Specify just one way for loading:"
                             "checkpoint path or two separate files"
                             "for text encoder and generator")

        if ckpt is not None:
            print('Loading from checkpoint')
            weights = torch.load(ckpt)
            self.text_encoder.load_state_dict(weights['txt_enc'])
            self.generator.load_state_dict(weights['generator'])
        elif gen is not None and text_enc is not None:
            print('Loading from separate files')
            self.text_encoder.load_state_dict(torch.load(text_enc))
            self.generator.load_state_dict(torch.load(gen))
        elif gen is None or text_enc is None:
            raise FileNotFoundError(
                "Specify both generator and text encoder files")

        self.eval()
        freeze_model(self)
Example #21
    def fit_n_epochs(self,
                     num_epochs,
                     lr,
                     freeze_until=None,
                     sched_type='onecycle'):
        """
        Train the model for a given number of epochs
        Args:
            num_epochs (int): number of epochs to train
            lr (float): learning rate to be used by the scheduler
            freeze_until (str, optional): last layer to freeze
            sched_type (str, optional): type of scheduler to use
        """

        if self.configwb:
            wandb.watch(self.criterion, log="all", log_freq=10)

        self.epoch = 0
        self.train_loss_recorder = []
        self.val_loss_recorder = []

        self.model = freeze_model(self.model, freeze_until)
        # Update param groups & LR
        self._reset_opt(lr)
        # Scheduler
        self._reset_scheduler(lr, num_epochs, sched_type)

        mb = master_bar(range(num_epochs))
        for _ in mb:

            self._fit_epoch(freeze_until, mb)
            # Check whether ops invalidated the buffer
            self._params.assert_buffer_is_valid()
            eval_metrics = self.evaluate()

            # master bar
            mb.main_bar.comment = f"Epoch {self.start_epoch + self.epoch}/{self.start_epoch + num_epochs}"
            mb.write(
                f"Epoch {self.start_epoch + self.epoch}/{self.start_epoch + num_epochs} - "
                f"{self._eval_metrics_str(eval_metrics)}")

            self.save(self.output_file)
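
Unlike the earlier snippets, this trainer's freeze_model takes a freeze_until argument ("last layer to freeze") and returns the model; a hedged sketch of that variant, assuming layers are matched by parameter-name prefix:

def freeze_model(model, freeze_until=None):
    # Hypothetical sketch: freeze every parameter up to and including the layer
    # named `freeze_until`; parameters after it remain trainable.
    if freeze_until is None:
        return model
    names = [n for n, _ in model.named_parameters()]
    cutoff = max(i for i, n in enumerate(names) if n.startswith(freeze_until))
    for i, (_, param) in enumerate(model.named_parameters()):
        param.requires_grad = i > cutoff
    return model
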
Example #22
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size,
              taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()

            num_batch = xtrain.size(0)

            self.train_epoch(t, xtrain, ytrain)

            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(e + 1,
                        1000 * self.sbatch * (clock1 - clock0) / num_batch,
                        1000 * self.sbatch * (clock2 - clock1) / num_batch,
                        train_loss, 100 * train_acc),
                end='')
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')
            print(' lr : {:.6f}'.format(self.optimizer.param_groups[0]['lr']))
            #save log for current task & old tasks at every epoch

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')

            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # Update old
        self.model_old = deepcopy(self.model)
        utils.freeze_model(self.model_old)  # Freeze the weights
        self.omega_update(t, xtrain)

        return
Example #23
    def train(self, t, train_data_loader, test_data_loader, val_data_loader):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        task = torch.autograd.Variable(
            torch.LongTensor([t]).cuda(), volatile=False
        ) if torch.cuda.is_available() else torch.autograd.Variable(
            torch.LongTensor([t]), volatile=False)
        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()

            self.train_epochewc(t, train_data_loader)

            clock1 = time.time()

            train_loss, train_acc, train_recall, train_f1 = self.eval_withregx(
                t, test_data_loader)

            clock2 = time.time()

            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1, 1000 * self.sbatch * (clock1 - clock0) /
                    train_data_loader.__len__(), 1000 * self.sbatch *
                    (clock2 - clock1) / train_data_loader.__len__(),
                    train_loss, 100 * train_acc),
                end='')

            # Valid
            valid_loss, valid_acc, valid_recall, valid_f1 = self.eval_withregx(
                t, val_data_loader)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # Update old
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)  # Freeze the weights

        # Fisher ops
        if t > 0:
            fisher_old = {}

            startDateTimeOldLast = datetime.now()
            for n, _ in self.model.named_parameters():

                fisher_old[n] = self.fisher[n].clone()

            print('DateTime OldLast', datetime.now() - startDateTimeOldLast)
            print("Analysis compute memory waste in Old Task")

        # self.fisher=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
        self.fisher = utils.fisher_matrix_diag_nlp(t,
                                                   train_data_loader,
                                                   self.model,
                                                   self.criterion,
                                                   opt=self.opt)
        if t > 0:
            # Watch out! We do not want to keep t models (or fisher diagonals) in memory, therefore we have to merge fisher diagonals
            startDateTime = datetime.now()
            for n, _ in self.model.named_parameters():

                self.fisher[n] = (self.fisher[n] + fisher_old[n] * t) / (
                    t + 1)  # Checked: it is better than the other option
                #self.fisher[n]=0.5*(self.fisher[n]+fisher_old[n])
            print("Analysis compute memory waste")
            print('DateTime OldLast', datetime.now() - startDateTime)
        return
Example #24
def main():
    mask_dir = os.path.join(args.dataset_dir, args.train_mask_dir_name)
    val_mask_dir = os.path.join(args.dataset_dir, args.val_mask_dir_name)

    train_data_dir = os.path.join(args.dataset_dir, args.train_data_dir_name)
    val_data_dir = os.path.join(args.dataset_dir, args.val_data_dir_name)

    # mask_dir = 'data/train/masks_fail'
    # val_mask_dir = 'data/val/masks'
    #
    # train_data_dir = 'data/train/images_fail'
    # val_data_dir = 'data/val/images

    if args.net_alias is not None:
        formatted_net_alias = '-{}-'.format(args.net_alias)
    else:
        formatted_net_alias = ''  # avoid a NameError below when no alias is given

    best_model_file =\
        '{}/{}{}loss-{}-fold_{}-{}{:.6f}'.format(args.models_dir, args.network, formatted_net_alias, args.loss_function, args.fold, args.input_width, args.learning_rate) +\
        '-{epoch:d}-{val_loss:0.7f}-{val_dice_coef:0.7f}-{val_mean_io:0.7f}-{val_dice_coef_clipped:0.7f}.h5'
    if args.edges:
        ch = 5
    else:
        ch = 3
    model = make_model((None, None, args.stacked_channels + ch))
    freeze_model(model, args.freeze_till_layer)

    if args.weights is None:
        print('No weights passed, training from scratch')
    else:
        print('Loading weights from {}'.format(args.weights))
        model.load_weights(args.weights, by_name=True)

    optimizer = Adam(lr=args.learning_rate)

    if args.show_summary:
        model.summary()

    model.compile(loss=make_loss(args.loss_function),
                  optimizer=optimizer,
                  metrics=[
                      dice_coef_border, dice_coef, binary_crossentropy,
                      dice_coef_clipped, mean_iou
                  ])

    crop_size = None

    if args.use_crop:
        crop_size = (args.input_height, args.input_width)
        print('Using crops of shape ({}, {})'.format(args.input_height,
                                                     args.input_width))
    else:
        print('Using full size images, --use_crop=True to do crops')

    # folds_df = pd.read_csv(os.path.join(args.dataset_dir, args.folds_source))
    # train_ids = generate_filenames(folds_df[folds_df.fold != args.fold]['id'])
    # val_ids = generate_filenames(folds_df[folds_df.fold == args.fold]['id'])
    train_df = pd.read_csv('../data/train_df.csv')
    val_df = pd.read_csv('../data/val_df.csv')
    train_ids = [img + '.png' for img in train_df['id'].values]
    val_ids = [img + '.png' for img in val_df['id'].values]
    # train_ids = os.listdir(train_data_dir)
    # val_ids = os.listdir(val_data_dir)

    print('Training fold #{}, {} in train_ids, {} in val_ids'.format(
        args.fold, len(train_ids), len(val_ids)))

    train_generator = build_batch_generator(train_ids,
                                            img_dir=train_data_dir,
                                            batch_size=args.batch_size,
                                            shuffle=True,
                                            out_size=(args.out_height,
                                                      args.out_width),
                                            crop_size=crop_size,
                                            mask_dir=mask_dir,
                                            aug=True)

    val_generator = build_batch_generator(val_ids,
                                          img_dir=val_data_dir,
                                          batch_size=args.batch_size,
                                          shuffle=False,
                                          out_size=(args.out_height,
                                                    args.out_width),
                                          crop_size=crop_size,
                                          mask_dir=val_mask_dir,
                                          aug=False)

    best_model = ModelCheckpoint(best_model_file,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=False,
                                 save_weights_only=True)

    callbacks = [
        best_model,
        EarlyStopping(patience=45, verbose=10),
        TensorBoard(log_dir='./logs',
                    histogram_freq=0,
                    write_graph=True,
                    write_images=True)
    ]
    if args.clr is not None:
        clr_params = args.clr.split(',')
        base_lr = float(clr_params[0])
        max_lr = float(clr_params[1])
        step = int(clr_params[2])
        mode = clr_params[3]
        clr = CyclicLR(base_lr=base_lr,
                       max_lr=max_lr,
                       step_size=step,
                       mode=mode)
        callbacks.append(clr)
    model.fit_generator(ThreadsafeIter(train_generator),
                        steps_per_epoch=len(train_ids) / args.batch_size + 1,
                        epochs=args.epochs,
                        validation_data=ThreadsafeIter(val_generator),
                        validation_steps=len(val_ids) / args.batch_size + 1,
                        callbacks=callbacks,
                        max_queue_size=50,
                        workers=4)
Example #25
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0=time.time()
            self.train_epoch(t,xtrain,ytrain)
            clock1=time.time()
            train_loss,train_acc=self.eval(t,xtrain,ytrain)
            clock2=time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e+1,1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
            # Valid
            valid_loss,valid_acc=self.eval(t,xvalid,yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')
            
            #save log for current task & old tasks at every epoch
            self.logger.add(epoch=(t*self.nepochs)+e, task_num=t+1, valid_loss=valid_loss, valid_acc=valid_acc)
            for task in range(t): 
                xvalid_t=data[task]['valid']['x'].cuda()
                yvalid_t=data[task]['valid']['y'].cuda()
                valid_loss_t,valid_acc_t=self.eval(task,xvalid_t,yvalid_t)
                self.logger.add(epoch=(t*self.nepochs)+e, task_num=task+1, valid_loss=valid_loss_t, valid_acc=valid_acc_t)
            
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        self.logger.save()
        
        # Update old
        self.model_old = Net(input_size, taskcla).cuda()
        self.model_old.load_state_dict(self.model.state_dict())
        self.model_old.eval()
        utils.freeze_model(self.model_old) # Freeze the weights

        # Fisher ops
        if t>0:
            fisher_old={}
            for n,_ in self.model.named_parameters():
                fisher_old[n]=self.fisher[n].clone()
        self.fisher=utils.fisher_matrix_diag(t,xtrain,ytrain,self.model,self.criterion)
        if t>0:
            # Watch out! We do not want to keep t models (or fisher diagonals) in memory, therefore we have to merge fisher diagonals
            for n,_ in self.model.named_parameters():
                self.fisher[n]=(self.fisher[n]+fisher_old[n]*t)/(t+1)       # Checked: it is better than the other option
                #self.fisher[n]=0.5*(self.fisher[n]+fisher_old[n])

        return
Example #26
    def train(self, t, train_data_loader, test_data_loader, val_data_loader):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        task = torch.autograd.Variable(
            torch.LongTensor([t]).cuda(), volatile=False
        ) if torch.cuda.is_available() else torch.autograd.Variable(
            torch.LongTensor([t]), volatile=False)
        # Loop epochs
        print("Size of account ===> " + str(self.nepochs))

        for e in range(self.nepochs):
            # Train
            clock0 = time.time()

            self.train_epochlwf(t, train_data_loader)

            clock1 = time.time()

            train_loss, train_acc, train_recall, train_f1 = self.evallwf(
                t, test_data_loader)

            clock2 = time.time()

            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1, 1000 * self.sbatch * (clock1 - clock0) /
                    train_data_loader.__len__(), 1000 * self.sbatch *
                    (clock2 - clock1) / train_data_loader.__len__(),
                    train_loss, 100 * train_acc),
                end='')

            # Valid
            valid_loss, valid_acc, valid_recall, valid_f1 = self.evallwf(
                t, val_data_loader)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # Update old
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)  # Freeze the weights

        return
Example #27
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size,
              taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        lr_rho = self.lr_rho
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr, lr_rho)

        # Loop epochs
        for e in range(self.nepochs):
            self.epoch = self.epoch + 1
            # Train
            clock0 = time.time()

            num_batch = xtrain.size(0)

            self.train_epoch(t, xtrain, ytrain)

            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)

            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(e + 1,
                        1000 * self.sbatch * (clock1 - clock0) / num_batch,
                        1000 * self.sbatch * (clock2 - clock1) / num_batch,
                        train_loss, 100 * train_acc),
                end='')
            # Valid

            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            # save log for current task & old tasks at every epoch
            self.logger.add(epoch=(t * self.nepochs) + e,
                            task_num=t + 1,
                            valid_loss=valid_loss,
                            valid_acc=valid_acc)
            for task in range(t):
                xvalid_t = data[task]['valid']['x'].cuda()
                yvalid_t = data[task]['valid']['y'].cuda()

                valid_loss_t, valid_acc_t = self.eval(task, xvalid_t, yvalid_t)
                self.logger.add(epoch=(t * self.nepochs) + e,
                                task_num=task + 1,
                                valid_loss=valid_loss_t,
                                valid_acc=valid_acc_t)

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    lr_rho /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr, lr_rho)
            print()

            utils.freeze_model(self.model_old)  # Freeze the weights

        # Restore best
        utils.set_model_(self.model, best_model)
        self.model_old = deepcopy(self.model)
        self.saved = 1

        self.logger.save()

        return
Example #28
def main():

    print('start')
    args = get_args()
    logging.basicConfig(filename=args.model + '.log',
                        level=logging.INFO,
                        format="%(asctime)s: %(message)s")
    logger = logging.getLogger()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(device)

    models = ['FCN', 'Unet', 'Deeplab']
    backbones = ['resnet18', 'resnet34', 'resnet50', 'mobilenet']
    assert args.model in models, "Choose valid model"

    if args.model == 'FCN':
        assert args.backbone[:6] == 'resnet', 'Only resnet backbones supported for FCN'
        model = FCN.FCN16(args.backbone, num_classes=11).to(device)
    if args.model == 'Unet':
        model = Unet().to(device)
    if args.model == 'Deeplab':
        assert args.backbone[:9] == 'mobilenet', 'Only mobilenet backbones supported for Deeplab'
        model = Deeplab().to(device)  # TODO

    logger.info("Model: {}, Backbone Used: {}".format(args.model,
                                                      args.backbone))
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    criterion = torch.nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.1,
                                                           patience=2,
                                                           threshold=1e-3)
    logger.info("Optimizer: Adam")
    logger.info("Criterion/Loss Function: Cross Entropy Loss")
    logger.info("Scheduler: ReduceLROnPlateau")
    logger.info("Learning Rate: {}".format(args.lr))

    if args.load:
        checkpoint = torch.load(args.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        criterion = checkpoint['criterion']
        logger.info("Model Loaded")

    if args.fine_tune:
        assert args.load, "Please specify model to be loaded to fine-tune"
        model = utils.freeze_model(args, model)

    ## Dataloaders
    train_dataset, val_dataset = get_datasets(args.dataset, train=True)
    logger.info("Dataset Used: {}".format(args.dataset))
    if train_dataset:
        train_dataloader = DataLoader(train_dataset, batch_size=args.batchsize)
        logger.info("Training Dataset Length: {}".format(
            len(train_dataloader)))
    if val_dataset:
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=args.batchsize,
                                    shuffle=True)
        logger.info("Validation Dataset Length: {}".format(
            len(val_dataloader)))

    #if not os.path.exists(args.saveDir):
    writer = SummaryWriter(os.path.join(args.saveDir, args.model + '_log'))
    train_model(model=model,
                optimizer=optimizer,
                criterion=criterion,
                scheduler=scheduler,
                training_dataloader=train_dataloader,
                validation_dataloader=val_dataloader,
                device=device,
                epochs=args.epochs,
                batch_size=args.batchsize,
                lr=args.lr,
                CHECKPOINT_PATH=args.saveDir,
                model_name=args.model,
                logger=logger,
                writer=writer,
                save_epoch=5,
                show_figure=True,
                save_model=True)
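
In this script freeze_model receives (args, model) and returns the model, presumably freezing the backbone before fine-tuning; a hedged sketch under that assumption (the head-layer naming is hypothetical):

def freeze_model(args, model):
    # Hypothetical sketch for the fine-tuning path above: freeze the whole
    # network, then re-enable gradients on the final classification layers.
    # Which layers stay trainable likely depends on `args` in the real code.
    for param in model.parameters():
        param.requires_grad = False
    for name, param in model.named_parameters():
        if 'classifier' in name or 'final' in name:   # assumed head naming
            param.requires_grad = True
    return model
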
Example #29
    def train(self, t, xtrain, ytrain, xvalid, yvalid):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Update old
        self.model_old = deepcopy(self.model)
        self.model_old.eval()
        utils.freeze_model(self.model_old)  # Freeze the weights

        # reset importance omega
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                self.omega[n] = p.data.clone().zero_()
                self.DELTA[n] = p.data.clone().zero_()
                self.p_old[n] = p.data.clone()

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain, e)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss, 100 * train_acc),
                end='')
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            self.logger.log_scalar(str(t) + "_train acc", train_acc, e)
            self.logger.log_scalar(str(t) + "_valid acc", valid_acc, e)
            self.logger.log_scalar(str(t) + "_train loss", train_loss, e)
            self.logger.log_scalar(str(t) + "_valid loss", valid_loss, e)

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

        # Update task regularization OMEGA
        for (n, param), (_,
                         param_old) in zip(self.model.named_parameters(),
                                           self.model_old.named_parameters()):
            if param.requires_grad:
                #change = param.detach().clone() - param_old
                #o = torch.nn.functional.relu(self.omega[n])/(change.pow(2) + self.xi)
                o = torch.nn.functional.relu(
                    self.omega[n]) / (self.DELTA[n].pow(2) + self.xi)
                self.OMEGA[n] = self.OMEGA[n] * self.decay + o * (
                    1 - self.decay)  #self.OMEGA[n] + o #

        return
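
The OMEGA importance weights updated at the end of this snippet are typically consumed by a quadratic regularizer added to the task loss; a hedged sketch of that term (the coefficient c is hypothetical):

def si_penalty(model, model_old, OMEGA, c=0.1):
    # Hypothetical surrogate loss: c * sum_n OMEGA[n] * (theta_n - theta_n_old)^2,
    # pulling parameters that were important for old tasks back toward their old values.
    reg = 0.0
    for (n, p), (_, p_old) in zip(model.named_parameters(),
                                  model_old.named_parameters()):
        if p.requires_grad:
            reg = reg + (OMEGA[n] * (p - p_old.detach()) ** 2).sum()
    return c * reg
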
Example #30
    def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size,
              taskcla):
        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)

        # Loop epochs
        for e in range(self.nepochs):

            # Train
            clock0 = time.time()

            # self.model.variance_init()  # initialize the trainer net's variance to a large value

            # 1. train the trainer_net, using the saver_net's information for regularization

            self.train_epoch(xtrain, ytrain)

            clock1 = time.time()
            train_loss, train_acc = self.eval(xtrain, ytrain, self.sample)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss, 100 * train_acc),
                end='')
            # Valid
            valid_loss, valid_acc = self.eval(xvalid, yvalid, self.sample)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                valid_loss, 100 * valid_acc),
                  end='')

            # save log for current task & old tasks at every epoch
            self.logger.add(epoch=(t * self.nepochs) + e,
                            task_num=t + 1,
                            valid_loss=valid_loss,
                            valid_acc=valid_acc)
            for task in range(t):
                xvalid_t = data[task]['valid']['x'].cuda()
                yvalid_t = data[task]['valid']['y'].cuda()
                valid_loss_t, valid_acc_t = self.eval(xvalid_t, yvalid_t,
                                                      self.sample)
                self.logger.add(epoch=(t * self.nepochs) + e,
                                task_num=task + 1,
                                valid_loss=valid_loss_t,
                                valid_acc=valid_acc_t)

            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()

            #self.model_old = deepcopy(self.model)
            utils.freeze_model(self.model_old)  # Freeze the weights

            #self.print_log(e)

            # for n, m in self.model.named_children():
            #     print(n, m.weight.sigma.min())

        # Restore best
        utils.set_model_(self.model, best_model)

        self.logger.save()

        return