Example #1
class CustomScheduler(_LRScheduler):
    timestep: int = 0

    def __init__(self, optimizer, gamma, warmup=None):
        self.optimizer = optimizer
        self.after_warmup = ExponentialLR(optimizer, gamma=gamma)
        self.initial_lrs = [
            p_group['lr'] for p_group in self.optimizer.param_groups
        ]
        self.warmup = 0 if warmup is None else warmup
        super(CustomScheduler, self).__init__(optimizer)

    def get_lr(self):
        # Linear warmup: scale each initial LR by timestep / warmup, then
        # defer to the wrapped ExponentialLR once the warmup is over.
        return [self.timestep * group_init_lr / self.warmup for group_init_lr in self.initial_lrs] \
            if self.timestep < self.warmup else self.after_warmup.get_lr()

    def step(self, epoch=None):
        if self.timestep < self.warmup:
            self.timestep += 1
            super(CustomScheduler, self).step(epoch)
        else:
            self.after_warmup.step(epoch)
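A quick way to sanity-check this warmup scheduler is to drive it with a throwaway optimizer and watch the learning rate: it ramps up linearly during the first warmup steps, then the wrapped ExponentialLR takes over. A minimal sketch, assuming the CustomScheduler class above is in scope; the parameter, base LR and warmup length are arbitrary placeholders, and the exact step/get_lr behaviour differs slightly across PyTorch versions:

import torch
from torch.optim import SGD

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = SGD(params, lr=0.1)
scheduler = CustomScheduler(optimizer, gamma=0.9, warmup=5)

for step in range(10):
    optimizer.step()
    scheduler.step()
    # Early steps: LR climbs linearly towards 0.1; later steps decay by gamma.
    print(step, optimizer.param_groups[0]['lr'])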
Example #2
class ParamOptim:
    def __init__(
        self,
        params: List[torch.Tensor],
        lr: LRParam,
        eps: float = .0003,
        clip_grad: float = None,
        optimizer: Optimizer = Adam,
        retain_graph=False,
    ):
        self.params = params
        self.clip_grad = clip_grad
        self.optim = optimizer(self.params, lr=lr.start, eps=eps)
        self.retain_graph = retain_graph
        self.lr_scheduler = ExponentialLR(self.optim, lr.decay_rate)
        self.lr = lr
        self.lr_need_update = True

    def step_lr(self, n_iter):
        if self.lr_need_update or\
            (n_iter % self.lr.update_every == 0 and
                n_iter // self.lr.update_every <= self.lr.last_update):
            self.lr_need_update = False
            ep = min(n_iter // self.lr.update_every, self.lr.last_update)
            self.lr_scheduler.step(ep)
            return self.lr_scheduler.get_lr()[0]
        else:
            return None

    def step(self, loss):
        self.optim.zero_grad()
        loss.backward(retain_graph=self.retain_graph)
        if self.clip_grad is not None:
            torch.nn.utils.clip_grad_norm_(self.params, self.clip_grad)
        self.optim.step()
        return loss
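ParamOptim reads several fields off the lr argument (start, decay_rate, update_every, last_update), but the LRParam type itself is not part of this snippet. A plausible stand-in, inferred purely from those attribute accesses and therefore only a sketch, could be a small dataclass:

from dataclasses import dataclass

@dataclass
class LRParam:
    start: float = 1e-3       # initial learning rate handed to the optimizer
    decay_rate: float = 0.99  # gamma for the ExponentialLR schedule
    update_every: int = 1000  # decay the LR once every this many iterations
    last_update: int = 50     # stop decaying after this many decay steps

With such a definition, ParamOptim(params, lr=LRParam(start=3e-4)) would satisfy every access made in __init__ and step_lr above.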
Example #3
    params_torch['SDF.bsdf.reflectance.data'].data.clamp_(0.0, 1.0)
    try:
        sdf = params_torch['SDF.data'].data.cpu().numpy().reshape([sdf_res] *
                                                                  3)
        sdf = skfmm.distance(sdf, sdf_scale / sdf_res)

        vtk.record_epoch(epoch, sdf, grad)

        params_torch['SDF.data'].data.copy_(torch.from_numpy(sdf.flatten()))

        if epoch % 10 == 9:
            write_binary_grid3d(f'{out_path}sdf_e{epoch}.vol', sdf)
            write_bitmap(f'{out_path}color_e{epoch:03d}.exr',
                         params['SDF.bsdf.reflectance.data'],
                         [color_texture_res] * 2)

    except RuntimeError as e:
        print(
            f'skfmm failed: mean={sdf.mean()}, min={sdf.min()}, max={sdf.max()}'
        )
        print(e)

    with open(f"{out_path}log.txt", mode='a+') as f:
        # f.write(','.join(list(map(str, [epoch, lr_scheduler.get_lr()[0], loss_img, *(pyramid_loss.cpu().numpy()), '\n']))))
        f.write(','.join(
            list(map(str,
                     [epoch, lr_scheduler.get_lr()[0], loss_img, "\n"]))))

    print(f'epoch {epoch}: loss_img={loss_img}')
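Several of these examples log the current learning rate via scheduler.get_lr()[0]. Since PyTorch 1.4 that call emits a warning when made outside of step(), and for ExponentialLR it returns the next value (current LR times gamma) rather than the one currently in use; get_last_lr() or the optimizer's param groups are the safer source for logging. A small self-contained sketch of that pattern:

import torch
from torch.optim.lr_scheduler import ExponentialLR

optimizer = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
scheduler = ExponentialLR(optimizer, gamma=0.9)

for epoch in range(3):
    optimizer.step()
    scheduler.step()
    # get_last_lr() reports the LR the scheduler just set, without warnings.
    current_lr = scheduler.get_last_lr()[0]  # same as optimizer.param_groups[0]['lr']
    print(f'epoch {epoch}: lr={current_lr:.4f}')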
Example #4
def train(model_name, optim='adam'):
    train_dataset = PretrainDataset(output_shape=config['image_resolution'])
    train_loader = DataLoader(train_dataset,
                              batch_size=config['batch_size'],
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True,
                              drop_last=True)

    val_dataset = IDRND_dataset_CV(fold=0,
                                   mode=config['mode'].replace('train', 'val'),
                                   double_loss_mode=True,
                                   output_shape=config['image_resolution'])
    val_loader = DataLoader(val_dataset,
                            batch_size=config['batch_size'],
                            shuffle=True,
                            num_workers=4,
                            drop_last=False)

    if model_name == 'EF':
        model = DoubleLossModelTwoHead(base_model=EfficientNet.from_pretrained(
            'efficientnet-b3')).to(device)
        model.load_state_dict(
            torch.load(
                f"../models_weights/pretrained/{model_name}_{4}_2.0090592697255896_1.0.pth"
            ))
    elif model_name == 'EFGAP':
        model = DoubleLossModelTwoHead(
            base_model=EfficientNetGAP.from_pretrained('efficientnet-b3')).to(
                device)
        model.load_state_dict(
            torch.load(
                f"../models_weights/pretrained/{model_name}_{4}_2.3281182915644134_1.0.pth"
            ))

    criterion = FocalLoss(add_weight=False).to(device)
    criterion4class = CrossEntropyLoss().to(device)

    if optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=config['learning_rate'],
                                     weight_decay=config['weight_decay'])
    elif optim == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=config['learning_rate'],
                                    weight_decay=config['weight_decay'],
                                    nesterov=False)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    momentum=0.9,
                                    lr=config['learning_rate'],
                                    weight_decay=config['weight_decay'],
                                    nesterov=True)

    steps_per_epoch = len(train_loader) - 15
    swa = SWA(optimizer,
              swa_start=config['swa_start'] * steps_per_epoch,
              swa_freq=int(config['swa_freq'] * steps_per_epoch),
              swa_lr=config['learning_rate'] / 10)
    scheduler = ExponentialLR(swa, gamma=0.9)
    # scheduler = StepLR(swa, step_size=5*steps_per_epoch, gamma=0.5)

    global_step = 0
    for epoch in trange(10):
        if epoch < 5:
            scheduler.step()
            continue
        model.train()
        train_bar = tqdm(train_loader)
        train_bar.set_description_str(desc=f"N epochs - {epoch}")

        for step, batch in enumerate(train_bar):
            global_step += 1
            image = batch['image'].to(device)
            label4class = batch['label0'].to(device)
            label = batch['label1'].to(device)

            output4class, output = model(image)
            loss4class = criterion4class(output4class, label4class)
            loss = criterion(output.squeeze(), label)
            swa.zero_grad()
            total_loss = loss4class * 0.5 + loss * 0.5
            total_loss.backward()
            swa.step()
            train_writer.add_scalar(tag="learning_rate",
                                    scalar_value=scheduler.get_lr()[0],
                                    global_step=global_step)
            train_writer.add_scalar(tag="BinaryLoss",
                                    scalar_value=loss.item(),
                                    global_step=global_step)
            train_writer.add_scalar(tag="SoftMaxLoss",
                                    scalar_value=loss4class.item(),
                                    global_step=global_step)
            train_bar.set_postfix_str(f"Loss = {loss.item()}")
            try:
                train_writer.add_scalar(tag="idrnd_score",
                                        scalar_value=idrnd_score_pytorch(
                                            label, output),
                                        global_step=global_step)
                train_writer.add_scalar(tag="far_score",
                                        scalar_value=far_score(label, output),
                                        global_step=global_step)
                train_writer.add_scalar(tag="frr_score",
                                        scalar_value=frr_score(label, output),
                                        global_step=global_step)
                train_writer.add_scalar(tag="accuracy",
                                        scalar_value=bce_accuracy(
                                            label, output),
                                        global_step=global_step)
            except Exception:
                pass

        if (epoch > config['swa_start']
                and epoch % 2 == 0) or (epoch == config['number_epochs'] - 1):
            swa.swap_swa_sgd()
            swa.bn_update(train_loader, model, device)
            swa.swap_swa_sgd()

        scheduler.step()
        evaluate(model, val_loader, epoch, model_name)
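The SWA object wrapped around the optimizer here (with swa_start, swa_freq, swa_lr and the swap_swa_sgd / bn_update calls) matches the interface of torchcontrib.optim.SWA. Assuming that is the class in use, the bare-bones pattern looks roughly like the sketch below, with a toy model and data standing in for the real ones:

import torch
from torch import nn
from torchcontrib.optim import SWA

model = nn.Linear(4, 1)
base_opt = torch.optim.SGD(model.parameters(), lr=1e-2)
# Average weight snapshots every 5 steps once step 20 has been reached.
opt = SWA(base_opt, swa_start=20, swa_freq=5, swa_lr=1e-3)

x, y = torch.randn(64, 4), torch.randn(64, 1)
for _ in range(100):
    opt.zero_grad()
    nn.functional.mse_loss(model(x), y).backward()
    opt.step()

opt.swap_swa_sgd()  # copy the averaged weights into the model before evaluation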
Example #5
    def train(self):
        """Method to train the model."""
        self.writer.add_text('Comments', self.comments)
        train_loader, val_loader, test_loader = self.dataloaders
        transformations = get_transformations(self.transform_names,
                                              sizes=(self.w_size, self.h_size))
        self._set_seeds()
        self.net.apply(self._init_weights)
        running_losses = list()
        criterion = define_loss(self.signal_type, self.custom_loss,
                                self.device)
        optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        scheduler = ExponentialLR(optimizer, gamma=0.9)
        iteration = 0
        best_val_prec = 0
        self.net.to(self.device)

        for epoch in range(self.nb_epochs):
            if epoch % self.lr_step == 0 and epoch != 0:
                scheduler.step()
            for _, sequence_data in enumerate(train_loader):
                seq_name, seq = sequence_data
                path_to_frames = os.path.join(self.paths['carrada'],
                                              seq_name[0])
                frame_dataloader = DataLoader(MultiFrameCarradaDataset(
                    seq, self.annot_type, self.signal_type, path_to_frames,
                    self.process_signal, self.n_input_ch, transformations),
                                              shuffle=False,
                                              batch_size=self.batch_size,
                                              num_workers=4)
                for _, frame in enumerate(frame_dataloader):
                    data = frame['matrix'].to(self.device).float()
                    mask = frame['mask'].to(self.device).float()
                    data = normalize(data,
                                     self.signal_type,
                                     self.paths['carrada'],
                                     norm_type=self.norm_type)
                    optimizer.zero_grad()
                    outputs = self.net(data).to(self.device)
                    mask = F.interpolate(mask, (self.w_size, self.h_size))
                    loss = criterion(outputs, torch.argmax(mask, axis=1))
                    loss.backward()
                    optimizer.step()
                    running_losses.append(loss.data.cpu().numpy()[()])
                    if iteration % self.loss_step == 0:
                        train_loss = np.mean(running_losses)
                        print('[Epoch {}/{}, iter {}]: '
                              'train loss {}'.format(epoch + 1, self.nb_epochs,
                                                     iteration, train_loss))
                        self.visualizer.update_train_loss(
                            train_loss, iteration)
                        running_losses = list()
                        self.visualizer.update_learning_rate(
                            scheduler.get_lr()[0], iteration)
                    if iteration % self.val_step == 0 and iteration > 0:
                        if iteration % self.viz_step == 0 and iteration > 0:
                            val_metrics = self.tester.predict(
                                self.net, val_loader, iteration)
                        else:
                            val_metrics = self.tester.predict(
                                self.net, val_loader)
                        self.visualizer.update_val_metrics(
                            val_metrics, iteration)
                        print('[Epoch {}/{}] Validation loss: {}'.format(
                            epoch + 1, self.nb_epochs, val_metrics['loss']))
                        print('[Epoch {}/{}] Validation Pixel Prec: {}'.format(
                            epoch + 1, self.nb_epochs, val_metrics['prec']))
                        print('[Epoch {}/{}] Validation Pixel Prec by class: '
                              '{}'.format(epoch + 1, self.nb_epochs,
                                          val_metrics['prec_by_class']))

                        if val_metrics[
                                'prec'] > best_val_prec and iteration > 0:
                            best_val_prec = val_metrics['prec']
                            test_metrics = self.tester.predict(
                                self.net, test_loader)
                            print('[Epoch {}/{}] Test loss: {}'.format(
                                epoch + 1, self.nb_epochs,
                                test_metrics['loss']))
                            print('[Epoch {}/{}] Test Pixel Prec: {}'.format(
                                epoch + 1, self.nb_epochs,
                                test_metrics['prec']))
                            print('[Epoch {}/{}] Test Pixel Prec by class: '
                                  '{}'.format(epoch + 1, self.nb_epochs,
                                              test_metrics['prec_by_class']))

                            self.results['train_loss'] = train_loss.item()
                            self.results['val_metrics'] = val_metrics
                            self.results['test_metrics'] = test_metrics
                            self._save_results()
                        self.net.train()  # Train mode after evaluation process
                    iteration += 1
        self.writer.close()
Example #6
    def train(self, x_train, y_train):
        idx = np.random.permutation(len(x_train))
        print('samples:', len(x_train))
        x_train = np.array(x_train)[idx]
        y_train = np.array(y_train)[idx]
        x_train = torch.tensor(x_train, dtype=torch.float32)
        y_train = torch.tensor(y_train)
        x_val = x_train[32000:]
        x_tr = x_train[:32000]
        y_val = y_train[32000:]
        y_tr = y_train[:32000]
        #x_val = x_train[5000:6000]
        #y_val = y_train[5000:6000]
        #x_tr  = x_train[:5000]
        #y_tr  = y_train[:5000]
        y_tr_ = y_tr.clone()

        x_tr, x_val = self.PCA(x_tr, x_val)
        print('PCA done', x_tr.shape)

        optimizer = Adam(self.model.parameters(), lr=0.0001, weight_decay=5e-3)
        scheduler = ExponentialLR(optimizer, 1)
        loss_fn = nn.MSELoss()

        train_loader = torch.utils.data.DataLoader(Loader(x_tr, y_tr),
                                                   batch_size=256,
                                                   shuffle=True)

        best_acc = 0
        dist = self.get_dist(x_tr.cuda(), x_val.cuda()).cpu()
        for k in [1, 3, 5, 8, 10, 15, 20, 25, 30]:
            y_pred = self.predict(dist, y_tr_, k)
            acc = (y_pred == y_val).sum().float().numpy() / y_val.shape[0]
            print("K=", k, "  acc=", acc)

        for epoch in range(200):
            self.model.train()
            scheduler.step()
            loss_ = acc_ = cnt = yc = 0
            for i, (input, target) in enumerate(train_loader):
                optimizer.zero_grad()

                B = target.shape[0]
                gt_p = target.clone().cuda().view(1, B).float()
                gt = target.clone().cuda()
                output = self.model(input.cuda())
                dists = output.view(B, B)
                dm = dists.sum(0).view(1, -1)
                #dists = dists / dm

                sorted, ind = dists.sort(dim=0, descending=False)
                sorted = sorted[:20]
                ind = ind[:20]

                y_p = gt[ind]

                gt_p = gt_p.expand(20, -1).contiguous().float()

                y_p = y_p.float() - gt_p
                y_p[y_p != 0] = 1
                yy = torch.sum(y_p)

                loss0 = torch.div(1, sorted[y_p != 0])

                loss1 = sorted[y_p == 0]

                loss = loss0.mean() + loss1.mean()
                loss.backward()

                optimizer.step()
                lr = scheduler.get_lr()

                yc += yy.cpu().data.numpy()
                loss_ += loss.cpu().data.numpy()
                cnt += 1

            print('Epoch %2d: loss = %6.5f,  %5.3f, lr=%f' %
                  (epoch, loss_ / cnt, yc / cnt, lr[0]))
            loss_ = yc = 0

            if (epoch % 20) == 19:
                dist = self.get_dist(x_tr.cuda(), x_val.cuda()).cpu()
                for k in [1, 3, 5, 8, 10, 15, 20, 25, 30]:
                    y_pred = self.predict(dist, y_tr_, k)
                    acc = (y_pred == y_val).sum().float().numpy() / y_val.shape[0]
                    print("K=", k, "  acc=", acc)
                    if k == 25:
                        acc_25 = acc
                torch.save(self.model.state_dict(), 'knn_dml_checkpoint.pth')
                if best_acc <= acc_25:
                    best_acc = acc_25
                    torch.save(self.model.state_dict(),
                               'knn_dml_best_model.pth')
Example #7
def train(**kwargs):
    opt._parse(kwargs)

    carrada = download('Carrada')
    train_set = Carrada().get('Train')
    val_set = Carrada().get('Validation')
    test_set = Carrada().get('Test')

    train_seqs = SequenceCarradaDataset(train_set)
    val_seqs = SequenceCarradaDataset(val_set)
    test_seqs = SequenceCarradaDataset(test_set)

    train_seqs_loader = data_.DataLoader(train_seqs,
                                         batch_size=1,
                                         shuffle=True,
                                         # pin_memory=True,
                                         num_workers=opt.num_workers)

    val_seqs_loader = data_.DataLoader(val_seqs,
                                       batch_size=1,
                                       shuffle=False,
                                       # pin_memory=True,
                                       num_workers=opt.num_workers)

    test_seqs_loader = data_.DataLoader(test_seqs,
                                        batch_size=1,
                                        shuffle=False,
                                        # pin_memory=True,
                                        num_workers=opt.num_workers)

    # faster_rcnn = FasterRCNNVGG16(n_fg_class=3)
    # faster_rcnn = FasterRCNNRESNET101(n_fg_class=3)
    faster_rcnn = FasterRCNNRESNET18(n_fg_class=3)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    scheduler = ExponentialLR(trainer.faster_rcnn.optimizer, gamma=0.9)
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    writer_path = os.path.join(opt.logs_path, opt.model_name)
    os.makedirs(writer_path, exist_ok=True)
    writer = SummaryWriter(writer_path)
    iteration = 0
    best_map = 0
    lr_ = opt.lr

    for epoch in range(opt.epoch):
        print('Processing epoch: {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()
        for n_seq, sequence_data in tqdm(enumerate(train_seqs_loader)):
            seq_name, seq = sequence_data
            path_to_frames = os.path.join(carrada, seq_name[0])
            train_frame_set = CarradaDataset(opt, seq, 'box', opt.signal_type,
                                             path_to_frames)
            train_frame_loader = data_.DataLoader(train_frame_set,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=opt.num_workers)

            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(train_frame_loader)):
                iteration += 1
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                img = normalize(img)

                if opt.debug_step and (iteration+1) % opt.debug_step == 0:
                    trainer.train_step(img, bbox, label, scale, stop=True)
                else:
                    trainer.train_step(img, bbox, label, scale)

                if (iteration + 1) % opt.plot_every == 0:
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()

                    train_results = trainer.get_meter_data()
                    writer.add_scalar('Losses/rpn_loc', train_results['rpn_loc_loss'],
                                      iteration)
                    writer.add_scalar('Losses/rpn_cls', train_results['rpn_cls_loss'],
                                      iteration)
                    writer.add_scalar('Losses/roi_loc', train_results['roi_loc_loss'],
                                      iteration)
                    writer.add_scalar('Losses/roi_cls', train_results['roi_cls_loss'],
                                      iteration)
                    writer.add_scalar('Losses/total', train_results['total_loss'],
                                      iteration)

                if (iteration + 1) % opt.img_every == 0:
                    ori_img_ = at.tonumpy(img[0])
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    gt_img_grid = make_grid(torch.from_numpy(gt_img))
                    writer.add_image('Ground_truth_img', gt_img_grid, iteration)

                    # plot predicted bboxes
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], opt.signal_type,
                                                                            visualize=True)
                    # FLAG: vis
                    pred_img = visdom_bbox(ori_img_,
                                           at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    pred_img_grid = make_grid(torch.from_numpy(pred_img))
                    writer.add_image('Predicted_img', pred_img_grid, iteration)

                    if opt.train_eval and (iteration + 1) % opt.train_eval == 0:
                        train_eval_result, train_best_iou = eval(train_seqs_loader, faster_rcnn,
                                                                 opt.signal_type)
                        writer.add_scalar('Train/mAP', train_eval_result['map'],
                                          iteration)
                        writer.add_scalar('Train/Best_IoU', train_best_iou,
                                          iteration)

        eval_result, best_val_iou = eval(val_seqs_loader, faster_rcnn, opt.signal_type,
                                         test_num=opt.test_num)
        writer.add_scalar('Validation/mAP', eval_result['map'],
                          iteration)
        writer.add_scalar('Validation/Best_IoU', best_val_iou,
                          iteration)
        lr_ = scheduler.get_lr()[0]
        writer.add_scalar('learning_rate', lr_, iteration)

        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        print(log_info)
        if eval_result['map'] > best_map:
            test_result, test_best_iou = eval(test_seqs_loader, faster_rcnn, opt.signal_type,
                                              test_num=opt.test_num)
            writer.add_scalar('Test/mAP', test_result['map'],
                              iteration)
            writer.add_scalar('Test/Best_IoU', test_best_iou,
                              iteration)
            best_map = eval_result['map']
            best_test_map = test_result['map']
            best_path = trainer.save(best_val_map=best_map, best_test_map=best_test_map)
            # best_path = trainer.save(best_map=best_map)

        if (epoch + 1) % opt.lr_step == 0:
            scheduler.step()
Example #8
        progressbar.DynamicMessage('loss_encoder'), ' ',
        progressbar.DynamicMessage('loss_decoder'), ' ',
        progressbar.DynamicMessage('loss_discriminator'), ' ',
        progressbar.DynamicMessage("epoch")
    ]

    for i in range(num_epochs):
        progress = progressbar.ProgressBar(min_value=0,
                                           max_value=batch_number,
                                           initial_value=0,
                                           widgets=widgets).start()
        loss_nle_mean = RollingMeasure()
        loss_encoder_mean = RollingMeasure()
        loss_decoder_mean = RollingMeasure()
        loss_discriminator_mean = RollingMeasure()
        print("LR:{}".format(lr_encoder.get_lr()))

        for j, (data_batch, labels_batch) in enumerate(dataloader):
            net.train()

            # convert the batch into Variables

            data_target = Variable(torch.squeeze(data_batch),
                                   requires_grad=False).float().cuda()
            data_in = Variable(data_batch, requires_grad=True).float().cuda()

            # zero the gradients
            net.zero_grad()
            # compute the output
            out, out_labels, out_layer, mus, variances = net(data_in)
            out_layer_original = out_layer[:len(out_layer) // 2]
        print(f"rendered image {i}: loss={ob_val.cpu().item()}"
              )  # , pyramid_losses={list(pyr_ob.data.cpu().numpy())}")

        ob_val /= len(cams_origins)
        loss_img += ob_val.item()
        # loss_pyr += pyr_ob.data.cpu().numpy()
        ob_val.backward()

    if smoothing:
        params_torch['SDF.data'].grad = smoothing(
            params_torch['SDF.data'].grad)

    opt.step()

    if lr_scheduler:
        print("lr = ", lr_scheduler.get_lr()[0])
        lr_scheduler.step()

    try:
        sdf = params_torch['SDF.data'].data.cpu().numpy().reshape([sdf_res] *
                                                                  3)
        sdf = skfmm.distance(sdf, sdf_scale / sdf_res)

        params_torch['SDF.data'].data.copy_(torch.from_numpy(sdf.flatten()))

        if epoch % 10 == 0:
            write_binary_grid3d(f'{out_path}sdf_e{epoch}.vol', sdf)

    except RuntimeError as e:
        print(
            f'skfmm failed: mean={sdf.mean()}, min={sdf.min()}, max={sdf.max()}'
Example #10
		train_bar.set_description_str(desc=f"N epochs - {epoch}")

		for step, batch in enumerate(train_bar):
			global_step += 1
			image = batch['image'].to(device)
			label4class = batch['label0'].to(device)
			label = batch['label1'].to(device)

			output4class, output = model(image)
			loss4class = criterion4class(output4class, label4class)
			loss = criterion(output.squeeze(), label)
			swa.zero_grad()
			total_loss = loss4class*0.5 + loss*0.5
			total_loss.backward()
			swa.step()
			train_writer.add_scalar(tag="learning_rate", scalar_value=scheduler.get_lr()[0], global_step=global_step)
			train_writer.add_scalar(tag="BinaryLoss", scalar_value=loss.item(), global_step=global_step)
			train_writer.add_scalar(tag="SoftMaxLoss", scalar_value=loss4class.item(), global_step=global_step)
			train_bar.set_postfix_str(f"Loss = {loss.item()}")
			try:
				train_writer.add_scalar(tag="idrnd_score", scalar_value=idrnd_score_pytorch(label, output), global_step=global_step)
				train_writer.add_scalar(tag="far_score", scalar_value=far_score(label, output), global_step=global_step)
				train_writer.add_scalar(tag="frr_score", scalar_value=frr_score(label, output), global_step=global_step)
				train_writer.add_scalar(tag="accuracy", scalar_value=bce_accuracy(label, output), global_step=global_step)
			except Exception:
				pass

		if (epoch > config['swa_start'] and epoch % 2 == 0) or (epoch == config['number_epochs']-1):
			swa.swap_swa_sgd()
			swa.bn_update(train_loader, model, device)
			swa.swap_swa_sgd()
Example #11
    def train(self, add_temp=False):
        """
        Method to train a network

        PARAMETERS
        ----------
        add_temp: boolean
            Add a temporal dimension during training?
            Considering the input as a sequence.
            Default: False
        """
        self.writer.add_text('Comments', self.comments)
        train_loader, val_loader, test_loader = self.dataloaders
        transformations = get_transformations(self.transform_names,
                                              sizes=(self.w_size, self.h_size))
        self._set_seeds()
        self.net.apply(self._init_weights)
        rd_criterion = define_loss('range_doppler', self.custom_loss,
                                   self.device)
        ra_criterion = define_loss('range_angle', self.custom_loss,
                                   self.device)
        nb_losses = len(rd_criterion)
        running_losses = list()
        rd_running_losses = list()
        rd_running_global_losses = [list(), list()]
        ra_running_losses = list()
        ra_running_global_losses = [list(), list()]
        coherence_running_losses = list()
        optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        scheduler = ExponentialLR(optimizer, gamma=0.9)
        iteration = 0
        best_val_prec = 0
        self.net.to(self.device)

        for epoch in range(self.nb_epochs):
            if epoch % self.lr_step == 0 and epoch != 0:
                scheduler.step()
            for _, sequence_data in enumerate(train_loader):
                seq_name, seq = sequence_data
                path_to_frames = os.path.join(self.paths['carrada'],
                                              seq_name[0])
                frame_dataloader = DataLoader(CarradaDataset(
                    seq, self.annot_type, path_to_frames, self.process_signal,
                    self.n_frames, transformations, add_temp),
                                              shuffle=self.is_shuffled,
                                              batch_size=self.batch_size,
                                              num_workers=4)
                for _, frame in enumerate(frame_dataloader):
                    rd_data = frame['rd_matrix'].to(self.device).float()
                    ra_data = frame['ra_matrix'].to(self.device).float()
                    ad_data = frame['ad_matrix'].to(self.device).float()
                    rd_mask = frame['rd_mask'].to(self.device).float()
                    ra_mask = frame['ra_mask'].to(self.device).float()
                    rd_data = normalize(rd_data,
                                        'range_doppler',
                                        norm_type=self.norm_type)
                    ra_data = normalize(ra_data,
                                        'range_angle',
                                        norm_type=self.norm_type)
                    if self.model_name == 'tmvanet':
                        ad_data = normalize(ad_data,
                                            'angle_doppler',
                                            norm_type=self.norm_type)
                    optimizer.zero_grad()

                    if self.model_name == 'tmvanet':
                        rd_outputs, ra_outputs = self.net(
                            rd_data, ra_data, ad_data)
                    else:
                        rd_outputs, ra_outputs = self.net(rd_data, ra_data)
                    rd_outputs = rd_outputs.to(self.device)
                    ra_outputs = ra_outputs.to(self.device)
                    if nb_losses < 3:
                        # Case without the CoL
                        rd_losses = [
                            c(rd_outputs, torch.argmax(rd_mask, axis=1))
                            for c in rd_criterion
                        ]
                        rd_loss = torch.mean(torch.stack(rd_losses))
                        ra_losses = [
                            c(ra_outputs, torch.argmax(ra_mask, axis=1))
                            for c in ra_criterion
                        ]
                        ra_loss = torch.mean(torch.stack(ra_losses))
                        loss = torch.mean(rd_loss + ra_loss)
                    else:
                        # Case with the CoL
                        # Select the wCE and wSDice
                        rd_losses = [
                            c(rd_outputs, torch.argmax(rd_mask, axis=1))
                            for c in rd_criterion[:2]
                        ]
                        rd_loss = torch.mean(torch.stack(rd_losses))
                        ra_losses = [
                            c(ra_outputs, torch.argmax(ra_mask, axis=1))
                            for c in ra_criterion[:2]
                        ]
                        ra_loss = torch.mean(torch.stack(ra_losses))
                        # Coherence loss
                        coherence_loss = rd_criterion[2](rd_outputs,
                                                         ra_outputs)
                        loss = torch.mean(rd_loss + ra_loss + coherence_loss)

                    loss.backward()
                    optimizer.step()
                    running_losses.append(loss.data.cpu().numpy()[()])
                    rd_running_losses.append(rd_loss.data.cpu().numpy()[()])
                    rd_running_global_losses[0].append(
                        rd_losses[0].data.cpu().numpy()[()])
                    rd_running_global_losses[1].append(
                        rd_losses[1].data.cpu().numpy()[()])
                    ra_running_losses.append(ra_loss.data.cpu().numpy()[()])
                    ra_running_global_losses[0].append(
                        ra_losses[0].data.cpu().numpy()[()])
                    ra_running_global_losses[1].append(
                        ra_losses[1].data.cpu().numpy()[()])
                    if nb_losses > 2:
                        coherence_running_losses.append(
                            coherence_loss.data.cpu().numpy()[()])

                    if iteration % self.loss_step == 0:
                        train_loss = np.mean(running_losses)
                        rd_train_loss = np.mean(rd_running_losses)
                        rd_train_losses = [
                            np.mean(sub_loss)
                            for sub_loss in rd_running_global_losses
                        ]
                        ra_train_loss = np.mean(ra_running_losses)
                        ra_train_losses = [
                            np.mean(sub_loss)
                            for sub_loss in ra_running_global_losses
                        ]
                        if nb_losses > 2:
                            coherence_train_loss = np.mean(
                                coherence_running_losses)
                        print('[Epoch {}/{}, iter {}]: '
                              'train loss {}'.format(epoch + 1, self.nb_epochs,
                                                     iteration, train_loss))
                        if nb_losses > 2:
                            self.visualizer.update_multi_train_loss(
                                train_loss, rd_train_loss, rd_train_losses,
                                ra_train_loss, ra_train_losses, iteration,
                                coherence_train_loss)
                        else:
                            self.visualizer.update_multi_train_loss(
                                train_loss, rd_train_loss, rd_train_losses,
                                ra_train_loss, ra_train_losses, iteration)
                        running_losses = list()
                        rd_running_losses = list()
                        ra_running_losses = list()
                        self.visualizer.update_learning_rate(
                            scheduler.get_lr()[0], iteration)

                    if iteration % self.val_step == 0 and iteration > 0:
                        if iteration % self.viz_step == 0 and iteration > 0:
                            val_metrics = self.tester.predict(
                                self.net,
                                val_loader,
                                iteration,
                                add_temp=add_temp)
                        else:
                            val_metrics = self.tester.predict(
                                self.net, val_loader, add_temp=add_temp)

                        self.visualizer.update_multi_val_metrics(
                            val_metrics, iteration)
                        print('[Epoch {}/{}] Validation losses: '
                              'RD={}, RA={}'.format(
                                  epoch + 1, self.nb_epochs,
                                  val_metrics['range_doppler']['loss'],
                                  val_metrics['range_angle']['loss']))
                        print('[Epoch {}/{}] Validation Pixel Prec: '
                              'RD={}, RA={}'.format(
                                  epoch + 1, self.nb_epochs,
                                  val_metrics['range_doppler']['prec'],
                                  val_metrics['range_angle']['prec']))

                        if val_metrics[
                                'global_prec'] > best_val_prec and iteration > 0:
                            best_val_prec = val_metrics['global_prec']
                            test_metrics = self.tester.predict(
                                self.net, test_loader, add_temp=add_temp)
                            print('[Epoch {}/{}] Test losses: '
                                  'RD={}, RA={}'.format(
                                      epoch + 1, self.nb_epochs,
                                      test_metrics['range_doppler']['loss'],
                                      test_metrics['range_angle']['loss']))
                            print('[Epoch {}/{}] Test Prec: '
                                  'RD={}, RA={}'.format(
                                      epoch + 1, self.nb_epochs,
                                      test_metrics['range_doppler']['prec'],
                                      test_metrics['range_angle']['prec']))

                            self.results['rd_train_loss'] = rd_train_loss.item()
                            self.results['ra_train_loss'] = ra_train_loss.item()
                            self.results['train_loss'] = train_loss.item()
                            self.results['val_metrics'] = val_metrics
                            self.results['test_metrics'] = test_metrics
                            if nb_losses > 3:
                                self.results['coherence_train_loss'] = \
                                    coherence_train_loss.item()
                            self._save_results()
                        self.net.train()  # Train mode after evaluation process
                    iteration += 1
        self.writer.close()
Example #12
        os.makedirs(args.checkpoints_dir)

    if args.explore:
        scheduler = ExponentialLR(optimizer, 2.0)
    else:
        scheduler = MultiStepLR(optimizer,
                                milestones=args.learning_steps,
                                gamma=args.learning_gamma,
                                last_epoch=starting_epoch - 1)
    # training loop
    print(f"training for {args.nb_epochs} epochs")
    losses = []
    learning_rates = []
    for epoch in range(starting_epoch, args.nb_epochs):
        scheduler.step()
        logger.write({'learning rate': scheduler.get_lr()[0]}, index=epoch)

        for step in ['train', 'test']:
            metrics = train_eval(model, dataloaders, optimizer,
                                 step == 'train')
            logger.write(metrics, curve=f"mean_{step}", increment=False)
            print(
                "                                                                  ",
                end='\r')
            print('{}\tEpoch [{}/{}],\tLoss: {:.4f},\tAccuracy: {:.2f}%\t'.
                  format(step, epoch, args.nb_epochs, metrics['loss'],
                         metrics['accuracy'] * 100),
                  flush=True)

        learning_rates.append(scheduler.get_lr()[0])
        # TODO save best model according to loss
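The explore branch above replaces the usual MultiStepLR with ExponentialLR(optimizer, 2.0), i.e. the learning rate doubles every epoch, which is effectively a coarse LR range test: the loss is watched as the LR grows and a working value is picked just below the point where training diverges. A standalone sketch of that idea with a toy model (names and sizes are placeholders):

import torch
from torch import nn
from torch.optim.lr_scheduler import ExponentialLR

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)
scheduler = ExponentialLR(optimizer, gamma=2.0)  # double the LR every epoch

x, y = torch.randn(256, 10), torch.randn(256, 1)
for epoch in range(20):
    optimizer.zero_grad()
    loss = nn.functional.mse_loss(model(x), y)
    loss.backward()
    optimizer.step()
    # Record (lr, loss); a good LR sits just before the loss starts to blow up.
    print(epoch, optimizer.param_groups[0]['lr'], loss.item())
    scheduler.step()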
Example #13
class Trainer(object):
    ''' An object that encapsulates model training '''
    def __init__(self, config, model, dataloader, device):
        self.model = model
        self.config = config
        self.device = device
        self.stopped_early = False
        self.dataloader = dataloader
        self.validation_dataloader = dataloader
        self.last_checkpoint_time = time.time()

        if 'cuda' in device.type:
            self.model = nn.DataParallel(model.cuda())

        self.optimizer = optim.Adam(model.parameters(), config.base_lr, betas=(0.9, 0.98), eps=1e-9)
        if config.lr_scheduler == 'warmup':
            self.lr_scheduler = LambdaLR(
                self.optimizer,
                WarmupLRSchedule(
                    config.warmup_steps
                )
            )
        elif config.lr_scheduler == 'linear':
            self.lr_scheduler = LambdaLR(
                self.optimizer,
                LinearLRSchedule(
                    config.base_lr,
                    config.final_lr,
                    config.max_steps
                )
            )
        elif config.lr_scheduler == 'exponential':
            self.lr_scheduler = ExponentialLR(
                self.optimizer,
                config.lr_decay
            )
        else:
            raise ValueError('Unknown learning rate scheduler!')

        # Initialize the metrics
        metrics_path = os.path.join(self.config.checkpoint_directory, 'train_metrics.pt')
        self.metric_store = metrics.MetricStore(metrics_path)
        self.metric_store.add(metrics.Metric('oom', metrics.format_int, 't'))
        self.metric_store.add(metrics.Metric('nll', metrics.format_float, max_history=1000))
        self.metric_store.add(metrics.Metric('lr', metrics.format_scientific, 'g', max_history=1))
        self.metric_store.add(metrics.Metric('num_tok', metrics.format_int, 'a', max_history=1000))

        if self.config.early_stopping:
            self.metric_store.add(metrics.Metric('vnll', metrics.format_float, 'g'))

        self.modules = {
            'model': model,
            'optimizer': self.optimizer,
            'lr_scheduler': self.lr_scheduler
        }

    @property
    def dataset(self):
        ''' Get the dataset '''
        return self.dataloader.dataset

    def train_epoch(self, epoch, experiment, verbose=0):
        ''' Run one training epoch '''
        oom = self.metric_store['oom']
        learning_rate = self.metric_store['lr']
        num_tokens = self.metric_store['num_tok']
        neg_log_likelihood = self.metric_store['nll']

        def try_optimize(i, last=False):
            # optimize if:
            #  1) last and remainder
            #  2) not last and not remainder
            remainder = bool(i % self.config.accumulate_steps)
            if not last ^ remainder:
                next_lr = self.optimize()

                learning_rate.update(next_lr)
                experiment.log_metric('learning_rate', next_lr)
                return True

            return False

        def get_description():
            description = f'Train #{epoch}'
            if verbose > 0:
                description += f' {self.metric_store}'
            if verbose > 1:
                description += f' [{profile.mem_stat_string(["allocated"])}]'
            return description

        batches = tqdm(
            self.dataloader,
            unit='batch',
            dynamic_ncols=True,
            desc=get_description(),
            file=sys.stdout # needed to make tqdm_wrap_stdout work
        )
        with tqdm_wrap_stdout():
            i = 1
            nll_per_update = 0.
            length_per_update = 0
            num_tokens_per_update = 0
            for i, batch in enumerate(batches, 1):
                try:
                    nll, length = self.calculate_gradient(batch)
                    did_optimize = try_optimize(i)

                    # record the effective number of tokens
                    num_tokens_per_update += int(sum(batch['input_lens']))
                    num_tokens_per_update += int(sum(batch['target_lens']))

                    if length:
                        # record length and nll
                        nll_per_update += nll
                        length_per_update += length

                    if did_optimize:
                        # advance the experiment step
                        experiment.set_step(experiment.curr_step + 1)

                        num_tokens.update(num_tokens_per_update)
                        neg_log_likelihood.update(nll_per_update / length_per_update)

                        experiment.log_metric('num_tokens', num_tokens_per_update)
                        experiment.log_metric('nll', neg_log_likelihood.last_value)

                        nll_per_update = 0.
                        length_per_update = 0
                        num_tokens_per_update = 0

                except RuntimeError as rte:
                    if 'out of memory' in str(rte):
                        torch.cuda.empty_cache()

                        oom.update(1)
                        experiment.log_metric('oom', oom.total)
                    else:
                        batches.close()
                        raise rte

                if self.should_checkpoint():
                    new_best = False
                    if self.config.early_stopping:
                        with tqdm_unwrap_stdout():
                            new_best = self.evaluate(experiment, epoch, verbose)

                    self.checkpoint(epoch, experiment.curr_step, new_best)

                batches.set_description_str(get_description())
                if self.is_done(experiment, epoch):
                    batches.close()
                    break

            try_optimize(i, last=True)

    def should_checkpoint(self):
        ''' Function which determines if a new checkpoint should be saved '''
        return time.time() - self.last_checkpoint_time > self.config.checkpoint_interval

    def checkpoint(self, epoch, step, best=False):
        ''' Save a checkpoint '''
        checkpoint_path = checkpoint(
            epoch, step, self.modules,
            self.config.checkpoint_directory,
            max_checkpoints=self.config.max_checkpoints
        )

        if best:
            dirname = os.path.dirname(checkpoint_path)
            basename = os.path.basename(checkpoint_path)
            best_checkpoint_path = os.path.join(dirname, f'best_{basename}')
            shutil.copy2(checkpoint_path, best_checkpoint_path)

        self.metric_store.save()
        self.last_checkpoint_time = time.time()

    def evaluate(self, experiment, epoch, verbose=0):
        ''' Evaluate the current model and determine if it is a new best '''
        model = self.modules['model']
        evaluator = Evaluator(args.ArgGroup(None), model, self.validation_dataloader, self.device)
        vnll = evaluator(epoch, experiment, verbose)
        metric = self.metric_store['vnll']
        full_history = metric.values
        metric.update(vnll)
        self.metric_store.save()

        return all(vnll < nll for nll in full_history[:-1])

    def is_done(self, experiment, epoch):
        ''' Has training completed '''
        if self.config.max_steps and experiment.curr_step >= self.config.max_steps:
            return True

        if self.config.max_epochs and epoch >= self.config.max_epochs:
            return True

        if self.config.early_stopping:
            history = self.metric_store['vnll'].values[-self.config.early_stopping - 1:]
            if len(history) == self.config.early_stopping + 1:
                self.stopped_early = all(history[-1] > nll for nll in history[:-1])
                return self.stopped_early

        return False

    def optimize(self):
        ''' Calculate an optimization step '''
        self.lr_scheduler.step()
        self.optimizer.step()
        self.optimizer.zero_grad()

        return self.lr_scheduler.get_lr()[0]

    def calculate_gradient(self, batch):
        ''' Runs one step of optimization '''
        # run the data through the model
        self.model.train()
        loss, nll = self.model(batch)

        # nn.DataParallel wants to gather rather than doing a reduce_add, so the output here
        # will be a tensor of values that must be summed
        nll = nll.sum()
        loss = loss.sum()

        # calculate gradients then run an optimization step
        loss.backward()

        # need to use .item() which converts to Python scalar
        # because as a Tensor it accumulates gradients
        return nll.item(), torch.sum(batch['target_lens']).item()

    def __call__(self, start_epoch, experiment, verbose=0):
        ''' Execute training '''
        with ExitStack() as stack:
            stack.enter_context(chunked_scattering())
            stack.enter_context(experiment.train())

            if start_epoch > 0 or experiment.curr_step > 0:
                # TODO: Hacky approach to decide if the metric store should be loaded. Revisit later
                self.metric_store = self.metric_store.load()

            epoch = start_epoch
            experiment.log_current_epoch(epoch)
            while not self.is_done(experiment, epoch):
                experiment.log_current_epoch(epoch)
                self.train_epoch(epoch, experiment, verbose)
                experiment.log_epoch_end(epoch)
                epoch += 1

            if self.stopped_early:
                print('Stopping early!')
            else:
                new_best = False
                if self.config.early_stopping:
                    new_best = self.evaluate(experiment, epoch, verbose)

                self.checkpoint(epoch, experiment.curr_step, new_best)
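WarmupLRSchedule and LinearLRSchedule are passed to LambdaLR as callables but are not defined in this snippet. As an illustration only, a Transformer-style warmup multiplier (linear ramp followed by inverse square-root decay, normalized so the factor peaks at 1.0 after warmup_steps) could look like this:

class WarmupLRSchedule:
    """Hypothetical stand-in for the schedule used above, not the original code."""

    def __init__(self, warmup_steps):
        self.warmup_steps = max(1, warmup_steps)

    def __call__(self, step):
        # LambdaLR multiplies the optimizer's base LR by the returned factor.
        step = max(1, step)
        scale = self.warmup_steps ** 0.5
        return scale * min(step ** -0.5, step * self.warmup_steps ** -1.5)

Since optimize() calls lr_scheduler.step() once per optimization step, LambdaLR hands this callable the running step count and scales the base LR accordingly.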
Example #14
    opt.step()

    try:
        sdf = params_torch['SDF.data'].data.cpu().numpy().reshape([sdf_res] *
                                                                  3)
        sdf = skfmm.distance(sdf, sdf_scale / sdf_res)

        params_torch['SDF.data'].data.copy_(torch.from_numpy(sdf.flatten()))

        if epoch % 10 == 9:
            write_binary_grid3d(f'{out_path}sdf_e{epoch}.vol', sdf)

    except RuntimeError as e:
        print(
            f'skfmm failed: mean={sdf.mean()}, min={sdf.min()}, max={sdf.max()}'
        )
        print(e)

    print(
        f'epoch {epoch}: lr={lr_scheduler.get_lr()[0]}, total_loss={np.mean(loss_imgs)}, loss_imgs={loss_imgs}'
    )
    with open(f"{out_path}log.txt", mode='a+') as f:
        f.write(','.join(
            list(
                map(str, [
                    epoch,
                    lr_scheduler.get_lr()[0],
                    np.mean(loss_imgs), *loss_imgs, "\n"
                ]))))

    lr_scheduler.step()
class Train(object):
    def __init__(self):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.train_data_path,
                               self.vocab,
                               mode='train',
                               batch_size=config.batch_size,
                               single_pass=False)
        time.sleep(15)
        self.val_batcher = Batcher(config.eval_data_path,
                                   self.vocab,
                                   mode='eval',
                                   batch_size=config.batch_size,
                                   single_pass=False)
        time.sleep(15)

        train_dir = os.path.join(config.log_root,
                                 'train_%d' % (int(time.time())))
        if not os.path.exists(train_dir):
            os.mkdir(train_dir)

        self.model_dir = os.path.join(train_dir, 'model')
        if not os.path.exists(self.model_dir):
            os.mkdir(self.model_dir)

        self.summary_writer = tf.compat.v1.summary.FileWriter(train_dir)

    def save_model(self, running_avg_loss, iter):
        state = {
            'iter': iter,
            'encoder_state_dict': self.model.encoder.state_dict(),
            'decoder_state_dict': self.model.decoder.state_dict(),
            'reduce_state_dict': self.model.reduce_state.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'current_loss': running_avg_loss
        }
        model_save_path = os.path.join(
            self.model_dir, 'model_%d_%d' % (iter, int(time.time())))
        torch.save(state, model_save_path)

    def setup_train(self, model_file_path=None):
        self.model = Model(model_file_path)

        params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
                 list(self.model.reduce_state.parameters())
        initial_lr = config.lr_coverage if config.is_coverage else config.lr
        self.optimizer = Adagrad(
            params,
            lr=initial_lr,
            initial_accumulator_value=config.adagrad_init_acc)
        self.scheduler = ExponentialLR(self.optimizer, gamma=0.99)
        start_iter, start_loss = 0, 0

        if model_file_path is not None:  # when resuming training from a saved checkpoint
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']

            if not config.is_coverage:  # when coverage is disabled
                self.optimizer.load_state_dict(state['optimizer'])
                if use_cuda:
                    for state in self.optimizer.state.values():
                        for k, v in state.items():
                            if torch.is_tensor(v):
                                state[k] = v.cuda()

        return start_iter, start_loss

    def train_one_batch(self, batch, iter):
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_1, coverage = \
            get_input_from_batch(batch, use_cuda)
        dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
            get_output_from_batch(batch, use_cuda)

        self.optimizer.zero_grad()

        encoder_outputs, encoder_feature, encoder_hidden = self.model.encoder(
            enc_batch, enc_lens)
        s_t_1 = self.model.reduce_state(encoder_hidden)

        step_losses = []

        words = []
        for di in range(min(max_dec_len, config.max_dec_steps)):
            y_t_1 = dec_batch[:, di]  # Teacher forcing
            final_dist, s_t_1, c_t_1, attn_dist, p_gen, next_coverage = self.model.decoder(
                y_t_1, s_t_1, encoder_outputs, encoder_feature,
                enc_padding_mask, c_t_1, extra_zeros, enc_batch_extend_vocab,
                coverage, di)
            words.append(self.vocab.id2word(final_dist[0].argmax().item()))

            target = target_batch[:, di]
            gold_probs = torch.gather(final_dist, 1,
                                      target.unsqueeze(1)).squeeze()
            step_loss = -torch.log(gold_probs + config.eps)
            # print('step_loss',step_loss)
            # print('step_loss.size()',step_loss.size())
            if config.is_coverage:
                step_coverage_loss = torch.sum(torch.min(attn_dist, coverage),
                                               1)
                step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
                coverage = next_coverage

            step_mask = dec_padding_mask[:, di]
            step_loss = step_loss * step_mask
            step_losses.append(step_loss)

        if iter % 100 == 0:
            print(words)
            print([self.vocab.id2word(idx.item()) for idx in dec_batch[0]])
            print([self.vocab.id2word(idx.item()) for idx in target_batch[0]])

        sum_losses = torch.sum(torch.stack(step_losses, 1), 1)
        batch_avg_loss = sum_losses / dec_lens_var
        loss = torch.mean(batch_avg_loss)

        loss.backward()

        self.norm = clip_grad_norm_(self.model.encoder.parameters(),
                                    config.max_grad_norm)
        clip_grad_norm_(self.model.decoder.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.reduce_state.parameters(),
                        config.max_grad_norm)
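        # clip_grad_norm_ returns the total gradient norm before clipping;
        # the encoder's norm is kept in self.norm (e.g. for logging)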

        self.optimizer.step()

        return loss.item()

    def eval_one_batch(self, batch):
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_1, coverage = \
            get_input_from_batch(batch, use_cuda)
        dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
            get_output_from_batch(batch, use_cuda)

        encoder_outputs, encoder_feature, encoder_hidden = self.model.encoder(
            enc_batch, enc_lens)
        s_t_1 = self.model.reduce_state(encoder_hidden)

        step_losses = []
        for di in range(min(max_dec_len, config.max_dec_steps)):
            y_t_1 = dec_batch[:, di]  # Teacher forcing
            final_dist, s_t_1, c_t_1, attn_dist, p_gen, next_coverage = self.model.decoder(
                y_t_1, s_t_1, encoder_outputs, encoder_feature,
                enc_padding_mask, c_t_1, extra_zeros, enc_batch_extend_vocab,
                coverage, di)
            target = target_batch[:, di]
            gold_probs = torch.gather(final_dist, 1,
                                      target.unsqueeze(1)).squeeze()
            step_loss = -torch.log(gold_probs + config.eps)
            if config.is_coverage:
                step_coverage_loss = torch.sum(torch.min(attn_dist, coverage),
                                               1)
                step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
                coverage = next_coverage

            step_mask = dec_padding_mask[:, di]
            step_loss = step_loss * step_mask
            step_losses.append(step_loss)

        sum_step_losses = torch.sum(torch.stack(step_losses, 1), 1)
        batch_avg_loss = sum_step_losses / dec_lens_var
        loss = torch.mean(batch_avg_loss)
        return loss.item()

    def trainIters(self, n_iters, model_file_path=None):
        iter, running_avg_loss = self.setup_train(model_file_path)
        start = time.time()
        while iter < n_iters:
            batch = self.batcher.next_batch()
            loss = self.train_one_batch(batch, iter)
            val_loss = None
            if iter % 100 == 0:
                val_batch = self.val_batcher.next_batch()
                val_loss = self.eval_one_batch(val_batch)
                self.scheduler.step()
                print("lr", self.scheduler.get_lr())
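                # note: on recent PyTorch versions get_last_lr() is the
                # supported way to read the current LR; calling get_lr()
                # outside of step() may emit a warning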

            running_avg_loss = calc_running_avg_loss(loss, running_avg_loss,
                                                     self.summary_writer, iter)
            iter += 1

            if iter % 100 == 0:
                self.summary_writer.flush()
            print_interval = 1
            if iter % print_interval == 0:
                if val_loss is not None:
                    print(
                        'steps %d, seconds for %d batch: %.2f , loss: %f , eval_loss: %f'
                        % (iter, print_interval, time.time() - start, loss,
                           val_loss))
                else:
                    print('steps %d, seconds for %d batch: %.2f , loss: %f' %
                          (iter, print_interval, time.time() - start, loss))
                start = time.time()
            if iter % 1000 == 0:
                self.save_model(running_avg_loss, iter)
                             **params_torch)
        write_bitmap(f'{out_path}{i}_image_e{epoch:03d}.png', image, crop_size)

        #ob_val = lambda_img * objective(image, images_ref[i]) / len(cams_origins)
        ob_val, pyr_ob = objective(image, i)
        print(
            f"rendered image {i}: loss={ob_val.cpu().item()}, pyramid_losses={list(pyr_ob.data.cpu().numpy())}"
        )

        ob_val /= len(cams_origins)
        loss_img += ob_val.item()
        ob_val.backward()
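        # gradients from each view accumulate on the shared parameters;
        # opt.step() after the loop applies the combined update once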

    opt.step()
    if lr_scheduler:
        print("lr = ", lr_scheduler.get_lr()[0])
        lr_scheduler.step()
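        # once epoch reaches T_max, replace the current schedule with a fresh
        # CosineAnnealingLR cycle that anneals the LR towards eta_min=0.001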
        if epoch == T_max and T_max > 0:
            lr_scheduler = CosineAnnealingLR(opt, T_max=T_max, eta_min=0.001)

    print(ek.hsum(params['SDF.bsdf.reflectance.data']))

    try:
        sdf = params_torch['SDF.data'].data.cpu().numpy().reshape([sdf_res] *
                                                                  3)
        sdf = skfmm.distance(sdf, sdf_scale / sdf_res)

        if epoch in sdf_res_squedule:
            pass
            # print(sdf.shape, sdf.flatten().shape)
            # sdf = double_sdf_res(sdf)
Esempio n. 17
0
    def train(self, x_train, y_train):
        idx = np.random.permutation(len(x_train))
        print('samples:', len(x_train))
        x_train = np.array(x_train)[idx]
        y_train = np.array(y_train)[idx]
        x_val = x_train[35000:]
        x_tr = x_train[:35000]
        y_val = y_train[35000:]
        y_tr = y_train[:35000]
        print(np.max(y_tr))

        #optimizer = SGD(self.model.parameters(), lr=0.1, weight_decay=1e-4, nesterov=True, momentum=0.9)
        optimizer = Adam(self.model.parameters(), lr=0.001, weight_decay=1e-4)
        scheduler = ExponentialLR(optimizer, 0.98)
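        # decay the learning rate by 2% per epoch; scheduler.step() is called
        # once at the start of every epoch below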
        loss_fn = nn.CrossEntropyLoss()

        train_loader = torch.utils.data.DataLoader(Loader(x_tr, y_tr),
                                                   batch_size=256,
                                                   shuffle=True)
        val_loader = torch.utils.data.DataLoader(Loader(x_val, y_val),
                                                 batch_size=256,
                                                 shuffle=False)

        best_acc = 0

        for epoch in range(50):
            self.model.train()
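            # note: since PyTorch 1.1 the recommended order is to call
            # scheduler.step() after the epoch's optimizer updates; stepping
            # here at the top of the loop skips the initial LR for epoch 0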
            scheduler.step()
            loss_ = acc_ = cnt = 0
            for i, (input, target) in enumerate(train_loader):
                output = self.model(input[0].cuda(), input[1].cuda())
                optimizer.zero_grad()
                loss = loss_fn(output, target.cuda().view(-1))
                loss.backward()
                optimizer.step()
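                # get_lr() returns one LR per parameter group, hence lr[0]
                # in the epoch summary printed below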
                lr = scheduler.get_lr()

                pred = output.max(1)[1].cpu()
                match = torch.sum(
                    pred == target.view(-1)).float() / target.shape[0]
                loss_ += loss.cpu().data.numpy()
                acc_ += match.data.numpy()
                cnt += 1
            print('Epoch %2d: loss = %6.5f, training acc=%5.3f, lr=%f' %
                  (epoch, loss_ / cnt, acc_ * 100 / cnt, lr[0]))

            acc_ = 0
            val_cnt = 0
            self.model.eval()
            for i, (input, target) in enumerate(val_loader):
                with torch.no_grad():
                    output = self.model(input[0].cuda(), input[1].cuda())
                pred = output.max(1)[1].cpu()
                acc_ += torch.sum(
                    pred == target.view(-1)).float() / target.shape[0]
                val_cnt += 1
            star = '*' if best_acc <= (acc_ / val_cnt) else ''
            print('val acc= %5.3f' % (acc_ / val_cnt) + star)
            torch.save(self.model.state_dict(), 'rnn_checkpoint.pth')
            if best_acc <= (acc_ / val_cnt):
                best_acc = (acc_ / val_cnt)
                torch.save(self.model.state_dict(), 'rnn_best_model.pth')