Example #1
    def train_by_epoch(self):
        tqdm_batch = tqdm(self.dataloader,
                          total=self.total_iter,
                          desc='epoch-{}'.format(self.epoch))

        avg_loss = AverageMeter()
        for curr_it, data in enumerate(tqdm_batch):
            self.reg.train()

            # `async` is a reserved word in Python 3.7+; PyTorch 0.4 renamed
            # the Tensor.cuda() keyword to `non_blocking`.
            edge = data['edge'].float().cuda(
                non_blocking=self.config.async_loading)
            corner = data['corner'].float().cuda(
                non_blocking=self.config.async_loading)
            box = data['box'].float().cuda(
                non_blocking=self.config.async_loading)

            reg_out = self.reg(torch.cat((edge, corner), dim=1))

            loss = self.mse(reg_out, box)

            self.opt.zero_grad()
            loss.backward()
            self.opt.step()

            # Store a plain float so the autograd graph is not kept alive.
            avg_loss.update(loss.item())

        tqdm_batch.close()

        self.summary_writer.add_scalar('reg/loss', avg_loss.val, self.epoch)

        self.scheduler.step(avg_loss.val)

        self.logger.warning('info - lr: {}, loss: {}'.format(
            get_lr(self.opt), avg_loss.val))
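
The loop relies on an AverageMeter helper and a get_lr utility that are not shown in the snippet. A minimal sketch of what they might look like, assuming val exposes the running mean (which is how the loops above read it):

class AverageMeter:
    """Tracks a running mean; read back through the val property."""

    def __init__(self):
        self.sum, self.count = 0.0, 0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n

    @property
    def val(self):
        return self.sum / max(self.count, 1)


def get_lr(optimizer):
    # Report the learning rate of the first parameter group.
    return optimizer.param_groups[0]['lr']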
Example #2
    def train_by_epoch(self):
        tqdm_batch = tqdm(self.dataloader,
                          total=self.total_iter,
                          desc='epoch-{}'.format(self.epoch))

        avg_loss = AverageMeter()
        corner, edge, out = None, None, None
        for curr_it, data in enumerate(tqdm_batch):
            self.encoder.train()
            self.edge.train()
            self.corner.train()

            # `async` was renamed to `non_blocking` (async is a Python keyword).
            img = data['img'].float().cuda(non_blocking=self.config.async_loading)
            line = data['line'].float().cuda(non_blocking=self.config.async_loading)
            edge = data['edge'].float().cuda(non_blocking=self.config.async_loading)
            corner = data['corner'].float().cuda(non_blocking=self.config.async_loading)

            encoder_out_list = self.encoder(torch.cat((img, line), dim=1))
            edge_out_list, edge_out = self.edge(encoder_out_list)
            corner_out = self.corner(edge_out_list)

            # Assumes self.bce was built with reduction='none', so the loss is
            # per-pixel; positive (edge/corner) pixels are weighted 4x.
            loss = self.bce(edge_out, edge)
            loss[edge > 0.] *= 4
            loss = loss.mean()

            c_loss = self.bce(corner_out, corner)
            c_loss[corner > 0.] *= 4
            loss += c_loss.mean()

            # Keep detached copies of the last batch for visualisation below.
            out = (edge_out.detach(), corner_out.detach())

            self.opt.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(chain(self.encoder.parameters(),
                                           self.edge.parameters(),
                                           self.corner.parameters()),
                                     3.0, norm_type=float('inf'))
            self.opt.step()

            avg_loss.update(loss.item())

        tqdm_batch.close()

        self.summary_writer.add_image('corner/edge_origin 1', edge[0], self.epoch)
        self.summary_writer.add_image('corner/edge_origin 2', edge[1], self.epoch)

        self.summary_writer.add_image('corner/edge_train 1', out[0][0], self.epoch)
        self.summary_writer.add_image('corner/edge_train 2', out[0][1], self.epoch)

        self.summary_writer.add_image('corner/corner_origin 1', corner[0], self.epoch)
        self.summary_writer.add_image('corner/corner_origin 2', corner[1], self.epoch)

        self.summary_writer.add_image('corner/corner_train 1', out[1][0], self.epoch)
        self.summary_writer.add_image('corner/corner_train 2', out[1][1], self.epoch)

        self.summary_writer.add_scalar('corner/loss', avg_loss.val, self.epoch)

        self.scheduler.step(avg_loss.val)

        self.logger.warning('info - lr: {}, loss: {}'.format(get_lr(self.opt), avg_loss.val))
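
The in-place masking above (loss[edge > 0.] *= 4) only works when the criterion returns an unreduced, per-pixel loss. A standalone sketch of the same up-weighting trick, assuming the standard functional BCE (the actual loss construction is not shown in the snippet):

import torch.nn.functional as F

def weighted_bce(pred, target, pos_weight=4.0):
    # Per-pixel BCE; mirrors self.bce(...) with reduction='none' above.
    loss = F.binary_cross_entropy(pred, target, reduction='none')
    # Count positive (edge/corner) pixels pos_weight times before averaging.
    loss[target > 0.] *= pos_weight
    return loss.mean()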
Example #3
def train(cfg):

    batch_size = int(cfg['batch_size'])
    n_epochs = int(cfg['n_epochs'])
    sample_size = int(cfg['fixed_size'])

    #### DATA LOADING
    trans_train = []
    trans_val = []
    if cfg['rnd_sampling']:
        # Alternative: RndSampling(sample_size, maintain_prop=False,
        #                          prop_vector=[1, 1])
        trans_train.append(RndSampling(sample_size, maintain_prop=False))
        trans_val.append(RndSampling(sample_size, maintain_prop=False))

    dataset, dataloader = get_dataset(cfg, trans=trans_train)
    val_dataset, val_dataloader = get_dataset(cfg,
                                              trans=trans_val,
                                              train=False)
    # summary for tensorboard
    writer = create_tb_logger(cfg)
    dump_code(cfg, writer.logdir)

    #### BUILD THE MODEL
    classifier = get_model(cfg)
    if cfg['verbose']:
        print(classifier)

    #### SET THE TRAINING
    optimizer = get_optimizer(cfg, classifier)

    lr_scheduler = get_lr_scheduler(cfg, optimizer)

    classifier.cuda()

    # Integer division: number of whole batches per epoch.
    num_batch = len(dataset) // batch_size
    print('num of batches per epoch: %d' % num_batch)
    cfg['num_batch'] = num_batch

    n_iter = 0
    # best_pred tracks a loss-like metric (lower is better), hence the high init.
    best_pred = 10
    best_epoch = 0
    current_lr = float(cfg['learning_rate'])
    for epoch in range(n_epochs + 1):

        # update bn decay
        if cfg['bn_decay'] and epoch != 0 and epoch % int(
                cfg['bn_decay_step']) == 0:
            update_bn_decay(cfg, classifier, epoch)

        loss, n_iter = train_ep(cfg, dataloader, classifier, optimizer, writer,
                                epoch, n_iter)

        ### validation during training
        if epoch % int(cfg['val_freq']) == 0 and cfg['val_in_train']:
            best_epoch, best_pred = val_ep(cfg, val_dataloader, classifier,
                                           writer, epoch, best_epoch,
                                           best_pred)

        # update lr
        if cfg['lr_type'] == 'step' and current_lr >= float(cfg['min_lr']):
            lr_scheduler.step()
        if cfg['lr_type'] == 'plateau':
            lr_scheduler.step(loss)

        current_lr = get_lr(optimizer)
        writer.add_scalar('train/lr', current_lr, epoch)

    writer.close()
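
The get_lr_scheduler helper is not shown. Given that the loop calls lr_scheduler.step() for the 'step' type and lr_scheduler.step(loss) for 'plateau', a plausible sketch using the standard torch.optim.lr_scheduler classes (the config keys lr_decay_step and lr_gamma here are assumptions, not from the source):

from torch.optim import lr_scheduler

def get_lr_scheduler(cfg, optimizer):
    # Hypothetical helper matching the two branches used in train().
    if cfg['lr_type'] == 'step':
        # Decay the LR by gamma every lr_decay_step epochs (assumed keys).
        return lr_scheduler.StepLR(optimizer,
                                   step_size=int(cfg['lr_decay_step']),
                                   gamma=float(cfg['lr_gamma']))
    if cfg['lr_type'] == 'plateau':
        # Reduce the LR when the training loss stops improving; step(loss)
        # in the loop above matches this scheduler's signature.
        return lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                              factor=0.5, patience=5)
    raise ValueError('unknown lr_type: {}'.format(cfg['lr_type']))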