Example #1
0
    def _train_epoch(self, epoch):
        """Train the model for one epoch and return summary statistics.

        Iterates over ``self.train_loader`` (at most ``self.train_loader_len``
        batches). For every batch it moves tensors to ``self.device``, runs
        the forward pass, back-propagates ``loss_dict['loss']``, steps the
        optimizer and the LR scheduler, and updates the running text metric.
        A progress line is logged whenever ``self.global_step`` is a multiple
        of ``self.log_iter``.

        Args:
            epoch: Index of the current epoch; only used for logging and
                echoed back in the result.

        Returns:
            dict with keys ``'train_loss'`` (sum of the per-batch total loss
            divided by ``self.train_loader_len``), ``'lr'`` (last learning
            rate read from the first param group), ``'time'`` (wall-clock
            seconds spent in this call) and ``'epoch'``.
        """
        self.model.train()
        epoch_start = time.time()
        batch_start = time.time()
        train_loss = 0.
        # NOTE(review): runningScore is defined elsewhere; presumably a
        # 2-class metric accumulator -- confirm.
        running_metric_text = runningScore(2)
        lr = self.optimizer.param_groups[0]['lr']

        for i, batch in enumerate(self.train_loader):
            if i >= self.train_loader_len:
                break
            self.global_step += 1
            # Read the LR before the scheduler steps so the logged value is
            # the one actually used for this batch's update.
            lr = self.optimizer.param_groups[0]['lr']

            # Move every tensor in the batch onto the training device;
            # non-tensor entries (including None) are left untouched.
            for key, value in batch.items():
                if isinstance(value, torch.Tensor):
                    batch[key] = value.to(self.device)
            cur_batch_size = batch['img'].size()[0]

            preds = self.model(batch['img'])
            loss_dict = self.criterion(preds, batch)
            # backward
            self.optimizer.zero_grad()
            loss_dict['loss'].backward()
            self.optimizer.step()
            self.scheduler.step()

            # Accuracy / IoU of the first prediction channel against the
            # shrink map.
            score_shrink_map = cal_text_score(preds[:, 0, :, :], batch['shrink_map'], batch['shrink_mask'],
                                              running_metric_text,
                                              thred=self.config['post_processing']['args']['thresh'])

            # Replace every loss tensor by its Python scalar (in place).
            for key, value in loss_dict.items():
                loss_dict[key] = value.item()

            train_loss += loss_dict['loss']
            acc = score_shrink_map['Mean Acc']
            iou_shrink_map = score_shrink_map['Mean IoU']

            if self.global_step % self.log_iter == 0:
                batch_time = time.time() - batch_start
                # Total loss first, then the remaining terms. Joining keeps
                # the separators correct wherever 'loss' sits in the dict
                # (previously the text could run straight into "lr:").
                loss_terms = ['loss: {:.4f}'.format(loss_dict['loss'])]
                loss_terms.extend(
                    '{}: {:.4f}'.format(key, value)
                    for key, value in loss_dict.items() if key != 'loss')
                loss_str = ', '.join(loss_terms)
                # Speed uses the current batch size as a stand-in for every
                # batch in the logging window.
                self.logger_info(
                    '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'.format(
                        epoch, self.epochs, i + 1, self.train_loader_len, self.global_step,
                        self.log_iter * cur_batch_size / batch_time, acc, iou_shrink_map, loss_str,
                        lr, batch_time))
                batch_start = time.time()

        return {'train_loss': train_loss / self.train_loader_len, 'lr': lr, 'time': time.time() - epoch_start,
                'epoch': epoch}
Example #2
0
    def _train_epoch(self, epoch):
        """Run a single training epoch and report loss, lr and timing.

        Consumes up to ``self.train_loader_len`` ``(images, labels,
        training_masks)`` batches from ``self.train_loader``. Each batch is
        moved to ``self.device``, pushed through the model and criterion, and
        used for one optimizer update; the scheduler is stepped per batch only
        when the configured scheduler type is ``'PolynomialLR'``. Progress is
        logged every ``self.display_interval`` batches and, when
        ``self.tensorboard_enable`` is set, scalars (every batch) and image
        grids (every ``self.show_images_interval`` batches) are written to
        ``self.writer``.

        Args:
            epoch: Current epoch index; used for logging and returned as-is.

        Returns:
            dict with ``'train_loss'`` (accumulated total loss divided by
            ``self.train_loader_len``), ``'lr'``, ``'time'`` (seconds elapsed
            in this call) and ``'epoch'``.
        """
        self.model.train()
        t_epoch = time.time()
        t_window = time.time()
        loss_sum = 0.
        # NOTE(review): runningScore is defined elsewhere; presumably a
        # 2-class metric accumulator -- confirm.
        text_meter = runningScore(2)
        kernel_meter = runningScore(2)
        lr = self.optimizer.param_groups[0]['lr']

        log_template = '[{}/{}], [{}/{}], global_step: {}, Speed: {:.1f} samples/sec, acc: {:.4f}, iou_text: {:.4f}, iou_kernel: {:.4f}, loss_all: {:.4f}, loss_tex: {:.4f}, loss_ker: {:.4f}, loss_agg: {:.4f}, loss_dis: {:.4f}, lr:{:.6}, time:{:.2f}'

        for step, (imgs, targets, masks) in enumerate(self.train_loader):
            if step >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']

            # Transfer the batch to the training device.
            n_samples = imgs.size()[0]
            imgs = imgs.to(self.device)
            targets = targets.to(self.device)
            masks = masks.to(self.device)

            preds = self.model(imgs)
            loss_all, loss_tex, loss_ker, loss_agg, loss_dis = self.criterion(
                preds, targets, masks)

            # Parameter update.
            self.optimizer.zero_grad()
            loss_all.backward()
            self.optimizer.step()
            if self.config['lr_scheduler']['type'] == 'PolynomialLR':
                self.scheduler.step()

            # Running accuracy / IoU: channel 0 is scored as text, channel 1
            # as kernel.
            text_score = cal_text_score(
                preds[:, 0, :, :], targets[:, 0, :, :], masks, text_meter)
            kernel_score = cal_kernel_score(
                preds[:, 1, :, :], targets[:, 1, :, :], targets[:, 0, :, :],
                masks, kernel_meter)

            # Turn the loss tensors into Python scalars for logging.
            loss_all, loss_tex, loss_ker, loss_agg, loss_dis = [
                term.item()
                for term in (loss_all, loss_tex, loss_ker, loss_agg, loss_dis)
            ]
            loss_sum += loss_all
            acc = text_score['Mean Acc']
            iou_text = text_score['Mean IoU']
            iou_kernel = kernel_score['Mean IoU']

            if (step + 1) % self.display_interval == 0:
                window = time.time() - t_window
                throughput = self.display_interval * n_samples / window
                self.logger.info(log_template.format(
                    epoch, self.epochs, step + 1, self.train_loader_len,
                    self.global_step, throughput, acc, iou_text, iou_kernel,
                    loss_all, loss_tex, loss_ker, loss_agg, loss_dis, lr,
                    window))
                t_window = time.time()

            if not self.tensorboard_enable:
                continue

            # Scalars are written on every step.
            scalars = (
                ('TRAIN/LOSS/loss_all', loss_all),
                ('TRAIN/LOSS/loss_tex', loss_tex),
                ('TRAIN/LOSS/loss_ker', loss_ker),
                ('TRAIN/LOSS/loss_agg', loss_agg),
                ('TRAIN/LOSS/loss_dis', loss_dis),
                ('TRAIN/ACC_IOU/acc', acc),
                ('TRAIN/ACC_IOU/iou_text', iou_text),
                ('TRAIN/ACC_IOU/iou_kernel', iou_kernel),
                ('TRAIN/lr', lr),
            )
            for tag, scalar in scalars:
                self.writer.add_scalar(tag, scalar, self.global_step)

            if step % self.show_images_interval != 0:
                continue

            # Image summaries: input batch, ground truth, predictions.
            grid_kwargs = dict(nrow=n_samples, normalize=False, padding=20,
                               pad_value=1)
            self.writer.add_images('TRAIN/imgs', imgs, self.global_step)

            # Binarise the text / kernel ground truth at 0.5. The slices are
            # views of ``targets``, so this writes through to it; the loss and
            # metrics for this batch have already been computed.
            text_gt = targets[:, 0, :, :]
            kernel_gt = targets[:, 1, :, :]
            text_gt[text_gt <= 0.5] = 0
            text_gt[text_gt > 0.5] = 1
            kernel_gt[kernel_gt <= 0.5] = 0
            kernel_gt[kernel_gt > 0.5] = 1
            gt_stack = torch.cat([text_gt, kernel_gt, masks.float()])
            gt_grid = vutils.make_grid(gt_stack.unsqueeze(1), **grid_kwargs)
            self.writer.add_image('TRAIN/gt', gt_grid, self.global_step)

            # Model output: the first two channels are passed through a
            # sigmoid (written back into ``preds``) before display.
            preds[:, :2, :, :] = torch.sigmoid(preds[:, :2, :, :])
            pred_stack = torch.cat([preds[:, 0, :, :], preds[:, 1, :, :]])
            pred_grid = vutils.make_grid(pred_stack.unsqueeze(1),
                                         **grid_kwargs)
            self.writer.add_image('TRAIN/preds', pred_grid, self.global_step)

        return {
            'train_loss': loss_sum / self.train_loader_len,
            'lr': lr,
            'time': time.time() - t_epoch,
            'epoch': epoch
        }
Example #3
0
    def _train_epoch(self, epoch):
        """Train the model for one epoch and return summary statistics.

        Iterates over ``self.train_loader`` (at most ``self.train_loader_len``
        batches). For every batch it moves tensors to ``self.device``, runs
        the forward pass, back-propagates ``loss_dict['loss']`` and steps the
        optimizer; the scheduler is stepped per batch only when the configured
        scheduler type is ``'WarmupPolyLR'``. A progress line is logged
        whenever ``self.global_step`` is a multiple of ``self.log_iter``.
        When ``self.tensorboard_enable`` is set and ``config['local_rank']``
        is 0, scalars are written every step and image grids every
        ``self.show_images_iter`` steps.

        Args:
            epoch: Index of the current epoch; only used for logging and
                echoed back in the result.

        Returns:
            dict with keys ``'train_loss'`` (sum of the per-batch total loss
            divided by ``self.train_loader_len``), ``'lr'`` (last learning
            rate read from the first param group), ``'time'`` (wall-clock
            seconds spent in this call) and ``'epoch'``.
        """
        self.model.train()
        epoch_start = time.time()
        batch_start = time.time()
        train_loss = 0.
        # NOTE(review): runningScore is defined elsewhere; presumably a
        # 2-class metric accumulator -- confirm.
        running_metric_text = runningScore(2)
        lr = self.optimizer.param_groups[0]['lr']

        for i, batch in enumerate(self.train_loader):
            if i >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']

            # Move every tensor in the batch onto the training device;
            # non-tensor entries (including None) are left untouched.
            for key, value in batch.items():
                if isinstance(value, torch.Tensor):
                    batch[key] = value.to(self.device)
            cur_batch_size = batch['img'].size()[0]

            preds = self.model(batch['img'])
            loss_dict = self.criterion(preds, batch)
            # backward
            self.optimizer.zero_grad()
            loss_dict['loss'].backward()
            self.optimizer.step()
            if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
                self.scheduler.step()
            # Accuracy / IoU of the first prediction channel against the
            # shrink map.
            score_shrink_map = cal_text_score(
                preds[:, 0, :, :],
                batch['shrink_map'],
                batch['shrink_mask'],
                running_metric_text,
                thred=self.config['post_processing']['args']['thresh'])

            # Replace every loss tensor by its Python scalar (in place); the
            # scalars are reused for the log line and for TensorBoard.
            for key, value in loss_dict.items():
                loss_dict[key] = value.item()

            train_loss += loss_dict['loss']
            acc = score_shrink_map['Mean Acc']
            iou_shrink_map = score_shrink_map['Mean IoU']

            if self.global_step % self.log_iter == 0:
                batch_time = time.time() - batch_start
                # Total loss first, then the remaining terms. Joining keeps
                # the separators correct wherever 'loss' sits in the dict
                # (previously a trailing ', ' could yield ", , lr:").
                loss_terms = ['loss: {:.4f}'.format(loss_dict['loss'])]
                loss_terms.extend(
                    '{}: {:.4f}'.format(key, value)
                    for key, value in loss_dict.items() if key != 'loss')
                loss_str = ', '.join(loss_terms)
                self.logger_info(
                    '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'
                    .format(epoch, self.epochs, i + 1, self.train_loader_len,
                            self.global_step,
                            self.log_iter * cur_batch_size / batch_time, acc,
                            iou_shrink_map, loss_str, lr, batch_time))
                batch_start = time.time()

            if self.tensorboard_enable and self.config['local_rank'] == 0:
                # write tensorboard
                for key, value in loss_dict.items():
                    self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value,
                                           self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc,
                                       self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/iou_shrink_map',
                                       iou_shrink_map, self.global_step)
                self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
                if self.global_step % self.show_images_iter == 0:
                    # show images on tensorboard
                    # NOTE(review): the return value is discarded, so
                    # inverse_normalize presumably modifies batch['img'] in
                    # place -- confirm.
                    self.inverse_normalize(batch['img'])
                    self.writer.add_images('TRAIN/imgs', batch['img'],
                                           self.global_step)
                    # shrink_labels and threshold_labels
                    # shrink_labels is the same tensor object as
                    # batch['shrink_map'], so the binarisation below mutates
                    # it; loss and metrics were already computed above.
                    shrink_labels = batch['shrink_map']
                    threshold_labels = batch['threshold_map']
                    shrink_labels[shrink_labels <= 0.5] = 0
                    shrink_labels[shrink_labels > 0.5] = 1
                    show_label = torch.cat([shrink_labels, threshold_labels])
                    show_label = vutils.make_grid(show_label.unsqueeze(1),
                                                  nrow=cur_batch_size,
                                                  normalize=False,
                                                  padding=20,
                                                  pad_value=1)
                    self.writer.add_image('TRAIN/gt', show_label,
                                          self.global_step)
                    # model output: stack every prediction channel along the
                    # batch axis so each channel becomes one row of the grid
                    show_pred = torch.cat([
                        preds[:, kk, :, :] for kk in range(preds.shape[1])
                    ])
                    show_pred = vutils.make_grid(show_pred.unsqueeze(1),
                                                 nrow=cur_batch_size,
                                                 normalize=False,
                                                 padding=20,
                                                 pad_value=1)
                    self.writer.add_image('TRAIN/preds', show_pred,
                                          self.global_step)
        return {
            'train_loss': train_loss / self.train_loader_len,
            'lr': lr,
            'time': time.time() - epoch_start,
            'epoch': epoch
        }
Example #4
0
    def _train_epoch(self, epoch):
        """Run a single training epoch and report loss, lr and timing.

        Consumes up to ``self.train_loader_len`` ``(images, shrink_labels,
        threshold_labels)`` batches from ``self.train_loader``. Each batch is
        moved to ``self.device``, pushed through the model and criterion, and
        used for one optimizer update; the scheduler is stepped per batch only
        when the configured scheduler type is ``'WarmupPolyLR'``. Progress is
        logged every ``self.display_interval`` batches and, when
        ``self.tensorboard_enable`` is set, scalars (every batch) and image
        grids (every ``self.show_images_interval`` batches) are written to
        ``self.writer``.

        Args:
            epoch: Current epoch index; used for logging and returned as-is.

        Returns:
            dict with ``'train_loss'`` (accumulated total loss divided by
            ``self.train_loader_len``), ``'lr'``, ``'time'`` (seconds elapsed
            in this call) and ``'epoch'``.
        """
        self.model.train()
        t_epoch = time.time()
        t_window = time.time()
        loss_sum = 0.
        # NOTE(review): runningScore is defined elsewhere; presumably a
        # 2-class metric accumulator -- confirm.
        text_meter = runningScore(2)
        lr = self.optimizer.param_groups[0]['lr']

        log_template = '[{}/{}], [{}/{}], global_step: {}, Speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, loss_all: {:.4f}, loss_shrink_map: {:.4f}, loss_binary_map: {:.4f}, loss_threshold_map: {:.4f}, lr:{:.6}, time:{:.2f}'

        for step, (imgs, shrink_gt, thresh_gt) in enumerate(self.train_loader):
            if step >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']

            # Transfer the batch to the training device.
            n_samples = imgs.size()[0]
            imgs = imgs.to(self.device)
            shrink_gt = shrink_gt.to(self.device)
            thresh_gt = thresh_gt.to(self.device)

            preds = self.model(imgs)
            loss_all, loss_shrink_map, loss_binary_map, loss_threshold_map = \
                self.criterion(preds, shrink_gt, thresh_gt)

            # Parameter update.
            self.optimizer.zero_grad()
            loss_all.backward()
            self.optimizer.step()
            if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
                self.scheduler.step()

            # Running accuracy / IoU of channel 0 against the shrink labels,
            # with a fixed 0.5 threshold.
            shrink_score = cal_text_score(preds[:, 0, :, :],
                                          shrink_gt,
                                          text_meter,
                                          thred=0.5)

            # Turn the loss tensors into Python scalars for logging.
            loss_all, loss_shrink_map, loss_binary_map, loss_threshold_map = [
                term.item()
                for term in (loss_all, loss_shrink_map, loss_binary_map,
                             loss_threshold_map)
            ]
            loss_sum += loss_all
            acc = shrink_score['Mean Acc']
            iou_shrink_map = shrink_score['Mean IoU']

            if (step + 1) % self.display_interval == 0:
                window = time.time() - t_window
                throughput = self.display_interval * n_samples / window
                self.logger.info(log_template.format(
                    epoch, self.epochs, step + 1, self.train_loader_len,
                    self.global_step, throughput, acc, iou_shrink_map,
                    loss_all, loss_shrink_map, loss_binary_map,
                    loss_threshold_map, lr, window))
                t_window = time.time()

            if not self.tensorboard_enable:
                continue

            # Scalars are written on every step.
            scalars = (
                ('TRAIN/LOSS/loss_all', loss_all),
                ('TRAIN/LOSS/loss_shrink_map', loss_shrink_map),
                ('TRAIN/LOSS/loss_binary_map', loss_binary_map),
                ('TRAIN/LOSS/loss_threshold_map', loss_threshold_map),
                ('TRAIN/ACC_IOU/acc', acc),
                ('TRAIN/ACC_IOU/iou_shrink_map', iou_shrink_map),
                ('TRAIN/lr', lr),
            )
            for tag, scalar in scalars:
                self.writer.add_scalar(tag, scalar, self.global_step)

            if step % self.show_images_interval != 0:
                continue

            # Image summaries: input batch, ground truth, predictions.
            grid_kwargs = dict(nrow=n_samples, normalize=False, padding=20,
                               pad_value=1)
            self.writer.add_images('TRAIN/imgs', imgs, self.global_step)

            # Binarise the shrink labels at 0.5 (in place; the loss and
            # metrics for this batch have already been computed).
            shrink_gt[shrink_gt <= 0.5] = 0
            shrink_gt[shrink_gt > 0.5] = 1
            gt_stack = torch.cat([shrink_gt, thresh_gt])
            gt_grid = vutils.make_grid(gt_stack.unsqueeze(1), **grid_kwargs)
            self.writer.add_image('TRAIN/gt', gt_grid, self.global_step)

            # Model output: the first three prediction channels, stacked
            # along the batch axis.
            pred_stack = torch.cat([
                preds[:, 0, :, :], preds[:, 1, :, :], preds[:, 2, :, :]
            ])
            pred_grid = vutils.make_grid(pred_stack.unsqueeze(1),
                                         **grid_kwargs)
            self.writer.add_image('TRAIN/preds', pred_grid, self.global_step)

        return {
            'train_loss': loss_sum / self.train_loader_len,
            'lr': lr,
            'time': time.time() - t_epoch,
            'epoch': epoch
        }