Example #1
def train(train_loader, model, criterion, optimizer, args):
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels,
                    training_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        outputs = model(imgs)
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:, :, :]

        loss = criterion(texts, gt_texts, kernels, gt_kernels, training_masks)
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()

        if (args.sr_lr is not None):
            updateBN(model, args)

        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks,
                                    running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                        training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
Example #2
def validate(valloader, net, criterion):
    #validate
    net.eval()

    #initialization
    n_classes = 3
    val_loss = 0
    running_metrics = runningScore(n_classes)
    running_metrics.reset()

    with torch.no_grad():
        pbar = tqdm(total=250, desc='Validation')

        for val_idx, data_samples in enumerate(valloader):
            volume, labels = data_samples['data'], data_samples['target']
            volume = volume.cuda()

            labels = labels.long().cuda()

            outputs = net(volume)
            # compute the batch loss once and accumulate its scalar value
            batch_loss = criterion(input_=outputs, target=labels)
            val_loss += batch_loss.item()
            pred = outputs.data.max(1)[1].cpu().numpy()
            gt = labels.cpu().numpy()
            running_metrics.update(gt, pred)

            pbar.update(1)
        pbar.close()
    validation_loss = val_loss / (2 * val_idx)
    #print("Training Loss: {}".format(validation_loss))
    return (validation_loss, running_metrics.get_scores())
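Every example on this page assumes a runningScore(n_classes) metric object with update(gt, pred), get_scores() and reset() methods. A minimal confusion-matrix sketch in the spirit of the widely used pytorch-semseg implementation is given below; key names and the exact return value vary between repositories (some use keys such as 'Overall Acc: \t' and 'Mean IoU : \t', and some also return per-class accuracy).

import numpy as np

class runningScore(object):
    """Accumulates an n_classes x n_classes confusion matrix over batches."""

    def __init__(self, n_classes):
        self.n_classes = n_classes
        self.confusion_matrix = np.zeros((n_classes, n_classes))

    def _fast_hist(self, label_true, label_pred, n_class):
        mask = (label_true >= 0) & (label_true < n_class)
        return np.bincount(
            n_class * label_true[mask].astype(int) + label_pred[mask],
            minlength=n_class ** 2).reshape(n_class, n_class)

    def update(self, label_trues, label_preds):
        for lt, lp in zip(label_trues, label_preds):
            self.confusion_matrix += self._fast_hist(
                lt.flatten(), lp.flatten(), self.n_classes)

    def get_scores(self):
        hist = self.confusion_matrix
        acc = np.diag(hist).sum() / hist.sum()                     # overall pixel acc
        acc_cls = np.nanmean(np.diag(hist) / hist.sum(axis=1))     # mean class acc
        iu = np.diag(hist) / (
            hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))   # per-class IoU
        cls_iu = dict(zip(range(self.n_classes), iu))
        return ({'Overall Acc': acc,
                 'Mean Acc': acc_cls,
                 'Mean IoU': np.nanmean(iu)}, cls_iu)

    def reset(self):
        self.confusion_matrix = np.zeros((self.n_classes, self.n_classes))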
Example #3
def eval_model(model, valid_dl, test_dl, wandb_log, args):
    device = next(model.parameters()).device
    model.eval()
    eval_running_metrics = [runningScore(20) for i in range(5)]
    with torch.no_grad():
        pbar = tqdm.tqdm(enumerate(valid_dl), total=len(valid_dl))
        for _, ((b_clear, ), (b_beta_005, b_beta_01, b_beta_02, ), (b_sparse, _, )) in pbar:
            for running_metrics, b_input in zip(eval_running_metrics[:4], [b_clear, b_beta_005, b_beta_01, b_beta_02, ]):
                b_sparse_pred = model(b_input.to(device)).argmax(1).cpu()
                running_metrics.update(b_sparse.numpy(), b_sparse_pred.numpy(), )
            pbar.set_description("Valid Epoch {:3d}".format(wandb_log.running_metrics_epoch_step))
        if wandb_log.use_wandb:
            for name, running_metrics in zip(['clear', 'beta_0.005', 'beta_0.01', 'beta_0.02', ], eval_running_metrics[:4]):
                wandb_log.running_metrics_epoch_log(name, running_metrics)

        pbar = tqdm.tqdm(enumerate(test_dl), total=len(test_dl))
        for _, (b_input, b_sparse, _, ) in pbar:
            b_sparse_pred = model(b_input.to(device)).argmax(1).cpu()
            eval_running_metrics[-1].update(b_sparse.numpy(), b_sparse_pred.numpy(), )
        if wandb_log.use_wandb:
            wandb_log.running_metrics_epoch_log('testv2', eval_running_metrics[-1])
            pbar.set_description("Test Epoch {:3d}".format(wandb_log.running_metrics_epoch_step))

    for name, running_metrics in zip(['clear', 'beta_0.005', 'beta_0.01', 'beta_0.02', 'testv2'], eval_running_metrics):
        metrics, per_class_IoU = running_metrics.get_scores()
        pbar.write("{} Evaluation Metrics={}".format(name, metrics))
        pbar.write("{} Evaluation per_class_IoU={}".format(name, per_class_IoU))
    return eval_running_metrics
Example #4
def validate(model, dataloader, checkpoint_path, save_path=None):
    since = time.time()

    # set the device to gpu if possible
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Testing Device: {}\n".format(device))
    model.to(device)
    metrics = runningScore(model.num_classes)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Iterate over data (no gradients needed for evaluation).
    model.eval()
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            pred = outputs.data.max(1)[1].cpu().numpy()
            gt = labels.cpu().numpy()
            metrics.update(gt, pred)

    time_elapsed = time.time() - since

    score, class_iou = metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(model.num_classes):
        print(i, class_iou[i])

    return model
Example #5
def main(test_args):

    testset = "/mnt/iusers01/eee01/mchiwml4/CamVid/test"
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean, std)])
    test_dataset = DataLoader(Loaddata(testset,
                                       transform=transform,
                                       target_transform=MaskToTensor()),
                              batch_size=1,
                              shuffle=False,
                              num_workers=8)

    label_num = 11
    model = linknetmodel.linknet(label_num)
    model = model.cuda()
    model.load_state_dict(torch.load(test_args.load_param))
    model.eval()

    total = np.zeros((label_num, ))

    running_metrics = runningScore(label_num)

    time_elapsed = 0
    for j, data in enumerate(test_dataset):

        since = time.time()
        inputs, labels = data
        inputs = Variable(inputs.cuda(), volatile=True)
        outputs = model(inputs)
        time_elapsed += time.time() - since

        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.numpy()
        running_metrics.update(gt, pred)

        for i in range(label_num):
            mask = gt == i  # ground truth mask of class i
            total[i] += np.sum(
                mask)  # total number of pixels of class i (tp+fn)

    print('Inference speed: {:.0f}ms, {:.0f}fps '.format(
        time_elapsed / len(test_dataset) * 1000,
        1 / (time_elapsed / len(test_dataset))))

    score, class_iou, class_acc = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)
    print('class iou: ')
    for i in range(label_num):
        print(i, class_iou[i])
    print('class acc: ')
    for i in range(label_num):
        print(i, class_acc[i])

    print('number of pixels:')
    print(total)
Example #6
def update_metric(outputs_dict,
                  targets_dict,
                  running_metrics,
                  metric_fn_dict,
                  config,
                  summary_all=False,
                  prefix_note='train'):
    """
    update running_metrics and metric_fn_dict summary
    running_metrics: update seg miou and acc for summary
    metric_fn_dict: update aux,edge miou and acc for summary
    """
    if summary_all:
        # convert tensor to numpy,
        np_outputs_dict = {}
        for key, value in outputs_dict.items():
            np_outputs_dict[key] = torch.argmax(
                value, dim=1).data.cpu().numpy()
            if key not in metric_fn_dict.keys():
                if key.startswith(('seg','aux')):
                    metric_fn_dict[key] = runningScore(config.model.class_number)
                elif key.startswith('edge'):
                    metric_fn_dict[key] = runningScore(config.dataset.edge_class_num)
                else:
                    assert False, 'unexpected key %s in outputs_dict' % key

        np_targets_dict = {}
        for key, value in targets_dict.items():
            np_targets_dict[key] = value.data.cpu().numpy()

        # main metric, run for each epoch
        running_metrics.update(np_targets_dict['seg'], np_outputs_dict['seg'])
        for key, value in np_outputs_dict.items():
            if key.startswith(('seg', 'aux')):
                metric_fn_dict[key].update(np_targets_dict['seg'], value)
            elif key.startswith('edge'):
                metric_fn_dict[key].update(np_targets_dict['edge'], value)
            else:
                assert False, 'unexpected key %s in outputs_dict' % key
    else:
        # main metric, run for each epoch
        running_metrics.update(targets_dict['seg'].data.cpu().numpy(
        ), torch.argmax(outputs_dict['seg'], dim=1).data.cpu().numpy())
    return running_metrics, metric_fn_dict
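A usage sketch for update_metric (all variable names below are illustrative, not taken from the source): it is typically called once per batch inside the training or validation loop, with outputs_dict holding the raw logits and targets_dict the label maps.

# illustrative call; seg_logits is [B, C, H, W], seg_labels is [B, H, W]
running_metrics, metric_fn_dict = update_metric(
    outputs_dict={'seg': seg_logits},
    targets_dict={'seg': seg_labels},
    running_metrics=running_metrics,
    metric_fn_dict=metric_fn_dict,
    config=config,
    summary_all=(epoch % 10 == 0),
    prefix_note='train')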
Example #7
def eval(cfg):
    # Setup seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup evaluation data
    data_eval_images = utils.recursive_glob(
        os.path.join(cfg["data"]["path"], 'images'))
    # data_eval_labels = utils.recursive_glob(os.path.join(cfg["data"]["path"], 'labels'))

    # Setup model
    model = DRCN(cfg).to(device)
    checkpoint = torch.load(cfg["training"]["checkpoint"])
    model.load_state_dict(checkpoint["model_state"])

    # Setup Metrics and visualizer
    running_metrics_val = runningScore(cfg["data"]["n_classes"])

    # Ensure the checkpoint directory exists
    utils.mkdirs(cfg["training"]["checkpoint"])

    s = cfg["data"]["img_rows"]
    for img_name in tqdm.tqdm(data_eval_images):
        img = np.array(Image.open(img_name))
        lbl = np.array(Image.open(img_name.replace('images', 'labels')))
        w, h, _ = img.shape
        out = np.zeros((6, w, h))
        for x in range(0, w - s, 200):
            for y in range(0, h - s, 200):
                img_input, lbl_input = threeCityLoader.transform(
                    img[x:x + s, y:y + s, :], lbl[x:x + s, y:y + s])
                model.set_input(img_input.unsqueeze(0), lbl_input.unsqueeze(0))
                model.inference()
                out[:, x:x + s,
                    y:y + s] += model.out1.cpu().detach().numpy().squeeze()
        max_x = (w - s) // 200 * 200
        max_y = (h - s) // 200 * 200
        pred = out[:, :max_x, :max_y]
        pred = pred.argmax(0).squeeze()
        gt = lbl[:max_x, :max_y]

        running_metrics_val.update(gt, pred)

        score, class_iou = running_metrics_val.get_scores()
        for k, v in score.items():
            print(k, v)

        for k, v in class_iou.items():
            print("{}: {}".format(k, v))

    running_metrics_val.reset()
Example #8
def train(cfg, logger):
    # Setup seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    loader_train = get_loader(cfg, "train")
    loader_val = get_loader(cfg, "val")

    # Setup model
    model = DRCN(cfg).to(device)
    start_epoch = 1
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(cfg["training"]["resume"]):
            print("Loading model and optimizer from checkpoint '{}'".format(
                cfg["training"]["resume"]))
            checkpoint = torch.load(cfg["training"]["resume"])
            model.load_state_dict(checkpoint["model_state"])
            start_epoch = checkpoint["epoch"]
            del checkpoint

        else:
            print("No checkpoint found at '{}'".format(
                cfg["training"]["resume"]))

    # Setup Metrics and visualizer
    running_metrics_val = runningScore(cfg["data"]["n_classes"])
    val_loss1_meter = averageMeter()
    val_loss2_meter = averageMeter()
    opt = BaseOptions()
    visualizer = Visualizer(opt)

    # Start training
    utils.mkdirs(cfg["training"]["checkpoint"])

    best_iou = -100.0
    epoch = start_epoch
    train_epochs = cfg["training"]["epochs"]
    iters_per_epoch = len(loader_train)
    while epoch < train_epochs:
        visualizer.reset()
        for iter, (images, labels) in enumerate(loader_train):
            model.set_input(images, labels)
            model.optimize_parameters()

            if iter % cfg["training"]["print_interval"] == 0 and iter != 0:
                print_info = "Epoch:[{:2d}/{:2d}] Iter: [{:4d}/{:4d}] loss1: {:.5f}  loss2: {:.5f}  lr: {:.5f}"\
                    .format(epoch, train_epochs, iter, iters_per_epoch, model.loss1.item(), model.loss2.item(), model.optimizer1.defaults['lr'])
                print(print_info)

            if iter % cfg["training"]["val_interval"] == 0 and iter != 0:
                for images, labels in loader_val:
                    model.set_input(images, labels)
                    model.inference()
                    preds = torch.argmax(model.out1, 1).cpu().numpy()
                    labels = labels.data.numpy().squeeze()

                    running_metrics_val.update(labels, preds)
                    val_loss1_meter.update(model.loss1.item())
                    val_loss2_meter.update(model.loss2.item())

                # visualizer.display_current_results(model.get_current_visuals(), epoch, save_result)
                losses = {
                    'loss1': val_loss1_meter.avg,
                    '5loss2': val_loss2_meter.avg * 5
                }
                score, class_iou = running_metrics_val.get_scores()
                accs = []
                accs.append(score["Overall Acc: \t"])
                accs.extend(list(class_iou.values()))
                accs = dict(zip(AccNames, accs))
                tmp = iter / iters_per_epoch
                visualizer.plot_current_losses(epoch, tmp, losses)
                visualizer.plot_current_accuracy(epoch, tmp, accs)
                logger.info(
                    "Epoch:{:03d} val_loss1:{:.05f} val_loss2:{:.05f}".format(
                        epoch, val_loss1_meter.avg, val_loss2_meter.avg))
                for k, v in score.items():
                    print(k, v)
                    logger.info("{}: {}".format(k, v))

                for k, v in class_iou.items():
                    print("{}: {}".format(k, v))
                    logger.info("{}: {}".format(k, v))

                running_metrics_val.reset()

                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": epoch,
                        "model_state": model.state_dict(),
                        "optimizer1_state": model.optimizer1.state_dict(),
                        "scheduler1_state": model.scheduler1.state_dict(),
                        "optimizer2_state": model.optimizer2.state_dict(),
                        "scheduler2_state": model.scheduler2.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(
                        cfg["training"]["checkpoint"],
                        "{}_{}_best_model.pkl".format(cfg["model"]["arch"],
                                                      cfg["data"]["dataset"]),
                    )
                    torch.save(state, save_path)
        epoch += 1
Example #9
def keras_fit(model, train_loader=None, val_loader=None, config=None):
    """
    supports models with multiple outputs
    the old `args` parameter was removed (deprecated)
    """
    # support for pure model without config
    if config is None:
        config = model.config

    # support for cpu/gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    if config.args.checkpoint_path is not None:
        ckpt_path = get_ckpt_path(config.args.checkpoint_path)
        print('load checkpoint file from', ckpt_path)
        state_dict = torch.load(ckpt_path)
        if 'model_state' in state_dict.keys():
            model.load_state_dict(state_dict['model_state'])
        else:
            model.load_state_dict(state_dict)

    optimizer = get_optimizer(model, config)
    scheduler = get_scheduler(optimizer, config)
    
    if config.args.center_loss is not None:
        center_loss_model = CenterLoss(model.center_channels, model.class_number,
                                       ignore_index=config.dataset.ignore_index,
                                       loss_fn=config.args.center_loss).to(device)
        center_optimizer = torch.optim.SGD(center_loss_model.parameters(), lr=0.5)
    else:
        center_loss_model = None
        center_optimizer = None
        
    loss_fn_dict = get_loss_fn_dict(config)
    # for each output, generate the metric_fn_dict automatically.
    metric_fn_dict = {}
    # output for main output
    running_metrics = runningScore(config.model.class_number)

    time_str = time.strftime("%Y-%m-%d___%H-%M-%S", time.localtime())
    log_dir = os.path.join(config.args.log_dir, model.name,
                           config.dataset.name, config.args.note, time_str)
#    checkpoint_path = os.path.join(
#        log_dir, "{}_{}_best_model.pkl".format(model.name, config.dataset.name))
    writer = None
    best_iou = 0.0
    # create loader from config
    if train_loader is None and val_loader is None:
        train_loader, val_loader = get_loader(config)

    loaders = [train_loader, val_loader]
    loader_names = ['train', 'val']

    # support for multiple gpu, model will be changed, model.name will not exist
    if device.type == 'cuda':
        gpu_num = torch.cuda.device_count()
        if gpu_num > 1:
            device_ids = [i for i in range(gpu_num)]
            model = torch.nn.DataParallel(model, device_ids=device_ids)

    # eval module
    if train_loader is None:
        config.args.n_epoch = 1

    summary_all_step = max(1, config.args.n_epoch // 10)
    # 1 <= summary_metric_step <= 10
    summary_metric_step = max(min(10, config.args.n_epoch // 10), 1)

    tqdm_epoch = trange(config.args.n_epoch, desc='epoches', leave=True)
    for epoch in tqdm_epoch:
        tqdm_epoch.set_postfix(best_iou=best_iou)
        for loader, loader_name in zip(loaders, loader_names):
            if loader is None:
                continue
        
            # summary all only 10 times
            if epoch % summary_all_step == 0:
                summary_all = True
                summary_metric = True
            else:
                summary_all = False
                if epoch % summary_metric_step == 0 or epoch == config.args.n_epoch - 1:
                    summary_metric = True
                else:
                    summary_metric = False

            # summary_all=True ==> summary_metric=True
            if loader_name == 'val':
                # val at summary, and val for plateau scheduler
                if (not summary_metric) and (scheduler is None):
                    continue

                with torch.no_grad():
                    outputs_dict, targets_dict, \
                    running_metrics, metric_fn_dict, \
                    grads_dict, losses_dict, loss_weight_dict = train_val(
                        model=model,
                        optimizer=optimizer,
                        scheduler=scheduler,
                        loss_fn_dict=loss_fn_dict,
                        metric_fn_dict=metric_fn_dict,
                        running_metrics=running_metrics,
                        loader=loader,
                        config=config,
                        epoch=epoch,
                        summary_all=summary_all,
                        summary_metric=summary_metric,
                        loader_name=loader_name,
                        center_loss_model=center_loss_model,
                        center_optimizer=center_optimizer)
                
                # use rop/poly_rop to schedule the learning rate
                if isinstance(scheduler, (poly_rop, rop)):
                    total_loss = sum(losses_dict['%s/total_loss' % loader_name])
                    scheduler.step(total_loss)
            else:
                outputs_dict, targets_dict, \
                running_metrics, metric_fn_dict, \
                grads_dict, losses_dict, loss_weight_dict = train_val(
                    model=model,
                    optimizer=optimizer,
                    scheduler=scheduler,
                    loss_fn_dict=loss_fn_dict,
                    metric_fn_dict=metric_fn_dict,
                    running_metrics=running_metrics,
                    loader=loader,
                    config=config,
                    epoch=epoch,
                    summary_all=summary_all,
                    summary_metric=summary_metric,
                    loader_name=loader_name,
                    center_loss_model=center_loss_model,
                    center_optimizer=center_optimizer)

                # use cos_lr to schedule the learning rate
                if isinstance(scheduler, cos_lr):
                    scheduler.step()

            metric_dict, class_iou_dict = get_metric(
                running_metrics, metric_fn_dict, 
                summary_all=summary_all, prefix_note=loader_name, summary_metric=summary_metric)
            if loader_name == 'val' and summary_metric:
                val_iou = metric_dict['val/iou']
                tqdm.write('epoch %d, current val iou is %0.5f' %
                           (epoch, val_iou))
                if val_iou >= best_iou:
                    best_iou = val_iou
                    iou_save_threshold = config.args.iou_save_threshold

                    # save the best the model if good enough
                    if best_iou >= iou_save_threshold:
                        print('save current best model', '*'*30)
                        checkpoint_path = os.path.join(
                            log_dir, 'model-best-%d.pkl' % epoch)
                        save_model_if_necessary(model, config, checkpoint_path)

                # save the last model if the best model not good enough
                if epoch == config.args.n_epoch-1 and best_iou < iou_save_threshold:
                    print('save the last model', '*'*30)
                    checkpoint_path = os.path.join(
                        log_dir, 'model-last-%d.pkl' % epoch)
                    save_model_if_necessary(model, config, checkpoint_path)

            # return valid image when summary_all=True
            image_dict = get_image_dict(
                outputs_dict, targets_dict, config, 
                summary_all=summary_all, prefix_note=loader_name)
            
            if writer is None:
                writer = init_writer(config=config, log_dir=log_dir)
            
            # change weight and learning rate (train only)
            if loader_name == 'train':
                weight_dict = {}
                for k, v in loss_weight_dict.items():
                    weight_dict['%s/weight_%s' % (loader_name, k)] = v
                    
                lr_dict = get_lr_dict(optimizer, prefix_note=loader_name)
            else:
                weight_dict = {}
                lr_dict = {}
            
            write_summary(writer=writer,
                          losses_dict=losses_dict,
                          metric_dict=metric_dict,
                          class_iou_dict=class_iou_dict,
                          lr_dict=lr_dict,
                          image_dict=image_dict,
                          weight_dict=weight_dict,
                          grads_dict=grads_dict,
                          epoch=epoch)
    writer.close()
    print('total epoch is %d, best iou is' % config.args.n_epoch, best_iou)
    return best_iou
Example #10
    def _train_epoch(self, epoch):
        self.model.train()
        epoch_start = time.time()
        batch_start = time.time()
        train_loss = 0.
        running_metric_text = runningScore(2)
        lr = self.optimizer.param_groups[0]['lr']

        for i, batch in enumerate(self.train_loader):
            if i >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']

            # convert the batch tensors and move them to the GPU
            for key, value in batch.items():
                if value is not None:
                    if isinstance(value, torch.Tensor):
                        batch[key] = value.to(self.device)
            cur_batch_size = batch['img'].size()[0]

            preds = self.model(batch['img'])
            loss_dict = self.criterion(preds, batch)
            # backward
            self.optimizer.zero_grad()
            loss_dict['loss'].backward()
            self.optimizer.step()
            if self.config.lr_scheduler_type == 'WarmupPolyLR':
                self.scheduler.step()
            # acc iou
            score_shrink_map = cal_text_score(
                preds[:, 0, :, :],
                batch['shrink_map'],
                batch['shrink_mask'],
                running_metric_text,
                thred=self.config.post_processing_thresh)

            # record loss and acc to the log
            loss_str = 'loss: {:.4f}, '.format(loss_dict['loss'].item())
            for idx, (key, value) in enumerate(loss_dict.items()):
                loss_dict[key] = value.item()
                if key == 'loss':
                    continue
                loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
                if idx < len(loss_dict) - 1:
                    loss_str += ', '

            train_loss += loss_dict['loss']
            acc = score_shrink_map['Mean Acc']
            iou_shrink_map = score_shrink_map['Mean IoU']

            if self.global_step % self.log_iter == 0:
                batch_time = time.time() - batch_start
                self.logger_info(
                    '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'
                    .format(epoch, self.epochs, i + 1, self.train_loader_len,
                            self.global_step,
                            self.log_iter * cur_batch_size / batch_time, acc,
                            iou_shrink_map, loss_str, lr, batch_time))
                batch_start = time.time()

        return {
            'train_loss': train_loss / self.train_loader_len,
            'lr': lr,
            'time': time.time() - epoch_start,
            'epoch': epoch
        }
Example #11
def train(cfg, writer, logger):

    # Setup seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    trainloader = get_loader(cfg, "train")
    valloader = get_loader(cfg, "val")

    n_classes = cfg["data"]["n_classes"]
    n_channels = cfg["data"]["channels"]

    # Setup Metrics
    running_metrics_val = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg, n_classes, n_channels).to(device)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {
        k: v
        for k, v in cfg["training"]["optimizer"].items() if k != "name"
    }

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, cfg["training"]["lr_schedule"])

    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    start_iter = 0
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(cfg["training"]["resume"]):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    cfg["training"]["resume"]))
            checkpoint = torch.load(cfg["training"]["resume"])
            model.module.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            logger.info("Loaded checkpoint '{}' (iter {})".format(
                cfg["training"]["resume"], checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(
                cfg["training"]["resume"]))

    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_iou = -100.0
    i = start_iter
    flag = True

    # fig = plt.figure()
    # plt.rcParams['xtick.major.pad'] = '15'
    # fig.show()
    # fig.canvas.draw()

    while i <= cfg["training"]["train_iters"] and flag:
        for (images, labels) in trainloader:
            i += 1
            start_ts = time.time()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            # plot_grad_flow(model.named_parameters(), fig)

            # zero mean conv for layer 1 of dsm encoder
            optimizer.step()
            scheduler.step()
            # m = model._modules['module'].encoderDSM._modules['0']._modules['0']
            # model._modules['module'].encoderDSM._modules['0']._modules['0'].weight = m.weight - torch.mean(m.weight)
            model = zero_mean(model, all=False)

            time_meter.update(time.time() - start_ts)

            if (i + 1) % cfg["training"]["print_interval"] == 0:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    i + 1,
                    cfg["training"]["train_iters"],
                    loss.item(),
                    time_meter.avg / cfg["training"]["batch_size"],
                )

                print(print_str)
                logger.info(print_str)
                writer.add_scalar("loss/train_loss", loss.item(), i + 1)
                time_meter.reset()

            if (i + 1) % cfg["training"]["val_interval"] == 0 or (
                    i + 1) == cfg["training"]["train_iters"]:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val,
                                labels_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)

                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)

                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()
                        # plt.imshow(v_loader.decode_segmap(gt[0,:,:]))
                        # plt.imshow(v_loader.decode_segmap(pred[0, :, :]))
                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())

                writer.add_scalar("loss/val_loss", val_loss_meter.avg, i + 1)
                logger.info("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                score, class_iou = running_metrics_val.get_scores()
                for k, v in score.items():
                    #print(k, v)
                    logger.info("{}: {}".format(k, v))
                    writer.add_scalar("val_metrics/{}".format(k), v, i + 1)

                for k, v in class_iou.items():
                    logger.info("{}: {}".format(k, v))
                    writer.add_scalar("val_metrics/cls_{}".format(k), v, i + 1)

                val_loss_meter.reset()
                running_metrics_val.reset()

                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(
                        writer.file_writer.get_logdir(),
                        "{}_{}_best_model.pkl".format(cfg["model"]["arch"],
                                                      cfg["data"]["dataset"]),
                    )
                    torch.save(state, save_path)

            if (i + 1) == cfg["training"]["train_iters"]:
                flag = False
                break
Example #12
def train_net(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config['pruned']['gpu_id']
    data_loader = DataLoader(config)
    train_loader = torch.utils.data.DataLoader(
        data_loader,
        batch_size=config['train']['batch_size'],
        shuffle=True,
        num_workers=config['train']['num_workers'],
        worker_init_fn=worker_init_fn,
        drop_last=True,
        pin_memory=False)

    start_epoch = 0
    running_metric_binary = runningScore(2)

    if not (os.path.exists(config['train']['checkpoints'])):
        os.mkdir(config['train']['checkpoints'])
    checkpoints = os.path.join(
        config['pruned']['save_checkpoints'], "DB_%s_bs_%d_ep_%d" %
        (config['train']['backbone'], config['train']['batch_size'],
         config['train']['n_epoch']))
    if not (os.path.exists(checkpoints)):
        os.mkdir(checkpoints)

    model = DBNet(config)
    criterion = L1BalanceCELoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config['pruned']['finetune_lr'],
                                momentum=0.99,
                                weight_decay=5e-4)

    if config['pruned']['restore']:
        print('Resuming from checkpoint.')
        assert os.path.isfile(
            config['pruned']
            ['resume']), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(config['pruned']['resume'])
        start_epoch = checkpoint['epoch']
        model = load_prune_model(model,
                                 config['pruned']['checkpoints_dict']).cuda()
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        log_write = Logger(os.path.join(checkpoints, 'log.txt'),
                           title=config['train']['backbone'],
                           resume=True)
    else:
        print('Training from scratch.')
        model_dict = torch.load(config['pruned']['pruned_checkpoints'])
        model = load_prune_model(model,
                                 config['pruned']['checkpoints_dict']).cuda()
        print(model)
        try:
            model.load_state_dict(model_dict)
        except:
            state = model.state_dict()
            for key in state.keys():
                state[key] = model_dict['module.' + key]
            model.load_state_dict(state)
        log_write = Logger(os.path.join(checkpoints, 'log.txt'),
                           title=config['train']['backbone'])
        log_write.set_names([
            '   epoch', 'Total loss', '  Bce loss', 'Thresh loss', '  L1 loss',
            'Binary Acc', 'Binary IoU', '   rescall', ' precision', '   hmean'
        ])
    max_hmean = -1
    for epoch in range(start_epoch, config['pruned']['n_epoch']):
        model.train()

        bce_loss_list = []
        thresh_loss_list = []
        l1_loss_list = []
        total_loss_list = []

        if (config['train']['decay_method'] == 'e_decay'):
            adjust_learning_rate_poly(config['pruned']['finetune_lr'],
                                      optimizer,
                                      epoch,
                                      max_epoch=config['pruned']['n_epoch'],
                                      factor=0.9)
        else:
            adjust_learning_rate(config, optimizer, epoch,
                                 config['train']['gama'])

        for batch_idx, (imgs, gts, gt_masks, thresh_maps,
                        thresh_masks) in enumerate(train_loader):
            imgs = Variable(imgs.cuda())
            gts = Variable(gts.cuda())
            gt_masks = Variable(gt_masks.cuda())
            thresh_maps = Variable(thresh_maps.cuda())
            thresh_masks = Variable(thresh_masks.cuda())
            batch = {}
            batch['gt'] = gts
            batch['mask'] = gt_masks
            batch['thresh_map'] = thresh_maps
            batch['thresh_mask'] = thresh_masks

            pre = model(imgs)
            loss, metrics = criterion(pre, batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            score_binary = cal_binary_score(pre['binary'], gts,
                                            gt_masks.unsqueeze(1),
                                            running_metric_binary)

            bce_loss_list.append(metrics['bce_loss'].item())
            thresh_loss_list.append(metrics['thresh_loss'].item())
            l1_loss_list.append(metrics['l1_loss'].item())
            total_loss_list.append(loss.item())
            if batch_idx % config['train']['show_step'] == 0:
                if (config['train']['print_format'] == 'linux'):
                    headers = [
                        'epoch/epochs', 'batch/batchs', 'TotalLoss', 'BceLoss',
                        ' ThreshLoss', 'L1Loss', 'Binary Acc', 'Binary IoU',
                        'Lr Rate'
                    ]
                    show_item = [[
                        str(epoch) + '/' + str(config['pruned']['n_epoch']),
                        str(batch_idx + 1) + '/' + str(len(train_loader)),
                        get_str(np.mean(total_loss_list)),
                        get_str(np.mean(bce_loss_list)),
                        get_str(np.mean(thresh_loss_list)),
                        get_str(np.mean(l1_loss_list)),
                        get_str(score_binary['Mean Acc']),
                        get_str(score_binary['Mean IoU']),
                        get_str(optimizer.param_groups[0]['lr'])
                    ]]
                    print_table(headers, show_item, type_str='train')
                else:
                    output_log = '({epoch}/{epochs}/{batch}/{size}) | TotalLoss: {total_loss:.4f} | BceLoss: {bce_loss:.4f} | ThreshLoss: {thresh_loss: .4f} | L1Loss: {l1_loss: .4f} | Binary Acc: {bin_acc: .4f} | Binary IoU: {bin_iou: .4f} | Lr: {lr: .4f}'.format(
                        epoch=epoch,
                        epochs=config['pruned']['n_epoch'],
                        batch=batch_idx + 1,
                        size=len(train_loader),
                        total_loss=np.mean(total_loss_list),
                        bce_loss=np.mean(bce_loss_list),
                        thresh_loss=np.mean(thresh_loss_list),
                        l1_loss=np.mean(l1_loss_list),
                        bin_acc=score_binary['Mean Acc'],
                        bin_iou=score_binary['Mean IoU'],
                        lr=optimizer.param_groups[0]['lr'])
                    print(output_log)
                    sys.stdout.flush()

        if (epoch > config['pruned']['start_val_epoch']):
            result_dict = val(model, config)
            rescall, precision, hmean = result_dict['recall'], result_dict[
                'precision'], result_dict['hmean']
            print('epoch:', epoch, 'rescall:', rescall, 'precision:',
                  precision, 'hmean:', hmean)
        else:
            rescall = 0
            precision = 0
            hmean = 0
        log_write.append([
            epoch,
            np.mean(total_loss_list),
            np.mean(bce_loss_list),
            np.mean(thresh_loss_list),
            np.mean(l1_loss_list), score_binary['Mean Acc'],
            score_binary['Mean IoU'], rescall, precision, hmean
        ])
        if (hmean > max_hmean and config['pruned']['start_val_epoch'] <
                config['pruned']['n_epoch']):
            max_hmean = hmean
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'lr': optimizer.param_groups[0]['lr'],
                    'optimizer': optimizer.state_dict(),
                },
                checkpoint=checkpoints,
                filename='best_model.pth.tar')

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': optimizer.param_groups[0]['lr'],
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=checkpoints)
Example #13
def do_train_or_val(net, args=None, train_loader=None, val_loader=None):
    gpu_config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.3
    gpu_config.gpu_options.allow_growth = True
    session = tf.Session(config=gpu_config)
    KTF.set_session(session)
    session.run(tf.global_variables_initializer())

    if args is None:
        args = net.config.training
    metrics = net.get_metrics(net.class_number)
    opt = net.get_optimizer(args)
    net.model.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=metrics)

    if train_loader is None and val_loader is None:
        train_dataset = dataset_generalize(net.config.dataset,
                                           split='train',
                                           bchw=False)
        train_loader = TD.DataLoader(dataset=train_dataset,
                                     batch_size=net.config.dataset.batch_size,
                                     shuffle=True,
                                     drop_last=False)

        val_dataset = dataset_generalize(net.config.dataset,
                                         split='val',
                                         bchw=False)
        val_loader = TD.DataLoader(dataset=val_dataset,
                                   batch_size=net.config.dataset.batch_size,
                                   shuffle=True,
                                   drop_last=False)

    running_metrics = runningScore(net.class_number)

    time_str = time.strftime("%Y-%m-%d___%H-%M-%S", time.localtime())
    log_dir = os.path.join(args.log_dir, net.name, args.dataset_name,
                           args.note, time_str)
    checkpoint_path = os.path.join(
        log_dir, "{}_{}_best_model.pkl".format(net.name, args.dataset_name))
    os.makedirs(log_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=log_dir)
    config = net.config
    config_str = json.dumps(config, indent=2, sort_keys=True).replace(
        '\n', '\n\n').replace('  ', '\t')
    writer.add_text(tag='config', text_string=config_str)

    # write config to config.txt
    config_path = os.path.join(log_dir, 'config.txt')
    config_file = open(config_path, 'w')
    json.dump(config, config_file, sort_keys=True)
    config_file.close()

    best_iou = 0.6

    loaders = [train_loader, val_loader]
    loader_names = ['train', 'val']
    for epoch in range(args.n_epoch):
        for loader, loader_name in zip(loaders, loader_names):
            if loader is None:
                continue

            if loader_name == 'val':
                if epoch % 5 != 0:
                    continue

            print(loader_name + '.' * 50)
            n_step = len(loader)
            losses = []

            for i, (images, labels) in enumerate(loader):
                x = images.data.numpy()
                trues = labels.data.numpy()
                y = to_categorical(trues, net.class_number)
                if loader_name == 'train':
                    outputs = net.model.train_on_batch(x, y)
                else:
                    outputs = net.model.test_on_batch(x, y)

                predict_outputs = net.model.predict_on_batch(x)
                predicts = np.argmax(predict_outputs, axis=-1)

                losses.append(outputs[0])
                if epoch % 5 == 0:
                    print('keras metrics as follow:', '*' * 30)
                    print("%s Epoch [%d/%d] Step [%d/%d]" %
                          (loader_name, epoch + 1, args.n_epoch, i, n_step))
                    for name, value in zip(net.model.metrics_names, outputs):
                        print(name, value)

                    print('running metrics as follow:', '*' * 30)
                    running_metrics.update(trues, predicts)
                    score, class_iou = running_metrics.get_scores()
                    for k, v in score.items():
                        print(k, v)

            if epoch % 5 == 0:
                writer.add_scalar('%s/loss' % loader_name, np.mean(losses),
                                  epoch)
                writer.add_scalar('%s/acc' % loader_name,
                                  score['Overall Acc: \t'], epoch)
                writer.add_scalar('%s/iou' % loader_name,
                                  score['Mean IoU : \t'], epoch)

            running_metrics.reset()
            if loader_name == 'val':
                if score['Mean IoU : \t'] >= best_iou:
                    best_iou = score['Mean IoU : \t']
                    net.model.save(checkpoint_path)


#                if epoch % (1+args.n_epoch//10) == 0:
#                    print('write image to tensorboard'+'.'*50)
#                    idx=np.random.choice(predicts.shape[0])
#                    writer.add_image('val/images',x[idx,:,:,:],epoch)
#                    writer.add_image('val/predicts', torch.from_numpy(predicts[idx,:,:]), epoch)
#                    writer.add_image('val/trues', torch.from_numpy(trues[idx,:,:]), epoch)
#                    diff_img=(predicts[idx,:,:]==trues[idx,:,:]).astype(np.uint8)
#                    writer.add_image('val/difference', torch.from_numpy(diff_img), epoch)

    print('best iou is', best_iou)
    writer.close()
Example #14
    def train(self, args=None, train_loader=None, val_loader=None):
        if args is None:
            args = self.config.training
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='adam',
                           metrics=['acc'])

        if train_loader is None and val_loader is None:
            train_dataset = dataset_generalize(config.dataset,
                                               split='train',
                                               bchw=False)
            train_loader = TD.DataLoader(
                dataset=train_dataset,
                batch_size=self.config.dataset.batch_size,
                shuffle=True,
                drop_last=False)

            val_dataset = dataset_generalize(config.dataset,
                                             split='val',
                                             bchw=False)
            val_loader = TD.DataLoader(
                dataset=val_dataset,
                batch_size=self.config.dataset.batch_size,
                shuffle=True,
                drop_last=False)

        running_metrics = runningScore(self.class_number)

        time_str = time.strftime("%Y-%m-%d___%H-%M-%S", time.localtime())
        log_dir = os.path.join(args.log_dir, self.name, self.dataset_name,
                               args.note, time_str)
        checkpoint_path = os.path.join(
            log_dir, "{}_{}_best_model.pkl".format(self.name,
                                                   self.dataset_name))
        os.makedirs(log_dir, exist_ok=True)
        writer = SummaryWriter(log_dir=log_dir)
        best_iou = 0.6

        loaders = [train_loader, val_loader]
        loader_names = ['train', 'val']
        for epoch in range(args.n_epoch):
            for loader, loader_name in zip(loaders, loader_names):
                if loader is None:
                    continue

                if loader_name == 'val':
                    if epoch % 10 != 0:
                        continue

                print(loader_name + '.' * 50)
                n_step = len(loader)
                losses = []

                for i, (images, labels) in enumerate(loader):
                    x = images.data.numpy()
                    trues = labels.data.numpy()
                    y = to_categorical(trues, self.class_number)
                    if loader_name == 'train':
                        outputs = self.model.train_on_batch(x, y)
                    else:
                        outputs = self.model.test_on_batch(x, y)

                    predict_outputs = self.model.predict_on_batch(x)
                    predicts = np.argmax(predict_outputs, axis=-1)

                    losses.append(outputs[0])
                    if i % 5 == 0:
                        print(
                            "%s Epoch [%d/%d] Step [%d/%d]" %
                            (loader_name, epoch + 1, args.n_epoch, i, n_step))
                        for name, value in zip(self.model.metrics_names,
                                               outputs):
                            print(name, value)

                        running_metrics.update(trues, predicts)
                        score, class_iou = running_metrics.get_scores()
                        for k, v in score.items():
                            print(k, v)

                writer.add_scalar('%s/loss' % loader_name, np.mean(losses),
                                  epoch)
                writer.add_scalar('%s/acc' % loader_name,
                                  score['Overall Acc: \t'], epoch)
                writer.add_scalar('%s/iou' % loader_name,
                                  score['Mean IoU : \t'], epoch)

                running_metrics.reset()
                if loader_name == 'val':
                    if score['Mean IoU : \t'] >= best_iou:
                        best_iou = score['Mean IoU : \t']
                        self.model.save(checkpoint_path)

                    if epoch % (1 + args.n_epoch // 10) == 0:
                        print('write image to tensorboard' + '.' * 50)
                        idx = np.random.choice(predicts.shape[0])
                        writer.add_image('val/images', x[idx, :, :, :], epoch)
                        writer.add_image('val/predicts',
                                         torch.from_numpy(predicts[idx, :, :]),
                                         epoch)
                        writer.add_image('val/trues',
                                         torch.from_numpy(trues[idx, :, :]),
                                         epoch)
                        diff_img = (
                            predicts[idx, :, :] == trues[idx, :, :]).astype(
                                np.uint8)
                        writer.add_image('val/difference',
                                         torch.from_numpy(diff_img), epoch)

        writer.close()
Example #15
def train(data_path, models_path, backend, snapshot, crop_x, crop_y,
          batch_size, alpha, epochs, start_lr, milestones, gpu):
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    net, starting_epoch = build_network(snapshot, backend)
    data_path = os.path.abspath(os.path.expanduser(data_path))
    models_path = os.path.abspath(os.path.expanduser(models_path))
    os.makedirs(models_path, exist_ok=True)
    '''
        To follow this training routine you need a DataLoader that yields the tuples of the following format:
        (Bx3xHxW FloatTensor x, BxHxW LongTensor y, BxN LongTensor y_cls) where
        x - batch of input images,
        y - batch of ground truth seg maps,
        y_cls - batch of 1D tensors of dimensionality N: N total number of classes, 
        y_cls[i, T] = 1 if class T is present in image i, 0 otherwise
    '''

    voc_data = pascalVOCLoader(root=data_path,
                               is_transform=True,
                               augmentations=None)
    # train_loader, class_weights, n_images = None, None, None
    train_loader = DataLoader(voc_data,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=0)
    max_steps = len(voc_data)
    class_weights = None

    optimizer = optim.Adam(net.parameters(), lr=start_lr)
    scheduler = MultiStepLR(optimizer,
                            milestones=[int(x) for x in milestones.split(',')],
                            gamma=0.1)
    running_score = runningScore(21)
    for epoch in range(starting_epoch, starting_epoch + epochs):
        seg_criterion = nn.NLLLoss(weight=class_weights)
        cls_criterion = nn.BCEWithLogitsLoss(weight=class_weights)
        epoch_losses = []
        # train_iterator = tqdm(train_loader, total=max_steps // batch_size + 1)
        net.train()
        print('------------epoch[{}]----------'.format(epoch + 1))
        for i, (x, y, y_cls) in enumerate(train_loader):
            optimizer.zero_grad()
            x, y, y_cls = Variable(x).cuda(), Variable(y).cuda(), Variable(
                y_cls).float().cuda()
            out, out_cls = net(x)
            pred = out.data.max(1)[1].cpu().numpy()
            seg_loss, cls_loss = seg_criterion(out, y), cls_criterion(
                out_cls, y_cls)
            loss = seg_loss + alpha * cls_loss
            epoch_losses.append(loss.item())
            running_score.update(y.data.cpu().numpy(), pred)
            if (i + 1) % 138 == 0:
                score, class_iou = running_score.get_scores()
                for k, v in score.items():
                    print(k, v)
                    logger.info('{}:{}'.format(k, v))
                running_score.reset()
            print_format_str = "Epoch[{}] batch[{}] loss = {:.4f} LR = {}"
            print_str = print_format_str.format(epoch + 1, i + 1, loss.item(),
                                                scheduler.get_lr()[0])
            print(print_str)
            logger.info(print_str)
            '''
            status = '[{}] loss = {:.4f} avg = {:.4f}, LR = {}'.format(
                epoch + 1, loss.item(), np.mean(epoch_losses), scheduler.get_lr()[0])
            train_iterator.set_description(status)
            '''
            loss.backward()
            optimizer.step()

        scheduler.step()
        if (epoch + 1) % 20 == 0:
            train_loss = ('%.4f' % np.mean(epoch_losses))
            torch.save(
                net.state_dict(),
                os.path.join(
                    models_path,
                    '_'.join(["PSPNet", str(epoch + 1), train_loss]) + '.pth'))
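The docstring above describes a y_cls target: a per-image multi-hot vector marking which classes appear in each ground-truth map. The original loader is not shown; a minimal sketch of how such a target could be built from a BxHxW label tensor is given below (the helper name is hypothetical).

import torch

def make_class_presence(y, n_classes):
    """y: BxHxW LongTensor of labels. Returns a BxN float tensor with
    y_cls[i, T] = 1 if class T occurs anywhere in image i, else 0."""
    y_cls = torch.zeros(y.size(0), n_classes)
    for i in range(y.size(0)):
        present = torch.unique(y[i])
        present = present[(present >= 0) & (present < n_classes)]
        y_cls[i, present.long()] = 1.0
    return y_cls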
Example #16
output_path = 'output/pspnet/voc2012/voc_val'
label_path = '/home/yzbx/.cv/datasets/VOC/VOCdevkit/VOC2012/SegmentationClass'
val_output_files = glob.glob(os.path.join(output_path, '*.png'))
# all label files for train and validation
label_files = glob.glob(os.path.join(label_path, '*.png'))
val_label_files = []

for f in val_output_files:
    basename = os.path.basename(f)
    val_label_file = os.path.join(label_path, basename)
    assert val_label_file in label_files, '%s %s %s' % (
        basename, val_label_file, label_files[0])
    val_label_files.append(val_label_file)

run_score = runningScore(21)
for output_file, label_file in tqdm(zip(val_output_files, val_label_files)):
    label_img_pil = Image.open(label_file)
    label_img = np.array(label_img_pil, dtype=np.uint8)

    output_img_pil = Image.open(output_file)
    output_img = np.array(output_img_pil, dtype=np.uint8)
    assert label_img.shape == output_img.shape
    run_score.update(label_trues=label_img, label_preds=output_img)
#    run_score.update(label_trues=output_img, label_preds=label_img)

score, class_iou = run_score.get_scores()
for k, v in score.items():
    print(k, v)

labels = [
Example #17
        'params': [p for p in model.parameters() if p.requires_grad]
    }]
    if config.optimizer == 'adam':
        optimizer = torch.optim.Adam(optimizer_params,
                                     lr=config['init_lr'],
                                     amsgrad=False)
    else:
        assert config.init_lr > 1e-3
        optimizer = torch.optim.SGD(optimizer_params,
                                    lr=config['init_lr'],
                                    momentum=0.9,
                                    weight_decay=1e-4)

    metric_mask_loss = Metric_Mean()
    metric_total_loss = Metric_Mean()
    running_metrics = runningScore(config.class_number)
    tqdm_epoch = trange(config['epoch'],
                        desc='{} epochs'.format(config.note),
                        leave=True)
    for epoch in tqdm_epoch:
        for split in ['train', 'val']:
            if split == 'train':
                model.train()
            else:
                model.eval()

            metric_mask_loss.reset()
            metric_total_loss.reset()
            running_metrics.reset()

            tqdm_step = tqdm(dataset_loaders[split], desc='steps', leave=False)