Example 1
def visualize(cfg):
    base_dir = os.path.join('visualization', cfg["training"]["logdir"])
    for task in cfg['training']["tasks"]:
        task_dir = os.path.join(base_dir, task)
        if not os.path.exists(task_dir):
            os.makedirs(task_dir)

    # Setup seeds
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    loader = data_loader(
        data_path,
        split=cfg['data']['val_split'],
        is_transform=True,
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        get_flow=True,
        discard_flow_bottom=cfg['data']['discard_flow_bottom'],
    )

    n_classes = loader.n_classes
    vizloader = data.DataLoader(loader, batch_size=1, shuffle=False)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)
    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    i = 0
    while i <= cfg['training']['train_iters']:
        for (images, labels, flows) in vizloader:
            i += 1
            images = images.to(device)
            target = labels.to(device)
            flow = flows.to(device)
            pred = model(images)
            if 'input' in cfg['training']['tasks']:
                visualize_input(images, i, base_dir)
            if 'output' in cfg['training']['tasks']:
                visualize_output(pred, i, base_dir)
            if 'ground_truth' in cfg['training']['tasks']:
                visualize_ground_truth(target, i, base_dir)
            if 'flow' in cfg['training']['tasks']:
                visualize_flow(flow, i, base_dir)
            if 'fl_weights' in cfg['training']['tasks']:
                visualize_fl_weights(pred, flow, i, base_dir)
            print("done with " + str(i) + "...")
            if i > cfg['training']['train_iters']:
                break
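
visualize() reads args.model_path from module scope rather than from its own parameters, so an args object must exist before the function is called. A minimal driver sketch, assuming the function sits in a module that also provides get_loader, get_model and convert_state_dict; the file paths are placeholders:

import argparse

import yaml

# Hypothetical driver: `args` must exist at module level because visualize()
# reads args.model_path. Both paths below are placeholders.
args = argparse.Namespace(model_path="runs/example/best_model.pkl")

with open("configs/example.yml") as fp:
    cfg = yaml.safe_load(fp)

visualize(cfg)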

Example 2
    parser.add_argument(
        '-viz', dest='viz', action='store_true',
        help='Visualize (i.e. save) output of flow.'
    )
    parser.add_argument(
        '-cont', dest='cont', action='store_true',
        help='Continue instead of overwrite'
    )

    args = parser.parse_args()

    with open(args.config) as fp:
        cfg = yaml.safe_load(fp)

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    t_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['train_split'],
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        n_img_after=1)

    optical_flow(t_loader, args.viz, args.cont)

    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']))

Example 3
def validate(cfg, args):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    loader = data_loader(
        data_path,
        split=cfg['data']['val_split'],
        is_transform=True,
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
    )

    n_classes = loader.n_classes

    valloader = data.DataLoader(loader, 
                                batch_size=cfg['training']['batch_size_lbl'],
                                num_workers=8)
    running_metrics = runningScore(n_classes)

    # Setup Model

    model = get_model(cfg['model'], n_classes).to(device)
    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    for i, (images, labels, _) in enumerate(valloader):
        start_time = timeit.default_timer()

        images = images.to(device)

        if args.eval_flip:
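            # Test-time augmentation: average the logits over the image and its
            # horizontally flipped copy, then take the argmax of the mean.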
            outputs = model(images)

            # Flip images in numpy (not support in tensor)
            outputs = outputs.data.cpu().numpy()
            flipped_images = np.copy(images.data.cpu().numpy()[:, :, :, ::-1])
            flipped_images = torch.from_numpy(flipped_images).float().to(device)
            outputs_flipped = model(flipped_images)
            outputs_flipped = outputs_flipped.data.cpu().numpy()
            outputs = (outputs + outputs_flipped[:, :, :, ::-1]) / 2.0

            pred = np.argmax(outputs, axis=1)
        else:
            outputs = model(images)
            pred = outputs.data.max(1)[1].cpu().numpy()

        gt = labels.numpy()

        elapsed_time = timeit.default_timer() - start_time
        print(
            "Inference time (iter {0:5d}): {1:3.5f} fps".format(
                i + 1, pred.shape[0] / elapsed_time
            )
        )
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
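
validate() takes the parsed config plus an args namespace providing model_path and eval_flip. A minimal sketch of a matching command-line entry point, assuming validate is defined in the same module; flag names and default paths are illustrative:

import argparse

import yaml

# Sketch of the arguments validate() reads; only model_path and eval_flip are
# used by the function, the flag names and defaults here are placeholders.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Validate a trained model")
    parser.add_argument("--config", default="configs/example.yml",
                        help="YAML config file (placeholder path)")
    parser.add_argument("--model_path", default="runs/example/best_model.pkl",
                        help="Checkpoint containing a 'model_state' entry (placeholder path)")
    parser.add_argument("--eval_flip", action="store_true",
                        help="Average predictions over the image and its horizontal flip")
    args = parser.parse_args()

    with open(args.config) as fp:
        cfg = yaml.safe_load(fp)

    validate(cfg, args)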
Example 4
def train(cfg, writer, logger):
    # Setup seeds
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Augmentations
    augmentations = cfg['training'].get('augmentations', None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    t_loader_lbl = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['train_split'],
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        augmentations=data_aug,
        frac_img=cfg['data']['frac_img'],
        get_flow=cfg['data']['get_flow'],
        discard_flow_bottom=cfg['data']['discard_flow_bottom'])

    if cfg['training']['batch_size_flow'] > 0:
        t_loader_flow = data_loader(
            data_path,
            is_transform=True,
            split=cfg['data']['train_split'],
            img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
            augmentations=data_aug,
            frac_img=1.0,
            frac_lbl=0.0,
            get_flow=cfg['data']['get_flow'],
            discard_flow_bottom=cfg['data']['discard_flow_bottom'])

    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),)

    n_classes = t_loader_lbl.n_classes
    trainloader_lbl = data.DataLoader(t_loader_lbl,
                                  batch_size=cfg['training']['batch_size_lbl'],
                                  num_workers=cfg['training']['n_workers'],
                                  shuffle=True)
    if cfg['training']['batch_size_flow'] > 0:
        trainloader_flow = data.DataLoader(t_loader_flow,
                                    batch_size=cfg['training']['batch_size_flow'],
                                    num_workers=cfg['training']['n_workers'],
                                    shuffle=True)
        iterator_flow = iter(trainloader_flow)

    valloader = data.DataLoader(v_loader,
                                batch_size=cfg['training']['batch_size_lbl'],
                                num_workers=cfg['training']['n_workers'])

    # Retrieve Frequencies:
    if cfg['training']['frequency_weighting']:
        with open("frequencies.yml", 'r') as stream:
            frequency_data = yaml.safe_load(stream)
        av_freq_labels = []
        for i in range(1, len(t_loader_lbl.class_names)):
            av_freq_labels.append(frequency_data[t_loader_lbl.class_names[i]])
        print("class frequencies: ")
        for index in range(n_classes):
            print("   " + str(t_loader_lbl.class_names[index + 1]) + ": " + "{0:.3f}".format(av_freq_labels[index]))
        weight_labels = 1.0 / (torch.FloatTensor(av_freq_labels).to(device) + 0.01)

    # Setup Metrics
    running_metrics_val = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)

    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {k: v for k, v in cfg['training']['optimizer'].items()
                        if k != 'name'}

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, cfg['training']['lr_schedule'])

    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    start_iter = 0
    if cfg['training']['resume'] is not None:
        if os.path.isfile(cfg['training']['resume']):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(cfg['training']['resume'])
            )
            checkpoint = torch.load(cfg['training']['resume'])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            logger.info(
                "Loaded checkpoint '{}' (iter {})".format(
                    cfg['training']['resume'], checkpoint["epoch"]
                )
            )
            del checkpoint
        else:
            logger.info("No checkpoint found at '{}'".format(cfg['training']['resume']))

    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_iou = -100.0
    i = start_iter
    flag = True

    while i <= cfg['training']['train_iters'] and flag:
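        # Training is counted in optimization steps, not epochs: the labeled
        # loader is cycled until train_iters steps have been performed.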
        for (images, labels, _) in trainloader_lbl:
            i += 1
            start_ts = time.time()
            scheduler.step()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            if cfg['training']['loss']['name'] == 'cross_flow':
                loss = loss_fn(input=outputs, target=labels, flow=None)
            else:
                if cfg['training']['frequency_weighting']:
                    loss = loss_fn(input=outputs, target=labels, weight=weight_labels)
                else:
                    loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            optimizer.step()

            #################### Flow Consistency Update ##########################
            if cfg['training']['batch_size_flow'] > 0 and i >= cfg['training']['iter_start_semi']:
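                # Semi-supervised step: draw an unlabeled batch with its optical
                # flow and apply the flow-based consistency loss (target=None).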
                try:
                    (images, _, flows) = next(iterator_flow)
                except StopIteration:
                    iterator_flow = iter(trainloader_flow)
                    (images, _, flows) = next(iterator_flow)
                images = images.to(device)
                flows = flows.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = loss_fn(input=outputs, target=None, flow=flows)
                loss.backward()
                optimizer.step()
            #################### Flow Consistency Update End ######################

            time_meter.update(time.time() - start_ts)

            if (i + 1) % cfg['training']['print_interval'] == 0:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(i + 1,
                                           cfg['training']['train_iters'],
                                           loss.item(),
                                           time_meter.avg / (cfg['training']['batch_size_lbl'] + cfg['training']['batch_size_flow']))

                #print(print_str)
                logger.info(print_str)
                writer.add_scalar('loss/train_loss', loss.item(), i + 1)
                time_meter.reset()

            if (i + 1) % cfg['training']['val_interval'] == 0 or \
                    (i + 1) == cfg['training']['train_iters']:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val, labels_val, _) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)

                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)

                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()

                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())

                writer.add_scalar('loss/val_loss', val_loss_meter.avg, i + 1)
                logger.info("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                score, class_iou = running_metrics_val.get_scores()
                for k, v in score.items():
                    #print(k, v)
                    logger.info('{}: {}'.format(k, v))
                    writer.add_scalar('val_metrics/{}'.format(k), v, i + 1)

                for k, v in class_iou.items():
                    logger.info('{}: {}'.format(k, v))
                    writer.add_scalar('val_metrics/cls_{}'.format(k), v, i + 1)

                val_loss_meter.reset()
                running_metrics_val.reset()

                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(writer.file_writer.get_logdir(),
                                             "{}_{}_best_model.pkl".format(
                                                 cfg['model']['arch'],
                                                 cfg['data']['dataset']))
                    torch.save(state, save_path)
                print("Best mIoU for " + cfg['training']['logdir'] + ": " + str(best_iou))

            if (i + 1) == cfg['training']['train_iters']:
                flag = False
                break
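
For reference, every hyper-parameter above comes from the cfg dictionary parsed from a YAML file. A hedged sketch of the structure train() expects, limited to the keys it actually reads; all values are illustrative placeholders rather than recommended settings:

# Illustrative cfg layout covering the keys train() reads; values are placeholders.
cfg = {
    'seed': 1337,
    'model': {'arch': 'example_arch'},        # consumed by get_model()
    'data': {
        'dataset': 'example_dataset',         # consumed by get_loader()
        'path': '/path/to/data',
        'train_split': 'train',
        'val_split': 'val',
        'img_rows': 512,
        'img_cols': 1024,
        'frac_img': 1.0,
        'get_flow': True,
        'discard_flow_bottom': False,
    },
    'training': {
        'logdir': 'example_run',
        'train_iters': 100000,
        'batch_size_lbl': 4,
        'batch_size_flow': 4,                 # 0 disables the flow-consistency step
        'iter_start_semi': 1000,
        'n_workers': 8,
        'print_interval': 50,
        'val_interval': 1000,
        'frequency_weighting': True,
        'augmentations': None,                # consumed by get_composed_augmentations()
        'loss': {'name': 'cross_flow'},       # consumed by get_loss_function()
        'optimizer': {'name': 'sgd', 'lr': 0.01},  # consumed by get_optimizer()
        'lr_schedule': None,                  # consumed by get_scheduler()
        'resume': None,
    },
}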