Ejemplo n.º 1
0
def main():
    """Build the transform pipelines and print the shapes of one train batch.

    An augmentation pipeline and an output (resize + normalize) pipeline are
    constructed, a training loader is requested from ``load_train_data`` and
    the shape of the first image and of the first label in its first batch
    is printed.
    """
    # Augmentation pipeline. It is only constructed here: the loader below is
    # called with transform_aug=None.
    augmentation_steps = [
        aug.HueSaturationValue(),
        aug.RandomBrightnessContrast(),
        aug.CLAHE(),
        aug.JpegCompression(),
        aug.GaussNoise(),
        aug.MedianBlur(),
        aug.ElasticTransform(),
        aug.HorizontalFlip(),
        aug.Rotate(),
        aug.CoarseDropout(),
        aug.RandomSizedCrop(),
    ]
    augmentation_pipeline = Compose(augmentation_steps, p=1)

    # Output pipeline: resize to the configured square size, then normalize.
    output_steps = [
        Resize(cons.IMAGE_SIZE, cons.IMAGE_SIZE),
        Normalize(mean=(0.5, 0.5, 0.5),
                  std=(0.5, 0.5, 0.5),
                  max_pixel_value=255.0),
    ]
    output_pipeline = Compose(output_steps, p=1)

    # Disabled example: reading a single sample straight from the dataset.
    #
    # dataset = UkiyoeTrainDataset(
    #     train_images_path='data',
    #     train_labels_path='data',
    #     valid=False,
    #     confidence_boader=0.87,
    #     result_path='result/model_effi_b3/efficientnet_b3_980/inference_with_c.csv',
    #     test_images_path='data',
    #     over_sampling=False,
    #     transform_aug=None,
    #     augmix=False,
    #     mixup=False,
    #     transform=transform)
    # img, label = dataset[0]
    # print(img.shape)
    # plt.imshow(img)
    # plt.show()

    # Training data loader.
    loader_options = {
        'train_images_path': 'data',
        'train_labels_path': 'data',
        'batch_size': 2,
        'valid': False,
        'nfold': 0,
        'transform_aug': None,
        'augmix': True,
        'mixup': False,
        'transform': output_pipeline,
        'as_numpy': True,
    }
    loader = load_train_data(**loader_options)

    # Pull exactly one batch and report the shape of its first elements.
    first_batch = next(iter(loader))
    image_batch, label_batch = first_batch
    for batch in (image_batch, label_batch):
        print(batch[0].shape)
    '''
Ejemplo n.º 2
0
def _run_epoch(model, optimizer, criterion, train_loader, valid_loader,
               writer, epoch, param_name, best_acc):
    """Train and validate for one epoch, log the results, save checkpoints.

    Args:
        model: network being trained.
        optimizer: optimizer whose learning rate is set for this epoch.
        criterion: loss passed to the train and validation loops.
        train_loader: loader handed to ``train_loop``.
        valid_loader: loader handed to ``valid_loop``.
        writer: summary writer receiving the per-epoch scalars.
        epoch: zero-based epoch index.
        param_name: path prefix used for the checkpoint files.
        best_acc: best validation accuracy seen so far.

    Returns:
        The best validation accuracy after this epoch.
    """
    lr = set_lr.cosine_annealing(optimizer, cons.start_lr, epoch, 100)
    writer.add_scalar('LearningRate', lr, epoch)

    train_loss = train_loop(model, train_loader, criterion, optimizer)
    writer.add_scalar('train_loss', train_loss, epoch)

    valid_loss, valid_acc = valid_loop(model, valid_loader, criterion)
    writer.add_scalar('valid_loss', valid_loss, epoch)
    writer.add_scalar('valid_acc', valid_acc, epoch)

    print(
        'Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}, Valid Accuracy:{:.2f}'
        .format(epoch + 1, train_loss, valid_loss, valid_acc))

    # Periodic snapshot every 10th epoch, plus a rolling "best" snapshot.
    if epoch % 10 == 0:
        torch.save(model.state_dict(), param_name + '_' + str(epoch) + '.pth')
    if valid_acc > best_acc:
        best_acc = valid_acc
        torch.save(model.state_dict(), param_name + '_best.pth')
    return best_acc


def main(argv=None):
    """Train a classifier on one fold, logging to tensorboard and checkpointing.

    All settings are read from the module-level ``FLAGS`` and ``cons``
    objects. Training optionally resumes from a stored state dict, optionally
    weights the loss per class, and either rebuilds the training loader every
    epoch with decaying augmentation probabilities or uses a single loader
    for the whole run.

    Args:
        argv: unused; accepted so the function can serve as a flags-style
            entry point.
    """
    transform = Compose([
        Resize(cons.IMAGE_SIZE, cons.IMAGE_SIZE),
        Normalize(mean=(0.5, 0.5, 0.5),
                  std=(0.5, 0.5, 0.5),
                  max_pixel_value=255.0)
    ])
    valid_loader = load_train_data(train_images_path=FLAGS.train_images_path,
                                   train_labels_path=FLAGS.train_labels_path,
                                   batch_size=FLAGS.batch_size,
                                   num_worker=FLAGS.num_worker,
                                   valid=True,
                                   nfold=FLAGS.nfold,
                                   transform=transform)

    model = models.get_model(model_name=FLAGS.model_name,
                             num_classes=cons.NUM_CLASSES)
    model.cuda()

    # Sub-directory shared by the parameter and log outputs of this run.
    run_dir = '/' + FLAGS.case + '/' + FLAGS.model_name + '/fold' + str(
        FLAGS.nfold)
    result_path = ''
    if FLAGS.confidence_border is not None:
        run_dir = run_dir + '/with_pseudo_labeling'
        result_path = result_path + FLAGS.result_path
        if FLAGS.result_case is not None:
            result_path = result_path + '/' + FLAGS.result_case
        result_path = result_path + '/inference_with_c.csv'

    param_dir = FLAGS.params_path + run_dir
    os.makedirs(param_dir, exist_ok=True)
    param_name = param_dir + '/' + FLAGS.case

    if FLAGS.executed_epoch > 0:
        # NOTE(review): checkpoints below are written as
        # '<case>_<epoch>.pth' / '<case>_<epoch>_final.pth' (with an
        # underscore before the epoch) while this path has none - confirm the
        # files under restart_param_path really use this naming.
        trained_param_path = FLAGS.restart_param_path + '/' + FLAGS.case + str(
            FLAGS.executed_epoch)
        restart_epoch = FLAGS.executed_epoch + 1
        if FLAGS.restart_from_final:
            trained_param_path = trained_param_path + '_final'
        trained_param_path = trained_param_path + '.pth'
        model.load_state_dict(torch.load(trained_param_path))
    else:
        restart_epoch = 0

    optimizer = optim.Adam(model.parameters(), lr=cons.start_lr)
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=FLAGS.opt_level)

    if FLAGS.add_class_weight:
        loader = load_train_data(train_images_path=FLAGS.train_images_path,
                                 train_labels_path=FLAGS.train_labels_path,
                                 batch_size=FLAGS.batch_size,
                                 num_worker=FLAGS.num_worker,
                                 nfold=FLAGS.nfold)
        # One counter per class; sized from the same constant the model uses
        # so both stay in sync.
        count_label = np.zeros(cons.NUM_CLASSES, dtype=np.int64)
        for _, labels in loader:
            count_label += np.sum(labels.numpy().astype(np.int64), axis=0)
        # NOTE(review): the raw per-class counts (int64) are used as the loss
        # weight, so frequent classes weigh more - confirm this is intended
        # rather than an inverse-frequency weighting.
        weight = torch.from_numpy(count_label).cuda()
    else:
        weight = None
    criterion = nn.BCEWithLogitsLoss(weight=weight)

    writer = SummaryWriter(log_dir=FLAGS.logs_path + run_dir + '/tensorboardX/')
    best_acc = 0

    if FLAGS.augmentation and FLAGS.aug_decrease:
        p = 0.5

        for e in range(restart_epoch, FLAGS.final_epoch):
            # Probability that decays linearly towards 0 at the final epoch;
            # only part of the augmentations use it, the rest keep p.
            p_partial = p * (FLAGS.final_epoch - e) / FLAGS.final_epoch

            train_loader = load_train_data(
                train_images_path=FLAGS.train_images_path,
                train_labels_path=FLAGS.train_labels_path,
                batch_size=FLAGS.batch_size,
                num_worker=FLAGS.num_worker,
                nfold=FLAGS.nfold,
                confidence_border=FLAGS.confidence_border,
                result_path=result_path,
                test_images_path=FLAGS.test_images_path,
                over_sampling=FLAGS.over_sampling,
                transform_aug=Compose([
                    aug.HueSaturationValue(p=p_partial),
                    aug.RandomBrightnessContrast(p=p_partial),
                    aug.CLAHE(p=p_partial),
                    aug.JpegCompression(p=p_partial),
                    aug.GaussNoise(p=p),
                    aug.MedianBlur(p=p),
                    aug.ElasticTransform(p=p_partial),
                    aug.HorizontalFlip(p=p),
                    aug.Rotate(p=p),
                    aug.CoarseDropout(p=p_partial),
                    aug.RandomSizedCrop(p=p)
                ]),
                mixup=FLAGS.mixup,
                transform=transform)

            best_acc = _run_epoch(model, optimizer, criterion, train_loader,
                                  valid_loader, writer, e, param_name,
                                  best_acc)
    else:
        if FLAGS.augmentation and not FLAGS.augmix:
            transform_aug = Compose([
                aug.HueSaturationValue(),
                aug.RandomBrightnessContrast(),
                aug.CLAHE(),
                aug.JpegCompression(),
                aug.GaussNoise(),
                aug.MedianBlur(),
                aug.ElasticTransform(),
                aug.HorizontalFlip(),
                aug.Rotate(),
                aug.CoarseDropout(),
                aug.RandomSizedCrop()
            ])
        else:
            transform_aug = None

        train_loader = load_train_data(
            train_images_path=FLAGS.train_images_path,
            train_labels_path=FLAGS.train_labels_path,
            batch_size=FLAGS.batch_size,
            num_worker=FLAGS.num_worker,
            valid=False,
            nfold=FLAGS.nfold,
            over_sampling=FLAGS.over_sampling,
            transform_aug=transform_aug,
            augmix=FLAGS.augmix,
            mixup=FLAGS.mixup,
            transform=transform)

        total_time = 0
        for e in range(restart_epoch, FLAGS.final_epoch):
            start = time.time()
            best_acc = _run_epoch(model, optimizer, criterion, train_loader,
                                  valid_loader, writer, e, param_name,
                                  best_acc)
            total_time = total_time + (time.time() - start)
            # Average over the epochs actually run in this process; dividing
            # by e + 1 would under-report the time after a restart.
            epochs_run = e - restart_epoch + 1
            print('average time: {}[sec]'.format(total_time / epochs_run))

    torch.save(model.state_dict(),
               param_name + '_' + str(FLAGS.final_epoch - 1) + '_final.pth')
    # Flush pending events and release the log file.
    writer.close()
Ejemplo n.º 3
0
def main(args):
    """Train and validate a segmentation model, checkpointing every epoch.

    Creates the experiment directories, builds a DeepLabV3/ResNet-50 model
    wrapped in ``nn.DataParallel``, optionally resumes from ``args.resume``,
    then alternates ``trainer.train`` and ``trainer.validate`` for the epochs
    in ``range(args.start_epoch, args.epochs)``. The parsed arguments and the
    last validation running metrics are pickled into the experiment directory.

    Args:
        args: parsed command-line namespace; the attributes read here are
            debug, exp_name, no_pre_train, n_classes, lr, momentum,
            weight_decay, resume, data, start_epoch, epochs,
            multiprocessing_distributed, rank and save_freq.
    """
    if args.debug:
        import pdb
        pdb.set_trace()

    tb_dir = args.exp_name + '/tb_logs/'
    ckpt_dir = args.exp_name + '/checkpoints/'

    # Create each directory independently so that a pre-existing experiment
    # directory without its sub-directories is still completed.
    os.makedirs(tb_dir, exist_ok=True)
    os.makedirs(ckpt_dir, exist_ok=True)

    writer = SummaryWriter(tb_dir, flush_secs=10)

    # create model
    print("=> creating model: ")
    os.system('nvidia-smi')
    print(args.no_pre_train,' pretrain')

    model_map = {
        'deeplabv3_resnet18': arma_network.deeplabv3_resnet18,
        'deeplabv3_resnet50': arma_network.deeplabv3_resnet50,
        'fcn_resnet18': arma_network.fcn_resnet18,
    }

    model = model_map['deeplabv3_resnet50'](arma=False, num_classes=args.n_classes)

    model = model.cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            model, optimizer, args = helper.load_checkpoint(args, model, optimizer)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Use this only when the batch size is fixed: the autotuning takes time
    # up front but pays off once the input shape no longer changes.
    cudnn.benchmark = True

    # Load dataloaders
    # NOTE(review): RandomHorizontallyFlip receives 5; if that argument is a
    # probability the flip is always applied - confirm against the
    # augmentations module.
    augmentations = aug.Compose([
        aug.RandomCrop(512),
        aug.RandomHorizontallyFlip(5),
        aug.RandomRotate(30),
        aug.RandomSizedCrop(512),
    ])

    my_dataset = pascalVOCLoader(args=args, root=args.data, sbd_path=args.data,
                                 augmentations=augmentations)

    my_dataset.get_loaders()

    init_weight_filename = 'initial_state.pth.tar'
    helper.save_checkpoint(args, model, optimizer, custom_name=init_weight_filename)

    with open(args.exp_name+'/'+'args.pkl','wb') as fout:
        pickle.dump(args, fout)

    best_iou = -100.0
    # Stays None when the epoch range is empty, so the final dump can be
    # skipped instead of failing on an unbound name.
    running_metrics_val = None
    for epoch in range(args.start_epoch, args.epochs):

        helper.adjust_learning_rate(optimizer, epoch, args)

        train_loss = trainer.train(my_dataset.train_loader, model, optimizer, epoch, args, writer)
        val_loss, scores, class_iou, running_metrics_val = trainer.validate(
            my_dataset.val_loader, model, epoch, args, writer)

        # Recomputed every epoch: a flag that is only ever set to True would
        # mark every later epoch as the best one.
        is_best = scores["Mean IoU : \t"] >= best_iou
        if is_best:
            best_iou = scores["Mean IoU : \t"]

        # NOTE(review): ngpus_per_node is not defined in this function; the
        # right-hand operand is only evaluated when
        # args.multiprocessing_distributed is truthy and then needs a
        # module-level definition - confirm.
        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):

            # Keep an individual snapshot for each of the first nine epochs.
            if 0 <= epoch <= 8:
                helper.save_checkpoint(args, model, optimizer, epoch, custom_name=str(epoch)+'.pth')

            helper.save_checkpoint(args, model, optimizer, epoch, is_best=is_best,
                                   periodic=args.save_freq is not None)

    if running_metrics_val is not None:
        with open(args.exp_name+'/running_metric.pkl','wb') as fout:
            pickle.dump(running_metrics_val, fout)
Ejemplo n.º 4
0
# Leave code for debugging purposes
# import ptsemseg.augmentations as aug
if __name__ == '__main__':
    # Debug entry point: builds the Pascal VOC loaders, walks the training
    # batches printing their label values, then prepares the last batch for
    # plotting.
    # local_path = '/home/meetshah1995/datasets/VOCdevkit/VOC2012/'
    import args

    # NOTE: this rebinds `args` from the imported module to the object
    # returned by get_args(); the module is no longer reachable by this name.
    args = args.get_args()

    import augmentations as aug

    # NOTE(review): RandomHorizontallyFlip receives 5; if that argument is a
    # probability the flip is always applied - confirm.
    augmentations = aug.Compose([
        aug.RandomCrop(512),
        aug.RandomHorizontallyFlip(5),
        aug.RandomRotate(30),
        aug.RandomSizedCrop(512)
    ])

    my_dataset = pascalVOCLoader(args=args,root='pascal_voc/',sbd_path='pascal_voc/',\
     augmentations=augmentations)
    my_dataset.get_loaders()

    # For every training batch print the unique values of its last element
    # and the shape of its first element; dump the whole last element when
    # any value exceeds 20.
    for i, data in enumerate(my_dataset.train_loader):
        print(torch.unique(data[-1]), data[0].shape)
        if torch.max(torch.unique(data[-1])) > 20:
            print(i, data[-1])

    # `data` is the last batch left over from the loop above (it is unbound
    # if the loader yielded nothing).
    imgs, labels = data
    # Reverse axis 1, then move it to the end - presumably BGR->RGB followed
    # by NCHW->NHWC for plotting; TODO confirm.
    imgs = imgs.numpy()[:, ::-1, :, :]
    imgs = np.transpose(imgs, [0, 2, 3, 1])
    # NOTE(review): `bs` is not defined anywhere in the visible code, and
    # `f`/`axarr` are never used afterwards - the snippet looks truncated.
    f, axarr = plt.subplots(bs, 2)
Ejemplo n.º 5
0
def main_worker(gpu, ngpus_per_node, args):
    """Per-process entry point for distributed segmentation training.

    Sets up logging directories, joins the process group, wraps a
    ``Resnet18_32s`` model in ``DistributedDataParallel`` and then alternates
    ``trainer.train`` / ``trainer.validate`` for the epochs in
    ``range(args.start_epoch, args.epochs)``, saving checkpoints from the
    process whose rank is a multiple of ``ngpus_per_node``.

    Args:
        gpu: index of the GPU assigned to this process (stored in args.gpu).
        ngpus_per_node: number of GPUs, i.e. worker processes, per node.
        args: parsed command-line namespace; mutated in place (gpu, rank,
            batch_size and workers are rewritten).

    Raises:
        NotImplementedError: when ``args.distributed`` is false.
    """
    args.gpu = gpu

    if args.debug:
        import pdb
        pdb.set_trace()

    # Bound on every rank so the trainer calls below never hit an unbound
    # local; only ranks that pass the check get a real writer.
    writer = None

    # NOTE(review): args.rank is still the value passed in here; it is only
    # turned into the per-process rank further down - confirm the intended
    # set of ranks that should create directories and writers.
    if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):

        tb_dir = args.exp_name+'/tb_logs/'
        ckpt_dir = args.exp_name + '/checkpoints/'

        # exist_ok avoids a failure when several processes pass the check
        # above and race to create the same directories.
        os.makedirs(tb_dir, exist_ok=True)
        os.makedirs(ckpt_dir, exist_ok=True)

        print("writing to : ",tb_dir+'{}'.format(args.exp_name),args.rank,ngpus_per_node)

        writer = SummaryWriter(tb_dir, flush_secs=10)

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:
        # Accept keyword arguments too, so calls such as print(x, end='')
        # elsewhere in the process are silenced instead of raising.
        def print_pass(*_args, **_kwargs):
            pass
        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # Convert the incoming rank into a rank unique across processes.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    print("=> creating model: ")

    model = resnet_dilated.Resnet18_32s(num_classes=21)

    if args.distributed:
        print("distributed")
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # Split the configured batch size and worker count across the
            # processes of this node (workers rounded up).
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            model, optimizer, args = helper.load_checkpoint(args, model, optimizer)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Use this only when the batch size is fixed: the autotuning takes time
    # up front but pays off once the input shape no longer changes.
    cudnn.benchmark = True

    # Load dataloaders
    # NOTE(review): RandomHorizontallyFlip receives 5; if that argument is a
    # probability the flip is always applied - confirm.
    augmentations = aug.Compose([
        aug.RandomCrop(512),
        aug.RandomHorizontallyFlip(5),
        aug.RandomRotate(30),
        aug.RandomSizedCrop(512),
    ])
    my_dataset = pascalVOCLoader(args=args,
                                 root='/scratch0/shishira/pascal_voc/',
                                 sbd_path='/scratch0/shishira/pascal_voc/',
                                 augmentations=augmentations)
    my_dataset.get_loaders()

    init_weight_filename = 'initial_state.pth.tar'
    helper.save_checkpoint(args, model, optimizer, custom_name=init_weight_filename)

    with open(args.exp_name+'/'+'args.pkl','wb') as fout:
        pickle.dump(args, fout)

    best_iou = -100.0
    for epoch in range(args.start_epoch, args.epochs):

        if args.distributed:
            my_dataset.train_sampler.set_epoch(epoch)

        helper.adjust_learning_rate(optimizer, epoch, args)

        train_loss = trainer.train(my_dataset.train_loader, model, optimizer, epoch, args, writer)
        val_loss, scores, class_iou = trainer.validate(my_dataset.val_loader, model, epoch, args, writer)

        # Recomputed every epoch: a flag that is only ever set to True would
        # mark every later epoch as the best one.
        is_best = scores["Mean IoU : \t"] >= best_iou
        if is_best:
            best_iou = scores["Mean IoU : \t"]

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):

            # Keep an individual snapshot for each of the first nine epochs.
            if 0 <= epoch <= 8:
                helper.save_checkpoint(args, model, optimizer, epoch, custom_name=str(epoch)+'.pth')

            helper.save_checkpoint(args, model, optimizer, epoch, is_best=is_best,
                                   periodic=args.save_freq is not None)