def main():
    # augmentation
    transform_aug = Compose([
        aug.HueSaturationValue(),
        aug.RandomBrightnessContrast(),
        aug.CLAHE(),
        aug.JpegCompression(),
        aug.GaussNoise(),
        aug.MedianBlur(),
        aug.ElasticTransform(),
        aug.HorizontalFlip(),
        aug.Rotate(),
        aug.CoarseDropout(),
        aug.RandomSizedCrop()
    ], p=1)

    # transform for output
    transform = Compose([
        Resize(cons.IMAGE_SIZE, cons.IMAGE_SIZE),
        Normalize(mean=(0.5, 0.5, 0.5),
                  std=(0.5, 0.5, 0.5),
                  max_pixel_value=255.0)
    ], p=1)

    # Dataset
    '''
    dataset = UkiyoeTrainDataset(
        train_images_path='data',
        train_labels_path='data',
        valid=False,
        confidence_boader=0.87,
        result_path='result/model_effi_b3/efficientnet_b3_980/inference_with_c.csv',
        test_images_path='data',
        over_sampling=False,
        transform_aug=None,
        augmix=False,
        mixup=False,
        transform=transform)
    img, label = dataset[0]
    #print(img.shape)
    #plt.imshow(img)
    #plt.show()
    '''

    # train data loader
    loader = load_train_data(train_images_path='data',
                             train_labels_path='data',
                             batch_size=2,
                             valid=False,
                             nfold=0,
                             transform_aug=None,
                             augmix=True,
                             mixup=False,
                             transform=transform,
                             as_numpy=True)
    image_batch, label_batch = next(iter(loader))
    print(image_batch[0].shape)
    print(label_batch[0].shape)
def main(argv=None):
    transform = Compose([
        Resize(cons.IMAGE_SIZE, cons.IMAGE_SIZE),
        Normalize(mean=(0.5, 0.5, 0.5),
                  std=(0.5, 0.5, 0.5),
                  max_pixel_value=255.0)
    ])

    # validation loader for the held-out fold
    valid_loader = load_train_data(train_images_path=FLAGS.train_images_path,
                                   train_labels_path=FLAGS.train_labels_path,
                                   batch_size=FLAGS.batch_size,
                                   num_worker=FLAGS.num_worker,
                                   valid=True,
                                   nfold=FLAGS.nfold,
                                   transform=transform)

    model = models.get_model(model_name=FLAGS.model_name,
                             num_classes=cons.NUM_CLASSES)
    model.cuda()
    #model = torch.nn.DataParallel(model)

    # output directories for parameters and logs
    DIR = '/' + FLAGS.case + '/' + FLAGS.model_name + '/fold' + str(FLAGS.nfold)
    RESULT_PATH = ''
    if FLAGS.confidence_border is not None:
        DIR = DIR + '/with_pseudo_labeling'
        RESULT_PATH = RESULT_PATH + FLAGS.result_path
        if FLAGS.result_case is not None:
            RESULT_PATH = RESULT_PATH + '/' + FLAGS.result_case
        RESULT_PATH = RESULT_PATH + '/inference_with_c.csv'
    PARAM_DIR = FLAGS.params_path + DIR
    os.makedirs(PARAM_DIR, exist_ok=True)
    PARAM_NAME = PARAM_DIR + '/' + FLAGS.case

    # optionally restart from a previously saved checkpoint
    if FLAGS.executed_epoch > 0:
        TRAINED_PARAM_PATH = FLAGS.restart_param_path + '/' + FLAGS.case + str(FLAGS.executed_epoch)
        restart_epoch = FLAGS.executed_epoch + 1
        if FLAGS.restart_from_final:
            TRAINED_PARAM_PATH = TRAINED_PARAM_PATH + '_final'
        TRAINED_PARAM_PATH = TRAINED_PARAM_PATH + '.pth'
        model.load_state_dict(torch.load(TRAINED_PARAM_PATH))
    else:
        restart_epoch = 0

    optimizer = optim.Adam(model.parameters(), lr=cons.start_lr)
    # mixed precision via apex amp
    model, optimizer = amp.initialize(model, optimizer, opt_level=FLAGS.opt_level)

    # optional class weighting computed from per-class label counts
    if FLAGS.add_class_weight:
        loader = load_train_data(train_images_path=FLAGS.train_images_path,
                                 train_labels_path=FLAGS.train_labels_path,
                                 batch_size=FLAGS.batch_size,
                                 num_worker=FLAGS.num_worker,
                                 nfold=FLAGS.nfold)
        count_label = np.zeros(10, dtype=np.int64)
        for feed in loader:
            _, labels = feed
            count_label += np.sum(labels.numpy().astype(np.int64), axis=0)
        weight = torch.from_numpy(count_label).cuda()
    else:
        weight = None
    criterion = nn.BCEWithLogitsLoss(weight=weight)

    writer = SummaryWriter(log_dir=FLAGS.logs_path + DIR + '/tensorboardX/')
    best_acc = 0

    if FLAGS.augmentation and FLAGS.aug_decrease:
        # augmentation probability decays linearly over the course of training
        p = 0.5
        for e in range(restart_epoch, FLAGS.final_epoch):
            p_partical = p * (FLAGS.final_epoch - e) / FLAGS.final_epoch
            lr = set_lr.cosine_annealing(optimizer, cons.start_lr, e, 100)
            writer.add_scalar('LearningRate', lr, e)
            train_loader = load_train_data(
                train_images_path=FLAGS.train_images_path,
                train_labels_path=FLAGS.train_labels_path,
                batch_size=FLAGS.batch_size,
                num_worker=FLAGS.num_worker,
                nfold=FLAGS.nfold,
                confidence_border=FLAGS.confidence_border,
                result_path=RESULT_PATH,
                test_images_path=FLAGS.test_images_path,
                over_sampling=FLAGS.over_sampling,
                transform_aug=Compose([
                    aug.HueSaturationValue(p=p_partical),
                    aug.RandomBrightnessContrast(p=p_partical),
                    aug.CLAHE(p=p_partical),
                    aug.JpegCompression(p=p_partical),
                    aug.GaussNoise(p=p),
                    aug.MedianBlur(p=p),
                    aug.ElasticTransform(p=p_partical),
                    aug.HorizontalFlip(p=p),
                    aug.Rotate(p=p),
                    aug.CoarseDropout(p=p_partical),
                    aug.RandomSizedCrop(p=p)
                ]),
                mixup=FLAGS.mixup,
                transform=transform)
            train_loss = train_loop(model, train_loader, criterion, optimizer)
            writer.add_scalar('train_loss', train_loss, e)
            valid_loss, valid_acc = valid_loop(model, valid_loader, criterion)
            writer.add_scalar('valid_loss', valid_loss, e)
            writer.add_scalar('valid_acc', valid_acc, e)
            print('Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}, Valid Accuracy:{:.2f}'
                  .format(e + 1, train_loss, valid_loss, valid_acc))
            if e % 10 == 0:
                torch.save(model.state_dict(), PARAM_NAME + '_' + str(e) + '.pth')
            if valid_acc > best_acc:
                best_acc = valid_acc
                torch.save(model.state_dict(), PARAM_NAME + '_best.pth')
    else:
        if FLAGS.augmentation and not FLAGS.augmix:
            transform_aug = Compose([
                aug.HueSaturationValue(),
                aug.RandomBrightnessContrast(),
                aug.CLAHE(),
                aug.JpegCompression(),
                aug.GaussNoise(),
                aug.MedianBlur(),
                aug.ElasticTransform(),
                aug.HorizontalFlip(),
                aug.Rotate(),
                aug.CoarseDropout(),
                aug.RandomSizedCrop()
            ])
        else:
            transform_aug = None
        train_loader = load_train_data(
            train_images_path=FLAGS.train_images_path,
            train_labels_path=FLAGS.train_labels_path,
            batch_size=FLAGS.batch_size,
            num_worker=FLAGS.num_worker,
            valid=False,
            nfold=FLAGS.nfold,
            over_sampling=FLAGS.over_sampling,
            transform_aug=transform_aug,
            augmix=FLAGS.augmix,
            mixup=FLAGS.mixup,
            transform=transform)

        total_time = 0
        for e in range(restart_epoch, FLAGS.final_epoch):
            start = time.time()
            lr = set_lr.cosine_annealing(optimizer, cons.start_lr, e, 100)
            writer.add_scalar('LearningRate', lr, e)
            train_loss = train_loop(model, train_loader, criterion, optimizer)
            writer.add_scalar('train_loss', train_loss, e)
            valid_loss, valid_acc = valid_loop(model, valid_loader, criterion)
            writer.add_scalar('valid_loss', valid_loss, e)
            writer.add_scalar('valid_acc', valid_acc, e)
            print('Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}, Valid Accuracy:{:.2f}'
                  .format(e + 1, train_loss, valid_loss, valid_acc))
            if e % 10 == 0:
                torch.save(model.state_dict(), PARAM_NAME + '_' + str(e) + '.pth')
            if valid_acc > best_acc:
                best_acc = valid_acc
                torch.save(model.state_dict(), PARAM_NAME + '_best.pth')
            total_time = total_time + (time.time() - start)
            print('average time: {}[sec]'.format(total_time / (e + 1)))

    torch.save(model.state_dict(),
               PARAM_NAME + '_' + str(FLAGS.final_epoch - 1) + '_final.pth')
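# train_loop / valid_loop are defined elsewhere in this repo and are not part of
# this snippet. Because model and optimizer are wrapped by apex `amp.initialize`
# above, the backward pass inside train_loop presumably goes through
# `amp.scale_loss`. A minimal, purely illustrative sketch of one such training
# step follows; the function name and argument layout are assumptions, not the
# repo's actual code:
def _amp_train_step_sketch(model, images, labels, criterion, optimizer):
    optimizer.zero_grad()
    outputs = model(images.cuda())
    loss = criterion(outputs, labels.cuda().float())
    # apex amp scales the loss inside this context manager before backprop
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    optimizer.step()
    return loss.item()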
def main(args):
    if args.debug:
        import pdb; pdb.set_trace()

    tb_dir = args.exp_name + '/tb_logs/'
    ckpt_dir = args.exp_name + '/checkpoints/'
    if not os.path.exists(args.exp_name):
        os.mkdir(args.exp_name)
        os.mkdir(tb_dir)
        os.mkdir(ckpt_dir)
    #writer = SummaryWriter(tb_dir+'{}'.format(args.exp_name), flush_secs=10)
    writer = SummaryWriter(tb_dir, flush_secs=10)

    # create model
    print("=> creating model: ")
    os.system('nvidia-smi')
    #model = models.__dict__[args.arch]()
    #model = resnet_dilated.Resnet18_32s(num_classes=21)
    print(args.no_pre_train, ' pretrain')
    #model = resnet18_fcn.Resnet18_fcn(num_classes=args.n_classes, pre_train=args.no_pre_train)
    model_map = {
        'deeplabv3_resnet18': arma_network.deeplabv3_resnet18,
        'deeplabv3_resnet50': arma_network.deeplabv3_resnet50,
        'fcn_resnet18': arma_network.fcn_resnet18,
        #'deeplabv3_resnet101': network.deeplabv3_resnet101,
        #'deeplabv3plus_resnet18': network.deeplabv3plus_resnet18,
        #'deeplabv3plus_resnet50': network.deeplabv3plus_resnet50,
        #'deeplabv3plus_resnet101': network.deeplabv3plus_resnet101
    }
    model = model_map['deeplabv3_resnet50'](arma=False, num_classes=args.n_classes)
    model = model.cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            model, optimizer, args = helper.load_checkpoint(args, model, optimizer)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # USE this only when batch size is fixed.
    # This takes time, but optimizes to crazy speeds once input is fixed.
    cudnn.benchmark = True

    # Load dataloaders
    augmentations = aug.Compose([aug.RandomCrop(512),
                                 aug.RandomHorizontallyFlip(5),
                                 aug.RandomRotate(30),
                                 aug.RandomSizedCrop(512)])
    my_dataset = pascalVOCLoader(args=args, root=args.data, sbd_path=args.data,
                                 augmentations=augmentations)
    my_dataset.get_loaders()

    init_weight_filename = 'initial_state.pth.tar'
    helper.save_checkpoint(args, model, optimizer, custom_name=init_weight_filename)
    with open(args.exp_name + '/' + 'args.pkl', 'wb') as fout:
        pickle.dump(args, fout)

    best_iou = -100.0
    for epoch in range(args.start_epoch, args.epochs):
        helper.adjust_learning_rate(optimizer, epoch, args)

        train_loss = trainer.train(my_dataset.train_loader, model, optimizer, epoch, args, writer)
        val_loss, scores, class_iou, running_metrics_val = trainer.validate(
            my_dataset.val_loader, model, epoch, args, writer)

        # reset each epoch so only an improving epoch overwrites the best checkpoint
        is_best = False
        if scores["Mean IoU : \t"] >= best_iou:
            best_iou = scores["Mean IoU : \t"]
            is_best = True

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                                                    and args.rank % ngpus_per_node == 0):
            if epoch in [0, 1, 2, 3, 4, 5, 6, 7, 8]:
                helper.save_checkpoint(args, model, optimizer, epoch, custom_name=str(epoch) + '.pth')
            if args.save_freq is None:
                helper.save_checkpoint(args, model, optimizer, epoch, is_best=is_best, periodic=False)
            else:
                helper.save_checkpoint(args, model, optimizer, epoch, is_best=is_best, periodic=True)

        with open(args.exp_name + '/running_metric.pkl', 'wb') as fout:
            pickle.dump(running_metrics_val, fout)
# Leave code for debugging purposes
# import ptsemseg.augmentations as aug
if __name__ == '__main__':
    # local_path = '/home/meetshah1995/datasets/VOCdevkit/VOC2012/'
    import args
    args = args.get_args()
    import augmentations as aug

    augmentations = aug.Compose([
        aug.RandomCrop(512),
        aug.RandomHorizontallyFlip(5),
        aug.RandomRotate(30),
        aug.RandomSizedCrop(512)
    ])
    my_dataset = pascalVOCLoader(args=args, root='pascal_voc/', sbd_path='pascal_voc/',
                                 augmentations=augmentations)
    my_dataset.get_loaders()

    for i, data in enumerate(my_dataset.train_loader):
        print(torch.unique(data[-1]), data[0].shape)
        # flag label values outside the expected 0-20 VOC class range
        if torch.max(torch.unique(data[-1])) > 20:
            print(i, data[-1])
        imgs, labels = data
        imgs = imgs.numpy()[:, ::-1, :, :]
        imgs = np.transpose(imgs, [0, 2, 3, 1])
        bs = imgs.shape[0]  # batch size: one subplot row per sample (`bs` was undefined in the original snippet)
        f, axarr = plt.subplots(bs, 2)
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.debug:
        import pdb; pdb.set_trace()

    if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                                                and args.rank % ngpus_per_node == 0):
        tb_dir = args.exp_name + '/tb_logs/'
        ckpt_dir = args.exp_name + '/checkpoints/'
        if not os.path.exists(args.exp_name):
            os.mkdir(args.exp_name)
            os.mkdir(tb_dir)
            os.mkdir(ckpt_dir)
        print("writing to : ", tb_dir + '{}'.format(args.exp_name), args.rank, ngpus_per_node)
        #writer = SummaryWriter(tb_dir+'{}'.format(args.exp_name), flush_secs=10)
        writer = SummaryWriter(tb_dir, flush_secs=10)

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # create model
    print("=> creating model: ")
    #model = models.__dict__[args.arch]()
    model = resnet_dilated.Resnet18_32s(num_classes=21)

    if args.distributed:
        print("distributed")
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            model, optimizer, args = helper.load_checkpoint(args, model, optimizer)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # USE this only when batch size is fixed.
    # This takes time, but optimizes to crazy speeds once input is fixed.
    cudnn.benchmark = True

    # Load dataloaders
    augmentations = aug.Compose([aug.RandomCrop(512),
                                 aug.RandomHorizontallyFlip(5),
                                 aug.RandomRotate(30),
                                 aug.RandomSizedCrop(512)])
    my_dataset = pascalVOCLoader(args=args,
                                 root='/scratch0/shishira/pascal_voc/',
                                 sbd_path='/scratch0/shishira/pascal_voc/',
                                 augmentations=augmentations)
    my_dataset.get_loaders()

    init_weight_filename = 'initial_state.pth.tar'
    helper.save_checkpoint(args, model, optimizer, custom_name=init_weight_filename)
    with open(args.exp_name + '/' + 'args.pkl', 'wb') as fout:
        pickle.dump(args, fout)

    best_iou = -100.0
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            my_dataset.train_sampler.set_epoch(epoch)
        helper.adjust_learning_rate(optimizer, epoch, args)

        train_loss = trainer.train(my_dataset.train_loader, model, optimizer, epoch, args, writer)
        val_loss, scores, class_iou = trainer.validate(my_dataset.val_loader, model, epoch, args, writer)

        # reset each epoch so only an improving epoch overwrites the best checkpoint
        is_best = False
        if scores["Mean IoU : \t"] >= best_iou:
            best_iou = scores["Mean IoU : \t"]
            is_best = True

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                                                    and args.rank % ngpus_per_node == 0):
            if epoch in [0, 1, 2, 3, 4, 5, 6, 7, 8]:
                helper.save_checkpoint(args, model, optimizer, epoch, custom_name=str(epoch) + '.pth')
            if args.save_freq is None:
                helper.save_checkpoint(args, model, optimizer, epoch, is_best=is_best, periodic=False)
            else:
                helper.save_checkpoint(args, model, optimizer, epoch, is_best=is_best, periodic=True)
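# The entry point that launches main_worker is not included in this snippet. A
# hypothetical launcher sketch in the PyTorch ImageNet-example style this code
# follows, reusing the args.get_args() helper seen in the debug block above;
# the process-spawning details here are an assumption, not the repo's actual code:
if __name__ == '__main__':
    import torch.multiprocessing as mp
    import args as args_module

    args = args_module.get_args()
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # one worker process per GPU; world_size becomes the total process count
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        main_worker(args.gpu, ngpus_per_node, args)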