def __init__(self):
    """Build the preprocessing pipeline and store it on ``self.transform``.

    The pipeline applies ``Normalize`` with fixed per-channel statistics
    and then ``ToTensor``.
    """
    # Passed positionally to Normalize: first the means, then the stds.
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2023, 0.1994, 0.2010)
    pipeline = [
        Normalize(channel_mean, channel_std),
        ToTensor(),
    ]
    self.transform = Compose(pipeline)
0) #same as output channels #determine all of the eval_classes #always ignoring 0 (background) if eval_classes is None: if num_classes == 1: eval_classes = [1] else: eval_classes = list(range(1, num_classes)) #create the evaluation transforms with the correct normalization #and use FactorResize to resize images such that height and width #are divisible by 32 eval_tfs = Compose([ FactorResize(32), Normalize(mean=norms[0], std=norms[1]), ToTensorV2() ]) #create the dataset and dataloader test_data = SegmentationData(test_dir, tfs=eval_tfs, gray_channels=gray_channels) test = DataLoader(test_data, batch_size=1, shuffle=False, pin_memory=True, num_workers=8) #determine if we're in inference only mode or not inference_only = False if test_data.has_masks else True
def _strip_affixes(filename, prefix, suffix):
    """Drop an exact leading ``prefix`` and trailing ``suffix`` from ``filename``."""
    if filename.startswith(prefix):
        filename = filename[len(prefix):]
    if filename.endswith(suffix):
        filename = filename[:len(filename) - len(suffix)]
    return filename


def inference_by_model(model_name,
                       filenames,
                       batch_size=2,
                       num_workers=0,
                       fullsize_mode=False):
    """Run test-time inference with a saved checkpoint and store the masks.

    The checkpoint ``./wdata/models/<prefix>/<model_name>`` is loaded into a
    ``unet_vgg16`` model, where ``<prefix>`` is the first two
    ``_``-separated tokens of ``model_name``. Every ``Pan-Sharpen*.tif``
    under ``./wdata/dataset/test_rgb/`` is predicted from its four 512x512
    corner crops, the overlapping predictions are averaged into a 900x900
    map, and the map is quantised to uint8 and written as a sparse
    ``<name>.npz`` under ``./wdata/models/<prefix>/test_<model_name>/``.

    Args:
        model_name: checkpoint file name.
        filenames: currently unused; the image list is globbed from disk.
        batch_size: batch size of the test data loader.
        num_workers: number of data loader worker processes.
        fullsize_mode: currently unused (see the TODO in the loop).
    """
    # TODO: Optimize parameters for p2.xlarge
    print(f'Inrefernce by {model_name}')

    prefix = '_'.join(model_name.split('_')[:2])
    model_checkpoint_file = f'./wdata/models/{prefix}/{model_name}'
    pred_mask_dir = f'./wdata/models/{prefix}/test_{model_name}/'
    Path(pred_mask_dir).mkdir(parents=True, exist_ok=True)

    model = unet_vgg16(pretrained=False)
    cp = torch.load(model_checkpoint_file)
    epoch = cp['epoch']
    if 'module.final.weight' in cp['model']:
        # The checkpoint was saved from a DataParallel wrapper: load through
        # the same wrapper, then unwrap to the bare model.
        model = nn.DataParallel(model).cuda()
        model.load_state_dict(cp['model'])
        model = model.module
    else:
        model.load_state_dict(cp['model'])
    model = model.cuda()

    # str.lstrip/rstrip remove *character sets*, not exact affixes, and would
    # eat leading/trailing id characters such as 'a', 'n', 't' or 'f'.
    image_ids = [
        _strip_affixes(Path(path).name, 'Pan-Sharpen_', '.tif')
        for path in Path('./wdata/dataset/test_rgb/').glob('Pan-Sharpen*.tif')
    ]

    tst_transformer = Compose([
        Normalize(),
    ], p=1.0)
    tst_dataset = AtlantaTestDataset(image_ids, aug=tst_transformer)
    tst_loader = DataLoader(tst_dataset,
                            sampler=SequentialSampler(tst_dataset),
                            batch_size=batch_size,
                            drop_last=False,
                            num_workers=num_workers,
                            pin_memory=torch.cuda.is_available())

    # (rows, cols) windows of the four 512x512 corner crops, in the order
    # they are stacked into the model input.
    head, tail = slice(None, 512), slice(-512, None)
    crop_windows = [(head, head), (tail, head), (head, tail), (tail, tail)]

    with torch.no_grad():
        tq = tqdm.tqdm(total=(len(tst_loader) * tst_loader.batch_size))
        tq.set_description(f'(test) Ep{epoch:>3d}')
        for X, names in tst_loader:
            tq.update(X.size(0))

            # TODO: fullsize_mode is accepted but not implemented; both
            # settings run the corner-crop inference below.
            for j, name in enumerate(names):
                # Image level inference: 900 -> 512 crops
                X_ = torch.stack(
                    [X[j, :, rows, cols] for rows, cols in crop_windows])

                y_pred = np.zeros((900, 900), dtype=np.float32)
                y_pred_weight = np.zeros((900, 900), dtype=np.uint8)

                inputs = X_.cuda()
                outputs = model(inputs)
                y_pred_sigmoid = np.clip(
                    torch.sigmoid(
                        torch.squeeze(outputs)).detach().cpu().numpy(),
                    0.0, 1.0)

                # Accumulate each crop's prediction and count how many crops
                # cover each pixel, then average over the overlaps.
                for k, (rows, cols) in enumerate(crop_windows):
                    y_pred[rows, cols] += y_pred_sigmoid[k]
                    y_pred_weight[rows, cols] += 1
                y_pred = y_pred / y_pred_weight

                # Save quantised values
                y_pred_mat = ss.csr_matrix(
                    np.round(y_pred * 255).astype(np.uint8))
                ss.save_npz(str(Path(pred_mask_dir) / Path(f'{name}.npz')),
                            y_pred_mat)
        tq.close()
def img_transform(p=1):
    """Return a ``Compose`` that only normalizes the image.

    Args:
        p: probability passed to the enclosing ``Compose``; the inner
            ``Normalize`` itself is always created with ``p=1``.
    """
    steps = [Normalize(p=1)]
    return Compose(steps, p=p)
Downscale(0.40, 0.80, cv2.INTER_LINEAR, p=0.3) ], p=0.2), OneOf( [ GaussNoise(var_limit=0.1), Blur(), GaussianBlur(blur_limit=3), # RandomGamma(p=0.7), ], p=0.3), HueSaturationValue(p=0.4), HorizontalFlip(0.4), VerticalFlip(0.4), # ColorConstancy(p=0.3, always_apply=False), Normalize(always_apply=True) ]) val_aug = Compose([Normalize(always_apply=True)]) data_dir = 'data' image_path = f'{data_dir}/train_768' test_image_path = f'{data_dir}/test_768' # pseduo_df = pd.read_csv('submissions/sub_946.csv') # df = pd.read_csv(f'{data_dir}/folds.csv') gen_challenge = { 'lower extremity': 2, 'torso': 3, 'head/neck': 0, 'oral/genital': 5, 'palms/soles': 4, 'upper extremity': 1 }
# ---- run configuration -------------------------------------------------
thr = 0.9
opts = ['normal', 'mixup', 'cutmix']
device = 'cuda:0'
apex = False
pretrained_model = 'se_resnext50_32x4d'
weight_file = 'model_weights_best_recall.pth'
load_model = True
results = pd.DataFrame()
pred_thr = pd.DataFrame()
n_epochs = 60
valid_recall = 0.0
best_valid_recall = 0.0
# Validation only normalizes, using Normalize's default statistics.
val_aug = Compose([Normalize()])

# ---- training table ----------------------------------------------------
train_df = pd.read_csv('data/train.csv')
nunique = list(train_df.nunique())[1:-1]
# Numeric id = second '_'-separated token of image_id.
train_df['id'] = train_df['image_id'].apply(lambda x: int(x.split('_')[1]))
train_df = train_df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# X holds only the 'id' column; y holds every column after the first one
# (taken before the 'fold' column is added below).
X = train_df[[
    'id', 'grapheme_root', 'vowel_diacritic', 'consonant_diacritic'
]].values[:, 0]
y = train_df.values[:, 1:]
train_df['fold'] = np.nan

# ---- fold assignment ---------------------------------------------------
# Each row receives the index of the split in which it is a test sample.
mskf = MultilabelStratifiedKFold(n_splits=n_fold, random_state=SEED)
for i, (_, test_index) in enumerate(mskf.split(X, y)):
    train_df.iloc[test_index, -1] = i
idxs = list(range(len(train_df)))
img = np.array(img) img = self.transforms(image=img)["image"] return img # Albumentations Transformations transform_train_albu = Compose([ RandomCrop(height=32, width=32), #, always_apply=True HorizontalFlip(p=0.2), VerticalFlip(p=0.0), GaussianBlur(p=0.0), Rotate(limit=20), #HueSaturationValue(p=0.25), #ToTensor(), Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010), always_apply=True), Cutout(num_holes=1, max_h_size=8, max_w_size=8, fill_value=[0.4914, 0.4822, 0.4465], p=0.3), ToTensorV2(always_apply=True) ]) transform_test_albu = Compose([ #ToTensor(), Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)), ToTensorV2(always_apply=True) ])
def get_transforms(*, data):
    """Return the albumentations pipeline for the requested mode.

    Args:
        data: ``'train'`` (augment, normalize, to-tensor), ``'check'``
            (same augmentations as train but without normalization) or
            ``'valid'`` (resize, normalize, to-tensor).

    Returns:
        A ``Compose`` for the three known modes. The train/check pipelines
        also register ``image_annot`` as an additional image target. Any
        other value of ``data`` yields ``None``.
    """
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    def _augmentations():
        # Random augmentations shared by the 'train' and 'check' pipelines.
        return [
            RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
            HorizontalFlip(p=0.5),
            RandomBrightnessContrast(p=0.2,
                                     brightness_limit=(-0.2, 0.2),
                                     contrast_limit=(-0.2, 0.2)),
            HueSaturationValue(p=0.2,
                               hue_shift_limit=0.2,
                               sat_shift_limit=0.2,
                               val_shift_limit=0.2),
            ShiftScaleRotate(p=0.2,
                             shift_limit=0.0625,
                             scale_limit=0.2,
                             rotate_limit=20),
            CoarseDropout(p=0.2),
            Cutout(p=0.2,
                   max_h_size=16,
                   max_w_size=16,
                   fill_value=(0., 0., 0.),
                   num_holes=16),
        ]

    if data == 'train' or data == 'check':
        steps = _augmentations()
        if data == 'train':
            steps.append(Normalize(mean=norm_mean, std=norm_std))
        steps.append(ToTensorV2())
        return Compose(steps, additional_targets={'image_annot': 'image'})

    if data == 'valid':
        steps = [
            Resize(CFG.size, CFG.size),
            Normalize(mean=norm_mean, std=norm_std),
            ToTensorV2(),
        ]
        return Compose(steps)

    # Unknown mode: no pipeline.
    return None
h, w, channel = image.shape h = math.ceil( h / factor ) * factor // 2 # 向上取整,由于模型需要下采样5次图像会变成原来的2的5次方分之一,需要输入图像是2的5次方的倍数 w = math.ceil(w / factor) * factor // 2 #print(h, w) mask = np.zeros(shape=(h, w)) image = cv2.resize(image, (w, h)) image_ori = image # image=illum(image) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) aug = Compose([ # PadIfNeeded(min_height=h, min_width=w, border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0), # padding到2的5次方的倍数 Normalize(p=1) # 归一化 ]) augmented = aug(image=image, mask=mask) image = augmented['image'] image = img_to_tensor(image).unsqueeze(0).to( DEVICE ) # torch.from_numpy(img).unsqueeze(0).unsqueeze(0).float().to(DEVICE) # 图像转为tensor格式 output = model(image) # 预测 seg_mask = (output[0].data.cpu().numpy().argmax(axis=0)).astype( np.uint8) t2 = time.time() print("time:", (t2 - t1)) full_mask = np.zeros((h, w, 3)) for mask_label, sub_color in enumerate(class_color): full_mask[seg_mask == mask_label, 0] = sub_color[2]
metrics = logger.metric trainer = Trainer(model, losses, loss_weights, metrics=metrics, optimizer=optimizer, device=opt.device, print_iter=opt.print_iter, num_iter=opt.num_iters, batches_per_update=opt.batches_per_update, **logger.trainer_params) trainer.set_device(opt.gpus, opt.device) for i in range(25 * 1000): ret, img = video.read() if not ret: break image = cv2.resize(img, (0, 0), fx=sc, fy=sc) image = image[:(image.shape[0] // 32) * 32, :(image.shape[1] // 32) * 32, :].copy() mini_dataset = torch.utils.data.TensorDataset( Tensor(Normalize()(image=image)["image"].transpose( 2, 0, 1)).unsqueeze(0)) mini_dataset.scales = (1, ) mini_loader = torch.utils.data.DataLoader(mini_dataset) img, mp = trainer.visualize(mini_loader) img = img | image writer.write(img) m_img = np.tile((mp * 255).astype(np.uint8), (3, 1, 1)).transpose(1, 2, 0) writer.release()
def get_transform() -> Compose:
    """Return a ``Compose`` whose only step is a fixed mean/std ``Normalize``."""
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    return Compose([normalize])
def main():
    """Train the sequence EfficientNet-b7 on real/fake track pairs.

    Steps:
      1. Read ``config.yaml`` and seed / configure cuDNN.
      2. Build an EfficientNet-b7 with a single output, load the pretrained
         weights (all but the ``_fc`` head) and replace every expanding
         ``MBConvBlock``'s ``_expand_conv`` with a ``SeqExpandConv``
         initialised from the original weights.
      3. Train with SGD: while ``iteration <= NUM_WARMUP_ITERATIONS`` only
         the ``_fc`` head is trained; afterwards the whole model is trained
         with a linearly decaying learning rate.
      4. Each batch blends real and fake inputs with a Beta(0.5, 0.5)
         weight, which is also the regression target of the BCE loss.

    Snapshots are written every ``SNAPSHOT_FREQUENCY`` iterations and
    training stops once ``MAX_ITERS`` iterations are reached.

    Raises:
        AssertionError: if loading the pretrained weights reports missing
            keys other than the ``_fc`` head.
        OSError: if the snapshot or log directory already exists
            (``os.makedirs`` is called without ``exist_ok``).
    """
    with open('config.yaml', 'r') as f:
        # safe_load: plain yaml.load() without an explicit Loader is unsafe
        # and is rejected (TypeError) by PyYAML >= 6.
        config = yaml.safe_load(f)

    set_global_seed(SEED)
    prepare_cudnn(deterministic=True, benchmark=True)

    model = EfficientNet.from_name('efficientnet-b7',
                                   override_params={'num_classes': 1})
    state = torch.load(PRETRAINED_WEIGHTS_PATH,
                       map_location=lambda storage, loc: storage)
    # The classification head is re-initialised, so drop its weights.
    state.pop('_fc.weight')
    state.pop('_fc.bias')
    res = model.load_state_dict(state, strict=False)
    assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias'
                                         ]), 'issue loading pretrained weights'

    for module in model.modules():
        if isinstance(module, MBConvBlock):
            if module._block_args.expand_ratio != 1:
                expand_conv = module._expand_conv
                seq_expand_conv = SeqExpandConv(expand_conv.in_channels,
                                                expand_conv.out_channels,
                                                len(TRAIN_INDICES))
                # Copy a third of the original kernel into each of the first
                # three slices along dim 2 of the new weight.
                for depth_idx in range(3):
                    seq_expand_conv.conv.weight.data[:, :,
                                                     depth_idx, :, :].copy_(
                                                         expand_conv.weight.data
                                                         / 3)
                module._expand_conv = seq_expand_conv

    model = model.cuda()

    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    _, rand_augment, _ = transforms_imagenet_train(
        (CROP_HEIGHT, CROP_WIDTH),
        auto_augment='original-mstd0.5',
        separate=True)

    train_dataset = TrackPairDataset(
        os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT),
        os.path.join(config['ARTIFACTS_PATH'], TRACK_PAIRS_FILE_NAME),
        TRAIN_INDICES,
        track_length=TRACK_LENGTH,
        track_transform=TrackTransform(FPS_RANGE, SCALE_RANGE, CRF_RANGE,
                                       TUNE_VALUES),
        image_transform=Compose([
            SmallestMaxSize(MIN_SIZE),
            HorizontalFlip(),
            RandomCrop(CROP_HEIGHT, CROP_WIDTH),
            VisionTransform(rand_augment, p=0.5),
            normalize,
            ToTensor()
        ]),
        sequence_mode=True)

    print('Train dataset size: {}.'.format(len(train_dataset)))

    warmup_optimizer = torch.optim.SGD(model._fc.parameters(),
                                       INITIAL_LR,
                                       momentum=MOMENTUM,
                                       weight_decay=WEIGHT_DECAY,
                                       nesterov=True)

    full_optimizer = torch.optim.SGD(model.parameters(),
                                     INITIAL_LR,
                                     momentum=MOMENTUM,
                                     weight_decay=WEIGHT_DECAY,
                                     nesterov=True)
    # Linear decay from INITIAL_LR to 0 over MAX_ITERS scheduler steps.
    full_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        full_optimizer,
        lambda iteration: (MAX_ITERS - iteration) / MAX_ITERS)

    snapshots_root = os.path.join(config['ARTIFACTS_PATH'], SNAPSHOTS_ROOT,
                                  OUTPUT_FOLDER_NAME)
    os.makedirs(snapshots_root)
    log_root = os.path.join(config['ARTIFACTS_PATH'], LOGS_ROOT,
                            OUTPUT_FOLDER_NAME)
    os.makedirs(log_root)

    writer = SummaryWriter(log_root)

    iteration = 0
    if iteration < NUM_WARMUP_ITERATIONS:
        print('Start {} warmup iterations'.format(NUM_WARMUP_ITERATIONS))
        # Warmup: freeze everything except the classification head.
        model.eval()
        model._fc.train()
        for param in model.parameters():
            param.requires_grad = False
        for param in model._fc.parameters():
            param.requires_grad = True
        optimizer = warmup_optimizer
    else:
        print('Start without warmup iterations')
        model.train()
        optimizer = full_optimizer

    max_lr = max(param_group["lr"]
                 for param_group in full_optimizer.param_groups)
    writer.add_scalar('train/max_lr', max_lr, iteration)

    epoch = 0
    fake_prob_dist = distributions.beta.Beta(0.5, 0.5)
    while True:
        epoch += 1
        print('Epoch {} is in progress'.format(epoch))
        loader = DataLoader(train_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=True,
                            num_workers=NUM_WORKERS,
                            drop_last=True)
        for samples in tqdm.tqdm(loader):
            iteration += 1
            fake_input_tensor = torch.stack(samples['fake']).transpose(
                0, 1).cuda()
            real_input_tensor = torch.stack(samples['real']).transpose(
                0, 1).cuda()
            # One blend weight per track; it doubles as the soft target.
            target_fake_prob = fake_prob_dist.sample(
                (len(fake_input_tensor), )).float().cuda()
            fake_weight = target_fake_prob.view(-1, 1, 1, 1, 1)

            input_tensor = (
                1.0 - fake_weight
            ) * real_input_tensor + fake_weight * fake_input_tensor
            pred = model(input_tensor.flatten(0, 1)).flatten()

            loss = F.binary_cross_entropy_with_logits(
                pred,
                target_fake_prob.repeat_interleave(len(TRAIN_INDICES)))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if iteration > NUM_WARMUP_ITERATIONS:
                full_lr_scheduler.step()
                max_lr = max(param_group["lr"]
                             for param_group in full_optimizer.param_groups)
                writer.add_scalar('train/max_lr', max_lr, iteration)

            writer.add_scalar('train/loss', loss.item(), iteration)

            if iteration == NUM_WARMUP_ITERATIONS:
                print('Stop warmup iterations')
                # Unfreeze the whole model and switch optimizers.
                model.train()
                for param in model.parameters():
                    param.requires_grad = True
                optimizer = full_optimizer

            if iteration % SNAPSHOT_FREQUENCY == 0:
                snapshot_name = SNAPSHOT_NAME_TEMPLATE.format(iteration)
                snapshot_path = os.path.join(snapshots_root, snapshot_name)
                print('Saving snapshot to {}'.format(snapshot_path))
                torch.save(model.state_dict(), snapshot_path)

            if iteration >= MAX_ITERS:
                print('Stop training due to maximum iteration exceeded')
                return
sout = 'Train/Test {:d}/{:d}\n'.format(len(TRAIN_FILES_ALL), len(TEST_FILES)) + \ 'Train mean/std {:.3f}/{:.3f}\n'.format(train_mean, train_std) + \ 'Test mean/std {:.3f}/{:.3f}\n'.format(test_mean, test_std) +\ 'Train num/sample {:d}'.format(len(TRAIN_FILES)) + ' '.join(TRAIN_FILES[:2]) + \ '\nValid num/sample {:d}'.format(len(VALID_FILES)) + ' '.join(VALID_FILES[:2])+'\n' print2file(sout, LOG_FILE) ######################################################################## # Augmentations if args.augment == 0: augment_train = Compose( [ Flip(p=0.5), # Flip vertically or horizontally or both ShiftScaleRotate(rotate_limit=10, p=0.3), Normalize(mean=(train_mean, train_mean, train_mean), std=(train_std, train_std, train_std)), ToFloat(max_value=1.) ], p=1) elif args.augment == 2: augment_train = Compose( [ Flip(p=0.5), # Flip vertically or horizontally or both RandomBrightnessContrast(p=0.3), ShiftScaleRotate(rotate_limit=10, p=0.3), Normalize(mean=(train_mean, train_mean, train_mean), std=(train_std, train_std, train_std)), ToFloat(max_value=1.) ], p=1) elif args.augment == 1:
def post_transforms():
    """Return the final pipeline stage: default ``Normalize`` then ``ToTensor``."""
    final_steps = [
        Normalize(),
        ToTensor(),
    ]
    return Compose(final_steps)
def __init__(self):
    """Store a ``Normalize`` -> ``ToTensor`` pipeline on ``self.transform``."""
    self.transform = Compose([Normalize(), ToTensor()])
def strong(p=1):
    """Return a ``Compose`` (applied with probability ``p``) that only normalizes."""
    steps = [Normalize()]
    return Compose(steps, p=p)
def run(*options, cfg=None, local_rank=0, debug=False):
    """Run training and validation of model

    Notes:
        Options can be passed in via the options argument and loaded from the
        cfg file. Options from default.py will be overridden by options loaded
        from cfg file. Options passed in via options argument will override
        option loaded from cfg file.

    Args:
        *options (str,int ,optional): Options used to override what is loaded
            from the config. To see what options are available consult
            default.py
        cfg (str, optional): Location of config file to load. Defaults to
            None.
        local_rank (int, optional): Rank of this process. Selects the CUDA
            device in distributed runs, is passed to the distributed samplers
            and gates the logging / TensorBoard / snapshot handlers, which
            are only registered on rank 0. Defaults to 0.
        debug (bool, optional): When True, train and validate on small
            subsets and use a short epoch length. Defaults to False.
    """
    update_config(config, options=options, config_file=cfg)

    # we will write the model under outputs / config_file_name / model_dir
    config_file_name = "default_config" if not cfg else cfg.split(
        "/")[-1].split(".")[0]

    # Start logging
    load_log_configuration(config.LOG_CONFIG)
    logger = logging.getLogger(__name__)
    logger.debug(config.WORKERS)
    silence_other_ranks = True

    world_size = int(os.environ.get("WORLD_SIZE", 1))
    distributed = world_size > 1

    if distributed:
        # FOR DISTRIBUTED: Set the device according to local_rank.
        torch.cuda.set_device(local_rank)

        # FOR DISTRIBUTED: Initialize the backend. torch.distributed.launch
        # will provide environment variables, and requires that you use
        # init_method=`env://`.
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    epochs_per_cycle = config.TRAIN.END_EPOCH // config.TRAIN.SNAPSHOTS
    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Setup Augmentations
    basic_aug = Compose([
        Normalize(mean=(config.TRAIN.MEAN, ),
                  std=(config.TRAIN.STD, ),
                  max_pixel_value=1),
        PadIfNeeded(
            min_height=config.TRAIN.PATCH_SIZE,
            min_width=config.TRAIN.PATCH_SIZE,
            border_mode=config.OPENCV_BORDER_CONSTANT,
            always_apply=True,
            mask_value=255,
        ),
        Resize(
            config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT,
            config.TRAIN.AUGMENTATIONS.RESIZE.WIDTH,
            always_apply=True,
        ),
        PadIfNeeded(
            min_height=config.TRAIN.AUGMENTATIONS.PAD.HEIGHT,
            min_width=config.TRAIN.AUGMENTATIONS.PAD.WIDTH,
            border_mode=config.OPENCV_BORDER_CONSTANT,
            always_apply=True,
            mask_value=255,
        ),
    ])
    if config.TRAIN.AUGMENTATION:
        train_aug = Compose([basic_aug, HorizontalFlip(p=0.5)])
        val_aug = basic_aug
    else:
        train_aug = val_aug = basic_aug

    TrainPatchLoader = get_patch_loader(config)

    train_set = TrainPatchLoader(
        config.DATASET.ROOT,
        split="train",
        is_transform=True,
        stride=config.TRAIN.STRIDE,
        patch_size=config.TRAIN.PATCH_SIZE,
        augmentations=train_aug,
    )

    val_set = TrainPatchLoader(
        config.DATASET.ROOT,
        split="val",
        is_transform=True,
        stride=config.TRAIN.STRIDE,
        patch_size=config.TRAIN.PATCH_SIZE,
        augmentations=val_aug,
    )

    logger.info(f"Validation examples {len(val_set)}")
    n_classes = train_set.n_classes

    if debug:
        val_set = data.Subset(val_set,
                              range(config.VALIDATION.BATCH_SIZE_PER_GPU))
        train_set = data.Subset(train_set,
                                range(config.TRAIN.BATCH_SIZE_PER_GPU * 2))

    logger.info(f"Training examples {len(train_set)}")
    logger.info(f"Validation examples {len(val_set)}")

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_set, num_replicas=world_size, rank=local_rank)

    train_loader = data.DataLoader(
        train_set,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        num_workers=config.WORKERS,
        sampler=train_sampler,
    )

    val_sampler = torch.utils.data.distributed.DistributedSampler(
        val_set, num_replicas=world_size, rank=local_rank)

    val_loader = data.DataLoader(
        val_set,
        batch_size=config.VALIDATION.BATCH_SIZE_PER_GPU,
        num_workers=config.WORKERS,
        sampler=val_sampler,
    )

    model = getattr(models, config.MODEL.NAME).get_seg_model(config)

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
    model = model.to(device)  # Send to GPU

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=config.TRAIN.MAX_LR,
        momentum=config.TRAIN.MOMENTUM,
        weight_decay=config.TRAIN.WEIGHT_DECAY,
    )

    # weights are inversely proportional to the frequency of the classes in
    # the training set
    class_weights = torch.tensor(config.DATASET.CLASS_WEIGHTS,
                                 device=device,
                                 requires_grad=False)

    criterion = torch.nn.CrossEntropyLoss(weight=class_weights,
                                          ignore_index=255,
                                          reduction="mean")

    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[device], find_unused_parameters=True)

    snapshot_duration = epochs_per_cycle * len(
        train_loader) if not debug else 2 * len(train_loader)

    # Warmup runs for the first half (5 epochs) of a 10-epoch linear cycle,
    # i.e. only the ramp from MAX_LR to MAX_LR * world_size is used.
    warmup_duration = 5 * len(train_loader)
    warmup_scheduler = LinearCyclicalScheduler(
        optimizer,
        "lr",
        start_value=config.TRAIN.MAX_LR,
        end_value=config.TRAIN.MAX_LR * world_size,
        cycle_size=10 * len(train_loader),
    )
    cosine_scheduler = CosineAnnealingScheduler(
        optimizer,
        "lr",
        config.TRAIN.MAX_LR * world_size,
        config.TRAIN.MIN_LR * world_size,
        cycle_size=snapshot_duration,
    )

    scheduler = ConcatScheduler(
        schedulers=[warmup_scheduler, cosine_scheduler],
        durations=[warmup_duration])

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        criterion,
                                        prepare_batch,
                                        device=device)

    trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
    # Set to update the epoch parameter of our distributed data sampler so
    # that we get different shuffles
    trainer.add_event_handler(Events.EPOCH_STARTED,
                              update_sampler_epoch(train_loader))

    # NOTE: must be a boolean `and`. The bitwise form
    # `silence_other_ranks & local_rank != 0` parses as
    # `(silence_other_ranks & local_rank) != 0` and only matched odd ranks.
    if silence_other_ranks and local_rank != 0:
        logging.getLogger("ignite.engine.engine.Engine").setLevel(
            logging.WARNING)

    def _select_pred_and_mask(model_out_dict):
        return (model_out_dict["y_pred"].squeeze(),
                model_out_dict["mask"].squeeze())

    evaluator = create_supervised_evaluator(
        model,
        prepare_batch,
        metrics={
            "nll":
            Loss(criterion,
                 output_transform=_select_pred_and_mask,
                 device=device),
            "pixa":
            pixelwise_accuracy(n_classes,
                               output_transform=_select_pred_and_mask,
                               device=device),
            "cacc":
            class_accuracy(n_classes,
                           output_transform=_select_pred_and_mask,
                           device=device),
            "mca":
            mean_class_accuracy(n_classes,
                                output_transform=_select_pred_and_mask,
                                device=device),
            "ciou":
            class_iou(n_classes,
                      output_transform=_select_pred_and_mask,
                      device=device),
            "mIoU":
            mean_iou(n_classes,
                     output_transform=_select_pred_and_mask,
                     device=device),
        },
        device=device,
    )

    # Set the validation run to start on the epoch completion of the
    # training run
    trainer.add_event_handler(Events.EPOCH_COMPLETED,
                              Evaluator(evaluator, val_loader))

    if local_rank == 0:  # Run only on master process
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED,
            logging_handlers.log_training_output(
                log_interval=config.TRAIN.BATCH_SIZE_PER_GPU),
        )
        trainer.add_event_handler(Events.EPOCH_STARTED,
                                  logging_handlers.log_lr(optimizer))

        try:
            output_dir = generate_path(
                config.OUTPUT_DIR,
                git_branch(),
                git_hash(),
                config_file_name,
                config.TRAIN.MODEL_DIR,
                current_datetime(),
            )
        except TypeError:
            # Fall back to a path without the git branch / hash components.
            output_dir = generate_path(
                config.OUTPUT_DIR,
                config_file_name,
                config.TRAIN.MODEL_DIR,
                current_datetime(),
            )

        summary_writer = create_summary_writer(
            log_dir=path.join(output_dir, config.LOG_DIR))
        logger.info(
            f"Logging Tensorboard to {path.join(output_dir, config.LOG_DIR)}")
        trainer.add_event_handler(
            Events.EPOCH_STARTED,
            tensorboard_handlers.log_lr(summary_writer, optimizer, "epoch"),
        )
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED,
            tensorboard_handlers.log_training_output(summary_writer),
        )
        evaluator.add_event_handler(
            Events.EPOCH_COMPLETED,
            logging_handlers.log_metrics(
                "Validation results",
                metrics_dict={
                    "nll": "Avg loss :",
                    "mIoU": " Avg IoU :",
                    "pixa": "Pixelwise Accuracy :",
                    "mca": "Mean Class Accuracy :",
                },
            ),
        )
        evaluator.add_event_handler(
            Events.EPOCH_COMPLETED,
            tensorboard_handlers.log_metrics(
                summary_writer,
                trainer,
                "epoch",
                metrics_dict={
                    "mIoU": "Validation/IoU",
                    "nll": "Validation/Loss",
                    "mca": "Validation/MCA",
                },
            ),
        )

        def _select_max(pred_tensor):
            return pred_tensor.max(1)[1]

        def _tensor_to_numpy(pred_tensor):
            return pred_tensor.squeeze().cpu().numpy()

        transform_func = compose(np_to_tb,
                                 decode_segmap(n_classes=n_classes),
                                 _tensor_to_numpy)

        transform_pred = compose(transform_func, _select_max)

        evaluator.add_event_handler(
            Events.EPOCH_COMPLETED,
            create_image_writer(summary_writer, "Validation/Image", "image"),
        )
        evaluator.add_event_handler(
            Events.EPOCH_COMPLETED,
            create_image_writer(summary_writer,
                                "Validation/Mask",
                                "mask",
                                transform_func=transform_func),
        )
        evaluator.add_event_handler(
            Events.EPOCH_COMPLETED,
            create_image_writer(
                summary_writer,
                "Validation/Pred",
                "y_pred",
                transform_func=transform_pred,
            ),
        )

        def snapshot_function():
            return (trainer.state.iteration % snapshot_duration) == 0

        checkpoint_handler = SnapshotHandler(
            output_dir,
            config.MODEL.NAME,
            extract_metric_from("mIoU"),
            snapshot_function,
        )
        evaluator.add_event_handler(Events.EPOCH_COMPLETED,
                                    checkpoint_handler, {"model": model})
        logger.info("Starting training")

    if debug:
        trainer.run(
            train_loader,
            max_epochs=config.TRAIN.END_EPOCH,
            epoch_length=config.TRAIN.BATCH_SIZE_PER_GPU * 2,
            seed=config.SEED,
        )
    else:
        trainer.run(train_loader,
                    max_epochs=config.TRAIN.END_EPOCH,
                    epoch_length=len(train_loader),
                    seed=config.SEED)
from albumentations import RandomResizedCrop, Compose, Flip, Resize, Normalize
from albumentations.pytorch import ToTensorV2, ToTensor

# Side length (pixels) of the square images produced by both pipelines.
SIZE = 224

# Training: random resized crop and occasional flip, then normalize and
# convert to a tensor.
train_aug = Compose(
    [
        RandomResizedCrop(height=SIZE, width=SIZE),
        Flip(p=0.3),
        Normalize(),
        ToTensorV2(),
    ]
)

# Validation: deterministic resize, then normalize and convert to a tensor.
valid_aug = Compose(
    [
        Resize(width=SIZE, height=SIZE),
        Normalize(),
        ToTensorV2(),
    ]
)
def run(*options, cfg=None, debug=False):
    """Run training and validation of model

    Notes:
        Options can be passed in via the options argument and loaded from the
        cfg file. Options from default.py will be overridden by options loaded
        from cfg file. Options passed in via options argument will override
        option loaded from cfg file.

    Args:
        *options (str,int ,optional): Options used to override what is loaded
            from the config. To see what options are available consult
            default.py
        cfg (str, optional): Location of config file to load. Defaults to
            None.
        debug (bool, optional): When True, validate on a 3-item subset and
            use short snapshot cycles / epoch length. Defaults to False.
    """
    update_config(config, options=options, config_file=cfg)

    # we will write the model under outputs / config_file_name / model_dir
    config_file_name = "default_config" if not cfg else cfg.split(
        "/")[-1].split(".")[0]

    # Start logging
    load_log_configuration(config.LOG_CONFIG)
    logger = logging.getLogger(__name__)
    logger.debug(config.WORKERS)

    epochs_per_cycle = config.TRAIN.END_EPOCH // config.TRAIN.SNAPSHOTS
    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Setup Augmentations
    basic_aug = Compose([
        Normalize(mean=(config.TRAIN.MEAN, ),
                  std=(config.TRAIN.STD, ),
                  max_pixel_value=1)
    ])
    if config.TRAIN.AUGMENTATION:
        train_aug = Compose([basic_aug, HorizontalFlip(p=0.5)])
        val_aug = basic_aug
    else:
        train_aug = val_aug = basic_aug

    TrainLoader = get_section_loader(config)

    train_set = TrainLoader(
        data_dir=config.DATASET.ROOT,
        split="train",
        is_transform=True,
        augmentations=train_aug,
    )

    val_set = TrainLoader(
        data_dir=config.DATASET.ROOT,
        split="val",
        is_transform=True,
        augmentations=val_aug,
    )

    class CustomSampler(torch.utils.data.Sampler):
        """Yield, in random order, the indices of one randomly chosen group.

        On every iteration a character ("i" or "x") is drawn at random and
        only the entries of ``data_source`` whose name contains that
        character are sampled.
        """

        def __init__(self, data_source):
            self.data_source = data_source

        def __iter__(self):
            char = ["i" if np.random.randint(2) == 1 else "x"]
            self.indices = [
                idx for (idx, name) in enumerate(self.data_source)
                if char[0] in name
            ]
            return (self.indices[i]
                    for i in torch.randperm(len(self.indices)))

        def __len__(self):
            # NOTE(review): reports the full source length although
            # __iter__ yields only the selected subset.
            return len(self.data_source)

    n_classes = train_set.n_classes

    val_list = val_set.sections
    # The training sampler must index the *training* sections; building it
    # from val_set.sections produced indices unrelated to train_set.
    train_list = train_set.sections

    train_loader = data.DataLoader(
        train_set,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        sampler=CustomSampler(train_list),
        num_workers=config.WORKERS,
        shuffle=False,
    )

    if debug:
        val_set = data.Subset(val_set, range(3))

    val_loader = data.DataLoader(
        val_set,
        batch_size=config.VALIDATION.BATCH_SIZE_PER_GPU,
        sampler=CustomSampler(val_list),
        num_workers=config.WORKERS,
    )

    model = getattr(models, config.MODEL.NAME).get_seg_model(config)

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
    model = model.to(device)  # Send to GPU

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=config.TRAIN.MAX_LR,
        momentum=config.TRAIN.MOMENTUM,
        weight_decay=config.TRAIN.WEIGHT_DECAY,
    )

    try:
        output_dir = generate_path(
            config.OUTPUT_DIR,
            git_branch(),
            git_hash(),
            config_file_name,
            config.TRAIN.MODEL_DIR,
            current_datetime(),
        )
    except TypeError:
        # Fall back to a path without the git branch / hash components.
        output_dir = generate_path(
            config.OUTPUT_DIR,
            config_file_name,
            config.TRAIN.MODEL_DIR,
            current_datetime(),
        )

    summary_writer = create_summary_writer(
        log_dir=path.join(output_dir, config.LOG_DIR))

    snapshot_duration = epochs_per_cycle * len(
        train_loader) if not debug else 2 * len(train_loader)
    scheduler = CosineAnnealingScheduler(optimizer,
                                         "lr",
                                         config.TRAIN.MAX_LR,
                                         config.TRAIN.MIN_LR,
                                         cycle_size=snapshot_duration)

    # weights are inversely proportional to the frequency of the classes in
    # the training set
    class_weights = torch.tensor(config.DATASET.CLASS_WEIGHTS,
                                 device=device,
                                 requires_grad=False)

    criterion = torch.nn.CrossEntropyLoss(weight=class_weights,
                                          ignore_index=255,
                                          reduction="mean")

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        criterion,
                                        prepare_batch,
                                        device=device)

    trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)

    trainer.add_event_handler(
        Events.ITERATION_COMPLETED,
        logging_handlers.log_training_output(
            log_interval=config.TRAIN.BATCH_SIZE_PER_GPU),
    )
    trainer.add_event_handler(Events.EPOCH_STARTED,
                              logging_handlers.log_lr(optimizer))
    trainer.add_event_handler(
        Events.EPOCH_STARTED,
        tensorboard_handlers.log_lr(summary_writer, optimizer, "epoch"),
    )
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED,
        tensorboard_handlers.log_training_output(summary_writer),
    )

    def _select_pred_and_mask(model_out_dict):
        return (model_out_dict["y_pred"].squeeze(),
                model_out_dict["mask"].squeeze())

    evaluator = create_supervised_evaluator(
        model,
        prepare_batch,
        metrics={
            "nll":
            Loss(criterion,
                 output_transform=_select_pred_and_mask,
                 device=device),
            "pixacc":
            pixelwise_accuracy(n_classes,
                               output_transform=_select_pred_and_mask,
                               device=device),
            "cacc":
            class_accuracy(n_classes,
                           output_transform=_select_pred_and_mask,
                           device=device),
            "mca":
            mean_class_accuracy(n_classes,
                                output_transform=_select_pred_and_mask,
                                device=device),
            "ciou":
            class_iou(n_classes,
                      output_transform=_select_pred_and_mask,
                      device=device),
            "mIoU":
            mean_iou(n_classes,
                     output_transform=_select_pred_and_mask,
                     device=device),
        },
        device=device,
    )

    # Validate at the end of every training epoch.
    trainer.add_event_handler(Events.EPOCH_COMPLETED,
                              Evaluator(evaluator, val_loader))

    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        logging_handlers.log_metrics(
            "Validation results",
            metrics_dict={
                "nll": "Avg loss :",
                "pixacc": "Pixelwise Accuracy :",
                "mca": "Avg Class Accuracy :",
                "mIoU": "Avg Class IoU :",
            },
        ),
    )

    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        logging_handlers.log_class_metrics(
            "Per class validation results",
            metrics_dict={
                "ciou": "Class IoU :",
                "cacc": "Class Accuracy :"
            },
        ),
    )

    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        tensorboard_handlers.log_metrics(
            summary_writer,
            trainer,
            "epoch",
            metrics_dict={
                "mIoU": "Validation/mIoU",
                "nll": "Validation/Loss",
                "mca": "Validation/MCA",
                "pixacc": "Validation/Pixel_Acc",
            },
        ),
    )

    def _select_max(pred_tensor):
        return pred_tensor.max(1)[1]

    def _tensor_to_numpy(pred_tensor):
        return pred_tensor.squeeze().cpu().numpy()

    transform_func = compose(np_to_tb, decode_segmap(n_classes=n_classes),
                             _tensor_to_numpy)

    transform_pred = compose(transform_func, _select_max)

    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer, "Validation/Image", "image"),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer,
                            "Validation/Mask",
                            "mask",
                            transform_func=transform_func),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer,
                            "Validation/Pred",
                            "y_pred",
                            transform_func=transform_pred),
    )

    def snapshot_function():
        return (trainer.state.iteration % snapshot_duration) == 0

    checkpoint_handler = SnapshotHandler(
        output_dir,
        config.MODEL.NAME,
        extract_metric_from("mIoU"),
        snapshot_function,
    )

    evaluator.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler,
                                {"model": model})

    logger.info("Starting training")
    if debug:
        trainer.run(
            train_loader,
            max_epochs=config.TRAIN.END_EPOCH,
            epoch_length=config.TRAIN.BATCH_SIZE_PER_GPU,
            seed=config.SEED,
        )
    else:
        trainer.run(train_loader,
                    max_epochs=config.TRAIN.END_EPOCH,
                    epoch_length=len(train_loader),
                    seed=config.SEED)
decoder.to(device) encoder.eval() decoder.eval() x_adv = [] with torch.no_grad(): bar = tqdm.tqdm(paths) for path in bar: filename = os.path.basename(path) bar.set_description(f"processing:{filename}") image = cv2.imread(path) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) h, w = image.shape[:2] norm = Compose([ Resize(img_size, img_size, always_apply=True), Normalize(mean=means, std=std, always_apply=True) ]) norm_data = norm(image=image) image = norm_data["image"] if image.ndim > 2: image = np.transpose(image, axes=[2, 0, 1]) else: image = np.expand_dims(image, axis=0) image = torch.from_numpy(image) image = image.unsqueeze(0) image = image.to(device, dtype=torch.float32) en = encoder(image) de = decoder(en) adv = image + alpha * de #print(adv.shape) adv = adv.squeeze(0)
from albumentations import (
    Compose,
    Resize,
    RandomCrop,
    Flip,
    HorizontalFlip,
    VerticalFlip,
    Transpose,
    RandomRotate90,
    ShiftScaleRotate,
    OneOf,
    OpticalDistortion,
    HueSaturationValue,
    RandomGamma,
    RandomBrightness,
    Normalize,
)
from albumentations.pytorch import ToTensor

# Training pipeline: optional random crop / flip / rotation / brightness
# jitter, then a fixed 128x128 resize, normalisation and tensor conversion.
train_aug = Compose(
    transforms=[
        RandomCrop(height=96, width=96, p=0.2),
        # At most one of these geometric transforms is applied per sample.
        OneOf(
            transforms=[
                VerticalFlip(p=0.2),
                HorizontalFlip(p=0.3),
                Transpose(p=0.2),
                RandomRotate90(p=0.2),
            ],
            p=0.3,
        ),
        ShiftScaleRotate(p=0.2),
        RandomBrightness(p=0.2),
        Resize(height=128, width=128, always_apply=True),
        Normalize(mean=0.06922848809290576, std=0.20515700083327537),
        ToTensor(),
    ]
)

# Validation pipeline: only the deterministic steps (resize, normalise,
# convert), using the same statistics as training.
valid_aug = Compose(
    transforms=[
        Resize(height=128, width=128, always_apply=True),
        Normalize(mean=0.06922848809290576, std=0.20515700083327537),
        ToTensor(),
    ]
)
def main(fold):
    """Build the model, loss, transforms and loaders for one fold, then train.

    Args:
        fold: fold identifier; passed to ``trainval_split`` to choose the
            train/validation file lists and forwarded to ``train``.
    """
    # a CUDA device is mandatory
    assert torch.cuda.is_available()
    # the input dimension does not change, so let cuDNN pick the fastest kernels
    cudnn.benchmark = True

    num_classes = config.problem_class[params.problem_type]

    # the network takes 3-channel (RGB) input of size 3 * h * w;
    # it is wrapped in DataParallel and moved to the GPU(s)
    network = params.model(in_channels=3, num_classes=num_classes)
    model = nn.DataParallel(network, device_ids=params.device_ids).cuda()

    # loss function and validation metric depend on binary vs multi-class
    if num_classes == 2:
        loss = LossBinary(jaccard_weight=params.jaccard_weight)
        valid_metric = validation_binary
    else:
        loss = LossMulti(num_classes=num_classes,
                         jaccard_weight=params.jaccard_weight)
        valid_metric = validation_multi

    # trainset transform: resize -> normalize -> pad
    train_steps = [
        Resize(height=params.train_height, width=params.train_width, p=1),
        Normalize(p=1),
        PadIfNeeded(min_height=params.train_height,
                    min_width=params.train_width, p=1),
    ]
    train_transform = Compose(train_steps, p=1)

    # validset transform: pad to the validation size -> resize to the
    # training size -> normalize
    valid_steps = [
        PadIfNeeded(min_height=params.valid_height,
                    min_width=params.valid_width, p=1),
        Resize(height=params.train_height, width=params.train_width, p=1),
        Normalize(p=1),
    ]
    valid_transform = Compose(valid_steps, p=1)

    # train/valid filenames
    train_filenames, valid_filenames = trainval_split(fold)
    print('fold {}, {} train / {} validation files'.format(
        fold, len(train_filenames), len(valid_filenames)))

    # train dataloader
    train_dataset = RobotSegDataset(
        train_filenames,
        transform=train_transform,
        schedule="ordered",
        batch_size=params.batch_size,
        problem_type=params.problem_type,
    )
    train_loader = DataLoader(
        dataset=train_dataset,
        shuffle=False,  # set to false to disable pytorch dataset shuffle
        num_workers=params.num_workers,
        batch_size=params.batch_size,
        pin_memory=True,
    )

    # valid dataloader
    valid_dataset = RobotSegDataset(
        valid_filenames,
        transform=valid_transform,
        problem_type=params.problem_type,
    )
    valid_loader = DataLoader(
        dataset=valid_dataset,
        shuffle=False,  # set to false to disable pytorch dataset shuffle
        num_workers=0,  # params.num_workers,
        batch_size=1,  # in valid time, images have to be fed one by one
        pin_memory=True,
    )

    train(
        model=model,
        loss_func=loss,
        train_loader=train_loader,
        valid_loader=valid_loader,
        valid_metric=valid_metric,
        fold=fold,
        num_classes=num_classes,
    )
def make_train_and_validation_data_loaders(
    hyper_parameters: Dict,
) -> Tuple[DataLoader, DataLoader]:
    """Create the training and validation data loaders for one fold.

    Args:
        hyper_parameters: Settings dictionary. The keys read here are
            ``input_data_type`` ("RGB", "YCbCr" or "DCT"),
            ``validation_fold_number``, ``n_classes``, ``use_quality_factor``,
            ``separate_classes_by_quality_factor``, ``batch_size``,
            ``devices`` and ``training_workers``.

    Returns:
        A ``(train_data_loader, validation_data_loader)`` tuple.

    Raises:
        ValueError: If ``input_data_type`` is not one of the supported values.
    """
    input_data_type = hyper_parameters["input_data_type"]
    validation_fold_number = hyper_parameters["validation_fold_number"]

    if input_data_type not in ("RGB", "YCbCr", "DCT"):
        raise ValueError(
            f"Invalid input data type provided: {input_data_type}"
        )

    if input_data_type == "DCT":
        # DCT inputs are used without any augmentation.
        data_set_class = DCTDataSet
        augmentations_train = None
        augmentations_validation = None
    else:
        data_set_class = ColourDataSet

        def closing_steps():
            # Only RGB inputs are normalised; both colour spaces end with
            # the tensor conversion. Fresh instances are built on each call.
            steps = [Normalize(p=1)] if input_data_type == "RGB" else []
            steps.append(ToTensorV2())
            return steps

        # Random flips/rotations are applied to the training set only.
        augmentations_train = Compose(
            [
                VerticalFlip(p=0.5),
                HorizontalFlip(p=0.5),
                RandomRotate90(p=0.5),
                *closing_steps(),
            ],
            p=1,
        )
        augmentations_validation = Compose(closing_steps(), p=1)

    # Load a DataFrame with the files and targets, then split it into folds.
    data_set = add_fold_to_data_set(
        load_data(n_classes=hyper_parameters["n_classes"])
    )

    # Rows of the validation fold versus every other fold.
    train_rows = data_set[data_set["fold"] != validation_fold_number]
    validation_rows = data_set[data_set["fold"] == validation_fold_number]

    # Arguments common to both data sets.
    shared_data_set_kwargs = dict(
        n_classes=hyper_parameters["n_classes"],
        colour_space=input_data_type,
        use_quality_factor=hyper_parameters["use_quality_factor"],
        separate_classes_by_quality_factor=hyper_parameters[
            "separate_classes_by_quality_factor"
        ],
    )

    train_data_set = data_set_class(
        kinds=train_rows.kind.values,
        image_names=train_rows.image_name.values,
        labels=train_rows.label.values,
        transforms=augmentations_train,
        **shared_data_set_kwargs,
    )
    validation_data_set = data_set_class(
        kinds=validation_rows.kind.values,
        image_names=validation_rows.image_name.values,
        labels=validation_rows.label.values,
        transforms=augmentations_validation,
        **shared_data_set_kwargs,
    )

    # The configured batch size is multiplied by the number of devices.
    total_batch_size = int(
        hyper_parameters["batch_size"] * len(hyper_parameters["devices"])
    )
    workers = hyper_parameters["training_workers"]

    # Training batches are drawn through a class-balancing sampler.
    train_data_loader = DataLoader(
        train_data_set,
        sampler=BalanceClassSampler(
            labels=train_data_set.get_labels(), mode="downsampling"
        ),
        batch_size=total_batch_size,
        shuffle=False,
        num_workers=workers,
        pin_memory=False,
        drop_last=True,
    )
    # NOTE(review): drop_last=True also discards the final incomplete
    # validation batch - confirm this is intended.
    validation_data_loader = DataLoader(
        validation_data_set,
        batch_size=total_batch_size,
        shuffle=False,
        num_workers=workers,
        pin_memory=False,
        drop_last=True,
    )

    return train_data_loader, validation_data_loader
pad_width, 'constant', constant_values=signal.min()) else: signal = np.pad(signal, pad_width, 'wrap') else: signal = signal[:, :self.size] return signal data_augmentation = Compose([ OneOf([ PadToSize(mode='wrap'), PadToSize(mode='constant'), ], p=1), Normalize(mean=0.456, std=0.225, p=1.0), ]) data_augmentation_test = Compose([ OneOf([ PadToSize(mode='wrap'), PadToSize(mode='constant'), ], p=1), Normalize(mean=0.456, std=0.225, p=1.0), ]) class DatasetPreparer: def __init__(self, train_path, audio_path, additional: list = None): self.train_path = train_path self.audio_path = audio_path self.label_encoder = LabelEncoder()
p=0.2), Resize(args.image_size, args.image_size) ]), "val": None } tr_img = { "train": Compose([ OneOf([ MotionBlur(p=.2), Blur(blur_limit=3, p=0.1), ], p=0.2), Normalize(mean=args.stats[0], std=args.stats[1], max_pixel_value=1, p=1.0), ]), "val": Normalize(mean=args.stats[0], std=args.stats[1], max_pixel_value=1, p=1.0), } else: tr_dual, tr_img = { "train": None, "val": None }, { "train": None, "val": None
def run(*options, cfg=None, debug=False):
    """Run testing of model

    Loads the model weights from ``config.TEST.MODEL_PATH``, evaluates them on
    the "test" split of the patch dataset and writes the metrics, sample
    images and the best/worst inlines (by IoU) to the log and to tensorboard.

    Notes:
        Options can be passed in via the options argument and loaded from the
        cfg file.
        Options from default.py will be overridden by options loaded from the
        cfg file.
        Options passed in via the options argument will override options
        loaded from the cfg file.

    Args:
        *options (str, int, optional): Options used to override what is loaded
            from the config. To see what options are available consult
            default.py
        cfg (str, optional): Location of config file to load.
            Defaults to None.
        debug (bool, optional): When True the evaluator is run with
            ``epoch_length=1`` instead of the full length of the test loader.
            Defaults to False.
    """
    update_config(config, options=options, config_file=cfg)

    # Start logging
    load_log_configuration(config.LOG_CONFIG)
    logger = logging.getLogger(__name__)
    logger.debug(config.WORKERS)
    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    # Seed every random number generator in use
    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Setup Augmentations
    test_aug = Compose([
        Normalize(
            mean=(config.TRAIN.MEAN, ),
            std=(config.TRAIN.STD, ),
            max_pixel_value=config.TRAIN.MAX,
        ),
        PadIfNeeded(
            min_height=config.TRAIN.PATCH_SIZE,
            min_width=config.TRAIN.PATCH_SIZE,
            border_mode=config.OPENCV_BORDER_CONSTANT,
            always_apply=True,
            mask_value=mask_value,
            value=0,
        ),
        Resize(
            config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT,
            config.TRAIN.AUGMENTATIONS.RESIZE.WIDTH,
            always_apply=True,
        ),
        PadIfNeeded(
            min_height=config.TRAIN.AUGMENTATIONS.PAD.HEIGHT,
            min_width=config.TRAIN.AUGMENTATIONS.PAD.WIDTH,
            border_mode=config.OPENCV_BORDER_CONSTANT,
            always_apply=True,
            mask_value=mask_value,
            value=0,
        ),
    ])

    PenobscotDataset = get_patch_dataset(config)

    test_set = PenobscotDataset(
        config.DATASET.ROOT,
        config.TRAIN.PATCH_SIZE,
        config.TRAIN.STRIDE,
        split="test",
        transforms=test_aug,
        n_channels=config.MODEL.IN_CHANNELS,
        complete_patches_only=config.TEST.COMPLETE_PATCHES_ONLY,
    )

    logger.info(str(test_set))
    n_classes = test_set.n_classes

    test_loader = data.DataLoader(
        test_set,
        batch_size=config.VALIDATION.BATCH_SIZE_PER_GPU,
        num_workers=config.WORKERS,
    )

    # Pick the device first so the checkpoint can be mapped onto it; without
    # map_location a checkpoint saved from a GPU fails to load on a CPU-only
    # machine.
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"

    model = getattr(models, config.MODEL.NAME).get_seg_model(config)
    logger.info(f"Loading model {config.TEST.MODEL_PATH}")
    model.load_state_dict(
        torch.load(config.TEST.MODEL_PATH, map_location=device),
        strict=False,
    )
    model = model.to(device)  # Send to GPU (when available)

    # Fall back to a path without git information when generate_path raises
    # a TypeError on the git-derived components.
    try:
        output_dir = generate_path(
            config.OUTPUT_DIR,
            git_branch(),
            git_hash(),
            config.MODEL.NAME,
            current_datetime(),
        )
    except TypeError:
        output_dir = generate_path(
            config.OUTPUT_DIR,
            config.MODEL.NAME,
            current_datetime(),
        )

    summary_writer = create_summary_writer(
        log_dir=path.join(output_dir, config.LOG_DIR))

    # weights are inversely proportional to the frequency of the classes in
    # the training set
    class_weights = torch.tensor(config.DATASET.CLASS_WEIGHTS,
                                 device=device,
                                 requires_grad=False)

    criterion = torch.nn.CrossEntropyLoss(weight=class_weights,
                                          ignore_index=mask_value,
                                          reduction="mean")

    def _select_pred_and_mask(model_out_dict):
        # (prediction, target) pair consumed by the per-batch metrics
        return (model_out_dict["y_pred"].squeeze(),
                model_out_dict["mask"].squeeze())

    def _select_all(model_out_dict):
        # the inline metric additionally receives the ids and patch locations
        return (
            model_out_dict["y_pred"].squeeze(),
            model_out_dict["mask"].squeeze(),
            model_out_dict["ids"],
            model_out_dict["patch_locations"],
        )

    inline_mean_iou = InlineMeanIoU(
        config.DATASET.INLINE_HEIGHT,
        config.DATASET.INLINE_WIDTH,
        config.TRAIN.PATCH_SIZE,
        n_classes,
        padding=_padding_from(config),
        scale=_scale_from(config),
        output_transform=_select_all,
    )

    evaluator = create_supervised_evaluator(
        model,
        _prepare_batch,
        metrics={
            "nll": Loss(criterion,
                        output_transform=_select_pred_and_mask,
                        device=device),
            "inIoU": inline_mean_iou,
            "pixa": pixelwise_accuracy(n_classes,
                                       output_transform=_select_pred_and_mask,
                                       device=device),
            "cacc": class_accuracy(n_classes,
                                   output_transform=_select_pred_and_mask,
                                   device=device),
            "mca": mean_class_accuracy(n_classes,
                                       output_transform=_select_pred_and_mask,
                                       device=device),
            "ciou": class_iou(n_classes,
                              output_transform=_select_pred_and_mask,
                              device=device),
            "mIoU": mean_iou(n_classes,
                             output_transform=_select_pred_and_mask,
                             device=device),
        },
        device=device,
    )

    # Metrics to the text log
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        logging_handlers.log_metrics(
            "Test results",
            metrics_dict={
                "nll": "Avg loss :",
                "mIoU": "Avg IoU :",
                "pixa": "Pixelwise Accuracy :",
                "mca": "Mean Class Accuracy :",
                "inIoU": "Mean Inline IoU :",
            },
        ),
    )
    # Metrics to tensorboard
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        tensorboard_handlers.log_metrics(
            summary_writer,
            evaluator,
            "epoch",
            metrics_dict={
                "mIoU": "Test/IoU",
                "nll": "Test/Loss",
                "mca": "Test/MCA",
                "inIoU": "Test/MeanInlineIoU",
            },
        ),
    )

    def _select_max(pred_tensor):
        # index of the highest-scoring entry along dim 1
        return pred_tensor.max(1)[1]

    def _tensor_to_numpy(pred_tensor):
        return pred_tensor.squeeze().cpu().numpy()

    transform_func = compose(
        np_to_tb,
        decode_segmap,
        _tensor_to_numpy,
    )

    transform_pred = compose(transform_func, _select_max)

    # Image, mask and prediction writers for tensorboard
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer, "Test/Image", "image"),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer,
                            "Test/Mask",
                            "mask",
                            transform_func=transform_func),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer,
                            "Test/Pred",
                            "y_pred",
                            transform_func=transform_pred),
    )

    logger.info("Starting testing")
    if debug:
        evaluator.run(test_loader, max_epochs=1, epoch_length=1)
    else:
        evaluator.run(test_loader,
                      max_epochs=1,
                      epoch_length=len(test_loader))

    # Log top N and bottom N inlines in terms of IoU to tensorboard
    inline_ious = inline_mean_iou.iou_per_inline()
    sorted_ious = sorted(inline_ious.items(),
                         key=lambda x: x[1],
                         reverse=True)
    topk = ((inline_mean_iou.predictions[key], inline_mean_iou.masks[key])
            for key, iou in take(_TOP_K, sorted_ious))
    bottomk = ((inline_mean_iou.predictions[key], inline_mean_iou.masks[key])
               for key, iou in tail(_BOTTOM_K, sorted_ious))
    stack_and_decode = compose(transform_func, torch.stack)
    predictions, masks = unzip(chain(topk, bottomk))
    predictions_tensor = stack_and_decode(list(predictions))
    masks_tensor = stack_and_decode(list(masks))
    _log_tensor_to_tensorboard(predictions_tensor, "Test/InlinePredictions",
                               summary_writer, evaluator)
    _log_tensor_to_tensorboard(masks_tensor, "Test/InlineMasks",
                               summary_writer, evaluator)

    summary_writer.close()
def train(inputs, working_dir, fold_id):
    """Train the VGG16 U-Net on one cross-validation fold.

    Resumes from the newest ``_ep<epoch>_<step>`` snapshot of this fold when
    one exists. After every epoch the model is saved and validated; the best
    validation Jaccard is tracked, and when it has not improved for
    ``patience`` epochs the learning rate is multiplied by ``lr_update_rate``
    and the best weights are reloaded. Training stops when the learning rate
    falls below ``min_lr``, when the time limit is exceeded, or when
    ``n_epochs`` is reached.

    Args:
        inputs: passed to ``read_cv_splits`` to obtain the fold assignment.
        working_dir: forwarded to ``make_train_val_loader``.
        fold_id: fold number; also determines the model name ``v12_f<fold_id>``.

    NOTE(review): ``criterion`` is not assigned in this function (its
    assignment is commented out below); it is presumably defined at module
    scope - confirm.
    """
    # Effective hardware settings. Earlier presets for other machines were
    # immediately overwritten and never took effect, so only these remain.
    num_workers, batch_size = 4, 2 * 3
    gpus = [0]

    patience, n_epochs = 8, 150
    lr, min_lr, lr_update_rate = 1e-4, 5e-5, 0.5
    training_timelimit = 60 * 60 * 24 * 2  # 2 days
    st_time = time.time()

    model_name = f'v12_f{fold_id}'
    fh = open_log(model_name)

    # define the model
    model = unet_vgg16(pretrained=True)

    def get_checkpoint(model_name):
        """Return the path of the newest snapshot of ``model_name``, or None."""
        snapshots = []
        # Search this model's own directory rather than a hard-coded fold,
        # so that every fold resumes from its own snapshots.
        pattern = f'./wdata/models/{model_name}/{model_name}_ep*'
        for file in glob.glob(pattern):
            # file names look like <model_name>_ep<epoch>_<step>
            suffix = os.path.basename(file).rsplit('_ep', 1)[1]
            parts = suffix.split('_')
            snapshots.append((int(parts[0]), int(parts[1])))
        if not snapshots:
            return None
        # highest epoch first, then highest step within that epoch
        max_epoch, steps_max_epoch = max(snapshots)
        print('latest epoch,steps: ', max_epoch, steps_max_epoch)
        return (f'./wdata/models/{model_name}/'
                f'{model_name}_ep{max_epoch}_{steps_max_epoch}')

    model_checkpoint_file = get_checkpoint(model_name)

    if (model_checkpoint_file is not None
            and os.path.exists(model_checkpoint_file)):
        print('load ', model_checkpoint_file)
        state = torch.load(str(model_checkpoint_file))
        start_epoch = state['epoch']
        step = state['step']
        print('Found model, epoch {}, step {:,}'.format(start_epoch, step))
        model.load_state_dict(state['model'], strict=False)
    else:
        start_epoch = 1
        step = 0

    model = nn.DataParallel(model, device_ids=gpus).cuda()

    # augmentation techniques
    train_transformer = Compose([
        HorizontalFlip(p=0.5),
        RandomRotate90(p=0.5),
        RandomCrop(512, 512, p=1.0),
        Normalize(),
    ], p=1.0)

    val_transformer = Compose([
        CenterCrop(512, 512, p=1.0),
        Normalize(),
    ], p=1.0)

    # train/val loaders
    df_cvfolds = read_cv_splits(inputs)
    trn_loader, val_loader = make_train_val_loader(train_transformer,
                                                   val_transformer,
                                                   df_cvfolds, fold_id,
                                                   batch_size, num_workers,
                                                   working_dir)

    # train
    # criterion = binary_loss(jaccard_weight=0.25)
    optimizer = Adam(model.parameters(), lr=lr)

    # number of batches between progress reports, and the window over which
    # the reported metrics are averaged
    report_epoch = 10

    # vars for early stopping
    best_score = 0
    not_improved_count = 0

    for epoch in range(start_epoch, n_epochs):
        model.train()

        tl = trn_loader  # alias
        trn_metrics = Metrics()

        try:
            tq = tqdm.tqdm(total=(len(tl) * trn_loader.batch_size))
            tq.set_description(f'Ep{epoch:>3d}')
            for i, (inputs, targets, labels, names) in enumerate(trn_loader):
                inputs = inputs.cuda()
                targets = targets.cuda()

                outputs = model(inputs)
                loss = criterion(outputs, targets)
                optimizer.zero_grad()

                # Increment step counter
                batch_size = inputs.size(0)
                loss.backward()
                optimizer.step()
                step += 1
                tq.update(batch_size)

                # Update eval metrics
                trn_metrics.loss.append(loss.item())
                trn_metrics.bce.append(criterion._stash_bce_loss.item())
                trn_metrics.jaccard.append(criterion._stash_jaccard.item())
                mlflow.log_metric('batch total_loss', loss.item(), step)
                mlflow.log_metric('batch BCE',
                                  criterion._stash_bce_loss.item(), step)
                mlflow.log_metric('batch Jaccard',
                                  criterion._stash_jaccard.item(), step)

                if i > 0 and i % report_epoch == 0:
                    report_metrics = Bunch(
                        epoch=epoch,
                        step=step,
                        trn_loss=np.mean(trn_metrics.loss[-report_epoch:]),
                        trn_bce=np.mean(trn_metrics.bce[-report_epoch:]),
                        trn_jaccard=np.mean(
                            trn_metrics.jaccard[-report_epoch:]),
                    )
                    write_event(fh, **report_metrics)
                    tq.set_postfix(
                        loss=f'{report_metrics.trn_loss:.5f}',
                        bce=f'{report_metrics.trn_bce:.5f}',
                        jaccard=f'{report_metrics.trn_jaccard:.5f}')

            # End of epoch
            report_metrics = Bunch(
                epoch=epoch,
                step=step,
                trn_loss=np.mean(trn_metrics.loss[-report_epoch:]),
                trn_bce=np.mean(trn_metrics.bce[-report_epoch:]),
                trn_jaccard=np.mean(trn_metrics.jaccard[-report_epoch:]),
            )
            write_event(fh, **report_metrics)
            mlflow.log_metric('train total_loss',
                              np.mean(trn_metrics.loss[-report_epoch:]),
                              epoch)
            mlflow.log_metric('train BCE',
                              np.mean(trn_metrics.bce[-report_epoch:]),
                              epoch)
            mlflow.log_metric('train Jaccard',
                              np.mean(trn_metrics.jaccard[-report_epoch:]),
                              epoch)
            tq.set_postfix(loss=f'{report_metrics.trn_loss:.5f}',
                           bce=f'{report_metrics.trn_bce:.5f}',
                           jaccard=f'{report_metrics.trn_jaccard:.5f}')
            tq.close()

            # save model after epoch
            save(model, epoch, step, model_name)

            # Run validation
            val_metrics = validation(model, criterion, val_loader, epoch,
                                     step, fh)
            report_val_metrics = Bunch(
                epoch=epoch,
                step=step,
                val_loss=np.mean(val_metrics.loss[-report_epoch:]),
                val_bce=np.mean(val_metrics.bce[-report_epoch:]),
                val_jaccard=np.mean(val_metrics.jaccard[-report_epoch:]),
            )
            write_event(fh, **report_val_metrics)
            mlflow.log_metric('eval total_loss',
                              np.mean(val_metrics.loss[-report_epoch:]),
                              epoch)
            mlflow.log_metric('eval BCE',
                              np.mean(val_metrics.bce[-report_epoch:]),
                              epoch)
            mlflow.log_metric('eval Jaccard',
                              np.mean(val_metrics.jaccard[-report_epoch:]),
                              epoch)

            # Stop once the wall-clock budget is used up
            if time.time() - st_time > training_timelimit:
                tq.close()
                break

            if best_score < report_val_metrics.val_jaccard:
                best_score = report_val_metrics.val_jaccard
                not_improved_count = 0
                copy_best(model, epoch, model_name, step)
            else:
                not_improved_count += 1

            if not_improved_count >= patience:
                # Update learning rate and optimizer
                lr *= lr_update_rate

                # Stop criterion
                if lr < min_lr:
                    tq.close()
                    break

                not_improved_count = 0

                # Load best weight. The checkpoint is loaded into a
                # DataParallel wrapper and then unwrapped, before being
                # re-wrapped for the configured GPUs.
                del model
                model = unet_vgg16(pretrained=False)
                path = f'./wdata/models/{model_name}/{model_name}_best'
                cp = torch.load(path)
                model = nn.DataParallel(model).cuda()
                epoch = cp['epoch']
                model.load_state_dict(cp['model'])
                model = model.module
                model = nn.DataParallel(model, device_ids=gpus).cuda()

                # Init optimizer with the reduced learning rate
                optimizer = Adam(model.parameters(), lr=lr)

        except KeyboardInterrupt:
            # Keep the progress made so far before exiting
            save(model, epoch, step, model_name)
            tq.close()
            fh.close()
            sys.exit(1)

    fh.close()
def post_transforms():
    """Return the closing pipeline stage: normalisation, then tensor conversion.

    Intended to be applied after all other transformations.
    """
    closing_steps = [Normalize(), ToTensor()]
    return Compose(closing_steps)
seed = 12
device = 'cuda'

# Training pipeline: flip, pad to at least 600x600, random shift/scale/rotate,
# random 513x513 crop, noise and brightness/contrast jitter, then
# normalisation, the `ignore_mask_boundaries` step and tensor conversion.
train_transforms = Compose([
    HorizontalFlip(),
    PadIfNeeded(min_height=600, min_width=600,
                border_mode=cv2.BORDER_CONSTANT),
    ShiftScaleRotate(
        shift_limit=(-0.05, 0.05),
        scale_limit=(-0.1, 2.0),
        rotate_limit=45,
        p=0.8,
        border_mode=cv2.BORDER_CONSTANT,
    ),
    RandomCrop(height=513, width=513),
    GaussNoise(p=0.5),
    RandomBrightnessContrast(),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ignore_mask_boundaries,
    ToTensor(),
])


def train_transform_fn(dp):
    # Expand the datapoint dict into keyword arguments for the pipeline.
    return train_transforms(**dp)


# Validation pipeline: deterministic pad and centre crop to 513x513, followed
# by the same normalisation, boundary handling and tensor conversion.
val_transforms = Compose([
    PadIfNeeded(min_height=600, min_width=600,
                border_mode=cv2.BORDER_CONSTANT),
    CenterCrop(height=513, width=513),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ignore_mask_boundaries,
    ToTensor(),
])


def val_transform_fn(dp):
    # Expand the datapoint dict into keyword arguments for the pipeline.
    return val_transforms(**dp)


batch_size = 8
# get fold valdf = train[train['fold'] == fold].reset_index(drop=True) trndf = train[train['fold'] != fold].reset_index(drop=True) # Data loaders mean_img = [0.22363983, 0.18190407, 0.2523437] std_img = [0.32451536, 0.2956294, 0.31335256] transform_train = Compose([ HorizontalFlip(p=0.5), ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=20, p=0.3, border_mode=cv2.BORDER_REPLICATE), Transpose(p=0.5), Normalize(mean=mean_img, std=std_img, max_pixel_value=255.0, p=1.0), ToTensor() ]) HFLIPVAL = 1.0 if HFLIP == 'T' else 0.0 TRANSPOSEVAL = 1.0 if TRANSPOSE == 'P' else 0.0 transform_test = Compose([ HorizontalFlip(p=HFLIPVAL), Transpose(p=TRANSPOSEVAL), Normalize(mean=mean_img, std=std_img, max_pixel_value=255.0, p=1.0), ToTensor() ]) trndataset = IntracranialDataset(trndf, path=dir_train_img, transform=transform_train,