def initialize_datasets(self):
    self._dataset_train = FieldsDataset(self.train_folder, transforms.get_train_transform())
    self._dataset_valid = FieldsDataset(self.valid_folder, transforms.get_test_transform())
    lgblkb_tools.logger.info(f"Length of Training dataset: {len(self._dataset_train)}")
    lgblkb_tools.logger.info(f"Length of Validation dataset: {len(self._dataset_valid)}")
    if self.test_folder is not None:
        self._dataset_test = FieldsDataset(self.test_folder, transforms.get_test_transform())
        lgblkb_tools.logger.info(f"Length of Testing dataset: {len(self._dataset_test)}")
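# A minimal sketch of how these datasets might be wrapped in loaders; the
# method name, batch size, and worker count below are assumptions, not part
# of the original class:
from torch.utils.data import DataLoader

def make_dataloaders(self, batch_size=16, num_workers=4):
    # Hypothetical helper: consumes the datasets built in initialize_datasets.
    loader_train = DataLoader(self._dataset_train, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)
    loader_valid = DataLoader(self._dataset_valid, batch_size=batch_size,
                              shuffle=False, num_workers=num_workers)
    return loader_train, loader_valid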
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--batch-size', type=int, default=32)
    arg('--lr', type=float, default=2e-3)
    arg('--workers', type=int, default=4)
    arg('--epochs', type=int, default=5)
    arg('--mixup-alpha', type=float, default=0)
    arg('--cutmix-alpha', type=float, default=0)
    arg('--arch', type=str, default='seresnext50')
    arg('--amp', type=str, default='')
    arg('--size', type=int, default=192)
    arg('--debug', action='store_true')
    arg('--radam', action='store_true')
    arg('--run-name', type=str, default='')
    arg('--lookahead-k', type=int, default=-1)
    arg('--lookahead-alpha', type=float, default=0.5)
    arg('--from-checkpoint', type=str, default='')
    arg('--find-lr', action='store_true')
    args = parser.parse_args()

    train_dir = DATA_ROOT / 'train'
    valid_dir = DATA_ROOT / 'val'
    use_cuda = cuda.is_available()
    model = get_model(args.arch)
    if use_cuda:
        model = model.cuda()
    criterion = MixUpSoftmaxLoss(nn.CrossEntropyLoss())
    (CACHE_DIR / 'params.json').write_text(
        json.dumps(vars(args), indent=4, sort_keys=True))

    df_train, class_map = build_dataframe_from_folder(train_dir)
    df_valid = build_dataframe_from_folder(valid_dir, class_map)
    train_transform = get_train_transform(int(args.size * 1.25), args.size)
    test_transform = get_test_transform(int(args.size * 1.25), args.size)
    train_loader = make_loader(args, TrainDataset, df_train, train_transform,
                               drop_last=True, shuffle=True)
    valid_loader = make_loader(args, TrainDataset, df_valid, test_transform,
                               shuffle=False)
    print(f'{len(train_loader.dataset):,} items in train, '
          f'{len(valid_loader.dataset):,} in valid')

    if args.find_lr:
        find_lr(args, model, train_loader, criterion)
    elif args.from_checkpoint:
        resume_training(args, model, train_loader, valid_loader)
    else:
        train_from_scratch(args, model, train_loader, valid_loader, criterion)
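# make_loader is called above but not defined in this snippet. A plausible
# minimal sketch, assuming dataset_cls takes (df, transform); that signature
# is an assumption, not the repo's actual definition:
from torch.utils.data import DataLoader

def make_loader(args, dataset_cls, df, transform, drop_last=False, shuffle=False):
    dataset = dataset_cls(df, transform)
    return DataLoader(dataset, batch_size=args.batch_size, shuffle=shuffle,
                      drop_last=drop_last, num_workers=args.workers,
                      pin_memory=True)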
import csv

import torch
import torchvision
from torchvision.ops import nms

from transforms import get_test_transform
from dataset import Test_Dataset

NMS_THRESHOLD = 0.1
SAVED_MODEL = 'fasterRCNN'
DATA_DIR = '/home/master/dataset/test/'

# Load dataset
dataset_test = Test_Dataset(DATA_DIR, transforms=get_test_transform())

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the whole serialized model (saved with torch.save(model), not just a state dict)
# torch.nn.Module.load_state_dict(torch.load('../fasterrcnn_resnet50_fpn_coco-258fb6c6.pth'))
model = torch.load(SAVED_MODEL)
model.to(device)
# Put the model in evaluation mode once, before the loop
model.eval()

predictions = []
for ii, (img, seq, frame) in enumerate(dataset_test):
    if ii % 50 == 0:
        print("Processed %d / %d images" % (ii, len(dataset_test)))
    with torch.no_grad():
        prediction = model([img.to(device)])
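# The snippet ends right after the forward pass. A minimal sketch of the
# post-processing that the imports suggest (NMS plus CSV export), assuming the
# standard torchvision detection output keys ('boxes', 'scores') and reusing
# nms, NMS_THRESHOLD, and the predictions list from the script above; the row
# layout is an assumption, not the original code.
def collect_detections(prediction, seq, frame, rows):
    # Keep boxes that survive NMS at NMS_THRESHOLD, then append one row per
    # detection: sequence, frame, box corners, score.
    boxes = prediction[0]['boxes']
    scores = prediction[0]['scores']
    keep = nms(boxes, scores, NMS_THRESHOLD)
    for box, score in zip(boxes[keep].cpu().tolist(), scores[keep].cpu().tolist()):
        rows.append([seq, frame, *box, float(score)])

# Inside the loop: collect_detections(prediction, seq, frame, predictions)
# After the loop, write everything out:
# with open('predictions.csv', 'w', newline='') as f:
#     csv.writer(f).writerows(predictions)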
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--batch-size', type=int, default=32)
    arg('--lr', type=float, default=2e-3)
    arg('--workers', type=int, default=4)
    arg('--epochs', type=int, default=5)
    arg('--mixup-alpha', type=float, default=0)
    arg('--arch', type=str, default='seresnext50')
    arg('--amp', type=str, default='')
    arg('--size', type=int, default=192)
    arg('--debug', action='store_true')
    arg('--from-checkpoint', type=str, default='')
    arg('--find-lr', action='store_true')
    args = parser.parse_args()

    train_dir = DATA_ROOT / 'train'
    valid_dir = DATA_ROOT / 'val'
    use_cuda = cuda.is_available()
    if args.arch == 'seresnext50':
        model = get_seresnet_model(arch="se_resnext50_32x4d",
                                   n_classes=N_CLASSES, pretrained=False)
    elif args.arch == 'seresnext101':
        model = get_seresnet_model(arch="se_resnext101_32x4d",
                                   n_classes=N_CLASSES, pretrained=False)
    elif args.arch.startswith("densenet"):
        model = get_densenet_model(arch=args.arch)
    elif args.arch.startswith("efficientnet"):
        model = get_efficientnet_model(arch=args.arch, pretrained=False)
    else:
        raise ValueError("No such model")
    if use_cuda:
        model = model.cuda()
    criterion = MixUpSoftmaxLoss(nn.CrossEntropyLoss())
    (CACHE_DIR / 'params.json').write_text(
        json.dumps(vars(args), indent=4, sort_keys=True))

    df_train, class_map = build_dataframe_from_folder(train_dir)
    df_valid = build_dataframe_from_folder(valid_dir, class_map)
    train_transform = get_train_transform(int(args.size * 1.25), args.size)
    test_transform = get_test_transform(int(args.size * 1.25), args.size)
    train_loader = make_loader(args, TrainDataset, df_train, train_transform,
                               drop_last=True, shuffle=True)
    valid_loader = make_loader(args, TrainDataset, df_valid, test_transform,
                               shuffle=False)
    print(f'{len(train_loader.dataset):,} items in train, '
          f'{len(valid_loader.dataset):,} in valid')

    if args.find_lr:
        find_lr(args, model, train_loader, criterion)
    elif args.from_checkpoint:
        resume_training(args, model, train_loader, valid_loader)
    else:
        train_from_scratch(args, model, train_loader, valid_loader, criterion)
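# The sibling scripts call get_model(args.arch); the dispatch above is
# presumably what such a helper wraps. A sketch of that consolidation (an
# assumption, not the repo's actual definition):
def get_model(arch: str):
    if arch == 'seresnext50':
        return get_seresnet_model(arch="se_resnext50_32x4d",
                                  n_classes=N_CLASSES, pretrained=False)
    if arch == 'seresnext101':
        return get_seresnet_model(arch="se_resnext101_32x4d",
                                  n_classes=N_CLASSES, pretrained=False)
    if arch.startswith("densenet"):
        return get_densenet_model(arch=arch)
    if arch.startswith("efficientnet"):
        return get_efficientnet_model(arch=arch, pretrained=False)
    raise ValueError("No such model")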
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--batch-size', type=int, default=32)
    arg('--workers', type=int, default=4)
    arg('--arch', type=str, default='seresnext50')
    arg('--amp', type=str, default='')
    arg('--size', type=int, default=192)
    arg('--debug', action='store_true')
    arg('--model-path', type=str, default='')
    args = parser.parse_args()

    train_dir = DATA_ROOT / 'train'
    valid_dir = DATA_ROOT / 'val'
    use_cuda = cuda.is_available()
    model = get_model(args.arch)
    model.load_state_dict(torch.load(args.model_path, map_location="cpu"))
    if use_cuda:
        model = model.cuda()
    if args.amp:
        if not APEX_AVAILABLE:
            raise ValueError("Apex is not installed!")
        model = amp.initialize(model, opt_level=args.amp)

    # Rebuild the training dataframe first so we get the same class_map as in training
    _, class_map = build_dataframe_from_folder(train_dir)
    df_valid = build_dataframe_from_folder(valid_dir, class_map)
    idx_to_name = get_class_idx_to_class_name_mapping(class_map)
    # Export the mapping for later use
    with open(CACHE_DIR / "id_to_name_map.json", "w") as fout:
        json.dump(idx_to_name, fout)

    test_transform = get_test_transform(int(args.size * 1.25), args.size)
    valid_loader = make_loader(args, TrainDataset, df_valid, test_transform,
                               shuffle=False)
    print(f'{len(valid_loader.dataset):,} in valid')

    bot = ImageClassificationBot(model=model, train_loader=None,
                                 valid_loader=None, clip_grad=0,
                                 optimizer=None, echo=True, criterion=None,
                                 callbacks=[], pbar=True,
                                 use_tensorboard=False,
                                 use_amp=(args.amp != ''))
    logits, truths = bot.predict(valid_loader, return_y=True)
    probs = torch.softmax(logits, dim=-1)
    preds = torch.argmax(probs, dim=1)
    print(f"Validation accuracy: "
          f"{np.mean(preds.numpy() == truths.numpy()) * 100:.2f}%")

    df_out = pd.DataFrame({
        "truth": truths.numpy(),
        "max_prob": np.max(probs.numpy(), axis=1),
        "truth_prob": torch.gather(probs, 1, truths[:, None]).numpy()[:, 0],
        "pred": preds.numpy(),
        "path": [
            valid_loader.dataset._df.iloc[i].image_path
            for i in range(len(valid_loader.dataset))
        ],
    })
    df_out.to_csv(CACHE_DIR / "valid_preds.csv", index=False)
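# The exported mapping can later be read back to translate predicted indices
# into class names; note that JSON serialization turns integer keys into strings:
with open(CACHE_DIR / "id_to_name_map.json") as fin:
    idx_to_name = {int(k): v for k, v in json.load(fin).items()}
# e.g. idx_to_name[int(preds[0])] gives the predicted class name for sample 0.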
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--data_path', type=str, default='data')
    arg('--model', type=str, default='pnasnet5large')
    arg('--exp-name', type=str, default='pnasnet5large_2')
    arg('--batch-size', type=int, default=32)
    arg('--lr', type=float, default=1e-2)
    arg('--patience', type=int, default=4)
    arg('--n-epochs', type=int, default=15)
    arg('--n-folds', type=int, default=10)
    arg('--fold', type=int, default=0)
    arg('--random-seed', type=int, default=314159)
    arg('--num-workers', type=int, default=6)
    arg('--gpus', type=str, default='0')
    arg('--resize', type=int, default=331)
    arg('--crop', type=int, default=331)
    arg('--scale', type=str, default='0.4, 1.0')
    arg('--mean', type=str, default='0.485, 0.456, 0.406')
    arg('--std', type=str, default='0.229, 0.224, 0.225')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
    # os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '1'
    # os.environ['MXNET_UPDATE_ON_KVSTORE'] = "0"
    # os.environ['MXNET_EXEC_ENABLE_ADDTO'] = "1"
    # os.environ['MXNET_USE_TENSORRT'] = "0"
    # os.environ['MXNET_GPU_WORKER_NTHREADS'] = "2"
    # os.environ['MXNET_GPU_COPY_NTHREADS'] = "1"
    # os.environ['MXNET_OPTIMIZER_AGGREGATION_SIZE'] = "54"

    random_seed = args.random_seed
    set_random_seed(random_seed)

    path_to_data = Path(args.data_path)
    labels = pd.read_csv(path_to_data / 'labels.csv')
    num_classes = len(labels)
    train = pd.read_csv(path_to_data / 'train.csv.zip')
    n_folds = args.n_folds
    make_folds(train, n_folds, random_seed)

    # One-hot encode the space-separated attribute ids into num_classes columns
    mlb = MultiLabelBinarizer([str(i) for i in range(num_classes)])
    s = train['attribute_ids'].str.split()
    res = pd.DataFrame(mlb.fit_transform(s), columns=mlb.classes_, index=train.index)
    train = pd.concat([res, train['id'] + '.png', train['fold']], axis=1)

    gpu_count = len(args.gpus.split(','))
    batch_size = args.batch_size
    resize = args.resize
    crop = args.crop
    scale = tuple(float(x) for x in args.scale.split(','))
    mean = [float(x) for x in args.mean.split(',')]
    std = [float(x) for x in args.std.split(',')]
    # jitter_param = 0.4
    # lighting_param = 0.1
    labels_ids = [str(i) for i in range(num_classes)]
    num_workers = args.num_workers
    fold = args.fold

    train_transformer = get_train_transform(resize=resize, crop=crop,
                                            scale=scale, mean=mean, std=std)
    train_loader = mx.gluon.data.DataLoader(
        MXDataset(path_to_data / 'train', train[train['fold'] != fold].copy(),
                  labels_ids, train_transformer),
        batch_size=batch_size * gpu_count, shuffle=True,
        num_workers=num_workers, pin_memory=True)
    test_transformer = get_test_transform(resize=resize, crop=crop,
                                          mean=mean, std=std)
    dev_loader = mx.gluon.data.DataLoader(
        MXDataset(path_to_data / 'train', train[train['fold'] == fold].copy(),
                  labels_ids, test_transformer),
        batch_size=batch_size * gpu_count, shuffle=False,
        num_workers=num_workers, pin_memory=True)

    fp16 = True
    if args.model == 'pnasnet5large':
        net = get_pnasnet5large(num_classes)
    else:
        raise ValueError(f'No such model {args.model}')
    if fp16:
        net.cast('float16')
    ctx = [mx.gpu(i) for i in range(gpu_count)]
    net.collect_params().reset_ctx(ctx)

    epoch_size = len(train_loader)
    # Linear LR scaling by total batch size, then warmup plus a multi-step schedule
    lr = args.lr * batch_size / 256
    steps = [step * epoch_size for step in [7, 9]]
    factor = 0.5
    warmup_epochs = 5
    warmup_mode = 'linear'
    schedule = mx.lr_scheduler.MultiFactorScheduler(
        step=steps, factor=factor, base_lr=lr,
        warmup_steps=warmup_epochs * epoch_size, warmup_mode=warmup_mode)

    if fp16:
        # Static loss scaling: the loss is scaled up by `weight`,
        # gradients are scaled back down via rescale_grad
        weight = 128
        opt = mx.optimizer.Adam(multi_precision=True, learning_rate=lr,
                                rescale_grad=1 / weight, lr_scheduler=schedule)
    else:
        opt = mx.optimizer.Adam(learning_rate=lr, lr_scheduler=schedule)
    trainer = mx.gluon.Trainer(net.collect_params(), opt)

    if fp16:
        loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss(weight=weight)
    else:
        loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()

    path_to_models = Path('models')
    path_to_model = path_to_models / args.exp_name
    path_to_exp = path_to_model / f'fold_{fold}'
    if not path_to_exp.exists():
        path_to_exp.mkdir(parents=True)

    patience = args.patience
    lr_reset_epoch = 1
    lr_changes = 0
    max_lr_changes = 2
    n_epochs = args.n_epochs
    best_dev_f2 = th2 = 0
    train_losses = []
    dev_losses, dev_f2s, dev_ths = [], [], []
    dev_met1, dev_met2 = [], []

    for epoch in range(1, n_epochs + 1):
        train_loss, all_predictions, all_targets = epoch_step(
            train_loader, desc=f'[ Training {epoch}/{n_epochs}.. ]',
            fp16=fp16, ctx=ctx, net=net, loss=loss, trainer=trainer)
        train_losses.append(train_loss)

        dev_loss, all_predictions, all_targets = epoch_step(
            dev_loader, desc=f'[ Validating {epoch}/{n_epochs}.. ]',
            fp16=fp16, ctx=ctx, net=net, loss=loss)
        dev_losses.append(dev_loss)

        # F2 over raw probabilities at several thresholds
        metrics = {}
        argsorted = all_predictions.argsort(axis=1)
        for threshold in [0.01, 0.05, 0.1, 0.15, 0.2]:
            metrics[f'valid_f2_th_{threshold:.2f}'] = get_score(
                binarize_prediction(all_predictions, threshold, argsorted),
                all_targets)
        dev_met1.append(metrics)
        dev_f2 = 0
        for th in dev_met1[-1]:
            if dev_met1[-1][th] > dev_f2:
                dev_f2 = dev_met1[-1][th]
                th2 = th

        # F2 over per-sample max-normalized probabilities
        all_predictions = all_predictions / all_predictions.max(1, keepdims=True)
        metrics = {}
        argsorted = all_predictions.argsort(axis=1)
        for threshold in [0.05, 0.1, 0.2, 0.3, 0.4]:
            metrics[f'valid_norm_f2_th_{threshold:.2f}'] = get_score(
                binarize_prediction(all_predictions, threshold, argsorted),
                all_targets)
        dev_met2.append(metrics)
        for th in dev_met2[-1]:
            if dev_met2[-1][th] > dev_f2:
                dev_f2 = dev_met2[-1][th]
                th2 = th
        dev_f2s.append(dev_f2)
        dev_ths.append(th2)

        if dev_f2 > best_dev_f2:
            best_dev_f2 = dev_f2
            best_th = th2
            # Save parameters in float32 so they load without casting
            if fp16:
                net.cast('float32')
                net.save_parameters((path_to_exp / 'model').as_posix())
                net.cast('float16')
            else:
                net.save_parameters((path_to_exp / 'model').as_posix())
            save_dict(
                {
                    'dev_loss': dev_loss,
                    'dev_f2': best_dev_f2,
                    'dev_th': best_th,
                    'epoch': epoch,
                    'dev_f2s': dev_f2s,
                    'dev_ths': dev_ths,
                    'dev_losses': dev_losses,
                    'dev_met1': dev_met1,
                    'dev_met2': dev_met2,
                }, path_to_exp / 'meta_data.pkl')
        elif (patience and epoch - lr_reset_epoch > patience
              and max(dev_f2s[-patience:]) < best_dev_f2):
            # "patience" epochs without improvement: halve the LR and
            # rebuild the optimizer/trainer
            lr_changes += 1
            if lr_changes > max_lr_changes:
                break
            lr *= factor
            print(f'lr updated to {lr}')
            lr_reset_epoch = epoch
            if fp16:
                weight = 128
                opt = mx.optimizer.Adam(multi_precision=True, learning_rate=lr,
                                        rescale_grad=1 / weight)
            else:
                opt = mx.optimizer.Adam(learning_rate=lr)
            trainer = mx.gluon.Trainer(net.collect_params(), opt)

    plot_all(path_to_exp, train_losses, dev_losses, dev_f2s, dev_ths,
             dev_met1, dev_met2)
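# get_score is referenced above but defined elsewhere. Given the F2-style
# metric names, a plausible sketch using scikit-learn's per-sample F-beta
# (an assumption, not the original implementation):
from sklearn.metrics import fbeta_score

def get_score(binarized_predictions, targets):
    # Per-sample F2 over multi-label (n_samples, n_classes) 0/1 matrices.
    return fbeta_score(targets, binarized_predictions, beta=2, average='samples')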