def load_model(config_path):
    config = load_config(config_path)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    if config.checkpoint_path is None:
        config.checkpoint_path = config.work_dir + '/checkpoints/best.pth'
    print(config.checkpoint_path)

    if config.model.arch == 'Classification':
        model = CustomNet(config.model.encoder, config.data.num_classes,
                          pretrained=False)
    else:
        # create segmentation model with pre-trained encoder
        model = getattr(smp, config.model.arch)(
            encoder_name=config.model.encoder,
            encoder_weights=None,
            classes=config.data.num_classes,
            activation=None,
        )
    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(config.checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    return model
def _preprocess_model_for_stage(self, stage: str, model: Model):
    stage_index = self.stages.index(stage)
    if stage_index > 0:
        checkpoint_path = f"{self.logdir}/checkpoints/best.pth"
        checkpoint = utils.load_checkpoint(checkpoint_path)
        utils.unpack_checkpoint(checkpoint, model=model)
    return model
def run_cls(config_file_cls):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 1. classification inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_cls)

    validloader = make_loader(
        data_folder=config.data.train_dir,
        df_path=config.data.train_df_path,
        phase='valid',
        batch_size=config.train.batch_size,
        num_workers=config.num_workers,
        idx_fold=config.data.params.idx_fold,
        transforms=get_transforms(config.transforms.test),
        num_classes=config.data.num_classes,
        task='cls')

    model = CustomNet(config.model.encoder, config.data.num_classes)
    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    all_predictions = []
    all_targets = []
    with torch.no_grad():
        for i, (batch_images, batch_targets) in enumerate(tqdm(validloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta, task='cls')
            all_targets.append(batch_targets)
            all_predictions.append(batch_preds)

    all_predictions = np.concatenate(all_predictions)
    all_targets = np.concatenate(all_targets)

    # evaluation
    all_accuracy_scores = []
    all_f1_scores = []
    thresholds = np.linspace(0.1, 0.9, 9)
    for th in thresholds:
        accuracy = accuracy_score(all_targets > th, all_predictions > th)
        f1 = f1_score(all_targets > th, all_predictions > th, average='samples')
        all_accuracy_scores.append(accuracy)
        all_f1_scores.append(f1)

    for th, score in zip(thresholds, all_accuracy_scores):
        print('validation accuracy for threshold {} = {}'.format(th, score))
    for th, score in zip(thresholds, all_f1_scores):
        print('validation f1 score for threshold {} = {}'.format(th, score))

    np.save('valid_preds', all_predictions)
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    resume: str = None,
    fp16: Union[Dict, bool] = None,
    initial_seed: int = 42,
) -> Generator:
    """
    Runs model inference on a PyTorch DataLoader and returns a Python
    generator with model predictions from `runner.predict_batch`.
    Cleans up the experiment info to avoid possible collisions.
    Sets `is_train_loader` and `is_valid_loader` to `False`
    while keeping `is_infer_loader` as `True`.
    Moves model to evaluation mode.

    Args:
        loader: loader to predict
        model: model to use for prediction
        resume: path to checkpoint to resume
        fp16 (Union[Dict, bool]): fp16 usage flag
        initial_seed: seed to use before prediction

    Yields:
        batches with model predictions
    """
    if isinstance(fp16, bool) and fp16:
        fp16 = {"opt_level": "O1"}

    if model is not None:
        self.model = model
    assert self.model is not None

    if resume is not None:
        checkpoint = utils.load_checkpoint(resume)
        utils.unpack_checkpoint(checkpoint, model=self.model)

    self.experiment = None
    utils.set_global_seed(initial_seed)
    (model, _, _, _, device) = utils.process_components(  # noqa: WPS122
        model=self.model,
        distributed_params=fp16,
        device=self.device,
    )
    self._prepare_inner_state(
        stage="infer",
        model=model,
        device=device,
        is_train_loader=False,
        is_valid_loader=False,
        is_infer_loader=True,
    )
    utils.maybe_recursive_call(self.model, "train", mode=False)

    utils.set_global_seed(initial_seed)
    for batch in loader:
        yield self.predict_batch(batch)
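# Hedged usage sketch (not part of the source): assumes an already-constructed
# Catalyst runner named `runner` (e.g. a SupervisedRunner) and a hypothetical
# `test_loader`. Each yielded item is whatever `runner.predict_batch` returns
# for that batch (a dict with "logits" for the supervised runner), so predictions
# can be consumed lazily, batch by batch, without holding them all in memory.
#
# for prediction in runner.predict_loader(
#     loader=test_loader,
#     resume="logs/checkpoints/best.pth",
# ):
#     logits = prediction["logits"].detach().cpu().numpy()
#     ...  # post-process / accumulate per batch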
def _get_optimizer(self, *, model_params, **params):
    load_from_previous_stage = \
        params.pop("load_from_previous_stage", False)
    optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    if load_from_previous_stage:
        checkpoint_path = f"{self.logdir}/checkpoints/best.pth"
        checkpoint = utils.load_checkpoint(checkpoint_path)
        utils.unpack_checkpoint(checkpoint, optimizer=optimizer)

        for key, value in params.items():
            for pg in optimizer.param_groups:
                pg[key] = value

    return optimizer
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    resume: str = None,
    fp16: Union[Dict, bool] = None,
    initial_seed: int = 42,
) -> Generator:
    """
    Runs model inference on a PyTorch DataLoader and returns a Python
    generator with model predictions from `runner.predict_batch`.

    Args:
        loader (DataLoader): loader to predict
        model (Model): model to use for prediction
        resume (str): path to checkpoint to resume
        fp16 (Union[Dict, bool]): fp16 usage flag
        initial_seed (int): seed to use before prediction

    Yields:
        batches with model predictions
    """
    if isinstance(fp16, bool) and fp16:
        fp16 = {"opt_level": "O1"}

    if model is not None:
        self.model = model
    assert self.model is not None

    if resume is not None:
        checkpoint = utils.load_checkpoint(resume)
        utils.unpack_checkpoint(checkpoint, model=self.model)

    (  # noqa: WPS122
        self.model,
        _,
        _,
        _,
        self.device,
    ) = utils.process_components(
        model=self.model,
        distributed_params=fp16,
        device=self.device,
    )

    utils.set_global_seed(initial_seed)
    for batch in loader:
        yield self.predict_batch(batch)
def run_cls(config_file_cls):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 1. classification inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_cls)

    model = CustomNet(config.model.encoder, config.data.num_classes)

    testloader = make_loader(
        data_folder=config.data.test_dir,
        df_path=config.data.sample_submission_path,
        phase='test',
        batch_size=config.test.batch_size,
        num_workers=config.num_workers,
        transforms=get_transforms(config.transforms.test),
        num_classes=config.data.num_classes,
    )

    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    all_fnames = []
    all_predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta, task='cls')
            all_fnames.extend(batch_fnames)
            all_predictions.append(batch_preds)

    all_predictions = np.concatenate(all_predictions)
    np.save('all_preds', all_predictions)

    df = pd.DataFrame(data=all_predictions, index=all_fnames)
    df.to_csv('cls_preds.csv', index=False)
    df.to_csv(f"{config.work_dir}/cls_preds.csv", index=False)
def load_checkpoint(*, filename, state: RunnerState):
    if os.path.isfile(filename):
        print(f"=> loading checkpoint {filename}")
        checkpoint = utils.load_checkpoint(filename)

        state.epoch = checkpoint["epoch"]
        utils.unpack_checkpoint(
            checkpoint,
            model=state.model,
            criterion=state.criterion,
            optimizer=state.optimizer,
            scheduler=state.scheduler)

        print(f"loaded checkpoint {filename} (epoch {checkpoint['epoch']})")
    else:
        raise Exception(f"No checkpoint found at {filename}")
def load_model(config_path, checkpoint_path, fold=0):
    config = load_config(config_path)
    if 'fold' not in config.work_dir:
        config.work_dir = config.work_dir + '_fold{}'.format(fold)

    model = CenterNetFPN(
        slug=config.model.encoder,
        num_classes=len(config.data.features),
    )
    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(checkpoint_path)
    print('load model from {}'.format(checkpoint_path))
    model.load_state_dict(checkpoint['model_state_dict'])
    return model
def load_model(config_path):
    config = load_config(config_path)
    print(config.checkpoint_path)

    if config.model.arch == 'Classification':
        model = CustomNet(config.model.encoder, config.data.num_classes,
                          pretrained=False)
    else:
        # create segmentation model with pre-trained encoder
        model = getattr(smp, config.model.arch)(
            encoder_name=config.model.encoder,
            encoder_weights=None,
            classes=config.data.num_classes,
            activation=None,
        )
    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(config.checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    return model
def _get_optimizer(self, *, model_params, **params):
    load_from_previous_stage = \
        params.pop("load_from_previous_stage", False)
    optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    if load_from_previous_stage:
        checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
        checkpoint = utils.load_checkpoint(checkpoint_path)
        utils.unpack_checkpoint(checkpoint, optimizer=optimizer)

        # move optimizer to device
        device = get_device()
        for param in model_params:
            param = param["params"][0]
            state = optimizer.state[param]
            for key, value in state.items():
                state[key] = any2device(value, device)

        # update optimizer params
        for key, value in params.items():
            for pg in optimizer.param_groups:
                pg[key] = value

    return optimizer
def _get_optimizer(self, stage: str, model: Union[Model, Dict[str, Model]],
                   **params) -> Optimizer:
    # @TODO 1: refactoring; this method is too long
    # @TODO 2: load state dicts for schedulers & criterion
    layerwise_params = params.pop("layerwise_params", OrderedDict())
    no_bias_weight_decay = params.pop("no_bias_weight_decay", True)

    # linear scaling rule from https://arxiv.org/pdf/1706.02677.pdf
    lr_scaling_params = params.pop("lr_linear_scaling", None)
    if lr_scaling_params:
        data_params = dict(self.stages_config[stage]["data_params"])
        batch_size = data_params.get("batch_size")
        per_gpu_scaling = data_params.get("per_gpu_scaling", False)
        distributed_rank = utils.get_rank()
        distributed = distributed_rank > -1
        if per_gpu_scaling and not distributed:
            num_gpus = max(1, torch.cuda.device_count())
            batch_size *= num_gpus

        base_lr = lr_scaling_params.get("lr")
        base_batch_size = lr_scaling_params.get("base_batch_size", 256)
        lr_scaling = batch_size / base_batch_size
        params["lr"] = base_lr * lr_scaling  # scale default lr
    else:
        lr_scaling = 1.0

    # getting model parameters
    model_key = params.pop("_model", None)
    if model_key is None:
        assert isinstance(
            model, nn.Module
        ), "model is key-value, but optimizer has no specified model"
        model_params = utils.process_model_params(
            model, layerwise_params, no_bias_weight_decay, lr_scaling)
    elif isinstance(model_key, str):
        model_params = utils.process_model_params(
            model[model_key],
            layerwise_params,
            no_bias_weight_decay,
            lr_scaling,
        )
    elif isinstance(model_key, (list, tuple)):
        model_params = []
        for model_key_ in model_key:
            model_params_ = utils.process_model_params(
                model[model_key_],
                layerwise_params,
                no_bias_weight_decay,
                lr_scaling,
            )
            model_params.extend(model_params_)
    else:
        raise ValueError("unknown type of model_params")

    load_from_previous_stage = params.pop("load_from_previous_stage", False)
    optimizer_key = params.pop("optimizer_key", None)
    optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    if load_from_previous_stage and self.stages.index(stage) != 0:
        checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
        checkpoint = utils.load_checkpoint(checkpoint_path)

        dict2load = optimizer
        if optimizer_key is not None:
            dict2load = {optimizer_key: optimizer}
        utils.unpack_checkpoint(checkpoint, optimizer=dict2load)

        # move optimizer to device
        device = utils.get_device()
        for param in model_params:
            param = param["params"][0]
            state = optimizer.state[param]
            for key, value in state.items():
                state[key] = utils.any2device(value, device)

        # update optimizer params
        for key, value in params.items():
            for pg in optimizer.param_groups:
                pg[key] = value

    return optimizer
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)

    validloader = make_loader(
        data_folder=config.data.train_dir,
        df_path=config.data.train_df_path,
        phase='valid',
        batch_size=config.train.batch_size,
        num_workers=config.num_workers,
        idx_fold=config.data.params.idx_fold,
        transforms=get_transforms(config.transforms.test),
        num_classes=config.data.num_classes,
    )

    # create segmentation model with pre-trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )
    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    all_dice = {}
    min_sizes = [100, 300, 500, 750, 1000, 1500, 2000, 3000]
    for min_size in min_sizes:
        all_dice[min_size] = {}
        for cls in range(config.data.num_classes):
            all_dice[min_size][cls] = []

    with torch.no_grad():
        for i, (batch_images, batch_masks) in enumerate(tqdm(validloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)
            batch_masks = batch_masks.cpu().numpy()

            for masks, preds in zip(batch_masks, batch_preds):
                for cls in range(config.data.num_classes):
                    for min_size in min_sizes:
                        pred, _ = post_process(preds[cls, :, :],
                                               config.test.best_threshold,
                                               min_size)
                        mask = masks[cls, :, :]
                        all_dice[min_size][cls].append(dice_score(pred, mask))

    for cls in range(config.data.num_classes):
        for min_size in min_sizes:
            all_dice[min_size][cls] = sum(all_dice[min_size][cls]) / len(
                all_dice[min_size][cls])
            dict_to_json(all_dice, config.work_dir + '/threshold_search.json')

            if config.data.num_classes == 4:
                defect_class = cls + 1
            else:
                defect_class = cls
            print('average dice score for class{} for min_size {}: {}'.format(
                defect_class, min_size, all_dice[min_size][cls]))
def trace_model_from_checkpoint(
    logdir: Path,
    method_name: str,
    checkpoint_name: str,
    stage: str = None,
    loader: Union[str, int] = None,
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
):
    """Traces model using created experiment and runner.

    Args:
        logdir (Union[str, Path]): Path to Catalyst logdir with model
        checkpoint_name (str): Name of model checkpoint to use
        stage (str): experiment's stage name
        loader (Union[str, int]): experiment's loader name or its index
        method_name (str): Model's method name that will be used as entrypoint
            during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): AMP FP16 init level
        device (str): Torch device

    Returns:
        the traced model
    """
    config_path = logdir / "configs" / "_config.json"
    checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
    print("Load config")
    config: Dict[str, dict] = utils.load_config(config_path)
    runner_params = config.get("runner_params", {}) or {}

    # Get expdir name
    config_expdir = Path(config["args"]["expdir"])
    # We will use copy of expdir from logs for reproducibility
    expdir = Path(logdir) / "code" / config_expdir.name

    print("Import experiment and runner from logdir")
    ExperimentType, RunnerType = utils.import_experiment_and_runner(expdir)
    experiment: Experiment = ExperimentType(config)

    print(f"Load model state from checkpoints/{checkpoint_name}.pth")
    if stage is None:
        stage = list(experiment.stages)[0]

    model = experiment.get_model(stage)
    checkpoint = utils.load_checkpoint(checkpoint_path)
    utils.unpack_checkpoint(checkpoint, model=model)

    runner: RunnerType = RunnerType(**runner_params)
    runner.model, runner.device = model, device

    if loader is None:
        loader = 0
    batch = experiment.get_native_batch(stage, loader)

    print("Tracing")
    traced = trace.trace_model(
        model=model,
        runner=runner,
        batch=batch,
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        opt_level=opt_level,
        device=device,
    )

    print("Done")
    return traced
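# Hedged usage sketch (not part of the source): traces the "best" checkpoint of a
# hypothetical Catalyst logdir through the model's `forward` method and saves the
# TorchScript module next to it. The logdir path and output filename are assumptions.
#
# from pathlib import Path
#
# traced_model = trace_model_from_checkpoint(
#     logdir=Path("logs/segmentation"),
#     method_name="forward",
#     checkpoint_name="best",
# )
# torch.jit.save(traced_model, "logs/segmentation/traced-best-forward.pth")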
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)

    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path='cls_preds.csv',
            phase='filtered_test',
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
    else:
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))

    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)

            for fname, preds in zip(batch_fnames, batch_preds):
                if config.data.num_classes == 4:
                    for cls in range(preds.shape[0]):
                        mask = preds[cls, :, :]
                        mask, num = post_process(mask,
                                                 config.test.best_threshold,
                                                 min_sizes[cls])
                        rle = mask2rle(mask)
                        name = fname + f"_{cls + 1}"
                        predictions.append([name, rle])
                else:  # num_classes == 5
                    for cls in range(1, 5):
                        mask = preds[cls, :, :]
                        mask, num = post_process(mask,
                                                 config.test.best_threshold,
                                                 min_sizes[cls])
                        rle = mask2rle(mask)
                        name = fname + f"_{cls}"
                        predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions,
                      columns=['ImageId_ClassId', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
def main(args, _=None): """Run the ``catalyst-data text2embeddings`` script.""" batch_size = args.batch_size num_workers = args.num_workers max_length = args.max_length pooling_groups = args.pooling.split(",") utils.set_global_seed(args.seed) utils.prepare_cudnn(args.deterministic, args.benchmark) if hasattr(args, "in_huggingface"): model_config = BertConfig.from_pretrained(args.in_huggingface) model_config.output_hidden_states = args.output_hidden_states model = BertModel.from_pretrained(args.in_huggingface, config=model_config) tokenizer = BertTokenizer.from_pretrained(args.in_huggingface) else: model_config = BertConfig.from_pretrained(args.in_config) model_config.output_hidden_states = args.output_hidden_states model = BertModel(config=model_config) tokenizer = BertTokenizer.from_pretrained(args.in_vocab) if hasattr(args, "in_model"): checkpoint = utils.load_checkpoint(args.in_model) checkpoint = {"model_state_dict": checkpoint} utils.unpack_checkpoint(checkpoint=checkpoint, model=model) model = model.eval() model, _, _, _, device = utils.process_components(model=model) df = pd.read_csv(args.in_csv) df = df.dropna(subset=[args.txt_col]) df.to_csv(f"{args.out_prefix}.df.csv", index=False) df = df.reset_index().drop("index", axis=1) df = list(df.to_dict("index").values()) num_samples = len(df) open_fn = LambdaReader( input_key=args.txt_col, output_key=None, lambda_fn=partial( tokenize_text, strip=args.strip, lowercase=args.lowercase, remove_punctuation=args.remove_punctuation, ), tokenizer=tokenizer, max_length=max_length, ) dataloader = utils.get_loader( df, open_fn, batch_size=batch_size, num_workers=num_workers, ) features = {} dataloader = tqdm(dataloader) if args.verbose else dataloader with torch.no_grad(): for idx, batch in enumerate(dataloader): batch = utils.any2device(batch, device) bert_output = model(**batch) mask = (batch["attention_mask"].unsqueeze(-1) if args.mask_for_max_length else None) if utils.check_ddp_wrapped(model): # using several gpu hidden_size = model.module.config.hidden_size hidden_states = model.module.config.output_hidden_states else: # using cpu or one gpu hidden_size = model.config.hidden_size hidden_states = model.config.output_hidden_states features_ = process_bert_output( bert_output=bert_output, hidden_size=hidden_size, output_hidden_states=hidden_states, pooling_groups=pooling_groups, mask=mask, ) # create storage based on network output if idx == 0: for key, value in features_.items(): name_ = key if isinstance(key, str) else f"{key:02d}" _, embedding_size = value.shape features[name_] = np.memmap( f"{args.out_prefix}.{name_}.npy", dtype=np.float32, mode="w+", shape=(num_samples, embedding_size), ) indices = np.arange(idx * batch_size, min((idx + 1) * batch_size, num_samples)) for key, value in features_.items(): name_ = key if isinstance(key, str) else f"{key:02d}" features[name_][indices] = _detach(value)
def main(args, _=None):
    batch_size = args.batch_size
    num_workers = args.num_workers
    max_length = args.max_length
    pooling_groups = args.pooling.split(",")

    utils.set_global_seed(args.seed)
    utils.prepare_cudnn(args.deterministic, args.benchmark)

    model_config = BertConfig.from_pretrained(args.in_config)
    model_config.output_hidden_states = args.output_hidden_states
    model = BertModel(config=model_config)

    checkpoint = utils.load_checkpoint(args.in_model)
    checkpoint = {"model_state_dict": checkpoint}
    utils.unpack_checkpoint(checkpoint=checkpoint, model=model)

    model = model.eval()
    model, _, _, _, device = utils.process_components(model=model)

    tokenizer = BertTokenizer.from_pretrained(args.in_vocab)

    df = pd.read_csv(args.in_csv)
    df = df.dropna(subset=[args.txt_col])
    df.to_csv(f"{args.out_prefix}.df.csv", index=False)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())
    num_samples = len(df)

    open_fn = LambdaReader(
        input_key=args.txt_col,
        output_key=None,
        lambda_fn=get_features,
        tokenizer=tokenizer,
        max_length=max_length,
    )

    dataloader = utils.get_loader(
        df,
        open_fn,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    features = {}
    poolings = {}
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for idx, batch in enumerate(dataloader):
            batch = utils.any2device(batch, device)
            features_ = model(**batch)

            # create storage based on network output
            if idx == 0:
                # class
                _, embedding_size = features_[1].shape
                features["class"] = np.memmap(
                    f"{args.out_prefix}.class.npy",
                    dtype=np.float32,
                    mode="w+",
                    shape=(num_samples, embedding_size),
                )
                if args.output_hidden_states:
                    # all embeddings
                    for i, feature_ in enumerate(features_[2]):
                        name_ = f"embeddings_{i + 1:02d}"
                        _, _, embedding_size = feature_.shape
                        poolings[name_] = LamaPooling(
                            features_in=embedding_size,
                            groups=pooling_groups,
                        )
                        features[name_] = np.memmap(
                            f"{args.out_prefix}.{name_}.npy",
                            dtype=np.float32,
                            mode="w+",
                            shape=(num_samples, embedding_size),
                        )
                else:
                    # last
                    _, _, embedding_size = features_[0].shape
                    poolings["last"] = LamaPooling(
                        features_in=embedding_size,
                        groups=pooling_groups,
                    )
                    features["last"] = np.memmap(
                        f"{args.out_prefix}.last.npy",
                        dtype=np.float32,
                        mode="w+",
                        shape=(num_samples, embedding_size),
                    )

            indices = np.arange(idx * batch_size,
                                min((idx + 1) * batch_size, num_samples))
            features["class"][indices] = _detach(features_[1])

            if args.output_hidden_states:
                # all embeddings
                for i, feature_ in enumerate(features_[2]):
                    name_ = f"embeddings_{i + 1:02d}"
                    feature_ = poolings[name_](feature_)
                    features[name_][indices] = _detach(feature_)
            else:
                # last
                feature_ = poolings["last"](features_[0])
                features["last"][indices] = _detach(feature_)
def main_kaggle_smp(path_dataset='/dataset/kaggle/understanding_cloud_organization',
                    ENCODER='resnet50',
                    ENCODER_WEIGHTS='imagenet',
                    num_workers=0,
                    batch_size=8,
                    epochs=19,
                    debug=False,
                    exec_catalyst=True,
                    logdir="/src/logs/segmentation",
                    pretrained=True):
    # below line is potential input args
    # (name_dataset='eurosat', lr=0.0001, wd=0, ratio=0.9, batch_size=32, workers=4,
    #  epochs=15, num_gpus=1, resume=None, dir_weights='./weights'):
    torch.backends.cudnn.benchmark = True

    # Dataset
    train, sub = get_meta_info_table(path_dataset)
    train_ids, valid_ids, test_ids = prepare_dataset(train, sub)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
    train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids,
                                 transforms=get_training_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn),
                                 path=path_dataset)
    valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids,
                                 transforms=get_validation_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn),
                                 path=path_dataset)

    # DataLoader
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size,
                              shuffle=False, num_workers=num_workers)
    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    # todo: check how to use device in this case
    DEVICE = 'cuda'
    if debug:
        device = 'cpu'
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    ACTIVATION = None
    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    images, labels = next(iter(train_loader))
    model.to(device)
    print(model)
    print(summary(model, input_size=tuple(images.shape[1:])))

    # use smp epoch
    # num_epochs = 19

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.DiceLoss(eps=1.)  # smp.utils.losses.BCEDiceLoss(eps=1.)

    if not pretrained:
        # catalyst
        if exec_catalyst:
            device = utils.get_device()
            runner = SupervisedRunner(device=device)

            # train model
            runner.train(
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                scheduler=scheduler,
                loaders=loaders,
                callbacks=[DiceCallback(),
                           EarlyStoppingCallback(patience=5, min_delta=0.001)],
                logdir=logdir,
                num_epochs=epochs,
                verbose=True
            )

            # # prediction
            # encoded_pixels = []
            # loaders = {"infer": valid_loader}
            # runner.infer(
            #     model=model,
            #     loaders=loaders,
            #     callbacks=[
            #         CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            #         InferCallback()
            #     ],
            # )
            # valid_masks = []
            #
            # # todo: where .pth?
            # # todo: from here
            # valid_num = valid_dataset.__len__()
            # probabilities = np.zeros((valid_num * 4, 350, 525))
            # for i, (batch, output) in enumerate(tqdm(zip(
            #         valid_dataset, runner.callbacks[0].predictions["logits"]))):
            #     image, mask = batch
            #     for m in mask:
            #         if m.shape != (350, 525):
            #             m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            #         valid_masks.append(m)
            #
            #     for j, probability in enumerate(output):
            #         if probability.shape != (350, 525):
            #             probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            #         probabilities[valid_num * 4 + j, :, :] = probability
            #
            # # todo: from here
            # class_params = {}
            # for class_id in range(4):
            #     print(class_id)
            #     attempts = []
            #     for t in range(0, 100, 5):
            #         t /= 100
            #         for ms in [0, 100, 1200, 5000, 10000]:
            #             masks = []
            #             for i in range(class_id, len(probabilities), 4):
            #                 probability = probabilities[i]
            #                 predict, num_predict = post_process(sigmoid(probability), t, ms)
            #                 masks.append(predict)
            #
            #             d = []
            #             for i, j in zip(masks, valid_masks[class_id::4]):
            #                 if (i.sum() == 0) & (j.sum() == 0):
            #                     d.append(1)
            #                 else:
            #                     d.append(dice(i, j))
            #
            #             attempts.append((t, ms, np.mean(d)))
            #
            #     attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
            #     attempts_df = attempts_df.sort_values('dice', ascending=False)
            #     print(attempts_df.head())
            #     best_threshold = attempts_df['threshold'].values[0]
            #     best_size = attempts_df['size'].values[0]
            #
            #     class_params[class_id] = (best_threshold, best_size)
        else:
            for epoch in trange(epochs, desc="Epochs"):
                metrics_train = train_epoch(model, train_loader, criterion, optimizer, device)
                metrics_eval = eval_epoch(model, valid_loader, criterion, device)

                scheduler.step(metrics_eval['valid_loss'])
                print(f'epoch: {epoch} ', metrics_train, metrics_eval)

    else:
        if exec_catalyst:
            device = utils.get_device()
            checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best_full.pth')
            utils.unpack_checkpoint(checkpoint, model=model)
            runner = SupervisedRunner(model=model)

            # prediction with infer
            encoded_pixels = []
            loaders = {"infer": valid_loader}
            runner.infer(
                model=model,
                loaders=loaders,
                callbacks=[
                    CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
                    InferCallback()
                ],
            )

            # todo: still verifying this part in a Jupyter notebook
            valid_masks = []
            valid_num = valid_dataset.__len__()
            probabilities = np.zeros((valid_num * 4, 350, 525))
            for i, (batch, output) in enumerate(tqdm(zip(
                    valid_dataset, runner.callbacks[0].predictions["logits"]))):
                image, mask = batch
                for m in mask:
                    if m.shape != (350, 525):
                        m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                    valid_masks.append(m)

                for j, probability in enumerate(output):
                    if probability.shape != (350, 525):
                        probability = cv2.resize(probability, dsize=(525, 350),
                                                 interpolation=cv2.INTER_LINEAR)
                    probabilities[i * 4 + j, :, :] = probability

            class_params = {}
            for class_id in range(4):
                print(class_id)
                attempts = []
                for t in range(0, 100, 5):
                    t /= 100
                    for ms in [0, 100, 1200, 5000, 10000]:
                        masks = []
                        for i in range(class_id, len(probabilities), 4):
                            probability = probabilities[i]
                            predict, num_predict = post_process(sigmoid(probability), t, ms)
                            masks.append(predict)

                        d = []
                        for i, j in zip(masks, valid_masks[class_id::4]):
                            if (i.sum() == 0) & (j.sum() == 0):
                                d.append(1)
                            else:
                                d.append(dice(i, j))

                        attempts.append((t, ms, np.mean(d)))

                attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
                attempts_df = attempts_df.sort_values('dice', ascending=False)
                print(attempts_df.head())
                best_threshold = attempts_df['threshold'].values[0]
                best_size = attempts_df['size'].values[0]

                class_params[class_id] = (best_threshold, best_size)

            # predictions
            torch.cuda.empty_cache()
            gc.collect()

            test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids,
                                        transforms=get_validation_augmentation(),
                                        preprocessing=get_preprocessing(preprocessing_fn))
            test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=0)
            loaders = {"test": test_loader}

            encoded_pixels = []
            image_id = 0
            for i, test_batch in enumerate(tqdm(loaders['test'])):
                runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits']
                for i, batch in enumerate(runner_out):
                    for probability in batch:
                        probability = probability.cpu().detach().numpy()
                        if probability.shape != (350, 525):
                            probability = cv2.resize(probability, dsize=(525, 350),
                                                     interpolation=cv2.INTER_LINEAR)
                        predict, num_predict = post_process(sigmoid(probability),
                                                            class_params[image_id % 4][0],
                                                            class_params[image_id % 4][1])
                        if num_predict == 0:
                            encoded_pixels.append('')
                        else:
                            r = mask2rle(predict)
                            encoded_pixels.append(r)
                        image_id += 1

            sub['EncodedPixels'] = encoded_pixels
            sub.to_csv('data/kaggle_cloud_org/submission.csv',
                       columns=['Image_Label', 'EncodedPixels'], index=False)