예제 #1
0
def load_model(config_path):
    """Build the model described by a config file and load its checkpoint.

    ``config.work_dir`` is prefixed with a platform-specific root when the
    ``COLAB_GPU`` or ``KAGGLE_WORKING_DIR`` environment variable is set.
    When the config carries no checkpoint path,
    ``<work_dir>/checkpoints/best.pth`` is used instead.

    Args:
        config_path: path of the config file, passed to ``load_config``.

    Returns:
        The model after ``to(config.device)`` and ``eval()``, with the
        checkpoint's ``model_state_dict`` loaded.
    """
    config = load_config(config_path)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir

    # Identity test: ``== None`` would be routed through a custom ``__eq__``.
    if config.checkpoint_path is None:
        config.checkpoint_path = config.work_dir + '/checkpoints/best.pth'
    print(config.checkpoint_path)

    if config.model.arch == 'Classification':
        model = CustomNet(config.model.encoder,
                          config.data.num_classes,
                          pretrained=False)
    else:
        # Segmentation model; no encoder weights are requested here because
        # the state dict loaded below supplies the weights.
        model = getattr(smp, config.model.arch)(
            encoder_name=config.model.encoder,
            encoder_weights=None,
            classes=config.data.num_classes,
            activation=None,
        )

    model.to(config.device)
    model.eval()

    checkpoint = load_checkpoint(config.checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])

    return model
예제 #2
0
 def _preprocess_model_for_stage(self, stage: str, model: Model):
     """Load the best checkpoint into ``model`` for every stage but the first.

     Args:
         stage: name of the stage; must be present in ``self.stages``.
         model: model to update in place.

     Returns:
         The same ``model`` object.
     """
     # The first stage (index 0) starts from the model as given.
     if self.stages.index(stage) <= 0:
         return model

     best_state = utils.load_checkpoint(
         f"{self.logdir}/checkpoints/best.pth")
     utils.unpack_checkpoint(best_state, model=model)
     return model
예제 #3
0
def run_cls(config_file_cls):
    """Run the classifier on the validation loader and report metrics.

    Predictions are collected for the whole loader, accuracy and
    sample-averaged F1 are printed for nine thresholds between 0.1 and 0.9,
    and the raw predictions are stored with ``np.save('valid_preds', ...)``.

    Args:
        config_file_cls: path of the classification config, passed to
            ``load_config``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------
    # 1. classification inference
    # ------------------------------------------------------------------
    config = load_config(config_file_cls)

    valid_transforms = get_transforms(config.transforms.test)
    validloader = make_loader(
        data_folder=config.data.train_dir,
        df_path=config.data.train_df_path,
        phase='valid',
        batch_size=config.train.batch_size,
        num_workers=config.num_workers,
        idx_fold=config.data.params.idx_fold,
        transforms=valid_transforms,
        num_classes=config.data.num_classes,
        task='cls',
    )

    model = CustomNet(config.model.encoder, config.data.num_classes)
    model.to(config.device)
    model.eval()
    best_state = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(best_state['model_state_dict'])

    prediction_chunks = []
    target_chunks = []
    with torch.no_grad():
        for images, targets in tqdm(validloader):
            images = images.to(config.device)
            preds = predict_batch(model,
                                  images,
                                  tta=config.test.tta,
                                  task='cls')
            target_chunks.append(targets)
            prediction_chunks.append(preds)

    all_predictions = np.concatenate(prediction_chunks)
    all_targets = np.concatenate(target_chunks)

    # evaluation: targets and predictions are binarised with the same
    # threshold before scoring
    thresholds = np.linspace(0.1, 0.9, 9)
    accuracy_per_threshold = []
    f1_per_threshold = []
    for th in thresholds:
        accuracy_per_threshold.append(
            accuracy_score(all_targets > th, all_predictions > th))
        f1_per_threshold.append(
            f1_score(all_targets > th,
                     all_predictions > th,
                     average='samples'))

    for th, score in zip(thresholds, accuracy_per_threshold):
        print('validation accuracy for threshold {} = {}'.format(th, score))
    for th, score in zip(thresholds, f1_per_threshold):
        print('validation f1 score for threshold {}  = {}'.format(th, score))

    np.save('valid_preds', all_predictions)
예제 #4
0
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        resume: str = None,
        fp16: Union[Dict, bool] = None,
        initial_seed: int = 42,
    ) -> Generator:
        """
        Lazily run inference over a PyTorch DataLoader.

        Each batch of ``loader`` is passed to ``runner.predict_batch`` and the
        result is yielded. Before iterating, the experiment reference is
        cleared to avoid possible collisions, the runner's inner state is
        prepared for the ``"infer"`` stage (``is_infer_loader`` is True, the
        train/valid loader flags are False) and the model is moved to
        evaluation mode.

        Args:
            loader: loader to predict
            model: model to use for prediction; replaces ``self.model``
                when given
            resume: path to checkpoint to load into the model
            fp16 (Union[Dict, bool]): fp16 usage flag; ``True`` is expanded
                to ``{"opt_level": "O1"}``
            initial_seed: seed to use before prediction

        Yields:
            batches with model predictions
        """
        if fp16 is True:
            fp16 = {"opt_level": "O1"}

        if model is not None:
            self.model = model
        assert self.model is not None

        if resume is not None:
            resumed_state = utils.load_checkpoint(resume)
            utils.unpack_checkpoint(resumed_state, model=self.model)

        self.experiment = None
        utils.set_global_seed(initial_seed)
        (  # noqa: WPS122
            prepared_model,
            _,
            _,
            _,
            prepared_device,
        ) = utils.process_components(
            model=self.model,
            distributed_params=fp16,
            device=self.device,
        )
        self._prepare_inner_state(
            stage="infer",
            model=prepared_model,
            device=prepared_device,
            is_train_loader=False,
            is_valid_loader=False,
            is_infer_loader=True,
        )
        # equivalent of ``model.train(mode=False)`` applied through the helper
        utils.maybe_recursive_call(self.model, "train", mode=False)

        # seed again right before iterating the loader
        utils.set_global_seed(initial_seed)
        for batch in loader:
            yield self.predict_batch(batch)
예제 #5
0
    def _get_optimizer(self, *, model_params, **params):
        """Create an optimizer, optionally restoring its previous state.

        Args:
            model_params: parameters handed to the optimizer factory.
            **params: optimizer settings. ``load_from_previous_stage`` is
                removed from them; when it is truthy the state stored in
                ``<logdir>/checkpoints/best.pth`` is loaded and the remaining
                settings are written into every parameter group.

        Returns:
            The optimizer built by ``OPTIMIZERS.get_from_params``.
        """
        restore_state = params.pop("load_from_previous_stage", False)
        optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)
        if not restore_state:
            return optimizer

        best_state = utils.load_checkpoint(
            f"{self.logdir}/checkpoints/best.pth")
        utils.unpack_checkpoint(best_state, optimizer=optimizer)

        # The loaded state carries the old settings; re-apply the ones
        # requested for this call.
        for name, setting in params.items():
            for group in optimizer.param_groups:
                group[name] = setting

        return optimizer
예제 #6
0
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        resume: str = None,
        fp16: Union[Dict, bool] = None,
        initial_seed: int = 42,
    ) -> Generator:
        """
        Lazily run inference over a PyTorch DataLoader, yielding the result
        of `runner.predict_batch` for every batch.

        Args:
            loader (DataLoader): loader to predict
            model (Model): model to use for prediction; replaces
                ``self.model`` when given
            resume (str): path to checkpoint to load into the model
            fp16 (Union[Dict, bool]): fp16 usage flag; ``True`` is expanded
                to ``{"opt_level": "O1"}``
            initial_seed (int): seed to use before prediction

        Yields:
            batches with model predictions
        """
        if fp16 is True:
            fp16 = {"opt_level": "O1"}

        if model is not None:
            self.model = model
        assert self.model is not None

        if resume is not None:
            resumed_state = utils.load_checkpoint(resume)
            utils.unpack_checkpoint(resumed_state, model=self.model)

        components = utils.process_components(
            model=self.model,
            distributed_params=fp16,
            device=self.device,
        )
        # only the model and the device of the five components are kept
        self.model, _, _, _, self.device = components  # noqa: WPS122

        utils.set_global_seed(initial_seed)
        for batch in loader:
            yield self.predict_batch(batch)
예제 #7
0
def run_cls(config_file_cls):
    """Run the classifier on the test loader and store its predictions.

    The predictions are saved with ``np.save('all_preds', ...)`` and written
    to ``cls_preds.csv`` both in the current directory and in
    ``config.work_dir``.

    Args:
        config_file_cls: path of the classification config, passed to
            ``load_config``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------
    # 1. classification inference
    # ------------------------------------------------------------------
    config = load_config(config_file_cls)

    model = CustomNet(config.model.encoder, config.data.num_classes)

    test_transforms = get_transforms(config.transforms.test)
    testloader = make_loader(
        data_folder=config.data.test_dir,
        df_path=config.data.sample_submission_path,
        phase='test',
        batch_size=config.test.batch_size,
        num_workers=config.num_workers,
        transforms=test_transforms,
        num_classes=config.data.num_classes,
    )

    model.to(config.device)
    model.eval()

    best_state = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(best_state['model_state_dict'])

    file_names = []
    prediction_chunks = []
    with torch.no_grad():
        for fnames, images in tqdm(testloader):
            images = images.to(config.device)
            preds = predict_batch(model,
                                  images,
                                  tta=config.test.tta,
                                  task='cls')
            file_names.extend(fnames)
            prediction_chunks.append(preds)

    all_predictions = np.concatenate(prediction_chunks)
    np.save('all_preds', all_predictions)

    # NOTE(review): index=False leaves the file names (the frame's index)
    # out of both CSV files - confirm that the consumer does not need them.
    df = pd.DataFrame(data=all_predictions, index=file_names)
    for csv_path in ('cls_preds.csv', f"{config.work_dir}/cls_preds.csv"):
        df.to_csv(csv_path, index=False)
예제 #8
0
    def load_checkpoint(*, filename, state: RunnerState):
        """Restore a runner state from a checkpoint file.

        Args:
            filename: path of the checkpoint file.
            state (RunnerState): state to update. Its ``epoch`` is set from
                the checkpoint and its model, criterion, optimizer and
                scheduler are handed to ``utils.unpack_checkpoint``.

        Raises:
            FileNotFoundError: if ``filename`` is not an existing file.
                This is an ``Exception`` subclass, so callers that catch
                ``Exception`` keep working.
        """
        if not os.path.isfile(filename):
            raise FileNotFoundError(f"No checkpoint found at {filename}")

        print(f"=> loading checkpoint {filename}")
        checkpoint = utils.load_checkpoint(filename)

        state.epoch = checkpoint["epoch"]

        utils.unpack_checkpoint(checkpoint,
                                model=state.model,
                                criterion=state.criterion,
                                optimizer=state.optimizer,
                                scheduler=state.scheduler)

        print(
            f"loaded checkpoint {filename} (epoch {checkpoint['epoch']})")
def load_model(config_path, checkpoint_path, fold=0):
    """Build a ``CenterNetFPN`` from a config and load a checkpoint into it.

    Args:
        config_path: path of the config file, passed to ``load_config``.
        checkpoint_path: path of the checkpoint, passed to
            ``load_checkpoint``.
        fold: fold number appended to ``config.work_dir`` as ``_fold<N>``
            when the directory name does not contain ``fold`` yet.

    Returns:
        The model after ``to(config.device)`` and ``eval()``, with the
        checkpoint's ``model_state_dict`` loaded.
    """
    config = load_config(config_path)
    if 'fold' not in config.work_dir:
        config.work_dir += '_fold{}'.format(fold)

    num_outputs = len(config.data.features)
    model = CenterNetFPN(slug=config.model.encoder, num_classes=num_outputs)
    model.to(config.device)
    model.eval()

    state = load_checkpoint(checkpoint_path)
    print('load model from {}'.format(checkpoint_path))
    model.load_state_dict(state['model_state_dict'])

    return model
예제 #10
0
def load_model(config_path):
    """Build the model described by a config file and load its checkpoint.

    Args:
        config_path: path of the config file, passed to ``load_config``.

    Returns:
        The model after ``to(config.device)`` and ``eval()``, with the
        ``model_state_dict`` of ``config.checkpoint_path`` loaded.
    """
    config = load_config(config_path)
    print(config.checkpoint_path)

    arch = config.model.arch
    if arch != 'Classification':
        # Segmentation architecture looked up by name; no encoder weights
        # are requested here (``encoder_weights=None``).
        build_segmentation = getattr(smp, arch)
        model = build_segmentation(
            encoder_name=config.model.encoder,
            encoder_weights=None,
            classes=config.data.num_classes,
            activation=None,
        )
    else:
        model = CustomNet(config.model.encoder,
                          config.data.num_classes,
                          pretrained=False)

    model.to(config.device)
    model.eval()

    state = load_checkpoint(config.checkpoint_path)
    model.load_state_dict(state['model_state_dict'])

    return model
예제 #11
0
    def _get_optimizer(self, *, model_params, **params):
        """Create an optimizer, optionally restoring its previous state.

        Args:
            model_params: parameter groups handed to the optimizer factory;
                each group is indexed with ``["params"]`` when the state is
                restored.
            **params: optimizer settings. ``load_from_previous_stage`` is
                removed from them; when it is truthy the state stored in
                ``<logdir>/checkpoints/best_full.pth`` is loaded, moved to
                the current device and the remaining settings are written
                into every parameter group.

        Returns:
            The optimizer built by ``OPTIMIZERS.get_from_params``.
        """
        restore_state = params.pop("load_from_previous_stage", False)
        optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)
        if not restore_state:
            return optimizer

        full_state = utils.load_checkpoint(
            f"{self.logdir}/checkpoints/best_full.pth")
        utils.unpack_checkpoint(full_state, optimizer=optimizer)

        # move optimizer to device (state of the first parameter of each group)
        device = get_device()
        for group in model_params:
            first_param = group["params"][0]
            param_state = optimizer.state[first_param]
            for name, stored in param_state.items():
                param_state[name] = any2device(stored, device)

        # update optimizer params
        for name, setting in params.items():
            for pg in optimizer.param_groups:
                pg[name] = setting

        return optimizer
예제 #12
0
    def _get_optimizer(self, stage: str, model: Union[Model, Dict[str, Model]],
                       **params) -> Optimizer:
        """Create the optimizer for ``stage`` from its config parameters.

        Special keys are removed from ``params`` before the optimizer is
        built: ``layerwise_params``, ``no_bias_weight_decay``,
        ``lr_linear_scaling``, ``_model``, ``load_from_previous_stage`` and
        ``optimizer_key``.

        Args:
            stage: stage name; used to read the stage's ``data_params`` and
                to tell whether this is the first stage.
            model: a single module, or a key-value collection of modules
                when ``_model`` names which one(s) to optimize.
            **params: optimizer settings plus the special keys above.

        Returns:
            The optimizer built by ``OPTIMIZERS.get_from_params``.

        Raises:
            ValueError: if ``_model`` is neither ``None``, a string, a list
                nor a tuple.
        """
        # @TODO 1: refactoring; this method is too long
        # @TODO 2: load state dicts for schedulers & criterion
        layerwise_params = params.pop("layerwise_params", OrderedDict())
        no_bias_weight_decay = params.pop("no_bias_weight_decay", True)

        # linear scaling rule from https://arxiv.org/pdf/1706.02677.pdf
        lr_scaling = 1.0
        lr_scaling_params = params.pop("lr_linear_scaling", None)
        if lr_scaling_params:
            data_params = dict(self.stages_config[stage]["data_params"])
            batch_size = data_params.get("batch_size")
            per_gpu_scaling = data_params.get("per_gpu_scaling", False)
            distributed = utils.get_rank() > -1
            if per_gpu_scaling and not distributed:
                batch_size *= max(1, torch.cuda.device_count())

            base_lr = lr_scaling_params.get("lr")
            base_batch_size = lr_scaling_params.get("base_batch_size", 256)
            lr_scaling = batch_size / base_batch_size
            params["lr"] = base_lr * lr_scaling  # scale default lr

        def _params_of(module):
            # Same processing for every module, whichever way it is selected.
            return utils.process_model_params(
                module,
                layerwise_params,
                no_bias_weight_decay,
                lr_scaling,
            )

        # getting model parameters
        model_key = params.pop("_model", None)
        if model_key is None:
            assert isinstance(
                model, nn.Module
            ), "model is key-value, but optimizer has no specified model"
            model_params = _params_of(model)
        elif isinstance(model_key, str):
            model_params = _params_of(model[model_key])
        elif isinstance(model_key, (list, tuple)):
            model_params = []
            for single_key in model_key:
                model_params.extend(_params_of(model[single_key]))
        else:
            raise ValueError("unknown type of model_params")

        load_from_previous_stage = params.pop("load_from_previous_stage",
                                              False)
        optimizer_key = params.pop("optimizer_key", None)
        optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

        # Nothing to restore on the first stage or when not requested.
        if not load_from_previous_stage or self.stages.index(stage) == 0:
            return optimizer

        full_state = utils.load_checkpoint(
            f"{self.logdir}/checkpoints/best_full.pth")

        if optimizer_key is None:
            dict2load = optimizer
        else:
            dict2load = {optimizer_key: optimizer}
        utils.unpack_checkpoint(full_state, optimizer=dict2load)

        # move optimizer to device (state of the first parameter of each group)
        device = utils.get_device()
        for group in model_params:
            first_param = group["params"][0]
            param_state = optimizer.state[first_param]
            for name, stored in param_state.items():
                param_state[name] = utils.any2device(stored, device)

        # update optimizer params
        for name, setting in params.items():
            for pg in optimizer.param_groups:
                pg[name] = setting

        return optimizer
예제 #13
0
def run_seg(config_file_seg):
    """Evaluate ``min_size`` post-processing values on the validation loader.

    For every class and every candidate ``min_size`` the dice score of each
    validation sample is collected and averaged. The averages are printed
    and written once to ``<work_dir>/threshold_search.json`` as
    ``{min_size: {class_index: mean_dice}}``.

    Args:
        config_file_seg: path of the segmentation config, passed to
            ``load_config``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------
    config = load_config(config_file_seg)

    validloader = make_loader(
        data_folder=config.data.train_dir,
        df_path=config.data.train_df_path,
        phase='valid',
        batch_size=config.train.batch_size,
        num_workers=config.num_workers,
        idx_fold=config.data.params.idx_fold,
        transforms=get_transforms(config.transforms.test),
        num_classes=config.data.num_classes,
    )

    # segmentation architecture looked up by name on ``smp``
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )
    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    num_classes = config.data.num_classes
    min_sizes = [100, 300, 500, 750, 1000, 1500, 2000, 3000]
    all_dice = {
        min_size: {cls: [] for cls in range(num_classes)}
        for min_size in min_sizes
    }

    with torch.no_grad():
        for batch_images, batch_masks in tqdm(validloader):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            batch_masks = batch_masks.cpu().numpy()

            for masks, preds in zip(batch_masks, batch_preds):
                for cls in range(num_classes):
                    mask = masks[cls, :, :]
                    for min_size in min_sizes:
                        pred, _ = post_process(preds[cls, :, :],
                                               config.test.best_threshold,
                                               min_size)
                        all_dice[min_size][cls].append(dice_score(pred, mask))

    for cls in range(num_classes):
        # with 4 classes the printed class number is 1-based
        defect_class = cls + 1 if num_classes == 4 else cls
        for min_size in min_sizes:
            scores = all_dice[min_size][cls]
            all_dice[min_size][cls] = sum(scores) / len(scores)
            print('average dice score for class{} for min_size {}: {}'.format(
                defect_class, min_size, all_dice[min_size][cls]))

    # Written once, after every entry has been averaged. The file used to be
    # rewritten on every loop iteration while some entries were still raw
    # score lists.
    dict_to_json(all_dice, config.work_dir + '/threshold_search.json')
예제 #14
0
def trace_model_from_checkpoint(
    logdir: Path,
    method_name: str,
    checkpoint_name: str,
    stage: str = None,
    loader: Union[str, int] = None,
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
):
    """Traces model using created experiment and runner.

    The config is read from ``<logdir>/configs/_config.json``, the
    experiment and runner are imported from the code copy stored under
    ``<logdir>/code``, and the weights come from
    ``<logdir>/checkpoints/<checkpoint_name>.pth``.

    Args:
        logdir (Union[str, Path]): Path to Catalyst logdir with model
        method_name (str): Model's method name that will be
            used as entrypoint during tracing
        checkpoint_name (str): Name of model checkpoint to use
        stage (str): experiment's stage name; the first stage of the
            experiment is used when omitted
        loader (Union[str, int]): experiment's loader name or its index;
            index ``0`` is used when omitted
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): AMP FP16 init level
        device (str): Torch device

    Returns:
        the traced model
    """
    # Accept plain strings too: the ``/`` joins below need a Path.
    logdir = Path(logdir)
    config_path = logdir / "configs" / "_config.json"
    checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
    print("Load config")
    config: Dict[str, dict] = utils.load_config(config_path)
    runner_params = config.get("runner_params", {}) or {}

    # Get expdir name
    config_expdir = Path(config["args"]["expdir"])
    # We will use copy of expdir from logs for reproducibility
    expdir = logdir / "code" / config_expdir.name

    print("Import experiment and runner from logdir")
    ExperimentType, RunnerType = utils.import_experiment_and_runner(expdir)
    experiment: Experiment = ExperimentType(config)

    print(f"Load model state from checkpoints/{checkpoint_name}.pth")
    if stage is None:
        stage = list(experiment.stages)[0]

    model = experiment.get_model(stage)
    checkpoint = utils.load_checkpoint(checkpoint_path)
    utils.unpack_checkpoint(checkpoint, model=model)

    runner: RunnerType = RunnerType(**runner_params)
    runner.model, runner.device = model, device

    if loader is None:
        loader = 0
    batch = experiment.get_native_batch(stage, loader)

    print("Tracing")
    traced = trace.trace_model(
        model=model,
        runner=runner,
        batch=batch,
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        opt_level=opt_level,
        device=device,
    )

    print("Done")
    return traced
예제 #15
0
def run_seg(config_file_seg):
    """Run segmentation inference on the test loader and write a submission.

    When ``cls_preds.csv`` exists in the current directory it is used as the
    loader's data frame with phase ``'filtered_test'``; otherwise the sample
    submission is used with phase ``'test'``. Per-class ``min_size`` values
    are read from ``<work_dir>/threshold_search.json`` when that file exists
    and from ``config.test.min_size`` otherwise. The run-length encoded
    masks are written to ``<work_dir>/submission.csv``.

    Args:
        config_file_seg: path of the segmentation config, passed to
            ``load_config``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------
    config = load_config(config_file_seg)

    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # Only the data frame and the phase differ between the two loader setups.
    if os.path.exists('cls_preds.csv'):
        df_path, phase = 'cls_preds.csv', 'filtered_test'
    else:
        df_path, phase = config.data.sample_submission_path, 'test'
    testloader = make_loader(
        data_folder=config.data.test_dir,
        df_path=df_path,
        phase=phase,
        batch_size=config.test.batch_size,
        num_workers=config.num_workers,
        transforms=get_transforms(config.transforms.test),
    )

    model.to(config.device)
    model.eval()

    best_state = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(best_state['model_state_dict'])

    threshold_file = config.work_dir + '/threshold_search.json'
    if os.path.exists(threshold_file):
        with open(threshold_file) as json_file:
            searched = pd.DataFrame(json.load(json_file))
        # row label with the highest value for every column of the transpose
        min_sizes = list(searched.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for batch_fnames, batch_images in tqdm(testloader):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            for fname, preds in zip(batch_fnames, batch_preds):
                # (channel index, class id used in the submission name)
                if config.data.num_classes == 4:
                    channels = [(cls, cls + 1)
                                for cls in range(preds.shape[0])]
                else:  # == 5
                    channels = [(cls, cls) for cls in range(1, 5)]

                for cls, class_id in channels:
                    mask, _ = post_process(preds[cls, :, :],
                                           config.test.best_threshold,
                                           min_sizes[cls])
                    predictions.append(
                        [fname + f"_{class_id}", mask2rle(mask)])

    # ------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------
    submission = pd.DataFrame(predictions,
                              columns=['ImageId_ClassId', 'EncodedPixels'])
    submission.to_csv(config.work_dir + "/submission.csv", index=False)
예제 #16
0
def main(args, _=None):
    """Run the ``catalyst-data text2embeddings`` script.

    Encodes the ``args.txt_col`` column of ``args.in_csv`` with a BERT model
    and stores every entry returned by ``process_bert_output`` in a
    memory-mapped ``{args.out_prefix}.<name>.npy`` file. The rows that have
    text are also saved to ``{args.out_prefix}.df.csv``.

    Args:
        args: parsed command-line arguments
        _: unused
    """
    batch_size = args.batch_size
    num_workers = args.num_workers
    max_length = args.max_length
    pooling_groups = args.pooling.split(",")

    utils.set_global_seed(args.seed)
    utils.prepare_cudnn(args.deterministic, args.benchmark)

    # An option declared on an argparse parser exists on the namespace even
    # when it was not passed, so ``hasattr`` cannot tell whether it was set;
    # test the value instead.
    if getattr(args, "in_huggingface", None):
        model_config = BertConfig.from_pretrained(args.in_huggingface)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel.from_pretrained(args.in_huggingface,
                                          config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_huggingface)
    else:
        model_config = BertConfig.from_pretrained(args.in_config)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel(config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_vocab)
    if getattr(args, "in_model", None):
        checkpoint = utils.load_checkpoint(args.in_model)
        # the loaded object is wrapped under the key unpack_checkpoint reads
        checkpoint = {"model_state_dict": checkpoint}
        utils.unpack_checkpoint(checkpoint=checkpoint, model=model)

    model = model.eval()
    model, _, _, _, device = utils.process_components(model=model)

    # Loop-invariant: when several GPUs are used the config lives on the
    # wrapped ``module``, otherwise (cpu or one gpu) on the model itself.
    bert = model.module if utils.check_ddp_wrapped(model) else model
    hidden_size = bert.config.hidden_size
    hidden_states = bert.config.output_hidden_states

    df = pd.read_csv(args.in_csv)
    df = df.dropna(subset=[args.txt_col])
    df.to_csv(f"{args.out_prefix}.df.csv", index=False)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())
    num_samples = len(df)

    open_fn = LambdaReader(
        input_key=args.txt_col,
        output_key=None,
        lambda_fn=partial(
            tokenize_text,
            strip=args.strip,
            lowercase=args.lowercase,
            remove_punctuation=args.remove_punctuation,
        ),
        tokenizer=tokenizer,
        max_length=max_length,
    )

    dataloader = utils.get_loader(
        df,
        open_fn,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    features = {}
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for idx, batch in enumerate(dataloader):
            batch = utils.any2device(batch, device)
            bert_output = model(**batch)
            mask = (batch["attention_mask"].unsqueeze(-1)
                    if args.mask_for_max_length else None)

            features_ = process_bert_output(
                bert_output=bert_output,
                hidden_size=hidden_size,
                output_hidden_states=hidden_states,
                pooling_groups=pooling_groups,
                mask=mask,
            )

            # create storage based on network output
            if idx == 0:
                for key, value in features_.items():
                    name_ = key if isinstance(key, str) else f"{key:02d}"
                    _, embedding_size = value.shape
                    features[name_] = np.memmap(
                        f"{args.out_prefix}.{name_}.npy",
                        dtype=np.float32,
                        mode="w+",
                        shape=(num_samples, embedding_size),
                    )

            # rows of the storage that belong to this batch
            indices = np.arange(idx * batch_size,
                                min((idx + 1) * batch_size, num_samples))
            for key, value in features_.items():
                name_ = key if isinstance(key, str) else f"{key:02d}"
                features[name_][indices] = _detach(value)
예제 #17
0
def main(args, _=None):
    """Encode a CSV text column with BERT and store the embeddings.

    The second model output is written to ``{args.out_prefix}.class.npy``.
    With ``args.output_hidden_states`` every entry of the third model output
    is pooled with ``LamaPooling`` and written to
    ``{args.out_prefix}.embeddings_<NN>.npy``; otherwise the first model
    output is pooled and written to ``{args.out_prefix}.last.npy``. The rows
    that have text are also saved to ``{args.out_prefix}.df.csv``.

    Args:
        args: parsed command-line arguments
        _: unused
    """
    batch_size = args.batch_size
    num_workers = args.num_workers
    max_length = args.max_length
    pooling_groups = args.pooling.split(",")

    utils.set_global_seed(args.seed)
    utils.prepare_cudnn(args.deterministic, args.benchmark)

    model_config = BertConfig.from_pretrained(args.in_config)
    model_config.output_hidden_states = args.output_hidden_states
    model = BertModel(config=model_config)

    checkpoint = utils.load_checkpoint(args.in_model)
    # the loaded object is wrapped under the key unpack_checkpoint reads
    checkpoint = {"model_state_dict": checkpoint}
    utils.unpack_checkpoint(checkpoint=checkpoint, model=model)

    model = model.eval()
    model, _, _, _, device = utils.process_components(model=model)

    tokenizer = BertTokenizer.from_pretrained(args.in_vocab)

    df = pd.read_csv(args.in_csv)
    df = df.dropna(subset=[args.txt_col])
    df.to_csv(f"{args.out_prefix}.df.csv", index=False)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())
    num_samples = len(df)

    open_fn = LambdaReader(
        input_key=args.txt_col,
        output_key=None,
        lambda_fn=get_features,
        tokenizer=tokenizer,
        max_length=max_length,
    )

    dataloader = utils.get_loader(
        df,
        open_fn,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    features = {}
    poolings = {}
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for idx, batch in enumerate(dataloader):
            batch = utils.any2device(batch, device)
            features_ = model(**batch)

            # create storage based on network output
            # NOTE(review): every storage is as wide as the raw model output;
            # confirm that LamaPooling keeps that width for the chosen groups.
            if idx == 0:
                # class
                _, embedding_size = features_[1].shape
                features["class"] = np.memmap(
                    f"{args.out_prefix}.class.npy",
                    dtype=np.float32,
                    mode="w+",
                    shape=(num_samples, embedding_size),
                )
                if args.output_hidden_states:
                    # all embeddings
                    for i, feature_ in enumerate(features_[2]):
                        name_ = f"embeddings_{i + 1:02d}"
                        _, _, embedding_size = feature_.shape
                        poolings[name_] = LamaPooling(
                            features_in=embedding_size,
                            groups=pooling_groups,
                        )
                        features[name_] = np.memmap(
                            f"{args.out_prefix}.{name_}.npy",
                            dtype=np.float32,
                            mode="w+",
                            shape=(num_samples, embedding_size),
                        )
                else:
                    # last
                    _, _, embedding_size = features_[0].shape
                    poolings["last"] = LamaPooling(
                        features_in=embedding_size,
                        groups=pooling_groups,
                    )
                    features["last"] = np.memmap(
                        f"{args.out_prefix}.last.npy",
                        dtype=np.float32,
                        mode="w+",
                        shape=(num_samples, embedding_size),
                    )

            # rows of the storage that belong to this batch
            indices = np.arange(idx * batch_size,
                                min((idx + 1) * batch_size, num_samples))
            features["class"][indices] = _detach(features_[1])
            if args.output_hidden_states:
                # all embeddings
                for i, feature_ in enumerate(features_[2]):
                    name_ = f"embeddings_{i + 1:02d}"
                    feature_ = poolings[name_](feature_)
                    features[name_][indices] = _detach(feature_)
            else:
                # The pooling for this branch is registered under "last";
                # ``name_`` is never assigned when hidden states are off.
                feature_ = poolings["last"](features_[0])
                features["last"][indices] = _detach(feature_)
예제 #18
0
파일: train.py 프로젝트: baabp/CloudSatSeg
_MASK_SHAPE = (350, 525)  # (height, width) that masks and predictions are resized to
_NUM_CLASSES = 4  # number of output channels requested from the segmentation model


def _resize_to_mask_shape(array):
    """Return ``array`` resized to ``_MASK_SHAPE`` unless it already has that shape."""
    if array.shape != _MASK_SHAPE:
        # cv2 expects dsize as (width, height), hence the reversed tuple.
        array = cv2.resize(array, dsize=_MASK_SHAPE[::-1], interpolation=cv2.INTER_LINEAR)
    return array


def _collect_validation_outputs(valid_dataset, logits):
    """Gather resized ground-truth masks and predicted logits for the validation set.

    Returns a ``(valid_masks, probabilities)`` pair. Both are laid out
    image-major / class-minor, i.e. entry ``k * 4 + c`` belongs to the
    ``k``-th sample and its ``c``-th channel.
    """
    valid_masks = []
    probabilities = np.zeros((len(valid_dataset) * _NUM_CLASSES,) + _MASK_SHAPE)
    for sample_idx, (batch, output) in enumerate(tqdm(zip(valid_dataset, logits))):
        _, mask = batch
        for class_mask in mask:
            valid_masks.append(_resize_to_mask_shape(class_mask))

        for channel_idx, probability in enumerate(output):
            probabilities[sample_idx * _NUM_CLASSES + channel_idx, :, :] = \
                _resize_to_mask_shape(probability)
    return valid_masks, probabilities


def _search_class_params(probabilities, valid_masks):
    """Grid-search, per class, the (threshold, min_size) pair with the best mean dice.

    Returns a dict mapping ``class_id`` to ``(best_threshold, best_size)``.
    """
    class_params = {}
    for class_id in range(_NUM_CLASSES):
        print(class_id)
        # Every 4th entry starting at class_id belongs to this class.
        class_truths = valid_masks[class_id::_NUM_CLASSES]
        attempts = []
        for percent in range(0, 100, 5):
            threshold = percent / 100
            for min_size in [0, 100, 1200, 5000, 10000]:
                masks = []
                for idx in range(class_id, len(probabilities), _NUM_CLASSES):
                    predict, _ = post_process(sigmoid(probabilities[idx]), threshold, min_size)
                    masks.append(predict)

                scores = []
                for predicted, truth in zip(masks, class_truths):
                    # Two empty masks count as a perfect match.
                    if predicted.sum() == 0 and truth.sum() == 0:
                        scores.append(1)
                    else:
                        scores.append(dice(predicted, truth))

                attempts.append((threshold, min_size, np.mean(scores)))

        attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]

        class_params[class_id] = (best_threshold, best_size)
    return class_params


def _encode_test_predictions(runner, test_loader, class_params, device):
    """Predict every test batch and return one encoded string per (image, class) channel.

    Channels whose post-processed mask has no component are encoded as ``''``;
    all others are encoded with ``mask2rle``.
    """
    encoded_pixels = []
    channel_counter = 0  # running index over all channels; modulo 4 gives the class id
    for test_batch in tqdm(test_loader):
        # Move the batch to the runner's device instead of assuming CUDA is present.
        batch_logits = runner.predict_batch({"features": test_batch[0].to(device)})['logits']
        for image_logits in batch_logits:
            for probability in image_logits:
                probability = _resize_to_mask_shape(probability.cpu().detach().numpy())
                threshold, min_size = class_params[channel_counter % _NUM_CLASSES]
                predict, num_predict = post_process(sigmoid(probability), threshold, min_size)
                if num_predict == 0:
                    encoded_pixels.append('')
                else:
                    encoded_pixels.append(mask2rle(predict))
                channel_counter += 1
    return encoded_pixels


def main_kaggle_smp(path_dataset='/dataset/kaggle/understanding_cloud_organization',
                    ENCODER='resnet50',
                    ENCODER_WEIGHTS='imagenet',
                    num_workers=0,
                    batch_size=8,
                    epochs=19,
                    debug=False,
                    exec_catalyst=True,
                    logdir="/src/logs/segmentation",
                    pretrained=True
                    ):
    """Train an ``smp.Unet`` on the cloud dataset, or run inference with a saved checkpoint.

    Args:
        path_dataset: Dataset root handed to ``get_meta_info_table`` and ``CloudDataset``.
        ENCODER: Encoder name used for ``smp.Unet`` and for the preprocessing function.
        ENCODER_WEIGHTS: Encoder weights identifier used for the same two calls.
        num_workers: Worker count of the train/valid ``DataLoader``s.
        batch_size: Batch size of the train/valid ``DataLoader``s.
        epochs: Number of training epochs.
        debug: When true the model is first placed on the CPU; otherwise on
            ``cuda:0`` if available. The catalyst branches re-select the device
            through ``utils.get_device()``.
        exec_catalyst: Use a catalyst ``SupervisedRunner``; otherwise training
            falls back to the ``train_epoch`` / ``eval_epoch`` loop.
        logdir: Catalyst log directory; checkpoints are read from
            ``{logdir}/checkpoints``.
        pretrained: When false the model is trained. When true (and
            ``exec_catalyst`` is set) a checkpoint is loaded, per-class
            post-processing parameters are searched on the validation set and
            ``data/kaggle_cloud_org/submission.csv`` is written.

    NOTE(review): the inference branch unpacks ``best_full.pth`` and then lets
    ``CheckpointCallback`` resume from ``best.pth`` -- confirm both are intended.
    """
    import os  # local import: only needed to create the submission directory

    # Further candidate arguments that are not wired in yet:
    # (name_dataset='eurosat', lr=0.0001, wd=0, ratio=0.9, batch_size=32, workers=4, epochs=15, num_gpus=1,
    # resume=None, dir_weights='./weights'):

    torch.backends.cudnn.benchmark = True

    # Dataset
    train, sub = get_meta_info_table(path_dataset)
    train_ids, valid_ids, test_ids = prepare_dataset(train, sub)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn), path=path_dataset)
    valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids,
                                 transforms=get_validation_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn), path=path_dataset)
    # DataLoader
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    # TODO: check how the device should be selected when catalyst is used.
    if debug:
        device = 'cpu'
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=_NUM_CLASSES,
        activation=None,
    )
    # One batch is pulled only to learn the input shape for the model summary.
    images, _ = next(iter(train_loader))
    model.to(device)
    print(model)
    print(summary(model, input_size=tuple(images.shape[1:])))

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.DiceLoss(eps=1.)  # alternative: smp.utils.losses.BCEDiceLoss(eps=1.)

    if not pretrained:
        if exec_catalyst:
            device = utils.get_device()
            runner = SupervisedRunner(device=device)

            # train model
            runner.train(
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                scheduler=scheduler,
                loaders=loaders,
                callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
                logdir=logdir,
                num_epochs=epochs,
                verbose=True
            )
        else:
            for epoch in trange(epochs, desc="Epochs"):
                metrics_train = train_epoch(model, train_loader, criterion, optimizer, device)
                metrics_eval = eval_epoch(model, valid_loader, criterion, device)

                scheduler.step(metrics_eval['valid_loss'])
                print(f'epoch: {epoch} ', metrics_train, metrics_eval)
    else:
        if exec_catalyst:
            device = utils.get_device()
            checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best_full.pth')
            utils.unpack_checkpoint(checkpoint, model=model)
            runner = SupervisedRunner(model=model)

            # prediction with infer
            loaders = {"infer": valid_loader}
            runner.infer(
                model=model,
                loaders=loaders,
                callbacks=[
                    CheckpointCallback(
                        resume=f"{logdir}/checkpoints/best.pth"),
                    InferCallback()
                ],
            )

            # TODO: still being verified in Jupyter.
            valid_masks, probabilities = _collect_validation_outputs(
                valid_dataset, runner.callbacks[0].predictions["logits"])

            class_params = _search_class_params(probabilities, valid_masks)

            # predictions
            torch.cuda.empty_cache()
            gc.collect()

            # The test set lives under the same dataset root as train/valid.
            test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids,
                                        transforms=get_validation_augmentation(),
                                        preprocessing=get_preprocessing(preprocessing_fn), path=path_dataset)
            test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=0)

            encoded_pixels = _encode_test_predictions(runner, test_loader, class_params, device)

            sub['EncodedPixels'] = encoded_pixels
            submission_path = 'data/kaggle_cloud_org/submission.csv'
            # Create the target directory so a long inference run does not fail on the final write.
            os.makedirs(os.path.dirname(submission_path), exist_ok=True)
            sub.to_csv(submission_path, columns=['Image_Label', 'EncodedPixels'], index=False)