def main(args):
    # build a pretrained torchvision classifier by architecture name
    model = models.__dict__[args.arch](pretrained=True)
    model = model.eval()
    model, device = UtilsFactory.prepare_model(model)

    labels = json.loads(open(args.labels).read())

    i2k = Images2Keywords(model, args.n_keywords, labels)

    # read the image list and turn it into a list of per-row dicts
    images_df = pd.read_csv(args.in_csv)
    images_df = images_df.reset_index().drop("index", axis=1)
    images_df = list(images_df.to_dict("index").values())

    open_fn = ImageReader(row_key=args.img_col,
                          dict_key="image",
                          datapath=args.datapath)

    dataloader = UtilsFactory.create_loader(images_df,
                                            open_fn,
                                            batch_size=args.batch_size,
                                            workers=args.n_workers,
                                            dict_transform=dict_transformer)

    # run inference batch by batch and collect the predicted keywords
    keywords = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for batch in dataloader:
            keywords_batch = i2k(batch["image"].to(device))
            keywords += keywords_batch

    # write the keywords out as a new column next to the original rows
    input_csv = pd.read_csv(args.in_csv)
    input_csv[args.keywords_col] = keywords
    input_csv.to_csv(args.out_csv, index=False)
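A note on dict_transformer: both main examples in this section pass a
dict_transform=dict_transformer callable that is defined elsewhere in the
original script. A minimal sketch of what it might look like, assuming standard
torchvision preprocessing applied to the "image" entry of each row dict (the
transform itself and the IMG_SIZE default are assumptions, not part of the
library):

import torchvision.transforms as transforms

# Hypothetical preprocessing; IMG_SIZE mirrors the global set in the next example.
IMG_SIZE = (224, 224)

preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

def dict_transformer(sample):
    # transform only the image, leave the rest of the row dict untouched
    sample["image"] = preprocess(sample["image"])
    return sample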
Example #2
def main(args):
    global IMG_SIZE

    IMG_SIZE = (args.img_size, args.img_size)

    model = ResnetEncoder(arch=args.arch, pooling=args.pooling)
    model = model.eval()
    model, device = UtilsFactory.prepare_model(model)

    images_df = pd.read_csv(args.in_csv)
    images_df = images_df.reset_index().drop("index", axis=1)
    images_df = list(images_df.to_dict("index").values())

    open_fn = ImageReader(row_key=args.img_col,
                          dict_key="image",
                          datapath=args.datapath)

    dataloader = UtilsFactory.create_loader(images_df,
                                            open_fn,
                                            batch_size=args.batch_size,
                                            workers=args.n_workers,
                                            dict_transform=dict_transformer)

    # run the encoder batch by batch and collect the embeddings on the CPU
    features = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for batch in dataloader:
            features_ = model(batch["image"].to(device))
            features_ = features_.cpu().detach().numpy()
            features.append(features_)

    # stack the per-batch embeddings into one (num_images, dim) array and save it
    features = np.concatenate(features, axis=0)
    np.save(args.out_npy, features)
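The saved array has one embedding per input image. A quick sanity check after
the script finishes (file names below are placeholders for args.in_csv and
args.out_npy):

import numpy as np
import pandas as pd

features = np.load("features.npy")          # args.out_npy
images_df = pd.read_csv("images.csv")       # args.in_csv
assert features.shape[0] == len(images_df)  # one embedding per image row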
Example #3
    def _init(self):
        """
        Inner method for child classes to perform model-specific initialization.
        As a baseline, it checks device support and moves the model onto the device.
        """
        self.model, self.device = UtilsFactory.prepare_model(self.model)
Example #4
    def train_stage(self,
                    *,
                    loaders: Dict[str, data.DataLoader],
                    callbacks: Dict[str, Callback],
                    epochs: int = 1,
                    start_epoch: int = 0,
                    verbose: bool = False,
                    logdir: str = None):
        """
        One stage training method.

        :param loaders: OrderedDict of torch DataLoaders to run on
        :param callbacks: OrderedDict of callbacks to use
        :param epochs: number of epochs to run
        :param start_epoch: epoch index to start counting from
        :param verbose: verbose flag
        :param logdir: logdir for tensorboard logs
        """
        # @TODO: better solution
        if logdir is not None:
            loggers = UtilsFactory.create_loggers(logdir, loaders)
            for key, value in callbacks.items():
                if hasattr(value, "loggers"):
                    value.loggers = loggers
                if hasattr(value, "logdir"):
                    value.logdir = logdir
        self.run(loaders=loaders,
                 callbacks=callbacks,
                 epochs=epochs,
                 start_epoch=start_epoch,
                 mode="train",
                 verbose=verbose)
Example #5
    def on_loader_end(self, state):
        # merge this callback's epoch metrics into the shared state,
        # log them to tensorboard and print an epoch summary line
        lm = state.loader_mode

        state.epoch_metrics[lm] = {
            **state.epoch_metrics[lm],
            **self.epoch_metrics[lm]
        }

        state.epoch_metrics[lm] = {
            key: UtilsFactory.get_val_from_metric(value)
            for key, value in state.epoch_metrics[lm].items()
        }

        for key, value in state.epoch_metrics[lm].items():
            self.loggers[lm].add_scalar("epoch " + key, value, state.epoch)

        epoch_metrics_str = "\t".join([
            "{key} {value:.4f}".format(key=key, value=value)
            for key, value in sorted(state.epoch_metrics[lm].items())
        ])

        print("{epoch} * Epoch ({mode}): {metrics}".format(
            epoch=state.epoch, mode=lm, metrics=epoch_metrics_str))

        if self.reset_step:
            state.step = None
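For reference, this is the summary line the print above produces, shown here on
made-up metric values:

# Stand-alone illustration of the epoch summary printed by on_loader_end.
epoch_metrics = {"loss": 0.4321, "accuracy01": 0.8765}
epoch_metrics_str = "\t".join(
    "{key} {value:.4f}".format(key=key, value=value)
    for key, value in sorted(epoch_metrics.items()))
print("{epoch} * Epoch ({mode}): {metrics}".format(
    epoch=7, mode="valid", metrics=epoch_metrics_str))
# -> 7 * Epoch (valid): accuracy01 0.8765	loss 0.4321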
Example #6
    def load_checkpoint(*, filename, state):
        if os.path.isfile(filename):
            print("=> loading checkpoint \"{}\"".format(filename))
            checkpoint = UtilsFactory.load_checkpoint(filename)

            state.epoch = checkpoint["epoch"]
            state.best_metrics = checkpoint["best_metrics"]

            UtilsFactory.unpack_checkpoint(
                checkpoint,
                model=state.model, criterion=state._criterion,
                optimizer=state._optimizer, scheduler=state._scheduler)

            print("loaded checkpoint \"{}\" (epoch {})"
                  .format(filename, checkpoint["epoch"]))
        else:
            raise Exception("no checkpoint found at \"{}\"".format(filename))
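Judging from the fields this callback reads, the checkpoint is a plain dict. A
rough, assumed layout (only epoch and best_metrics are confirmed by the code
above; the key names for the packed state dicts are a guess):

checkpoint = {
    "epoch": 42,
    "best_metrics": {"loss": 0.123},
    # state consumed by UtilsFactory.unpack_checkpoint; key names assumed
    "model_state_dict": {},
    "criterion_state_dict": {},
    "optimizer_state_dict": {},
    "scheduler_state_dict": {},
}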
Example #7
    def save_checkpoint(self, logdir, checkpoint, is_best, save_n_best=5):
        suffix = f"{checkpoint['stage']}.{checkpoint['epoch']}"
        filepath = UtilsFactory.save_checkpoint(logdir=logdir,
                                                checkpoint=checkpoint,
                                                is_best=is_best,
                                                suffix=suffix)
        # track (filepath, metric) pairs and keep only the save_n_best best ones
        self.top_best_metrics.append(
            (filepath, checkpoint["valid_metrics"][self.main_metric]))
        self.top_best_metrics = sorted(self.top_best_metrics,
                                       key=lambda x: x[1],
                                       reverse=not self.minimize)
        if len(self.top_best_metrics) > save_n_best:
            # drop the worst remaining checkpoint from disk
            last_item = self.top_best_metrics.pop(-1)
            last_filepath = last_item[0]
            os.remove(last_filepath)
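The bookkeeping above keeps only the save_n_best checkpoints with the best value
of the main metric. A self-contained illustration of the same sort-and-pop
behaviour (file removal replaced by a print):

# minimize=True means a lower metric (e.g. a loss) is better
minimize = True
save_n_best = 2
top_best_metrics = []

for path, metric in [("ckpt.1.pth", 0.9), ("ckpt.2.pth", 0.5), ("ckpt.3.pth", 0.7)]:
    top_best_metrics.append((path, metric))
    top_best_metrics = sorted(top_best_metrics,
                              key=lambda x: x[1],
                              reverse=not minimize)
    if len(top_best_metrics) > save_n_best:
        worst_path, _ = top_best_metrics.pop(-1)
        print("would remove", worst_path)   # os.remove(...) in the callback

print(top_best_metrics)   # [('ckpt.2.pth', 0.5), ('ckpt.3.pth', 0.7)]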
Example #8
def prepare_model(config):
    return UtilsFactory.create_model(config=config,
                                     available_networks=NETWORKS)
Example #9
    def train(self,
              *,
              datasource: AbstractDataSource,
              args: Namespace,
              stages_config: Dict[str, Dict] = None,
              verbose: bool = False):
        """
        Main method for training DL models.

        :param datasource: AbstractDataSource instance
        :param args: console args
        :param stages_config: per-stage training configuration
        :param verbose: verbose flag
        """

        # shared defaults, later merged into each stage's own config
        stages_data_params = stages_config.pop("data_params", {})
        stages_callbacks_params = stages_config.pop("callbacks_params", {})
        stages_criterion_params = stages_config.pop("criterion_params", {})
        stages_optimizer_params = stages_config.pop("optimizer_params", {})
        loaders = None

        for stage, config in stages_config.items():
            self.stage = stage

            args = UtilsFactory.prepare_stage_args(args=args,
                                                   stage_config=config)
            pprint(args)

            data_params = merge_dicts(stages_data_params,
                                      config.get("data_params", {}))
            reload_loaders = data_params.get("reload_loaders", True)

            if loaders is None or reload_loaders:
                loaders = datasource.prepare_loaders(args,
                                                     data_params,
                                                     stage=stage)

            callbacks_params = merge_dicts(stages_callbacks_params,
                                           config.get("callbacks_params", {}))
            config["criterion_params"] = merge_dicts(
                stages_criterion_params, config.get("criterion_params", {}))
            config["optimizer_params"] = merge_dicts(
                stages_optimizer_params, config.get("optimizer_params", {}))

            callbacks = self.prepare_callbacks(
                callbacks_params=callbacks_params,
                args=args,
                mode="train",
                stage=stage)
            pprint(loaders)
            pprint(callbacks)

            self.run_stage_init(callbacks=callbacks)
            self.criterion, self.optimizer, self.scheduler = \
                UtilsFactory.prepare_stage_stuff(
                    model=self.model, stage_config=config)

            self.train_stage(
                loaders=loaders,
                callbacks=callbacks,
                epochs=args.epochs,
                start_epoch=0 if self.state is None else self.state.epoch + 1,
                verbose=verbose,
                logdir=args.logdir)
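The method pops the shared *_params sections first and treats every remaining
top-level key as a stage name. An illustrative (not authoritative) shape for
stages_config; the inner option names are examples only:

stages_config = {
    # shared defaults, merged into every stage's own section below
    "data_params": {"reload_loaders": True},
    "callbacks_params": {},
    "criterion_params": {},
    "optimizer_params": {"lr": 1e-3},      # option name is an example
    # everything else is treated as a stage name -> per-stage overrides
    "stage1": {
        "optimizer_params": {"lr": 1e-3},
    },
    "stage2": {
        "optimizer_params": {"lr": 1e-4},
        "data_params": {"reload_loaders": False},
    },
}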
Example #10
    def prepare_loaders(args, data_params, stage=None):
        loaders = collections.OrderedDict()

        df, df_train, df_valid, df_infer = parse_in_csvs(data_params)

        # each sample: image read from "filepath", integer target from "class"
        open_fn = [
            ImageReader(row_key="filepath",
                        dict_key="image",
                        datapath=data_params.get("datapath", None)),
            ScalarReader(row_key="class",
                         dict_key="target",
                         default_value=-1,
                         dtype=np.int64)
        ]
        open_fn = ReaderCompose(readers=open_fn)

        if len(df_train) > 0:
            labels = [x["class"] for x in df_train]
            # balance classes by oversampling the rarer ones
            sampler = BalanceClassSampler(labels, mode="upsampling")

            train_loader = UtilsFactory.create_loader(
                data_source=df_train,
                open_fn=open_fn,
                dict_transform=DataSource.prepare_transforms(mode="train",
                                                             stage=stage),
                dataset_cache_prob=getattr(args, "dataset_cache_prob", -1),
                batch_size=args.batch_size,
                workers=args.workers,
                shuffle=sampler is None,  # a sampler and shuffle are mutually exclusive
                sampler=sampler)

            print("Train samples", len(train_loader) * args.batch_size)
            print("Train batches", len(train_loader))
            loaders["train"] = train_loader

        if len(df_valid) > 0:
            sampler = None

            valid_loader = UtilsFactory.create_loader(
                data_source=df_valid,
                open_fn=open_fn,
                dict_transform=DataSource.prepare_transforms(mode="valid",
                                                             stage=stage),
                dataset_cache_prob=-1,
                batch_size=args.batch_size,
                workers=args.workers,
                shuffle=False,
                sampler=sampler)

            print("Valid samples", len(valid_loader) * args.batch_size)
            print("Valid batches", len(valid_loader))
            loaders["valid"] = valid_loader

        if len(df_infer) > 0:
            infer_loader = UtilsFactory.create_loader(
                data_source=df_infer,
                open_fn=open_fn,
                dict_transform=DataSource.prepare_transforms(mode="infer",
                                                             stage=None),
                dataset_cache_prob=-1,
                batch_size=args.batch_size,
                workers=args.workers,
                shuffle=False,
                sampler=None)

            print("Infer samples", len(infer_loader) * args.batch_size)
            print("Infer batches", len(infer_loader))
            loaders["infer"] = infer_loader

        return loaders
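ReaderCompose above reads each sample from a plain row dict. Given the row_key
arguments, every record in df_train/df_valid/df_infer presumably looks like this
(the values are made up):

record = {
    "filepath": "images/cat_001.jpg",  # read by ImageReader into sample["image"]
    "class": 3,                        # read by ScalarReader into sample["target"]
}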
Example #11
    def pack_checkpoint(self, **kwargs):
        return UtilsFactory.pack_checkpoint(**kwargs)