def main(args):
    # pretrained torchvision backbone used as the keyword classifier
    model = models.__dict__[args.arch](pretrained=True)
    model = model.eval()
    model, device = UtilsFactory.prepare_model(model)

    labels = json.loads(open(args.labels).read())

    i2k = Images2Keywords(model, args.n_keywords, labels)

    # read the image list and turn it into a list of row dicts
    images_df = pd.read_csv(args.in_csv)
    images_df = images_df.reset_index().drop("index", axis=1)
    images_df = list(images_df.to_dict("index").values())

    open_fn = ImageReader(
        row_key=args.img_col, dict_key="image", datapath=args.datapath)

    dataloader = UtilsFactory.create_loader(
        images_df,
        open_fn,
        batch_size=args.batch_size,
        workers=args.n_workers,
        dict_transform=dict_transformer)

    # run inference batch by batch and collect the predicted keywords
    keywords = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for batch in dataloader:
            keywords_batch = i2k(batch["image"].to(device))
            keywords += keywords_batch

    # write the keywords back next to the original rows
    input_csv = pd.read_csv(args.in_csv)
    input_csv[args.keywords_col] = keywords
    input_csv.to_csv(args.out_csv, index=False)
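# Hedged usage sketch (hypothetical values): this main() only relies on the
# argparse attributes it reads above, so a plain Namespace is enough to drive it.
if __name__ == "__main__":
    from argparse import Namespace

    main(Namespace(
        arch="resnet18", labels="imagenet_labels.json", n_keywords=5,
        in_csv="images.csv", img_col="filepath", datapath="data/",
        batch_size=32, n_workers=4, verbose=True,
        keywords_col="keywords", out_csv="images_with_keywords.csv"))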
def main(args):
    global IMG_SIZE
    IMG_SIZE = (args.img_size, args.img_size)

    # resnet encoder without the classification head, used as a feature extractor
    model = ResnetEncoder(arch=args.arch, pooling=args.pooling)
    model = model.eval()
    model, device = UtilsFactory.prepare_model(model)

    # read the image list and turn it into a list of row dicts
    images_df = pd.read_csv(args.in_csv)
    images_df = images_df.reset_index().drop("index", axis=1)
    images_df = list(images_df.to_dict("index").values())

    open_fn = ImageReader(
        row_key=args.img_col, dict_key="image", datapath=args.datapath)

    dataloader = UtilsFactory.create_loader(
        images_df,
        open_fn,
        batch_size=args.batch_size,
        workers=args.n_workers,
        dict_transform=dict_transformer)

    # extract features batch by batch and stack them into a single array
    features = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for batch in dataloader:
            features_ = model(batch["image"].to(device))
            features_ = features_.cpu().detach().numpy()
            features.append(features_)

    features = np.concatenate(features, axis=0)
    np.save(args.out_npy, features)
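# Hedged usage sketch for the feature extractor (hypothetical values): the
# Namespace covers every attribute this main() touches; the pooling value is
# an assumption and depends on what ResnetEncoder accepts.
if __name__ == "__main__":
    from argparse import Namespace

    main(Namespace(
        img_size=224, arch="resnet18", pooling="GlobalAvgPool2d",
        in_csv="images.csv", img_col="filepath", datapath="data/",
        batch_size=32, n_workers=4, verbose=True, out_npy="features.npy"))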
def _init(self):
    """
    Inner method for child classes to run model-specific initialization.
    As a baseline, checks device support and moves the model onto it.
    """
    self.model, self.device = UtilsFactory.prepare_model(self.model)
def train_stage(
        self,
        *,
        loaders: Dict[str, data.DataLoader],
        callbacks: Dict[str, Callback],
        epochs: int = 1,
        start_epoch: int = 0,
        verbose: bool = False,
        logdir: str = None):
    """
    One-stage training method.

    :param loaders: OrderedDict of torch DataLoaders to run on
    :param callbacks: OrderedDict of callbacks to use
    :param epochs: number of epochs to run
    :param start_epoch: epoch index to start counting from
    :param verbose: verbose flag
    :param logdir: logdir for tensorboard logs
    """
    # @TODO: better solution
    if logdir is not None:
        loggers = UtilsFactory.create_loggers(logdir, loaders)
        for key, value in callbacks.items():
            if hasattr(value, "loggers"):
                value.loggers = loggers
            if hasattr(value, "logdir"):
                value.logdir = logdir

    self.run(
        loaders=loaders,
        callbacks=callbacks,
        epochs=epochs,
        start_epoch=start_epoch,
        mode="train",
        verbose=verbose)
def on_loader_end(self, state):
    lm = state.loader_mode

    state.epoch_metrics[lm] = {
        **state.epoch_metrics[lm],
        **self.epoch_metrics[lm]
    }
    state.epoch_metrics[lm] = {
        key: UtilsFactory.get_val_from_metric(value)
        for key, value in state.epoch_metrics[lm].items()
    }

    for key, value in state.epoch_metrics[lm].items():
        self.loggers[lm].add_scalar("epoch " + key, value, state.epoch)

    epoch_metrics_str = "\t".join([
        "{key} {value:.4f}".format(key=key, value=value)
        for key, value in sorted(state.epoch_metrics[lm].items())
    ])
    print("{epoch} * Epoch ({mode}): {metrics}".format(
        epoch=state.epoch, mode=lm, metrics=epoch_metrics_str))

    if self.reset_step:
        state.step = None
def load_checkpoint(*, filename, state):
    if os.path.isfile(filename):
        print("=> loading checkpoint \"{}\"".format(filename))
        checkpoint = UtilsFactory.load_checkpoint(filename)

        state.epoch = checkpoint["epoch"]
        state.best_metrics = checkpoint["best_metrics"]

        UtilsFactory.unpack_checkpoint(
            checkpoint,
            model=state.model,
            criterion=state._criterion,
            optimizer=state._optimizer,
            scheduler=state._scheduler)

        print("loaded checkpoint \"{}\" (epoch {})"
              .format(filename, checkpoint["epoch"]))
    else:
        raise Exception("no checkpoint found at \"{}\"".format(filename))
def save_checkpoint(self, logdir, checkpoint, is_best, save_n_best=5):
    suffix = f"{checkpoint['stage']}.{checkpoint['epoch']}"
    filepath = UtilsFactory.save_checkpoint(
        logdir=logdir,
        checkpoint=checkpoint,
        is_best=is_best,
        suffix=suffix)
    self.top_best_metrics.append(
        (filepath, checkpoint["valid_metrics"][self.main_metric]))
    self.top_best_metrics = sorted(
        self.top_best_metrics,
        key=lambda x: x[1],
        reverse=not self.minimize)
    if len(self.top_best_metrics) > save_n_best:
        last_item = self.top_best_metrics.pop(-1)
        last_filepath = last_item[0]
        os.remove(last_filepath)
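# Hedged sketch of the checkpoint dict this method actually inspects: only
# "stage", "epoch" and "valid_metrics"[self.main_metric] are read here; the
# rest of the checkpoint (model/optimizer state) is packed elsewhere.
# Example values are hypothetical:
#
#     checkpoint = {
#         "stage": "stage1",
#         "epoch": 12,
#         "valid_metrics": {"loss": 0.137},  # self.main_metric must be a key here
#     }
#     self.save_checkpoint(logdir="./logs", checkpoint=checkpoint,
#                          is_best=True, save_n_best=3)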
def prepare_model(config):
    return UtilsFactory.create_model(
        config=config, available_networks=NETWORKS)
def train(
        self,
        *,
        datasource: AbstractDataSource,
        args: Namespace,
        stages_config: Dict[str, Dict] = None,
        verbose: bool = False):
    """
    Main method for training DL models.

    :param datasource: AbstractDataSource instance
    :param args: console args
    :param stages_config: config with per-stage parameters
    :param verbose: verbose flag
    """
    stages_data_params = stages_config.pop("data_params", {})
    stages_callbacks_params = stages_config.pop("callbacks_params", {})
    stages_criterion_params = stages_config.pop("criterion_params", {})
    stages_optimizer_params = stages_config.pop("optimizer_params", {})
    loaders = None

    for stage, config in stages_config.items():
        self.stage = stage

        args = UtilsFactory.prepare_stage_args(
            args=args, stage_config=config)
        pprint(args)

        data_params = merge_dicts(
            stages_data_params, config.get("data_params", {}))
        reload_loaders = data_params.get("reload_loaders", True)

        if loaders is None or reload_loaders:
            loaders = datasource.prepare_loaders(
                args, data_params, stage=stage)

        callbacks_params = merge_dicts(
            stages_callbacks_params, config.get("callbacks_params", {}))
        config["criterion_params"] = merge_dicts(
            stages_criterion_params, config.get("criterion_params", {}))
        config["optimizer_params"] = merge_dicts(
            stages_optimizer_params, config.get("optimizer_params", {}))

        callbacks = self.prepare_callbacks(
            callbacks_params=callbacks_params,
            args=args,
            mode="train",
            stage=stage)
        pprint(loaders)
        pprint(callbacks)

        self.run_stage_init(callbacks=callbacks)
        self.criterion, self.optimizer, self.scheduler = \
            UtilsFactory.prepare_stage_stuff(
                model=self.model, stage_config=config)

        self.train_stage(
            loaders=loaders,
            callbacks=callbacks,
            epochs=args.epochs,
            start_epoch=0 if self.state is None else self.state.epoch + 1,
            verbose=verbose,
            logdir=args.logdir)
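# Hedged sketch of a stages_config dict as train() consumes it: the four
# *_params entries at the top level act as stage-wide defaults, every other
# key is treated as a stage whose dict may override them. All names and
# values below are hypothetical.
example_stages_config = {
    "data_params": {"reload_loaders": False},
    "criterion_params": {"criterion": "CrossEntropyLoss"},
    "optimizer_params": {"optimizer": "Adam", "lr": 1e-3},
    "callbacks_params": {},
    "stage1": {},
    "stage2": {"optimizer_params": {"lr": 1e-4}},
}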
def prepare_loaders(args, data_params, stage=None):
    loaders = collections.OrderedDict()

    df, df_train, df_valid, df_infer = parse_in_csvs(data_params)

    open_fn = [
        ImageReader(
            row_key="filepath", dict_key="image",
            datapath=data_params.get("datapath", None)),
        ScalarReader(
            row_key="class", dict_key="target",
            default_value=-1, dtype=np.int64)
    ]
    open_fn = ReaderCompose(readers=open_fn)

    if len(df_train) > 0:
        labels = [x["class"] for x in df_train]
        sampler = BalanceClassSampler(labels, mode="upsampling")

        train_loader = UtilsFactory.create_loader(
            data_source=df_train,
            open_fn=open_fn,
            dict_transform=DataSource.prepare_transforms(
                mode="train", stage=stage),
            dataset_cache_prob=getattr(args, "dataset_cache_prob", -1),
            batch_size=args.batch_size,
            workers=args.workers,
            shuffle=sampler is None,
            sampler=sampler)

        print("Train samples", len(train_loader) * args.batch_size)
        print("Train batches", len(train_loader))
        loaders["train"] = train_loader

    if len(df_valid) > 0:
        sampler = None

        valid_loader = UtilsFactory.create_loader(
            data_source=df_valid,
            open_fn=open_fn,
            dict_transform=DataSource.prepare_transforms(
                mode="valid", stage=stage),
            dataset_cache_prob=-1,
            batch_size=args.batch_size,
            workers=args.workers,
            shuffle=False,
            sampler=sampler)

        print("Valid samples", len(valid_loader) * args.batch_size)
        print("Valid batches", len(valid_loader))
        loaders["valid"] = valid_loader

    if len(df_infer) > 0:
        infer_loader = UtilsFactory.create_loader(
            data_source=df_infer,
            open_fn=open_fn,
            dict_transform=DataSource.prepare_transforms(
                mode="infer", stage=None),
            dataset_cache_prob=-1,
            batch_size=args.batch_size,
            workers=args.workers,
            shuffle=False,
            sampler=None)

        print("Infer samples", len(infer_loader) * args.batch_size)
        print("Infer batches", len(infer_loader))
        loaders["infer"] = infer_loader

    return loaders
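# Note on the expected CSV layout: the ReaderCompose above assumes each row
# exposes a "filepath" column (image path, resolved against datapath) and an
# integer "class" column, e.g. (hypothetical data):
#
#     filepath,class
#     cats/001.jpg,0
#     dogs/042.jpg,1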
def pack_checkpoint(self, **kwargs):
    return UtilsFactory.pack_checkpoint(**kwargs)