def main_worker(args, unknown_args):
    """Runs the main worker for model training."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    config.setdefault("distributed_params", {})["apex"] = args.apex
    config.setdefault("distributed_params", {})["amp"] = args.amp

    expdir = Path(args.expdir)

    # optuna objective
    def objective(trial: optuna.trial.Trial):
        trial, trial_config = _process_trial_config(trial, config.copy())
        experiment, runner, trial_config = prepare_config_api_components(
            expdir=expdir, config=trial_config
        )
        # @TODO: we need a better solution here.
        experiment._trial = trial  # noqa: WPS437

        if experiment.logdir is not None and get_rank() <= 0:
            dump_environment(trial_config, experiment.logdir, args.configs)
            dump_code(args.expdir, experiment.logdir)

        runner.run_experiment(experiment)
        return runner.best_valid_metrics[runner.main_metric]

    # optuna direction
    minimize_metric = (
        config.get("stages", {}).get("stage_params", {}).get("minimize_metric", True)
    )
    direction = "minimize" if minimize_metric else "maximize"

    # optuna study
    study_params = config.pop("study_params", {})

    # optuna sampler
    sampler_params = study_params.pop("sampler_params", {})
    optuna_sampler_type = sampler_params.pop("sampler", None)
    optuna_sampler = (
        optuna.samplers.__dict__[optuna_sampler_type](**sampler_params)
        if optuna_sampler_type is not None
        else None
    )

    # optuna pruner
    pruner_params = study_params.pop("pruner_params", {})
    optuna_pruner_type = pruner_params.pop("pruner", None)
    optuna_pruner = (
        optuna.pruners.__dict__[optuna_pruner_type](**pruner_params)
        if optuna_pruner_type is not None
        else None
    )

    study = optuna.create_study(
        direction=direction,
        storage=args.storage or study_params.pop("storage", None),
        study_name=args.study_name or study_params.pop("study_name", None),
        sampler=optuna_sampler,
        pruner=optuna_pruner,
    )
    study.optimize(
        objective,
        n_trials=args.n_trials,
        timeout=args.timeout,
        n_jobs=args.n_jobs or 1,
        gc_after_trial=args.gc_after_trial,
        show_progress_bar=args.show_progress_bar,
    )
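# The ``optuna.samplers.__dict__[...]`` indexing above is just a by-name class
# lookup. A minimal sketch of the same resolution; ``TPESampler`` and
# ``seed=42`` are illustrative choices, not values taken from this codebase:

import optuna

sampler = optuna.samplers.__dict__["TPESampler"](seed=42)
# equivalent spelling:
sampler = optuna.samplers.TPESampler(seed=42)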
def main(args, unknown_args):
    """Runs the ``catalyst-dl tune`` script."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    # optuna objective
    def objective(trial: optuna.trial.Trial):
        trial, trial_config = _process_trial_config(trial, config.copy())
        runner: ConfigRunner = get_config_runner(
            expdir=Path(args.expdir), config=trial_config
        )
        # @TODO: we need a better solution here.
        runner._trial = trial  # noqa: WPS437

        if get_rank() <= 0:
            dump_environment(
                logdir=runner.logdir, config=config, configs_path=args.configs
            )
            dump_code(expdir=args.expdir, logdir=runner.logdir)

        runner.run()
        return trial.best_score

    # optuna study
    study_params = config.pop("study", {})

    # optuna sampler
    sampler_params = study_params.pop("sampler", {})
    optuna_sampler_type = sampler_params.pop("_target_", None)
    optuna_sampler = (
        optuna.samplers.__dict__[optuna_sampler_type](**sampler_params)
        if optuna_sampler_type is not None
        else None
    )

    # optuna pruner
    pruner_params = study_params.pop("pruner", {})
    optuna_pruner_type = pruner_params.pop("_target_", None)
    optuna_pruner = (
        optuna.pruners.__dict__[optuna_pruner_type](**pruner_params)
        if optuna_pruner_type is not None
        else None
    )

    study = optuna.create_study(
        direction=args.direction or study_params.pop("direction", "minimize"),
        storage=args.storage or study_params.pop("storage", None),
        study_name=args.study_name or study_params.pop("study_name", None),
        sampler=optuna_sampler,
        pruner=optuna_pruner,
        **study_params,
    )
    study.optimize(
        objective,
        n_trials=args.n_trials,
        timeout=args.timeout,
        n_jobs=args.n_jobs or 1,
        gc_after_trial=args.gc_after_trial,
        show_progress_bar=args.show_progress_bar,
    )
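# For reference, a hypothetical config fragment (shown as the dict it parses
# to) matching the keys ``main`` pops above: a ``study`` section with nested
# ``sampler``/``pruner`` dicts whose ``_target_`` names an optuna class and
# whose remaining keys become constructor kwargs. The concrete classes and
# parameters below are illustrative, not project defaults:

config = {
    "study": {
        "direction": "maximize",
        "study_name": "tune-example",
        "sampler": {"_target_": "TPESampler", "seed": 42},
        "pruner": {"_target_": "MedianPruner", "n_warmup_steps": 5},
    },
}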
def config_main(args, unknown_args):
    """Yaml config ``catalyst-dl run`` entry point."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    runner: ConfigRunner = get_config_runner(expdir=args.expdir, config=config)

    if get_rank() <= 0:
        dump_environment(
            logdir=runner.logdir, config=config, configs_path=args.configs
        )
        dump_code(expdir=args.expdir, logdir=runner.logdir)

    runner.run()
def main_worker(cfg: DictConfig):
    """Runs the main worker for Hydra-configured model training."""
    set_global_seed(cfg.args.seed)
    prepare_cudnn(cfg.args.deterministic, cfg.args.benchmark)

    import_module(hydra.utils.to_absolute_path(cfg.args.expdir))
    experiment = hydra.utils.instantiate(cfg.experiment, cfg=cfg)
    runner = hydra.utils.instantiate(cfg.runner)

    if experiment.logdir is not None and get_rank() <= 0:
        dump_environment(cfg, experiment.logdir)
        dump_code(
            hydra.utils.to_absolute_path(cfg.args.expdir), experiment.logdir
        )

    runner.run_experiment(experiment)
def main_worker(args, unknown_args):
    """Runs the main worker for model training."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    config.setdefault("distributed_params", {})["apex"] = args.apex
    config.setdefault("distributed_params", {})["amp"] = args.amp

    experiment, runner, config = prepare_config_api_components(
        expdir=Path(args.expdir), config=config
    )

    if experiment.logdir is not None and get_rank() <= 0:
        dump_environment(config, experiment.logdir, args.configs)
        dump_code(args.expdir, experiment.logdir)

    runner.run_experiment(experiment)
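# The two ``setdefault`` calls above guarantee a ``distributed_params``
# section exists before the apex/amp flags are written into it, whether or
# not the YAML defined one. A tiny self-contained sketch of the idiom:

config = {}
config.setdefault("distributed_params", {})["apex"] = False
config.setdefault("distributed_params", {})["amp"] = True
assert config == {"distributed_params": {"apex": False, "amp": True}}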
def main(args, _=None):
    """Run the ``catalyst-contrib image2embeddings`` script."""
    global IMG_SIZE

    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    IMG_SIZE = (args.img_size, args.img_size)  # noqa: WPS442

    if args.traced_model is not None:
        device = get_device()
        model = torch.jit.load(str(args.traced_model), map_location=device)
    else:
        model = ResnetEncoder(arch=args.arch, pooling=args.pooling)
    model = model.eval()
    model, _, _, _, device = process_components(model=model)

    df = pd.read_csv(args.in_csv)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())

    open_fn = ImageReader(
        input_key=args.img_col, output_key="image", rootpath=args.rootpath
    )

    dataloader = get_loader(
        df,
        open_fn,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        dict_transform=dict_transformer,
    )

    features = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for batch in dataloader:
            batch_features = model(batch["image"].to(device))
            batch_features = batch_features.cpu().detach().numpy()
            features.append(batch_features)

    features = np.concatenate(features, axis=0)
    np.save(args.out_npy, features)
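# Downstream, the saved array loads directly with numpy; the file name here
# stands in for whatever ``args.out_npy`` pointed at:

import numpy as np

embeddings = np.load("embeddings.npy")  # shape: (num_images, embedding_dim)
print(embeddings.shape)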
def main(cfg: DictConfig):
    """Hydra config ``catalyst-dl run`` entry point.

    Args:
        cfg: (DictConfig) configuration
    """
    cfg = prepare_hydra_config(cfg)
    set_global_seed(cfg.args.seed)
    prepare_cudnn(cfg.args.deterministic, cfg.args.benchmark)

    import_module(hydra.utils.to_absolute_path(cfg.args.expdir))
    runner = hydra.utils.instantiate(cfg.runner, cfg=cfg)

    if get_rank() <= 0:
        dump_environment(logdir=runner.logdir, config=cfg)
        dump_code(
            expdir=hydra.utils.to_absolute_path(cfg.args.expdir),
            logdir=runner.logdir,
        )

    runner.run()
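# What ``hydra.utils.instantiate`` does with ``cfg.runner`` above: it imports
# the class named by ``_target_`` and calls it with the remaining config keys
# as kwargs. A runnable sketch, using a stdlib class in place of a runner:

import hydra
from omegaconf import OmegaConf

cfg = OmegaConf.create({"runner": {"_target_": "collections.Counter"}})
runner = hydra.utils.instantiate(cfg.runner)
print(type(runner))  # <class 'collections.Counter'>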
import argparse

import pandas as pd
import segmentation_models_pytorch as smp
# import torch
from torch import optim
from torch.utils.data import DataLoader

from catalyst.dl import utils
from catalyst.dl.callbacks import (
    CheckpointCallback,
    CriterionCallback,
    DiceCallback,
    EarlyStoppingCallback,
    OptimizerCallback,
)
from catalyst.dl.runner import SupervisedRunner
from catalyst.utils.seed import set_global_seed
from catalyst.utils.torch import prepare_cudnn

from dataloader import CloudDataset

set_global_seed(2019)
prepare_cudnn(deterministic=True)


def str2bool(value: str) -> bool:
    """Parses boolean flag values explicitly; ``type=bool`` would treat any non-empty string as True."""
    return value.lower() in {"1", "true", "yes"}


parser = argparse.ArgumentParser("PyTorch Segmentation Pipeline")
parser.add_argument("-E", "--epochs", default=1, type=int)
parser.add_argument("-F", "--fold", default=1, type=int)
parser.add_argument("-C", "--checkpoint", default=False, type=str2bool)
parser.add_argument("-M", "--model", default="AlbuNet", type=str)
parser.add_argument("-A", "--encoder", default="resnet18", type=str)
parser.add_argument("-P", "--pretrained", default=True, type=str2bool)
parser.add_argument("--lr", default=1e-4, type=float)
parser.add_argument("--lr_e", default=1e-4, type=float)
parser.add_argument("--lr_d", default=1e-4, type=float)
parser.add_argument("--bs", default=4, type=int)
parser.add_argument("--size", default=320, type=int)
parser.add_argument("--dice-weight", default=0.5, type=float)
args = parser.parse_args()
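# Why the flags above use ``str2bool`` instead of ``type=bool``: argparse
# hands the raw command-line string to the ``type`` callable, and every
# non-empty string is truthy, so ``--pretrained False`` would still have
# parsed as True:

assert bool("False") is True
assert bool("0") is True
assert bool("") is False  # only the empty string is falsy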
def main(args, _=None):
    """Run the ``catalyst-contrib text2embeddings`` script."""
    batch_size = args.batch_size
    num_workers = args.num_workers
    max_length = args.max_length
    pooling_groups = args.pooling.split(",")
    bert_level = args.bert_level

    if bert_level is not None:
        assert (
            args.output_hidden_states
        ), "You need hidden states output for level specification"

    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    if getattr(args, "in_huggingface", False):
        model_config = BertConfig.from_pretrained(args.in_huggingface)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel.from_pretrained(args.in_huggingface, config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_huggingface)
    else:
        model_config = BertConfig.from_pretrained(args.in_config)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel(config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_vocab)
    if getattr(args, "in_model", None) is not None:
        checkpoint = load_checkpoint(args.in_model)
        checkpoint = {"model_state_dict": checkpoint}
        unpack_checkpoint(checkpoint=checkpoint, model=model)

    model = model.eval()
    model, _, _, _, device = process_components(model=model)

    df = pd.read_csv(args.in_csv)
    df = df.dropna(subset=[args.txt_col])
    df.to_csv(f"{args.out_prefix}.df.csv", index=False)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())
    num_samples = len(df)

    open_fn = LambdaReader(
        input_key=args.txt_col,
        output_key=None,
        lambda_fn=partial(
            tokenize_text,
            strip=args.strip,
            lowercase=args.lowercase,
            remove_punctuation=args.remove_punctuation,
        ),
        tokenizer=tokenizer,
        max_length=max_length,
    )

    dataloader = get_loader(
        df, open_fn, batch_size=batch_size, num_workers=num_workers
    )

    features = {}
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for idx, batch_input in enumerate(dataloader):
            batch_input = any2device(batch_input, device)
            batch_output = model(**batch_input)
            mask = (
                batch_input["attention_mask"].unsqueeze(-1)
                if args.mask_for_max_length
                else None
            )

            if check_ddp_wrapped(model):
                # model is DDP-wrapped (several gpus): read config via ``module``
                hidden_size = model.module.config.hidden_size
                hidden_states = model.module.config.output_hidden_states
            else:
                # plain model on cpu or a single gpu
                hidden_size = model.config.hidden_size
                hidden_states = model.config.output_hidden_states

            batch_features = process_bert_output(
                bert_output=batch_output,
                hidden_size=hidden_size,
                output_hidden_states=hidden_states,
                pooling_groups=pooling_groups,
                mask=mask,
            )

            # create storage based on network output
            if idx == 0:
                for layer_name, layer_value in batch_features.items():
                    if bert_level is not None and bert_level != layer_name:
                        continue
                    layer_name = (
                        layer_name
                        if isinstance(layer_name, str)
                        else f"{layer_name:02d}"
                    )
                    _, embedding_size = layer_value.shape
                    features[layer_name] = np.memmap(
                        f"{args.out_prefix}.{layer_name}.npy",
                        dtype=np.float32,
                        mode="w+",
                        shape=(num_samples, embedding_size),
                    )

            indices = np.arange(
                idx * batch_size, min((idx + 1) * batch_size, num_samples)
            )
            for layer_name2, layer_value2 in batch_features.items():
                if bert_level is not None and bert_level != layer_name2:
                    continue
                layer_name2 = (
                    layer_name2
                    if isinstance(layer_name2, str)
                    else f"{layer_name2:02d}"
                )
                features[layer_name2][indices] = _detach(layer_value2)

    if args.force_save:
        for key, mmap in features.items():
            mmap.flush()
            np.save(f"{args.out_prefix}.{key}.force.npy", mmap, allow_pickle=False)
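# ``np.memmap`` files carry no shape or dtype header, so a reader must supply
# both; a sketch of loading one per-layer file written above, with the file
# name and sizes purely illustrative:

import numpy as np

num_samples, embedding_size = 10_000, 768  # must match what the writer used
features = np.memmap(
    "out_prefix.pooling_mean.npy",  # hypothetical ``{out_prefix}.{layer}.npy``
    dtype=np.float32,
    mode="r",
    shape=(num_samples, embedding_size),
)
print(features[0, :5])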