def get_dataclass_data(obj: Any, allow_objects: Optional[bool] = None) -> Dict[str, Any]:
    """Turn the fields of a dataclass (type or instance) into wrapped config nodes.

    Args:
        obj: Dataclass type or instance whose fields are enumerated with
            ``dataclasses.fields``.
        allow_objects: When not ``None``, passed as the ``allow_objects`` flag
            of the temporary parent config used while the nodes are created.

    Returns:
        A dict mapping each field name to the node returned by
        ``_maybe_wrap``; every node is detached from the temporary parent
        before it is returned.

    Note:
        A field annotated with a Union type is reported by handing a
        ``ConfigValueError`` to ``format_and_raise``.
    """
    from omegaconf.omegaconf import MISSING, OmegaConf, _maybe_wrap

    flags = {"allow_objects": allow_objects} if allow_objects is not None else {}
    # Temporary parent that carries the flags while the nodes are being built.
    dummy_parent = OmegaConf.create({}, flags=flags)
    d = {}
    resolved_hints = get_type_hints(get_type_of(obj))
    for field in dataclasses.fields(obj):
        name = field.name
        is_optional, type_ = _resolve_optional(resolved_hints[field.name])
        type_ = _resolve_forward(type_, obj.__module__)

        if hasattr(obj, name):
            value = getattr(obj, name)
            # Identity check: dataclasses.MISSING is a sentinel object, and "=="
            # would dispatch to the value's own __eq__, which is not guaranteed
            # to return a plain bool.
            if value is dataclasses.MISSING:
                value = MISSING
        elif field.default_factory is dataclasses.MISSING:  # type: ignore
            value = MISSING
        else:
            value = field.default_factory()  # type: ignore

        if _is_union(type_):
            e = ConfigValueError(
                f"Union types are not supported:\n{name}: {type_str(type_)}")
            format_and_raise(node=None, key=None, value=value, cause=e, msg=str(e))

        d[name] = _maybe_wrap(
            ref_type=type_,
            is_optional=is_optional,
            key=name,
            value=value,
            parent=dummy_parent,
        )
        # The dummy parent was only needed during construction.
        d[name]._set_parent(None)
    return d
def _set_value(self, value: Any) -> None:
    """Replace the stored content of this list node with ``value``.

    ``value`` may be a none-like value (only accepted when the node is
    optional), the mandatory-missing marker, an interpolation, or a
    primitive list / ``ListConfig`` whose items are appended one by one.

    Raises:
        ValidationError: if ``value`` is none-like and the node is not optional.
    """
    from omegaconf import OmegaConf

    content_slot = "_content"

    if OmegaConf.is_none(value):
        if not self._is_optional():
            raise ValidationError(
                "Non optional ListConfig cannot be constructed from None")
        self.__dict__[content_slot] = None
        return

    kind = get_value_kind(value)

    if kind == ValueKind.MANDATORY_MISSING:
        self.__dict__[content_slot] = "???"
        return

    if kind in (ValueKind.INTERPOLATION, ValueKind.STR_INTERPOLATION):
        # Interpolations are stored verbatim.
        self.__dict__[content_slot] = value
        return

    assert is_primitive_list(value) or isinstance(value, ListConfig)
    self.__dict__[content_slot] = []
    for element in value:
        self.append(element)
def append(self, item: Any) -> None:
    """Validate ``item``, wrap it as a node and add it at the end of the list.

    Raises:
        UnsupportedValueType: re-raised with a message naming the item's type
            and the full key of the slot it was meant for.
    """
    index = len(self)
    self._validate_set(key=index, value=item)

    try:
        from omegaconf.omegaconf import OmegaConf, _maybe_wrap

        # Resolve the bound ``append`` of the underlying content before
        # wrapping, so the lookup happens ahead of node creation.
        push = self.__dict__["_content"].append
        wrapped = _maybe_wrap(
            annotated_type=self._metadata.element_type,
            key=index,
            value=item,
            is_optional=OmegaConf.is_optional(item),
            parent=self,
        )
        push(wrapped)
    except UnsupportedValueType:
        full_key = self._get_full_key(f"{len(self)}")
        message = f"{type(item).__name__} is not a supported type (key: {full_key})"
        raise UnsupportedValueType(message)
def get_attr_data(obj: Any, allow_objects: Optional[bool] = None) -> Dict[str, Any]:
    """Turn the attributes of an attrs class (type or instance) into wrapped config nodes.

    Args:
        obj: attrs-decorated type or instance. For a type the attribute
            defaults are used; for an instance the current attribute values.
        allow_objects: When not ``None``, passed as the ``allow_objects`` flag
            of the temporary parent config used while the nodes are created.

    Returns:
        A dict mapping each attribute name to the node returned by
        ``_maybe_wrap``; every node is detached from the temporary parent
        before it is returned.

    Note:
        An attribute annotated with a Union type is reported by handing a
        ``ConfigValueError`` to ``format_and_raise``.
    """
    from omegaconf import MISSING
    from omegaconf.omegaconf import OmegaConf, _maybe_wrap

    flags = {"allow_objects": allow_objects} if allow_objects is not None else {}
    # Temporary parent that carries the flags while the nodes are being built.
    dummy_parent = OmegaConf.create(flags=flags)

    d = {}
    is_type = isinstance(obj, type)
    obj_type = obj if is_type else type(obj)
    for name, attrib in attr.fields_dict(obj_type).items():
        is_optional, type_ = _resolve_optional(attrib.type)
        type_ = _resolve_forward(type_, obj.__module__)

        value = attrib.default if is_type else getattr(obj, name)
        # Identity check: attr.NOTHING is a sentinel, and "==" would dispatch
        # to the value's own __eq__, which is not guaranteed to return a bool.
        if value is attr.NOTHING:
            value = MISSING

        if _is_union(type_):
            e = ConfigValueError(
                f"Union types are not supported:\n{name}: {type_str(type_)}")
            format_and_raise(node=None, key=None, value=value, cause=e, msg=str(e))

        d[name] = _maybe_wrap(
            ref_type=type_,
            is_optional=is_optional,
            key=name,
            value=value,
            parent=dummy_parent,
        )
        # The dummy parent was only needed during construction.
        d[name]._set_parent(None)
    return d
def main(cfg) -> None:
    """Fine-tune a restored Megatron T5 model according to ``cfg``.

    Steps, in order: log the configuration, assemble the trainer plugins,
    create the trainer and experiment manager, pick the checkpoint to resume
    from, swap ``Timer`` callbacks for ``StatelessTimer``, load the stored
    model config and override it from ``cfg``, restore the model, then run
    fit, validate and (when ``cfg.model.data`` has ``test_ds``) test.
    """
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False)
    ddp_plugin = NLPDDPPlugin(
        no_ddp_communication_hook=True,
        gradient_as_bucket_view=cfg.model.gradient_as_bucket_view,
        find_unused_parameters=False,
    )
    plugins = [ddp_plugin]

    precision = cfg.trainer.precision
    if precision in [16, 'bf16']:
        # A gradient scaler is only built for precision 16.
        if precision == 16:
            scaler = GradScaler(
                init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
                growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
                hysteresis=cfg.model.get('hysteresis', 2),
            )
        else:
            scaler = None
        precision_plugin_cls = (
            MegatronHalfPrecisionPlugin if megatron_amp_o2 else PipelineMixedPrecisionPlugin
        )
        plugins.append(precision_plugin_cls(precision=precision, device='cuda', scaler=scaler))

    if cfg.get('cluster_type', None) == 'BCP':
        plugins.append(TorchElasticEnvironment())

    trainer = Trainer(plugins=plugins, **cfg.trainer)
    exp_manager(trainer, cfg.exp_manager)

    # An explicit checkpoint in the config wins; otherwise use the one found
    # by exp_manager.
    resume_from_checkpoint = cfg.model.resume_from_checkpoint
    if resume_from_checkpoint is None:
        resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path
    logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}')
    trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint)

    # Override timer callback to a stateless one
    for position, existing in enumerate(trainer.callbacks):
        if not isinstance(existing, Timer):
            continue
        trainer.callbacks[position] = StatelessTimer(cfg.trainer.max_time,)

    # Get the T5 Base configuration.
    t5_cfg = MegatronT5FinetuneModel.restore_from(
        restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True
    )

    # Override the T5 configuration with the one from the config file.
    OmegaConf.set_struct(t5_cfg, True)
    with open_dict(t5_cfg):
        t5_cfg.masked_softmax_fusion = False
        t5_cfg.megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False)
        t5_cfg.hidden_dropout = cfg.model.get('hidden_dropout', 0.1)
        t5_cfg.attention_dropout = cfg.model.get('attention_dropout', 0.1)
        t5_cfg.data = cfg.model.data
        t5_cfg.precision = cfg.trainer.precision
        t5_cfg.optim = cfg.model.optim
        t5_cfg.micro_batch_size = cfg.model.data.train_ds.micro_batch_size
        t5_cfg.global_batch_size = cfg.model.data.train_ds.global_batch_size
        # XNLI has eval languages in the yaml config.
        if hasattr(cfg.model, 'eval_languages'):
            t5_cfg.eval_languages = cfg.model.eval_languages

    # A ``task_name`` on the training dataset selects the GLUE model class.
    if hasattr(cfg.model.data.train_ds, 'task_name'):
        model_cls = MegatronT5GLUEModel
    else:
        model_cls = MegatronT5FinetuneModel
    model = model_cls.restore_from(
        restore_path=cfg.model.restore_from_path,
        trainer=trainer,
        override_config_path=t5_cfg,
        save_restore_connector=NLPSaveRestoreConnector(),
    )

    trainer.fit(model)
    trainer.validate(model)
    if hasattr(cfg.model.data, 'test_ds'):
        trainer.test(model)
# print("Text: ", text) # print("Preds: ", get_text_spans(text, old_offsets)) # print("Clean Preds: ", get_text_spans(text, new_offsets)) with open( os.path.join( save_dir, f"spans-pred-{test_file.split('/')[-1].split('.')[0]}.txt" ), "w", ) as f: for i, spans in enumerate(new_final_offset_predictions): f.write(f"{i}\t{str(spans)}\n") if __name__ == "__main__": random.seed(SEED) np.random.seed(SEED) set_seed(SEED) parser = argparse.ArgumentParser(prog="run_baseline_model.py", description="Train Baseline RNNSL Model.") parser.add_argument( "--config", type=str, action="store", help="The configuration for model training/evaluation", ) args = parser.parse_args() config = OmegaConf.load(args.config) predict(**dict(config))
def run(cfg: DictConfig) -> None:
    """Train/predict with XGBoost over the CV folds, score OOF predictions and write a submission.

    Args:
        cfg: Run configuration. ``cfg.model.nseed``, ``cfg.model.nfolds`` and
            ``cfg.model.train_models`` are read; ``device``, ``list_seed`` and
            ``path_model`` are written into it.

    Returns:
        The mean per-target log-loss of the out-of-fold predictions
        (``0`` when ``pretrain_model`` is set). NOTE(review): the signature is
        annotated ``-> None`` although a score is returned.

    Side effects:
        Changes the working directory to Hydra's original cwd and writes
        ``submission.csv`` there.
    """
    # Run setup (performed once; the original repeated this whole section).
    os.chdir(hydra.utils.get_original_cwd())
    log.info(OmegaConf.to_yaml(cfg))
    cfg['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'
    cfg['list_seed'] = list(range(cfg.model.nseed))
    verbose = 1
    local_path = '../'
    path = f'{local_path}input/lish-moa'
    cfg['path_model'] = f'{local_path}models'

    ######################################
    # data_load and preprocess
    ######################################
    pretrain_model = False
    data_dict = load_and_preprocess_data_index(
        cfg,
        path,
        pca_append_test=True,
        variancethreshold_append_test=False,
        verbose=1)
    CV = DrugAwareMultilabelStratifiedKFold(n_splits=cfg.model.nfolds,
                                            shuffle=False,
                                            random_state=42)

    ##################################################
    # Train
    ##################################################
    # Single source of truth for the seeds: the loop and the averaging
    # denominator both use this list.
    SEED = [0]
    n_targets = len(data_dict['target_cols'])
    oof = np.zeros((len(data_dict['train']), n_targets))
    predictions = np.zeros((len(data_dict['test']), n_targets))
    for seed in tqdm(SEED, leave=verbose):
        xgb_params = {
            'booster': 'gbtree',
            'tree_method': 'gpu_hist',
            'min_child_weight': 31.58,
            'learning_rate': 0.05,
            'colsample_bytree': 0.65,
            'gamma': 3.69,
            'max_delta_step': 2.07,
            'max_depth': 10,
            'n_estimators': 10,
            'subsample': 0.86,
            'verbosity': 1,
        }
        return_run_k_fold = get_xgboost(data_dict,
                                        cfg,
                                        xgb_params,
                                        CV,
                                        seed=seed,
                                        file_prefix='x1',
                                        optimization=False,
                                        verbose=0)
        # With training enabled the helper returns (oof, predictions),
        # otherwise predictions only.
        if cfg.model.train_models:
            oof_, predictions_ = return_run_k_fold
            oof += oof_ / len(SEED)
        else:
            predictions_ = return_run_k_fold
        predictions += predictions_ / len(SEED)
        gc.collect()

    train = data_dict['train'].copy()
    test = data_dict['test'].copy()
    target_cols = data_dict['target_cols']
    train_targets_scored = data_dict['train_targets_scored']
    test_features = data_dict['test_features']
    if not pretrain_model:
        train[target_cols] = oof
    test[target_cols] = predictions

    ##################################################
    # validation and save
    ##################################################
    if not pretrain_model:
        y_true = train_targets_scored[target_cols].values
        valid_results = train_targets_scored.drop(columns=target_cols).merge(
            train[target_cols], on='sig_id', how='left').fillna(0)
        y_pred = valid_results[target_cols].values
        score = 0
        for i in range(len(target_cols)):
            score += log_loss(y_true[:, i], y_pred[:, i]) / len(target_cols)
        print(f"CV log_loss: {score}")
        log.info(f"CV log_loss: {score}")
        log.info(f"y_true.shape: {y_true.shape}")
        log.info(f"y_pred.shape: {y_pred.shape}")

    res = test[target_cols]
    # Rows with cp_type == 'ctl_vehicle' get all-zero targets. Work on an
    # explicit copy so the assignment does not target a filtered view.
    corner_case = test_features[test_features['cp_type'] == 'ctl_vehicle'].copy()
    corner_case[target_cols] = np.zeros((corner_case.shape[0], len(target_cols)))
    corner_case = corner_case[target_cols]
    res = pd.concat([res, corner_case], axis=0)
    res.to_csv('submission.csv')
    log.info(f"res.shape: {res.shape}")
    log.info(f"test[target_cols].shape: {test[target_cols].shape}")

    if not pretrain_model:
        return score
    else:
        return 0
def format_and_raise(
    node: Any,
    key: Any,
    value: Any,
    msg: str,
    cause: Exception,
    type_override: Any = None,
) -> None:
    """Build a context-rich exception from ``cause`` and raise it via ``_raise``.

    Args:
        node: Config node the error relates to, or ``None`` when there is none.
        key: Key being accessed on ``node`` (may be ``None``).
        value: Value involved in the failing operation.
        msg: ``string.Template`` text; the placeholders ``$REF_TYPE``,
            ``$OBJECT_TYPE``, ``$KEY``, ``$FULL_KEY``, ``$VALUE``,
            ``$VALUE_TYPE`` and ``$KEY_TYPE`` are substituted.
        cause: The exception that triggered this call.
        type_override: Optional exception class to raise instead of
            ``type(cause)``.

    The function is annotated ``-> None``; every path visible here ends in a
    bare ``raise`` or a call to ``_raise``.
    """
    from omegaconf import OmegaConf
    from omegaconf.base import Node

    # Uncomment to make debugging easier. Note that this will cause some tests to fail
    # raise cause

    # Assertion errors are propagated untouched. The bare ``raise`` re-raises
    # the exception currently being handled, so this presumably relies on the
    # caller invoking this function from inside an ``except`` block.
    if isinstance(cause, AssertionError):
        raise

    # An exception that was already formatted is not formatted again; at most
    # its type is swapped while its attributes are deep-copied over.
    if isinstance(cause, OmegaConfBaseException) and cause._initialized:
        ex = cause
        if type_override is not None:
            ex = type_override(str(cause))
            ex.__dict__ = copy.deepcopy(cause.__dict__)
        _raise(ex, cause)

    object_type: Optional[Type[Any]]
    object_type_str: Optional[str] = None
    ref_type: Optional[Type[Any]]
    ref_type_str: Optional[str]

    child_node: Optional[Node] = None
    if node is None:
        # No node: there is no key path or type information to report.
        full_key = ""
        object_type = None
        ref_type = None
        ref_type_str = None
    else:
        if key is not None and not OmegaConf.is_none(node):
            child_node = node._get_node(key, validate_access=False)

        full_key = node._get_full_key(key=key)

        object_type = OmegaConf.get_type(node)
        object_type_str = type_str(object_type)

        ref_type = get_ref_type(node)
        ref_type_str = type_str(ref_type)

    # First pass: expand the placeholders inside the caller-supplied message.
    msg = string.Template(msg).substitute(
        REF_TYPE=ref_type_str,
        OBJECT_TYPE=object_type_str,
        KEY=key,
        FULL_KEY=full_key,
        VALUE=value,
        VALUE_TYPE=f"{type(value).__name__}",
        KEY_TYPE=f"{type(key).__name__}",
    )

    # NOTE(review): in this view the fields of the template are separated by a
    # space followed by a tab; confirm whether line breaks were intended.
    template = """$MSG \tfull_key: $FULL_KEY \treference_type=$REF_TYPE \tobject_type=$OBJECT_TYPE"""

    s = string.Template(template=template)

    # Second pass: wrap the expanded message with the key/type summary.
    message = s.substitute(REF_TYPE=ref_type_str, OBJECT_TYPE=object_type_str, MSG=msg, FULL_KEY=full_key)
    exception_type = type(cause) if type_override is None else type_override
    # Builtin TypeError / IndexError are replaced by their Config* counterparts.
    if exception_type == TypeError:
        exception_type = ConfigTypeError
    elif exception_type == IndexError:
        exception_type = ConfigIndexError
    ex = exception_type(f"{message}")
    if issubclass(exception_type, OmegaConfBaseException):
        # Mark as formatted and attach the structured context so the
        # early-exit branch above recognises it next time.
        ex._initialized = True
        ex.msg = message
        ex.parent_node = node
        ex.child_node = child_node
        ex.key = key
        ex.full_key = full_key
        ex.value = value
        ex.object_type = object_type
        ex.object_type_str = object_type_str
        ex.ref_type = ref_type
        ex.ref_type_str = ref_type_str

    _raise(ex, cause)
def run():
    """Train/predict with two NN variants, blend them, score the OOF blend and write a submission.

    The configuration is loaded from ``src/test.yaml`` (location depends on
    the ``on_kaggle`` switch below). For every seed both ``run_k_fold_nn``
    and ``run_k_fold_nn_two_head`` are executed and their outputs averaged
    over seeds and over the two models.

    Returns:
        The mean per-target log-loss of the blended out-of-fold predictions,
        or ``0`` when ``pretrain_model`` is set.

    Side effects:
        Writes ``submission.csv`` in the current working directory.
    """
    on_kaggle = False  # change me True if you use kaggle
    pretrain_model = False

    src_root = '../input/src-code0' if on_kaggle else './'
    cfg = OmegaConf.load(f"{src_root}/src/test.yaml")
    cfg['device'] = 'cuda'
    print(cfg['device'])
    cfg['list_seed'] = list(range(cfg.model.nseed))
    verbose = 1
    path = '../input/lish-moa'
    if pretrain_model:
        path_model = '/kaggle/input/models0'
    elif on_kaggle:
        path_model = '/kaggle/working'
    else:
        path_model = '../models'
    cfg['path_model'] = path_model

    ######################################
    # data_load and preprocess
    ######################################
    data_dict = load_and_preprocess_data_index(
        cfg,
        path,
        pca_append_test=True,
        variancethreshold_append_test=False,
        verbose=1)

    ######################################
    # cv
    ######################################
    CV = DrugAwareMultilabelStratifiedKFold(n_splits=cfg.model.nfolds,
                                            shuffle=False,
                                            random_state=42)

    ##################################################
    # Train
    ##################################################
    # (runner, checkpoint file prefix) for every model in the blend.
    runners = (
        (run_k_fold_nn, 'h1'),
        (run_k_fold_nn_two_head, 'm1'),
    )
    n_models = len(runners)

    n_targets = len(data_dict['target_cols'])
    oof = np.zeros((len(data_dict['train']), n_targets))
    predictions = np.zeros((len(data_dict['test']), n_targets))
    for seed in tqdm(cfg['list_seed'], leave=verbose):
        for runner, file_prefix in runners:
            return_run_k_fold = runner(data_dict,
                                       cfg,
                                       cv=CV,
                                       seed=seed,
                                       file_prefix=file_prefix,
                                       pretrain_model=pretrain_model,
                                       verbose=verbose)
            if not pretrain_model:
                oof_, predictions_ = return_run_k_fold
                # Average over seeds AND models, exactly like the test
                # predictions below; otherwise the OOF matrix is the sum of
                # the two models and the CV score is computed on it.
                oof += oof_ / cfg.model.nseed / n_models
            else:
                predictions_ = return_run_k_fold
            predictions += predictions_ / cfg.model.nseed / n_models
            gc.collect()

    train = data_dict['train'].copy()
    test = data_dict['test'].copy()
    target_cols = data_dict['target_cols']
    train_targets_scored = data_dict['train_targets_scored']
    test_features = data_dict['test_features']
    if not pretrain_model:
        train[target_cols] = oof
    test[target_cols] = predictions

    ##################################################
    # validation and save
    ##################################################
    if not pretrain_model:
        y_true = train_targets_scored[target_cols].values
        valid_results = train_targets_scored.drop(columns=target_cols).merge(
            train[target_cols], on='sig_id', how='left').fillna(0)
        y_pred = valid_results[target_cols].values
        score = 0
        for i in range(len(target_cols)):
            score += log_loss(y_true[:, i], y_pred[:, i]) / len(target_cols)
        print(f"CV log_loss: {score}")
        log.info(f"CV log_loss: {score}")
        log.info(f"y_true.shape: {y_true.shape}")
        log.info(f"y_pred.shape: {y_pred.shape}")

    res = test[target_cols]
    # Rows with cp_type == 'ctl_vehicle' get all-zero targets. Work on an
    # explicit copy so the assignment does not target a filtered view.
    corner_case = test_features[test_features['cp_type'] == 'ctl_vehicle'].copy()
    corner_case[target_cols] = np.zeros((corner_case.shape[0], len(target_cols)))
    corner_case = corner_case[target_cols]
    res = pd.concat([res, corner_case], axis=0)
    res.to_csv('submission.csv')
    log.info(f"res.shape: {res.shape}")
    log.info(f"test[target_cols].shape: {test[target_cols].shape}")

    if not pretrain_model:
        return score
    else:
        return 0
def _train(
    task: str,
    ov: List[str],
    do_sweep: bool,
):
    """
    Run training for one task and, optionally, the post-training sweep
    and validation evaluation.

    Args:
        task: task to run training for
        ov: overrides for the config manager (``None`` is treated as an
            empty list)
        do_sweep: if set, determine the best empirical inference parameters
            after training, extract the validation predictions and evaluate
            them
    """
    print(f"Overwrites: {ov}")
    initialize_config_module(config_module="nndet.conf")
    cfg = compose(task, "config.yaml", overrides=ov if ov is not None else [])

    assert cfg.host.parent_data is not None, 'Parent data can not be None'
    assert cfg.host.parent_results is not None, 'Output dir can not be None'

    train_dir = init_train_dir(cfg)

    # Experiment tracking: one MLflow run, tagged with host, fold, task and
    # the LSB_JOBID environment variable ('no_id' when unset).
    pl_logger = MLFlowLogger(
        experiment_name=cfg["task"],
        tags={
            "host": socket.gethostname(),
            "fold": cfg["exp"]["fold"],
            "task": cfg["task"],
            "job_id": os.getenv('LSB_JOBID', 'no_id'),
            "mlflow.runName": cfg["exp"]["id"],
        },
        save_dir=os.getenv("MLFLOW_TRACKING_URI", "./mlruns"),
    )
    pl_logger.log_hyperparams(
        flatten_mapping(
            {"model": OmegaConf.to_container(cfg["model_cfg"], resolve=True)}))
    pl_logger.log_hyperparams(
        flatten_mapping({
            "trainer":
            OmegaConf.to_container(cfg["trainer_cfg"], resolve=True)
        }))

    # Reset the logger sinks: stdout plus train.log in the current working
    # directory.
    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    log_file = Path(os.getcwd()) / "train.log"
    logger.add(log_file, level="INFO")
    logger.info(f"Log file at {log_file}")

    # Record run metadata next to the logs.
    meta_data = {}
    meta_data["torch_version"] = str(torch.__version__)
    meta_data["date"] = str(datetime.now())
    meta_data["git"] = log_git(nndet.__path__[0], repo_name="nndet")
    save_json(meta_data, "./meta.json")
    # Best effort: a failure to write the requirements is logged, not raised.
    try:
        write_requirements_to_file("requirements.txt")
    except Exception as e:
        logger.error(f"Could not log req: {e}")

    plan_path = Path(str(cfg.host["plan_path"]))
    plan = load_pickle(plan_path)
    save_json(create_debug_plan(plan), "./plan_debug.json")

    data_dir = Path(cfg.host["preprocessed_output_dir"]
                    ) / plan["data_identifier"] / "imagesTr"

    datamodule = Datamodule(
        augment_cfg=OmegaConf.to_container(cfg["augment_cfg"], resolve=True),
        plan=plan,
        data_dir=data_dir,
        fold=cfg["exp"]["fold"],
    )
    module = MODULE_REGISTRY[cfg["module"]](
        model_cfg=OmegaConf.to_container(cfg["model_cfg"], resolve=True),
        trainer_cfg=OmegaConf.to_container(cfg["trainer_cfg"], resolve=True),
        plan=plan,
    )
    callbacks = []
    # Keep the best checkpoint (by the configured monitor key/mode) and the
    # last one, stored as 'model_best' and 'model_last'.
    checkpoint_cb = ModelCheckpoint(
        dirpath=train_dir,
        filename='model_best',
        save_last=True,
        save_top_k=1,
        monitor=cfg["trainer_cfg"]["monitor_key"],
        mode=cfg["trainer_cfg"]["monitor_mode"],
    )
    checkpoint_cb.CHECKPOINT_NAME_LAST = 'model_last'
    callbacks.append(checkpoint_cb)
    callbacks.append(LearningRateMonitor(logging_interval="epoch"))

    # Save the config twice: as composed and with interpolations resolved.
    OmegaConf.save(cfg, str(Path(os.getcwd()) / "config.yaml"))
    OmegaConf.save(cfg,
                   str(Path(os.getcwd()) / "config_resolved.yaml"),
                   resolve=True)
    save_pickle(plan, train_dir / "plan.pkl")  # backup plan
    splits = load_pickle(
        Path(cfg.host.preprocessed_output_dir) / datamodule.splits_file)
    save_pickle(splits, train_dir / "splits.pkl")

    trainer_kwargs = {}
    # In resume mode training continues from the last saved checkpoint.
    if cfg["train"]["mode"].lower() == "resume":
        trainer_kwargs[
            "resume_from_checkpoint"] = train_dir / "model_last.ckpt"

    num_gpus = cfg["trainer_cfg"]["gpus"]
    logger.info(f"Using {num_gpus} GPUs for training")
    plugins = cfg["trainer_cfg"].get("plugins", None)
    logger.info(f"Using {plugins} plugins for training")

    trainer = pl.Trainer(
        # More than one GPU is passed as an explicit index list.
        gpus=list(range(num_gpus)) if num_gpus > 1 else num_gpus,
        accelerator=cfg["trainer_cfg"]["accelerator"],
        precision=cfg["trainer_cfg"]["precision"],
        amp_backend=cfg["trainer_cfg"]["amp_backend"],
        amp_level=cfg["trainer_cfg"]["amp_level"],
        benchmark=cfg["trainer_cfg"]["benchmark"],
        deterministic=cfg["trainer_cfg"]["deterministic"],
        callbacks=callbacks,
        logger=pl_logger,
        max_epochs=module.max_epochs,
        # Refresh rate 0 is passed when the det_verbose environment variable
        # evaluates to 0; otherwise None.
        progress_bar_refresh_rate=None
        if bool(int(os.getenv("det_verbose", 1))) else 0,
        reload_dataloaders_every_epoch=False,
        num_sanity_val_steps=10,
        weights_summary='full',
        plugins=plugins,
        terminate_on_nan=True,  # TODO: make modular
        move_metrics_to_cpu=True,
        **trainer_kwargs)
    trainer.fit(module, datamodule=datamodule)

    if do_sweep:
        # The sweep runs on the validation cases of the current fold,
        # optionally truncated by cfg["debug"]["num_cases_val"].
        case_ids = splits[cfg["exp"]["fold"]]["val"]
        if "debug" in cfg and "num_cases_val" in cfg["debug"]:
            case_ids = case_ids[:cfg["debug"]["num_cases_val"]]

        inference_plan = module.sweep(
            cfg=OmegaConf.to_container(cfg, resolve=True),
            save_dir=train_dir,
            train_data_dir=data_dir,
            case_ids=case_ids,
            run_prediction=True,
        )

        plan["inference_plan"] = inference_plan
        save_pickle(plan, train_dir / "plan_inference.pkl")

        ensembler_cls = module.get_ensembler_cls(
            key="boxes", dim=plan["network_dim"])  # TODO: make this configurable
        # Extract the sweep predictions twice: with restore=True into
        # val_predictions and with restore=False into
        # val_predictions_preprocessed.
        for restore in [True, False]:
            target_dir = train_dir / "val_predictions" if restore else \
                train_dir / "val_predictions_preprocessed"
            extract_results(
                source_dir=train_dir / "sweep_predictions",
                target_dir=target_dir,
                ensembler_cls=ensembler_cls,
                restore=restore,
                **inference_plan,
            )

        _evaluate(
            task=cfg["task"],
            model=cfg["exp"]["id"],
            fold=cfg["exp"]["fold"],
            test=False,
            do_boxes_eval=True,  # TODO: make this configurable
            do_analyze_boxes=True,  # TODO: make this configurable
        )
def run(cfg: DictConfig) -> None:
    """Train/predict with the NN over the CV folds, score OOF predictions and write a submission.

    Args:
        cfg: Run configuration. ``cfg.model.nseed`` and ``cfg.model.nfolds``
            are read; ``device``, ``list_seed`` and ``path_model`` are
            written into it.

    Returns:
        The mean per-target log-loss of the out-of-fold predictions
        (``0`` when ``pretrain_model`` is set). NOTE(review): the signature is
        annotated ``-> None`` although a score is returned.

    Side effects:
        Changes the working directory to Hydra's original cwd and writes
        ``submission.csv`` there.
    """
    os.chdir(hydra.utils.get_original_cwd())
    log.info(OmegaConf.to_yaml(cfg))
    cfg['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'
    cfg['list_seed'] = list(range(cfg.model.nseed))
    verbose = 1
    local_path = '../'
    path = f'{local_path}input/lish-moa'
    cfg['path_model'] = f'{local_path}models'

    ######################################
    # data_load and preprocess
    ######################################
    pretrain_model = False
    data_dict = load_and_preprocess_data(cfg,
                                         path,
                                         pca_append_test=False,
                                         variancethreshold_append_test=False,
                                         verbose=1)

    ######################################
    # cv
    ######################################
    CV = MultilabelStratifiedKFold(n_splits=cfg.model.nfolds, random_state=42)

    ##################################################
    # Train
    ##################################################
    n_targets = len(data_dict['target_cols'])
    oof = np.zeros((len(data_dict['train']), n_targets))
    predictions = np.zeros((len(data_dict['test']), n_targets))
    for seed in tqdm(cfg['list_seed'], leave=verbose):
        return_run_k_fold = run_k_fold_nn(data_dict,
                                          cfg,
                                          cv=CV,
                                          seed=seed,
                                          file_prefix='m1',
                                          pretrain_model=pretrain_model,
                                          verbose=verbose)
        # Without a pre-trained model the helper returns (oof, predictions),
        # otherwise predictions only.
        if not pretrain_model:
            oof_, predictions_ = return_run_k_fold
            oof += oof_ / cfg.model.nseed
        else:
            predictions_ = return_run_k_fold
        predictions += predictions_ / cfg.model.nseed
        gc.collect()

    train = data_dict['train'].copy()
    test = data_dict['test'].copy()
    target_cols = data_dict['target_cols']
    train_targets_scored = data_dict['train_targets_scored']
    test_features = data_dict['test_features']
    if not pretrain_model:
        train[target_cols] = oof
    test[target_cols] = predictions

    ##################################################
    # validation and save
    ##################################################
    id_and_targets = ['sig_id'] + target_cols
    if not pretrain_model:
        y_true = train_targets_scored[target_cols].values
        valid_results = train_targets_scored.drop(columns=target_cols).merge(
            train[id_and_targets], on='sig_id', how='left').fillna(0)
        y_pred = valid_results[target_cols].values
        score = 0
        for i in range(len(target_cols)):
            score += log_loss(y_true[:, i], y_pred[:, i]) / len(target_cols)
        print(f"CV log_loss: {score}")
        log.info(f"CV log_loss: {score}")
        log.info(f"y_true.shape: {y_true.shape}")
        log.info(f"y_pred.shape: {y_pred.shape}")

    res = test[id_and_targets]
    # Rows with cp_type == 'ctl_vehicle' get all-zero targets. Work on an
    # explicit copy so the assignment does not target a filtered view.
    corner_case = test_features[test_features['cp_type'] == 'ctl_vehicle'].copy()
    corner_case[target_cols] = np.zeros((corner_case.shape[0], len(target_cols)))
    corner_case = corner_case[id_and_targets]
    res = pd.concat([res, corner_case], axis=0)
    res.to_csv('submission.csv', index=False)

    if not pretrain_model:
        return score
    else:
        return 0
def legacy_model_config_to_new_model_config(model_cfg: DictConfig) -> DictConfig:
    """
    Convert an old style model config into a structured config built from
    :class:`~nemo.collections.nlp.models.token_classification.punctuation_capitalization_config.PunctuationCapitalizationModelConfig`.

    Old style configs are configs which were used before the ``common_dataset_parameters`` item was added. They use
    ``dataset`` instead of ``common_dataset_parameters`` and ``batch_size`` instead of ``tokens_in_batch``, and they
    do not support tarred datasets.

    Entries found under ``retain_during_legacy_conversion`` in ``model_cfg`` are copied into the result unchanged.

    Args:
        model_cfg: old style config

    Returns:
        model config which follows dataclass
        :class:`~nemo.collections.nlp.models.token_classification.punctuation_capitalization_config.PunctuationCapitalizationModelConfig`
    """
    legacy_train = model_cfg.get('train_ds')
    legacy_validation = model_cfg.get('validation_ds')
    legacy_test = model_cfg.get('test_ds')
    dataset = model_cfg.dataset
    punct_head_config = model_cfg.get('punct_head', {})
    capit_head_config = model_cfg.get('capit_head', {})

    def _convert_ds(legacy_ds, is_train):
        # A missing dataset section stays ``None`` in the new config.
        if legacy_ds is None:
            return None
        return legacy_data_config_to_new_data_config(legacy_ds, dataset, train=is_train)

    def _convert_head(head_cfg, layers_key):
        # Only the key holding the number of FC layers differs between heads.
        return HeadConfig(
            num_fc_layers=head_cfg.get(layers_key, HeadConfig.num_fc_layers),
            fc_dropout=head_cfg.get('fc_dropout', HeadConfig.fc_dropout),
            activation=head_cfg.get('activation', HeadConfig.activation),
            use_transformer_init=head_cfg.get('use_transformer_init', HeadConfig.use_transformer_init),
        )

    # Pieces are computed in the order the model config lists them.
    class_labels = model_cfg.class_labels
    common_parameters = CommonDatasetParametersConfig(
        pad_label=dataset.pad_label,
        ignore_extra_tokens=dataset.get(
            'ignore_extra_tokens', CommonDatasetParametersConfig.ignore_extra_tokens
        ),
        ignore_start_end=dataset.get('ignore_start_end', CommonDatasetParametersConfig.ignore_start_end),
        punct_label_ids=model_cfg.punct_label_ids,
        capit_label_ids=model_cfg.capit_label_ids,
    )
    new_train = _convert_ds(legacy_train, True)
    new_validation = _convert_ds(legacy_validation, False)
    new_test = _convert_ds(legacy_test, False)
    punct_head = _convert_head(punct_head_config, 'punct_num_fc_layers')
    capit_head = _convert_head(capit_head_config, 'capit_num_fc_layers')

    new_model_config = PunctuationCapitalizationModelConfig(
        class_labels=class_labels,
        common_dataset_parameters=common_parameters,
        train_ds=new_train,
        validation_ds=new_validation,
        test_ds=new_test,
        punct_head=punct_head,
        capit_head=capit_head,
        tokenizer=model_cfg.tokenizer,
        language_model=model_cfg.language_model,
        optim=model_cfg.optim,
    )
    omega_conf = OmegaConf.structured(new_model_config)

    with open_dict(omega_conf):
        retained = model_cfg.get('retain_during_legacy_conversion', {})
        for retained_key in retained.keys():
            omega_conf[retained_key] = retained[retained_key]
    return omega_conf
def main(cfg) -> None:
    """Evaluate a restored Megatron T5 model according to ``cfg``.

    Steps, in order: log the configuration, assemble the trainer plugins,
    create the trainer and experiment manager, swap ``Timer`` callbacks for
    ``StatelessTimer``, load the stored model config and override its data
    and precision settings from ``cfg``, restore and freeze the model, then
    run validate and (when ``cfg.model.data`` has ``test_ds``) test.
    """
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False)
    ddp_plugin = NLPDDPPlugin(
        no_ddp_communication_hook=True,
        gradient_as_bucket_view=cfg.model.gradient_as_bucket_view,
        find_unused_parameters=False,
    )
    plugins = [ddp_plugin]

    precision = cfg.trainer.precision
    if precision in [16, 'bf16']:
        # A gradient scaler is only built for precision 16.
        if precision == 16:
            scaler = GradScaler(
                init_scale=cfg.model.get('native_amp_init_scale', 2**32),
                growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
                hysteresis=cfg.model.get('hysteresis', 2),
            )
        else:
            scaler = None
        precision_plugin_cls = (
            MegatronHalfPrecisionPlugin if megatron_amp_o2 else NativeMixedPrecisionPlugin
        )
        plugins.append(precision_plugin_cls(precision=precision, device='cuda', scaler=scaler))

    if cfg.get('cluster_type', None) == 'BCP':
        plugins.append(TorchElasticEnvironment())

    trainer = Trainer(plugins=plugins, **cfg.trainer)
    exp_manager(trainer, cfg.exp_manager)

    # Override timer callback to a stateless one
    for position, existing in enumerate(trainer.callbacks):
        if not isinstance(existing, Timer):
            continue
        trainer.callbacks[position] = StatelessTimer(cfg.trainer.max_time, )

    # Get the T5 Base configuration.
    t5_cfg = MegatronT5GLUEModel.restore_from(
        restore_path=cfg.model.restore_from_path,
        trainer=trainer,
        return_config=True)

    # Override the T5 configuration with the one from the config file.
    # NOTE: Only data can be overridden here since the file being restored
    # should already correspond to a GLUE/XNLI finetuned model.
    OmegaConf.set_struct(t5_cfg, True)
    with open_dict(t5_cfg):
        t5_cfg.masked_softmax_fusion = False
        t5_cfg.precision = cfg.trainer.precision
        # Overwrite data configs
        t5_cfg.data = cfg.model.data
        # XNLI has eval languages in the yaml config.
        if hasattr(cfg.model, 'eval_languages'):
            t5_cfg.eval_languages = cfg.model.eval_languages

    # A ``task_name`` on the validation dataset selects the GLUE model class.
    if hasattr(t5_cfg.data.validation_ds, 'task_name'):
        model_cls = MegatronT5GLUEModel
    else:
        model_cls = MegatronT5FinetuneModel
    model = model_cls.restore_from(
        restore_path=cfg.model.restore_from_path,
        trainer=trainer,
        override_config_path=t5_cfg)

    model.freeze()
    trainer.validate(model)
    if hasattr(cfg.model.data, 'test_ds'):
        trainer.test(model)
def merge_schema(config, schema):
    """Merge ``config`` onto ``schema`` and return the result as a ``struct``.

    ``schema`` is passed to ``OmegaConf.merge`` first and ``config`` second;
    the merged mapping is expanded into ``struct`` keyword arguments and the
    ``'_type_'`` entry is removed with ``_without`` before returning.
    """
    combined = OmegaConf.merge(schema, config)
    as_struct = struct(**combined)
    return as_struct._without('_type_')
def main(cfg) -> None:
    """Train and/or test a Megatron NMT model according to ``cfg``.

    Steps, in order: log the configuration, assemble the trainer plugins,
    create the trainer, run data preprocessing when
    ``cfg.model.preproc_out_dir`` is set, start the experiment manager, pick
    the checkpoint to resume from, swap ``Timer`` callbacks for
    ``StatelessTimer``, build the model (from a pre-trained T5/BART
    checkpoint when ``cfg.model.pretrained_model_path`` is set, otherwise
    from scratch), then fit and/or test as requested by ``cfg.do_training``
    and ``cfg.do_testing``.

    Raises:
        ValueError: if a pre-trained model path is given without
            ``pretrained_model_type``.
    """
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False)
    ddp_plugin = NLPDDPPlugin(
        no_ddp_communication_hook=True,
        gradient_as_bucket_view=cfg.model.gradient_as_bucket_view,
        find_unused_parameters=False,
    )
    plugins = [ddp_plugin]

    precision = cfg.trainer.precision
    if precision in [16, 'bf16']:
        # A gradient scaler is only built for precision 16.
        if precision == 16:
            scaler = GradScaler(
                init_scale=cfg.model.get('native_amp_init_scale', 2**32),
                growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
                hysteresis=cfg.model.get('hysteresis', 2),
            )
        else:
            scaler = None
        precision_plugin_cls = (
            MegatronHalfPrecisionPlugin if megatron_amp_o2 else PipelineMixedPrecisionPlugin
        )
        plugins.append(precision_plugin_cls(precision=precision, device='cuda', scaler=scaler))

    if cfg.get('cluster_type', None) == 'BCP':
        plugins.append(TorchElasticEnvironment())

    trainer = Trainer(plugins=plugins,
                      **cfg.trainer,
                      callbacks=[ModelSummary(max_depth=3)])

    # tokenizers will be trained and tarred training data will be created if needed
    # model config is then updated
    if cfg.model.preproc_out_dir is not None:
        MTDataPreproc(cfg=cfg.model, trainer=trainer)

    exp_manager(trainer, cfg.exp_manager)

    # An explicit checkpoint in the config wins; otherwise use the one found
    # by exp_manager.
    resume_from_checkpoint = cfg.model.resume_from_checkpoint
    if resume_from_checkpoint is None:
        resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path
    logging.info(
        f'Resuming training from checkpoint: {resume_from_checkpoint}')
    trainer._checkpoint_connector = CheckpointConnector(
        trainer, resume_from_checkpoint=resume_from_checkpoint)

    # Override timer callback to a stateless one
    for position, existing in enumerate(trainer.callbacks):
        if not isinstance(existing, Timer):
            continue
        trainer.callbacks[position] = StatelessTimer(cfg.trainer.max_time, )

    # hydra interpolation does not work here as the interpolation key is lost
    # when PTL saves hparams
    with open_dict(cfg):
        cfg.model.precision = cfg.trainer.precision

    has_pretrained = (hasattr(cfg.model, 'pretrained_model_path')
                      and cfg.model.pretrained_model_path is not None)

    if not has_pretrained:
        model = MegatronNMTModel(cfg.model, trainer)
    else:
        if not hasattr(cfg.model, 'pretrained_model_type'):
            raise ValueError(f"Pretrained model type must be in [T5, BART].")
        assert cfg.model.pretrained_model_type in ['T5', 'BART']

        if cfg.model.pretrained_model_type == 'T5':
            pretrained_cls = MegatronT5Model
        else:
            pretrained_cls = MegatronBARTModel
        pretrained_cfg = pretrained_cls.restore_from(
            cfg.model.pretrained_model_path,
            trainer=trainer,
            return_config=True)

        OmegaConf.set_struct(pretrained_cfg, True)
        with open_dict(pretrained_cfg):
            pretrained_cfg.masked_softmax_fusion = False
            # Set source and target language/multilingual
            pretrained_cfg.src_language = cfg.model.src_language
            pretrained_cfg.tgt_language = cfg.model.tgt_language
            pretrained_cfg.multilingual = cfg.model.multilingual
            pretrained_cfg.shared_tokenizer = True

            # Max generation delta
            pretrained_cfg.max_generation_delta = cfg.model.max_generation_delta

            # Set label smoothing
            pretrained_cfg.label_smoothing = cfg.model.label_smoothing

            # Set tokenizer paths:
            pretrained_cfg.encoder_tokenizer = pretrained_cfg.tokenizer
            pretrained_cfg.decoder_tokenizer = pretrained_cfg.tokenizer

            # Pre-trained models should use the legacy sentencepiece tokenizer ex: mT5
            pretrained_cfg.encoder_tokenizer.sentencepiece_legacy = True
            pretrained_cfg.decoder_tokenizer.sentencepiece_legacy = True

            # Override dropout
            pretrained_cfg.hidden_dropout = cfg.model.hidden_dropout
            pretrained_cfg.attention_dropout = cfg.model.attention_dropout

            # Override precision
            pretrained_cfg.precision = cfg.model.precision  # Set above from trainer.precision

            # Override data and global/micro batch size.
            pretrained_cfg.train_ds = cfg.model.train_ds
            pretrained_cfg.validation_ds = cfg.model.validation_ds
            pretrained_cfg.test_ds = cfg.model.test_ds
            pretrained_cfg.micro_batch_size = cfg.model.micro_batch_size
            pretrained_cfg.global_batch_size = cfg.model.global_batch_size

            # Class target for the new class being restored.
            pretrained_cfg.target = (
                "nemo.collections.nlp.models.machine_translation.megatron_nmt_model.MegatronNMTModel"
            )
            # Optimizer overrides.
            pretrained_cfg.optim = cfg.model.optim

        model = MegatronNMTModel.restore_from(
            cfg.model.pretrained_model_path,
            trainer=trainer,
            override_config_path=pretrained_cfg,
            save_restore_connector=NLPSaveRestoreConnector(),
        )

    if cfg.do_training:
        trainer.fit(model)
    if cfg.do_testing:
        trainer.test(model)
def _sweep(
    task: str,
    model: str,
    fold: int,
):
    """
    Determine the best postprocessing parameters for a trained model.

    The run directory ``$det_models/<task>/<model>/fold<fold>`` is resolved,
    its stored ``config.yaml`` and ``plan.pkl`` are loaded, and the module
    named in the config sweeps over the validation cases of the fold. The
    resulting inference plan is stored in ``plan_inference.pkl``, the sweep
    predictions are extracted twice (restored and preprocessed variants) and
    the validation predictions are evaluated afterwards.

    Side effects: the working directory is changed to the run directory and
    the logger sinks are reconfigured (stdout + ``sweep.log``).

    Args:
        task: current task
        model: full name of the model run to determine empirical parameters
            for, e.g. RetinaUNetV001_D3V001_3d
        fold: current fold
    """
    models_root = Path(os.getenv("det_models"))
    task = get_task(task, name=True, models=True)
    train_dir = models_root / task / model / f"fold{fold}"

    cfg = OmegaConf.load(str(train_dir / "config.yaml"))
    os.chdir(str(train_dir))

    # Logging goes to stdout and to a file inside the run directory
    # (the cwd is the run directory at this point).
    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    log_file = Path(os.getcwd()) / "sweep.log"
    logger.add(log_file, level="INFO")
    logger.info(f"Log file at {log_file}")

    plan = load_pickle(train_dir / "plan.pkl")
    preprocessed_root = Path(cfg.host["preprocessed_output_dir"])
    data_dir = preprocessed_root / plan["data_identifier"] / "imagesTr"

    # Instantiate the module registered under the name stored in the config.
    module_cls = MODULE_REGISTRY[cfg["module"]]
    model_cfg = OmegaConf.to_container(cfg["model_cfg"], resolve=True)
    trainer_cfg = OmegaConf.to_container(cfg["trainer_cfg"], resolve=True)
    module = module_cls(
        model_cfg=model_cfg,
        trainer_cfg=trainer_cfg,
        plan=plan,
    )

    # Only the validation cases of the configured fold take part in the sweep.
    splits = load_pickle(train_dir / "splits.pkl")
    val_case_ids = splits[cfg["exp"]["fold"]]["val"]

    resolved_cfg = OmegaConf.to_container(cfg, resolve=True)
    inference_plan = module.sweep(
        cfg=resolved_cfg,
        save_dir=train_dir,
        train_data_dir=data_dir,
        case_ids=val_case_ids,
        run_prediction=True,  # TODO: add command line arg
    )

    plan["inference_plan"] = inference_plan
    save_pickle(plan, train_dir / "plan_inference.pkl")

    # TODO: make this configurable
    ensembler_cls = module.get_ensembler_cls(
        key="boxes", dim=plan["network_dim"])

    # Extract once with restore=True and once with restore=False, each into
    # its own target directory.
    sweep_dir = train_dir / "sweep_predictions"
    extraction_targets = (
        (True, train_dir / "val_predictions"),
        (False, train_dir / "val_predictions_preprocessed"),
    )
    for restore, target_dir in extraction_targets:
        extract_results(
            source_dir=sweep_dir,
            target_dir=target_dir,
            ensembler_cls=ensembler_cls,
            restore=restore,
            **inference_plan,
        )

    exp_cfg = cfg["exp"]
    _evaluate(
        task=cfg["task"],
        model=exp_cfg["id"],
        fold=exp_cfg["fold"],
        test=False,
        do_boxes_eval=True,  # TODO: make this configurable
        do_analyze_boxes=True,  # TODO: make this configurable
    )
def format_and_raise(
    node: Any,
    key: Any,
    value: Any,
    msg: str,
    cause: Exception,
    type_override: Any = None,
) -> None:
    """
    Raise an exception derived from ``cause`` with config context in its message.

    Args:
        node: config node the error relates to, or None when there is none.
        key: key that was being accessed on ``node`` (may be None).
        value: value involved in the failed operation.
        msg: message template; ``$REF_TYPE``, ``$OBJECT_TYPE``, ``$KEY``,
            ``$FULL_KEY``, ``$VALUE``, ``$VALUE_TYPE`` and ``$KEY_TYPE`` are
            substituted when present.
        cause: the exception that triggered this call.
        type_override: if given, the exception class to construct instead of
            ``type(cause)``.

    Notes:
        * An ``AssertionError`` cause is re-raised with a bare ``raise``, so
          this function is expected to be called while that exception is
          being handled.
        * A cause that is an already initialized ``OmegaConfBaseException``
          is not formatted again; it is handed to ``_raise`` directly (or as
          a ``type_override`` instance carrying a deep copy of its
          attributes).
        * ``TypeError`` and ``IndexError`` are mapped to ``ConfigTypeError``
          and ``ConfigIndexError`` respectively.
    """
    from omegaconf import OmegaConf
    from omegaconf.base import Node

    if isinstance(cause, AssertionError):
        # Propagate the exception currently being handled untouched.
        raise

    already_formatted = (
        isinstance(cause, OmegaConfBaseException) and cause._initialized
    )
    if already_formatted:
        if type_override is None:
            ex = cause
        else:
            ex = type_override(str(cause))
            ex.__dict__ = copy.deepcopy(cause.__dict__)
        _raise(ex, cause)

    # Context defaults; they stay as-is when there is no node to inspect.
    object_type: Optional[Type[Any]] = None
    object_type_str: Optional[str] = None
    ref_type: Optional[Type[Any]] = None
    ref_type_str: Optional[str] = None
    child_node: Optional[Node] = None

    if node is not None:
        if not (key is None or node._is_none()):
            child_node = node._get_node(key, validate_access=False)

        try:
            full_key = node._get_full_key(key=key)
        except Exception as exc:
            # We are already handling an exception; raising another one from
            # here would be misleading, so the failure is shown in the key.
            full_key = f"<unresolvable due to {type(exc).__name__}: {exc}>"

        object_type = OmegaConf.get_type(node)
        object_type_str = type_str(object_type)

        ref_type = get_ref_type(node)
        ref_type_str = type_str(ref_type)
    elif key is not None:
        full_key = key
    else:
        full_key = ""

    # safe_substitute leaves unknown placeholders in the caller's message alone.
    msg = string.Template(msg).safe_substitute(
        REF_TYPE=ref_type_str,
        OBJECT_TYPE=object_type_str,
        KEY=key,
        FULL_KEY=full_key,
        VALUE=value,
        VALUE_TYPE=type_str(type(value), include_module_name=True),
        KEY_TYPE=f"{type(key).__name__}",
    )

    # The reference type line is only shown when it carries information.
    if ref_type in (None, Any):
        template = dedent(
            """\
            $MSG
                full_key: $FULL_KEY
                object_type=$OBJECT_TYPE"""
        )
    else:
        template = dedent(
            """\
            $MSG
                full_key: $FULL_KEY
                reference_type=$REF_TYPE
                object_type=$OBJECT_TYPE"""
        )

    formatter = string.Template(template=template)
    message = formatter.substitute(
        REF_TYPE=ref_type_str,
        OBJECT_TYPE=object_type_str,
        MSG=msg,
        FULL_KEY=full_key,
    )

    exception_type = type_override if type_override is not None else type(cause)
    if exception_type == TypeError:
        exception_type = ConfigTypeError
    elif exception_type == IndexError:
        exception_type = ConfigIndexError

    ex = exception_type(f"{message}")
    if issubclass(exception_type, OmegaConfBaseException):
        # Attach the structured context so a later call can detect that this
        # exception has already been formatted.
        ex._initialized = True
        ex.msg = message
        ex.parent_node = node
        ex.child_node = child_node
        ex.key = key
        ex.full_key = full_key
        ex.value = value
        ex.object_type = object_type
        ex.object_type_str = object_type_str
        ex.ref_type = ref_type
        ex.ref_type_str = ref_type_str

    _raise(ex, cause)
def run(cfg: DictConfig) -> float:
    """
    Run the whole pipeline: load data, build features, train/predict, save.

    The CSV files are read from ``../input/lish-moa`` relative to the original
    working directory. Gene (``g-``) and cell (``c-``) columns are quantile
    transformed, PCA components are appended, low-variance features are
    dropped, and ``run_k_fold`` is called once per seed. The averaged
    predictions are written to ``submission.csv``; rows whose ``cp_type`` is
    ``ctl_vehicle`` are excluded from training/prediction and receive
    all-zero predictions in the submission.

    Args:
        cfg: configuration object. ``cfg.model`` (``nseed``, ``nfolds``,
            ``n_comp_genes``, ``n_comp_cells``, ``variance_threshold_for_fs``,
            ``patch1``, ``train_models``) and ``cfg.quantile_transformer``
            (``n_quantiles``, ``output_distribution``) are read; ``device``,
            ``list_seed`` and ``path_model`` are written back into it.

    Returns:
        The log loss averaged over the target columns, computed on the
        out-of-fold predictions, when ``cfg.model.train_models`` is truthy;
        ``0`` otherwise.
    """
    os.chdir(hydra.utils.get_original_cwd())
    log.info(OmegaConf.to_yaml(cfg))
    cfg['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'
    cfg['list_seed'] = list(range(cfg.model.nseed))
    verbose = 1
    local_path = '../'
    path = f'{local_path}input/lish-moa'
    cfg['path_model'] = f'{local_path}models'

    ##################################################
    # Data load
    ##################################################
    train_features = change_type(pd.read_csv(f'{path}/train_features.csv'))
    test_features = change_type(pd.read_csv(f'{path}/test_features.csv'))
    train_targets_scored = change_type(
        pd.read_csv(f'{path}/train_targets_scored.csv'))
    log.info(f"train_targets_scored.shape: {train_targets_scored.shape}")

    log.info(
        f"n_comp_genes: {cfg.model.n_comp_genes}, n_comp_cells: {cfg.model.n_comp_cells}, total: "
        f"{cfg.model.n_comp_genes + cfg.model.n_comp_cells}.")
    GENES = [col for col in train_features.columns if col.startswith('g-')]
    CELLS = [col for col in train_features.columns if col.startswith('c-')]

    ##################################################
    # Quantile transform
    ##################################################
    train_features, test_features = quantile_transformer(
        train_features, test_features,
        features=GENES + CELLS,
        n_quantiles=cfg.quantile_transformer.n_quantiles,
        output_distribution=cfg.quantile_transformer.output_distribution)
    gc.collect()
    log.info(f"End prearation data transform.\n"
             f"train_features.shape: {train_features.shape}\n"
             f"test_features.shape: {test_features.shape}\n"
             f"{'_' * 80}\n")

    ##################################################
    # PCA
    ##################################################
    # The components of each feature group are appended as extra columns.
    for group_cols, n_components, flag in (
            (GENES, cfg.model.n_comp_genes, 'GENES'),
            (CELLS, cfg.model.n_comp_cells, 'CELLS')):
        train_pca, test_pca = get_pca_transform(
            train_features, test_features,
            features=group_cols,
            n_components=n_components,
            flag=flag,
            test_append=False)
        train_features = pd.concat((train_features, train_pca), axis=1)
        test_features = pd.concat((test_features, test_pca), axis=1)
        del train_pca, test_pca
        gc.collect()

    ##################################################
    # Start: Feature selection
    ##################################################
    train_features, test_features = split_with_variancethreshold(
        train_features, test_features,
        variance_threshold_for_fs=cfg.model.variance_threshold_for_fs,
        categorical=['sig_id', 'cp_type', 'cp_time', 'cp_dose'],
        test_append=False)
    gc.collect()

    ##################################################
    # Start: Zero hack target & prepare train test
    ##################################################
    if verbose:
        print("Preparation of train & test:")
    train = train_features.merge(train_targets_scored, on='sig_id')
    train = train[train['cp_type'] != 'ctl_vehicle'].reset_index(drop=True)
    test = test_features[
        test_features['cp_type'] != 'ctl_vehicle'].reset_index(drop=True)

    target = train[train_targets_scored.columns]
    train = train.drop('cp_type', axis=1)
    test = test.drop('cp_type', axis=1)
    target_cols = target.drop('sig_id', axis=1).columns.values.tolist()
    log.debug(f"Preparation of train & test.\n"
              f"train.shape: {train.shape}\n"
              f"test.shape: {test.shape}\n"
              f"{'_' * 80}\n")

    ##################################################
    # cv folds
    ##################################################
    folds = train.copy()
    mskf = MultilabelStratifiedKFold(n_splits=cfg.model.nfolds,
                                     random_state=cfg['list_seed'][0])
    # Tag every row with the index of the fold in which it is validated.
    for fold_idx, (_, v_idx) in enumerate(mskf.split(X=train, y=target)):
        folds.loc[v_idx, 'kfold'] = fold_idx
    folds['kfold'] = folds['kfold'].astype(int)
    log.debug(f"train.shape: {train.shape}\n"
              f"folds.shape: {folds.shape}\n"
              f"test.shape: {test.shape}\n"
              f"target.shape: {target.shape}\n")
    gc.collect()

    ##################################################
    # Preprocessing feature_cols
    ##################################################
    non_feature_cols = set(target_cols) | {'kfold', 'sig_id'}
    feature_cols = [
        c for c in preprocess_data(folds, cfg.model.patch1).columns
        if c not in non_feature_cols
    ]
    num_features = len(feature_cols)
    num_targets = len(target_cols)

    ##################################################
    # END PREPROCESS
    ##################################################
    # NOTE(review): data_dict is never read below. It is kept only because
    # preprocess_data is opaque from here and may have side effects on
    # train/test - confirm and remove if it is pure.
    data_dict = {
        'train': preprocess_data(train),
        'target': target,
        'test': preprocess_data(test),
        'feature_cols': feature_cols,
        'target_cols': target_cols
    }

    ##################################################
    # Train
    ##################################################
    seeds = cfg['list_seed']
    n_seeds = len(seeds)
    oof = np.zeros((len(train), len(target_cols)))
    predictions = np.zeros((len(test), len(target_cols)))
    for seed in tqdm(seeds, leave=verbose):
        fold_result = run_k_fold(cfg.model.nfolds, seed, cfg, folds, train,
                                 test, feature_cols, target_cols,
                                 num_features, num_targets, target, verbose)
        # When training, run_k_fold yields (oof, predictions); otherwise
        # only the predictions.
        if cfg.model.train_models:
            oof_, predictions_ = fold_result
            oof += oof_ / n_seeds
        else:
            predictions_ = fold_result
        predictions += predictions_ / n_seeds
        gc.collect()

    if cfg.model.train_models:
        train[target_cols] = oof
    test[target_cols] = predictions

    ##################################################
    # Validation and save
    ##################################################
    score = 0
    if cfg.model.train_models:
        y_true = train_targets_scored[target_cols].values
        # Rows without an out-of-fold prediction (the control rows dropped
        # from train) end up with 0 after the left merge.
        valid_results = train_targets_scored.drop(columns=target_cols).merge(
            train[['sig_id'] + target_cols], on='sig_id',
            how='left').fillna(0)
        y_pred = valid_results[target_cols].values
        for i in range(num_targets):
            score += log_loss(y_true[:, i], y_pred[:, i]) / num_targets
        print(f"CV log_loss: {score}")
        log.info(f"CV log_loss: {score}")
        log.info(f"y_true.shape: {y_true.shape}")
        log.info(f"y_pred.shape: {y_pred.shape}")

    res = test[['sig_id'] + target_cols]
    # Control rows were excluded from prediction; they get all-zero targets.
    # An explicit copy avoids assigning into a filtered slice of
    # test_features (chained assignment).
    is_control = test_features['cp_type'] == 'ctl_vehicle'
    corner_case = test_features.loc[is_control, ['sig_id']].copy()
    corner_case[target_cols] = np.zeros((len(corner_case), len(target_cols)))
    res = pd.concat([res, corner_case], axis=0)
    res.to_csv('submission.csv', index=False)

    return score