def run_simple(args: RunConfiguration, with_continue: bool = False):
    """Run an end-to-end jiant experiment from a single simple-API configuration.

    Pipeline: (1) write/locate per-task configs, (2) [disabled] model download,
    (3) tokenize+cache any missing phase caches, (4) build the
    jiant_task_container_config via the configurator, (5) train/eval, resuming
    from a checkpoint when ``with_continue`` is set and one exists.

    Args:
        args: simple-API run configuration (paths, task lists, training knobs).
        with_continue: if True and a ``checkpoint.p`` exists in the run output
            dir (and checkpointing is enabled), resume from it instead of
            starting fresh.

    Side effects: writes task configs, tokenized caches, a run config JSON,
    training outputs, and ``simple_run_config.json`` under ``args.exp_dir``.
    NOTE(review): mutates ``args.train_tasks`` / ``args.val_tasks`` /
    ``args.test_tasks`` in place in Step 3 (removes tasks lacking data for a
    phase) — callers holding references to these lists will observe the change.
    """
    hf_config = AutoConfig.from_pretrained(args.hf_pretrained_model_name_or_path)
    # Default model cache lives under the experiment dir unless overridden.
    model_cache_path = replace_none(
        args.model_cache_path, default=os.path.join(args.exp_dir, "models")
    )
    # In distributed runs, only rank 0 performs the filesystem setup below.
    with distributed.only_first_process(local_rank=args.local_rank):
        # === Step 1: Write task configs based on templates === #
        full_task_name_list = sorted(list(set(args.train_tasks + args.val_tasks + args.test_tasks)))
        task_config_path_dict = {}
        if args.create_config:
            task_config_path_dict = create_and_write_task_configs(
                task_name_list=full_task_name_list,
                data_dir=args.data_dir,
                task_config_base_path=os.path.join(args.data_dir, "configs"),
            )
        else:
            # Configs are assumed to already exist at the conventional path.
            for task_name in full_task_name_list:
                task_config_path_dict[task_name] = os.path.join(
                    args.data_dir, "configs", f"{task_name}_config.json"
                )

        # === Step 2: Download models === #
        # if not os.path.exists(os.path.join(model_cache_path, hf_config.model_type)):
        #     print("Downloading model")
        #     export_model.export_model(
        #         hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
        #         output_base_path=os.path.join(model_cache_path, hf_config.model_type),
        #     )

        # === Step 3: Tokenize and cache === #
        phase_task_dict = {
            "train": args.train_tasks,
            "val": args.val_tasks,
            "test": args.test_tasks,
        }
        for task_name in full_task_name_list:
            phases_to_do = []
            for phase, phase_task_list in phase_task_dict.items():
                # Only tokenize a phase if the task is requested for that phase
                # and its cache directory does not already exist.
                if task_name in phase_task_list and not os.path.exists(
                    os.path.join(args.exp_dir, "cache", hf_config.model_type, task_name, phase)
                ):
                    config = read_json(task_config_path_dict[task_name])
                    if phase in config["paths"]:
                        phases_to_do.append(phase)
                    else:
                        # Task config has no data for this phase: drop the task
                        # from that phase's list (mutates args.*_tasks in place).
                        phase_task_list.remove(task_name)
            if not phases_to_do:
                continue
            print(f"Tokenizing Task '{task_name}' for phases '{','.join(phases_to_do)}'")
            tokenize_and_cache.main(
                tokenize_and_cache.RunConfiguration(
                    task_config_path=task_config_path_dict[task_name],
                    hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
                    output_dir=os.path.join(args.exp_dir, "cache", hf_config.model_type, task_name),
                    phases=phases_to_do,
                    # TODO: Need a strategy for task-specific max_seq_length issues (issue #1176)
                    max_seq_length=args.max_seq_length,
                    smart_truncate=True,
                    do_iter=True,
                )
            )

    # === Step 4: Generate jiant_task_container_config === #
    # We'll do this with a configurator. Creating a jiant_task_config has a surprising
    # number of moving parts.
    jiant_task_container_config = configurator.SimpleAPIMultiTaskConfigurator(
        task_config_base_path=os.path.join(args.data_dir, "configs"),
        task_cache_base_path=os.path.join(args.exp_dir, "cache", hf_config.model_type),
        train_task_name_list=args.train_tasks,
        val_task_name_list=args.val_tasks,
        test_task_name_list=args.test_tasks,
        train_batch_size=args.train_batch_size,
        eval_batch_multiplier=2,
        epochs=args.num_train_epochs,
        num_gpus=torch.cuda.device_count(),
        train_examples_cap=args.train_examples_cap,
    ).create_config()
    os.makedirs(os.path.join(args.exp_dir, "run_configs"), exist_ok=True)
    jiant_task_container_config_path = os.path.join(
        args.exp_dir, "run_configs", f"{args.run_name}_config.json"
    )
    py_io.write_json(jiant_task_container_config, path=jiant_task_container_config_path)

    # === Step 5: Train/Eval! === #
    if args.model_weights_path:
        # User-provided weights: load partially (heads may be missing).
        model_load_mode = "partial"
        model_weights_path = args.model_weights_path
    else:
        # From Transformers
        # Use the MLM-aware load path when any MLM task is present.
        if any(task_name.startswith("mlm_") for task_name in full_task_name_list):
            model_load_mode = "from_transformers_with_mlm"
        else:
            model_load_mode = "from_transformers"
        model_weights_path = os.path.join(
            model_cache_path, hf_config.model_type, "model", "model.p"
        )
    run_output_dir = os.path.join(args.exp_dir, "runs", args.run_name)

    # Resume only when checkpointing is enabled, a checkpoint file exists,
    # and the caller asked to continue.
    if (
        args.save_checkpoint_every_steps
        and os.path.exists(os.path.join(run_output_dir, "checkpoint.p"))
        and with_continue
    ):
        print("Resuming")
        checkpoint = torch.load(os.path.join(run_output_dir, "checkpoint.p"))
        # Restore the exact run arguments that produced the checkpoint.
        run_args = runscript.RunConfiguration.from_dict(checkpoint["metadata"]["args"])
    else:
        print("Running from start")
        run_args = runscript.RunConfiguration(
            # === Required parameters === #
            jiant_task_container_config_path=jiant_task_container_config_path,
            output_dir=run_output_dir,
            # === Model parameters === #
            hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
            model_path=model_weights_path,
            model_config_path=os.path.join(
                model_cache_path, hf_config.model_type, "model", "config.json",
            ),
            model_load_mode=model_load_mode,
            # === Running Setup === #
            do_train=bool(args.train_tasks),
            do_val=bool(args.val_tasks),
            do_save=args.do_save,
            do_save_best=args.do_save_best,
            do_save_last=args.do_save_last,
            write_val_preds=args.write_val_preds,
            write_test_preds=args.write_test_preds,
            eval_every_steps=args.eval_every_steps,
            save_every_steps=args.save_every_steps,
            save_checkpoint_every_steps=args.save_checkpoint_every_steps,
            no_improvements_for_n_evals=args.no_improvements_for_n_evals,
            keep_checkpoint_when_done=args.keep_checkpoint_when_done,
            force_overwrite=args.force_overwrite,
            seed=args.seed,
            # === Training Learning Parameters === #
            learning_rate=args.learning_rate,
            adam_epsilon=args.adam_epsilon,
            max_grad_norm=args.max_grad_norm,
            optimizer_type=args.optimizer_type,
            # === Specialized config === #
            no_cuda=args.no_cuda,
            fp16=args.fp16,
            fp16_opt_level=args.fp16_opt_level,
            local_rank=args.local_rank,
            server_ip=args.server_ip,
            server_port=args.server_port,
        )
        checkpoint = None
    runscript.run_loop(args=run_args, checkpoint=checkpoint)
    # Persist the simple-API config alongside the run outputs for reproducibility.
    py_io.write_file(args.to_json(), os.path.join(run_output_dir, "simple_run_config.json"))
# Tokenize and cache the dataset tokenize_and_cache.main( tokenize_and_cache.RunConfiguration( task_config_path=f"{EXP_DIR}/tasks/configs/{TASK}_config.json", hf_pretrained_model_name_or_path="en_bert", output_dir=f"{EXP_DIR}/cache/{TASK}", phases=["test"] if is_diagnostics else ["train", "val", "test"], )) # Create the run config jiant_run_config = configurator.SimpleAPIMultiTaskConfigurator( task_config_base_path=f"{EXP_DIR}/tasks/configs", task_cache_base_path=f"{EXP_DIR}/cache", train_task_name_list=["rte" if is_diagnostics else TASK], val_task_name_list=["rte" if is_diagnostics else TASK], test_task_name_list=[TASK], train_batch_size=BATCH_SIZE, eval_batch_size=32, epochs=N_EPOCHS, num_gpus=1, warmup_steps_proportion=0.1, ).create_config() os.makedirs(f"{EXP_DIR}/run_configs/", exist_ok=True) py_io.write_json(jiant_run_config, f"{EXP_DIR}/run_configs/{TASK}_config.json") for seed in [123, 456, 789, 1234, 5678, 9012, 12345, 67890, 123456, 789012]: output_dir = f"{EXP_DIR}/runs/run_{TASK}_seed={seed}" with HiddenPrints(): run_args = main_runscript.RunConfiguration( jiant_task_container_config_path= f"{EXP_DIR}/run_configs/{TASK}_config.json", output_dir=output_dir,
import os

import jiant.proj.main.export_model as export_model
import jiant.proj.main.scripts.configurator as configurator
import jiant.utils.display as display
import jiant.utils.python.io as py_io

# Build a run config for the single "spatial" task via the simple multi-task API.
simple_configurator = configurator.SimpleAPIMultiTaskConfigurator(
    task_config_base_path="./spatial_experiment",
    task_cache_base_path="./spatial_experiment/cache",
    train_task_name_list=["spatial"],
    val_task_name_list=["spatial"],
    train_batch_size=8,
    eval_batch_size=16,
    epochs=3,
    num_gpus=1,
)
jiant_run_config = simple_configurator.create_config()

# Persist the generated config, then echo it for inspection.
os.makedirs("./spatial_experiment/run_configs/", exist_ok=True)
py_io.write_json(jiant_run_config, "./spatial_experiment/run_configs/spatial_run_config.json")
display.show_json(jiant_run_config)

# Export the pretrained encoder into the experiment's model directory.
export_model.export_model(
    hf_pretrained_model_name_or_path="bert-base-uncased",
    output_base_path="./spatial_experiment/models/bert",
)
def generate_configs(args: RunConfiguration):
    """Generate and write a jiant run config for a single XTREME task.

    Derives per-language task name lists for ``args.xtreme_task``, builds a
    config via ``SimpleAPIMultiTaskConfigurator``, forces every task onto a
    single shared task head, and writes the result to ``args.output_path``.

    Raises:
        KeyError: if ``args.xtreme_task`` is in neither TRAINED_TASKS nor
            UNTRAINED_TASKS.
    """
    xtreme_task = args.xtreme_task
    # MLQA task names carry the language twice (context/question language pair).
    if xtreme_task == "mlqa":
        xtreme_task_name_list = [
            f"{xtreme_task}_{lang}_{lang}" for lang in LANGS_DICT[xtreme_task]
        ]
    else:
        xtreme_task_name_list = [
            f"{xtreme_task}_{lang}" for lang in LANGS_DICT[xtreme_task]
        ]
    if xtreme_task in TRAINED_TASKS:
        train_task = TRAIN_TASK_DICT[xtreme_task]
        train_task_name_list = [train_task]
        val_task_name_list = get_unique_list_in_order(
            [xtreme_task_name_list, train_task_name_list])
        # Early stopping can track either the cross-lingual tasks or only the
        # English training task.
        if args.early_stop_on_xtreme_tasks:
            train_val_task_name_list = val_task_name_list
        else:
            train_val_task_name_list = train_task_name_list
    elif xtreme_task in UNTRAINED_TASKS:
        # Zero-shot retrieval-style tasks: no training, validate on all languages.
        train_task_name_list = []
        val_task_name_list = xtreme_task_name_list
        train_val_task_name_list = []
    else:
        raise KeyError(xtreme_task)
    if xtreme_task == "udpos":
        # UDPOS has extra test-only languages beyond the train/val set.
        test_task_name_list = xtreme_task_name_list + [
            f"udpos_{lang}" for lang in EXTRA_UDPOS_TEST_LANGS
        ]
    elif xtreme_task in ["xquad", "tydiqa", "tatoeba"]:
        test_task_name_list = []
    else:
        test_task_name_list = xtreme_task_name_list
    if not args.suppress_print:
        print("Training on:", ", ".join(train_task_name_list))
        print("Validation on:", ", ".join(val_task_name_list))
        print("Early stopping on:", ", ".join(train_val_task_name_list))
        # Fixed: previously used ",".join, inconsistent with the prints above.
        print("Testing on:", ", ".join(test_task_name_list))
    config = configurator.SimpleAPIMultiTaskConfigurator(
        task_config_base_path=args.task_config_base_path,
        task_cache_base_path=args.task_cache_base_path,
        train_task_name_list=train_task_name_list,
        train_val_task_name_list=train_val_task_name_list,
        val_task_name_list=val_task_name_list,
        test_task_name_list=test_task_name_list,
        epochs=args.epochs,
        train_batch_size=args.train_batch_size,
        eval_batch_multiplier=args.eval_batch_multiplier,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        eval_subset_num=args.eval_subset_num,
        num_gpus=args.num_gpus,
        warmup_steps_proportion=args.warmup_steps_proportion,
    ).create_config()
    # Make sure all tasks use the same task head.
    # (Iterate keys directly; the original iterated .items() and discarded values.)
    config["taskmodels_config"]["task_to_taskmodel_map"] = {
        k: xtreme_task
        for k in config["taskmodels_config"]["task_to_taskmodel_map"]
    }
    if not args.suppress_print:
        print(f"Assigning all tasks to '{xtreme_task}' head")
    if xtreme_task in UNTRAINED_TASKS:
        # The reference implementation from the XTREME paper uses layer 14 for the
        # retrieval representation.
        config["taskmodels_config"]["taskmodel_config_map"] = {
            xtreme_task: {
                "pooler_type": "mean",
                "layer": args.retrieval_layer,
            }
        }
    py_io.write_json(config, args.output_path)
def set_task(
        DATASET: str, BATCH_SIZE: int, path: str,
        N_WORKERS: int) -> "Tuple[Dataloader, Dataloader, List, List, List]":
    # NOTE: annotation fixed — the function returns a 5-tuple, so the original
    # Union[...] (which collapses duplicate members) was incorrect. Kept as a
    # string annotation so no new typing import is required at def time.
    """Setting task parameters

    Args:
        DATASET: Dataset name ("CIFAR-10", "MNIST", "Cityscapes" or "NLP")
        BATCH_SIZE: training batch size
        path: path to dataset folder
        N_WORKERS: num workers

    Returns:
        train_loader - loader for training set
        val_loader - loader for validation set
        criterions - loss functions
        list_of_encoders - encoder models
        list_of_decoders - decoder models

    Raises:
        ValueError: if DATASET is not one of the supported names.
    """
    set_seed(999)
    if DATASET == "CIFAR-10":
        train_dst = CIFAR10Loader(root=path, train=True)
        train_loader = train_dst.get_loader(batch_size=BATCH_SIZE, shuffle=True)
        val_dst = CIFAR10Loader(root=path, train=False)
        val_loader = val_dst.get_loader()
        list_of_encoders = [ResNet18]
        # One decoder/criterion per CIFAR-10 class (10 binary heads).
        list_of_decoders = [MultiDec] * 10
        criterions = [torch.nn.BCEWithLogitsLoss()] * 10
    elif DATASET == "MNIST":
        train_dst = MNIST(root=path, train=True, download=True,
                          transform=global_transformer(), multi=True)
        train_loader = torch.utils.data.DataLoader(
            train_dst, batch_size=BATCH_SIZE, shuffle=True,
            num_workers=N_WORKERS)
        val_dst = MNIST(root=path, train=False, download=True,
                        transform=global_transformer(), multi=True)
        val_loader = torch.utils.data.DataLoader(
            val_dst, batch_size=BATCH_SIZE, num_workers=N_WORKERS)
        list_of_encoders = [MultiLeNetEnc]
        list_of_decoders = [MultiLeNetDec] * 2
        criterions = [torch.nn.NLLLoss()] * 2
    elif DATASET == "Cityscapes":
        cityscapes_augmentations = Compose(
            [RandomRotate(10), RandomHorizontallyFlip()])
        img_rows = 256
        img_cols = 512
        train_dst = CITYSCAPES(root=path, is_transform=True, split=['train'],
                               img_size=(img_rows, img_cols),
                               augmentations=cityscapes_augmentations)
        train_loader = torch.utils.data.DataLoader(
            train_dst, batch_size=BATCH_SIZE, shuffle=True,
            num_workers=N_WORKERS)
        val_dst = CITYSCAPES(root=path, split=['val'],
                             img_size=(img_rows, img_cols))
        val_loader = torch.utils.data.DataLoader(
            val_dst, batch_size=BATCH_SIZE, num_workers=N_WORKERS)
        list_of_encoders = [get_segmentation_encoder]
        # Three decoders: semantic segmentation (19-way classification),
        # instance regression (2 channels), depth regression (1 channel).
        list_of_decoders = [
            partialclass(SegmentationDecoder, num_class=19, task_type="C"),
            partialclass(SegmentationDecoder, num_class=2, task_type="R"),
            partialclass(SegmentationDecoder, num_class=1, task_type="R")
        ]
        criterions = [cross_entropy2d, l1_loss_instance, l1_loss_depth]
    elif DATASET == 'NLP':
        export_model.export_model(
            hf_pretrained_model_name_or_path="bert-base-uncased",
            output_base_path="./models/bert-base-uncased",
        )
        for task_name in ["rte", "stsb", "commonsenseqa"]:
            tokenize_and_cache.main(
                tokenize_and_cache.RunConfiguration(
                    task_config_path=f"./tasks/configs/{task_name}_config.json",
                    hf_pretrained_model_name_or_path="bert-base-uncased",
                    output_dir=f"./cache/{task_name}",
                    phases=["train", "val"],
                ))
        jiant_run_config = configurator.SimpleAPIMultiTaskConfigurator(
            task_config_base_path="./tasks/configs",
            task_cache_base_path="./cache",
            train_task_name_list=["rte", "stsb", "commonsenseqa"],
            val_task_name_list=["rte", "stsb", "commonsenseqa"],
            train_batch_size=4,
            eval_batch_size=8,
            epochs=0.5,
            num_gpus=1,
        ).create_config()
        jiant_task_container = container_setup.create_jiant_task_container_from_dict(
            jiant_run_config)
        jiant_model = jiant_model_setup.setup_jiant_model(
            hf_pretrained_model_name_or_path="bert-base-uncased",
            model_config_path="./models/bert-base-uncased/model/config.json",
            task_dict=jiant_task_container.task_dict,
            taskmodels_config=jiant_task_container.taskmodels_config,
        )
        train_cache = jiant_task_container.task_cache_dict['stsb']["train"]
        val_cache = jiant_task_container.task_cache_dict['stsb']["val"]
        # BUG FIX: `task` was previously undefined here (NameError). Use the
        # STS-B task object from the container — TODO confirm this matches the
        # task object expected by the dataloader helpers.
        task = jiant_task_container.task_dict["stsb"]
        # BUG FIX: results were bound to train_dataloader/val_dataloader, but
        # the function returns train_loader/val_loader, so this branch raised
        # NameError at the return statement.
        train_loader = get_train_dataloader_from_cache(train_cache, task, 4)
        val_loader = get_eval_dataloader_from_cache(val_cache, task, 4)
        list_of_encoders = [jiant_model.encoder]
        # Three independent copies of the STS-B regression head, re-initialized.
        decoder1 = deepcopy(jiant_model.taskmodels_dict['stsb'].head)
        reset(decoder1)
        decoder2 = deepcopy(decoder1)
        reset(decoder2)
        decoder3 = deepcopy(decoder2)
        reset(decoder3)
        # Zero-arg factories, matching the callable convention of other branches.
        list_of_decoders = [
            lambda: decoder1, lambda: decoder2, lambda: decoder3
        ]
        criterions = [
            torch.nn.MSELoss(),
            torch.nn.MSELoss(),
            torch.nn.MSELoss()
        ]
    else:
        # BUG FIX: an unknown DATASET previously fell through and raised a
        # confusing NameError at the return; fail fast with a clear message.
        raise ValueError(f"Unsupported DATASET: {DATASET!r}")
    return train_loader, val_loader, criterions, list_of_encoders, list_of_decoders