import os

import jiant.utils.python.io as py_io
from jiant.proj.main.export_model import export_model


def test_export_model(tmp_path, model_type, model_class, hf_pretrained_model_name_or_path):
    export_model(
        hf_pretrained_model_name_or_path=hf_pretrained_model_name_or_path,
        output_base_path=tmp_path,
    )
    read_config = py_io.read_json(os.path.join(tmp_path, "config.json"))
    assert read_config["model_type"] == model_type
    assert read_config["model_path"] == os.path.join(tmp_path, "model", f"{model_type}.p")
    assert read_config["model_config_path"] == os.path.join(
        tmp_path, "model", f"{model_type}.json"
    )
def test_export_model(tmp_path, model_type, model_class, tokenizer_class, hf_model_name):
    export_model(
        model_type=model_type,
        output_base_path=tmp_path,
        model_class=model_class,
        tokenizer_class=tokenizer_class,
        hf_model_name=hf_model_name,
    )
    read_config = py_io.read_json(os.path.join(tmp_path, "config.json"))
    assert read_config["model_type"] == model_type
    assert read_config["model_path"] == os.path.join(tmp_path, "model", f"{model_type}.p")
    assert read_config["model_config_path"] == os.path.join(
        tmp_path, "model", f"{model_type}.json"
    )
    assert read_config["model_tokenizer_path"] == os.path.join(tmp_path, "tokenizer")
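# A sketch (an assumption, not taken from the original test module) of the kind
# of pytest parametrization that would supply the arguments used by the first
# test variant above; tmp_path stays a built-in pytest fixture, and the model
# names/types listed here are illustrative only.
import pytest

EXPORT_MODEL_CASES = [
    # (model_type, model_class, hf_pretrained_model_name_or_path)
    ("bert", None, "bert-base-uncased"),
    ("roberta", None, "roberta-base"),
]


@pytest.mark.parametrize(
    "model_type, model_class, hf_pretrained_model_name_or_path", EXPORT_MODEL_CASES
)
def test_export_model_parametrized(
    tmp_path, model_type, model_class, hf_pretrained_model_name_or_path
):
    ...  # same body as the first test_export_model variant above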
def run_simple(args: RunConfiguration, with_continue: bool = False):
    hf_config = AutoConfig.from_pretrained(args.hf_pretrained_model_name_or_path)
    model_cache_path = replace_none(
        args.model_cache_path, default=os.path.join(args.exp_dir, "models")
    )

    with distributed.only_first_process(local_rank=args.local_rank):
        # === Step 1: Write task configs based on templates === #
        full_task_name_list = sorted(
            list(set(args.train_tasks + args.val_tasks + args.test_tasks))
        )
        task_config_path_dict = {}
        if args.create_config:
            task_config_path_dict = create_and_write_task_configs(
                task_name_list=full_task_name_list,
                data_dir=args.data_dir,
                task_config_base_path=os.path.join(args.data_dir, "configs"),
            )
        else:
            for task_name in full_task_name_list:
                task_config_path_dict[task_name] = os.path.join(
                    args.data_dir, "configs", f"{task_name}_config.json"
                )

        # === Step 2: Download models === #
        model_pt_name = args.model_pt_name
        if not os.path.exists(os.path.join(model_cache_path, model_pt_name)):
            print("Downloading model")
            export_model.export_model(
                hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
                output_base_path=os.path.join(model_cache_path, model_pt_name),
            )

        # === Step 3: Tokenize and cache === #
        phase_task_dict = {
            "train": args.train_tasks,
            "val": args.val_tasks,
            "test": args.test_tasks,
        }
        for task_name in full_task_name_list:
            phases_to_do = []
            for phase, phase_task_list in phase_task_dict.items():
                if task_name in phase_task_list and not os.path.exists(
                    os.path.join(args.exp_dir, "cache", model_pt_name, task_name, phase)
                ):
                    phases_to_do.append(phase)
            if not phases_to_do:
                continue
            print(f"Tokenizing Task '{task_name}' for phases '{','.join(phases_to_do)}'")
            tokenize_and_cache.main(
                tokenize_and_cache.RunConfiguration(
                    task_config_path=task_config_path_dict[task_name],
                    hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
                    output_dir=os.path.join(args.exp_dir, "cache", model_pt_name, task_name),
                    phases=phases_to_do,
                    # TODO: Need a strategy for task-specific max_seq_length issues (issue #1176)
                    max_seq_length=args.max_seq_length,
                    smart_truncate=True,
                    do_iter=True,
                )
            )

    # === Step 4: Generate jiant_task_container_config === #
    # We'll do this with a configurator. Creating a jiant_task_config has a surprising
    # number of moving parts.
    jiant_task_container_config = configurator.SimpleAPIMultiTaskConfigurator(
        task_config_base_path=os.path.join(args.data_dir, "configs"),
        task_cache_base_path=os.path.join(args.exp_dir, "cache", model_pt_name),
        train_task_name_list=args.train_tasks,
        val_task_name_list=args.val_tasks,
        test_task_name_list=args.test_tasks,
        train_batch_size=args.train_batch_size,
        eval_batch_multiplier=2,
        epochs=args.num_train_epochs,
        num_gpus=torch.cuda.device_count(),
        train_examples_cap=args.train_examples_cap,
    ).create_config()
    os.makedirs(os.path.join(args.exp_dir, "run_configs"), exist_ok=True)
    jiant_task_container_config_path = os.path.join(
        args.exp_dir, "run_configs", f"{args.run_name}_config.json"
    )
    py_io.write_json(jiant_task_container_config, path=jiant_task_container_config_path)

    # === Step 5: Train/Eval! === #
    if args.model_weights_path:
        model_load_mode = "partial"
        model_weights_path = args.model_weights_path
    else:
        # From Transformers
        if any(task_name.startswith("mlm_") for task_name in full_task_name_list):
            model_load_mode = "from_transformers_with_mlm"
        else:
            model_load_mode = "from_transformers"
        model_weights_path = os.path.join(
            model_cache_path, model_pt_name, "model", "model.p"
        )
    print(f"Loading model from {model_weights_path}")
    run_output_dir = os.path.join(args.exp_dir, "runs", args.run_name)

    if (
        args.save_checkpoint_every_steps
        and os.path.exists(os.path.join(run_output_dir, "checkpoint.p"))
        and with_continue
    ):
        print("Resuming")
        checkpoint = torch.load(os.path.join(run_output_dir, "checkpoint.p"))
        run_args = runscript.RunConfiguration.from_dict(checkpoint["metadata"]["args"])
    else:
        print("Running from start")
        run_args = runscript.RunConfiguration(
            # === Required parameters === #
            jiant_task_container_config_path=jiant_task_container_config_path,
            output_dir=run_output_dir,
            # === Model parameters === #
            hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
            model_path=model_weights_path,
            model_config_path=os.path.join(
                model_cache_path, model_pt_name, "model", "config.json"
            ),
            model_load_mode=model_load_mode,
            # === Running Setup === #
            do_train=bool(args.train_tasks),
            do_val=bool(args.val_tasks),
            do_save=args.do_save,
            do_save_best=args.do_save_best,
            do_save_last=args.do_save_last,
            write_val_preds=args.write_val_preds,
            write_test_preds=args.write_test_preds,
            eval_every_steps=args.eval_every_steps,
            save_every_steps=args.save_every_steps,
            save_checkpoint_every_steps=args.save_checkpoint_every_steps,
            no_improvements_for_n_evals=args.no_improvements_for_n_evals,
            keep_checkpoint_when_done=args.keep_checkpoint_when_done,
            force_overwrite=args.force_overwrite,
            seed=args.seed,
            # === Training Learning Parameters === #
            learning_rate=args.learning_rate,
            adam_epsilon=args.adam_epsilon,
            max_grad_norm=args.max_grad_norm,
            optimizer_type=args.optimizer_type,
            # === Specialized config === #
            no_cuda=args.no_cuda,
            fp16=args.fp16,
            fp16_opt_level=args.fp16_opt_level,
            local_rank=args.local_rank,
            server_ip=args.server_ip,
            server_port=args.server_port,
        )
        checkpoint = None

    runscript.run_loop(args=run_args, checkpoint=checkpoint)
    py_io.write_file(args.to_json(), os.path.join(run_output_dir, "simple_run_config.json"))
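# A minimal sketch of invoking run_simple() programmatically.  It assumes the
# simple-API RunConfiguration exposes the attribute names read above (run_name,
# exp_dir, data_dir, train_tasks, ...) and that its remaining fields have usable
# defaults; the concrete values below are illustrative, not from the original
# script.
if __name__ == "__main__":
    simple_args = RunConfiguration(
        run_name="rte_simple",
        exp_dir="./experiments/rte",
        data_dir="./experiments/tasks",
        hf_pretrained_model_name_or_path="roberta-base",
        model_pt_name="roberta-base",
        train_tasks=["rte"],
        val_tasks=["rte"],
        test_tasks=[],
        train_batch_size=16,
        num_train_epochs=3,
        create_config=True,
    )
    run_simple(simple_args, with_continue=False)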
import jiant.scripts.download_data.runscript as downloader
import jiant.proj.main.export_model as export_model

EXP_DIR = "./jiant"

tasks = [
    "superglue_broadcoverage_diagnostics",  # Broadcoverage Diagnostics
    "cb",  # CommitmentBank
    "copa",  # Choice of Plausible Alternatives
    "multirc",  # Multi-Sentence Reading Comprehension
    "wic",  # Words in Context
    "wsc",  # The Winograd Schema Challenge
    "boolq",  # BoolQ
    "record",  # Reading Comprehension with Commonsense Reasoning
    "superglue_winogender_diagnostics",  # Winogender Schema Diagnostics
    "rte",  # Recognizing Textual Entailment
]

# Download the data
downloader.download_data(tasks, f"{EXP_DIR}/tasks")

# Cache the model
export_model.export_model(
    hf_pretrained_model_name_or_path="en_bert",
    output_base_path=f"{EXP_DIR}/models/en_bert",
)
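# A possible next step (a sketch, not part of the download script above):
# tokenize and cache one of the downloaded tasks so it can be trained on.  The
# "<task_name>_config.json" path convention and the tokenize_and_cache API
# mirror their use elsewhere in this document; the choice of "rte" and
# max_seq_length=256 are illustrative assumptions.
import jiant.proj.main.tokenize_and_cache as tokenize_and_cache

tokenize_and_cache.main(
    tokenize_and_cache.RunConfiguration(
        task_config_path=f"{EXP_DIR}/tasks/configs/rte_config.json",
        hf_pretrained_model_name_or_path="en_bert",
        output_dir=f"{EXP_DIR}/cache/rte",
        phases=["train", "val"],
        max_seq_length=256,
    )
)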
import jiant.proj.main.scripts.configurator as configurator
import jiant.proj.main.export_model as export_model
import jiant.utils.python.io as py_io
import jiant.utils.display as display
import os

jiant_run_config = configurator.SimpleAPIMultiTaskConfigurator(
    task_config_base_path="./spatial_experiment",
    task_cache_base_path="./spatial_experiment/cache",
    train_task_name_list=["spatial"],
    val_task_name_list=["spatial"],
    train_batch_size=8,
    eval_batch_size=16,
    epochs=3,
    num_gpus=1,
).create_config()

os.makedirs("./spatial_experiment/run_configs/", exist_ok=True)
py_io.write_json(jiant_run_config, "./spatial_experiment/run_configs/spatial_run_config.json")
display.show_json(jiant_run_config)

export_model.export_model(
    hf_pretrained_model_name_or_path="bert-base-uncased",
    output_base_path="./spatial_experiment/models/bert",
)
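# A sketch of launching training from the run config written above, using the
# main runscript API that run_simple relies on earlier in this document
# (RunConfiguration / run_loop).  It assumes the exported BERT weights live
# under ./spatial_experiment/models/bert/model/; note that, depending on the
# jiant version, the weights file is named either "model.p" or
# "<model_type>.p" (both conventions appear in this document).  model_load_mode
# and learning_rate are illustrative choices.
import jiant.proj.main.runscript as main_runscript

run_args = main_runscript.RunConfiguration(
    jiant_task_container_config_path="./spatial_experiment/run_configs/spatial_run_config.json",
    output_dir="./spatial_experiment/runs/spatial",
    hf_pretrained_model_name_or_path="bert-base-uncased",
    model_path="./spatial_experiment/models/bert/model/model.p",
    model_config_path="./spatial_experiment/models/bert/model/config.json",
    model_load_mode="from_transformers",
    learning_rate=1e-5,
    do_train=True,
    do_val=True,
    force_overwrite=True,
)
main_runscript.run_loop(run_args)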
def set_task(
    DATASET: str, BATCH_SIZE: int, path: str, N_WORKERS: int
) -> Tuple[DataLoader, DataLoader, List, List, List]:
    """Set up task-specific data loaders, losses, and model components.

    Args:
        DATASET: dataset name
        BATCH_SIZE: training batch size
        path: path to the dataset folder
        N_WORKERS: number of data-loading workers

    Returns:
        train_loader: loader for the training set
        val_loader: loader for the validation set
        criterions: list of loss functions, one per task
        list_of_encoders: encoder models
        list_of_decoders: decoder models
    """
    set_seed(999)
    if DATASET == "CIFAR-10":
        train_dst = CIFAR10Loader(root=path, train=True)
        train_loader = train_dst.get_loader(batch_size=BATCH_SIZE, shuffle=True)
        val_dst = CIFAR10Loader(root=path, train=False)
        val_loader = val_dst.get_loader()
        list_of_encoders = [ResNet18]
        list_of_decoders = [MultiDec] * 10
        criterions = [torch.nn.BCEWithLogitsLoss()] * 10
    elif DATASET == "MNIST":
        train_dst = MNIST(
            root=path, train=True, download=True, transform=global_transformer(), multi=True
        )
        train_loader = torch.utils.data.DataLoader(
            train_dst, batch_size=BATCH_SIZE, shuffle=True, num_workers=N_WORKERS
        )
        val_dst = MNIST(
            root=path, train=False, download=True, transform=global_transformer(), multi=True
        )
        val_loader = torch.utils.data.DataLoader(
            val_dst, batch_size=BATCH_SIZE, num_workers=N_WORKERS
        )
        list_of_encoders = [MultiLeNetEnc]
        list_of_decoders = [MultiLeNetDec] * 2
        criterions = [torch.nn.NLLLoss()] * 2
    elif DATASET == "Cityscapes":
        cityscapes_augmentations = Compose([RandomRotate(10), RandomHorizontallyFlip()])
        img_rows = 256
        img_cols = 512
        train_dst = CITYSCAPES(
            root=path,
            is_transform=True,
            split=["train"],
            img_size=(img_rows, img_cols),
            augmentations=cityscapes_augmentations,
        )
        train_loader = torch.utils.data.DataLoader(
            train_dst, batch_size=BATCH_SIZE, shuffle=True, num_workers=N_WORKERS
        )
        val_dst = CITYSCAPES(root=path, split=["val"], img_size=(img_rows, img_cols))
        val_loader = torch.utils.data.DataLoader(
            val_dst, batch_size=BATCH_SIZE, num_workers=N_WORKERS
        )
        list_of_encoders = [get_segmentation_encoder]
        list_of_decoders = [
            partialclass(SegmentationDecoder, num_class=19, task_type="C"),
            partialclass(SegmentationDecoder, num_class=2, task_type="R"),
            partialclass(SegmentationDecoder, num_class=1, task_type="R"),
        ]
        criterions = [cross_entropy2d, l1_loss_instance, l1_loss_depth]
    elif DATASET == "NLP":
        export_model.export_model(
            hf_pretrained_model_name_or_path="bert-base-uncased",
            output_base_path="./models/bert-base-uncased",
        )
        for task_name in ["rte", "stsb", "commonsenseqa"]:
            tokenize_and_cache.main(
                tokenize_and_cache.RunConfiguration(
                    task_config_path=f"./tasks/configs/{task_name}_config.json",
                    hf_pretrained_model_name_or_path="bert-base-uncased",
                    output_dir=f"./cache/{task_name}",
                    phases=["train", "val"],
                )
            )
        jiant_run_config = configurator.SimpleAPIMultiTaskConfigurator(
            task_config_base_path="./tasks/configs",
            task_cache_base_path="./cache",
            train_task_name_list=["rte", "stsb", "commonsenseqa"],
            val_task_name_list=["rte", "stsb", "commonsenseqa"],
            train_batch_size=4,
            eval_batch_size=8,
            epochs=0.5,
            num_gpus=1,
        ).create_config()
        jiant_task_container = container_setup.create_jiant_task_container_from_dict(
            jiant_run_config
        )
        jiant_model = jiant_model_setup.setup_jiant_model(
            hf_pretrained_model_name_or_path="bert-base-uncased",
            model_config_path="./models/bert-base-uncased/model/config.json",
            task_dict=jiant_task_container.task_dict,
            taskmodels_config=jiant_task_container.taskmodels_config,
        )
        train_cache = jiant_task_container.task_cache_dict["stsb"]["train"]
        val_cache = jiant_task_container.task_cache_dict["stsb"]["val"]
        # The dataloader helpers need the task object; use the cached STS-B task.
        task = jiant_task_container.task_dict["stsb"]
        train_loader = get_train_dataloader_from_cache(train_cache, task, 4)
        val_loader = get_eval_dataloader_from_cache(val_cache, task, 4)
        list_of_encoders = [jiant_model.encoder]
        decoder1 = deepcopy(jiant_model.taskmodels_dict["stsb"].head)
        reset(decoder1)
        decoder2 = deepcopy(decoder1)
        reset(decoder2)
        decoder3 = deepcopy(decoder2)
        reset(decoder3)
        list_of_decoders = [lambda: decoder1, lambda: decoder2, lambda: decoder3]
        criterions = [torch.nn.MSELoss(), torch.nn.MSELoss(), torch.nn.MSELoss()]
    return train_loader, val_loader, criterions, list_of_encoders, list_of_decoders
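# A minimal usage sketch (illustrative, not from the original module): build the
# MNIST multi-task setup returned by set_task and instantiate its components.
# The data path, batch size, and worker count are assumptions; for the MNIST
# branch each encoder/decoder entry is a class that can be instantiated directly.
train_loader, val_loader, criterions, list_of_encoders, list_of_decoders = set_task(
    DATASET="MNIST", BATCH_SIZE=256, path="./data/mnist", N_WORKERS=4
)
encoder = list_of_encoders[0]()
decoders = [decoder_cls() for decoder_cls in list_of_decoders]
print(f"{len(decoders)} task heads, {len(criterions)} criterions")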