Example #1
def run_simple(args: RunConfiguration, with_continue: bool = False):
    hf_config = AutoConfig.from_pretrained(args.hf_pretrained_model_name_or_path)

    model_cache_path = replace_none(
        args.model_cache_path, default=os.path.join(args.exp_dir, "models")
    )

    with distributed.only_first_process(local_rank=args.local_rank):
        # === Step 1: Write task configs based on templates === #
        full_task_name_list = sorted(list(set(args.train_tasks + args.val_tasks + args.test_tasks)))
        task_config_path_dict = {}
        if args.create_config:
            task_config_path_dict = create_and_write_task_configs(
                task_name_list=full_task_name_list,
                data_dir=args.data_dir,
                task_config_base_path=os.path.join(args.data_dir, "configs"),
            )
        else:
            for task_name in full_task_name_list:
                task_config_path_dict[task_name] = os.path.join(
                    args.data_dir, "configs", f"{task_name}_config.json"
                )

        # === Step 2: Download models === #
        # if not os.path.exists(os.path.join(model_cache_path, hf_config.model_type)):
            # print("Downloading model")
            # export_model.export_model(
            #     hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
            #     output_base_path=os.path.join(model_cache_path, hf_config.model_type),
            # )

        # === Step 3: Tokenize and cache === #
        phase_task_dict = {
            "train": args.train_tasks,
            "val": args.val_tasks,
            "test": args.test_tasks,
        }
        for task_name in full_task_name_list:
            phases_to_do = []
            for phase, phase_task_list in phase_task_dict.items():
                if task_name in phase_task_list and not os.path.exists(
                    os.path.join(args.exp_dir, "cache", hf_config.model_type, task_name, phase)
                ):
                    config = read_json(task_config_path_dict[task_name])
                    if phase in config["paths"]:
                        phases_to_do.append(phase)
                    else:
                        phase_task_list.remove(task_name)
            if not phases_to_do:
                continue
            print(f"Tokenizing Task '{task_name}' for phases '{','.join(phases_to_do)}'")
            tokenize_and_cache.main(
                tokenize_and_cache.RunConfiguration(
                    task_config_path=task_config_path_dict[task_name],
                    hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
                    output_dir=os.path.join(args.exp_dir, "cache", hf_config.model_type, task_name),
                    phases=phases_to_do,
                    # TODO: Need a strategy for task-specific max_seq_length issues (issue #1176)
                    max_seq_length=args.max_seq_length,
                    smart_truncate=True,
                    do_iter=True,
                )
            )

    # === Step 4: Generate jiant_task_container_config === #
    # We'll do this with a configurator. Creating a jiant_task_config has a surprising
    # number of moving parts.
    jiant_task_container_config = configurator.SimpleAPIMultiTaskConfigurator(
        task_config_base_path=os.path.join(args.data_dir, "configs"),
        task_cache_base_path=os.path.join(args.exp_dir, "cache", hf_config.model_type),
        train_task_name_list=args.train_tasks,
        val_task_name_list=args.val_tasks,
        test_task_name_list=args.test_tasks,
        train_batch_size=args.train_batch_size,
        eval_batch_multiplier=2,
        epochs=args.num_train_epochs,
        num_gpus=torch.cuda.device_count(),
        train_examples_cap=args.train_examples_cap,
    ).create_config()
    os.makedirs(os.path.join(args.exp_dir, "run_configs"), exist_ok=True)
    jiant_task_container_config_path = os.path.join(
        args.exp_dir, "run_configs", f"{args.run_name}_config.json"
    )
    py_io.write_json(jiant_task_container_config, path=jiant_task_container_config_path)

    # === Step 5: Train/Eval! === #
    if args.model_weights_path:
        model_load_mode = "partial"
        model_weights_path = args.model_weights_path
    else:
        # From Transformers
        if any(task_name.startswith("mlm_") for task_name in full_task_name_list):
            model_load_mode = "from_transformers_with_mlm"
        else:
            model_load_mode = "from_transformers"
        model_weights_path = os.path.join(
            model_cache_path, hf_config.model_type, "model", "model.p"
        )
    run_output_dir = os.path.join(args.exp_dir, "runs", args.run_name)

    if (
        args.save_checkpoint_every_steps
        and os.path.exists(os.path.join(run_output_dir, "checkpoint.p"))
        and with_continue
    ):
        print("Resuming")
        checkpoint = torch.load(os.path.join(run_output_dir, "checkpoint.p"))
        run_args = runscript.RunConfiguration.from_dict(checkpoint["metadata"]["args"])
    else:
        print("Running from start")
        run_args = runscript.RunConfiguration(
            # === Required parameters === #
            jiant_task_container_config_path=jiant_task_container_config_path,
            output_dir=run_output_dir,
            # === Model parameters === #
            hf_pretrained_model_name_or_path=args.hf_pretrained_model_name_or_path,
            model_path=model_weights_path,
            model_config_path=os.path.join(
                model_cache_path, hf_config.model_type, "model", "config.json",
            ),
            model_load_mode=model_load_mode,
            # === Running Setup === #
            do_train=bool(args.train_tasks),
            do_val=bool(args.val_tasks),
            do_save=args.do_save,
            do_save_best=args.do_save_best,
            do_save_last=args.do_save_last,
            write_val_preds=args.write_val_preds,
            write_test_preds=args.write_test_preds,
            eval_every_steps=args.eval_every_steps,
            save_every_steps=args.save_every_steps,
            save_checkpoint_every_steps=args.save_checkpoint_every_steps,
            no_improvements_for_n_evals=args.no_improvements_for_n_evals,
            keep_checkpoint_when_done=args.keep_checkpoint_when_done,
            force_overwrite=args.force_overwrite,
            seed=args.seed,
            # === Training Learning Parameters === #
            learning_rate=args.learning_rate,
            adam_epsilon=args.adam_epsilon,
            max_grad_norm=args.max_grad_norm,
            optimizer_type=args.optimizer_type,
            # === Specialized config === #
            no_cuda=args.no_cuda,
            fp16=args.fp16,
            fp16_opt_level=args.fp16_opt_level,
            local_rank=args.local_rank,
            server_ip=args.server_ip,
            server_port=args.server_port,
        )
        checkpoint = None

    runscript.run_loop(args=run_args, checkpoint=checkpoint)
    py_io.write_file(args.to_json(), os.path.join(run_output_dir, "simple_run_config.json"))
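For orientation, here is a minimal, hedged sketch of how run_simple is typically driven. The import path and all field values are illustrative assumptions; only the field names come from the attributes the function reads above.

# Minimal sketch; import path and values are assumptions, not part of the example.
from jiant.proj.simple import runscript as simple_run

args = simple_run.RunConfiguration(
    run_name="rte_demo",             # feeds run_configs/<run_name>_config.json
    exp_dir="/path/to/exp",          # experiment root holding cache/, run_configs/, runs/
    data_dir="/path/to/exp/tasks",   # expects <data_dir>/configs/<task>_config.json
    hf_pretrained_model_name_or_path="roberta-base",
    train_tasks=["rte"],
    val_tasks=["rte"],
    test_tasks=["rte"],
    train_batch_size=16,
    num_train_epochs=3,
    seed=42,
)
simple_run.run_simple(args, with_continue=False)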
Example #2
# Tokenize and cache the dataset
tokenize_and_cache.main(
    tokenize_and_cache.RunConfiguration(
        task_config_path=f"{EXP_DIR}/tasks/configs/{TASK}_config.json",
        hf_pretrained_model_name_or_path="en_bert",
        output_dir=f"{EXP_DIR}/cache/{TASK}",
        phases=["test"] if is_diagnostics else ["train", "val", "test"],
    ))

# Create the run config
jiant_run_config = configurator.SimpleAPIMultiTaskConfigurator(
    task_config_base_path=f"{EXP_DIR}/tasks/configs",
    task_cache_base_path=f"{EXP_DIR}/cache",
    train_task_name_list=["rte" if is_diagnostics else TASK],
    val_task_name_list=["rte" if is_diagnostics else TASK],
    test_task_name_list=[TASK],
    train_batch_size=BATCH_SIZE,
    eval_batch_size=32,
    epochs=N_EPOCHS,
    num_gpus=1,
    warmup_steps_proportion=0.1,
).create_config()
os.makedirs(f"{EXP_DIR}/run_configs/", exist_ok=True)
py_io.write_json(jiant_run_config, f"{EXP_DIR}/run_configs/{TASK}_config.json")

for seed in [123, 456, 789, 1234, 5678, 9012, 12345, 67890, 123456, 789012]:
    output_dir = f"{EXP_DIR}/runs/run_{TASK}_seed={seed}"
    with HiddenPrints():
        run_args = main_runscript.RunConfiguration(
            jiant_task_container_config_path=
            f"{EXP_DIR}/run_configs/{TASK}_config.json",
            output_dir=output_dir,
Example #3
import jiant.proj.main.scripts.configurator as configurator
import jiant.proj.main.export_model as export_model
import jiant.utils.python.io as py_io
import jiant.utils.display as display
import os

jiant_run_config = configurator.SimpleAPIMultiTaskConfigurator(
    task_config_base_path="./spatial_experiment",
    task_cache_base_path="./spatial_experiment/cache",
    train_task_name_list=["spatial"],
    val_task_name_list=["spatial"],
    train_batch_size=8,
    eval_batch_size=16,
    epochs=3,
    num_gpus=1,
).create_config()

os.makedirs("./spatial_experiment/run_configs/", exist_ok=True)
py_io.write_json(jiant_run_config,
                 "./spatial_experiment/run_configs/spatial_run_config.json")
display.show_json(jiant_run_config)

export_model.export_model(
    hf_pretrained_model_name_or_path="bert-base-uncased",
    output_base_path="./spatial_experiment/models/bert",
)
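The snippet above only exports the model and writes the run config; here is a hedged sketch of the training step that usually follows, mirroring the run_loop pattern from Example #1 and assuming the "spatial" task has already been tokenized and cached. The output_dir and hyperparameters are placeholders.

# Hedged follow-up sketch; output_dir and hyperparameters are placeholders.
import jiant.proj.main.runscript as main_runscript

run_args = main_runscript.RunConfiguration(
    jiant_task_container_config_path="./spatial_experiment/run_configs/spatial_run_config.json",
    output_dir="./spatial_experiment/runs/spatial",
    hf_pretrained_model_name_or_path="bert-base-uncased",
    model_path="./spatial_experiment/models/bert/model/model.p",
    model_config_path="./spatial_experiment/models/bert/model/config.json",
    learning_rate=1e-5,
    do_train=True,
    do_val=True,
    do_save=True,
    force_overwrite=True,
)
main_runscript.run_loop(args=run_args)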
Example #4
def generate_configs(args: RunConfiguration):
    xtreme_task = args.xtreme_task
    if xtreme_task == "mlqa":
        xtreme_task_name_list = [
            f"{xtreme_task}_{lang}_{lang}" for lang in LANGS_DICT[xtreme_task]
        ]
    else:
        xtreme_task_name_list = [
            f"{xtreme_task}_{lang}" for lang in LANGS_DICT[xtreme_task]
        ]

    if xtreme_task in TRAINED_TASKS:
        train_task = TRAIN_TASK_DICT[xtreme_task]
        train_task_name_list = [train_task]
        val_task_name_list = get_unique_list_in_order(
            [xtreme_task_name_list, train_task_name_list])
        if args.early_stop_on_xtreme_tasks:
            train_val_task_name_list = val_task_name_list
        else:
            train_val_task_name_list = train_task_name_list
    elif xtreme_task in UNTRAINED_TASKS:
        train_task_name_list = []
        val_task_name_list = xtreme_task_name_list
        train_val_task_name_list = []
    else:
        raise KeyError(xtreme_task)

    if xtreme_task == "udpos":
        test_task_name_list = xtreme_task_name_list + [
            f"udpos_{lang}" for lang in EXTRA_UDPOS_TEST_LANGS
        ]
    elif xtreme_task in ["xquad", "tydiqa", "tatoeba"]:
        test_task_name_list = []
    else:
        test_task_name_list = xtreme_task_name_list

    if not args.suppress_print:
        print("Training on:", ", ".join(train_task_name_list))
        print("Validation on:", ", ".join(val_task_name_list))
        print("Early stopping on:", ", ".join(train_val_task_name_list))
        print("Testing on:", ",".join(test_task_name_list))

    config = configurator.SimpleAPIMultiTaskConfigurator(
        task_config_base_path=args.task_config_base_path,
        task_cache_base_path=args.task_cache_base_path,
        train_task_name_list=train_task_name_list,
        train_val_task_name_list=train_val_task_name_list,
        val_task_name_list=val_task_name_list,
        test_task_name_list=test_task_name_list,
        epochs=args.epochs,
        train_batch_size=args.train_batch_size,
        eval_batch_multiplier=args.eval_batch_multiplier,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        eval_subset_num=args.eval_subset_num,
        num_gpus=args.num_gpus,
        warmup_steps_proportion=args.warmup_steps_proportion,
    ).create_config()

    # Make sure all tasks use the same task head
    config["taskmodels_config"]["task_to_taskmodel_map"] = {
        k: xtreme_task
        for k, v in config["taskmodels_config"]
        ["task_to_taskmodel_map"].items()
    }
    if not args.suppress_print:
        print(f"Assigning all tasks to '{xtreme_task}' head")
    if xtreme_task in UNTRAINED_TASKS:
        # The reference implementation from the XTREME paper uses layer 14 for the
        #  retrieval representation.
        config["taskmodels_config"]["taskmodel_config_map"] = {
            xtreme_task: {
                "pooler_type": "mean",
                "layer": args.retrieval_layer
            }
        }

    py_io.write_json(config, args.output_path)
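A hypothetical invocation of generate_configs; the field set is inferred from the attributes read on args above, and every value is a placeholder.

# Hypothetical invocation; all values below are placeholders.
args = RunConfiguration(
    xtreme_task="xnli",               # assumed to be a key of LANGS_DICT
    task_config_base_path="./tasks/configs",
    task_cache_base_path="./cache",
    epochs=2,
    train_batch_size=4,
    eval_batch_multiplier=2,
    gradient_accumulation_steps=8,
    eval_subset_num=500,
    num_gpus=1,
    warmup_steps_proportion=0.1,
    early_stop_on_xtreme_tasks=False,
    suppress_print=False,
    retrieval_layer=14,               # only used for the untrained retrieval tasks
    output_path="./run_configs/xnli_config.json",
)
generate_configs(args)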
Example #5
def set_task(
        DATASET: str, BATCH_SIZE: int, path: str,
        N_WORKERS: int) -> Tuple[DataLoader, DataLoader, List, List, List]:
    """
    Setting task parameters
    Args:
        DATASET: Dataset name
        BATCH_SIZE: training batch size
        path: path to dataset folder
        N_WORKERS: num workers

    Returns:
    train_loader - loader for training set
    val_loader - loader for validation set
    criterions - loss functions
    list_of_encoders - encoder models
    list_of_decoders - decoder models
    """
    set_seed(999)
    if DATASET == "CIFAR-10":
        train_dst = CIFAR10Loader(root=path, train=True)
        train_loader = train_dst.get_loader(batch_size=BATCH_SIZE,
                                            shuffle=True)

        val_dst = CIFAR10Loader(root=path, train=False)
        val_loader = val_dst.get_loader()

        list_of_encoders = [ResNet18]
        list_of_decoders = [MultiDec] * 10
        criterions = [torch.nn.BCEWithLogitsLoss()] * 10

    elif DATASET == "MNIST":
        train_dst = MNIST(root=path,
                          train=True,
                          download=True,
                          transform=global_transformer(),
                          multi=True)
        train_loader = torch.utils.data.DataLoader(train_dst,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=True,
                                                   num_workers=N_WORKERS)

        val_dst = MNIST(root=path,
                        train=False,
                        download=True,
                        transform=global_transformer(),
                        multi=True)
        val_loader = torch.utils.data.DataLoader(val_dst,
                                                 batch_size=BATCH_SIZE,
                                                 num_workers=N_WORKERS)

        list_of_encoders = [MultiLeNetEnc]
        list_of_decoders = [MultiLeNetDec] * 2
        criterions = [torch.nn.NLLLoss()] * 2

    elif DATASET == "Cityscapes":
        cityscapes_augmentations = Compose(
            [RandomRotate(10), RandomHorizontallyFlip()])
        img_rows = 256
        img_cols = 512

        train_dst = CITYSCAPES(root=path,
                               is_transform=True,
                               split=['train'],
                               img_size=(img_rows, img_cols),
                               augmentations=cityscapes_augmentations)
        train_loader = torch.utils.data.DataLoader(train_dst,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=True,
                                                   num_workers=N_WORKERS)

        val_dst = CITYSCAPES(root=path,
                             split=['val'],
                             img_size=(img_rows, img_cols))
        val_loader = torch.utils.data.DataLoader(val_dst,
                                                 batch_size=BATCH_SIZE,
                                                 num_workers=N_WORKERS)

        list_of_encoders = [get_segmentation_encoder]
        list_of_decoders = [
            partialclass(SegmentationDecoder, num_class=19, task_type="C"),
            partialclass(SegmentationDecoder, num_class=2, task_type="R"),
            partialclass(SegmentationDecoder, num_class=1, task_type="R")
        ]
        criterions = [cross_entropy2d, l1_loss_instance, l1_loss_depth]

    elif DATASET == 'NLP':

        export_model.export_model(
            hf_pretrained_model_name_or_path="bert-base-uncased",
            output_base_path="./models/bert-base-uncased",
        )

        for task_name in ["rte", "stsb", "commonsenseqa"]:
            tokenize_and_cache.main(
                tokenize_and_cache.RunConfiguration(
                    task_config_path=f"./tasks/configs/{task_name}_config.json",
                    hf_pretrained_model_name_or_path="bert-base-uncased",
                    output_dir=f"./cache/{task_name}",
                    phases=["train", "val"],
                ))

        jiant_run_config = configurator.SimpleAPIMultiTaskConfigurator(
            task_config_base_path="./tasks/configs",
            task_cache_base_path="./cache",
            train_task_name_list=["rte", "stsb", "commonsenseqa"],
            val_task_name_list=["rte", "stsb", "commonsenseqa"],
            train_batch_size=4,
            eval_batch_size=8,
            epochs=0.5,
            num_gpus=1,
        ).create_config()

        jiant_task_container = container_setup.create_jiant_task_container_from_dict(
            jiant_run_config)

        jiant_model = jiant_model_setup.setup_jiant_model(
            hf_pretrained_model_name_or_path="bert-base-uncased",
            model_config_path="./models/bert-base-uncased/model/config.json",
            task_dict=jiant_task_container.task_dict,
            taskmodels_config=jiant_task_container.taskmodels_config,
        )

        train_cache = jiant_task_container.task_cache_dict['stsb']["train"]
        val_cache = jiant_task_container.task_cache_dict['stsb']["val"]

        # Resolve the STSB task object and build dataloaders from the cached features
        task = jiant_task_container.task_dict["stsb"]
        train_loader = get_train_dataloader_from_cache(train_cache, task, 4)
        val_loader = get_eval_dataloader_from_cache(val_cache, task, 4)

        list_of_encoders = [jiant_model.encoder]
        decoder1 = deepcopy(jiant_model.taskmodels_dict['stsb'].head)
        reset(decoder1)
        decoder2 = deepcopy(decoder1)
        reset(decoder2)
        decoder3 = deepcopy(decoder2)
        reset(decoder3)

        list_of_decoders = [
            lambda: decoder1, lambda: decoder2, lambda: decoder3
        ]
        criterions = [
            torch.nn.MSELoss(),
            torch.nn.MSELoss(),
            torch.nn.MSELoss()
        ]

    else:
        raise ValueError(f"Unsupported DATASET: {DATASET}")

    return train_loader, val_loader, criterions, list_of_encoders, list_of_decoders
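A brief usage sketch for set_task; the dataset name follows the branches above, while the path and loader settings are placeholders.

# Usage sketch; path and loader settings are placeholders.
train_loader, val_loader, criterions, encoders, decoders = set_task(
    DATASET="MNIST", BATCH_SIZE=256, path="./data", N_WORKERS=4
)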