Beispiel #1
0
def test_epoch_sync(num_workers: int, global_batch_size: int,
                    dataset_len: int) -> None:
    """
    Check that every worker reports the same epoch index for each batch.

    The ``pytorch_no_op`` fixture is run for a fixed number of batches with
    ``num_workers`` slots, the given ``global_batch_size`` and a
    ``dataset_len`` hyperparameter.  Afterwards the trial logs must contain a
    "rank R finished batch B in epoch E" line for every rank/batch pair, where
    E is derived from the number of batches per epoch (rounded up, so the
    dataset length need not be a multiple of the global batch size).
    """
    total_batches = 10

    # Build the experiment config step by step; each setter returns the config.
    config = conf.load_config(conf.fixtures_path("pytorch_no_op/const.yaml"))
    config = conf.set_slots_per_trial(config, num_workers)
    config = conf.set_max_length(config, {"batches": total_batches})
    config = conf.set_hparam(config, "dataset_len", dataset_len)
    config = conf.set_global_batch_size(config, global_batch_size)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.fixtures_path("pytorch_no_op"), 1)
    trials = exp.experiment_trials(experiment_id)
    trial_id = trials[0].trial.id

    # Integer ceiling division: a trailing partial batch still counts as a batch.
    steps_per_epoch = (dataset_len + global_batch_size - 1) // global_batch_size

    for step in range(total_batches):
        expected_epoch = step // steps_per_epoch
        for worker_rank in range(config["resources"]["slots_per_trial"]):
            expected_line = (
                f"rank {worker_rank} finished batch {step} in epoch {expected_epoch}"
            )
            assert exp.check_if_string_present_in_trial_logs(
                trial_id, expected_line)
def test_text_classification_glue_amp() -> None:
    """
    Run the huggingface/text-classification model-hub example from
    ``glue_config.yaml`` for 200 batches with the ``use_apex_amp``
    hyperparameter set to True.
    """
    model_dir = conf.model_hub_examples_path("huggingface/text-classification")
    config_file = os.path.join(model_dir, "glue_config.yaml")

    cfg = conf.load_config(config_file)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_hparam(cfg, "use_apex_amp", True)
    cfg = set_docker_image(cfg)

    # NOTE(review): the trailing 1 is presumably the expected trial count - confirm.
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_language_modeling_plm_amp() -> None:
    """
    Run the huggingface/language-modeling model-hub example from
    ``plm_config.yaml`` for 200 batches with the ``use_apex_amp``
    hyperparameter set to True.
    """
    model_dir = conf.model_hub_examples_path("huggingface/language-modeling")

    # Apply the overrides as one nested expression; evaluation order
    # (load -> max length -> hparam -> docker image) matches a step-by-step build.
    cfg = set_docker_image(
        conf.set_hparam(
            conf.set_max_length(
                conf.load_config(os.path.join(model_dir, "plm_config.yaml")),
                {"batches": 200},
            ),
            "use_apex_amp",
            True,
        )
    )

    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_token_classification_ner_amp() -> None:
    """
    Run the huggingface/token-classification model-hub example from
    ``ner_config.yaml`` on 8 slots with a global batch size of 32 for
    200 batches, with the ``use_apex_amp`` hyperparameter set to True.
    """
    model_dir = conf.model_hub_examples_path("huggingface/token-classification")
    yaml_path = os.path.join(model_dir, "ner_config.yaml")

    cfg = conf.load_config(yaml_path)
    # Resource / batch-size overrides first, then run length and AMP flag.
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_global_batch_size(cfg, 32)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_hparam(cfg, "use_apex_amp", True)
    cfg = set_docker_image(cfg)

    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_squad_v2_with_beam_search_amp() -> None:
    """
    Run the huggingface/question-answering model-hub example from
    ``squad_v2_beam_search.yaml`` on 8 slots with a global batch size of 16
    for 200 batches, with the ``use_apex_amp`` hyperparameter set to True.
    """
    qa_dir = conf.model_hub_examples_path("huggingface/question-answering")

    experiment_cfg = conf.load_config(
        os.path.join(qa_dir, "squad_v2_beam_search.yaml")
    )
    experiment_cfg = conf.set_slots_per_trial(experiment_cfg, 8)
    experiment_cfg = conf.set_global_batch_size(experiment_cfg, 16)
    experiment_cfg = conf.set_max_length(experiment_cfg, {"batches": 200})
    experiment_cfg = conf.set_hparam(experiment_cfg, "use_apex_amp", True)
    experiment_cfg = set_docker_image(experiment_cfg)

    exp.run_basic_test_with_temp_config(experiment_cfg, qa_dir, 1)
def test_multiple_choice_swag_amp() -> None:
    """
    Run the huggingface/multiple-choice model-hub example from
    ``swag_config.yaml`` on 8 slots with a global batch size of 64 for
    200 batches, with the ``use_apex_amp`` hyperparameter set to True.
    """
    swag_dir = conf.model_hub_examples_path("huggingface/multiple-choice")
    base_cfg = conf.load_config(os.path.join(swag_dir, "swag_config.yaml"))

    # Distributed-training settings.
    sized_cfg = conf.set_global_batch_size(
        conf.set_slots_per_trial(base_cfg, 8), 64
    )
    # Run length, AMP flag, and container image.
    final_cfg = set_docker_image(
        conf.set_hparam(
            conf.set_max_length(sized_cfg, {"batches": 200}),
            "use_apex_amp",
            True,
        )
    )

    exp.run_basic_test_with_temp_config(final_cfg, swag_dir, 1)
Beispiel #7
0
def test_detr_distributed_fake() -> None:
    """
    Run the ``mmdetection`` fixture from ``distributed_fake_data.yaml`` for
    200 batches, with the ``config_file`` hyperparameter pointed at the DETR
    R50 COCO config inside the container.
    """
    detr_config_file = "/mmdetection/configs/detr/detr_r50_8x2_150e_coco.py"
    fixture_dir = conf.fixtures_path("mmdetection")

    cfg = conf.load_config(os.path.join(fixture_dir, "distributed_fake_data.yaml"))
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = set_docker_image(cfg)
    cfg = conf.set_hparam(cfg, "config_file", detr_config_file)

    exp.run_basic_test_with_temp_config(cfg, fixture_dir, 1)