def test_neoxargs_consume_deepy_args_with_config_dir():
    """
    verify consume_deepy_args processes command line arguments including config dir
    """

    from megatron.neox_arguments import NeoXArgs

    # load neox args with command line
    with patch(
        "sys.argv",
        [
            str(get_root_directory() / "deepy.py"),
            "train.py",
            "-d",
            str(get_config_directory()),
            "small.yml",
            "local_setup.yml",
        ],
    ):
        args_loaded_consume = NeoXArgs.consume_deepy_args()

    # load neox args directly from yaml files
    args_loaded_yamls = NeoXArgs.from_ymls(
        get_configs_with_path(["small.yml", "local_setup.yml"]))

    # update values from yaml files that cannot otherwise be matched
    args_loaded_yamls.update_value("user_script", "train.py")
    args_loaded_yamls.wandb_group = args_loaded_consume.wandb_group

    assert args_loaded_yamls == args_loaded_consume
Example #2
def run_neox_args_load_test(yaml_files):
    import yaml

    from megatron.neox_arguments import NeoXArgs

    yaml_list = get_configs_with_path(yaml_files)
    args_loaded = NeoXArgs.from_ymls(yaml_list)
    assert isinstance(args_loaded, NeoXArgs)

    # initialize an empty config dictionary to be filled by yamls
    config = dict()

    # iterate over all yaml files to be loaded
    for conf_file_name in yaml_list:

        # load file
        with open(conf_file_name) as conf_file:
            conf = yaml.load(conf_file, Loader=yaml.FullLoader)

        # check for key duplicates and load values
        for conf_key, conf_value in conf.items():
            if conf_key in config:
                raise ValueError(
                    f"Conf file {conf_file_name} has a key that duplicates one from a previously loaded file: {conf_key}"
                )

            # TODO: remove this replace and update the configuration files instead?
            conf_key_converted = conf_key.replace("-", "_")
            config[conf_key_converted] = conf_value

    # validate that neox args has the same value as specified in the config (if specified in the config)
    for k, v in config.items():
        neox_args_value = getattr(args_loaded, k)
        assert v == neox_args_value, (
            f"loaded neox args value for {k} ({neox_args_value}) "
            f"differs from the config file value ({v})"
        )
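A brief usage sketch (not from the source) of how this helper could be driven from individual tests; the test name below is an illustrative assumption, and the config files reuse those seen in the other examples:

def test_neoxargs_load_small_local_setup():
    # hypothetical wrapper test around the helper above
    run_neox_args_load_test(["small.yml", "local_setup.yml"])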
Example #3
def get_neox_args(context):
    args = AttrMap(context.get_hparams())
    exp_config = context.get_experiment_config()

    # Gather overrides.
    overwrite_values = args.pop("overwrite_values")
    # We are going to overwrite certain neox_args with determined config values
    # from the experiment config to ensure consistency.
    assert ("batches" in exp_config["searcher"]["max_length"]
            ), "Please specify max_length in batches."
    assert ("batches" in exp_config["min_validation_period"]
            ), "Please specify min_validation_period in batches."
    overwrite_values.update({
        "train_iters":
        exp_config["searcher"]["max_length"]["batches"],
        "save_interval":
        exp_config["min_validation_period"]["batches"],
        "eval_interval":
        exp_config["min_validation_period"]["batches"],
        "global_num_gpus":
        context.distributed.get_size(),
        "seed":
        context.env.trial_seed,
    })
    for k, v in overwrite_values.items():
        logging.info(f"Setting neox_args.{k} to {v}")

    # Build neox args.
    neox_args = NeoXArgs.process_parsed_deepy_args(
        args, overwrite_values=overwrite_values)
    return neox_args
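A hedged sketch (not part of the source) of where get_neox_args might be wired in; the trial class below and its use of the Determined PyTorch API are assumptions for illustration only:

from determined.pytorch import PyTorchTrial, PyTorchTrialContext

class GPTNeoXTrial(PyTorchTrial):  # hypothetical trial class; remaining PyTorchTrial methods omitted
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        # resolve NeoX arguments from hparams plus the experiment-config overrides shown above
        self.neox_args = get_neox_args(context)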
Example #4
def setup_for_inference_or_eval(inference=True,
                                get_key_value=True,
                                overwrite_values=None):

    from megatron.neox_arguments import NeoXArgs
    from megatron.initialize import initialize_megatron
    from megatron.training import setup_model_and_optimizer

    _overwrite_values = {
        "checkpoint_activations": False,
        "partition_activations": False,
        "no_load_optim": True,
    }
    if overwrite_values:
        _overwrite_values.update(overwrite_values)
    neox_args = NeoXArgs.consume_neox_args(overwrite_values=_overwrite_values)
    neox_args.configure_distributed_args()
    neox_args.build_tokenizer()

    if neox_args.load is None:
        raise ValueError("`load` parameter must be supplied to load a model`")

    # initialize megatron
    initialize_megatron(neox_args)

    # set up model and load checkpoint.
    model, _, _ = setup_model_and_optimizer(
        neox_args=neox_args, inference=inference, get_key_value=get_key_value
    )  # we use setup_model_and_optimizer instead of get_model in order to initialize deepspeed
    print_rank_0('Finished loading model')
    return model, neox_args
Example #5
def test_neoxargs_consume_neox_args():
    """
    verify megatron args are correctly consumed after sending via deepspeed
    """
    from megatron.neox_arguments import NeoXArgs
    
    # intitially load config from files as would be the case in deepy.py
    yaml_list = get_configs_with_path(["small.yml", "local_setup.yml"])
    args_baseline = NeoXArgs.from_ymls(yaml_list)
    args_baseline.update_value("user_script", str(get_root_directory() / "pretrain_gpt2.py"))
    deepspeed_main_args = args_baseline.get_deepspeed_main_args()

    # patch sys.argv so that args can be accessed by set_global_variables within initialize_megatron
    with patch('sys.argv', deepspeed_main_args):
        args_loaded = NeoXArgs.consume_neox_args()

    # TODO: is the wandb group really supposed to be changed?
    args_loaded.wandb_group = args_baseline.wandb_group
    assert args_baseline.megatron_config == args_loaded.megatron_config
Example #6
def model_setup(yaml_list=None,
                param_dict=None,
                clear_data=True,
                inference=False):
    from megatron.neox_arguments import NeoXArgs
    from megatron.mpu import destroy_model_parallel
    from megatron import initialize_megatron
    from megatron.training import setup_model_and_optimizer

    destroy_model_parallel()  # mpu model parallel contains remaining global vars
    if clear_data and (not torch.distributed.is_initialized()
                       or torch.distributed.get_world_size() == 1
                       or torch.distributed.get_rank() == 0):
        clear_test_dirs()

    overwrite_values = {
        "user_script": str(get_root_directory() / "pretrain_gpt2.py"),
        "save": TEST_CHECKPOINT_DIR,
        "load": TEST_CHECKPOINT_DIR,
        "log_dir": TEST_LOG_DIR,
        "tensorboard_dir": TEST_TENSORBOARD_DIR,
    }

    # should not both be none
    assert yaml_list is not None or param_dict is not None

    # initially load config from files as would be the case in deepy.py
    if yaml_list is not None:
        args_loaded = NeoXArgs.from_ymls(yaml_list,
                                         overwrite_values=overwrite_values)
    else:
        p_dict = param_dict.copy()
        p_dict.update(overwrite_values)
        args_loaded = NeoXArgs.from_dict(p_dict)

    args_loaded.build_tokenizer()

    initialize_megatron(neox_args=args_loaded)
    model, optimizer, lr_scheduler = setup_model_and_optimizer(
        neox_args=args_loaded, inference=inference, get_key_value=True)
    return model, optimizer, lr_scheduler, args_loaded
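A minimal usage sketch (an assumption, not from the source) of consuming what model_setup returns inside a test and tearing parallel state down afterwards; config names reuse the yamls seen in the other examples:

def test_model_forward_smoke():
    # hypothetical test body
    model, optimizer, lr_scheduler, args_loaded = model_setup(
        yaml_list=get_configs_with_path(["small.yml", "local_setup.yml"]),
        inference=False,
    )
    # ... exercise the model here ...
    from megatron.mpu import destroy_model_parallel
    destroy_model_parallel()  # release model-parallel globals for the next test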
Example #7
def test_neoxargs_consume_deepy_args_without_yml_suffix():
    """
    verify consume_deepy_args processes command line arguments without yaml suffix
    """

    from megatron.neox_arguments import NeoXArgs

    # load neox args with command line
    with patch(
        "sys.argv",
        [str(get_root_directory() / "deepy.py"), "pretrain_gpt2.py"]
        + get_configs_with_path(["small", "local_setup"]),
    ):
        args_loaded_consume = NeoXArgs.consume_deepy_args()

    # load neox args directly from yaml files
    args_loaded_yamls = NeoXArgs.from_ymls(get_configs_with_path(["small.yml", "local_setup.yml"]))

    # update values from yaml files that cannot otherwise be matched
    args_loaded_yamls.update_value("user_script", "pretrain_gpt2.py")
    args_loaded_yamls.wandb_group = args_loaded_consume.wandb_group

    assert args_loaded_yamls == args_loaded_consume
Example #8
def test_neoxargs_fail_instantiate_without_any_params():
    """
    verify assertion error if required arguments are not provided
    """
    from megatron.neox_arguments import NeoXArgs

    try:
        NeoXArgs()
        assert False, "instantiating NeoXArgs without required arguments should raise"
    except Exception:
        pass
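The same negative test can be expressed more tightly with pytest's exception helper; a sketch, assuming pytest is available in the test environment:

import pytest

def test_neoxargs_fail_instantiate_without_any_params_alt():
    from megatron.neox_arguments import NeoXArgs

    with pytest.raises(Exception):
        NeoXArgs()  # missing required arguments should raise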
Example #9
def setup_for_inference_or_eval(
    use_cache=True,
    overwrite_values=None,
):
    """
    Initializes the model for evaluation or inference (doesn't load optimizer states, etc.) from command line args.

    use_cache: bool
        Whether to use key value caching in inference.
    overwrite_values: dict
        Optional Values to overwrite in the model config.
    """

    from megatron.neox_arguments import NeoXArgs
    from megatron.initialize import initialize_megatron
    from megatron.training import setup_model_and_optimizer

    _overwrite_values = {
        "checkpoint_activations": False,
        "partition_activations": False,
        "no_load_optim": True,
        # disable zero optimization (it won't be used in inference, and loading the zero optimizer can cause errors)
        "zero_optimization": None,
    }
    if overwrite_values:
        _overwrite_values.update(overwrite_values)
    neox_args = NeoXArgs.consume_neox_args(overwrite_values=_overwrite_values)
    neox_args.configure_distributed_args()
    neox_args.build_tokenizer()

    if neox_args.load is None:
        raise ValueError("`load` parameter must be supplied to load a model`")

    # initialize megatron
    initialize_megatron(neox_args)

    # set up model and load checkpoint.
    model, _, _ = setup_model_and_optimizer(
        neox_args=neox_args,
        use_cache=use_cache,
        iteration=neox_args.iteration,
    )  # we use setup_model_and_optimizer instead of get_model in order to initialize deepspeed
    print_rank_0("Finished loading model")

    model.module.inference_mode(use_cache=use_cache)
    return model, neox_args
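A short usage sketch (not from the source): the returned engine and args are typically handed on to a generation helper; the helper name and signature below are assumptions and may differ from the actual API:

model, neox_args = setup_for_inference_or_eval(use_cache=True)

# hypothetical downstream call, shown only to illustrate how the two return values are used together
from megatron.text_generation_utils import generate_samples_from_prompt
samples = generate_samples_from_prompt(neox_args=neox_args, model=model, text="Hello world")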
Example #10
# Copyright (c) 2021, EleutherAI contributors
# This file is based on code by the authors denoted below and has been modified from its original version.
#
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pretrain GPT2"""
from megatron.neox_arguments import NeoXArgs
from megatron.training import pretrain

if __name__ == "__main__":
    neox_args = NeoXArgs.consume_neox_args()
    neox_args.configure_distributed_args()
    neox_args.build_tokenizer()  # tokenizer needs to be built in training in order to set the padding vocab
    neox_args.initialize_tensorboard_writer()  # is initialized if a tensorboard directory is defined
    pretrain(neox_args=neox_args)
Example #11

import logging
import os

import deepspeed
from deepspeed.launcher.runner import main

logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))

from megatron.neox_arguments import NeoXArgs
from megatron.utils import get_wandb_api_key



neox_args = NeoXArgs.consume_deepy_args()
if neox_args.wandb_group is not None:
    # concat the wandb group name with a uid to make sure it's unique
    import wandb
    neox_args.wandb_group += "_" + wandb.util.generate_id()
neox_args.print()
deepspeed_main_args = neox_args.get_deepspeed_main_args()

# Extract wandb API key and inject into worker environments
wandb_token = get_wandb_api_key(neox_args=neox_args)
if wandb_token is not None:
    deepspeed.launcher.runner.EXPORT_ENVS.append('WANDB_API_KEY')
    os.environ['WANDB_API_KEY'] = wandb_token

if __name__ == '__main__':
    main(deepspeed_main_args)