import random
import deepspeed
import torch
from torch.utils.data import DataLoader
from tqdm.auto import trange
import torch.distributed as distributed

from gpt_neox import (GPTNeoX, AutoregressiveWrapper, GPT2Dataset,
                      extract_tarfile, prepare_optimizer_parameters,
                      get_tokenizer, is_main, prepare_data)

from gpt_neox.utils import get_args, get_params

train_args = get_args()
params = get_params(train_args.model)

# tokenizer
tokenizer = get_tokenizer(
    tokenizer_type=params["tokenizer"].get("type", None),
    from_pretrained=params["tokenizer"].get("from_pretrained", True),
    add_padding_token=params["tokenizer"].get("add_padding_token", False))
vocab_size = len(
    tokenizer) if params["vocab_size"] is None else params["vocab_size"]

# instantiate GPT-like decoder model
model = GPTNeoX(num_tokens=vocab_size,
                dim=params["hidden_dim"],
                seq_len=params["seq_len"],
                depth=params["n_layers"],
                heads=params["n_heads"],
                dim_head=params["dim_head"])
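
# The remainder of this example is cut off here. As a minimal sketch of the
# usual continuation (not the author's verbatim code): wrap the decoder for
# autoregressive training and hand it to DeepSpeed. `AutoregressiveWrapper`
# and `prepare_optimizer_parameters` are imported above; assuming the latter
# returns DeepSpeed-style parameter groups for `model_parameters`.
model = AutoregressiveWrapper(model)
model_params = prepare_optimizer_parameters(model)
model_engine, optimizer, _, _ = deepspeed.initialize(args=train_args,
                                                     model=model,
                                                     model_parameters=model_params)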

# Example #2
# The original import block for this example is not shown; the two imports
# below are assumptions inferred from names used further down
# (GPTNeoX_Pipe and DeepSpeed's RepeatingLoader).
from gpt_neox import GPTNeoX_Pipe
from deepspeed.runtime.dataloader import RepeatingLoader
def build_eval_data_iter(dataset, model):
    # Shard the eval dataset across data-parallel ranks (one shard per
    # replica); shuffling is off so every pass sees the same ordering.
    sampler = torch.utils.data.distributed.DistributedSampler(
        dataset,
        num_replicas=model.dp_world_size,
        rank=model.mpu.get_data_parallel_rank(),
        shuffle=False)
    # Build a loader and make it repeating.
    pipe_dataloader = model.deepspeed_io(dataset, data_sampler=sampler)
    pipe_dataloader = RepeatingLoader(pipe_dataloader)
    return pipe_dataloader
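
# Hypothetical usage with a DeepSpeed pipeline engine (`model_engine` and
# `eval_dataset` are illustrative names): the RepeatingLoader never
# exhausts, so it can feed `eval_batch` for as many evaluation steps as
# desired.
#
#   eval_loader = build_eval_data_iter(eval_dataset, model_engine)
#   eval_loss = model_engine.eval_batch(eval_loader)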


if __name__ == '__main__':
    # arguments
    args = get_args()
    params = get_params(args.model)

    IS_MAIN = is_main(args)

    # Set up torch.distributed with the NCCL backend; the pipeline module
    # built below requires an initialized process group.
    deepspeed.init_distributed(dist_backend='nccl')

    # tokenizer
    tokenizer = get_tokenizer(
        tokenizer_type=params["tokenizer"].get("type", None),
        from_pretrained=params["tokenizer"].get("from_pretrained", True),
        add_padding_token=params["tokenizer"].get("add_padding_token", False))
    vocab_size = len(
        tokenizer) if params["vocab_size"] is None else params["vocab_size"]

    # model
    model = GPTNeoX_Pipe(num_tokens=vocab_size,