Example #1
def run_mp_worker(args, available_workers):
    new_data = True

    blob = make_model_and_data(args, None, new_data=new_data)
    model = blob["model"]

    # Split the model's layers across at most 8 pipeline stages.
    balance = generate_balance(min(available_workers, 8), len(model))
    p = pipe.Pipe(
        model,
        balance,
        style=Pipe.MultiProcess,
        chunks=args.chunks,
        worker_map=get_worker_map(),
        input_device=torch.cuda.current_device(),
        pipelined_backward=args.pipelined_backward,
        checkpoint=args.checkpoint,
    ).cuda()

    if args.all_at_once and p.pipeline:
        print(f"running all at once")
        p.pipeline.all_at_once = True

    if new_data:
        train(blob["data"], p, blob["criterion"], blob["optimizer"],
              blob["vocab_size"], args)
    else:
        ntokens, train_data, val_data, test_data = blob["data"]
        benchmark_language_model(train_data, val_data, test_data, p,
                                 blob["criterion"], blob["optimizer"],
                                 ntokens, args)
Example #2
def bench_single_process(args):
    num_devices = torch.cuda.device_count()
    assert num_devices > 0
    init_random_seed(0)
    device = torch.device("cuda")

    new_data = True

    blob = make_model_and_data(args, None, new_data=new_data)
    model = blob["model"]

    balance = generate_balance(min(num_devices, 8), len(model))
    p = pipe.Pipe(model,
                  balance,
                  chunks=args.chunks,
                  pipelined_backward=args.pipelined_backward,
                  checkpoint=args.checkpoint)
    # Drop the remaining references to the unpartitioned model so that
    # only the Pipe-wrapped copy stays alive.
    del model
    del blob["model"]

    if new_data:
        train(blob["data"], p, blob["criterion"], blob["optimizer"],
              blob["vocab_size"], args)
    else:
        ntokens, train_data, val_data, test_data = blob["data"]
        benchmark_language_model(train_data, val_data, test_data, p,
                                 blob["criterion"], blob["optimizer"],
                                 ntokens, args)
Example #3
def run_mp_worker(args, available_workers):
    new_data = True

    blob = make_model_and_data(args, None, new_data=new_data)
    model = blob["model"]

    balance = generate_balance_weighted(get_pipeline_parallel_group().size(),
                                        len(model), 0.8)
    p = pipe.Pipe(
        model,
        balance,
        style=Pipe.AsyncSchedule,
        chunks=args.chunks,
        worker_map=get_worker_map(),
        input_device=torch.device("cuda")
        if torch.cuda.is_available() else torch.device("cpu"),
        pipelined_backward=args.pipelined_backward,
        checkpoint=args.checkpoint,
        # loss_fn=blob["criterion"],
    )
    if torch.cuda.is_available():
        p = p.cuda()
    if args.all_at_once and p.pipeline:
        print(f"running all at once")
        p.pipeline.all_at_once = True

    if new_data:
        train(blob["data"], p, blob["criterion"], blob["optimizer"],
              blob["vocab_size"], args)
    else:
        ntokens, train_data, val_data, test_data = blob["data"]
        benchmark_language_model(train_data, val_data, test_data, p,
                                 blob["criterion"], blob["optimizer"],
                                 ntokens, args)
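Note that Example #3, unlike Example #1, guards every CUDA call behind torch.cuda.is_available(), so the worker also runs on CPU-only machines. A minimal sketch of the same device-agnostic pattern in isolation (the shapes are illustrative):

import torch

# Pick the device the way the input_device argument above does.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

batch = torch.randn(8, 16, device=device)
layer = torch.nn.Linear(16, 4).to(device)
print(layer(batch).shape)  # torch.Size([8, 4])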
Example #4
def run_mp_worker(args, available_workers):

    benchmark_config = create_benchmark_config(args.model_name)
    model_config = create_model_config(args, config=benchmark_config)
    model = model_config["model"]

    balance = generate_balance_weighted(get_pipeline_parallel_group().size(), len(model), 0.8)
    pipe_model = pipe.Pipe(
        model,
        balance,
        style=Pipe.AsyncSchedule,
        chunks=args.chunks,
        worker_map=get_worker_map(),
        input_device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
        pipelined_backward=args.pipelined_backward,
        checkpoint=args.checkpoint,
        # TODO(anj-s): Do we need to comment this out? loss_fn=benchmark_config["criterion"],
    )
    if torch.cuda.is_available():
        pipe_model = pipe_model.cuda()
    if args.all_at_once and pipe_model.pipeline:
        print(f"running all at once")
        pipe_model.pipeline.all_at_once = True

    if args.use_synthetic_data:
        train(model_config, pipe_model, benchmark_config, args)
    else:
        benchmark_language_model(model_config, pipe_model, benchmark_config, args)
Example #5
def benchmark_single_process(args):
    """Benchmark a given model using a single process and multiple devices."""

    num_devices = torch.cuda.device_count() if torch.cuda.is_available() else 1
    assert num_devices > 0
    init_random_seed(0)

    benchmark_config = create_benchmark_config(args.model_name)
    model_config = create_model_config(args, config=benchmark_config)
    model = model_config["model"]

    balance = generate_balance(min(num_devices, 4), len(model))
    pipe_model = pipe.Pipe(model,
                           balance,
                           chunks=args.chunks,
                           pipelined_backward=args.pipelined_backward,
                           checkpoint=args.checkpoint)
    del model
    del model_config["model"]

    if args.dry_run:
        train(model_config, pipe_model, benchmark_config, args)
    else:
        benchmark_language_model(model_config, pipe_model, benchmark_config,
                                 args)
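Across the examples, the args object carries the same handful of knobs. A hypothetical argparse wiring that would satisfy the attribute accesses above; the names come from the snippets, but the defaults and help strings are illustrative guesses, not the benchmark's real CLI:

import argparse

parser = argparse.ArgumentParser(description="Pipe benchmark (sketch)")
parser.add_argument("--model_name", default="lm", help="benchmark model to build")
parser.add_argument("--chunks", type=int, default=4, help="micro-batches per mini-batch")
parser.add_argument("--checkpoint", default="except_last", help="activation checkpointing mode")
parser.add_argument("--pipelined_backward", action="store_true")
parser.add_argument("--all_at_once", action="store_true")
parser.add_argument("--use_synthetic_data", action="store_true")
parser.add_argument("--dry_run", action="store_true")
args = parser.parse_args([])  # parse an empty argv so the sketch runs as-is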
Example #6
        print("No regression detected")


def generate_balance(num_devices, num_layers):
    """Spread num_layers across num_devices as evenly as possible,
    front-loading the extra layers when the division is not exact."""
    balance = []
    layers_assigned = 0
    for i in range(num_devices):
        # Average number of layers left per remaining device.
        x = (num_layers - layers_assigned) / (num_devices - i)
        if x.is_integer():
            balance.append(int(x))
            layers_assigned += x
        else:
            balance.append(math.ceil(x))
            layers_assigned += math.ceil(x)
    return balance
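
As a quick check of the splitting logic, the remainder is front-loaded, so earlier devices receive the extra layer (values traced by hand through the loop above):

# 10 layers over 4 devices: 10/4 = 2.5, so the first two devices take
# ceil(2.5) = 3 layers each and the remainder then divides evenly.
print(generate_balance(4, 10))  # [3, 3, 2, 2]
print(generate_balance(3, 9))   # [3, 3, 3] -- divides evenly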


if __name__ == "__main__":
    num_devices = torch.cuda.device_count()
    assert num_devices > 0

    torch.manual_seed(0)
    device = torch.device("cuda")
    ntokens, train_data, val_data, test_data = get_data(device)
    model, criterion, optimizer = make_model(device, ntokens)
    balance = generate_balance(min(num_devices, 4), len(model))
    p = pipe.Pipe(model, balance)
    benchmark_language_model(train_data, val_data, test_data, p, criterion,
                             optimizer, ntokens)
    del p