def run_mp_worker(args, available_workers):
    benchmark_config = create_benchmark_config(args.model_name)
    model_config = create_model_config(args, config=benchmark_config)
    model = model_config["model"]

    # Weighted split of the model's layers across the pipeline-parallel group.
    balance = generate_balance_weighted(get_pipeline_parallel_group().size(), len(model), 0.8)
    pipe_model = MultiProcessPipe(
        model,
        balance,
        style=MultiProcessPipe.AsyncSchedule,
        chunks=args.chunks,
        worker_map=get_worker_map(),
        input_device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
        pipelined_backward=args.pipelined_backward,
        checkpoint=args.checkpoint,
        # TODO(anj-s): Do we need to comment this out?
        loss_fn=benchmark_config["criterion"],
    )
    if torch.cuda.is_available():
        pipe_model = pipe_model.cuda()

    if args.all_at_once and pipe_model.pipeline:
        print("running all at once")
        pipe_model.pipeline.all_at_once = True

    if args.use_synthetic_data:
        train(model_config, pipe_model, benchmark_config, args)
    else:
        benchmark_language_model(model_config, pipe_model, benchmark_config, args)

def run_mp_worker(args, available_workers):
    benchmark_config = create_benchmark_config(args.model_name)
    model_specs = get_model_specs(args.model_name)
    model_config = create_model_config(args, benchmark_config=benchmark_config, model_specs=model_specs)
    model = model_config["model"]

    # Evenly split the model's layers across the pipeline-parallel group.
    balance = generate_balance(get_pipeline_parallel_group().size(), len(model))
    pipe_model = MultiProcessPipe(
        model,
        balance,
        chunks=args.chunks,
        worker_map=get_worker_map(),
        input_device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
        checkpoint=args.checkpoint,
        # TODO(anj-s): Do we need to comment this out?
        loss_fn=benchmark_config["criterion"],
    )
    if torch.cuda.is_available():
        pipe_model = pipe_model.cuda()

    if args.dry_run:
        train(model_config, pipe_model, benchmark_config, model_specs, args)
    else:
        benchmark_language_model(model_config, pipe_model, benchmark_config, model_specs, args)

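# Neither balance helper is defined in this excerpt. The sketch below is an
# assumed, illustrative implementation under the obvious reading of the call
# sites above (generate_balance(n, layers) splits the layers evenly over n
# pipeline stages; generate_balance_weighted(n, layers, fraction) hands the
# first stage roughly `fraction` of the layers). It is not fairscale's actual
# code.
def generate_balance(num_stages, num_layers):
    # Even split; earlier stages absorb the remainder.
    base, remainder = divmod(num_layers, num_stages)
    return [base + 1 if i < remainder else base for i in range(num_stages)]


def generate_balance_weighted(num_stages, num_layers, fraction):
    # Give the first stage ~fraction of the layers and split the rest evenly,
    # keeping at least one layer per stage.
    if num_stages == 1:
        return [num_layers]
    first = max(1, min(num_layers - (num_stages - 1), int(num_layers * fraction)))
    return [first] + generate_balance(num_stages - 1, num_layers - first)
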
def deny_moving(pipeline_style):
    a = nn.Linear(1, 1)
    b = nn.Linear(1, 1)

    model = nn.Sequential(a, b)
    model = MultiProcessPipe(model, [1, 1], style=pipeline_style, worker_map=get_worker_map())

    # Whole-model device moves: the pipe is expected to deny these (hence the
    # test name), since it manages device placement itself.
    model.cuda()
    model.cpu()
    model.to(torch.device("cuda"))
    model.to(0)
    model.to("cuda")
    model.to(device=0)
    model.to(torch.rand(1))
    model.to(tensor=torch.rand(1))

    # Casting is allowed.
    model.half()
    model.to(torch.double)
    model.to(dtype=torch.float)

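# deny_moving exercises the convention that a pipe pins its partitions to
# fixed devices: whole-model device moves are rejected while pure dtype casts
# (half/double/float) still go through. The class below is a minimal sketch of
# that pattern on a plain nn.Module; it assumes nothing about fairscale's
# internals and is not MultiProcessPipe's implementation.
import torch
from torch import nn

MOVING_DENIED = TypeError("denied to move parameters and buffers: the pipe manages device placement")


class MovingDeniedModule(nn.Module):
    def cuda(self, device=None):
        raise MOVING_DENIED

    def cpu(self):
        raise MOVING_DENIED

    def to(self, *args, **kwargs):
        # Deny to(device, ...) and to(tensor, ...); allow to(dtype, ...).
        if "device" in kwargs or "tensor" in kwargs:
            raise MOVING_DENIED
        if args and (isinstance(args[0], (torch.device, int, str)) or torch.is_tensor(args[0])):
            raise MOVING_DENIED
        return super().to(*args, **kwargs)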