@classmethod
def from_pretrained(cls, model_name_or_path=None, seq_len=512,
                    weights_path=None, deepspeed_config_path=None):
    # Bring up a single-process distributed group so the model-parallel
    # utilities (mpu) can be used for inference.
    init_method = 'tcp://' + os.getenv('MASTER_ADDR', 'localhost') + ':' + os.getenv(
        'MASTER_PORT', '6000')
    try:
        torch.distributed.init_process_group(
            backend='nccl', world_size=1, rank=0, init_method=init_method)
        mpu.initialize_model_parallel(1)
    except RuntimeError:
        logger.info("The default process group has already been initialized...")

    # Fix all random seeds for reproducible generation.
    seed = 1234
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    mpu.model_parallel_cuda_manual_seed(seed)

    tokenizer = GPT2Tokenizer.from_pretrained(model_name_or_path)

    # Download the checkpoint and DeepSpeed config if no local weights are given.
    logger.info("Check cached model files...")
    if weights_path is None:
        weights_path, deepspeed_config_path = download_model_files(model_name_or_path)

    # Build the model, move it to the GPU and switch to evaluation mode.
    model = setup_model(weights_path, deepspeed_config_path)
    model.cuda()
    model = model.eval()
    return cls(model, tokenizer=tokenizer, seq_len=seq_len, model_path=model_name_or_path)
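
# A minimal usage sketch for from_pretrained(). "RuGPT3Wrapper" and the model
# path are placeholders for the enclosing class and a real checkpoint id,
# neither of which is defined in this excerpt:
#
#     wrapper = RuGPT3Wrapper.from_pretrained("path/to/model", seq_len=512)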

def initialize_distributed(args):
    """Initialize torch.distributed."""
    # Manually set the device ids.
    device = args.rank % torch.cuda.device_count()
    if args.local_rank is not None:
        device = args.local_rank
    torch.cuda.set_device(device)

    # Call the init process.
    init_method = 'tcp://'
    master_ip = os.getenv('MASTER_ADDR', 'localhost')
    master_port = os.getenv('MASTER_PORT', str(args.master_port))
    init_method += master_ip + ':' + master_port
    torch.distributed.init_process_group(
        backend=args.distributed_backend,
        world_size=args.world_size,
        rank=args.rank,
        init_method=init_method)

    # Set the model-parallel / data-parallel communicators.
    mpu.initialize_model_parallel(args.model_parallel_size)

    # Optional DeepSpeed activation checkpointing features.
    if DEEPSPEED_WRAP and args.deepspeed and args.deepspeed_activation_checkpointing:
        set_deepspeed_activation_checkpointing(args)
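
# A single-process launch sketch for initialize_distributed(). The attribute
# values below are illustrative assumptions; only the attribute names are taken
# from the function above (MASTER_ADDR/MASTER_PORT fall back to their defaults):
#
#     import argparse
#     args = argparse.Namespace(
#         rank=0, local_rank=0, world_size=1, master_port=6000,
#         distributed_backend='nccl', model_parallel_size=1,
#         deepspeed=False, deepspeed_activation_checkpointing=False)
#     initialize_distributed(args)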