def _async_init(self, rank, device_id, args, model, nccl_uid):
    """Initialize child processes.

    Runs once inside each spawned data-parallel worker: seeds the RNG,
    binds the process to its GPU, joins the NCCL communicator, and builds
    the per-replica model/optimizer/LR-scheduler state.

    Args:
        rank: replica index of this worker (unused in this variant —
            presumably kept for a uniform spawn signature; verify against caller).
        device_id: CUDA device ordinal this worker should own.
        args: parsed command-line namespace (reads seed, lr, momentum,
            weight_decay here).
        model: the model to replicate; moved onto this worker's GPU.
        nccl_uid: shared NCCL unique id used to rendezvous all replicas.
    """
    self.args = args
    # set torch.seed in this process (same seed in every replica)
    torch.manual_seed(args.seed)
    # set CUDA device before any .cuda() calls so tensors land on the right GPU
    torch.cuda.set_device(device_id)
    # initialize NCCL: join the communicator spanning all replicas
    nccl.initialize(self.num_replicas, nccl_uid, device_id)
    # copy model to current device
    self.model = model.cuda()
    # initialize optimizer (Nesterov accelerated gradient)
    self.optimizer = NAG(self.model.parameters(), lr=self.args.lr, momentum=self.args.momentum, weight_decay=self.args.weight_decay)
    # flattened gradient buffer; allocated lazily elsewhere on first use
    self.flat_grads = None
    # initialize LR scheduler
    self.lr_scheduler = self._build_lr_scheduler()
def _async_init(self, rank, device_id, args, model, nccl_uid, src_dict=None, dst_dict=None):
    """Initialize child processes.

    Runs once inside each spawned data-parallel worker: seeds the RNG,
    binds the process to its GPU, joins the NCCL communicator, builds the
    per-replica model/optimizer/LR-scheduler state, and constructs a
    greedy (beam=1) SequenceGenerator for RL-style sampling.

    Fix: the original body assigned ``self.args = args`` twice (once at
    the top and again after ``self.enable_rl``); the redundant second
    assignment has been removed.

    Args:
        rank: replica index of this worker (unused in this variant —
            presumably kept for a uniform spawn signature; verify against caller).
        device_id: CUDA device ordinal this worker should own.
        args: parsed command-line namespace (reads seed, lr, momentum,
            weight_decay, enable_rl, no_early_stop, unnormalized, lenpen).
        model: the model to replicate; moved onto this worker's GPU.
        nccl_uid: shared NCCL unique id used to rendezvous all replicas.
        src_dict: source-side vocabulary (optional; stored for later use).
        dst_dict: target-side vocabulary, also passed to the generator.
    """
    self.args = args
    # set torch.seed in this process (same seed in every replica)
    torch.manual_seed(args.seed)
    # set CUDA device before any .cuda() calls so tensors land on the right GPU
    torch.cuda.set_device(device_id)
    # initialize NCCL: join the communicator spanning all replicas
    nccl.initialize(self.num_replicas, nccl_uid, device_id)
    # copy model to current device (nn.Module.cuda() moves in place and
    # returns the same object, so `model` below aliases `self.model`)
    self.model = model.cuda()
    # initialize optimizer (Nesterov accelerated gradient)
    self.optimizer = NAG(self.model.parameters(), lr=self.args.lr, momentum=self.args.momentum, weight_decay=self.args.weight_decay)
    # flattened gradient buffer; allocated lazily elsewhere on first use
    self.flat_grads = None
    # initialize LR scheduler
    self.lr_scheduler = self._build_lr_scheduler()
    self.src_dict = src_dict
    self.dst_dict = dst_dict
    self.enable_rl = args.enable_rl
    # Initialize generator
    models = [model]  # SequenceGenerator accepts a list of models
    self.generator = SequenceGenerator(
        models, dst_dict, beam_size=1,
        stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized),
        len_penalty=args.lenpen).cuda()
def _async_init(self, rank, device_id, args, model, criterion, nccl_uid):
    """Initialize child processes.

    Runs once inside each spawned data-parallel worker: binds the process
    to its GPU, joins the NCCL communicator, moves model and criterion to
    the device, and builds optimizer/LR-scheduler state.

    Args:
        rank: replica index of this worker (unused in this variant —
            presumably kept for a uniform spawn signature; verify against caller).
        device_id: CUDA device ordinal this worker should own.
        args: parsed command-line namespace; note ``args.lr`` is expected
            to be a comma-separated string here and is mutated in place
            into a list of floats.
        model: the model to replicate; moved onto this worker's GPU.
        criterion: the loss module; also moved onto this worker's GPU.
        nccl_uid: shared NCCL unique id used to rendezvous all replicas.
    """
    self.args = args
    # set CUDA device before any .cuda() calls so tensors land on the right GPU
    torch.cuda.set_device(device_id)
    # initialize NCCL: join the communicator spanning all replicas
    nccl.initialize(self.num_replicas, nccl_uid, device_id)
    # copy model and criterion to current device
    self.model = model.cuda()
    self.criterion = criterion.cuda()
    # initialize optimizer and LR scheduler
    # NOTE: mutates args.lr from "1.0,0.5,..." string into [1.0, 0.5, ...]
    # before the optimizer/scheduler read it
    self.args.lr = list(map(float, self.args.lr.split(',')))
    self.optimizer = self._build_optimizer()
    self.lr_scheduler = self._build_lr_scheduler()
    # most recent loss value; populated elsewhere after forward/backward
    self.loss = None
    # largest batch size seen so far — presumably used to grow cached
    # buffers elsewhere; verify against the training loop
    self._max_bsz_seen = 0
def _async_init(self, rank, device_id, args, model, criterion, nccl_uid):
    """Initialize child processes.

    Runs once inside each spawned data-parallel worker: binds the process
    to its GPU, joins the NCCL communicator, moves model and criterion to
    the device, and builds optimizer/LR-scheduler state.

    Args:
        rank: replica index of this worker (unused in this variant —
            presumably kept for a uniform spawn signature; verify against caller).
        device_id: CUDA device ordinal this worker should own.
        args: parsed command-line namespace.
        model: the model to replicate; moved onto this worker's GPU.
        criterion: the loss module; also moved onto this worker's GPU.
        nccl_uid: shared NCCL unique id used to rendezvous all replicas.
    """
    self.args = args
    # set CUDA device before any .cuda() calls so tensors land on the right GPU
    torch.cuda.set_device(device_id)
    # initialize NCCL: join the communicator spanning all replicas
    nccl.initialize(self.num_replicas, nccl_uid, device_id)
    # copy model and criterion to current device
    self.model = model.cuda()
    self.criterion = criterion.cuda()
    # initialize optimizer
    self.optimizer = self._build_optimizer()
    # flattened gradient buffer; allocated lazily elsewhere on first use
    self.flat_grads = None
    # most recent loss value; populated elsewhere after forward/backward
    self.loss = None
    # initialize LR scheduler
    self.lr_scheduler = self._build_lr_scheduler()