コード例 #1
0
    def _async_init(self, rank, device_id, args, model, nccl_uid):
        """Set up the training state held by this worker.

        Seeds torch, selects the CUDA device, initializes NCCL, moves the
        model to the GPU, then builds the NAG optimizer and the LR scheduler.

        Args:
            rank: not read by this method.
            device_id: passed to ``torch.cuda.set_device`` and to
                ``nccl.initialize``.
            args: options object; ``seed``, ``lr``, ``momentum`` and
                ``weight_decay`` are read from it.
            model: object whose ``.cuda()`` result is stored as ``self.model``.
            nccl_uid: forwarded unchanged to ``nccl.initialize``.
        """
        self.args = args

        # Seed torch's RNG for this process.
        torch.manual_seed(args.seed)

        # Select the CUDA device, then initialize NCCL with the same id.
        torch.cuda.set_device(device_id)
        nccl.initialize(self.num_replicas, nccl_uid, device_id)

        # Keep the result of moving the model to the GPU.
        self.model = model.cuda()

        # Build the optimizer from the stored model's parameters and the
        # hyperparameters carried on the options object.
        opts = self.args
        params = self.model.parameters()
        hyperparams = {
            'lr': opts.lr,
            'momentum': opts.momentum,
            'weight_decay': opts.weight_decay,
        }
        self.optimizer = NAG(params, **hyperparams)
        self.flat_grads = None  # starts unset

        # The scheduler is built last, after the optimizer exists.
        self.lr_scheduler = self._build_lr_scheduler()
コード例 #2
0
    def _async_init(self,
                    rank,
                    device_id,
                    args,
                    model,
                    nccl_uid,
                    src_dict=None,
                    dst_dict=None):
        """Initialize child processes.

        Seeds torch, selects the CUDA device, initializes NCCL, moves the
        model to the GPU, builds the NAG optimizer and LR scheduler, and
        finally creates a beam-size-1 ``SequenceGenerator`` over the model.

        Args:
            rank: not read by this method.
            device_id: passed to ``torch.cuda.set_device`` and to
                ``nccl.initialize``.
            args: options object; ``seed``, ``lr``, ``momentum``,
                ``weight_decay``, ``enable_rl``, ``no_early_stop``,
                ``unnormalized`` and ``lenpen`` are read from it.
            model: object whose ``.cuda()`` result is stored as ``self.model``.
            nccl_uid: forwarded unchanged to ``nccl.initialize``.
            src_dict: stored as ``self.src_dict`` (defaults to ``None``).
            dst_dict: stored as ``self.dst_dict`` and passed to the
                generator (defaults to ``None``).
        """
        # Stored once; the original assigned this attribute a second time
        # further down with the same value.
        self.args = args

        # set torch.seed in this process
        torch.manual_seed(args.seed)

        # set CUDA device
        torch.cuda.set_device(device_id)

        # initialize NCCL
        nccl.initialize(self.num_replicas, nccl_uid, device_id)

        # copy model to current device
        self.model = model.cuda()

        # initialize optimizer
        self.optimizer = NAG(self.model.parameters(),
                             lr=self.args.lr,
                             momentum=self.args.momentum,
                             weight_decay=self.args.weight_decay)
        self.flat_grads = None

        # initialize LR scheduler
        self.lr_scheduler = self._build_lr_scheduler()

        self.src_dict = src_dict
        self.dst_dict = dst_dict
        self.enable_rl = args.enable_rl

        # Initialize generator. SequenceGenerator accepts a list of models;
        # hand it the object returned by ``model.cuda()`` so the generator
        # and the optimizer are guaranteed to refer to the same model.
        models = [self.model]
        self.generator = SequenceGenerator(
            models,
            dst_dict,
            beam_size=1,
            stop_early=(not args.no_early_stop),
            normalize_scores=(not args.unnormalized),
            len_penalty=args.lenpen).cuda()
コード例 #3
0
    def _async_init(self, rank, device_id, args, model, criterion, nccl_uid):
        """Set up the training state held by this worker.

        Selects the CUDA device, initializes NCCL, moves the model and the
        criterion to the GPU, parses the learning-rate option and builds the
        optimizer and LR scheduler.

        Args:
            rank: not read by this method.
            device_id: passed to ``torch.cuda.set_device`` and to
                ``nccl.initialize``.
            args: options object; its ``lr`` attribute is split on commas
                and replaced in place by a list of floats.
            model: object whose ``.cuda()`` result is stored as ``self.model``.
            criterion: object whose ``.cuda()`` result is stored as
                ``self.criterion``.
            nccl_uid: forwarded unchanged to ``nccl.initialize``.
        """
        self.args = args

        # Select the CUDA device, then initialize NCCL with the same id.
        torch.cuda.set_device(device_id)
        nccl.initialize(self.num_replicas, nccl_uid, device_id)

        # Keep the results of moving the model and criterion to the GPU.
        self.model = model.cuda()
        self.criterion = criterion.cuda()

        # Turn the comma-separated learning-rate string into a list of
        # floats before the optimizer and scheduler are built.
        lr_values = [float(piece) for piece in self.args.lr.split(',')]
        self.args.lr = lr_values
        self.optimizer = self._build_optimizer()
        self.lr_scheduler = self._build_lr_scheduler()

        # Bookkeeping attributes start out empty / zero.
        self.loss = None
        self._max_bsz_seen = 0
コード例 #4
0
    def _async_init(self, rank, device_id, args, model, criterion, nccl_uid):
        """Set up the training state held by this worker.

        Selects the CUDA device, initializes NCCL, moves the model and the
        criterion to the GPU, then builds the optimizer and LR scheduler.

        Args:
            rank: not read by this method.
            device_id: passed to ``torch.cuda.set_device`` and to
                ``nccl.initialize``.
            args: options object, stored as ``self.args``.
            model: object whose ``.cuda()`` result is stored as ``self.model``.
            criterion: object whose ``.cuda()`` result is stored as
                ``self.criterion``.
            nccl_uid: forwarded unchanged to ``nccl.initialize``.
        """
        self.args = args

        # Select the CUDA device, then initialize NCCL with the same id.
        torch.cuda.set_device(device_id)
        nccl.initialize(self.num_replicas, nccl_uid, device_id)

        # Keep the results of moving the model and criterion to the GPU.
        self.model = model.cuda()
        self.criterion = criterion.cuda()

        # Build the optimizer; flat_grads and loss start unset.
        self.optimizer = self._build_optimizer()
        self.flat_grads = None
        self.loss = None

        # The scheduler is built last, after the optimizer exists.
        self.lr_scheduler = self._build_lr_scheduler()