Python CudaScope Examples, utils.c2.CudaScope Python Examples

Example #1

0

Show file

File: detector.py Project: v-chixma/detectron

 def _SetNewLr(self, cur_lr, new_lr):
     """Write ``new_lr`` into every GPU's lr blob, then fix up momentum.

     A one-element float32 array holding ``new_lr`` is fed into the
     ``gpu_<i>/lr`` workspace blob for each of the ``cfg.NUM_GPUS`` devices.
     Afterwards the momentum history is corrected by ``new_lr / cur_lr``
     when all of the following hold: ``cfg.SOLVER.SCALE_MOMENTUM`` is set,
     ``cur_lr`` is above 1e-7 (which also keeps the division safe), and the
     value returned by ``_get_lr_change_ratio`` exceeds
     ``cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD``.
     """
     lr_value = np.array([new_lr], dtype=np.float32)
     for gpu_id in range(cfg.NUM_GPUS):
         blob_name = 'gpu_{}/lr'.format(gpu_id)
         with c2_utils.CudaScope(gpu_id):
             workspace.FeedBlob(blob_name, lr_value)

     change_ratio = _get_lr_change_ratio(cur_lr, new_lr)
     needs_correction = (
         cfg.SOLVER.SCALE_MOMENTUM
         and cur_lr > 1e-7
         and change_ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD
     )
     if needs_correction:
         self._CorrectMomentum(new_lr / cur_lr)

Example #2

0

Show file

File: net.py Project: Captain1986/masktextspotter.caffe2

 def _do_broadcast(all_blobs):
     """Copy the first replica of each blob onto all of its other replicas.

     ``all_blobs`` is split into ``cfg.NUM_GPUS`` equally sized consecutive
     groups. Entries at the same offset within each group are treated as
     replicas of one blob: the replica from the first group is fetched from
     the workspace and fed to the replicas in groups 1, 2, ... under
     ``CudaScope(1)``, ``CudaScope(2)``, ... respectively.

     Args:
         all_blobs: sequence of blob names/references whose length is a
             multiple of ``cfg.NUM_GPUS``.

     Raises:
         AssertionError: if ``len(all_blobs)`` is not divisible by
             ``cfg.NUM_GPUS``.
     """
     assert len(all_blobs) % cfg.NUM_GPUS == 0, \
         ('Unexpected value for NUM_GPUS. Make sure you are not '
          'running single-GPU inference with NUM_GPUS > 1.')
     blobs_per_gpu = len(all_blobs) // cfg.NUM_GPUS
     for blob_idx in range(blobs_per_gpu):
         # Every blobs_per_gpu-th entry starting at blob_idx is a replica of
         # the same blob.
         replicas = all_blobs[blob_idx::blobs_per_gpu]
         source = replicas[0]
         data = workspace.FetchBlob(source)
         logger.debug('Broadcasting {} to'.format(str(source)))
         # Use a distinct name for the target GPU index so it does not shadow
         # the outer blob index; the source replica is skipped, so target
         # indices start at 1.
         for gpu_id, replica in enumerate(replicas[1:], start=1):
             logger.debug(' |-> {}'.format(str(replica)))
             with c2_utils.CudaScope(gpu_id):
                 workspace.FeedBlob(replica, data)

Example #3

0

Show file

File: model_builder_rel.py Project: leefree-GIT/Large-Scale-VRD

def scale_momentum(scale, model):
    """Multiply the ``<param>_momentum`` blobs by ``scale`` on every device.

    Needed for LR warm-up in distributed training: once the learning rate
    changes after warm-up, the momentum has to be updated accordingly.

    For each device id in ``[cfg.ROOT_DEVICE_ID, cfg.ROOT_DEVICE_ID +
    cfg.NUM_DEVICES)`` an in-place ``Scale`` operator is created and run once
    for every parameter returned by ``model.GetParams()``.
    """
    logger.info('Scaling momentum: {}'.format(scale))
    first_device = cfg.ROOT_DEVICE_ID
    last_device = first_device + cfg.NUM_DEVICES
    for device_id in range(first_device, last_device):
        with c2_utils.CudaScope(device_id):
            # Queried inside the scope on purpose: the returned parameter
            # list may depend on the active scope.
            for param in model.GetParams():
                momentum_blob = param + '_momentum'
                # Same blob as input and output, so the scaling is in place.
                scale_op = core.CreateOperator(
                    'Scale', [momentum_blob], [momentum_blob], scale=scale)
                workspace.RunOperatorOnce(scale_op)

Example #4

0

Show file

File: model_builder_rel.py Project: leefree-GIT/Large-Scale-VRD

def add_variable_stepsize_lr(
    curr_iter,
    num_devices,
    lr_iters,
    start_model_iter,
    model=None,
    prev_checkpointed_lr=None,
):
    """Compute the step-schedule learning rate and feed it to every device.

    The learning rate is ``cfg.SOLVER.BASE_LR * cfg.SOLVER.GAMMA ** k``,
    where ``k`` is the number of leading entries of ``lr_iters`` that are
    strictly smaller than ``curr_iter``. The result is then passed through
    ``check_and_apply_warmup``. If momentum scaling is enabled and the
    learning rate changed relative to the previous one, the momentum blobs
    are rescaled by ``new_lr / prev_lr``. Finally the ``gpu_<idx>/lr`` and
    ``gpu_<idx>/lr_x`` blobs (the latter multiplied by
    ``cfg.SOLVER.LR_FACTOR``) are fed on each device.

    Args:
        curr_iter: current training iteration.
        num_devices: number of devices to update, starting at
            ``cfg.ROOT_DEVICE_ID``.
        lr_iters: iteration boundaries of the step schedule (presumably in
            ascending order -- counting stops at the first boundary that is
            not below ``curr_iter``). May be empty, in which case the base
            learning rate is used.
        start_model_iter: iteration at which training (re)started.
        model: model whose momentum blobs are rescaled on an LR change.
        prev_checkpointed_lr: learning rate restored from a checkpoint, or
            ``None``; only applied when ``curr_iter == start_model_iter``.

    Returns:
        The learning rate now in effect (also stored in the module-level
        ``CURRENT_LR``).
    """
    global CURRENT_LR
    # If the model is resumed from some checkpoint state, load the checkpoint
    # LR into CURRENT_LR at the start of training only.
    if prev_checkpointed_lr is not None and curr_iter == start_model_iter:
        CURRENT_LR = prev_checkpointed_lr

    # Count the leading boundaries already passed. This also yields 0 when
    # curr_iter has not passed the first boundary or when lr_iters is empty,
    # so no special case (and no lr_iters[0] lookup) is needed.
    gamma_pow = 0
    for boundary in lr_iters:
        if not boundary < curr_iter:
            break
        gamma_pow += 1

    learning_rate = (cfg.SOLVER.BASE_LR *
                     math.pow(cfg.SOLVER.GAMMA, gamma_pow))
    learning_rate = check_and_apply_warmup(curr_iter, learning_rate)
    root_device_id = cfg.ROOT_DEVICE_ID
    new_lr = learning_rate
    # On the very first iteration there is no previous LR to compare with.
    prev_lr = new_lr if curr_iter == 1 else CURRENT_LR
    # A previous LR of exactly 0 cannot be used as a divisor, so momentum
    # scaling is skipped in that case.
    if cfg.SOLVER.SCALE_MOMENTUM and new_lr != prev_lr and prev_lr != 0:
        scale = new_lr / float(prev_lr)
        scale_momentum(scale, model)

    CURRENT_LR = new_lr
    # The fed values are identical for every device; build them once.
    lr_value = np.array(learning_rate, dtype=np.float32)
    lr_x_value = np.array(learning_rate * cfg.SOLVER.LR_FACTOR,
                          dtype=np.float32)
    for idx in range(root_device_id, root_device_id + num_devices):
        with c2_utils.CudaScope(idx):
            workspace.FeedBlob('gpu_{}/lr'.format(idx), lr_value)
            workspace.FeedBlob('gpu_{}/lr_x'.format(idx), lr_x_value)

    return CURRENT_LR

Example #5

0

Show file

File: detector.py Project: v-chixma/detectron

    def _CorrectMomentum(self, correction):
        """Rescale the momentum blob of every trainable parameter.

        MomentumSGDUpdate maintains the update history V as

            V := mu * V + lr * grad,

        with momentum factor mu, learning rate lr and stochastic gradient
        grad. Because V already has the learning rate folded in (ideally it
        would be independent of it), a change of learning rate requires V to
        be rescaled so that it stays on the same scale as lr * grad.

        For each GPU, every ``<param>_momentum`` blob is multiplied in place
        by ``correction`` via a ``Scale`` operator that is run once.
        """
        message = 'Scaling update history by {:.6f} (new lr / old lr)'.format(
            correction)
        logger.info(message)
        for device in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(device):
                for param in self.TrainableParams(gpu_id=device):
                    momentum_blob = param + '_momentum'
                    # Input and output are the same blob: in-place scaling.
                    scale_op = core.CreateOperator(
                        'Scale', [momentum_blob], [momentum_blob],
                        scale=correction)
                    workspace.RunOperatorOnce(scale_op)

Example #6

0

Show file

File: optimizer.py Project: zoombapup/DetectAndTrack

def _add_allreduce_graph(model):
    """Add ops to ``model.net`` that all-reduce gradients across GPUs.

    ``model.TrainableParams()`` holds one copy of every parameter per GPU, so
    its length must be a multiple of ``cfg.NUM_GPUS``. Copies of the same
    parameter sit ``len(params) / NUM_GPUS`` entries apart; their gradients
    are gathered and reduced together, using NCCL when ``cfg.USE_NCCL`` is
    set and muji otherwise.
    """
    trainable = model.TrainableParams()
    assert len(trainable) % cfg.NUM_GPUS == 0
    # Number of distinct parameter blobs, i.e. parameters on a single GPU.
    stride = len(trainable) // cfg.NUM_GPUS
    with c2_utils.CudaScope(0):
        for offset in range(stride):
            # One gradient per GPU for the parameter at this offset.
            grads = [
                model.param_to_grad[param]
                for param in trainable[offset::stride]
            ]
            if not grads:
                continue
            if cfg.USE_NCCL:
                model.net.NCCLAllreduce(grads, grads)
            else:
                muji.Allreduce(model.net, grads, reduced_affix='')