Example 1
def _NormalizeNamescope(namescope):
    if namescope is None:
        return scope.CurrentNameScope()
    elif namescope == '' or namescope.endswith(scope._NAMESCOPE_SEPARATOR):
        return namescope
    else:
        return namescope + scope._NAMESCOPE_SEPARATOR
Example 2
    def get_parameter_name(self, name):
        candidate_scope = scope.CurrentNameScope()
        best_scope = self._resolve_scope_overrides(candidate_scope)
        if best_scope != candidate_scope:
            logger.info("Overwriting scope {0} with scope {1}".format(
                candidate_scope, best_scope))

        return best_scope + name
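Both `get_parameter_name` variants in this listing (here and in Example 19) prefix a plain parameter name with a resolved name scope. A minimal sketch of the underlying convention, using only the public `caffe2.python` scope helpers; the `_resolve_scope_overrides` step is specific to the layer model helper above and is omitted:

from caffe2.python import core, scope

# CurrentNameScope() returns '' at top level and a prefix ending with the
# separator ('/') inside a NameScope, so plain concatenation yields the
# fully scoped parameter name.
print(scope.CurrentNameScope() + "fc_w")          # fc_w
with core.NameScope("gpu_0"):
    with core.NameScope("encoder"):
        print(scope.CurrentNameScope() + "fc_w")  # gpu_0/encoder/fc_w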
Example 3
    def GetWeights(self, namescope=None):
        if namescope is None:
            namescope = scope.CurrentNameScope()

        if namescope == '':
            return self.weights[:]
        else:
            return [w for w in self.weights if w.GetNameScope() == namescope]
Example 4
def _get_weights(model, namescope=None):
    if namescope is None:
        namescope = scope.CurrentNameScope()

    if namescope == '':
        return model.weights[:]
    else:
        return [w for w in model.weights if w.GetNameScope() == namescope]
Example 5
    def GetBiases(self, namescope=None):
        if namescope is None:
            namescope = scope.CurrentNameScope()

        if namescope == '':
            return self.biases[:]
        else:
            return [b for b in self.biases if b.GetNameScope() == namescope]
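Examples 3-5 all filter a parameter list by comparing each blob's `GetNameScope()` with the requested scope, which always ends with the separator. A small sketch of that comparison on bare `BlobReference` objects, which is all the filter relies on:

from caffe2.python import core

weights = [core.BlobReference("gpu_0/conv1_w"),
           core.BlobReference("gpu_1/conv1_w"),
           core.BlobReference("conv1_w")]   # unscoped blob

# GetNameScope() returns everything up to and including the last
# separator (or '' if there is none), so only the gpu_0 blob survives.
namescope = "gpu_0/"
print([str(w) for w in weights if w.GetNameScope() == namescope])
# -> ['gpu_0/conv1_w']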
Example 6
def thread_runner(idx, testobj):
    global SUCCESS_COUNT
    testobj.assertEquals(scope.CurrentNameScope(), "")
    testobj.assertEquals(scope.CurrentDeviceScope(), None)
    namescope = "namescope_{}".format(idx)
    dsc = core.DeviceOption(caffe2_pb2.CUDA, idx)
    with scope.DeviceScope(dsc):
        with scope.NameScope(namescope):
            testobj.assertEquals(scope.CurrentNameScope(), namescope + "/")
            testobj.assertEquals(scope.CurrentDeviceScope(), dsc)

            time.sleep(0.01 + idx * 0.01)
            testobj.assertEquals(scope.CurrentNameScope(), namescope + "/")
            testobj.assertEquals(scope.CurrentDeviceScope(), dsc)

    testobj.assertEquals(scope.CurrentNameScope(), "")
    testobj.assertEquals(scope.CurrentDeviceScope(), None)
    SUCCESS_COUNT += 1
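The test above relies on name and device scopes being thread-local: a scope opened on one worker thread is invisible to other threads and to the main thread. A minimal CPU-only sketch of the same property, assuming a working `caffe2.python` install:

import threading

from caffe2.python import scope

results = {}

def worker(tag):
    # Each new thread starts with an empty scope and only sees the
    # scope it opens itself.
    assert scope.CurrentNameScope() == ""
    with scope.NameScope(tag):
        results[tag] = scope.CurrentNameScope()

threads = [threading.Thread(target=worker, args=(t,)) for t in ("a", "b")]
for t in threads:
    t.start()
for t in threads:
    t.join()

assert results == {"a": "a/", "b": "b/"}
assert scope.CurrentNameScope() == ""   # the main thread is unaffected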
Example 7
def init_data_input_workers(
    net,
    input_blob_names,
    fetch_fun,
    batch_size,
    num_worker_threads=2,
    input_source_name="train",
    max_buffered_batches=800,
):
    global global_coordinator
    device_option = scope.CurrentDeviceScope()
    if (device_option is None):
        device_option = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)

    # Create coordinator object
    coordinator = DataInputCoordinator(
        net,
        input_blob_names,
        batch_size,
        device_option,
        scope.CurrentNameScope(),
        input_source_name,
        global_coordinator.get_queue(input_source_name, max_buffered_batches)
    )

    # Launch fetch worker threads
    workers = [
        threading.Thread(
            target=fetcher,
            name="data_workers fetcher id {}".format(global_coordinator._fetcher_id_seq + i),
            args=[coordinator, global_coordinator._fetcher_id_seq + i, fetch_fun, batch_size, input_blob_names],
        ) for i in range(num_worker_threads)
    ]
    global_coordinator._fetcher_id_seq += num_worker_threads

    workers.append(threading.Thread(
        target=enqueuer,
        name="Enqueuer {} {}".format(input_source_name, scope.CurrentNameScope()),
        args=[coordinator]))
    coordinator._workers = workers
    global_coordinator.add(coordinator)

    return global_coordinator
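A hedged usage sketch for the helper above. The calling convention is an assumption based on this module's style: `fetch_fun` is invoked by the fetcher threads with a worker id and the batch size and must return one numpy array per entry in `input_blob_names`, and the returned global coordinator is started before training begins.

import numpy as np

from caffe2.python import model_helper

def fetch_batch(worker_id, batch_size):
    # Hypothetical fetcher: one random batch per call, one array per
    # input blob, in the same order as input_blob_names.
    data = np.random.rand(batch_size, 3, 32, 32).astype(np.float32)
    labels = np.random.randint(0, 10, size=batch_size).astype(np.int32)
    return [data, labels]

model = model_helper.ModelHelper(name="train_model")
coordinator = init_data_input_workers(
    model.net, ["data", "label"], fetch_batch, batch_size=32,
    num_worker_threads=4, input_source_name="train")
coordinator.start()   # launches the fetcher and enqueuer threads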
Example 8
    def _validate_param_shape(self, param_name, shape):
        if param_name not in self._param_to_shape:
            return

        ref_shape = self._param_to_shape[param_name]

        if shape != ref_shape:
            raise ValueError(
                "Got inconsistent shapes between shared parameters "
                "when trying to map a blob in scope {0} to {1}.".format(
                    scope.CurrentNameScope(), param_name))
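The guard above protects shared parameters: once a blob name has a recorded shape, any later attempt to map it with a different shape fails. A minimal standalone sketch of the same bookkeeping with a plain dict (in the class above the reference shape is recorded elsewhere; here `setdefault` records it on first use):

param_to_shape = {}

def validate_param_shape(param_name, shape):
    # Record the shape the first time the parameter is seen, then insist
    # that every later mapping agrees with it.
    ref_shape = param_to_shape.setdefault(param_name, list(shape))
    if list(shape) != ref_shape:
        raise ValueError(
            "Got inconsistent shapes for shared parameter {0}: "
            "{1} vs {2}".format(param_name, ref_shape, list(shape)))

validate_param_shape("shared_fc_w", [256, 128])
validate_param_shape("shared_fc_w", [256, 128])     # consistent, passes
# validate_param_shape("shared_fc_w", [512, 128])   # would raise ValueError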
Example 9
    def GetParams(self, namescope=None):
        '''
        Returns the params in current namescope
        '''
        if namescope is None:
            namescope = scope.CurrentNameScope()
        else:
            if not namescope.endswith(scope._NAMESCOPE_SEPARATOR):
                namescope += scope._NAMESCOPE_SEPARATOR

        if namescope == '':
            return self.params[:]
        else:
            return [p for p in self.params if p.GetNameScope() == namescope]
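A hedged usage sketch for the accessor above (the same filtering also backs `GetWeights`/`GetBiases` in Examples 3-5), assuming the standard `brew` helpers are used to populate `model.params`:

from caffe2.python import brew, core, model_helper

model = model_helper.ModelHelper(name="demo")
with core.NameScope("gpu_0"):
    brew.fc(model, "data", "fc1", dim_in=16, dim_out=8)
with core.NameScope("gpu_1"):
    brew.fc(model, "data", "fc1", dim_in=16, dim_out=8)

# Inside a scope the namescope argument can be omitted; outside, it is
# passed explicitly and the trailing separator is added automatically.
print([str(p) for p in model.GetParams("gpu_0")])
# expected: ['gpu_0/fc1_w', 'gpu_0/fc1_b']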
Example 10
    def _init_reader_schema(self, field_names=None):
        """Restore a reader schema from the DB file.

        If `field_names` is given, restore the schema from it.

        Otherwise, load blobs from the DB file into the workspace,
        and restore the schema from these blob names.
        It is also assumed that:
        1). Each field of the schema has corresponding blobs
            stored in the DB file.
        2). Each blob loaded from the DB file corresponds to
            a field of the schema.
        3). field_names in the original schema are in alphabetic order,
            since blob names loaded to the workspace from the DB file
            will be in alphabetic order.

        Load a set of blobs from a DB file. From names of these blobs,
        restore the DB file schema using `from_column_list(...)`.

        Returns:
            schema: schema.Struct. Used in Reader.__init__(...).
        """
        if field_names:
            return from_column_list(field_names)

        if self.db_type == "log_file_db":
            assert os.path.exists(self.db_path), \
                'db_path [{db_path}] does not exist'.format(db_path=self.db_path)
        with core.NameScope(self.name):
            # blob_prefix is for avoiding name conflict in workspace
            blob_prefix = scope.CurrentNameScope()
        workspace.RunOperatorOnce(
            core.CreateOperator(
                'Load',
                [],
                [],
                absolute_path=True,
                db=self.db_path,
                db_type=self.db_type,
                load_all=True,
                add_prefix=blob_prefix,
            ))
        col_names = [
            blob_name[len(blob_prefix):]
            for blob_name in sorted(workspace.Blobs())
            if blob_name.startswith(blob_prefix)
        ]
        schema = from_column_list(col_names)
        return schema
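The reader above rebuilds its schema purely from blob names: blobs are loaded under the reader's name-scope prefix, and the prefix is then stripped off to recover the column names. A small sketch of just that string handling, with no DB or workspace involved, using `from_column_list` from `caffe2.python.schema`:

from caffe2.python.schema import from_column_list

blob_prefix = "my_reader/"
workspace_blobs = ["my_reader/label", "my_reader/dense", "unrelated_blob"]

# Keep only blobs carrying the reader's prefix, strip the prefix and sort,
# mirroring the alphabetical-order assumption in the docstring.
col_names = [
    blob_name[len(blob_prefix):]
    for blob_name in sorted(workspace_blobs)
    if blob_name.startswith(blob_prefix)
]
print(col_names)                  # ['dense', 'label']
schema = from_column_list(col_names)
print(schema.field_names())       # ['dense', 'label']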
Example 11
    def create_threads(self):
        # "worker" threads to construct (partial) minibatches and put them on
        # minibatch CPU queue in CPU memory (limited by queue size).
        self._worker_ids = self.get_worker_ids()
        self._workers = [
            threading.Thread(
                target=self.minibatch_loader,
                name='worker_{}'.format(worker_id),
                args=[worker_id],
            ) for worker_id in self._worker_ids
        ]

        # create one BlobsQueue per DEVICE which holds the training data in GPU
        # memory and feeds to the net
        prefix, device = helpers.get_prefix_and_device()
        # the root device id = 0
        for device_id in range(0, self._num_devices):
            with core.NameScope('{}{}'.format(prefix, device_id)):
                self.create_blobs_queue(
                    queue_name=self._blobs_queue_name,
                    num_blobs=len(self._blobs_idx_map),
                    capacity=self._device_blobs_queue_capacity)

        # launch enqueuer threads
        # Create one blob for each (blob_name, enqueuer_thread_id) pair:
        #  <train/test>_<blob_name>_enqueue_<enqueuer_thread_id>
        # The distinction between train/test here is important since when we use
        # EnqueueBlobs op, we need to distinguish otherwise data can get mixed.
        blob_names = self._blobs_idx_map.keys()
        enqueue_blobs_names = [[
            '{}_{}_enqueue_{}'.format(self._split, blob_name, idx)
            for blob_name in blob_names
        ] for idx in range(self._num_enqueuers)]
        for device_id in range(0, self._num_devices):
            # NameScope is prepended to all the blobs in the workspace
            with core.NameScope('{}{}'.format(prefix, device_id)):
                with core.DeviceScope(core.DeviceOption(device, device_id)):
                    for blob_list in enqueue_blobs_names:
                        for blob in blob_list:
                            scoped_blob_name = scope.CurrentNameScope() + blob
                            workspace.CreateBlob(scoped_blob_name)
        # create the enqueuer threads
        self._enqueuers = [
            threading.Thread(target=self.enqueue_blobs_thread,
                             args=(device_id, enqueue_blobs_names[idx]))
            for device_id in range(0, self._num_devices)
            for idx in range(self._num_enqueuers)
        ]
Example 12
    def GetComputedParams(self, namescope=None):
        '''
        Returns the computed params in current namescope. 'Computed params'
        are such parameters that are not optimized via gradient descent but are
        directly computed from data, such as the running mean and variance
        of Spatial Batch Normalization.
        '''
        if namescope is None:
            namescope = scope.CurrentNameScope()
        else:
            if not namescope.endswith(scope._NAMESCOPE_SEPARATOR):
                namescope += scope._NAMESCOPE_SEPARATOR

        if namescope == '':
            return self._computed_params[:]
        else:
            return [p for p in self._computed_params
                    if p.GetNameScope() == namescope]
Example 13
    def add_FC_layer_with_weight_name(self,
                                      weights_prefix,
                                      blob_in,
                                      blob_out,
                                      dim_in,
                                      dim_out,
                                      weight_init=None,
                                      bias_init=None,
                                      **kwargs):

        weights_name = weights_prefix + '_w'
        bias_name = weights_prefix + '_b'

        scope_str = scope.CurrentNameScope()

        if scope_str + weights_name not in self.params:
            # This is the first time these weights are being used, so we init them
            # in the init net, and at the same time add the records to the
            # model.params list

            weight_init = weight_init or ('XavierFill', {})
            bias_init = bias_init or ('ConstantFill', {})
            weight = self.param_init_net.__getattr__(
                weight_init[0])([],
                                weights_name,
                                shape=[dim_out, dim_in],
                                **weight_init[1])
            bias = self.param_init_net.__getattr__(
                bias_init[0])([], bias_name, shape=[
                    dim_out,
                ], **bias_init[1])

            self.params.extend([weight, bias])
            self.weights.append(weight)
            self.biases.append(bias)

        else:
            weight = core.ScopedBlobReference(weights_name,
                                              self.param_init_net)
            bias = core.ScopedBlobReference(bias_name, self.param_init_net)

        return self.net.FC([blob_in, weight, bias], blob_out, **kwargs)
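The helper above implements simple weight sharing: the first call with a given `weights_prefix` inside the current name scope initializes `<prefix>_w` / `<prefix>_b` and registers them, and any later call with the same prefix in the same scope reuses the existing blobs through `ScopedBlobReference`. A hedged usage sketch, assuming `model` is an instance of the model-helper subclass that defines this method:

# Two heads fed by different inputs share one FC layer's parameters:
# only the first call creates and initializes shared_fc_w / shared_fc_b.
pred_a = model.add_FC_layer_with_weight_name(
    'shared_fc', 'feat_a', 'pred_a', dim_in=256, dim_out=10)
pred_b = model.add_FC_layer_with_weight_name(
    'shared_fc', 'feat_b', 'pred_b', dim_in=256, dim_out=10)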
Example 14
    def param_update_ops(model):
        lr = model.param_init_net.ConstantFill([],
                                               'lr',
                                               shape=[1],
                                               value=model.current_lr)
        weight_decay = model.param_init_net.ConstantFill(
            [], 'weight_decay', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY)
        weight_decay_bn = model.param_init_net.ConstantFill(
            [], 'weight_decay_bn', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY_BN)
        one = model.param_init_net.ConstantFill([],
                                                "ONE",
                                                shape=[1],
                                                value=1.0)
        params = model.GetParams()
        curr_scope = scope.CurrentNameScope()
        # scope is of format 'gpu_{}/'.format(gpu_id), so remove the separator
        trainable_params = model.TrainableParams(curr_scope[:-1])
        assert len(params) > 0, 'No trainable params found in model'
        for param in params:
            # only update trainable params
            if param in trainable_params:
                param_grad = model.param_to_grad[param]
                # the param grad is the summed gradient for the parameter across
                # all gpus/hosts
                param_momentum = model.param_init_net.ConstantFill([param],
                                                                   param +
                                                                   '_momentum',
                                                                   value=0.0)

                if '_bn' in str(param):
                    model.WeightedSum(
                        [param_grad, one, param, weight_decay_bn], param_grad)
                else:
                    model.WeightedSum([param_grad, one, param, weight_decay],
                                      param_grad)
                model.net.MomentumSGDUpdate(
                    [param_grad, param_momentum, lr, param],
                    [param_grad, param_momentum, param],
                    momentum=cfg.SOLVER.MOMENTUM,
                    nesterov=cfg.SOLVER.NESTEROV,
                )
Example 15
    def _init_reader_schema(self):
        """Restore a reader schema from the DB file.

        Here it is assumed that:
        1). Each field of the schema has corresponding blobs
            stored in the DB file.
        2). Each blob loaded from the DB file corresponds to
            a field of the schema.

        Load a set of blobs from a DB file. From names of these blobs,
        restore the DB file schema using `from_column_list(...)`.

        Returns:
            schema: schema.Struct. Used in Reader.__init__(...).
        """
        assert os.path.exists(self.db_path), \
            'db_path [{db_path}] does not exist'.format(db_path=self.db_path)
        with core.NameScope(self.name):
            # blob_prefix is for avoiding name conflict in workspace
            blob_prefix = scope.CurrentNameScope()
        workspace.RunOperatorOnce(
            core.CreateOperator(
                'Load',
                [],
                [],
                absolute_path=True,
                db=self.db_path,
                db_type=self.db_type,
                load_all=True,
                add_prefix=blob_prefix,
            )
        )
        col_names = [
            blob_name[len(blob_prefix):] for blob_name in workspace.Blobs()
            if blob_name.startswith(blob_prefix)
        ]
        schema = from_column_list(col_names)
        return schema
Example 16
    def create_threads(self):
        # "worker" threads to construct (partial) minibatches and put them on
        # minibatch queue in CPU memory (limited by queue size).
        self._worker_ids = self.get_worker_ids()
        self._workers = [
            threading.Thread(
                target=self.minibatch_loader,
                name='worker_{}'.format(worker_id),
                args=[worker_id],
            ) for worker_id in self._worker_ids
        ]

        # Create one BlobsQueue per GPU which holds the training data in GPU
        # memory and feeds to the net.
        root_gpu_id = cfg.ROOT_GPU_ID
        for gpu_id in range(root_gpu_id, root_gpu_id + self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                self.create_blobs_queue(
                    queue_name=self._blobs_queue_name,
                    num_blobs=len(self._blobs_idx_map),
                    capacity=self._gpu_blobs_queue_capacity)

        # Launch enqueuer threads.
        blob_names = self._blobs_idx_map.keys()
        enqueue_blobs_names = [
            '{}_{}_enqueue'.format(self._split, blob_name)
            for blob_name in blob_names
        ]
        for gpu_id in range(root_gpu_id, root_gpu_id + self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                with core.DeviceScope(
                        core.DeviceOption(caffe2_pb2.CUDA, gpu_id)):
                    # enqueue_blobs_names is a flat list here (one name per
                    # blob), so iterate the names directly
                    for blob in enqueue_blobs_names:
                        scoped_blob_name = scope.CurrentNameScope() + blob
                        workspace.CreateBlob(scoped_blob_name)
        self._enqueuer = threading.Thread(target=self.enqueue_blobs_thread,
                                          args=(0, enqueue_blobs_names))
Example 17
def init_data_input_workers(
    net,
    input_blob_names,
    fetch_fun,
    batch_size,
    num_worker_threads=2,
    input_source_name="train",
    max_buffered_batches=800,
    init_fun=None,
    external_loggers=None,
    dont_rebatch=False,
    batch_columns=None,
    timeout=600
):
    global global_coordinator
    device_option = scope.CurrentDeviceScope()
    if (device_option is None):
        device_option = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)

    metrics = Metrics(external_loggers)
    batch_feeder = BatchFeeder(
        net,
        input_blob_names,
        batch_size,
        device_option,
        scope.CurrentNameScope(),
        input_source_name,
        global_coordinator.get_queue(input_source_name, max_buffered_batches),
        metrics,
        dont_rebatch,
        batch_columns,
        timeout=timeout
    )

    # Create coordinator object
    coordinator = WorkerCoordinator(
        input_source_name, init_fun, batch_feeder)

    # Launch fetch worker threads
    worker_ids = [
        global_coordinator.get_new_worker_id()
        for i in range(num_worker_threads)
    ]
    workers = [
        threading.Thread(
            target=run_worker,
            name="data_workers fetcher id {}".format(worker_id),
            args=[coordinator,
                  DataWorker(coordinator, worker_id, fetch_fun, metrics,
                             batch_size, batch_feeder)],
        ) for worker_id in worker_ids
    ]

    workers.append(threading.Thread(
        target=enqueuer,
        name="Enqueuer {} {}".format(input_source_name, scope.CurrentNameScope()),
        args=[coordinator, batch_feeder]))
    coordinator._workers = workers
    global_coordinator.add(coordinator)

    return global_coordinator
Example 18
def GetNameScope():
    """Return the current namescope string. To be used to fetch blobs"""
    return scope.CurrentNameScope()
Example 19
    def get_parameter_name(self, name):
        best_scope = self._resolve_scope_overrides(scope.CurrentNameScope())
        return best_scope + name
Example 20
def scoped_name(blob_name):
    return scope.CurrentNameScope() + blob_name
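Because `scoped_name` just prepends the current scope, the same logical blob name resolves to different workspace blobs depending on the active NameScope. A minimal sketch:

from caffe2.python import core, scope

def scoped_name(blob_name):
    return scope.CurrentNameScope() + blob_name

print(scoped_name("data"))        # 'data' outside any scope
with core.NameScope("gpu_0"):
    print(scoped_name("data"))    # 'gpu_0/data'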
Example 21
def input_fn(model):
    for blob_name in blob_names:
        workspace.CreateBlob(scope.CurrentNameScope() + blob_name)
    model.DequeueBlobs(queue_name, blob_names)
    model.StopGradient('data{}'.format(suffix), 'data{}'.format(suffix))
Example 22
def input_fn(model):
    for blob_name in blob_names:
        workspace.CreateBlob(scope.CurrentNameScope() + blob_name)
    model.net.DequeueBlobs(queue_name, blob_names)
    model.StopGradient('data', 'data')
Example 23
def param_update_ops(model):
    weight_decay = model.param_init_net.ConstantFill(
        [], 'weight_decay', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY)
    weight_decay_bn = model.param_init_net.ConstantFill(
        [], 'weight_decay_bn', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY_BN)
    # for jigsaw model, all the bias params have weight decay set to 0
    weight_decay_zero_bias = model.param_init_net.ConstantFill(
        [], 'weight_decay_zero_bias', shape=[1], value=0.0)
    zero = model.param_init_net.ConstantFill([],
                                             "ZERO",
                                             shape=[1],
                                             value=0.0)
    one = model.param_init_net.ConstantFill([],
                                            "ONE",
                                            shape=[1],
                                            value=1.0)
    two = model.param_init_net.ConstantFill([],
                                            "TWO",
                                            shape=[1],
                                            value=2.0)
    params = model.GetParams()
    curr_scope = scope.CurrentNameScope()
    # scope is of format 'gpu_{}/'.format(device_id), so remove the separator
    trainable_params = model.TrainableParams(curr_scope[:-1])
    assert len(params) > 0, 'No trainable params found in model'
    for param in params:
        # only update trainable params
        if param in trainable_params:
            # the param grad is the summed gradient for the parameter across
            # all devices/hosts
            param_momentum = model.param_init_net.ConstantFill([param],
                                                               param +
                                                               '_momentum',
                                                               value=0.0)
            param_grad = model.param_to_grad[param]
            # add weight decay
            if '_bn' in str(param):
                # make LR 0 and weight decay 0 to keep scale and bias same.
                # Scale/bias are the learnable parameters in BN. See
                # Algorithm1 https://arxiv.org/pdf/1502.03167.pdf
                if cfg.MODEL.BN_NO_SCALE_SHIFT:
                    model.WeightedSum(
                        [param_grad, zero, param, weight_decay_bn],
                        param_grad)
                else:
                    model.WeightedSum(
                        [param_grad, one, param, weight_decay_bn],
                        param_grad)
            elif cfg.MODEL.NO_BIAS_DECAY:
                # In jigsaw model, all the bias params have decay=0 and
                # lr_multiplier=2
                if '_b' in str(param):
                    model.WeightedSum(
                        [param_grad, two, param, weight_decay_zero_bias],
                        param_grad)
            else:
                model.WeightedSum([param_grad, one, param, weight_decay],
                                  param_grad)
            model.net.MomentumSGDUpdate(
                [param_grad, param_momentum, 'lr', param],
                [param_grad, param_momentum, param],
                momentum=cfg.SOLVER.MOMENTUM,
                nesterov=cfg.SOLVER.NESTEROV,
            )
Example 24
    def add_Conv_layer_with_weight_name(self,
                                        weights_prefix,
                                        blob_in,
                                        blob_out,
                                        dim_in,
                                        dim_out,
                                        kernel,
                                        weight_init=None,
                                        bias_init=None,
                                        group=1,
                                        **kwargs):

        weights_name = weights_prefix + '_w'
        bias_name = weights_prefix + '_b'

        scope_str = scope.CurrentNameScope()

        use_bias = False if ("no_bias" in kwargs
                             and kwargs["no_bias"]) else True
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()
        weight_shape = ([dim_out, int(dim_in / group), kernel, kernel]
                        if self.order == "NCHW" else
                        [dim_out, kernel, kernel,
                         int(dim_in / group)])
        if scope_str + weights_name not in self.params:
            weight = self.param_init_net.__getattr__(
                weight_init[0])([],
                                weights_name,
                                shape=weight_shape,
                                **weight_init[1])
            self.weights.append(weight)
            if use_bias:
                bias = self.param_init_net.__getattr__(
                    bias_init[0])([],
                                  bias_name,
                                  shape=[
                                      dim_out,
                                  ],
                                  **bias_init[1])
                self.params.extend([weight, bias])
                self.biases.append(bias)
            else:
                self.params.extend([weight])
        else:
            weight = core.ScopedBlobReference(weights_name,
                                              self.param_init_net)
            if use_bias:
                bias = core.ScopedBlobReference(bias_name, self.param_init_net)

        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit

        inputs = []
        if use_bias:
            inputs = [blob_in, weight, bias]
        else:
            inputs = [blob_in, weight]

        # For the operator, we no longer need to provide the no_bias field
        # because it can automatically figure this out from the number of
        # inputs.
        if 'no_bias' in kwargs:
            del kwargs['no_bias']
        if group != 1:
            kwargs['group'] = group
        return self.net.Conv(inputs,
                             blob_out,
                             kernel=kernel,
                             order=self.order,
                             **kwargs)