Code Example #1
File: scope_test.py Project: GeekLiB/caffe2-master
    def testMultiThreaded(self):
        """
        Test that name/device scope are properly local to the thread
        and don't interfere
        """
        global SUCCESS_COUNT
        self.assertEquals(scope.CurrentNameScope(), "")
        self.assertEquals(scope.CurrentDeviceScope(), None)

        threads = []
        for i in range(4):
            threads.append(
                threading.Thread(
                    target=thread_runner,
                    args=(i, self),
                ))
        for t in threads:
            t.start()

        with scope.NameScope("master"):
            self.assertEquals(scope.CurrentDeviceScope(), None)
            self.assertEquals(scope.CurrentNameScope(), "master/")
            for t in threads:
                t.join()

            self.assertEquals(scope.CurrentNameScope(), "master/")
            self.assertEquals(scope.CurrentDeviceScope(), None)

        # Ensure all threads succeeded
        self.assertEquals(SUCCESS_COUNT, 4)
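This test only passes if the name/device scope is stored per thread. Below is a minimal, self-contained sketch of that mechanism using threading.local(); the names name_scope and current_name_scope are hypothetical stand-ins chosen for illustration, not Caffe2's actual scope module.

import threading
from contextlib import contextmanager

# Illustrative stand-in for caffe2.python.scope: the current name scope is
# kept in thread-local storage, so concurrent threads cannot interfere.
_threadlocal = threading.local()


def current_name_scope():
    return getattr(_threadlocal, "namescope", "")


@contextmanager
def name_scope(prefix):
    previous = current_name_scope()
    _threadlocal.namescope = previous + prefix + "/"
    try:
        yield
    finally:
        _threadlocal.namescope = previous  # restored even if the body raises


def worker(idx, results):
    with name_scope("namescope_{}".format(idx)):
        results[idx] = current_name_scope()


if __name__ == "__main__":
    results = {}
    threads = [threading.Thread(target=worker, args=(i, results))
               for i in range(4)]
    with name_scope("master"):
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        assert current_name_scope() == "master/"
    assert current_name_scope() == ""
    assert results == {i: "namescope_{}/".format(i) for i in range(4)}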
Code Example #2
    def testDevicescopeBasic(self):
        self.assertEquals(scope.CurrentDeviceScope(), None)

        dsc = core.DeviceOption(workspace.GpuDeviceType, 9)
        with scope.DeviceScope(dsc):
            self.assertEquals(scope.CurrentDeviceScope(), dsc)

        self.assertEquals(scope.CurrentDeviceScope(), None)
Code Example #3
    def testDevicescopeBasic(self):
        self.assertEquals(scope.CurrentDeviceScope(), None)

        dsc = core.DeviceOption(_gpu_device_type(), 9)
        with scope.DeviceScope(dsc):
            self.assertEquals(scope.CurrentDeviceScope(), dsc)

        self.assertEquals(scope.CurrentDeviceScope(), None)
Code Example #4
File: scope_test.py Project: GeekLiB/caffe2-master
    def testDevicescopeBasic(self):
        self.assertEquals(scope.CurrentDeviceScope(), None)

        dsc = core.DeviceOption(caffe2_pb2.CUDA, 9)
        with scope.DeviceScope(dsc):
            self.assertEquals(scope.CurrentDeviceScope(), dsc)

        self.assertEquals(scope.CurrentDeviceScope(), None)
Code Example #5
    def testDevicescopeAssertion(self):
        self.assertEquals(scope.CurrentDeviceScope(), None)

        dsc = core.DeviceOption(_gpu_device_type(), 9)

        try:
            with scope.DeviceScope(dsc):
                self.assertEquals(scope.CurrentDeviceScope(), dsc)
                raise Exception()
        except Exception:
            pass

        self.assertEquals(scope.CurrentDeviceScope(), None)
Code Example #6
    def testDevicescopeAssertion(self):
        self.assertEquals(scope.CurrentDeviceScope(), None)

        dsc = core.DeviceOption(caffe2_pb2.CUDA, 9)

        try:
            with scope.DeviceScope(dsc):
                self.assertEquals(scope.CurrentDeviceScope(), dsc)
                raise Exception()
        except Exception:
            pass

        self.assertEquals(scope.CurrentDeviceScope(), None)
Code Example #7
    def testDevicescopeAssertion(self):
        self.assertEquals(scope.CurrentDeviceScope(), None)

        dsc = core.DeviceOption(workspace.GpuDeviceType, 9)

        try:
            with scope.DeviceScope(dsc):
                self.assertEquals(scope.CurrentDeviceScope(), dsc)
                raise Exception()
        except Exception:
            pass

        self.assertEquals(scope.CurrentDeviceScope(), None)
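Code Examples #2 through #7 all check the same two properties of DeviceScope: the previous device option is reinstated when the with block exits normally, and also when it exits via an exception. The sketch below shows one way a context manager can give that guarantee; it is illustrative only, with hypothetical names, and is not the real scope.DeviceScope from caffe2.python.scope.

from contextlib import contextmanager

# Illustrative module-level "current device" slot used only for this sketch.
_current_device = None


def current_device_scope():
    return _current_device


@contextmanager
def device_scope(device_option):
    global _current_device
    previous = _current_device
    _current_device = device_option
    try:
        yield
    finally:
        # try/finally is what makes the assertion tests above pass: the old
        # value comes back whether the block exits normally or via an error.
        _current_device = previous


if __name__ == "__main__":
    assert current_device_scope() is None
    try:
        with device_scope("gpu:9"):
            assert current_device_scope() == "gpu:9"
            raise RuntimeError("simulated failure inside the scope")
    except RuntimeError:
        pass
    assert current_device_scope() is None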
Code Example #8
File: layers.py Project: vikasgoel2000/lambda-packs
    def add_init_params(self, init_net):
        '''
        Adds layer initialization operators to passed net.
        '''
        for param in self.params:
            # TODO(amalevich): Either return back to lambdas, that add
            # all params (looks a bit safer and breaking less
            # abstractions) or extend Net interface to this type of
            # operations better
            # TODO(xlwang) init_net._net.op has type google.protobuf.\
            # internal.containers.RepeatedCompositeFieldContainer, but
            # the version of protobuf in fbcode does not support append
            # so extend is used
            init_op = param.initializer
            current_device_scope = scope.CurrentDeviceScope()
            if not init_op:
                continue

            if not init_op.HasField('device_option') and\
                    current_device_scope:
                init_op = caffe2_pb2.OperatorDef()
                init_op.CopyFrom(param.initializer)
                init_op.device_option.CopyFrom(current_device_scope)

            # do not add duplicated init ops
            if any(
                    utils.OpAlmostEqual(op, init_op, 'debug_info')
                    for op in init_net._net.op):
                continue

            init_net._net.op.extend([init_op])
Code Example #9
File: optimizer.py Project: aaaqhbd/caffe2_1
    def _run(self, net, param_init_net, param_info):
        param = param_info.blob
        grad = param_info.grad
        if self.base_learning_rate == 0:
            return
        assert self.base_learning_rate > 0

        # We need negative sign for LR when used directly with WeightedSum
        # below.
        lr_sign = -1 if self.momentum else 1
        lr, _ = self.build_lr(net,
                              param_init_net,
                              base_learning_rate=self.base_learning_rate *
                              lr_sign,
                              policy=self.policy,
                              **(self.init_kwargs))

        dev = scope.CurrentDeviceScope()
        if dev is None:
            dev = core.DeviceOption(caffe2_pb2.CPU)

        # Each GPU/CPU must have its own ONE blob, thus modify the name
        # to include device information.
        ONE = param_init_net.ConstantFill([],
                                          "ONE_{}_{}{}".format(
                                              dev.device_type, dev.cuda_gpu_id,
                                              dev.node_name),
                                          shape=[1],
                                          value=1.0)

        self._aux_params.shared.append(ONE)

        if self.momentum > 0:
            momentum_data = param_init_net.ConstantFill(param,
                                                        str(param) +
                                                        "_momentum",
                                                        value=0.)
            self._aux_params.local.append(momentum_data)

        if isinstance(grad, core.GradientSlice):
            grad = self.dedup(net, self.sparse_dedup_aggregator, grad)
            if self.momentum > 0.:
                net.SparseMomentumSGDUpdate(
                    [grad.values, momentum_data, lr, param, grad.indices],
                    [grad.values, momentum_data, param],
                    momentum=self.momentum,
                    nesterov=self.nesterov)
            else:
                net.ScatterWeightedSum(
                    [param, ONE, grad.indices, grad.values, lr], param)
        else:
            if self.momentum > 0.:
                net.MomentumSGDUpdate([grad, momentum_data, lr, param],
                                      [grad, momentum_data, param],
                                      momentum=self.momentum,
                                      nesterov=self.nesterov)
            else:
                coeff = lr

                net.WeightedSum([param, ONE, grad, coeff], param)
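In the dense, zero-momentum branch at the bottom, the parameter update is a plain elementwise weighted sum, param <- 1 * param + lr * grad, with the learning-rate blob already carrying the descent sign. A NumPy sketch of that last line (illustrative only; in Caffe2 the WeightedSum op performs this inside the net):

import numpy as np

param = np.array([1.0, -2.0, 0.5], dtype=np.float32)
grad = np.array([0.2, -0.1, 0.4], dtype=np.float32)
lr = -0.01  # negative, so adding lr * grad moves against the gradient

# net.WeightedSum([param, ONE, grad, coeff], param) from the example above:
param = param * 1.0 + grad * lr
print(param)  # [0.998, -1.999, 0.496] -> a gradient-descent step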
Code Example #10
def FeedBlob(name, arr, device_option=None):
    """Feeds a blob into the workspace.

    Inputs:
      name: the name of the blob.
      arr: either a TensorProto object or a numpy array object to be fed into
          the workspace.
      device_option (optional): the device option to feed the data with.
    Returns:
      True or False, stating whether the feed is successful.
    """
    if type(arr) is caffe2_pb2.TensorProto:
        arr = utils.Caffe2TensorToNumpyArray(arr)
    if type(arr) is np.ndarray and arr.dtype.kind in 'SU':
        # Plain NumPy strings are weird, let's use objects instead
        arr = arr.astype(np.object)

    if device_option is None:
        device_option = scope.CurrentDeviceScope()

    if device_option and device_option.device_type == caffe2_pb2.CUDA:
        if arr.dtype == np.dtype('float64'):
            logger.warning(
                "CUDA operators do not support 64-bit doubles, " +
                "please use arr.astype(np.float32) or np.int32 for ints." +
                " Blob: {}".format(name) + " type: {}".format(str(arr.dtype)))

    name = StringifyBlobName(name)
    if device_option is not None:
        return C.feed_blob(name, arr, StringifyProto(device_option))
    else:
        return C.feed_blob(name, arr)
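A short usage sketch for the helper above (it appears to be FeedBlob from caffe2.python.workspace): a blob can be fed with an explicit device option, or the ambient device scope is picked up when none is given. This assumes a working Caffe2 installation; the blob names are arbitrary.

import numpy as np

from caffe2.proto import caffe2_pb2
from caffe2.python import core, scope, workspace

x = np.random.rand(4, 3).astype(np.float32)

# Explicit device option: FeedBlob uses it directly.
cpu_option = core.DeviceOption(caffe2_pb2.CPU)
workspace.FeedBlob("data_explicit", x, device_option=cpu_option)

# No device option given: FeedBlob falls back to scope.CurrentDeviceScope(),
# which is the CPU option while this block is active.
with scope.DeviceScope(cpu_option):
    workspace.FeedBlob("data_from_scope", x)

print(workspace.FetchBlob("data_from_scope").shape)  # (4, 3)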
Code Example #11
def load_from_db(filename, db_type, device_option=None, *args, **kwargs):
    # global_init_net in meta_net_def will load parameters from
    # predictor_constants.PREDICTOR_DBREADER
    create_db = core.CreateOperator(
        'CreateDB', [],
        [core.BlobReference(predictor_constants.PREDICTOR_DBREADER)],
        db=filename, db_type=db_type)
    assert workspace.RunOperatorOnce(create_db), (
        'Failed to create db {}'.format(filename))

    # predictor_constants.META_NET_DEF is always stored before the parameters
    load_meta_net_def = core.CreateOperator(
        'Load',
        [core.BlobReference(predictor_constants.PREDICTOR_DBREADER)],
        [core.BlobReference(predictor_constants.META_NET_DEF)])
    assert workspace.RunOperatorOnce(load_meta_net_def)

    blob = workspace.FetchBlob(predictor_constants.META_NET_DEF)
    meta_net_def = serde.deserialize_protobuf_struct(
        blob if isinstance(blob, bytes)
        else str(blob).encode('utf-8'),
        metanet_pb2.MetaNetDef)

    if device_option is None:
        device_option = scope.CurrentDeviceScope()

    if device_option is not None:
        # Set the device options of all loaded blobs
        for kv in meta_net_def.nets:
            net = kv.value
            for op in net.op:
                op.device_option.CopyFrom(device_option)

    return meta_net_def
Code Example #12
File: optimizer.py Project: zhangfan-neu/pytorch
    def _run(self, net, param_init_net, param_info):
        dev = scope.CurrentDeviceScope()
        if dev is None:
            dev = core.DeviceOption(caffe2_pb2.CPU)

        ONE = param_init_net.ConstantFill([],
                                          "ONE_{}_{}".format(
                                              dev.device_type,
                                              dev.cuda_gpu_id),
                                          shape=[1],
                                          value=1.0)
        WD = param_init_net.ConstantFill([],
                                         "wd_{}_{}".format(
                                             dev.device_type, dev.cuda_gpu_id),
                                         shape=[1],
                                         value=self.weight_decay)

        if isinstance(param_info.grad, core.GradientSlice):
            raise ValueError(
                "Weight decay does not yet support sparse gradients")
        else:
            net.WeightedSum(
                [param_info.grad, ONE, param_info.blob, WD],
                param_info.grad,
            )
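For reference, the WeightedSum call at the end of this example computes grad <- 1 * grad + weight_decay * param, folding the L2 penalty into the gradient in place. A NumPy sketch of that arithmetic (purely illustrative; the real operator runs inside the Caffe2 net):

import numpy as np

param = np.array([0.5, -1.0, 2.0], dtype=np.float32)
grad = np.array([0.1, 0.2, -0.3], dtype=np.float32)
weight_decay = 1e-4

# net.WeightedSum([grad, ONE, param, WD], grad) from the example above:
grad = grad * 1.0 + param * weight_decay
print(grad)  # gradient with the weight-decay term added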
Code Example #13
File: data_workers.py Project: playbar/caffe2
def init_data_input_workers(
    net,
    input_blob_names,
    fetch_fun,
    batch_size,
    num_worker_threads=2,
    input_source_name="train",
    max_buffered_batches=800,
    init_fun=None,
    external_loggers=None,
    dont_rebatch=False,
    batch_columns=None,
    timeout=600
):
    global global_coordinator
    device_option = scope.CurrentDeviceScope()
    if (device_option is None):
        device_option = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)

    metrics = Metrics(external_loggers)
    batch_feeder = BatchFeeder(
        net,
        input_blob_names,
        batch_size,
        device_option,
        scope.CurrentNameScope(),
        input_source_name,
        global_coordinator.get_queue(input_source_name, max_buffered_batches),
        metrics,
        dont_rebatch,
        batch_columns
    )

    # Create coordinator object
    coordinator = WorkerCoordinator(
        input_source_name, init_fun, batch_feeder)

    # Launch fetch worker threads
    worker_ids = [
        global_coordinator.get_new_worker_id()
        for i in range(num_worker_threads)
    ]
    workers = [
        threading.Thread(
            target=run_worker,
            name="data_workers fetcher id {}".format(worker_id),
            args=[coordinator,
                  DataWorker(coordinator, worker_id, fetch_fun, metrics,
                             batch_size, batch_feeder)],
        ) for worker_id in worker_ids
    ]

    workers.append(threading.Thread(
        target=enqueuer,
        name="Enqueuer {} {}".format(input_source_name, scope.CurrentNameScope()),
        args=[coordinator, batch_feeder]))
    coordinator._workers = workers
    global_coordinator.add(coordinator)

    return global_coordinator
Code Example #14
    def _run(self, net, param_init_net, param_info):
        dev = scope.CurrentDeviceScope()
        if dev is None:
            dev = core.DeviceOption(caffe2_pb2.CPU)

        ONE = param_init_net.ConstantFill([],
                                          "ONE_{}_{}".format(
                                              dev.device_type,
                                              dev.cuda_gpu_id),
                                          shape=[1],
                                          value=1.0)
        SS = param_init_net.ConstantFill([],
                                         "SS_{}_{}".format(
                                             dev.device_type, dev.cuda_gpu_id),
                                         shape=[1],
                                         value=self.sparse_scale)

        if isinstance(param_info.grad, core.GradientSlice):
            raise ValueError(
                "Weight decay does not yet support sparse gradients")
        else:
            param_sign = net.Sign(
                [param_info.blob],
                ['{}_sign'.format(param_info.blob)],
            )
            net.WeightedSum(
                [param_info.grad, ONE, param_sign, SS],
                param_info.grad,
            )
Code Example #15
File: optimizer.py Project: aaaqhbd/caffe2_1
    def _run(self, net, param_init_net, param_info):
        param = param_info.blob
        grad = param_info.grad

        assert self.alpha > 0
        assert not isinstance(grad, core.GradientSlice), \
            "RmsPropOptimizer doesn't support sparse gradients"

        dev = scope.CurrentDeviceScope()
        if dev is None:
            dev = core.DeviceOption(caffe2_pb2.CPU)

        ONE = param_init_net.ConstantFill([],
                                          "ONE_{}_{}".format(
                                              dev.device_type,
                                              dev.cuda_gpu_id),
                                          shape=[1],
                                          value=1.0)

        lr, _ = self.build_lr(net,
                              param_init_net,
                              base_learning_rate=-self.alpha,
                              policy=self.policy,
                              **(self.init_kwargs))

        grad_o = param_init_net.ConstantFill(
            [param],
            str(param) + "_grad_o",
            values=0.0,
        )

        ms = param_init_net.ConstantFill(
            [param],
            str(param) + "_mean_squares",
            values=0.0,
        )

        mom = param_init_net.ConstantFill(
            [param],
            str(param) + "_momentum",
            values=0.0,
        )

        self._aux_params.local.append(ms)
        self._aux_params.local.append(mom)

        net.RmsProp(
            [grad, ms, mom, ONE],
            [grad_o, ms, mom],
            decay=self.decay,
            momentum=self.momentum,
            epsilon=self.epsilon,
            engine=self.engine,
        )

        net.MomentumSGDUpdate(
            [grad_o, mom, lr, param],
            [grad_o, mom, param],
        )
Code Example #16
    def testTags(self):
        self.assertEquals(scope.CurrentDeviceScope(), None)

        extra_info1 = ["key1:value1"]
        extra_info2 = ["key2:value2"]
        extra_info3 = ["key3:value3"]

        extra_info_1_2 = ["key1:value1", "key2:value2"]
        extra_info_1_2_3 = ["key1:value1", "key2:value2", "key3:value3"]

        with scope.DeviceScope(core.DeviceOption(0, extra_info=extra_info1)):
            self.assertEquals(scope.CurrentDeviceScope().extra_info,
                              extra_info1)

            with scope.DeviceScope(core.DeviceOption(0,
                                                     extra_info=extra_info2)):
                self.assertEquals(scope.CurrentDeviceScope().extra_info,
                                  extra_info_1_2)

                with scope.DeviceScope(
                        core.DeviceOption(0, extra_info=extra_info3)):
                    self.assertEquals(scope.CurrentDeviceScope().extra_info,
                                      extra_info_1_2_3)

                self.assertEquals(scope.CurrentDeviceScope().extra_info,
                                  extra_info_1_2)
            self.assertEquals(scope.CurrentDeviceScope().extra_info,
                              extra_info1)
        self.assertEquals(scope.CurrentDeviceScope(), None)
Code Example #17
    def build_lr(self,
                 net,
                 param_init_net,
                 base_learning_rate,
                 learning_rate_blob=None,
                 policy="fixed",
                 iter_val=0,
                 **kwargs):
        if learning_rate_blob is None:
            learning_rate_blob = self.make_unique_blob_name('lr')

        optimization_iter_blob = _OPTIMIZER_ITERATION_NAME
        if not param_init_net.BlobIsDefined(optimization_iter_blob):
            # Add training operators.
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                iteration = param_init_net.ConstantFill(
                    [],
                    optimization_iter_blob,
                    shape=[1],
                    value=iter_val,
                    dtype=core.DataType.INT64)
                iter_mutex = param_init_net.CreateMutex([],
                                                        ["iteration_mutex"])
                net.AtomicIter([iter_mutex, iteration], [iteration])
        else:
            iteration = param_init_net.GetBlobRef(optimization_iter_blob)

        if not net.BlobIsDefined(learning_rate_blob):
            # There is one interesting thing here: since we are minimizing, we are
            # doing "descent" so the learning rate is set to be negative.
            lr = net.LearningRate([iteration],
                                  learning_rate_blob,
                                  base_lr=-base_learning_rate,
                                  policy=policy,
                                  **kwargs)
        else:
            lr = net.GetBlobRef(learning_rate_blob)

        if self._lr_multiplier is not None:
            current_scope = scope.CurrentDeviceScope()
            if (current_scope is not None
                    and current_scope.device_type == caffe2_pb2.CUDA
                    and not self._lr_multiplier_on_gpu):
                lr_multiplier = net.CopyFromCPUInput(
                    self._lr_multiplier,
                    self.make_unique_blob_name('lr_multiplier'))
            else:
                lr_multiplier = self._lr_multiplier

            scaled_lr = net.Mul(
                [lr, lr_multiplier],
                self.make_unique_blob_name('scaled_lr'),
                broadcast=1,
            )
            lr = scaled_lr

        return lr, iteration
Code Example #18
File: scope_test.py Project: GeekLiB/caffe2-master
def thread_runner(idx, testobj):
    global SUCCESS_COUNT
    testobj.assertEquals(scope.CurrentNameScope(), "")
    testobj.assertEquals(scope.CurrentDeviceScope(), None)
    namescope = "namescope_{}".format(idx)
    dsc = core.DeviceOption(caffe2_pb2.CUDA, idx)
    with scope.DeviceScope(dsc):
        with scope.NameScope(namescope):
            testobj.assertEquals(scope.CurrentNameScope(), namescope + "/")
            testobj.assertEquals(scope.CurrentDeviceScope(), dsc)

            time.sleep(0.01 + idx * 0.01)
            testobj.assertEquals(scope.CurrentNameScope(), namescope + "/")
            testobj.assertEquals(scope.CurrentDeviceScope(), dsc)

    testobj.assertEquals(scope.CurrentNameScope(), "")
    testobj.assertEquals(scope.CurrentDeviceScope(), None)
    SUCCESS_COUNT += 1
Code Example #19
File: optimizer.py Project: zhangfan-neu/pytorch
    def build_lr(self,
                 net,
                 param_init_net,
                 base_learning_rate,
                 learning_rate_blob=None,
                 policy="fixed",
                 iter_val=0,
                 **kwargs):
        if learning_rate_blob is None:
            learning_rate_blob = self.make_unique_blob_name('lr')

        iteration = utils.BuildUniqueMutexIter(param_init_net,
                                               net,
                                               iter_val=iter_val)

        if not net.BlobIsDefined(learning_rate_blob):
            # There is one interesting thing here: since we are minimizing, we are
            # doing "descent" so the learning rate is set to be negative.
            lr = net.LearningRate([iteration],
                                  learning_rate_blob,
                                  base_lr=-base_learning_rate,
                                  policy=policy,
                                  **kwargs)
        else:
            lr = net.GetBlobRef(learning_rate_blob)

        if self._lr_multiplier is not None:
            lr_multiplier = net.CopyFromCPUInput(
                self._lr_multiplier,
                self.make_unique_blob_name('lr_multiplier'))

            lr = net.Mul(
                [lr, lr_multiplier],
                self.make_unique_blob_name('scaled_lr'),
                broadcast=1,
            )

        if self._local_lr_multiplier is not None:
            current_scope = scope.CurrentDeviceScope()
            if (current_scope is not None
                    and current_scope.device_type == caffe2_pb2.CUDA
                    and not self._local_lr_multiplier_on_gpu):
                local_lr_multiplier = net.CopyFromCPUInput(
                    self._local_lr_multiplier,
                    self.make_unique_blob_name('local_lr_multiplier'))
            else:
                local_lr_multiplier = self._local_lr_multiplier

            lr = net.Mul(
                [lr, local_lr_multiplier],
                self.make_unique_blob_name('local_scaled_lr'),
                broadcast=1,
            )

        return lr, iteration
Code Example #20
    def get_lr_blob_name(self):
        """Returns an LR blob name.
        The name will be unique to the current device and optimizer instance.
        """
        classname = self.__class__.__name__
        # Assumes a device scope is active; Code Example #22 shows a variant
        # that also handles CurrentDeviceScope() returning None.
        s = scope.CurrentDeviceScope()
        if s.device_type == caffe2_pb2.CUDA:
            return '%s_%d_lr_gpu%d' % (classname, self._instance_num,
                                       s.cuda_gpu_id)
        else:
            return '%s_%d_lr_cpu' % (classname, self._instance_num)
Code Example #21
File: data_workers.py Project: GeekLiB/caffe2-master
def init_data_input_workers(
    net,
    input_blob_names,
    fetch_fun,
    batch_size,
    num_worker_threads=2,
    input_source_name="train",
    max_buffered_batches=800,
    init_fun=None,
    external_loggers=None,
):
    global global_coordinator
    device_option = scope.CurrentDeviceScope()
    if (device_option is None):
        device_option = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)

    # Create coordinator object
    coordinator = DataInputCoordinator(
        net,
        input_blob_names,
        batch_size,
        device_option,
        scope.CurrentNameScope(),
        input_source_name,
        global_coordinator.get_queue(input_source_name, max_buffered_batches),
        init_fun=init_fun,
        external_loggers=external_loggers,
    )

    # Launch fetch worker threads
    worker_ids = [
        global_coordinator.get_new_worker_id()
        for i in range(num_worker_threads)
    ]
    workers = [
        threading.Thread(
            target=fetcher,
            name="data_workers fetcher id {}".format(worker_id),
            args=[
                coordinator, worker_id, fetch_fun, batch_size, input_blob_names
            ],
        ) for worker_id in worker_ids
    ]

    workers.append(
        threading.Thread(target=enqueuer,
                         name="Enqueuer {} {}".format(
                             input_source_name, scope.CurrentNameScope()),
                         args=[coordinator]))
    coordinator._workers = workers
    global_coordinator.add(coordinator)

    return global_coordinator
Code Example #22
    def get_lr_blob_name(self):
        """Returns an LR blob name.
        The name will be unique to the current device and optimizer instance.
        """
        current_scope = scope.CurrentDeviceScope()
        if current_scope is None:
            return self.get_cpu_lr_blob_name()

        if current_scope.device_type == caffe2_pb2.CUDA:
            return self.get_gpu_lr_blob_name(current_scope.cuda_gpu_id)
        else:
            return self.get_cpu_lr_blob_name()
Code Example #23
    def make_unique_blob_name(self, base_str):
        """
        Returns a blob name that will be unique to the current device
        and optimizer instance.
        """
        current_scope = scope.CurrentDeviceScope()
        if current_scope is None:
            return self.get_cpu_blob_name(base_str)

        if current_scope.device_type == caffe2_pb2.CUDA:
            return self.get_gpu_blob_name(base_str, current_scope.cuda_gpu_id)
        else:
            return self.get_cpu_blob_name(base_str)
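Code Examples #20, #22 and #23 all derive auxiliary blob names from the active device scope so that every CPU and GPU gets its own copy. The toy sketch below shows the naming pattern; the helper name, the FakeScope class and the literal device-type constant are made up for illustration (caffe2_pb2.CUDA is 1 in the protobuf enum, but the snippets above use the symbolic name).

GPU_DEVICE_TYPE = 1  # stands in for caffe2_pb2.CUDA in the snippets above


def device_unique_name(base, device_scope=None):
    """Suffix a base blob name with the device it belongs to.

    device_scope is assumed to be None (treated as CPU) or an object with
    device_type and cuda_gpu_id fields, as in the examples above.
    """
    if device_scope is not None and device_scope.device_type == GPU_DEVICE_TYPE:
        return "{}_gpu{}".format(base, device_scope.cuda_gpu_id)
    return "{}_cpu".format(base)


class FakeScope(object):
    def __init__(self, device_type, cuda_gpu_id):
        self.device_type = device_type
        self.cuda_gpu_id = cuda_gpu_id


print(device_unique_name("lr"))                   # lr_cpu
print(device_unique_name("lr", FakeScope(1, 3)))  # lr_gpu3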
Code Example #24
File: cnn.py Project: thatguymike/caffe2
    def Accuracy(self, blob_in, blob_out, **kwargs):
        dev = kwargs['device_option'] if 'device_option' in kwargs \
            else scope.CurrentDeviceScope()

        blobs_in_dev = []
        # If device_option is CPU (or None, so assumed to be CPU),
        # nothing needs to be done.
        if dev is None or dev.device_type == caffe2_pb2.CPU:
            blobs_in_dev = blob_in
        else:
            # Otherwise insert copy operators
            pred_host = self.net.CopyGPUToCPU(blob_in[0], blob_in[0] + "_host")
            label_host = self.net.CopyGPUToCPU(blob_in[1], blob_in[1] + "_host")
            blobs_in_dev = [pred_host, label_host]

        # Now use the Host version of the accuracy op
        self.net.Accuracy(blobs_in_dev, blob_out,
                          device_option=core.DeviceOption(caffe2_pb2.CPU, 0),
                          **kwargs)
Code Example #25
File: normalization.py Project: zxspring21/caffe2
def lrn(model, blob_in, blob_out, order="NCHW", use_cudnn=False, **kwargs):
    """LRN"""
    dev = kwargs['device_option'] if 'device_option' in kwargs \
        else scope.CurrentDeviceScope()
    is_cpu = dev is None or dev.device_type == caffe2_pb2.CPU
    if use_cudnn and (not is_cpu):
        kwargs['engine'] = 'CUDNN'
        blobs_out = blob_out
    else:
        blobs_out = [blob_out, "_" + blob_out + "_scale"]
    lrn = model.net.LRN(blob_in, blobs_out, order=order, **kwargs)

    if use_cudnn and (not is_cpu):
        return lrn
    else:
        return lrn[0]
Code Example #26
File: train.py Project: GeekLiB/caffe2-master
def Accuracy(model, blob_in, blob_out, **kwargs):
    dev = kwargs['device_option'] if 'device_option' in kwargs \
        else scope.CurrentDeviceScope()
    is_cpu = dev is None or dev.device_type == caffe2_pb2.CPU

    # We support top_k > 1 only on CPU
    if not is_cpu and 'top_k' in kwargs and kwargs['top_k'] > 1:
        pred_host = model.net.CopyGPUToCPU(blob_in[0], blob_in[0] + "_host")
        label_host = model.net.CopyGPUToCPU(blob_in[1], blob_in[1] + "_host")

        # Now use the Host version of the accuracy op
        model.net.Accuracy([pred_host, label_host],
                           blob_out,
                           device_option=core.DeviceOption(caffe2_pb2.CPU, 0),
                           **kwargs)
    else:
        model.net.Accuracy(blob_in, blob_out)
Code Example #27
def init_data_input_workers(
    net,
    input_blob_names,
    fetch_fun,
    batch_size,
    num_worker_threads=2,
    input_source_name="train",
    max_buffered_batches=100,
):
    global global_coordinator
    device_option = scope.CurrentDeviceScope()
    if (device_option is None):
        device_option = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)

    # Create coordinator object
    coordinator = DataInputCoordinator(
        net,
        input_blob_names,
        batch_size,
        device_option,
        scope.CurrentNameScope(),
        input_source_name,
        max_buffered_batches,
    )

    # Launch fetch worker threads
    workers = [
        threading.Thread(
            target=fetcher,
            args=[
                coordinator, global_coordinator._fetcher_id_seq + i, fetch_fun,
                batch_size, input_blob_names
            ],
        ) for i in range(num_worker_threads)
    ]
    global_coordinator._fetcher_id_seq += num_worker_threads

    workers.append(threading.Thread(target=enqueuer, args=[coordinator]))
    coordinator._workers = workers
    global_coordinator.add(coordinator)

    return global_coordinator
Code Example #28
def _Workspace_feed_blob(ws, name, arr, device_option=None):
    if type(arr) is caffe2_pb2.TensorProto:
        arr = utils.Caffe2TensorToNumpyArray(arr)
    if type(arr) is np.ndarray and arr.dtype.kind in 'SU':
        # Plain NumPy strings are weird, let's use objects instead
        arr = arr.astype(np.object)

    if device_option is None:
        device_option = scope.CurrentDeviceScope()

    if device_option and device_option.device_type == caffe2_pb2.CUDA:
        if arr.dtype == np.dtype('float64'):
            logger.warning(
                "CUDA operators do not support 64-bit doubles, " +
                "please use arr.astype(np.float32) or np.int32 for ints." +
                " Blob: {}".format(name) + " type: {}".format(str(arr.dtype)))

    name = StringifyBlobName(name)
    print("device option is:")
    print(device_option)
    return ws.create_blob(name).feed(arr)
Code Example #29
File: optimizer.py Project: zhangfan-neu/pytorch
    def _run(self, net, param_init_net, param_info):
        param = param_info.blob
        grad = param_info.grad
        if self.base_learning_rate == 0:
            return
        assert self.base_learning_rate > 0, (
            "Expect positive base learning rate, got {}".format(
                self.base_learning_rate))

        self._clear_local_lr_multiplier()

        # TODO(zqq): support LARS for sparse parameters
        if self.lars is not None and not isinstance(grad, core.GradientSlice):
            assert self.lars >= 0, (
                'Lars offset must be nonnegative, got {}'.format(self.lars))
            lr_lars_multiplier = net.Lars(
                [param, grad],
                self.make_unique_blob_name(str(param) + "_lars"),
                offset=self.lars)
            current_scope = scope.CurrentDeviceScope()
            self._add_local_lr_multiplier(
                lr_lars_multiplier,
                is_gpu_blob=(current_scope is not None
                             and current_scope.device_type == caffe2_pb2.CUDA),
            )

        # We need negative sign for LR when used directly with WeightedSum
        # below.
        lr_sign = -1 if self.momentum else 1
        lr, _ = self.build_lr(net,
                              param_init_net,
                              base_learning_rate=self.base_learning_rate *
                              lr_sign,
                              policy=self.policy,
                              **(self.init_kwargs))

        dev = scope.CurrentDeviceScope()
        if dev is None:
            dev = core.DeviceOption(caffe2_pb2.CPU)

        # Each GPU/CPU must have its own ONE blob, thus modify the name
        # to include device information.
        ONE = param_init_net.ConstantFill([],
                                          "ONE_{}_{}{}".format(
                                              dev.device_type, dev.cuda_gpu_id,
                                              dev.node_name),
                                          shape=[1],
                                          value=1.0)

        self._aux_params.shared.append(ONE)

        if self.momentum > 0:
            momentum_data = param_init_net.ConstantFill(param,
                                                        str(param) +
                                                        "_momentum",
                                                        value=0.)
            self._aux_params.local.append(momentum_data)

        if isinstance(grad, core.GradientSlice):
            grad = self.dedup(net, self.sparse_dedup_aggregator, grad)
            if self.momentum > 0.:
                net.SparseMomentumSGDUpdate(
                    [grad.values, momentum_data, lr, param, grad.indices],
                    [grad.values, momentum_data, param],
                    momentum=self.momentum,
                    nesterov=self.nesterov)
            else:
                net.ScatterWeightedSum(
                    [param, ONE, grad.indices, grad.values, lr], param)
        else:
            if self.momentum > 0.:
                net.MomentumSGDUpdate([grad, momentum_data, lr, param],
                                      [grad, momentum_data, param],
                                      momentum=self.momentum,
                                      nesterov=self.nesterov)
            else:
                coeff = lr

                net.WeightedSum([param, ONE, grad, coeff], param)
Code Example #30
File: optimizer.py Project: zhangfan-neu/pytorch
    def _run(self, net, param_init_net, param_info):
        param = param_info.blob
        grad = param_info.grad

        if self.alpha <= 0:
            return

        self._clear_local_lr_multiplier()

        if self.lars is not None and not isinstance(grad, core.GradientSlice):
            assert self.lars >= 0, (
                'Lars offset must be nonnegative, got {}'.format(self.lars))
            lr_lars_multiplier = net.Lars(
                [param, grad],
                self.make_unique_blob_name(str(param) + "_lars"),
                offset=self.lars)
            current_scope = scope.CurrentDeviceScope()
            self._add_local_lr_multiplier(
                lr_lars_multiplier,
                is_gpu_blob=(current_scope is not None
                             and current_scope.device_type == caffe2_pb2.CUDA),
            )

        lr, _ = self.build_lr(net,
                              param_init_net,
                              base_learning_rate=self.alpha,
                              policy=self.policy,
                              **(self.init_kwargs))

        if self.rowWise:
            shapes, types = workspace.InferShapesAndTypes([param_init_net])
            if str(param) not in shapes:
                # Type/shape inference is not available for this param, fallback
                # on Shape/Slice logic
                shape = param_init_net.Shape(param, str(param) + "_shape")
                num_rows = param_init_net.Slice([shape],
                                                str(shape) + "_numrows",
                                                starts=[0],
                                                ends=[1])
                param_squared_sum = param_init_net.ConstantFill(
                    num_rows,
                    str(param) + "_avg_squared_sum",
                    input_as_shape=1,
                    value=0.0)
            else:
                param_squared_sum = param_init_net.ConstantFill(
                    [],
                    str(param) + "_avg_squared_sum",
                    shape=[shapes[str(param)][0]],
                    value=0.0)

        else:
            param_squared_sum = param_init_net.ConstantFill([param],
                                                            str(param) +
                                                            "_squared_sum",
                                                            value=0.0)

        self._aux_params.local.append(param_squared_sum)

        if self.rowWise:
            assert isinstance(grad, core.GradientSlice),\
                'If SparseAdagrad with rowWise=True, gradient must be '\
                'a GradientSlice. Please ensure that rowWise is not enabled '\
                'for the dense Adagrad optimizer, as it is not supported.'
        if isinstance(grad, core.GradientSlice):
            assert self.decay == 1.,\
                'Decay is not implemented for SparseAdagrad and must be set to 1'
            grad = self.dedup(net, self.sparse_dedup_aggregator, grad)
            if self.rowWise:
                op = 'RowWiseSparseAdagrad'
            else:
                op = 'SparseAdagrad'
            net.__getattr__(op)(
                [param, param_squared_sum, grad.indices, grad.values, lr],
                [param, param_squared_sum],
                epsilon=self.epsilon,
                engine=self.engine)
        else:
            output_args = [param, param_squared_sum]
            if self.output_effective_lr_and_update:
                output_args.append(str(param) + '_effective_lr')
                output_args.append(str(param) + '_update')
            elif self.output_effective_lr:
                output_args.append(str(param) + '_effective_lr')

            net.Adagrad([param, param_squared_sum, grad, lr],
                        output_args,
                        epsilon=self.epsilon,
                        decay=float(self.decay),
                        engine=self.engine)
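The dense branch at the end corresponds to the textbook Adagrad rule: the squared-gradient accumulator grows by the squared gradient, and the step is scaled by its inverse square root. Below is a simplified NumPy sketch of one such update; it ignores the decay argument and the optional effective-lr outputs, and assumes the lr blob built above is negative so that adding lr * grad descends.

import numpy as np


def adagrad_step(param, squared_sum, grad, lr, epsilon=1e-12):
    # Accumulate squared gradients, then take a per-coordinate scaled step.
    squared_sum = squared_sum + grad * grad
    param = param + lr * grad / (np.sqrt(squared_sum) + epsilon)
    return param, squared_sum


param = np.zeros(3, dtype=np.float32)
squared_sum = np.zeros(3, dtype=np.float32)
grad = np.array([0.3, -0.1, 0.2], dtype=np.float32)
lr = -0.01

param, squared_sum = adagrad_step(param, squared_sum, grad, lr)
print(param)        # first step has magnitude |lr|, opposite the gradient sign
print(squared_sum)  # [0.09, 0.01, 0.04]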