def AddBlob(self, value=None, filler=None, enforce_no_grad=None):
    # Use a fixed name in the current workspace.
    # Note that a non-empty tensor scope will make it
    # impossible to load/save caffe models. You should use
    # a new workspace instead of the name scope.
    scoped_name = _scope.get_default_name_scope() + self._name
    param_name = scoped_name + '/param:{}'.format(len(self._blobs))

    # Set the name explicitly
    variable = _Tensor.Ref(param_name)
    variable_grad = _Tensor.Ref(param_name + '_grad')

    if filler is not None:
        variable.Fill(**filler)
    else:
        # Register a constant filler by default
        value = value if value is not None else 0
        variable.Constant(value=value)

    # Determine whether the gradient has been disabled explicitly
    if enforce_no_grad is not None:
        variable_grad = None

    # Append to the blobs
    self._blobs.append({'data': variable, 'diff': variable_grad})
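
# A minimal usage sketch for ``AddBlob`` (hypothetical): the filler dict is
# forwarded to ``Tensor.Fill`` as keyword arguments, so the keys below are
# assumptions about that signature rather than a documented contract.
#
# >>> layer.AddBlob(filler={'type': 'xavier'})       # filler-initialized param
# >>> layer.AddBlob(value=1.)                        # constant-filled param
# >>> layer.AddBlob(value=0., enforce_no_grad=True)  # param without a diff blob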
def Setup(self):
    """Set up the net.

    Returns
    -------
    None

    References
    ----------
    The implementation of `Init(net.cpp, L44)`_.

    """
    self._net_outputs = set()
    for layer in self._layers:
        bottom = []
        for bottom_name in layer._bottom:
            if bottom_name not in self._blobs:
                raise RuntimeError('bottom({}) is unknown.'.format(bottom_name))
            bottom.append(self._blobs[bottom_name])
            if bottom_name in self._net_outputs:
                self._net_outputs.remove(bottom_name)
        outputs = layer.Setup([blob['data'] for blob in bottom])
        if not isinstance(outputs, (list, tuple)):
            outputs = [outputs]
        for idx, top in enumerate(layer._top):
            self._blobs[top] = {
                'data': outputs[idx],
                'diff': _Tensor.Ref(outputs[idx].name + '_grad'),
            }
            self._net_outputs.add(top)
def _set_param(
    self,
    layer_id,
    param_id,
    param_type,
    param,
):
    if isinstance(param, numpy.ndarray):
        param_temp = _Tensor.Ref('/tmp/rnn_param')
        param_temp.set_value(param)
        param = param_temp
    else:
        raise ValueError('Expected a numpy array.')
    self.weights.expressions = dict()  # Clear the cached expressions
    outputs = RNNParamSet(
        inputs=[self.weights, param],
        layer_id=layer_id,
        param_id=param_id,
        param_type=param_type,
        rnn_mode=self.mode,
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        num_directions=self.num_directions,
    )
    for v in outputs.expressions.values():
        _workspace.RunOperator(v)
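
# A usage sketch for ``_set_param`` (hypothetical values): assigning a numpy
# array to one RNN parameter. The ``param_type`` string and the shape are
# assumptions; only the numpy-array requirement comes from the code above.
#
# >>> w = numpy.random.randn(64, 32).astype('float32')
# >>> rnn._set_param(layer_id=0, param_id=0, param_type='matrix', param=w)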
def dragon(self):
    """Create a dragon tensor sharing this tensor.

    Returns
    -------
    dragon.Tensor
        The dragon tensor.

    """
    if isinstance(self._tensor, str):
        return _Tensor.Ref(self._tensor, shape=self.shape, dtype=self.dtype)
    else:
        return self._tensor
def NetInit(self, proto_txt, phase='TRAIN'):
    """Construct a Net from the ``proto_txt`` file.

    Parameters
    ----------
    proto_txt : str
        The path of the ``proto_txt`` file.
    phase : str
        The phase, ``TRAIN`` or ``TEST``.

    Returns
    -------
    Net
        The net.

    References
    ----------
    The implementation of `Net_Init(_caffe.cpp, L109)`_.

    """
    self._net = _proto_def.NetParameter()
    with open(proto_txt, 'r') as f:
        _parse_text_proto(f.read(), self._net)
    self._phase = phase
    self._layers = []
    self._inputs_to_tensors = {}
    if not hasattr(self, '_blobs'):
        self._blobs = {}
    self._losses, self._trainable_vars = [], []

    if len(self._net.input) > 0:
        for input in self._net.input:
            if input not in self._blobs:
                variable = _Tensor(input).Variable()
                self._blobs[input] = {
                    'data': variable,
                    'diff': _Tensor.Ref(variable.name + '_grad'),
                }
            self._inputs_to_tensors[input] = self._blobs[input]['data']

    for layer in self._net.layer:
        if not self.FilterLayer(layer):
            continue
        self._layers.append(getattr(
            _layer_factory, layer.type + 'Layer')(layer))

    self.Setup()

    for layer in self._net.layer:
        if not self.FilterLayer(layer):
            continue
        self.CheckBackward(layer)
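
# A usage sketch for ``NetInit`` (the path is hypothetical), mirroring the
# pycaffe ``caffe.Net(proto_txt, phase)`` convention that this method
# reimplements, assuming it is bound as the ``Net`` constructor:
#
# >>> net = Net('deploy.prototxt', phase='TEST')
# >>> sorted(net._blobs.keys())  # blobs collected from inputs and layer tops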
def grad(cost, wrt, **kwargs):
    """Compute the gradients for variables with respect to the cost.

    Parameters
    ----------
    cost : Tensor
        The cost.
    wrt : Tensor or list of Tensor
        The variables w.r.t. the cost.

    Returns
    -------
    Tensor or list of Tensor
        The gradients of the variables.

    Examples
    --------
    >>> import dragon as dg
    >>> x = dg.Tensor('x').Variable()
    >>> y = x * 2
    >>> dx = grad(y, x)

    >>> z = dg.Tensor('z').Variable()
    >>> y = x + z
    >>> dx, dz = grad(y, [x, z])

    """
    grads = []
    if not isinstance(wrt, list):
        wrt = [wrt]
    for w in wrt:
        cost.gradient.add_wrt(w.name)
        w.gradient.add_cost(cost)
        grads.append(
            _Tensor.Ref(
                name=w.name + '_grad',
                shape=w.shape,
                dtype=w.dtype,
            ))
    if len(grads) == 1:
        return grads[0]
    return grads
def constant(
    value,
    dtype=None,
    shape=None,
    name=None,
    verify_shape=False,
):
    if dtype is not None:
        if isinstance(value, numpy.ndarray):
            value = value.astype(dtype.as_numpy_dtype)
        else:
            value = numpy.array(value, dtype.as_numpy_dtype)
    else:
        if not isinstance(value, numpy.ndarray):
            value = numpy.array(value)
            # Discard the default float64
            if value.dtype == numpy.float64:
                value = value.astype(numpy.float32)

    # Determine the shape
    if shape is not None:
        if value.size == 1:
            # Case 1: Broadcast the scalar value to the shape
            scalar = value.flatten()[0]
            value = numpy.empty(shape, value.dtype)
            value.fill(scalar)
        else:
            # Case 2: Reshape directly
            if verify_shape:
                if len(shape) != len(value.shape):
                    raise RuntimeError(
                        'The constant was limited to {} dimensions, '
                        'while feeding a value with {} dimensions.'
                        .format(len(shape), len(value.shape)))
                for i in range(len(shape)):
                    if shape[i] is None:
                        continue
                    if shape[i] != value.shape[i]:
                        raise RuntimeError(
                            'The shape of the constant was limited to (' +
                            ','.join([str(dim) for dim in shape]) + '), ' +
                            'while feeding a value with (' +
                            ','.join([str(dim) for dim in value.shape]) + ').')
            value = value.reshape(shape)

    # Get an available name
    defined_name = _workspace.GetDummyName(
        basename=_scope.get_default_name_scope() +
                 (name if name else 'Const'),
        suffix=':0',
        domain='Tensor',
    )

    # Feed into the workspace
    return _Tensor.Ref(
        name=defined_name,
        shape=list(value.shape),
        dtype=str(value.dtype),
    ).set_value(value)
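
# A usage sketch for ``constant``: a scalar is broadcast when ``shape`` is
# given, while a multi-element value is reshaped (and optionally verified).
# The ``dtype`` objects are assumed to expose ``as_numpy_dtype``, as the
# TensorFlow-style dtypes this frontend mirrors do.
#
# >>> a = constant(1., shape=[2, 3])               # broadcast to (2, 3)
# >>> b = constant(numpy.arange(6), shape=[2, 3])  # reshaped to (2, 3)
# >>> c = constant(numpy.zeros((2, 3)), shape=[2, 3], verify_shape=True)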
def _try_get_tensor(name=None):
    """Try to create or get a tensor."""
    if name is None or name == '':
        return Tensor()
    else:
        return Tensor.Ref(name)