def NetInit(self, prototxt, phase='TRAIN'):
    self._net = pb.NetParameter()
    Parse(open(prototxt, 'r').read(), self._net)
    self._phase = phase
    self._layers = []
    if not hasattr(self, '_blobs'): self._blobs = {}
    self._params = {}
    self._swap_blobs = {}
    self._inputs_to_tensors = {}
    self._costs = []
    self._wrts = []
    self._lr_mults = []
    self._decay_mults = []

    if len(self._net.input) > 0:
        for input in self._net.input:
            if input not in self._blobs:
                # Create new tensors
                self._blobs[input] = {
                    'data': Tensor(input).Variable(),
                    'diff': Tensor(input + '_grad'),
                }
                self._inputs_to_tensors[input] = self._blobs[input]['data']

    for layer in self._net.layer:
        if not self.FilterNet(layer): continue
        self._layers.append(getattr(layers, layer.type + 'Layer')(layer))

    self.Setup()

    for layer in self._net.layer:
        if not self.FilterNet(layer): continue
        self.CheckBackward(layer)
def __init__(self, LayerParameter):
    super(BatchNormLayer, self).__init__(LayerParameter)
    param = LayerParameter.batch_norm_param
    self._param = {
        'use_stats': int(param.use_global_stats)
            if param.HasField('use_global_stats') else -1,
        'momentum': param.moving_average_fraction,
        'eps': param.eps,
        'axis': 1,
        'mode': 'CAFFE',
    }
    # mean, var, factor are set to 0 in order to do statistics
    mean = Tensor(LayerParameter.name + '@param0').Constant(value=0.0)
    var = Tensor(LayerParameter.name + '@param1').Constant(value=0.0)
    factor = Tensor(LayerParameter.name + '@param2').Constant(value=0.0)
    # In Dragon, setting ``diff`` to None skips the gradient computation
    # automatically, while in BVLC-Caffe you must set lr_mult = 0 manually
    self._blobs.append({'data': mean, 'diff': None})
    self._blobs.append({'data': var, 'diff': None})
    self._blobs.append({'data': factor, 'diff': None})
def zeros(shape, dtype=None):
    """Initialize a tensor with zeros.

    If ``dtype`` is ``None``, ``config.floatX`` will be used.

    Parameters
    ----------
    shape : tuple or list
        The shape of the Tensor.
    dtype : str or None
        The data type of the Tensor.

    Returns
    -------
    Tensor
        The initialized tensor.

    """
    if dtype is None:
        dtype = config.floatX
    elif dtype not in _DATA_TYPES:
        raise TypeError("Unsupported data type: {}".format(dtype))
    np_value = np.zeros(shape, dtype=_DATA_TYPES[dtype])
    output = Tensor(shape=shape, dtype=dtype)
    output.set_value(np_value)
    return output
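# A minimal usage sketch (hypothetical calls; running them requires the
# surrounding framework). `zeros` falls back to ``config.floatX`` and rejects
# unknown dtypes, and the sibling `ones` / `constant` helpers below follow
# the same pattern:
#
#     x = zeros((2, 3))                 # dtype == config.floatX
#     y = zeros((4,), dtype='int32')    # an explicitly supported dtype
#     z = zeros((4,), dtype='bogus')    # raises TypeError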
def __init__(self, LayerParameter):
    super(ConvolutionLayer, self).__init__(LayerParameter)
    param = LayerParameter.convolution_param
    self._param = {
        'num_output': param.num_output,
        'kernel_size': [int(element) for element in param.kernel_size],
        'stride': [int(element) for element in param.stride] if len(param.stride) > 0 else [1],
        'pad': [int(element) for element in param.pad] if len(param.pad) > 0 else [0],
        'dilation': [int(element) for element in param.dilation] if len(param.dilation) > 0 else [1],
        'group': int(param.group),
        'padding': 'VALID',
        'data_format': 'NCHW',
    }
    # The explicit spatial pairs (kernel_h/kernel_w, ...) override the repeated fields
    if param.HasField('kernel_h'):
        assert param.HasField('kernel_w')
        self._param['kernel_size'] = [param.kernel_h, param.kernel_w]
    if param.HasField('stride_h'):
        assert param.HasField('stride_w')
        self._param['stride'] = [param.stride_h, param.stride_w]
    if param.HasField('pad_h'):
        assert param.HasField('pad_w')
        self._param['pad'] = [param.pad_h, param.pad_w]
    scope = LayerParameter.name
    weight = Tensor(scope + '/param:0')
    weight_diff = Tensor(scope + '/param:0_grad')
    if len(LayerParameter.param) > 0:
        if LayerParameter.param[0].lr_mult <= 0:
            # A non-positive lr_mult freezes the weight
            weight_diff = None
    self.Fill(weight, param, 'weight_filler')
    self._blobs.append({'data': weight, 'diff': weight_diff})
    if param.bias_term:
        bias = Tensor(scope + '/param:1')
        bias_diff = Tensor(scope + '/param:1_grad')
        self.Fill(bias, param, 'bias_filler')
        if len(LayerParameter.param) > 1:
            if LayerParameter.param[1].lr_mult <= 0:
                bias_diff = None
        self._blobs.append({'data': bias, 'diff': bias_diff})
def At(inputs, indices=[], axis=0, acc_gradient=False, **kwargs):
    args = locals()
    kwargs = args['kwargs']
    del args['kwargs']
    kwargs = dict(args, **kwargs)
    if isinstance(inputs, list):
        if len(inputs) != 2:
            raise TypeError('At Operator accepts a list of 2 Tensors')
    elif isinstance(inputs, Tensor):
        if not isinstance(indices, list):
            raise TypeError('At Operator accepts a list of indices')
        indices = np.array(indices, dtype=np.float32)
        tensor = GetTensorName()
        ws.FeedTensor(tensor, indices)
        kwargs['inputs'] = [kwargs['inputs'], Tensor(tensor)]
    output = Tensor.CreateOperator(op_type='At', nout=1, **kwargs)
    if isinstance(inputs, Tensor):
        if inputs.shape is not None:
            output.shape = inputs.shape[:]
            output.shape[axis] = len(indices)
    return output
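# Usage sketch (hypothetical; `At` behaves like a gather along `axis`, so the
# output shape replaces that axis with ``len(indices)``):
#
#     x = Tensor('x', shape=[4, 3]).Variable()
#     y = At(x, indices=[0, 2])    # y.shape == [2, 3]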
def __init__(self, LayerParameter):
    super(BatchRenormLayer, self).__init__(LayerParameter)
    param = LayerParameter.batch_renorm_param
    self._param = {
        'use_stats': int(param.use_global_stats)
            if param.HasField('use_global_stats') else -1,
        'momentum': param.moving_average_fraction,
        'eps': param.eps,
        'r_max': float(param.r_max),
        'd_max': float(param.d_max),
        't_delta': float(param.t_delta),
        'axis': 1,
        'mode': 'CAFFE',
    }
    mean = Tensor(LayerParameter.name + '@param0').Constant(value=0.0)
    var = Tensor(LayerParameter.name + '@param1').Constant(value=0.0)
    factor = Tensor(LayerParameter.name + '@param2').Constant(value=0.0)
    self._blobs.append({'data': mean, 'diff': None})
    self._blobs.append({'data': var, 'diff': None})
    self._blobs.append({'data': factor, 'diff': None})
def FromTensor(src, src_ctx=None, name=None, ctx=None):
    """Create a Tensor from an existing tensor.

    Parameters
    ----------
    src : str
        The name of the source tensor.
    src_ctx : dragon_pb2.DeviceOption
        The context of the source tensor.
    name : str
        The optional name for the destination tensor.
    ctx : dragon_pb2.DeviceOption
        The context for the destination tensor.

    Returns
    -------
    Tensor
        The tensor with the same data as the source.

    References
    ----------
    The wrapper of ``TensorFromTensorCC``.

    """
    if name is None:
        tensor = Tensor(name=name)
    else:
        tensor = Tensor(_name=name)
    if src_ctx is None: src_ctx = MakeDeviceOption(0, 0)  # CPUContext
    if ctx is None: ctx = MakeDeviceOption(0, 0)  # CPUContext
    TensorFromTensorCC(
        _stringify_tensor(tensor), _stringify_tensor(src),
        _stringify_proto(ctx), _stringify_proto(src_ctx))
    return tensor
def constant(x, name=None, shape=None, dtype=None):
    """Initialize a tensor with a constant value.

    If ``dtype`` is ``None``, ``config.floatX`` will be used.

    Parameters
    ----------
    x : basic numerical type
        The constant value.
    name : str or None
        The name of the Tensor.
    shape : list, tuple or None
        The shape of the Tensor.
    dtype : str or None
        The data type of the Tensor.

    Returns
    -------
    Tensor
        The initialized tensor.

    """
    if dtype is None:
        dtype = config.floatX
    elif dtype not in _DATA_TYPES:
        raise TypeError("Unsupported data type: {}".format(dtype))
    if shape is None: shape = ()
    np_value = x * np.ones(shape, dtype=_DATA_TYPES[dtype])
    output = Tensor(name=name, shape=shape, dtype=dtype)
    output.set_value(np_value)
    return output
def FromPyArray(array, name=None):
    """Create a Tensor from an existing ndarray.

    Note that the Tensor memory is ``zero-copied``,
    i.e., shared with the original array.

    Parameters
    ----------
    array : numpy.ndarray
        The array for creating the tensor.
    name : str
        The optional tensor name.

    Returns
    -------
    Tensor
        The tensor sharing the memory with the original array.

    References
    ----------
    The wrapper of ``TensorFromPyArrayCC``.

    """
    if name is None:
        tensor = Tensor(name=name)
    else:
        tensor = Tensor(_name=name)
    if not isinstance(array, np.ndarray):
        raise TypeError('The given nd-array should be numpy.ndarray.')
    TensorFromPyArrayCC(_stringify_tensor(tensor), array)
    return tensor
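# A zero-copy sketch (hypothetical): since the Tensor aliases the array's
# memory, in-place NumPy writes are visible through the Tensor without a copy:
#
#     arr = np.ones((2, 3), dtype=np.float32)
#     t = FromPyArray(arr, name='shared_buffer')
#     arr[0, 0] = 5.0    # t observes the new value as well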
def FromShape(shape, dtype='float32', ctx=None, name=None):
    """Create a Tensor from the shape.

    Parameters
    ----------
    shape : tuple or list
        The shape info.
    dtype : str
        The data type.
    ctx : dragon_pb2.DeviceOption
        The context info.
    name : str
        The optional tensor name.

    Returns
    -------
    Tensor
        The tensor with the specific shape.

    References
    ----------
    The wrapper of ``TensorFromShapeCC``.

    """
    if name is None:
        tensor = Tensor(name=name)
    else:
        tensor = Tensor(_name=name)
    if not isinstance(shape, (tuple, list)):
        raise TypeError('The shape should be a tuple or list.')
    if ctx is None: ctx = MakeDeviceOption(0, 0)  # CPUContext
    TensorFromShapeCC(
        _stringify_tensor(tensor), list(shape), dtype, _stringify_proto(ctx))
    return tensor
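# Usage sketch (hypothetical): allocate a buffer of a given shape on the
# default (CPU) context:
#
#     t = FromShape((2, 3), dtype='float32', name='buffer')
#     t = FromShape((8,), dtype='int64')    # auto-generated name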
def convert_to_tensor(value, dtype=None, name=None, **kwargs):
    """Convert the given value to a Tensor.

    Parameters
    ----------
    value : basic type, list or numpy.ndarray
        The value to convert.
    dtype : DType or None
        The data type. If ``None``, inferred from the type of ``value``.
    name : str or None
        The optional name.

    Returns
    -------
    Tensor
        The output tensor.

    """
    if dtype is not None:
        if not isinstance(dtype, str):
            if isinstance(dtype, dtypes.DType):
                dtype = dtype.name
            else:
                raise ValueError('The dtype should be a str or a tf.DType.')
    tensor = Tensor(name=name, dtype=dtype)
    tensor.set_value(value)
    return tensor
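# Usage sketch (hypothetical): the dtype may be omitted, given as a string,
# or given as a tf-style DType object:
#
#     t1 = convert_to_tensor([1.0, 2.0])                    # inferred dtype
#     t2 = convert_to_tensor(np.zeros((2, 2)), 'float32')   # str dtype
#     t3 = convert_to_tensor(1, dtypes.int32)               # DType object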
def AddBlob(self, value=None, filler=None, enforce_no_grad=None):
    # Use a fixed name in the current workspace.
    # Note that a non-empty tensor scope will make it
    # impossible to load/save caffe models. You should use
    # a new workspace instead of the terrible name scope.
    scoped_name = _scope.get_default_name_scope() + self._name
    param_name = scoped_name + '/param:{}'.format(len(self._blobs))

    # Set the name explicitly
    variable = _Tensor.Ref(param_name)
    variable_grad = _Tensor.Ref(param_name + '_grad')

    if filler is not None:
        variable.Fill(**filler)
    else:
        # Register a constant filler by default
        value = value if value else 0
        variable.Constant(value=value)

    # Determine whether we have disabled the gradients explicitly
    if enforce_no_grad is not None:
        variable_grad = None

    # Append to the blobs
    self._blobs.append({'data': variable, 'diff': variable_grad})
def ones(shape, dtype=None):
    """Initialize a tensor with ones.

    If ``dtype`` is ``None``, ``config.floatX`` will be used.

    Parameters
    ----------
    shape : tuple or list
        The shape of the Tensor.
    dtype : str or None
        The data type of the Tensor.

    Returns
    -------
    Tensor
        The initialized tensor.

    """
    if dtype is None:
        dtype = config.floatX
    elif dtype not in _DATA_TYPES:
        raise TypeError("Unsupported data type: {}".format(dtype))
    np_value = np.ones(shape, dtype=_DATA_TYPES[dtype])
    output = Tensor(shape=shape, dtype=dtype)
    output.set_value(np_value)
    return output
def WrapConstants(constants, dtype='float32'):
    if not isinstance(constants, Tensor):
        if not isinstance(constants, np.ndarray):
            constants = np.array(constants, dtype=dtype)
        tensor = Tensor()
        tensor.set_value(constants)
        tensor.shape = constants.shape
        constants = tensor
    return constants
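# Usage sketch (hypothetical): `WrapConstants` normalizes plain values into
# Tensors while passing existing Tensors through untouched:
#
#     t = WrapConstants(1.0)                      # scalar -> 0-d Tensor
#     t = WrapConstants([[1, 2]], dtype='int32')  # nested list -> 2-d Tensor
#     t = WrapConstants(t)                        # already a Tensor, unchanged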
def weight_bias(self, weights_init=None, no_bias=False):
    if weights_init is None:
        weight = Tensor().Xavier()
    else:
        weight = weights_init
    if no_bias:
        self.network_params.extend([weight])
        return [weight]
    bias = Tensor().Constant(value=0)
    self.network_params.extend([weight, bias])
    return [weight, bias]
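# Usage sketch (hypothetical, from inside a layer builder): an Xavier-filled
# weight plus a zero-initialized bias, unless `no_bias` is set:
#
#     w, b = self.weight_bias()                # [Xavier weight, zero bias]
#     (w,) = self.weight_bias(no_bias=True)    # weight only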
def NetInit(self, proto_txt, phase='TRAIN'):
    """Construct a Net by the ``proto_txt`` file.

    Parameters
    ----------
    proto_txt : str
        The path of the ``proto_txt`` file.
    phase : str
        The phase, ``TRAIN`` or ``TEST``.

    Returns
    -------
    Net
        The net.

    References
    ----------
    The implementation of `Net_Init(_caffe.cpp, L109)`_.

    """
    self._net = pb.NetParameter()
    Parse(open(proto_txt, 'r').read(), self._net)
    self._phase = phase
    self._layers = []
    if not hasattr(self, '_blobs'): self._blobs = {}
    self._params = {}
    self._swap_tensors = {}
    self._inputs_to_tensors = {}
    self._costs = []
    self._wrts = []
    self._lr_mults = []
    self._decay_mults = []

    if len(self._net.input) > 0:
        for input in self._net.input:
            if input not in self._blobs:
                self._blobs[input] = {
                    'data': Tensor(input).Variable(),
                    'diff': Tensor(input + '_grad'),
                }
                self._inputs_to_tensors[input] = self._blobs[input]['data']

    for layer in self._net.layer:
        if not self.FilterLayer(layer): continue
        self._layers.append(getattr(layers, layer.type + 'Layer')(layer))

    self.Setup()

    for layer in self._net.layer:
        if not self.FilterLayer(layer): continue
        self.CheckBackward(layer)
def _plan_params(self):
    if self.mode == 'lstm': gate_size = 4 * self.hidden_size
    elif self.mode == 'gru': gate_size = 3 * self.hidden_size
    else: gate_size = self.hidden_size

    # 1. Plan the weights
    self._matrix_weights = []
    self._bias_weights = []
    for layer in range(self.num_layers):
        for direction in range(self.num_directions):
            layer_input_size = self.input_size if layer == 0 \
                else self.hidden_size * self.num_directions
            w_names = [
                'layer_{}/{}/{}'.format(layer, p, 'L' if direction == 0 else 'R')
                for p in ('matrix_ih', 'matrix_hh', 'bias_ih', 'bias_hh')
            ]
            w_ih = Tensor(name=w_names[0], shape=[gate_size, layer_input_size])
            w_hh = Tensor(name=w_names[1], shape=[gate_size, self.hidden_size])
            b_ih = Tensor(name=w_names[2], shape=[gate_size])
            b_hh = Tensor(name=w_names[3], shape=[gate_size])
            # W (0 ~ 3), R (4 ~ 7)
            self._matrix_weights.extend([w_ih, w_hh])
            # Bw (0 ~ 3), Br (4 ~ 7)
            self._bias_weights.extend([b_ih, b_hh])

    # 2. Compute the total number of parameters
    self._weights_count = 0
    for w in self._matrix_weights + self._bias_weights:
        self._weights_count += np.prod(w.shape)

    # 3. Register the packed weights
    self.weights = FromShape(
        shape=[self._weights_count],
        name=self.name + '/weights' if self.name else None)

    # 4. Create the initialization grids
    if self.mode == 'lstm': num_params_per_layer = 8
    elif self.mode == 'gru': num_params_per_layer = 6
    else: num_params_per_layer = 2
    self._matrix_init_grids = [
        [['orthogonal' for _ in range(num_params_per_layer)]
            for _ in range(self.num_directions)]
        for _ in range(self.num_layers)
    ]
    self._bias_init_grids = [
        [['zero' for _ in range(num_params_per_layer)]
            for _ in range(self.num_directions)]
        for _ in range(self.num_layers)
    ]
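# A runnable check of the packing arithmetic above (plain NumPy, no framework
# required). For a single-layer, unidirectional LSTM, gate_size = 4 * hidden,
# so the flat weight buffer holds 4H * (I + H + 2) values:
import numpy as np

input_size, hidden_size = 32, 64
gate_size = 4 * hidden_size  # 'lstm' mode
shapes = [
    (gate_size, input_size),   # w_ih
    (gate_size, hidden_size),  # w_hh
    (gate_size,),              # b_ih
    (gate_size,),              # b_hh
]
weights_count = sum(int(np.prod(s)) for s in shapes)
assert weights_count == gate_size * (input_size + hidden_size + 2)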
def __init__(self, LayerParameter):
    super(BNLayer, self).__init__(LayerParameter)
    bn_param = LayerParameter.batch_norm_param
    scale_param = LayerParameter.scale_param
    self._param = {
        'use_stats': int(bn_param.use_global_stats)
            if bn_param.HasField('use_global_stats') else -1,
        'momentum': bn_param.moving_average_fraction,
        'eps': bn_param.eps,
    }
    mean = Tensor(LayerParameter.name + '@param0').Constant(value=0.0)
    var = Tensor(LayerParameter.name + '@param1').Constant(value=0.0)
    scale = Tensor(LayerParameter.name + '@param2')
    scale_diff = Tensor(LayerParameter.name + '@param2_grad')
    bias = Tensor(LayerParameter.name + '@param3')
    bias_diff = Tensor(LayerParameter.name + '@param3_grad')
    if scale_param.HasField('filler'):
        self.Fill(scale, scale_param, 'filler')
    else:
        scale.Constant(value=1.0)
    self.Fill(bias, scale_param, 'bias_filler')
    self.norm_blobs = [{'data': mean, 'diff': None},
                       {'data': var, 'diff': None}]
    self.scale_blobs = [{'data': scale, 'diff': scale_diff},
                        {'data': bias, 'diff': bias_diff}]
    self._blobs.extend(self.norm_blobs)
    self._blobs.extend(self.scale_blobs)
def __init__(self, LayerParameter):
    super(InnerProductLayer, self).__init__(LayerParameter)
    param = LayerParameter.inner_product_param
    self._param = {
        'axis': param.axis,
        'num_output': param.num_output,
    }
    weight = Tensor(LayerParameter.name + '@param0')
    weight_diff = Tensor(LayerParameter.name + '@param0_grad')
    self.Fill(weight, param, 'weight_filler')
    self._blobs.append({'data': weight, 'diff': weight_diff})
    if param.bias_term:
        bias = Tensor(LayerParameter.name + '@param1')
        bias_diff = Tensor(LayerParameter.name + '@param1_grad')
        self.Fill(bias, param, 'bias_filler')
        self._blobs.append({'data': bias, 'diff': bias_diff})
def __init__(self, LayerParameter):
    super(NormalizeLayer, self).__init__(LayerParameter)
    param = LayerParameter.normalize_param
    self._l2norm_param = {
        'axis': 1,
        'num_axes': -1 if param.across_spatial else 1,
        'eps': param.eps,
    }
    self._scale_param = {
        'axis': 1,
        'num_axes': 0 if param.channel_shared else 1,
    }
    scale = Tensor(LayerParameter.name + '@param0')
    if param.HasField('scale_filler'):
        self.Fill(scale, param, 'scale_filler')
    else:
        scale.Constant(value=1.0)
    self.scale_blobs = [{'data': scale, 'diff': Tensor(scale.name + '_grad')}]
    self._blobs.extend(self.scale_blobs)
def __init__(self, LayerParameter):
    super(PReLULayer, self).__init__(LayerParameter)
    param = LayerParameter.prelu_param
    self._param = {
        'channel_shared': param.channel_shared,
        'data_format': 'NCHW',
    }
    slope = Tensor(LayerParameter.name + '@param0')
    slope_diff = Tensor(LayerParameter.name + '@param0_grad')
    if param.HasField('filler'):
        self.Fill(slope, param, 'filler')
    else:
        slope.Constant(value=0.25)
    self._blobs.append({'data': slope, 'diff': slope_diff})
def constant(value, dtype=None, shape=None, name=None):
    if dtype is None: dtype = dtypes.float32
    if isinstance(value, np.ndarray):
        feed = value.astype(dtype)
    elif isinstance(value, list):
        feed = np.array(value, dtype)
    else:
        feed = np.array([value], dtype)
    if shape is not None:
        if feed.size == 1:
            # Broadcast the scalar value to fill the shape
            c = feed[0]
            feed = np.zeros(shape, dtype)
            feed.fill(c)
        else:
            feed = feed.reshape(shape)
    tensor = Tensor(name)
    tensor.shape = list(feed.shape)
    ws.FeedTensor(tensor, feed)
    return tensor
def SparseSoftmaxFocalLoss(inputs, axis=1, normalization='VALID', ignore_labels=(),
                           alpha=0.5, gamma=2.0, eps=1e-10, neg_id=-1, **kwargs):
    """
    :param inputs: a list of Tensor containing [input, label]
    :param axis: an int of the axis along which to compute softmax
    :param normalization: a str of (UNIT, FULL, VALID, BATCH_SIZE, NONE)
    :param ignore_labels: a list of int containing the labels to ignore
    :param alpha: a float of the alpha value
    :param gamma: a float of the gamma value
    :param eps: a float of the eps value
    :return: a Tensor of loss with the shape (1,)
    """
    if not isinstance(inputs, list) or len(inputs) != 2:
        raise RuntimeError('SparseSoftmaxFocalLoss Operator accepts a list of 2 Tensors')
    args = locals()
    kwargs = args['kwargs']
    del args['kwargs']
    kwargs = dict(args, **kwargs)
    output = Tensor.CreateOperator(nout=1, op_type='SparseSoftmaxFocalLoss', **kwargs)
    if inputs[0].shape is not None:
        if normalization != 'UNIT':
            output.shape = [1]
        elif all(dim is not None for dim in inputs[0].shape):
            outer_dim = int(np.prod(inputs[0].shape[0:axis]))
            inner_dim = int(np.prod(inputs[0].shape[axis + 1:]))
            output.shape = [outer_dim * inner_dim]
        else:
            output.shape = [None]
    return output
def _set_param(self, layer_id, param_id, param_type, param):
    if isinstance(param, numpy.ndarray):
        param_temp = _Tensor.Ref('/tmp/rnn_param')
        param_temp.set_value(param)
        param = param_temp
    else:
        raise ValueError('Expected a numpy array.')
    self.weights.expressions = dict()  # Clear the cached expressions
    outputs = RNNParamSet(
        inputs=[self.weights, param],
        layer_id=layer_id,
        param_id=param_id,
        param_type=param_type,
        rnn_mode=self.mode,
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        num_directions=self.num_directions,
    )
    for k, v in outputs.expressions.items():
        _workspace.RunOperator(v)
def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs):
    """
    :param inputs: a list of Tensor containing [input, label]
    :param normalization: a str of (UNIT, FULL, BATCH_SIZE, NONE)
    :return: a Tensor of loss with the shape (1,)
    """
    if not isinstance(inputs, list) or len(inputs) != 2:
        raise RuntimeError(
            'SigmoidCrossEntropyLoss Operator accepts a list of 2 Tensors')
    args = locals()
    kwargs = args['kwargs']
    del args['kwargs']
    kwargs = dict(args, **kwargs)
    output = Tensor.CreateOperator(nout=1, op_type='SigmoidCrossEntropyLoss', **kwargs)
    if inputs[0].shape is not None:
        if normalization != 'UNIT':
            output.shape = [1]
        else:
            output.shape = inputs[0].shape[:]
    return output
def SoftmaxCrossEntropyLoss(inputs, axis=1, normalization='FULL', **kwargs):
    """
    :param inputs: a list of Tensor containing [input, label]
    :param normalization: a str of (UNIT, FULL, BATCH_SIZE, NONE)
    :return: a Tensor of loss with the shape (1,)
    """
    if not isinstance(inputs, list) or len(inputs) != 2:
        raise RuntimeError(
            'SoftmaxCrossEntropyLoss Operator accepts a list of 2 Tensors')
    args = locals()
    kwargs = args['kwargs']
    del args['kwargs']
    kwargs = dict(args, **kwargs)
    output = Tensor.CreateOperator(nout=1, op_type='SoftmaxCrossEntropyLoss', **kwargs)
    if inputs[0].shape is not None:
        if normalization != 'UNIT':
            output.shape = [1]
        elif all(dim is not None for dim in inputs[0].shape):
            outer_dim = int(np.prod(inputs[0].shape[0:axis]))
            inner_dim = int(np.prod(inputs[0].shape[axis + 1:]))
            output.shape = [outer_dim * inner_dim]
        else:
            output.shape = [None]
    return output
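# A runnable check (plain NumPy) of the shape rule used by the losses above:
# with UNIT normalization the loss keeps one value per softmax position,
# i.e. outer_dim * inner_dim elements for an (N, C, H, W) input with axis=1:
import numpy as np

shape, axis = (8, 10, 4, 4), 1
outer_dim = int(np.prod(shape[0:axis]))     # N
inner_dim = int(np.prod(shape[axis + 1:]))  # H * W
assert [outer_dim * inner_dim] == [8 * 4 * 4]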
def Generator(arguments):
    # Note: `name`, `name_v2`, `dtype` and `as_target` are presumably
    # closed over from the enclosing factory function.
    properties = arguments.get(name, None)
    if properties is None: return arguments
    desc_name = name_v2 if name_v2 else name
    if name_v2: del arguments[name]
    if not isinstance(properties, (list, tuple)):
        properties = [properties]
    # Check whether to use a desc tensor
    tensor_in_properties = False
    for property in properties:
        if isinstance(property, Tensor):
            tensor_in_properties = True
    if tensor_in_properties:
        properties_t = []
        for property in properties:
            if isinstance(property, Tensor):
                if as_target:
                    if 'extra_inputs' not in arguments:
                        arguments['extra_inputs'] = []
                    arguments['extra_inputs'].extend([property])
                properties_t.append(property.name)
            else:
                properties_t.append(Tensor.convert_to(property, dtype).name)
        arguments[desc_name] = None
        arguments[desc_name + '_desc'] = properties_t
    else:
        arguments[desc_name] = properties
    return arguments
def Pow(inputs, power, shift=None, scale=None, **kwargs):
    """
    :param inputs: a Tensor with any shape
    :param power: a float of the power
    :param shift: a float of the shift
    :param scale: a float of the scale
    :return: a Tensor of { [(x + shift) * scale] ^ power }
    """
    if not isinstance(inputs, Tensor):
        raise RuntimeError('Pow Operator accepts a Tensor as inputs')
    args = locals()
    kwargs = args['kwargs']
    del args['kwargs']
    kwargs = dict(args, **kwargs)
    kwargs['power'] = float(power)
    if kwargs['scale'] is not None: kwargs['scale'] = float(scale)
    if kwargs['shift'] is not None: kwargs['shift'] = float(shift)
    output = Tensor.CreateOperator(nout=1, op_type='Pow', **kwargs)
    if inputs.shape is not None:
        output.shape = inputs.shape[:]
    return output
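# The transform above composes to y = ((x + shift) * scale) ** power; a quick
# NumPy mirror of that arithmetic (not the operator itself):
import numpy as np

x = np.array([1.0, 2.0, 3.0])
shift, scale, power = 1.0, 0.5, 2.0
y = ((x + shift) * scale) ** power
assert np.allclose(y, [1.0, 2.25, 4.0])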
def Setup(self):
    """Setup the net.

    Returns
    -------
    None

    References
    ----------
    The implementation of `Init(net.cpp, L44)`_.

    """
    self._net_outputs = set()
    for layer in self._layers:
        bottom = []
        for bottom_name in layer._bottom:
            if bottom_name not in self._blobs:
                raise RuntimeError('bottom({}) is unknown.'.format(bottom_name))
            bottom.append(self._blobs[bottom_name])
            if bottom_name in self._net_outputs:
                self._net_outputs.remove(bottom_name)
        outputs = layer.Setup([blob['data'] for blob in bottom])
        if not isinstance(outputs, list): outputs = [outputs]
        for idx, top in enumerate(layer._top):
            self._blobs[top] = {
                'data': outputs[idx],
                'diff': Tensor(outputs[idx].name + '_grad'),
            }
            self._net_outputs.add(top)
def BatchNorm(inputs, momentum=0.9, eps=1e-3, use_stats=-1, inplace=True, **kwargs):
    """
    :param inputs: a list of 4 Tensors containing [input, mean, var, factor]
                   tips: mean, var and factor should be filled with 0 beforehand
    :param momentum: a float of the moving average factor
    :param eps: a float of the eps in sqrt(x + eps)
    :param use_stats: an int: set 0 or 1 to force not using / using the global stats;
                      set -1 to determine it by the phase (TRAIN or TEST)
                      tips: set -1 when training with a large batch size;
                      set 0 to skip the batch statistics (p.s. the statistics
                      will be poor if training with a small batch size)
    :return: a Tensor after BatchNorm, which speeds up convergence
    """
    if not isinstance(inputs, list) or len(inputs) < 4:
        raise TypeError('BatchNorm Operator accepts a list of 4 Tensors')
    args = locals()
    kwargs = args['kwargs']
    del args['kwargs']
    kwargs = dict(args, **kwargs)
    return Tensor.CreateOperator(nout=1, op_type='BatchNorm', **kwargs)
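# Usage sketch (hypothetical names): the caller pre-creates the three
# zero-filled statistics tensors, exactly as BatchNormLayer does above:
#
#     mean = Tensor('bn/mean').Constant(value=0.0)
#     var = Tensor('bn/var').Constant(value=0.0)
#     factor = Tensor('bn/factor').Constant(value=0.0)
#     y = BatchNorm([x, mean, var, factor], momentum=0.9, use_stats=-1)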
def MPIGather(inputs, root, mpi_rank=None, **kwargs):
    """
    :param inputs: a Tensor to gather
    :param root: an int of the root in the gather group
    :return: the gathered Tensor(s), one per rank
    """
    if not isinstance(inputs, Tensor):
        raise RuntimeError('MPIGather Operator accepts a Tensor as inputs')
    args = locals()
    kwargs = args['kwargs']
    del args['kwargs']
    kwargs = dict(args, **kwargs)
    if mpi_rank is None:
        num_nodes = mpi.size()
        kwargs['mpi_rank'] = [i for i in xrange(0, num_nodes)]
    if not isinstance(kwargs['mpi_rank'], list):
        kwargs['mpi_rank'] = [kwargs['mpi_rank']]
    if 'nout' in kwargs:
        if kwargs['nout'] != len(kwargs['mpi_rank']):
            raise RuntimeError('The specified nout is {}, but {} mpi nodes are provided.'
                               .format(kwargs['nout'], len(kwargs['mpi_rank'])))
        safe_nout = kwargs['nout']
    else:
        safe_nout = len(kwargs['mpi_rank'])
    comm, group = mpi.group(root, incl=mpi_rank)
    new_kwargs = {
        'inputs': kwargs['inputs'],
        'mpi_rank': kwargs['mpi_rank'],
        'comm': comm,
        'group': group,
    }
    return Tensor.CreateOperator(nout=safe_nout, op_type='MPIGather', **new_kwargs)
def constant(value, dtype=None, shape=None, name=None, verify_shape=False):
    # Determine the data type
    if dtype is None: dtype = dtypes.float32
    if isinstance(value, np.ndarray):
        feed = value.astype(dtype.as_numpy_dtype)
    elif isinstance(value, list):
        feed = np.array(value, dtype.as_numpy_dtype)
    else:
        feed = np.array([value], dtype.as_numpy_dtype)
    # Determine the shape
    if shape is not None:
        if feed.size == 1:
            # Case 1: broadcast with the scalar value
            c = feed[0]
            feed = np.zeros(shape, dtype.as_numpy_dtype)
            feed.fill(c)
        else:
            # Case 2: reshape directly
            if verify_shape:
                if len(shape) != len(feed.shape):
                    raise RuntimeError(
                        'The constant was limited to {} dimensions, '
                        'while feed a value with {} dimensions.'
                        .format(len(shape), len(feed.shape)))
                for i in xrange(len(shape)):
                    if shape[i] is None: continue
                    if shape[i] != feed.shape[i]:
                        raise RuntimeError(
                            'The shape of constant was limited as (' +
                            ','.join([str(dim) for dim in shape]) + '), ' +
                            'while feed a value with (' +
                            ','.join([str(dim) for dim in feed.shape]) + ').')
            feed = feed.reshape(shape)
    # Feed the value into the workspace
    tensor = Tensor(name)
    tensor.shape = list(feed.shape)
    ws.FeedTensor(tensor, feed)
    return tensor
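# Usage sketch (hypothetical): a scalar broadcasts to fill `shape`, a list is
# reshaped, and `verify_shape=True` rejects mismatched dimensions:
#
#     a = constant(7, shape=[2, 2])                              # filled with 7
#     b = constant([1, 2, 3, 4], shape=[2, 2])                   # reshaped
#     c = constant([1, 2, 3], shape=[3], verify_shape=True)      # ok
#     d = constant([1, 2, 3], shape=[2, 2], verify_shape=True)   # RuntimeError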
def grad(cost, wrt, **kwargs):
    """Compute the gradients for variables with respect to the cost.

    Parameters
    ----------
    cost : Tensor
        The cost.
    wrt : Tensor or list of Tensor
        The variables w.r.t the cost.

    Returns
    -------
    Tensor or list of Tensor
        The gradients of the variables.

    Examples
    --------
    >>> x = Tensor('x').Variable()
    >>> y = x * 2
    >>> dx = grad(y, x)

    >>> z = Tensor('z').Variable()
    >>> y = x + z
    >>> dx, dz = grad(y, [x, z])

    """
    grads = []
    if not isinstance(wrt, list): wrt = [wrt]
    for w in wrt:
        cost.grad_wrts.append(w.name)
        w.grad_objs.append(cost)
        w_grad = Tensor(w.name + '_grad')
        w_grad.extra_targets.add(cost.name)
        w_grad.expressions = cost.expressions
        w_grad.grad_wrts.append(w.name)
        grads.append(w_grad)
    if len(grads) == 1: return grads[0]
    return grads