Code Example #1
    def __init__(
        self,
        num_features,
        eps=1e-5,
        momentum=0.1,
        affine=True,
        track_running_stats=True,
    ):
        super(_BatchNorm, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine
        self.track_running_stats = track_running_stats
        if self.affine:
            self.weight = Parameter(Tensor(num_features))
            self.bias = Parameter(Tensor(num_features))
        else:
            self.register_buffer('weight', constant_ops.ones(num_features))
            self.register_buffer('bias', constant_ops.zeros(num_features))
        if self.track_running_stats:
            self.num_batches_tracked = 0
        else:
            self.num_batches_tracked = None
        self.register_buffer('running_mean', constant_ops.zeros(num_features))
        self.register_buffer('running_var', constant_ops.ones(num_features))
        self.reset_parameters()
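
With affine=True the scale and shift are learnable Parameters, while affine=False registers constant ones/zeros buffers under the same names, so downstream code can read weight and bias either way. A minimal usage sketch, assuming dragon.vm.torch exposes a PyTorch-style nn.BatchNorm2d built on this _BatchNorm base:

# Minimal sketch; nn.BatchNorm2d is assumed to derive from the
# _BatchNorm base shown above.
from dragon.vm.torch import nn

bn = nn.BatchNorm2d(64)                      # affine=True: learnable weight/bias
bn_fixed = nn.BatchNorm2d(64, affine=False)  # constant ones/zeros buffers
# Running statistics are registered as buffers in both cases.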
Code Example #2
File: normalization.py  Project: seetaresearch/dragon
    def __init__(
        self,
        num_groups,
        num_channels,
        eps=1e-5,
        affine=True,
    ):
        r"""Create a ``GroupNorm`` module.

        Parameters
        ----------
        num_groups : int
            The number of groups.
        num_channels : int
            The number of channels.
        eps : float, optional, default=1e-5
            The value of :math:`\epsilon`.
        affine : bool, optional, default=True
            ``True`` to apply an affine transformation.

        """
        super(GroupNorm, self).__init__()
        self.num_groups = num_groups
        self.num_channels = num_channels
        self.eps = eps
        self.affine = affine
        if self.affine:
            self.weight = Parameter(Tensor(num_channels))
            self.bias = Parameter(Tensor(num_channels))
        else:
            self.register_buffer('weight', constant_ops.ones(num_channels))
            self.register_buffer('bias', constant_ops.zeros(num_channels))
        self.reset_parameters()
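
Note that the affine parameters are allocated per channel, so their shape does not depend on num_groups. A short sketch under the same usage assumptions as above:

from dragon.vm.torch import nn

gn = nn.GroupNorm(num_groups=8, num_channels=64)
# gn.weight and gn.bias each hold 64 values, one per channel.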
Code Example #3
File: activation.py  Project: seetaresearch/dragon
    def __init__(
        self,
        embed_dim,
        num_heads,
        dropout=0.,
        bias=True,
        kdim=None,
        vdim=None,
    ):
        """Create a ``MultiheadAttention`` module.

        Parameters
        ----------
        embed_dim : int
            The dimension of input embeddings.
        num_heads : int
            The number of parallel heads.
        dropout : float, optional, default=0.
            The probability of setting an attention weight to zero.
        bias : bool, optional, default=True
            Add a bias tensor to output or not.
        kdim : int, optional
            The dimension of key embedding.
        vdim : int, optional
            The dimension of value embedding.

        """
        super(MultiheadAttention, self).__init__()
        self.embed_dim = embed_dim
        self.kdim = kdim if kdim is not None else embed_dim
        self.vdim = vdim if vdim is not None else embed_dim
        self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim
        self.num_heads = num_heads
        self.dropout = dropout
        self.head_dim = embed_dim // num_heads
        if self.head_dim * num_heads != self.embed_dim:
            raise ValueError('<embed_dim> must be divisible by <num_heads>.')
        if not self._qkv_same_embed_dim:
            self.q_proj_weight = Parameter(Tensor(embed_dim, embed_dim))
            self.k_proj_weight = Parameter(Tensor(embed_dim, self.kdim))
            self.v_proj_weight = Parameter(Tensor(embed_dim, self.vdim))
            self.register_parameter('in_proj_weight', None)
        else:
            self.in_proj_weight = Parameter(Tensor(3 * embed_dim, embed_dim))
            self.register_parameter('q_proj_weight', None)
            self.register_parameter('k_proj_weight', None)
            self.register_parameter('v_proj_weight', None)
        if bias:
            self.in_proj_bias = Parameter(Tensor(3 * embed_dim))
        else:
            self.register_parameter('in_proj_bias', None)
        self.out_proj = Linear(embed_dim, embed_dim, bias=bias)
        self.reset_parameters()
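
When kdim and vdim both equal embed_dim, the three input projections are packed into a single (3 * embed_dim, embed_dim) matrix. The split below follows the PyTorch row convention this module mirrors (queries first, then keys, then values); it is illustrative, not taken from the source above:

from dragon.vm.torch import nn

E = 512
attn = nn.MultiheadAttention(embed_dim=E, num_heads=8)
q_w = attn.in_proj_weight[:E]       # assumed query rows
k_w = attn.in_proj_weight[E:2 * E]  # assumed key rows
v_w = attn.in_proj_weight[2 * E:]   # assumed value rows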
Code Example #4
    def _register_parameters(self):
        """Register and flatten the parameters."""
        if self.mode == 'lstm':
            gate_size = 4 * self.hidden_size
        elif self.mode == 'gru':
            gate_size = 3 * self.hidden_size
        else:
            gate_size = self.hidden_size
        # Compute the shape of weight and bias.
        self._matrix_shape, self._bias_shape = [], []
        for layer in range(self.num_layers):
            for direction in range(self.num_directions):
                layer_input_size = self.input_size if layer == 0 \
                    else self.hidden_size * self.num_directions
                w_ih_shape = [gate_size, layer_input_size]
                w_hh_shape = [gate_size, self.hidden_size]
                b_ih_shape, b_hh_shape = [gate_size], [gate_size]
                # W (0 ~ 3), R (4 ~ 7)
                self._matrix_shape.extend([w_ih_shape, w_hh_shape])
                # Bw (0 ~ 3), Br (4 ~ 7)
                self._bias_shape.extend([b_ih_shape, b_hh_shape])
        # Compute total number of parameters.
        self._weights_count = 0
        for shape in self._matrix_shape + self._bias_shape:
            self._weights_count += int(numpy.prod(shape))
        # Create the flat float32 weights.
        self.weights = Parameter(Tensor(self._weights_count))
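
The shape bookkeeping can be checked by hand. A pure-Python count for a single-layer, unidirectional LSTM with input_size=10 and hidden_size=20, following the shapes computed above:

gate_size = 4 * 20                 # 'lstm' uses 4 gates
w_ih = gate_size * 10              # [80, 10] -> 800 values
w_hh = gate_size * 20              # [80, 20] -> 1600 values
b_ih = b_hh = gate_size            # [80] each
print(w_ih + w_hh + b_ih + b_hh)   # 2560, the size of the flat weights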
Code Example #5
def new_leaf(size, kwargs):
    """Return a leaf tensor from optional kwargs."""
    device = kwargs.get('device', cpp.device())
    return Tensor(*size,
                  dtype=kwargs.get('dtype', 'float32'),
                  device=cpp.device() if device is None else device,
                  requires_grad=kwargs.get('requires_grad', False))
Code Example #6
File: constant_ops.py  Project: seetaresearch/dragon
def tensor(data, dtype=None, device=None, requires_grad=False):
    """Create a tensor initializing from the given data.

    Parameters
    ----------
    data : array_like
        The data to initialize from.
    dtype : str, optional
        The optional data type.
    device : dragon.vm.torch.device, optional
        The optional device of returned tensor.
    requires_grad : bool, optional, default=False
        ``True`` to record gradient for returned tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    array_data = numpy.array(data, copy=True)
    if dtype is None:
        dtype = str(array_data.dtype)
    else:
        array_data = array_data.astype(dtype)
    return Tensor(
        array_data,
        dtype=dtype,
        device=cpp.device() if device is None else device,
        requires_grad=requires_grad,
    )
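
Because the data is routed through numpy.array, an omitted dtype is inferred from the data, while an explicit one casts before the copy. A brief sketch, assuming the dragon.vm.torch namespace:

from dragon.vm import torch

a = torch.tensor([1, 2, 3])                   # dtype inferred by numpy (e.g. 'int64')
b = torch.tensor([1, 2, 3], dtype='float32')  # data cast to float32 first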
Code Example #7
    def _get_grad(execute_ws, param, summed=False):
        """Return the grad of a parameter."""
        grad_impl = execute_ws.get_tensor(
            param.id + ('_grad_sum' if summed else '_grad'))
        if grad_impl:
            return Tensor(device=param.device, impl=grad_impl)
        return None
Code Example #8
File: constant_ops.py  Project: seetaresearch/dragon
def scalar(input, dtype, device):
    """Return a cached scalar tensor.

    Parameters
    ----------
    input : number
        The scalar value.
    dtype : str
        The data type of output tensor.
    device : dragon.vm.torch.device
        The device of output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    if isinstance(input, Tensor):
        return input
    try:
        input = float(input)
    except (TypeError, ValueError):
        raise ValueError('<input> should be a python number, got {}.'.format(
            type(input).__name__))
    cached_name = '%s(%s)' % (dtype, input)
    default_ws = workspace.get_workspace()
    impl = default_ws.get_tensor(cached_name)
    if impl is None:
        impl = default_ws.create_tensor(cached_name)
        impl.FromNumpy(numpy.array(input, dtype), True)
    return Tensor(device=device, impl=impl)
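
The cache key encodes only the dtype and the float value, so equal scalars share one workspace tensor. A pure-Python illustration of the key format used above:

dtype, value = 'float32', float(1)
cached_name = '%s(%s)' % (dtype, value)
print(cached_name)  # 'float32(1.0)', shared by scalar(1, ...) and scalar(1.0, ...)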
Code Example #9
File: constant_ops.py  Project: seetaresearch/dragon
def empty(*size, dtype=None, device=None, requires_grad=False):
    """Return a tensor filled with uninitialized data.

    Parameters
    ----------
    size : int...
        The sizes of output tensor.
    dtype : str, optional
        The optional data type.
    device : dragon.vm.torch.device, optional
        The optional device of returned tensor.
    requires_grad : bool, optional, default=False
        ``True`` to record gradient for returned tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    return Tensor(
        *size,
        dtype=dtype if dtype else 'float32',
        device=cpp.device() if device is None else device,
        requires_grad=requires_grad,
    )
Code Example #10
File: rnn.py  Project: seetaresearch/dragon
    def __init__(self, input_size, hidden_size, bias, num_chunks):
        super(RNNCellBase, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.weight_ih = Parameter(
            Tensor(num_chunks * hidden_size, input_size))
        self.weight_hh = Parameter(
            Tensor(num_chunks * hidden_size, hidden_size))
        if bias:
            self.bias_ih = Parameter(Tensor(num_chunks * hidden_size))
            self.bias_hh = Parameter(Tensor(num_chunks * hidden_size))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)
        self.reset_parameters()
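
num_chunks plays the same role as the gate count in example #4: the input-hidden and hidden-hidden weights are stacked once per gate. The mapping below is an assumption based on the standard LSTM/GRU gate structure, not taken from the source above:

# Assumed gate counts per cell type (cf. example #4):
NUM_CHUNKS = {'lstm': 4, 'gru': 3, 'rnn': 1}
# e.g. an LSTM cell with input_size=10, hidden_size=20 holds
# weight_ih of shape [4 * 20, 10] = [80, 10].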
Code Example #11
    def __init__(self, num_embeddings, embedding_dim, padding_idx=None):
        """Create an ``Embedding`` module.

        Parameters
        ----------
        num_embeddings : int
            The dictionary size.
        embedding_dim : int
            The embedding dimension.
        padding_idx : int, optional
            The index at which to return zeros.

        """
        super(Embedding, self).__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        if padding_idx is not None:
            if padding_idx > 0:
                if padding_idx >= self.num_embeddings:
                    raise ValueError('<padding_idx> must be within <num_embeddings>.')
            elif padding_idx < 0:
                if padding_idx < -self.num_embeddings:
                    raise ValueError('<padding_idx> must be within <num_embeddings>.')
                padding_idx = self.num_embeddings + padding_idx
        self.padding_idx = padding_idx
        self.weight = Parameter(Tensor(num_embeddings, embedding_dim))
        self.reset_parameters()
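
The negative branch wraps padding_idx the way Python sequence indices wrap. A pure-Python check of that normalization:

num_embeddings, padding_idx = 100, -1
if padding_idx < 0 and padding_idx >= -num_embeddings:
    padding_idx = num_embeddings + padding_idx
print(padding_idx)  # 99, the last row of the embedding table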
Code Example #12
    def _steal_grad(ws, param, grad_accum=False):
        """Steal the grad from the backend."""
        impl = ws.GetTensor(param.id +
                            ('_grad[accum]' if grad_accum else '_grad'))
        if impl is not None:
            return Tensor(device=param.device, impl=impl)
        return None
Code Example #13
File: dlpack.py  Project: ORG-MARS/dragon
def from_dlpack(dlpack):
    """Create a tensor sharing the dlpack data.

    Parameters
    ----------
    dlpack : PyCapsule
        The capsule object of a dlpack tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The tensor with the dlpack data.

    """
    current_ws = workspace.get_workspace()
    tensor = Tensor(device=None)
    tensor._gc = current_ws.collectors.TENSOR
    tensor._impl = current_ws.create_tensor(
        tensor._gc.alloc('${DLPACK}')).FromDLPack(dlpack)
    tensor._device = cpp.device(*tensor._impl.device)
    return tensor
Code Example #14
    def __init__(self, in_features, out_features, bias=True):
        """Create a ``Linear`` module.

        Parameters
        ----------
        in_features : int
            The number of input features.
        out_features : int
            The number of output features.
        bias : bool, optional, default=True
            Add a bias tensor to output or not.

        """
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(Tensor(out_features))
        else:
            self.bias = None
        self.reset_parameters()
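
Unlike the normalization modules above, a disabled bias is stored as plain None rather than registered as a buffer. A minimal usage sketch, assuming the PyTorch-style nn namespace:

from dragon.vm.torch import nn

fc = nn.Linear(128, 10)                      # weight: [10, 128], bias: [10]
fc_no_bias = nn.Linear(128, 10, bias=False)
assert fc_no_bias.bias is None               # plain None, not a buffer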
Code Example #15
File: activation.py  Project: ORG-MARS/dragon
    def __init__(self, num_parameters=1, init=0.25):
        """Create a ``PReLU`` module.

        Parameters
        ----------
        num_parameters : int, optional, default=1
            The number of parameters.
        init : float, optional, default=0.25
            The default value of parameters.

        """
        super(PReLU, self).__init__()
        self.num_parameters = num_parameters
        self.weight = Parameter(Tensor(num_parameters).fill_(init))
Code Example #16
File: normalization.py  Project: seetaresearch/dragon
    def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
        r"""Create a ``LayerNorm`` module.

        Parameters
        ----------
        normalized_shape : Union[int, Sequence[int]]
            The size normalized over the last dimensions.
        eps : float, optional, default=1e-5
            The value of :math:`\epsilon`.
        elementwise_affine : bool, optional, default=True
            ``True`` to apply an affine transformation.

        """
        super(LayerNorm, self).__init__()
        self.normalized_shape = tuple(nest.flatten(normalized_shape))
        self.eps = eps
        self.elementwise_affine = elementwise_affine
        if self.elementwise_affine:
            self.weight = Parameter(Tensor(*self.normalized_shape))
            self.bias = Parameter(Tensor(*self.normalized_shape))
        else:
            self.register_buffer('weight', constant_ops.ones(*self.normalized_shape))
            self.register_buffer('bias', constant_ops.zeros(*self.normalized_shape))
        self.reset_parameters()
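
Since the constructor flattens normalized_shape with nest.flatten, an int and a sequence are interchangeable spellings. A short sketch under the same usage assumptions as above:

from dragon.vm.torch import nn

ln1 = nn.LayerNorm(256)        # normalized_shape -> (256,)
ln2 = nn.LayerNorm([16, 256])  # normalized_shape -> (16, 256)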
Code Example #17
def scalar_to_tensor(input, dtype, device):
    """Return a cached scalar tensor."""
    if isinstance(input, Tensor):
        return input
    try:
        input = float(input)
    except (TypeError, ValueError):
        raise ValueError('<input> should be a python number, got {}.'.format(
            type(input).__name__))
    name = '/share/scalar/{}/{}'.format(dtype, str(input))
    current_ws = workspace.get_workspace()
    if not current_ws.has_tensor(name):
        current_ws.feed_tensor(name, numpy.array(input, dtype=dtype))
    return Tensor(device=device,
                  impl=current_ws.GetTensor(name),
                  requires_grad=False)
Code Example #18
File: constant_ops.py  Project: seetaresearch/dragon
def from_numpy(ndarray):
    """Create a tensor converting from the given numpy array.

    Parameters
    ----------
    ndarray : numpy.ndarray
        The numpy array data.

    Returns
    -------
    dragon.vm.torch.Tensor
        The torch tensor.

    """
    if not isinstance(ndarray, numpy.ndarray):
        raise TypeError('<ndarray> should be a numpy array.')
    return Tensor(ndarray, copy=False)
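
Because the constructor is called with copy=False, the returned tensor aliases the array's memory rather than snapshotting it. A sketch, assuming the dragon.vm.torch namespace:

import numpy
from dragon.vm import torch

arr = numpy.zeros(4, dtype='float32')
t = torch.from_numpy(arr)
arr[0] = 1.0  # the write is visible through t, since no copy was made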
Code Example #19
File: dlpack.py  Project: seetaresearch/dragon
def from_dlpack(dlpack):
    """Create a tensor sharing the dlpack data.

    Parameters
    ----------
    dlpack : PyCapsule
        The capsule object of a dlpack tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The tensor with the dlpack data.

    """
    default_ws = workspace.get_workspace()
    impl = default_ws.create_tensor(scope='DLPack').FromDLPack(dlpack)
    return Tensor(device=cpp.device(*impl.device),
                  impl=impl, deleter=default_ws._handle_pool)
Code Example #20
    def _set_parameter(self, layer_id, param_id, param_type, param):
        """Set a parameter into the flattened weights."""
        if isinstance(param, numpy.ndarray):
            param = Tensor(
                param,
                copy=False,
                requires_grad=self.weights.requires_grad,
            )
        return nn_funcs.RNNParamSet \
            .instantiate(
                self.weights.device,
                layer_id=layer_id,
                param_id=param_id,
                param_type=param_type,
                mode=self.mode,
                input_size=self.input_size,
                hidden_size=self.hidden_size,
                num_layers=self.num_layers,
                num_directions=self.num_directions,
            ).apply(param, self.weights)
Code Example #21
File: rnn.py  Project: seetaresearch/dragon
    def flatten_parameters(self):
        """Flatten parameters into a single weights tensor."""
        gate_size = self._num_gates * self.hidden_size
        # Compute the shape of weight and bias.
        matrix_shapes, bias_shapes = [], []
        for layer in range(self.num_layers):
            for direction in range(int(self.bidirectional) + 1):
                layer_input_size = self.input_size if layer == 0 \
                    else self.hidden_size * self.num_directions
                w_ih_shape = [gate_size, layer_input_size]
                w_hh_shape = [gate_size, self.hidden_size]
                b_ih_shape, b_hh_shape = [gate_size], [gate_size]
                matrix_shapes.extend([w_ih_shape, w_hh_shape])
                bias_shapes.extend([b_ih_shape, b_hh_shape])
        # Compute total number of parameters.
        self._weights_count = 0
        self._weights_shapes = matrix_shapes + bias_shapes
        for shape in self._weights_shapes:
            self._weights_count += math_util.prod(shape)
        # Create the flat float32 weights.
        self.weights = Parameter(Tensor(self._weights_count))
Code Example #22
File: execute.py  Project: ORG-MARS/dragon
def run_operator(
    op_def,
    inputs,
    outputs,
    no_grad=False,
    pre_callback=None,
):
    """Compute the outputs."""
    requires_grad = False
    input_names, output_names = [], []
    default_tape = backprop.get_default_tape()

    # Add inputs.
    for input in inputs:
        input_names.append(input.id)
        if input.requires_grad:
            requires_grad = True

    # Determine the gradient flags.
    requires_grad = requires_grad and not no_grad
    requires_grad = requires_grad and grad_mode.is_grad_enabled()
    if default_tape is not None:
        no_grad = no_grad and not default_tape.retain_graph
        requires_grad = requires_grad or default_tape.retain_graph

    # Allocate outputs.
    ws = workspace.get_workspace()
    output_scope = context.get_eager_scope(requires_grad)
    gc = ws.collectors  # Garbage collectors

    for i, spec in enumerate(outputs):
        if isinstance(spec, six.string_types):
            output_names.append(spec)
        else:
            if isinstance(spec, device_cls):
                impl = ws.create_tensor(gc.TENSOR.alloc(output_scope))
                outputs[i] = Tensor(device=spec, gc=gc.TENSOR, impl=impl)
            output_names.append(outputs[i].id)

    # Generate the OpDef.
    op_handle = None  # Optional resource handle
    op_def = op_def.DeriveTo(input_names, output_names)

    # Flag the outputs.
    if len(inputs) > 0 and not no_grad:
        if requires_grad:
            instance_tape = backprop.Tape()
            for input in inputs:
                instance_tape.merge_from(input._tape)
                if not input._requires_grad:
                    instance_tape.add_empty_grad(input.id + '_grad')
            op_def.name = op_handle = gc.OP.alloc(op_def.type)
            instance_tape.add_operation(op_def)
            for output in outputs:
                output._tape = instance_tape
                output._requires_grad = True
        else:
            for output in outputs:
                output._requires_grad = False

    # Record this operation on the default tape for later use.
    if default_tape is not None:
        default_tape.add_def(op_def)
        if default_tape.retain_op_handles and op_handle is None:
            op_def.name = gc.OP.alloc(op_def.type)

    # Dispatch the computation.
    if pre_callback is not None:
        pre_callback(ws, op_def.name)
    ws.run_operator(op_def)

    # Return the outputs.
    return outputs[0] if len(outputs) == 1 else outputs
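
The gradient bookkeeping at the top of run_operator reduces to a small truth function over four flags. A pure-Python restatement, assuming a default tape is present (names follow the code above; this is an illustrative sketch, not the library API):

def resolve_grad_flags(any_input_requires_grad, no_grad,
                       grad_enabled, retain_graph):
    """Mirror the flag logic of run_operator (illustrative)."""
    requires_grad = any_input_requires_grad and not no_grad and grad_enabled
    # A default tape that retains the graph forces recording.
    no_grad = no_grad and not retain_graph
    requires_grad = requires_grad or retain_graph
    return requires_grad, no_grad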
Code Example #23
File: function.py  Project: seetaresearch/dragon
    def forward(inputs, run_config, **kwargs):
        """Compute the function outputs."""
        graph_tape = tapes.get_tape()
        execute_ws = workspace.get_workspace()
        device = run_config['device']

        # Add inputs.
        inputs_id = []
        enable_grad = False
        for i, input in enumerate(inputs):
            inputs_id.append(input.id)
            if input.requires_grad:
                enable_grad = True
            if run_config['check_device'] and input._device != device:
                raise RuntimeError(
                    'Mismatched device between function and '
                    'element {} of input tensors. ({} vs. {})'
                    .format(i, device, input._device))

        # Unify grad modes.
        no_grad = run_config['no_grad']
        no_grad = no_grad or not grad_mode.is_grad_enabled()
        enable_grad = enable_grad and not no_grad
        if hasattr(graph_tape, '_exporting'):
            # Ensure the intermediates are saved for the exporting graph.
            no_grad, enable_grad = False, True

        # Add outputs.
        outputs, outputs_id = [], []
        output_specs = kwargs.get('outputs', [None])
        for i, spec in enumerate(output_specs):
            if spec is None:
                outputs.append(Tensor(
                    device=device.copy(),
                    impl=execute_ws.create_tensor(
                        scope=context.get_variable_scope(enable_grad)),
                    deleter=execute_ws._handle_pool))
                outputs_id.append(outputs[i].id)
            else:
                if isinstance(spec, Tensor):
                    spec._device = device.copy()
                    outputs.append(spec)
                    outputs_id.append(spec.id)
                else:
                    outputs_id.append(spec)
                if enable_grad and outputs_id[-1] not in inputs_id:
                    raise RuntimeError('Output tensor should be in inputs if requires grad.')

        # Specialize def for given inputs and outputs.
        op_name = ''  # Optional operator name.
        op_def = run_config['def'].DeriveTo(inputs_id, outputs_id)

        # Record def if grad is enabled.
        if len(inputs) > 0 and not no_grad:
            if enable_grad:
                op_tape = tapes.OrderedTape()
                op_name = execute_ws.create_handle(op_def.type)
                op_def.name = op_name
                op_tape.add_element(op_def)
                op_tape.add_handle(op_name)
                for input in inputs:
                    op_tape.add_source(input)
                for output in outputs:
                    op_tape.merge_from(output._tape)
                for output in outputs:
                    output._tape = op_tape
                    output._requires_grad = True
            else:
                for output in outputs:
                    output._requires_grad = False

        # Ensure the named operator for the tracing graph.
        if hasattr(graph_tape, '_tracing'):
            if not op_name:
                op_name = execute_ws.create_handle(op_def.type)
            op_def.name = op_name
            graph_tape.add_element(op_def)
            graph_tape.add_handle(op_name)

        # Save inputs for the checkpointing graph.
        if hasattr(graph_tape, '_checkpointing'):
            for input in inputs:
                if input._tape:
                    if input._retains_grad:
                        graph_tape.add_source(input)
                elif input._requires_grad:
                    graph_tape.add_source(input)

        # Emit to dispatch this execution.
        for feed_key, value_type in run_config['feed_dict'].items():
            dest = execute_ws.create_tensor(op_name + '/' + feed_key)
            dest.FromNumpy(numpy.array(kwargs[feed_key], value_type), True)
        execute_ws.run_operator(op_def)

        # Return single or repeated outputs.
        return outputs[0] if len(outputs) == 1 else outputs