Beispiel #1
0
 def _register_as(self, type, **kwargs):
     """Register a filler of the given type under this object's name.

     The type name is lower-cased before matching, and ``'gaussian'``
     is stored as ``'normal'``. Options missing from ``kwargs`` fall
     back to the defaults written below. A type that matches no branch
     is still registered, carrying only its type name.

     Parameters
     ----------
     type : str
         The filler type.
     **kwargs
         The filler options, e.g. ``value``, ``mean``, ``std``,
         ``low``, ``high``, ``scale`` or ``mode``.

     Returns
     -------
     object
         The self.

     """
     norm_modes = {'fan_in': 0, 'fan_out': 1, 'fan_avg': 2}
     info = dragon_pb2.FillerInfo()
     info.type = type.lower()
     kind = info.type
     if kind == 'constant':
         info.value = kwargs.get('value', 0)
     elif kind in ('normal', 'gaussian'):
         info.mean = kwargs.get('mean', 0)
         info.std = kwargs.get('std', 1)
         info.type = 'normal'
     elif kind == 'uniform':
         info.low = kwargs.get('low', 0)
         info.high = kwargs.get('high', 1)
     elif kind == 'truncated_normal':
         info.mean = kwargs.get('mean', 0)
         info.std = kwargs.get('std', 1)
         # The bounds are derived from the values stored in the message.
         info.low = info.mean - 2.0 * info.std
         info.high = info.mean + 2.0 * info.std
     elif kind in ('glorot_uniform', 'xavier'):
         info.scale = kwargs.get('scale', 3)
         info.variance_norm = norm_modes[kwargs.get('mode', 'fan_in')]
     elif kind in ('glorot_normal', 'msra'):
         info.scale = kwargs.get('scale', 2)
         info.variance_norm = norm_modes[kwargs.get('mode', 'fan_in')]
     workspace.get_workspace().create_tensor(self.name, info)
     return self
Beispiel #2
0
def _set_value(input, value):
    """Set the copied value to input."""
    if hasattr(input, 'id'):
        workspace.get_workspace().feed_tensor(input.id,
                                              value,
                                              enforce_cpu=True)
    else:
        raise ValueError('Input is not a legal tensor.')
Beispiel #3
0
    def accumulate(self, momentum):
        """Accumulate the gradient of params.

        Call this method after each ``backward`` pass:

        ```python
        x = torch.ones(1, requires_grad=True)
        optimizer = torch.optim.SGD([x], lr=0.1)
        for epoch in range(2):
            for step in range(3):
                y = x + 1
                y.backward()
                # Note to zero the accumulation at the first step
                optimizer.accumulate(momentum=1 if step > 0 else 0)
            optimizer.step()
        print(x)  # 0.4
        ```

        Every parameter group is flagged with ``'_internal/grad_accum'``
        before the gradients of its parameters are accumulated.

        Parameters
        ----------
        momentum : float, required
            The momentum to the accumulated value.

        """
        # NOTE(review): ``momentum`` is not forwarded to ``accumulate_grad``
        # in this block - confirm whether it is consumed elsewhere.
        ws = workspace.get_workspace()
        for group in self.param_groups:
            group['_internal/grad_accum'] = True
            for param in group['params']:
                grad = self._steal_grad(ws, param)
                if grad is None:
                    # No gradient is available for this parameter.
                    continue
                training_funcs.accumulate_grad(grad)
Beispiel #4
0
 def __init__(self):
     """Initialize the instance state.

     The definitions and both tracking sets start empty, the collectors
     are taken from the current workspace, and both retain flags start
     as ``False``.

     """
     self._defs = []
     self._watched, self._empty_grads = set(), set()
     current_ws = workspace.get_workspace()
     self._gc = current_ws.collectors
     self._retain_graph = self._retain_op_handles = False
Beispiel #5
0
    def from_value(cls, value, dtype=None, name=None):
        """Return a tensor converted from the given value.

        A value that is not already a ``numpy.ndarray`` is converted
        using ``dtype`` (``'float32'`` if not given). An existing
        ndarray is used as is, so ``dtype`` has no effect on it.

        Parameters
        ----------
        value : array_like
            The value to convert.
        dtype: str, optional
            The optional data type.
        name: str, optional
            The optional tensor name, ``'Const'`` if not given.

        Returns
        -------
        dragon.Tensor
            The output tensor.

        """
        if not isinstance(value, numpy.ndarray):
            value = numpy.array(value, dtype or 'float32')
        current_ws = workspace.get_workspace()
        # The name scope prefixes both explicit and default names.
        scoped_name = context.get_name_scope() + (name or 'Const')
        unique_name = current_ws.unique_name(
            name=scoped_name, suffix=':0', namespace='Tensor')
        tensor = TensorRef(unique_name, list(value.shape), str(value.dtype))
        return tensor.set_value(value)
Beispiel #6
0
    def sum_grad(self):
        """Sum the gradients of all parameters.

        Call this method after each ``backward`` pass:

        ```python
        x = torch.ones(1, requires_grad=True)
        optimizer = torch.optim.SGD([x], lr=0.1)
        for epoch in range(2):
            for step in range(3):
                y = x + 1
                y.backward()
                optimizer.sum_grad()
            optimizer.step()
        print(x)  # 0.4
        ```

        For each group, the available gradients are combined into
        tensors named ``<grad id>_sum`` with a single ``Axpby`` call.
        ``beta`` is ``0`` until ``self._sums_grad`` is set, and ``1``
        afterwards. Groups in which no parameter has a gradient are
        skipped.

        """
        current_ws = workspace.get_workspace()
        for group in self.param_groups:
            grads = []
            for param in group['params']:
                grad = self._get_grad(current_ws, param)
                if grad is not None:
                    grads.append(grad)
            if not grads:
                # Nothing to sum; ``grads[0]`` below would raise IndexError.
                continue
            sum_grads = [grad.id + '_sum' for grad in grads]
            Function.apply(
                'Axpby', grads[0].device,
                grads, outputs=sum_grads,
                alpha=1., beta=1. if self._sums_grad else 0.)
        self._sums_grad = True
Beispiel #7
0
    def backward(outputs, grad_outputs, retain_graph=False):
        """Compute the function derivatives w.r.t graph leaves.

        Parameters
        ----------
        outputs : Sequence
            The tensors to start the backward pass from.
        grad_outputs : Sequence
            The tensors whose ids are passed as gradient targets.
        retain_graph : bool, optional, default=False
            ``False`` to release the handles of the collected tape
            after the run.

        """
        # Walk from the outputs towards the sources, first-in first-out,
        # merging every tape that is found on the way.
        merged_tape = tapes.OrderedTape()
        leaf_ids, visited = set(), set()
        pending = list(outputs)
        while pending:
            tensor = pending.pop(0)
            key = id(tensor)
            if key in visited:
                continue
            visited.add(key)
            if not tensor._tape:
                # A tensor without a tape is a leaf if it requires grad.
                if tensor._requires_grad:
                    leaf_ids.add(tensor.id)
                continue
            merged_tape.merge_from(tensor._tape)
            pending.extend(tensor._tape.get_sources())
            # The tape is detached from the tensor once merged.
            tensor._tape = None
            if tensor._retains_grad:
                leaf_ids.add(tensor.id)

        # Emit to dispatch backward execution.
        execute_ws = workspace.get_workspace()
        execute_ws.run_backward(
            op_defs=merged_tape.get_elements(),
            targets=[y.id for y in outputs],
            grad_targets=[dy.id for dy in grad_outputs],
            sources=list(leaf_ids))

        # Free handles if graph not retained.
        if retain_graph:
            return
        for handle in merged_tape.get_handles():
            execute_ws.release_handle(handle)
Beispiel #8
0
def add_update_defs(graph_def, optimizer):
    """Append the update operator defs of ``optimizer`` to ``graph_def``.

    One update def is created for every ``(param, grad)`` pair whose
    gradient exists in the current workspace; the other pairs are
    logged and skipped. If the optimizer has a process group, a
    ``Collective`` def over the collected gradients is placed before
    the update defs.

    Parameters
    ----------
    graph_def : GraphDef
        The graph def to extend.
    optimizer : object
        The optimizer providing the updates.

    """
    # NOTE: this is the optimizer's own dict, so 'handle' is stored on it.
    shared_kwargs = optimizer._extra_kwargs
    shared_kwargs['handle'] = optimizer._op_handle
    current_ws = workspace.get_workspace()
    grads, update_defs = [], []
    for (param, grad), arguments in optimizer._param_group:
        if not current_ws.has_tensor(grad):
            logging.info('Skip to update Tensor({}).'.format(param))
            continue
        grads.append(grad)
        # Shared arguments take precedence over the per-param ones.
        op_arguments = dict(arguments, **shared_kwargs)
        update_def = proto_util.make_operator_def(
            op_type=optimizer._op_type,
            inputs=[grad],
            outputs=[param],
            name=OpDef.get_name(),
            **op_arguments)
        update_defs.append(update_def)
    # Insert a reduce def if the process group is found.
    process_group = optimizer._process_group
    if process_group is not None:
        reduce_def = proto_util.make_operator_def(
            op_type='Collective',
            inputs=grads,
            outputs=grads,
            name=OpDef.get_name(),
            operation='MEAN',
            communication='ALLREDUCE',
            **process_group.arguments)
        update_defs.insert(0, reduce_def)
    graph_def.op.extend(update_defs)
Beispiel #9
0
    def __init__(self, model, device, **kwargs):
        """Create a ``BackendRep``.

        The graph is created from ``model`` under the requested device.
        The tensors named by the graph inputs and outputs are then
        fetched from the current workspace and wrapped.

        Parameters
        ----------
        model : str
            The path of onnx model file.
        device : onnx.Device
            The executing device. Any other value is passed to
            ``Device(...)`` first.

        Raises
        ------
        ValueError
            If the device type is neither CPU nor CUDA.

        """
        if not isinstance(device, Device):
            device = Device(device)
        execute_ws = workspace.get_workspace()
        if device.type == DeviceType.CPU:
            device_type, device_index = 'cpu', 0
        elif device.type == DeviceType.CUDA:
            device_type, device_index = 'cuda', device.device_id
        else:
            # Format instead of concatenating: a non-string device type
            # would otherwise raise TypeError and hide this error.
            raise ValueError(
                'Unsupported device type: {}'.format(device.type))
        with context.device(device_type, device_index):
            self._context = GraphLib.from_onnx(model)
        self._input_dict = collections.OrderedDict()
        self._output_dict = collections.OrderedDict()
        for input in self._context._def.input:
            impl = execute_ws.get_tensor(input)
            self._input_dict[input] = Tensor(impl=impl)
        for output in self._context._def.output:
            impl = execute_ws.get_tensor(output)
            self._output_dict[output] = Tensor(impl=impl)
        self._output_tuple = namedtupledict('Outputs', self._context._def.output)
Beispiel #10
0
 def _from_array(self, array):
     """Create the implementation from a numpy array.

     Records the array size, marks the tensor as a leaf and stores the
     result of ``FromNumpy`` on a tensor created under a name allocated
     by the tensor collector for the current eager scope.

     Parameters
     ----------
     array : numpy.ndarray
         The array to create from.

     """
     current_ws = workspace.get_workspace()
     self._const_size = array.size
     self._gc = current_ws.collectors.TENSOR
     self._is_leaf = True
     tensor_name = self._gc.alloc(context.get_eager_scope())
     impl = current_ws.create_tensor(tensor_name)
     self._impl = impl.FromNumpy(array)
Beispiel #11
0
def scalar(input, dtype, device):
    """Return a cached scalar tensor.

    A ``Tensor`` input is returned unchanged. Any other input is
    converted to ``float`` and stored in a workspace tensor named
    ``'<dtype>(<value>)'``, which is created on first use.

    Parameters
    ----------
    input : number
        The scalar value.
    dtype : str
        The data type of output tensor.
    device : dragon.vm.torch.device
        The device of output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    Raises
    ------
    ValueError
        If ``input`` can not be converted to ``float``.

    """
    if isinstance(input, Tensor):
        return input
    try:
        number = float(input)
    except (TypeError, ValueError):
        raise ValueError('<input> should be a python number, got {}.'.format(
            type(input).__name__))
    cached_name = '%s(%s)' % (dtype, number)
    current_ws = workspace.get_workspace()
    impl = current_ws.get_tensor(cached_name)
    if impl is None:
        # First request for this (dtype, value) pair: create and fill it.
        impl = current_ws.create_tensor(cached_name)
        impl.FromNumpy(numpy.array(number, dtype), True)
    return Tensor(device=device, impl=impl)
Beispiel #12
0
 def _set_hyper(self, name, value, alias=None):
     """Set the specific hyper parameter."""
     if name not in self._hyper:
         self._hyper[name] = value
     else:
         if types.is_tensor(self._hyper[name]):
             workspace.get_workspace().feed_tensor(
                 self._hyper[name].id,
                 value,
                 dtype='float32',
                 enforce_cpu=True,
             )
         else:
             self._hyper[name] = value
     if alias and name not in self._alias:
         self._alias[name] = '/share/hyper/%s/%s' % (self._op_handle, alias)
Beispiel #13
0
 def _add_grads(graph_def, outputs):
     """Add the gradient operator defs of ``outputs`` to ``graph_def``.

     The gradient tape is taken from the outputs that carry one in
     ``_grad_tape``; that attribute is cleared once taken. Nothing is
     added when no output carries a tape or the tape has no elements.
     Otherwise the gradient defs are appended, followed by the
     ``grad_sources`` and ``phase`` arguments if any def was created.

     Parameters
     ----------
     graph_def : GraphDef
         The graph def to extend.
     outputs : Sequence
         The graph outputs.

     Raises
     ------
     RuntimeError
         If the outputs refer to more than one gradient tape.

     """
     # Start from ``None`` so that the guard below is reachable and
     # no throwaway tape is created when there is nothing to do.
     grad_tape = None
     grad_outputs = []
     for output in outputs:
         output_tape = getattr(output, '_grad_tape', None)
         if not output_tape:
             continue
         if grad_outputs and output_tape != grad_tape:
             raise RuntimeError('Create graph from multiple gradient tapes.')
         grad_tape = output_tape
         output._grad_tape = None
         grad_outputs.append(output)
     if grad_tape is None:
         return
     op_defs = grad_tape.get_elements()
     if len(op_defs) == 0:
         return
     execute_ws = workspace.get_workspace()
     ys = [y.id for y in grad_outputs]
     # Outputs without an explicit gradient contribute an empty id.
     dys = [getattr(y._grad, 'id', '') for y in grad_outputs]
     grad_defs = backend.GradientTape().CreateGradientDefs(
         [op_def.SerializeToString() for op_def in op_defs], ys, dys)
     for serialized_str in grad_defs:
         grad_def = dragon_pb2.OperatorDef()
         grad_def.ParseFromString(serialized_str)
         grad_def.name = execute_ws.create_handle('Op')
         graph_def.op.extend([grad_def])
     if len(grad_defs) > 0:
         xs = [x.id for x in grad_tape.get_sources()]
         graph_def.arg.extend([
             proto_util.make_argument('grad_sources', xs),
             proto_util.make_argument('phase', 'TRAIN')])
Beispiel #14
0
def set_value(self, value):
    """Feed the value to this tensor through the current workspace.

    Parameters
    ----------
    value : array_like
        The value to set.

    Returns
    -------
    dragon.Tensor
        The self.

    """
    current_ws = workspace.get_workspace()
    current_ws.feed_tensor(self, value)
    return self
Beispiel #15
0
def load_weights_from_pickle(f, layer, verbose=False):
    """Load the weights of ``layer`` from a pickled dict read from ``f``.

    Weights whose name is missing from the dict are left untouched.
    Weights that have no tensor in the current workspace are skipped
    with a warning.

    Parameters
    ----------
    f : file-like
        The file object to unpickle from.
    layer : object
        The object exposing ``weights``.
    verbose : bool, optional, default=False
        ``True`` to log every loaded weight.

    Raises
    ------
    ValueError
        If a stored value and its weight differ in shape.

    """
    def _dims(shape):
        """Return the comma separated dimensions."""
        return ', '.join([str(d) for d in shape])

    ws = workspace.get_workspace()
    # NOTE(review): unpickling can execute arbitrary code; only pass
    # files from a trusted source.
    weight_dict = six.moves.pickle.load(f)
    for weight in layer.weights:
        name = weight.name
        if name not in weight_dict:
            continue
        value = weight_dict[name]
        value_shape, weight_shape = list(value.shape), list(weight.shape)
        if value_shape != weight_shape:
            raise ValueError(
                'Shape of weight({}) is ({}), \n'
                'While load from shape of ({}).'
                .format(name, _dims(weight_shape), _dims(value_shape))
            )
        weight_impl = ws.GetTensor(weight.id)
        if weight_impl is None:
            logging.warning(
                'Weight({}) is not created '
                'in current workspace. Skip.'.format(name))
            continue
        weight_impl.FromNumpy(value.copy())
        if verbose:
            logging.info(
                'Weight({}) loaded, Size: ({})'
                .format(name, _dims(value_shape)))
Beispiel #16
0
    def zero_grad(self, reset=False):
        """Set the gradient of params to zero.

        This method is not necessary usually, as we will overwrite
        the gradients in the next computation.

        However, if some gradients are not computed every time,
        remember to reset them before ``step(...)``:

        ```python
        m1 = torch.nn.Linear(3, 3)
        m2 = torch.nn.Linear(3, 3)
        x = torch.ones(1, 3, requires_grad=True)
        for i in range(10):
            x = m1(x)
            if i in (2, 4, 6):
                x += m2(x)
        optimizer.zero_grad(reset=True)
        x.backward()
        optimizer.step()
        ```

        Parameters
        ----------
        reset : bool, optional, default=False
            **True** to reset the memory instead of zeroing.

        """
        ws = workspace.get_workspace()
        for group in self.param_groups:
            for param in group['params']:
                grad = self._steal_grad(ws, param)
                if grad is None:
                    continue
                if reset:
                    ws.reset_tensor(grad)
                else:
                    grad.zero_()
Beispiel #17
0
 def _build_graphs(self, *args, **kwargs):
     """Build the graphs by running the function in graph mode.

     Tensor arguments, and arguments covered by the input signature,
     are replaced with symbolic tensors named ``Input_<n>``; other
     arguments are passed through. The inputs, outputs and graphs are
     stored in the attribute cache of the current workspace.

     Returns
     -------
     list
         The graph created from the tensor outputs, if any, followed
         by the ``GraphExec`` objects found in the outputs.

     """
     attributes = self._attribute_cache[workspace.get_workspace()]
     signature = self._spec.input_signature
     args, kwargs = self._spec.separate_inputs(*args, **kwargs)
     inputs = []
     for index in range(self._spec.num_inputs):
         spec = signature[index] if signature is not None else None
         arg = args[index]
         if spec is None and not isinstance(arg, Tensor):
             inputs.append(arg)
             continue
         shape = getattr(arg, 'shape', None)
         dtype = getattr(arg, 'dtype', None)
         if spec is not None:
             # The signature overrides what the argument reports.
             shape, dtype = spec.shape, spec.dtype
         placeholder_name = 'Input_%d' % (index + 1)
         inputs.append(
             Tensor(shape, dtype, name=placeholder_name, symbolic=True))
     with eager_context.graph_mode():
         outputs = self._run_function(*inputs, **kwargs)
     graph_outputs, dummies = [], []
     for output in nest.flatten(outputs):
         bucket = graph_outputs if isinstance(output, Tensor) else dummies
         bucket.append(output)
     graphs = []
     if graph_outputs:
         graphs.append(GraphLib.from_outputs(graph_outputs))
     graphs.extend(obj for obj in dummies if isinstance(obj, GraphExec))
     attributes['inputs'] = inputs
     attributes['outputs'] = outputs
     attributes['graphs'] = graphs
     return graphs
Beispiel #18
0
def placeholder(dtype=None, shape=None, name=None):
    """Return a symbolic tensor as the placeholder.

    The tensor name is made unique from the current name scope
    followed by ``name``, or by ``'Placeholder'`` if no name is given.

    Parameters
    ----------
    dtype : str, optional
        The optional data type.
    shape : Sequence[int], optional
        The optional tensor shape.
    name : str, optional
        The optional tensor name.

    Returns
    -------
    dragon.Tensor
        The output tensor.

    """
    current_ws = workspace.get_workspace()
    # Parenthesize the fallback: without it, the conditional expression
    # swallows the concatenation and drops the scope for default names.
    tensor_name = current_ws.unique_name(
        context.get_name_scope() + (name if name else 'Placeholder'),
        suffix=':0',
        namespace='Tensor')
    return TensorRef(tensor_name, dtype=dtype, shape=shape).constant()
Beispiel #19
0
 def _set_hyper(self, name, value):
     """Set value to a hyper parameter.

     The backing tensor ``<self._name>/<name>`` is created in the
     current workspace on first use. The value is converted to a
     ``'float32'`` scalar array before it is written.

     Parameters
     ----------
     name : str
         The name of the hyper parameter.
     value : number
         The value to set.

     """
     if name not in self._hyper:
         current_ws = workspace.get_workspace()
         self._hyper[name] = current_ws.create_tensor(
             self._name + '/' + name)
     scalar_array = numpy.array(float(value), 'float32')
     self._hyper[name].FromNumpy(scalar_array, False)
Beispiel #20
0
def _get_value(input):
    """Return the value stolen from input."""
    if hasattr(input, 'id'):
        impl = workspace.get_workspace().GetTensor(input.id)
        if impl is not None:
            return impl.ToNumpy(True)
        return impl
    return input
Beispiel #21
0
def save_weights_to_pickle(f, layer):
    """Pickle the weights of ``layer`` into ``f``.

    The weights are stored as an ordered dict from weight name to the
    result of ``ToNumpy(True)``. Weights without a tensor in the
    current workspace are left out.

    Parameters
    ----------
    f : file-like
        The file object to write to.
    layer : object
        The object exposing ``weights``.

    """
    ws = workspace.get_workspace()
    weight_dict = collections.OrderedDict()
    for weight in layer.weights:
        impl = ws.GetTensor(weight.id)
        if impl is None:
            continue
        weight_dict[weight.name] = impl.ToNumpy(True)
    six.moves.pickle.dump(weight_dict, f, PICKLE_DEFAULT_PROTOCOL)
Beispiel #22
0
 def from_updates(grads_and_vars, optimizer, name=None):
     """Create a graph from the updates.

     Parameters
     ----------
     grads_and_vars : object
         The updates passed to ``GraphLib._add_updates``.
     optimizer : object
         The optimizer passed to ``GraphLib._add_updates``.
     name : str, optional
         The graph name, ``'Graph'`` if not given.

     Returns
     -------
     GraphExec
         The executable bound to the current workspace.

     """
     if name is None:
         name = 'Graph'
     execute_ws = workspace.get_workspace()
     graph_def = dragon_pb2.GraphDef(name=name)
     GraphLib._add_updates(graph_def, grads_and_vars, optimizer)
     GraphLib._add_device(graph_def)
     # The name returned by the workspace replaces the requested one.
     graph_def.name = execute_ws.create_graph(graph_def)
     return GraphExec(graph_def, execute_ws)
Beispiel #23
0
 def _set_name(self, name=None, zero_based=True):
     """Set the module name."""
     if name is None:
         self._name = workspace.get_workspace().unique_name(
             name=self.__class__.__name__.lower(),
             namespace='TensorLayerModule',
             zero_based=zero_based)
     else:
         self._name = name
Beispiel #24
0
    def add(op_type, inputs, **kwargs):
        """Add operator to output symbols.

        Parameters
        ----------
        op_type : str
            The operator type.
        inputs : object
            The inputs, flattened before use.
        **kwargs
            The operator arguments. ``extra_inputs``, ``name`` and
            ``num_outputs`` are consumed here and not forwarded.

        Returns
        -------
        object
            The single output if ``num_outputs`` is 1, else all of them.

        """
        op_tape = tapes.OrderedTape()
        graph_tape = tapes.get_tape()
        execute_ws = workspace.get_workspace()

        # Register the inputs; gradients are enabled as soon as one
        # input requires them or is a target of the active tape.
        enable_grad = False
        inputs = nest.flatten(inputs)
        for tensor in inputs:
            op_tape.add_source(tensor)
            if graph_tape and (tensor.requires_grad
                               or graph_tape.is_target(id(tensor))):
                enable_grad = True

        # Extra inputs are recorded as both sources and targets.
        for extra in nest.flatten(kwargs.pop('extra_inputs', [])):
            op_tape.add_source(extra)
            op_tape.add_target(extra.id)

        # Create the symbolic outputs.
        name = kwargs.pop('name', None)
        num_outputs = kwargs.pop('num_outputs', 1)
        outputs = [
            Tensor(impl=execute_ws.create_tensor(scope='Tensor'),
                   name=name if name else op_type + ':%d' % index,
                   symbolic=True)
            for index in range(num_outputs)
        ]

        # Create def.
        op_def = proto_util.make_operator_def(
            op_type=op_type,
            inputs=[tensor.id for tensor in inputs],
            outputs=[tensor.id for tensor in outputs],
            device_option=proto_util.get_default_device_option(),
            name=execute_ws.create_handle('Op'),
            **kwargs)

        # Record def.
        op_tape.add_element(op_def)
        if enable_grad:
            graph_tape.add_element(op_def)

        # Set tape for outputs.
        for tensor in outputs:
            tensor._tape = op_tape
            tensor._requires_grad = enable_grad

        # Add spec to outputs, falling back to the 'Unchanged' spec.
        spec_func = OpSchema.get_spec(op_type)
        if spec_func is None:
            spec_func = OpSchema.get_spec('Unchanged')
        outputs = spec_func(kwargs, inputs, outputs)

        # Return single or repeated outputs.
        if num_outputs == 1:
            return outputs[0]
        return outputs
Beispiel #25
0
 def __call__(self, *args, **kwargs):
     """Call the compiled executables.

     On the first call, the python function is traced in graph mode
     with constant input tensors, and the executables are created from
     what it returns. Every call then feeds the given inputs, runs the
     executables and wraps the tensor outputs as eager tensors.

     Returns
     -------
     object
         The single output if there is exactly one, else the list.

     Raises
     ------
     ValueError
         If the function defines more inputs than the signature covers.

     """
     if self.executables is None:
         # Graph is not created on the first call.
         # Compile the executables from the python function.
         scope_name = '${%d}' % id(self)
         signature = self.input_signature
         inputs = []
         with context.name_scope(scope_name):
             for index in range(self._function_spec.num_inputs):
                 shape = dtype = None
                 if signature is not None:
                     if index >= len(signature):
                         raise ValueError(
                             'When <input_signature> is provided, '
                             'only define arguments covered by it.\n'
                             'Got %d signature(s) and %d argument(s).' %
                             (len(signature),
                              self._function_spec.num_inputs))
                     shape = signature[index].shape
                     dtype = signature[index].dtype
                 placeholder = Tensor(shape, dtype, 'Input:%d' % index)
                 inputs.append(placeholder.constant())
         with context.name_scope(scope_name), eager_context.graph_mode():
             returns = nest.flatten(self._python_function(*inputs))
         tensor_returns = [
             obj for obj in returns if isinstance(obj, Tensor)]
         executables = [
             function_lib.create_function(outputs=tensor_returns)]
         # Each returned optimizer gets its own executable.
         for obj in returns:
             if isinstance(obj, Tensor):
                 continue
             if isinstance(obj, optimizer.Optimizer):
                 executables.append(
                     function_lib.create_function(optimizer=obj))
         self.inputs = inputs
         self.outputs = returns
         self.executables = executables
     # In this case, we have compiled executables.
     # Notify the backend to run directly.
     executables = self.executables
     inputs, kwargs = self.canonicalize_inputs(*args, **kwargs)
     current_ws = workspace.get_workspace()
     for symbol, value in zip(self.inputs, inputs):
         current_ws.feed_tensor(symbol, value)
     executables[0](return_outputs=False, **kwargs)
     for func in executables[1:]:
         func(return_outputs=False)
     results = []
     for output in self.outputs:
         if not isinstance(output, Tensor):
             results.append(output)
             continue
         impl = current_ws.GetTensor(output.id)
         device = device_spec.DeviceSpec(*impl.device)
         results.append(EagerTensor(impl=impl, device=device))
     if len(results) == 1:
         return results[0]
     return results
Beispiel #26
0
def get_value(self):
    """Fetch the value of this tensor from the current workspace.

    Returns
    -------
    numpy.ndarray
        The deep-copied value.

    """
    current_ws = workspace.get_workspace()
    return current_ws.fetch_tensor(self)
Beispiel #27
0
 def _init_set_name(self, name=None, zero_based=True):
     """Set the object name.

     An explicit name must pass ``valid_identifier``. Otherwise a
     unique name is requested from the current workspace, based on the
     snake-cased class name in the ``'Object'`` namespace.

     Parameters
     ----------
     name : str, optional
         The explicit name.
     zero_based : bool, optional, default=True
         Passed through to ``unique_name``.

     Raises
     ------
     ValueError
         If the explicit name is rejected by ``valid_identifier``.

     """
     if name is not None:
         if not valid_identifier(name):
             raise ValueError('<name> should be a legal identifier.')
         self._name = name
         return
     current_ws = workspace.get_workspace()
     self._name = current_ws.unique_name(
         name=camel_to_snake(self.__class__.__name__),
         namespace='Object',
         zero_based=zero_based,
     )
Beispiel #28
0
 def _init_set_name(self, name, zero_based=True):
     """Set a name for sharing weights."""
     if not name:
         self._name = workspace.get_workspace().unique_name(
             name=generic_utils.to_snake_case(
                 self.__class__.__name__),
             namespace='Object',
             zero_based=zero_based,
         )
     else:
         self._name = name
Beispiel #29
0
 def __call__(self, bottom):
     """Return one constant ``'float32'`` tensor per blob shape.

     Each tensor gets a unique name built from the current name scope
     followed by ``'output'``, with the blob index as suffix. The
     ``bottom`` argument is not used here.

     Returns
     -------
     list
         The output tensors, in the order of ``self.blob_shapes``.

     """
     name_scope = context.get_name_scope()
     current_ws = workspace.get_workspace()
     outputs = []
     for index, blob_shape in enumerate(self.blob_shapes):
         tensor_name = current_ws.unique_name(
             name_scope + 'output',
             suffix=':{}'.format(index),
             namespace='Tensor')
         tensor = TensorRef(
             name=tensor_name,
             shape=blob_shape,
             dtype='float32',
         )
         outputs.append(tensor.constant())
     return outputs
Beispiel #30
0
 def __init__(self, **kwargs):
     """Create a ``Optimizer``.

     Parameters
     ----------
     grad_scale : number, optional, default=1
         Stored as the ``grad_scale`` hyper parameter.
     weight_decay : number, optional, default=0
         Stored as the ``weight_decay`` hyper parameter.
     clip_norm : number, optional, default=0
         Stored as the ``clip_norm`` hyper parameter.
     clip_value : number, optional, default=0
         Stored as the ``clip_value`` hyper parameter.

     Raises
     ------
     ValueError
         If any other keyword argument is given.

     """
     self._name = workspace.get_workspace().create_handle('Optimizer')
     self._op_type = self.__class__.__name__
     self._process_group = distributed.get_group()
     self._hyper = {}
     # The hyper parameters are registered in this fixed order.
     hyper_defaults = (
         ('grad_scale', 1),
         ('weight_decay', 0),
         ('clip_norm', 0),
         ('clip_value', 0),
     )
     for key, default in hyper_defaults:
         self._set_hyper(key, kwargs.pop(key, default))
     if kwargs:
         raise ValueError('Unexpected arguments: ' + ','.join(kwargs))