def __init__(self, *args):
    """Create a ``Net``.

    Parameters
    ----------
    net_file : str
        The path of the text proto file to load the network.
    param_file : str, optional
        The path of the binary proto file to load the parameters.
    phase : {'TRAIN', 'TEST'}, optional
        The optional phase tag.

    """
    if len(args) == 2:
        (net_file, self._phase), param_file = args, None
    elif len(args) == 3:
        net_file, param_file, self._phase = args
    else:
        raise ValueError('Expected 2 or 3 args.')
    self._blobs = {}
    self._layers = []
    self._layer_blobs = []
    self._losses = []
    self._params = []
    self._blob_dict = None
    self._param_dict = None
    self._input_list = None
    self._output_list = None
    # Parse the network file.
    with open(net_file, 'r') as f:
        self._proto = text_format.Parse(f.read(), caffe_pb2.NetParameter())
    # Construct the layer class from proto.
    for layer_param in self._proto.layer:
        if not self._filter_layer(layer_param):
            continue
        cls = getattr(layer_factory, layer_param.type)
        with context.name_scope(layer_param.name):
            self._layers.append(cls(layer_param))
    # Prepare for the legacy net inputs.
    if len(self._proto.input) > 0:
        layer_param = caffe_pb2.LayerParameter(
            name='data',
            type='Input',
            top=self._proto.input,
            input_param=caffe_pb2.InputParameter(
                shape=self._proto.input_shape))
        cls = getattr(layer_factory, layer_param.type)
        with context.name_scope(layer_param.name):
            self._layers.insert(0, cls(layer_param))
    # Call layers sequentially to get outputs.
    self._setup()
    # Collect losses and parameters.
    for layer in self._proto.layer:
        if not self._filter_layer(layer):
            continue
        self._collect_losses_and_params(layer)
    # Load the pre-trained weights if necessary.
    if param_file is not None:
        self.copy_from(param_file)
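# Usage sketch (not part of the original source): the constructor above accepts
# either (net_file, phase) or (net_file, param_file, phase). The import path
# and the file names below are assumptions for illustration only.
from dragon.vm import caffe

net = caffe.Net('deploy.prototxt', 'TEST')                          # 2-arg form
net = caffe.Net('train.prototxt', 'snapshot.caffemodel', 'TRAIN')   # 3-arg form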
def __call__(self, *args, **kwargs):
    """Call the compiled executables."""
    if self.executables is None:
        # Graph is not created on the first call.
        # Compile the executables from the python function.
        inputs = []
        input_signature = self.input_signature
        with context.name_scope('${%d}' % id(self)):
            for i in range(self._function_spec.num_inputs):
                name, shape, dtype = 'Input:%d' % i, None, None
                if input_signature is not None:
                    if i >= len(input_signature):
                        raise ValueError(
                            'When <input_signature> is provided, '
                            'only define arguments covered by it.\n'
                            'Got %d signature(s) and %d argument(s).'
                            % (len(input_signature),
                               self._function_spec.num_inputs))
                    shape = input_signature[i].shape
                    dtype = input_signature[i].dtype
                inputs.append(Tensor(shape, dtype, name).constant())
        with context.name_scope('${%d}' % id(self)), eager_context.graph_mode():
            returns = nest.flatten(self._python_function(*inputs))
            outputs, dummies = [], []
            for obj in returns:
                if isinstance(obj, Tensor):
                    outputs.append(obj)
                else:
                    dummies.append(obj)
            executables = [function_lib.create_function(outputs=outputs)]
            for obj in dummies:
                if isinstance(obj, optimizer.Optimizer):
                    executables.append(
                        function_lib.create_function(optimizer=obj))
        self.inputs = inputs
        self.outputs = returns
        self.executables = executables
    # In this case, we have compiled executables.
    # Notify the backend to run directly.
    executables = self.executables
    inputs, kwargs = self.canonicalize_inputs(*args, **kwargs)
    current_ws = workspace.get_workspace()
    for input, value in zip(self.inputs, inputs):
        current_ws.feed_tensor(input, value)
    executables[0](return_outputs=False, **kwargs)
    [func(return_outputs=False) for func in executables[1:]]
    outputs = []
    for output in self.outputs:
        if isinstance(output, Tensor):
            impl = current_ws.GetTensor(output.id)
            device = device_spec.DeviceSpec(*impl.device)
            outputs.append(EagerTensor(impl=impl, device=device))
        else:
            outputs.append(output)
    return outputs[0] if len(outputs) == 1 else outputs
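# Usage sketch (not part of the original source): this ``__call__`` runs when a
# python function wrapped by the TensorFlow-style ``function`` decorator is
# invoked. The import path and the ``tf.function`` / ``tf.TensorSpec`` names
# are assumptions based on the TensorFlow-compatible API.
from dragon.vm import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec(shape=[None, 8], dtype='float32')])
def forward(x):
    return x + 1  # traced once into a graph, then replayed by the executables

y = forward(tf.zeros([4, 8]))  # first call compiles; later calls reuse the graph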
def name_scope(self):
    """Returns a ``dragon.name_scope`` instance for this class.

    Returns
    -------
    ContextManager
        The context manager to apply the name scope.

    """
    if self._scope_name is None:
        with context.name_scope(self._name) as scope_name:
            self._scope_name = scope_name
    return context.name_scope(self._scope_name)
def _setup(self):
    """Connect the layers sequentially."""
    self._net_outputs = set()
    # Collect bottom and top blobs.
    for layer_idx, layer in enumerate(self._layers):
        bottom = []
        for blob in layer._bottom:
            if blob not in self._blobs:
                raise RuntimeError('bottom({}) is unknown.'.format(blob))
            bottom.append(self._blobs[blob])
            if blob in self._net_outputs:
                self._net_outputs.remove(blob)
        if isinstance(layer, layer_factory.BatchNorm):
            next_layer = self._layers[layer_idx + 1]
            if isinstance(next_layer, layer_factory.Scale):
                layer.fuse_with_scale_layer(next_layer)
        with context.name_scope(layer._name):
            outputs = layer.setup([blob['data'] for blob in bottom])
        if outputs is not None:
            outputs = nest.flatten(outputs)
            for blob_idx, blob in enumerate(layer._top):
                self._blobs[blob] = {
                    'data': outputs[blob_idx],
                    'diff': TensorRef(outputs[blob_idx].id + '_grad')}
                self._net_outputs.add(blob)
    # Collect layer param blobs.
    for blobs in self.params.values():
        self._layer_blobs.extend(blobs)
def __call__(self, *args, **kwargs):
    """Wrap the ``self.call(...)`` with pre-post processing."""
    inputs = None
    if args:
        inputs, args = args[0], args[1:]
    with context.name_scope(self.name):
        self._maybe_build(inputs)
        outputs = self.call(inputs, *args, **kwargs)
    return outputs
def _init(self):
    """Connect the layers sequentially."""
    losses, learnable_blobs = [], []
    grad_tape = backprop.GradientTape()
    # Collect bottom and top blobs.
    for i, layer in enumerate(self._layers):
        bottoms = []
        for bottom_name in layer.bottom:
            if bottom_name not in self._net_blobs:
                raise RuntimeError(
                    'Bottom "{}" is unknown.'.format(bottom_name))
            bottoms.append(self._net_blobs[bottom_name])
            if bottom_name in self._net_outputs:
                self._net_outputs.remove(bottom_name)
        if isinstance(layer, layer_factory.BatchNorm):
            next_layer = self._layers[i + 1]
            if isinstance(next_layer, layer_factory.Scale):
                layer.scale_layer = next_layer
        with context.name_scope(layer.name), grad_tape:
            outputs = layer.setup([blob['data'] for blob in bottoms])
        if outputs is not None:
            outputs = nest.flatten(outputs)
            for j, top_name in enumerate(layer.top):
                self._net_blobs[top_name] = {'data': outputs[j], 'diff': None}
                self._net_outputs.add(top_name)
            loss_weights = list(layer._proto.loss_weight)
            if layer._proto.type.find('Loss') != -1:
                if len(loss_weights) == 0:
                    loss_weights.append(1)
            for j, loss_weight in enumerate(loss_weights):
                if loss_weight > 0:
                    losses.append(outputs[j])
        for j, blob in enumerate(layer.blobs):
            lr_mult, decay_mult = 1, 1
            if j < len(layer._proto.param):
                p = layer._proto.param[j]
                lr_mult = p.lr_mult if p.HasField('lr_mult') else 1
                decay_mult = p.decay_mult if p.HasField('decay_mult') else 1
            if lr_mult > 0 and blob['data'].requires_grad:
                if decay_mult == 0:
                    blob['data']._weight_decay = 0
                learnable_blobs.append(blob)
    if self._phase == 'TRAIN':
        with eager_context.graph_mode():
            grads = grad_tape.gradient(
                losses, [blob['data'] for blob in learnable_blobs])
        for blob, grad in zip(learnable_blobs, grads):
            blob['diff'] = grad
    # Collect all learnable blobs.
    for blobs in self.params.values():
        for blob in blobs:
            if blob.diff:
                self._learnable_blobs.append(blob)
def __init__(self, network_file, phase='TEST', weights=None):
    """Create a ``Net``.

    Parameters
    ----------
    network_file : str
        The path of the text proto file to load the network.
    phase : str, optional, default='TEST'
        The execution phase.
    weights : str, optional
        The path of the binary proto file to load the weights.

    """
    # Parse the network file.
    with open(network_file, 'r') as f:
        self._proto = google.protobuf.text_format.Parse(
            f.read(), caffe_pb2.NetParameter())
    self._phase = phase
    self._layers = []
    self._learnable_blobs = []
    self._net_blobs = dict()
    self._net_outputs = set()
    # Construct the layers from proto.
    layer_names = []
    for layer_param in self._proto.layer:
        if not self._filter_layer(layer_param):
            continue
        try:
            layer_index = layer_names.index(layer_param.name)
            call_layer = self._layers[layer_index]
        except ValueError:
            call_layer = None
        layer_names.append(layer_param.name)
        cls = getattr(layer_factory, layer_param.type)
        self._layers.append(cls(layer_param))
        self._layers[-1]._call_layer = call_layer
    # Add an input layer for the legacy inputs.
    if len(self._proto.input) > 0:
        layer_param = caffe_pb2.LayerParameter(
            name='data',
            type='Input',
            top=self._proto.input,
            input_param=caffe_pb2.InputParameter(
                shape=self._proto.input_shape))
        cls = getattr(layer_factory, layer_param.type)
        with context.name_scope(layer_param.name):
            self._layers.insert(0, cls(layer_param))
    # Connect layers to get outputs.
    self._init()
    # Load the pre-trained weights if necessary.
    if weights is not None:
        self.copy_from(weights)
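# Usage sketch (not part of the original source): unlike the older positional
# constructor, this version takes the phase and weights as keywords. The import
# path and the file names are assumptions for illustration only.
from dragon.vm import caffe

net = caffe.Net('deploy.prototxt', phase='TEST', weights='snapshot.caffemodel')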
def __call__(self, y_true, y_pred):
    """Compute the defined loss function.

    Parameters
    ----------
    y_true : dragon.Tensor
        The ground-truth tensor.
    y_pred : dragon.Tensor
        The logits tensor.

    Returns
    -------
    dragon.Tensor
        The loss.

    """
    scope_name = 'lambda' if self.name == '<lambda>' else self.name
    with context.name_scope(scope_name or self.__class__.__name__):
        return self.call(y_true, y_pred)
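# Usage sketch (not part of the original source): calling a loss object routes
# through the wrapper above, so its ops are created under the loss's name scope
# (or 'lambda' for anonymous functions). The loss class and import path are
# assumptions based on the Keras-compatible API.
from dragon.vm import tensorflow as tf

y_true = tf.constant([[0., 1.], [1., 0.]])
y_pred = tf.constant([[0.1, 0.9], [0.8, 0.2]])
criterion = tf.keras.losses.MeanSquaredError()
loss = criterion(y_true, y_pred)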
def __call__(self, inputs, **kwargs):
    """The preprocessor for ``self.forward(...)``."""
    with context.name_scope(self.name):
        # Maybe build the layer at the first time.
        if not self._built:
            input_list = nest.flatten(inputs)
            input_shapes = None
            if all(hasattr(x, 'shape') for x in input_list):
                input_shapes = [x.shape for x in input_list]
                if not nest.is_sequence(inputs):
                    input_shapes = input_shapes[0]
            self.build(input_shapes)
        # Call the forward implementation to get outputs.
        outputs = self.forward(inputs, **kwargs)
    # Record the nodes if necessary.
    if not self._nodes_fixed:
        self._add_node(inputs, outputs)
    return outputs
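# Usage sketch (not part of the original source): a hypothetical subclass to
# illustrate the first-call behaviour of the wrapper above. ``MyScale`` is
# invented, and whether ``build`` must set ``self._built`` itself is an
# assumption about the base-class contract.
class MyScale(Layer):
    def build(self, input_shape):
        # Runs once, with the shape(s) inferred from the first inputs.
        self.alpha = 2.0
        self._built = True  # assumption: build marks the layer as built

    def forward(self, inputs):
        # Runs on every call, inside the layer's name scope.
        return inputs * self.alpha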
def apply_gradients(self, grads_and_vars):
    """Apply the gradients to update variables.

    Parameters
    ----------
    grads_and_vars : Sequence[Sequence[dragon.Tensor]]
        The gradients and variables.

    Returns
    -------
    dragon.vm.tensorflow.keras.optimizers.Optimizer
        The self to generate the update operations.

    """
    # Create the hyper parameters if necessary.
    with context.name_scope(self._name):
        self._create_hypers()
    # Apply one-step update.
    if eager_context.executing_eagerly():
        # Skip values whose grad is missing.
        for g, v in grads_and_vars:
            if g is not None:
                decay_mult = 0.
                regularizer = getattr(v, '_regularizer', None)
                if regularizer is not None:
                    decay_mult = regularizer.l2 / self.BASE_WEIGHT_DECAY
                self._run_update(v, g, decay_mult=decay_mult)
    else:
        # Store for the lazy compilation.
        for g, v in grads_and_vars:
            decay_mult = 0.
            regularizer = getattr(v, '_regularizer', None)
            if regularizer is not None:
                decay_mult = regularizer.l2 / self.BASE_WEIGHT_DECAY
            self._add_update(v, g, decay_mult=decay_mult)
    # Increase the iterations.
    self._iterations += 1
    return self
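# Usage sketch (not part of the original source): a minimal eager-mode step
# with the method above. The optimizer class, ``tf.Variable`` / ``tf.ones``
# names, and the import path are assumptions based on the Keras-compatible API.
from dragon.vm import tensorflow as tf

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
weight = tf.Variable(tf.ones([4]))
grad = tf.ones([4])  # a hand-made gradient, just for illustration
optimizer.apply_gradients([(grad, weight)])  # one update; iterations += 1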
def name_scope(name):
    """Context-manager to nest the name as prefix for operations.

    Examples:

    ```python
    with tf.name_scope('my_scope'):
        x = tf.constant(1)
    print(x.name)
    ```

    Parameters
    ----------
    name : str
        The prefix name.

    Returns
    -------
    str
        The current nesting prefix.

    """
    return context.name_scope(name)
def __call__(self, inputs, *args, **kwargs):
    """Wrap the ``self.call(...)`` with pre-post processing."""
    with context.name_scope(self._name_scope()):
        self._maybe_build(inputs)
        outputs = self.call(inputs, *args, **kwargs)
    return outputs
def __call__(self, *args, **kwargs):
    """The preprocessor for ``self.forward(...)``."""
    with context.name_scope(self.name):
        return self.forward(*args, **kwargs)