def __init__(
    self,
    num_features,
    bias=True,
    fix_weight=False,
    fix_bias=False,
    inplace=False,
):
    super(Affine, self).__init__()
    self.num_features = num_features
    self.inplace = inplace
    if not fix_weight:
        self.weight = Parameter(ones(num_features))
        if inplace:
            raise ValueError('Inplace computation requires fixed weight.')
    else:
        self.register_buffer('weight', ones(num_features))
    if bias:
        if not fix_bias:
            self.bias = Parameter(zeros(num_features))
        else:
            self.register_buffer('bias', zeros(num_features))
    else:
        self.bias = None
    self.inputs = [self.weight, self.bias] if bias else [self.weight]
    self.register_op()
def __init__(self, in_channels, out_channels, kernel_size, stride,
             padding, dilation, transposed, output_padding, groups, bias):
    super(_ConvNd, self).__init__()
    if in_channels % groups != 0:
        raise ValueError('in_channels must be divisible by groups')
    if out_channels % groups != 0:
        raise ValueError('out_channels must be divisible by groups')
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.transposed = transposed
    self.output_padding = output_padding
    self.groups = groups
    if transposed:
        self.weight = Parameter(
            Tensor(in_channels, out_channels // groups, *kernel_size))
    else:
        self.weight = Parameter(
            Tensor(out_channels, in_channels // groups, *kernel_size))
    if bias:
        self.bias = Parameter(Tensor(out_channels))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
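# A minimal, self-contained sketch of the weight-shape rule used by the
# constructor above: grouped convolutions split the channel dimension into
# `groups` equal slices, and transposed convolutions swap the roles of the
# in/out channels. The helper name `conv_weight_shape` is illustrative only,
# not part of the module.
def conv_weight_shape(in_channels, out_channels, kernel_size,
                      groups=1, transposed=False):
    if transposed:
        return (in_channels, out_channels // groups) + tuple(kernel_size)
    return (out_channels, in_channels // groups) + tuple(kernel_size)

# e.g. a 3x3 conv mapping 64 -> 128 channels with groups=2 stores a
# (128, 32, 3, 3) weight; its transposed counterpart stores (64, 64, 3, 3).
assert conv_weight_shape(64, 128, (3, 3), groups=2) == (128, 32, 3, 3)
assert conv_weight_shape(64, 128, (3, 3), groups=2, transposed=True) == (64, 64, 3, 3)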
def __init__(self, num_features, eps=1e-5, momentum=0.1,
             affine=True, track_running_stats=True):
    super(_BatchNorm, self).__init__()
    self.num_features = num_features
    self.eps = eps
    self.momentum = momentum
    self.affine = affine
    self.track_running_stats = track_running_stats
    if self.affine:
        self.weight = Parameter(Tensor(num_features))
        self.bias = Parameter(Tensor(num_features))
    else:
        self.register_buffer('weight', ones(num_features))
        self.register_buffer('bias', zeros(num_features))
    self.register_buffer('running_mean', zeros(num_features))
    self.register_buffer('running_var', ones(num_features))
    self.inputs = [
        self.running_mean, self.running_var, self.weight, self.bias
    ]
    self.reset_parameters()
    self.register_op()
    self.op_metas = {'TRAIN': None, 'TEST': None}
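# A rough numpy sketch (an assumption about semantics, not the backend
# kernel) of what the `running_mean` / `running_var` buffers registered
# above track: the torch-style exponential moving average of per-channel
# batch statistics, weighted by `momentum`.
import numpy as np

def update_running_stats(running_mean, running_var, batch, momentum=0.1):
    # `batch` is (N, num_features); statistics are taken per channel.
    batch_mean = batch.mean(axis=0)
    batch_var = batch.var(axis=0)
    running_mean = (1 - momentum) * running_mean + momentum * batch_mean
    running_var = (1 - momentum) * running_var + momentum * batch_var
    return running_mean, running_var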
def __init__(
    self,
    in_channels,
    out_channels,
    kernel_size,
    stride,
    padding,
    dilation,
    bias,
):
    super(_DepthwiseConvNd, self).__init__()
    if in_channels != out_channels:
        raise ValueError('in/out channels must be the same')
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.weight = Parameter(Tensor(out_channels, 1, *kernel_size))
    if bias:
        self.bias = Parameter(Tensor(out_channels))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
def __init__(self, num_features, bias=True, fix_weight=False, fix_bias=False):
    super(Affine, self).__init__()
    self.num_features = num_features
    self.weight = Parameter(ones(num_features), requires_grad=not fix_weight)
    if bias:
        self.bias = Parameter(zeros(num_features), requires_grad=not fix_bias)
    else:
        self.bias = None
    self.inputs = [self.weight, self.bias] if bias else [self.weight]
    self.register_op()
def __init__(self, in_features, out_features, bias=True):
    super(Linear, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.weight = Parameter(Tensor(out_features, in_features))
    if bias:
        self.bias = Parameter(Tensor(out_features))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
def _plan_params(self):
    if self.mode == 'lstm':
        gate_size = 4 * self.hidden_size
    elif self.mode == 'gru':
        gate_size = 3 * self.hidden_size
    else:
        gate_size = self.hidden_size
    # 1. plan weights
    self._matrix_weights = []
    self._bias_weights = []
    for layer in range(self.num_layers):
        for direction in range(self.num_directions):
            layer_input_size = self.input_size if layer == 0 \
                else self.hidden_size * self.num_directions
            w_names = [
                'layer_{}/{}/{}'.format(layer, p, 'L' if direction == 0 else 'R')
                for p in ('matrix_ih', 'matrix_hh', 'bias_ih', 'bias_hh')
            ]
            w_ih = dg.Tensor(name=w_names[0], shape=[gate_size, layer_input_size])
            w_hh = dg.Tensor(name=w_names[1], shape=[gate_size, self.hidden_size])
            b_ih = dg.Tensor(name=w_names[2], shape=[gate_size])
            b_hh = dg.Tensor(name=w_names[3], shape=[gate_size])
            # W (0 ~ 3), R (4 ~ 7)
            self._matrix_weights.extend([w_ih, w_hh])
            # Bw (0 ~ 3), Br (4 ~ 7)
            self._bias_weights.extend([b_ih, b_hh])
    # 2. compute total number of parameters
    self._weights_count = 0
    for w in self._matrix_weights + self._bias_weights:
        self._weights_count += np.prod(w.shape)
    # 3. register the packed weights
    self.weights = Parameter(Tensor(int(self._weights_count)))
    # 4. create the initialization grids
    if self.mode == 'lstm':
        num_params_per_layer = 8
    elif self.mode == 'gru':
        num_params_per_layer = 6
    else:
        num_params_per_layer = 2
    self._matrix_init_grids = [[[
        'orthogonal' for _ in range(num_params_per_layer)
    ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
    self._bias_init_grids = [[[
        'zero' for _ in range(num_params_per_layer)
    ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
    # 5. set the init flag
    self._init_params = False
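# A small standalone sketch of the parameter count that `_plan_params` packs
# into the single flat `weights` tensor. For an LSTM, gate_size is
# 4 * hidden_size and each (layer, direction) contributes w_ih, w_hh, b_ih
# and b_hh. The function below only mirrors the loop above; it is
# illustrative, not part of the module.
def packed_weight_count(mode, input_size, hidden_size,
                        num_layers=1, bidirectional=False):
    gate_size = {'lstm': 4, 'gru': 3}.get(mode, 1) * hidden_size
    num_directions = 2 if bidirectional else 1
    count = 0
    for layer in range(num_layers):
        for _ in range(num_directions):
            layer_input_size = input_size if layer == 0 \
                else hidden_size * num_directions
            count += gate_size * layer_input_size   # w_ih
            count += gate_size * hidden_size        # w_hh
            count += 2 * gate_size                  # b_ih + b_hh
    return count

# e.g. a single-layer unidirectional LSTM with input_size=32, hidden_size=64:
# 4*64*(32 + 64) + 2*4*64 = 25088 packed values.
assert packed_weight_count('lstm', 32, 64) == 25088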
def __init__(self, num_features, group=32, eps=1e-5, affine=True):
    super(_GroupNorm, self).__init__()
    self.num_features = num_features
    self.group = group
    self.eps = eps
    self.affine = affine
    if self.affine:
        self.weight = Parameter(Tensor(num_features))
        self.bias = Parameter(Tensor(num_features))
    else:
        self.weight = self.bias = None
    self.inputs = [self.weight, self.bias] if self.affine else []
    self.reset_parameters()
    self.register_op()
class Linear(Module):
    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(Tensor(out_features))
        else:
            self.bias = None
        self.reset_parameters()
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'InnerProduct',
            'n_inputs': 3 if self.bias else 2,
            'n_outputs': 1,
            'arguments': {
                'num_output': self.weight.shape[0],
                'axis': -1,
            }
        }

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input):
        inputs = [input, self.weight] + ([self.bias] if self.bias else [])
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
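# A small numpy sketch of the fan-in uniform initialization performed by
# Linear.reset_parameters() above: both weight and bias are drawn from
# U(-stdv, stdv) with stdv = 1 / sqrt(in_features). Illustrative only; the
# helper name is not part of the module.
import math
import numpy as np

def fan_in_uniform(out_features, in_features, rng=np.random):
    stdv = 1.0 / math.sqrt(in_features)
    weight = rng.uniform(-stdv, stdv, size=(out_features, in_features))
    bias = rng.uniform(-stdv, stdv, size=(out_features,))
    return weight, bias

# Matches the distribution used by Linear(128, 10).reset_parameters().
w, b = fan_in_uniform(10, 128)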
def __init__(self, input_size, hidden_size, bias, num_chunks):
    super(RNNCellBase, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    self.weight_ih = Parameter(Tensor(num_chunks * hidden_size, input_size))
    self.weight_hh = Parameter(Tensor(num_chunks * hidden_size, hidden_size))
    if bias:
        self.bias_ih = Parameter(Tensor(num_chunks * hidden_size))
        self.bias_hh = Parameter(Tensor(num_chunks * hidden_size))
    else:
        self.register_parameter('bias_ih', None)
        self.register_parameter('bias_hh', None)
    self.reset_parameters()
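# For reference, the per-cell parameter shapes created above (a reading of
# the constructor, not an extra API); in torch-style cells num_chunks is
# typically 4 for an LSTM cell, 3 for a GRU cell and 1 for a vanilla RNN cell:
#
#   weight_ih : (num_chunks * hidden_size, input_size)
#   weight_hh : (num_chunks * hidden_size, hidden_size)
#   bias_ih   : (num_chunks * hidden_size,)
#   bias_hh   : (num_chunks * hidden_size,)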
def _plan_params(self):
    if self.mode == 'lstm':
        gate_size = 4 * self.hidden_size
    elif self.mode == 'gru':
        gate_size = 3 * self.hidden_size
    else:
        gate_size = self.hidden_size
    # 1. Plan weights
    self._matrix_shape, self._bias_shape = [], []
    for layer in range(self.num_layers):
        for direction in range(self.num_directions):
            layer_input_size = self.input_size if layer == 0 \
                else self.hidden_size * self.num_directions
            w_ih_shape = [gate_size, layer_input_size]
            w_hh_shape = [gate_size, self.hidden_size]
            b_ih_shape, b_hh_shape = [gate_size], [gate_size]
            # W (0 ~ 3), R (4 ~ 7)
            self._matrix_shape.extend([w_ih_shape, w_hh_shape])
            # Bw (0 ~ 3), Br (4 ~ 7)
            self._bias_shape.extend([b_ih_shape, b_hh_shape])
    # 2. Compute total number of parameters
    self._weights_count = 0
    for shape in self._matrix_shape + self._bias_shape:
        self._weights_count += numpy.prod(shape)
    # 3. Register the packed weights
    self.weights = Parameter(Tensor(int(self._weights_count)))
    # 4. Create the initialization grids
    if self.mode == 'lstm':
        num_params_per_layer = 8
    elif self.mode == 'gru':
        num_params_per_layer = 6
    else:
        num_params_per_layer = 2
    self._matrix_init_grids = [[[
        'orthogonal' for _ in range(num_params_per_layer)
    ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
    self._bias_init_grids = [[[
        'zero' for _ in range(num_params_per_layer)
    ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
    # 5. Set the init flag
    self._init_params = False
class RNNBase(Module):
    def __init__(self, mode, input_size, hidden_size,
                 num_layers=1, bias=True, batch_first=False,
                 dropout=0, bidirectional=False):
        super(RNNBase, self).__init__()
        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = dropout if dropout != 0 else None
        self.dropout_state = {}
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        if batch_first:
            raise NotImplementedError('Batch first is disabled.')
        if not bias:
            raise NotImplementedError('Bias is required.')
        if not isinstance(dropout, numbers.Number) or not 0 <= dropout <= 1 or \
                isinstance(dropout, bool):
            raise ValueError("dropout should be a number in range [0, 1] "
                             "representing the probability of an element being "
                             "zeroed")
        if dropout > 0 and num_layers == 1:
            warnings.warn("dropout option adds dropout after all but last "
                          "recurrent layer, so non-zero dropout expects "
                          "num_layers greater than 1, but got dropout={} and "
                          "num_layers={}".format(dropout, num_layers))
        self._plan_params()
        self.register_op()
        self.meta_in_phase = {'TRAIN': [None, None], 'TEST': [None, None]}

    def register_op(self):
        self.op_meta = {
            'op_type': 'Recurrent',
            'n_inputs': 4, 'n_outputs': 2,  # meaningless
            'arguments': {
                'hidden_size': self.hidden_size,
                'num_layers': self.num_layers,
                'bidirectional': self.bidirectional,
                'rnn_mode': self.mode,
                'rnn_input_mode': 'linear',
                'dropout_ratio': self.dropout,
                'phase': 'TEST',
            }
        }

    def make_meta_from_phase(self, phase):
        def reset_meta(self, phase):
            # Re-Gen Key
            self._persistent_key = None
            _ = self.persistent_key
            self._persistent_key += '/{}'.format(phase)
            self.op_meta['arguments']['phase'] = phase
            # Re-Gen Op
            self._gen_op()
            self.meta_in_phase[phase][0] = self._persistent_key
            self.meta_in_phase[phase][1] = self._op

        if self._persistent_key is None:
            # Init or CTX has changed
            reset_meta(self, phase)
        else:
            # CTX unchanged & Run into a new phase
            if self.meta_in_phase[phase][0] is None:
                reset_meta(self, phase)
        return self.meta_in_phase[phase]

    def forward(self, input, hx=None):
        if hx and not isinstance(hx, Tensor):
            raise TypeError('Expected hx as a Tensor, got {}.'.format(type(hx)))
        if not self._init_params:
            self._reset_params()
        inputs = [input, self.weights] + ([hx] if hx else [])
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype) for _ in range(2)]
        requires_grad = False
        for input in inputs:
            if input.requires_grad:
                requires_grad = True
        requires_grad = requires_grad and is_grad_enabled()
        meta = ['PERSISTENT'] + \
            self.make_meta_from_phase('TRAIN' if requires_grad else 'TEST')
        return RunOperator(inputs, outputs, meta)

    def _plan_params(self):
        if self.mode == 'lstm':
            gate_size = 4 * self.hidden_size
        elif self.mode == 'gru':
            gate_size = 3 * self.hidden_size
        else:
            gate_size = self.hidden_size
        # 1. plan weights
        self._matrix_weights = []
        self._bias_weights = []
        for layer in range(self.num_layers):
            for direction in range(self.num_directions):
                layer_input_size = self.input_size if layer == 0 \
                    else self.hidden_size * self.num_directions
                w_names = [
                    'layer_{}/{}/{}'.format(layer, p, 'L' if direction == 0 else 'R')
                    for p in ('matrix_ih', 'matrix_hh', 'bias_ih', 'bias_hh')
                ]
                w_ih = dg.Tensor(name=w_names[0], shape=[gate_size, layer_input_size])
                w_hh = dg.Tensor(name=w_names[1], shape=[gate_size, self.hidden_size])
                b_ih = dg.Tensor(name=w_names[2], shape=[gate_size])
                b_hh = dg.Tensor(name=w_names[3], shape=[gate_size])
                # W (0 ~ 3), R (4 ~ 7)
                self._matrix_weights.extend([w_ih, w_hh])
                # Bw (0 ~ 3), Br (4 ~ 7)
                self._bias_weights.extend([b_ih, b_hh])
        # 2. compute total number of parameters
        self._weights_count = 0
        for w in self._matrix_weights + self._bias_weights:
            self._weights_count += np.prod(w.shape)
        # 3. register the packed weights
        self.weights = Parameter(Tensor(int(self._weights_count)))
        # 4. create the initialization grids
        if self.mode == 'lstm':
            num_params_per_layer = 8
        elif self.mode == 'gru':
            num_params_per_layer = 6
        else:
            num_params_per_layer = 2
        self._matrix_init_grids = [[[
            'orthogonal' for _ in range(num_params_per_layer)
        ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
        self._bias_init_grids = [[[
            'zero' for _ in range(num_params_per_layer)
        ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
        # 5. set the init flag
        self._init_params = False

    ##############################################
    #                                            #
    #                INITIALIZER                 #
    #                                            #
    ##############################################

    def _uniform_init(self, shape, dtype='float32'):
        stdv = 1.0 / np.sqrt(self.hidden_size)
        return np.random.uniform(-stdv, stdv, shape).astype(dtype)

    def _orthogonal_init(self, shape, gain=1, dtype='float32'):
        num_rows = 1
        for dim in shape[:-1]:
            num_rows *= dim
        num_cols = shape[-1]
        flat_shape = (num_cols, num_rows) if num_rows < num_cols \
            else (num_rows, num_cols)
        W = np.random.randn(*flat_shape)
        q, r = np.linalg.qr(W)
        # Make Q uniform
        d = np.diag(r)
        q *= np.sign(d)
        if num_rows < num_cols:
            q = q.T
        return gain * q.reshape(shape).astype(dtype)

    def _zero_init(self, shape, dtype='float32'):
        return np.zeros(shape, dtype=dtype)

    ##############################################
    #                                            #
    #                 PARAMETERS                 #
    #                                            #
    ##############################################

    def set_param(self, layer=0, direction=0, param_id=0,
                  type='matrix', initializer=None):
        if type == 'matrix':
            self._matrix_init_grids[layer][direction][param_id] = initializer
        elif type == 'bias':
            self._bias_init_grids[layer][direction][param_id] = initializer
        else:
            raise ValueError('Unknown param type: ' + type)

    def _set_param(self, layer_id, param_id, param_type, param):
        if not isinstance(param, Tensor):
            if isinstance(param, np.ndarray):
                paramT = dg.Tensor('/tmp/rnn_param').Variable()
                paramT.set_value(param)
                param = paramT
            else:
                raise ValueError('Expected a tensor or numpy array.')
        W = self.weights.dragon()
        outputs = RNNParamSet([W, param], layer_id, param_id, param_type,
                              rnn_mode=self.mode,
                              input_size=self.input_size,
                              hidden_size=self.hidden_size,
                              num_layers=self.num_layers,
                              num_directions=self.num_directions)
        for k, v in outputs.expressions.items():
            dg.workspace.RunOperator(v)

    def _reset_params(self):
        np.random.seed(dg.config.GetRandomSeed())
        if self.mode == 'lstm':
            num_gates = 4
        elif self.mode == 'gru':
            num_gates = 3
        else:
            num_gates = 1
        for layer in range(len(self._matrix_init_grids)):
            for direction in range(len(self._matrix_init_grids[0])):
                for param_id in range(len(self._matrix_init_grids[0][0])):
                    matrix_init = self._matrix_init_grids[layer][direction][param_id]
                    bias_init = self._bias_init_grids[layer][direction][param_id]
                    if isinstance(matrix_init, str):
                        matrix_init = getattr(self, '_{}_init'.format(matrix_init))
                    if isinstance(bias_init, str):
                        bias_init = getattr(self, '_{}_init'.format(bias_init))
                    pseudo_layer_id = layer * self.num_directions + direction
                    packed_id = pseudo_layer_id * 2 + int(param_id / num_gates)
                    matrix_shape = self._matrix_weights[packed_id].shape[:]
                    bias_shape = self._bias_weights[packed_id].shape[:]
                    matrix_shape[0] = bias_shape[0] = int(matrix_shape[0] / num_gates)
                    self._set_param(layer_id=pseudo_layer_id,
                                    param_id=param_id,
                                    param_type='matrix',
                                    param=matrix_init(matrix_shape))
                    self._set_param(layer_id=pseudo_layer_id,
                                    param_id=param_id,
                                    param_type='bias',
                                    param=bias_init(bias_shape))
        self._init_params = True
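# A standalone sketch of the indexing used in `_reset_params` above: each
# (layer, direction) pseudo-layer owns two packed slots (w_ih/b_ih at the
# even offset, w_hh/b_hh at the odd offset), and param_id // num_gates picks
# between them. Purely illustrative; it only recomputes the indices.
def packed_slot(layer, direction, param_id, num_directions, num_gates):
    pseudo_layer_id = layer * num_directions + direction
    packed_id = pseudo_layer_id * 2 + param_id // num_gates
    return pseudo_layer_id, packed_id

# e.g. for a bidirectional LSTM (num_gates=4): param_id 0-3 of layer 0,
# direction 1 address packed slot 2 (that direction's w_ih/b_ih pair),
# while param_id 4-7 address slot 3 (the w_hh/b_hh pair).
assert packed_slot(0, 1, 2, num_directions=2, num_gates=4) == (1, 2)
assert packed_slot(0, 1, 5, num_directions=2, num_gates=4) == (1, 3)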
class RNNBase(Module):
    def __init__(
        self,
        mode,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0,
        bidirectional=False,
    ):
        super(RNNBase, self).__init__()
        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = dropout if dropout != 0 else None
        self.dropout_state = {}
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        if batch_first:
            raise NotImplementedError('Batch first is disabled.')
        if not bias:
            raise NotImplementedError('Bias is required.')
        if not isinstance(dropout, numbers.Number) or \
                not 0 <= dropout <= 1 or isinstance(dropout, bool):
            raise ValueError("dropout should be a number in range [0, 1] "
                             "representing the probability of an element being "
                             "zeroed")
        if dropout > 0 and num_layers == 1:
            warnings.warn("dropout option adds dropout after all but last "
                          "recurrent layer, so non-zero dropout expects "
                          "num_layers greater than 1, but got dropout={} and "
                          "num_layers={}".format(dropout, num_layers))
        self._plan_params()
        self.register_op()
        self.op_metas = {'TRAIN': None, 'TEST': None}

    def register_op(self):
        self.op_meta = {
            'op_type': 'Recurrent',
            'arguments': {
                'hidden_size': self.hidden_size,
                'num_layers': self.num_layers,
                'bidirectional': self.bidirectional,
                'rnn_mode': self.mode,
                'rnn_input_mode': 'linear',
                'dropout_ratio': self.dropout,
                'phase': 'TEST',
            }
        }

    def extra_repr(self):
        s = '{input_size}, {hidden_size}'
        if self.num_layers != 1:
            s += ', num_layers={num_layers}'
        if self.bias is not True:
            s += ', bias={bias}'
        if self.batch_first is not False:
            s += ', batch_first={batch_first}'
        if self.dropout != 0:
            s += ', dropout={dropout}'
        if self.bidirectional is not False:
            s += ', bidirectional={bidirectional}'
        return s.format(**self.__dict__)

    def make_meta_from_phase(self, phase):
        def reset_meta(self, phase):
            self._module_key = None
            _ = self.module_key
            self._module_key += '/{}'.format(phase)
            self.op_meta['arguments']['phase'] = phase
            self._gen_module_def()
            self.op_metas[phase] = (self._module_key, self._module_def)

        if self._module_key is None:
            # Init or Context has changed
            reset_meta(self, phase)
        else:
            # Context unchanged
            if self.op_metas[phase] is None:
                reset_meta(self, phase)
        return self.op_metas[phase]

    def forward(self, input, hx=None):
        if hx and not isinstance(hx, Tensor):
            raise TypeError('Expected hx as a Tensor, got {}.'.format(type(hx)))
        if not self._init_params:
            self._reset_params()
        inputs = [input, self.weights] + ([hx] if hx else [])
        self.unify_devices(inputs)
        outputs = [self.register_output() for _ in range(2)]
        meta = self.make_meta_from_phase('TRAIN' if self.training else 'TEST')
        return RunOperator(inputs, outputs, meta)

    def _plan_params(self):
        if self.mode == 'lstm':
            gate_size = 4 * self.hidden_size
        elif self.mode == 'gru':
            gate_size = 3 * self.hidden_size
        else:
            gate_size = self.hidden_size
        # 1. Plan weights
        self._matrix_shape, self._bias_shape = [], []
        for layer in range(self.num_layers):
            for direction in range(self.num_directions):
                layer_input_size = self.input_size if layer == 0 \
                    else self.hidden_size * self.num_directions
                w_ih_shape = [gate_size, layer_input_size]
                w_hh_shape = [gate_size, self.hidden_size]
                b_ih_shape, b_hh_shape = [gate_size], [gate_size]
                # W (0 ~ 3), R (4 ~ 7)
                self._matrix_shape.extend([w_ih_shape, w_hh_shape])
                # Bw (0 ~ 3), Br (4 ~ 7)
                self._bias_shape.extend([b_ih_shape, b_hh_shape])
        # 2. Compute total number of parameters
        self._weights_count = 0
        for shape in self._matrix_shape + self._bias_shape:
            self._weights_count += numpy.prod(shape)
        # 3. Register the packed weights
        self.weights = Parameter(Tensor(int(self._weights_count)))
        # 4. Create the initialization grids
        if self.mode == 'lstm':
            num_params_per_layer = 8
        elif self.mode == 'gru':
            num_params_per_layer = 6
        else:
            num_params_per_layer = 2
        self._matrix_init_grids = [[[
            'orthogonal' for _ in range(num_params_per_layer)
        ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
        self._bias_init_grids = [[[
            'zero' for _ in range(num_params_per_layer)
        ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
        # 5. Set the init flag
        self._init_params = False

    ##############################################
    #                                            #
    #                INITIALIZER                 #
    #                                            #
    ##############################################

    def _uniform_init(self, shape, dtype='float32'):
        stdv = 1.0 / numpy.sqrt(self.hidden_size)
        return numpy.random.uniform(-stdv, stdv, shape).astype(dtype)

    def _orthogonal_init(self, shape, gain=1, dtype='float32'):
        num_rows = 1
        for dim in shape[:-1]:
            num_rows *= dim
        num_cols = shape[-1]
        flat_shape = (num_cols, num_rows) if num_rows < num_cols \
            else (num_rows, num_cols)
        W = numpy.random.randn(*flat_shape)
        q, r = numpy.linalg.qr(W)
        # Make Q uniform
        d = numpy.diag(r)
        q *= numpy.sign(d)
        if num_rows < num_cols:
            q = q.T
        return gain * q.reshape(shape).astype(dtype)

    def _zero_init(self, shape, dtype='float32'):
        return numpy.zeros(shape, dtype=dtype)

    ##############################################
    #                                            #
    #                 PARAMETERS                 #
    #                                            #
    ##############################################

    def set_param(self, layer=0, direction=0, param_id=0,
                  type='matrix', initializer=None):
        if type == 'matrix':
            self._matrix_init_grids[layer][direction][param_id] = initializer
        elif type == 'bias':
            self._bias_init_grids[layer][direction][param_id] = initializer
        else:
            raise ValueError('Unknown param type: ' + type)

    def _set_param(self, layer_id, param_id, param_type, param):
        if isinstance(param, numpy.ndarray):
            param_temp = dragon.Tensor.Ref('/tmp/rnn_param')
            param_temp.set_value(param)
            param = param_temp
        else:
            raise ValueError('Expected a numpy array.')
        W = self.weights.dragon()
        outputs = RNNParamSet([W, param], layer_id, param_id, param_type,
                              rnn_mode=self.mode,
                              input_size=self.input_size,
                              hidden_size=self.hidden_size,
                              num_layers=self.num_layers,
                              num_directions=self.num_directions)
        for k, v in outputs.expressions.items():
            dragon.workspace.RunOperator(v)

    def _reset_params(self):
        numpy.random.seed(dragon.config.GetRandomSeed())
        if self.mode == 'lstm':
            num_gates = 4
        elif self.mode == 'gru':
            num_gates = 3
        else:
            num_gates = 1
        for layer in range(len(self._matrix_init_grids)):
            for direction in range(len(self._matrix_init_grids[0])):
                for param_id in range(len(self._matrix_init_grids[0][0])):
                    matrix_init = self._matrix_init_grids[layer][direction][param_id]
                    bias_init = self._bias_init_grids[layer][direction][param_id]
                    if isinstance(matrix_init, str):
                        matrix_init = getattr(self, '_{}_init'.format(matrix_init))
                    if isinstance(bias_init, str):
                        bias_init = getattr(self, '_{}_init'.format(bias_init))
                    pseudo_layer_id = layer * self.num_directions + direction
                    packed_id = pseudo_layer_id * 2 + int(param_id / num_gates)
                    matrix_shape = self._matrix_shape[packed_id][:]
                    bias_shape = self._bias_shape[packed_id][:]
                    matrix_shape[0] = bias_shape[0] = int(matrix_shape[0] / num_gates)
                    self._set_param(layer_id=pseudo_layer_id,
                                    param_id=param_id,
                                    param_type='matrix',
                                    param=matrix_init(matrix_shape))
                    self._set_param(layer_id=pseudo_layer_id,
                                    param_id=param_id,
                                    param_type='bias',
                                    param=bias_init(bias_shape))
        self._init_params = True
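# A hedged usage sketch for the parameter grid above: set_param() only
# records the initializer (a string naming one of the _*_init methods, or a
# callable taking a shape), and the actual fill happens lazily in
# _reset_params() on the first forward pass. The concrete LSTM subclass name
# below is assumed to follow the usual torch naming and is not defined here.
#
# rnn = LSTM(input_size=32, hidden_size=64, num_layers=1)
# rnn.set_param(layer=0, direction=0, param_id=0,
#               type='matrix', initializer='uniform')   # w_ih of gate 0
# y, hidden = rnn(x)   # triggers _reset_params() and fills the packed weights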