def call(self, inputs):
    """Forward pass: convolve with binarized weights.

    The input/output rescaling around `K.stop_gradient` leaves the forward
    value unchanged while scaling the gradient that reaches the kernel by
    `kernel_lr_multiplier` (straight-through learning-rate trick).
    """
    bin_kernel = binarize(self.kernel, H=self.H)
    inv_lr = 1. / self.kernel_lr_multiplier

    # Pre-scale: forward value identical, gradient scaled by 1/inv_lr.
    grad_scaled_inputs = (inputs - (1. - 1. / inv_lr) * K.stop_gradient(inputs)) \
        * inv_lr
    conv = K.conv2d(
        grad_scaled_inputs,
        bin_kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)
    # Post-scale: undo the forward effect, keeping only the gradient scaling.
    result = (conv - (1. - 1. / self.kernel_lr_multiplier) * K.stop_gradient(conv)) \
        * self.kernel_lr_multiplier

    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format=self.data_format)
    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs):
    """Dense forward pass: inputs @ binarize(kernel) [+ bias], then activation."""
    y = K.dot(inputs, binarize(self.kernel, H=self.H))
    if self.use_bias:
        y = K.bias_add(y, self.bias)
    return y if self.activation is None else self.activation(y)
def call(self, x, mask=None):
    """Legacy Keras 1 dense forward pass: activation(x @ binarize(W) [+ b])."""
    w_bin = binarize(self.W, H=self.H)
    pre_activation = K.dot(x, w_bin)
    if self.bias:
        pre_activation = pre_activation + self.b
    return self.activation(pre_activation)
def call(self, inputs):
    """Forward pass: matrix product with weights binarized at scale H=1.

    No bias and no activation are applied here.
    """
    # NOTE(review): H is hard-coded to 1. while sibling layers use self.H —
    # confirm this is intentional.
    w_bin = binarize(self.kernel, H=1.)
    return K.dot(inputs, w_bin)
def call(self, inputs):
    """Separable conv forward pass; both depthwise and pointwise kernels are binarized."""
    dw_bin = binarize(self.depthwise_kernel, H=self.H)
    pw_bin = binarize(self.pointwise_kernel, H=self.H)
    y = K.separable_conv2d(
        inputs,
        dw_bin,
        pw_bin,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)
    if self.use_bias:
        y = K.bias_add(y, self.bias, data_format=self.data_format)
    return y if self.activation is None else self.activation(y)
def binfullcon(x, W, layer_name, normw=False, stochastic=False):
    """Fully-connected layer with binarized weights and no bias term.

    Emits TensorBoard histograms for the raw weights, the binarized weights,
    and the layer output.
    """
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            tf.summary.histogram('histogram', W)
        with tf.name_scope('BinWeights'):
            Wb = binarize(W, normalize=normw, stochastic=stochastic)
            tf.summary.histogram('BinWeights', Wb)
        fc_out = tf.matmul(x, Wb)  # no bias
        tf.summary.histogram('Fcout', fc_out)
        # Bias deliberately omitted.
        return fc_out
def build(self, input_shape):
    """Create the conv layer's kernel (and optional bias) weights.

    Resolves the 'Glorot' placeholders for `H` and `kernel_lr_multiplier`
    from the convolution's fan-in/fan-out, then registers the kernel under
    a [0, H] clip constraint so `binarize` stays well-scaled.

    Raises:
        ValueError: if the input's channel dimension is undefined.
    """
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    if input_shape[channel_axis] is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')
    input_dim = input_shape[channel_axis]
    kernel_shape = self.kernel_size + (input_dim, self.filters)

    # Effective fan counts include the spatial extent of the kernel.
    base = self.kernel_size[0] * self.kernel_size[1]
    if self.H == 'Glorot':
        nb_input = int(input_dim * base)
        nb_output = int(self.filters * base)
        self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output)))
        #print('Glorot H: {}'.format(self.H))
    if self.kernel_lr_multiplier == 'Glorot':
        nb_input = int(input_dim * base)
        nb_output = int(self.filters * base)
        self.kernel_lr_multiplier = np.float32(
            1. / np.sqrt(1.5 / (nb_input + nb_output)))
        #print('Glorot learning rate multiplier: {}'.format(self.lr_multiplier))

    # NOTE(review): clip/init range is [0, H] rather than [-H, H] — confirm
    # this one-sided range is intended.
    self.kernel_constraint = Clip(0, self.H)
    self.kernel_initializer = initializers.RandomUniform(0, self.H)
    self.kernel = self.add_weight(shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
    # (Removed a dead `binarize(self.kernel, H=self.H)` call here: its result
    # was never used — binarization happens per forward pass in call().)

    if self.use_bias:
        self.lr_multipliers = [
            self.kernel_lr_multiplier, self.bias_lr_multiplier
        ]
        # Bug fix: a conv bias has one entry per output filter, and the
        # attribute is `bias_initializer` (singular). The original used
        # `(self.output_dim,)` / `self.bias_initializers`, which look like a
        # copy-paste from a Dense layer and would fail when use_bias=True.
        self.bias = self.add_weight(shape=(self.filters, ),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.lr_multipliers = [self.kernel_lr_multiplier]
        self.bias = None

    # Set input spec.
    self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
    self.built = True
def binconv2d(x, W, layer_name, normw=False, depthwise=False, stochastic=False):
    """2-D (optionally depthwise) convolution with binarized weights, no bias.

    Emits TensorBoard histograms for the raw weights, the binarized weights,
    and the convolution output.
    """
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            tf.summary.histogram('histogram', W)
        with tf.name_scope('BinWeights'):
            Wb = binarize(W, normalize=normw, stochastic=stochastic)
            tf.summary.histogram('BinWeights', Wb)
        # Select the convolution op; stride 1 and SAME padding in both cases.
        conv_op = tf.nn.depthwise_conv2d if depthwise else tf.nn.conv2d
        conv_out = conv_op(x, Wb, strides=[1, 1, 1, 1], padding='SAME')
        tf.summary.histogram('Convout', conv_out)
        # Bias deliberately omitted.
        return conv_out
def call(self, inputs):
    """Elementwise scaling of the input by a binarized kernel."""
    y = inputs * binarize(self.kernel, H=self.H)
    if self.use_bias:
        y = K.bias_add(y, self.bias, data_format=self.data_format)
    if self.activation is None:
        return y
    #TODO: make sure this is not executing
    return self.activation(y)
def call(self, inputs):
    """Conv forward pass with weights binarized at scale H=1.

    No bias and no activation are applied here.
    """
    # NOTE(review): H is hard-coded to 1. while sibling layers use self.H —
    # confirm this is intentional.
    w_bin = binarize(self.kernel, H=1.)
    return K.conv2d(
        inputs,
        w_bin,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)
def call(self, inputs):
    """Conv forward pass: conv2d with binarized weights [+ bias], then activation."""
    y = K.conv2d(
        inputs,
        binarize(self.kernel, H=self.H),
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)
    if self.use_bias:
        y = K.bias_add(y, self.bias, data_format=self.data_format)
    return y if self.activation is None else self.activation(y)
def call(self, x, mask=None):
    """Legacy Keras 1 conv forward pass with binarized weights.

    Raises:
        Exception: if `self.dim_ordering` is neither 'th' nor 'tf'.
    """
    w_bin = binarize(self.W, H=self.H)
    conv_out = K.conv2d(x, w_bin,
                        strides=self.subsample,
                        border_mode=self.border_mode,
                        dim_ordering=self.dim_ordering,
                        filter_shape=self.W_shape)
    if self.bias:
        # Broadcast the bias along the channel axis of the chosen layout.
        if self.dim_ordering == 'th':
            bias_shape = (1, self.nb_filter, 1, 1)
        elif self.dim_ordering == 'tf':
            bias_shape = (1, 1, 1, self.nb_filter)
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
        conv_out = conv_out + K.reshape(self.b, bias_shape)
    return self.activation(conv_out)
def _conv_layer(
        self, layer_name, inputs, filters, size, stride, padding='SAME',
        freeze=False, xavier=False, relu=True, w_bin=8, stddev=0.001,
        depthwise=False):
    """Convolutional layer operation constructor.

    Args:
      layer_name: layer name.
      inputs: input tensor
      filters: number of output filters.
      size: kernel size.
      stride: stride
      padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
      freeze: if true, then do not train the parameters in this layer.
      xavier: whether to use xavier weight initializer or not.
      relu: whether to use relu or not.
      w_bin: weight precision selector — 1 for binarized weights, 8 for 8-bit
        linear quantization, any other value for full-precision floats.
      stddev: standard deviation used for random weight initializer.
      depthwise: if true, use a depthwise convolution; output channel count
        must then equal the input channel count.
    Returns:
      A convolutional layer operation.
    """
    mc = self.mc

    # Try to seed kernel/bias from a pretrained caffemodel when available and
    # the stored shapes match this layer's configuration.
    use_pretrained_param = False
    if mc.LOAD_PRETRAINED_MODEL:
        cw = self.caffemodel_weight
        if layer_name in cw:
            # Caffe stores kernels as [out, in, h, w]; TF wants [h, w, in, out].
            kernel_val = np.transpose(cw[layer_name][0], [2, 3, 1, 0])
            bias_val = cw[layer_name][1]
            # check the shape
            if (kernel_val.shape ==
                    (size, size, inputs.get_shape().as_list()[-1], filters)) \
                    and (bias_val.shape == (filters,)):
                use_pretrained_param = True
            else:
                print('Shape of the pretrained parameter of {} does not match, '
                      'use randomly initialized parameter'.format(layer_name))
        else:
            print('Cannot find {} in the pretrained model. Use randomly initialized '
                  'parameters'.format(layer_name))

    if mc.DEBUG_MODE:
        print('Input tensor shape to {}: {}'.format(layer_name, inputs.get_shape()))

    with tf.variable_scope(layer_name) as scope:
        channels = inputs.get_shape()[3]  # # of input channel

        # re-order the caffe kernel with shape [out, in, h, w] -> tf kernel with
        # shape [h, w, in, out]
        if use_pretrained_param:
            if mc.DEBUG_MODE:
                print('Using pretrained model for {}'.format(layer_name))
            kernel_init = tf.constant(kernel_val, dtype=tf.float32)
            bias_init = tf.constant(bias_val, dtype=tf.float32)
        elif xavier:
            kernel_init = tf.contrib.layers.xavier_initializer_conv2d()
            bias_init = tf.constant_initializer(0.0)
        else:
            kernel_init = tf.truncated_normal_initializer(
                stddev=stddev, dtype=tf.float32)
            bias_init = tf.constant_initializer(0.0)

        if depthwise == False:
            # normal conv 2D
            kernel = _variable_with_weight_decay(
                'kernels', shape=[size, size, int(channels), filters],
                wd=mc.WEIGHT_DECAY, initializer=kernel_init,
                trainable=(not freeze))
        else:
            # depthwise conv
            # ignore filters parameter (# of ochannel since it's same to ichannel)
            assert int(channels) == filters, \
                "DW conv's ic should be same to oc: {} vs. {}".format(
                    int(channels), filters)
            kernel = _variable_with_weight_decay(
                'kernels', shape=[size, size, int(channels), 1],
                wd=mc.WEIGHT_DECAY, initializer=kernel_init,
                trainable=(not freeze))

        # Biases are disabled in every branch below (conv_bias is just conv);
        # the original bias code is kept for reference.
        # biases = _variable_on_device('biases', [filters], bias_init,
        #                             trainable=(not freeze))
        # self.model_params += [kernel, biases]

        if w_bin == 1:  # binarized conv
            self.model_params += [kernel]
            kernel_bin = binarize(kernel)
            tf.summary.histogram('kernel_bin', kernel_bin)
            if depthwise == False:
                conv = tf.nn.conv2d(inputs, kernel_bin, [1, stride, stride, 1],
                                    padding=padding, name='convolution')
            else:  # DW CONV
                conv = tf.nn.depthwise_conv2d(inputs, kernel_bin,
                                              [1, stride, stride, 1],
                                              padding=padding, name='convolution')
            conv_bias = conv
        elif w_bin == 8:  # 8b quantization
            # biases = _variable_on_device('biases', [filters], bias_init, trainable=(not freeze))
            self.model_params += [kernel]
            kernel_quant = lin_8b_quant(kernel)
            tf.summary.histogram('kernel_quant', kernel_quant)
            # biases_quant = lin_8b_quant(biases)
            # tf.summary.histogram('biases_quant', biases_quant)
            if depthwise == False:
                conv = tf.nn.conv2d(inputs, kernel_quant, [1, stride, stride, 1],
                                    padding=padding, name='convolution')
            else:  # DW CONV
                conv = tf.nn.depthwise_conv2d(inputs, kernel_quant,
                                              [1, stride, stride, 1],
                                              padding=padding, name='convolution')
            # conv_bias = tf.nn.bias_add(conv, biases_quant, name='bias_add')
            conv_bias = conv
        else:
            # Full-precision path. NOTE(review): `biases` is created and
            # registered here but still never added to the output.
            biases = _variable_on_device('biases', [filters], bias_init,
                                         trainable=(not freeze))
            self.model_params += [kernel, biases]
            if depthwise == False:
                conv = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1],
                                    padding=padding, name='convolution')
            else:  # DW CONV
                conv = tf.nn.depthwise_conv2d(inputs, kernel,
                                              [1, stride, stride, 1],
                                              padding=padding, name='convolution')
            # conv_bias = tf.nn.bias_add(conv, biases, name='bias_add')
            conv_bias = conv

        if relu:
            out = tf.nn.relu(conv_bias, 'relu')
        else:
            out = conv_bias

        # Bookkeeping: parameter count, FLOPs, and activation size.
        # NOTE(review): the "+1" terms count a bias per filter even though
        # biases are disabled above — confirm whether that is intended.
        self.model_size_counter.append(
            (layer_name, (1 + size * size * int(channels)) * filters)
        )
        out_shape = out.get_shape().as_list()
        num_flops = \
            (1 + 2 * int(channels) * size * size) * filters * out_shape[1] * out_shape[2]
        if relu:
            num_flops += 2 * filters * out_shape[1] * out_shape[2]
        self.flop_counter.append((layer_name, num_flops))
        self.activation_counter.append(
            (layer_name, out_shape[1] * out_shape[2] * out_shape[3])
        )

        return out