import math

import numpy as np
import tensorflow as tf

# Layer, Linear, init_filters, and conv_output_length are assumed to be
# provided elsewhere in this repository.


class Convolution(Layer):

    def __init__(self, input_shape, filter_sizes, init, strides=[1, 1, 1, 1],
                 padding='SAME', activation=None, bias=0., use_bias=True,
                 name=None, load=None, train=True):
        self.input_shape = input_shape
        self.filter_sizes = filter_sizes
        self.batch_size, self.h, self.w, self.fin = self.input_shape
        self.fh, self.fw, self.fin, self.fout = self.filter_sizes
        self.init = init
        self.strides = strides
        _, self.sh, self.sw, _ = self.strides
        self.padding = padding
        self.activation = Linear() if activation is None else activation
        self.use_bias = use_bias
        self.name = name
        self.train_flag = train

        if load:
            print("Loading Weights: " + self.name)
            weight_dict = np.load(load, encoding='latin1',
                                  allow_pickle=True).item()
            filters = weight_dict[self.name]
            bias = weight_dict[self.name + '_bias']
        else:
            filters = init_filters(size=self.filter_sizes, init=self.init)
            bias = np.ones(shape=self.fout) * bias

        self.filters = tf.Variable(filters, dtype=tf.float32)
        self.bias = tf.Variable(bias, dtype=tf.float32)

    ###################################################################

    def get_weights(self):
        return [(self.name, self.filters), (self.name + "_bias", self.bias)]

    def output_shape(self):
        oh = conv_output_length(self.h, self.fh, self.padding.lower(), self.sh)
        ow = conv_output_length(self.w, self.fw, self.padding.lower(), self.sw)
        od = self.fout
        return [oh, ow, od]

    def num_params(self):
        filter_weights_size = self.fh * self.fw * self.fin * self.fout
        bias_weights_size = self.fout
        return filter_weights_size + bias_weights_size

    def forward(self, X):
        Z = tf.nn.conv2d(X, self.filters, self.strides, self.padding)
        if self.use_bias:
            Z = Z + tf.reshape(self.bias, (1, 1, 1, self.fout))
        A = self.activation.forward(Z)
        return {'aout': A, 'cache': {}}

    ###################################################################

    def bp(self, AI, AO, DO, cache):
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DI = tf.nn.conv2d_backprop_input(input_sizes=self.input_shape,
                                         filter=self.filters,
                                         out_backprop=DO,
                                         strides=self.strides,
                                         padding=self.padding)
        DF = tf.nn.conv2d_backprop_filter(input=AI,
                                          filter_sizes=self.filter_sizes,
                                          out_backprop=DO,
                                          strides=self.strides,
                                          padding=self.padding)
        DB = tf.reduce_sum(DO, axis=[0, 1, 2])
        if self.train_flag:
            return {'dout': DI, 'cache': {}}, [(DF, self.filters),
                                               (DB, self.bias)]
        else:
            return {'dout': DI, 'cache': {}}, []

    def dfa(self, AI, AO, E, DO, cache):
        return self.bp(AI, AO, DO, cache)

    def lel(self, AI, AO, DO, Y, cache):
        return self.bp(AI, AO, DO, cache)
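# A minimal usage sketch (hypothetical shapes and init string; assumes the
# TF1-style graph mode this code is written for):
#
#   conv = Convolution(input_shape=[32, 28, 28, 1],
#                      filter_sizes=[5, 5, 1, 16],
#                      init='glorot_uniform', name='conv1')
#   X = tf.placeholder(tf.float32, [32, 28, 28, 1])
#   out = conv.forward(X)       # {'aout': activations, 'cache': {}}
#   A = out['aout']             # [32, 28, 28, 16] with 'SAME' padding
#   dout, gvs = conv.bp(AI=X, AO=A, DO=DO, cache={})
#   # gvs is [(DF, filters), (DB, bias)] when train=True, else []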
class FullyConnected(Layer):

    def __init__(self, input_shape, size, init=None, activation=None, bias=0.,
                 alpha=0., name=None, load=None, train=True):
        self.input_size = input_shape
        self.output_size = size
        self.size = [self.input_size, self.output_size]
        bias = np.ones(shape=self.output_size) * bias
        self.alpha = alpha
        self.activation = Linear() if activation is None else activation
        self.name = name
        self._train = train

        if load:
            print("Loading Weights: " + self.name)
            weight_dict = np.load(load, allow_pickle=True).item()
            weights = weight_dict[self.name]
            bias = weight_dict[self.name + '_bias']
        else:
            if init == "zero":
                weights = np.zeros(shape=self.size)
            elif init == "sqrt_fan_in":
                sqrt_fan_in = math.sqrt(self.input_size)
                weights = np.random.uniform(low=-1.0 / sqrt_fan_in,
                                            high=1.0 / sqrt_fan_in,
                                            size=self.size)
            elif init == "alexnet":
                weights = np.random.normal(loc=0.0, scale=0.01, size=self.size)
            else:
                # Glorot / Xavier uniform initialization:
                # https://www.tensorflow.org/api_docs/python/tf/glorot_uniform_initializer
                fan_in = self.input_size
                fan_out = self.output_size
                lim = np.sqrt(6. / (fan_in + fan_out))
                weights = np.random.uniform(low=-lim, high=lim, size=self.size)

        self.weights = tf.Variable(weights, dtype=tf.float32)
        self.bias = tf.Variable(bias, dtype=tf.float32)

    ###################################################################

    def get_weights(self):
        return [(self.name, self.weights), (self.name + "_bias", self.bias)]

    def set_weights(self, weight_dic):
        weights = weight_dic[self.name]
        bias = weight_dic[self.name + '_bias']
        return [self.weights.assign(weights), self.bias.assign(bias)]

    def num_params(self):
        weights_size = self.input_size * self.output_size
        bias_size = self.output_size
        return weights_size + bias_size

    def forward(self, X):
        Z = tf.matmul(X, self.weights) + self.bias
        A = self.activation.forward(Z)
        return A

    ###################################################################

    def backward(self, AI, AO, DO):
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DI = tf.matmul(DO, tf.transpose(self.weights))
        return DI

    def gv(self, AI, AO, DO):
        if not self._train:
            return []
        # N is the batch size; it is currently unused (gradients are summed,
        # not averaged, over the batch).
        N = tf.cast(tf.shape(AI)[0], dtype=tf.float32)
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DW = tf.matmul(tf.transpose(AI), DO)
        DB = tf.reduce_sum(DO, axis=0)
        return [(DW, self.weights), (DB, self.bias)]

    def train(self, AI, AO, DO):
        if not self._train:
            return []
        N = tf.cast(tf.shape(AI)[0], dtype=tf.float32)
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DW = tf.matmul(tf.transpose(AI), DO)
        DB = tf.reduce_sum(DO, axis=0)
        # In-graph SGD update with learning rate alpha.
        self.weights = self.weights.assign(
            tf.subtract(self.weights, tf.scalar_mul(self.alpha, DW)))
        self.bias = self.bias.assign(
            tf.subtract(self.bias, tf.scalar_mul(self.alpha, DB)))
        return [(DW, self.weights), (DB, self.bias)]

    ###################################################################

    def dfa_backward(self, AI, AO, E, DO):
        # DFA does not propagate the error through this layer's weights.
        return tf.ones_like(AI)

    def dfa_gv(self, AI, AO, E, DO):
        # Identical to the backprop gradients; only the incoming DO differs.
        return self.gv(AI, AO, DO)

    def dfa(self, AI, AO, E, DO):
        return self.train(AI, AO, DO)

    ###################################################################

    def lel_backward(self, AI, AO, E, DO, Y):
        # DI = tf.zeros_like(AI)
        DI = self.backward(AI, AO, DO)
        return DI

    def lel_gv(self, AI, AO, E, DO, Y):
        return self.gv(AI, AO, DO)

    def lel(self, AI, AO, E, DO, Y):
        return self.train(AI, AO, DO)
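# A minimal usage sketch for FullyConnected (hypothetical sizes; alpha is the
# learning rate used by the in-graph SGD updates in train()/dfa()):
#
#   fc = FullyConnected(input_shape=784, size=10, init='sqrt_fan_in',
#                       alpha=0.01, name='fc1')
#   X = tf.placeholder(tf.float32, [None, 784])
#   A = fc.forward(X)                   # [N, 10]
#   DI = fc.backward(AI=X, AO=A, DO=DO) # gradient w.r.t. the layer input
#   gvs = fc.gv(AI=X, AO=A, DO=DO)      # [(DW, weights), (DB, bias)]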
# A second Convolution variant with a direct (non-dict) interface, matching
# the FullyConnected class above: forward() returns the activation tensor,
# and backward()/gv()/train() mirror its backward/gv/train methods.
class Convolution(Layer):

    def __init__(self, input_sizes, filter_sizes, strides, padding, init=None,
                 alpha=0., activation=None, bias=0., name=None, load=None,
                 train=True):
        self.input_sizes = input_sizes
        self.filter_sizes = filter_sizes
        self.batch_size, self.h, self.w, self.fin = self.input_sizes
        self.fh, self.fw, self.fin, self.fout = self.filter_sizes
        bias = np.ones(shape=self.fout) * bias
        self.strides = strides
        _, self.sh, self.sw, _ = self.strides  # needed by output_shape()
        self.padding = padding
        self.alpha = alpha
        self.activation = Linear() if activation is None else activation
        self.name = name
        self._train = train

        if load:
            print("Loading Weights: " + self.name)
            weight_dict = np.load(load, encoding='latin1',
                                  allow_pickle=True).item()
            filters = weight_dict[self.name]
            bias = weight_dict[self.name + '_bias']
        else:
            if init == "zero":
                filters = np.zeros(shape=self.filter_sizes)
            elif init == "sqrt_fan_in":
                sqrt_fan_in = math.sqrt(self.h * self.w * self.fin)
                filters = np.random.uniform(low=-1.0 / sqrt_fan_in,
                                            high=1.0 / sqrt_fan_in,
                                            size=self.filter_sizes)
            elif init == "alexnet":
                filters = np.random.normal(loc=0.0, scale=0.01,
                                           size=self.filter_sizes)
            else:
                # Glorot / Xavier uniform initialization:
                # https://www.tensorflow.org/api_docs/python/tf/glorot_uniform_initializer
                fan_in = self.fh * self.fw * self.fin
                fan_out = self.fout
                lim = np.sqrt(6. / (fan_in + fan_out))
                filters = np.random.uniform(low=-lim, high=lim,
                                            size=self.filter_sizes)

        self.filters = tf.Variable(filters, dtype=tf.float32)
        self.bias = tf.Variable(bias, dtype=tf.float32)

    ###################################################################

    def get_weights(self):
        return [(self.name, self.filters), (self.name + "_bias", self.bias)]

    def set_weights(self, weight_dic):
        filters = weight_dic[self.name]
        bias = weight_dic[self.name + '_bias']
        return [self.filters.assign(filters), self.bias.assign(bias)]

    def output_shape(self):
        oh = conv_output_length(self.h, self.fh, self.padding.lower(), self.sh)
        ow = conv_output_length(self.w, self.fw, self.padding.lower(), self.sw)
        od = self.fout
        return [oh, ow, od]

    def num_params(self):
        filter_weights_size = self.fh * self.fw * self.fin * self.fout
        bias_weights_size = self.fout
        return filter_weights_size + bias_weights_size

    def forward(self, X):
        Z = tf.nn.conv2d(X, self.filters, self.strides, self.padding) \
            + tf.reshape(self.bias, (1, 1, 1, self.fout))
        A = self.activation.forward(Z)
        return A

    ###################################################################

    def backward(self, AI, AO, DO):
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DI = tf.nn.conv2d_backprop_input(input_sizes=self.input_sizes,
                                         filter=self.filters,
                                         out_backprop=DO,
                                         strides=self.strides,
                                         padding=self.padding)
        return DI

    def gv(self, AI, AO, DO):
        if not self._train:
            return []
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DF = tf.nn.conv2d_backprop_filter(input=AI,
                                          filter_sizes=self.filter_sizes,
                                          out_backprop=DO,
                                          strides=self.strides,
                                          padding=self.padding)
        DB = tf.reduce_sum(DO, axis=[0, 1, 2])
        return [(DF, self.filters), (DB, self.bias)]

    def train(self, AI, AO, DO):
        if not self._train:
            return []
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DF = tf.nn.conv2d_backprop_filter(input=AI,
                                          filter_sizes=self.filter_sizes,
                                          out_backprop=DO,
                                          strides=self.strides,
                                          padding=self.padding)
        DB = tf.reduce_sum(DO, axis=[0, 1, 2])
        # In-graph SGD update with learning rate alpha.
        self.filters = self.filters.assign(
            tf.subtract(self.filters, tf.scalar_mul(self.alpha, DF)))
        self.bias = self.bias.assign(
            tf.subtract(self.bias, tf.scalar_mul(self.alpha, DB)))
        return [(DF, self.filters), (DB, self.bias)]

    ###################################################################

    def dfa_backward(self, AI, AO, E, DO):
        # DFA does not propagate the error through this layer's weights.
        return tf.ones_like(AI)

    def dfa_gv(self, AI, AO, E, DO):
        # Identical to the backprop gradients; only the incoming DO differs.
        return self.gv(AI, AO, DO)

    def dfa(self, AI, AO, E, DO):
        return self.train(AI, AO, DO)

    ###################################################################

    def lel_backward(self, AI, AO, E, DO, Y):
        return self.backward(AI, AO, DO)

    def lel_gv(self, AI, AO, E, DO, Y):
        return self.gv(AI, AO, DO)

    def lel(self, AI, AO, E, DO, Y):
        return self.train(AI, AO, DO)
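# Usage sketch for this Convolution variant (hypothetical shapes). Unlike the
# dict-based version above, forward() returns the activation tensor directly,
# and strides/padding are required constructor arguments:
#
#   conv = Convolution(input_sizes=[32, 28, 28, 1],
#                      filter_sizes=[5, 5, 1, 16],
#                      strides=[1, 1, 1, 1], padding='SAME',
#                      alpha=0.01, name='conv1')
#   A = conv.forward(X)                   # [32, 28, 28, 16]
#   DI = conv.backward(AI=X, AO=A, DO=DO)
#   step = conv.train(AI=X, AO=A, DO=DO)  # applies the SGD update in-graph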