def intfVGG_use_layer(input_tensor, n_classes=1000, rgb_mean=None, training=True):
    # assuming a 224x224x3 input_tensor
    # define image mean
    if rgb_mean is None:
        rgb_mean = np.array([116.779, 123.68, 103.939], dtype=np.float32)
    mu = tf.constant(rgb_mean, name="rgb_mean")
    keep_prob = 0.5

    # subtract image mean
    net = tf.subtract(input_tensor, mu, name="input_mean_centered")

    # block 1 -- outputs 112x112x64
    net = L.conv(net, name="conv1_1", kh=3, kw=3, n_out=64)
    net = L.conv(net, name="conv1_2", kh=3, kw=3, n_out=64)
    net = L.pool(net, name="pool1", kh=2, kw=2, dw=2, dh=2)

    # block 2 -- outputs 56x56x128
    net = L.conv(net, name="conv2_1", kh=3, kw=3, n_out=128)
    net = L.conv(net, name="conv2_2", kh=3, kw=3, n_out=128)
    net = L.pool(net, name="pool2", kh=2, kw=2, dh=2, dw=2)

    # block 3 -- outputs 28x28x256
    net = L.conv(net, name="conv3_1", kh=3, kw=3, n_out=256)
    net = L.conv(net, name="conv3_2", kh=3, kw=3, n_out=256)
    net = L.pool(net, name="pool3", kh=2, kw=2, dh=2, dw=2)

    # block 4 -- outputs 14x14x512
    net = L.conv(net, name="conv4_1", kh=3, kw=3, n_out=512)
    net = L.conv(net, name="conv4_2", kh=3, kw=3, n_out=512)
    net = L.conv(net, name="conv4_3", kh=3, kw=3, n_out=512)
    net = L.pool(net, name="pool4", kh=2, kw=2, dh=2, dw=2)

    # block 5 -- outputs 7x7x512
    net = L.conv(net, name="conv5_1", kh=3, kw=3, n_out=512)
    net = L.conv(net, name="conv5_2", kh=3, kw=3, n_out=512)
    net = L.conv(net, name="conv5_3", kh=3, kw=3, n_out=512)
    net = L.pool(net, name="pool5", kh=2, kw=2, dw=2, dh=2)

    # flatten
    flattened_shape = np.prod([s.value for s in net.get_shape()[1:]])
    net = tf.reshape(net, [-1, flattened_shape], name="flatten")

    # fully connected
    net = L.fully_connected(net, name="fc6", n_out=4096)
    net = tf.nn.dropout(net, keep_prob)
    net = L.fully_connected(net, name="fc7", n_out=4096)
    net = tf.nn.dropout(net, keep_prob)
    net = L.fully_connected(net, name="fc8", n_out=n_classes)
    return net
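# Hedged usage sketch (not from the original source): assuming the custom `L`
# layer helpers above are importable, the network could be instantiated on a
# 224x224x3 placeholder like this.
images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name="images")
vgg_logits = intfVGG_use_layer(images, n_classes=1000, training=True)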
def func(name, inputs, batch_size, img_size, img_chan):
    with tf.variable_scope(name):
        with tf.variable_scope("fc1"):
            inputs = tf.nn.relu(fully_connected(inputs, 1024))
        with tf.variable_scope("fc2"):
            inputs = tf.nn.relu(fully_connected(inputs, 1024))
        with tf.variable_scope("output"):
            inputs = tf.nn.tanh(
                fully_connected(inputs, img_size * img_size * img_chan))
            inputs = tf.reshape(
                inputs, [batch_size, img_size, img_size, img_chan])
    return inputs
def func(name, inputs, batch_size, img_size, img_chan, enable_sn=False, reuse=False):
    with tf.variable_scope(name, reuse=reuse):
        inputs = tf.layers.flatten(inputs)
        with tf.variable_scope("fc1"):
            inputs = tf.nn.relu(fully_connected(inputs, 1024, enable_sn))
        with tf.variable_scope("fc2"):
            inputs = tf.nn.relu(fully_connected(inputs, 1024, enable_sn))
        return fully_connected(inputs, 1, enable_sn)
def Dense_net(self, input_x):
    x = layers.conv2d(input_x, filters=2 * self.filters, kernel_size=[7, 7], strides=[2, 2],
                      kernel_regularizer=layers.l2_regularizer(0.0005),
                      padding='valid', activation=None, name='conv0')
    x = self.dense_block(input_x=x, nb_layers=6, layer_name='dense_1')
    x = self.transition_layer(x, scope='trans_1')
    x = self.dense_block(input_x=x, nb_layers=12, layer_name='dense_2')
    x = self.transition_layer(x, scope='trans_2')
    x = self.dense_block(input_x=x, nb_layers=48, layer_name='dense_3')
    x = self.transition_layer(x, scope='trans_3')
    x = self.dense_block(input_x=x, nb_layers=32, layer_name='dense_final')

    # 100-layer head
    x = layers.batch_normalization(x, training=self.training, name='linear_batch')
    x = layers.selu(x)
    x = layers.global_ave_pool2d(x)
    # x = flatten(x)
    x = layers.fully_connected(x, self.class_num, use_bias=False, activation_fn=None,
                               trainable=self.training, name='full_connecting')
    # x = tf.reshape(x, [-1, 10])
    return x
def model(self):
    block0 = layers.gate_block(inputs=self.embed, k_size=3, filters=100, scope_name='block0')
    pool0 = layers.one_maxpool(inputs=block0, padding='VALID', scope_name='pool0')
    flatten0 = layers.flatten(pool0, scope_name='flatten0')

    block1 = layers.gate_block(inputs=self.embed, k_size=4, filters=100, scope_name='block1')
    pool1 = layers.one_maxpool(inputs=block1, padding='VALID', scope_name='pool1')
    flatten1 = layers.flatten(pool1, scope_name='flatten1')

    block2 = layers.gate_block(inputs=self.embed, k_size=5, filters=100, scope_name='block2')
    pool2 = layers.one_maxpool(inputs=block2, padding='VALID', scope_name='pool2')
    flatten2 = layers.flatten(pool2, scope_name='flatten2')

    concat0 = layers.concatinate(inputs=[flatten0, flatten1, flatten2], scope_name='concat0')
    dropout0 = layers.Dropout(inputs=concat0, rate=1 - self.keep_prob, scope_name='dropout0')

    self.logits = layers.fully_connected(inputs=dropout0, out_dim=self.n_classes, scope_name='fc0')
def model(self):
    conv0 = layers.conv1d(inputs=self.embed, filters=100, k_size=3, stride=1,
                          padding="SAME", scope_name="conv0")
    relu0 = layers.relu(inputs=conv0, scope_name="relu0")
    pool0 = layers.one_maxpool(inputs=relu0, padding="VALID", scope_name="pool0")
    flatten0 = layers.flatten(inputs=pool0, scope_name="flatten0")

    conv1 = layers.conv1d(inputs=self.embed, filters=100, k_size=4, stride=1,
                          padding="SAME", scope_name="conv1")
    relu1 = layers.relu(inputs=conv1, scope_name="relu1")  # distinct scope per branch
    pool1 = layers.one_maxpool(inputs=relu1, padding="VALID", scope_name="pool1")
    flatten1 = layers.flatten(inputs=pool1, scope_name="flatten1")

    conv2 = layers.conv1d(inputs=self.embed, filters=100, k_size=5, stride=1,
                          padding="SAME", scope_name="conv2")
    relu2 = layers.relu(inputs=conv2, scope_name="relu2")  # distinct scope per branch
    pool2 = layers.one_maxpool(inputs=relu2, padding="VALID", scope_name="pool2")
    flatten2 = layers.flatten(inputs=pool2, scope_name="flatten2")

    concat0 = layers.concatinate([flatten0, flatten1, flatten2], scope_name="concat0")
    dropout0 = layers.Dropout(inputs=concat0, rate=1 - self.keep_prob, scope_name="dropout0")
    self.logits = layers.fully_connected(inputs=dropout0, out_dim=self.n_classes, scope_name="fc0")
def build_model(inp, label):
    y1 = layers.fully_connected(inp=inp, nout=p.hidden_units[0], activation=tf.nn.relu, scope="fc-1")
    # keep the final layer linear: tf.losses.softmax_cross_entropy expects raw logits
    out = layers.fully_connected(inp=y1, nout=p.num_labels, activation=None, scope="fc-2")

    with tf.variable_scope('loss'):
        loss = tf.losses.softmax_cross_entropy(logits=out, onehot_labels=label)
        tf.summary.scalar('loss', loss)

    with tf.variable_scope('accuracy'):
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(label, 1), tf.argmax(out, 1)), tf.float64))
        tf.summary.scalar('accuracy', accuracy)

    return out, loss, accuracy
def main():
    # generate some dummy data for testing
    manager = Data()
    num_examples = 10**4
    max_val = 1
    train_batch_size = 16
    train_size = int(num_examples / 2)
    eval_size = int(num_examples / 2)
    X, y = manager.create_data_set(
        num_of_examples=num_examples,
        max_val=max_val,
        discriminator=lambda x: max_val * (1 / (1 + np.exp(-x)) + 1 / (1 + np.exp(x**2))) - max_val / 2,
        one_hot=False,
        plot_data=False,
        load_saved_data=True,
        filename='dataset.npy')
    train_examples, train_labels = X[0:train_size, :], y[0:train_size]
    eval_examples, eval_labels = X[train_size:, :], y[train_size:]
    print('train examples = {}, train labels = {}, eval examples = {}, eval labels = {}'.format(
        train_examples.shape, train_labels.shape, eval_examples.shape, eval_labels.shape))

    # get a small training batch
    indices = np.random.randint(low=0, high=train_size, size=train_batch_size)
    train_batch_examples, train_batch_labels = X[indices, :], y[indices]

    # start by defining your default graph
    graph = GRAPH()
    graph.getDefaultGraph()

    # declare your placeholders, to provide your inputs
    # print(int(train_batch_examples.shape[1]))
    input_features = placeholder(shape=(train_batch_size, int(train_batch_examples.shape[1])))
    input_labels = placeholder(shape=(train_batch_size))

    """ Method #3 """
    # this is defined using layers
    features = fully_connected(features=input_features, units=32)
    features = relu(features)
    features = fully_connected(features=features, units=64)
    features = relu(features)
    features = fully_connected(features=features, units=128)
    features = relu(features)
    features = fully_connected(features=features, units=64)
    features = relu(features)
    features = fully_connected(features=features, units=32)
    features = relu(features)
    features = fully_connected(features=features, units=2)
    logits = softmax_classifier(features)
    loss = CrossEntropyLoss(softmax_logits=logits, labels=input_labels)

    # compile and run
    graph.graph_compile(function=loss, verbose=True)
    loss = graph.run(input_matrices={input_features: train_batch_examples,
                                     input_labels: train_batch_labels})
    print(loss, logits.output.shape)
def encoder(input, name, reuse=False):
    with tf.variable_scope(name, reuse=reuse) as scope:
        conv_0 = conv2d(input=input, filter_shape=[3, 3, 3, conf.n], name="conv_0")
        conv_1 = conv2d(input=conv_0, filter_shape=[3, 3, conf.n, conf.n], name="conv_1")
        conv_2 = conv2d(input=conv_1, filter_shape=[3, 3, conf.n, conf.n], name="conv_2")
        subs_1 = conv2d(input=conv_2, filter_shape=[3, 3, conf.n, 2 * conf.n],
                        strides=(1, 2, 2, 1), name="subs_1")
        conv_3 = conv2d(input=subs_1, filter_shape=[3, 3, 2 * conf.n, 2 * conf.n], name="conv_3")
        conv_4 = conv2d(input=conv_3, filter_shape=[3, 3, 2 * conf.n, 2 * conf.n], name="conv_4")
        subs_2 = conv2d(input=conv_4, filter_shape=[3, 3, 2 * conf.n, 3 * conf.n],
                        strides=(1, 2, 2, 1), name="subs_2")
        conv_5 = conv2d(input=subs_2, filter_shape=[3, 3, 3 * conf.n, 3 * conf.n], name="conv_5")
        conv_6 = conv2d(input=conv_5, filter_shape=[3, 3, 3 * conf.n, 3 * conf.n], name="conv_6")
        subs_3 = conv2d(input=conv_6, filter_shape=[3, 3, 3 * conf.n, 4 * conf.n],
                        strides=(1, 2, 2, 1), name="subs_3")
        conv_7 = conv2d(input=subs_3, filter_shape=[3, 3, 4 * conf.n, 4 * conf.n], name="conv_7")
        conv_8 = conv2d(input=conv_7, filter_shape=[3, 3, 4 * conf.n, 4 * conf.n], name="conv_8")
        reshape_op = tf.reshape(conv_8, [tf.shape(conv_8)[0], 8 * 8 * 4 * conf.n])
        fc_op = fully_connected(reshape_op, num_output=conf.embedding_dim, name="fc")
        return fc_op
def fully_connected_layer(self):
    '''Sets the custom 3D module.

    Input: None
    '''
    # Dimensions
    W, b, dim_h = fully_connected(self.convolution_codes, 7 * 7 * 512, 512, name='dim_h')
    W, b, self.dim = fully_connected(dim_h, 512, 3, name='dim_out', activation='linear')

    # Orientation
    W, b, orientation_h = fully_connected(self.convolution_codes, 7 * 7 * 512, 256,
                                          name='orientation_h')
    W, b, sin_u = fully_connected(orientation_h, 256, 1, name='unnormalized_sin', activation='linear')
    W, b, cos_u = fully_connected(orientation_h, 256, 1, name='unnormalized_cos', activation='linear')
    l2norm = tf.sqrt(tf.square(sin_u) + tf.square(cos_u))
    self.sin = tf.divide(sin_u, l2norm, name='sin_out')
    self.cos = tf.divide(cos_u, l2norm, name='cos_out')
def decoder(input, name, reuse=False):
    with tf.variable_scope(name, reuse=reuse) as scope:
        fc_op = fully_connected(input, num_output=8 * 8 * conf.n, name="fc")
        reshape_op = tf.reshape(fc_op, [tf.shape(fc_op)[0], 8, 8, conf.n])
        conv_1 = conv2d(input=reshape_op, filter_shape=[3, 3, conf.n, conf.n], name="conv_1")
        conv_2 = conv2d(input=conv_1, filter_shape=[3, 3, conf.n, conf.n], name="conv_2")
        ups_1 = upsample_2d(conv_2, size=[16, 16], name="ups_1")
        conv_3 = conv2d(input=ups_1, filter_shape=[3, 3, conf.n, conf.n], name="conv_3")
        conv_4 = conv2d(input=conv_3, filter_shape=[3, 3, conf.n, conf.n], name="conv_4")
        ups_2 = upsample_2d(conv_4, size=[32, 32], name="ups_2")
        conv_5 = conv2d(input=ups_2, filter_shape=[3, 3, conf.n, conf.n], name="conv_5")
        conv_6 = conv2d(input=conv_5, filter_shape=[3, 3, conf.n, conf.n], name="conv_6")
        ups_3 = upsample_2d(conv_6, size=[64, 64], name="ups_3")
        conv_7 = conv2d(input=ups_3, filter_shape=[3, 3, conf.n, conf.n], name="conv_7")
        conv_8 = conv2d(input=conv_7, filter_shape=[3, 3, conf.n, conf.n], name="conv_8")
        conv_9 = conv2d(input=conv_8, filter_shape=[3, 3, conf.n, 3], name="conv_9")
        return conv_9
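# Hedged round-trip sketch (an assumption, not from the source): given the 8x8
# bottleneck reshape in encoder() and the final 64x64 upsampling in decoder(),
# the autoencoder presumably operates on 64x64x3 inputs.
ae_images = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name="ae_images")
embedding = encoder(ae_images, name="encoder")        # [batch, conf.embedding_dim]
reconstruction = decoder(embedding, name="decoder")   # [batch, 64, 64, 3]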
def create_model(self, model_input, vocab_size, is_training, num_mixtures=None,
                 l2_penalty=1e-8, **unused_params):
    """Creates a Mixture of (Logistic) Experts model.

    It also includes the possibility of gating the probabilities.

    The model consists of a per-class softmax distribution over a configurable
    number of logistic classifiers. One of the classifiers in the mixture is
    not trained, and always predicts 0.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    low_rank_gating = FLAGS.moe_low_rank_gating
    l2_penalty = FLAGS.moe_l2
    gating_probabilities = FLAGS.moe_prob_gating
    gating_input = FLAGS.moe_prob_gating_input
    input_size = model_input.get_shape().as_list()[1]
    remove_diag = FLAGS.gating_remove_diag

    if low_rank_gating == -1:
        gate_activations = layers.fully_connected(
            model_input,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates",
            float16_flag=FLAGS.float16_flag)
    else:
        gate_activations1 = slim.fully_connected(
            model_input,
            low_rank_gating,
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates1")
        gate_activations = slim.fully_connected(
            gate_activations1,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates2")

    expert_activations = layers.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts",
        float16_flag=FLAGS.float16_flag)

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations, [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations, [-1, num_mixtures]))    # (Batch * #Labels) x num_mixtures
    if '16' in str(expert_distribution.dtype):
        expert_distribution = tf.cast(expert_distribution, tf.float32)

    probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    probabilities = tf.reshape(probabilities_by_class_and_batch, [-1, vocab_size])

    if gating_probabilities:
        if gating_input == 'prob':
            gating_weights = tf.get_variable(
                "gating_prob_weights", [vocab_size, vocab_size],
                initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(vocab_size)),
                dtype=tf.float16 if FLAGS.float16_flag else tf.float32)
            gates = tf.matmul(probabilities,
                              tf.cast(gating_weights, tf.float32)
                              if '16' in str(gating_weights.dtype) else gating_weights)
        else:
            gating_weights = tf.get_variable(
                "gating_prob_weights", [input_size, vocab_size],
                initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(vocab_size)),
                dtype=tf.float16 if FLAGS.float16_flag else tf.float32)
            gates = tf.matmul(model_input,
                              tf.cast(gating_weights, tf.float32)
                              if '16' in str(gating_weights.dtype) else gating_weights)

        if remove_diag:
            # removes diagonal coefficients
            diagonals = tf.matrix_diag_part(gating_weights)
            gates = gates - tf.multiply(diagonals, probabilities)

        gates = slim.batch_norm(
            gates,
            center=True,
            scale=True,
            is_training=is_training,
            scope="gating_prob_bn")
        gates = tf.sigmoid(gates)

        probabilities = tf.multiply(probabilities, gates)

    return {"predictions": probabilities}
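# Minimal NumPy sketch of the mixture rule described in the docstring above,
# with assumed toy numbers (not the training code): softmax gates over
# num_mixtures + 1 entries, a dummy expert that always predicts 0, and sigmoid
# experts, for a single (example, class) pair.
import numpy as np

num_mixtures = 2
gate_logits = np.array([0.3, 1.2, -0.5])                   # num_mixtures + 1 gate activations
expert_logits = np.array([2.0, -1.0])                      # num_mixtures expert activations
gates = np.exp(gate_logits) / np.exp(gate_logits).sum()    # softmax gating distribution
experts = 1.0 / (1.0 + np.exp(-expert_logits))             # sigmoid expert probabilities
probability = (gates[:num_mixtures] * experts).sum()       # dummy expert contributes 0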
def inference(images):
    """Definition of model inference.

    Args:
      images: A batch of images to process. Shape [batch_size,32,32,3]
    """
    is_train = tf.get_collection('is_train')[0]

    def shortcut(l, in_channel, out_channel):
        """Shortcut for residual function.

        Args:
          l: Output of previous layer.
          in_channel: # of channels of l.
          out_channel: # of channels of each output feature.
        """
        shortcut = tf.nn.avg_pool(l, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
        pad = (out_channel - in_channel) // 2
        return tf.pad(shortcut, [[0, 0], [0, 0], [0, 0], [pad, pad]])

    def residual(name, l, in_channel, out_channel, stride):
        """Residual function.

        Args:
          name: Scope name of this function.
          l: Output of previous layer.
          in_channel: # of channels of l.
          out_channel: # of channels of each output feature.
          stride: Stride of the first convolution in residual function.
        """
        with tf.variable_scope(name):
            sc = l if stride == 1 else shortcut(l, in_channel, out_channel)
            l = layers.conv('conv_0', l, out_channel, stride=stride)
            l = layers.batchnorm('bn_0', l, is_train)
            l = tf.nn.relu(l)
            l = layers.conv('conv_1', l, out_channel, stride=1)
            l = layers.batchnorm('bn_1', l, is_train)
            l = tf.nn.relu(l + sc)
            return l

    # ResNet-20 inference
    with tf.variable_scope('inference'):
        features = []
        for m in range(FLAGS.num_model):
            l = images
            with tf.variable_scope('model_%d' % m):
                l = layers.conv('conv_init', l, 16, stride=1)
                l = residual('res_1_1', l, 16, 16, 1)
                l = residual('res_1_2', l, 16, 16, 1)
                l = residual('res_1_3', l, 16, 16, 1)
            features.append(l)

        # stochastically share hidden features right before the first pooling
        if FLAGS.feature_sharing:
            features = feature_sharing(features)

        for m in range(FLAGS.num_model):
            l = features[m]
            with tf.variable_scope('model_%d' % m):
                l = residual('res_2_1', l, 16, 32, 2)
                l = residual('res_2_2', l, 32, 32, 1)
                l = residual('res_2_3', l, 32, 32, 1)
                l = residual('res_3_1', l, 32, 64, 2)
                l = residual('res_3_2', l, 64, 64, 1)
                l = residual('res_3_3', l, 64, 64, 1)
                l = layers.batchnorm('bn_0', l, is_train)
                l = tf.nn.relu(l)
                # global average pooling
                l = tf.reduce_mean(l, [1, 2])
                l = layers.fully_connected('fc_0', l, 10)
            features[m] = l

    return features
def _build_model(self):
    super()._build_model()
    with self.graph.as_default():
        with tf.variable_scope('network'):
            input_layer, final_layer, predict_layer = agent_network(
                state_shape=self.state_shape,
                image_input=self.image_input,
                action_count=self.action_count)
            self.input_layer = input_layer
            self.predict_layer = predict_layer
            self.softmax_predict = tf.nn.softmax(self.predict_layer)
            self.value_layer = fully_connected(final_layer, 1, activation=None, name="value")
            self.value_layer_val = self.value_layer[:, 0]

        with tf.variable_scope('state'):
            self._frames = tf.Variable(0, trainable=False, name='frames', dtype=tf.int64)
            tf.summary.scalar('frames', self._frames)
            self.update_frames = tf.assign_add(
                self._frames, tf.cast(tf.shape(self.input_layer)[0], tf.int64))
            lr_calc = self.starting_lr * \
                (1.0 - (tf.cast(self._frames, tf.float64) / self.total_steps))
            # self.learning_rate = tf.maximum(tf.cast(0.0, tf.float64), lr_calc)
            self.learning_rate = tf.constant(self.starting_lr)
            tf.summary.scalar('learning_rate', self.learning_rate)

        with tf.variable_scope('training'):
            self.target_predict = tf.placeholder(tf.int32, shape=[None], name='target_predict')
            self.target_value = tf.placeholder(tf.float32, shape=[None], name='target_value')
            self.reward_diff = tf.placeholder(tf.float32, shape=[None], name='reward_diff')

            mse_value = tf.reduce_mean(
                tf.squared_difference(self.value_layer, self.target_value) / 2.)
            tf.summary.scalar('mse_value', mse_value)

            diff_predict = tf.reduce_mean(
                self.reward_diff * tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.predict_layer, labels=self.target_predict))
            tf.summary.scalar('err_predict', diff_predict)

            a0 = self.predict_layer - \
                tf.reduce_max(self.predict_layer, axis=1, keep_dims=True)
            ea0 = tf.exp(a0)
            z0 = tf.reduce_sum(ea0, axis=1, keep_dims=True)
            p0 = ea0 / z0
            # entropy = tf.reduce_mean(-tf.reduce_sum(
            #     self.softmax_predict * tf.log(self.softmax_predict + 1e-6),
            #     axis=1))  # adding 1e-6 to avoid DBZ
            entropy = tf.reduce_mean(
                tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=1))
            tf.summary.scalar('predict_entropy', entropy)

            trainer = tf.train.RMSPropOptimizer(self.learning_rate, decay=0.99, epsilon=1e-5)
            loss = diff_predict + self.value_weight * mse_value - self.entropy_weight * entropy
            tf.summary.scalar('loss', loss)
            # self.train_op = trainer.minimize(loss, global_step=self._step)
            grads_and_vars = trainer.compute_gradients(loss)
            grads, vars = zip(*grads_and_vars)
            grads, _ = tf.clip_by_global_norm(grads, 0.5)
            grads_and_vars = list(zip(grads, vars))
            self.train_op = trainer.apply_gradients(grads_and_vars, global_step=self._step)

        with tf.variable_scope('stats'):
            self.score_placeholder = tf.placeholder(tf.float32, shape=[], name='score_input')
            score_1 = tf.Variable(0., trainable=False, name='score_1')
            tf.summary.scalar('score_1', score_1)
            score_100 = tf.Variable(0., trainable=False, name='score_100')
            tf.summary.scalar('score_100', score_100)
            score_1000 = tf.Variable(0., trainable=False, name='score_1000')
            tf.summary.scalar('score_1000', score_1000)
            self.set_scores = tf.group(
                tf.assign(score_1, self.score_placeholder),
                tf.assign(
                    score_100,
                    score_100 + (self.score_placeholder / 100.0) - (score_100 / 100.0)),
                tf.assign(
                    score_1000,
                    score_1000 + (self.score_placeholder / 1000.0) - (score_1000 / 1000.0)),
            )
def train_simple_conv_net(ckpt_dir, learning_rate, epochs, img_size, num_classes,
                          batcher, batch_size=50):
    """A very simple conv net implementation for testing the library.

    Define the graph
        input
        |
        2d convolution (5x5, stride=1)
        |
        ReLU
        |
        Max Pool (2x2, stride=2)
        |
        2d convolution (5x5, stride=1)
        |
        ReLU
        |
        Max Pool (2x2, stride=2)
        |
        Fully connected (1024)
        |
        Fully connected (num_classes)

    You can either define the graph inline here or pull it from another
    function. Please call the output logits.

    Args:
        img_size : tuple
            The dimensions of the input image
        num_classes : int
            Number of classes training on
    """
    print("[INFO] Initializing new graph")
    graph = tf.Graph()
    with graph.as_default():
        # 1. Input layer
        # -------------------------------------------------------
        input_shape = [None] + list(img_size)
        X = tf.placeholder(tf.float32, shape=input_shape)
        y_true = tf.placeholder(tf.float32, shape=[None, num_classes])

        # 2. Convolution-ReLU-Pool layer 1
        # -------------------------------------------------------
        # 32 filters, 5 by 5 window
        conv1_params = {
            'shape': [5, 5, input_shape[-1], 32],
            'strides': [1, 1, 1, 1],
            'padding': 'SAME'
        }
        pool1_params = {
            'shape': [1, 2, 2, 1],
            'strides': [1, 2, 2, 1],
            'padding': 'SAME'
        }
        conv1 = conv_relu_pool(X, conv1_params, pool1_params)
        # -------------------------------------------------------

        # 3. Convolution-ReLU-Pool layer 2
        # -------------------------------------------------------
        conv2_params = {
            'shape': [5, 5, conv1_params['shape'][-1], 64],  # 32 input channels from conv1
            'strides': [1, 1, 1, 1],
            'padding': 'SAME'
        }
        pool2_params = pool1_params
        conv2 = conv_relu_pool(conv1, conv2_params, pool2_params)
        # -------------------------------------------------------

        # 4. Fully connected layer 1
        # -------------------------------------------------------
        full1 = fully_connected(conv2, 1024)
        # -------------------------------------------------------

        # 5. ReLU Activation
        # -------------------------------------------------------
        full1 = tf.nn.relu(full1)
        # -------------------------------------------------------

        # Dropout
        hold_prob = tf.placeholder(tf.float32)
        full1 = tf.nn.dropout(full1, keep_prob=hold_prob)

        # 6. Fully connected layer 2
        # -------------------------------------------------------
        logits = fully_connected(full1, num_classes)
        # -------------------------------------------------------

        # TRAINING VARIABLES
        # -------------------------------------------------------
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=logits))
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train = optimizer.minimize(cross_entropy)
        # -------------------------------------------------------
        init = tf.global_variables_initializer()

        # VALIDATION VARIABLES
        # -------------------------------------------------------
        matches = tf.equal(tf.argmax(logits, 1), tf.argmax(y_true, 1))
        acc = tf.reduce_mean(tf.cast(matches, tf.float32))
        # -------------------------------------------------------
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        sess.run(init)
        for i in range(epochs):
            X_train, y_train = batcher.next_batch('train', batch_size)
            feed_dict = {X: X_train, y_true: y_train, hold_prob: 0.5}
            sess.run(train, feed_dict=feed_dict)
            if i % 100 == 0:
                save_path = saver.save(sess, "{}/int.ckpt".format(ckpt_dir))
                val_X, val_y = batcher.get_all('test')
                feed_dict = {X: val_X, y_true: val_y, hold_prob: 1.0}
                epoch_acc = sess.run(acc, feed_dict=feed_dict)
                print("Epoch: {}".format(i))
                print("Accuracy: {}".format(epoch_acc))
        save_path = saver.save(sess, "{}/final_model.ckpt".format(ckpt_dir))
    return save_path
def main():
    # generate some dummy data for testing
    manager = Data()
    num_examples = 10**4
    max_val = 4
    train_batch_size = 32
    train_size = int(num_examples / 2)
    eval_size = int(num_examples / 2)
    # discriminator = (1/(1+np.exp(-x)+1/(1+np.exp(x))))
    X, y = manager.create_data_set(
        num_of_examples=num_examples,
        max_val=max_val,
        discriminator=lambda x: max_val * (np.cos(np.sin(x**2))) - max_val / 2,
        one_hot=False,
        plot_data=True,
        load_saved_data=False,
        filename='dataset.npy')
    train_examples, train_labels = X[0:train_size, :], y[0:train_size]
    eval_examples, eval_labels = X[train_size:, :], y[train_size:]
    print('train examples = {}, train labels = {}, eval examples = {}, eval labels = {}'
          .format(train_examples.shape, train_labels.shape,
                  eval_examples.shape, eval_labels.shape))

    # start by defining your default graph
    graph = GRAPH()
    graph.getDefaultGraph()

    # declare your placeholders, to provide your inputs
    input_features = placeholder(shape=(train_batch_size, int(train_examples.shape[1])))
    input_labels = placeholder(shape=(train_batch_size))

    """ Method #3 """
    # this is defined using layers
    layer1 = fully_connected(features=input_features, units=32)
    layer1 = relu(layer1)
    layer2 = fully_connected(features=layer1, units=64)
    layer2 = relu(layer2)
    # layer2 = dropout(features=layer2, drop_rate=0.5)
    layer2_1 = fully_connected(features=layer2, units=64)
    layer2_1 = relu(layer2_1)
    layer2_2 = fully_connected(features=layer2_1, units=64)
    layer2_2 = relu(layer2_2)
    # a residual (skip) connection
    layer2_2 = add(layer2_2, layer2)
    layer3 = fully_connected(features=layer2_2, units=128)
    layer3 = relu(layer3)
    layer4 = fully_connected(features=layer3, units=128)
    layer4 = relu(layer4)
    layer5 = fully_connected(features=layer4, units=128)
    layer5 = relu(layer5)
    # a residual (skip) connection
    layer5 = add(layer5, layer3)
    layer6 = fully_connected(features=layer5, units=64)
    layer6 = relu(layer6)
    # layer6 = dropout(features=layer6, drop_rate=0.2)
    layer6_1 = fully_connected(features=layer6, units=64)
    layer6_1 = relu(layer6_1)
    layer6_2 = fully_connected(features=layer6_1, units=64)
    layer6_2 = relu(layer6_2)
    # a residual (skip) connection
    layer6_2 = add(layer6_2, layer6)
    layer7 = fully_connected(features=layer6_2, units=32)
    layer7 = relu(layer7)
    # layer7 = dropout(features=layer7, drop_rate=0.5)
    logits = fully_connected(features=layer7, units=2)
    loss = Softmax_with_CrossEntropyLoss(logits=logits, labels=input_labels)

    # compile and run (always compile with the loss function, even if you don't use it)
    graph.graph_compile(function=loss, verbose=True)

    # run a training loop
    # all_W = []
    # for layer in graph.forward_feed_order:
    #     if layer.is_trainable:
    #         all_W.append([layer.W, layer.bias])

    def evaluate(batch_examples, batch_labels, mode='train'):
        loss_val = graph.run(function=graph.loss,
                             input_matrices={
                                 input_features: batch_examples,
                                 input_labels: batch_labels
                             },
                             mode=mode)
        accuracy = 100 / train_batch_size * np.sum(
            batch_labels == np.argmax(
                np.exp(logits.output) / np.sum(np.exp(logits.output), axis=1)[:, None],
                axis=1))
        return [loss_val, accuracy]

    def training_loop(iterations):
        for m in range(iterations):
            # get a small training batch
            indices = np.random.randint(low=0, high=train_size, size=train_batch_size)
            train_batch_examples, train_batch_labels = X[indices, :], y[indices]
            # get the system outputs
            [loss_val, accuracy] = evaluate(batch_examples=train_batch_examples,
                                            batch_labels=train_batch_labels)
            # logits_val = graph.run(function=logits,
            #                        input_matrices={input_features: train_batch_examples,
            #                                        input_labels: train_batch_labels})
            # print(logits_val.shape)
            if m % 1000 == 0:
                print('log: at iteration #{}, train batch loss = {}, train batch accuracy = {}%'
                      .format(m, loss_val, accuracy))

            # calculate some evaluation accuracy
            if m != 0 and m % 20000 == 0:
                print('\n---------Evaluating Now-----------')
                eval_loss, eval_accuracy = (0, 0)
                steps = eval_size // train_batch_size
                for k in range(steps):
                    eval_indices = range(k * train_batch_size, (k + 1) * train_batch_size)
                    eval_batch_loss, eval_batch_accuracy = evaluate(
                        batch_examples=eval_examples[eval_indices, :],
                        batch_labels=eval_labels[eval_indices],
                        mode='test')
                    eval_loss += eval_batch_loss
                    eval_accuracy += eval_batch_accuracy
                print('log: evaluation loss = {}, evaluation accuracy = {}%'
                      .format(eval_loss / steps, eval_accuracy / steps))
                print('------------------------------------\n')

            # do some testing
            if m != 0 and m % 50000 == 0:
                test_model()

            # run and calculate the gradients w.r.t. the loss function
            graph.gradients(function=loss)
            # update the weights
            graph.update(learn_rate=1e-2)

    def test_model():
        # now finally testing the model
        x_ = np.linspace(start=-max_val, stop=max_val, num=64)
        # we want to evaluate this grid of points
        test_set = np.asarray([(x, y) for x in x_ for y in x_], dtype=np.float64)
        all_predictions = []
        print('\n---------Testing Now-----------')
        print('log: your test set is {}'.format(test_set.shape))
        steps = 64 * 64 // train_batch_size
        for k in range(steps):
            test_indices = range(k * train_batch_size, (k + 1) * train_batch_size)
            test_logits = graph.run(
                function=logits,
                input_matrices={input_features: test_set[test_indices, :]},
                mode='test')
            test_logits -= np.max(test_logits)
            exps = np.exp(test_logits)
            softmaxed = exps / np.sum(exps, axis=1)[:, None]
            predictions = np.argmax(softmaxed, axis=1)
            # fill up predictions
            all_predictions.append(predictions)
        print('------------------------------------\n')
        predictions = np.hstack(all_predictions)
        red = test_set[predictions == 0]
        green = test_set[predictions == 1]
        plot.scatter(green[:, 0], green[:, 1], color='g')
        plot.scatter(red[:, 0], red[:, 1], color='r')
        plot.show()

    training_loop(iterations=1000000)
def lstm_block(x, v, t, lstm_size=512, vocab_size=52, num_words=30, feed_previous=False,
               scope='lstm_block', reuse=False, batch_size=4):
    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope('lstm_1', reuse=reuse):
            lstm_first = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=reuse)
            state_first = lstm_first.zero_state(batch_size, tf.float32)
            o_1, state_first = lstm_first(x[:, 0, :], state_first)
        r = tf.concat([o_1, v, t], axis=1)
        with tf.variable_scope('lstm_2', reuse=reuse):
            lstm_second = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=reuse)
            state_second = lstm_second.zero_state(batch_size, tf.float32)
            o_2, state_second = lstm_second(r, state_second)
        o = fully_connected(o_2, output_units=vocab_size, std='xavier',
                            activation=tf.identity, reuse=False, scope='lstm_fc')
        if feed_previous:
            print(o)
            o = tf.nn.softmax(o)
            print(o)
            o = softmax_to_binary(o, axis=1)
            print(o)

    with tf.variable_scope(scope, reuse=True):
        # Teacher forcing: we feed in a list of words, so we don't need to feed
        # the LSTM output back in.
        outputs = []
        outputs.append(o)
        for i in range(num_words - 1):
            if not feed_previous:
                word = x[:, i + 1, :]
            else:
                word = o
            with tf.variable_scope('lstm_1', reuse=True):
                print(word)
                o, state_first = lstm_first(word, state_first)
            o = tf.concat([o, v, t], axis=1)
            with tf.variable_scope('lstm_2', reuse=True):
                o, state_second = lstm_second(o, state_second)
            o = fully_connected(o, output_units=vocab_size, std='xavier',
                                activation=tf.identity, reuse=True, scope='lstm_fc')
            if not feed_previous:
                outputs.append(o)
            else:
                o = tf.nn.softmax(o)
                o = softmax_to_binary(o, axis=1)
                outputs.append(o)
    return outputs
def conv_block(x, num_filters=32, filter_dims=[5, 5], fc_size=1024, scope='conv_block', batch_size=4):
    s = x.get_shape().as_list()
    with tf.variable_scope(scope):
        # downsample image with stride [3, 3]
        a = conv_2d(x, dims=[7, 7], filters=num_filters, strides=[3, 3], std='xavier',
                    padding='VALID', activation=tf.nn.relu, scope='conv1')
        # no downsampling with stride [1, 1]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[1, 1], std='xavier',
                    padding='SAME', activation=tf.nn.relu, scope='conv2')
        num_filters = 2 * num_filters
        # downsample image with stride [2, 2]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[2, 2], std='xavier',
                    padding='VALID', activation=tf.nn.relu, scope='conv3')
        # no downsampling with stride [1, 1]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[1, 1], std='xavier',
                    padding='SAME', activation=tf.nn.relu, scope='conv4')
        num_filters = 2 * num_filters
        # downsample image with stride [2, 2]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[2, 2], std='xavier',
                    padding='VALID', activation=tf.nn.relu, scope='conv5')
        # no downsampling with stride [1, 1]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[1, 1], std='xavier',
                    padding='SAME', activation=tf.nn.relu, scope='conv6')
        # downsample image with stride [2, 2]
        num_filters = 32
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[2, 2], std='xavier',
                    padding='VALID', activation=tf.nn.relu, scope='conv7')
        # convert to a vector with a fully connected layer
        a = tf.reshape(a, shape=[batch_size, -1])
        a = fully_connected(a, output_units=fc_size, activation=tf.nn.relu, std='xavier', scope='fc')
        print("output vector of conv_block is: {}".format(a))
        return a
def model_architecture(hyperparameters):
    """Builds the model graph for the given hyperparameters."""
    input_pc = tf.placeholder(
        tf.float32, [None, hyperparameters.num_points, hyperparameters.num_features])
    input_graph = tf.placeholder(
        tf.float32, [None, hyperparameters.num_points * hyperparameters.num_points])
    output_label = tf.placeholder(tf.float32)
    scaled_laplacian = tf.reshape(
        input_graph, [-1, hyperparameters.num_points, hyperparameters.num_points])
    weights = tf.placeholder(tf.float32, [None])
    learning_rate = tf.placeholder(tf.float32)
    keep_prob_1 = tf.placeholder(tf.float32)
    keep_prob_2 = tf.placeholder(tf.float32)

    # first layer: graph convolution
    gcn_1 = layers.gcn_layer(input_pc, scaled_laplacian, hyperparameters.num_points,
                             hyperparameters.num_features,
                             hyperparameters.num_gcn_1_output_features,
                             hyperparameters.chebyshev_1_order)
    gcn_1_output = tf.nn.dropout(gcn_1, rate=1 - keep_prob_1)
    gcn_1_pool = layers.global_pooling(gcn_1_output,
                                       hyperparameters.num_gcn_1_output_features)

    # second layer: graph convolution on the output of gcn_1 before pooling
    gcn_2 = layers.gcn_layer(gcn_1_output, scaled_laplacian, hyperparameters.num_points,
                             hyperparameters.num_gcn_1_output_features,
                             hyperparameters.num_gcn_2_output_features,
                             hyperparameters.chebyshev_2_order)
    gcn_2_output = tf.nn.dropout(gcn_2, rate=1 - keep_prob_1)
    gcn_2_pool = layers.global_pooling(gcn_2_output,
                                       hyperparameters.num_gcn_2_output_features)

    # concatenate global features from gcn_1 and gcn_2
    global_features = tf.concat([gcn_1_pool, gcn_2_pool], axis=1)
    global_features = tf.nn.dropout(global_features, rate=1 - keep_prob_2)
    num_global_features = 2 * (hyperparameters.num_gcn_1_output_features +
                               hyperparameters.num_gcn_2_output_features)

    # first fully connected layer at the end
    fc_1 = layers.fully_connected(global_features, num_global_features,
                                  hyperparameters.num_fc_1_output_features)
    fc_1 = tf.nn.relu(fc_1)
    fc_1 = tf.nn.dropout(fc_1, rate=1 - keep_prob_2)

    # second fully connected layer
    fc_2 = layers.fully_connected(fc_1, hyperparameters.num_fc_1_output_features,
                                  hyperparameters.num_fc_2_output_features)

    # =========================================================================
    # LOSS AND BACKPROPAGATION
    # =========================================================================
    # loss
    predict_label = tf.nn.sigmoid(fc_2) >= 0.5
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fc_2, labels=output_label)
    loss = tf.reduce_mean(tf.multiply(loss, weights))

    train_vars = tf.trainable_variables()
    loss_reg = tf.add_n(
        [tf.nn.l2_loss(v) for v in train_vars if 'bias' not in v.name]) * 8e-6
    loss_total = loss + loss_reg

    # compare boolean predictions against the boolean ground truth
    correct_prediction = tf.equal(predict_label, tf.equal(output_label, 1))
    accuracy = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(accuracy)

    train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_total)

    train_operation = {
        'train': train,
        'loss': loss,
        'loss_reg': loss_reg,
        'loss_total': loss_total,
        'accuracy': accuracy,
        'input_pc': input_pc,
        'input_graph': input_graph,
        'output_label': output_label,
        'weights': weights,
        'predict_label': predict_label,
        'keep_prob_1': keep_prob_1,
        'keep_prob_2': keep_prob_2,
        'learning_rate': learning_rate
    }
    return train_operation
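# Hedged usage sketch (assumed batch arrays and hyperparameter values, not from
# the source): the dictionary returned above is meant to be fed into a session.
ops = model_architecture(hyperparameters)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        ops['input_pc']: batch_pc,          # assumed [batch, num_points, num_features] array
        ops['input_graph']: batch_graph,    # assumed [batch, num_points * num_points] array
        ops['output_label']: batch_labels,  # assumed label array
        ops['weights']: batch_weights,      # assumed per-example weights
        ops['learning_rate']: 1e-3,         # assumed value
        ops['keep_prob_1']: 0.9,            # assumed value
        ops['keep_prob_2']: 0.55,           # assumed value
    }
    _, loss_val = sess.run([ops['train'], ops['loss_total']], feed_dict=feed)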
def inference(images):
    """Definition of model inference.

    Args:
      images: A batch of images to process. Shape [batch_size,32,32,3]
    """
    is_train = tf.get_collection('is_train')[0]

    def conv_bn_relu(name, l, out_channel):
        """A sequence of convolution, batch normalization and ReLU.

        Args:
          name: Scope name of this function.
          l: Output of previous layer.
          out_channel: # of channels of each output feature.
        """
        with tf.variable_scope(name):
            l = layers.conv('conv_0', l, out_channel)
            l = layers.batchnorm('bn_0', l, is_train)
            return tf.nn.relu(l)

    # VGGNet-17 inference
    with tf.variable_scope('inference'):
        features = []
        for m in range(FLAGS.num_model):
            l = images
            with tf.variable_scope('model_%d' % m):
                l = conv_bn_relu('conv_bn_relu_01', l, 64)
                l = conv_bn_relu('conv_bn_relu_02', l, 64)
            features.append(l)

        # stochastically share hidden features right before the first pooling
        if FLAGS.feature_sharing:
            features = feature_sharing(features)

        for m in range(FLAGS.num_model):
            l = features[m]
            with tf.variable_scope('model_%d' % m):
                l = tf.nn.max_pool(l, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
                l = conv_bn_relu('conv_bn_relu_03', l, 128)
                l = conv_bn_relu('conv_bn_relu_04', l, 128)
                l = tf.nn.max_pool(l, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
                l = conv_bn_relu('conv_bn_relu_05', l, 256)
                l = conv_bn_relu('conv_bn_relu_06', l, 256)
                l = conv_bn_relu('conv_bn_relu_07', l, 256)
                l = conv_bn_relu('conv_bn_relu_08', l, 256)
                l = tf.nn.max_pool(l, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
                l = conv_bn_relu('conv_bn_relu_09', l, 512)
                l = conv_bn_relu('conv_bn_relu_10', l, 512)
                l = conv_bn_relu('conv_bn_relu_11', l, 512)
                l = conv_bn_relu('conv_bn_relu_12', l, 512)
                l = tf.nn.max_pool(l, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
                l = conv_bn_relu('conv_bn_relu_13', l, 512)
                l = conv_bn_relu('conv_bn_relu_14', l, 512)
                l = conv_bn_relu('conv_bn_relu_15', l, 512)
                l = conv_bn_relu('conv_bn_relu_16', l, 512)
                # global average pooling
                l = tf.reduce_mean(l, [1, 2])
                l = layers.fully_connected('fc_0', l, 10)
            features[m] = l

    return features
def inference(images):
    """Definition of model inference.

    Args:
      images: A batch of images to process. Shape [batch_size,32,32,3]
    """
    is_train = tf.get_collection('is_train')[0]

    def inception(name, l, wf):
        """Inception module.

        Args:
          name: Scope name of this function.
          l: Output of previous layer.
          wf: Channel width factor of this module.
        """
        with tf.variable_scope(name):
            branchpool = tf.nn.max_pool(l, [1, 2, 2, 1], [1, 1, 1, 1], 'SAME')
            branchpool = layers.conv('conv_pool', branchpool, 32 * wf, kernel_size=1)
            branch5x5 = layers.conv('conv_5x5_0', l, 16 * wf, kernel_size=1)
            branch5x5 = tf.nn.relu(branch5x5)
            branch5x5 = layers.conv('conv_5x5_1', branch5x5, 32 * wf, kernel_size=5)
            branch3x3 = layers.conv('conv_3x3_0', l, 32 * wf, kernel_size=1)
            branch3x3 = tf.nn.relu(branch3x3)
            branch3x3 = layers.conv('conv_3x3_1', branch3x3, 64 * wf, kernel_size=3)
            branch1x1 = layers.conv('conv_1x1_0', l, 64 * wf, kernel_size=1)
            branch1x1 = tf.nn.relu(branch1x1)
            cc = tf.concat([branch1x1, branch3x3, branch5x5, branchpool], 3)
            cc = layers.batchnorm('bn_0', cc, is_train)
            return tf.nn.relu(cc)

    # GoogLeNet-18 inference
    with tf.variable_scope('inference'):
        features = []
        for m in range(FLAGS.num_model):
            l = images
            with tf.variable_scope('model_%d' % m):
                l = layers.conv('conv_init', l, 32, kernel_size=3)
                l = layers.batchnorm('bn_init', l, is_train)
                l = tf.nn.relu(l)
            features.append(l)

        # stochastically share hidden features right before the first pooling
        if FLAGS.feature_sharing:
            features = feature_sharing(features)

        for m in range(FLAGS.num_model):
            l = features[m]
            with tf.variable_scope('model_%d' % m):
                l = tf.nn.max_pool(l, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
                l = inception('inception_1a', l, 1)
                l = inception('inception_1b', l, 2)
                l = tf.nn.max_pool(l, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
                l = inception('inception_2a', l, 2)
                l = inception('inception_2b', l, 2)
                l = inception('inception_2c', l, 2)
                l = inception('inception_2d', l, 4)
                l = tf.nn.max_pool(l, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
                l = inception('inception_3a', l, 4)
                l = inception('inception_3b', l, 4)
                # global average pooling
                l = tf.reduce_mean(l, [1, 2])
                l = layers.fully_connected('fc_0', l, 10)
            features[m] = l

    return features
print("normalize data") X_train = X_train / 255.0 print(len(X_train)) X_dev = X_dev / 255.0 test = test / 255.0 # Reshape image in 3 dimensions (height = 28px, width = 28px , canal = 1) X_train = X_train.values.reshape(-1,28,28,1) X_dev = X_dev.values.reshape(-1,28,28,1) X_train.shape test = test.values.reshape(-1,28,28,1) # initialize weights conv_ = conv(8) pool = maxpooling(2) fully_c=fully_connected(13*13*8,10) # Train! loss = 0 n_epochs=2 losses=[] accs=[] num_correct = 0 print('start training') for i, (im, label) in enumerate(zip(X_train, Y_train)): # print(im.shape) # print(label) if i % 100 == 99: print('[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %(i + 1, loss / 100, num_correct)) losses.append(loss / 100) accs.append(num_correct) loss = 0
def forward_propagate(x, y, _weights, debug=True):
    activation_caches = {}
    m = x.shape[0]

    activation_caches["conv1"] = conv_fast(x, _weights["W1"], _weights["B1"], 2, 1)
    activation_caches["A1"] = relu(activation_caches["conv1"])
    activation_caches["pool1"] = max_pooling(activation_caches["A1"], 2, 2)

    # Sanity check to make sure that our convolution vectorization is correct
    if debug:
        # Conv
        kconv, kcache = karpathy_conv_forward_naive(x, _weights["W1"], _weights["B1"],
                                                    {'stride': 1, 'pad': 2})
        assert np.mean(np.isclose(activation_caches["conv1"], kconv)) == 1.0
        conv1_verify = conv_forward_naive(x, _weights["W1"], _weights["B1"], 2, 1)
        assert np.mean(np.isclose(activation_caches["conv1"], conv1_verify)) == 1.0
        kpool1, kcache1 = karpathy_max_pool_forward_naive(
            activation_caches["A1"],
            {'pool_height': 2, 'pool_width': 2, 'stride': 2})
        assert np.mean(np.isclose(activation_caches["pool1"], kpool1)) == 1.0

    activation_caches["conv2"] = conv_fast(activation_caches["pool1"], _weights["W2"],
                                           _weights["B2"], 2, 1)
    activation_caches["A2"] = relu(activation_caches["conv2"])
    activation_caches["pool2"] = max_pooling(activation_caches["A2"], 2, 2)
    activation_caches["Ar2"] = activation_caches["pool2"].reshape(
        (m, activation_caches["pool2"].shape[1] *
         activation_caches["pool2"].shape[2] *
         activation_caches["pool2"].shape[3]))

    if debug:
        conv2_verify = conv_forward_naive(activation_caches["pool1"], _weights["W2"],
                                          _weights["B2"], 2, 1)
        assert np.mean(np.isclose(activation_caches["conv2"], conv2_verify)) == 1.0

    activation_caches["Z3"] = fully_connected(activation_caches["Ar2"], _weights["W3"], _weights["B3"])
    activation_caches["A3"] = relu(activation_caches["Z3"])
    activation_caches["Z4"] = fully_connected(activation_caches["A3"], _weights["W4"], _weights["B4"])
    activation_caches["A4"] = softmax(activation_caches["Z4"])

    cost = np.mean(softmax_cost(y, activation_caches["A4"], m))
    return activation_caches, cost
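# Hedged sketch of the dense-layer helper assumed by forward_propagate() above:
# a plain affine map on a (batch, features) matrix. The actual project helper
# may differ; this only illustrates the assumed shapes.
def fully_connected_sketch(A, W, B):
    # A: (m, n_in), W: (n_in, n_out), B: (n_out,) -> returns (m, n_out)
    return A @ W + B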