def model_function(features, targets, mode):
    # Stack of fully connected hidden layers with 1000, 100, 50 and 20 units.
    # tf.contrib.layers only ships fully connected layers; sparser
    # connectivity would require custom code.
    hlayers = layers.stack(
        features,
        layers.fully_connected,
        [1000, 100, 50, 20],
        activation_fn=tf.nn.relu,
        weights_regularizer=layers.l1_l2_regularizer(1.0, 2.0),
        weights_initializer=layers.xavier_initializer(uniform=True, seed=100))

    # Output layer: 10 units, one per digit (0 to 9).
    # activation_fn=None because softmax is applied inside the
    # softmax_cross_entropy loss below.
    outputs = layers.fully_connected(
        inputs=hlayers,
        num_outputs=10,
        activation_fn=None)

    # Cross-entropy loss, with softmax applied to the logits.
    loss = losses.softmax_cross_entropy(outputs, targets)

    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.8,
        optimizer="SGD")

    # The predicted class is the unit with the highest softmax probability;
    # return both the probabilities and the predicted labels.
    probs = tf.nn.softmax(outputs)
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
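# A minimal usage sketch, not part of the original source: the legacy
# tf.contrib.learn Estimator accepts a model_fn with this
# (features, targets, mode) signature. `train_images` and `train_labels`
# (one-hot, shape [N, 10]) are hypothetical arrays.
from tensorflow.contrib import learn

mnist_classifier = learn.Estimator(model_fn=model_function)
mnist_classifier.fit(x=train_images, y=train_labels, batch_size=128, steps=1000)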
def init_layer(self, name_suffix, dims):
    wname = 'w' + name_suffix
    w = tf.get_variable(
        wname,
        initializer=tf.random_normal(dims),
        regularizer=l1_l2_regularizer(scale_l1=self.reg_beta,
                                      scale_l2=self.reg_beta),
        dtype=tf.float32)
    sw = tf.summary.histogram(wname, w)
    self.summary_weights.append(sw)

    bname = 'b' + name_suffix
    b = tf.get_variable(bname,
                        initializer=tf.random_normal([dims[1]]),
                        dtype=tf.float32)
    sb = tf.summary.histogram(bname, b)
    self.summary_weights.append(sb)

    self.transform_params[wname] = w
    self.transform_params[bname] = b

    # Placeholders for externally supplied parameters, plus soft-update ops
    # that blend them into the variables at rate transform_lr.
    wext = tf.placeholder(tf.float32, dims, name=wname + '_ext')
    bext = tf.placeholder(tf.float32, [dims[1]], name=bname + '_ext')
    w_transform_ops = w.assign(w * (1 - self.transform_lr) + wext * self.transform_lr)
    self.transform_ops.append(w_transform_ops)
    b_transform_ops = b.assign(b * (1 - self.transform_lr) + bext * self.transform_lr)
    self.transform_ops.append(b_transform_ops)
    return w, b
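# Sketch of driving the soft-update ops above (names are assumptions: a layer
# built with name_suffix '1' and no enclosing variable scope). Feeding the
# '*_ext' placeholders and running transform_ops moves each variable a
# fraction transform_lr toward the fed values.
graph = tf.get_default_graph()
w_ext_ph = graph.get_tensor_by_name('w1_ext:0')
b_ext_ph = graph.get_tensor_by_name('b1_ext:0')
sess.run(model.transform_ops,
         feed_dict={w_ext_ph: external_w, b_ext_ph: external_b})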
def add_linear_output_layer(self, last_hidden_layer, ground_truth,
                            corpus_tag, task_tag, loss_weight=1):
    # returns loss op
    with tf.variable_scope("output_layer_%s" % task_tag) as layer_scope:
        last_out = fully_connected(last_hidden_layer, 1,
                                   activation_fn=tf.identity,
                                   weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                   scope=layer_scope)
        self.predictions = last_out
    with tf.name_scope("%s_loss_%s" % (corpus_tag, task_tag)):
        loss = loss_weight * tf.reduce_mean(tf.squared_difference(last_out, ground_truth))
        utils.variable_summaries(loss, "loss", corpus_tag)
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    with tf.name_scope('%s_accuracy_%s' % (corpus_tag, task_tag)):
        accuracy, _ = streaming_mean_relative_error(last_out, ground_truth, ground_truth,
                                                    name="acc_%s" % corpus_tag,
                                                    updates_collections=tf.GraphKeys.UPDATE_OPS)
        accuracy = 1 - accuracy
        utils.variable_summaries(accuracy, "accuracy", corpus_tag)
        updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.calculate_accuracy_op = control_flow_ops.with_dependencies(updates_op, accuracy)
def init_neurons(self, input_layer, wname, wnum, bias_name=None):
    ishape = input_layer.get_shape()[1].value
    dims = [ishape, wnum]
    w = tf.get_variable(
        wname,
        initializer=tf.random_normal(dims),
        regularizer=l1_l2_regularizer(scale_l1=self.reg_beta,
                                      scale_l2=self.reg_beta),
        dtype=tf.float32)
    sw = tf.summary.histogram(wname, w)
    self.summary_weights.append(sw)
    self.transform_params[wname] = w

    wext = tf.placeholder(tf.float32, dims, name=wname + '_ext')
    w_transform_ops = w.assign(w * (1 - self.transform_lr) + wext * self.transform_lr)
    self.transform_ops.append(w_transform_ops)

    h = tf.matmul(input_layer, w)
    if bias_name:
        b = tf.get_variable(bias_name,
                            initializer=tf.random_normal([wnum]),
                            dtype=tf.float32)
        sb = tf.summary.histogram(bias_name, b)
        self.summary_weights.append(sb)
        self.transform_params[bias_name] = b
        bext = tf.placeholder(tf.float32, [wnum], name=bias_name + '_ext')
        b_transform_ops = b.assign(b * (1 - self.transform_lr) + bext * self.transform_lr)
        self.transform_ops.append(b_transform_ops)
        h = tf.add(h, b)
    return h
def make_hidden_FN_layers(self, input_layer):
    previous_out = input_layer
    with tf.variable_scope("hidden_layers"):
        for i in range(1, self.num_layers + 1):
            with tf.variable_scope("layer%d" % i) as layer_scope:
                if self.is_residual and i > 1:
                    # Note: this adds a constant 1 to every activation,
                    # not a true skip connection to the layer input.
                    previous_out = tf.add(previous_out, tf.ones_like(previous_out))
                previous_out = fully_connected(
                    previous_out, self.num_hidden_units,
                    activation_fn=tf.nn.relu,
                    normalizer_fn=batch_norm,
                    normalizer_params={"scale": i == self.num_layers,
                                       "is_training": self.is_training,
                                       "decay": 0.9},
                    weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                    scope=layer_scope)
                # if i == self.num_layers:
                if i % 2 == 0:
                    # Dropout after every second layer.
                    previous_out = tf.nn.dropout(previous_out, self.keep_prob)
    last_hidden_layer = previous_out
    return last_hidden_layer
def add_classification_output_layer(self, last_hidden_layer, gt_labels, num_classes,
                                    corpus_tag, task_tag, loss_weight=1):
    # returns loss op
    with tf.variable_scope("output_layer_%s" % task_tag) as layer_scope:
        last_out = fully_connected(last_hidden_layer, num_classes,
                                   activation_fn=tf.identity,
                                   weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                   scope=layer_scope)
        self.predictions = tf.nn.softmax(last_out)
    with tf.name_scope("%s_loss_%s" % (corpus_tag, task_tag)):
        loss = loss_weight * tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=last_out,
                                                           labels=gt_labels))
        utils.variable_summaries(loss, "loss", corpus_tag)
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    with tf.name_scope('%s_accuracy_%s' % (corpus_tag, task_tag)):
        # correct_prediction = tf.equal(tf.argmax(last_out, 1), gt_labels)
        # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) * 100
        accuracy, _ = streaming_accuracy(tf.argmax(last_out, 1), gt_labels,
                                         name="acc_%s" % corpus_tag,
                                         updates_collections=tf.GraphKeys.UPDATE_OPS)
        utils.variable_summaries(accuracy, "accuracy", corpus_tag)
        updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.calculate_accuracy_op = control_flow_ops.with_dependencies(updates_op, accuracy)
def fit(self, x, y):
    """
    Fit a ConvDeconv2D model on data

    Arguments
    ---------
    x : np.ndarray
        array with 3 dimensions (nb_samples, height, width)
        or array with 4 dimensions (nb_samples, height, width, channels)
    y : np.ndarray
        array with 3 dimensions (nb_samples, height, width)
        or array with 4 dimensions (nb_samples, height, width, channels)
    """
    tf.reset_default_graph()
    x_orig, y_orig, y_onehot = process_inputs_2D(x, y, self.NB_CLASSES)
    in_shape = [None] + list(x_orig.shape[1:])
    orig_out_shape = list(y_orig.shape[1:])
    soft_out_shape = [None] + list(y_onehot.shape[1:])

    ### CONSTRUCTION PHASE ###
    X = tf.placeholder(tf.float32, shape=in_shape, name='X')
    y = tf.placeholder(tf.int32, shape=soft_out_shape, name='y')

    # CONV LAYERS #
    with tf.variable_scope('conv_layers'):
        with framework.arg_scope(
                [layers.conv2d],
                weights_initializer=layers.xavier_initializer(),
                weights_regularizer=layers.l1_l2_regularizer(
                    scale_l1=self.L1_PENALTY, scale_l2=self.L2_PENALTY),
                activation_fn=tf.nn.relu,
                padding='SAME'):
            for idx, c in enumerate(self.CONV_LAYERS):
                if idx == 0:
                    # connect to input tensor
                    conv = layers.conv2d(X, c[0], (c[1], c[1]), stride=1)
                else:
                    # connect to previous conv layer
                    conv = layers.conv2d(conv, c[0], (c[1], c[1]), stride=1)

    # DECONV LAYERS #
    with tf.variable_scope('deconv_layers'):
        with framework.arg_scope(
                [layers.conv2d_transpose],
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu,
                weights_regularizer=layers.l1_l2_regularizer(
                    scale_l1=self.L1_PENALTY, scale_l2=self.L2_PENALTY),
                padding='SAME'):
            for idx, c in enumerate(self.CONV_LAYERS[::-1]):
                if idx < len(self.CONV_LAYERS) - 1:
                    # not last layer
                    conv = layers.conv2d_transpose(conv, c[0], (c[1], c[1]), stride=1)
                else:
                    # last layer
                    conv = layers.conv2d_transpose(
                        conv, orig_out_shape[-1] * self.NB_CLASSES,
                        (c[1], c[1]), stride=1)

    # SOFTMAX RESHAPE LAYER #
    with tf.variable_scope('softmax_layer'):
        soft_shape = [tf.shape(conv)[0], np.prod(orig_out_shape), self.NB_CLASSES]
        softmax_reshape = tf.reshape(conv, soft_shape)

    # LOSS #
    with tf.name_scope('loss'):
        # Cast the int one-hot labels to float, as required by the softmax loss.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=softmax_reshape, labels=tf.cast(y, tf.float32)))

    # OPTIMIZER #
    with tf.variable_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate=self.LEARN_RATE)
        train_op = optimizer.minimize(loss)

    # EVALUATORS #
    with tf.name_scope('eval'):
        prob_map = tf.nn.softmax(softmax_reshape)
        soft_flat = tf.reshape(softmax_reshape, [-1, self.NB_CLASSES])  # (logits, classes)
        y_flat = tf.reshape(tf.argmax(y, 2), [-1])  # (classes,)
        correct = tf.nn.in_top_k(soft_flat, y_flat, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    ### EXECUTION PHASE ###
    # PRE-EXECUTION VARIABLES #
    # The saver is needed for restoring as well as saving.
    if self.SAVE_PATH is not None or self.RESTORE_PATH is not None:
        saver = tf.train.Saver()
    best_test_loss = 1e9
    init = tf.global_variables_initializer()

    # TRAINING ROUTINE #
    with tf.Session() as sess:
        if self.RESTORE_PATH is not None:
            print('Restoring Model')
            saver.restore(sess, self.RESTORE_PATH)
        else:
            print('Initializing Model')
            sess.run(init)
        for epoch in range(self.NB_EPOCH):
            for b_idx in range(int(x_orig.shape[0] / self.BATCH_SIZE)):
                xbatch = x_orig[b_idx * self.BATCH_SIZE:(b_idx + 1) * self.BATCH_SIZE]
                ybatch = y_onehot[b_idx * self.BATCH_SIZE:(b_idx + 1) * self.BATCH_SIZE]
                # run train op
                sess.run(train_op, feed_dict={X: xbatch, y: ybatch})
            # get "test" statistics (evaluated on the last training batch)
            test_acc = sess.run(accuracy, feed_dict={X: xbatch, y: ybatch})
            test_loss = sess.run(loss, feed_dict={X: xbatch, y: ybatch})
            print('Epoch : %i , Test Loss : %.04f, Test Acc: %.04f'
                  % (epoch, test_loss, test_acc))
            # save model after each epoch if test loss is the best so far
            if self.SAVE_PATH is not None and test_loss < best_test_loss:
                best_test_loss = test_loss
                saver.save(sess, self.SAVE_PATH, write_meta_graph=False)
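# Hypothetical call to fit() above; the ConvDeconv2D constructor and the
# dataset shapes are assumptions for illustration only.
import numpy as np

model = ConvDeconv2D()                                # assumed constructor
x_train = np.random.rand(32, 64, 64)                  # (nb_samples, height, width)
y_train = np.random.randint(0, 2, size=(32, 64, 64))  # class id per pixel
model.fit(x_train, y_train)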
def buildGraph(input_placeholder_s1, input_placeholder_s2, labels_placeholder,
               mask_placeholder_s1, mask_placeholder_s2, dropout_placeholder,
               embeddings_matrix):
    params = tf.Variable(embeddings_matrix)
    tensor_s1 = tf.nn.embedding_lookup(params, input_placeholder_s1)
    tensor_s2 = tf.nn.embedding_lookup(params, input_placeholder_s2)
    embeddings_s1 = tf.reshape(tensor_s1, [-1, max_length, embed_size])
    embeddings_s2 = tf.reshape(tensor_s2, [-1, max_length, embed_size])

    dropout_rate = dropout_placeholder
    preds = []
    cell1 = LSTMCell(embed_size, hidden_size)
    cell2 = LSTMCell2(embed_size, hidden_size)

    # Run the first LSTM over sentence 1, using the mask to get true lengths.
    c = tf.zeros([tf.shape(embeddings_s1)[0], hidden_size])
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(c, h)
    l1 = tf.reduce_sum(tf.cast(mask_placeholder_s1, tf.int32), axis=1)
    outputs1, state1 = tf.nn.dynamic_rnn(cell1, embeddings_s1, dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l1)

    # The second LSTM starts from the first one's final cell state.
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(state1.c, h)
    l2 = tf.reduce_sum(tf.cast(mask_placeholder_s2, tf.int32), axis=1)
    outputs2, state2 = tf.nn.dynamic_rnn(cell2, embeddings_s2, dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l2)

    func = xavier_weight_init()
    U = tf.Variable(func([hidden_size, n_classes]))
    b1 = tf.Variable(tf.zeros([1, n_classes]))
    h_drop = tf.nn.dropout(state2.h, keep_prob=1 - dropout_rate)
    pred = tf.matmul(h_drop, U) + b1
    tf.add_to_collection('ops_to_restore', pred)

    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_placeholder,
                                                   logits=pred)
    loss = tf.reduce_mean(loss)
    regularizer = l1_l2_regularizer(l1_reg, l2_reg)
    reg_loss = apply_regularization(regularizer, tf.trainable_variables())
    loss += reg_loss

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    # Clip gradients by global norm before applying them.
    gradients = optimizer.compute_gradients(loss)
    grads = [x[0] for x in gradients]
    grads, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    gradients = [(grads[i], gradients[i][1]) for i in range(len(grads))]
    train_op = optimizer.apply_gradients(gradients)
    return pred, loss, train_op
def nn(x, reuse=True, nchstart=32, act_fn=tf.nn.leaky_relu,
       TRAIN_FLAG=True, REG=False):
    """
    Takes as input the (processed) measurements and estimates a projection
    of the original image.

    Params
    ------
    x: batch_size, img_size, img_size, nch
    reuse: reuse variables flag
    nchstart: number of output channels in the first convolutional layer
    act_fn: activation function
    REG: flag to add regularization loss
    TRAIN_FLAG: 'is_training' flag for batch_norm

    Returns
    -------
    out: [batch_size, img_size*img_size] vectors on which the projection
        will be applied
    reg_loss: scalar, regularization loss in the middle layer, 0 if REG is False
    """
    nchannels = nchstart
    normalizer_params = {'is_training': TRAIN_FLAG}
    reg = tcl.l1_l2_regularizer(scale_l1=1e-4, scale_l2=1e-4)
    reg_loss = 0
    params = {
        'kernel_size': 3,
        'activation_fn': act_fn,
        'normalizer_fn': tcl.batch_norm,
        'normalizer_params': normalizer_params
    }

    with tf.variable_scope('projector', reuse=reuse):
        """Downsampling layers"""
        # Block 1
        out1_1 = tcl.conv2d(x, num_outputs=nchannels, **params)
        out1_2 = tcl.conv2d(out1_1, num_outputs=nchannels, **params)
        out_mp1 = tcl.max_pool2d(out1_2, kernel_size=[2, 2], stride=2)

        # Block 2
        out2_1 = tcl.conv2d(out_mp1, num_outputs=2 * nchannels, **params)
        out2_2 = tcl.conv2d(out2_1, num_outputs=2 * nchannels, **params)
        out_mp2 = tcl.max_pool2d(out2_2, kernel_size=[2, 2], stride=2)

        # Block 3
        out3_1 = tcl.conv2d(out_mp2, num_outputs=4 * nchannels, **params)
        out3_2 = tcl.conv2d(out3_1, num_outputs=4 * nchannels, **params)
        out_mp3 = tcl.max_pool2d(out3_2, kernel_size=[2, 2], stride=2)

        # Block 4
        out4_1 = tcl.conv2d(out_mp3, num_outputs=8 * nchannels, **params)
        out4_2 = tcl.conv2d(out4_1, num_outputs=8 * nchannels, **params)
        out_mp4 = tcl.max_pool2d(out4_2, kernel_size=[2, 2], stride=2)

        # Block 5
        out5_1 = tcl.conv2d(out_mp4, num_outputs=16 * nchannels, **params)
        out5_2 = tcl.conv2d(out5_1, num_outputs=16 * nchannels, **params)

        # regularization on the bottleneck activations
        if REG:
            reg_loss = reg(tcl.flatten(out5_2))

        """Upsampling layers"""
        # Block 1
        up_out1_1 = tf.keras.layers.UpSampling2D((2, 2))(out5_2)
        up_out1_1 = tf.concat([out4_2, up_out1_1], axis=3, name='skip_1')
        up_out1_1 = tcl.conv2d(up_out1_1, num_outputs=8 * nchannels, **params)
        up_out1_2 = tcl.conv2d(up_out1_1, num_outputs=8 * nchannels, **params)

        # Block 2
        up_out2_1 = tf.keras.layers.UpSampling2D((2, 2))(up_out1_2)
        up_out2_1 = tf.concat([out3_2, up_out2_1], axis=3, name='skip_2')
        up_out2_1 = tcl.conv2d(up_out2_1, num_outputs=4 * nchannels, **params)
        up_out2_2 = tcl.conv2d(up_out2_1, num_outputs=4 * nchannels, **params)

        # Block 3
        up_out3_1 = tf.keras.layers.UpSampling2D((2, 2))(up_out2_2)
        up_out3_1 = tf.concat([out2_2, up_out3_1], axis=3, name='skip_3')
        up_out3_1 = tcl.conv2d(up_out3_1, num_outputs=2 * nchannels, **params)
        up_out3_2 = tcl.conv2d(up_out3_1, num_outputs=2 * nchannels, **params)

        # Block 4
        up_out4_1 = tf.keras.layers.UpSampling2D((2, 2))(up_out3_2)
        up_out4_1 = tf.concat([out1_2, up_out4_1], axis=3, name='skip_4')
        up_out4_1 = tcl.conv2d(up_out4_1, num_outputs=nchannels, **params)
        up_out4_2 = tcl.conv2d(up_out4_1, num_outputs=nchannels, **params)

        # Block 5
        up_out5_1 = tcl.conv2d(up_out4_2, num_outputs=1, **params)
        out = tf.contrib.layers.flatten(up_out5_1)

    return out, reg_loss
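# Illustrative wiring of nn() into a graph; the shapes and placeholder names
# are assumptions (the spatial size must be divisible by 16 because of the
# four max-pooling stages). reg_loss is simply added to the data term.
x_in = tf.placeholder(tf.float32, [None, 128, 128, 1], name='meas')
y_true = tf.placeholder(tf.float32, [None, 128 * 128], name='target')
out, reg_loss = nn(x_in, reuse=False, TRAIN_FLAG=True, REG=True)
total_loss = tf.reduce_mean(tf.square(out - y_true)) + reg_loss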
def model_function(features, targets, mode):
    # Input layer: reshape features to a 4-D tensor [batch, 28, 28, 1].
    # MNIST images are 28x28 pixels; -1 lets TF infer the batch size
    # (55000 for the full training set). The trailing 1 is the number of
    # channels: 1 for greyscale images, 3 for colour.
    input_layer = tf.reshape(features, [-1, 28, 28, 1])

    # Convolution layer 1: computes 32 features using a 5x5 filter.
    # "SAME" padding keeps the feature map the same size as the input.
    # Input tensor shape:  [batch_size, 28, 28, 1]
    # Output tensor shape: [batch_size, 28, 28, 32]
    conv1 = layers.conv2d(
        inputs=input_layer,
        num_outputs=32,
        kernel_size=[5, 5],
        stride=1,
        padding="SAME",
        activation_fn=tf.nn.relu)

    # Pooling layer 1: 2x2 filter with stride 2.
    # Input shape:  [batch_size, 28, 28, 32]
    # Output shape: [batch_size, 14, 14, 32]
    pool1 = layers.max_pool2d(inputs=conv1, kernel_size=[2, 2], stride=2)

    # Convolution layer 2
    # Input:  14 x 14 x 32 (32 channels)
    # Output: 14 x 14 x 64 (each unit sees all 32 input channels; 64 features learned)
    conv2 = layers.conv2d(
        inputs=pool1,
        num_outputs=64,
        kernel_size=[5, 5],
        stride=1,
        padding="SAME",
        activation_fn=tf.nn.relu)

    # Pooling layer 2
    # Input:  14 x 14 x 64
    # Output: 7 x 7 x 64
    pool2 = layers.max_pool2d(inputs=conv2, kernel_size=[2, 2], stride=2)

    # Flatten pool2 to feed the first fully connected layer.
    # Input size:  [batch_size, 7, 7, 64]
    # Output size: [batch_size, 7*7*64]
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

    # Fully connected layers with 100 and 20 neurons.
    # Input shape:  [batch_size, 7*7*64]
    # Output shape: [batch_size, 20]
    fclayers = layers.stack(
        pool2_flat,
        layers.fully_connected,
        [100, 20],
        activation_fn=tf.nn.relu,
        weights_regularizer=layers.l1_l2_regularizer(1.0, 2.0),
        weights_initializer=layers.xavier_initializer(uniform=True, seed=100))

    # Output layer: 10 units, one per digit (0 to 9).
    # activation_fn=None because softmax is applied inside the
    # softmax_cross_entropy loss below.
    outputs = layers.fully_connected(
        inputs=fclayers,
        num_outputs=10,
        activation_fn=None)

    # Cross-entropy loss, with softmax applied to the logits.
    loss = losses.softmax_cross_entropy(outputs, targets)

    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.1,
        optimizer="SGD")

    # The predicted class is the unit with the highest softmax probability;
    # return both the probabilities and the predicted labels.
    probs = tf.nn.softmax(outputs)
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
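# Complementary usage sketch for the CNN model_fn above (test_images is a
# hypothetical array): with the legacy tf.contrib.learn Estimator, predict()
# returns the dict built at the end of model_function, so 'labels' holds the
# predicted digit for each image.
cnn_classifier = learn.Estimator(model_fn=model_function)
preds = cnn_classifier.predict(x=test_images, as_iterable=False)
predicted_digits = preds['labels']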
def buildGraph(input_placeholder_s1, input_placeholder_s2, labels_placeholder,
               mask_placeholder_s1, mask_placeholder_s2, dropout_placeholder,
               embeddings_matrix):
    params = tf.Variable(embeddings_matrix)
    tensor_s1 = tf.nn.embedding_lookup(params, input_placeholder_s1)
    tensor_s2 = tf.nn.embedding_lookup(params, input_placeholder_s2)
    embeddings_s1 = tf.reshape(tensor_s1, [-1, max_length, embed_size])
    embeddings_s2 = tf.reshape(tensor_s2, [-1, max_length, embed_size])

    dropout_rate = dropout_placeholder
    preds = []
    cell1 = LSTMCell(embed_size, hidden_size)
    cell2 = LSTMCell2(embed_size, hidden_size)

    # Run the first LSTM over sentence 1, using the mask to get true lengths.
    c = tf.zeros([tf.shape(embeddings_s1)[0], hidden_size])
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(c, h)
    l1 = tf.reduce_sum(tf.cast(mask_placeholder_s1, tf.int32), axis=1)
    outputs1, state1 = tf.nn.dynamic_rnn(cell1, embeddings_s1, dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l1)

    # The second LSTM starts from the first one's final cell state.
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(state1.c, h)
    l2 = tf.reduce_sum(tf.cast(mask_placeholder_s2, tf.int32), axis=1)
    outputs2, state2 = tf.nn.dynamic_rnn(cell2, embeddings_s2, dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l2)

    func = xavier_weight_init()

    # Attention over sentence 1's hidden states, conditioned on the final
    # hidden state of sentence 2.
    Y = tf.transpose(outputs1, perm=[0, 2, 1])  # [batch, hidden, time]
    W_y = tf.Variable(func([hidden_size, hidden_size]))
    W_h = tf.Variable(func([hidden_size, hidden_size]))
    e_l = tf.constant(1.0, shape=[1, max_length])
    WY = tf.tensordot(W_y, Y, axes=[[0], [1]])
    WY = tf.transpose(WY, perm=[1, 0, 2])
    h_n = tf.reshape(state2.h, shape=[-1, hidden_size, 1])
    Whe = tf.tensordot(h_n, e_l, axes=[[2], [0]])
    Whe = tf.tensordot(W_h, Whe, axes=[[0], [1]])
    Whe = tf.transpose(Whe, perm=[1, 0, 2])
    M = tf.tanh(WY + Whe)
    w_alpha = tf.Variable(func([1, hidden_size]))
    alpha = tf.nn.softmax(tf.tensordot(w_alpha, M, axes=[[1], [1]]))
    alpha = tf.transpose(alpha, perm=[1, 2, 0])
    alpha = tf.reshape(alpha, shape=[-1, max_length, 1])

    # Attention-weighted representation of sentence 1.
    r = tf.matmul(Y, alpha)
    r = tf.reshape(r, shape=[-1, hidden_size])

    U = tf.Variable(func([hidden_size, n_classes]))
    b1 = tf.Variable(tf.zeros([1, n_classes]))
    W_p = tf.Variable(func([hidden_size, hidden_size]))
    W_x = tf.Variable(func([hidden_size, hidden_size]))
    hstar = tf.tanh(tf.matmul(r, W_p) + tf.matmul(state2.h, W_x))
    h_drop = tf.nn.dropout(hstar, keep_prob=1 - dropout_rate)
    pred = tf.matmul(h_drop, U) + b1

    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_placeholder,
                                                   logits=pred)
    loss = tf.reduce_mean(loss)
    regularizer = l1_l2_regularizer(l1_reg, l2_reg)
    reg_loss = apply_regularization(regularizer, tf.trainable_variables())
    loss += reg_loss

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    # Clip gradients by global norm before applying them.
    gradients = optimizer.compute_gradients(loss)
    grads = [x[0] for x in gradients]
    grads, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    gradients = [(grads[i], gradients[i][1]) for i in range(len(grads))]
    train_op = optimizer.apply_gradients(gradients)
    return pred, loss, train_op
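# Minimal training-step sketch for the graph above; the placeholder handles
# and batch arrays are assumptions based on buildGraph's signature.
pred, loss, train_op = buildGraph(s1_ph, s2_ph, labels_ph, mask1_ph,
                                  mask2_ph, dropout_ph, embeddings_matrix)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, batch_loss = sess.run(
        [train_op, loss],
        feed_dict={s1_ph: batch_s1, s2_ph: batch_s2, labels_ph: batch_labels,
                   mask1_ph: batch_mask1, mask2_ph: batch_mask2,
                   dropout_ph: 0.2})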
# -*- coding:utf-8 -*-
import tensorflow as tf
import new_eval2 as new_eval
from tensorflow.contrib import layers

regularizer = layers.l1_l2_regularizer(scale_l1=1e-6, scale_l2=1e-6)


def S_matri(x1, x2):
    # Cosine-similarity matrix between two batches of sequence encodings.
    normalized_q = tf.nn.l2_normalize(x1, dim=2)
    normalized_a = tf.nn.l2_normalize(x2, dim=2)
    matri = tf.matmul(normalized_q, tf.transpose(normalized_a, perm=[0, 2, 1]))
    return matri


class GRU_first(object):
    def __init__(self, input, n_output, n_skip, batch_size):
        self.xt_ini = input
        self.batch_size = batch_size
        self.time_step = int(self.xt_ini.get_shape()[1])
        self.n_input = int(self.xt_ini.get_shape()[2])
        self.n_output = n_output
        with tf.variable_scope("gru_q_a"):
            self.skip_Wr = tf.get_variable(shape=[self.n_input, self.n_output],
                                           name="skip_Wr",
                                           regularizer=regularizer)
            self.skip_Ur = tf.get_variable(shape=[self.n_output, self.n_output],
                                           name="skip_Ur",
                                           regularizer=regularizer)
            # bias shape assumed to match the gate output dimension
            self.skip_br = tf.get_variable(name='skip_br',
                                           shape=[self.n_output])
def build_resnet(repetitions=(2, 2, 2, 2), include_top=True, input_tensor=None,
                 input_shape=None, classes=1000, block_type='usual',
                 l1_regular=0.01, l2_regular=0.01):
    """Build a ResNet-style Keras model with L1/L2-regularized convolutions."""
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=197,
                                      data_format='channels_last',
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='data')
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # get parameters for model layers
    no_scale_bn_params = get_bn_params(scale=False)
    bn_params = get_bn_params()
    conv_params = get_conv_params()
    init_filters = 64

    if block_type == 'basic':
        conv_block = basic_conv_block
        identity_block = basic_identity_block
    else:
        conv_block = usual_conv_block
        identity_block = usual_identity_block

    regular = l1_l2_regularizer(scale_l1=l1_regular, scale_l2=l2_regular)

    # resnet bottom
    x = BatchNormalization(name='bn_data', **no_scale_bn_params)(img_input)
    x = ZeroPadding2D(padding=(3, 3))(x)
    x = Conv2D(init_filters, (7, 7), strides=(2, 2), kernel_regularizer=regular,
               name='conv0', **conv_params)(x)
    x = BatchNormalization(name='bn0', **bn_params)(x)
    x = Activation('relu', name='relu0')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='valid', name='pooling0')(x)

    # resnet body
    for stage, rep in enumerate(repetitions):
        for block in range(rep):
            filters = init_filters * (2 ** stage)
            # first block of first stage without strides because we have
            # maxpooling before
            if block == 0 and stage == 0:
                x = conv_block(filters, stage, block, strides=(1, 1),
                               l1_regular=l1_regular, l2_regular=l2_regular)(x)
            elif block == 0:
                x = conv_block(filters, stage, block, strides=(2, 2),
                               l1_regular=l1_regular, l2_regular=l2_regular)(x)
            else:
                x = identity_block(filters, stage, block,
                                   l1_regular=l1_regular, l2_regular=l2_regular)(x)

    x = BatchNormalization(name='bn1', **bn_params)(x)
    x = Activation('relu', name='relu1')(x)

    # resnet top
    if include_top:
        x = GlobalAveragePooling2D(name='pool1')(x)
        x = Dense(classes, name='fc1')(x)
        x = Activation('softmax', name='softmax')(x)

    # Ensure that the model takes into account any potential predecessors
    # of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x)
    return model
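# Illustrative use of the builder above; the compile settings are assumptions,
# not part of the original source.
model = build_resnet(repetitions=(2, 2, 2, 2), input_shape=(224, 224, 3),
                     classes=10, block_type='usual',
                     l1_regular=1e-5, l2_regular=1e-4)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])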
import tensorflow as tf
from tensorflow.contrib.layers import l1_regularizer, l2_regularizer, l1_l2_regularizer

REGULARISATORS = {
    'none': lambda arg: tf.constant(0.0),
    'l1': l1_regularizer(1.0),
    'l2': l2_regularizer(1.0),
    'l1_l2': l1_l2_regularizer(1.0, 1.0)
}

NBS_EPOCHS = [5, 10, 20, 50, 100, 200]
BATCH_SIZES = [1, 16, 32, 64, 128, 512]
ARCHITECTURES = [
    [128] * 0,
    [128] * 1,
]
# [128] * 2,
# [128] * 3,
# [128] * 4]

OPTIMISERS = [
    tf.train.AdamOptimizer(learning_rate=1e-3),
    tf.train.GradientDescentOptimizer(learning_rate=1e-3),
    tf.train.AdadeltaOptimizer(learning_rate=1e-3),
    tf.train.RMSPropOptimizer(learning_rate=1e-3)
]
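# One way to sweep this grid (illustrative; build_and_train is a hypothetical
# helper that constructs and trains one model per configuration). Note that
# in TF 1.x, optimiser instances are tied to a graph, so in practice they
# would need to be rebuilt for each freshly constructed graph.
import itertools

for reg_name, n_epochs, batch_size, arch, opt in itertools.product(
        REGULARISATORS, NBS_EPOCHS, BATCH_SIZES, ARCHITECTURES, OPTIMISERS):
    build_and_train(REGULARISATORS[reg_name], n_epochs, batch_size, arch, opt)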