def train():
    """Train a two-layer sigmoid network on the XOR truth table.

    Uses the module-level parameters ``w1``, ``b1``, ``w2``, ``b2`` and the
    module-level session ``sess``; none of them are created here.  The
    current cost is printed every 1000 steps, followed by a completion
    message.
    """
    # Placeholders for the training inputs (4 inputs with 2 features each)
    # and outputs (4 outputs which have a value of 0 or 1).
    x = tf.placeholder(tf.float32, [4, 2], name='x-inputs')
    y = tf.placeholder(tf.float32, [4, 1], name='y-inputs')

    # Set up the model calculations.
    temp = tf.sigmoid(tf.matmul(x, w1) + b1)
    output = tf.sigmoid(tf.matmul(temp, w2) + b2)

    # Cost function is the average cross-entropy over the training samples.
    cost = tf.reduce_mean(((y * tf.log(output)) + ((1 - y) * tf.log(1.0 - output))) * -1)

    # Training step is plain gradient descent.
    train_step = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

    # Training data: the XOR truth table.
    training_x = [[0, 1], [0, 0], [1, 0], [1, 1]]
    training_y = [[1], [0], [1], [0]]
    feed = {x: training_x, y: training_y}

    # Initialise all variables in the shared session.
    init = tf.initialize_all_variables()
    sess.run(init)

    for i in range(100000):
        sess.run(train_step, feed_dict=feed)
        if i % 1000 == 0:
            # Printed as one tuple so the output is the same on Python 2
            # (where the original printed a tuple) and Python 3.
            print((i, sess.run(cost, feed_dict=feed)))

    # Single-argument call form: valid and identical on Python 2 and 3.
    print('\ntraining done\n')
def loc_net_fc(images, batch_size):
    """Two-layer fully connected net mapping images to three tanh outputs.

    The images are shifted and scaled by 128, resized to 150x150, flattened
    per example and passed through a 100-unit ReLU layer followed by a
    3-unit tanh layer.

    Args:
        images: image batch; assumes 3 channels, since the first weight
            matrix has 150*150*3 rows -- TODO confirm against caller.
        batch_size: leading dimension used when flattening.

    Returns:
        Tensor ``theta`` with 3 values per example, each in (-1, 1).
    """
    # Centre and scale the pixel values.
    images -= 128
    images /= 128.
    images = tf.image.resize_images(images, 150, 150)
    flat_input = tf.reshape(images, [batch_size, -1])

    hidden_size = 100

    with tf.name_scope('fc1') as scope:
        fc1_w = tf.Variable(
            tf.truncated_normal([150**2*3, hidden_size],
                                dtype=tf.float32, stddev=1e-3),
            name='weights')
        fc1_b = tf.Variable(
            tf.constant(0.0, shape=[hidden_size], dtype=tf.float32),
            name='biases')
        pre_activation = tf.add(tf.matmul(flat_input, fc1_w), fc1_b, name=scope)
        hidden = tf.nn.relu(pre_activation)

    with tf.name_scope('fc2') as scope:
        fc2_w = tf.Variable(
            tf.truncated_normal([hidden_size, 3],
                                dtype=tf.float32, stddev=1e-3),
            name='weights')
        fc2_b = tf.Variable(
            tf.constant(0.0, shape=[3], dtype=tf.float32),
            name='biases')
        raw_theta = tf.add(tf.matmul(hidden, fc2_w), fc2_b, name=scope)
        theta = tf.nn.tanh(raw_theta)

    return theta
def output_dropout_no_bias(self, x, keep_prob=0.5):
    """Return the bias-free layer output with dropout applied.

    Computes ``x @ self.W``, applies the activation named by
    ``self.activation`` and then dropout.

    Args:
        x: input tensor, multiplied on the right by ``self.W``.
        keep_prob: probability of keeping each unit in the dropout step.

    Returns:
        The activated, dropped-out tensor.  An unrecognised activation
        name prints a notice and falls back to the linear activation.
    """
    z = tf.matmul(x, self.W)

    if self.activation == 'sigmoid':
        activated = tf.nn.sigmoid(z)
    elif self.activation == 'relu':
        activated = tf.nn.relu(z)
    elif self.activation == 'relu6':
        activated = tf.nn.relu6(z)
    elif self.activation == 'leaky_relu':
        activated = tf.maximum(0.1 * z, z)
    elif self.activation == 'leaky_relu6':
        # Leaky ReLU capped at 6.  The previous max(0.1*z, 6) could never
        # produce a value below 6.
        activated = tf.minimum(tf.maximum(0.1 * z, z), 6.0)
    elif self.activation == 'linear':
        activated = z
    elif self.activation == 'softplus':
        activated = tf.nn.softplus(z)
    elif self.activation == 'tanh':
        activated = tf.tanh(z)
    else:
        print("No known activation function selected, using linear")
        # Fall back to exactly what the 'linear' branch does, dropout
        # included; previously dropout was silently skipped here.
        activated = z

    return tf.nn.dropout(activated, keep_prob)
def __init__(self):
    """Build a three-layer network with dropout and a squared-error loss.

    Creates the input/target placeholders, three uniformly initialised
    weight/bias pairs, a tanh-tanh-linear forward pass with dropout on the
    input and both hidden layers, the loss, a momentum training op and a
    session.  Layer sizes and keep probabilities come from module-level
    constants.
    """
    self.x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
    self.y = tf.placeholder(tf.float32, [None, HIDDEN_3_SIZE])

    # Parameters are created layer by layer, weight before bias.
    layer_sizes = [NUM_FEATURES, HIDDEN_1_SIZE, HIDDEN_2_SIZE, HIDDEN_3_SIZE]
    params = []
    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        weight = tf.Variable(tf.random_uniform([fan_in, fan_out], maxval=1.0))
        bias = tf.Variable(tf.random_uniform([fan_out], maxval=1.0))
        params.append((weight, bias))
    (w_first, b_first), (w_second, b_second), (w_last, b_last) = params

    # Forward pass: dropout precedes every affine transform.
    dropped_input = tf.nn.dropout(self.x, KEEP_PROB_INPUT)
    hidden_a = tf.nn.tanh(tf.matmul(dropped_input, w_first) + b_first)
    dropped_a = tf.nn.dropout(hidden_a, KEEP_PROB_HIDDEN)
    hidden_b = tf.nn.tanh(tf.matmul(dropped_a, w_second) + b_second)
    dropped_b = tf.nn.dropout(hidden_b, KEEP_PROB_HIDDEN)

    # The output layer is linear (no softmax).
    self.y_pred = tf.matmul(dropped_b, w_last) + b_last

    # Despite the attribute name this is the mean squared error.
    squared_error = tf.square(self.y_pred - self.y)
    self.cross_entropy = tf.reduce_mean(squared_error)

    # NOTE(review): a learning rate of 109 looks unusually large -- confirm.
    optimizer = tf.train.MomentumOptimizer(109, 0.99)
    self.train_step = optimizer.minimize(self.cross_entropy)
    self.sess = tf.Session()
def output(self, x):
    """Return the layer output ``activation(x @ W + b)``.

    When ``self.no_bias`` is set the call is delegated to
    ``output_no_bias``.  Otherwise the affine pre-activation is computed
    once and passed through the activation named by ``self.activation``.

    Args:
        x: input tensor, multiplied on the right by ``self.W``.

    Returns:
        The activated tensor.  An unrecognised activation name prints a
        notice and falls back to the linear activation.
    """
    if self.no_bias:
        # NOTE(review): invoked as a bare name exactly as before; confirm
        # whether this should be self.output_no_bias(x).
        return output_no_bias(self, x)

    # The bias is added AFTER the matrix product.  The non-linear branches
    # previously computed matmul(x, W + b), which folds the bias into the
    # weights and disagrees with the linear branch.
    z = tf.matmul(x, self.W) + self.b

    if self.activation == 'sigmoid':
        return tf.nn.sigmoid(z)
    elif self.activation == 'relu':
        return tf.nn.relu(z)
    elif self.activation == 'relu6':
        return tf.nn.relu6(z)
    elif self.activation == 'leaky_relu':
        return tf.maximum(0.1 * z, z)
    elif self.activation == 'leaky_relu6':
        # Leaky ReLU capped at 6; max(0.1*z, 6) could never go below 6.
        return tf.minimum(tf.maximum(0.1 * z, z), 6.0)
    elif self.activation == 'linear':
        return z
    elif self.activation == 'softplus':
        return tf.nn.softplus(z)
    elif self.activation == 'tanh':
        return tf.tanh(z)
    else:
        print("No known activation function selected, using linear")
        return z
def solve(a, b):
    """Solve the linear system ``a @ x = b`` via the inverse of ``a``.

    Args:
        a: square matrix passed to ``tf.matrix_inverse``.
        b: right-hand side exposing ``ndim``; either a vector (ndim 1) or
            a matrix (ndim 2).

    Returns:
        The solution, flattened to 1-D when ``b`` is a vector and 2-D when
        ``b`` is a matrix.

    Raises:
        ValueError: if ``b`` is neither 1-D nor 2-D.
    """
    if b.ndim == 1:
        # Promote the vector to a column, solve, then flatten again.
        column = tf.expand_dims(b, -1)
        return tf.reshape(tf.matmul(tf.matrix_inverse(a), column), [-1])
    if b.ndim == 2:
        return tf.matmul(tf.matrix_inverse(a), b)
    # Previously this dropped into an interactive ipdb session and then
    # returned None; fail loudly instead.
    raise ValueError(
        "solve expects b to be 1-D or 2-D, got ndim=%r" % (b.ndim,))
def build_predict(self, Xnew, full_cov=False):
    """Build the predictive mean and variance at ``Xnew``.

    Xnew is a data matrix of points at which we want to predict.
    This method computes p(F* | Y), where F* are points on the GP at Xnew
    and Y are noisy observations at X.

    Args:
        Xnew: points at which to predict.
        full_cov: if true, build the full covariance between the new
            points; otherwise only the per-point variances.

    Returns:
        Tuple ``(fmean, fvar)``; the variance is tiled once per column
        of Y.
    """
    cross_cov = self.kern.K(self.X, Xnew)
    # Training covariance with the likelihood noise on the diagonal.
    noisy_cov = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
    chol = tf.cholesky(noisy_cov)

    A = tf.matrix_triangular_solve(chol, cross_cov, lower=True)
    residual = self.Y - self.mean_function(self.X)
    V = tf.matrix_triangular_solve(chol, residual)
    fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)

    if not full_cov:
        marginal = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.reshape(marginal, (-1, 1)), [1, self.Y.shape[1]])
        return fmean, fvar

    joint = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
    tile_shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
    fvar = tf.tile(tf.expand_dims(joint, 2), tile_shape)
    return fmean, fvar
def update_centers(self, img_dataset):
    '''
    Recompute the centers ``self.C`` in closed form.

    With h = self.img_b_all and U = self.img_output_all this assigns

        self.C = (h^T h + 0.001 * I)^{-1} (h^T U)

    where I has side ``subcenter_num * subspace_num``.  Any row of the new
    C whose squared norm is below 1e-8 is restored from the previous value
    of C before the result is written back.

    Args:
        img_dataset: provides ``output`` (fed to img_output_all) and
            ``codes`` (fed to img_b_all).
    '''
    previous_centers = self.sess.run(self.C)

    codes = self.img_b_all
    outputs = self.img_output_all

    # Small ridge term added before inverting h^T h.
    n_centers = self.subcenter_num * self.subspace_num
    ridge = tf.constant(np.eye(n_centers, dtype=np.float32) * 0.001)

    codes_t_outputs = tf.matmul(tf.transpose(codes), outputs)
    gram = tf.add(tf.matmul(tf.transpose(codes), codes), ridge)
    solved = tf.matmul(tf.matrix_inverse(gram), codes_t_outputs)

    # NOTE(review): these ops are rebuilt on every call, so the graph
    # grows with each update -- consider building them once.
    assign_op = self.C.assign(solved)
    feed = {
        self.img_output_all: img_dataset.output,
        self.img_b_all: img_dataset.codes,
    }
    new_centers = self.sess.run(assign_op, feed_dict=feed)

    # Keep the old value for centers that collapsed to (near) zero.
    row_energy = np.sum(np.square(new_centers), axis=1)
    collapsed = np.where(row_energy < 1e-8)
    new_centers[collapsed, :] = previous_centers[collapsed, :]
    self.sess.run(self.C.assign(new_centers))
def model(data, text_data, train=False):
    """The Model definition.

    Three conv/ReLU/max-pool stages, a flatten step, concatenation with
    ``text_data`` and three fully connected layers.  The weights and
    biases are module-level variables defined outside this function.

    Args:
        data: image batch with layout [image index, y, x, depth].
        text_data: 2-D text features concatenated to the flattened conv
            features along axis 1.
        train: when true, 50% dropout follows each hidden dense layer.

    Returns:
        The output of the last fully connected layer (no activation).
    """
    # 2D convolution, with 'SAME' padding (i.e. the output feature map has
    # the same size as the input). Note that {strides} is a 4D array whose
    # shape matches the data layout: [image index, y, x, depth].
    conv = tf.nn.conv2d(data,
                        conv1_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    # Bias and rectified linear non-linearity.
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
    # Max pooling. The kernel size spec {ksize} also follows the layout of
    # the data. Here we have a pooling window of 2, and a stride of 2.
    pool = tf.nn.max_pool(relu,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')

    conv = tf.nn.conv2d(pool,
                        conv2_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
    pool = tf.nn.max_pool(relu,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')
    # Diagnostic shape print.  The single-argument call form is valid on
    # Python 2 and 3 and prints the same text as the old statement.
    print(pool.get_shape().as_list())

    conv = tf.nn.conv2d(pool,
                        conv3_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv3_biases))
    pool = tf.nn.max_pool(relu,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='VALID')

    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    pool_shape = pool.get_shape().as_list()
    print(pool_shape)
    print(fc1_weights.get_shape().as_list())
    reshape = tf.reshape(
        pool,
        [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])

    # Take the text vector into account before the fully connected layer.
    reshape = tf.concat(1, [reshape, text_data])

    # Fully connected layer. Note that the '+' operation automatically
    # broadcasts the biases.
    hidden1 = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    # Add a 50% dropout during training only. Dropout also scales
    # activations such that no rescaling is needed at evaluation time.
    if train:
        hidden1 = tf.nn.dropout(hidden1, 0.5, seed=SEED)

    hidden2 = tf.nn.relu(tf.matmul(hidden1, fc2_weights) + fc2_biases)
    if train:
        hidden2 = tf.nn.dropout(hidden2, 0.5, seed=SEED)

    return tf.matmul(hidden2, fc3_weights) + fc3_biases
def inference(self, train=False):
    """
    Build the core of the model: two convolution/ReLU/max-pool stages
    followed by two fully connected layers, with dropout on the hidden
    dense layer when training.

    :param train: Boolean selecting training or eval. Training reads
                  self.X and adds dropout; eval reads self.eval_X.
    :return: The logits tensor (the softmax itself is left to the loss
             function).
    """
    same_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    # Choose the input tensor for this mode.
    inputs = self.X if train else self.eval_X

    # Stage 1: convolution -> bias + ReLU -> max pool.
    stage1 = tf.nn.conv2d(inputs, self.conv1_w, strides=same_stride, padding='SAME')
    stage1 = tf.nn.relu(tf.nn.bias_add(stage1, self.conv1_b))
    stage1 = tf.nn.max_pool(stage1, ksize=pool_window, strides=pool_window, padding='SAME')

    # Stage 2: convolution -> bias + ReLU -> max pool.
    stage2 = tf.nn.conv2d(stage1, self.conv2_w, strides=same_stride, padding='SAME')
    stage2 = tf.nn.relu(tf.nn.bias_add(stage2, self.conv2_b))
    stage2 = tf.nn.max_pool(stage2, ksize=pool_window, strides=pool_window, padding='SAME')

    # Flatten the 4D pooled tensor into [batch, features].
    batch, height, width, depth = stage2.get_shape().as_list()
    flat = tf.reshape(stage2, [batch, height * width * depth])

    # Fully connected layer 1 with ReLU; dropout only during training.
    hidden = tf.nn.relu(tf.matmul(flat, self.fc1_w) + self.fc1_b)
    if train:
        hidden = tf.nn.dropout(hidden, 0.5)

    # Fully connected layer 2 produces the logits.
    return tf.matmul(hidden, self.fc2_w) + self.fc2_b
def forward_propagation(X, parameters):
    """
    Implements the forward propagation for the model:
    LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX

    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing the parameters
                  "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    Z3 -- the output of the last LINEAR unit (softmax is not applied here)
    """
    # Retrieve the parameters from the dictionary "parameters".
    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']
    W3, b3 = parameters['W3'], parameters['b3']
    print(W3.shape)

    # Layer 1 -- numpy equivalent: relu(np.dot(W1, X) + b1)
    A1 = tf.nn.relu(tf.add(tf.matmul(W1, X), b1))
    # Layer 2 -- numpy equivalent: relu(np.dot(W2, A1) + b2)
    A2 = tf.nn.relu(tf.add(tf.matmul(W2, A1), b2))
    print(A2.shape)
    # Layer 3 -- numpy equivalent: np.dot(W3, A2) + b3
    Z3 = tf.add(tf.matmul(W3, A2), b3)

    return Z3
def forward_propagation(images):
    """Build the forward pass: three sigmoid conv/pool stages and two
    dense layers ending in a 4-way softmax.

    Args:
        images: input batch, reshaped here to [-1, 1750, 1750, 3].

    Returns:
        Softmax probabilities over 4 outputs.
    """
    with tf.variable_scope('conv1'):
        kernel1 = weight_variable([5, 5, 3, 32])
        offset1 = bias_variable([32])
        image_matrix = tf.reshape(images, [-1, 1750, 1750, 3])
        act1 = tf.nn.sigmoid(conv2d(image_matrix, kernel1) + offset1)
        _activation_summary(act1)
        pooled1 = max_pool_5x5(act1)

    with tf.variable_scope('conv2'):
        kernel2 = weight_variable([5, 5, 32, 64])
        offset2 = bias_variable([64])
        act2 = tf.nn.sigmoid(conv2d(pooled1, kernel2) + offset2)
        _activation_summary(act2)
        pooled2 = max_pool_5x5(act2)

    with tf.variable_scope('conv3'):
        kernel3 = weight_variable([5, 5, 64, 128])
        offset3 = bias_variable([128])
        act3 = tf.nn.sigmoid(conv2d(pooled2, kernel3) + offset3)
        _activation_summary(act3)
        pooled3 = max_pool_5x5(act3)

    # NOTE(review): the output layer is kept inside the 'local3' scope;
    # the original nesting is ambiguous -- confirm.
    with tf.variable_scope('local3'):
        dense_w = weight_variable([14 * 14 * 128, 256])
        dense_b = bias_variable([256])
        flat = tf.reshape(pooled3, [-1, 14 * 14 * 128])
        dense_out = tf.nn.sigmoid(tf.matmul(flat, dense_w) + dense_b)
        _activation_summary(dense_out)

        # Not consumed by any op below; kept because it still adds a
        # variable to the graph.
        keep_prob = tf.Variable(1.0)

        out_w = weight_variable([256, 4])
        out_b = bias_variable([4])
        y_conv = tf.nn.softmax(tf.matmul(dense_out, out_w) + out_b)
        _activation_summary(y_conv)

    return y_conv
def alex_net(_X, _dropout):
    """Build the network: three conv/pool/norm/dropout stages followed by
    two ReLU dense layers and a linear output layer.

    All weights and biases (wc*, bc*, wd*, bd*, wout, bout) are
    module-level names defined outside this function.

    Args:
        _X: input batch, reshaped here to [-1, 40, 40, 1].
        _dropout: keep probability for the dropout after each conv stage.

    Returns:
        The class-prediction tensor from the output layer.
    """
    # Reshape input picture.
    net = tf.reshape(_X, shape=[-1, 40, 40, 1])

    # Each stage: convolution -> 2x max pool -> normalisation -> dropout.
    stages = (
        ('conv1', 'pool1', 'norm1', wc1, bc1),
        ('conv2', 'pool2', 'norm2', wc2, bc2),
        ('conv3', 'pool3', 'norm3', wc3, bc3),
    )
    for conv_name, pool_name, norm_name, kernel, offset in stages:
        net = conv2d(conv_name, net, kernel, offset)
        net = max_pool(pool_name, net, k=2)
        net = norm(norm_name, net, lsize=4)
        net = tf.nn.dropout(net, _dropout)

    # Flatten the last stage to match the first dense weight matrix.
    flat_width = wd1.get_shape().as_list()[0]
    dense = tf.reshape(net, [-1, flat_width])

    # Fully connected layers with ReLU activation.
    dense = tf.nn.relu(tf.matmul(dense, wd1) + bd1, name='fc1')
    dense = tf.nn.relu(tf.matmul(dense, wd2) + bd2, name='fc2')

    # Output, class prediction.
    return tf.matmul(dense, wout) + bout
def fc_layers(self):
    """Build the three fully connected layers on top of ``self.pool5``.

    Sets ``self.fc1`` and ``self.fc2`` (ReLU outputs with 4096 units) and
    ``self.fc3l`` (1000 linear outputs), and appends each layer's weights
    and biases to ``self.parameters``.
    """
    def _dense_params(n_in, n_out):
        # Weights from a truncated normal (stddev 0.1), biases at 1.0.
        w = tf.Variable(
            tf.truncated_normal([n_in, n_out], dtype=tf.float32, stddev=1e-1),
            name='weights')
        b = tf.Variable(
            tf.constant(1.0, shape=[n_out], dtype=tf.float32),
            trainable=True, name='biases')
        return w, b

    # fc1: flatten pool5 and apply the first dense layer.
    with tf.name_scope('fc1') as scope:
        flat_size = int(np.prod(self.pool5.get_shape()[1:]))
        fc1w, fc1b = _dense_params(flat_size, 4096)
        flattened = tf.reshape(self.pool5, [-1, flat_size])
        self.fc1 = tf.nn.relu(tf.nn.bias_add(tf.matmul(flattened, fc1w), fc1b))
        self.parameters += [fc1w, fc1b]

    # fc2
    with tf.name_scope('fc2') as scope:
        fc2w, fc2b = _dense_params(4096, 4096)
        self.fc2 = tf.nn.relu(tf.nn.bias_add(tf.matmul(self.fc1, fc2w), fc2b))
        self.parameters += [fc2w, fc2b]

    # fc3: linear output, no activation.
    with tf.name_scope('fc3') as scope:
        fc3w, fc3b = _dense_params(4096, 1000)
        self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
        self.parameters += [fc3w, fc3b]
def RNN(_X, _istate, _weights, _biases):
    """Run a CNN over three time steps, feed the features through an LSTM
    and map the last output through two dense layers.

    Args:
        _X: input with five axes; the first two are swapped here so the
            time step comes first, and exactly three steps are consumed.
        _istate: initial state for the LSTM.
        _weights: dict providing 'out1' and 'out2'.
        _biases: dict providing 'out1' and 'out2'.

    Returns:
        Output of the second (linear) dense layer.
    """
    # Permute n_steps and batch_size so the step axis is first.
    steps_first = tf.transpose(_X, [1, 0, 2, 3, 4])

    # One CNN pass per time step.
    step_features = [CNN(steps_first[t, :, :, :, :]) for t in range(3)]

    # Define an LSTM cell and run it over the per-step features.
    cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = rnn.rnn(cell, step_features, initial_state=_istate)

    # Only the last step's output feeds the dense head.
    last_output = outputs[-1]
    hidden = tf.nn.relu(tf.matmul(last_output, _weights['out1']) + _biases['out1'])
    return tf.matmul(hidden, _weights['out2']) + _biases['out2']
def inference(images, hidden1_units):
    """Build the MNIST model up to where it may be used for inference.

    Args:
      images: Images placeholder, from inputs().
      hidden1_units: Size of the single hidden layer.

    Returns:
      softmax_linear: Output tensor with the computed logits.
    """
    # Hidden layer: ReLU over an affine map of the flattened pixels.
    with tf.name_scope('hidden1'):
        hidden_w = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        hidden_b = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, hidden_w) + hidden_b)

    # Linear output layer producing one logit per class.
    with tf.name_scope('softmax_linear'):
        logit_w = tf.Variable(
            tf.truncated_normal([hidden1_units, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        logit_b = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden1, logit_w) + logit_b

    return logits
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
    """Build a bidirectional LSTM with a linear projection before and
    after the recurrent layer.

    Args:
        _X: input of shape (batch_size, n_steps, n_input).
        _istate_fw: initial state of the forward cell.
        _istate_bw: initial state of the backward cell.
        _weights: dict providing 'hidden' and 'out'.
        _biases: dict providing 'hidden' and 'out'.

    Returns:
        A list with one projected output per element returned by
        ``rnn.bidirectional_rnn``.
    """
    # (batch, steps, input) -> (steps, batch, input) -> (steps*batch, input)
    time_major = tf.transpose(_X, [1, 0, 2])
    flat_inputs = tf.reshape(time_major, [-1, n_input])

    # Linear projection applied before the recurrent layer.
    projected = tf.matmul(flat_inputs, _weights['hidden']) + _biases['hidden']

    # Forward and backward direction cells.
    forward_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    backward_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # The rnn inner loop needs a list with one tensor per step.
    step_inputs = tf.split(0, n_steps, projected)

    outputs = rnn.bidirectional_rnn(forward_cell, backward_cell, step_inputs,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw)

    # Linear projection of every output.
    output = []
    for step_output in outputs:
        output.append(tf.matmul(step_output, _weights['out']) + _biases['out'])
    return output
def feature_importance(sess, user_id, matrix_i, matrix_j, matrix_f, matrix_o, bias_i, bias_j, bias_f, bias_o):
    """Average summed gate pre-activation per feature for one user.

    For every feature, the rows of each gate matrix belonging to that
    feature (row counts come from ``config.feature_desc``) are multiplied
    by the feature's slice of each event, the gate bias is added and the
    result is summed.  The sums are averaged over the user's events.

    Args:
        sess: session used to evaluate the products.
        user_id: passed to ``get_user_embedding``.
        matrix_i, matrix_j, matrix_f, matrix_o: gate matrices, sliced by
            row range per feature.
        bias_i, bias_j, bias_f, bias_o: gate biases, reshaped to one row.

    Returns:
        Tuple ``(gates_i, gates_j, gates_f, gates_o)`` of lists holding
        one average per feature.

    Raises:
        ZeroDivisionError: if the user has no events (unchanged).
    """
    user_embedding = get_user_embedding(sess, user_id)
    num_events = len(user_embedding)

    gate_matrices = (matrix_i, matrix_j, matrix_f, matrix_o)
    # Loop-invariant: build the row-vector form of each bias once rather
    # than once per (feature, event) pair.
    bias_rows = [tf.reshape(bias, [1, -1])
                 for bias in (bias_i, bias_j, bias_f, bias_o)]

    gates = ([], [], [], [])
    end_index = 0
    for feature in range(len(config.feature_desc)):
        start_index = end_index
        end_index = start_index + config.feature_desc[feature]

        # Rows of each gate matrix that belong to this feature.
        blocks = [matrix[start_index:end_index] for matrix in gate_matrices]

        totals = [0, 0, 0, 0]
        for event in user_embedding:
            row = tf.reshape(event[feature], [1, -1])
            # One session call evaluates all four gates instead of four.
            values = sess.run([tf.matmul(row, block) + bias_row
                               for block, bias_row in zip(blocks, bias_rows)])
            for k, value in enumerate(values):
                totals[k] += np.sum(value)

        for k in range(4):
            gates[k].append(totals[k] / num_events)

    return gates
def Linear(args, output_dim, bias=True, bias_init=0.0, scope=None):
    """Linear map ``concat(args) @ W (+ b)`` over one or more 2-D tensors.

    Args:
        args: a 2-D tensor or a list/tuple of 2-D tensors; their second
            dimensions are summed to size ``W``.
        output_dim: number of output columns.
        bias: whether to add the bias term ``b``.
        bias_init: constant used to initialise ``b``.
        scope: variable scope name; defaults to "linear".

    Returns:
        The projected tensor.

    Raises:
        ValueError: if an argument is not 2-D or has no known second
            dimension.
    """
    inputs = args if isinstance(args, (list, tuple)) else [args]

    shapes = [tensor.get_shape().as_list() for tensor in inputs]
    input_dim = 0
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2d arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        input_dim += shape[1]

    with tf.variable_scope(scope or "linear"):
        W = tf.get_variable("W", (input_dim, output_dim))
        # A single input is used directly; several are joined on axis 1.
        joined = inputs[0] if len(inputs) == 1 else tf.concat(1, inputs)
        result = tf.matmul(joined, W)
        if not bias:
            return result
        b = tf.get_variable("b", (output_dim,),
                            initializer=tf.constant_initializer(bias_init))
    return result + b
def dot(x, y):
    """Compute the product between a Tensor matrix and a Tensor vector.

    The 1-D argument is promoted to 2-D before the matrix product, and the
    2-D result is returned as is (it is not flattened back to a vector).

    Parameters
    ----------
    x : tf.Tensor
        Either a vector (1-D) or a matrix.
    y : tf.Tensor
        A matrix when ``x`` is a vector, otherwise a vector.

    Returns
    -------
    tf.Tensor
        When ``x`` is a vector it is treated as a single row and
        multiplied by ``y``, giving a one-row matrix.  Otherwise ``y`` is
        treated as a single column and ``x`` is multiplied by it, giving a
        one-column matrix.
    """
    if len(x.get_shape()) != 1:
        # matrix @ column-vector
        column = tf.expand_dims(y, 1)
        return tf.matmul(x, column)
    # row-vector @ matrix
    row = tf.expand_dims(x, 0)
    return tf.matmul(row, y)
def conv_net(x, weights, biases, dropout):
    """Two conv/max-pool stages, one ReLU dense layer with dropout and a
    linear output layer.

    Args:
        x: input batch, reshaped here to [-1, 28, 28, 1].
        weights: dict providing 'wc1', 'wc2', 'wd1' and 'out'.
        biases: dict providing 'bc1', 'bc2', 'bd1' and 'out'.
        dropout: keep probability for the dropout after the dense layer.

    Returns:
        The class-prediction tensor from the output layer.
    """
    # Reshape input picture.
    images = tf.reshape(x, shape=[-1, 28, 28, 1])

    # First convolution followed by 2x down-sampling.
    stage1 = maxpool2d(conv2d(images, weights['wc1'], biases['bc1']), k=2)

    # Second convolution followed by 2x down-sampling.
    stage2 = maxpool2d(conv2d(stage1, weights['wc2'], biases['bc2']), k=2)

    # Flatten to match the row count of the dense weight matrix.
    dense_width = weights['wd1'].get_shape().as_list()[0]
    flat = tf.reshape(stage2, [-1, dense_width])

    # Fully connected layer: affine -> ReLU -> dropout.
    dense = tf.add(tf.matmul(flat, weights['wd1']), biases['bd1'])
    dense = tf.nn.dropout(tf.nn.relu(dense), dropout)

    # Output, class prediction.
    return tf.add(tf.matmul(dense, weights['out']), biases['out'])
def observation_net(self, input_):
    """Decoder network producing four linear heads from ``input_``.

    One layer is applied per entry of
    ``self.network_architecture['decoder_net']``; each layer is affine,
    then layer norm, then ``self.transfer_fct``.  The four heads are
    affine maps of the final hidden activation.

    Args:
        input_: decoder input; the original comment gives its shape as
            [B, Z].

    Returns:
        Tuple ``(x_mean, x_log_var, reward_mean, reward_log_var)``.
    """
    params = self.params_dict

    def _affine(tensor, weight_key, bias_key):
        # tensor @ W + b for the named parameter pair.
        return tf.add(tf.matmul(tensor, params[weight_key]), params[bias_key])

    with tf.name_scope('observation_net'):
        hidden = input_
        depth = len(self.network_architecture['decoder_net'])
        for layer_i in range(depth):
            pre_activation = _affine(hidden,
                                     'decoder_weights_l'+str(layer_i),
                                     'decoder_biases_l'+str(layer_i))
            hidden = self.transfer_fct(
                tf.contrib.layers.layer_norm(pre_activation))

        x_mean = _affine(hidden, 'decoder_weights_out_mean',
                         'decoder_biases_out_mean')
        x_log_var = _affine(hidden, 'decoder_weights_out_log_var',
                            'decoder_biases_out_log_var')
        reward_mean = _affine(hidden, 'decoder_weights_reward_mean',
                              'decoder_biases_reward_mean')
        reward_log_var = _affine(hidden, 'decoder_weights_reward_log_var',
                                 'decoder_biases_reward_log_var')

    return x_mean, x_log_var, reward_mean, reward_log_var
def forward(x, train, regularizer):
    """Forward pass: two conv/ReLU/pool layers and two dense layers.

    Args:
        x: input batch fed to the first convolution.
        train: when true, 50% dropout follows the first dense layer.
        regularizer: passed to ``get_weight`` for every weight tensor.

    Returns:
        Output of the last fully connected layer (no activation).
    """
    # First convolutional layer.
    kernel1 = get_weight([CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_KERNEL_NUM], regularizer)
    offset1 = get_bias([CONV1_KERNEL_NUM])
    activated1 = tf.nn.relu(tf.nn.bias_add(conv2d(x, kernel1), offset1))
    pooled1 = max_pool_2x2(activated1)

    # Second convolutional layer, including its variables.
    kernel2 = get_weight([CONV2_SIZE, CONV2_SIZE, CONV1_KERNEL_NUM, CONV2_KERNEL_NUM], regularizer)
    offset2 = get_bias([CONV2_KERNEL_NUM])
    activated2 = tf.nn.relu(tf.nn.bias_add(conv2d(pooled1, kernel2), offset2))
    pooled2 = max_pool_2x2(activated2)

    # Flatten the pooled feature maps into one vector per example, the
    # input format of the fully connected layers.
    batch, height, width, depth = pooled2.get_shape().as_list()
    nodes = height * width * depth
    flat = tf.reshape(pooled2, [batch, nodes])

    # Third layer: fully connected with regularised weights.
    dense_w = get_weight([nodes, FC_SIZE], regularizer)
    dense_b = get_bias([FC_SIZE])
    dense = tf.nn.relu(tf.matmul(flat, dense_w) + dense_b)
    if train:
        dense = tf.nn.dropout(dense, 0.5)

    # Fourth layer: fully connected output, including its variables.
    out_w = get_weight([FC_SIZE, OUTPUT_NODE], regularizer)
    out_b = get_bias([OUTPUT_NODE])
    return tf.matmul(dense, out_w) + out_b
def model(data):
    """
    Define the neural network model.

    Useful for applying the network to several datasets, not only the
    training data.  The weights are declared outside this function, so
    every call applies the same weights to its input (data); this makes it
    possible to predict on validation and test data with the optimised
    weights.

    Args:
        data: input batch for the first convolution.

    Returns:
        Output of the last layer (no activation).
    """
    stride = [1, 2, 2, 1]

    # Layer 1: strided convolution + ReLU.  The original applied ReLU a
    # second time to the already rectified result, which cannot change
    # any value.
    layer1 = tf.nn.relu(tf.nn.conv2d(data, weights1, stride, padding='SAME'))

    # Layer 2: strided convolution + ReLU.
    layer2 = tf.nn.relu(tf.nn.conv2d(layer1, weights2, stride, padding='SAME'))

    # Flatten layer 2 into one row per example.
    shape = layer2.get_shape().as_list()
    reshaped_layer2 = tf.reshape(layer2, [shape[0], shape[1] * shape[2] * shape[3]])

    # Layer 3: dense + ReLU.
    layer3 = tf.nn.relu(tf.matmul(reshaped_layer2, weights3))

    # Last layer (output), the return value.
    return tf.matmul(layer3, weights4)
def loss_fn(w_flat):
    """Mean squared reconstruction error of a tied-weight autoencoder.

    The flat parameter vector is reshaped to [visible_size, hidden_size],
    used to encode the module-level ``data`` and, transposed, to decode
    it; both steps are followed by a sigmoid.

    Args:
        w_flat: flat weight tensor.

    Returns:
        Scalar mean of the squared difference between the reconstruction
        and ``data``.
    """
    weights = tf.reshape(w_flat, [visible_size, hidden_size])
    encoded = tf.sigmoid(tf.matmul(data, weights))
    # Decode with the transpose of the same weights.
    decoded = tf.sigmoid(tf.matmul(encoded, weights, transpose_b=True))
    return tf.reduce_mean(tf.square(decoded - data))
def main(_):
    """Build a tiny graph and run the scenario chosen by ``FLAGS.error``.

    Depending on the flag, ``y`` is run with a feed that produces a shape
    mismatch, ``z`` is run while ``v`` is uninitialised, or ``y`` is run
    with a valid feed.  With ``FLAGS.debug`` set, the session is wrapped
    in the CLI debugger first.

    Raises:
        ValueError: if ``FLAGS.error`` is not a recognised value.
    """
    sess = tf.Session()

    # Construct the TensorFlow network.
    ph_float = tf.placeholder(tf.float32, name="ph_float")
    x = tf.transpose(ph_float, name="x")
    v_init = np.array([[-2.0], [-3.0], [6.0]], dtype=np.float32)
    v = tf.Variable(v_init, name="v")
    m_value = np.array([[0.0, 1.0, 2.0], [-4.0, -1.0, 0.0]])
    m = tf.constant(m_value, dtype=tf.float32, name="m")
    y = tf.matmul(m, x, name="y")
    z = tf.matmul(m, v, name="z")

    if FLAGS.debug:
        sess = tf_debug.LocalCLIDebugWrapperSession(sess, ui_type=FLAGS.ui_type)

    scenario = FLAGS.error
    if scenario == "shape_mismatch":
        # A column vector becomes a row after the transpose, which m
        # cannot be multiplied by.
        column_feed = np.array([[0.0], [1.0], [2.0]])
        print(sess.run(y, feed_dict={ph_float: column_feed}))
    elif scenario == "uninitialized_variable":
        # v is never initialised before z is evaluated.
        print(sess.run(z))
    elif scenario == "no_error":
        row_feed = np.array([[0.0, 1.0, 2.0]])
        print(sess.run(y, feed_dict={ph_float: row_feed}))
    else:
        raise ValueError("Unrecognized error type: " + FLAGS.error)
def runNN(train_x, train_y, test_x, test_y, numHidden):
    """Train and periodically evaluate a one-hidden-layer softmax net.

    Args:
        train_x: training features; ``shape[1]`` sets the input width.
        train_y: training labels, converted per batch with ``makeLabels``.
        test_x: test features passed to ``util.showProgress``.
        test_y: test labels passed to ``util.showProgress``.
        numHidden: number of hidden ReLU units.

    Runs ``NUM_EPOCHS`` momentum-SGD steps on batches of ``BATCH_SIZE``
    rows and reports progress every 100 steps.
    """
    # Single-argument call form: same output on Python 2 and 3.
    print("NN({})".format(numHidden))

    session = tf.InteractiveSession()
    try:
        x = tf.placeholder("float", shape=[None, train_x.shape[1]])
        y_ = tf.placeholder("float", shape=[None, 2])

        W1 = tf.Variable(tf.truncated_normal([train_x.shape[1], numHidden], stddev=0.01))
        b1 = tf.Variable(tf.truncated_normal([numHidden], stddev=0.01))
        W2 = tf.Variable(tf.truncated_normal([numHidden, 2], stddev=0.01))
        b2 = tf.Variable(tf.truncated_normal([2], stddev=0.01))

        z = tf.nn.relu(tf.matmul(x, W1) + b1)
        y = tf.nn.softmax(tf.matmul(z, W2) + b2)

        # Clip the probabilities so log() never sees zero.
        cross_entropy = -tf.reduce_sum(y_*tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
        train_step = tf.train.MomentumOptimizer(learning_rate=.001, momentum=0.1).minimize(cross_entropy)

        session.run(tf.initialize_all_variables())

        # Range of valid batch offsets.  Clamped to at least 1 so a
        # training set of exactly BATCH_SIZE rows no longer triggers a
        # modulo by zero.
        offset_span = max(1, train_x.shape[0] - BATCH_SIZE)
        for i in range(NUM_EPOCHS):
            offset = i*BATCH_SIZE % offset_span
            train_step.run({x: train_x[offset:offset+BATCH_SIZE, :],
                            y_: makeLabels(train_y[offset:offset+BATCH_SIZE])})
            if i % 100 == 0:
                util.showProgress(cross_entropy, x, y, y_, test_x, test_y)
    finally:
        # Release the session even when graph building or training fails.
        session.close()
def get_training_model():
    """
    The training model acts on a batch of 128x64 windows, and outputs a
    (1 + 7 * len(common.CHARS)) vector, `v`. `v[0]` is the probability
    that a plate is fully within the image and is at the correct scale.

    `v[1 + i * len(common.CHARS) + c]` is the probability that the `i`'th
    character is `c`.

    Returns a tuple `(x, y, params)`: the input and conv variables come
    from `convolutional_layers()`, `y` is the output of the final dense
    layer, and `params` is the conv variables followed by the two dense
    layers' weights and biases.
    """
    x, conv_layer, conv_vars = convolutional_layers()

    flat_size = 32 * 8 * 128
    num_outputs = 1 + 7 * len(common.CHARS)

    # Densely connected layer.
    W_fc1 = weight_variable([flat_size, 2048])
    b_fc1 = bias_variable([2048])
    flattened = tf.reshape(conv_layer, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flattened, W_fc1) + b_fc1)

    # Output layer.
    W_fc2 = weight_variable([2048, num_outputs])
    b_fc2 = bias_variable([num_outputs])
    y = tf.matmul(hidden, W_fc2) + b_fc2

    dense_vars = [W_fc1, b_fc1, W_fc2, b_fc2]
    return (x, y, conv_vars + dense_vars)
def autoencoder_contd(input_dim, representation):
    """Build a single-hidden-layer sigmoid autoencoder.

    Args:
        input_dim: width of the input (and of the reconstruction).
        representation: width of the hidden code.

    Returns:
        Dict with the input placeholder ('x'), code ('z'), reconstruction
        ('y'), L2 cost ('cost'), encoder weights ('weights' and 'encW'),
        decoder weights ('decW'), the two biases ('encb', 'decb') and the
        mean absolute error per element ('ppx').
    """
    x = tf.placeholder(tf.float32, [None, input_dim])

    # Uniform initialisation in [-limit, limit].  These bounds were
    # previously passed to tf.random_normal, which read them as a
    # (negative) mean and a standard deviation.
    limit = math.sqrt(6.0 / (input_dim + representation))

    high_decW = tf.Variable(
        initial_value=tf.random_uniform(
            [representation, input_dim], -limit, limit),
        dtype=tf.float32,
        name='high_decW')

    # The encoder has its own weights; it is not tied to the decoder.
    high_encW = tf.Variable(
        initial_value=tf.random_uniform(
            [input_dim, representation], -limit, limit),
        name='high_encW')

    high_encb = tf.Variable(tf.zeros([representation]), name='high_encb')
    z = tf.nn.sigmoid(tf.matmul(x, high_encW) + high_encb)

    hidden_weights = high_encW

    high_decb = tf.Variable(tf.zeros([input_dim]), name='high_decb')
    y = tf.nn.sigmoid(tf.matmul(z, high_decW) + high_decb)

    cost = tf.nn.l2_loss(x - y)
    loss_per_pixel = tf.reduce_mean(tf.abs(x - y))

    return {
        'x': x, 'z': z, 'y': y, 'cost': cost,
        'weights': hidden_weights,
        'encW': high_encW, 'decW': high_decW,
        'encb': high_encb, 'decb': high_decb,
        'ppx': loss_per_pixel,
    }
def conv_net(_X, _weights, _biases, _dropout):
    """Two conv/pool/dropout stages, a ReLU dense layer with dropout and
    a linear output layer.

    Args:
        _X: input batch, reshaped here to [-1, 28, 28, 1].
        _weights: dict providing 'wc1', 'wc2', 'wd1' and 'out'.
        _biases: dict providing 'bc1', 'bc2', 'bd1' and 'out'.
        _dropout: keep probability used by all three dropout steps.

    Returns:
        The class-prediction tensor from the output layer.
    """
    # Reshape input picture.
    net = tf.reshape(_X, shape=[-1, 28, 28, 1])

    # Each stage: convolution -> 2x max pool -> dropout.
    conv_stages = (('wc1', 'bc1'), ('wc2', 'bc2'))
    for weight_key, bias_key in conv_stages:
        net = conv2d(net, _weights[weight_key], _biases[bias_key])
        net = max_pool(net, k=2)
        net = tf.nn.dropout(net, _dropout)

    # Flatten to match the row count of the dense weight matrix.
    dense_width = _weights['wd1'].get_shape().as_list()[0]
    flat = tf.reshape(net, [-1, dense_width])

    # Fully connected layer: affine -> ReLU -> dropout.
    dense = tf.nn.relu(tf.add(tf.matmul(flat, _weights['wd1']), _biases['bd1']))
    dense = tf.nn.dropout(dense, _dropout)

    # Output, class prediction.
    return tf.add(tf.matmul(dense, _weights['out']), _biases['out'])
def __init__(self, is_training, config, input_):
    """Build the unrolled multi-layer LSTM language model graph.

    Args:
        is_training: when true, dropout is enabled (if
            ``config.keep_prob < 1``) and the training ops are built.
        config: provides hidden_size, vocab_size, keep_prob, num_layers
            and max_grad_norm.
        input_: provides batch_size, num_steps, input_data and targets.

    Sets ``_input``, ``_initial_state``, ``_cost`` and ``_final_state``;
    in training mode also ``_lr``, ``_train_op``, ``_new_lr`` and
    ``_lr_update``.
    """
    self._input = input_

    batch_size = input_.batch_size
    num_steps = input_.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    # Slightly better results can be obtained with forget gate biases
    # initialized to 1 but the hyperparameters of the model would need to be
    # different than reported in the paper.
    def lstm_cell():
        # With the latest TensorFlow source code (as of Mar 27, 2017),
        # the BasicLSTMCell will need a reuse parameter which is unfortunately not
        # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
        # an argument check here:
        if 'reuse' in inspect.getargspec(
                tf.contrib.rnn.BasicLSTMCell.__init__).args:
            return tf.contrib.rnn.BasicLSTMCell(
                size, forget_bias=0.0, state_is_tuple=True,
                reuse=tf.get_variable_scope().reuse)
        else:
            return tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
    # Cell factory; replaced below by a dropout-wrapped factory when
    # training with keep_prob < 1.
    attn_cell = lstm_cell
    if is_training and config.keep_prob < 1:
        def attn_cell():
            return tf.contrib.rnn.DropoutWrapper(
                lstm_cell(), output_keep_prob=config.keep_prob)
    cell = tf.contrib.rnn.MultiRNNCell(
        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)

    self._initial_state = cell.zero_state(batch_size, data_type())

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size], dtype=data_type())
        inputs = tf.nn.embedding_lookup(embedding, input_.input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
    # This builds an unrolled LSTM for tutorial purposes only.
    # In general, use the rnn() or state_saving_rnn() from rnn.py.
    #
    # The alternative version of the code below is:
    #
    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
    # outputs, state = tf.contrib.rnn.static_rnn(
    #     cell, inputs, initial_state=self._initial_state)
    outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            # Variables are created on the first step and reused afterwards.
            if time_step > 0: tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)

    # Stack the per-step outputs along axis 1 and flatten to [-1, size].
    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
    softmax_w = tf.get_variable("softmax_w", [size, vocab_size], dtype=data_type())
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
    logits = tf.matmul(output, softmax_w) + softmax_b

    # Reshape logits to be 3-D tensor for sequence loss
    logits = tf.reshape(logits, [batch_size, num_steps, vocab_size])

    # use the contrib sequence loss and average over the batches
    loss = tf.contrib.seq2seq.sequence_loss(logits, input_.targets, tf.ones(
        [batch_size, num_steps], dtype=data_type()), average_across_timesteps=False, average_across_batch=True)

    # update the cost variables
    self._cost = cost = tf.reduce_sum(loss)
    self._final_state = state

    # Evaluation-only models stop here: no optimizer ops are built.
    if not is_training:
        return

    # Learning rate lives in a non-trainable variable so it can be
    # reassigned through _lr_update.
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=tf.contrib.framework.get_or_create_global_step())

    self._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
def ready(self):
    """Build the reading-comprehension graph: embeddings, encoders, pointer, loss.

    Reads hyper-parameters from ``self.config`` and input tensors from
    ``self`` (``ch``, ``qh``, ``c``, ``q``, their lengths and masks, ``y1``,
    ``y2``). Stores intermediate tensors (``c_emb``, ``c_ck``, ``q_ck``,
    ``qc_att``, ``att``, ``att_ck``, ``match_ck``), the predictions
    (``yp1_distrib``, ``yp2_distrib``, ``yp1``, ``yp2``) and ``loss`` as
    attributes.
    """
    config = self.config
    N, PL, QL, CL, d, dc, dg = (
        config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit,
        config.hidden, config.char_dim, config.char_hidden)
    gru = cudnn_gru if config.use_cudnn else native_gru

    # NOTE(review): the original indentation was lost; the scope nesting
    # below is reconstructed from the scope names — confirm.
    with tf.variable_scope("emb"):
        with tf.variable_scope("char"):
            # Flatten (example, token) so each token's characters form one
            # sequence for the character-level GRU.
            ch_emb = tf.reshape(tf.nn.embedding_lookup(
                self.char_mat, self.ch), [N * PL, CL, dc])
            qh_emb = tf.reshape(tf.nn.embedding_lookup(
                self.char_mat, self.qh), [N * QL, CL, dc])
            ch_emb = dropout(
                ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            qh_emb = dropout(
                qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            # The same pair of cells is used for context and question.
            cell_fw = tf.contrib.rnn.GRUCell(dg)
            cell_bw = tf.contrib.rnn.GRUCell(dg)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
            ch_emb = tf.concat([state_fw, state_bw], axis=1)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
            qh_emb = tf.concat([state_fw, state_bw], axis=1)
            qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
            ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

        with tf.name_scope("word"):
            c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

        # Token representation = word embedding ++ char-GRU final states.
        self.c_emb = c_emb = tf.concat([c_emb, ch_emb], axis=2)
        q_emb = tf.concat([q_emb, qh_emb], axis=2)

    with tf.variable_scope("encoding"):
        rnn = gru(num_layers=3, num_units=d, batch_size=N,
                  input_size=c_emb.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        c = rnn(c_emb, seq_len=self.c_len)
        q = rnn(q_emb, seq_len=self.q_len)
        self.c_ck = c
        # Fix: the question checkpoint previously stored the context
        # encoding (`c`) instead of the question encoding.
        self.q_ck = q

    with tf.variable_scope("attention"):
        qc_att, self.qc_att = dot_attention(
            c, q, mask=self.q_mask, hidden=d,
            keep_prob=config.keep_prob, is_train=self.is_train, give=True)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=qc_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        att = rnn(qc_att, seq_len=self.c_len)
        self.att = att
        self.att_ck = att

    with tf.variable_scope("match"):
        self_att = dot_attention(
            att, att, mask=self.c_mask, hidden=d,
            keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=self_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        match = rnn(self_att, seq_len=self.c_len)
        self.match_ck = match

    with tf.variable_scope("pointer"):
        # Only the last 2*d features of the question encoding are summarised.
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                    keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                          keep_prob=config.ptr_keep_prob,
                          is_train=self.is_train)
        logits1, logits2 = pointer(init, match, d, self.c_mask)

    with tf.variable_scope("predict"):
        # Outer product of start and end probabilities for every
        # (start, end) pair.
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        # Keep only pairs with 0 <= end - start <= 15.
        outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1_distrib = tf.reduce_max(outer, axis=2)
        self.yp2_distrib = tf.reduce_max(outer, axis=1)
        self.yp1 = tf.argmax(self.yp1_distrib, axis=1)
        self.yp2 = tf.argmax(self.yp2_distrib, axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits1, labels=tf.stop_gradient(self.y1))
        losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits2, labels=tf.stop_gradient(self.y2))
        self.loss = tf.reduce_mean(losses + losses2)
delimiter=',', dtype=np.float32, skiprows=1)

# Linear regression on the loaded table: the first 9 columns are the inputs
# and the last column is the target.
x_data = boston_train[:, :9]
y_data = boston_train[:, [-1]]  # list index keeps the column 2-D
# print(x_data.shape)
# print(y_data.shape)

# Placeholders for a variable-sized batch of inputs and targets.
X = tf.placeholder(tf.float32, shape=[None, 9])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Randomly initialised model parameters.
W = tf.Variable(tf.random_normal([9, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear hypothesis and mean-squared-error cost.
hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Plain gradient descent with a very small step size.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-6)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Full-batch training; the cost is printed every 10000 steps.
for step in range(1000001):
    # W and b are fetched on every step, but only the cost is printed below.
    cost_val, W_val, b_val, _ = \
        sess.run([cost,W,b,train], feed_dict = {X:x_data, Y:y_data})
    if step % 10000 == 0:
        print(step, cost_val) #, W_val, b_val)
# Training-loop settings (consumed outside this fragment).
num_epochs = 5
print_freq = 1

# Transform labels into one-hot encoding form
y_train_OHEnc = tf.one_hot(y_train.copy(), num_classes)
y_val_OHEnc = tf.one_hot(y_val.copy(), num_classes)

# reset placeholders
x = tf.placeholder(tf.float32, [None, total_features])
y_ = tf.placeholder(tf.float32, [None, num_classes])

# One weight matrix and one bias vector per hidden layer; layer i maps
# size_list[i] -> size_list[i + 1].
W_ae_list = [init_weight_variable([size_list[i], size_list[i + 1]]) \
             for i in range(num_layers)]
b_ae_list = [init_bias_variable([size_list[i + 1]])\
             for i in range(num_layers)]

# First hidden layer is fed by the input placeholder; each later layer is
# fed by the previous activation. `batch_nm` is applied after the ReLU.
a_list = [batch_nm(tf.nn.relu(tf.matmul(x, W_ae_list[0]) + b_ae_list[0]))]
for i in range(num_layers - 1):
    # batch normalization for post-activated values
    a_i = batch_nm(tf.nn.relu(tf.matmul(a_list[-1], W_ae_list[i + 1]) + b_ae_list[i + 1]))
    a_list.append(a_i)

# dropout
keep_prob = tf.placeholder(tf.float32)
a_drop = tf.nn.dropout(a_list[-1], keep_prob)

# Softmax output layer; `y_sm` holds the unnormalised logits.
W_sm = init_weight_variable([size_list[-1], num_classes])
b_sm = init_bias_variable([num_classes])
y_sm = tf.matmul(a_drop, W_sm) + b_sm

# Mean softmax cross-entropy, minimised with Adam.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_sm))
train_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)
def forward_fc(self, inp, weights, reuse=False):
    """Run the fully connected network on `inp` with explicit weights.

    `weights` is indexed with keys 'w1'/'b1' ... 'wK'/'bK', where
    K = len(self.dim_hidden) + 1. Every layer except the last is passed
    through `normalize` with a ReLU activation; the last layer is a plain
    affine map whose result is returned.
    """
    n_hidden = len(self.dim_hidden)

    # The first hidden layer uses scope '0'; later layers use their 1-based
    # index ('2', '3', ...) as the scope name.
    hidden = normalize(
        tf.matmul(inp, weights['w1']) + weights['b1'],
        activation=tf.nn.relu, reuse=reuse, scope='0')

    for layer_no in range(2, n_hidden + 1):
        suffix = str(layer_no)
        pre_activation = (
            tf.matmul(hidden, weights['w' + suffix]) + weights['b' + suffix])
        hidden = normalize(
            pre_activation, activation=tf.nn.relu, reuse=reuse, scope=suffix)

    out_suffix = str(n_hidden + 1)
    return tf.matmul(hidden, weights['w' + out_suffix]) + weights['b' + out_suffix]
# Split the targets with the precomputed train/test index sets.
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]

# Declare batch size
batch_size = 100

# Initialize placeholders
x_data = tf.placeholder(shape=[None, 2], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Create variables for the linear model
A = tf.Variable(tf.random_normal(shape=[2,1]))
b = tf.Variable(tf.random_normal(shape=[1,1]))

# Declare model operations: output = x * A - b.
# `tf.sub` / `tf.mul` were removed in TensorFlow 1.0; `tf.subtract` /
# `tf.multiply` are the replacements.
model_output = tf.subtract(tf.matmul(x_data, A), b)

# Declare vector L2 'norm' function squared
l2_norm = tf.reduce_sum(tf.square(A))

# Declare loss function
# = max(0, 1-pred*actual) + alpha * L2_norm(A)^2
# L2 regularization parameter, alpha
alpha = tf.constant([0.01])

# Margin term in loss
classification_term = tf.reduce_mean(
    tf.maximum(0., tf.subtract(1., tf.multiply(model_output, y_target))))

# Put terms together
loss = tf.add(classification_term, tf.multiply(alpha, l2_norm))

# Declare prediction function: the sign of the model output
prediction = tf.sign(model_output)
def __init__(self, cfg, vocab_counts):
    """Build the context/verb scoring graph with a negative-sampling loss.

    Args:
        cfg: configuration object; this method reads ``pretrained_context``,
            ``pretrained_target``, ``tune_emb``, ``word_dim``,
            ``neg_sample``, ``verb_size``, ``num_units`` and
            ``output_units``.
        vocab_counts: unigram counts passed to the negative sampler.

    Creates the input placeholders, ``self.neg_ids``, ``self.loss`` and
    ``self.train_op`` as attributes.
    """
    # NOTE(review): the original indentation was lost; scope/device nesting
    # below is reconstructed from the commented-out `tf.device` lines —
    # confirm the intended device placement.

    # add data placeholders
    self.left_context = tf.placeholder(name="left_context", shape=[None, None], dtype=tf.int32)
    self.left_seq_len = tf.placeholder(name="left_seq_len", shape=[None], dtype=tf.int32)
    self.right_context = tf.placeholder(name="right_context", shape=[None, None], dtype=tf.int32)
    self.right_seq_len = tf.placeholder(name="right_seq_len", shape=[None], dtype=tf.int32)
    self.verb = tf.placeholder(name="verb", shape=[None], dtype=tf.int32)

    # add hyper-parameter placeholders
    self.batch_size = tf.placeholder(name="batch_size", dtype=tf.int32)
    self.is_train = tf.placeholder(name="is_train", shape=[], dtype=tf.bool)
    self.drop_rate = tf.placeholder(name="dropout_rate", dtype=tf.float32)
    self.lr = tf.placeholder(name="learning_rate", dtype=tf.float32)

    # build embedding lookup table
    with tf.device("/gpu:0"):
        with tf.variable_scope("context_lookup_table"):
            self.word_embeddings = tf.Variable(np.load(cfg.pretrained_context)["embeddings"],
                                               name="word_embeddings", dtype=tf.float32,
                                               trainable=cfg.tune_emb)
            # Row 0 is replaced by a constant zero vector.
            self.word_embeddings = tf.concat(
                [tf.zeros([1, cfg.word_dim]), self.word_embeddings[1:, :]], axis=0)
        with tf.variable_scope("target_lookup_table"):
            # Unlike the word table, row 0 of the verb table is not zeroed.
            self.verb_embeddings = tf.Variable(np.load(cfg.pretrained_target)["embeddings"],
                                               name="verb_embeddings", dtype=tf.float32,
                                               trainable=cfg.tune_emb)

    # negative sampling
    self.neg_ids, _, _ = (tf.nn.fixed_unigram_candidate_sampler(
        true_classes=tf.cast(tf.expand_dims(self.verb, axis=1), dtype=tf.int64),
        num_true=1,
        num_sampled=cfg.neg_sample,
        unique=True,
        range_max=cfg.verb_size,
        distortion=0.75,
        unigrams=vocab_counts))
    print('neg_ids : ', self.neg_ids)

    # embedding lookup
    with tf.variable_scope("embedding_lookup"):
        left_context_emb = tf.nn.embedding_lookup(self.word_embeddings, self.left_context)
        right_context_emb = tf.nn.embedding_lookup(self.word_embeddings, self.right_context)
        verb_emb = tf.nn.embedding_lookup(self.verb_embeddings, self.verb)
        neg_verb_emb = tf.nn.embedding_lookup(self.verb_embeddings, self.neg_ids)

    # right and left context bi-LSTMs (separate parameters per side)
    with tf.device("/gpu:0"):
        with tf.variable_scope("right_context_representation"):
            cell_fw = LSTMCell(num_units=cfg.num_units)
            cell_bw = LSTMCell(num_units=cfg.num_units)
            h_rc, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, right_context_emb,
                                                sequence_length=self.right_seq_len,
                                                dtype=tf.float32, time_major=False,
                                                scope="bi_lstm")
            h_rc = tf.concat(h_rc, axis=-1)
            # self-attention
            h_rc = self_attention(h_rc, name="self_attn_right")
            r_weight = tf.get_variable(name="r_weight",
                                       shape=[2 * cfg.num_units, 2 * cfg.num_units],
                                       dtype=tf.float32)
            h_rc = tf.nn.tanh(tf.matmul(h_rc, r_weight))
            print("right context shape: {}".format(h_rc.get_shape().as_list()))

        with tf.variable_scope("left_context_representation"):
            cell_fw = LSTMCell(num_units=cfg.num_units)
            cell_bw = LSTMCell(num_units=cfg.num_units)
            h_lc, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, left_context_emb,
                                                sequence_length=self.left_seq_len,
                                                dtype=tf.float32, time_major=False,
                                                scope="bi_lstm")
            h_lc = tf.concat(h_lc, axis=-1)  # shape = (batch_size, max_len, 2 * num_units)
            # self-attention
            h_lc = self_attention(h_lc, name="self_attn_left")  # shape = (batch_size, 2 * num_units)
            l_weight = tf.get_variable(name="l_weight",
                                       shape=[2 * cfg.num_units, 2 * cfg.num_units],
                                       dtype=tf.float32)
            h_lc = tf.nn.tanh(tf.matmul(h_lc, l_weight))
            print("left context shape: {}".format(h_lc.get_shape().as_list()))

    with tf.device("/gpu:1"):
        with tf.variable_scope("neural_tensor_network"):
            T = tf.get_variable(name="T",
                                shape=[cfg.output_units, 2 * cfg.num_units, 2 * cfg.num_units],
                                dtype=tf.float32)
            W = tf.get_variable(name="W", shape=[4 * cfg.num_units, cfg.output_units],
                                dtype=tf.float32)
            b = tf.get_variable(name="b", shape=[cfg.output_units], dtype=tf.float32)
            # compute tensors
            ff_product = tf.matmul(tf.concat([h_lc, h_rc], axis=-1), W)
            # One bilinear form h_lc^T T[k] h_rc per output unit; each entry
            # of the list has shape (batch_size,).
            bilinear_list = [
                tf.reduce_sum(tf.matmul(h_lc, T[k]) * h_rc, axis=1)
                for k in range(cfg.output_units)
            ]
            # Fix: stack the slices as columns. Concatenating on axis 0 and
            # reshaping to [-1, output_units] interleaved values from
            # different examples and slices.
            bilinear = tf.stack(bilinear_list, axis=1)  # (batch_size, output_units)
            context = tf.nn.tanh(bilinear + ff_product + b)  # shape = (batch_size, output_units)
            print("context representation shape: {}".format(context.get_shape().as_list()))

    with tf.variable_scope("verb_representation"):
        target_verb = ffn_layer(verb_emb, cfg.num_units, cfg.output_units, scope="ffn_layer")
        print("verb representation shape: {}".format(target_verb.get_shape().as_list()))
        # Negative samples go through the same feed-forward parameters.
        tf.get_variable_scope().reuse_variables()
        negative_verbs = ffn_layer(neg_verb_emb, cfg.num_units, cfg.output_units,
                                   scope="ffn_layer")
        print("negative verb shape: {}".format(negative_verbs.get_shape().as_list()))

    with tf.variable_scope("compute_loss"):
        true_logits = tf.reduce_sum(context * target_verb, axis=1)
        print("true logits shape: {}".format(true_logits.get_shape().as_list()))
        neg_logits = tf.matmul(context, tf.transpose(negative_verbs, [1, 0]))
        print("negative logits shape: {}".format(neg_logits.get_shape().as_list()))

    with tf.variable_scope("nce_loss"):
        # cross-entropy(logits, labels)
        true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=true_logits, labels=tf.ones_like(true_logits))
        sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=neg_logits, labels=tf.zeros_like(neg_logits))
        # NCE-loss is the sum of the true and noise (sampled words)
        # contributions, averaged over the batch.
        self.loss = (tf.reduce_sum(true_xent) + tf.reduce_sum(sampled_xent)) / tf.cast(
            self.batch_size, dtype=tf.float32)

    optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
    self.train_op = optimizer.minimize(self.loss)
def model_fn(features, labels, mode):
    """Estimator model function: CNN -> 3-layer bidirectional GRU -> dense head.

    Args:
        features: dict holding the input tensor under the key 'angular'.
        labels: class labels (unused in PREDICT mode).
        mode: one of the `tf.estimator.ModeKeys` values.

    Returns:
        A `tf.estimator.EstimatorSpec` for PREDICT, TRAIN or EVAL.
    """
    input_layer = tf.reshape(
        features['angular'],
        shape=[-1, cfg.anguler_shape[0], cfg.anguler_shape[1], 6])
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    net = cnn_model(input_layer, is_training, '')

    with tf.variable_scope('Reshape_cnn'):
        output_shape = net.get_shape().as_list()  # [batch,height,width,features]
        # Swap the two middle axes, then merge the old axis 1 with the
        # feature axis so the RNN sees [batch, axis-2, axis-1 * features].
        net = tf.transpose(net, [0, 2, 1, 3])
        net = tf.reshape(
            net,
            shape=[-1, output_shape[2], output_shape[1] * output_shape[3]])

    with tf.variable_scope('bi_GRU'):
        # Fix: recurrent-state dropout must only be active while training.
        # The wrapper was previously built with keep_prob=0.5 in every mode,
        # so EVAL/PREDICT ran with dropout enabled.
        state_keep_prob = 0.5 if is_training else 1.0

        def _build_cells():
            # One direction's stack of three dropout-wrapped GRU cells.
            return [
                tf.nn.rnn_cell.DropoutWrapper(
                    tf.nn.rnn_cell.GRUCell(
                        1024, kernel_initializer=tf.orthogonal_initializer),
                    state_keep_prob=state_keep_prob)
                for _ in range(3)
            ]

        fw_cell_list = _build_cells()
        bw_cell_list = _build_cells()
        multi_rnn_fW_cell = tf.nn.rnn_cell.MultiRNNCell(fw_cell_list)
        multi_rnn_bw_cell = tf.nn.rnn_cell.MultiRNNCell(bw_cell_list)
        rnn_outputs, (last_state_fw, last_state_bw) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=multi_rnn_fW_cell,
            cell_bw=multi_rnn_bw_cell,
            inputs=net,
            dtype=tf.float32)

    # record rnn cells: histogram summaries for the 'candidate' variables of
    # every GRU cell created under the 'bi_GRU' scope.
    # NOTE(review): the original indentation was lost; this loop is assumed
    # to sit outside the 'bi_GRU' scope — confirm (affects summary tags only).
    for var in tf.global_variables():
        name_parts = var.name.split('/')
        if name_parts[0] == 'bi_GRU':
            gates_candidate = name_parts[-2]
            fw_cell = name_parts[2] + '_' + name_parts[-4] + '_'
            kernal_bise = name_parts[-1].split(':')[0]
            if gates_candidate == 'candidate':
                tf.summary.histogram('bi_GRU_' + fw_cell + kernal_bise, var)

    with tf.variable_scope('dense_layer'):
        rnn_outputs_merged = tf.concat(rnn_outputs, 2)
        # squeeze(axis=1) requires the time dimension to be exactly 1.
        rnn_finial = tf.squeeze(rnn_outputs_merged, 1)
        weight = tf.get_variable(
            'birnn_out_weight', [2 * 1024, 1024],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        bise = tf.get_variable(
            'birnn_out_bise', [1024],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        net = tf.matmul(rnn_finial, weight) + bise

    net = tf.layers.dropout(inputs=net, rate=0.4, training=is_training)
    logits = tf.layers.dense(inputs=net, units=cfg.num_class, activation=tf.nn.relu)

    predictions = {
        'classes': tf.argmax(tf.nn.softmax(logits), axis=1, name='predict_class'),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # tf.metrics.accuracy returns (value, update_op); index 1 is summarised.
    accuracy = tf.metrics.accuracy(
        labels=labels, predictions=tf.argmax(tf.nn.softmax(logits), axis=1))
    accuracy = tf.Print(accuracy, [accuracy], 'Acuracy__')
    tf.summary.scalar('train_accuracy', accuracy[1])

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Run pending UPDATE_OPS together with each optimisation step.
        update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_op):
            optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    eval_metric = {
        'accuracy': tf.metrics.accuracy(labels=labels,
                                        predictions=predictions['classes'])
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          eval_metric_ops=eval_metric)
def call(self, inputs, training=None):
    """Run the bidirectional encoder and the attention decoder over `inputs`.

    The encoder scans `inputs` forwards and backwards with `GRU_f` / `GRU_b`;
    the decoder scans the concatenated encoder states (skipping the first
    step) with attention and late fusion. The per-step output scores are
    stored in `self.y` and returned; the fused hidden states are stored in
    `self.last_hidden_states`. `training` is accepted but not used here.
    """

    def encoder_step(prev_states, step_tokens):
        # One step of both directions; each direction gets its own tokens.
        fwd_tokens, bwd_tokens = step_tokens
        fwd_prev, bwd_prev = prev_states
        fwd_state = self.GRU_f(
            inputs=(tf.nn.embedding_lookup(self.We, fwd_tokens), fwd_prev))
        bwd_state = self.GRU_b(
            inputs=(tf.nn.embedding_lookup(self.We, bwd_tokens), bwd_prev))
        return [fwd_state, bwd_state]

    # Forward pass over `inputs`, backward pass over the reversed sequence.
    fwd_states, bwd_states = tf.scan(
        fn=encoder_step,
        elems=[inputs, inputs[::-1]],
        initializer=[self.GRU_f.h0, self.GRU_b.h0])

    # 0-axis is time steps, 1-axis is batch size and 2-axis is hidden layer
    # size. The backward states are flipped back into forward time order.
    context = tf.concat([fwd_states, bwd_states[::-1]], axis=2)

    # Per-time-step `context @ Wa_c + ba`: Wa_c is tiled along the time axis
    # so one batched matmul covers every slice.
    wa_c_3d = tf.expand_dims(self.Wa_c, 0)
    tile_multiples = tf.stack([tf.shape(context)[0], 1, 1])
    projected_context = tf.matmul(
        context, tf.tile(wa_c_3d, tile_multiples)) + self.ba

    def decoder_step(carry, context_t):
        prev_hidden, _, _ = carry

        # Attention model: score every encoder position against the
        # previous decoder state.
        attn_hidden = tf.nn.tanh(
            projected_context + tf.matmul(prev_hidden, self.Wa_h))
        # Flatten to 2-D for the matmul with Wa_y, then restore the leading
        # two axes (this drops the trailing size-1 axis).
        flat_hidden = tf.reshape(attn_hidden, [-1, tf.shape(attn_hidden)[-1]])
        scores = tf.reshape(
            tf.matmul(flat_hidden, tf.expand_dims(self.Wa_y, -1)),
            tf.shape(attn_hidden)[:2])
        alphas = tf.exp(scores)
        # Normalise over the time axis.
        alphas = alphas / tf.reduce_sum(alphas, axis=0, keepdims=True)
        weighted_context = tf.reduce_sum(
            context * alphas[:, :, None], axis=0)

        hidden = self.GRU(inputs=(context_t, prev_hidden))

        # Late fusion
        fused_context = tf.matmul(weighted_context, self.Wf_c)
        fusion_gate = tf.nn.sigmoid(
            tf.matmul(fused_context, self.Wf_f)
            + tf.matmul(hidden, self.Wf_h)
            + self.bf)
        # weighted fused context + hidden state
        fused_hidden = fused_context * fusion_gate + hidden

        # Raw scores are returned; no softmax is applied here.
        step_scores = tf.matmul(fused_hidden, self.Wy) + self.by
        return [hidden, fused_hidden, step_scores]

    # Ignore the 1st word in context, because there's no punctuation before
    # that.
    decoder_out = tf.scan(
        fn=decoder_step,
        elems=context[1:],
        initializer=[
            self.GRU.h0,
            self.GRU.h0,
            tf.zeros([self.minibatch_size, self.y_vocabulary_size]),
        ])
    _, self.last_hidden_states, self.y = decoder_out

    return self.y
import tensorflow as tf
import numpy as np

# Logistic regression on 'data-03-diabetes.csv': every column except the
# last is an input, the last column is the target.
xy = np.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]  # list index keeps the column 2-D

# Placeholders for a variable-sized batch with 8 input columns.
X = tf.placeholder(tf.float32, shape=[None, 8])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Randomly initialised model parameters.
W = tf.Variable(tf.random_normal([8, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Sigmoid hypothesis and binary cross-entropy cost.
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Threshold at 0.5 to get a 0/1 prediction; accuracy is the fraction of
# predictions equal to the target.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {X: x_data, Y: y_data}
    # Full-batch training; the cost is printed every 200 steps.
    for step in range(10001):
        sess.run(train, feed_dict=feed)
        if step % 200 == 0:
            print(step, sess.run(cost, feed_dict=feed))
    # Final hypothesis values, thresholded predictions and accuracy,
    # evaluated on the same data used for training.
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=feed)
def embedding_postprocessor(input_tensor,
                            use_token_type=False,
                            token_type_ids=None,
                            token_type_vocab_size=16,
                            token_type_embedding_name="token_type_embeddings",
                            use_position_embeddings=True,
                            position_embedding_name="position_embeddings",
                            initializer_range=0.02,
                            max_position_embeddings=512,
                            dropout_prob=0.1):
    """Performs various post-processing on a word embedding tensor.

    Args:
      input_tensor: float Tensor of shape [batch_size, seq_length,
        embedding_size].
      use_token_type: bool. Whether to add embeddings for `token_type_ids`.
      token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
        Must be specified if `use_token_type` is True.
      token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
      token_type_embedding_name: string. The name of the embedding table variable
        for token type ids.
      use_position_embeddings: bool. Whether to add position embeddings for the
        position of each token in the sequence.
      position_embedding_name: string. The name of the embedding table variable
        for positional embeddings.
      initializer_range: float. Range of the weight initialization.
      max_position_embeddings: int. Maximum sequence length that might ever be
        used with this model. This can be longer than the sequence length of
        input_tensor, but cannot be shorter.
      dropout_prob: float. Dropout probability applied to the final output tensor.

    Returns:
      float tensor with same shape as `input_tensor`.

    Raises:
      ValueError: One of the tensor shapes or input values is invalid: the
        input is not rank 3, `token_type_ids` is missing while
        `use_token_type` is True, or a statically known sequence length
        exceeds `max_position_embeddings`.
    """
    input_shape = get_shape_list(input_tensor, expected_rank=3)
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    width = input_shape[2]

    output = input_tensor

    if use_token_type:
        if token_type_ids is None:
            # Fix: the two message fragments were joined without a space.
            raise ValueError("`token_type_ids` must be specified if "
                             "`use_token_type` is True.")
        token_type_table = tf.get_variable(
            name=token_type_embedding_name,
            shape=[token_type_vocab_size, width],
            initializer=create_initializer(initializer_range))
        # This vocab will be small so we always do one-hot here, since it is
        # always faster for a small vocabulary.
        flat_token_type_ids = tf.reshape(token_type_ids, [-1])
        one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size)
        token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
        token_type_embeddings = tf.reshape(token_type_embeddings,
                                           [batch_size, seq_length, width])
        output += token_type_embeddings

    if use_position_embeddings:
        # The graph-level `tf.assert_less_equal` check is disabled in this
        # file. When the sequence length is statically known, validate it
        # eagerly so an over-long input fails with a clear message.
        if isinstance(seq_length, int) and seq_length > max_position_embeddings:
            raise ValueError(
                "The sequence length (%d) cannot be greater than "
                "`max_position_embeddings` (%d)." %
                (seq_length, max_position_embeddings))

        full_position_embeddings = tf.get_variable(
            name=position_embedding_name,
            shape=[max_position_embeddings, width],
            initializer=create_initializer(initializer_range))
        # Since the position embedding table is a learned variable, we create
        # it using a (long) sequence length `max_position_embeddings`. The
        # actual sequence length might be shorter than this, for faster
        # training of tasks that do not have long sequences.
        #
        # So `full_position_embeddings` is effectively an embedding table
        # for position [0, 1, 2, ..., max_position_embeddings-1], and the
        # current sequence has positions [0, 1, 2, ... seq_length-1], so we
        # can just perform a slice.
        position_embeddings = tf.slice(full_position_embeddings, [0, 0],
                                       [seq_length, -1])
        num_dims = len(output.shape.as_list())

        # Only the last two dimensions are relevant (`seq_length` and
        # `width`), so we broadcast among the first dimensions, which is
        # typically just the batch size.
        position_broadcast_shape = [1] * (num_dims - 2) + [seq_length, width]
        position_embeddings = tf.reshape(position_embeddings,
                                         position_broadcast_shape)
        output += position_embeddings

    output = layer_norm_and_dropout(output, dropout_prob)
    return output
# Second convolution block: 5x5 kernels, 16 -> 36 channels, ReLU.
with tf.name_scope("Conv_2"):
    w2 = weight([5, 5, 16, 36])
    b2 = bias([36])
    Conv_2 = conv2d(C1_Pool, w2) + b2
    C2 = tf.nn.relu(Conv_2)

with tf.name_scope("C2_Pool"):
    C2_Pool = average_pool_2x2(C2)

with tf.name_scope("Flatten"):
    # NOTE(review): reshaping to [-1, 36] keeps one row per example only if
    # the pooled feature map is 1x1 spatially — confirm against the input
    # size used upstream.
    Flatten = tf.reshape(C2_Pool,[-1,36])

with tf.name_scope("Hidden_layer_1"):
    w3 = weight([36,24])
    b3 = bias([24])
    D_Hidden = tf.nn.relu(tf.matmul(Flatten, w3)+b3)
    D_Hidden_Dropout = tf.nn.dropout(D_Hidden, dropout)

with tf.name_scope("Output_layer"):
    w4 = weight([24,10])
    b4 = bias([10])
    # Keep the unnormalised scores separately: the loss below needs raw
    # logits, while `y_predict` stays the softmax probabilities.
    y_logits = tf.matmul(D_Hidden_Dropout,w4)+b4
    y_predict = tf.nn.softmax(y_logits)

# Adjust our model
with tf.name_scope("Optimizer"):
    # Fix: softmax_cross_entropy_with_logits applies softmax internally, so
    # passing `y_predict` applied softmax twice and distorted the loss.
    loss_function = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_function)

with tf.name_scope("Accuracy"):
    # A prediction is correct when the arg-max class matches the label's.
    correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y, 1))
# ######################################## difference from rosettta DIM_NUM = real_X.shape[1] X = tf.placeholder(tf.float32, [None, DIM_NUM]) Y = tf.placeholder(tf.float32, [None, 1]) print(X) print(Y) # initialize W & b W = tf.Variable(tf.zeros([DIM_NUM, 1]), dtype=tf.float32, name='w') b = tf.Variable(tf.zeros([1]), dtype=tf.float32, name='b') print(W) print(b) # predict pred_Y = tf.sigmoid(tf.matmul(X, W) + b) print(pred_Y) # loss logits = tf.matmul(X, W) + b loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits) loss = tf.reduce_mean(loss) print(loss) # optimizer train = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss) print(train) init = tf.global_variables_initializer() print(init)
def attention_layer(from_tensor,
                    to_tensor,
                    attention_mask=None,
                    num_attention_heads=1,
                    size_per_head=512,
                    query_act=None,
                    key_act=None,
                    value_act=None,
                    attention_probs_dropout_prob=0.0,
                    initializer_range=0.02,
                    do_return_2d_tensor=False,
                    batch_size=None,
                    from_seq_length=None,
                    to_seq_length=None):
    """Multi-headed attention from `from_tensor` to `to_tensor`.

    Implements the multi-headed attention of "Attention is all you Need".
    When `from_tensor` and `to_tensor` are the same tensor this is
    self-attention. `from_tensor` is projected to a "query"; `to_tensor` is
    projected to a "key" and a "value". Query and key are dot-producted and
    scaled, the scores are softmaxed into attention probabilities, and the
    values are interpolated by those probabilities and concatenated back
    into a single tensor. The heads are realised with reshapes and
    transposes rather than separate tensors.

    Args:
      from_tensor: float Tensor of shape [batch_size, from_seq_length,
        from_width].
      to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
      attention_mask: (optional) int32 Tensor of shape [batch_size,
        from_seq_length, to_seq_length] with values 1 or 0. Scores at
        positions where the mask is 0 are effectively set to -infinity;
        positions where it is 1 are unchanged.
      num_attention_heads: int. Number of attention heads.
      size_per_head: int. Size of each attention head.
      query_act: (optional) Activation function for the query transform.
      key_act: (optional) Activation function for the key transform.
      value_act: (optional) Activation function for the value transform.
      attention_probs_dropout_prob: (optional) float. Dropout probability of
        the attention probabilities.
      initializer_range: float. Range of the weight initializer.
      do_return_2d_tensor: bool. If True, the output has shape
        [batch_size * from_seq_length, num_attention_heads * size_per_head];
        otherwise [batch_size, from_seq_length,
        num_attention_heads * size_per_head].
      batch_size: (Optional) int. For 2D inputs, the batch size of the 3D
        version of `from_tensor` and `to_tensor`.
      from_seq_length: (Optional) For 2D inputs, the sequence length of the
        3D version of `from_tensor`.
      to_seq_length: (Optional) For 2D inputs, the sequence length of the 3D
        version of `to_tensor`.

    Returns:
      float Tensor of shape [batch_size, from_seq_length,
      num_attention_heads * size_per_head], or the 2D form described above
      when `do_return_2d_tensor` is true.

    Raises:
      ValueError: Any of the arguments or tensor shapes are invalid.
    """

    def split_heads(flat_tensor, batch, heads, length, head_width):
        # [batch*length, heads*head_width] -> [batch, heads, length, head_width]
        per_head = tf.reshape(flat_tensor, [batch, length, heads, head_width])
        return tf.transpose(per_head, [0, 2, 1, 3])

    from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
    to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])
    from_rank = len(from_shape)

    if from_rank != len(to_shape):
        raise ValueError(
            "The rank of `from_tensor` must match the rank of `to_tensor`.")

    if from_rank == 3:
        # 3D inputs carry their own batch size and sequence lengths.
        batch_size = from_shape[0]
        from_seq_length = from_shape[1]
        to_seq_length = to_shape[1]
    elif from_rank == 2:
        if (batch_size is None or from_seq_length is None or
                to_seq_length is None):
            raise ValueError(
                "When passing in rank 2 tensors to attention_layer, the values "
                "for `batch_size`, `from_seq_length`, and `to_seq_length` "
                "must all be specified.")

    # Scalar dimensions referenced here:
    #   B = batch size (number of sequences)
    #   F = `from_tensor` sequence length
    #   T = `to_tensor` sequence length
    #   N = `num_attention_heads`
    #   H = `size_per_head`
    projection_width = num_attention_heads * size_per_head
    from_tensor_2d = reshape_to_matrix(from_tensor)
    to_tensor_2d = reshape_to_matrix(to_tensor)

    # query: [B*F, N*H]
    query_layer = tf.layers.dense(
        from_tensor_2d,
        projection_width,
        activation=query_act,
        name="query",
        kernel_initializer=create_initializer(initializer_range))

    # key: [B*T, N*H]
    key_layer = tf.layers.dense(
        to_tensor_2d,
        projection_width,
        activation=key_act,
        name="key",
        kernel_initializer=create_initializer(initializer_range))

    # value: [B*T, N*H]
    value_layer = tf.layers.dense(
        to_tensor_2d,
        projection_width,
        activation=value_act,
        name="value",
        kernel_initializer=create_initializer(initializer_range))

    # query -> [B, N, F, H], key -> [B, N, T, H]
    query_layer = split_heads(query_layer, batch_size, num_attention_heads,
                              from_seq_length, size_per_head)
    key_layer = split_heads(key_layer, batch_size, num_attention_heads,
                            to_seq_length, size_per_head)

    # Raw attention scores, [B, N, F, T]: query . key, scaled by 1/sqrt(H).
    attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
    attention_scores = tf.multiply(attention_scores,
                                   1.0 / math.sqrt(float(size_per_head)))

    if attention_mask is not None:
        # mask -> [B, 1, F, T] so it broadcasts over the heads.
        attention_mask = tf.expand_dims(attention_mask, axis=[1])

        # The mask is 1.0 where attention is allowed and 0.0 where it is not,
        # so this is 0.0 for allowed positions and -10000.0 for masked ones.
        # Added before the softmax, it effectively removes masked positions.
        adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
        attention_scores += adder

    # Probabilities, [B, N, F, T].
    attention_probs = tf.nn.softmax(attention_scores)

    # This drops entire tokens to attend to, which might seem unusual but is
    # taken from the original Transformer paper.
    attention_probs = dropout(attention_probs, attention_probs_dropout_prob)

    # value -> [B, N, T, H]
    value_layer = split_heads(value_layer, batch_size, num_attention_heads,
                              to_seq_length, size_per_head)

    # context: [B, N, F, H] -> [B, F, N, H]
    context_layer = tf.matmul(attention_probs, value_layer)
    context_layer = tf.transpose(context_layer, [0, 2, 1, 3])

    # Merge the heads: [B*F, N*H] when a 2D result is requested, else
    # [B, F, N*H].
    if do_return_2d_tensor:
        output_shape = [batch_size * from_seq_length, projection_width]
    else:
        output_shape = [batch_size, from_seq_length, projection_width]
    context_layer = tf.reshape(context_layer, output_shape)

    return context_layer
def full_layer(input, size):
    """Apply an affine (fully connected) transform to a 2-D tensor.

    The input width is taken from the static shape of ``input`` (axis 1).
    A weight of shape ``[in_width, size]`` and a bias of shape ``[size]`` are
    created through the file's ``weight_variable`` / ``bias_variable``
    helpers. No non-linearity is applied.
    """
    fan_in = int(input.get_shape()[1])
    weights = weight_variable([fan_in, size])
    offset = bias_variable([size])
    projected = tf.matmul(input, weights)
    return projected + offset
def __main__(env_name='FrozenLake-v0', learning_rate=0.1, gamma=0.99, epsilon=0.1, num_episodes=5000, debug=False, debug_scale=500):
    """Train a one-layer Q-network on a discrete gym environment.

    The network is a single weight matrix mapping a one-hot state to one
    Q-value per action. Each step it is nudged towards the bootstrapped
    target ``reward + gamma * max(Q(next_state))`` for the action taken.

    Args:
        env_name: id passed to ``gym.make``.
        learning_rate: step size for plain gradient descent.
        gamma: discount factor applied to the best next-state Q-value.
        epsilon: initial probability of replacing the greedy action with a
            random one; it is re-computed every time an episode finishes.
        num_episodes: number of episodes to run.
        debug: when true, render every ``debug_scale``-th episode slowly.
        debug_scale: spacing (in episodes) between rendered episodes.

    Returns:
        List with the cumulative reward of every episode.
    """
    # make this environment
    env = gym.make(env_name)

    # Reset tensorflow graph
    tf.reset_default_graph()

    # Feed-forward part of the network
    env_size = env.observation_space.n
    action_size = env.action_space.n
    inputs1 = tf.placeholder(shape=[1, env_size], dtype=tf.float32)
    W = tf.Variable(tf.random_uniform([env_size, action_size], 0, 0.01))
    Qout = tf.matmul(inputs1, W)
    predict = tf.argmax(Qout, 1)

    # Loss: sum of squared differences between target and predicted Q values
    nextQ = tf.placeholder(shape=[1, action_size], dtype=tf.float32)
    loss = tf.reduce_sum(tf.square(nextQ - Qout))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    updateModel = optimizer.minimize(loss)

    init = tf.initialize_all_variables()

    # Per-episode total reward and number of steps taken
    total_rewards = []
    steps_to_complete = []

    # Row `s` of the identity matrix is the one-hot encoding of state `s`;
    # build the matrix once instead of on every step.
    one_hot_states = np.identity(env_size)

    with tf.Session() as sess:
        sess.run(init)
        for i in range(num_episodes):
            show_episode = (debug and i % debug_scale == 0)
            if show_episode:
                print("Showing epsiode {}/{} ...\n".format(i, num_episodes))
                time.sleep(1)

            state = env.reset()
            cumulative_reward = 0
            done = False
            current_step = 0

            while not done:
                if show_episode:
                    env.render()
                    time.sleep(0.5)

                current_step += 1

                # Predict next action using the network rather than a Q-table
                one_hot_encoded_state = one_hot_states[state:state + 1]
                action, q_values = sess.run([predict, Qout], feed_dict={inputs1: one_hot_encoded_state})

                # Epsilon greedy (bigger epsilon, more random)
                if np.random.rand(1) < epsilon:
                    action[0] = env.action_space.sample()

                # Take action (either chosen or random)
                new_state, reward, done, _ = env.step(action[0])
                one_hot_encoded_new_state = one_hot_states[new_state:new_state + 1]
                Q1 = sess.run(Qout, feed_dict={inputs1: one_hot_encoded_new_state})
                maxQ1 = np.max(Q1)

                # Only the entry of the action actually taken is moved towards
                # the bootstrapped target; the others keep their prediction so
                # they contribute zero loss.
                targetQ = q_values
                targetQ[0, action[0]] = reward + gamma * maxQ1
                sess.run(updateModel, feed_dict={inputs1: one_hot_encoded_state, nextQ: targetQ})

                # For logging
                cumulative_reward += reward

                # Move to next state
                state = new_state

                if done:
                    # Decay exploration as training progresses
                    epsilon = 1.0/((i/50) + 10)
                    break

            steps_to_complete.append(current_step)
            total_rewards.append(cumulative_reward)

            if show_episode:
                print("Completed with cumulative reward of {}...\n".format(cumulative_reward))
                time.sleep(1)

    # Report before returning: this statement used to sit after the `return`
    # and was therefore never executed.
    if num_episodes:
        print("Percent of success: {}...\n".format(sum(total_rewards)/num_episodes))
    return total_rewards
import tensorflow as tf
from numpy.random import RandomState

# Number of samples fed to the graph per training step.
batch_size=20

# Two weight matrices (2 -> 3 -> 1) drawn from a seeded normal distribution.
w1=tf.Variable(tf.random_normal([2,3],seed=1))
w2=tf.Variable(tf.random_normal([3,1],seed=1))

# Placeholders are declared without a shape; they are fed slices of X / Y below.
x=tf.placeholder(tf.float32,name='x-input')
y_=tf.placeholder(tf.float32,name='y-input')

# Forward pass: two matrix products, no bias and no non-linearity.
a=tf.matmul(x,w1)
y=tf.matmul(a,w2)

# Loss is -mean(y_ * log(clip(y, 1e-10, 1.0))); the clip keeps log() finite.
# NOTE(review): y is a raw linear output, not a probability, so the clip also
# silently saturates any prediction above 1 -- confirm this is intended.
cross_entropy=-tf.reduce_mean(y_*tf.log(tf.clip_by_value(y,1e-10,1.0)))
train_step=tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# Synthetic data set: 128 seeded uniform 2-D points, labelled 1 when x1 + x2 < 1.
rdm=RandomState(1)
dataset_size=128
X=rdm.rand(dataset_size,2)
Y=[[int(x1+x2<1)] for (x1,x2) in X]

with tf.Session() as sess:
    init_op=tf.global_variables_initializer()
    sess.run(init_op)
    # Show the weights before training.
    print(sess.run(w1))
    print(sess.run(w2))

    STEPS=10000
    for i in range(STEPS):
        # Walk through the data set in consecutive windows of batch_size,
        # wrapping around with the modulo; the last window may be shorter.
        start=(i*batch_size)%dataset_size
        end=min(start+batch_size,dataset_size)
        sess.run(train_step,feed_dict={x:X[start:end],y_:Y[start:end]})
        # NOTE(review): the body of this periodic (every 1000 steps) branch is
        # missing from the file at this point.
        if i%1000==0:
def add_local_attention_op(self):
    """Build the local-context attention scores and store them on ``self.attention_scores``.

    For every span, up to K words to the left and K words to the right are
    gathered, scored against the candidate entity embeddings, hard-pruned to
    the top ``attention_R`` words, soft-maxed, and summed into one context
    vector. The result is the dot product of each candidate entity embedding
    with that context vector.

    Shape annotations in the comments below are the ones carried by the
    original code; the embedding width 300 is hard-coded in several places.
    """
    # Choose which entity embeddings take part in the attention.
    attention_entity_emb = self.pure_entity_embeddings if self.args.attention_ent_vecs_no_regularization else self.entity_embeddings
    with tf.variable_scope("attention"):
        K = self.args.attention_K
        left_mask = self._sequence_mask_v13(self.begin_span, K)  # number of words on the left (left window)
        right_mask = self._sequence_mask_v13(tf.expand_dims(self.words_len, 1) - self.end_span, K)
        # number of words on the right. Never more than K even if more words exist.
        ctxt_mask = tf.concat([left_mask, right_mask], 2)  # [batch, num_of_spans, 2*K]
        # Clamp into [args.zero, 1] and take the log, so this becomes an
        # additive mask: log(1) = 0 at the upper clamp, log(args.zero) at the lower.
        ctxt_mask = tf.log(tf.minimum(1.0, tf.maximum(self.args.zero, ctxt_mask)))
        #  T,  T,  T,  F,  F | T,  T,  F,  F,  F
        # -1, -2, -3, -4, -5  +0, +1, +2, +3, +4
        # Offsets relative to the span start / end, clamped to valid word positions.
        leftctxt_indices = tf.maximum(0, tf.range(-1, -K - 1, -1) + tf.expand_dims(self.begin_span, 2))  # [batch, num_mentions, K]
        rightctxt_indices = tf.minimum(tf.shape(self.pure_word_embeddings)[1] - 1, tf.range(K) + tf.expand_dims(self.end_span, 2))  # [batch, num_mentions, K]
        ctxt_indices = tf.concat([leftctxt_indices, rightctxt_indices], 2)  # [batch, num_mentions, 2*K]
        # Pair every word index with its batch index so gather_nd can address (row, col).
        batch_index = tf.tile(tf.expand_dims(tf.expand_dims(tf.range(tf.shape(ctxt_indices)[0]), 1), 2), [1, tf.shape(ctxt_indices)[1], tf.shape(ctxt_indices)[2]])
        ctxt_indices = tf.stack([batch_index, ctxt_indices], 3)
        # [batch, num_of_spans, 2*K, 2]   the last dimension is row,col for gather_nd
        # [batch, num_of_spans, 2*K, [row,col]]

        att_x_w = self.pure_word_embeddings  # [batch, max_sent_len, 300]
        if self.args.attention_on_lstm and self.args.nn_components.find("lstm") != -1:
            # ablation: here the attention is computed on the output of the lstm layer x_k instead of using the
            # pure word2vec vectors. (word2vec used in paper).
            att_x_w = util.projection(self.context_emb, 300)  # if tf.shape(self.context_emb)[-1] != 300 else self.context_emb

        ctxt_word_emb = tf.gather_nd(att_x_w, ctxt_indices)
        # [batch, num_of_spans, 2K, emb_size]   emb_size = 300 only pure word emb used (word2vec)
        # and not after we add char emb and dropout

        # in this implementation we don't use the diagonal A and B arrays that are mentioned in
        # Ganea and Hoffmann 2017 (only used in the ablations)
        temp = attention_entity_emb
        if self.args.attention_use_AB:
            att_A = tf.get_variable("att_A", [300])
            temp = att_A * attention_entity_emb
        scores = tf.matmul(ctxt_word_emb, temp, transpose_b=True)
        scores = tf.reduce_max(scores, reduction_indices=[-1])  # max score of each word for each span acquired from any cand entity
        scores = scores + ctxt_mask  # some words are not valid out of window so we assign to them very low score
        top_values, _ = tf.nn.top_k(scores, self.args.attention_R)
        # [batch, num_of_spans, R]
        # The R-th largest score of every span is used as the pruning threshold.
        R_value = top_values[:, :, -1]  # [batch, num_of_spans]
        R_value = tf.maximum(self.args.zero, R_value)  # so to avoid keeping words that
        # have max score with any of the entities <=0 (also score = 0 can have words with
        # padding candidate entities)

        threshold = tf.tile(tf.expand_dims(R_value, 2), [1, 1, 2 * K])  # [batch, num_of_spans, 2K]
        # Hard pruning: subtract 50 from every score below the threshold so
        # the softmax gives those words a negligible weight.
        scores = scores - tf.to_float(((scores - threshold) < 0)) * 50  # 50 where score<thr, 0 where score>=thr
        scores = tf.nn.softmax(scores, dim=2)  # [batch, num_of_spans, 2K]
        scores = tf.expand_dims(scores, 3)  # [batch, num_of_spans, 2K, 1]
        # [batch, num_of_spans, 2K, 1]  *  [batch, num_of_spans, 2K, emb_size]
        # =  [batch, num_of_spans, 2K, emb_size]
        x_c = tf.reduce_sum(scores * ctxt_word_emb, 2)  # =  [batch, num_of_spans, emb_size]
        if self.args.attention_use_AB:
            att_B = tf.get_variable("att_B", [300])
            x_c = att_B * x_c
        x_c = tf.expand_dims(x_c, 3)  # [batch, num_of_spans, emb_size, 1]
        # [batch, num_of_spans, 30, emb_size=300]  mul with  [batch, num_of_spans, emb_size, 1]
        x_e__x_c = tf.matmul(attention_entity_emb, x_c)  # [batch, num_of_spans, 30, 1]
        x_e__x_c = tf.squeeze(x_e__x_c, axis=3)  # [batch, num_of_spans, 30]
        self.attention_scores = x_e__x_c
# C3 conv Input=14*14*6 Output=10*10*6 conv2_w = tf.Variable(tf.truncated_normal(shape=[5, 5, 6, 16], mean=0, stddev=0.1)) conv2_b = tf.Variable(tf.zeros(16)) conv2 = tf.nn.conv2d(pool_1, conv2_w, strides=[1, 1, 1, 1], padding='VALID') + conv2_b conv2 = tf.nn.relu(conv2) # S4 Pooling Input=10*10*6 OutPut=5*5*16 pool_2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID') # Flatten Input=5*5*16 Output=400 fc1 = tf.reshape(pool_2,[-1,400]) # C5 conv Input=5*5*16=400 Output=120 fc1_w = tf.Variable(tf.truncated_normal(shape=(400, 120), mean=0, stddev=0.1)) fc1_b = tf.Variable(tf.zeros(120)) fc1 = tf.matmul(fc1, fc1_w) + fc1_b # F6 Input=120 OutPut=84 fc2_w = tf.Variable(tf.truncated_normal(shape=(120, 84), mean=0, stddev=0.1)) fc2_b = tf.Variable(tf.zeros(84)) fc2 = tf.matmul(fc1, fc2_w) + fc2_b fc2 = tf.nn.relu(fc2) # F7 Input=84 Output=10 fc3_w = tf.Variable(tf.truncated_normal(shape=(84, 10), mean=0, stddev=0.1)) fc3_b = tf.Variable(tf.zeros(10)) y_conv = tf.matmul(fc2, fc3_w) + fc3_b # 我们不采用先Softmax再计算交叉熵的方法,而是直接用tf.nn.softmax_cross_entropy_with_logits直接计算 cross_entropy = tf.reduce_mean(
def __init__(self, params, hidden_weights=None):
    """Build the graph, the summaries and the session for the network.

    Every hidden layer combines three paths: a recurrent-style "hidden"
    update (input projection plus the stored hidden state multiplied by a
    fixed matrix ``H``), a "mirror" projection of the same input, and a
    "readout" of the hidden update. The layer activation is
    ``activation_function(readout * mirror)``. The last layer is a plain
    dense layer.

    Args:
        params: dict read for the keys ``'network_shape'`` (layer widths,
            input first, output last), ``'batch_size'``,
            ``'tensorboard_dir'``, ``'activation_function'`` and
            ``'optimizer'`` (an object exposing ``minimize``).
        hidden_weights: optional mapping from ``"H_<layer index>"`` to an
            array used as the fixed ``H`` matrix of that layer. When given,
            the scalar ``H_tune`` that scales every ``H`` is trainable.
    """
    self.params = params
    self.network_shape = self.params['network_shape']
    self.input_dim = self.network_shape[0]
    self.output_dim = self.network_shape[-1]
    self.batch_size = self.params['batch_size']
    self.hidden_weights = hidden_weights
    self.tensorboard_dir = self.params['tensorboard_dir']
    self.activation_function = self.params['activation_function']
    self.optimizer_ = self.params['optimizer']

    # model
    self.input = tf.placeholder(tf.float32, [None, self.input_dim], name="input")
    self.output = tf.placeholder(tf.float32, [None, self.output_dim], name="output")
    self.activation_patterns = {}
    self.hidden_state_activation_patterns = {}
    self.activation = self.input
    self.hidden_states = {}
    self.hidden_states_update_ops = {}

    num_layers = len(self.network_shape) - 1

    # One non-trainable state vector per hidden layer.
    for i in range(1, num_layers):
        with tf.name_scope("layer{0}".format(i)):
            hs_name = "hs_{0}".format(i)
            h = tf.Variable(tf.truncated_normal([self.network_shape[i]]),
                            name="hidden_state", trainable=False)
            self.hidden_states[hs_name] = h
            Utils.variable_summaries(h, hs_name)

    # Scalar gain applied to every H. It must be float32 in both branches:
    # an integer variable cannot be multiplied with the float32 H matrices.
    if self.hidden_weights is not None:
        H_tune = tf.Variable(1.0, trainable=True, name="H_tune")
        Utils.variable_summaries(H_tune, "H_tune")
    else:
        H_tune = tf.Variable(1.0, trainable=False, name="H_tune")

    for i in range(num_layers):
        fan_in = self.network_shape[i]
        fan_out = self.network_shape[i + 1]
        with tf.name_scope("layer{0}".format(i + 1)):
            if i < num_layers - 1:
                hs_name = "hs_{0}".format(i + 1)
                with tf.name_scope("hidden"):
                    # input weight and bias
                    W = tf.Variable(tf.random_normal([fan_in, fan_out], stddev=0.05), name="W")
                    bW = tf.Variable(tf.random_normal([fan_out], stddev=0.05), name="bW")
                    Utils.variable_summaries(W, "W")
                    Utils.variable_summaries(bW, "bW")

                    # Fixed hidden-to-hidden matrix: supplied or random.
                    H_name = "H_{0}".format(i + 1)
                    if self.hidden_weights is not None and H_name in self.hidden_weights:
                        H = tf.Variable(self.hidden_weights[H_name].astype('float32'),
                                        dtype=tf.float32, trainable=False, name="H")
                    else:
                        H = tf.Variable(tf.random_normal([fan_out, fan_out], stddev=0.05),
                                        trainable=False, name="H")

                    input_for_hidden = tf.matmul(self.activation, W) + bW
                    # Repeat the single stored state vector for every row of the batch.
                    tiled_h = tf.reshape(
                        tf.tile(self.hidden_states[hs_name], [self.batch_size]),
                        [self.batch_size, -1])
                    hidden_update = tf.nn.tanh(
                        tf.add(input_for_hidden,
                               tf.matmul(tiled_h, tf.scalar_mul(H_tune, H))))

                with tf.name_scope("mirror"):
                    # mirror input and bias
                    M = tf.Variable(tf.random_normal([fan_in, fan_out], stddev=0.05), name="M")
                    bM = tf.Variable(tf.random_normal([fan_out], stddev=0.05), name="bM")
                    Utils.variable_summaries(M, "M")
                    Utils.variable_summaries(bM, "bM")
                    input_for_mirror = tf.nn.tanh(tf.matmul(self.activation, M) + bM)

                with tf.name_scope("readout"):
                    # readout weights and biases
                    R = tf.Variable(tf.random_normal([fan_out, fan_out], stddev=0.05), name="R")
                    bR = tf.Variable(tf.random_normal([fan_out], stddev=0.05), name="bR")
                    Utils.variable_summaries(R, "R")
                    Utils.variable_summaries(bR, "bR")
                    readout = self.activation_function(tf.matmul(hidden_update, R) + bR)

                with tf.name_scope("activation"):
                    self.activation = self.activation_function(
                        tf.multiply(readout, input_for_mirror))

                # Only the first row of the batch is written back as the new state.
                self.hidden_states_update_ops[hs_name] = \
                    self.hidden_states[hs_name].assign(hidden_update[0])
            else:
                W = tf.Variable(tf.random_normal([fan_in, fan_out], stddev=0.05), name="W")
                bW = tf.Variable(tf.random_normal([fan_out], stddev=0.05), name="bW")
                Utils.variable_summaries(W, "W")
                Utils.variable_summaries(bW, "bW")
                with tf.name_scope("activation"):
                    self.activation = self.activation_function(
                        tf.matmul(self.activation, W) + bW)

            act = self.activation
            if i > 0:
                self.activation_patterns['layer_{0}'.format(i)] = act

    # cost
    with tf.name_scope("cost"):
        # Keyword arguments are mandatory from TensorFlow 1.0 on; the old
        # positional order was (logits, labels).
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.activation, labels=self.output))
        tf.summary.scalar('cost', self.cost)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(self.activation, 1),
                                      tf.argmax(self.output, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', self.accuracy)

    self.optimizer = self.optimizer_.minimize(self.cost)

    self.sess = tf.Session()
    self.merged = tf.summary.merge_all()
    self.summ_writer = tf.summary.FileWriter(self.tensorboard_dir, self.sess.graph)
    init = tf.global_variables_initializer()
    self.sess.run(init)
def attention(t):
    """Score one slice: project with W_hsz / w_z, activate, then reduce with v.

    W_hsz, w_z, v and activation are taken from the surrounding scope.
    """
    projected = tf.matmul(t, W_hsz) + w_z
    return tf.matmul(activation(projected), v)  # [batch_size, 1]
def call(self, inputs):
    """Return the affine transform ``inputs @ self.w + self.b``."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b
def inference(images_placeholder, keep_prob):
    """Build the two-conv / two-dense classifier and return class probabilities.

    Args:
        images_placeholder: tensor reshaped internally to ``[-1, 28, 28, 3]``.
        keep_prob: keep probability handed to the dropout after the first
            dense layer.

    Returns:
        Softmax output with ``NUM_CLASSES`` columns (roughly a probability
        per label).
    """

    def make_weights(shape):
        # Weights start from a truncated normal with standard deviation 0.1.
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def make_biases(shape):
        # Biases start from the constant 0.1.
        return tf.Variable(tf.constant(0.1, shape=shape))

    def convolve(inputs, kernel):
        # Stride-1 convolution that keeps the spatial size.
        return tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='SAME')

    def halve(inputs):
        # 2x2 max pooling with stride 2.
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    # Reshape the input to 28x28x3 images.
    x_image = tf.reshape(images_placeholder, [-1, 28, 28, 3])

    # Convolution layer 1
    with tf.name_scope('conv1'):
        kernel1 = make_weights([5, 5, 3, 32])
        offset1 = make_biases([32])
        conv1_out = tf.nn.relu(convolve(x_image, kernel1) + offset1)

    # Pooling layer 1
    with tf.name_scope('pool1'):
        pool1_out = halve(conv1_out)

    # Convolution layer 2
    with tf.name_scope('conv2'):
        kernel2 = make_weights([5, 5, 32, 64])
        offset2 = make_biases([64])
        conv2_out = tf.nn.relu(convolve(pool1_out, kernel2) + offset2)

    # Pooling layer 2
    with tf.name_scope('pool2'):
        pool2_out = halve(conv2_out)

    # Fully connected layer 1, followed by dropout
    with tf.name_scope('fc1'):
        flat_width = 7*7*64
        dense1_w = make_weights([flat_width, 1024])
        dense1_b = make_biases([1024])
        flattened = tf.reshape(pool2_out, [-1, flat_width])
        dense1_out = tf.nn.relu(tf.matmul(flattened, dense1_w) + dense1_b)
        dropped = tf.nn.dropout(dense1_out, keep_prob)

    # Fully connected layer 2
    with tf.name_scope('fc2'):
        dense2_w = make_weights([1024, NUM_CLASSES])
        dense2_b = make_biases([NUM_CLASSES])

    # Normalise with softmax
    with tf.name_scope('softmax'):
        logits = tf.matmul(dropped, dense2_w) + dense2_b
        y_conv = tf.nn.softmax(logits)

    return y_conv
def out_layer(slice):
    """Project one slice with W_hh / w_h and apply the activation.

    W_hh, w_h and activation are taken from the surrounding scope.
    """
    pre_activation = tf.matmul(slice, W_hh) + w_h
    return activation(pre_activation)
h_pool3 = max_pool_4x4(h_conv3)

# Fourth convolution layer: 3x3 kernels, 128 -> 256 channels, ReLU.
W_conv4 = weight_variable([3, 3, 128, 256])
b_conv4 = bias_variable([256])
h_conv4 = tf.nn.relu(conv2d(h_pool3, W_conv4) + b_conv4)
# spp_layer is used here in place of another 4x4 max-pool.
h_pool4 = spp_layer(h_conv4)

# Flatten into a fully connected part (handled by an MLP).
reshape = tf.reshape(h_pool4, [batch_size, -1])
dim = reshape.get_shape()[1].value
W_fc1 = weight_variable([dim, 1024])
b_fc1 = bias_variable([1024])
h_fc1 = tf.nn.relu(tf.matmul(reshape, W_fc1) + b_fc1)

# dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

W_fc2 = weight_variable([1024, 82])
b_fc2 = bias_variable([82])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

# Loss function and optimisation algorithm.
# The probabilities are clipped away from 0 before the log: a softmax output
# that underflows to exactly 0 would otherwise give log(0) = -inf and turn the
# loss (and every gradient) into NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)),
                   reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
def sketch_step(tensor, cum_attention, active_mask, temper):
    """Run one sketch step: attend over positions with a constrained softmax.

    Besides its arguments the function reads ``activation``, ``W_hsz``,
    ``w_z``, ``v``, ``W_hh``, ``w_h``, ``batch_size``, ``L``,
    ``discount_factor``, ``full_model``, ``state_size`` and ``window_size``
    from the enclosing scope.

    Args:
        tensor: input of shape [batch_size, L, 2*state_size*(2*window_size + 1)].
        cum_attention: attention accumulated so far, [batch_size, L].
        active_mask: mask of still-active positions, [batch_size, L].
        temper: accepted for interface compatibility; not used in the body.

    Returns:
        Tuple ``(s, constrained_weights, new_mask)`` where ``s`` is
        [batch_size, L, state_size] and the other two are [batch_size, L].
    """

    def csoftmax(ten, u, mask):
        """
        Compute the constrained softmax (csoftmax);
        See paper "Learning What's Easy: Fully Differentiable Neural Easy-First Taggers"
        on https://andre-martins.github.io/docs/emnlp2017_final.pdf (page 4)

        :param ten: input tensor
        :param u: cumulative attention see paper
        :param mask: mask with active elements
        :return: distribution
        """
        shape_t = ten.shape
        shape_u = u.shape
        assert shape_u == shape_t

        # centre the scores
        ten = ten - tf.reduce_mean(ten, axis=1, keep_dims=True)

        neg_mask = tf.ones_like(mask) - mask

        # calculate new distribution with attention on distribution 'b'
        Q = tf.exp(ten)
        Z = tf.reduce_sum(Q*mask, axis=1, keep_dims=True)/(tf.ones(shape=[shape_t[0], 1]) - tf.reduce_sum(neg_mask*u, axis=1, keep_dims=True))

        # guard against NaN and inf: a non-positive normaliser is bumped by 1
        z_mask = tf.cast(tf.less_equal(Z, tf.zeros_like(Z)), dtype=tf.float32)
        Z = Z + z_mask

        A = Q / Z

        # check the upper-bound condition and update the masks: positions that
        # would exceed their bound u are clamped to u and become inactive
        t_mask = tf.to_float(tf.less_equal(A, u))
        f_mask = tf.to_float(tf.less(u, A))

        alpha = A * t_mask + u * f_mask

        mask = mask * t_mask

        return alpha, mask

    def attention(t):
        before_att = activation(tf.matmul(t, W_hsz) + w_z)
        att = tf.matmul(before_att, v)  # [batch_size, 1]
        return att

    tensor = tf.transpose(tensor, [1, 0, 2])  # [L; batch_size; 2*state_size*(2*window_size + 1)]

    attentions = tf.map_fn(attention, tensor, dtype=tf.float32)  # [L, batch_size, 1]
    # map_fn stacks its results position-major. Reshaping [L, batch_size, 1]
    # straight to [batch_size, L] would reinterpret the buffer and mix up
    # batch entries and positions, so drop the trailing axis first and then
    # transpose.
    attentions = tf.transpose(tf.reshape(attentions, [L, batch_size]))  # [batch_size, L]
    attentions = attentions - cum_attention*discount_factor  # [batch_size, L]

    # Remaining attention budget per position.
    U = tf.ones_like(cum_attention) - cum_attention
    constrained_weights, new_mask = csoftmax(attentions, U, active_mask)  # [batch_size, L]

    tensor = tf.transpose(tensor, [1, 0, 2])  # [batch_size; L; 2*state_size*(2*window_size + 1)]

    if not full_model:
        # TODO: check
        cn = tf.reduce_sum(tensor*tf.expand_dims(constrained_weights, [2]), axis=1)  # [batch_size, 2*state_size*(2*window_size + 1)]
        cn = tf.reshape(cn, [batch_size, 2*state_size*(2*window_size + 1)])  # [batch_size, 2*state_size*(2*window_size + 1)]
        s = activation(tf.matmul(cn, W_hh) + w_h)  # [batch_size, state_size]

        s = tf.matmul(tf.expand_dims(constrained_weights, [2]), tf.expand_dims(s, [1]))  # [batch_size, L, state_size]
    else:
        def out_layer(slice):
            out = activation(tf.matmul(slice, W_hh) + w_h)
            return out

        tensor = tf.transpose(tensor, [1, 0, 2])  # [L; batch_size; 2*state_size*(2*window_size + 1)]
        s = tf.map_fn(out_layer, tensor, dtype=tf.float32)  # [L; batch_size; state_size]
        s = tf.transpose(s, [1, 0, 2])  # [batch_size; L; state_size]
        s = tf.expand_dims(constrained_weights, [2]) * s

    return s, constrained_weights, new_mask
(x_train, y_train), (x_test, y_test) = load_data()

# Hyper-parameters
learning_rate = 0.001
training_epochs = 10
batch_size = 128
D = 3072  # number of features.
K = 10  # number of classes.

# Graph inputs: one flattened sample per row, one-hot label per row.
X = tf.placeholder(tf.float32, [None, D])
Y = tf.placeholder(tf.float32, [None, K])

# Single dense layer followed by ReLU.
# NOTE(review): the ReLU output is handed to the softmax cross-entropy below
# as logits -- confirm the rectification is intended.
W1 = tf.Variable(tf.random_normal([D, K]))
b1 = tf.Variable(tf.random_normal([K]))
hypothesis = tf.nn.relu(tf.matmul(X, W1) + b1)

# Flatten every sample to D features.
x_train = np.reshape(x_train, (-1, D))
x_test = np.reshape(x_test, (-1, D))

# One-hot encode the labels over K classes.
# NOTE(review): tf.one_hot returns tensors, not arrays; verify how these are
# fed to Y by the code that follows.
y_train = tf.one_hot(y_train, K)
y_test = tf.one_hot(y_test, K)

# Cost / loss and optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Session with freshly initialised variables
sess = tf.Session()
sess.run(tf.global_variables_initializer())
def Inference(imgs):
    """Build the six-conv network with three parallel 10-way output heads.

    Layout: conv1-conv2-pool, conv3-conv4-pool, conv5-conv6-pool (all 3x3
    VALID convolutions with ReLU, 2x2 VALID max pooling), flatten, then three
    independent linear layers FC21 / FC22 / FC23 over the same features.

    Returns:
        Tuple ``(fc21, fc22, fc23)`` of un-normalised outputs, 10 columns each.
    """

    def conv_relu(inputs, scope_name, kernel_shape):
        # 3x3 VALID convolution + bias + ReLU inside its own variable scope.
        # The bias width is the kernel's output-channel count.
        with tf.variable_scope(scope_name) as scope:
            kernel = GenerateWegiths('weights', kernel_shape)
            convolved = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='VALID')
            offset = GenerateBias('biases', [kernel_shape[-1]])
            shifted = tf.nn.bias_add(convolved, offset)
            return tf.nn.relu(shifted, name=scope.name)

    def max_pool(inputs, scope_name, op_name):
        # 2x2 max pooling with stride 2.
        with tf.variable_scope(scope_name):
            return tf.nn.max_pool(inputs, [1, 2, 2, 1], strides=[1, 2, 2, 1],
                                  padding='VALID', name=op_name)

    def linear_head(features, scope_name, in_width):
        # Linear layer with 10 outputs and its own weights and biases.
        with tf.variable_scope(scope_name):
            head_w = GenerateWegiths('weights', [in_width, 10])
            head_b = GenerateBias('biases', [10])
            return tf.matmul(features, head_w) + head_b

    imgs = tf.cast(imgs, tf.float32)

    conv1 = conv_relu(imgs, "conv1", [3, 3, 3, 32])
    conv2 = conv_relu(conv1, "conv2", [3, 3, 32, 32])
    pool1 = max_pool(conv2, "MaxPool1", 'pooling1')

    conv3 = conv_relu(pool1, "conv3", [3, 3, 32, 64])
    conv4 = conv_relu(conv3, "conv4", [3, 3, 64, 64])
    pool2 = max_pool(conv4, "MaxPool2", 'pooling2')

    conv5 = conv_relu(pool2, "conv5", [3, 3, 64, 128])
    conv6 = conv_relu(conv5, "conv6", [3, 3, 128, 128])
    pool3 = max_pool(conv6, "MaxPool3", 'pooling3')

    # Flatten using the static height, width and channel sizes.
    with tf.variable_scope("FC1"):
        static_shape = pool3.get_shape()
        fc1Shape = static_shape[1].value * static_shape[2].value * static_shape[3].value
        fc1 = tf.reshape(pool3, [-1, fc1Shape], name='fc1')

    fc21 = linear_head(fc1, 'FC21', fc1Shape)
    fc22 = linear_head(fc1, "FC22", fc1Shape)
    fc23 = linear_head(fc1, "FC23", fc1Shape)

    return fc21, fc22, fc23
def simple_nn_layer(x, y):
    """Return ``relu(x @ y)``: a bias-free dense layer with ReLU activation."""
    product = tf.matmul(x, y)
    return tf.nn.relu(product)
def __init__(self, rnn_size, batch_size, learning_rate, training_seq_len, vocab_size, infer_sample=False):
    """Build the LSTM sequence-model graph.

    Args:
        rnn_size: width of the LSTM cell and of the embedding vectors.
        batch_size: sequences per batch (forced to 1 when ``infer_sample``).
        learning_rate: learning rate handed to the Adam optimizer.
        training_seq_len: tokens per sequence (forced to 1 when
            ``infer_sample``).
        vocab_size: number of distinct symbols; sets the embedding rows and
            the width of the output projection.
        infer_sample: when true, build the graph for generation: one token at
            a time, with each output fed back as the next input.
    """
    self.rnn_size = rnn_size
    self.vocab_size = vocab_size
    self.infer_sample = infer_sample
    self.learning_rate = learning_rate

    # Generation consumes a single token per step.
    if infer_sample:
        self.batch_size = 1
        self.training_seq_len = 1
    else:
        self.batch_size = batch_size
        self.training_seq_len = training_seq_len

    self.lstm_cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    self.initial_state = self.lstm_cell.zero_state(self.batch_size, tf.float32)

    # Integer token ids for inputs and targets.
    self.x_data = tf.placeholder(tf.int32, [self.batch_size, self.training_seq_len])
    self.y_output = tf.placeholder(tf.int32, [self.batch_size, self.training_seq_len])

    # NOTE(review): the extent of this variable scope was reconstructed from a
    # flattened source -- confirm which statements belong inside it.
    with tf.variable_scope('lstm_vars'):
        # Softmax Output Weights
        W = tf.get_variable('W', [self.rnn_size, self.vocab_size], tf.float32, tf.random_normal_initializer())
        b = tf.get_variable('b', [self.vocab_size], tf.float32, tf.constant_initializer(0.0))

        # Define Embedding
        embedding_mat = tf.get_variable('embedding_mat', [self.vocab_size, self.rnn_size], tf.float32, tf.random_normal_initializer())

        embedding_output = tf.nn.embedding_lookup(embedding_mat, self.x_data)
        # Split along the time axis into one tensor per step, then drop that axis.
        rnn_inputs = tf.split(axis=1, num_or_size_splits=self.training_seq_len, value=embedding_output)
        rnn_inputs_trimmed = [tf.squeeze(x, [1]) for x in rnn_inputs]

    # If we are inferring (generating text), we add a 'loop' function
    # Define how to get the i+1 th input from the i th output
    def inferred_loop(prev, count):
        # Apply hidden layer
        prev_transformed = tf.matmul(prev, W) + b
        # Get the index of the output (also don't run the gradient)
        prev_symbol = tf.stop_gradient(tf.argmax(prev_transformed, 1))
        # Get embedded vector
        output = tf.nn.embedding_lookup(embedding_mat, prev_symbol)
        return(output)

    decoder = tf.contrib.legacy_seq2seq.rnn_decoder
    outputs, last_state = decoder(rnn_inputs_trimmed, self.initial_state, self.lstm_cell, loop_function=inferred_loop if infer_sample else None)
    # Non inferred outputs: concatenate the per-step outputs and flatten to rows of rnn_size
    output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, self.rnn_size])
    # Logits and output
    self.logit_output = tf.matmul(output, W) + b
    self.model_output = tf.nn.softmax(self.logit_output)

    loss_fun = tf.contrib.legacy_seq2seq.sequence_loss_by_example
    # NOTE(review): the fourth positional argument receives vocab_size; check
    # it against the sequence_loss_by_example signature of the TF version in use.
    loss = loss_fun([self.logit_output],[tf.reshape(self.y_output, [-1])], [tf.ones([self.batch_size * self.training_seq_len])], self.vocab_size)
    # Mean loss per token.
    self.cost = tf.reduce_sum(loss) / (self.batch_size * self.training_seq_len)
    self.final_state = last_state
    # Clip the global gradient norm to 4.5 before applying the update.
    gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tf.trainable_variables()), 4.5)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, tf.trainable_variables()))