Example #1
def train():

    #placeholders for the training inputs (4 inputs with 2 features each) and outputs (4 outputs which have a value of 0 or 1)
    x = tf.placeholder(tf.float32, [4, 2], name='x-inputs')
    y = tf.placeholder(tf.float32, [4, 1], name='y-inputs')

    #set up the model calculations
    temp = tf.sigmoid(tf.matmul(x, w1) + b1)
    output = tf.sigmoid(tf.matmul(temp, w2) + b2)

    #cost function is the average cross-entropy over the training samples
    cost = tf.reduce_mean(((y * tf.log(output)) + ((1 - y) * tf.log(1.0 - output))) * -1)

    #training step is gradient descent
    train_step = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

    #declare training data
    training_x = [[0,1], [0,0], [1,0], [1,1]]
    training_y = [[1], [0], [1], [0]]

    #init session
    init = tf.initialize_all_variables()
    sess.run(init)

    #training
    for i in range(100000):
        sess.run(train_step, feed_dict={x:training_x, y:training_y})

        if i % 1000 == 0:
            print (i, sess.run(cost, feed_dict={x:training_x, y:training_y}))

    print '\ntraining done\n'
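The weights, biases and session used above are assumed to be created outside train(); a minimal sketch of those module-level definitions (the 2-unit hidden layer is an assumption, not part of the original snippet) would be:

w1 = tf.Variable(tf.random_uniform([2, 2], -1, 1), name='w1')   # input -> assumed 2-unit hidden layer
b1 = tf.Variable(tf.zeros([2]), name='b1')
w2 = tf.Variable(tf.random_uniform([2, 1], -1, 1), name='w2')   # hidden -> single output
b2 = tf.Variable(tf.zeros([1]), name='b2')
sess = tf.Session()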
Example #2
File: util.py Project: kuprel/skin
def loc_net_fc(images, batch_size):

    images -= 128
    images /= 128.

    images = tf.image.resize_images(images, 150, 150)
    images_flat = tf.reshape(images, [batch_size, -1])
    hidden_size = 100

    with tf.name_scope('fc1') as scope:
        weights = tf.Variable(tf.truncated_normal([150**2*3, hidden_size],
            dtype=tf.float32, stddev=1e-3), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[hidden_size],
            dtype=tf.float32), name='biases')
        hidden = tf.add(tf.matmul(images_flat, weights), biases, name=scope)
        hidden = tf.nn.relu(hidden)

    with tf.name_scope('fc2') as scope:
        weights = tf.Variable(tf.truncated_normal([hidden_size, 3],
            dtype=tf.float32, stddev=1e-3), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[3], dtype=tf.float32),
            name='biases')
        theta = tf.add(tf.matmul(hidden, weights), biases, name=scope)
        theta = tf.nn.tanh(theta)

    return theta
Example #3
    def output_dropout_no_bias(self, x, keep_prob=0.5):

        if(self.activation == 'sigmoid'):
            return tf.nn.dropout(tf.nn.sigmoid(tf.matmul(x, self.W)), keep_prob)

        elif(self.activation == 'relu'):
            return tf.nn.dropout(tf.nn.relu(tf.matmul(x, self.W)), keep_prob)

        elif(self.activation == 'relu6'):
            return tf.nn.dropout(tf.nn.relu6(tf.matmul(x, self.W)), keep_prob)

        elif(self.activation == 'leaky_relu'):
            z = tf.matmul(x, self.W)
            return tf.nn.dropout(tf.maximum(0.1 * z, z), keep_prob)

        elif(self.activation == 'leaky_relu6'):
            # leaky ReLU capped at 6
            z = tf.matmul(x, self.W)
            return tf.nn.dropout(tf.minimum(tf.maximum(0.1 * z, z), 6.0), keep_prob)

        elif(self.activation == 'linear'):
            return tf.nn.dropout(tf.matmul(x, self.W), keep_prob)

        elif(self.activation == 'softplus'):
            return tf.nn.dropout(tf.nn.softplus(tf.matmul(x, self.W)), keep_prob)

        elif(self.activation == 'tanh'):
            return tf.nn.dropout(tf.tanh(tf.matmul(x, self.W)), keep_prob)

        else:
            print "No known activation function selected, using linear"
            return tf.matmul(x, self.W)
Example #4
    def __init__(self):

        self.x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
        self.y = tf.placeholder(tf.float32, [None, HIDDEN_3_SIZE])

        W_1 = tf.Variable(tf.random_uniform([NUM_FEATURES, HIDDEN_1_SIZE], maxval=1.0))
        b_1 = tf.Variable(tf.random_uniform([HIDDEN_1_SIZE], maxval=1.0))

        W_2 = tf.Variable(tf.random_uniform([HIDDEN_1_SIZE, HIDDEN_2_SIZE], maxval=1.0))
        b_2 = tf.Variable(tf.random_uniform([HIDDEN_2_SIZE], maxval=1.0))

        W_3 = tf.Variable(tf.random_uniform([HIDDEN_2_SIZE, HIDDEN_3_SIZE], maxval=1.0))
        b_3 = tf.Variable(tf.random_uniform([HIDDEN_3_SIZE], maxval=1.0))

        x_drop = tf.nn.dropout(self.x, KEEP_PROB_INPUT)

        h_1 = tf.nn.tanh(tf.matmul(x_drop, W_1) + b_1)
        h_1_drop = tf.nn.dropout(h_1, KEEP_PROB_HIDDEN)

        h_2 = tf.nn.tanh(tf.matmul(h_1_drop, W_2) + b_2)
        h_2_drop = tf.nn.dropout(h_2, KEEP_PROB_HIDDEN)

        h_3 = tf.matmul(h_2_drop, W_3) + b_3

        # self.y_pred = tf.nn.softmax(h_3)
        self.y_pred = h_3

        # self.cross_entropy = tf.reduce_mean(-tf.reduce_sum(self.y * tf.log(self.y_pred), reduction_indices=[1]))
        self.cross_entropy = tf.reduce_mean(tf.square(self.y_pred - self.y))

        self.train_step = tf.train.MomentumOptimizer(109,0.99).minimize(self.cross_entropy)

        self.sess = tf.Session()
Example #5
    def output(self, x):

        if(self.no_bias):
            return self.output_no_bias(x)

        if(self.activation == 'sigmoid'):
            return tf.nn.sigmoid(tf.matmul(x, self.W) + self.b)

        elif(self.activation == 'relu'):
            return tf.nn.relu(tf.matmul(x, self.W) + self.b)

        elif(self.activation == 'relu6'):
            return tf.nn.relu6(tf.matmul(x, self.W) + self.b)

        elif(self.activation == 'leaky_relu'):
            z = tf.matmul(x, self.W) + self.b
            return tf.maximum(0.1 * z, z)

        elif(self.activation == 'leaky_relu6'):
            # leaky ReLU capped at 6
            z = tf.matmul(x, self.W) + self.b
            return tf.minimum(tf.maximum(0.1 * z, z), 6.0)

        elif(self.activation == 'linear'):
            return tf.matmul(x, self.W) + self.b

        elif(self.activation == 'softplus'):
            return tf.nn.softplus(tf.matmul(x, self.W) + self.b)

        elif(self.activation == 'tanh'):
            return tf.tanh(tf.matmul(x, self.W) + self.b)

        else:
            print "No known activation function selected, using linear"
            return tf.matmul(x, self.W) + self.b
Example #6
def solve(a, b):
    if b.ndim == 1:
        return tf.reshape(tf.matmul(tf.matrix_inverse(a), tf.expand_dims(b, -1)), [-1])
    elif b.ndim == 2:
        return tf.matmul(tf.matrix_inverse(a), b)
    else:
        import ipdb; ipdb.set_trace()
Example #7
File: gpr.py Project: erenis/GPflow
    def build_predict(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix of the points at which we want to predict.

        This method computes

            p(F* | Y )

        where F* are the values of the GP at Xnew and Y are the noisy observations at X.

        """
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
        L = tf.cholesky(K)
        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
        if full_cov:
            fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
        else:
            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, self.Y.shape[1]])
        return fmean, fvar
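The two triangular solves above implement the standard GP posterior. With K = K(X, X) + self.likelihood.variance * I, Kx = K(X, Xnew), L the Cholesky factor of K, A = L^{-1} Kx and V = L^{-1}(Y - m(X)), the returned quantities are

\[
\mathrm{fmean} = A^{\top} V + m(X_{\mathrm{new}}) = K_x^{\top} K^{-1}\bigl(Y - m(X)\bigr) + m(X_{\mathrm{new}}),
\qquad
\mathrm{fvar} = K(X_{\mathrm{new}}, X_{\mathrm{new}}) - A^{\top} A = K(X_{\mathrm{new}}, X_{\mathrm{new}}) - K_x^{\top} K^{-1} K_x .
\]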
Example #8
    def update_centers(self, img_dataset):
        '''
        Optimize the sub-codebook centers by least squares:
            self.C = (U * hu^T + V * hv^T) (hu * hu^T + hv * hv^T)^{-1}
        Because self.C actually stores the transpose C^T, the update computed below is
            self.C = (hu^T * hu + hv^T * hv)^{-1} (hu^T * U + hv^T * V)
        '''
        old_C_value = self.sess.run(self.C)

        h = self.img_b_all
        U = self.img_output_all
        smallResidual = tf.constant(
            np.eye(self.subcenter_num * self.subspace_num, dtype=np.float32) * 0.001)
        Uh = tf.matmul(tf.transpose(h), U)
        hh = tf.add(tf.matmul(tf.transpose(h), h), smallResidual)
        compute_centers = tf.matmul(tf.matrix_inverse(hh), Uh)

        update_C = self.C.assign(compute_centers)
        C_value = self.sess.run(update_C, feed_dict={
            self.img_output_all: img_dataset.output,
            self.img_b_all: img_dataset.codes,
        })

        C_sums = np.sum(np.square(C_value), axis=1)
        C_zeros_ids = np.where(C_sums < 1e-8)
        C_value[C_zeros_ids, :] = old_C_value[C_zeros_ids, :]
        self.sess.run(self.C.assign(C_value))
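In graph terms, h stacks the binary codes and U the corresponding network outputs; with the small ridge term epsilon = 0.001 added for numerical stability, the centers update computed above is

\[
C = \bigl(h^{\top} h + \varepsilon I\bigr)^{-1} h^{\top} U .
\]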
Example #9
 def model(data,text_data, train=False):
     """The Model definition."""
     # 2D convolution, with 'SAME' padding (i.e. the output feature map has
     # the same size as the input). Note that {strides} is a 4D array whose
     # shape matches the data layout: [image index, y, x, depth].
     conv = tf.nn.conv2d(data,
                         conv1_weights,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
     # Bias and rectified linear non-linearity.
     relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
     # Max pooling. The kernel size spec {ksize} also follows the layout of
     # the data. Here we have a pooling window of 2, and a stride of 2.
     pool = tf.nn.max_pool(relu,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME')
     conv = tf.nn.conv2d(pool,
                         conv2_weights,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
     relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
     pool = tf.nn.max_pool(relu,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME')
     print pool.get_shape().as_list()
     conv = tf.nn.conv2d(pool,
                         conv3_weights,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
     relu = tf.nn.relu(tf.nn.bias_add(conv, conv3_biases))
     pool = tf.nn.max_pool(relu,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='VALID')
                                                         
     # Reshape the feature map cuboid into a 2D matrix to feed it to the
     # fully connected layers.
     pool_shape = pool.get_shape().as_list()
     print pool_shape
     print fc1_weights.get_shape().as_list()
     reshape = tf.reshape(
         pool,
         [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
     #Add text vector into account before fully connected layer
     
     reshape = tf.concat(1,[reshape,text_data])
     
     # Fully connected layer. Note that the '+' operation automatically
     # broadcasts the biases.
     hidden1 = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
     # Add a 50% dropout during training only. Dropout also scales
     # activations such that no rescaling is needed at evaluation time.
     if train:
         hidden1 = tf.nn.dropout(hidden1, 0.5, seed=SEED)
     hidden2 = tf.nn.relu(tf.matmul(hidden1, fc2_weights) + fc2_biases)
     if train:
         hidden2 = tf.nn.dropout(hidden2, 0.5, seed=SEED)
     return tf.matmul(hidden2, fc3_weights) + fc3_biases
Example #10
    def inference(self, train=False):
        """
        Build the core of the model, initialize all convolutional and feed-forward layers, with the
        respective weights, and add dropout if necessary.

        :param train: Boolean if training or eval, necessary for including dropout.
        :return: Tuple of resulting logits, and the feed-forward trainable weights for L2 Loss.
        """
        # 2D Convolution Layer, then Bias + ReLU, then Pooling Layer (add conditional train/eval)
        if train:
            conv1 = tf.nn.conv2d(self.X, self.conv1_w, strides=[1, 1, 1, 1], padding='SAME')
        else:
            conv1 = tf.nn.conv2d(self.eval_X, self.conv1_w, strides=[1, 1, 1, 1], padding='SAME')

        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, self.conv1_b))
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        # 2D Convolution Layer, then Bias + ReLU, then Pooling Layer
        conv2 = tf.nn.conv2d(pool1, self.conv2_w, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, self.conv2_b))
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        # Reshape 4D Pool Tensor into a 2D Tensor
        pool_shape = pool2.get_shape().as_list()
        reshape = tf.reshape(pool2, [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])

        # Fully Connected Layer 1 -> ReLU Activation
        hidden = tf.nn.relu(tf.matmul(reshape, self.fc1_w) + self.fc1_b)

        # Add dropout --> Only during training
        if train:
            hidden = tf.nn.dropout(hidden, 0.5)

        # Fully Connected Layer 2 -> for softmax (actual softmax performed in loss function)
        return tf.matmul(hidden, self.fc2_w) + self.fc2_b
Example #11
def forward_propagation(X, parameters):
    """
    Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX
    
    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  the shapes are given in initialize_parameters

    Returns:
    Z3 -- the output of the last LINEAR unit
    """
    
    # Retrieve the parameters from the dictionary "parameters" 
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    print(W3.shape)
    
    ### START CODE HERE ### (approx. 5 lines)              # Numpy Equivalents:
    Z1 = tf.add(tf.matmul(W1,X),b1)                                              # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.relu(Z1)                                              # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2,A1),b2)                                              # Z2 = np.dot(W2, a1) + b2
    A2 = tf.nn.relu(Z2)                                              # A2 = relu(Z2)
    print(A2.shape)
    Z3 = tf.add(tf.matmul(W3,A2),b3)                                              # Z3 = np.dot(W3, A2) + b3
    ### END CODE HERE ###
    
    return Z3
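The parameters dictionary consumed above comes from an initialize_parameters function that is not shown; a minimal sketch with hypothetical layer sizes (12288 -> 25 -> 12 -> 6, chosen only for illustration) could look like:

def initialize_parameters():
    # Hypothetical shapes; W1 is (n1, input_size) because Z1 = W1 @ X with X of shape (input_size, m)
    W1 = tf.get_variable("W1", [25, 12288], initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable("b1", [25, 1], initializer=tf.zeros_initializer())
    W2 = tf.get_variable("W2", [12, 25], initializer=tf.contrib.layers.xavier_initializer())
    b2 = tf.get_variable("b2", [12, 1], initializer=tf.zeros_initializer())
    W3 = tf.get_variable("W3", [6, 12], initializer=tf.contrib.layers.xavier_initializer())
    b3 = tf.get_variable("b3", [6, 1], initializer=tf.zeros_initializer())
    return {"W1": W1, "b1": b1, "W2": W2, "b2": b2, "W3": W3, "b3": b3}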
Example #12
def forward_propagation(images):
  with tf.variable_scope('conv1') as scope:
      W_conv1 = weight_variable([5, 5, 3, 32])
      b_conv1 = bias_variable([32])
      image_matrix = tf.reshape(images, [-1, 1750, 1750, 3])
      h_conv1 = tf.nn.sigmoid(conv2d(image_matrix, W_conv1) + b_conv1)
      _activation_summary(h_conv1)
      h_pool1 = max_pool_5x5(h_conv1)

  with tf.variable_scope('conv2') as scope:
      W_conv2 = weight_variable([5, 5, 32, 64])
      b_conv2 = bias_variable([64])
      h_conv2 = tf.nn.sigmoid(conv2d(h_pool1, W_conv2) + b_conv2)
      _activation_summary(h_conv2)
      h_pool2 = max_pool_5x5(h_conv2)

  with tf.variable_scope('conv3') as scope:
      W_conv3 = weight_variable([5, 5, 64, 128])
      b_conv3 = bias_variable([128])
      h_conv3 = tf.nn.sigmoid(conv2d(h_pool2, W_conv3) + b_conv3)
      _activation_summary(h_conv3)
      h_pool3 = max_pool_5x5(h_conv3)

  with tf.variable_scope('local3') as scope:
      W_fc1 = weight_variable([14 * 14 * 128, 256])
      b_fc1 = bias_variable([256])
      h_pool3_flat = tf.reshape(h_pool3, [-1, 14 * 14 * 128])
      h_fc1 = tf.nn.sigmoid(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
      _activation_summary(h_fc1)
      keep_prob = tf.Variable(1.0)
      W_fc2 = weight_variable([256, 4])
      b_fc2 = bias_variable([4])
      y_conv = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)
      _activation_summary(y_conv)
      return y_conv
Example #13
def alex_net(_X, _dropout):
    # Reshape input picture
    _X = tf.reshape(_X, shape=[-1, 40, 40, 1])

    # First convolutional layer
    conv1 = conv2d('conv1', _X, wc1, bc1)
    pool1 = max_pool('pool1', conv1, k=2)
    norm1 = norm('norm1', pool1, lsize=4)
    norm1 = tf.nn.dropout(norm1, _dropout)

    # Second convolutional layer
    conv2 = conv2d('conv2', norm1, wc2, bc2)
    pool2 = max_pool('pool2', conv2, k=2)
    norm2 = norm('norm2', pool2, lsize=4)
    norm2 = tf.nn.dropout(norm2, _dropout)

    # Third convolutional layer
    conv3 = conv2d('conv3', norm2, wc3, bc3)
    pool3 = max_pool('pool3', conv3, k=2)
    norm3 = norm('norm3', pool3, lsize=4)
    norm3 = tf.nn.dropout(norm3, _dropout)

    # Reshape conv3 output to fit dense layer input
    dense1 = tf.reshape(norm3, [-1, wd1.get_shape().as_list()[0]])

    # Fully connected layers
    dense1 = tf.nn.relu(tf.matmul(dense1, wd1) + bd1, name='fc1')  # Relu activation
    dense2 = tf.nn.relu(tf.matmul(dense1, wd2) + bd2, name='fc2')  # Relu activation

    # Output, class prediction
    out = tf.matmul(dense2, wout) + bout
    return out
Example #14
    def fc_layers(self):
        # fc1
        with tf.name_scope('fc1') as scope:
            shape = int(np.prod(self.pool5.get_shape()[1:]))
            fc1w = tf.Variable(tf.truncated_normal([shape, 4096],
                                                         dtype=tf.float32,
                                                         stddev=1e-1), name='weights')
            fc1b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
                                 trainable=True, name='biases')
            pool5_flat = tf.reshape(self.pool5, [-1, shape])
            fc1l = tf.nn.bias_add(tf.matmul(pool5_flat, fc1w), fc1b)
            self.fc1 = tf.nn.relu(fc1l)
            self.parameters += [fc1w, fc1b]

        # fc2
        with tf.name_scope('fc2') as scope:
            fc2w = tf.Variable(tf.truncated_normal([4096, 4096],
                                                         dtype=tf.float32,
                                                         stddev=1e-1), name='weights')
            fc2b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
                                 trainable=True, name='biases')
            fc2l = tf.nn.bias_add(tf.matmul(self.fc1, fc2w), fc2b)
            self.fc2 = tf.nn.relu(fc2l)
            self.parameters += [fc2w, fc2b]

        # fc3
        with tf.name_scope('fc3') as scope:
            fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
                                                         dtype=tf.float32,
                                                         stddev=1e-1), name='weights')
            fc3b = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
                                 trainable=True, name='biases')
            self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
            self.parameters += [fc3w, fc3b]
Example #15
def RNN(_X, _istate, _weights, _biases):

    # input shape: (batch_size, n_steps, 28, 28, 1)
    _X = tf.transpose(_X, [1, 0, 2, 3, 4])  # permute n_steps and batch_size
    # input shape: (n_steps=3, batch_size=20, 28, 28, 1)
    # Reshape to prepare input to hidden activation
    #_X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
    # Linear activation  ==> convolutional net
    #_X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
    
    A = CNN(_X[0,:,:,:,:])
    B = CNN(_X[1,:,:,:,:])
    C = CNN(_X[2,:,:,:,:])

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    #_X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, [A,B,C], initial_state=_istate)

    # Linear activation
    # Get inner loop last output
    out1 = tf.nn.relu( tf.matmul(outputs[-1], _weights['out1']) + _biases['out1'] )
    out2 = tf.matmul(out1, _weights['out2']) + _biases['out2'] 
    return out2
Example #16
def inference(images, hidden1_units):
  """Build the MNIST model up to where it may be used for inference.

  Args:
    images: Images placeholder, from inputs().
    hidden1_units: Size of the first hidden layer.

  Returns:
    softmax_linear: Output tensor with the computed logits.
  """
  # Hidden 1
  with tf.name_scope('hidden1'):
    weights = tf.Variable(
        tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                            stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
        name='weights')
    biases = tf.Variable(tf.zeros([hidden1_units]),
                         name='biases')
    hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
  # Hidden 2
  
  # Linear
  with tf.name_scope('softmax_linear'):
    weights = tf.Variable(
        tf.truncated_normal([hidden1_units, NUM_CLASSES],
                            stddev=1.0 / math.sqrt(float(hidden1_units))),
        name='weights')
    biases = tf.Variable(tf.zeros([NUM_CLASSES]),
                         name='biases')
    logits = tf.matmul(hidden1, weights) + biases
  return logits
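The '# Hidden 2' comment above is left empty; a second hidden layer in the same style (a hidden2_units value is assumed to be available) might look like the sketch below, with the softmax_linear layer then reading from hidden2 and hidden2_units instead of hidden1 and hidden1_units:

  with tf.name_scope('hidden2'):
    weights = tf.Variable(
        tf.truncated_normal([hidden1_units, hidden2_units],
                            stddev=1.0 / math.sqrt(float(hidden1_units))),
        name='weights')
    biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
    hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)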
Example #17
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):

     # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
                                            initial_state_fw=_istate_fw,
                                            initial_state_bw=_istate_bw)

    # Linear activation
    # Get inner loop last output
    output = [tf.matmul(o, _weights['out']) + _biases['out'] for o in outputs]
    return output
Example #18
def feature_importance(sess, user_id, matrix_i, matrix_j, matrix_f, matrix_o, 
                       bias_i, bias_j, bias_f, bias_o):
    user_embedding = get_user_embedding(sess, user_id)
    end_index = 0
    gates_i, gates_j, gates_f, gates_o = [], [], [], []    
    for feature in range(len(config.feature_desc)):
        start_index = end_index
        end_index = start_index + config.feature_desc[feature]
        gate_i, gate_j, gate_f, gate_o = 0, 0, 0, 0
        for event in user_embedding:
            gate_i += np.sum(sess.run(tf.matmul(tf.reshape(event[feature], [1, -1]), 
                                                matrix_i[start_index:end_index]) + 
                                      tf.reshape(bias_i, [1, -1])))
            gate_j += np.sum(sess.run(tf.matmul(tf.reshape(event[feature], [1, -1]), 
                                                matrix_j[start_index:end_index]) + 
                                      tf.reshape(bias_j, [1, -1])))
            gate_f += np.sum(sess.run(tf.matmul(tf.reshape(event[feature], [1, -1]), 
                                                matrix_f[start_index:end_index]) + 
                                      tf.reshape(bias_f, [1, -1])))
            gate_o += np.sum(sess.run(tf.matmul(tf.reshape(event[feature], [1, -1]), 
                                                matrix_o[start_index:end_index]) + 
                                      tf.reshape(bias_o, [1, -1])))
                     
        gates_i.append(gate_i/len(user_embedding))
        gates_j.append(gate_j/len(user_embedding))
        gates_f.append(gate_f/len(user_embedding))
        gates_o.append(gate_o/len(user_embedding))
    return gates_i, gates_j, gates_f, gates_o
Example #19
def Linear(args, output_dim, bias=True, bias_init=0.0, scope=None):
    if not isinstance(args, (list, tuple)):
        args = [args]

    input_dim = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2d arguments: %s" % str(shapes))
        elif not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        else:
            input_dim += shape[1]

    with tf.variable_scope(scope or "linear"):
        W = tf.get_variable("W", (input_dim, output_dim))

        if len(args) == 1:
            result = tf.matmul(args[0], W)
        else:
            result = tf.matmul(tf.concat(1, args), W)

        if not bias:
            return result

        b = tf.get_variable("b", (output_dim,),
                            initializer=tf.constant_initializer(bias_init))

    return result + b
Example #20
def dot(x, y):
    """Compute dot product between a Tensor matrix and a Tensor vector.

    If x is a ``[M x N]`` matrix, then y is a ``M``-vector.

    If x is a ``M``-vector, then y is a ``[M x N]`` matrix.

    Parameters
    ----------
    x : tf.Tensor
        ``M x N`` matrix or ``M`` vector (see above)
    y : tf.Tensor
        ``M`` vector or ``M x N`` matrix (see above)

    Returns
    -------
    tf.Tensor
        ``N``-vector
    """
    if len(x.get_shape()) == 1:
        vec = x
        mat = y
        return tf.matmul(tf.expand_dims(vec, 0), mat)
    else:
        mat = x
        vec = y
        return tf.matmul(mat, tf.expand_dims(vec, 1))
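A minimal usage sketch (the constants and names here are illustrative only):

mat = tf.constant([[1.0, 2.0], [3.0, 4.0]])   # 2 x 2 matrix
vec = tf.constant([1.0, 1.0])                 # length-2 vector
row = dot(vec, mat)   # shape [1, 2]: vec treated as a row vector
col = dot(mat, vec)   # shape [2, 1]: vec treated as a column vector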
Example #21
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling)
    conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling)
    conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out
Example #22
    def observation_net(self, input_):
        #decoder
        # input:[B,Z]

        with tf.name_scope('observation_net'):


            n_layers = len(self.network_architecture['decoder_net'])
            # weights = self.network_weights['decoder_weights']
            # biases = self.network_weights['decoder_biases']

            for layer_i in range(n_layers):

                # input_ = tf.contrib.layers.layer_norm(input_)
                # input_ = self.transfer_fct(tf.add(tf.matmul(input_, self.params_dict['decoder_weights_l'+str(layer_i)]), self.params_dict['decoder_biases_l'+str(layer_i)]))

                input_ = self.transfer_fct(tf.contrib.layers.layer_norm(tf.add(tf.matmul(input_, self.params_dict['decoder_weights_l'+str(layer_i)]), self.params_dict['decoder_biases_l'+str(layer_i)])))
                #add batch norm here

            x_mean = tf.add(tf.matmul(input_, self.params_dict['decoder_weights_out_mean']), self.params_dict['decoder_biases_out_mean'])
            x_log_var = tf.add(tf.matmul(input_, self.params_dict['decoder_weights_out_log_var']), self.params_dict['decoder_biases_out_log_var'])

            reward_mean = tf.add(tf.matmul(input_, self.params_dict['decoder_weights_reward_mean']), self.params_dict['decoder_biases_reward_mean'])
            reward_log_var = tf.add(tf.matmul(input_, self.params_dict['decoder_weights_reward_log_var']), self.params_dict['decoder_biases_reward_log_var'])


        return x_mean, x_log_var, reward_mean, reward_log_var
Example #23
def forward(x, train, regularizer):
    # Forward pass of the first convolutional layer
    conv1_w = get_weight([CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_KERNEL_NUM], regularizer)
    conv1_b = get_bias([CONV1_KERNEL_NUM])
    conv1 = conv2d(x, conv1_w)
    relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_b))
    pool1 = max_pool_2x2(relu1)

    # Forward pass of the second convolutional layer, initializing its variables
    conv2_w = get_weight([CONV2_SIZE, CONV2_SIZE, CONV1_KERNEL_NUM, CONV2_KERNEL_NUM], regularizer)
    conv2_b = get_bias([CONV2_KERNEL_NUM])
    conv2 = conv2d(pool1, conv2_w)
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_b))
    pool2 = max_pool_2x2(relu2)

    # Flatten the output of the last pooling layer, pool2 (a matrix), into the
    # input format (a vector) expected by the fully connected layers
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool2, [pool_shape[0], nodes])

    # Forward pass of the third layer (fully connected)
    fc1_w = get_weight([nodes, FC_SIZE], regularizer)  # initialize the FC weights, with regularization
    fc1_b = get_bias([FC_SIZE])  # initialize the FC biases
    fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_w) + fc1_b)
    if train: fc1 = tf.nn.dropout(fc1, 0.5)

    # Forward pass of the fourth layer (fully connected), initializing its variables
    fc2_w = get_weight([FC_SIZE, OUTPUT_NODE], regularizer)
    fc2_b = get_bias([OUTPUT_NODE])
    y = tf.matmul(fc1, fc2_w) + fc2_b
    return y
Example #24
	def model(data):
		"""
		Defines the neural network model. Useful for running on different data, not just the
		training set. Wrapping the model in a function applies the same weights, since they are
		declared externally, to the input (data), which makes it possible to predict on the
		validation and test data using the optimized weights.
		"""
		# Layer 1
		net1 = tf.nn.relu(tf.nn.conv2d(data, weights1, [1, 2, 2, 1], padding='SAME'))
		layer1 = tf.nn.relu(net1)

		# Layer 2
		net2 = tf.nn.relu(tf.nn.conv2d(layer1, weights2, [1, 2, 2, 1], padding='SAME'))
		layer2 = tf.nn.relu(net2)

		# Reshape layer 2
		shape = layer2.get_shape().as_list()
		reshaped_layer2 = tf.reshape(layer2, [shape[0], shape[1] * shape[2] * shape[3]])

		# Layer 3
		net3 = tf.matmul(reshaped_layer2, weights3)
		layer3 = tf.nn.relu(net3)

		# Last layer (output) (return value)
		return tf.matmul(layer3, weights4)
Example #25
 def loss_fn(w_flat):
   w = tf.reshape(w_flat, [visible_size, hidden_size])
   x = tf.matmul(data, w)
   x = tf.sigmoid(x)
   x = tf.matmul(x, w, transpose_b=True)
   x = tf.sigmoid(x)
   return tf.reduce_mean(tf.square(x-data))
Example #26
def main(_):
  sess = tf.Session()

  # Construct the TensorFlow network.
  ph_float = tf.placeholder(tf.float32, name="ph_float")
  x = tf.transpose(ph_float, name="x")
  v = tf.Variable(np.array([[-2.0], [-3.0], [6.0]], dtype=np.float32), name="v")
  m = tf.constant(
      np.array([[0.0, 1.0, 2.0], [-4.0, -1.0, 0.0]]),
      dtype=tf.float32,
      name="m")
  y = tf.matmul(m, x, name="y")
  z = tf.matmul(m, v, name="z")

  if FLAGS.debug:
    sess = tf_debug.LocalCLIDebugWrapperSession(sess, ui_type=FLAGS.ui_type)

  if FLAGS.error == "shape_mismatch":
    print(sess.run(y, feed_dict={ph_float: np.array([[0.0], [1.0], [2.0]])}))
  elif FLAGS.error == "uninitialized_variable":
    print(sess.run(z))
  elif FLAGS.error == "no_error":
    print(sess.run(y, feed_dict={ph_float: np.array([[0.0, 1.0, 2.0]])}))
  else:
    raise ValueError("Unrecognized error type: " + FLAGS.error)
Example #27
def runNN (train_x, train_y, test_x, test_y, numHidden):
	print "NN({})".format(numHidden)
	session = tf.InteractiveSession()

	x = tf.placeholder("float", shape=[None, train_x.shape[1]])
	y_ = tf.placeholder("float", shape=[None, 2])

	W1 = tf.Variable(tf.truncated_normal([train_x.shape[1],numHidden], stddev=0.01))
	b1 = tf.Variable(tf.truncated_normal([numHidden], stddev=0.01))
	W2 = tf.Variable(tf.truncated_normal([numHidden,2], stddev=0.01))
	b2 = tf.Variable(tf.truncated_normal([2], stddev=0.01))

	z = tf.nn.relu(tf.matmul(x,W1) + b1)
	y = tf.nn.softmax(tf.matmul(z,W2) + b2)

	cross_entropy = -tf.reduce_sum(y_*tf.log(tf.clip_by_value(y,1e-10,1.0)))
	#cross_entropy = -tf.reduce_sum(y_*tf.log(y))
	train_step = tf.train.MomentumOptimizer(learning_rate=.001, momentum=0.1).minimize(cross_entropy)
	#train_step = tf.train.AdamOptimizer(learning_rate=.01).minimize(cross_entropy)

	session.run(tf.initialize_all_variables())
	for i in range(NUM_EPOCHS):
		offset = i*BATCH_SIZE % (train_x.shape[0] - BATCH_SIZE)
		train_step.run({x: train_x[offset:offset+BATCH_SIZE, :], y_: makeLabels(train_y[offset:offset+BATCH_SIZE])})
		if i % 100 == 0:
			util.showProgress(cross_entropy, x, y, y_, test_x, test_y)
	session.close()
Example #28
def get_training_model():
    """
    The training model acts on a batch of 128x64 windows, and outputs a
    (1 + 7 * len(common.CHARS))-vector, `v`. `v[0]` is the probability that a
    plate is fully within the image and is at the correct scale.
    
    `v[1 + i * len(common.CHARS) + c]` is the probability that the `i`'th
    character is `c`.

    """
    x, conv_layer, conv_vars = convolutional_layers()
    
    # Densely connected layer
    W_fc1 = weight_variable([32 * 8 * 128, 2048])
    b_fc1 = bias_variable([2048])

    conv_layer_flat = tf.reshape(conv_layer, [-1, 32 * 8 * 128])
    h_fc1 = tf.nn.relu(tf.matmul(conv_layer_flat, W_fc1) + b_fc1)

    # Output layer
    W_fc2 = weight_variable([2048, 1 + 7 * len(common.CHARS)])
    b_fc2 = bias_variable([1 + 7 * len(common.CHARS)])

    y = tf.matmul(h_fc1, W_fc2) + b_fc2

    return (x, y, conv_vars + [W_fc1, b_fc1, W_fc2, b_fc2])
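The output vector y follows the layout described in the docstring; a sketch of how one output row could be decoded outside the graph (the helper name and the NumPy post-processing are illustrative, not part of the project):

import numpy as np

def decode_output(v, chars):
    # v: a (1 + 7 * len(chars),) output row; chars plays the role of common.CHARS
    presence = 1.0 / (1.0 + np.exp(-v[0]))                 # sigmoid on the presence logit
    char_logits = v[1:].reshape(7, len(chars))             # one row of logits per character slot
    char_probs = np.exp(char_logits - char_logits.max(axis=1, keepdims=True))
    char_probs /= char_probs.sum(axis=1, keepdims=True)    # per-slot softmax
    return presence, char_probs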
Example #29
def autoencoder_contd(input_dim, representation):
	x = tf.placeholder(tf.float32, [None, input_dim]);
	high_decW=tf.Variable(
		initial_value=tf.random_normal(
			[representation,input_dim],
			-math.sqrt(6.0/(input_dim+representation)),
			math.sqrt(6.0/(input_dim+representation))),
		dtype=tf.float32,
		name='high_decW');
	# high_encW=tf.transpose(high_decW);
	high_encW=tf.Variable(
		initial_value=tf.random_normal(
			[input_dim, representation],
			-math.sqrt(6.0/(input_dim+representation)),
			math.sqrt(6.0/(input_dim+representation))),
		name='high_encW');
	high_encb=tf.Variable(tf.zeros([representation]),
		name='high_encb');
	z=tf.nn.sigmoid(tf.matmul(x,high_encW) + high_encb);
	hidden_weights=high_encW;
	
	high_decb=tf.Variable(
		tf.zeros([input_dim]),
		name='high_decb');
	y=tf.nn.sigmoid(tf.matmul(z,high_decW)+high_decb);
	cost=tf.nn.l2_loss(x-y);
	loss_per_pixel=tf.reduce_mean(tf.abs(x-y));
	return {'x':x,'z':z,'y':y,'cost':cost,
		'weights':hidden_weights,
		'encW':high_encW,'decW':high_decW,
		'encb':high_encb,'decb':high_decb,
		'ppx':loss_per_pixel
		};
Example #30
def conv_net(_X, _weights, _biases, _dropout):
    # Reshape input picture
    _X = tf.reshape(_X, shape=[-1, 28, 28, 1])

    # Convolution Layer
    conv1 = conv2d(_X, _weights['wc1'], _biases['bc1'])
    # Max Pooling (down-sampling)
    conv1 = max_pool(conv1, k=2)
    # Apply Dropout
    conv1 = tf.nn.dropout(conv1, _dropout)

    # Convolution Layer
    conv2 = conv2d(conv1, _weights['wc2'], _biases['bc2'])
    # Max Pooling (down-sampling)
    conv2 = max_pool(conv2, k=2)
    # Apply Dropout
    conv2 = tf.nn.dropout(conv2, _dropout)

    # Fully connected layer
    dense1 = tf.reshape(conv2, [-1, _weights['wd1'].get_shape().as_list()[0]]) # Reshape conv2 output to fit dense layer input
    dense1 = tf.nn.relu(tf.add(tf.matmul(dense1, _weights['wd1']), _biases['bd1'])) # Relu activation
    dense1 = tf.nn.dropout(dense1, _dropout) # Apply Dropout

    # Output, class prediction
    out = tf.add(tf.matmul(dense1, _weights['out']), _biases['out'])
    return out
Example #31
    def __init__(self, is_training, config, input_):
        self._input = input_

        batch_size = input_.batch_size
        num_steps = input_.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        def lstm_cell():
            # With the latest TensorFlow source code (as of Mar 27, 2017),
            # the BasicLSTMCell will need a reuse parameter which is unfortunately not
            # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
            # an argument check here:
            if 'reuse' in inspect.getargspec(
                    tf.contrib.rnn.BasicLSTMCell.__init__).args:
                return tf.contrib.rnn.BasicLSTMCell(
                    size,
                    forget_bias=0.0,
                    state_is_tuple=True,
                    reuse=tf.get_variable_scope().reuse)
            else:
                return tf.contrib.rnn.BasicLSTMCell(size,
                                                    forget_bias=0.0,
                                                    state_is_tuple=True)

        attn_cell = lstm_cell
        if is_training and config.keep_prob < 1:

            def attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(), output_keep_prob=config.keep_prob)

        cell = tf.contrib.rnn.MultiRNNCell(
            [attn_cell() for _ in range(config.num_layers)],
            state_is_tuple=True)

        self._initial_state = cell.zero_state(batch_size, data_type())

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size],
                                        dtype=data_type())
            inputs = tf.nn.embedding_lookup(embedding, input_.input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of models/tutorials/rnn/rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        # outputs, state = tf.contrib.rnn.static_rnn(
        #     cell, inputs, initial_state=self._initial_state)
        outputs = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size],
                                    dtype=data_type())
        softmax_b = tf.get_variable("softmax_b", [vocab_size],
                                    dtype=data_type())
        logits = tf.matmul(output, softmax_w) + softmax_b

        # Reshape logits to be 3-D tensor for sequence loss
        logits = tf.reshape(logits, [batch_size, num_steps, vocab_size])

        # use the contrib sequence loss and average over the batches
        loss = tf.contrib.seq2seq.sequence_loss(logits,
                                                input_.targets,
                                                tf.ones(
                                                    [batch_size, num_steps],
                                                    dtype=data_type()),
                                                average_across_timesteps=False,
                                                average_across_batch=True)

        # update the cost variables
        self._cost = cost = tf.reduce_sum(loss)
        self._final_state = state

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
Example #32
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(
                    ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(
                    qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            self.c_emb = c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        self.c_ck = c
        self.q_ck = q

        with tf.variable_scope("attention"):
            qc_att, self.qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train, give=True)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)
            self.att = att

        self.att_ck = att

        with tf.variable_scope("match"):
            self_att = dot_attention(
                att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        self.match_ck = match

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            pointer = ptr_net(batch=N, hidden=init.get_shape().as_list(
            )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1_distrib = tf.reduce_max(outer, axis=2)
            self.yp2_distrib = tf.reduce_max(outer, axis=1)
            self.yp1 = tf.argmax(self.yp1_distrib, axis=1)
            self.yp2 = tf.argmax(self.yp2_distrib, axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.y2))
            self.loss = tf.reduce_mean(losses + losses2)
Example #33
                         delimiter=',',
                         dtype=np.float32,
                         skiprows=1)

x_data = boston_train[:, :9]
y_data = boston_train[:, [-1]]
# print(x_data.shape)
# print(y_data.shape)

X = tf.placeholder(tf.float32, shape=[None, 9])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([9, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

hypothesis = tf.matmul(X, W) + b

cost = tf.reduce_mean(tf.square(hypothesis - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-6)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(1000001):
    cost_val, W_val, b_val, _ = \
        sess.run([cost,W,b,train],
                 feed_dict = {X:x_data, Y:y_data})
    if step % 10000 == 0:
        print(step, cost_val)  #, W_val, b_val)
Example #34
	num_epochs = 5
	print_freq = 1

# Transform labels into one-hot encoded form
y_train_OHEnc = tf.one_hot(y_train.copy(), num_classes)
y_val_OHEnc = tf.one_hot(y_val.copy(), num_classes)

# reset placeholders
x = tf.placeholder(tf.float32, [None, total_features])
y_ = tf.placeholder(tf.float32, [None, num_classes])

W_ae_list = [init_weight_variable([size_list[i], size_list[i + 1]]) \
	for i in range(num_layers)]
b_ae_list = [init_bias_variable([size_list[i + 1]])\
	for i in range(num_layers)]
a_list = [batch_nm(tf.nn.relu(tf.matmul(x, W_ae_list[0]) + b_ae_list[0]))]
for i in range(num_layers - 1):
	# batch normalization for post-activated values
	a_i = batch_nm(tf.nn.relu(tf.matmul(a_list[-1], W_ae_list[i + 1]) + b_ae_list[i + 1]))
	a_list.append(a_i)

# dropout
keep_prob = tf.placeholder(tf.float32)
a_drop = tf.nn.dropout(a_list[-1], keep_prob)
W_sm = init_weight_variable([size_list[-1], num_classes])
b_sm = init_bias_variable([num_classes])
y_sm = tf.matmul(a_drop, W_sm) + b_sm

cross_entropy = tf.reduce_mean(
		tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_sm))
train_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)
Example #35
 def forward_fc(self, inp, weights, reuse=False):
     hidden = normalize(tf.matmul(inp, weights['w1']) + weights['b1'], activation=tf.nn.relu, reuse=reuse, scope='0')
     for i in range(1,len(self.dim_hidden)):
         hidden = normalize(tf.matmul(hidden, weights['w'+str(i+1)]) + weights['b'+str(i+1)], activation=tf.nn.relu, reuse=reuse, scope=str(i+1))
     return tf.matmul(hidden, weights['w'+str(len(self.dim_hidden)+1)]) + weights['b'+str(len(self.dim_hidden)+1)]
Example #36
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]

# Declare batch size
batch_size = 100

# Initialize placeholders
x_data = tf.placeholder(shape=[None, 2], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Create variables for linear regression
A = tf.Variable(tf.random_normal(shape=[2,1]))
b = tf.Variable(tf.random_normal(shape=[1,1]))

# Declare model operations
model_output = tf.sub(tf.matmul(x_data, A), b)

# Declare vector L2 'norm' function squared
l2_norm = tf.reduce_sum(tf.square(A))

# Declare loss function
# = max(0, 1-pred*actual) + alpha * L2_norm(A)^2
# L2 regularization parameter, alpha
alpha = tf.constant([0.01])
# Margin term in loss
classification_term = tf.reduce_mean(tf.maximum(0., tf.sub(1., tf.mul(model_output, y_target))))
# Put terms together
loss = tf.add(classification_term, tf.mul(alpha, l2_norm))

# Declare prediction function
prediction = tf.sign(model_output)
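Written out, the loss assembled above is the soft-margin SVM objective

\[
L = \frac{1}{n}\sum_{i=1}^{n} \max\!\bigl(0,\; 1 - y_i\,(x_i^{\top}A - b)\bigr) \;+\; \alpha\,\lVert A \rVert_2^2 ,
\]

i.e. max(0, 1 - pred*actual) averaged over the batch, plus the alpha-weighted squared L2 norm of A.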
Example #37
    def __init__(self, cfg, vocab_counts):
        # add data placeholders
        self.left_context = tf.placeholder(name="left_context", shape=[None, None], dtype=tf.int32)
        self.left_seq_len = tf.placeholder(name="left_seq_len", shape=[None], dtype=tf.int32)
        self.right_context = tf.placeholder(name="right_context", shape=[None, None], dtype=tf.int32)
        self.right_seq_len = tf.placeholder(name="right_seq_len", shape=[None], dtype=tf.int32)
        self.verb = tf.placeholder(name="verb", shape=[None], dtype=tf.int32)

        # add hyper-parameter placeholders
        self.batch_size = tf.placeholder(name="batch_size", dtype=tf.int32)
        self.is_train = tf.placeholder(name="is_train", shape=[], dtype=tf.bool)
        self.drop_rate = tf.placeholder(name="dropout_rate", dtype=tf.float32)
        self.lr = tf.placeholder(name="learning_rate", dtype=tf.float32)

        # build embedding lookup table
        with tf.device("/gpu:0"):
            with tf.variable_scope("context_lookup_table"):
                self.word_embeddings = tf.Variable(np.load(cfg.pretrained_context)["embeddings"],
                                                   name="word_embeddings",
                                                   dtype=tf.float32,
                                                   trainable=cfg.tune_emb)
                self.word_embeddings = tf.concat([tf.zeros([1, cfg.word_dim]), self.word_embeddings[1:, :]], axis=0)

            with tf.variable_scope("target_lookup_table"):
                self.verb_embeddings = tf.Variable(np.load(cfg.pretrained_target)["embeddings"],
                                                   name="verb_embeddings",
                                                   dtype=tf.float32,
                                                   trainable=cfg.tune_emb)
                #self.verb_embeddings = tf.concat([tf.zeros([1, cfg.word_dim]), self.verb_embeddings[1:, :]], axis=0)

            # negative sampling
            self.neg_ids, _, _ = (tf.nn.fixed_unigram_candidate_sampler(
                true_classes=tf.cast(tf.expand_dims(self.verb, axis=1), dtype=tf.int64),
                num_true=1,
                num_sampled=cfg.neg_sample,
                unique=True,
                range_max=cfg.verb_size,
                distortion=0.75,
                unigrams=vocab_counts))

            print('neg_ids : ', self.neg_ids)

        # embedding lookup
        # with tf.device("/gpu:0"):
            with tf.variable_scope("embedding_lookup"):
                left_context_emb = tf.nn.embedding_lookup(self.word_embeddings, self.left_context)
                right_context_emb = tf.nn.embedding_lookup(self.word_embeddings, self.right_context)
                verb_emb = tf.nn.embedding_lookup(self.verb_embeddings, self.verb)
                neg_verb_emb = tf.nn.embedding_lookup(self.verb_embeddings, self.neg_ids)

        # left context bi-lstm
        with tf.device("/gpu:0"):
            with tf.variable_scope("right_context_representation"):
                cell_fw = LSTMCell(num_units=cfg.num_units)
                cell_bw = LSTMCell(num_units=cfg.num_units)
                h_rc, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, right_context_emb,
                                                    sequence_length=self.right_seq_len,
                                                    dtype=tf.float32,
                                                    time_major=False,
                                                    scope="bi_lstm")
                h_rc = tf.concat(h_rc, axis=-1)
                # self-attention
                h_rc = self_attention(h_rc, name="self_attn_right")
                r_weight = tf.get_variable(name="r_weight",
                                           shape=[2 * cfg.num_units, 2 * cfg.num_units],
                                           dtype=tf.float32)
                h_rc = tf.nn.tanh(tf.matmul(h_rc, r_weight))
                print("right context shape: {}".format(h_rc.get_shape().as_list()))

            with tf.variable_scope("left_context_representation"):
                cell_fw = LSTMCell(num_units=cfg.num_units)
                cell_bw = LSTMCell(num_units=cfg.num_units)
                h_lc, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, left_context_emb,
                                                    sequence_length=self.left_seq_len,
                                                    dtype=tf.float32,
                                                    time_major=False,
                                                    scope="bi_lstm")

                h_lc = tf.concat(h_lc, axis=-1)  # shape = (batch_size, max_len, 2 * num_units)

                # self-attention
                h_lc = self_attention(h_lc, name="self_attn_left")  # shape = (batch_size, 2 * num_units)
                l_weight = tf.get_variable(name="l_weight",
                                           shape=[2 * cfg.num_units, 2 * cfg.num_units],
                                           dtype=tf.float32)
                h_lc = tf.nn.tanh(tf.matmul(h_lc, l_weight))

                print("left context shape: {}".format(h_lc.get_shape().as_list()))

            # with tf.device("/gpu:0"):

        with tf.device("/gpu:1"):
            with tf.variable_scope("neural_tensor_network"):
                T = tf.get_variable(name="T",
                                    shape=[cfg.output_units, 2 * cfg.num_units, 2 * cfg.num_units],
                                    dtype=tf.float32)
                W = tf.get_variable(name="W",
                                    shape=[4 * cfg.num_units, cfg.output_units],
                                    dtype=tf.float32)
                b = tf.get_variable(name="b",
                                    shape=[cfg.output_units],
                                    dtype=tf.float32)
                # compute tensors
                ff_product = tf.matmul(tf.concat([h_lc, h_rc], axis=-1), W)
                bilinear_list = []
                for k in range(cfg.output_units):
                    cur_res = tf.reduce_sum(tf.matmul(h_lc, T[k]) * h_rc, axis=1)
                    bilinear_list.append(cur_res)
                context = tf.nn.tanh(tf.reshape(tf.concat(bilinear_list, axis=0), shape=[-1, cfg.output_units]) +
                                     ff_product + b)  # shape = (batch_size, output_units)
                print("context representation shape: {}".format(context.get_shape().as_list()))

        # with tf.device("/gpu:1"):
            with tf.variable_scope("verb_representation"):
                target_verb = ffn_layer(verb_emb, cfg.num_units, cfg.output_units, scope="ffn_layer")
                print("verb representation shape: {}".format(target_verb.get_shape().as_list()))
                tf.get_variable_scope().reuse_variables()
                negative_verbs = ffn_layer(neg_verb_emb, cfg.num_units, cfg.output_units, scope="ffn_layer")
                print("negative verb shape: {}".format(negative_verbs.get_shape().as_list()))

            with tf.variable_scope("compute_loss"):
                true_logits = tf.reduce_sum(context * target_verb, axis=1)
                print("true logits shape: {}".format(true_logits.get_shape().as_list()))
                neg_logits = tf.matmul(context, tf.transpose(negative_verbs, [1, 0]))
                print("negative logits shape: {}".format(neg_logits.get_shape().as_list()))

        # with tf.device("/cpu:0"):

            with tf.variable_scope("nce_loss"):
                # cross-entropy(logits, labels)
                true_xent = tf.nn.sigmoid_cross_entropy_with_logits(logits=true_logits,
                                                                    labels=tf.ones_like(true_logits))
                sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(logits=neg_logits,
                                                                       labels=tf.zeros_like(neg_logits))

                # NCE-loss is the sum of the true and noise (sampled words) contributions, averaged over the batch.
                self.loss = (tf.reduce_sum(true_xent) + tf.reduce_sum(sampled_xent)) / tf.cast(self.batch_size,
                                                                                               dtype=tf.float32)
            optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
            self.train_op = optimizer.minimize(self.loss)
Example #38
def model_fn(features, labels, mode):
    input_layer = tf.reshape(
        features['angular'],
        shape=[-1, cfg.anguler_shape[0], cfg.anguler_shape[1], 6])

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    net = cnn_model(input_layer, is_training, '')

    with tf.variable_scope('Reshape_cnn'):
        output_shape = net.get_shape().as_list(
        )  # [batch,height,width,features]
        net = tf.transpose(net, [0, 2, 1, 3])
        net = tf.reshape(
            net,
            shape=[-1, output_shape[2], output_shape[1] * output_shape[3]])

    with tf.variable_scope('bi_GRU'):
        fw_cell_list = [
            tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.GRUCell(
                1024, kernel_initializer=tf.orthogonal_initializer),
                                          state_keep_prob=0.5)
            for _ in range(3)
        ]
        bw_cell_list = [
            tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.GRUCell(
                1024, kernel_initializer=tf.orthogonal_initializer),
                                          state_keep_prob=0.5)
            for _ in range(3)
        ]

        # fw_cell_list = [
        #     tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.GRUCell(256, kernel_initializer=tf.orthogonal_initializer),
        #                                   input_keep_prob=0.8, output_keep_prob=0.8) for _ in range(3)]
        # bw_cell_list = [
        #     tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.GRUCell(256, kernel_initializer=tf.orthogonal_initializer),
        #                                   input_keep_prob=0.8, output_keep_prob=0.8) for _ in range(3)]

        multi_rnn_fW_cell = tf.nn.rnn_cell.MultiRNNCell(fw_cell_list)
        multi_rnn_bw_cell = tf.nn.rnn_cell.MultiRNNCell(bw_cell_list)

        rnn_outputs, (last_state_fw,
                      last_state_bw) = tf.nn.bidirectional_dynamic_rnn(
                          cell_fw=multi_rnn_fW_cell,
                          cell_bw=multi_rnn_bw_cell,
                          inputs=net,
                          dtype=tf.float32)

        # rnn_outputs_merged = tf.concat(rnn_outputs, 2)
        # rnn_finial = tf.unstack(rnn_outputs_merged, rnn_outputs_merged.get_shape().as_list()[1], 1)[-1]

        # record rnn cells
        for var in tf.global_variables():
            scope = var.name.split('/')[0]
            if scope == 'bi_GRU':
                gates_candidate = var.name.split('/')[-2]
                fw_cell = var.name.split('/')[2] + '_' + var.name.split(
                    '/')[-4] + '_'
                kernal_bise = var.name.split('/')[-1].split(':')[0]
                if gates_candidate == 'candidate':
                    tf.summary.histogram('bi_GRU_' + fw_cell + kernal_bise,
                                         var)

    with tf.variable_scope('dense_layer'):
        rnn_outputs_merged = tf.concat(rnn_outputs, 2)
        rnn_finial = tf.squeeze(rnn_outputs_merged, 1)

        weight = tf.get_variable(
            'birnn_out_weight', [2 * 1024, 1024],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        bise = tf.get_variable(
            'birnn_out_bise', [1024],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        net = tf.matmul(rnn_finial, weight) + bise

        # net = tf.layers.dense(inputs=rnn_finial, units=2048, activation=tf.nn.relu)
        # net = tf.layers.dense(inputs=last_state_fw[-1] + last_state_bw[-1], units=512, activation=tf.nn.relu)
        net = tf.layers.dropout(inputs=net, rate=0.4, training=is_training)
        logits = tf.layers.dense(inputs=net,
                                 units=cfg.num_class,
                                 activation=None)  # keep the logits linear; softmax is applied below

    predictions = {
        'classes': tf.argmax(tf.nn.softmax(logits),
                             axis=1,
                             name='predict_class'),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=tf.argmax(tf.nn.softmax(logits),
                                                         axis=1))
    # tf.metrics.accuracy returns (value, update_op); print and summarize the update op
    accuracy = (accuracy[0], tf.Print(accuracy[1], [accuracy[1]], 'Accuracy: '))
    tf.summary.scalar('train_accuracy', accuracy[1])

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_op):
            optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    eval_metric = {
        'accuracy':
        tf.metrics.accuracy(labels=labels, predictions=predictions['classes'])
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric)
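For context, a model_fn with this PREDICT/TRAIN/EVAL structure is normally handed to a tf.estimator.Estimator and driven by an input_fn. The sketch below shows one plausible wiring; make_input_fn, the reuse of the 'angular' feature key, the model_dir path, and the train_x/train_y arrays are assumptions for illustration only, not part of the original code.

import tensorflow as tf

def make_input_fn(features, labels, batch_size=32, shuffle=True):
    # features: numpy array of angular frames, labels: integer class ids (both assumed)
    def input_fn():
        ds = tf.data.Dataset.from_tensor_slices(({'angular': features}, labels))
        if shuffle:
            ds = ds.shuffle(1000)
        return ds.repeat().batch(batch_size)
    return input_fn

estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='/tmp/angular_model')
# estimator.train(input_fn=make_input_fn(train_x, train_y), steps=1000)
# estimator.evaluate(input_fn=make_input_fn(test_x, test_y, shuffle=False), steps=100)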
    def call(self, inputs, training=None):

        # bi-directional recurrence
        def input_recurrence(initializer, elems):
            x_f_t, x_b_t = elems
            h_f_tm1, h_b_tm1 = initializer

            h_f_t = self.GRU_f(inputs=(tf.nn.embedding_lookup(self.We, x_f_t),
                                       h_f_tm1))
            h_b_t = self.GRU_b(inputs=(tf.nn.embedding_lookup(self.We, x_b_t),
                                       h_b_tm1))
            return [h_f_t, h_b_t]

        [h_f_t, h_b_t] = tf.scan(
            fn=input_recurrence,
            elems=[inputs, inputs[::-1]],  # forward and backward sequences
            initializer=[self.GRU_f.h0, self.GRU_b.h0])

        # 0-axis is time steps, 1-axis is batch size and 2-axis is hidden layer size
        context = tf.concat([h_f_t, h_b_t[::-1]], axis=2)
        #projected_context = tf.matmul(context, self.Wa_c) + self.ba for each tensor slice
        projected_context = tf.matmul(
            context,
            tf.tile(tf.expand_dims(self.Wa_c, 0),
                    tf.stack([tf.shape(context)[0], 1, 1]))) + self.ba

        def output_recurrence(initializer, elems):
            x_t = elems
            h_tm1, _, _ = initializer

            # Attention model
            h_a = tf.nn.tanh(projected_context + tf.matmul(h_tm1, self.Wa_h))

            #alphas = tf.exp(tf.matmul(h_a, self.Wa_y))
            #alphas = tf.reshape(alphas, [tf.shape(alphas)[0], tf.shape(alphas)[1]]) # drop 2-axis (sized 1) is replaced by:
            #sess.run(tf.reshape(tf.matmul(tf.reshape(x, [-1, tf.shape(x)[-1]]), tf.expand_dims(z,-1)), tf.shape(x)[:2]))
            alphas = tf.exp(
                tf.reshape(
                    tf.matmul(tf.reshape(h_a, [-1, tf.shape(h_a)[-1]]),
                              tf.expand_dims(self.Wa_y, -1)),
                    tf.shape(h_a)[:2]))
            alphas = alphas / tf.reduce_sum(alphas, axis=0, keepdims=True)
            weighted_context = tf.reduce_sum(context * alphas[:, :, None],
                                             axis=0)

            h_t = self.GRU(inputs=(x_t, h_tm1))

            # Late fusion
            lfc = tf.matmul(weighted_context, self.Wf_c)  # late fused context
            fw = tf.nn.sigmoid(
                tf.matmul(lfc, self.Wf_f) + tf.matmul(h_t, self.Wf_h) +
                self.bf)  # fusion weights
            hf_t = lfc * fw + h_t  # weighted fused context + hidden state

            z = tf.matmul(hf_t, self.Wy) + self.by
            y_t = z  #tf.nn.softmax(z)

            return [h_t, hf_t, y_t]

        [_, self.last_hidden_states, self.y] = tf.scan(
            fn=output_recurrence,
            elems=context[
                1:],  # ignore the 1st word in context, because there's no punctuation before that
            initializer=[
                self.GRU.h0, self.GRU.h0,
                tf.zeros([self.minibatch_size, self.y_vocabulary_size])
            ])

        return self.y
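The late-fusion step inside output_recurrence gates the attended context with a sigmoid computed from both the projected context and the recurrent state before adding it back to that state. A minimal isolated sketch of just that step is shown below; the sizes and the *_demo variable names are illustrative assumptions.

import tensorflow as tf

batch, hidden = 2, 16
h_t = tf.random_normal([batch, hidden])               # stand-in recurrent hidden state
weighted_context = tf.random_normal([batch, 2 * hidden])

Wf_c = tf.get_variable("Wf_c_demo", [2 * hidden, hidden])
Wf_f = tf.get_variable("Wf_f_demo", [hidden, hidden])
Wf_h = tf.get_variable("Wf_h_demo", [hidden, hidden])
bf = tf.get_variable("bf_demo", [hidden], initializer=tf.zeros_initializer())

lfc = tf.matmul(weighted_context, Wf_c)               # late fused context
fw = tf.nn.sigmoid(tf.matmul(lfc, Wf_f) + tf.matmul(h_t, Wf_h) + bf)  # fusion weights
hf_t = lfc * fw + h_t                                 # gated context added to the state

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(tf.shape(hf_t)))                   # -> [2 16]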
Example #40
0
import tensorflow as tf
import numpy as np

xy = np.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, shape=[None, 8])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([8, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        sess.run(train, feed_dict=feed)
        if step % 200 == 0:
            print(step, sess.run(cost, feed_dict=feed))

    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=feed)
Example #41
0
def embedding_postprocessor(input_tensor,
                            use_token_type=False,
                            token_type_ids=None,
                            token_type_vocab_size=16,
                            token_type_embedding_name="token_type_embeddings",
                            use_position_embeddings=True,
                            position_embedding_name="position_embeddings",
                            initializer_range=0.02,
                            max_position_embeddings=512,
                            dropout_prob=0.1):
  """Performs various post-processing on a word embedding tensor.

  Args:
    input_tensor: float Tensor of shape [batch_size, seq_length,
      embedding_size].
    use_token_type: bool. Whether to add embeddings for `token_type_ids`.
    token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
      Must be specified if `use_token_type` is True.
    token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
    token_type_embedding_name: string. The name of the embedding table variable
      for token type ids.
    use_position_embeddings: bool. Whether to add position embeddings for the
      position of each token in the sequence.
    position_embedding_name: string. The name of the embedding table variable
      for positional embeddings.
    initializer_range: float. Range of the weight initialization.
    max_position_embeddings: int. Maximum sequence length that might ever be
      used with this model. This can be longer than the sequence length of
      input_tensor, but cannot be shorter.
    dropout_prob: float. Dropout probability applied to the final output tensor.

  Returns:
    float tensor with same shape as `input_tensor`.

  Raises:
    ValueError: One of the tensor shapes or input values is invalid.
  """
  input_shape = get_shape_list(input_tensor, expected_rank=3)
  batch_size = input_shape[0]
  seq_length = input_shape[1]
  width = input_shape[2]

  output = input_tensor

  if use_token_type:
    if token_type_ids is None:
      raise ValueError("`token_type_ids` must be specified if "
                       "`use_token_type` is True.")
    token_type_table = tf.get_variable(
        name=token_type_embedding_name,
        shape=[token_type_vocab_size, width],
        initializer=create_initializer(initializer_range))
    # This vocab will be small so we always do one-hot here, since it is always
    # faster for a small vocabulary.
    flat_token_type_ids = tf.reshape(token_type_ids, [-1])
    one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size)
    token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
    token_type_embeddings = tf.reshape(token_type_embeddings,
                                       [batch_size, seq_length, width])
    output += token_type_embeddings

  if use_position_embeddings:
    #assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
    #with tf.control_dependencies():
    full_position_embeddings = tf.get_variable(
        name=position_embedding_name,
        shape=[max_position_embeddings, width],
        initializer=create_initializer(initializer_range))
    # Since the position embedding table is a learned variable, we create it
    # using a (long) sequence length `max_position_embeddings`. The actual
    # sequence length might be shorter than this, for faster training of
    # tasks that do not have long sequences.
    #
    # So `full_position_embeddings` is effectively an embedding table
    # for position [0, 1, 2, ..., max_position_embeddings-1], and the current
    # sequence has positions [0, 1, 2, ... seq_length-1], so we can just
    # perform a slice.
    position_embeddings = tf.slice(full_position_embeddings, [0, 0],
                                    [seq_length, -1])
    num_dims = len(output.shape.as_list())

    # Only the last two dimensions are relevant (`seq_length` and `width`), so
    # we broadcast among the first dimensions, which is typically just
    # the batch size.
    position_broadcast_shape = []
    for _ in range(num_dims - 2):
      position_broadcast_shape.append(1)
    position_broadcast_shape.extend([seq_length, width])
    position_embeddings = tf.reshape(position_embeddings,
                                      position_broadcast_shape)
    output += position_embeddings

  output = layer_norm_and_dropout(output, dropout_prob)
  return output
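The position-embedding branch above slices a [max_position_embeddings, width] table down to the current sequence length and relies on broadcasting to add it to every example in the batch. A self-contained sketch of that broadcast, with small assumed sizes and a stand-in embedding table, is shown below.

import tensorflow as tf

batch, seq_length, width, max_position = 2, 5, 8, 512

output = tf.zeros([batch, seq_length, width])      # stand-in word embeddings
full_position_embeddings = tf.get_variable(
    "position_embeddings_demo", shape=[max_position, width],
    initializer=tf.truncated_normal_initializer(stddev=0.02))

# slice the table to the current length, then broadcast-add over the batch dimension
position_embeddings = tf.slice(full_position_embeddings, [0, 0], [seq_length, -1])
position_embeddings = tf.reshape(position_embeddings, [1, seq_length, width])
output += position_embeddings

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(tf.shape(output)))              # -> [2 5 8]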
Example #42
0
with tf.name_scope("Conv_2"):
    w2 = weight([5, 5, 16, 36])
    b2 = bias([36])
    Conv_2 = conv2d(C1_Pool, w2) + b2
    C2 = tf.nn.relu(Conv_2)

with tf.name_scope("C2_Pool"):
    C2_Pool = average_pool_2x2(C2)

with tf.name_scope("Flatten"):
    Flatten = tf.reshape(C2_Pool,[-1,36])

with tf.name_scope("Hidden_layer_1"):
    w3 = weight([36,24])
    b3 = bias([24])
    D_Hidden = tf.nn.relu(tf.matmul(Flatten, w3)+b3)
    D_Hidden_Dropout = tf.nn.dropout(D_Hidden, dropout)

with tf.name_scope("Output_layer"):
    w4 = weight([24,10])
    b4 = bias([10])
    logits = tf.matmul(D_Hidden_Dropout, w4) + b4
    y_predict = tf.nn.softmax(logits)

# Adjust our model

with tf.name_scope("Optimizer"):
    # pass the raw logits (not the softmax output) to softmax_cross_entropy_with_logits
    loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_function)

with tf.name_scope("Accuracy"):
    correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y, 1))
Example #43
0
# ######################################## difference from rosettta
DIM_NUM = real_X.shape[1]

X = tf.placeholder(tf.float32, [None, DIM_NUM])
Y = tf.placeholder(tf.float32, [None, 1])
print(X)
print(Y)

# initialize W & b
W = tf.Variable(tf.zeros([DIM_NUM, 1]), dtype=tf.float32, name='w')
b = tf.Variable(tf.zeros([1]), dtype=tf.float32, name='b')
print(W)
print(b)

# predict
pred_Y = tf.sigmoid(tf.matmul(X, W) + b)
print(pred_Y)

# loss
logits = tf.matmul(X, W) + b
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits)
loss = tf.reduce_mean(loss)
print(loss)

# optimizer
train = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
print(train)

init = tf.global_variables_initializer()
print(init)
Example #44
0
def attention_layer(from_tensor,
                    to_tensor,
                    attention_mask=None,
                    num_attention_heads=1,
                    size_per_head=512,
                    query_act=None,
                    key_act=None,
                    value_act=None,
                    attention_probs_dropout_prob=0.0,
                    initializer_range=0.02,
                    do_return_2d_tensor=False,
                    batch_size=None,
                    from_seq_length=None,
                    to_seq_length=None):
  """Performs multi-headed attention from `from_tensor` to `to_tensor`.

  This is an implementation of multi-headed attention based on "Attention
  Is All You Need". If `from_tensor` and `to_tensor` are the same, then
  this is self-attention. Each timestep in `from_tensor` attends to the
  corresponding sequence in `to_tensor`, and returns a fixed-width vector.

  This function first projects `from_tensor` into a "query" tensor and
  `to_tensor` into "key" and "value" tensors. These are (effectively) a list
  of tensors of length `num_attention_heads`, where each tensor is of shape
  [batch_size, seq_length, size_per_head].

  Then, the query and key tensors are dot-producted and scaled. These are
  softmaxed to obtain attention probabilities. The value tensors are then
  interpolated by these probabilities, then concatenated back to a single
  tensor and returned.

  In practice, the multi-headed attention is done with transposes and
  reshapes rather than actual separate tensors.

  Args:
    from_tensor: float Tensor of shape [batch_size, from_seq_length,
      from_width].
    to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
    attention_mask: (optional) int32 Tensor of shape [batch_size,
      from_seq_length, to_seq_length]. The values should be 1 or 0. The
      attention scores will effectively be set to -infinity for any positions in
      the mask that are 0, and will be unchanged for positions that are 1.
    num_attention_heads: int. Number of attention heads.
    size_per_head: int. Size of each attention head.
    query_act: (optional) Activation function for the query transform.
    key_act: (optional) Activation function for the key transform.
    value_act: (optional) Activation function for the value transform.
    attention_probs_dropout_prob: (optional) float. Dropout probability of the
      attention probabilities.
    initializer_range: float. Range of the weight initializer.
    do_return_2d_tensor: bool. If True, the output will be of shape [batch_size
      * from_seq_length, num_attention_heads * size_per_head]. If False, the
      output will be of shape [batch_size, from_seq_length, num_attention_heads
      * size_per_head].
    batch_size: (Optional) int. If the input is 2D, this might be the batch size
      of the 3D version of the `from_tensor` and `to_tensor`.
    from_seq_length: (Optional) If the input is 2D, this might be the seq length
      of the 3D version of the `from_tensor`.
    to_seq_length: (Optional) If the input is 2D, this might be the seq length
      of the 3D version of the `to_tensor`.

  Returns:
    float Tensor of shape [batch_size, from_seq_length,
      num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is
      true, this will be of shape [batch_size * from_seq_length,
      num_attention_heads * size_per_head]).

  Raises:
    ValueError: Any of the arguments or tensor shapes are invalid.
  """

  def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
                           seq_length, width):
    output_tensor = tf.reshape(
        input_tensor, [batch_size, seq_length, num_attention_heads, width])

    output_tensor = tf.transpose(output_tensor, [0, 2, 1, 3])
    return output_tensor

  from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
  to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])

  if len(from_shape) != len(to_shape):
    raise ValueError(
        "The rank of `from_tensor` must match the rank of `to_tensor`.")

  if len(from_shape) == 3:
    batch_size = from_shape[0]
    from_seq_length = from_shape[1]
    to_seq_length = to_shape[1]
  elif len(from_shape) == 2:
    if (batch_size is None or from_seq_length is None or to_seq_length is None):
      raise ValueError(
          "When passing in rank 2 tensors to attention_layer, the values "
          "for `batch_size`, `from_seq_length`, and `to_seq_length` "
          "must all be specified.")

  # Scalar dimensions referenced here:
  #   B = batch size (number of sequences)
  #   F = `from_tensor` sequence length
  #   T = `to_tensor` sequence length
  #   N = `num_attention_heads`
  #   H = `size_per_head`

  from_tensor_2d = reshape_to_matrix(from_tensor)
  to_tensor_2d = reshape_to_matrix(to_tensor)

  # `query_layer` = [B*F, N*H]
  query_layer = tf.layers.dense(
      from_tensor_2d,
      num_attention_heads * size_per_head,
      activation=query_act,
      name="query",
      kernel_initializer=create_initializer(initializer_range))

  # `key_layer` = [B*T, N*H]
  key_layer = tf.layers.dense(
      to_tensor_2d,
      num_attention_heads * size_per_head,
      activation=key_act,
      name="key",
      kernel_initializer=create_initializer(initializer_range))

  # `value_layer` = [B*T, N*H]
  value_layer = tf.layers.dense(
      to_tensor_2d,
      num_attention_heads * size_per_head,
      activation=value_act,
      name="value",
      kernel_initializer=create_initializer(initializer_range))

  # `query_layer` = [B, N, F, H]
  query_layer = transpose_for_scores(query_layer, batch_size,
                                     num_attention_heads, from_seq_length,
                                     size_per_head)

  # `key_layer` = [B, N, T, H]
  key_layer = transpose_for_scores(key_layer, batch_size, num_attention_heads,
                                   to_seq_length, size_per_head)

  # Take the dot product between "query" and "key" to get the raw
  # attention scores.
  # `attention_scores` = [B, N, F, T]
  attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
  attention_scores = tf.multiply(attention_scores,
                                 1.0 / math.sqrt(float(size_per_head)))

  if attention_mask is not None:
    # `attention_mask` = [B, 1, F, T]
    attention_mask = tf.expand_dims(attention_mask, axis=[1])

    # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
    # masked positions, this operation will create a tensor which is 0.0 for
    # positions we want to attend and -10000.0 for masked positions.
    adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0

    # Since we are adding it to the raw scores before the softmax, this is
    # effectively the same as removing these entirely.
    attention_scores += adder

  # Normalize the attention scores to probabilities.
  # `attention_probs` = [B, N, F, T]
  attention_probs = tf.nn.softmax(attention_scores)

  # This is actually dropping out entire tokens to attend to, which might
  # seem a bit unusual, but is taken from the original Transformer paper.
  attention_probs = dropout(attention_probs, attention_probs_dropout_prob)

  # `value_layer` = [B, T, N, H]
  value_layer = tf.reshape(
      value_layer,
      [batch_size, to_seq_length, num_attention_heads, size_per_head])

  # `value_layer` = [B, N, T, H]
  value_layer = tf.transpose(value_layer, [0, 2, 1, 3])

  # `context_layer` = [B, N, F, H]
  context_layer = tf.matmul(attention_probs, value_layer)

  # `context_layer` = [B, F, N, H]
  context_layer = tf.transpose(context_layer, [0, 2, 1, 3])

  if do_return_2d_tensor:
    # `context_layer` = [B*F, N*H]
    context_layer = tf.reshape(
        context_layer,
        [batch_size * from_seq_length, num_attention_heads * size_per_head])
  else:
    # `context_layer` = [B, F, N*H]
    context_layer = tf.reshape(
        context_layer,
        [batch_size, from_seq_length, num_attention_heads * size_per_head])

  return context_layer
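The shape comments above describe the core of this function: queries and keys are dot-producted per head, scaled by 1/sqrt(H), softmaxed over the to-sequence, and used to interpolate the values. The toy sketch below reproduces just that step with arbitrary assumed sizes, following the same B/N/F/T/H convention.

import math
import tensorflow as tf

B, N, F, T, H = 2, 4, 3, 5, 16
query_layer = tf.random_normal([B, N, F, H])
key_layer = tf.random_normal([B, N, T, H])
value_layer = tf.random_normal([B, N, T, H])

attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)  # [B, N, F, T]
attention_scores = attention_scores / math.sqrt(float(H))               # scale by 1/sqrt(H)
attention_probs = tf.nn.softmax(attention_scores)                       # softmax over T
context_layer = tf.matmul(attention_probs, value_layer)                 # [B, N, F, H]

with tf.Session() as sess:
    print(sess.run(tf.shape(context_layer)))                            # -> [2 4 3 16]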
def full_layer(input, size):
    in_size = int(input.get_shape()[1])
    W = weight_variable([in_size, size])
    b = bias_variable([size])
    return tf.matmul(input, W) + b
Example #46
0
def __main__(env_name='FrozenLake-v0', learning_rate=0.1, gamma=0.99, epsilon=0.1, num_episodes=5000,
             debug=False, debug_scale=500):

    # make this environment
    env = gym.make(env_name)

    # Reset tensorflow graph
    tf.reset_default_graph()

    # Feed-forward part of the network
    env_size = env.observation_space.n
    action_size = env.action_space.n
    inputs1 = tf.placeholder(shape=[1,env_size], dtype=tf.float32)
    W = tf.Variable(tf.random_uniform([env_size, action_size], 0, 0.01))
    Qout = tf.matmul(inputs1, W)
    predict = tf.argmax(Qout,1)

    # Define loss function (sum of square difference between target and prediction Q values)
    nextQ = tf.placeholder(shape=[1,action_size], dtype=tf.float32)
    loss = tf.reduce_sum(tf.square(nextQ - Qout))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    updateModel = optimizer.minimize(loss)

    # Train
    init = tf.initialize_all_variables()

    # Lists for total rewards and steps to get to final state in that episode
    total_rewards = []
    steps_to_complete = []
    with tf.Session() as sess:
        sess.run(init)
        for i in range(num_episodes):
            show_episode = (debug and i % debug_scale == 0)
            if show_episode:
                print("Showing epsiode {}/{} ...\n".format(i, num_episodes))
                time.sleep(1)
            state = env.reset()
            cumulative_reward = 0
            done = False
            current_step = 0
            # DQN
            while not done:

                if show_episode:
                    env.render()
                    time.sleep(0.5)

                current_step += 1
                # Predict next action using NN rather that querying Q-table
                one_hot_encoded_state = np.identity(env_size)[state:state+1]
                action, q_values = sess.run([predict, Qout], feed_dict={inputs1:one_hot_encoded_state})

                # Epsilon greedy (bigger epsilon more random)
                if np.random.rand(1) < epsilon:
                    # Replace chosen action with random action
                    action[0] = env.action_space.sample()

                # Take action (either chosen or random)
                new_state, reward, done, _ = env.step(action[0])

                one_hot_encoded_new_state = np.identity(env_size)[new_state:new_state+1]
                Q1 = sess.run(Qout, feed_dict={inputs1:one_hot_encoded_new_state})

                maxQ1 = np.max(Q1)
                targetQ = q_values
                targetQ[0,action[0]] = reward + gamma*maxQ1

                _,W1 = sess.run([updateModel,W], feed_dict={inputs1:one_hot_encoded_state, nextQ:targetQ})

                # For logging
                cumulative_reward += reward
                # Move to next state
                state = new_state

                if done:
                    epsilon = 1.0/((i/50) + 10)
                    break

            steps_to_complete.append(current_step)
            total_rewards.append(cumulative_reward)

            if show_episode:
                print("Completed with cumulative reward of {}...\n".format(cumulative_reward))
                time.sleep(1)

    print("Percent of success: {}...\n".format(sum(total_rewards)/num_episodes))
    return total_rewards
import tensorflow as tf
from numpy.random import RandomState
batch_size=20
w1=tf.Variable(tf.random_normal([2,3],seed=1))
w2=tf.Variable(tf.random_normal([3,1],seed=1))
x=tf.placeholder(tf.float32,name='x-input')
y_=tf.placeholder(tf.float32,name='y-input')

a=tf.matmul(x,w1)
y=tf.matmul(a,w2)
cross_entropy=-tf.reduce_mean(y_*tf.log(tf.clip_by_value(y,1e-10,1.0)))
train_step=tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

rdm=RandomState(1)
dataset_size=128
X=rdm.rand(dataset_size,2)
Y=[[int(x1+x2<1)] for (x1,x2) in X]

with tf.Session() as sess:
    init_op=tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(w1))
    print(sess.run(w2))
    
    STEPS=10000
    for i in range(STEPS):
        start=(i*batch_size)%dataset_size
        end=min(start+batch_size,dataset_size)
    
        sess.run(train_step,feed_dict={x:X[start:end],y_:Y[start:end]})
        if i%1000==0:
            # report the cross entropy over the whole dataset every 1000 steps
            total_cross_entropy=sess.run(cross_entropy,feed_dict={x:X,y_:Y})
            print("After %d training step(s), cross entropy on all data is %g"%(i,total_cross_entropy))
Example #48
0
    def add_local_attention_op(self):
        attention_entity_emb = self.pure_entity_embeddings if self.args.attention_ent_vecs_no_regularization else self.entity_embeddings
        with tf.variable_scope("attention"):
            K = self.args.attention_K
            left_mask = self._sequence_mask_v13(self.begin_span, K)   # number of words on the left (left window)
            right_mask = self._sequence_mask_v13(tf.expand_dims(self.words_len, 1) - self.end_span, K)
            # number of words on the right; we never take more than K even if more words exist.
            ctxt_mask = tf.concat([left_mask, right_mask], 2)  # [batch, num_of_spans, 2*K]
            ctxt_mask = tf.log(tf.minimum(1.0, tf.maximum(self.args.zero, ctxt_mask)))
               #  T,   T,  T, F,  F | T,  T,  F,  F,  F
               # -1, -2, -3, -4, -5  +0, +1, +2, +3, +4

            leftctxt_indices = tf.maximum(0, tf.range(-1, -K - 1, -1) +
                                          tf.expand_dims(self.begin_span, 2))  # [batch, num_mentions, K]
            rightctxt_indices = tf.minimum(tf.shape(self.pure_word_embeddings)[1] - 1, tf.range(K) +
                                           tf.expand_dims(self.end_span, 2))  # [batch, num_mentions, K]
            ctxt_indices = tf.concat([leftctxt_indices, rightctxt_indices], 2)  # [batch, num_mentions, 2*K]

            batch_index = tf.tile(tf.expand_dims(tf.expand_dims(tf.range(tf.shape(ctxt_indices)[0]), 1), 2),
                                  [1, tf.shape(ctxt_indices)[1], tf.shape(ctxt_indices)[2]])
            ctxt_indices = tf.stack([batch_index, ctxt_indices], 3)
            # [batch, num_of_spans, 2*K, 2]   the last dimension is row,col for gather_nd
            # [batch, num_of_spans, 2*K, [row,col]]

            att_x_w = self.pure_word_embeddings  # [batch, max_sent_len, 300]
            if self.args.attention_on_lstm and self.args.nn_components.find("lstm") != -1:
                # ablation: here the attention is computed on the output of the lstm layer x_k instead of using the
                # pure word2vec vectors. (word2vec used in paper).
                att_x_w = util.projection(self.context_emb, 300)  # if tf.shape(self.context_emb)[-1] != 300 else self.context_emb

            ctxt_word_emb = tf.gather_nd(att_x_w, ctxt_indices)
            # [batch, num_of_spans, 2K, emb_size]    emb_size = 300  only pure word emb used  (word2vec)
            #  and not after we add char emb and dropout

            # in this implementation we don't use the diagonal A and B arrays that are mentioned in
            # Ganea and Hoffmann 2017 (only used in the ablations)
            temp = attention_entity_emb
            if self.args.attention_use_AB:
                att_A = tf.get_variable("att_A", [300])
                temp = att_A * attention_entity_emb
            scores = tf.matmul(ctxt_word_emb, temp, transpose_b=True)
            scores = tf.reduce_max(scores, reduction_indices=[-1])  # max score of each word for each span acquired from any cand entity
            scores = scores + ctxt_mask   # some words are not valid out of window so we assign to them very low score
            top_values, _ = tf.nn.top_k(scores, self.args.attention_R)
            # [batch, num_of_spans, R]
            R_value = top_values[:, :, -1]    # [batch, num_of_spans]
            R_value = tf.maximum(self.args.zero, R_value)  # so to avoid keeping words that
            # have max score with any of the entities <=0 (also score = 0 can have words with
            # padding candidate entities)

            threshold = tf.tile(tf.expand_dims(R_value, 2), [1, 1, 2 * K])
            # [batch, num_of_spans, 2K]
            scores = scores - tf.to_float(((scores - threshold) < 0)) * 50  # 50 where score<thr, 0 where score>=thr
            scores = tf.nn.softmax(scores, dim=2)  # [batch, num_of_spans, 2K]
            scores = tf.expand_dims(scores, 3)  # [batch, num_of_spans, 2K, 1]
            #    [batch, num_of_spans, 2K, 1]  *  [batch, num_of_spans, 2K, emb_size]
            # =  [batch, num_of_spans, 2K, emb_size]
            x_c = tf.reduce_sum(scores * ctxt_word_emb, 2)  # =  [batch, num_of_spans, emb_size]
            if self.args.attention_use_AB:
                att_B = tf.get_variable("att_B", [300])
                x_c = att_B * x_c
            x_c = tf.expand_dims(x_c, 3)   # [batch, num_of_spans, emb_size, 1]
            # [batch, num_of_spans, 30, emb_size=300]  mul with  [batch, num_of_spans, emb_size, 1]
            x_e__x_c = tf.matmul(attention_entity_emb, x_c)  # [batch, num_of_spans, 30, 1]
            x_e__x_c = tf.squeeze(x_e__x_c, axis=3)  # [batch, num_of_spans, 30]
            self.attention_scores = x_e__x_c
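The thresholding above implements a hard local attention: only the R best-scoring context words per span survive, everything below the R-th score is pushed down by a large constant, and a softmax then concentrates the weight on the survivors. A small isolated sketch of that trick, with an assumed R and toy scores, is shown below.

import tensorflow as tf

scores = tf.constant([[[3.0, 1.0, 2.5, 0.1],
                       [0.2, 4.0, 0.3, 3.5]]])        # [batch=1, spans=2, words=4]
R = 2                                                  # keep the top-2 words per span

top_values, _ = tf.nn.top_k(scores, R)
threshold = top_values[:, :, -1:]                      # R-th best score, broadcasts over words
masked = scores - tf.to_float(scores < threshold) * 50.0  # push sub-threshold words far down
weights = tf.nn.softmax(masked, dim=2)

with tf.Session() as sess:
    print(sess.run(weights))   # per span, only the two surviving words carry real weight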
    # C3 conv Input=14*14*6 Output=10*10*16
    conv2_w = tf.Variable(tf.truncated_normal(shape=[5, 5, 6, 16], mean=0, stddev=0.1))
    conv2_b = tf.Variable(tf.zeros(16))
    conv2 = tf.nn.conv2d(pool_1, conv2_w, strides=[1, 1, 1, 1], padding='VALID') + conv2_b
    conv2 = tf.nn.relu(conv2)

    # S4 Pooling Input=10*10*16 Output=5*5*16
    pool_2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Flatten Input=5*5*16 Output=400
    fc1  = tf.reshape(pool_2,[-1,400])

    # C5 conv Input=5*5*16=400 Output=120
    fc1_w = tf.Variable(tf.truncated_normal(shape=(400, 120), mean=0, stddev=0.1))
    fc1_b = tf.Variable(tf.zeros(120))
    fc1 = tf.matmul(fc1, fc1_w) + fc1_b

    # F6 Input=120 OutPut=84
    fc2_w = tf.Variable(tf.truncated_normal(shape=(120, 84), mean=0, stddev=0.1))
    fc2_b = tf.Variable(tf.zeros(84))
    fc2 = tf.matmul(fc1, fc2_w) + fc2_b
    fc2 = tf.nn.relu(fc2)

    # F7 Input=84  Output=10
    fc3_w = tf.Variable(tf.truncated_normal(shape=(84, 10), mean=0, stddev=0.1))
    fc3_b = tf.Variable(tf.zeros(10))
    y_conv = tf.matmul(fc2, fc3_w) + fc3_b


    # Instead of applying softmax first and then computing the cross-entropy,
    # we compute it directly with tf.nn.softmax_cross_entropy_with_logits
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))  # y_ is the label placeholder (assumed)
    def __init__(self, params, hidden_weights=None):
        self.params = params
        self.network_shape = self.params['network_shape']
        self.input_dim = self.network_shape[0]
        self.output_dim = self.network_shape[-1]
        self.batch_size = self.params['batch_size']
        self.hidden_weights = hidden_weights
        # self.weights = self.initialize_weights()
        # self.mirror_weights = self.initialize_mirror_weights()
        # self.readout_weights = self.initialize_readout_weights()
        # self.hs = self.initialize_hs()
        # self.hidden_states = self.initialize_hidden_states()
        self.tensorboard_dir = self.params['tensorboard_dir']

        self.activation_function = self.params['activation_function']
        self.optimizer_ = self.params['optimizer']
        # model
        self.input = tf.placeholder(tf.float32, [None, self.input_dim],
                                    name="input")
        self.output = tf.placeholder(tf.float32, [None, self.output_dim],
                                     name="output")
        self.activation_patterns = {}
        self.hidden_state_activation_patterns = {}
        self.activation = self.input
        self.hidden_states = {}
        self.hidden_states_update_ops = {}
        for i in range(1, len(self.network_shape) - 1):
            with tf.name_scope("layer{0}".format(i)):
                h = tf.Variable(tf.truncated_normal([self.network_shape[i]]),
                                name="hidden_state",
                                trainable=False)
                # h = tf.truncated_normal([self.batch_size, self.network_shape[i]])
                self.hidden_states["hs_{0}".format(i)] = h
                Utils.variable_summaries(
                    self.hidden_states["hs_{0}".format(i)], "hs_{0}".format(i))
        if self.hidden_weights is not None:
            H_tune = tf.Variable(1.0, trainable=True, name="H_tune")
            Utils.variable_summaries(H_tune, "H_tune")
        else:
            H_tune = tf.Variable(1, trainable=False, name="H_tune")
        for i in range(len(self.network_shape) - 1):
            with tf.name_scope("layer{0}".format(i + 1)):
                if i < len(self.network_shape) - 2:
                    with tf.name_scope("hidden"):

                        # input weight and bias
                        W = tf.Variable(tf.random_normal(
                            [self.network_shape[i], self.network_shape[i + 1]],
                            stddev=0.05),
                                        name="W")
                        bW = tf.Variable(tf.random_normal(
                            [self.network_shape[i + 1]], stddev=0.05),
                                         name="bW")
                        Utils.variable_summaries(W, "W")
                        Utils.variable_summaries(bW, "bW")
                        H_name = "H_{0}".format(i + 1)
                        if self.hidden_weights is not None and H_name in self.hidden_weights.keys(
                        ):
                            H = tf.Variable(
                                self.hidden_weights[H_name].astype('float32'),
                                dtype=tf.float32,
                                trainable=False,
                                name="H")

                        else:
                            H = tf.Variable(tf.random_normal([
                                self.network_shape[i + 1],
                                self.network_shape[i + 1]
                            ],
                                                             stddev=0.05),
                                            trainable=False,
                                            name="H")

                        input_for_hidden = tf.matmul(self.activation, W) + bW
                        tiled_h = tf.reshape(
                            tf.tile(self.hidden_states["hs_{0}".format(i + 1)],
                                    [self.batch_size]), [self.batch_size, -1])
                        hidden_update = tf.nn.tanh(
                            tf.add(
                                input_for_hidden,
                                tf.matmul(tiled_h, tf.scalar_mul(H_tune, H))))

                    with tf.name_scope("mirror"):
                        # mirror input and bias
                        M = tf.Variable(tf.random_normal(
                            [self.network_shape[i], self.network_shape[i + 1]],
                            stddev=0.05),
                                        name="M")
                        bM = tf.Variable(tf.random_normal(
                            [self.network_shape[i + 1]], stddev=0.05),
                                         name="bM")
                        Utils.variable_summaries(M, "M")
                        Utils.variable_summaries(bM, "bM")
                        input_for_mirror = tf.nn.tanh(
                            tf.matmul(self.activation, M) + bM)

                    with tf.name_scope("readout"):
                        # readout weights and biases
                        R = tf.Variable(tf.random_normal([
                            self.network_shape[i + 1],
                            self.network_shape[i + 1]
                        ],
                                                         stddev=0.05),
                                        name="R")
                        bR = tf.Variable(tf.random_normal(
                            [self.network_shape[i + 1]], stddev=0.05),
                                         name="bR")
                        Utils.variable_summaries(R, "R")
                        Utils.variable_summaries(bR, "bR")
                        readout = self.activation_function(
                            tf.matmul(hidden_update, R) + bR)

                    with tf.name_scope("activation"):
                        self.activation = self.activation_function(
                            tf.multiply(readout, input_for_mirror))
                    # self.hidden_state_activation_patterns['hidden_state_layer_{0}'.format(i + 1)] = self.hidden_states[self.hidden_states["hs_{0}".format(i+1)]]
                    self.hidden_states_update_ops["hs_{0}".format(
                        i + 1)] = self.hidden_states["hs_{0}".format(
                            i + 1)].assign(hidden_update[0])
                    # self.hidden_states["hs_{0}".format(i+1)] = hidden_update
                else:
                    W = tf.Variable(tf.random_normal(
                        [self.network_shape[i], self.network_shape[i + 1]],
                        stddev=0.05),
                                    name="W")
                    bW = tf.Variable(tf.random_normal(
                        [self.network_shape[i + 1]], stddev=0.05),
                                     name="bW")
                    Utils.variable_summaries(W, "W")
                    Utils.variable_summaries(bW, "bW")
                    with tf.name_scope("activation"):
                        self.activation = self.activation_function(
                            tf.matmul(self.activation, W) + bW)
                act = self.activation
                if i > 0:
                    self.activation_patterns['layer_{0}'.format(i)] = act

        # cost
        with tf.name_scope("cost"):
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.activation, labels=self.output))
            tf.summary.scalar('cost', self.cost)

        with tf.name_scope("accuracy"):
            correct_prediction = tf.equal(tf.argmax(self.activation, 1),
                                          tf.argmax(self.output, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar('accuracy', self.accuracy)

        self.optimizer = self.optimizer_.minimize(self.cost)

        self.sess = tf.Session()

        self.merged = tf.summary.merge_all()
        self.summ_writer = tf.summary.FileWriter(self.tensorboard_dir,
                                                 self.sess.graph)

        init = tf.global_variables_initializer()

        self.sess.run(init)
Example #51
0
 def attention(t):
     before_att = activation(tf.matmul(t, W_hsz) + w_z)
     att = tf.matmul(before_att, v)  # [batch_size, 1]
     return att
Example #52
0
 def call(self, inputs):
     return tf.matmul(inputs, self.w) + self.b
Example #53
0
def inference(images_placeholder, keep_prob):
    # Initialize the weights from a truncated normal distribution with stddev 0.1
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    # Initialize the biases to the constant 0.1
    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    # Build a convolution layer
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    # Build a pooling layer
    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
    
    # Reshape the input to 28x28x3
    x_image = tf.reshape(images_placeholder, [-1, 28, 28, 3])

    # Build convolution layer 1
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    # Build pooling layer 1
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    
    # Build convolution layer 2
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    # Build pooling layer 2
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)

    # Build fully connected layer 1
    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7*7*64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        # Apply dropout
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Build fully connected layer 2
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    # Normalize with the softmax function
    with tf.name_scope('softmax') as scope:
        y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    # Return a probability-like score for each label
    return y_conv
Example #54
0
 def out_layer(slice):
     out = activation(tf.matmul(slice, W_hh) + w_h)
     return out
h_pool3 = max_pool_4x4(h_conv3)

#fourth convolution and max_pool layer
W_conv4 = weight_variable([3, 3, 128, 256])
b_conv4 = bias_variable([256])
h_conv4 = tf.nn.relu(conv2d(h_pool3, W_conv4) + b_conv4)
#h_pool4 = max_pool_4x4(h_conv4)

h_pool4 = spp_layer(h_conv4)

# Flatten into a fully connected layer and process it with an MLP
reshape = tf.reshape(h_pool4, [batch_size, -1])
dim = reshape.get_shape()[1].value
W_fc1 = weight_variable([dim, 1024])
b_fc1 = bias_variable([1024])
h_fc1 = tf.nn.relu(tf.matmul(reshape, W_fc1) + b_fc1)

#dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

W_fc2 = weight_variable([1024, 82])
b_fc2 = bias_variable([82])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

# Loss function and optimization algorithm
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
Example #56
0
    def sketch_step(tensor, cum_attention, active_mask, temper):

        def csoftmax(ten, u, mask):
            """
            Compute the constrained softmax (csoftmax);
            See paper "Learning What's Easy: Fully Differentiable Neural Easy-First Taggers"
            on https://andre-martins.github.io/docs/emnlp2017_final.pdf (page 4)

            :param ten: input tensor
            :param u: cumulative attention see paper
            :param mask: mask with active elements
            :return: distribution
            """

            shape_t = ten.shape
            shape_u = u.shape
            assert shape_u == shape_t

            # mean
            ten = ten - tf.reduce_mean(ten, axis=1, keep_dims=True)

            neg_mask = tf.ones_like(mask) - mask

            # calculate new distribution with attention on distribution 'b'
            Q = tf.exp(ten)
            Z = tf.reduce_sum(Q*mask, axis=1, keep_dims=True)/(tf.ones(shape=[shape_t[0], 1]) -
                                                               tf.reduce_sum(neg_mask*u, axis=1, keep_dims=True))

            # guard against NaN and inf
            z_mask = tf.cast(tf.less_equal(Z, tf.zeros_like(Z)), dtype=tf.float32)
            Z = Z + z_mask

            A = Q / Z

            # verification of the condition and modification of masks
            t_mask = tf.to_float(tf.less_equal(A, u))
            f_mask = tf.to_float(tf.less(u, A))

            alpha = A * t_mask + u * f_mask

            mask = mask * t_mask

            return alpha, mask

        def attention(t):
            before_att = activation(tf.matmul(t, W_hsz) + w_z)
            att = tf.matmul(before_att, v)  # [batch_size, 1]
            return att

        tensor = tf.transpose(tensor, [1, 0, 2])  # [L; batch_size; 2*state_size*(2*window_size + 1)]

        attentions = tf.map_fn(attention, tensor, dtype=tf.float32)  # [L, batch_size, 1]
        attentions = tf.reshape(attentions, [batch_size, L]) - cum_attention*discount_factor  # [batch_size, L]

        U = tf.ones_like(cum_attention) - cum_attention
        constrained_weights, new_mask = csoftmax(attentions, U, active_mask)  # [batch_size, L]

        tensor = tf.transpose(tensor, [1, 0, 2])  # [batch_size; L; 2*state_size*(2*window_size + 1)]

        if not full_model:
            # TODO: check
            cn = tf.reduce_sum(tensor*tf.expand_dims(constrained_weights, [2]), axis=1)  # [batch_size,
            #  2*state_size*(2*window_size + 1)]
            cn = tf.reshape(cn, [batch_size, 2*state_size*(2*window_size + 1)])  # [batch_size,
            #  2*state_size*(2*window_size + 1)]
            s = activation(tf.matmul(cn, W_hh) + w_h)  # [batch_size, state_size]

            s = tf.matmul(tf.expand_dims(constrained_weights, [2]), tf.expand_dims(s, [1]))  # [batch_size, L,
            #  state_size]
        else:
            def out_layer(slice):
                out = activation(tf.matmul(slice, W_hh) + w_h)
                return out

            tensor = tf.transpose(tensor, [1, 0, 2])  # [L; batch_size; 2*state_size*(2*window_size + 1)]
            s = tf.map_fn(out_layer, tensor, dtype=tf.float32)  # [L; batch_size; state_size]
            s = tf.transpose(s, [1, 0, 2])  # [batch_size; L; state_size]
            s = tf.expand_dims(constrained_weights, [2]) * s

        return s, constrained_weights, new_mask
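The csoftmax above renormalizes scores so that no position receives more weight than its remaining attention budget u, with exhausted positions contributing u directly. The toy run below evaluates one pass of that formula on assumed numbers to show the clipping behaviour; the inputs are arbitrary illustrative values.

import tensorflow as tf

ten = tf.constant([[1.0, 2.0, 0.5]])          # input scores for a batch of one
u = tf.constant([[0.2, 1.0, 1.0]])            # remaining (cumulative) attention budget
mask = tf.constant([[1.0, 1.0, 1.0]])         # all positions still active

ten = ten - tf.reduce_mean(ten, axis=1, keep_dims=True)
neg_mask = tf.ones_like(mask) - mask
Q = tf.exp(ten)
Z = tf.reduce_sum(Q * mask, axis=1, keep_dims=True) / (
    tf.ones([1, 1]) - tf.reduce_sum(neg_mask * u, axis=1, keep_dims=True))
A = Q / Z
t_mask = tf.to_float(tf.less_equal(A, u))
f_mask = tf.to_float(tf.less(u, A))
alpha = A * t_mask + u * f_mask               # weights clipped to the budget u

with tf.Session() as sess:
    print(sess.run(alpha))                    # sums to at most 1 and never exceeds u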
Example #57
0
(x_train, y_train), (x_test, y_test) = load_data()

# parameters
learning_rate = 0.001
training_epochs = 10
batch_size = 128
D = 3072  # number of features.
K = 10  # number of classes.

# input place holders
X = tf.placeholder(tf.float32, [None, D])
Y = tf.placeholder(tf.float32, [None, K])

W1 = tf.Variable(tf.random_normal([D, K]))
b1 = tf.Variable(tf.random_normal([K]))
hypothesis = tf.nn.relu(tf.matmul(X, W1) + b1)

x_train = np.reshape(x_train, (-1, 3072))
x_test = np.reshape(x_test, (-1, 3072))
y_train = tf.one_hot(y_train, 10)
y_test = tf.one_hot(y_test, 10)

# define cost/loss & optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
Example #58
0
def Inference(imgs):
    imgs = tf.cast(imgs, tf.float32)

    with tf.variable_scope("conv1") as scope:
        """
        weights = tf.get_variable('weights',
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        """
        weights = GenerateWegiths('weights', [3, 3, 3, 32])
        # tf.summary.scalar(scope.name+"/weights",weights)
        conv = tf.nn.conv2d(imgs, weights, strides=[1, 1, 1, 1], padding='VALID')
        """
        biases = tf.get_variable('biases',
                                 shape=[32],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        """
        biases = GenerateBias('biases', [32])
        # tf.summary.scalar(scope.name+"/biases",biases)
        preActivition = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(preActivition, name=scope.name)

    with tf.variable_scope("conv2") as scope:
        """
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 32, 32],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        """
        weights = GenerateWegiths('weights', [3, 3, 32, 32])
        # tf.summary.scalar(scope.name+'/weights',weights)
        conv = tf.nn.conv2d(conv1, weights, strides=[1, 1, 1, 1], padding='VALID')
        """
        biases = tf.get_variable('biases',
                                 shape=[32],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        """
        biases = GenerateBias('biases', [32])
        # tf.summary.scalar(scope.name+'/biases',biases)
        preActivition = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(preActivition, name=scope.name)

    with tf.variable_scope("MaxPool1") as scope:
        pool1 = tf.nn.max_pool(conv2, [1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pooling1')

    with tf.variable_scope("conv3") as scope:
        """
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 32, 64],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        """
        weights = GenerateWegiths('weights', [3, 3, 32, 64])
        # tf.summary.scalar(scope.name+'/weights',weights)
        conv = tf.nn.conv2d(pool1, weights, strides=[1, 1, 1, 1], padding='VALID')
        """
        biases = tf.get_variable('biases',
                                 shape=[64],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        """
        biases = GenerateBias('biases', [64])
        # tf.summary.scalar(scope.name+'/biases',biases)
        preActivition = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(preActivition, name=scope.name)

    with tf.variable_scope("conv4") as scope:
        """
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 64, 64],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        """
        weights = GenerateWegiths('weights', [3, 3, 64, 64])
        # tf.summary.scalar(scope.name+'/weights',weights)
        conv = tf.nn.conv2d(conv3, weights, strides=[1, 1, 1, 1], padding='VALID')
        """
        biases = tf.get_variable('biases',
                                 shape=[64],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        """
        biases = GenerateBias('biases', [64])
        # tf.summary.scalar(scope.name+'/biases',biases)
        preActivition = tf.nn.bias_add(conv, biases)
        conv4 = tf.nn.relu(preActivition, name=scope.name)

    with tf.variable_scope("MaxPool2") as scope:
        pool2 = tf.nn.max_pool(conv4, [1, 2, 2, 1], [1, 2, 2, 1], padding="VALID", name='pooling2')

    with tf.variable_scope("conv5") as scope:
        """
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 64, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        """
        weights = GenerateWegiths('weights', [3, 3, 64, 128])
        # tf.summary.scalar(scope.name+'/weights',weights)
        conv = tf.nn.conv2d(pool2, weights, strides=[1, 1, 1, 1], padding='VALID')
        """
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        """
        biases = GenerateBias('biases', [128])
        # tf.summary.scalar(scope.name+'/biases',biases)
        preActivition = tf.nn.bias_add(conv, biases)
        conv5 = tf.nn.relu(preActivition, name=scope.name)

    with tf.variable_scope("conv6") as scope:
        """
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 128, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        """
        weights = GenerateWegiths('weights', [3, 3, 128, 128])
        # tf.summary.scalar(scope.name+'/weights',weights)
        conv = tf.nn.conv2d(conv5, weights, strides=[1, 1, 1, 1], padding='VALID')
        """
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        """
        biases = GenerateBias('biases', [128])
        # tf.summary.scalar(scope.name+'/biases',biases)
        preActivition = tf.nn.bias_add(conv, biases)
        conv6 = tf.nn.relu(preActivition, name=scope.name)

    with tf.variable_scope("MaxPool3") as scope:
        pool3 = tf.nn.max_pool(conv6, [1, 2, 2, 1], [1, 2, 2, 1], padding="VALID", name='pooling3')

    with tf.variable_scope("FC1") as scope:
        tShape = pool3.get_shape()
        fc1Shape = tShape[1].value * tShape[2].value * tShape[3].value
        fc1 = tf.reshape(pool3, [-1, fc1Shape],name='fc1')
        #reshape = tf.reshape(pool3, [-1, fc1Shape])
        #fc1 = tf.nn.dropout(reshape, keepProb, name='fc1Dropout')

    with tf.variable_scope('FC21') as scope:
        """
        weights = tf.get_variable('weights',
                                  shape=[fc1Shape, 10],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[10],
                                 dtype=tf.float32,
                                 initializer=tf.truncated_normal_initializer(0.1))
        """
        weights = GenerateWegiths('weights', [fc1Shape, 10])
        biases = GenerateBias('biases', [10])
        fc21 = tf.matmul(fc1, weights) + biases

    with tf.variable_scope("FC22") as scope:
        """
        weights = tf.get_variable('weights',
                                  shape=[fc1Shape, 10],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[10],
                                 dtype=tf.float32,
                                 initializer=tf.truncated_normal_initializer(0.1))
        """
        weights = GenerateWegiths('weights', [fc1Shape, 10])
        biases = GenerateBias('biases', [10])
        fc22 = tf.matmul(fc1, weights) + biases

    with tf.variable_scope("FC23") as scope:
        """
        weights = tf.get_variable('weights',
                                  shape=[fc1Shape, 10],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[10],
                                 dtype=tf.float32,
                                 initializer=tf.truncated_normal_initializer(0.1))
        """
        weights = GenerateWegiths('weights', [fc1Shape, 10])
        biases = GenerateBias('biases', [10])
        fc23 = tf.matmul(fc1, weights) + biases

    return fc21, fc22, fc23
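# Hedged usage sketch, not part of the original example: the three heads
# returned above each produce [batch, 10] logits, so a natural training loss
# is one sparse softmax cross-entropy term per head, summed.  The builder
# name `build_network`, the input shape, and the label placeholder below are
# assumptions for illustration only.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 60, 160, 3], name='images')  # assumed input size
labels = tf.placeholder(tf.int64, [None, 3], name='labels')             # one class id per head

fc21, fc22, fc23 = build_network(images)  # stand-in name for the function defined above
loss = tf.add_n([
    tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels[:, i], logits=logits))
    for i, logits in enumerate([fc21, fc22, fc23])])
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)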
Example #59
0
def simple_nn_layer(x, y):
    return tf.nn.relu(tf.matmul(x, y))
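# Hedged usage sketch, added for illustration: evaluating simple_nn_layer on
# two small constant matrices in a TF 1.x session; the shapes are arbitrary.
import tensorflow as tf

a = tf.constant([[1.0, -2.0], [3.0, 4.0]])
b = tf.constant([[1.0, 0.0], [0.0, 1.0]])  # identity, so the matmul returns `a`
out = simple_nn_layer(a, b)
with tf.Session() as sess:
    print(sess.run(out))  # the ReLU zeroes the negative entry: [[1. 0.] [3. 4.]]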
Example #60
0
 def __init__(self, rnn_size, batch_size, learning_rate,
              training_seq_len, vocab_size, infer_sample=False):
     self.rnn_size = rnn_size
     self.vocab_size = vocab_size
     self.infer_sample = infer_sample
     self.learning_rate = learning_rate
     
     if infer_sample:
         self.batch_size = 1
         self.training_seq_len = 1
     else:
         self.batch_size = batch_size
         self.training_seq_len = training_seq_len
     
     self.lstm_cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
     self.initial_state = self.lstm_cell.zero_state(self.batch_size, tf.float32)
     
     self.x_data = tf.placeholder(tf.int32, [self.batch_size, self.training_seq_len])
     self.y_output = tf.placeholder(tf.int32, [self.batch_size, self.training_seq_len])
     
     with tf.variable_scope('lstm_vars'):
         # Softmax Output Weights
         W = tf.get_variable('W', [self.rnn_size, self.vocab_size], tf.float32, tf.random_normal_initializer())
         b = tf.get_variable('b', [self.vocab_size], tf.float32, tf.constant_initializer(0.0))
     
         # Define Embedding
         embedding_mat = tf.get_variable('embedding_mat', [self.vocab_size, self.rnn_size],
                                         tf.float32, tf.random_normal_initializer())
                                         
         embedding_output = tf.nn.embedding_lookup(embedding_mat, self.x_data)
         rnn_inputs = tf.split(axis=1, num_or_size_splits=self.training_seq_len, value=embedding_output)
         rnn_inputs_trimmed = [tf.squeeze(x, [1]) for x in rnn_inputs]
     
     # If we are inferring (generating text), we add a 'loop' function
     # Define how to get the i+1 th input from the i th output
     def inferred_loop(prev, count):
         # Apply hidden layer
         prev_transformed = tf.matmul(prev, W) + b
         # Get the index of the output (also don't run the gradient)
         prev_symbol = tf.stop_gradient(tf.argmax(prev_transformed, 1))
         # Get embedded vector
         output = tf.nn.embedding_lookup(embedding_mat, prev_symbol)
         return(output)
     
     decoder = tf.contrib.legacy_seq2seq.rnn_decoder
     outputs, last_state = decoder(rnn_inputs_trimmed,
                                   self.initial_state,
                                   self.lstm_cell,
                                   loop_function=inferred_loop if infer_sample else None)
     # Non inferred outputs
     output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, self.rnn_size])
     # Logits and output
     self.logit_output = tf.matmul(output, W) + b
     self.model_output = tf.nn.softmax(self.logit_output)
     
     loss_fun = tf.contrib.legacy_seq2seq.sequence_loss_by_example
     # Weight every timestep equally in the sequence loss.
     loss = loss_fun([self.logit_output], [tf.reshape(self.y_output, [-1])],
                     [tf.ones([self.batch_size * self.training_seq_len])])
     self.cost = tf.reduce_sum(loss) / (self.batch_size * self.training_seq_len)
     self.final_state = last_state
     gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tf.trainable_variables()), 4.5)
     optimizer = tf.train.AdamOptimizer(self.learning_rate)
     self.train_op = optimizer.apply_gradients(zip(gradients, tf.trainable_variables()))
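# Hedged training-loop sketch, not part of the original example.  The class
# name `LSTMModel` is a stand-in for whatever class wraps the __init__ above,
# and the random integer batches stand in for a real text batch generator.
import numpy as np
import tensorflow as tf

batch_size, seq_len, vocab_size = 100, 50, 8000
model = LSTMModel(rnn_size=128, batch_size=batch_size, learning_rate=0.001,
                  training_seq_len=seq_len, vocab_size=vocab_size)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        batch_x = np.random.randint(0, vocab_size, size=(batch_size, seq_len))
        batch_y = np.random.randint(0, vocab_size, size=(batch_size, seq_len))
        _, cost = sess.run([model.train_op, model.cost],
                           feed_dict={model.x_data: batch_x, model.y_output: batch_y})
        if step % 100 == 0:
            print(step, cost)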