Example #1
0
  def add_model(self, input_data):
    """Builds the prediction op: a linear layer followed by a softmax.

    The transformation applied to a batch of inputs is

    y = softmax(xW + b)

    W ("weights") and b ("bias") are created with tf.get_variable inside the
    'softmax' variable scope and are both initialized to zero. Their shapes
    come from self.config.n_features and self.config.n_classes.

    Args:
      input_data: A tensor of shape (batch_size, n_features).
    Returns:
      out: A tensor of shape (batch_size, n_classes)
    """
    ### YOUR CODE HERE
    n_features = self.config.n_features
    n_classes = self.config.n_classes
    zero_init = tf.constant_initializer(0.0)

    with tf.variable_scope('softmax'):
        W = tf.get_variable("weights", (n_features, n_classes),
                            initializer=zero_init)
        b = tf.get_variable("bias", (n_classes,),
                            initializer=zero_init)

    logits = tf.matmul(input_data, W) + b
    out = softmax(logits)
    ### END YOUR CODE
    return out
Example #2
0
def instance_norm(x, epsilon=1e-5):
    """Instance normalization with a learned scale and offset.

    See Ulyanov, D., Vedaldi, A., & Lempitsky, V. (2016).
    Instance Normalization: The Missing Ingredient for Fast Stylization,
    Retrieved from http://arxiv.org/abs/1607.08022

    Parameters
    ----------
    x : Tensor
        Input to normalize. Mean and variance are computed over axes 1 and 2
        and the learned parameters are sized by the last axis (presumably an
        NHWC image batch -- confirm against callers).
    epsilon : float, optional
        Added to the variance before taking the square root.

    Returns
    -------
    Tensor
        ``scale * (x - mean) / sqrt(var + epsilon) + offset``.
    """
    with tf.variable_scope('instance_norm'):
        mean, var = tf.nn.moments(x, [1, 2], keep_dims=True)
        param_shape = [x.get_shape()[-1]]
        gamma = tf.get_variable(
            name='scale',
            shape=param_shape,
            initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02))
        beta = tf.get_variable(
            name='offset',
            shape=param_shape,
            initializer=tf.constant_initializer(0.0))
        normalized = tf.div(x - mean, tf.sqrt(var + epsilon))
        return gamma * normalized + beta
Example #3
0
    def __call__(self, inputs, state, scope=None):
        """Run one step of a gated recurrent unit (GRU) with nunits cells.

        Builds the reset/update gates and the candidate state under the
        "Gates" and "Candidate" sub-scopes, then stores the created
        'Matrix' and 'Bias' variables of both in self.W and self.b
        (gates first, candidate second).

        Returns the new hidden state twice, as (output, state).
        """
        cell_scope = scope or type(self).__name__  # "GRUCell"
        with tf.variable_scope(cell_scope):
            with tf.variable_scope("Gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                gates = _linear([inputs, state],
                                2 * self._num_units, True, 1.0,
                                self.weights_init, self.trainable,
                                self.restore, self.reuse)
                reset_gate, update_gate = array_ops.split(1, 2, gates)
                reset_gate = self._inner_activation(reset_gate)
                update_gate = self._inner_activation(update_gate)
            with tf.variable_scope("Candidate"):
                candidate_pre = _linear([inputs, reset_gate * state],
                                        self._num_units, True, 0.,
                                        self.weights_init, self.trainable,
                                        self.restore, self.reuse)
                candidate = self._activation(candidate_pre)
            new_h = update_gate * state + (1 - update_gate) * candidate

            # Retrieve RNN Variables
            self.W, self.b = list(), list()
            for linear_scope in ('Gates/Linear', 'Candidate/Linear'):
                with tf.variable_scope(linear_scope, reuse=True):
                    self.W.append(tf.get_variable('Matrix'))
                    self.b.append(tf.get_variable('Bias'))

        return new_h, new_h
    def __init__(self, is_training, config):
        """Build a summed-embedding, two-class softmax classifier graph.

        Token ids of shape (batch_size, max_len) are embedded, the embeddings
        are summed over the sequence axis, and a linear layer produces two
        logits per example. The cost is the summed sparse softmax
        cross-entropy divided by batch_size.

        When is_training is false the constructor stops after the cost;
        otherwise it also creates a non-trainable learning-rate variable and
        a gradient-descent train op with gradients clipped to
        config.max_grad_norm by global norm.

        NOTE(review): the optimizer reads self.lr, which is not set here --
        presumably a property exposing self._lr defined elsewhere on the class.
        """
        batch_size = config.batch_size
        self.batch_size = batch_size
        embed_dim = config.hidden_size
        self.max_len = config.max_len
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, config.max_len])
        self._targets = tf.placeholder(tf.int32, [batch_size])

        embedding = tf.get_variable("embedding", [vocab_size, embed_dim])
        embedded = tf.nn.embedding_lookup(embedding, self._input_data)

        # Sum the token embeddings over the sequence axis.
        summed = tf.reduce_sum(embedded, 1)
        softmax_w = tf.get_variable("softmax_w", [embed_dim, 2])
        softmax_b = tf.get_variable("softmax_b", [2])

        logits = tf.matmul(summed, softmax_w) + softmax_b
        self._prediction = tf.nn.softmax(logits)

        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, self._targets)

        cost = tf.reduce_sum(per_example_loss) / batch_size
        self._cost = cost

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #5
0
def get_run_op():
  """Build a stack of square matmul layers spread across GPUs.

  A non-trainable `data` variable of shape [slice_size, hidden_size] is
  created on /gpu:0 and fed, once per cut, into FLAGS.num_layers layers.
  Layer i owns a trainable [hidden_size, hidden_size] weight 'fc<i>/w' and is
  placed on GPU `i * num_gpus // num_layers`. Within a layer the num_cuts
  matmuls are chained with control dependencies so they run one after the
  other.

  NOTE: no loss or optimizer is built; the returned train_op is a no-op that
  merely depends on every output of the last layer (forward pass only).

  Returns:
    (init_op, train_op): the variable initializer and the forward-pass op.

  Raises:
    ValueError: if FLAGS.num_layers is less than 1, since no train_op could
      be built.
  """
  if FLAGS.num_layers < 1:
    raise ValueError('FLAGS.num_layers must be at least 1, got {}'.format(
        FLAGS.num_layers))

  # Floor division: a tensor dimension must be an int even when `/` is true
  # division (Python 3 or `from __future__ import division`).
  slice_size = FLAGS.batch_size // FLAGS.num_cuts
  print('Slice size:{}'.format(slice_size))

  # Index 0 is a dummy op so that the tensor for cut k lives at index k + 1.
  last_fc = [tf.no_op()]
  with tf.device('/gpu:0'):
    data = tf.get_variable(
        name='data',
        shape=[slice_size, FLAGS.hidden_size],
        trainable=False)
  # Every cut of the first layer reads the same data variable.
  last_fc.extend([data] * FLAGS.num_cuts)

  train_op = None
  for i in xrange(FLAGS.num_layers):
    dev = '/gpu:%d' % (i * FLAGS.num_gpus // FLAGS.num_layers)
    with tf.device(dev), scopes.arg_scope([variables.variable], device=dev):
      tmp_fc = [tf.no_op()]
      with tf.variable_scope('fc%d' % i):
        w = tf.get_variable(
            name='w',
            shape=[FLAGS.hidden_size, FLAGS.hidden_size],
            trainable=True)
        for k in xrange(FLAGS.num_cuts):
          # Serialize the cuts of this layer: each matmul waits for the
          # previous one.
          with tf.control_dependencies([tmp_fc[-1]]):
            tmp_fc.append(tf.matmul(last_fc[k + 1], w))
      last_fc = tmp_fc
      if i == FLAGS.num_layers - 1:
        with tf.control_dependencies(last_fc):
          train_op = tf.no_op()

  init_op = tf.initialize_all_variables()

  return init_op, train_op
Example #6
0
    def add_model_vars(self):
        '''
        Creates the model parameters and the optimizer.

        Variables added to the graph:
            Composition/embedding:  tensor(vocab_size, embed_size)
            Composition/W1:         tensor(2 * embed_size, embed_size)
            Composition/b1:         tensor(1, embed_size)
            Projection/U:           tensor(embed_size, output_size)
            Projection/bs:          tensor(1, output_size)
        They are meant to be *reused* (looked up again by name) while building
        the computation graphs for composition and projection of each tree.

        Also sets self.optimizer (Adam with self.config.lr) and
        self.dummy_minimizer.
        '''
        embed_size = self.config.embed_size
        vocab_size = len(self.vocab)
        output_size = self.config.label_size

        composition_vars = (
            ("embedding", (vocab_size, embed_size)),
            ("W1", (2 * embed_size, embed_size)),
            ("b1", (1, embed_size)),
        )
        projection_vars = (
            ("U", (embed_size, output_size)),
            ("bs", (1, output_size)),
        )

        with tf.variable_scope('Composition'):
            ### YOUR CODE HERE
            for var_name, var_shape in composition_vars:
                tf.get_variable(var_name, shape=var_shape)
            ### END YOUR CODE
        with tf.variable_scope('Projection'):
            ### YOUR CODE HERE
            for var_name, var_shape in projection_vars:
                tf.get_variable(var_name, shape=var_shape)
            ### END YOUR CODE

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr)
        # dummy_total is a simple sum over all trainable variables. Minimizing
        # it makes the AdamOptimizer create its own variables now, so they
        # exist for initialization and before variables are restored later.
        # It should never actually get executed.
        dummy_total = tf.constant(0.0)
        for variable in tf.trainable_variables():
            dummy_total += tf.reduce_sum(variable)
        self.dummy_minimizer = self.optimizer.minimize(dummy_total)
Example #7
0
    def __call__(self, inputs, state, scope=None):
        """Run one step of a long short-term memory cell (LSTM).

        `state` is a (c, h) pair when self._state_is_tuple is set, otherwise
        a single tensor that is split in two along axis 1. The new state is
        returned in the same layout. The 'Linear' scope's 'Matrix' and 'Bias'
        variables are stored in self.W and self.b.

        Returns (new_h, new_state).
        """
        cell_scope = scope or type(self).__name__  # "BasicLSTMCell"
        with tf.variable_scope(cell_scope):
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = array_ops.split(1, 2, state)

            # Parameters of gates are concatenated into one multiply for efficiency.
            concat = _linear([inputs, h], 4 * self._num_units, True, 0.,
                             self.weights_init, self.trainable, self.restore,
                             self.reuse)
            input_gate, new_input, forget_gate, output_gate = \
                array_ops.split(1, 4, concat)

            # Cell state: keep part of the old state, add the gated new input.
            keep = self._inner_activation(forget_gate + self._forget_bias)
            kept_state = c * keep
            written = (self._inner_activation(input_gate) *
                       self._activation(new_input))
            new_c = kept_state + written
            new_h = self._activation(new_c) * self._inner_activation(output_gate)

            if self._state_is_tuple:
                new_state = _rnn_cell.LSTMStateTuple(new_c, new_h)
            else:
                new_state = array_ops.concat(1, [new_c, new_h])

            # Retrieve RNN Variables
            with tf.variable_scope('Linear', reuse=True):
                self.W = tf.get_variable('Matrix')
                self.b = tf.get_variable('Bias')

            return new_h, new_state
 def instantiate_weights(self):
     """Create the embedding and projection variables.

     All four variables live in the "embedding_projection" variable scope.
     Embedding and W_projection use self.initializer; Embedding_label and
     b_projection are created without an explicit initializer.
     """
     with tf.variable_scope("embedding_projection"):
         # Token embedding matrix: [vocab_size, embed_size]
         self.Embedding = tf.get_variable(
             "Embedding",
             shape=[self.vocab_size, self.embed_size],
             initializer=self.initializer)
         # Label embedding matrix: [num_classes, embed_size]
         self.Embedding_label = tf.get_variable(
             "Embedding_label",
             shape=[self.num_classes, self.embed_size],
             dtype=tf.float32)
         # Output projection: [sequence_length * d_model, num_classes]
         self.W_projection = tf.get_variable(
             "W_projection",
             shape=[self.sequence_length*self.d_model, self.num_classes],
             initializer=self.initializer)
         self.b_projection = tf.get_variable(
             "b_projection",
             shape=[self.num_classes])
Example #9
0
    def loss(self, logits, labels):
        """Adds loss ops to the computational graph.

        The loss is the summed sparse softmax cross-entropy plus an L2
        penalty, weighted by self.config.l2, on the existing variables
        'Composition/W1' and 'Projection/U'.

        Args:
            logits: tensor(num_nodes, output_size)
            labels: python list, len = num_nodes
        Returns:
            loss: tensor 0-D
        """
        # YOUR CODE HERE
        label_ids = tf.convert_to_tensor(labels, dtype=tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, label_ids)

        # Look up the already-created weights rather than creating new ones.
        with tf.variable_scope('Composition', reuse=True):
            W1 = tf.get_variable("W1")
        with tf.variable_scope('Projection', reuse=True):
            U = tf.get_variable("U")
        penalty = (tf.nn.l2_loss(W1) + tf.nn.l2_loss(U)) * self.config.l2

        loss = tf.reduce_sum(cross_entropy) + penalty
        # END YOUR CODE
        return loss
Example #10
0
def FullyConnected(x, out_dim,
                   W_init=None, b_init=None,
                   nl=tf.nn.relu, use_bias=True):
    """
    Fully-Connected layer.

    :param x: a tensor to be flattened except the first dimension.
    :param out_dim: output dimension
    :param W_init: initializer for W. default to
        `tf.uniform_unit_scaling_initializer(factor=1.43)`.
    :param b_init: initializer for b. default to `tf.constant_initializer()`.
    :param nl: nonlinearity, called as `nl(tensor, name='output')`.
        default to `relu`.
    :param use_bias: whether to use bias. a boolean default to True
    :returns: a 2D tensor
    """
    flat = batch_flatten(x)
    in_dim = flat.get_shape().as_list()[1]

    weight_init = W_init
    if weight_init is None:
        weight_init = tf.uniform_unit_scaling_initializer(factor=1.43)
    bias_init = b_init
    if bias_init is None:
        bias_init = tf.constant_initializer()

    W = tf.get_variable('W', [in_dim, out_dim], initializer=weight_init)
    if not use_bias:
        # The bias variable is only created when it is actually used.
        return nl(tf.matmul(flat, W), name='output')

    b = tf.get_variable('b', [out_dim], initializer=bias_init)
    return nl(tf.nn.xw_plus_b(flat, W, b), name='output')
Example #11
0
def wide_model(numeric_input, category_input, vocabs):
    """Build the logits of the wide (linear) part of the model.

    Column i of `category_input` is embedded with its own table "wideem<i>"
    of shape [vocabs[i], 8]. The embedded columns are concatenated along
    axis 1, joined with `numeric_input`, and multiplied by a single weight
    column "W" to give one logit per row.

    `vocabs` must contain at least one entry; otherwise no categorical
    features exist to size "W".
    """
    columns_first = tf.transpose(category_input)
    category_sum = None
    # Append each embedded category column to category_sum
    for idx in range(len(vocabs)):
        table = tf.get_variable(
            "wideem" + str(idx), [vocabs[idx], 8],
            initializer=tf.contrib.layers.xavier_initializer())
        # Pick one column from category input
        column = tf.gather(columns_first, [idx])[0]

        # Same as make [0001]*[w1,w2,w3,w4] = lookup w4
        embedded_col = embedding_ops.embedding_lookup_unique(table, column)

        if category_sum is not None:
            category_sum = tf.concat([category_sum, embedded_col], 1)
        else:
            category_sum = embedded_col

    tf.set_random_seed(1)
    n_inputs = numeric_input.shape[1] + category_sum.shape[1]
    w = tf.get_variable("W", [n_inputs, 1],
                        initializer=tf.contrib.layers.xavier_initializer())
    features = tf.concat([numeric_input, category_sum], 1)
    wmodel_logits_sum = tf.matmul(features, w)

    return wmodel_logits_sum
Example #12
0
def Linear(args, output_dim, bias=True, bias_init=0.0, scope=None):
    """Linear map of one or more 2-D tensors: concat(args, axis 1) * W (+ b).

    Args:
        args: a 2-D tensor, or a list/tuple of 2-D tensors whose second
            dimension is statically known.
        output_dim: number of output columns.
        bias: whether to add the bias term "b".
        bias_init: constant the bias is initialized with.
        scope: variable scope name; defaults to "linear".

    Returns:
        The 2-D result tensor.

    Raises:
        ValueError: if an argument is not 2-D or its second dimension is
            unknown (or zero).
    """
    if not isinstance(args, (list, tuple)):
        args = [args]

    shapes = [arg.get_shape().as_list() for arg in args]
    input_dim = 0
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2d arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        input_dim += shape[1]

    with tf.variable_scope(scope or "linear"):
        W = tf.get_variable("W", (input_dim, output_dim))

        # A single input needs no concatenation.
        stacked = args[0] if len(args) == 1 else tf.concat(1, args)
        result = tf.matmul(stacked, W)

        if not bias:
            return result

        b = tf.get_variable("b", (output_dim,),
                            initializer=tf.constant_initializer(bias_init))

    return result + b
	def _initialize_weights(self):
		"""Create the encoder and decoder variables of the autoencoder.

		Encoder layer i maps the previous layer width (self.n_input for
		i == 0) to self.hidden_units[i]. The decoder walks the hidden sizes
		in reverse and its last layer maps back to self.n_input. Weights use
		the Xavier initializer, biases start at zero.

		Every bias has the size of its layer's output (the second dimension
		of the matching weight), so that `matmul(x, W) + b` is well formed.

		Returns:
			dict mapping 'encoder<i>_W', 'encoder<i>_b', 'decoder<i>_W' and
			'decoder<i>_b' to the created variables.
		"""
		all_weights = dict()
		# Encoding layers
		for i, n_hidden in enumerate(self.hidden_units):
			weight_name = 'encoder%d_W' % i
			bias_name = 'encoder%d_b' % i
			if i == 0:
				weight_shape = [self.n_input, n_hidden]
			else:
				weight_shape = [self.hidden_units[i-1], n_hidden]

			all_weights[weight_name] = tf.get_variable(weight_name, weight_shape,
				initializer=tf.contrib.layers.xavier_initializer())
			all_weights[bias_name] = tf.get_variable(bias_name, [weight_shape[1]],
				initializer=tf.constant_initializer(0.0))

		# Decoding layers
		hidden_units_rev = self.hidden_units[::-1]
		for i, n_hidden in enumerate(hidden_units_rev):
			weight_name = 'decoder%d_W' % i
			bias_name = 'decoder%d_b' % i
			if i != len(hidden_units_rev) - 1: # not the last layer
				weight_shape = [n_hidden, hidden_units_rev[i+1]]
			else:
				weight_shape = [n_hidden, self.n_input]

			all_weights[weight_name] = tf.get_variable(weight_name, weight_shape,
				initializer=tf.contrib.layers.xavier_initializer())
			# The bias is added to the layer output, so it takes the output
			# width (weight_shape[1]), not the input width n_hidden.
			all_weights[bias_name] = tf.get_variable(bias_name, [weight_shape[1]],
				initializer=tf.constant_initializer(0.0))

		return all_weights
    def add_logits_op(self):
        """
        Adds logits to self

        Runs a bidirectional LSTM over self.word_embeddings, concatenates the
        forward and backward outputs, applies dropout, and projects every
        time step to self.ntags scores. The result is stored in self.logits
        with shape [-1, ntime_steps, ntags].
        """
        hidden_size = self.hidden_size
        with tf.variable_scope("bi-lstm"):
            cell_fw = tf.contrib.rnn.LSTMCell(hidden_size)
            cell_bw = tf.contrib.rnn.LSTMCell(hidden_size)
            rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                self.word_embeddings,
                sequence_length=self.sequence_lengths,
                dtype=tf.float32)
            output_fw, output_bw = rnn_outputs
            output = tf.concat([output_fw, output_bw], axis=-1)
            output = tf.nn.dropout(output, self.dropout)

        with tf.variable_scope("proj"):
            W = tf.get_variable("W", shape=[2*hidden_size, self.ntags],
                                dtype=tf.float32)

            b = tf.get_variable("b", shape=[self.ntags], dtype=tf.float32,
                                initializer=tf.zeros_initializer())

            # Flatten the time steps so that one matmul scores all of them,
            # then restore the (dynamic) time dimension.
            ntime_steps = tf.shape(output)[1]
            flat_output = tf.reshape(output, [-1, 2*hidden_size])
            pred = tf.matmul(flat_output, W) + b
            self.logits = tf.reshape(pred, [-1, ntime_steps, self.ntags])
def init():
    """Build a Decoder together with the inputs it is wired to.

    Q is a float placeholder of shape [batch_size, sequence_length, d_model],
    K_s is the embedding lookup of an int placeholder of decoder token ids,
    and the encoder output is stood in for by a randomly initialized
    variable "v_variable".

    Returns (decoder, Q, K_s).
    """
    # 1. hyper-parameters
    d_model = 512
    d_k = 64
    d_v = 64
    h = 8
    num_layer = 6
    sequence_length = 6
    decoder_sent_length = 6
    batch_size = 4 * 32
    vocab_size = 1000
    embed_size = d_model

    # 2. set Q, K, V
    initializer = tf.random_normal_initializer(stddev=0.1)
    Embedding = tf.get_variable("Embedding_d", shape=[vocab_size, embed_size],
                                initializer=initializer)
    # [batch_size, decoder_sent_length]
    decoder_input_x = tf.placeholder(tf.int32, [batch_size, decoder_sent_length],
                                     name="input_x")
    print("1.decoder_input_x:", decoder_input_x)
    # [batch_size, decoder_sent_length, embed_size]
    K_s = tf.nn.embedding_lookup(Embedding, decoder_input_x)
    Q = tf.placeholder(tf.float32, [batch_size, sequence_length, d_model],
                       name="input_x")
    K_v_encoder = tf.get_variable(
        "v_variable",
        shape=[batch_size, decoder_sent_length, d_model],
        initializer=initializer)
    print("2.output from encoder:",K_v_encoder)
    mask = get_mask(decoder_sent_length)
    decoder = Decoder(d_model, d_k, d_v, sequence_length, h, batch_size,
                      Q, K_s, K_v_encoder, decoder_sent_length,
                      mask=mask, num_layer=num_layer)
    return decoder, Q, K_s
Example #16
0
    def __init__(self, session, np_matrix, rank,
                 learning_rate=0.1):
        """Build the graph that factorizes np_matrix as W * H.

        W (rows x rank) is placed on '/job:ps/task:0' and H (rank x columns)
        on '/job:ps/task:1'. Both are drawn uniformly from
        [0, 2 * sqrt(mean(np_matrix) / rank)).

        The loss is the squared Frobenius distance between np_matrix and
        W * H plus INFINITY * sum(|x| - x) over the entries of W and H; that
        penalty is zero exactly when no entry is negative.

        Note that self.optimizer holds the op returned by `minimize`, not
        the optimizer object.
        """
        n_rows = np_matrix.shape[0]
        n_cols = np_matrix.shape[1]

        target = tf.constant(np_matrix, dtype=tf.float32)
        scale = 2 * np.sqrt(np_matrix.mean() / rank)
        initializer = tf.random_uniform_initializer(maxval=scale)

        with tf.device('/job:ps/task:0'):
            self.matrix_W = tf.get_variable(
                "W", (n_rows, rank), initializer=initializer
            )
        with tf.device("/job:ps/task:1"):
            self.matrix_H = tf.get_variable(
                "H", (rank, n_cols), initializer=initializer
            )

        reconstruction = tf.matmul(self.matrix_W, self.matrix_H)
        f_norm = tf.reduce_sum(tf.pow(target - reconstruction, 2))

        # |x| - x is 0 for x >= 0 and 2|x| for x < 0.
        negativity_w = tf.reduce_sum(tf.abs(self.matrix_W) - self.matrix_W)
        negativity_h = tf.reduce_sum(tf.abs(self.matrix_H) - self.matrix_H)
        constraint = INFINITY * (negativity_w + negativity_h)
        self.loss = f_norm + constraint
        self.constraint = constraint

        self.session = session
        descent = tf.train.GradientDescentOptimizer(learning_rate)
        self.optimizer = descent.minimize(self.loss)
    def project_bilstm_layer(self, lstm_outputs, name=None):
        """
        hidden layer between lstm layer and logits

        The LSTM outputs are flattened to rows of width 2 * hidden_unit,
        passed through a tanh layer of width hidden_unit, then projected to
        num_labels scores.

        :param lstm_outputs: tensor reshaped here to [-1, 2 * hidden_unit]
        :param name: outer variable scope name, "project" when empty
        :return: [batch_size, seq_length, num_labels]
        """
        outer_scope = name if name else "project"
        with tf.variable_scope(outer_scope):
            with tf.variable_scope("hidden"):
                hidden_w = tf.get_variable(
                    "W", shape=[self.hidden_unit * 2, self.hidden_unit],
                    dtype=tf.float32,
                    initializer=self.initializers.xavier_initializer())

                hidden_b = tf.get_variable(
                    "b", shape=[self.hidden_unit], dtype=tf.float32,
                    initializer=tf.zeros_initializer())
                flat = tf.reshape(lstm_outputs, shape=[-1, self.hidden_unit * 2])
                hidden = tf.tanh(tf.nn.xw_plus_b(flat, hidden_w, hidden_b))

            # project to score of tags
            with tf.variable_scope("logits"):
                logits_w = tf.get_variable(
                    "W", shape=[self.hidden_unit, self.num_labels],
                    dtype=tf.float32,
                    initializer=self.initializers.xavier_initializer())

                logits_b = tf.get_variable(
                    "b", shape=[self.num_labels], dtype=tf.float32,
                    initializer=tf.zeros_initializer())

                pred = tf.nn.xw_plus_b(hidden, logits_w, logits_b)
            return tf.reshape(pred, [-1, self.seq_length, self.num_labels])
    def create_model(self):
        """Build the LSTM graph and store its pieces on self.

        Creates the input/target placeholders, looks inputs up in a constant
        embedding, unrolls a multi-layer LSTM over num_steps, and projects
        the outputs to vocab_size logits. Also creates a tf.train.Saver.
        Progress is printed to stdout.
        """
        # Trailing comma: stay on the same line so "done" follows later.
        print "Setting up model",
        sys.stdout.flush()
        # placeholders for data + targets
        self._input_data = tf.placeholder(tf.int32, shape=(self.batch_size, self.num_steps))
        self._targets = tf.placeholder(tf.int32, [self.batch_size, self.num_steps])

        # set up lookup function
        # The embedding is a graph constant built from self.saved_embedding,
        # so it is not a trainable variable.
        self.embedding = tf.constant(self.saved_embedding,name="embedding")
        self.inputs = tf.nn.embedding_lookup(self.embedding, self._input_data)
        # lstm model
        self.lstm_cell = rnn_cell.BasicLSTMCell(self.lstm_size)
        # The same cell object is repeated num_layers times.
        self.cell = rnn_cell.MultiRNNCell([self.lstm_cell] * self.num_layers)


        self._initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        from tensorflow.models.rnn import rnn
        # Split along axis 1 into num_steps pieces and drop that axis,
        # giving one input tensor per time step.
        self.inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, self.num_steps, self.inputs)]
        self.outputs, self.states = rnn.rnn(self.cell, self.inputs, initial_state=self._initial_state)

        # Join the per-step outputs and flatten to rows of width lstm_size
        # so a single matmul produces the logits.
        self.output = tf.reshape(tf.concat(1, self.outputs), [-1, self.lstm_size])
        self.softmax_w = tf.get_variable("softmax_w", [self.lstm_size, self.vocab_size])
        self.softmax_b = tf.get_variable("softmax_b", [self.vocab_size])
        self.logits = tf.matmul(self.output, self.softmax_w) + self.softmax_b
        #print  "self.states.get_shape():",self.states.get_shape()
        #print  "tf.shape(self.states)",tf.shape(self.states)
        self._final_state = self.states
        self.saver = tf.train.Saver()
        
        #delete data to save memory if network is used for sampling only
        # NOTE(review): `del` raises AttributeError if self.data was never
        # set -- confirm it always exists when only_for_sampling is true.
        if self.only_for_sampling:
            del self.data
            
        print "done"
Example #19
0
def affine_reuseable(x, shape):
    """Return relu(x * W + b).

    W has the given `shape` (random-normal initialized) and b has shape
    [shape[1]] (zero initialized). Both are obtained through
    tf.get_variable under the names "W" and "b", so the enclosing variable
    scope decides whether they are created or reused.
    """
    weights = tf.get_variable(
        "W", shape, initializer=tf.random_normal_initializer())
    biases = tf.get_variable(
        "b", [shape[1]], initializer=tf.constant_initializer(0.0))
    pre_activation = tf.matmul(x, weights) + biases
    return tf.nn.relu(pre_activation)
Example #20
0
def tf_baseline_conv2d():
    """Run the TensorFlow side of the crosstalk conv2d baseline.

    Builds a 1-D convolution over character embeddings, registers its
    parameters ('conv2d') and its transposed output ('conv2d_out') with the
    crosstalk instance, then in a session saves both, loads the parameters
    back and asserts that the output still compares equal.

    Sizes, `input_data` and `workdir` are read from module-level names.
    """
    import tensorflow as tf
    import cntk.contrib.crosstalk.crosstalk_tensorflow as crtf
    ci = crtf.instance

    tf.reset_default_graph()

    x = tf.placeholder(tf.float32, [batch_size, num_chars, char_emb_dim])
    filter_bank = tf.get_variable(
        "char_filter_bank",
        shape=[filter_width, char_emb_dim, num_filters],
        dtype=tf.float32)
    bias = tf.get_variable("char_filter_biases", shape=[num_filters],
                           dtype=tf.float32)

    conv = tf.nn.conv1d(x, filter_bank, stride=1, padding='VALID') + bias
    # Swap the last two axes and add a trailing axis of size 1.
    char_conv = tf.expand_dims(tf.transpose(conv, perm=[0,2,1]), -1)

    conv_params = cstk.Conv2DArgs(
        W=crtf.find_trainable('char_filter_bank'),
        b=crtf.find_trainable('char_filter_biases'))
    conv_attr = cstk.Conv2DAttr(
        filter_shape=(filter_width, char_emb_dim,),
        num_filters=num_filters)
    ci.watch(conv_params, 'conv2d', var_type=cstk.Conv2DAttr, attr=conv_attr)
    ci.watch(char_conv, 'conv2d_out', var_type=crtf.VariableType) # note the output is transposed to NCHW

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        feed = {x:input_data}
        ci.set_workdir(workdir)
        ci.set_data(sess, feed)
        for watched in ('conv2d_out', 'conv2d'):
            ci.fetch(watched, save=True)
        ci.assign('conv2d', load=True)
        assert ci.compare('conv2d_out')
        ci.reset()
        sess.close()
Example #21
0
	def _conv_layer(self, name, input_var, stride, in_channels, out_channels, options=None):
		"""Build a (transposed) convolution layer inside variable scope `name`.

		The layer is: convolution + bias, then optional batch normalization,
		then optional dropout, then the activation.

		Args:
			name: variable scope of the layer; also used as the name of the
				activation op.
			input_var: input tensor (assumed NHWC -- TODO confirm with callers).
			stride: stride used for both spatial dimensions.
			in_channels: number of input channels.
			out_channels: number of output channels.
			options: optional dict with any of
				'activation': 'relu' (default), 'sigmoid' or 'none'
				'dropout': drop probability, or None (default) for no dropout
				'padding': padding mode, default 'SAME'
				'batchnorm': whether to apply batch norm, default True
				'transpose': use conv2d_transpose instead, default False

		Returns:
			The output tensor of the layer.

		Raises:
			ValueError: if options['activation'] is not a supported value.
		"""
		# Avoid a mutable default argument; a missing dict means "all defaults".
		if options is None:
			options = {}
		activation = options.get('activation', 'relu')
		dropout = options.get('dropout', None)
		padding = options.get('padding', 'SAME')
		batchnorm = options.get('batchnorm', True)
		transpose = options.get('transpose', False)

		# Fail before any variables or ops are added to the graph.
		if activation not in ('relu', 'sigmoid', 'none'):
			raise ValueError('invalid activation {} specified'.format(activation))

		with tf.variable_scope(name) as scope:
			# conv2d_transpose expects the filter as [h, w, out, in].
			if not transpose:
				filter_shape = [KERNEL_SIZE, KERNEL_SIZE, in_channels, out_channels]
			else:
				filter_shape = [KERNEL_SIZE, KERNEL_SIZE, out_channels, in_channels]
			kernel = tf.get_variable(
				'weights',
				shape=filter_shape,
				initializer=tf.truncated_normal_initializer(stddev=math.sqrt(2.0 / KERNEL_SIZE / KERNEL_SIZE / in_channels)),
				dtype=tf.float32
			)
			biases = tf.get_variable(
				'biases',
				shape=[out_channels],
				initializer=tf.constant_initializer(0.0),
				dtype=tf.float32
			)
			if not transpose:
				output = tf.nn.bias_add(
					tf.nn.conv2d(
						input_var,
						kernel,
						[1, stride, stride, 1],
						padding=padding
					),
					biases
				)
			else:
				batch = tf.shape(input_var)[0]
				# The size of axis 1 is used for both output spatial
				# dimensions, so the input is treated as square.
				side = tf.shape(input_var)[1]
				output = tf.nn.bias_add(
					tf.nn.conv2d_transpose(
						input_var,
						kernel,
						[batch, side * stride, side * stride, out_channels],
						[1, stride, stride, 1],
						padding=padding
					),
					biases
				)
			if batchnorm:
				output = tf.contrib.layers.batch_norm(output, center=True, scale=True, is_training=self.is_training, decay=0.99)
			if dropout is not None:
				# `dropout` is the drop probability; TF wants the keep probability.
				output = tf.nn.dropout(output, keep_prob=1-dropout)

			if activation == 'relu':
				return tf.nn.relu(output, name=scope.name)
			if activation == 'sigmoid':
				return tf.nn.sigmoid(output, name=scope.name)
			# activation == 'none'
			return output
Example #22
0
  def testLSTMBasicToBlockPeeping(self):
    """Compares a peephole LSTMCell run through tf.nn.rnn with fused_lstm.

    Both are built on the same random inputs with the same seeded
    initializer. The test asserts that outputs and input gradients are
    close, and that weight gradients are close under a looser tolerance.
    """
    with self.test_session(use_gpu=self._use_gpu) as sess:
      batch_size = 2
      input_size = 3
      cell_size = 4
      sequence_length = 5

      # One random (batch_size, input_size) tensor per time step.
      inputs = []
      for _ in range(sequence_length):
        inp = tf.convert_to_tensor(
            np.random.randn(batch_size, input_size),
            dtype=tf.float32)
        inputs.append(inp)

      initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)
      # Reference implementation: LSTMCell unrolled by tf.nn.rnn.
      with tf.variable_scope("basic", initializer=initializer):
        cell = tf.nn.rnn_cell.LSTMCell(cell_size,
                                       use_peepholes=True,
                                       state_is_tuple=True)
        outputs, _ = tf.nn.rnn(cell, inputs, dtype=tf.float32)

        sess.run([tf.initialize_all_variables()])
        basic_outputs = sess.run(outputs)
        basic_grads = sess.run(tf.gradients(outputs, inputs))
        # Only the "basic" variables exist at this point.
        basic_wgrads = sess.run(tf.gradients(outputs, tf.trainable_variables()))

      # Implementation under test: fused_lstm with explicitly created
      # weights, bias and peephole vectors.
      with tf.variable_scope("block", initializer=initializer):
        w = tf.get_variable("w",
                            shape=[input_size + cell_size, cell_size * 4],
                            dtype=tf.float32)
        b = tf.get_variable("b",
                            shape=[cell_size * 4],
                            dtype=tf.float32,
                            initializer=tf.zeros_initializer)

        wci = tf.get_variable("wci", shape=[cell_size], dtype=tf.float32)
        wcf = tf.get_variable("wcf", shape=[cell_size], dtype=tf.float32)
        wco = tf.get_variable("wco", shape=[cell_size], dtype=tf.float32)

        # Only the last of the seven returned values is used.
        _, _, _, _, _, _, outputs = fused_lstm(
            tf.convert_to_tensor(sequence_length,
                                 dtype=tf.int64),
            inputs,
            w,
            b,
            wci=wci,
            wcf=wcf,
            wco=wco,
            cell_clip=0,
            use_peephole=True)

        sess.run([tf.initialize_all_variables()])
        block_outputs = sess.run(outputs)
        block_grads = sess.run(tf.gradients(outputs, inputs))
        block_wgrads = sess.run(tf.gradients(outputs, [w, b, wci, wcf, wco]))

      self.assertAllClose(basic_outputs, block_outputs)
      self.assertAllClose(basic_grads, block_grads)
      # NOTE(review): this pairing assumes the "basic" trainable variables
      # come in the same order as [w, b, wci, wcf, wco] -- confirm.
      for basic, block in zip(basic_wgrads, block_wgrads):
        self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)
Example #23
0
def _batch_norm(x, name, is_train):
    """ Apply a batch normalization layer.

    While training, x is normalized with the moments of the current batch,
    taken over every axis except the last, and the moving mean/variance are
    updated with decay 0.995. Otherwise the stored moving statistics are
    used.

    Args:
        x: input tensor whose last dimension is statically known.
        name: variable scope holding 'mean', 'variance', 'offset', 'scale'.
        is_train: boolean predicate passed to tf.cond.

    Returns:
        The normalized tensor.
    """
    with tf.variable_scope(name):
        inputs_shape = x.get_shape()
        axis = list(range(len(inputs_shape) - 1))
        param_shape = int(inputs_shape[-1])

        moving_mean = tf.get_variable('mean', [param_shape], initializer=tf.constant_initializer(0.0), trainable=False)
        moving_var = tf.get_variable('variance', [param_shape], initializer=tf.constant_initializer(1.0), trainable=False)

        beta = tf.get_variable('offset', [param_shape], initializer=tf.constant_initializer(0.0))
        gamma = tf.get_variable('scale', [param_shape], initializer=tf.constant_initializer(1.0))

        def mean_var_with_update():
            mean, var = tf.nn.moments(x, axis)
            update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, 0.995)
            update_moving_var = moving_averages.assign_moving_average(moving_var, var, 0.995)
            # The returned tensors must depend on the updates, and that
            # dependency has to be declared inside this branch. Otherwise
            # nothing ever runs the update ops and the moving statistics
            # keep their initial values.
            with tf.control_dependencies([update_moving_mean, update_moving_var]):
                return tf.identity(mean), tf.identity(var)

        def mean_var():
            return tf.identity(moving_mean), tf.identity(moving_var)

        mean, var = tf.cond(is_train, mean_var_with_update, mean_var)

        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-4)

    return normed
  def add_projection(self, rnn_outputs):
    """Adds a projection layer.

    The projection layer transforms the hidden representation to a distribution
    over the vocabulary: each element of rnn_outputs is multiplied by U, offset
    by b2 and passed through a softmax.

    Variables created:

          U:   (hidden_size, len(vocab))
          b2:  (len(vocab),)

    Args:
      rnn_outputs: List of length num_steps, each of whose elements should be
                   a tensor of shape (batch_size, hidden_size).
    Returns:
      outputs: List of length num_steps, each a tensor of shape
               (batch_size, len(vocab))
    """
    ### YOUR CODE HERE
    vocab_size = len(self.vocab)
    # Scope names may not contain spaces. name_scope only groups ops, so the
    # variables are still named 'U' and 'b2' in the current variable scope.
    with tf.name_scope('Projection_Layer'):
      U = tf.get_variable('U', [self.config.hidden_size, vocab_size])
      # The shape must be a sequence, not a bare int.
      b2 = tf.get_variable('b2', [vocab_size])
      outputs = [tf.nn.softmax(tf.matmul(o, U) + b2) for o in rnn_outputs]
    ### END YOUR CODE
    return outputs
Example #25
0
def logistic_regression(X, y, class_weight=None):
    """Creates logistic regression TensorFlow subgraph.

    Args:
        X: tensor or placeholder for input features,
           shape should be [batch_size, n_features].
        y: tensor or placeholder for target,
           shape should be [batch_size, n_classes].
        class_weight: tensor, [n_classes], where for each class
                      it has weight of the class. If not provided (None),
                      the graph is searched for a tensor named
                      `class_weight:0`. If that does not exist either,
                      None is forwarded to `softmax_classifier`.

    Returns:
        Whatever `softmax_classifier` returns (predictions and loss tensors).
    """
    with tf.variable_scope('logistic_regression'):
        tf.histogram_summary('logistic_regression.X', X)
        tf.histogram_summary('logistic_regression.y', y)
        n_classes = y.get_shape()[-1]
        weights = tf.get_variable('weights', [X.get_shape()[1], n_classes])
        bias = tf.get_variable('bias', [n_classes])
        tf.histogram_summary('logistic_regression.weights', weights)
        tf.histogram_summary('logistic_regression.bias', bias)
        # If no class weight provided, try to retrieve one from pre-defined
        # tensor name in the graph. Compare against None explicitly: a
        # truthiness test on a tensor or array argument raises instead of
        # telling us whether the argument was supplied.
        if class_weight is None:
            try:
                class_weight = tf.get_default_graph().get_tensor_by_name('class_weight:0')
            except KeyError:
                # Best effort only: no such tensor in the graph, keep None.
                pass
        return softmax_classifier(X, y, weights, bias,
                                  class_weight=class_weight)
  def testVarOpScopeReuseParam(self):
    """Names under variable_op_scope when "outer" is re-entered with reuse.

    On the first pass the variables and name scopes both live under "outer/".
    On the second pass (reuse=True, then outer.reuse_variables()) the variable
    names are expected to stay under "outer/" while the name scopes are
    expected under "outer_1/".
    """
    def check(expected_var_name, expected_name_scope):
      # Same order as always: get the variable "w" first, then open the
      # "scope2" name scope and compare the scope string it yields.
      self.assertEqual(tf.get_variable("w", []).name, expected_var_name)
      with tf.name_scope("scope2") as sc2:
        self.assertEqual(sc2, expected_name_scope)

    with self.test_session():
      with tf.variable_scope("outer") as outer:
        with tf.variable_op_scope([], "tower", "default"):
          check("outer/tower/w:0", "outer/tower/scope2/")
        with tf.variable_op_scope([], None, "default"):
          check("outer/default/w:0", "outer/default/scope2/")

      with tf.variable_scope(outer) as outer:
        with tf.variable_op_scope([], "tower", "default", reuse=True):
          check("outer/tower/w:0", "outer_1/tower/scope2/")
        outer.reuse_variables()
        with tf.variable_op_scope([], None, "default"):
          check("outer/default/w:0", "outer_1/default/scope2/")
  def testVarOpScopeOuterScope(self):
    """Names when a captured scope object is passed to variable_op_scope.

    The "outer" scope is created once and then handed to variable_op_scope
    twice (the second time with reuse=True). Variable names are expected
    under "outer/" both times, while the name scopes are expected under
    "outer_1/" and then "outer_2/".
    """
    def check(expected_var_name, expected_name_scope):
      # Get the variable "w" first, then open the "scope2" name scope and
      # compare the scope string it yields.
      self.assertEqual(tf.get_variable("w", []).name, expected_var_name)
      with tf.name_scope("scope2") as sc2:
        self.assertEqual(sc2, expected_name_scope)

    with self.test_session():
      with tf.variable_scope("outer") as outer:
        pass

      with tf.variable_op_scope([], outer, "default"):
        check("outer/w:0", "outer_1/scope2/")
        with tf.variable_op_scope([], None, "default"):
          check("outer/default/w:0", "outer_1/default/scope2/")

      with tf.variable_op_scope([], outer, "default", reuse=True):
        check("outer/w:0", "outer_2/scope2/")
        outer.reuse_variables()
        with tf.variable_op_scope([], None, "default"):
          check("outer/default/w:0", "outer_2/default/scope2/")
    def __init__(self, epsilon=1e-2, shape=()):
        """Create the running-statistics variables and their update function.

        Args:
            epsilon: initial value of the running sum of squares and of the
                running count.
            shape: shape of the running sum / sum-of-squares variables and of
                the placeholders used to increment them.
        """
        def accumulator(var_name, var_shape, init_value):
            # Every accumulator is a non-trainable float64 variable.
            return tf.get_variable(
                dtype=tf.float64,
                shape=var_shape,
                initializer=tf.constant_initializer(init_value),
                name=var_name, trainable=False)

        self._sum = accumulator("runningsum", shape, 0.0)
        self._sumsq = accumulator("runningsumsq", shape, epsilon)
        self._count = accumulator("count", (), epsilon)
        self.shape = shape

        # mean = sum / count; std = sqrt(max(E[x^2] - mean^2, 1e-2)).
        self.mean = tf.to_float(self._sum / self._count)
        mean_of_squares = tf.to_float(self._sumsq / self._count)
        variance = mean_of_squares - tf.square(self.mean)
        self.std = tf.sqrt(tf.maximum(variance, 1e-2))

        # Placeholders carrying the increments for the three accumulators.
        sum_inc = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
        sumsq_inc = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
        count_inc = tf.placeholder(shape=[], dtype=tf.float64, name='count')
        increments = [
            tf.assign_add(self._sum, sum_inc),
            tf.assign_add(self._sumsq, sumsq_inc),
            tf.assign_add(self._count, count_inc),
        ]
        self.incfiltparams = U.function(
            [sum_inc, sumsq_inc, count_inc], [], updates=increments)
  def testVarOpScope(self):
    """Names produced by variable_op_scope nested inside plain name scopes.

    The expected variable names carry no "scope1/" or "scope2/" prefix, while
    the expected name-scope strings do. Entering the explicit "tower" scope a
    second time is expected to make get_variable("w") raise ValueError,
    whereas the default-named scope is expected to become "default_1".
    """
    def check_name_scope(expected_name_scope):
      # Open the "scope2" name scope and compare the scope string it yields.
      with tf.name_scope("scope2") as sc2:
        self.assertEqual(sc2, expected_name_scope)

    def check(expected_var_name, expected_name_scope):
      # Variable first, name scope second.
      self.assertEqual(tf.get_variable("w", []).name, expected_var_name)
      check_name_scope(expected_name_scope)

    with self.test_session():
      with tf.name_scope("scope1"):
        with tf.variable_op_scope([], "tower", "default"):
          check("tower/w:0", "scope1/tower/scope2/")
        with tf.variable_op_scope([], "tower", "default"):
          with self.assertRaises(ValueError):
            tf.get_variable("w", [])
          check_name_scope("scope1/tower_1/scope2/")

      with tf.name_scope("scope2"):
        with tf.variable_op_scope([], None, "default"):
          check("default/w:0", "scope2/default/scope2/")
        with tf.variable_op_scope([], None, "default"):
          check("default_1/w:0", "scope2/default_1/scope2/")
Example #30
0
def ternarize(x, thresh=0.05):
    """
    Ternarize `x` following Trained Ternary Quantization:
    https://arxiv.org/abs/1612.01064

    Code modified from the authors' at:
    https://github.com/czhu95/ternarynet/blob/master/examples/Ternary-Net/ternary.py

    The cut-off is `thresh` times the largest absolute entry of `x`. Entries
    above the cut-off are scaled by the variable 'Wp', entries below the
    negated cut-off by the variable 'Wn', and entries strictly in between are
    zeroed. The sign/zero masking step passes gradients through unchanged.

    Args:
        x: tensor to ternarize.
        thresh: fraction of max(|x|) used as the cut-off.

    Returns:
        The ternarized tensor (also recorded as a histogram summary).
    """
    x_shape = x.get_shape()

    # The cut-off is treated as a constant: no gradient flows through it.
    largest_magnitude = tf.reduce_max(tf.abs(x))
    cutoff = tf.stop_gradient(largest_magnitude * thresh)

    pos_scale = tf.get_variable('Wp', initializer=1.0, dtype=tf.float32)
    neg_scale = tf.get_variable('Wn', initializer=1.0, dtype=tf.float32)

    tf.summary.scalar(pos_scale.op.name + '-summary', pos_scale)
    tf.summary.scalar(neg_scale.op.name + '-summary', neg_scale)

    unit_mask = tf.ones(x_shape)

    # Per-entry scale: Wp above the cut-off, Wn below its negation, else 1.
    above_cutoff = x > cutoff
    scale_pos_only = tf.where(above_cutoff, tf.ones(x_shape) * pos_scale, unit_mask)
    below_neg_cutoff = x < -cutoff
    scale_mask = tf.where(below_neg_cutoff, tf.ones(x_shape) * neg_scale, scale_pos_only)

    # 0 for entries strictly inside (-cutoff, cutoff), 1 elsewhere.
    inside_band = (x < cutoff) & (x > - cutoff)
    zero_mask = tf.where(inside_band, tf.zeros(x_shape), unit_mask)

    @tf.custom_gradient
    def _sign_mask(values):
        # Forward: sign of each entry with the small ones zeroed.
        # Backward: the incoming gradient is returned as-is.
        return tf.sign(values) * zero_mask, lambda dy: dy

    ternary = _sign_mask(x)

    ternary = ternary * scale_mask

    tf.summary.histogram(ternary.name, ternary)
    return ternary
Example #31
0
def zero_bias(shape, name=None):
    """Return the variable `name` of the given shape, initialised to 0.0."""
    zero_init = tf.constant_initializer(0.0)
    return tf.get_variable(name, shape, initializer=zero_init)
Example #32
0
def weight_variable(scope, shape):
    """Return the variable 'W' of `shape` inside variable scope `scope`.

    The variable uses tf.contrib.layers.xavier_initializer().
    """
    with tf.variable_scope(scope):
        xavier_init = tf.contrib.layers.xavier_initializer()
        return tf.get_variable('W', shape, initializer=xavier_init)
Example #33
0
def bias_variable(scope, shape):
    """Return the variable 'b' of `shape` inside variable scope `scope`.

    The variable is initialised to the constant 0.1.
    """
    with tf.variable_scope(scope):
        const_init = tf.constant_initializer(0.1)
        return tf.get_variable('b', shape, initializer=const_init)
def main(flag, load_existing_dump=False):
    highlight_string("INITIALIZING")
    print "loading data.."

    dataset = load_datasets(load_existing_dump)#加载数据集
    config = dataset.model_config#加载训练参数

    print "word vocab Size: {}".format(len(dataset.word2idx))
    print "pos vocab Size: {}".format(len(dataset.pos2idx))
    print "dep vocab Size: {}".format(len(dataset.dep2idx))
    print "Training Size: {}".format(len(dataset.train_inputs[0]))
    print "valid data Size: {}".format(len(dataset.valid_data))
    print "test data Size: {}".format(len(dataset.test_data))

    print len(dataset.word2idx), len(dataset.word_embedding_matrix)
    print len(dataset.pos2idx), len(dataset.pos_embedding_matrix)
    print len(dataset.dep2idx), len(dataset.dep_embedding_matrix)

    if not os.path.exists(os.path.join(DataConfig.data_dir_path, DataConfig.model_dir)):
        os.makedirs(os.path.join(DataConfig.data_dir_path, DataConfig.model_dir))

    with tf.Graph().as_default(), tf.Session() as sess:
        print "Building network...",
        start = time.time()
        with tf.variable_scope("model") as model_scope:
            model = ParserModel(config, dataset.word_embedding_matrix, dataset.pos_embedding_matrix,
                                dataset.dep_embedding_matrix)#神经网络模型
            saver = tf.train.Saver()#保存
            """
            model_scope.reuse_variables()
                -> no need to call tf.variable_scope(model_scope, reuse = True) again
                -> directly access variables & call functions inside this block itself.
                -> ref: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/variable_scope
                -> https://stackoverflow.com/questions/35919020/whats-the-difference-of-name-scope-and-a-variable-scope-in-tensorflow
            """

        print "took {:.2f} seconds\n".format(time.time() - start)

        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(DataConfig.data_dir_path, DataConfig.summary_dir,
                                                          DataConfig.train_summ_dir), sess.graph)
        valid_writer = tf.summary.FileWriter(os.path.join(DataConfig.data_dir_path, DataConfig.summary_dir,
                                                          DataConfig.test_summ_dir))

        if flag == Flags.TRAIN:#训练

            # Variable initialization -> not needed for .restore()
            """ The variables to restore do not have to have been initialized,
            as restoring is itself a way to initialize variables. """
            sess.run(tf.global_variables_initializer())
            """ call 'assignment' after 'init' only, else 'assignment' will get reset by 'init' """
            sess.run(tf.assign(model.word_embedding_matrix, model.word_embeddings))#把初始化的向量放入训练矩阵中
            sess.run(tf.assign(model.pos_embedding_matrix, model.pos_embeddings))
            sess.run(tf.assign(model.dep_embedding_matrix, model.dep_embeddings))

            highlight_string("TRAINING")
            model.print_trainable_varibles()#输出各个层定义

            model.fit(sess, saver, config, dataset, train_writer, valid_writer, merged)#训练主函数

            # Testing
            highlight_string("Testing")
            print "Restoring best found parameters on dev set"
            saver.restore(sess, os.path.join(DataConfig.data_dir_path, DataConfig.model_dir,
                                             DataConfig.model_name))
            model.compute_dependencies(sess, dataset.test_data, dataset)#由模型计算当前依存弧
            test_UAS = model.get_UAS(dataset.test_data)#准确度
            print "test UAS: {}".format(test_UAS * 100)

            train_writer.close()
            valid_writer.close()

            # visualize trained embeddings after complete training (not after each epoch)
            with tf.variable_scope(model_scope, reuse=True):
                pos_emb = tf.get_variable("feature_lookup/pos_embedding_matrix",
                                          [len(dataset.pos2idx.keys()), dataset.model_config.embedding_dim])
                visualize_sample_embeddings(sess, os.path.join(DataConfig.data_dir_path, DataConfig.model_dir),
                                            dataset.pos2idx.keys(), dataset.pos2idx, pos_emb)
            print "to Visualize Embeddings, run in terminal:"
            print "tensorboard --logdir=" + os.path.abspath(os.path.join(DataConfig.data_dir_path,
                                                                         DataConfig.model_dir))

        else:#加载已有的变量进行测试
            ckpt_path = tf.train.latest_checkpoint(os.path.join(DataConfig.data_dir_path,
                                                                DataConfig.model_dir))
            if ckpt_path is not None:
                print "Found checkpoint! Restoring variables.."
                saver.restore(sess, ckpt_path)
                highlight_string("Testing")
                model.compute_dependencies(sess, dataset.test_data, dataset)#由模型计算当前依存弧
                test_UAS = model.get_UAS(dataset.test_data)#准确度
                print "test UAS: {}".format(test_UAS * 100)
                # model.run_valid_epoch(sess, dataset.valid_data, dataset)
                # valid_UAS = model.get_UAS(dataset.valid_data)
                # print "valid UAS: {}".format(valid_UAS * 100)

                highlight_string("Embedding Visualization")
                with tf.variable_scope(model_scope, reuse=True):
                    pos_emb = tf.get_variable("feature_lookup/pos_embedding_matrix",
                                              [len(dataset.pos2idx.keys()), dataset.model_config.embedding_dim])
                    visualize_sample_embeddings(sess, os.path.join(DataConfig.data_dir_path, DataConfig.model_dir),
                                                dataset.pos2idx.keys(), dataset.pos2idx, pos_emb)
                print "to Visualize Embeddings, run in terminal:"
                print "tensorboard --logdir=" + os.path.abspath(os.path.join(DataConfig.data_dir_path,
                                                                             DataConfig.model_dir))

            else:
                print "No checkpoint found!"
Example #35
0
    def conv_layers(self,input_x,name_scope,reuse_flag=False):
        """Computation graph: 1.embedding-->2.CONV-RELU-->3.k-max pooling-->4.concat-->5.dropout.

        Args:
            input_x: word ids looked up in ``self.Embedding``
                (presumably [None, sentence_length] -- TODO confirm against caller).
            name_scope: prefix (converted with ``str``) of the variable scope
                created for each filter size.
            reuse_flag: passed as ``reuse`` to ``tf.variable_scope``.

        Returns:
            The concatenated k-max-pooled features of all filter sizes after
            dropout, reshaped to [-1, num_filters_total * top_k].
        """
        # 1.=====>get embedding of words in the sentence
        embedded_words = tf.nn.embedding_lookup(self.Embedding, input_x)  # [None,sentence_length,embed_size]
        # add a trailing channel dimension so the input is 4-D as conv2d requires
        sentence_embeddings_expanded = tf.expand_dims(embedded_words, -1)  # [None,sentence_length,embed_size,1]

        # 2.=====>for each filter size: a.create filter, b.conv, c.apply nonlinearity, d.k-max pooling
        pooled_outputs = []
        for filter_size in self.filter_sizes:
            # length of the sequence axis after a VALID convolution
            conv_length = self.sequence_length - filter_size + 1
            with tf.variable_scope(str(name_scope) + "convolution-pooling-%s" % filter_size, reuse=reuse_flag):
                # ====>a.create filter: [filter_height, filter_width, in_channels, out_channels]
                conv_filter = tf.get_variable("filter-%s" % filter_size,
                                              [filter_size, self.embed_size, 1, self.num_filters],
                                              initializer=self.initializer)
                # ====>b.conv operation (input format NHWC)
                conv = tf.nn.conv2d(sentence_embeddings_expanded, conv_filter, strides=[1, 1, 1, 1],
                                    padding="VALID", name="conv")  # [batch_size,conv_length,1,num_filters]
                print("conv1.0:", conv)
                # TODO: batch normalisation of `conv` was planned here but is not wired in.
                # ====>c.apply nonlinearity
                b = tf.get_variable("b-%s" % filter_size, [self.num_filters])
                h = tf.nn.relu(tf.nn.bias_add(conv, b), "relu")  # [batch_size,conv_length,1,num_filters]

                # ====>d.k-max pooling: keep the top_k largest activations of every filter
                h = tf.reshape(h, [-1, conv_length, self.num_filters])  # [batch_size,conv_length,num_filters]
                h = tf.transpose(h, [0, 2, 1])  # [batch_size,num_filters,conv_length]
                h = tf.nn.top_k(h, k=self.top_k, name='top_k')[0]  # [batch_size,num_filters,top_k]
                h = tf.reshape(h, [-1, self.num_filters * self.top_k])  # [batch_size,num_filters*top_k]
                pooled_outputs.append(h)

        # 3.=====>combine all pooled features along the feature axis
        h_pool = tf.concat(pooled_outputs, 1)  # [batch_size,num_filters_total*top_k]
        # The flat width follows top_k; a hard-coded 3 here is only right when top_k == 3.
        h_pool_flat = tf.reshape(h_pool, [-1, self.num_filters_total * self.top_k])
        print("h_pool_flat:", h_pool_flat)

        # 4.=====>add dropout
        with tf.name_scope("dropout"):
            h = tf.nn.dropout(h_pool_flat, keep_prob=self.dropout_keep_prob)  # [None,num_filters_total*top_k]

        return h
Example #36
0
    def initialize(
        self,
        inputs,
        input_lengths,
        num_speakers,
        speaker_id,
        mel_targets=None,
        linear_targets=None,
        loss_coeff=None,
        rnn_decoder_test_mode=False,
        is_randomly_initialized=False,
    ):

        is_training2 = linear_targets is not None  # test에서 이게 True로 되는데, 이게 의도한 것인가???
        is_training = not rnn_decoder_test_mode

        self.is_randomly_initialized = is_randomly_initialized

        with tf.variable_scope('inference') as scope:
            hp = self._hparams
            batch_size = tf.shape(inputs)[0]

            # Embeddings(256)
            char_embed_table = tf.get_variable(
                'embedding', [len(symbols), hp.embedding_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.5))

            zero_pad = True
            if zero_pad:  # transformer에 구현되어 있는 거 보고, 가져온 로직.
                # <PAD> 0 은 embedding이 0으로 고정되고, train으로 변하지 않는다. 즉, 위의 get_variable에서 잡았던 변수의 첫번째 행(<PAD>)에 대응되는 것은 사용되지 않는 것이다)
                char_embed_table = tf.concat(
                    (tf.zeros(shape=[1, hp.embedding_size]),
                     char_embed_table[1:, :]), 0)

            # [N, T_in, embedding_size]
            char_embedded_inputs = tf.nn.embedding_lookup(
                char_embed_table, inputs)

            self.num_speakers = num_speakers
            if self.num_speakers > 1:
                if hp.speaker_embedding_size != 1:  # speaker_embedding_size = f(16)
                    speaker_embed_table = tf.get_variable(
                        'speaker_embedding',
                        [self.num_speakers, hp.speaker_embedding_size],
                        dtype=tf.float32,
                        initializer=tf.truncated_normal_initializer(
                            stddev=0.5))
                    # [N, T_in, speaker_embedding_size]
                    speaker_embed = tf.nn.embedding_lookup(
                        speaker_embed_table, speaker_id)

                if hp.model_type == 'deepvoice':
                    if hp.speaker_embedding_size == 1:
                        before_highway = get_embed(
                            speaker_id, self.num_speakers,
                            hp.enc_prenet_sizes[-1], "before_highway"
                        )  # 'enc_prenet_sizes': [f(256), f(128)]
                        encoder_rnn_init_state = get_embed(
                            speaker_id, self.num_speakers, hp.enc_rnn_size * 2,
                            "encoder_rnn_init_state")

                        attention_rnn_init_state = get_embed(
                            speaker_id, self.num_speakers,
                            hp.attention_state_size,
                            "attention_rnn_init_state")
                        decoder_rnn_init_states = [
                            get_embed(
                                speaker_id, self.num_speakers, hp.dec_rnn_size,
                                "decoder_rnn_init_states{}".format(idx + 1))
                            for idx in range(hp.dec_layer_num)
                        ]
                    else:
                        deep_dense = lambda x, dim: tf.layers.dense(
                            x, dim, activation=tf.nn.softsign
                        )  # softsign: x / (abs(x) + 1)

                        before_highway = deep_dense(speaker_embed,
                                                    hp.enc_prenet_sizes[-1])
                        encoder_rnn_init_state = deep_dense(
                            speaker_embed, hp.enc_rnn_size * 2)

                        attention_rnn_init_state = deep_dense(
                            speaker_embed, hp.attention_state_size)
                        decoder_rnn_init_states = [
                            deep_dense(speaker_embed, hp.dec_rnn_size)
                            for _ in range(hp.dec_layer_num)
                        ]

                    speaker_embed = None  # deepvoice does not use speaker_embed directly
                elif hp.model_type == 'simple':
                    # simple model은 speaker_embed를 DecoderPrenetWrapper,ConcatOutputAndAttentionWrapper에 각각 넣어서 concat하는 방식이다.
                    before_highway = None
                    encoder_rnn_init_state = None
                    attention_rnn_init_state = None
                    decoder_rnn_init_states = None
                else:
                    raise Exception(
                        " [!] Unkown multi-speaker model type: {}".format(
                            hp.model_type))
            else:
                # self.num_speakers =1인 경우
                speaker_embed = None
                before_highway = None
                encoder_rnn_init_state = None  # bidirectional GRU의 init state
                attention_rnn_init_state = None
                decoder_rnn_init_states = None

            ##############
            # Encoder
            ##############

            # [N, T_in, enc_prenet_sizes[-1]]
            prenet_outputs = prenet(
                char_embedded_inputs,
                is_training,
                hp.enc_prenet_sizes,
                hp.dropout_prob,
                scope='prenet'
            )  # 'enc_prenet_sizes': [f(256), f(128)],  dropout_prob = 0.5
            # ==> (N, T_in, 128)

            # enc_rnn_size = 128
            encoder_outputs = cbhg(
                prenet_outputs,
                input_lengths,
                is_training,
                hp.enc_bank_size,
                hp.enc_bank_channel_size,
                hp.enc_maxpool_width,
                hp.enc_highway_depth,
                hp.enc_rnn_size,
                hp.enc_proj_sizes,
                hp.enc_proj_width,
                scope="encoder_cbhg",
                before_highway=before_highway,
                encoder_rnn_init_state=encoder_rnn_init_state)

            ##############
            # Attention
            ##############

            # For manaul control of attention
            self.is_manual_attention = tf.placeholder(
                tf.bool,
                shape=(),
                name='is_manual_attention',
            )
            self.manual_alignments = tf.placeholder(
                tf.float32,
                shape=[None, None, None],
                name="manual_alignments",
            )

            # single: attention_size = 128
            if hp.attention_type == 'bah_mon':
                attention_mechanism = BahdanauMonotonicAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=False)
            elif hp.attention_type == 'bah_mon_norm':  # hccho 추가
                attention_mechanism = BahdanauMonotonicAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=True)
            elif hp.attention_type == 'loc_sen':  # Location Sensitivity Attention
                attention_mechanism = LocationSensitiveAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'gmm':  # GMM Attention
                attention_mechanism = GmmAttention(
                    hp.attention_size,
                    memory=encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'bah_mon_norm_hccho':
                attention_mechanism = BahdanauMonotonicAttention_hccho(
                    hp.attention_size, encoder_outputs, normalize=True)
            elif hp.attention_type == 'bah_norm':
                attention_mechanism = BahdanauAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=True)
            elif hp.attention_type == 'luong_scaled':
                attention_mechanism = LuongAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    scale=True)
            elif hp.attention_type == 'luong':
                attention_mechanism = LuongAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'bah':
                attention_mechanism = BahdanauAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths)
            else:
                raise Exception(" [!] Unkown attention type: {}".format(
                    hp.attention_type))

            # DecoderPrenetWrapper, attention_mechanism을 결합하여 AttentionWrapper를 만든다.
            # carpedm20은  tensorflow 소스를코드를 가져와서 AttentionWrapper를 새로 구현했지만,  keith Ito는 tensorflow AttentionWrapper를 그냥 사용했다.
            attention_cell = AttentionWrapper(
                GRUCell(hp.attention_state_size),
                attention_mechanism,
                self.is_manual_attention,
                self.manual_alignments,
                initial_cell_state=attention_rnn_init_state,
                alignment_history=True,
                output_attention=False
            )  # output_attention=False 에 주목, attention_layer_size에 값을 넣지 않았다. 그래서 attention = contex vector가 된다.

            # attention_state_size = 256
            dec_prenet_outputs = DecoderPrenetWrapper(
                attention_cell, speaker_embed, is_training,
                hp.dec_prenet_sizes,
                hp.dropout_prob)  # dec_prenet_sizes =  [f(256), f(128)]

            # Concatenate attention context vector and RNN cell output into a 512D vector.
            # [N, T_in, attention_size+attention_state_size]

            #dec_prenet_outputs의 다음 cell에 전달하는 AttentionWrapperState의 member (attention,cell_state, ...)에서 attention과 output을 concat하여 output으로 내보낸다.
            # output이 output은 cell_state와 같기 때문에, concat [ output(=cell_state) | attention ]
            concat_cell = ConcatOutputAndAttentionWrapper(
                dec_prenet_outputs, embed_to_concat=speaker_embed
            )  # concat(output,attention,speaker_embed)해서 새로운 output을 만든다.

            # Decoder (layers specified bottom to top):  dec_rnn_size= 256
            cells = [OutputProjectionWrapper(concat_cell, hp.dec_rnn_size)
                     ]  # OutputProjectionWrapper는 논문에 언급이 없는 것 같은데...
            for _ in range(hp.dec_layer_num):  # hp.dec_layer_num = 2
                cells.append(ResidualWrapper(GRUCell(hp.dec_rnn_size)))

            # [N, T_in, 256]
            decoder_cell = MultiRNNCell(cells, state_is_tuple=True)

            # Project onto r mel spectrograms (predict r outputs at each RNN step):
            output_cell = OutputProjectionWrapper(
                decoder_cell, hp.num_mels * hp.reduction_factor
            )  # 여기에 stop token도 나올 수 있도록...수정하면 되지 않을까???   (hp.num_mels+1) * hp.reduction_factor
            decoder_init_state = output_cell.zero_state(
                batch_size=batch_size, dtype=tf.float32
            )  # 여기서 zero_state를 부르면, 위의 AttentionWrapper에서 이미 넣은 준 값도 포함되어 있다.

            if hp.model_type == "deepvoice":
                # decoder_init_state[0] : AttentionWrapperState
                # = cell_state + attention + time + alignments + alignment_history
                # decoder_init_state[0][0] = attention_rnn_init_state (already applied: AttentionWrapper의 initial_cell_state를 이미 넣어 주었다. )
                decoder_init_state = list(decoder_init_state)

                for idx, cell in enumerate(decoder_rnn_init_states):
                    shape1 = decoder_init_state[idx + 1].get_shape().as_list()
                    shape2 = cell.get_shape().as_list()
                    if shape1 != shape2:
                        raise Exception(
                            " [!] Shape {} and {} should be equal".format(
                                shape1, shape2))
                    decoder_init_state[idx + 1] = cell

                decoder_init_state = tuple(decoder_init_state)

            if is_training2:
                # rnn_decoder_test_mode = True if test mode,  train mode에서는 False
                helper = TacoTrainingHelper(
                    inputs, mel_targets, hp.num_mels, hp.reduction_factor,
                    rnn_decoder_test_mode)  # inputs은 batch_size 계산에만 사용됨
            else:
                helper = TacoTestHelper(batch_size, hp.num_mels,
                                        hp.reduction_factor)

            (decoder_outputs, _), final_decoder_state, _ = \
                    tf.contrib.seq2seq.dynamic_decode(BasicDecoder(output_cell, helper, decoder_init_state),maximum_iterations=hp.max_iters)  # max_iters=200

            # [N, T_out, M]
            mel_outputs = tf.reshape(decoder_outputs,
                                     [batch_size, -1, hp.num_mels])

            # Add post-processing CBHG:
            # [N, T_out, 256]
            #post_outputs = post_cbhg(mel_outputs, hp.num_mels, is_training)
            post_outputs = cbhg(mel_outputs,
                                None,
                                is_training,
                                hp.post_bank_size,
                                hp.post_bank_channel_size,
                                hp.post_maxpool_width,
                                hp.post_highway_depth,
                                hp.post_rnn_size,
                                hp.post_proj_sizes,
                                hp.post_proj_width,
                                scope='post_cbhg')

            if speaker_embed is not None and hp.model_type == 'simple':
                expanded_speaker_emb = tf.expand_dims(speaker_embed, [1])
                tiled_speaker_embedding = tf.tile(
                    expanded_speaker_emb, [1, tf.shape(post_outputs)[1], 1])

                # [N, T_out, 256 + alpha]
                post_outputs = tf.concat(
                    [tiled_speaker_embedding, post_outputs], axis=-1)

            linear_outputs = tf.layers.dense(
                post_outputs, hp.num_freq)  # [N, T_out, F(1025)]

            # Grab alignments from the final decoder state:
            # MultiRNNCell이 3단이기 때문에, final_decoder_state는 len 3 tuple이다.  ==> final_decoder_state[0]
            alignments = tf.transpose(
                final_decoder_state[0].alignment_history.stack(),
                [1, 2, 0
                 ])  # batch_size, text length(encoder), target length(decoder)

            self.inputs = inputs
            self.speaker_id = speaker_id
            self.input_lengths = input_lengths
            self.loss_coeff = loss_coeff
            self.mel_outputs = mel_outputs
            self.linear_outputs = linear_outputs
            self.alignments = alignments
            self.mel_targets = mel_targets
            self.linear_targets = linear_targets
            self.final_decoder_state = final_decoder_state

            log('=' * 40)
            log(' model_type: %s' % hp.model_type)
            log('=' * 40)

            log('Initialized Tacotron model. Dimensions: ')
            log('    embedding:                %d' %
                char_embedded_inputs.shape[-1])
            if speaker_embed is not None:
                log('    speaker embedding:        %d' %
                    speaker_embed.shape[-1])
            else:
                log('    speaker embedding:        None')
            log('    prenet out:               %d' % prenet_outputs.shape[-1])
            log('    encoder out:              %d' % encoder_outputs.shape[-1])
            log('    attention out:            %d' %
                attention_cell.output_size)
            log('    concat attn & out:        %d' % concat_cell.output_size)
            log('    decoder cell out:         %d' % decoder_cell.output_size)
            log('    decoder out (%d frames):  %d' %
                (hp.reduction_factor, decoder_outputs.shape[-1]))
            log('    decoder out (1 frame):    %d' % mel_outputs.shape[-1])
            log('    postnet out:              %d' % post_outputs.shape[-1])
            log('    linear out:               %d' % linear_outputs.shape[-1])
def fc(input, num_output, name='fc'):
    """Apply an affine (fully connected) layer: ``input @ w + b``.

    The variables ``w`` and ``b`` are created (or fetched) inside the
    variable scope ``name``.

    Args:
      input: Tensor whose second dimension gives the number of input
        features (so it is expected to be 2-D, e.g. (batch, features)).
      num_output: Number of output units.
      name: Variable scope that holds the layer's variables.

    Returns:
      The tensor ``tf.matmul(input, w) + b``.
    """
    with tf.variable_scope(name):
        in_features = input.get_shape()[1]
        # Weights start as small Gaussian noise, the bias starts at zero.
        weights = tf.get_variable(
            'w',
            [in_features, num_output],
            dtype=tf.float32,
            initializer=tf.random_normal_initializer(0.0, 0.02))
        bias = tf.get_variable(
            'b',
            [num_output],
            initializer=tf.constant_initializer(0.0))
        affine = tf.matmul(input, weights) + bias
    return affine
Example #38
0
def weight(shape, name=None):
    """Return a variable of the given shape drawn from N(0, 0.1).

    Args:
      shape: Shape of the variable.
      name: Name forwarded to `tf.get_variable`.

    Returns:
      The variable created (or fetched) by `tf.get_variable`.
    """
    gaussian_init = tf.random_normal_initializer(0.0, 0.1)
    return tf.get_variable(name, shape, initializer=gaussian_init)
# Builds a fully connected sigmoid network over `network_structure` and a
# mean-squared-error reconstruction loss against the `noiseless` placeholder.
# `input_dim` and `network_structure` are defined outside this fragment.
sparse = False
p = 0.1         # sparsity parameter
beta = 0.1      # sparsity regularization strength


# setting up the graph:
# `data` is what the network consumes; `noiseless` is the reconstruction target
# (presumably the clean version of a noisy `data` batch -- confirm with the
# training loop).
data = tf.placeholder( dtype=tf.float32, shape=(None, input_dim))
noiseless = tf.placeholder( dtype=tf.float32, shape=(None, input_dim))          # JUST FOR TRAINING

# def get_noise(data):
#     noise = np.random.randn(*data.shape)*0.2
#     return noise

# Stack one dense sigmoid layer per consecutive pair of sizes in
# `network_structure`; `a` always holds the activations of the latest layer.
a = data
for l in range(1,len(network_structure)):
    # Weights: Gaussian with stddev 1/sqrt(fan_in); biases: zeros, shaped
    # (1, units) so they broadcast over the batch dimension.
    w = tf.get_variable(name="w%s" % l, initializer=tf.random_normal(shape=(network_structure[l-1], network_structure[l]), stddev=1./np.sqrt(network_structure[l-1])))
    b = tf.get_variable(name="b%s" % l, shape=(1, network_structure[l]), initializer=tf.zeros_initializer)

    z = tf.matmul(a, w) + b
    a = tf.nn.sigmoid(z, name="a%s" % l)    # last activation has name "a%s" % (len(network_structure)-1)
    # if l == coding_layer_index:
    #     coding_layer_activations = a
 
################################
# Defining the loss operation: #
################################
# Reconstruction error between the final layer's activations and the target.
loss_reconstruction = tf.losses.mean_squared_error(labels=noiseless, predictions=a)

# Sparsity penalty defaults to 0; the `if sparse:` branch below is expected to
# replace it when sparsity is enabled.
loss_sparsity = 0
if sparse:
    # getting the coding layer (the smallest hidden layer):
Example #40
0
def positive_bias(shape, name=None):
    """Return a variable of the given shape with every entry set to 0.1.

    Args:
      shape: Shape of the variable.
      name: Name forwarded to `tf.get_variable`.

    Returns:
      The variable created (or fetched) by `tf.get_variable`.
    """
    constant_init = tf.constant_initializer(0.1)
    return tf.get_variable(name, shape, initializer=constant_init)
Example #41
0
def transformer_model(input_tensor,
                      attention_mask=None,
                      hidden_size=768,
                      num_hidden_layers=12,
                      num_attention_heads=12,
                      intermediate_size=3072,
                      intermediate_act_fn=gelu,
                      hidden_dropout_prob=0.1,
                      attention_probs_dropout_prob=0.1,
                      initializer_range=0.02,
                      do_return_all_layers=False,
                      extras=None):
    """Multi-headed, multi-layer Transformer from "Attention is All You Need".

    This is almost an exact implementation of the original Transformer
    encoder, extended with two entity-position embedding tables that are
    stored on `extras` and handed to every attention layer.

    See the original paper:
    https://arxiv.org/abs/1706.03762

    Also see:
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py

    Args:
      input_tensor: float Tensor of shape [batch_size, seq_length,
        hidden_size].
      attention_mask: (optional) int32 Tensor of shape [batch_size,
        seq_length, seq_length], with 1 for positions that can be attended
        to and 0 in positions that should not be.
      hidden_size: int. Hidden size of the Transformer.
      num_hidden_layers: int. Number of layers (blocks) in the Transformer.
      num_attention_heads: int. Number of attention heads in the Transformer.
      intermediate_size: int. The size of the "intermediate" (a.k.a., feed
        forward) layer.
      intermediate_act_fn: function. The non-linear activation function to
        apply to the output of the intermediate/feed-forward layer.
      hidden_dropout_prob: float. Dropout probability for the hidden layers.
      attention_probs_dropout_prob: float. Dropout probability of the
        attention probabilities.
      initializer_range: float. Range of the initializer (stddev of truncated
        normal).
      do_return_all_layers: Whether to also return all layers or just the
        final layer.
      extras: Mutable object that must expose `max_distance`. This function
        sets `entity_pos_table_key` and `entity_pos_table_val` on it (two
        [2 * max_distance + 1, hidden_size] variables) and forwards it to
        `attention_layer`. Required in practice despite the `None` default.

    Returns:
      If `do_return_all_layers` is false, a float Tensor of shape
      [batch_size, seq_length, hidden_size]: the final hidden layer. Otherwise
      a list with one such Tensor per layer, in layer order.

    Raises:
      ValueError: A Tensor shape or parameter is invalid, or `extras` is
        missing.
    """
    if hidden_size % num_attention_heads != 0:
        raise ValueError(
            "The hidden size (%d) is not a multiple of the number of attention "
            "heads (%d)" % (hidden_size, num_attention_heads))

    attention_head_size = int(hidden_size / num_attention_heads)
    input_shape = get_shape_list(input_tensor, expected_rank=3)
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    input_width = input_shape[2]

    # The Transformer performs sum residuals on all layers so the input needs
    # to be the same as the hidden size.
    if input_width != hidden_size:
        raise ValueError(
            "The width of the input tensor (%d) != hidden size (%d)" %
            (input_width, hidden_size))

    # `extras` is dereferenced right below; fail with a clear message instead
    # of an AttributeError on None.
    if extras is None:
        raise ValueError(
            "`extras` must be provided: it carries `max_distance` and receives "
            "the entity position embedding tables")

    # Relative-position tables, one row per offset in
    # [-max_distance, +max_distance].
    # NOTE(review): both tables use a fixed initializer range of 0.5 rather
    # than `initializer_range` -- confirm this is intentional.
    num_relative_positions = extras.max_distance * 2 + 1
    extras.entity_pos_table_key = tf.get_variable(
        name='entity_pos_embeddings_key',
        shape=[num_relative_positions, hidden_size],
        initializer=create_initializer(0.5))

    extras.entity_pos_table_val = tf.get_variable(
        name='entity_pos_embeddings_val',
        shape=[num_relative_positions, hidden_size],
        initializer=create_initializer(0.5))

    # We keep the representation as a 2D tensor to avoid re-shaping it back and
    # forth from a 3D tensor to a 2D tensor. Re-shapes are normally free on
    # the GPU/CPU but may not be free on the TPU, so we want to minimize them to
    # help the optimizer.
    prev_output = reshape_to_matrix(input_tensor)

    all_layer_outputs = []
    for layer_idx in range(num_hidden_layers):
        with tf.variable_scope("layer_%d" % layer_idx):
            layer_input = prev_output

            with tf.variable_scope("attention"):
                # Only self-attention is used, so its output feeds the
                # projection directly (no concatenation of several heads
                # lists is ever needed).
                with tf.variable_scope("self"):
                    attention_output = attention_layer(
                        from_tensor=layer_input,
                        to_tensor=layer_input,
                        attention_mask=attention_mask,
                        num_attention_heads=num_attention_heads,
                        size_per_head=attention_head_size,
                        attention_probs_dropout_prob=
                        attention_probs_dropout_prob,
                        initializer_range=initializer_range,
                        do_return_2d_tensor=True,
                        batch_size=batch_size,
                        from_seq_length=seq_length,
                        to_seq_length=seq_length,
                        extras=extras)

                # Run a linear projection of `hidden_size` then add a residual
                # with `layer_input`.
                with tf.variable_scope("output"):
                    attention_output = tf.layers.dense(
                        attention_output,
                        hidden_size,
                        kernel_initializer=create_initializer(
                            initializer_range))
                    attention_output = dropout(attention_output,
                                               hidden_dropout_prob)
                    attention_output = layer_norm(attention_output +
                                                  layer_input)

            # The activation is only applied to the "intermediate" hidden layer.
            with tf.variable_scope("intermediate"):
                intermediate_output = tf.layers.dense(
                    attention_output,
                    intermediate_size,
                    activation=intermediate_act_fn,
                    kernel_initializer=create_initializer(initializer_range))

            # Down-project back to `hidden_size` then add the residual.
            with tf.variable_scope("output"):
                layer_output = tf.layers.dense(
                    intermediate_output,
                    hidden_size,
                    kernel_initializer=create_initializer(initializer_range))
                layer_output = dropout(layer_output, hidden_dropout_prob)
                layer_output = layer_norm(layer_output + attention_output)
                prev_output = layer_output
                all_layer_outputs.append(layer_output)

    # Restore the original [batch_size, seq_length, hidden_size] layout.
    if do_return_all_layers:
        return [
            reshape_from_matrix(layer_output, input_shape)
            for layer_output in all_layer_outputs
        ]
    return reshape_from_matrix(prev_output, input_shape)
Example #42
0
# Two-layer convolutional classifier for 28x28 single-channel images
# (flattened to 784 values) with 10 output classes, trained with Adam.
# `learning_rate`, `training_epochs`, `batch_size` and `mnist` are defined
# outside this fragment.
X = tf.placeholder(tf.float32, [None, 784])
X_img = tf.reshape(X, [-1, 28, 28, 1])  # back to image layout: 28x28, 1 channel
Y = tf.placeholder(tf.float32, [None, 10])  # one row of 10 class scores per example

# Conv layer 1: 3x3 kernels, 1 -> 32 channels, then ReLU and 2x2 max-pool
# with stride 2 (28x28 -> 14x14).
W1 = tf.Variable(tf.random_normal([3,3,1,32], stddev=0.01))
L1 = tf.nn.conv2d(X_img, W1, strides=[1,1,1,1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

# Conv layer 2: 3x3 kernels, 32 -> 64 channels, then ReLU and 2x2 max-pool
# with stride 2 (14x14 -> 7x7), flattened to 7*7*64 features per example.
W2 = tf.Variable(tf.random_normal([3,3,32,64], stddev=0.01))
L2 = tf.nn.conv2d(L1, W2, strides=[1,1,1,1], padding='SAME')
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
L2 = tf.reshape(L2, [-1,7*7*64])

# Final dense layer producing the 10 class logits (Xavier-initialised weights).
W3 = tf.get_variable('W3', shape=[7*7*64, 10],
    initializer = tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L2, W3) + b

# Mean softmax cross-entropy over the batch, minimised with Adam.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

print('Learning started. It takes sometime.')
# One pass over the training set per epoch, in mini-batches of `batch_size`.
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
    def __init__(self,
                 source_vocab_size,
                 target_vocab_size,
                 buckets,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 num_samples=512,
                 forward_only=False):
        """Build the bucketed embedding-attention seq2seq graph.

        Args:
          source_vocab_size: Number of encoder symbols.
          target_vocab_size: Number of decoder symbols.
          buckets: Sequence of (encoder_length, decoder_length) pairs; the
            last one is treated as the largest.
          size: Number of units per GRU layer; also the embedding size.
          num_layers: Number of stacked recurrent layers.
          max_gradient_norm: Global norm gradients are clipped to.
          batch_size: Stored on the instance; not used while building.
          learning_rate: Initial learning rate.
          learning_rate_decay_factor: Factor applied by
            `learning_rate_decay_op`.
          num_samples: Number of classes drawn for sampled softmax. Sampled
            softmax is used only when 0 < num_samples < target_vocab_size.
          forward_only: If true, build the decoding graph only (previous
            outputs are fed back and no gradient ops are created).
        """
        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # Sampled softmax needs an explicit output projection and only pays
        # off when fewer classes are sampled than the vocabulary contains.
        output_projection = None
        softmax_loss_function = None
        if 0 < num_samples < self.target_vocab_size:
            proj_weights = tf.get_variable("proj_w",
                                           [size, self.target_vocab_size])
            proj_weights_t = tf.transpose(proj_weights)
            proj_bias = tf.get_variable("proj_b", [self.target_vocab_size])
            output_projection = (proj_weights, proj_bias)

            def sampled_loss(inputs, labels):
                column_labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(proj_weights_t, proj_bias,
                                                  inputs, column_labels,
                                                  num_samples,
                                                  self.target_vocab_size)

            softmax_loss_function = sampled_loss

        # Recurrent core: a single GRU cell, repeated when several layers
        # are requested.
        single_cell = tf.nn.rnn_cell.GRUCell(size)
        if num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
        else:
            cell = single_cell

        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            # Embedding for the inputs plus attention over encoder states.
            return tf.nn.seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=size,
                output_projection=output_projection,
                feed_previous=do_decode)

        # Input feeds, sized for the last (largest) bucket.
        largest_bucket = buckets[-1]
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for step in xrange(largest_bucket[0]):
            encoder_feed = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name="encoder{0}".format(step))
            self.encoder_inputs.append(encoder_feed)
        for step in xrange(largest_bucket[1] + 1):
            decoder_feed = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name="decoder{0}".format(step))
            self.decoder_inputs.append(decoder_feed)
            weight_feed = tf.placeholder(tf.float32,
                                         shape=[None],
                                         name="weight{0}".format(step))
            self.target_weights.append(weight_feed)

        # Targets are the decoder inputs shifted left by one step.
        targets = self.decoder_inputs[1:]

        # Outputs and losses per bucket. When decoding (forward_only) the
        # model feeds its own previous output back in.
        feed_previous = True if forward_only else False
        self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
            self.encoder_inputs,
            self.decoder_inputs,
            targets,
            self.target_weights,
            buckets,
            lambda x, y: seq2seq_f(x, y, feed_previous),
            softmax_loss_function=softmax_loss_function)

        # With an output projection, decoding outputs must be projected back
        # to vocabulary size.
        if forward_only and output_projection is not None:
            out_weights = output_projection[0]
            out_bias = output_projection[1]
            for bucket_id in xrange(len(buckets)):
                self.outputs[bucket_id] = [
                    tf.matmul(output, out_weights) + out_bias
                    for output in self.outputs[bucket_id]
                ]

        # Clipped-gradient SGD update, one op per bucket (training only).
        params = tf.trainable_variables()
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            for bucket_id in xrange(len(buckets)):
                gradients = tf.gradients(self.losses[bucket_id], params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms.append(norm)
                update_op = opt.apply_gradients(
                    zip(clipped_gradients, params),
                    global_step=self.global_step)
                self.updates.append(update_op)

        self.saver = tf.train.Saver(tf.global_variables())
Example #44
0
 def _create_variables(self):
     """Create the user and item embedding variables.

     Under the 'embedding' name scope this creates three trainable tables
     initialised from a truncated normal (mean 0, stddev 0.01): `user_map`,
     `user_emb` and `item_emb`. It also creates a non-trainable `item_map`
     whose initial value is `self.item_factors` reshaped to
     [-1, n_factors * embedding].
     """

     def small_normal(rows, cols):
         # Initial value for a trainable table of shape [rows, cols].
         return tf.truncated_normal(shape=[rows, cols], mean=0.0, stddev=0.01)

     with tf.name_scope('embedding'):
         self.user_map_embedding = tf.Variable(
             small_normal(self.n_users, self.n_factors * self.embedding),
             name='user_map',
             dtype=tf.float32)
         self.user_embedding = tf.Variable(
             small_normal(self.n_users, self.embedding),
             name='user_emb',
             dtype=tf.float32)
         # Fixed (non-trainable) table derived from precomputed item factors.
         self.item_map_embedding = tf.get_variable(
             dtype=tf.float32,
             initializer=tf.reshape(
                 self.item_factors, [-1, self.n_factors * self.embedding]),
             name='item_map',
             trainable=False)
         self.item_embedding = tf.Variable(
             small_normal(self.n_items, self.embedding),
             name='item_emb',
             dtype=tf.float32)
Example #45
0
    def __init__(self, is_training, batch_size):
        """Build a two-LSTM context model that predicts one target id.

        One stacked LSTM reads the preceding context (`pre_input`) and another
        reads the following context (`fol_input`). The last time-step outputs
        of both are concatenated and projected to VOCAB_SIZE logits. Cost and
        accuracy ops (plus running-average variables and summaries) are always
        created; `train_op` and `merged_summary_op` are created only when
        `is_training` is true.

        :param is_training: whether the graph is built for training
            (True/False); enables dropout and the optimizer.
        :param batch_size: the size of one batch (fixed in the placeholders;
            the sequence-length dimension is left dynamic).
        """
        # Network hyper-parameters and bookkeeping state.
        self.learning_rate = tf.Variable(float(LEARNING_RATE), trainable=False, dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * LEARNING_RATE_DECAY_FACTOR)
        self.global_step = 0
        self.global_epoch = 0
        self.batch_size = batch_size

        # Input placeholders of token ids, shaped [batch_size, num_steps],
        # together with the true length of every sequence.
        self.pre_input = tf.placeholder(tf.int32, [batch_size,None])
        self.pre_input_seq_length = tf.placeholder(tf.int32, [batch_size,])
        self.fol_input = tf.placeholder(tf.int32, [batch_size,None])
        self.fol_input_seq_length = tf.placeholder(tf.int32, [batch_size,])
        # Expected output: one target id per example, shape [batch_size].
        self.targets = tf.placeholder(tf.int32, [batch_size,])
        embedding = tf.get_variable("embedding", [VOCAB_SIZE, HIDDEN_SIZE])  # embedding matrix, shared by both contexts

        # pre_context_model
        with tf.variable_scope('Pre') as scope:
            pre_cell = tf.contrib.rnn.BasicLSTMCell(num_units=PRE_CONTEXT_HIDDEN_SIZE, forget_bias=0.0,
                                                state_is_tuple=True)
            if is_training:
                pre_cell = tf.contrib.rnn.DropoutWrapper(pre_cell, output_keep_prob=KEEP_PROB)
            # NOTE(review): `[pre_cell] * N` repeats the same cell object for
            # every layer -- confirm this is intended when N > 1.
            pre_lstm_cell = tf.contrib.rnn.MultiRNNCell([pre_cell] * PRE_CONTEXT_NUM_LAYERS, state_is_tuple=True)

            pre_input = tf.nn.embedding_lookup(embedding, self.pre_input)  # Map word ids to word vectors.
            if is_training:
                pre_input = tf.nn.dropout(pre_input, KEEP_PROB)
            self.pre_initial_state = pre_lstm_cell.zero_state(batch_size, tf.float32)  # Zero initial state.
            pre_outputs, pre_states = tf.nn.dynamic_rnn(pre_lstm_cell, pre_input,sequence_length=self.pre_input_seq_length,
                                                        initial_state=self.pre_initial_state,dtype=tf.float32)
            #tmp_output = pre_outputs[:, -1, :]    # previous step's output feeds the next prediction
            #pre_outputs, pre_states = pre_lstm_cell(tmp_output, pre_states)
            # Keep only the output at the last time step.
            # NOTE(review): index -1 is the last padded step, not the last
            # valid step per `sequence_length` -- confirm this is intended
            # for sequences shorter than the batch maximum.
            pre_outputs = pre_outputs[:, -1, :]
            self.pre_final_state = pre_states  # final state of the preceding-context LSTM

        # fol_context_model
        with tf.variable_scope('Fol') as scope:
            fol_cell = tf.contrib.rnn.BasicLSTMCell(num_units=FOL_CONTEXT_HIDDEN_SIZE, forget_bias=0.0,
                                                    state_is_tuple=True)
            if is_training:
                fol_cell = tf.contrib.rnn.DropoutWrapper(fol_cell, output_keep_prob=KEEP_PROB)
            # NOTE(review): same cell object repeated for every layer, as in
            # the 'Pre' scope -- confirm.
            fol_lstm_cell = tf.contrib.rnn.MultiRNNCell([fol_cell] * FOL_CONTEXT_NUM_LAYERS, state_is_tuple=True)

            fol_input = tf.nn.embedding_lookup(embedding, self.fol_input)  # Map word ids to word vectors.
            if is_training:
                fol_input = tf.nn.dropout(fol_input, KEEP_PROB)
            self.fol_initial_state = fol_lstm_cell.zero_state(batch_size, tf.float32)  # Zero initial state.
            fol_outputs, fol_states = tf.nn.dynamic_rnn(fol_lstm_cell, fol_input,sequence_length=self.fol_input_seq_length,
                                                        initial_state=self.fol_initial_state,
                                                        dtype=tf.float32)
            #tmp_output = fol_outputs[:, -1, :]  # previous step's output feeds the next prediction
            #fol_outputs, fol_states = fol_lstm_cell(tmp_output, fol_states)
            # Keep only the output at the last time step (same caveat as above).
            fol_outputs = fol_outputs[:, -1, :]
            self.fol_final_state = fol_states  # final state of the following-context LSTM

        # Alternative (disabled): combine the two LSTM outputs by averaging
        # self.output = tf.add(pre_outputs,fol_outputs)/2
        # Fully connected layer
        # weight = tf.get_variable("weight", [HIDDEN_SIZE, VOCAB_SIZE])
        # bias = tf.get_variable("bias", [VOCAB_SIZE])
        # self.logits = tf.matmul(self.output, weight) + bias

        # Simple concatenation of both context representations.
        output = tf.concat([pre_outputs, fol_outputs], 1)
        # Fully connected layer producing one logit per vocabulary entry.
        # NOTE(review): the weight's input size is 2*HIDDEN_SIZE, which assumes
        # PRE_CONTEXT_HIDDEN_SIZE + FOL_CONTEXT_HIDDEN_SIZE == 2*HIDDEN_SIZE --
        # confirm against the constants.
        weight = tf.get_variable("weight", [2*HIDDEN_SIZE, VOCAB_SIZE])
        bias = tf.get_variable("bias", [VOCAB_SIZE])
        self.logits = tf.matmul(output, weight) + bias


        # (English for the note below) Define the cross-entropy loss and the
        # mean loss: the logits over VOCAB_SIZE candidates are compared with
        # the corresponding targets; the per-example loss is a 1-D tensor of
        # shape [batch_size].
        ''' 定义交叉熵损失函数和平均损失。
        logits中在vocab_size个结果中选择概率最大的结果与相应的targets结果比较计算loss值
         返回一个 [batch_size] 的1维张量 '''
        #loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        #    [self.logits], [self.targets],
        #    [tf.ones([batch_size], dtype=tf.float32)])

        # Softmax + cross-entropy loss
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.targets)

        # Track cost
        with  tf.variable_scope('cost') as scope:
            self.cost = tf.reduce_mean(loss)
            self.ave_cost = tf.Variable(0.0, trainable=False, dtype=tf.float32)
            # Running-average update: (ave * step + cost) / (step + 1).
            # NOTE(review): self.global_step is a plain Python int (0) when
            # this op is built, so that value is baked into the op -- confirm
            # the average is meant to track later changes of global_step.
            self.ave_cost_op = self.ave_cost.assign(tf.divide(
                tf.add(tf.multiply(self.ave_cost, self.global_step), self.cost), self.global_step+1))
            # global_step starts from 0
            tf.summary.scalar('cost', self.cost)
            tf.summary.scalar('ave_cost', self.ave_cost)
        # Back-propagation ops are only defined when training.

        # Track accuracy
        with  tf.variable_scope('accuracy') as scope:
            # A prediction is correct when the arg-max logit equals the target id.
            correct_prediction = tf.equal(self.targets, tf.cast(tf.argmax(self.logits, -1), tf.int32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            self.ave_accuracy = tf.Variable(0.0, trainable=False, dtype=tf.float32)
            # Same running-average form (and the same global_step caveat) as
            # for the cost above.
            self.ave_accuracy_op =  self.ave_accuracy.assign(tf.divide(
                tf.add(tf.multiply(self.ave_accuracy, self.global_step),self.accuracy),self.global_step+1))
            # global_step starts from 0
            tf.summary.scalar('accuracy', self.accuracy)
            tf.summary.scalar('ave_accuracy', self.ave_accuracy)
            # Back-propagation ops are only defined when training.
        # Back-propagation ops are only defined when training; note that
        # `train_op` and `merged_summary_op` do not exist on evaluation models.
        if not is_training: return

        # trainable_variables = tf.trainable_variables()
        # trainable_variables = tf.all_variables()
        # Disabled alternative: clip gradients, then define the optimizer and
        # training step by hand.
        # grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, trainable_variables), MAX_GRAD_NORM)
        # optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        # self.train_op = optimizer.apply_gradients(zip(grads, trainable_variables))

        self.train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

        self.merged_summary_op = tf.summary.merge_all() # merge all summary ops
Example #46
0
def embedding_postprocessor(input_tensor,
                            use_token_type=False,
                            token_type_ids=None,
                            token_type_vocab_size=16,
                            token_type_embedding_name="token_type_embeddings",
                            use_position_embeddings=True,
                            position_embedding_name="position_embeddings",
                            initializer_range=0.02,
                            max_position_embeddings=512,
                            dropout_prob=0.1):
    """Performs various post-processing on a word embedding tensor.

    Optionally adds learned token-type embeddings and learned position
    embeddings to `input_tensor`, then passes the sum through
    `layer_norm_and_dropout`.

    Args:
      input_tensor: float Tensor of shape [batch_size, seq_length,
        embedding_size].
      use_token_type: bool. Whether to add embeddings for `token_type_ids`.
      token_type_ids: (optional) int32 Tensor of shape [batch_size,
        seq_length]. Must be specified if `use_token_type` is True.
      token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
      token_type_embedding_name: string. The name of the embedding table
        variable for token type ids.
      use_position_embeddings: bool. Whether to add position embeddings for
        the position of each token in the sequence.
      position_embedding_name: string. The name of the embedding table
        variable for positional embeddings.
      initializer_range: float. Range of the weight initialization.
      max_position_embeddings: int. Maximum sequence length that might ever
        be used with this model. This can be longer than the sequence length
        of input_tensor, but cannot be shorter.
      dropout_prob: float. Dropout probability applied to the final output
        tensor.

    Returns:
      float tensor with same shape as `input_tensor`.

    Raises:
      ValueError: One of the tensor shapes or input values is invalid.
    """
    input_shape = get_shape_list(input_tensor, expected_rank=3)
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    width = input_shape[2]

    output = input_tensor

    if use_token_type:
        if token_type_ids is None:
            # The two literals are concatenated, so the first one must end
            # with a space to keep the message readable.
            raise ValueError("`token_type_ids` must be specified if "
                             "`use_token_type` is True.")
        token_type_table = tf.get_variable(
            name=token_type_embedding_name,
            shape=[token_type_vocab_size, width],
            initializer=create_initializer(initializer_range))
        # This vocab will be small so we always do one-hot here, since it is
        # always faster for a small vocabulary.
        flat_token_type_ids = tf.reshape(token_type_ids, [-1])
        one_hot_ids = tf.one_hot(flat_token_type_ids,
                                 depth=token_type_vocab_size)
        token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
        token_type_embeddings = tf.reshape(token_type_embeddings,
                                           [batch_size, seq_length, width])
        output += token_type_embeddings

    if use_position_embeddings:
        assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
        with tf.control_dependencies([assert_op]):
            full_position_embeddings = tf.get_variable(
                name=position_embedding_name,
                shape=[max_position_embeddings, width],
                initializer=create_initializer(initializer_range))
            # Since the position embedding table is a learned variable, we
            # create it using a (long) sequence length
            # `max_position_embeddings`. The actual sequence length might be
            # shorter than this, for faster training of tasks that do not have
            # long sequences.
            #
            # So `full_position_embeddings` is effectively an embedding table
            # for position [0, 1, 2, ..., max_position_embeddings-1], and the
            # current sequence has positions [0, 1, 2, ... seq_length-1], so
            # we can just perform a slice.
            position_embeddings = tf.slice(full_position_embeddings, [0, 0],
                                           [seq_length, -1])
            num_dims = len(output.shape.as_list())

            # Only the last two dimensions are relevant (`seq_length` and
            # `width`), so we broadcast among the first dimensions, which is
            # typically just the batch size.
            position_broadcast_shape = (
                [1] * (num_dims - 2) + [seq_length, width])
            position_embeddings = tf.reshape(position_embeddings,
                                             position_broadcast_shape)
            output += position_embeddings

    output = layer_norm_and_dropout(output, dropout_prob)
    return output
Example #47
0
    def add_prediction_op(self):
        """Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2

        The cell type is selected by `Config.cell` ("rnn" or "gru"). The
        cell's variables are created on the first time step and reused on
        every later step via `reuse_variables()` on the enclosing "RNN"
        variable scope. Per-step predictions are stacked along a new leading
        axis and then transposed so that the batch dimension comes first.

        Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an
        argument; the value of `self.dropout_placeholder` is passed as that
        keep probability.

        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)

        Raises:
            ValueError: If `Config.cell` is neither "rnn" nor "gru".
        """

        x = self.add_embedding()
        # The placeholder holds the *keep* probability expected by
        # tf.nn.dropout, not the drop rate.
        keep_prob = self.dropout_placeholder

        preds = []  # Predicted output at each timestep goes here.

        # Select the recurrent cell implementation from the configuration.
        if Config.cell == "rnn":
            cell = RNNCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        elif Config.cell == "gru":
            cell = GRUCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        else:
            raise ValueError("Unsuppported cell type: " + Config.cell)

        # Output projection U and bias b2; initial hidden state is all zeros
        # with a batch dimension taken dynamically from the embedded input.
        U = tf.get_variable(
            'U',
            shape=(Config.hidden_size, Config.n_classes),
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        b2 = tf.get_variable(
            'b2',
            shape=(Config.n_classes,),
            initializer=tf.contrib.layers.xavier_initializer(seed=2))
        h = tf.zeros(shape=(tf.shape(x)[0], Config.hidden_size))

        with tf.variable_scope("RNN"):
            for time_step in range(self.max_length):
                # From the second step onwards the cell must reuse the
                # variables it created on the first step.
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                output, h = cell(x[:, time_step, :], h)
                output = tf.nn.dropout(output, keep_prob)
                output = tf.matmul(output, U) + b2
                preds.append(output)

        # Stack to (max_length, batch, n_classes), then move batch first.
        preds = tf.stack(preds)
        # Function-call form so the module parses on Python 3 as well.
        print(preds.shape)
        preds = tf.transpose(preds, perm=[1, 0, 2])

        assert preds.get_shape().as_list() == [
            None, self.max_length, Config.n_classes
        ], "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, Config.n_classes],
            preds.get_shape().as_list())
        return preds
Example #48
0
            hr_imgs.append(img)
            # Work on channel 0 only (presumably the luminance channel --
            # TODO confirm against the code that loads `img`).
            tmp = img[:, :, 0]
            # Bicubic downscale by `scale`, then divide by 255.0.
            lr_img = misc.imresize(tmp, [height // scale, width // scale], interp='bicubic', mode='F')
            lr_imgs.append(lr_img / 255.0)
            # Bicubic upscale of the low-res frame back to full size, again
            # divided by 255.0.
            bic_img = misc.imresize(lr_img, [height, width], interp='bicubic', mode='F')
            bic_imgs.append(bic_img / 255.0)

        # Pad both sequences at each end by repeating the first / last frame
        # `t // 2` times.
        pad = t // 2
        lr_imgs = [lr_imgs[0]] * pad + lr_imgs + [lr_imgs[-1]] * pad
        bic_imgs = [bic_imgs[0]] * pad + bic_imgs + [bic_imgs[-1]] * pad
        print('files num:', len(lr_imgs))
        # Inputs: a stack of `t` low-res single-channel frames and one
        # full-size single-channel bicubic frame, both with batch size 1.
        lr = tf.placeholder(dtype=tf.float32, shape=[1, t, height // scale, width // scale, 1])
        bic = tf.placeholder(dtype=tf.float32, shape=[1, height, width, 1])

        # Both variables are registered only in the LOCAL_VARIABLES
        # collection. Presumably they carry the previous step's output and
        # features between runs -- verify against the inference loop.
        tf_pre_sr = tf.get_variable('tf_pre_sr',
                                    shape=[1, height, width, 1],
                                    dtype=tf.float32,
                                    collections=[tf.GraphKeys.LOCAL_VARIABLES])
        tf_pre_feat = tf.get_variable('tf_pre_feat',
                                      shape=[1, height // scale, width // scale, 128],
                                      dtype=tf.float32,
                                      collections=[tf.GraphKeys.LOCAL_VARIABLES])

        with tf.variable_scope('video_sr'):
            m = model()
            # The local network's image output is clipped to [0, 1] before
            # being fed, with the tf_pre_* variables, to the refine network.
            local_sr, local_feat = m.local_net(lr, bic)
            local_sr = tf.clip_by_value(local_sr, 0, 1)
            refine_sr, refine_feat = m.refine_net(tf_pre_sr, tf_pre_feat, local_sr, local_feat)
            refine_sr = tf.clip_by_value(refine_sr, 0, 1)

        saver = tf.train.Saver()
Example #49
0
    def __init__(self, args, infer=False):
        """Builds the character-level CNN + LSTM language-model graph.

        The graph embeds character ids, runs `tdnn` convolutions over each
        word, optionally applies `highway` layers, feeds the per-word features
        through an LSTM, projects each LSTM output with `linear` onto the word
        vocabulary, and defines the loss and a clipped-gradient SGD train op.

        Args:
            args: Configuration object. The attributes read here are
                `batch_size`, `num_unroll_steps`, `max_word_length`,
                `char_vocab_size`, `char_embed_size`, `highway_layers`,
                `rnn_size`, `dropout`, `rnn_layers`, `word_vocab_size`,
                `learning_rate` and `max_grad_norm`.
            infer: Accepted for interface compatibility; not read in this
                constructor.
        """

        self.kernels = [1, 2, 3, 4, 5, 6, 7]
        self.kernel_features = [50, 100, 150, 200, 200, 200, 200]
        # Each kernel width needs exactly one feature count. Both lengths are
        # formatted separately; the previous message passed two arguments to
        # a single len() call and would have raised TypeError instead.
        assert len(self.kernels) == len(
            self.kernel_features), 'kernels size:%d,kernel_feature size:%d' % (
                len(self.kernels), len(self.kernel_features))

        self.input_ = tf.placeholder(tf.int32,
                                     shape=[
                                         args.batch_size,
                                         args.num_unroll_steps,
                                         args.max_word_length
                                     ],
                                     name="input")
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.num_unroll_steps],
                                      name='targets')
        # One target tensor of shape [batch_size] per unroll step.
        target_list = tf.unpack(self.targets, axis=1)

        # First, embed characters.
        with tf.variable_scope('Embedding'):
            char_embedding_r = tf.get_variable(
                'char_embedding', [args.char_vocab_size, args.char_embed_size])
            # Replace row 0 of the table with a constant zero vector
            # (presumably the padding character id -- TODO confirm).
            char_embedinglist = tf.unpack(char_embedding_r)
            char_embedinglist[0] = tf.zeros([args.char_embed_size],
                                            dtype=tf.float32)
            self.char_embedding = tf.pack(char_embedinglist)
            # [batch_size, num_unroll_steps, max_word_length, char_embed_size]
            input_embedded = tf.nn.embedding_lookup(self.char_embedding,
                                                    self.input_)

            # [batch_size x num_unroll_steps, max_word_length, char_embed_size]
            input_embedded_s = tf.reshape(
                input_embedded,
                [-1, args.max_word_length, args.char_embed_size])

        # Second, apply convolutions.
        # [batch_size x num_unroll_steps, cnn_size]  # where cnn_size=sum(kernel_features)
        input_cnn = tdnn(input_embedded_s, self.kernels, self.kernel_features)

        # Maybe apply Highway.
        if args.highway_layers > 0:
            input_cnn = highway(input_cnn,
                                input_cnn.get_shape()[-1],
                                num_layers=args.highway_layers)

        # Finally, do LSTM.
        with tf.variable_scope('LSTM'):
            cell = tf.nn.rnn_cell.BasicLSTMCell(args.rnn_size,
                                                state_is_tuple=True,
                                                forget_bias=0.0)
            if args.dropout > 0.0:
                cell = tf.nn.rnn_cell.DropoutWrapper(cell,
                                                     output_keep_prob=1. -
                                                     args.dropout)
            if args.rnn_layers > 1:
                cell = tf.nn.rnn_cell.MultiRNNCell([cell] * args.rnn_layers,
                                                   state_is_tuple=True)

            self.initial_rnn_state = cell.zero_state(args.batch_size,
                                                     dtype=tf.float32)

            input_cnn = tf.reshape(
                input_cnn, [args.batch_size, args.num_unroll_steps, -1])
            # A list of num_unroll_steps tensors, each [batch_size x hidden].
            input_cnn2 = tf.unpack(input_cnn, axis=1)

            outputs, state = tf.nn.rnn(cell,
                                       input_cnn2,
                                       initial_state=self.initial_rnn_state,
                                       dtype=tf.float32)

            self.final_rnn_state = state

            # Linear projection onto output (word) vocab; the projection
            # variables are created for step 0 and reused for later steps.
            self.logits = []
            with tf.variable_scope('WordEmbedding') as scope:
                for idx, output in enumerate(tf.unpack(outputs, axis=0)):
                    if idx > 0:
                        scope.reuse_variables()
                    self.logits.append(linear(output, args.word_vocab_size))

        # Loss is summed over every step and example, then divided by the
        # batch size; `cost` further divides by the number of unroll steps.
        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                self.logits, target_list),
            name='loss') / args.batch_size
        cost = self.loss / args.num_unroll_steps
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.learning_rate = tf.Variable(args.learning_rate,
                                         trainable=False,
                                         name='learning_rate')
        # Plain SGD on gradients clipped by global norm.
        tvars = tf.trainable_variables()
        grads, self.global_norm = tf.clip_by_global_norm(
            tf.gradients(cost, tvars), args.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #50
0
def train(data,
          batch_size=128,
          learning_rate=FLAGS.learning_rate,
          log_dir='./log',
          checkpoint_dir='./checkpoint',
          num_epochs=-1):
    """Builds a two-conv-layer ABC network and runs the training loop.

    The graph has two ABC-approximated convolution layers (each followed by
    max-pooling, batch normalization and ReLU), one dense ReLU layer and a
    10-way output layer. After every epoch the model is checkpointed,
    evaluated with `evaluate`, and summaries are written to `log_dir`.

    Args:
        data: Input source. For the "imagenet" dataset it is passed to
            `image_processing.distorted_inputs`; otherwise its
            `generate_batches` method is used. `data.size[0]` is read as the
            number of examples per epoch.
        batch_size: Batch size used for the input pipeline and evaluation.
        learning_rate: Learning rate handed to the Adam optimizer.
        log_dir: Directory for the summary writer.
        checkpoint_dir: Directory checkpoints are saved to / restored from.
        num_epochs: Training stops when the epoch counter equals this value.
            With the default of -1 the counter never matches, so training
            runs until interrupted.

    Returns:
        None. Returns early, before training, if `FLAGS.resume` is set and no
        checkpoint is found in `checkpoint_dir`.
    """

    # tf Graph input
    with tf.device('/cpu:0'):
        with tf.name_scope('data'):
            if FLAGS.dataset == "imagenet":
                x, yt = image_processing.distorted_inputs(
                    data,
                    batch_size=batch_size,
                    num_preprocess_threads=FLAGS.num_threads)
            else:
                x, yt = data.generate_batches(batch_size,
                                              num_threads=FLAGS.num_threads)
        global_step = tf.get_variable('global_step',
                                      shape=[],
                                      dtype=tf.int64,
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
    if FLAGS.gpu:
        device_str = '/gpu:' + str(FLAGS.device)
    else:
        device_str = '/cpu:0'
    with tf.device(device_str):
        # Ops that fit the ABC alpha coefficients; run before each train step.
        alphas_training_operations = []

        # Convolution Layer 1
        W_conv1 = weight_variable(shape=([5, 5, 3, 32]), name="W_conv1")
        b_conv1 = bias_variable(shape=[32], name="b_conv1")
        alphas_training_op1, ABCLayer1, alphas_loss1 = ABC(
            W_conv1,
            b_conv1,
            no_binary_filters=5,
            no_ApproxConvLayers=5,
            padding="SAME")
        alphas_training_operations.append(alphas_training_op1)
        conv1 = ABCLayer1(x)
        pool1 = max_pool_2x2(conv1)
        bn_conv1 = tf.layers.batch_normalization(pool1, axis=-1)
        h_conv1 = tf.nn.relu(bn_conv1)

        # Convolution Layer 2
        # NOTE(review): `values` is not defined in this function; presumably a
        # module-level dict of pre-trained weights -- verify.
        W_conv2 = tf.Variable(values["W_conv2"], name="W_conv2")
        b_conv2 = tf.Variable(values["b_conv2"], name="b_conv2")
        alphas_training_op2, ABCLayer2, alphas_loss2 = ABC(
            W_conv2,
            b_conv2,
            no_binary_filters=5,
            no_ApproxConvLayers=5,
            padding="SAME")
        alphas_training_operations.append(alphas_training_op2)
        conv2 = ABCLayer2(h_conv1)
        pool2 = max_pool_2x2(conv2)
        bn_conv2 = tf.layers.batch_normalization(pool2, axis=-1)
        h_conv2 = tf.nn.relu(bn_conv2)

        # Flat the conv2 output
        h_conv2_flat = tf.reshape(h_conv2, shape=(-1, 7 * 7 * 64))

        # Dense layer1
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

        # Output layer
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])
        model = tf.matmul(h_fc1, W_fc2) + b_fc2

        y = model

        # Not run below (a fresh initializer op is created before training);
        # kept so the graph's op names are unchanged.
        graph_init = tf.global_variables_initializer()
        # Define loss and optimizer
        with tf.name_scope('objective'):
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=yt,
                                                               logits=y))
            accuracy = tf.reduce_mean(
                tf.cast(tf.nn.in_top_k(y, yt, 1), tf.float32))

        opt = tf.contrib.layers.optimize_loss(
            loss,
            global_step,
            learning_rate,
            'Adam',
            gradient_noise_scale=None,
            gradient_multipliers=None,
            clip_gradients=None,
            learning_rate_decay_fn=learning_rate_decay_fn
            if FLAGS.using_learning_rate_decay_fn else None,
            update_ops=None,
            variables=None,
            name=None)

    # Moving averages of loss, accuracy and all trainable variables.
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                            global_step,
                                            name='average')
    ema_op = ema.apply([loss, accuracy] + tf.trainable_variables())
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, ema_op)

    loss_avg = ema.average(loss)
    tf.summary.scalar('loss/training', loss_avg)
    accuracy_avg = ema.average(accuracy)
    tf.summary.scalar('accuracy/training', accuracy_avg)

    # Fail fast if the loss becomes NaN/Inf.
    check_loss = tf.check_numerics(loss, 'model diverged: loss->nan')
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, check_loss)
    updates_collection = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # The grouped update ops run only after the optimizer step.
    with tf.control_dependencies([opt]):
        train_op = tf.group(*updates_collection)

    if FLAGS.summary:
        add_summaries(scalar_list=[accuracy, accuracy_avg, loss, loss_avg],
                      activation_list=tf.get_collection(
                          tf.GraphKeys.ACTIVATIONS),
                      var_list=tf.trainable_variables())

    summary_op = tf.summary.merge_all()
    # Configure options for session
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=True,
        gpu_options=gpu_options,
    ))
    if FLAGS.resume:
        logging.info('resuming from ' + checkpoint_dir)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir + '/')
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found')
            return
    else:
        saver = tf.train.Saver(max_to_keep=5)
        sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    num_batches = data.size[0] / batch_size
    summary_writer = tf.summary.FileWriter(log_dir, graph=sess.graph)
    epoch = global_step.eval() / num_batches if FLAGS.resume else 0
    # Computed for periodic in-epoch display/testing; that logging is
    # currently disabled, so these are not read by the loop below.
    display_interval = FLAGS.display_interval or num_batches / 10
    test_interval = FLAGS.test_interval or num_batches / 2
    logging.info('num of trainable paramaters: %d' %
                 count_params(tf.trainable_variables()))
    tic = time.clock()
    while epoch != num_epochs:

        curr_step = 0

        logging.info('Started epoch %d' % epoch)
        while curr_step < data.size[0]:
            # Fit the ABC alpha coefficients before every weight update.
            # NOTE(review): `alpha_training_epochs` is not defined in this
            # function; presumably a module-level constant -- verify.
            for alpha_training_op in alphas_training_operations:
                for alpha_epoch in range(alpha_training_epochs):
                    sess.run(alpha_training_op)
            _, loss_val, step = sess.run([train_op, loss, global_step])
            # NOTE(review): advances by FLAGS.batch_size rather than the
            # `batch_size` argument -- confirm the two always agree.
            curr_step += FLAGS.batch_size

        # End of epoch: checkpoint, evaluate, and write summaries.
        step, acc_value, loss_value, summary = sess.run(
            [global_step, accuracy_avg, loss_avg, summary_op])
        saver.save(sess,
                   save_path=checkpoint_dir + '/model.ckpt',
                   global_step=global_step)
        test_top1, test_top5, test_loss = evaluate(
            model,
            FLAGS.dataset,
            batch_size=batch_size,
            checkpoint_dir=checkpoint_dir)
        logging.info('Test loss %.3f Test top1 %.3f Test top5 %.3f' %
                     (test_loss, test_top1, test_top5))
        summary_out = tf.Summary()
        summary_out.ParseFromString(summary)
        summary_out.value.add(tag='accuracy/test_top1', simple_value=test_top1)
        summary_out.value.add(tag='accuracy/test_top5', simple_value=test_top5)
        summary_out.value.add(tag='loss/test', simple_value=test_loss)
        summary_writer.add_summary(summary_out, step)
        summary_writer.flush()
        logging.info("Finished epoch %d " % epoch)
        epoch += 1

    # When done, ask the threads to stop.
    coord.request_stop()
    coord.join(threads)
    coord.clear_stop()
    summary_writer.close()
    # Take the end timestamp from the same clock as `tic`; the previous
    # `time._conv_block` attribute does not exist and raised AttributeError.
    toc = time.clock()
    duration = toc - tic
    logging.info('Duration %.3f ' % (duration))
Example #51
0
 def decoding_w(self) -> tf.Variable:
     """Return the weight matrix projecting decoder states onto the vocabulary.

     The variable is named "state_to_word_W", has shape
     [output_projection_size, vocabulary size] and uses a Glorot-normal
     initializer. It is requested inside the "output_projection" name scope.
     """
     with tf.name_scope("output_projection"):
         projection_shape = [self.output_projection_size, len(self.vocabulary)]
         projection_weights = tf.get_variable(
             name="state_to_word_W",
             shape=projection_shape,
             initializer=tf.glorot_normal_initializer())
     return projection_weights
Example #52
0
def build_graph(top_k, is_train=True, num_classes=FLAGS.charset_size):
    """Build the VGG-A classification graph and return its tensors in a dict.

    Args:
        top_k: Number of top predictions to expose and to score top-k
            accuracy with.
        is_train: When True, build the training graph (new variables, Adam
            train op, summaries). When False, build an evaluation graph that
            reuses existing variables.
        num_classes: Number of output classes passed to `vgg_a`.

    Returns:
        A dict of graph handles. Both modes provide 'images', 'labels',
        'logits', 'top_k', 'loss', 'accuracy', 'accuracy_top_k', 'predicted',
        'predicted_distribution', 'predicted_index_top_k' and
        'predicted_val_top_k'. The training mode additionally provides
        'global_step', 'train_op' and 'merged_summary_op'.
    """
    images = tf.placeholder(dtype=tf.float32,
                            shape=[None, 112, 112, 1],
                            name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')

    # Evaluation graph: handled first so the training path below stays flat.
    if not is_train:
        eval_logits, _eval_endpoints = vgg_a(images,
                                             num_classes=num_classes,
                                             is_training=False,
                                             reuse=True)
        eval_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=eval_logits, labels=labels)
        eval_loss = tf.reduce_mean(eval_xent)
        eval_predicted = tf.argmax(eval_logits, 1)
        eval_hits = tf.equal(tf.argmax(eval_logits, 1), labels)
        eval_accuracy = tf.reduce_mean(tf.cast(eval_hits, tf.float32))
        eval_probs = tf.nn.softmax(eval_logits)
        eval_top_vals, eval_top_idx = tf.nn.top_k(eval_probs, k=top_k)
        eval_hits_top_k = tf.nn.in_top_k(eval_probs, labels, top_k)
        eval_accuracy_top_k = tf.reduce_mean(
            tf.cast(eval_hits_top_k, tf.float32))
        return {
            'images': images,
            'labels': labels,
            'logits': eval_logits,
            'top_k': top_k,
            'loss': eval_loss,
            'accuracy': eval_accuracy,
            'predicted': eval_predicted,
            'accuracy_top_k': eval_accuracy_top_k,
            'predicted_distribution': eval_probs,
            'predicted_index_top_k': eval_top_idx,
            'predicted_val_top_k': eval_top_vals
        }

    # Training graph.
    logits, _endpoints = vgg_a(images,
                               num_classes=num_classes,
                               is_training=True,
                               reuse=False)
    predicted = tf.argmax(logits, 1)
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=labels)
    loss = tf.reduce_mean(xent)
    hits = tf.equal(tf.argmax(logits, 1), labels)
    accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
    probs = tf.nn.softmax(logits)
    top_vals, top_idx = tf.nn.top_k(probs, k=top_k)
    hits_top_k = tf.nn.in_top_k(probs, labels, top_k)
    accuracy_top_k = tf.reduce_mean(tf.cast(hits_top_k, tf.float32))

    step_var = tf.get_variable("step", [],
                               initializer=tf.constant_initializer(0.0),
                               trainable=False)
    adam = tf.train.AdamOptimizer(learning_rate=1e-4)
    # Make the train op wait on everything registered under UPDATE_OPS.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies([tf.group(*pending_updates)]):
        train_op = adam.minimize(loss, global_step=step_var)
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    summaries = tf.summary.merge_all()
    return {
        'images': images,
        'labels': labels,
        'logits': logits,
        'top_k': top_k,
        'global_step': step_var,
        'train_op': train_op,
        'loss': loss,
        'accuracy': accuracy,
        'accuracy_top_k': accuracy_top_k,
        'merged_summary_op': summaries,
        'predicted': predicted,
        'predicted_distribution': probs,
        'predicted_index_top_k': top_idx,
        'predicted_val_top_k': top_vals
    }
Example #53
0
def build_and_restore_model(init_checkpoint, bert_config_file):
    """Build a BERT graph with a SQuAD-style output head and map it to a checkpoint.

    Three int32 placeholders (``input_ids``, ``input_mask``, ``segment_ids``)
    feed a ``modeling.BertModel`` built with ``is_training=False``. A
    2 x hidden_size projection plus bias is applied to the flattened sequence
    output, and every trainable variable is then wired to ``init_checkpoint``
    through ``tf.train.init_from_checkpoint``.

    Args:
      init_checkpoint: checkpoint path handed to
        ``modeling.get_assignment_map_from_checkpoint`` and
        ``tf.train.init_from_checkpoint``.
      bert_config_file: JSON file read by ``modeling.BertConfig.from_json_file``.

    Returns:
      The list returned by ``tf.trainable_variables()``. The logits tensor is
      built in the graph but is not returned.
    """

    def _int_matrix(name):
        # All model inputs are rank-2 int32 placeholders with free dimensions.
        return tf.placeholder(tf.int32, shape=(None, None), name=name)

    input_ids = _int_matrix('input_ids')
    input_mask = _int_matrix('input_mask')
    segment_ids = _int_matrix('segment_ids')

    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    hidden_size = bert_config.hidden_size

    model = modeling.BertModel(config=bert_config,
                               is_training=False,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=True,
                               scope="bert")

    sequence_output = model.get_sequence_output()

    # The returned shape list is not used; the call is kept because it is
    # given expected_rank=3 (presumably a rank check -- confirm in modeling).
    modeling.get_shape_list(sequence_output, expected_rank=3)

    output_weights = tf.get_variable(
        "cls/squad/output_weights", [2, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("cls/squad/output_bias", [2],
                                  initializer=tf.zeros_initializer())

    # Collapse the leading dimensions and project every position to 2 logits.
    # The result is only added to the graph; it is not returned.
    flat_hidden = tf.reshape(sequence_output, [-1, hidden_size])
    tf.nn.bias_add(
        tf.matmul(flat_hidden, output_weights, transpose_b=True), output_bias)

    tvars = tf.trainable_variables()
    assignment_map, restored_names = (
        modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint))
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for variable in tvars:
        suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info("  name = %s, shape = %s%s", variable.name,
                        variable.shape, suffix)
    return tvars
Example #54
0
 def decoding_b(self) -> tf.Variable:
     """Return the zero-initialised ``state_to_word_b`` variable.

     The variable has one entry per item of ``self.vocabulary`` and is
     requested inside the ``output_projection`` name scope.
     """
     vocabulary_size = len(self.vocabulary)
     with tf.name_scope("output_projection"):
         bias = tf.get_variable(
             name="state_to_word_b",
             shape=[vocabulary_size],
             initializer=tf.zeros_initializer())
     return bias
Example #55
0
	def model_fn(features, labels, mode):
		"""Build the sentence-pair classification graph for one estimator mode.

		Each input named in the enclosing ``input_name`` is encoded with
		``esim_bert_encoding``; the "a" and "b" encodings are aligned, pooled,
		combined into one pair representation and passed to
		``classifier.classifier``.

		Args:
			features: mapping read for "label_ids", "input_mask_a" and
				"input_mask_b"; also forwarded to ``esim_bert_encoding``.
			labels: forwarded unchanged to ``esim_bert_encoding``.
			mode: a ``tf.estimator.ModeKeys`` value. Dropout is only enabled
				and a train op is only built for ``TRAIN``.

		Returns:
			``[train_op, loss, per_example_loss, logits]`` in TRAIN mode,
			otherwise ``[loss, loss, per_example_loss, logits]``.
		"""

		if model_io_config.fix_lm == True:
			scope = model_config.scope + "/task"
		else:
			scope = model_config.scope

		is_training = (mode == tf.estimator.ModeKeys.TRAIN)

		# Dropout is disabled outside of training.
		if is_training:
			hidden_dropout_prob = model_config.hidden_dropout_prob
			attention_probs_dropout_prob = model_config.attention_probs_dropout_prob
			dropout_prob = model_config.dropout_prob
		else:
			hidden_dropout_prob = 0.0
			attention_probs_dropout_prob = 0.0
			dropout_prob = 0.0

		label_ids = features["label_ids"]
		mask_a = features["input_mask_{}".format("a")]
		mask_b = features["input_mask_{}".format("b")]

		# The first input creates (or reuses, per model_reuse) the encoder
		# variables; every later input shares them.
		repres_lst = {}
		for index, name in enumerate(input_name):
			reuse = True if index > 0 else model_reuse
			repres_lst[name] = esim_bert_encoding(model_config, features,
								labels, mode, name, scope, dropout_prob,
								reuse=reuse)

		a_output, b_output = alignment(model_config,
				repres_lst["a"], repres_lst["b"],
				mask_a, mask_b,
				scope, reuse=model_reuse)

		repres_a = esim_bert_pooling(model_config, a_output,
					mask_a,
					scope, dropout_prob, reuse=model_reuse)

		# The second pooling call always reuses the variables of the first.
		repres_b = esim_bert_pooling(model_config, b_output,
					mask_b,
					scope, dropout_prob, reuse=True)

		pair_repres = tf.concat([repres_a, repres_b,
					tf.abs(repres_a-repres_b),
					repres_b*repres_a], axis=-1)

		with tf.variable_scope(scope, reuse=model_reuse):

			# Best-effort class weighting: any failure while building the
			# label-ratio table falls back to unweighted loss.
			# NOTE(review): TF1's tf.get_variable appears to raise ValueError
			# when a Tensor initializer is combined with an explicit shape,
			# which would make this branch always take the fallback -- confirm
			# before relying on class weights.
			try:
				label_ratio_table = tf.get_variable(
							name="label_ratio",
							shape=[num_labels,],
							initializer=tf.constant(label_tensor),
							trainable=False)

				ratio_weight = tf.nn.embedding_lookup(label_ratio_table,
					label_ids)
				print("==applying class weight==")
			except Exception:
				# Was a bare ``except:``, which also swallowed
				# KeyboardInterrupt / SystemExit.
				ratio_weight = None

			(loss,
			per_example_loss,
			logits) = classifier.classifier(model_config,
										pair_repres,
										num_labels,
										label_ids,
										dropout_prob,
										ratio_weight)

		if is_training:
			pretrained_tvars = model_io_fn.get_params(model_config.scope,
											not_storage_params=not_storage_params)

			if load_pretrained:
				model_io_fn.load_pretrained(pretrained_tvars,
											init_checkpoint,
											exclude_scope=exclude_scope_dict["task"])

		trainable_params = model_io_fn.get_params(scope,
											not_storage_params=not_storage_params)

		tvars = trainable_params

		storage_params = model_io_fn.get_params(model_config.scope,
											not_storage_params=not_storage_params)

		model_io_fn.set_saver()
		for var in storage_params:
			print(var.name, var.get_shape(), "==storage params==")

		for var in tvars:
			print(var.name, var.get_shape(), "==trainable params==")

		if is_training:
			model_io_fn.print_params(tvars, string=", trainable params")
			update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
			# Build the train op under the UPDATE_OPS dependencies so those
			# ops run with every training step.
			with tf.control_dependencies(update_ops):
				optimizer_fn = optimizer.Optimizer(opt_config)
				train_op = optimizer_fn.get_train_op(loss, tvars,
								opt_config.init_lr,
								opt_config.num_train_steps)

				return [train_op, loss, per_example_loss, logits]
		else:
			model_io_fn.print_params(tvars, string=", trainable params")
			return [loss, loss, per_example_loss, logits]
def train():
  """Train CIFAR-10 for a number of steps.

  Builds one model tower per GPU (``FLAGS.num_gpus``) with shared variables,
  averages the tower gradients, applies them together with a moving average
  of the trainable variables, and then runs ``FLAGS.max_steps`` training
  steps. Progress is printed every 10 steps, summaries are written every 100
  steps, and a checkpoint is saved to ``FLAGS.train_dir`` every 1000 steps
  and on the final step.

  Raises:
    AssertionError: if the loss becomes NaN.
  """
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # Create a variable to count the number of train() calls. This equals the
    # number of batches processed * FLAGS.num_gpus.
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)

    # Calculate the learning rate schedule.
    num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                             FLAGS.batch_size)
    decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    cifar10.LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)

    # Create an optimizer that performs gradient descent.
    opt = tf.train.GradientDescentOptimizer(lr)

    # Calculate the gradients for each model tower.
    tower_grads = []
    for i in xrange(FLAGS.num_gpus):
      with tf.device('/gpu:%d' % i):
        with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
          # Calculate the loss for one tower of the CIFAR model. This function
          # constructs the entire CIFAR model but shares the variables across
          # all towers.
          loss = tower_loss(scope)

          # Reuse variables for the next tower.
          tf.get_variable_scope().reuse_variables()

          # Retain the summaries from the final tower.
          summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

          # Calculate the gradients for the batch of data on this CIFAR tower.
          grads = opt.compute_gradients(loss)

          # Keep track of the gradients across all towers.
          tower_grads.append(grads)

    # We must calculate the mean of each gradient. Note that this is the
    # synchronization point across all towers.
    grads = average_gradients(tower_grads)

    # Add a summary to track the learning rate.
    summaries.append(tf.scalar_summary('learning_rate', lr))

    # Add histograms for gradients. Compare against None explicitly: taking
    # the truth value of a Tensor (``if grad:``) raises TypeError.
    for grad, var in grads:
      if grad is not None:
        summaries.append(
            tf.histogram_summary(var.op.name + '/gradients', grad))

    # Apply the gradients to adjust the shared variables.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
      summaries.append(tf.histogram_summary(var.op.name, var))

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Group all updates to into a single train op.
    train_op = tf.group(apply_gradient_op, variables_averages_op)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation from the last tower summaries.
    summary_op = tf.merge_summary(summaries)

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph. allow_soft_placement must be set to
    # True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                            graph_def=sess.graph_def)

    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      # ``loss`` is the loss tensor of the last tower built above.
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = duration / FLAGS.num_gpus

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     iterations=None,
                     add_batch_norm=None,
                     sample_random_frames=None,
                     cluster_size=None,
                     hidden_size=None,
                     is_training=True,
                     **unused_params):
        """Build a cluster-projection / pooling / hidden-layer frame model.

        Frames are sampled from ``model_input``, flattened to
        ``[-1, feature_size]``, projected to ``cluster_size`` units, pooled
        over frames with ``FLAGS.dbof_pooling_method``, projected to the
        hidden size, and handed to the video-level classifier named by
        ``FLAGS.video_level_classifier_model``.

        Args:
          model_input: input tensor; after sampling, dimensions 1 and 2 of
            its static shape are read as ``max_frames`` and ``feature_size``.
          vocab_size: forwarded to the video-level classifier.
          num_frames: per-example frame counts; expanded on axis 1 and cast
            to float32 before sampling.
          iterations: number of frames to sample. Falsy values (including
            ``None``) fall back to ``FLAGS.iterations``.
          add_batch_norm: whether to use batch norm instead of biases. Falsy
            values fall back to ``FLAGS.dbof_add_batch_norm``.
          sample_random_frames: choose ``utils.SampleRandomFrames`` over
            ``utils.SampleRandomSequence``. Falsy values fall back to
            ``FLAGS.sample_random_frames``.
          cluster_size: width of the cluster projection. Falsy values fall
            back to ``FLAGS.dbof_cluster_size``.
          hidden_size: width of the hidden layer. Falsy values fall back to
            ``FLAGS.dbof_hidden_size``.
          is_training: forwarded to every ``slim.batch_norm`` call.
          **unused_params: forwarded to the video-level classifier.

        Returns:
          Whatever the selected video-level model's ``create_model`` returns.
        """
        iterations = iterations or FLAGS.iterations
        add_batch_norm = add_batch_norm or FLAGS.dbof_add_batch_norm
        random_frames = sample_random_frames or FLAGS.sample_random_frames
        cluster_size = cluster_size or FLAGS.dbof_cluster_size
        hidden1_size = hidden_size or FLAGS.dbof_hidden_size

        num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
        if random_frames:
            model_input = utils.SampleRandomFrames(model_input, num_frames,
                                                   iterations)
        else:
            model_input = utils.SampleRandomSequence(model_input, num_frames,
                                                     iterations)
        max_frames = model_input.get_shape().as_list()[1]
        feature_size = model_input.get_shape().as_list()[2]
        reshaped_input = tf.reshape(model_input, [-1, feature_size])
        tf.summary.histogram("input_hist", reshaped_input)

        if add_batch_norm:
            reshaped_input = slim.batch_norm(reshaped_input,
                                             center=True,
                                             scale=True,
                                             is_training=is_training,
                                             scope="input_bn")

        cluster_weights = tf.get_variable(
            "cluster_weights", [feature_size, cluster_size],
            initializer=tf.random_normal_initializer(stddev=1 /
                                                     math.sqrt(feature_size)))
        tf.summary.histogram("cluster_weights", cluster_weights)
        activation = tf.matmul(reshaped_input, cluster_weights)
        if add_batch_norm:
            activation = slim.batch_norm(activation,
                                         center=True,
                                         scale=True,
                                         is_training=is_training,
                                         scope="cluster_bn")
        else:
            # Use the initializer object here: tf.random_normal is the
            # sampling op and needs a shape argument, so calling it with
            # only stddev failed before the variable could be created.
            cluster_biases = tf.get_variable(
                "cluster_biases", [cluster_size],
                initializer=tf.random_normal_initializer(
                    stddev=1 / math.sqrt(feature_size)))
            tf.summary.histogram("cluster_biases", cluster_biases)
            activation += cluster_biases
        activation = tf.nn.relu6(activation)
        tf.summary.histogram("cluster_output", activation)

        # Restore the frame axis so the activations can be pooled per example.
        activation = tf.reshape(activation, [-1, max_frames, cluster_size])
        activation = utils.FramePooling(activation, FLAGS.dbof_pooling_method)

        hidden1_weights = tf.get_variable(
            "hidden1_weights", [cluster_size, hidden1_size],
            initializer=tf.random_normal_initializer(stddev=1 /
                                                     math.sqrt(cluster_size)))
        tf.summary.histogram("hidden1_weights", hidden1_weights)
        activation = tf.matmul(activation, hidden1_weights)
        if add_batch_norm:
            activation = slim.batch_norm(activation,
                                         center=True,
                                         scale=True,
                                         is_training=is_training,
                                         scope="hidden1_bn")
        else:
            hidden1_biases = tf.get_variable(
                "hidden1_biases", [hidden1_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            tf.summary.histogram("hidden1_biases", hidden1_biases)
            activation += hidden1_biases
        activation = tf.nn.relu6(activation)
        tf.summary.histogram("hidden1_output", activation)

        aggregated_model = getattr(video_level_models,
                                   FLAGS.video_level_classifier_model)
        return aggregated_model().create_model(model_input=activation,
                                               vocab_size=vocab_size,
                                               **unused_params)
def create_variables(name, shape, initializer=tf.contrib.layers.xavier_initializer()):
    """Create (or fetch) an L2-regularised variable and record its summary.

    Args:
      name: variable name passed to ``tf.get_variable``.
      shape: variable shape.
      initializer: initializer to use; defaults to a Xavier initializer.

    Returns:
      The variable returned by ``tf.get_variable``.
    """
    # L2_value is a module-level setting defined outside this function.
    l2_penalty = tf.contrib.layers.l2_regularizer(scale=L2_value)
    variable = tf.get_variable(
        name,
        shape=shape,
        initializer=initializer,
        regularizer=l2_penalty)
    activation_summary(variable)
    return variable
    def _build_graph(self):
        """Assemble logits, masked loss/accuracy, the train op and summaries.

        Everything except the final print is created under the
        ``TRANSFORMER`` name scope. Positions where ``self.y`` equals 0 are
        excluded from both the accuracy and the mean loss. Results are stored
        on ``self`` (``logits``, ``preds``, ``acc``, ``y_smoothed``, ``loss``,
        ``mean_loss``, ``global_step``, ``learning_rate``, ``update_op``,
        ``optimizer``, ``train_op``, ``merged``, ``saver``).
        """
        with tf.name_scope('TRANSFORMER'):
            self.logits = self._transformer_layer(
                inputs=self.x,
                decoder_inputs=self.decoder_inputs,
                drop_rate=self._drop_rate,
                is_training=self._is_training)

            with tf.name_scope('Loss'):
                self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1))

                # Accuracy over non-<PAD> positions only (id 0 is masked out).
                non_pad = tf.to_float(tf.not_equal(self.y, 0))
                n_tokens = tf.reduce_sum(non_pad)
                hits = tf.to_float(tf.equal(self.preds, self.y))
                n_correct = tf.reduce_sum(hits * non_pad)
                self.acc = n_correct / n_tokens

                def _smoothed_targets():
                    # Training branch: one-hot targets with label smoothing.
                    return label_smoother(
                        tf.one_hot(self.y, depth=len(self.target_int2vocab)))

                def _hard_targets():
                    # Inference branch: plain one-hot targets.
                    return tf.one_hot(self.y,
                                      depth=len(self.target_int2vocab))

                self.y_smoothed = tf.cond(pred=self._is_training,
                                          true_fn=_smoothed_targets,
                                          false_fn=_hard_targets)

                # Loss averaged over non-<PAD> positions only.
                self.loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.logits, labels=self.y_smoothed)
                masked_loss_sum = tf.reduce_sum(self.loss * non_pad)
                mask_sum = tf.reduce_sum(non_pad)
                self.mean_loss = masked_loss_sum / mask_sum

            with tf.name_scope('Training_Scheme'):
                # The step counter starts at 1, not 0.
                step_init = tf.constant_initializer(value=1, dtype=tf.int32)
                self.global_step = tf.get_variable(name='global_step',
                                                   shape=[],
                                                   dtype=tf.int32,
                                                   initializer=step_init,
                                                   trainable=False)
                self.learning_rate = warmup_learning_rate(
                    d_model=self._num_units,
                    step_num=self.global_step,
                    warmup_step=self._warmup_step)

                # Ops in UPDATE_OPS (e.g. batch normalization updates) must
                # run together with every training step.
                self.update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=0.9,
                    beta2=0.98,
                    epsilon=1e-9)

                with tf.control_dependencies(self.update_op):
                    self.train_op = self.optimizer.minimize(
                        self.mean_loss, global_step=self.global_step)

            with tf.name_scope('Summary'):
                for tag, value in (('accuracy', self.acc),
                                   ('mean_loss', self.mean_loss),
                                   ('learning_rate', self.learning_rate)):
                    tf.summary.scalar(tag, value)

            self.merged = tf.summary.merge_all()
            self.saver = tf.train.Saver()
        print("Model is built...")
Example #60
0
    def __init__(self, args):
        '''
        Initialize the model.

        Copies the hyperparameters out of ``args``, creates the embedding
        tables, embeds the context and the entity pool, encodes them and
        finally builds the dual pointer decoder.

        :param args: dict holding the hyperparameters
        '''
        self.is_train = args["is_train"]
        self.batch_size = args["batch_size"]
        # NOTE: the attribute name is spelled ``keep_pob`` (sic).
        self.keep_pob = args["keep_prob"]
        self.dropout_prob = 1.0 - self.keep_pob
        self.learning_rate = args["learning_rate"]

        self.relation_vocab_size = args["relation_vocab_size"]
        self.entity_vocab_size = args["entity_vocab_size"]
        self.entity_type_emb_size = args["entity_type_emb_size"]
        self.char_vocab_size = args["char_vocab_size"]
        self.char_emb_size = args["char_emb_size"]

        self.max_sentences = args["max_sentences"]
        self.word_maxlen = args["word_maxlen"]
        self.word_emb_table = args["embedding_table"]
        self.word_emb_size = args["word_emb_size"]

        self.filter_size = args["filter_size"]
        self.num_filter = args["num_filter"]

        self.max_entities = args["max_entities"]
        self.entity_max_tokens = args["entity_max_tokens"]
        self.entity_max_chars = args["entity_max_chars"]

        # Encoder and decoder parameters
        self.encoder_stack = args["encoder_stack"]
        self.encoder_max_step = args["encoder_max_step"]
        self.encoder_hidden = args["encoder_hidden"]
        self.decoder_hidden = args["decoder_hidden"]

        self.global_step = tf.get_variable(
            'global_step',
            shape=[],
            dtype='int32',
            initializer=tf.constant_initializer(0),
            trainable=False)

        # Initialize the model inputs
        self._placeholder_init()

        # Word embedding table that is fine-tuned together with the model
        finetune_table = tf.get_variable(
            name="word_embedding_table_finetuning",
            initializer=self.word_emb_table,
            trainable=True,
            dtype=tf.float32)

        # Fixed word embedding table that keeps the pretrained values as-is
        fix_table = tf.get_variable(name="word_embedding_table_fix",
                                    initializer=self.word_emb_table,
                                    trainable=False,
                                    dtype=tf.float32)
        # Randomly initialized character embedding table
        char_emb_table = tf.get_variable(
            "char_emb_table",
            shape=[self.char_vocab_size, self.char_emb_size],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Randomly initialized entity type embedding table
        entity_type_emb_table = tf.get_variable(
            "entity_type_emb_table",
            shape=[self.entity_vocab_size, self.entity_type_emb_size],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        # One-hot embedding table for sentence indices
        sentence_id_emb_table = tf.eye(num_rows=self.max_sentences)

        # Word embedding of the sentence (context)
        context_embedding = self._context_embedding_layer(
            fix_table=fix_table,
            finetune_table=finetune_table,
            char_emb_table=char_emb_table)
        # Entity type embedding of the sentence
        entity_type_embedding = tf.nn.embedding_lookup(
            entity_type_emb_table, self.context_entity_type)
        # Sentence index embedding
        sentence_id_embedding = tf.nn.embedding_lookup(sentence_id_emb_table,
                                                       self.sentence_id)

        # entity token, character, type, position, sentence_id embedding
        entity_embedding = self._entity_pool_embedding(
            fix_table=fix_table,
            finetune_table=finetune_table,
            char_emb_table=char_emb_table,
            token_entities=self.entity_pool,
            char_entities=self.char_entity_pool)

        # Look up the embeddings of the entities that appear in the sentence:
        # both tensors are unstacked along axis 0 and each slice of the
        # entity pool is indexed with the matching slice of entity ids.
        context_entity_emb = []
        unstack_entity_pool = tf.unstack(entity_embedding, axis=0)
        unstack_context_entity_id = tf.unstack(self.context_entity_id, axis=0)
        for entity_pool, context in zip(unstack_entity_pool,
                                        unstack_context_entity_id):
            context_entity_emb.append(
                tf.nn.embedding_lookup(entity_pool, context))

        context_entity_emb = tf.stack(context_entity_emb, axis=0)

        # context token, character, entity_type, sentence_id embedding
        context_embedding = tf.concat([
            context_embedding, entity_type_embedding, sentence_id_embedding,
            context_entity_emb
        ],
                                      axis=-1)

        # Entity type embedding and entity sentence-index embedding
        entity_pool_type_emb = tf.nn.embedding_lookup(entity_type_emb_table,
                                                      self.entity_pool_type)
        entity_pool_sent_emb = tf.nn.embedding_lookup(sentence_id_emb_table,
                                                      self.entity_sent_id)

        entity_pool_emb = tf.concat(
            [entity_embedding, entity_pool_type_emb, entity_pool_sent_emb],
            axis=-1)

        # "none" vector that entities without a relation point to
        none_emb = tf.get_variable(name="none_emb",
                                   shape=[self.decoder_hidden],
                                   initializer=tf.zeros_initializer)
        pad_emb = tf.get_variable(name="pad_emb",
                                  shape=[self.decoder_hidden],
                                  initializer=tf.zeros_initializer)

        # Repeat each vector batch_size times and add an axis at position 1
        # so it can be concatenated with pointing_mem along axis 1 below.
        pad_token = tf.expand_dims(tf.stack([pad_emb] * self.batch_size, 0),
                                   axis=1,
                                   name="pad_token")
        none_token = tf.expand_dims(tf.stack([none_emb] * self.batch_size, 0),
                                    axis=1,
                                    name="none_token")

        # Encode the sentence
        encoder_output, encoder_state = self._biGRU_encoding_layer(
            encoder_input=context_embedding,
            encoder_length=self.context_input_length,
            name="encoder_layer")

        # Encode the entities and attend between the sentence and the entities
        pointing_mem, decoder_state = self._entity_encoding_layer(
            entity_pool_emb, encoder_output, encoder_state)

        # Targets the decoder points to
        self.pointing_target = tf.concat([pad_token, none_token, pointing_mem],
                                         axis=1)
        # Decoder input
        decoder_input = tf.concat([entity_pool_emb, pointing_mem], axis=-1)

        # Decoder layer and train op
        self._dual_pointer_decoder(decoder_input=decoder_input,
                                   decoder_init_state=decoder_state,
                                   decoder_hidden=self.decoder_hidden,
                                   pointing_memory=self.pointing_target)