Example #1
    def prepare_calculation(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            K = self.K
            mlambda = self.mlambda
            n_nodes = self.n_nodes
            deg_vec = self.get_degree_vector()
            edge_list, weights = self.seperate_nodeid_and_weight(
                                                            self.edge_list)
            const_pairs, const_weights = self.seperate_nodeid_and_weight(
                                                            self.const_pairs)

            self.A = A = tf.sparse_to_dense(output_shape=[n_nodes, n_nodes],
                                            sparse_indices=edge_list,
                                            sparse_values=weights)
            self.O = O = tf.sparse_to_dense(output_shape=[n_nodes, n_nodes],
                                            sparse_indices=const_pairs,
                                            sparse_values=const_weights)
            self.P = P = tf.constant(self.get_degree_matrix(O))
            self.L = L = P - O

            degrees = self.get_degree_vector()
            self.U = U = tf.Variable(self.get_initial_U(degrees, K),
                                     name="U")
            self.Z = Z = tf.Variable(self.get_initial_Z(degrees, K),
                                     name="Z")
            U_norm = self.normalize_U(U)
            Z_norm = self.get_positive_variable(Z)

            Y = tf.matmul(U_norm, tf.matmul(Z_norm, U_norm, transpose_b=True))
            self.loss = loss = tf.nn.l2_loss(A - Y)
            adam = tf.train.AdamOptimizer(self.lr)
            self.opt = adam.minimize(loss)
            self.setup_session()
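
Every example on this page relies on tf.sparse_to_dense scattering sparse_values into a dense tensor of output_shape at the positions given by sparse_indices. A minimal, self-contained sketch of the adjacency-matrix construction above, with a made-up toy edge list (the shapes and values below are illustrative, not taken from the example):

import tensorflow as tf  # TF 1.x API, matching the examples on this page

# Toy graph: three weighted edges among 4 nodes.
edge_list = [[0, 1], [1, 2], [2, 3]]   # [num_edges, 2] index pairs
weights = [0.5, 1.0, 0.25]             # one value per index pair

# Dense 4x4 adjacency matrix; unspecified entries take default_value (0 here).
A = tf.sparse_to_dense(sparse_indices=edge_list,
                       output_shape=[4, 4],
                       sparse_values=weights,
                       default_value=0.0)

with tf.Session() as sess:
    print(sess.run(A))  # three nonzero entries, everything else 0.0

In TF 1.13 and later tf.sparse_to_dense is deprecated; tf.sparse.to_dense(tf.SparseTensor(edge_list, weights, [4, 4])) produces the same dense matrix.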
Example #2
    def prepare_calculation(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            K = self.K
            n_nodes = self.n_nodes
            edge_list, weights = self.seperate_nodeid_and_weight(
                                                            self.edge_list)
            const_pairs, const_weights = self.seperate_nodeid_and_weight(
                                                            self.const_pairs)
            mlambda = self.mlambda

            self.A = A = tf.sparse_to_dense(output_shape=[n_nodes, n_nodes],
                                            sparse_indices=edge_list,
                                            sparse_values=weights)
            self.O = O = tf.sparse_to_dense(output_shape=[n_nodes, n_nodes],
                                            sparse_indices=const_pairs,
                                            sparse_values=const_weights)

            self.D = D = self.get_degree_matrix(O)
            self.L = L = D - O
            scaler = 2 * np.sqrt(weights.sum() / (n_nodes * n_nodes * K))
            initializer = tf.random_uniform_initializer(maxval=scaler)
            self.H_var = H_var = tf.get_variable("H_var", shape=[n_nodes, K],
                                                  initializer=initializer)
            self.W_var = W_var = tf.get_variable("W_var", shape=[n_nodes, K],
                                                 initializer=initializer,
                                                 trainable=(not self.synmetric))

            #Positivate H
            self.H = H = self.get_positive_variable(H_var)
            self.W = H

            H_norm = self.normalize_H(H, n_nodes)

            self.loss = loss = self.loss_LSE(A, H)
            self.sup_term = sup_term = self.supervisor_term(H_norm, L)

            self.cost = cost = loss + mlambda * sup_term

            self.define_tfsummary()

            if self.optimizer == "adam":
                optimizer = tf.train.AdamOptimizer(self.lr, epsilon=0.1)
            else:
                optimizer = tf.train.GradientDescentOptimizer(self.lr)
            opt = optimizer.minimize(cost)
            if self.positivate != "clip":
                self.opt = opt
            else:
                with tf.control_dependencies([opt]):
                    clipped = tf.maximum(H_var,0)
                    clip_H = H_var.assign(clipped)
                self.opt = tf.group(opt, clip_H)

            config = tf.ConfigProto(inter_op_parallelism_threads=self.threads,
                                  intra_op_parallelism_threads=self.threads)
            self.sess = tf.Session(config=config)
            self.init_op = tf.global_variables_initializer()
Example #3
  def testShapeInferenceKnownShape(self):
    with self.test_session(use_gpu=False):
      indices = tf.placeholder(tf.int64)

      shape = [4, 5, 6]
      output = tf.sparse_to_dense(indices, shape, 1, 0)
      self.assertEqual(output.get_shape(), [4, 5, 6])

      shape = tf.placeholder(tf.int64, shape=(3,))
      output = tf.sparse_to_dense(indices, shape, 1, 0)
      self.assertEqual(output.get_shape().as_list(), [None, None, None])
Example #4
 def map_box_encodings(i):
   """Produces box K-hot and score encodings for each class index."""
   box_mask = tf.equal(
       unique_indices, i * tf.ones(num_boxes, dtype=tf.int32))
   box_mask = tf.reshape(box_mask, [-1])
   box_indices = tf.boolean_mask(classes, box_mask)
   box_confidences = tf.boolean_mask(confidences, box_mask)
   box_class_encodings = tf.sparse_to_dense(
       box_indices, [num_classes], 1, validate_indices=False)
   box_confidence_encodings = tf.sparse_to_dense(
       box_indices, [num_classes], box_confidences, validate_indices=False)
   return box_class_encodings, box_confidence_encodings
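
The K-hot trick in map_box_encodings reduces to scattering ones at a set of (possibly unsorted) class indices. A small sketch with invented class ids (num_classes and the indices are toy values):

import tensorflow as tf

num_classes = 5
box_indices = tf.constant([3, 1], dtype=tf.int32)  # toy class ids for one box

# Multi-hot vector: 1 at positions 1 and 3, 0 elsewhere. validate_indices=False
# skips the sorted/unique check, as in the example above.
box_class_encodings = tf.sparse_to_dense(
    box_indices, [num_classes], 1, validate_indices=False)

with tf.Session() as sess:
    print(sess.run(box_class_encodings))  # [0 1 0 1 0]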
Example #5
    def build_generator(self):

        # placeholder is for feeding data
        image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])  # (batch_size, dim_image)
        local_image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])
        query = tf.placeholder(tf.int32, [self.batch_size, MAX_QUERY_WORDS])
        query_mask = tf.placeholder(tf.float32, [self.batch_size, MAX_QUERY_WORDS])
        bbox = tf.placeholder(tf.float32, [self.batch_size, self.dim_coordinates])

        # [image] embed image feature to dim_hidden
        image_emb = tf.nn.bias_add(tf.matmul(image, self.embed_image_W), self.embed_image_b) # (batch_size, dim_hidden)
        local_image_emb = tf.nn.bias_add(tf.matmul(local_image, self.embed_local_W), self.embed_local_b) # (batch_size, dim_hidden)

        score = tf.zeros([self.batch_size], tf.float32)

        state_lang = tf.zeros([self.batch_size, self.lstm_lang.state_size])
        state_context = tf.zeros([self.batch_size, self.lstm_context.state_size])
        state_local = tf.zeros([self.batch_size, self.lstm_local.state_size])
        query_emb = tf.zeros([self.batch_size, self.dim_hidden])
        for j in range(MAX_QUERY_WORDS):

            # language lstm
            with tf.variable_scope("lstm_lang"):
                output_lang, state_lang = self.lstm_lang(query_emb, state_lang)
            lang = tf.slice(state_lang, [0, 0], [self.batch_size, self.dim_hidden])

            # context lstm
            with tf.variable_scope("lstm_context"):
                output_context, state_context = self.lstm_context(tf.concat(1, [image_emb, lang]), state_context)
            context = tf.slice(state_context, [0, 0], [self.batch_size, self.dim_hidden])

            # local lstm
            with tf.variable_scope("lstm_local"):
                output_local, state_local = self.lstm_local(tf.concat(1, [local_image_emb, lang, bbox]), state_local)
            local = tf.slice(state_local, [0, 0], [self.batch_size, self.dim_hidden])

            context_emb = tf.nn.xw_plus_b(context, self.W_context, self.B_context)
            local_emb = tf.nn.xw_plus_b(local, self.W_local, self.B_local)
            word_pred = tf.add(context_emb, local_emb)

            max_prob_index = tf.argmax(word_pred, 1) # b

            labels = tf.expand_dims(query[:, j], 1)
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
            concated = tf.concat(1, [indices, labels])
            with tf.device('/cpu:0'):
                onehot_labels = tf.sparse_to_dense(concated, tf.pack([self.batch_size, self.dict_words]), 1.0, 0.0)
            current_score = tf.mul(onehot_labels, word_pred)
            current_score = tf.reduce_sum(current_score, 1)
            current_score = tf.mul(current_score, query_mask[:, j])
            current_score = tf.reshape(current_score, [1, self.batch_size])
            current_score = tf.nn.softmax(current_score)
            score = tf.add(score, current_score)

            with tf.device("/cpu:0"):
                tf.get_variable_scope().reuse_variables()
                query_emb = tf.nn.embedding_lookup(self.query_emb_W, max_prob_index)

        return score, image, local_image, query, query_mask, bbox
Example #6
    def build_model(self):
        video = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
        video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        HLness = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
        HLness_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        video_flat = tf.reshape(video, [-1, self.dim_image])
        image_emb = tf.nn.xw_plus_b( video_flat, self.encode_image_W, self.encode_image_b) # (batch_size*n_lstm_steps, dim_hidden)
        image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
        image_emb = tf.transpose(image_emb, [1,0,2]) # n x b x h

        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])

        loss_HL = 0.0
        _X = tf.reshape(image_emb, [-1, self.dim_hidden]) # (n x b) x h
        _X = tf.split(0, self.n_lstm_steps, _X) # n x (b x h)
        [output2, state2] = rnn.rnn(self.lstm_HL_net, _X, dtype=tf.float32) # n x (b x h)
        output2 = tf.transpose(tf.pack(output2), [1, 0, 2]) # b x n x h
        onehot_labels = []
        logit_words = []
        indices = tf.expand_dims(tf.range(0, self.n_lstm_steps, 1), 1) # n x 1
        for ii in xrange(10):
            labels = tf.expand_dims(HLness[ii, :], 1) # n x 1
            concated = tf.concat(1, [indices, labels]) # n x 2
            onehot_labels = tf.sparse_to_dense(concated, tf.pack([self.n_lstm_steps, 2]), 1.0, 0.0) # n x 2
            logit_words = tf.nn.xw_plus_b(output2[ii, :, :], self.embed_HL_W, self.embed_HL_b) # n x 2
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logit_words, onehot_labels) # n x 1
            cross_entropy = tf.mul(cross_entropy, HLness_mask[ii, :]) # n x 1
            loss_HL += tf.reduce_sum(cross_entropy) # 1

        loss_HL = loss_HL / tf.reduce_sum(HLness_mask)
        loss = loss_HL
        return loss, video, video_mask, HLness, HLness_mask
Example #7
  def _input_fn():
    with tf.name_scope('input'):
        filename_queue = tf.train.string_input_producer(
            filenames, num_epochs=num_epochs)

        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        features = tf.parse_single_example(
            serialized_example,
            {
                'words': tf.VarLenFeature(tf.string),
                'subreddit': tf.FixedLenFeature([1], tf.int64)
            }
        )
        padded_words = tf.sparse_to_dense(
            features['words'].indices,
            [sentence_length],
            features['words'].values,
            default_value='UNK'
        )
        word_indices = tf.string_to_hash_bucket_fast(
            padded_words,
            vocab_size)

        sentences, subreddits = tf.train.shuffle_batch(
            [word_indices, features['subreddit']],
            batch_size,
            capacity=1000 + 3 * batch_size,
            min_after_dequeue=1000,
            enqueue_many=False
        )
    return sentences, subreddits
Example #8
def build_input(data, batch_size, dataset, train):
    """Build CIFAR image and labels.

    Args:
        data: Tuple of (images, labels) arrays for the dataset.
        batch_size: Input batch size.
        dataset: Either "cifar10" or "cifar100".
        train: True if we are training and false if we are testing.

    Returns:
        images: Batches of images of size
            [batch_size, image_size, image_size, 3].
        labels: Batches of labels of size [batch_size, num_classes].

    Raises:
      ValueError: When the specified dataset is not supported.
    """
    image_size = 32
    depth = 3
    num_classes = 10 if dataset == "cifar10" else 100
    images, labels = data
    num_samples = images.shape[0] - images.shape[0] % batch_size
    dataset = tf.contrib.data.Dataset.from_tensor_slices(
        (images[:num_samples], labels[:num_samples]))

    def map_train(image, label):
        image = tf.image.resize_image_with_crop_or_pad(image, image_size + 4,
                                                       image_size + 4)
        image = tf.random_crop(image, [image_size, image_size, 3])
        image = tf.image.random_flip_left_right(image)
        image = tf.image.per_image_standardization(image)
        return (image, label)

    def map_test(image, label):
        image = tf.image.resize_image_with_crop_or_pad(image, image_size,
                                                       image_size)
        image = tf.image.per_image_standardization(image)
        return (image, label)

    dataset = dataset.map(map_train if train else map_test)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
    if train:
        dataset = dataset.shuffle(buffer_size=16 * batch_size)
    images, labels = dataset.make_one_shot_iterator().get_next()
    images = tf.reshape(images, [batch_size, image_size, image_size, depth])
    labels = tf.reshape(labels, [batch_size, 1])
    indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
    labels = tf.sparse_to_dense(
        tf.concat([indices, labels], 1),
        [batch_size, num_classes], 1.0, 0.0)

    assert len(images.get_shape()) == 4
    assert images.get_shape()[0] == batch_size
    assert images.get_shape()[-1] == 3
    assert len(labels.get_shape()) == 2
    assert labels.get_shape()[0] == batch_size
    assert labels.get_shape()[1] == num_classes
    if not train:
        tf.summary.image("images", images)
    return images, labels
Example #9
def encode_one_hot(label_batch, num_labels):
    sparse_labels = tf.reshape(label_batch, [-1, 1])
    derived_size = tf.shape(label_batch)[0]
    indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
    concated = tf.concat(1, [indices, sparse_labels])
    outshape = tf.pack([derived_size, num_labels])
    return tf.sparse_to_dense(concated, outshape, sparse_values=1.0, default_value=0.0)
Example #10
def pad_tensor_to_batch_size(tensor, batch_size):
  """Pads a Tensor along the batch dimension to the desired batch size."""
  if batch_size < 2:
    raise ValueError("Cannot pad along batch dimension with batch_size < 2.")

  ndims = len(tensor.shape)
  if ndims < 1:
    raise ValueError("Cannot pad a 0-dimensional Tensor")

  num_pad_examples = batch_size - tf.shape(tensor)[0]

  # paddings is a 2D Tensor with shape [ndims, 2]. Every element is zero except
  # for paddings[0][1], which is the number of values to add along the 0-th
  # dimension (the batch dimension) after the contents of the input tensor.
  paddings = tf.sparse_to_dense(
      sparse_indices=[[0, 1]],
      output_shape=[ndims, 2],
      sparse_values=num_pad_examples)

  padded_tensor = tf.pad(tensor, paddings, name=tensor.op.name + "/pad")

  # Set the new shape.
  output_shape = tensor.shape.as_list()
  output_shape[0] = batch_size
  padded_tensor.set_shape(output_shape)

  return padded_tensor
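
The comment above spells out what the paddings tensor looks like; a toy sanity-check sketch (batch size and input shape are invented) confirming that tf.sparse_to_dense([[0, 1]], [ndims, 2], num_pad_examples) pads only after the batch dimension:

import tensorflow as tf

tensor = tf.zeros([3, 4])                       # toy input: 3 rows of width 4
batch_size = 5
num_pad_examples = batch_size - tf.shape(tensor)[0]

# For a 2-D tensor this is exactly [[0, num_pad_examples], [0, 0]].
paddings = tf.sparse_to_dense(sparse_indices=[[0, 1]],
                              output_shape=[2, 2],
                              sparse_values=num_pad_examples)
padded = tf.pad(tensor, paddings)

with tf.Session() as sess:
    print(sess.run(paddings))           # [[0 2] [0 0]]
    print(sess.run(tf.shape(padded)))   # [5 4]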
Example #11
def loss(logits, labels):
    """Add L2Loss to all the trainable variables.

    Add summary for "Loss" and "Loss/avg".
    Args:
        logits: Logits from inference().
        labels: Labels from distorted_inputs or inputs(). 1-D tensor
                of shape [batch_size]

    Returns:
    Loss tensor of type float.
    """
    # Convert from sparse integer labels in the range [0, NUM_CLASSES)
    # to 1-hot dense float vectors (that is we will have batch_size vectors,
    # each with NUM_CLASSES values, all of which are 0.0 except there will
    # be a 1.0 in the entry corresponding to the label).
    batch_size = tf.size(labels)
    labels = tf.expand_dims(labels, 1)
    indices = tf.expand_dims(tf.range(0, batch_size), 1)
    concated = tf.concat(1, [indices, labels])
    onehot_labels = tf.sparse_to_dense(
        concated, tf.pack([batch_size, NUM_CLASSES]), 1.0, 0.0)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
                                                            onehot_labels,
                                                            name='xentropy')
    # Calculate the average cross entropy loss across the batch.
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    tf.add_to_collection('losses', cross_entropy_mean)

    # The total loss is defined as the cross entropy loss plus all of the weight
    # decay terms (L2 loss).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
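
The expand_dims / range / concat / sparse_to_dense sequence used in loss() (and in several later examples) is the classic pre-tf.one_hot way of densifying integer labels. On any TF version that provides tf.one_hot, a shorter equivalent sketch (NUM_CLASSES and the labels are toy values):

import tensorflow as tf

NUM_CLASSES = 10
labels = tf.constant([2, 0, 9])  # toy integer labels in [0, NUM_CLASSES)

# One call instead of expand_dims + range + concat + sparse_to_dense.
onehot_labels = tf.one_hot(labels, depth=NUM_CLASSES, on_value=1.0, off_value=0.0)

with tf.Session() as sess:
    print(sess.run(onehot_labels))  # one 1.0 per row, at columns 2, 0 and 9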
Example #12
def read_data(filename_queue):
    """
    read_data is an access object that takes a .tfrecord and transforms it for modeling purposes. It has both
    a label and an image associated with it.
    :param filename_queue: The queue runner created by tensorflow
    :return: An object of the class CIFAR10Record that has both a label and an image value
    """
    class CIFAR10Record(object):
        pass
    result = CIFAR10Record()

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        #dense_keys=['image_raw', 'label'],
        #dense_types=[tf.string, tf.int64]
        features={'image_raw': tf.FixedLenFeature([], tf.string),
                  'label': tf.FixedLenFeature([], tf.int64)}
    )
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image.set_shape([input_image_size * input_image_size * input_image_channels])
    image = tf.cast(image, tf.float32)
    result.image = tf.reshape(image, [input_image_size, input_image_size, input_image_channels])

    label = tf.cast(features['label'], tf.int32)
    result.label = tf.sparse_to_dense(label, [num_labels], 1.0, 0.0)

    return result
Example #13
def loss_test(logits, labels, batch_size=None):
    # Reshape the labels into a dense Tensor of
    # shape [FLAGS.batch_size, num_classes].
    sparse_labels = tf.reshape(labels, [batch_size, 1])
    indices = tf.reshape(tf.range(batch_size), [batch_size, 1])
    sparse_labels = tf.cast(sparse_labels, tf.int32)
    concated = tf.concat(1, [indices, sparse_labels])
    num_classes = logits[0].get_shape()[-1].value
    dense_labels = tf.sparse_to_dense(concated,
                                    [batch_size, num_classes],
                                    1.0, 0.0)

    print "-"*10
    print type(logits)
    print len(logits)
    print logits[0].get_shape()
    print logits[1].get_shape()
    print "-"*10

    # Cross entropy loss for the main softmax prediction.
    loss = slim.losses.cross_entropy_loss_without_collection(logits[0],
                                 dense_labels,
                                 label_smoothing=0.1,
                                 weight=1.0)

    # Cross entropy loss for the auxiliary softmax head.
    aux_loss = slim.losses.cross_entropy_loss_without_collection(logits[1],
                                 dense_labels,
                                 label_smoothing=0.1,
                                 weight=0.4,
                                 scope='aux_loss')
    return loss, aux_loss
Example #14
    def __init__(self, is_training, config):
        self._batch_size = batch_size = FLAGS.batch_size
        self.num_skills = num_skills = config.num_skills
        self.hidden_size = size = FLAGS.hidden_size
        self.num_steps = num_steps = config.num_steps
        input_size = num_skills*2

        inputs = self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._target_id = target_id = tf.placeholder(tf.int32, [None])
        self._target_correctness = target_correctness = tf.placeholder(tf.float32, [None])
        final_hidden_size = size

        hidden_layers = []
        for i in range(FLAGS.hidden_layer_num):
            final_hidden_size = size/(i+1)
            hidden1 = tf.nn.rnn_cell.LSTMCell(final_hidden_size, state_is_tuple=True)
            if is_training and config.keep_prob < 1:
                hidden1 = tf.nn.rnn_cell.DropoutWrapper(hidden1, output_keep_prob=FLAGS.keep_prob)
            hidden_layers.append(hidden1)

        cell = tf.nn.rnn_cell.MultiRNNCell(hidden_layers, state_is_tuple=True)

        input_data = tf.reshape(self._input_data, [-1])
        #one-hot encoding
        with tf.device("/cpu:0"):
            labels = tf.expand_dims(input_data, 1)
            indices = tf.expand_dims(tf.range(0, batch_size*num_steps, 1), 1)
            concated = tf.concat(1, [indices, labels])
            inputs = tf.sparse_to_dense(concated, tf.pack([batch_size*num_steps, input_size]), 1.0, 0.0)
            inputs.set_shape([batch_size*num_steps, input_size])

        # [batch_size, num_steps, input_size]
        inputs = tf.reshape(inputs, [-1, num_steps, input_size])
        x = tf.transpose(inputs, [1, 0, 2])
        # Reshape to (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, input_size])
        # Split to get a list of 'n_steps'
        # tensors of shape (doc_num, n_input)
        x = tf.split(0, num_steps, x)
        #inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_steps, inputs)]
        #outputs, state = tf.nn.rnn(hidden1, x, dtype=tf.float32)
        outputs, state = tf.nn.rnn(cell, x, dtype=tf.float32)
        output = tf.reshape(tf.concat(1, outputs), [-1, final_hidden_size])
        # calculate the logits from last hidden layer to output layer
        sigmoid_w = tf.get_variable("sigmoid_w", [final_hidden_size, num_skills])
        sigmoid_b = tf.get_variable("sigmoid_b", [num_skills])
        logits = tf.matmul(output, sigmoid_w) + sigmoid_b

        # from output nodes to pick up the right one we want
        logits = tf.reshape(logits, [-1])
        selected_logits = tf.gather(logits, self.target_id)

        #make prediction
        self._pred = self._pred_values = pred_values = tf.sigmoid(selected_logits)

        # loss function
        loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(selected_logits, target_correctness))

        #self._cost = cost = tf.reduce_mean(loss)
        self._cost = cost = loss
Example #15
def softmax_loss_layer(name, score_bottom, label_bottom):
    """
    Calculates cumulative Softmax Cross Entropy Loss along the last dimension
    *This function does not divide the loss by batch size*

    Once tensorflow has SparseCrossEntropy function, this one will be replaced
    """
    # Check shape
    score_shape = score_bottom.get_shape().as_list()
    label_shape = label_bottom.get_shape().as_list()
    assert len(score_shape) == len(label_shape) + 1
    assert score_shape[:-1] == label_shape

    # Compute the outer dimension in label
    inner_dim = score_shape[-1]
    outer_dim = 1
    for d in label_shape: outer_dim *= d

    # flatten score and label
    flat_score = tf.reshape(score_bottom, [outer_dim, inner_dim])
    flat_label = tf.reshape(label_bottom, [outer_dim, 1])

    # Reshape the flattened labels into dense one-hot tensors of
    # shape [outer_dim, inner_dim].
    indices = tf.reshape(tf.range(outer_dim), [outer_dim, 1])
    concated = tf.concat(1, [indices, tf.to_int32(flat_label)])
    dense_labels = tf.sparse_to_dense(concated, [outer_dim, inner_dim],
        1.0, 0.0)

    # Cumulative softmax cross entropy loss, summed over all entries.
    return tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(flat_score, dense_labels))
Example #16
  def default_exchange_proposed_fn_(num_replica, seed=None):
    """Default function for `exchange_proposed_fn` of `kernel`."""
    num_replica = tf.to_int32(num_replica)

    seed = distributions_util.gen_new_seed(seed, 'default_exchange_proposed_fn')
    random_uniform = tf.random_uniform([], seed=seed)
    accept_proposed_exchange = random_uniform < probs

    seed = distributions_util.gen_new_seed(seed, 'default_exchange_proposed_fn')
    zero_start = tf.random_uniform([], seed=seed) > 0.5
    if num_replica % 2 == 0:
      exchange_proposed = tf.where(
          zero_start, tf.range(num_replica),
          tf.sparse_to_dense(tf.range(num_replica - 2), (num_replica,),
                             tf.range(1, num_replica - 1)))
      exchange_proposed_n = tf.where(zero_start, num_replica // 2,
                                     num_replica // 2 - 1)
    else:
      exchange_proposed = tf.where(
          zero_start, tf.range(num_replica - 1), tf.range(1, num_replica))
      exchange_proposed_n = num_replica // 2

    exchange_proposed = tf.reshape(exchange_proposed, (num_replica // 2, 2))
    exchange_proposed = tf.where(accept_proposed_exchange, exchange_proposed,
                                 tf.zeros_like(exchange_proposed))
    exchange_proposed_n = tf.where(accept_proposed_exchange,
                                   exchange_proposed_n,
                                   tf.zeros_like(exchange_proposed_n))
    return exchange_proposed, exchange_proposed_n
Example #17
 def body(i, next_replica_idx):
   """`tf.while_loop` body."""
   ratio = (
       sampled_replica_ratios[next_replica_idx[exchange_proposed[i, 0]]]
       - sampled_replica_ratios[next_replica_idx[exchange_proposed[i, 1]]])
   ratio *= (
       self.inverse_temperatures[exchange_proposed[i, 1]]
       - self.inverse_temperatures[exchange_proposed[i, 0]])
   self._seed_stream = distributions_util.gen_new_seed(
       self._seed_stream, salt='replica_exchange_one_step')
   log_uniform = tf.log(tf.random_uniform(
       shape=tf.shape(ratio),
       dtype=ratio.dtype.base_dtype,
       seed=self._seed_stream))
   exchange = log_uniform < ratio
   exchange_op = tf.sparse_to_dense(
       [exchange_proposed[i, 0], exchange_proposed[i, 1]],
       [self.num_replica],
       [next_replica_idx[exchange_proposed[i, 1]] -
        next_replica_idx[exchange_proposed[i, 0]],
        next_replica_idx[exchange_proposed[i, 0]] -
        next_replica_idx[exchange_proposed[i, 1]]])
   next_replica_idx = tf.cond(exchange,
                              lambda: next_replica_idx + exchange_op,
                              lambda: next_replica_idx)
   return [i + 1, next_replica_idx]
Example #18
def loss(logits, labels, batch_size=None):
  """Adds all losses for the model.

  Note the final loss is not returned. Instead, the list of losses are collected
  by slim.losses. The losses are accumulated in tower_loss() and summed to
  calculate the total loss.

  Args:
    logits: List of logits from inference(). Each entry is a 2-D float Tensor.
    labels: Labels from distorted_inputs or inputs(). 1-D tensor
            of shape [batch_size]
    batch_size: integer
  """
  if not batch_size:
    batch_size = FLAGS.batch_size

  # Reshape the labels into a dense Tensor of
  # shape [FLAGS.batch_size, num_classes].
  sparse_labels = tf.reshape(labels, [batch_size, 1])
  indices = tf.reshape(tf.range(batch_size), [batch_size, 1])
  concated = tf.concat(1, [indices, sparse_labels])
  num_classes = logits[0].get_shape()[-1].value
  dense_labels = tf.sparse_to_dense(concated,
                                    [batch_size, num_classes],
                                    1.0, 0.0)

  # Cross entropy loss for the main softmax prediction.
  slim.losses.cross_entropy_loss(logits[0],
                                 dense_labels,
                                 label_smoothing=0.1,
                                 weight=1.0)
Example #19
 def disable_some_fgs():
     # We want to delete a randomly-selected subset of fg_inds of
     # size `fg_inds.shape[0] - max_fg`.
     # We shuffle along the dimension 0 and then we get the first
     # num_fg_inds - max_fg indices and we disable them.
     shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
     disable_place = (tf.shape(fg_inds)[0] - max_fg)
     # This function should never run if num_fg_inds <= max_fg, so we
     # add an assertion to catch the wrong behaviour if it happens.
     integrity_assertion = tf.assert_positive(
         disable_place,
         message="disable_place in disable_some_fgs is negative."
     )
     with tf.control_dependencies([integrity_assertion]):
         disable_inds = shuffled_inds[:disable_place]
     is_disabled = tf.sparse_to_dense(
         sparse_indices=disable_inds,
         sparse_values=True, default_value=False,
         output_shape=tf.cast(proposals_label_shape, tf.int64),
         # We are shuffling the indices, so they may not be ordered.
         validate_indices=False
     )
     return tf.where(
         condition=is_disabled,
         # We set it to -label for debugging purposes.
         x=tf.negative(proposals_label),
         y=proposals_label
     )
Example #20
def one_hot_mask(labels, num_classes, scope=None):
  """Compute 1-hot encodings for masks.

  Given a label image, this computes the one hot encoding at
  each pixel.

  Args:
    labels: (batch_size, width, height, 1) tensor containing labels.
    num_classes: number of classes
    scope: optional scope name

  Returns:
    Tensor of shape (batch_size, width, height, num_classes) with
    a 1-hot encoding.
  """
  with tf.name_scope(scope, "OneHotMask", [labels]):
    height, width, depth = _shape(labels)
    assert depth == 1
    sparse_labels = tf.to_int32(tf.reshape(labels, [-1, 1]))
    sparse_size, _ = _shape(sparse_labels)
    indices = tf.reshape(tf.range(0, sparse_size, 1), [-1, 1])
    concated = tf.concat_v2([indices, sparse_labels], 1)
    dense_result = tf.sparse_to_dense(concated, [sparse_size, num_classes], 1.0,
                                      0.0)
    result = tf.reshape(dense_result, [height, width, num_classes])
    return result
Example #21
def ce(model, config, scope, connect, threshold = 1e-5):
	with tf.variable_scope(scope), tf.name_scope(scope):
		with tf.variable_scope('inputs'), tf.name_scope('inputs'):
			model['%s_in0length' %scope] = model['%s_out0length' %connect]
			model['%s_in1length' %scope] = model['%s_out1length' %connect]
			model['%s_in2length' %scope] = model['%s_out2length' %connect]
			model['%s_maxin2length' %scope] = model['%s_maxout2length' %connect]
			model['%s_inputs' %scope] = tf.clip_by_value(tf.nn.softmax(model['%s_outputs' %connect]), threshold, 1. - threshold, name = '%s_inputs' %scope)
			model['%s_out0length' %scope] = model['%s_in0length' %scope]
			model['%s_out1length' %scope] = model['%s_in1length' %scope]
			model['%s_out2length' %scope] = tf.placeholder(tf.int32, [model['%s_in0length' %scope]], '%s_out2length' %scope)
			model['%s_maxout2length' %scope] = model['%s_maxin2length' %scope]

		with tf.variable_scope('labels'), tf.name_scope('labels'):
			model['%s_labels_len' %scope] = tf.placeholder(tf.int32, [model['%s_in0length' %scope]], '%s_labels_len' %scope)
			model['%s_labels_ind' %scope] = tf.placeholder(tf.int64, [None, 2], '%s_labels_ind' %scope)
			model['%s_labels_val' %scope] = tf.placeholder(tf.int32, [None], '%s_labels_val' %scope)
			model['%s_labels_collapsed' %scope] = tf.sparse_to_dense(model['%s_labels_ind' %scope], [model['%s_maxin2length' %scope], model['%s_in0length' %scope]], model['%s_labels_val' %scope], -1, name = '%s_labels_collapsed' %scope)
			model['%s_labels' %scope] = tf.one_hot(model['%s_labels_collapsed' %scope], model['%s_out1length' %scope], name = '%s_labels' %scope)

		with tf.variable_scope('loss'), tf.name_scope('loss'):
			model['%s_loss' %scope] = tf.reduce_sum(-tf.multiply(model['%s_labels' %scope], tf.log(model['%s_inputs' %scope])), name = '%s_loss' %scope)

		with tf.variable_scope('outputs'), tf.name_scope('outputs'):
			model['%s_output' %scope] = model['%s_inputs' %scope]

	return model
Example #22
    def f(X):
        """
        prob: n probabilities
        box: nx4 boxes

        Returns: n boolean, the selection
        """
        prob, box = X
        output_shape = tf.shape(prob)
        # filter by score threshold
        ids = tf.reshape(tf.where(prob > cfg.TEST.RESULT_SCORE_THRESH), [-1])
        prob = tf.gather(prob, ids)
        box = tf.gather(box, ids)
        # NMS within each class
        selection = tf.image.non_max_suppression(
            box, prob, cfg.TEST.RESULTS_PER_IM, cfg.TEST.FRCNN_NMS_THRESH)
        selection = tf.to_int32(tf.gather(ids, selection))
        # sort available in TF>1.4.0
        # sorted_selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
        sorted_selection = -tf.nn.top_k(-selection, k=tf.size(selection))[0]
        mask = tf.sparse_to_dense(
            sparse_indices=sorted_selection,
            output_shape=output_shape,
            sparse_values=True,
            default_value=False)
        return mask
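
The mask construction at the end of f is simply "turn a list of kept indices into a boolean vector". A minimal sketch with invented indices (num_boxes and kept are toy values):

import tensorflow as tf

num_boxes = 6
kept = tf.constant([0, 2, 5])  # toy indices surviving score filtering + NMS

# True at the kept positions, False elsewhere; indices must be sorted when
# validate_indices is left at its default of True.
mask = tf.sparse_to_dense(sparse_indices=kept,
                          output_shape=[num_boxes],
                          sparse_values=True,
                          default_value=False)

with tf.Session() as sess:
    print(sess.run(mask))  # [ True False  True False False  True]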
Example #23
def ced(model, config, scope, connect, threshold = 1e-5):
	with tf.variable_scope(scope), tf.name_scope(scope):
		with tf.variable_scope('inputs'), tf.name_scope('inputs'):
			model['%s_in0length' %scope] = model['%s_out0length' %connect]
			model['%s_in1length' %scope] = model['%s_out1length' %connect]
			model['%s_in2length' %scope] = model['%s_out2length' %connect]
			model['%s_maxin2length' %scope] = model['%s_maxout2length' %connect]
			model['%s_inputs' %scope] = tf.clip_by_value(model['%s_outputs' %connect], threshold, 1. - threshold, name = '%s_inputs' %scope)
			model['%s_out0length' %scope] = model['%s_in0length' %scope]
			model['%s_out1length' %scope] = model['%s_in1length' %scope]
			model['%s_out2length' %scope] = tf.placeholder(tf.int32, [model['%s_in0length' %scope]], '%s_out2length' %scope)
			model['%s_maxout2length' %scope] = model['%s_maxin2length' %scope]

		with tf.variable_scope('labels'), tf.name_scope('labels'):
			model['%s_labels_len' %scope] = tf.placeholder(tf.int32, [model['%s_in0length' %scope]], '%s_labels_len' %scope)
			model['%s_labels_ind' %scope] = tf.placeholder(tf.int64, [None, 3], '%s_labels_ind' %scope)
			model['%s_labels_val' %scope] = tf.placeholder(tf.float32, [None], '%s_labels_val' %scope)
			model['%s_labels' %scope] = tf.sparse_to_dense(model['%s_labels_ind' %scope], [model['%s_in0length' %scope], model['%s_maxin2length' %scope], model['%s_maxin2length' %scope]], model['%s_labels_val' %scope], -1, name = '%s_labels' %scope)

		with tf.variable_scope('loss'), tf.name_scope('loss'):
			model['%s_loss' %scope] = tf.reduce_sum(tf.where(tf.less(model['%s_labels' %scope], tf.zeros([model['%s_in0length' %scope], model['%s_maxin2length' %scope], model['%s_maxin2length' %scope]], tf.float32)), tf.zeros([model['%s_in0length' %scope], model['%s_maxin2length' %scope], model['%s_maxin2length' %scope]], tf.float32), -tf.add(tf.multiply(model['%s_labels' %scope], tf.log(model['%s_inputs' %scope])), tf.multiply(tf.subtract(1., model['%s_labels' %scope]), tf.log(tf.subtract(1., model['%s_inputs' %scope]))))), name = '%s_loss' %scope)

		with tf.variable_scope('outputs'), tf.name_scope('outputs'):
			model['%s_output' %scope] = model['%s_inputs' %scope]

	return model
Example #24
def loss(logits, labels):
  """Calculates the loss from the logits and the labels.

  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size].

  Returns:
    loss: Loss tensor of type float.
  """
  # Convert from sparse integer labels in the range [0, NUM_CLASSES)
  # to 1-hot dense float vectors (that is we will have batch_size vectors,
  # each with NUM_CLASSES values, all of which are 0.0 except there will
  # be a 1.0 in the entry corresponding to the label).
  batch_size = tf.size(labels)
  labels = tf.expand_dims(labels, 1)
  indices = tf.expand_dims(tf.range(0, batch_size), 1)
  concated = tf.concat(1, [indices, labels])
  onehot_labels = tf.sparse_to_dense(
      concated, tf.pack([batch_size, NUM_CLASSES]), 1.0, 0.0)
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
                                                          onehot_labels,
                                                          name='xentropy')
  loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
  return loss
Example #25
def loss(logits, labels):
  """Add L2Loss to all the trainable variables.

  Add summary for "Loss" and "Loss/avg".
  Args:
    logits: Logits from inference().
    labels: Labels from distorted_inputs or inputs(). 1-D tensor
            of shape [batch_size]

  Returns:
    Loss tensor of type float.
  """
  # Reshape the labels into a dense Tensor of
  # shape [batch_size, NUM_CLASSES].
  sparse_labels = tf.reshape(labels, [FLAGS.batch_size, 1])
  indices = tf.reshape(tf.range(0, FLAGS.batch_size, 1), [FLAGS.batch_size, 1])
  concated = tf.concat(1, [indices, sparse_labels])
  dense_labels = tf.sparse_to_dense(concated,
                                    [FLAGS.batch_size, NUM_CLASSES],
                                    1.0, 0.0)

  # Calculate the average cross entropy loss across the batch.
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
      logits, dense_labels, name='cross_entropy_per_example')
  cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
  tf.add_to_collection('losses', cross_entropy_mean)

  # The total loss is defined as the cross entropy loss plus all of the weight
  # decay terms (L2 loss).
  return tf.add_n(tf.get_collection('losses'), name='total_loss')
Example #26
  def _testGraphExtensionRestore(self):
    test_dir = os.path.join(self.get_temp_dir(), "graph_extension")
    filename = os.path.join(test_dir, "metafile")
    saver0_ckpt = os.path.join(test_dir, "saver0.ckpt")
    with self.test_session(graph=tf.Graph()) as sess:
      # Restores from MetaGraphDef.
      new_saver = tf.train.import_meta_graph(filename)
      # Generates a new MetaGraphDef.
      new_saver.export_meta_graph()
      # Restores from checkpoint.
      new_saver.restore(sess, saver0_ckpt)
      # Adds loss and train.
      labels = tf.constant(0, tf.int32, shape=[100], name="labels")
      batch_size = tf.size(labels)
      labels = tf.expand_dims(labels, 1)
      indices = tf.expand_dims(tf.range(0, batch_size), 1)
      concated = tf.concat(1, [indices, labels])
      onehot_labels = tf.sparse_to_dense(
          concated, tf.pack([batch_size, 10]), 1.0, 0.0)
      logits = tf.get_collection("logits")[0]
      cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
                                                              onehot_labels,
                                                              name="xentropy")
      loss = tf.reduce_mean(cross_entropy, name="xentropy_mean")

      tf.scalar_summary(loss.op.name, loss)
      # Creates the gradient descent optimizer with the given learning rate.
      optimizer = tf.train.GradientDescentOptimizer(0.01)

      # Runs train_op.
      train_op = optimizer.minimize(loss)
      sess.run(train_op)
Example #27
def one_hot_matrix(tensor_in, num_classes, on_value=1.0, off_value=0.0):
    """Encodes indices from given tensor as one-hot tensor.

    TODO(ilblackdragon): Ideally implementation should be
    part of TensorFlow with Eigen-native operation.

    Args:
        tensor_in: Input tensor of shape [N1, N2].
        num_classes: Number of classes to expand index into.
        on_value: Tensor or float, value to fill-in given index.
        off_value: Tensor or float, value to fill-in everything else.
    Returns:
        Tensor of shape [N1, N2, num_classes] with 1.0 for each id in original
        tensor.
    """
    tensor_in = tf.convert_to_tensor(tensor_in)
    sparse_values = tf.to_int64(tf.reshape(tensor_in, [-1, 1]))
    size = tf.shape(sparse_values)[0]
    dims = tf.shape(tensor_in)
    indices = tf.to_int64(tf.reshape(tf.range(0, size), [-1, 1]))
    indices_values = tf.concat(1, [indices, sparse_values])
    outshape = tf.to_int64(expand_concat(0, [size, num_classes]))
    one_hot_vector = tf.sparse_to_dense(indices_values, outshape, on_value, off_value)
    ret = tf.reshape(one_hot_vector, tf.concat(0, [dims, [num_classes]]))
    ret.set_shape(tensor_in.get_shape().concatenate(num_classes))
    return ret
Example #28
def loss(logits, labels):
    #sparse_labels = tf.reshape(labels, [FLAGS.batch_size, 1])
    #indices = tf.reshape(tf.range(0, FLAGS.batch_size), [FLAGS.batch_size, 1])
    labels = tf.expand_dims(labels, 1)
    indices = tf.expand_dims(tf.range(0, FLAGS.batch_size, 1), 1)
    #concated = tf.concat(1, [indices, sparse_labels])
    concated = tf.concat(1, [indices, labels])
    # The number of classes for sparse_to_dense must be (max class label + 1)
    dense_labels = tf.sparse_to_dense(
        concated,
        [FLAGS.batch_size, NUM_CLASSES],
        1.0,
        0.0
    )

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits,
        dense_labels,
        name='cross_entropy_per_example'
    )

    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)

    return tf.add_n(tf.get_collection('losses'), name='total_loss')
Example #29
  def _count_matrix_input(self, filenames, submatrix_rows, submatrix_cols):
    """Creates ops that read submatrix shards from disk."""
    random.shuffle(filenames)
    filename_queue = tf.train.string_input_producer(filenames)
    reader = tf.WholeFileReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'global_row': tf.FixedLenFeature([submatrix_rows], dtype=tf.int64),
            'global_col': tf.FixedLenFeature([submatrix_cols], dtype=tf.int64),
            'sparse_local_row': tf.VarLenFeature(dtype=tf.int64),
            'sparse_local_col': tf.VarLenFeature(dtype=tf.int64),
            'sparse_value': tf.VarLenFeature(dtype=tf.float32)
        })

    global_row = features['global_row']
    global_col = features['global_col']

    sparse_local_row = features['sparse_local_row'].values
    sparse_local_col = features['sparse_local_col'].values
    sparse_count = features['sparse_value'].values

    sparse_indices = tf.concat(
        axis=1, values=[tf.expand_dims(sparse_local_row, 1),
                        tf.expand_dims(sparse_local_col, 1)])

    count = tf.sparse_to_dense(sparse_indices, [submatrix_rows, submatrix_cols],
                               sparse_count)

    return global_row, global_col, count
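
The submatrix reconstruction above pairs row and column index vectors into an [n, 2] coordinate matrix before densifying. A toy, self-contained sketch of that step (the 3x4 shape, indices and values are invented):

import tensorflow as tf

rows = tf.constant([0, 1, 2], dtype=tf.int64)
cols = tf.constant([1, 3, 0], dtype=tf.int64)
vals = tf.constant([2.0, 5.0, 1.0])

# [n, 2] coordinates, one (row, col) pair per nonzero count.
sparse_indices = tf.concat(
    axis=1, values=[tf.expand_dims(rows, 1), tf.expand_dims(cols, 1)])

count = tf.sparse_to_dense(sparse_indices, [3, 4], vals)

with tf.Session() as sess:
    print(sess.run(count))  # zeros except [0,1]=2.0, [1,3]=5.0, [2,0]=1.0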
Example #30
def _sparse_to_dense(labels, num_classes):
    sparse_labels = tf.reshape(labels, [-1, 1])
    batch_size = sparse_labels.get_shape().as_list()[0]
    indices = tf.reshape(tf.range(batch_size), [batch_size, 1])
    concated = tf.concat(1, [indices, sparse_labels])
    dense_labels = tf.sparse_to_dense(concated, [batch_size, num_classes], 1.0, 0.0)
    return dense_labels
Example #31
    def _create_network(self):
        # Initialize autoencode network weights and biases
        network_weights = self._initialize_weights(**self.network_architecture)
        start_token_tensor = tf.constant(
            (np.zeros([self.batch_size, binary_dim])).astype(np.float32),
            dtype=tf.float32)
        self.network_weights = network_weights
        seqlen = tf.cast(tf.reduce_sum(self.mask, reduction_indices=-1),
                         tf.int32)
        self.embedded_input_KLD_loss = tf.constant(0.0)
        self.input_embedding_KLD_loss = tf.constant(0.0)
        # def train_encoder():
        embedded_input, self.embedded_input_KLD_loss = self._get_word_embedding(
            [
                network_weights['variational_encoding'],
                network_weights['biases_variational_encoding']
            ],
            network_weights['input_meaning'],
            tf.reshape(
                self.caption_placeholder,
                [self.batch_size * self.network_architecture['maxlen']]),
            logit=True)
        print 'eshape', embedded_input.shape
        embedded_input = tf.reshape(embedded_input, [
            self.batch_size, self.network_architecture['maxlen'],
            self.network_architecture['n_lstm_input']
        ])
        print embedded_input.shape
        if not vanilla:
            self.embedded_input_KLD_loss = tf.reshape(
                self.embedded_input_KLD_loss,
                [-1, self.network_architecture['maxlen']])[:, 1:]
        encoder_input = embedded_input[:, 1:, :]
        cell = tf.contrib.rnn.BasicLSTMCell(
            self.network_architecture['n_lstm_input'])
        if lstm_stack > 1:
            cell = tf.contrib.rnn.MultiRNNCell([cell] * lstm_stack)
        if not use_bdlstm:
            encoder_outs, encoder_states = rnn.dynamic_rnn(
                cell,
                encoder_input,
                sequence_length=seqlen - 1,
                dtype=tf.float32,
                time_major=False)
        else:
            backward_cell = tf.contrib.rnn.BasicLSTMCell(
                self.network_architecture['n_lstm_input'])
            if lstm_stack > 1:
                backward_cell = tf.contrib.rnn.MultiRNNCell([backward_cell] *
                                                            lstm_stack)
            encoder_outs, encoder_states = rnn.bidirectional_dynamic_rnn(
                cell,
                backward_cell,
                encoder_input,
                sequence_length=seqlen - 1,
                dtype=tf.float32,
                time_major=False)
        ix_range = tf.range(0, self.batch_size, 1)
        ixs = tf.expand_dims(ix_range, -1)
        to_cat = tf.expand_dims(seqlen - 2, -1)
        gather_inds = tf.concat([ixs, to_cat], axis=-1)
        print encoder_outs
        outs = tf.gather_nd(encoder_outs, gather_inds)
        # outs=tf.nn.dropout(outs,.75)
        self.deb = tf.gather_nd(self.caption_placeholder[:, 1:], gather_inds)
        print outs.shape
        input_embedding, self.input_embedding_KLD_loss = self._get_middle_embedding(
            [
                network_weights['middle_encoding'],
                network_weights['biases_middle_encoding']
            ],
            network_weights['middle_encoding'],
            outs,
            logit=True)
        # return input_embedding
        # input_embedding=tf.nn.l2_normalize(input_embedding,dim=-1)
        self.other_loss = tf.constant(0, dtype=tf.float32)
        KLD_penalty = (tf.cast(self.timestep, tf.float32) / 1.0) * 1e-3
        cos_penalty = tf.maximum(-0.1, (tf.cast(self.timestep, tf.float32) /
                                        (5.0))) * 1e-3

        self.input_KLD_loss = tf.constant(0.0)
        # def train_decoder():
        if form3:
            _x, self.input_KLD_loss = self._get_input_embedding([
                network_weights['variational_encoding'],
                network_weights['biases_variational_encoding']
            ], network_weights['variational_encoding'])
            self.input_KLD_loss = tf.reduce_mean(
                self.input_KLD_loss
            ) * KLD_penalty  #\*tf.constant(0.0,dtype=tf.float32)
            # normed_embedding= tf.nn.l2_normalize(self.mid_var, dim=-1)
            # normed_target=tf.nn.l2_normalize(self.word_var,dim=-1)
            # cos_sim=(tf.reduce_sum(tf.multiply(normed_embedding,normed_target),axis=-1))
            # # # self.exp_loss=tf.reduce_mean((-cos_sim))
            # # # self.exp_loss=tf.reduce_sum(xentropy)/float(self.batch_size)
            # self.other_loss += tf.reduce_mean(1-(cos_sim))*cos_penalty
            # # other_loss+=tf.reduce_mean(tf.reduce_sum(tf.square(_x-input_embedding),axis=-1))*cos_penalty
            # _x=tf.concat([input_embedding,_x],axis=-1)
            # tempe=tf.Variable(xavier_init(self.network_architecture['n_lstm_input']*2,self.network_architecture['n_lstm_input']),name='emb_cat')
            # tempb=tf.Variable(tf.zeros([self.network_architecture['n_lstm_input']]),name='emb_cat_b')
            # _x=tf.matmul(_x,tempe)+tempb
            # input_embedding=_x

        # input_embedding=tf.cond(tf.equal(self.timestep%5,0),train_decoder,train_encoder)
        # Use recognition network to determine mean and
        # (log) variance of Gaussian distribution in latent
        # space
        # if not same_embedding:
        # 	input_embedding,input_embedding_KLD_loss=self._get_input_embedding([network_weights['variational_encoding'],network_weights['biases_variational_encoding']],network_weights['input_meaning'])
        # else:
        # 	input_embedding,input_embedding_KLD_loss=self._get_input_embedding([network_weights['variational_encoding'],network_weights['biases_variational_encoding']],network_weights['LSTM'])
        # if not embeddings_trainable:
        # 	input_embedding=tf.stop_gradient(input_embedding)
        # embed2decoder=tf.Variable(xavier_init(self.network_architecture['n_z_m_2'],self.network_architecture['n_lstm_input']),name='decoder_embedding_weight')
        # embed2decoder_bias=tf.Variable(tf.zeros(self.network_architecture['n_lstm_input']),name='decoder_embedding_bias')
        state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)
        # input_embedding=tf.matmul(input_embedding,embed2decoder)+embed2decoder_bias
        loss = 0
        self.debug = 0
        probs = []
        with tf.variable_scope("RNN"):
            for i in range(self.network_architecture['maxlen']):
                if i > 0:

                    # current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:,i-1]) + self.embedding_bias
                    if form4:
                        current_embedding, KLD_loss = input_embedding, 0
                    elif form2:
                        current_embedding, KLD_loss = self._get_word_embedding(
                            [
                                network_weights['variational_encoding'],
                                network_weights['biases_variational_encoding']
                            ],
                            network_weights['LSTM'],
                            self.caption_placeholder[:, i - 1],
                            logit=True)
                    else:
                        current_embedding, KLD_loss = self._get_word_embedding(
                            [
                                network_weights['variational_encoding'],
                                network_weights['biases_variational_encoding']
                            ], network_weights['LSTM'],
                            self.caption_placeholder[:, i - 1])
                    loss += tf.reduce_sum(
                        KLD_loss * self.mask[:, i]) * KLD_penalty
                else:
                    current_embedding = input_embedding
                if i > 0:
                    tf.get_variable_scope().reuse_variables()

                out, state = self.lstm(current_embedding, state)

                if i > 0:
                    if not form2:
                        labels = tf.expand_dims(self.caption_placeholder[:, i],
                                                1)
                        ix_range = tf.range(0, self.batch_size, 1)
                        ixs = tf.expand_dims(ix_range, 1)
                        concat = tf.concat([ixs, labels], 1)
                        onehot = tf.sparse_to_dense(
                            concat, tf.stack([self.batch_size, self.n_words]),
                            1.0, 0.0)
                    else:
                        onehot = self.caption_placeholder[:, i]

                    logit = tf.matmul(
                        out, network_weights['LSTM']['encoding_weight']
                    ) + network_weights['LSTM']['encoding_bias']
                    if not use_ctc:

                        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logit, labels=onehot)

                        xentropy = xentropy * self.mask[:, i]
                        xentropy = tf.reduce_sum(xentropy)
                        self.debug += xentropy
                        loss += xentropy

                    else:
                        probs.append(tf.expand_dims(tf.nn.sigmoid(logit), 1))
            self.debug = [
                self.input_KLD_loss,
                tf.reduce_mean(self.input_embedding_KLD_loss) /
                self.batch_size * KLD_penalty, self.other_loss, KLD_penalty
            ]
            if not use_ctc:
                loss_ctc = 0
                # self.debug=other_loss
                # self.debug=[input_KLD_loss,embedded_input_KLD_loss,input_embedding_KLD_loss]
            else:
                probs = tf.concat(probs, axis=1)
                probs = ctc_loss.get_output_probabilities(
                    probs, self.caption_placeholder[:, 1:, :])
                loss_ctc = ctc_loss.loss(
                    probs, self.caption_placeholder[:, 1:, :],
                    self.network_architecture['maxlen'] - 2, self.batch_size,
                    seqlen - 1)
                self.debug = loss_ctc
            #
            loss = (loss / tf.reduce_sum(self.mask[:, 1:])) + tf.reduce_sum(
                self.input_embedding_KLD_loss
            ) / self.batch_size * KLD_penalty + tf.reduce_sum(
                self.embedded_input_KLD_loss * self.mask[:, 1:]
            ) / tf.reduce_sum(
                self.mask[:, 1:]
            ) * KLD_penalty + loss_ctc + self.input_KLD_loss + self.other_loss
            print 'makin loss'
        self.loss = loss
Example #32
    def build_model(self, video, video_mask, caption, caption_1, caption_mask):
        drop_type = tf.placeholder(tf.int32, shape=[])
        caption_mask = tf.cast(caption_mask, tf.float32)
        video_mask = tf.cast(video_mask, tf.float32)
        # for decoding
        video = video * tf.constant(feat_scale_factor)
        video_flat = tf.reshape(video, [-1, self.dim_image])  # (b x nv) x d
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W,
                                    self.encode_image_b)  # (b x nv) x h
        image_emb = tf.reshape(
            image_emb, [self.batch_size, self.n_video_steps, self.dim_hidden
                        ])  # b x nv x h

        c_init = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        m_init = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        state2 = (c_init, m_init)  # 2 x b x h

        ######## Encoding Stage #########
        # encoding video
        # mean pooling && mapping into (-1, 1) range
        output1 = tf.nn.tanh(tf.reduce_mean(image_emb, axis=1))  # b x h
        # encoding sentence
        with tf.variable_scope("model") as scope:
            for i in xrange(self.n_caption_steps):
                if i > 0: scope.reuse_variables()
                with tf.variable_scope("LSTM2"):
                    with tf.device(cpu_device):
                        current_embed = tf.nn.embedding_lookup(
                            self.Wemb, caption_1[:, i])  # b x h
                    output2, state2 = self.lstm2_dropout(
                        current_embed, state2)  # b x h
        ######## Encoding Stage #########

        #### 0: keep both 1: keep video only 2: keep sentence only
        ######## Dropout Stage #########
        if drop_type == 1:
            output2 = tf.constant(0, dtype=tf.float32) * output2
            output2 = tf.stop_gradient(output2)
        elif drop_type == 2:
            output1 = tf.constant(0, dtype=tf.float32) * output1
            output1 = tf.stop_gradient(output1)
        ######## Dropout Stage #########

        ######## Semantic Learning Stage ########
        ##### normalization before concatenation
        input_state = tf.concat([output1, output2], 1)  # b x (2 * h)
        loss_latent, output_semantic = self.vae(input_state)
        ######## Semantic Learning Stage ########

        ######## Decoding Stage ##########
        state3 = (c_init, m_init)  # 2 x b x h
        state4 = (c_init, m_init)  # 2 x b x h
        current_embed = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        video_prev = tf.zeros([self.batch_size, self.dim_hidden])

        loss_caption = 0.0
        loss_video = 0.0

        ## decoding sentence without attention
        with tf.variable_scope("model") as scope:
            with tf.variable_scope("LSTM3"):
                _, state3 = self.lstm3_dropout(output_semantic,
                                               state3)  # b x h
            for i in xrange(n_caption_steps):
                scope.reuse_variables()
                with tf.variable_scope("LSTM3"):
                    output3, state3 = self.lstm3_dropout(
                        current_embed, state3)  # b x h
                labels = tf.expand_dims(caption[:, i], 1)  # b x 1
                indices = tf.expand_dims(tf.range(0, self.batch_size, 1),
                                         1)  # b x 1
                concated = tf.concat([indices, labels], 1)  # b x 2
                onehot_labels = tf.sparse_to_dense(
                    concated, tf.stack([self.batch_size,
                                        self.n_words]), 1.0, 0.0)  # b x w
                with tf.device(cpu_device):
                    current_embed = tf.nn.embedding_lookup(
                        self.Wemb, caption[:, i])
                logit_words = tf.nn.xw_plus_b(output3, self.embed_word_W,
                                              self.embed_word_b)  # b x w
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logit_words, labels=onehot_labels)  # b x 1
                cross_entropy = cross_entropy * caption_mask[:, i]  # b x 1
                loss_caption += tf.reduce_sum(cross_entropy)  # 1

        ## decoding video without attention
        with tf.variable_scope("model") as scope:
            ## TODO: add attention for video decoding
            ## write into memory first
            with tf.variable_scope("LSTM4"):
                _, state4 = self.lstm4_dropout(output_semantic, state4)
            for i in xrange(self.n_video_steps):
                scope.reuse_variables()
                with tf.variable_scope("LSTM4"):
                    output4, state4 = self.lstm4_dropout(video_prev, state4)
                decode_image = tf.nn.xw_plus_b(output4, self.decode_image_W,
                                               self.decode_image_b)  # b x d_im
                decode_image = tf.nn.sigmoid(decode_image)
                video_prev = image_emb[:, i, :]  # b x h
                euclid_loss = tf.reduce_sum(tf.square(
                    tf.subtract(decode_image, video[:, i, :])),
                                            axis=1,
                                            keep_dims=True)  # b x 1
                euclid_loss = euclid_loss * video_mask[:, i]  # b x 1
                loss_video += tf.reduce_sum(euclid_loss)  # 1

        loss_caption = loss_caption / tf.reduce_sum(caption_mask)
        loss_video = loss_video / tf.reduce_sum(video_mask)

        loss = tf.constant(caption_weight) * loss_caption + tf.constant(video_weight) * loss_video + \
            tf.constant(latent_weight) * loss_latent
        return loss, loss_caption, loss_latent, loss_video, output_semantic, output1, output2, drop_type
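
A side note, not part of the example above: the per-step one-hot construction via tf.expand_dims / tf.concat / tf.sparse_to_dense can be written more directly with tf.one_hot, or avoided altogether with the sparse cross-entropy op. A minimal sketch, reusing the caption, logit_words and self.n_words names from the example:

    onehot_labels = tf.one_hot(caption[:, i], depth=self.n_words)  # b x w
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logit_words, labels=onehot_labels)
    # or, without materializing the one-hot tensor at all:
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logit_words, labels=caption[:, i])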
Beispiel #33
0
    def _sparse_to_batch(self, sparse):
        print('shapes', sparse.dense_shape)
        ids = tf.sparse_tensor_to_dense(sparse)
        mask = tf.sparse_to_dense(sparse.indices, sparse.dense_shape,
                                  tf.ones_like(sparse.values, dtype=tf.int32))
        return ids, mask
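
For reference, not part of the snippet above: the same ids/mask pair can be built with non-deprecated ops. A minimal sketch, assuming `sparse` is a tf.SparseTensor as in the example and TF 1.13+/2.x APIs are available:

    import tensorflow as tf

    def sparse_to_batch(sparse):
        # Densify the ids; unpopulated positions become 0.
        ids = tf.sparse.to_dense(sparse)
        # Scatter a 1 at every populated index to obtain the padding mask.
        mask = tf.scatter_nd(sparse.indices,
                             tf.ones_like(sparse.values, dtype=tf.int32),
                             sparse.dense_shape)
        return ids, mask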
Beispiel #34
0
    def __init__(
            self,
            source_vocab_size,
            target_vocab_size,
            entity_vocab_size,  # entity
            buckets,
            state_size,
            num_layers,
            embedding_size,
            max_gradient,
            batch_size,
            learning_rate,
            forward_only=False,
            dtype=tf.float32):

        entity_encode = 'cnn'
        highway = True
        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.entity_vocab_size = entity_vocab_size  # entity
        self.buckets = buckets
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.global_step = tf.Variable(0, trainable=False, name="global_step")
        self.state_size = state_size

        self.encoder_inputs = tf.placeholder(tf.int32,
                                             shape=[self.batch_size, None],
                                             name='1')
        self.decoder_inputs = tf.placeholder(tf.int32,
                                             shape=[self.batch_size, None],
                                             name='2')
        self.decoder_targets = tf.placeholder(tf.int32,
                                              shape=[self.batch_size, None],
                                              name='3')
        self.encoder_len = tf.placeholder(tf.int32,
                                          shape=[self.batch_size],
                                          name='4')
        self.decoder_len = tf.placeholder(tf.int32,
                                          shape=[self.batch_size],
                                          name='5')
        self.beam_tok = tf.placeholder(tf.int32,
                                       shape=[self.batch_size],
                                       name='6')
        self.prev_att = tf.placeholder(tf.float32,
                                       shape=[self.batch_size, state_size * 2],
                                       name='7')
        self.K = tf.placeholder(tf.int32)
        self.lvt_dict = tf.placeholder(tf.int32, shape=[None], name='8')
        self.lvt_len = tf.placeholder(tf.int32, name='9')
        self.batch_dec_len = tf.placeholder(tf.int32, name='10')

        # entity
        self.entity_inputs = tf.placeholder(tf.int32,
                                            shape=[self.batch_size, None])
        self.entity_len = tf.placeholder(tf.int32, shape=[self.batch_size])

        encoder_fw_cells = []
        encoder_bw_cells = []
        for _ in range(2):
            encoder_fw_cells.append(tf.contrib.rnn.GRUCell(state_size))
            encoder_bw_cells.append(tf.contrib.rnn.GRUCell(state_size))

        if not forward_only:
            for i in range(2):
                encoder_fw_cells[i] = tf.contrib.rnn.DropoutWrapper(
                    encoder_fw_cells[i], output_keep_prob=0.50)
                encoder_bw_cells[i] = tf.contrib.rnn.DropoutWrapper(
                    encoder_bw_cells[i], output_keep_prob=0.50)
        encoder_fw_cell = tf.contrib.rnn.MultiRNNCell(encoder_fw_cells)
        encoder_bw_cell = tf.contrib.rnn.MultiRNNCell(encoder_bw_cells)
        #decode
        decoder_cells = []
        for _ in range(2):
            decoder_cells.append(tf.contrib.rnn.GRUCell(state_size))
        decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_cells)

        self.loss = tf.constant(0)

        with tf.variable_scope("seq2seq", dtype=dtype):
            with tf.variable_scope("encoder"):

                self.encoder_emb = tf.get_variable(
                    "embedding", [source_vocab_size, embedding_size],
                    initializer=emb_init)

                encoder_inputs_emb = tf.nn.embedding_lookup(
                    self.encoder_emb, self.encoder_inputs)

                encoder_fw_cell = tf.contrib.rnn.MultiRNNCell(encoder_fw_cells)
                encoder_bw_cell = tf.contrib.rnn.MultiRNNCell(encoder_bw_cells)

                encoder_outputs, encoder_states = \
                    tf.nn.bidirectional_dynamic_rnn(
                        encoder_fw_cell, encoder_bw_cell, encoder_inputs_emb,
                        sequence_length=self.encoder_len, dtype=dtype)

                encoder_len = self.encoder_len

                if forward_only:
                    encoder_outputs = tile_batch(encoder_outputs,
                                                 multiplier=10)
                    encoder_states = nest.map_structure(
                        lambda s: tile_batch(s, 10), encoder_states)
                    encoder_len = tile_batch(self.encoder_len, multiplier=10)

                #encoder_states = encoder_states[-1]

            if entity_encode == 'no':
                # NO
                with tf.variable_scope("entity_encoder"):
                    self.entity_emb = tf.get_variable(
                        "embedding", [entity_vocab_size, 1000],
                        initializer=emb_init)

                    entity_vector = tf.nn.embedding_lookup(
                        self.entity_emb, self.entity_inputs)

            elif entity_encode == 'rnn':
                # RNN
                with tf.variable_scope("entity_encoder"):
                    entity_fw_cell = tf.contrib.rnn.GRUCell(state_size)
                    entity_bw_cell = tf.contrib.rnn.GRUCell(state_size)

                    if not forward_only:
                        entity_fw_cell = tf.contrib.rnn.DropoutWrapper(
                            entity_fw_cell, output_keep_prob=0.5)
                        entity_bw_cell = tf.contrib.rnn.DropoutWrapper(
                            entity_bw_cell, output_keep_prob=0.5)

                    self.entity_emb = tf.get_variable(
                        "embedding", [entity_vocab_size, 1000],
                        initializer=emb_init)

                    entity_inputs_emb = tf.nn.embedding_lookup(
                        self.entity_emb, self.entity_inputs)

                    entity_outputs, entity_states = \
                        tf.nn.bidirectional_dynamic_rnn(
                            entity_fw_cell, entity_bw_cell, entity_inputs_emb,
                            sequence_length=self.entity_len, dtype=dtype)

                    entity_vector = tf.concat(entity_outputs, 2)
                    entity_vector.set_shape(
                        [self.batch_size, None, state_size * 2])
                    entity_proj = entity_inputs_emb

            elif entity_encode == 'cnn':
                # CNN
                with tf.variable_scope("entity_encoder"):
                    self.entity_emb = tf.get_variable(
                        "embedding", [entity_vocab_size, 1000],
                        initializer=emb_init)

                    entity_inputs_emb = tf.nn.embedding_lookup(
                        self.entity_emb, self.entity_inputs)

                    entity_inputs_emb_expanded = tf.expand_dims(
                        entity_inputs_emb, -1)
                    filter_sizes = [3, 5, 7]
                    num_filters = [400, 300, 300]

                    outputs = []
                    for i, filter_size in enumerate(filter_sizes):
                        with tf.name_scope("conv-maxpool-%s" % filter_size):
                            filter_shape = [
                                filter_size, 1000, 1, num_filters[i]
                            ]
                            W = tf.Variable(tf.truncated_normal(filter_shape,
                                                                stddev=0.1),
                                            name="W")
                            b = tf.Variable(tf.constant(0.1,
                                                        shape=[num_filters[i]
                                                               ]),
                                            name="b")
                            conv = tf.nn.conv2d(entity_inputs_emb_expanded,
                                                W,
                                                strides=[1, 1, 1, 1],
                                                padding="VALID",
                                                name="conv")
                            h = tf.nn.relu(tf.nn.bias_add(conv, b),
                                           name="relu")
                            erase = int((7 - filter_size) / 2)
                            if erase != 0:
                                h = h[:, erase:-erase, :, :]
                            if not forward_only:
                                h = tf.nn.dropout(h, 0.5)
                            outputs.append(h)

                    entity_vector = tf.concat(outputs, axis=3)
                    entity_vector = tf.squeeze(entity_vector, 2)
                    entity_vector.set_shape(
                        [self.batch_size, None, state_size * 2])
                    entity_proj = entity_inputs_emb[:, 3:-3, :]
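                    # The smaller filters (3 and 5) are cropped above by
                    # (7 - filter_size) / 2 positions on each side, so all
                    # three VALID conv outputs end up with the same length as
                    # the widest (7) filter's output; trimming 3 positions
                    # from each end of the raw embeddings aligns entity_proj
                    # with them.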

            if highway:
                # y
                #                entity_proj = entity_inputs_emb[:,3:-3,:]
                Wh = tf.get_variable("Wh", [1000, 1000],
                                     initializer=xavier_initializer())
                bh = tf.Variable(tf.constant(0.0, shape=[1000]))
                entity_proj = tf.nn.tanh(tf.tensordot(entity_proj, Wh, 1) + bh)
                if not forward_only:
                    entity_proj.set_shape([self.batch_size, None, 1000])
                else:
                    entity_proj.set_shape([self.batch_size * 10, None, 1000])

                if not forward_only:
                    entity_proj = tf.nn.dropout(entity_proj, keep_prob=0.5)

                # t
                Wt = tf.get_variable("Wt", [1000, 1],
                                     initializer=xavier_initializer())
                bt = tf.Variable(tf.constant(0.0, shape=[1]))
                t = tf.nn.sigmoid(tf.tensordot(entity_vector, Wt, 1) + bt)
                if not forward_only:
                    t.set_shape([self.batch_size, None, 1000])
                else:
                    t.set_shape([self.batch_size * 10, None, 1000])
                self.t = t

                entity_vector = t * entity_vector + (1 - t) * entity_proj
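                # Highway-style gate: t controls, per position, how much of
                # the entity encoding is kept versus the projected raw
                # embeddings.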

            with tf.variable_scope("init_state"):
                init_states = []
                for i in range(2):
                    init_state = fc_layer(tf.concat(encoder_states[i], 1),
                                          state_size)
                    init_states.append(init_state)
                # the shape of bidirectional_dynamic_rnn is weird
                # None for batch_size
                self.init_states = init_states
                #self.init_state.set_shape([self.batch_size, state_size])
                self.att_states = tf.concat(encoder_outputs, 2)

            #with tf.variable_scope("entity_init_state"):
            #    entity_init_state = fc_layer(
            #        tf.concat(entity_states, 1), state_size)
            #    self.entity_init_state = entity_init_state
            #    self.entity_init_state.set_shape([self.batch_size, state_size])
            #    self.entity_att_states = tf.concat(entity_outputs, 2)
            #    self.entity_att_states.set_shape([self.batch_size, None, state_size*2])

            with tf.variable_scope("entity_attention"):
                X = tf.get_variable("X",
                                    shape=[1000, state_size],
                                    initializer=xavier_initializer())
                x = tf.get_variable("x",
                                    shape=[state_size],
                                    initializer=xavier_initializer())
                Y = tf.get_variable("Y",
                                    shape=[state_size * 2, state_size],
                                    initializer=xavier_initializer())
                first = tf.matmul(tf.concat(encoder_states[-1], 1), Y)
                first = tf.expand_dims(first, 1)
                other = tf.tensordot(entity_vector, X, 1)
                weights = tf.nn.tanh(first + other)
                if not forward_only:
                    weights = tf.nn.dropout(weights, keep_prob=0.5)
                weights = tf.tensordot(weights, x, 1)
                if not forward_only:
                    weights.set_shape([self.batch_size, None])
                else:
                    weights.set_shape([10 * self.batch_size, None])

                k_values, k_indices = tf.nn.top_k(weights, k=self.K)
                my_range = tf.expand_dims(tf.range(0, k_indices.shape[0]), 1)
                #print(my_range)
                my_range_repeated = tf.tile(my_range, [1, self.K])

                full_indices = tf.concat([
                    tf.expand_dims(my_range_repeated, 2),
                    tf.expand_dims(k_indices, 2)
                ], 2)
                full_indices = tf.reshape(full_indices, [-1, 2])

                output_shape = tf.shape(weights)
                zeros = tf.sparse_to_dense(full_indices,
                                           output_shape,
                                           0.0,
                                           default_value=-1000000000.0,
                                           validate_indices=False)

                weights = tf.nn.softmax(zeros + weights)
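                # `zeros` holds 0.0 at the top-K index positions and a large
                # negative default everywhere else, so the softmax effectively
                # attends only to the K highest-scoring entity positions.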
                weights = tf.expand_dims(weights, -1)
                self.weights = weights
                context = tf.multiply(entity_vector, weights)
                context = tf.reduce_sum(context, axis=1)

            with tf.variable_scope("attention"):
                attention = BahdanauAttention(state_size, self.att_states,
                                              encoder_len)

            with tf.variable_scope("decoder") as scope:
                #decoder_cells = []
                #for _ in range(2):
                #    decoder_cells.append(tf.contrib.rnn.GRUCell(state_size))

                if not forward_only:
                    for i in range(2):
                        decoder_cells[i] = tf.contrib.rnn.DropoutWrapper(
                            decoder_cells[i], output_keep_prob=0.50)

                #for i in range(2):
                decoder_cells[-1] = AttentionWrapper(decoder_cells[-1],
                                                     attention,
                                                     state_size,
                                                     context=context)

                initial_states = [state for state in init_states]
                if not forward_only:
                    initial_states[-1] = decoder_cells[-1].zero_state(
                        batch_size=self.batch_size, dtype=tf.float32)
                else:
                    initial_states[-1] = decoder_cells[-1].zero_state(
                        batch_size=10 * self.batch_size, dtype=tf.float32)

                decoder_initial_state = tuple(initial_states)

                decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_cells)

                self.decoder_emb = tf.get_variable(
                    "embedding", [target_vocab_size, embedding_size],
                    initializer=emb_init)
                output_layer = tf.contrib.keras.layers.Dense(
                    target_vocab_size, name="train_output")
                if not forward_only:
                    #output_layer = tf.contrib.keras.layers.Dense(target_vocab_size, name="train_output")

                    decoder_inputs_emb = tf.nn.embedding_lookup(
                        self.decoder_emb, self.decoder_inputs)
                    helper = tf.contrib.seq2seq.TrainingHelper(
                        decoder_inputs_emb, self.decoder_len)
                    decoder = tf.contrib.seq2seq.BasicDecoder(
                        decoder_cell, helper, decoder_initial_state,
                        output_layer)

                    outputs, final_state, _ = \
                        tf.contrib.seq2seq.dynamic_decode(decoder)

                    outputs_logits = tf.identity(outputs.rnn_output)
                    self.outputs = outputs_logits

                    weights = tf.sequence_mask(self.decoder_len,
                                               dtype=tf.float32)

                    self.loss_t = weights
                    loss_t = tf.contrib.seq2seq.sequence_loss(
                        outputs_logits,
                        self.decoder_targets,
                        weights,
                        average_across_timesteps=False,
                        average_across_batch=False)
                    self.loss = tf.reduce_sum(loss_t) / self.batch_size

                    params = tf.trainable_variables()
                    opt = tf.train.AdadeltaOptimizer(self.learning_rate,
                                                     epsilon=1e-6)
                    gradients = tf.gradients(self.loss, params)
                    clipped_gradients, norm = \
                        tf.clip_by_global_norm(gradients, max_gradient)
                    self.updates = opt.apply_gradients(
                        zip(clipped_gradients, params),
                        global_step=self.global_step)

                    tf.summary.scalar('loss', self.loss)
                else:
                    #output_layer = tf.contrib.keras.layers.Dense(target_vocab_size, name="test_output", trainable=True)
                    st_toks = tf.convert_to_tensor([data_util.ID_GO] *
                                                   self.batch_size,
                                                   dtype=tf.int32)

                    def embed_proj(inputs):
                        return tf.nn.embedding_lookup(self.decoder_emb, inputs)

                    #decoding_helper = GreedyEmbeddingHelper(start_tokens=st_toks, end_token=data_util.ID_EOS, embedding=embed_and_input_proj)
                    inference_decoder = BeamSearchDecoder(
                        cell=decoder_cell,
                        embedding=embed_proj,
                        start_tokens=st_toks,
                        end_token=data_util.ID_EOS,
                        initial_state=decoder_initial_state,
                        beam_width=10,
                        output_layer=output_layer)

                    outputs, final_state, _ = tf.contrib.seq2seq.dynamic_decode(
                        decoder=inference_decoder,
                        output_time_major=False,
                        maximum_iterations=100)
                    self.outputs = outputs.predicted_ids[:, :, 0]
                    #self.outputs = tf.transpose(outputs.predicted_ids, [0,2,1])
                    print(self.outputs)

        self.saver = tf.train.Saver(tf.global_variables())
        #self.saver = tf.train.Saver()
        self.summary_merge = tf.summary.merge_all()
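
A side note, not part of the model above: the entity_attention block keeps only the K highest-scoring positions by scattering zeros at the top-K indices and a very negative default elsewhere before the softmax. A minimal sketch of the same trick with tf.scatter_nd, assuming `weights` is the (batch x positions) score matrix and K is a plain Python int rather than a placeholder:

    batch = tf.shape(weights)[0]
    _, k_indices = tf.nn.top_k(weights, k=K)                      # b x K
    rows = tf.tile(tf.expand_dims(tf.range(batch), 1), [1, K])    # b x K
    scatter_idx = tf.reshape(tf.stack([rows, k_indices], 2), [-1, 2])
    keep = tf.scatter_nd(scatter_idx,
                         tf.ones(tf.shape(scatter_idx)[:1]),
                         tf.shape(weights))                       # 1.0 at top-K
    weights = tf.nn.softmax(weights - 1e9 * (1.0 - keep))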
Beispiel #35
0
    def __init__(self, is_training, config):
        self._batch_size = batch_size = FLAGS.batch_size
        self.num_skills = num_skills = config.num_skills
        self.hidden_size = size = FLAGS.hidden_size
        self.num_steps = num_steps = config.num_steps
        input_size = num_skills * 2

        inputs = self._input_data = tf.placeholder(tf.int32,
                                                   [batch_size, num_steps])
        self._target_id = target_id = tf.placeholder(tf.int32, [None])
        self._target_correctness = target_correctness = tf.placeholder(
            tf.float32, [None])
        final_hidden_size = size

        hidden_layers = []
        for i in range(FLAGS.hidden_layer_num):
            final_hidden_size = size / (i + 1)
            hidden1 = tf.nn.rnn_cell.LSTMCell(final_hidden_size,
                                              state_is_tuple=True)
            if is_training and config.keep_prob < 1:
                hidden1 = tf.nn.rnn_cell.DropoutWrapper(
                    hidden1, output_keep_prob=FLAGS.keep_prob)
            hidden_layers.append(hidden1)

        cell = tf.nn.rnn_cell.MultiRNNCell(hidden_layers, state_is_tuple=True)

        input_data = tf.reshape(self._input_data, [-1])
        #one-hot encoding
        with tf.device("/cpu:0"):
            labels = tf.expand_dims(input_data, 1)
            indices = tf.expand_dims(tf.range(0, batch_size * num_steps, 1), 1)
            concated = tf.concat(1, [indices, labels])
            inputs = tf.sparse_to_dense(
                concated, tf.pack([batch_size * num_steps, input_size]), 1.0,
                0.0)
            inputs.set_shape([batch_size * num_steps, input_size])
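            # Each row is now a one-hot vector of length
            # input_size = 2 * num_skills, built by scattering 1.0 at the
            # (row, input id) index pairs.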

        # [batch_size, num_steps, input_size]
        inputs = tf.reshape(inputs, [-1, num_steps, input_size])
        x = tf.transpose(inputs, [1, 0, 2])
        # Reshape to (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, input_size])
        # Split to get a list of 'n_steps'
        # tensors of shape (doc_num, n_input)
        x = tf.split(0, num_steps, x)
        #inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_steps, inputs)]
        #outputs, state = tf.nn.rnn(hidden1, x, dtype=tf.float32)
        outputs, state = tf.nn.rnn(cell, x, dtype=tf.float32)
        output = tf.reshape(tf.concat(1, outputs), [-1, final_hidden_size])
        # calculate the logits from last hidden layer to output layer
        sigmoid_w = tf.get_variable("sigmoid_w",
                                    [final_hidden_size, num_skills])
        sigmoid_b = tf.get_variable("sigmoid_b", [num_skills])
        logits = tf.matmul(output, sigmoid_w) + sigmoid_b

        # from output nodes to pick up the right one we want
        logits = tf.reshape(logits, [-1])
        selected_logits = tf.gather(logits, self.target_id)

        #make prediction
        self._pred = self._pred_values = pred_values = tf.sigmoid(
            selected_logits)

        # loss function
        loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(selected_logits,
                                                    target_correctness))

        #self._cost = cost = tf.reduce_mean(loss)
        self._cost = cost = loss
Beispiel #36
0
def parse_tfrecord_function(example_proto):
    totalTags = 1930
    majorVal = 0.9
    defaultVal = 0.1 / (totalTags - 1)
    features = {
        "target":
        tf.FixedLenFeature([], tf.int64, default_value=0),
        "target_orgId":
        tf.FixedLenFeature([], tf.int64, default_value=0),
        "gender":
        tf.FixedLenFeature([], tf.int64, default_value=0),
        "age":
        tf.FixedLenFeature([], tf.int64, default_value=0),
        "location":
        tf.FixedLenFeature([], tf.int64, default_value=0),
        "education_schools":
        tf.FixedLenSequenceFeature([],
                                   tf.int64,
                                   allow_missing=True,
                                   default_value=0),
        "education_degrees":
        tf.FixedLenSequenceFeature([],
                                   tf.int64,
                                   allow_missing=True,
                                   default_value=0),
        "education_starts":
        tf.FixedLenSequenceFeature([],
                                   tf.float32,
                                   allow_missing=True,
                                   default_value=0),
        "education_majors":
        tf.FixedLenSequenceFeature([],
                                   tf.int64,
                                   allow_missing=True,
                                   default_value=0),
        "work_expr_descs":
        tf.FixedLenSequenceFeature([],
                                   tf.int64,
                                   allow_missing=True,
                                   default_value=0),
        "work_expr_orgs":
        tf.FixedLenSequenceFeature([],
                                   tf.int64,
                                   allow_missing=True,
                                   default_value=0),
        "work_expr_orgIds":
        tf.FixedLenSequenceFeature([],
                                   tf.int64,
                                   allow_missing=True,
                                   default_value=0),
        "work_expr_starts":
        tf.FixedLenSequenceFeature([],
                                   tf.float32,
                                   allow_missing=True,
                                   default_value=0),
        "work_expr_durations":
        tf.FixedLenSequenceFeature([],
                                   tf.float32,
                                   allow_missing=True,
                                   default_value=0),
        "work_expr_jobs":
        tf.FixedLenSequenceFeature([],
                                   tf.int64,
                                   allow_missing=True,
                                   default_value=0),
        "proj_expr_descs":
        tf.FixedLenSequenceFeature([],
                                   tf.int64,
                                   allow_missing=True,
                                   default_value=0),
    }
    parsed_features = tf.parse_single_example(example_proto, features)
    target = parsed_features["target"]
    target_orgId = parsed_features["target_orgId"]
    targets = tf.sparse_to_dense(target, [totalTags], majorVal, defaultVal)
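    # Smoothed one-hot target over all tags: majorVal (0.9) at the true tag
    # index, defaultVal (0.1 spread over the remaining tags) everywhere else.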
    gender = parsed_features["gender"]
    age = parsed_features["age"]
    location = parsed_features["location"]
    # education part
    education_schools = parsed_features["education_schools"]
    education_schools.set_shape([3])
    education_degrees = parsed_features["education_degrees"]
    education_degrees.set_shape([3])
    education_starts = parsed_features["education_starts"]
    education_starts.set_shape([3])
    education_majors = parsed_features["education_majors"]
    education_majors.set_shape([3])
    # working experience part
    work_expr_orgs = parsed_features["work_expr_orgs"]
    work_expr_orgs.set_shape([3])
    work_expr_starts = parsed_features["work_expr_starts"]
    work_expr_starts.set_shape([3])
    work_expr_durations = parsed_features["work_expr_durations"]
    work_expr_durations.set_shape([3])
    work_expr_orgIds = parsed_features["work_expr_orgIds"]
    work_expr_orgIds.set_shape([3])
    work_expr_jobs = parsed_features["work_expr_jobs"]
    work_expr_jobs.set_shape([3])
    work_expr_descs = parsed_features["work_expr_descs"]
    work_expr_descs.set_shape([360])
    work_expr_descs = tf.reshape(work_expr_descs, [3, 120])

    proj_expr_descs = parsed_features["proj_expr_descs"]
    proj_expr_descs.set_shape([360])
    proj_expr_descs = tf.reshape(proj_expr_descs, [3, 120])

    return target, targets, gender, age, location, education_schools, education_degrees, education_starts, education_majors, work_expr_orgs, work_expr_starts, work_expr_durations, work_expr_jobs, work_expr_orgIds, work_expr_descs, proj_expr_descs
Beispiel #37
0
def build_input(flags, mode):
  image_size = flags.image_size
  batch_size = flags.batch_size
  num_classes = flags.num_label
  if mode == 'train':
    data_path = flags.train_filepath
  else:
    data_path = flags.valid_filepath

  label_bytes = 1
  label_offset = 0
  depth = 3
  image_bytes = image_size * image_size * depth
  record_bytes = label_bytes + label_offset + image_bytes

  data_files = tf.gfile.Glob(data_path)
  file_queue = tf.train.string_input_producer(data_files, shuffle=True)
  # Read examples from files in the filename queue.
  reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
  _, value = reader.read(file_queue)

  # Convert these examples to dense labels and processed images.
  record = tf.reshape(tf.decode_raw(value, tf.uint8), [record_bytes])
  label = tf.cast(tf.slice(record, [label_offset], [label_bytes]), tf.int32)
  # Convert from string to [depth * height * width] to [depth, height, width].
  depth_major = tf.reshape(tf.slice(record, [label_offset + label_bytes], [image_bytes]),
                           [depth, image_size, image_size])
  # Convert from [depth, height, width] to [height, width, depth].
  image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

  if mode == 'train':
    image = tf.image.resize_image_with_crop_or_pad(
        image, image_size+4, image_size+4)
    image = tf.random_crop(image, [image_size, image_size, 3])
    image = tf.image.random_flip_left_right(image)
    # Brightness/saturation/constrast provides small gains .2%~.5% on cifar.
    # image = tf.image.random_brightness(image, max_delta=63. / 255.)
    # image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
    # image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
    image = tf.image.per_image_standardization(image)

    example_queue = tf.RandomShuffleQueue(
        capacity=16 * batch_size,
        min_after_dequeue=8 * batch_size,
        dtypes=[tf.float32, tf.int32],
        shapes=[[image_size, image_size, depth], [1]])
    num_threads = 16
  else:
    image = tf.image.resize_image_with_crop_or_pad(
        image, image_size, image_size)
    image = tf.image.per_image_standardization(image)

    example_queue = tf.FIFOQueue(
        3 * batch_size,
        dtypes=[tf.float32, tf.int32],
        shapes=[[image_size, image_size, depth], [1]])
    num_threads = 1

  example_enqueue_op = example_queue.enqueue([image, label])
  tf.train.add_queue_runner(tf.train.queue_runner.QueueRunner(
      example_queue, [example_enqueue_op] * num_threads))

  # Read 'batch' labels + images from the example queue.
  images, labels = example_queue.dequeue_many(batch_size)
  labels = tf.reshape(labels, [batch_size, 1])
  indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
  labels = tf.sparse_to_dense(
      tf.concat(values=[indices, labels], axis=1),
      [batch_size, num_classes], 1.0, 0.0)
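  # Dense one-hot targets: a 1.0 is scattered at every (row, label) pair.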

  assert len(images.get_shape()) == 4
  assert images.get_shape()[0] == batch_size
  assert images.get_shape()[-1] == 3
  assert len(labels.get_shape()) == 2
  assert labels.get_shape()[0] == batch_size
  assert labels.get_shape()[1] == num_classes

  # Display the training images in the visualizer.
  tf.summary.image('images', images)
  return images, labels
Beispiel #38
0
def build_input(data_path, batch_size, num_class, reszie, mode='train'):
    # read the files in the directory that match the given pattern
    files = tf.train.match_filenames_once(data_path)
    # put the files into a filename queue
    filename_queue = tf.train.string_input_producer(files, shuffle=True)
    # create a reader
    reader = tf.TFRecordReader()
    # read one example from the file; read_up_to can read several examples at once
    _, serialized_example = reader.read(filename_queue)
    # parse one example
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           "image/encoded":
                                           tf.FixedLenFeature([], tf.string),
                                           "image/height":
                                           tf.FixedLenFeature([], tf.int64),
                                           "image/width":
                                           tf.FixedLenFeature([], tf.int64),
                                           "image/filename":
                                           tf.FixedLenFeature([], tf.string),
                                           "image/class/label":
                                           tf.FixedLenFeature([], tf.int64),
                                           'image/channels':
                                           tf.FixedLenFeature([], tf.int64),
                                       })

    # maximum number of examples the batching queue can hold
    capacity = 500 + 3 * batch_size
    # read the features of one example
    image, label = features['image/encoded'], features['image/class/label']
    height, width, channel = features['image/height'], features[
        'image/width'], features['image/channels']

    # tf.decode_raw can parse the string into the image's pixel array
    # decoded_images=tf.decode_raw(image,tf.uint8)
    # retyped_images = tf.cast(decoded_images, tf.float32)
    # retyped_height = tf.cast(height,tf.int32)
    # retyped_width = tf.cast(width,tf.int32)
    # retyped_channel = tf.cast(channel,tf.int32)
    # labels = tf.cast(label,tf.int32)
    # decoded_images.set_shape([height,width,channel])
    # resize =32
    # reshaped_images=tf.reshape(decoded_images,[retyped_height,retyped_width ,retyped_channel])
    # distored_image = tf.image.resize_images(decoded_images,[32,32],method=np.random.randint(4))
    # distored_image = preprocess_for_train(reshaped_images,resize,resize,None)
    # images,labels= tf.train.shuffle_batch([distored_image,labels ],batch_size=batch_size,capacity=capacity,min_after_dequeue=500)
    # return images,labels

    ###tf.image.decode_jpeg#############
    image_raw = tf.image.decode_jpeg(image, channels=3)
    retyped_height = tf.cast(height, tf.int32)
    retyped_width = tf.cast(width, tf.int32)
    retyped_channel = tf.cast(channel, tf.int32)
    labels = tf.cast(label, tf.int32)
    # image_resize = tf.image.resize_images(image_raw,[32,32],method=np.random.randint(4))
    image_resize = tf.image.resize_image_with_crop_or_pad(
        image_raw, reszie, reszie)
    images, labels = tf.train.shuffle_batch([image_resize, labels],
                                            batch_size=batch_size,
                                            capacity=capacity,
                                            min_after_dequeue=500)
    labels = tf.reshape(labels, [batch_size, 1])
    indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
    labels = tf.sparse_to_dense(tf.concat(values=[indices, labels], axis=1),
                                [batch_size, num_class], 1.0, 0.0)
    tf.summary.image('images', images)
    return images, labels
Beispiel #39
0
    def build_model(self):
        video = tf.placeholder(
            tf.float32,
            [self.batch_size, self.n_video_lstm_step, self.dim_image
             ])  # (batch, 80, 4096)
        video_mask = tf.placeholder(tf.float32,
                                    [self.batch_size, self.n_video_lstm_step])

        caption = tf.placeholder(
            tf.int32, [self.batch_size, self.n_caption_lstm_step + 1
                       ])  # includes <BOS>; stores word IDs; (batch, max_length+1)
        caption_mask = tf.placeholder(
            tf.float32, [self.batch_size, self.n_caption_lstm_step + 1
                         ])  # (batch_size, max_length+1)
        video_flat = tf.reshape(video, [-1, self.dim_image])
        image_emb = tf.nn.xw_plus_b(
            video_flat, self.encode_image_W,
            self.encode_image_b)  # (batch_size*n_lstm_steps, dim_hidden)
        image_emb = tf.reshape(
            image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
        print("lstm1 sate size,", self.lstm1.state_size)
        print("lstm2 sate size,", self.lstm2.state_size)  # 2*hidden size
        state1 = tf.zeros([self.batch_size,
                           self.lstm1.state_size])  # initial state
        state2 = tf.zeros([self.batch_size,
                           self.lstm2.state_size])  # initial state
        padding = tf.zeros([self.batch_size, self.dim_hidden])  # (batch, 1000)

        probs = []
        loss = 0.0

        ##############################  Encoding Stage ##################################
        context_padding = tf.zeros([self.batch_size, self.lstm2.state_size
                                    ])  #(batch_size, 2000)
        h_list = []
        for i in range(0, self.n_video_lstm_step):  # n_video_lstm_step = 80
            with tf.variable_scope("LSTM1", reuse=(i != 0)):
                output1, state1 = self.lstm1(image_emb[:, i, :], state1)
                h_list.append(state1)
            with tf.variable_scope("LSTM2", reuse=(i != 0)):
                output2, state2 = self.lstm2(
                    tf.concat([padding, output1, context_padding], 1), state2)
        print(np.shape(h_list))
        h_list = tf.stack(h_list, axis=1)
        print(np.shape(h_list))  # (64, 80, 2000)
        ############################# Decoding Stage ######################################
        for i in range(0, self.n_caption_lstm_step
                       ):  ## Phase 2 => only generate captions
            if i == 0:
                with tf.device("/cpu:0"):
                    current_embed = tf.nn.embedding_lookup(
                        self.Wemb, caption[:, i])
            else:  # schedule sampling
                print(self.schedule_p)
                if (np.random.binomial(
                        1, self.schedule_p) == 1):  # schedule_p is the probability the draw comes out 1
                    with tf.device("/cpu:0"):
                        current_embed = tf.nn.embedding_lookup(
                            self.Wemb, caption[:, i])
                else:
                    max_prob_index = tf.argmax(logit_words, 1)[0]
                    with tf.device("/cpu:0"):
                        current_embed = tf.nn.embedding_lookup(
                            self.Wemb, max_prob_index)
            with tf.variable_scope("LSTM1", reuse=True):
                output1, state1 = self.lstm1(padding, state1)
            ##### attention ####
            context = []
            if i == 0:
                new_z = self.attention_z
            # h_list_flat = tf.reshape(h_list,[-1,self.lstm1.state_size])
            # print("h_list_flat shape, ", h_list_flat.shape) # 5120,2000

#             for sample in range(0, self.batch_size):
#                 alpha_list = [] # a list to store alpha"s" in each training sample
#                 for step_ in range(0,self.n_video_lstm_step):
#                     alpha =1 - tf.losses.cosine_distance(h_list[sample,step_,:], new_z[sample,:], dim=0)
#                     alpha_list.append(alpha)
#                 alpha_list = tf.expand_dims(alpha_list,1)
#                 ci = tf.reduce_sum(tf.multiply(alpha_list, h_list[sample,:,:]),axis = 0)
#                 context.append(ci)
#             context = tf.stack(context)
#             print("context shape", content.shape)
            h_list_flat = tf.reshape(h_list, [-1, self.lstm1.state_size])
            htmp = tf.matmul(
                h_list_flat,
                self.attention_W)  # for matmul operation (5120,2000)
            hW = tf.reshape(htmp, [
                self.batch_size, self.n_video_lstm_step, self.lstm2.state_size
            ])
            for x in range(0, self.batch_size):
                x_alpha = tf.reduce_sum(tf.multiply(hW[x, :, :], new_z[x, :]),
                                        axis=1)
                x_alpha = tf.nn.softmax(x_alpha)
                x_alpha = tf.expand_dims(x_alpha, 1)
                x_new_z = tf.reduce_sum(tf.multiply(x_alpha, h_list[x, :, :]),
                                        axis=0)
                context.append(x_new_z)
            context = tf.stack(context)
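            # Soft attention: for every sample, dot-product scores between the
            # projected encoder states (hW) and the previous decoder state
            # (new_z) are softmax-normalized and used to average h_list into a
            # context vector.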
            print("context shape", context.shape)
            with tf.variable_scope("LSTM2", reuse=True):
                print(output1.shape)  # (64,1000)
                output2, state2 = self.lstm2(
                    tf.concat([current_embed, output1, context], 1), state2)
                new_z = state2

            labels = tf.expand_dims(caption[:, i + 1],
                                    1)  # (batch, max_length, 1)
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1),
                                     1)  # (batch_size, 1)
            concated = tf.concat([indices, labels], 1)
            onehot_labels = tf.sparse_to_dense(
                concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

            logit_words = tf.nn.xw_plus_b(
                output2, self.embed_word_W,
                self.embed_word_b)  #probability of each word
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logit_words, labels=onehot_labels)
            cross_entropy = cross_entropy * caption_mask[:, i]

            probs.append(logit_words)

            current_loss = tf.reduce_sum(cross_entropy) / self.batch_size
            loss = loss + current_loss

        return loss, video, video_mask, caption, caption_mask, probs
Beispiel #40
0
    def __init__(self,
                 encoders,
                 vocabulary,
                 data_id,
                 rnn_size,
                 name,
                 embedding_size=128,
                 use_attention=None,
                 max_output_len=20,
                 scheduled_sampling=None,
                 dropout_keep_p=0.5,
                 copy_net=None,
                 reused_word_embeddings=None,
                 use_noisy_activations=False,
                 depth=1):
        """A class that collects the part of the computation graph that is
        needed for decoding.

        TensorBoard summaries are collected in this class into the following
        collections:

            * 'summary_train' - collects statistics from the train-time

            * 'summary_val' - collects statistics while being tested on the
                 development data

        Arguments:

            encoders: List of encoders. If no encoder is provided, the decoder
                can be used to train a LM.

            vocabulary: Vocabulary used for decoding

            data_id:

            rnn_size: Size of the RNN state.

            embedding_size (int): Dimensionality of the word
                embeddings used during decoding.

            use_attention (str): The type of attention to use or None. (Refer to
                cli_options script for allowed types of attention.)

            max_output_len (int): Maximum length of the decoder output.

            use_peepholes (bool): Flag whether peephole connections should be
                used in the GRU decoder.

            scheduled_sampling: Parameter k for inverse sigmoid decay in
                scheduled sampling. If set to None, linear combination of the
                decoded and supervised loss is used as a cost function.

            dropout_keep_p:

            copy_net: Tuple of (i) list of indices to the target vocabulary
                (most likely input placeholders of a different encoder) and (ii)
                the tensor over which the copying will be done, and (iii) a mask
                telling which words are part of the input.

            reused_word_embeddings: The decoder can be given the matrix of word
                embeddings from outside (if the vocabulary indexing is the
                same).  If it is None, the decoder creates its own matrix of
                word embeddings.

            use_noisy_activations: If set to True, the decoder will use the GRU
                units with noisy activation.


        Attributes:

            inputs: List of placeholders for the decoder inputs. The i-th
                element of the list contains a batch of i-th symbols in the
                sequence.

            weights_ins: List of placeholders for the weights of particular
                output symbols.  The i-th element of the list contains a vector
                telling, for each string in the batch, how much the i-th word
                should contribute to the loss computation. In practice it
                contains 1's for words which are parts of the decoded strings
                and 0's for the padding.

            loss_with_gt_ins: Operator computing the sequence loss when the
                decoder always gets the ground truth input.

            loss_with_decoded_ins: Operator computing the sequence loss when
                the decoder receives previously computed outputs on its input.

            decoded_seq: List of batches of decoded words. (When the
                decoder is fed with its own outputs.)

        """

        log("Initializing decoder, name: \"{}\"".format(name))
        self.encoders = encoders
        assert_type(self, 'vocabulary', vocabulary, Vocabulary)
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.rnn_size = rnn_size
        self.embedding_size = embedding_size
        self.use_attention = use_attention
        self.max_output_len = max_output_len
        self.scheduled_sampling = scheduled_sampling
        self.dropout_keep_p = dropout_keep_p
        self.copy_net = copy_net
        self.reused_word_embeddings = reused_word_embeddings
        self.use_noisy_activations = use_noisy_activations
        self.depth = depth
        self.name = name

        self.dropout_placeholder = tf.placeholder(tf.float32,
                                                  name="decoder_dropout_plc")

        self.is_training = tf.placeholder(tf.bool, name="decoder_is_training")

        self.learning_step = tf.Variable(0,
                                         name="learning_step",
                                         trainable=False)

        ### the following chunk branches on the number of encoders
        ### when there is just one, we take its encoded state directly
        ### when there are more, we project them first

        ### it would be better to add an optional encoder_projection attribute
        ### and give it rnn_size as its value, which would from then on be
        ### inferred automatically = without a projection, the output states of
        ### all encoders are concatenated and the resulting state has length rnn_size.

        if len(encoders) == 1 and (
                rnn_size == encoders[0].encoded.get_shape()[1].value):

            encoded = encoders[0].encoded
            log("Using encoder output without projection.")
        elif len(encoders) >= 1:
            with tf.variable_scope("encoders_projection"):
                encoded_concat = tf.concat(1, [e.encoded for e in encoders])
                concat_size = encoded_concat.get_shape()[1].value
                proj = tf.get_variable(name="project_encoders",
                                       shape=[concat_size, depth * rnn_size])
                encoded_concat_dropped = tf.nn.dropout(
                    encoded_concat, self.dropout_placeholder)
                proj_bias = tf.Variable(tf.zeros([depth * rnn_size]))
                encoded = tf.matmul(encoded_concat_dropped, proj) + proj_bias
        elif len(encoders) == 0:  # if we want to train just LM
            encoded = tf.zeros([rnn_size])
            log("No encoder - language model only.")

        ### TODO: the question is whether this is in the right order
        self.encoded = encoded
        encoded = tf.nn.dropout(encoded, self.dropout_placeholder)

        ### the chunk below declares the placeholders for the decoder inputs
        ### the placeholders are added to the dec_encoder_ins collection, which
        ### does not seem to be used anywhere
        ### self.targets is self.gt_inputs shifted one position to the left

        self.gt_inputs = []
        with tf.variable_scope("decoder_inputs"):
            for i in range(max_output_len + 2):
                dec = tf.placeholder(tf.int64, [None],
                                     name='decoder{0}'.format(i))
                tf.add_to_collection('dec_encoder_ins', dec)
                self.gt_inputs.append(dec)

        self.targets = self.gt_inputs[1:]

        ### this bit defines the input weights; there are as many of them as
        ### there are targets, and they most likely contain just ones and zeros
        ### depending on whether we are already past the end of the input
        ### sentence or not. this should go away entirely with the switch to
        ### dynamic rnn

        self.weights_ins = []
        with tf.variable_scope("input_weights"):
            for _ in range(len(self.targets)):
                self.weights_ins.append(tf.placeholder(tf.float32, [None]))

        ### what follows is the code of the decoder itself in its own scope
        ### why the things above are in a separate scope of their own, I do not know

        with tf.variable_scope('decoder'):

            ### declaring the variables for the weights and biases that map
            ### the state to the output layer
            ### why is get_variable not used here? it uses the uniform unit
            ### scaling initializer, which is at least a cooler name

            decoding_w = tf.Variable(tf.random_uniform(
                [rnn_size, len(vocabulary)], -0.5, 0.5),
                                     name="state_to_word_W")

            decoding_b = tf.Variable(tf.fill([len(vocabulary)],
                                             -math.log(len(vocabulary))),
                                     name="state_to_word_b")

            ### if we are not using shared embeddings, we create our own
            ### they serve as a mapping from the vocabulary to the vector that
            ### is fed to the decoder input at every time step
            ### to share embeddings, they need to have the same size

            if reused_word_embeddings is None:
                decoding_em = tf.Variable(tf.random_uniform(
                    [len(vocabulary), embedding_size], -0.5, 0.5),
                                          name="word_embeddings")
            else:
                decoding_em = reused_word_embeddings.word_embeddings

            ### build the embedded ground-truth inputs and apply dropout
            ### they are used during training

            embedded_gt_inputs = [
                tf.nn.embedding_lookup(decoding_em, o)
                for o in self.gt_inputs[:-1]
            ]

            embedded_gt_inputs = [
                tf.nn.dropout(i, self.dropout_placeholder)
                for i in embedded_gt_inputs
            ]

            ### define a function that returns the logits for a given state
            ### this will have to be reworked; the copy-net is in here as well
            ### the logits are the dropped-out state multiplied by the output
            ### weight matrix, plus the biases

            def standard_logits(state):
                state = tf.nn.dropout(state, self.dropout_placeholder)
                return tf.matmul(state, decoding_w) + decoding_b, None

            logit_function = standard_logits

            ### COPY NET
            ### not going to deal with this right now

            if copy_net:
                # This is implementation of Copy-net
                # (http://arxiv.org/pdf/1603.06393v2.pdf)
                encoder_input_indices, copy_states, copy_mask = copy_net
                copy_tensor_dropped = tf.nn.dropout(copy_states,
                                                    self.dropout_placeholder)
                copy_tensors = [
                    tf.squeeze(t, [1])
                    for t in tf.split(1, max_output_len +
                                      2, copy_tensor_dropped)
                ]

                copy_features_size = copy_states.get_shape()[2].value

                # first we do the learned projection of the ecnoder outputs
                copy_w = tf.get_variable(name="copy_W",
                                         shape=[copy_features_size, rnn_size])

                projected_inputs = tf.concat(1, [
                    tf.expand_dims(tf.matmul(c, copy_w), 1)
                    for c in copy_tensors
                ])

                batch_size = tf.shape(encoder_input_indices[0])[0]

                # tensor of batch numbers for indexing in a sparse vector
                batch_range = tf.range(start=0, limit=batch_size)
                batch_time_vocabulary_shape = tf.concat(
                    0, [
                        tf.expand_dims(batch_size, 0),
                        tf.constant(len(vocabulary), shape=[1])
                    ])

                ones = tf.ones(tf.expand_dims(batch_size, 0))

                vocabulary_shaped_list = []
                for slice_indices in encoder_input_indices:
                    complete_indices = tf.concat(1, [
                        tf.expand_dims(batch_range, 1),
                        tf.expand_dims(slice_indices, 1)
                    ])

                    vocabulary_shaped = tf.sparse_to_dense(
                        complete_indices, batch_time_vocabulary_shape, ones)

                    vocabulary_shaped_list.append(vocabulary_shaped)
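                    # vocabulary_shaped is a (batch x vocabulary) indicator:
                    # a 1 at the vocabulary id of the source token at this
                    # input position, 0 elsewhere; it is used to map copy
                    # scores from source positions onto vocabulary entries.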

                vocabulary_shaped_indices = tf.concat(
                    1, [tf.expand_dims(v, 1) for v in vocabulary_shaped_list])

                def copy_net_logit_function(state):
                    state = tf.nn.dropout(state, self.dropout_placeholder)

                    # the logits for generating the next word are computed in
                    # the standard way
                    generate_logits = tf.matmul(state, decoding_w) + decoding_b

                    # Equation 8 in the paper ... in shape of source sentence
                    # (batch x time)
                    copy_logits_in_time = tf.reduce_sum(
                        projected_inputs * tf.expand_dims(state, 1), [2])

                    # mask out the padding in exponential domain
                    copy_logits_in_time_exp_masked = tf.exp(
                        tf.minimum([[80.0]], copy_logits_in_time)) * copy_mask

                    #  ... in shape of vocabulary (batch x time x vocabulary)
                    copy_logits_in_vocabulary = tf.expand_dims(
                        copy_logits_in_time_exp_masked,
                        2) * vocabulary_shaped_indices

                    # Equation 6 without normalization
                    copy_logits_exp = tf.reduce_sum(copy_logits_in_vocabulary,
                                                    [1])

                    logits_exp = copy_logits_exp \
                                 + tf.exp(tf.minimum([[80.0]], generate_logits))

                    return (tf.log(tf.maximum([[1e-40]], logits_exp)),
                            copy_logits_in_time)

                logit_function = copy_net_logit_function

            ### END OF COPY-NET
            ### Below are two loop functions. A loop function is a function
            ### used at run time. It takes the state and the time-step index
            ### and returns the input for the next step, after embedding and dropout

            def loop(prev_state, _):
                # it takes the previous hidden state, finds the word and formats
                # it as input for the next time step ... used in the decoder in
                # the "real decoding scenario"
                out_activation, _ = logit_function(prev_state)
                prev_word_index = tf.argmax(out_activation, 1)
                next_step_embedding = tf.nn.embedding_lookup(
                    decoding_em, prev_word_index)

                return tf.nn.dropout(next_step_embedding,
                                     self.dropout_placeholder)

            ### This loop function implements scheduled sampling:
            ### training starts on the gold data and over time gradually
            ### switches to the loop function. This particular one does it
            ### separately for every training instance in the batch.

            def sampling_loop(prev_state, i):
                """
                Loop function performing the scheduled sampling
                (http://arxiv.org/pdf/1506.03099v3.pdf) with the inverse
                sigmoid decay.
                """
                threshold = scheduled_sampling / (scheduled_sampling + tf.exp(
                    tf.to_float(self.learning_step) / scheduled_sampling))

                condition = tf.less_equal(
                    tf.random_uniform(tf.shape(embedded_gt_inputs[0])),
                    threshold)

                return tf.select(condition, embedded_gt_inputs[i],
                                 loop(prev_state, i))

            gt_loop_function = sampling_loop if scheduled_sampling else None

            ### This function exists so that we can choose which cell is used
            ### as the RNN cell. The noisy activations did not help, and this
            ### could probably be removed entirely.
            ### Below it is the code that wraps the cells in dropout and builds
            ### a MultiRNNCell from them (in case we want a deeper recurrent part)

            def get_rnn_cell():
                if use_noisy_activations:
                    return NoisyGRUCell(rnn_size, training=self.is_training)
                else:
                    return tf.nn.rnn_cell.GRUCell(rnn_size)

            decoder_cells = [get_rnn_cell()]

            for _ in range(1, depth):
                decoder_cells[-1] = tf.nn.rnn_cell.DropoutWrapper(
                    decoder_cells[-1],
                    output_keep_prob=self.dropout_placeholder)

                decoder_cells.append(get_rnn_cell())

            decoder_cell = tf.nn.rnn_cell.MultiRNNCell(decoder_cells)

            ### Now comes attention. We just check the encoders to see whether
            ### they have an attention object defined or not.

            if use_attention:
                attention_objects = [
                    e.attention_object for e in encoders if e.attention_object
                ]
            else:
                attention_objects = []

            ### And now the decoding procedure itself. This first one returns
            ### outputs using the gold inputs (during training).

            rnn_outputs_gt_ins, _ = attention_decoder(
                embedded_gt_inputs,
                encoded,
                attention_objects,
                embedding_size,
                cell=decoder_cell,
                loop_function=gt_loop_function)

            tf.get_variable_scope().reuse_variables()

            ### Below is the decoding procedure for run time, i.e. using the
            ### loop function.
            ### Why is this a placeholder? Why isn't it a constant?

            self.go_symbols = tf.placeholder(tf.int32,
                                             shape=[None],
                                             name="decoder_go_symbols")

            decoder_inputs = [
                tf.nn.embedding_lookup(decoding_em, self.go_symbols)
            ]

            decoder_inputs += [None for _ in range(self.max_output_len)]

            rnn_outputs_decoded_ins, _ = attention_decoder(decoder_inputs,
                                                           encoded,
                                                           attention_objects,
                                                           embedding_size,
                                                           cell=decoder_cell,
                                                           loop_function=loop)

            self.hidden_states = rnn_outputs_decoded_ins

            ### END of the decoder scope

        def get_decoded(rnn_outputs):
            logits = []
            decoded = []
            copynet_logits = []

            for out in rnn_outputs:
                out_activation, logits_in_time = logit_function(out)

                if copy_net:
                    copynet_logits.append(logits_in_time)

                logits.append(out_activation)
                decoded.append(tf.argmax(out_activation[:, 1:], 1) + 1)

            return decoded, logits, copynet_logits

        ### decoding and loss with the ground-truth inputs (during training)

        _, self.gt_logits, _ = get_decoded(rnn_outputs_gt_ins)

        self.loss_with_gt_ins = tf.nn.seq2seq.sequence_loss(
            self.gt_logits, self.targets, self.weights_ins, len(vocabulary))

        self.cost = self.loss_with_gt_ins

        ### decoding and loss with the loop function (runtime)

        self.decoded_seq, self.decoded_logits, self.copynet_logits = \
            get_decoded(rnn_outputs_decoded_ins)

        self.loss_with_decoded_ins = tf.nn.seq2seq.sequence_loss(
            self.decoded_logits, self.targets, self.weights_ins,
            len(vocabulary))

        ### Below are the summaries: everything that will be logged to
        ### TensorBoard.

        tf.scalar_summary('train_loss_with_gt_inputs',
                          self.loss_with_gt_ins,
                          collections=["summary_train"])

        tf.scalar_summary('train_loss_with_decoded_inputs',
                          self.loss_with_decoded_ins,
                          collections=["summary_train"])

        tf.scalar_summary('train_optimization_cost',
                          self.cost,
                          collections=["summary_train"])

        log("Decoder initalized.")
def get_dense_x(index, value):
    dense_x = tf.sparse_to_dense(tf.sparse_tensor_to_dense(index),
                                 [num_features],
                                 tf.sparse_tensor_to_dense(value))
    return tf.reshape(dense_x, [num_features, 1])
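get_dense_x scatters a sparse (index, value) representation into a dense column vector. A rough numpy equivalent of what tf.sparse_to_dense does here, with a hypothetical num_features of 10 purely for illustration:

import numpy as np

def dense_from_sparse(indices, values, num_features=10):
    # scatter each value to its index; all remaining positions stay 0
    dense = np.zeros(num_features, dtype=np.float32)
    dense[indices] = values
    return dense.reshape(num_features, 1)

print(dense_from_sparse([1, 4, 7], [0.5, 2.0, -1.0]).ravel())
# [ 0.   0.5  0.   0.   2.   0.   0.  -1.   0.   0. ]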
Beispiel #42
0
    def build_net(self, is_training=True):
        with self.graph.as_default():
            if is_training:
                self.train_stage = tf.placeholder(tf.bool, shape=())
                train_image, train_label, train_label_len = self.load_tfrecord(
                    config.train_tfrecord)
                valid_image, valid_label, valid_label_len = self.load_tfrecord(
                    config.valid_tfrecord)
                self.x = tf.cond(self.train_stage, lambda: train_image,
                                 lambda: valid_image)
                self.label = tf.cond(self.train_stage, lambda: train_label,
                                     lambda: valid_label)
                self.label_len = tf.cond(self.train_stage,
                                         lambda: train_label_len,
                                         lambda: valid_label_len)
            else:
                self.x = tf.placeholder(tf.float32,
                                        shape=(None, config.image_height,
                                               config.image_max_width, 1),
                                        name='image_batch')

            enc = self.base_net(is_training)
            print('enc1:', enc)
            tshape = enc.get_shape().as_list()
            final_width = tshape[1] * tshape[2]
            enc = tf.reshape(enc, [-1, final_width, config.rnn_units])
            print('enc2:', enc)
            conv_mask = tf.sign(tf.abs(tf.reduce_sum(enc, -1)))
            conv_length = tf.reduce_sum(tf.cast(conv_mask, tf.int32), -1)
            for i in range(config.rnn_layers_num):
                _enc = tf.layers.dense(enc, config.rnn_units, use_bias=False)
                with tf.variable_scope("rnn_layer_{}".format(i)):
                    cell_fw = tf.nn.rnn_cell.LSTMCell(
                        num_units=config.rnn_units / 2, state_is_tuple=True)
                    cell_bw = tf.nn.rnn_cell.LSTMCell(
                        num_units=config.rnn_units / 2, state_is_tuple=True)
                    enc, state = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw=cell_fw,
                        cell_bw=cell_bw,
                        inputs=enc,
                        dtype=tf.float32,
                        time_major=False)
                    enc = _enc + tf.concat(values=[enc[0], enc[1]], axis=-1)
            if is_training:
                enc = tf.layers.dropout(enc, 0.5)

            self.logits = tf.layers.dense(enc, len(self.idx2symbol) + 1)
            print('last logit shape', self.logits)
            logit_shape = self.logits.get_shape().as_list()

            time_major_logits = tf.transpose(
                self.logits, [1, 0, 2])  # max_time* batch_size * num_classes
            pmask = tf.sign(tf.abs(tf.reduce_sum(self.logits, -1)))

            seq_len = tf.fill([config.batch_size], logit_shape[1])
            print('seq:', seq_len)
            greedy_preds = tf.nn.ctc_greedy_decoder(time_major_logits, seq_len)
            preds_sparse = tf.cast(greedy_preds[0][0], tf.int32)

            self.preds = tf.sparse_to_dense(preds_sparse.indices,
                                            preds_sparse.dense_shape,
                                            preds_sparse.values,
                                            name='pred')

            print('preds:', self.preds)

            if is_training:
                # convert the dense label batch to a SparseTensor
                batch_label_length = config.label_max_len
                spare_tensor_indices = tf.where(
                    tf.less(tf.cast(0, tf.int32), self.label))  # indices where label > 0
                print('label shape', self.label)
                spare_tensor_values = tf.reshape(
                    self.label, [config.batch_size * batch_label_length])
                mask = tf.cast(tf.less(tf.cast(0, tf.int32),
                                       spare_tensor_values),
                               dtype=tf.bool)
                spare_tensor_values = tf.boolean_mask(spare_tensor_values,
                                                      mask)
                labels_sparse = tf.SparseTensor(
                    indices=spare_tensor_indices,
                    values=spare_tensor_values,
                    dense_shape=[config.batch_size, batch_label_length])
                loss = tf.nn.ctc_loss(labels=labels_sparse,
                                      inputs=self.logits,
                                      sequence_length=seq_len,
                                      time_major=False)
                self.loss = tf.reduce_mean(loss)

                self.global_step = tf.Variable(0, trainable=False)
                # define the learning-rate schedule and the optimizer
                lr = config.learning_rate
                rate = tf.train.exponential_decay(
                    lr,
                    self.global_step,
                    decay_steps=config.decay_steps,
                    decay_rate=0.97,
                    staircase=True)
                opt = tf.train.AdamOptimizer(learning_rate=rate)
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    self.train_op = opt.minimize(self.loss,
                                                 global_step=self.global_step)
                # accuracy
                self.edit_dist = tf.reduce_sum(
                    tf.edit_distance(preds_sparse, labels_sparse, False))
                self.char_count = tf.reduce_sum(self.label_len)
                tf.summary.scalar('loss', self.loss)
                self.merged_summary_op = tf.summary.merge_all()
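The dense-to-sparse label conversion above (tf.where plus tf.boolean_mask feeding a tf.SparseTensor for tf.nn.ctc_loss) is easier to follow on a tiny numpy example. This is only an illustration of the same transformation, not code from the repository:

import numpy as np

labels = np.array([[3, 7, 2, 0, 0],    # 0 is the padding label
                   [5, 1, 0, 0, 0]])
indices = np.argwhere(labels > 0)       # (row, col) positions of the real labels
values = labels[labels > 0]             # the label ids at those positions
dense_shape = labels.shape

print(indices.tolist())   # [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]]
print(values.tolist())    # [3, 7, 2, 5, 1]
print(dense_shape)        # (2, 5)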
Beispiel #43
0
def main():
  # Get hyperparameters
  if FLAGS.enable_colored_log:
    import coloredlogs
    coloredlogs.install()
  logging.basicConfig(level=logging.INFO)
  INPUT_FILE_FORMAT = FLAGS.input_file_format
  if INPUT_FILE_FORMAT not in ["tfrecord", "csv"]:
    logging.error("Unknow input file format: {}".format(INPUT_FILE_FORMAT))
    exit(1)
  FEATURE_SIZE = FLAGS.feature_size
  LABEL_SIZE = FLAGS.label_size
  EPOCH_NUMBER = FLAGS.epoch_number
  if EPOCH_NUMBER <= 0:
    EPOCH_NUMBER = None
  BATCH_THREAD_NUMBER = FLAGS.batch_thread_number
  MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue
  BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE
  MODE = FLAGS.mode
  MODEL = FLAGS.model
  CHECKPOINT_PATH = FLAGS.checkpoint_path
  if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists(
      CHECKPOINT_PATH):
    os.makedirs(CHECKPOINT_PATH)
  CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt"
  LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH)
  OUTPUT_PATH = FLAGS.output_path
  if not OUTPUT_PATH.startswith("fds://") and not os.path.exists(OUTPUT_PATH):
    os.makedirs(OUTPUT_PATH)
  pprint.PrettyPrinter().pprint(FLAGS.__flags)

  # Process TFRecords files
  def read_and_decode_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            "label": tf.FixedLenFeature([], tf.float32),
            "features": tf.FixedLenFeature([FEATURE_SIZE], tf.float32),
        })
    label = features["label"]
    features = features["features"]
    return label, features

  def read_and_decode_csv(filename_queue):
    # TODO: Not generic for all datasets
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)

    # Default values, in case of empty columns. Also specifies the type of the
    # decoded result.
    #record_defaults = [[1], [1], [1], [1], [1]]
    record_defaults = [[1], [1.0], [1.0], [1.0], [1.0]]
    col1, col2, col3, col4, col5 = tf.decode_csv(
        value, record_defaults=record_defaults)
    label = col1
    features = tf.stack([col2, col3, col4, col5])
    return label, features

  # Read TFRecords files for training
  filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(FLAGS.train_file),
      num_epochs=EPOCH_NUMBER)
  if INPUT_FILE_FORMAT == "tfrecord":
    label, features = read_and_decode_tfrecord(filename_queue)
  elif INPUT_FILE_FORMAT == "csv":
    label, features = read_and_decode_csv(filename_queue)
  batch_labels, batch_features = tf.train.shuffle_batch(
      [label, features],
      batch_size=FLAGS.batch_size,
      num_threads=BATCH_THREAD_NUMBER,
      capacity=BATCH_CAPACITY,
      min_after_dequeue=MIN_AFTER_DEQUEUE)

  # Read TFRecords files for validation
  validate_filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(FLAGS.validate_file),
      num_epochs=EPOCH_NUMBER)
  if INPUT_FILE_FORMAT == "tfrecord":
    validate_label, validate_features = read_and_decode_tfrecord(
        validate_filename_queue)
  elif INPUT_FILE_FORMAT == "csv":
    validate_label, validate_features = read_and_decode_csv(
        validate_filename_queue)
  validate_batch_labels, validate_batch_features = tf.train.shuffle_batch(
      [validate_label, validate_features],
      batch_size=FLAGS.validate_batch_size,
      num_threads=BATCH_THREAD_NUMBER,
      capacity=BATCH_CAPACITY,
      min_after_dequeue=MIN_AFTER_DEQUEUE)

  # Define the model
  input_units = FEATURE_SIZE
  output_units = LABEL_SIZE
  model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()]

  def full_connect(inputs, weights_shape, biases_shape, is_train=True):
    weights = tf.get_variable("weights",
                              weights_shape,
                              initializer=tf.random_normal_initializer())
    biases = tf.get_variable("biases",
                             biases_shape,
                             initializer=tf.random_normal_initializer())
    layer = tf.matmul(inputs, weights) + biases

    if FLAGS.enable_bn and is_train:
      mean, var = tf.nn.moments(layer, axes=[0])
      scale = tf.get_variable("scale",
                              biases_shape,
                              initializer=tf.random_normal_initializer())
      shift = tf.get_variable("shift",
                              biases_shape,
                              initializer=tf.random_normal_initializer())
      layer = tf.nn.batch_normalization(layer, mean, var, shift, scale,
                                        FLAGS.bn_epsilon)
    return layer

  def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True):
    layer = full_connect(inputs, weights_shape, biases_shape, is_train)
    layer = tf.nn.relu(layer)
    return layer

  def customized_inference(inputs, is_train=True):
    hidden1_units = 128
    hidden2_units = 32
    hidden3_units = 8

    with tf.variable_scope("input"):
      layer = full_connect_relu(inputs, [input_units, hidden1_units],
                                [hidden1_units], is_train)
    with tf.variable_scope("layer0"):
      layer = full_connect_relu(layer, [hidden1_units, hidden2_units],
                                [hidden2_units], is_train)
    with tf.variable_scope("layer1"):
      layer = full_connect_relu(layer, [hidden2_units, hidden3_units],
                                [hidden3_units], is_train)
    if FLAGS.enable_dropout and is_train:
      layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob)
    with tf.variable_scope("output"):
      layer = full_connect(layer, [hidden3_units, output_units],
                           [output_units], is_train)
    return layer

  def dnn_inference(inputs, is_train=True):
    with tf.variable_scope("input"):
      layer = full_connect_relu(inputs,
                                [input_units, model_network_hidden_units[0]],
                                [model_network_hidden_units[0]], is_train)

    for i in range(len(model_network_hidden_units) - 1):
      with tf.variable_scope("layer{}".format(i)):
        layer = full_connect_relu(
            layer,
            [model_network_hidden_units[i], model_network_hidden_units[i + 1]],
            [model_network_hidden_units[i + 1]], is_train)

    with tf.variable_scope("output"):
      layer = full_connect(layer,
                           [model_network_hidden_units[-1], output_units],
                           [output_units], is_train)
    return layer

  def lr_inference(inputs, is_train=True):
    with tf.variable_scope("lr"):
      layer = full_connect(inputs, [input_units, output_units], [output_units])
    return layer

  def wide_and_deep_inference(inputs, is_train=True):
    return lr_inference(inputs, is_train) + dnn_inference(inputs, is_train)

  def cnn_inference(inputs, is_train=True):
    # TODO: Change if validate_batch_size is different
    # [BATCH_SIZE, 512 * 512 * 1] -> [BATCH_SIZE, 512, 512, 1]
    inputs = tf.reshape(inputs, [FLAGS.batch_size, 512, 512, 1])

    # [BATCH_SIZE, 512, 512, 1] -> [BATCH_SIZE, 128, 128, 8]
    with tf.variable_scope("conv0"):
      weights = tf.get_variable("weights", [3, 3, 1, 8],
                                initializer=tf.random_normal_initializer())
      bias = tf.get_variable("bias", [8],
                             initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(inputs,
                           weights,
                           strides=[1, 1, 1, 1],
                           padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(layer,
                             ksize=[1, 4, 4, 1],
                             strides=[1, 4, 4, 1],
                             padding="SAME")

    # [BATCH_SIZE, 128, 128, 8] -> [BATCH_SIZE, 32, 32, 8]
    with tf.variable_scope("conv1"):
      weights = tf.get_variable("weights", [3, 3, 8, 8],
                                initializer=tf.random_normal_initializer())
      bias = tf.get_variable("bias", [8],
                             initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(layer,
                           weights,
                           strides=[1, 1, 1, 1],
                           padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(layer,
                             ksize=[1, 4, 4, 1],
                             strides=[1, 4, 4, 1],
                             padding="SAME")

    # [BATCH_SIZE, 32, 32, 8] -> [BATCH_SIZE, 8, 8, 8]
    with tf.variable_scope("conv2"):
      weights = tf.get_variable("weights", [3, 3, 8, 8],
                                initializer=tf.random_normal_initializer())
      bias = tf.get_variable("bias", [8],
                             initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(layer,
                           weights,
                           strides=[1, 1, 1, 1],
                           padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(layer,
                             ksize=[1, 4, 4, 1],
                             strides=[1, 4, 4, 1],
                             padding="SAME")

    # [BATCH_SIZE, 8, 8, 8] -> [BATCH_SIZE, 8 * 8 * 8]
    layer = tf.reshape(layer, [-1, 8 * 8 * 8])

    # [BATCH_SIZE, 8 * 8 * 8] -> [BATCH_SIZE, LABEL_SIZE]
    with tf.variable_scope("output"):
      weights = tf.get_variable("weights", [8 * 8 * 8, LABEL_SIZE],
                                initializer=tf.random_normal_initializer())
      bias = tf.get_variable("bias", [LABEL_SIZE],
                             initializer=tf.random_normal_initializer())
      layer = tf.add(tf.matmul(layer, weights), bias)

    return layer

  def inference(inputs, is_train=True):
    if MODEL == "dnn":
      return dnn_inference(inputs, is_train)
    elif MODEL == "lr":
      return lr_inference(inputs, is_train)
    elif MODEL == "wide_and_deep":
      return wide_and_deep_inference(inputs, is_train)
    elif MODEL == "customized":
      return customized_inference(inputs, is_train)
    elif MODEL == "cnn":
      return cnn_inference(inputs, is_train)
    else:
      logging.error("Unknown model, exit now")
      exit(1)

  logging.info("Use the model: {}, model network: {}".format(
      MODEL, FLAGS.model_network))
  logits = inference(batch_features, True)
  batch_labels = tf.to_int64(batch_labels)
  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=batch_labels)
  loss = tf.reduce_mean(cross_entropy, name="loss")
  global_step = tf.Variable(0, name="global_step", trainable=False)
  if FLAGS.enable_lr_decay:
    logging.info("Enable learning rate decay rate: {}".format(
        FLAGS.lr_decay_rate))
    starter_learning_rate = FLAGS.learning_rate
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               100000,
                                               FLAGS.lr_decay_rate,
                                               staircase=True)
  else:
    learning_rate = FLAGS.learning_rate
  optimizer = get_optimizer(FLAGS.optimizer, learning_rate)
  train_op = optimizer.minimize(loss, global_step=global_step)
  tf.get_variable_scope().reuse_variables()

  # Define accuracy op for train data
  train_accuracy_logits = inference(batch_features, False)
  train_softmax = tf.nn.softmax(train_accuracy_logits)
  train_correct_prediction = tf.equal(
      tf.argmax(train_softmax, 1), batch_labels)
  train_accuracy = tf.reduce_mean(tf.cast(train_correct_prediction,
                                          tf.float32))

  # Define auc op for train data
  batch_labels = tf.cast(batch_labels, tf.int32)
  sparse_labels = tf.reshape(batch_labels, [-1, 1])
  derived_size = tf.shape(batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(axis=1, values=[indices, sparse_labels])
  outshape = tf.stack([derived_size, LABEL_SIZE])
  new_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax,
                                                  new_batch_labels)

  # Define accuracy op for validate data
  validate_accuracy_logits = inference(validate_batch_features, False)
  validate_softmax = tf.nn.softmax(validate_accuracy_logits)
  validate_batch_labels = tf.to_int64(validate_batch_labels)
  validate_correct_prediction = tf.equal(
      tf.argmax(validate_softmax, 1), validate_batch_labels)
  validate_accuracy = tf.reduce_mean(tf.cast(validate_correct_prediction,
                                             tf.float32))

  # Define auc op for validate data
  validate_batch_labels = tf.cast(validate_batch_labels, tf.int32)
  sparse_labels = tf.reshape(validate_batch_labels, [-1, 1])
  derived_size = tf.shape(validate_batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(axis=1, values=[indices, sparse_labels])
  outshape = tf.stack([derived_size, LABEL_SIZE])
  new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax,
                                                     new_validate_batch_labels)

  # Define inference op
  inference_features = tf.placeholder("float", [None, FEATURE_SIZE])
  inference_logits = inference(inference_features, False)
  inference_softmax = tf.nn.softmax(inference_logits)
  inference_op = tf.argmax(inference_softmax, 1)
  keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
  keys = tf.identity(keys_placeholder)
  model_signature = {
      "inputs": exporter.generic_signature({"keys": keys_placeholder,
                                            "features": inference_features}),
      "outputs": exporter.generic_signature({"keys": keys,
                                             "softmax": inference_softmax,
                                             "prediction": inference_op})
  }

  # Initialize saver and summary
  saver = tf.train.Saver()
  tf.summary.scalar("loss", loss)
  tf.summary.scalar("train_accuracy", train_accuracy)
  tf.summary.scalar("train_auc", train_auc)
  tf.summary.scalar("validate_accuracy", validate_accuracy)
  tf.summary.scalar("validate_auc", validate_auc)
  summary_op = tf.summary.merge_all()
  init_op = [tf.global_variables_initializer(),
             tf.local_variables_initializer()]

  # Create session to run
  with tf.Session() as sess:
    logging.info("Start to run with mode: {}".format(MODE))
    writer = tf.summary.FileWriter(OUTPUT_PATH, sess.graph)
    sess.run(init_op)

    if MODE == "train":
      # Restore session and start queue runner
      restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT)
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(coord=coord, sess=sess)
      start_time = datetime.datetime.now()

      try:
        while not coord.should_stop():
          _, loss_value, step = sess.run([train_op, loss, global_step])

          # Print state while training
          if step % FLAGS.steps_to_validate == 0:
            train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run(
                [train_accuracy, train_auc, validate_accuracy, validate_auc,
                 summary_op])
            end_time = datetime.datetime.now()
            logging.info(
                "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".format(
                    end_time - start_time, step, loss_value,
                    train_accuracy_value, train_auc_value,
                    validate_accuracy_value, validate_auc_value))
            writer.add_summary(summary_value, step)
            saver.save(sess, CHECKPOINT_FILE, global_step=step)
            start_time = end_time
      except tf.errors.OutOfRangeError:
        # Export the model after training
        export_model(sess, saver, model_signature, FLAGS.model_path,
                     FLAGS.model_version)
      finally:
        coord.request_stop()
      coord.join(threads)

    elif MODE == "export":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      # Export the model
      export_model(sess, saver, model_signature, FLAGS.model_path,
                   FLAGS.model_version)

    elif MODE == "savedmodel":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      logging.info("Export the saved model to {}".format(
          FLAGS.saved_model_path))
      export_path_base = FLAGS.saved_model_path
      export_path = os.path.join(
          compat.as_bytes(export_path_base),
          compat.as_bytes(str(FLAGS.model_version)))

      model_signature = signature_def_utils.build_signature_def(
          inputs={
              "keys": utils.build_tensor_info(keys_placeholder),
              "features": utils.build_tensor_info(inference_features)
          },
          outputs={
              "keys": utils.build_tensor_info(keys),
              "softmax": utils.build_tensor_info(inference_softmax),
              "prediction": utils.build_tensor_info(inference_op)
          },
          method_name=signature_constants.PREDICT_METHOD_NAME)

      try:
        builder = saved_model_builder.SavedModelBuilder(export_path)
        builder.add_meta_graph_and_variables(
            sess,
            [tag_constants.SERVING],
            clear_devices=True,
            signature_def_map={
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                model_signature,
            },
            #legacy_init_op=legacy_init_op)
            legacy_init_op=tf.group(tf.initialize_all_tables(),
                                    name="legacy_init_op"))

        builder.save()
      except Exception as e:
        logging.error("Fail to export saved model, exception: {}".format(e))

    elif MODE == "inference":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      # Load inference test data
      inference_result_file_name = FLAGS.inference_result_file
      inference_test_file_name = FLAGS.inference_test_file
      inference_data = np.genfromtxt(inference_test_file_name, delimiter=",")
      inference_data_features = inference_data[:, 0:9]
      inference_data_labels = inference_data[:, 9]

      # Run inference
      start_time = datetime.datetime.now()
      prediction, prediction_softmax = sess.run(
          [inference_op, inference_softmax],
          feed_dict={inference_features: inference_data_features})
      end_time = datetime.datetime.now()

      # Compute accuracy
      label_number = len(inference_data_labels)
      correct_label_number = 0
      for i in range(label_number):
        if inference_data_labels[i] == prediction[i]:
          correct_label_number += 1
      accuracy = float(correct_label_number) / label_number

      # Compute auc
      y_true = np.array(inference_data_labels)
      y_score = prediction_softmax[:, 1]
      fpr, tpr, thresholds = metrics.roc_curve(y_true,
                                               y_score,
                                               pos_label=1)
      auc = metrics.auc(fpr, tpr)
      logging.info("[{}] Inference accuracy: {}, auc: {}".format(
          end_time - start_time, accuracy, auc))

      # Save result into the file
      np.savetxt(inference_result_file_name, prediction_softmax, delimiter=",")
      logging.info("Save result to file: {}".format(
          inference_result_file_name))
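The AUC blocks above build one-hot label matrices by concatenating a row index with each label and scattering 1.0 via tf.sparse_to_dense. A small numpy sketch of the same construction (tf.one_hot(batch_labels, LABEL_SIZE) would give an equivalent result):

import numpy as np

labels = np.array([2, 0, 1])
label_size = 3

one_hot = np.zeros((len(labels), label_size), dtype=np.float32)
one_hot[np.arange(len(labels)), labels] = 1.0   # scatter a 1 at (row, label)
print(one_hot)
# [[0. 0. 1.]
#  [1. 0. 0.]
#  [0. 1. 0.]]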
    def build_model(self):

        # for every video in the batch (50) there are n_video_lstm_step (80) frames, each represented by a feature vector of dimension dim_image
        video = tf.placeholder(
            tf.float32,
            [self.batch_size, self.n_video_lstm_step, self.dim_image],
            name="video")

        # video_mask: 1 where a video frame is present, 0 where there is no video input
        video_mask = tf.placeholder(tf.float32,
                                    [self.batch_size, self.n_video_lstm_step],
                                    name="video_mask")

        #  placeholder that holds the captions
        caption = tf.placeholder(
            tf.int32, [self.batch_size, self.n_caption_lstm_step + 1],
            name="caption")

        # caption_mask: 1 where a caption word is present, 0 where it is not
        caption_mask = tf.placeholder(
            tf.float32, [self.batch_size, self.n_caption_lstm_step + 1],
            name="caption_mask")

        # flatten the video placeholder from shape (50, 80, 4096) to shape (4000, 4096)
        video_flat = tf.reshape(video, [-1, self.dim_image])

        # do the matrix multiplication operation and addition of biases
        # encode_image_W has dimension = (4096,1000)
        # encode_image_b has dimension = (1000)
        # video_flat has shape = (4000, 4096)
        # obtained dimension = (4000, 1000)
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W,
                                    self.encode_image_b)
        # reshape from (4000, 1000) back to (50, 80, 1000)
        image_emb = tf.reshape(
            image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])

        state1 = tf.zeros([self.batch_size, self.lstm1.state_size])
        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])
        padding = tf.zeros([self.batch_size, self.dim_hidden])

        probs = []
        loss = 0.0
        lbls = []
        predictions = []
        # encoding phase
        for i in range(0, self.n_video_lstm_step):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

        # get the state (50, 2000) and output (50, 1000) from lstm1 and reuse it over the timesteps
            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(image_emb[:, i, :], state1)
                # As per the paper, zeros are padded to the output of lstm1 and then fed into lstm2
                # dimension of output1 = (50, 1000) for ith step
                # dimension of padding = (50, 1000)
                # after concatenation dimension becomes = (50, 2000)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1),
                                             state2)

                # output2 dimension = (50, 1000) for ith step

        # decoding step
        print "---- decoding ----"
        for i in range(0, self.n_caption_lstm_step):
            #with tf.device("/gpu:2"):
            # looks up the embedding for all the words of all the batches for the current lstm step
            tf.get_variable_scope().reuse_variables()

            current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:, i])
            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(padding, state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(
                    tf.concat([current_embed, output1], 1), state2)

        # for the ith timestep get the caption labels
        # labels = tensor of shape (50, 1)
            labels = tf.expand_dims(caption[:, i + 1], 1)
            # generate an indexing from 0 to batch_size - 1
            # tf.range(start, limit, delta) works just like np.arange()
            # indices = tensor of shape (50, 1)
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)

            # concat both of these to get a tensor of shape (50, 2)
            # concated stores the (row, word) indices where a 1 should be placed; all other positions get 0
            concated = tf.concat([indices, labels], 1)

            # onehot encoding for the words - dimension is (50, vocabulary)
            onehot_labels = tf.sparse_to_dense(
                concated, tf.stack([self.batch_size, self.vocabulary]), 1.0,
                0.0)

            # logit_words has dimension (50, vocabulary)
            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W,
                                          self.embed_word_b)

            # calculate the cross-entropy loss of the logits with the actual labels
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logit_words, labels=onehot_labels)

            # find cross_entropy loss only where mask = 1
            cross_entropy = cross_entropy * caption_mask[:, i]

            # store the probabilities
            probs.append(logit_words)
            lbls.append(onehot_labels)
            current_loss = tf.reduce_sum(cross_entropy) / self.batch_size
            loss = loss + current_loss
            predictions.append(tf.nn.softmax(logit_words))
        return loss, video, video_mask, caption, caption_mask, probs, predictions, lbls
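The decoding loop above masks the per-step cross-entropy with caption_mask before averaging over the batch. A minimal numpy sketch of that masked averaging (illustrative shapes only, not from the original repository):

import numpy as np

cross_entropy = np.array([[1.2, 0.7, 2.0],   # batch of 2 captions, 3 decoding steps
                          [0.4, 1.1, 0.9]])
caption_mask = np.array([[1.0, 1.0, 0.0],    # first caption has only 2 real words
                         [1.0, 1.0, 1.0]])

masked = cross_entropy * caption_mask
batch_size = cross_entropy.shape[0]
loss = sum(masked[:, i].sum() / batch_size for i in range(masked.shape[1]))
print(loss)   # padded positions contribute nothing to the loss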
    def get_datapoint_iter(file_idx=[], batch_size=s_batch):
        fileNames = map(lambda s: "/home/ubuntu/criteo-tfr-tiny/tfrecords" + s,
                        file_idx)
        # We first define a filename queue comprising 5 files.
        filename_queue = tf.train.string_input_producer(fileNames,
                                                        num_epochs=None)

        # TFRecordReader creates an operator in the graph that reads data from queue
        reader = tf.TFRecordReader()

        # Include a read operator that uses the filename queue. The output is a
        # string Tensor called serialized_example
        _, serialized_example = reader.read(filename_queue)

        # The string tensor is essentially a serialized protobuf with the
        # following fields: label, index, value. We list the protobuf fields we
        # are interested in to parse the data. Note that features here is a dict of tensors
        features = tf.parse_single_example(
            serialized_example,
            features={
                'label': tf.FixedLenFeature([1], dtype=tf.int64),
                'index': tf.VarLenFeature(dtype=tf.int64),
                'value': tf.VarLenFeature(dtype=tf.float32),
            })

        label = features['label']
        index = features['index']
        value = features['value']

        # These print statements are there for you to see the types of the
        # following variables
        print(label)
        print(index)
        print(value)

        # since we parsed VarLenFeatures, they are returned as SparseTensors.
        # To run operations on them, we first convert them to dense Tensors as below.
        dense_feature = tf.sparse_to_dense(
            tf.sparse_tensor_to_dense(index),
            [
                33762578,
            ],
            #                               tf.constant([33762578, 1], dtype=tf.int64),
            tf.sparse_tensor_to_dense(value))

        label_flt = tf.cast(label, tf.float32)
        # min_after_dequeue defines how big a buffer we will randomly sample
        #   from -- bigger means better shuffling but slower start up and more
        #   memory used.
        # capacity must be larger than min_after_dequeue and the amount larger
        #   determines the maximum we will prefetch.  Recommendation:
        #   min_after_dequeue + (num_threads + a small safety margin) * batch_size
        min_after_dequeue = 10
        capacity = min_after_dequeue + 3 * batch_size
        example_batch, label_batch = tf.train.shuffle_batch(
            [dense_feature[0:num_features], label_flt],
            batch_size=batch_size,
            capacity=capacity,
            min_after_dequeue=min_after_dequeue)

        return example_batch, label_batch
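The queue-sizing comment above recommends capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size; the code uses the "+ 3 * batch_size" special case of that rule. A quick arithmetic check with assumed values:

min_after_dequeue = 10
batch_size = 32
num_threads = 1
safety_margin = 2      # assumed margin, which gives the "+ 3 * batch_size" term

capacity = min_after_dequeue + (num_threads + safety_margin) * batch_size
print(capacity)        # 10 + 3 * 32 = 106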
Beispiel #46
0
    def build_model(self,
                    video,
                    video_mask,
                    caption,
                    caption_mask,
                    drop_sent='keep',
                    drop_video='keep',
                    weight_cap=1.,
                    weight_rbm=0.001,
                    weight_vid=1.):
        video_mask = tf.cast(video_mask, tf.float32)
        caption_mask = tf.cast(caption_mask, tf.float32)
        assert drop_sent in ['totally', 'random', 'keep']
        assert drop_video in ['totally', 'random', 'keep']

        video_flat = tf.reshape(video, [-1, self.dim_image])  # (b x nv) x d
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W,
                                    self.encode_image_b)  # (b x nv) x h
        image_emb = tf.reshape(
            image_emb, [self.batch_size, self.n_video_steps, self.dim_hidden
                        ])  # b x nv x h
        image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x h

        c_init = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        m_init = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        state1 = (c_init, m_init)
        c2 = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        m2 = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        state2 = (c_init, m_init)

        ######## Encoding Stage #########
        # encoding video
        output1 = tf.reduce_mean(image_emb, axis=0)

        with tf.variable_scope("model") as scope:
            # encoding sentence
            for i in xrange(self.n_caption_steps):
                if i > 0: scope.reuse_variables()
                with tf.variable_scope("LSTM2"):
                    with tf.device("/cpu:0"):
                        current_embed = tf.nn.embedding_lookup(
                            self.Wemb, caption[:, i])
                    output2, state2 = self.lstm2_dropout(
                        current_embed, state2)  # b x h
        ######## Encoding Stage #########

        ######## Dropout Stage #########
        if drop_sent == 'totally':
            output2 = tf.constant(0) * output2
            output2 = tf.stop_gradient(output2)
        elif drop_sent == 'random':
            coeff = tf.floor(tf.random_uniform([1], 0, 1) + 0.5)
            output2 = coeff * output2
        if drop_video == 'totally':
            output1 = tf.constant(0) * output1
            output1 = tf.stop_gradient(output1)
        elif drop_video == 'random':
            coeff = tf.floor(tf.random_uniform([1], 0, 1) + 0.5)
            output1 = coeff * output1
        ######## Dropout Stage #########

        ######## Semantic Learning Stage ########
        input_state = tf.concat([output1, output2], 1)  # b x (2 * h)
        loss_rbm, output_semantic = self.rbm(input_state)
        ######## Semantic Learning Stage ########

        ######## Decoding Stage ##########
        state3 = (c_init, m_init)
        state4 = (c_init, m_init)
        video_prev = tf.zeros([self.batch_size, self.dim_image])  # b x d_im
        sent_prev = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        current_embed = tf.zeros([self.batch_size, self.dim_hidden])  # b x h

        loss_caption = 0.0
        loss_video = 0.0

        image_part = tf.reshape(image_emb, [-1, self.dim_hidden])
        image_part = tf.matmul(image_part,
                               self.embed_att_Ua) + self.embed_att_ba
        image_part = tf.reshape(
            image_part, [self.n_video_steps, self.batch_size, self.dim_hidden])
        ## decoding sentence with attention
        with tf.variable_scope("model") as scope:
            # first write semantic into memory
            with tf.variable_scope("LSTM3"):
                _, state3 = self.lstm3_dropout(
                    tf.concat([output_semantic, output_semantic], 1), state3)
            for i in xrange(n_caption_steps):
                e = tf.tanh(
                    tf.matmul(sent_prev, self.embed_att_Wa) +
                    image_part)  # n x b x h
                e = tf.reshape(e, [-1, self.dim_hidden])
                e = tf.matmul(e, self.embed_att_w)  # n x b
                e = tf.reshape(e, [self.n_video_steps, self.batch_size])
                #            e = tf.reduce_sum(e,2) # n x b
                e_hat_exp = tf.multiply(tf.transpose(video_mask),
                                        tf.exp(e))  # n x b
                denomin = tf.reduce_sum(e_hat_exp, 0)  # b
                denomin = denomin + tf.to_float(tf.equal(
                    denomin, 0))  # regularize denominator
                alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2),
                                 [1, 1, self.dim_hidden
                                  ])  # n x b x h  # normalize to obtain alpha
                attention_list = tf.multiply(alphas, image_emb)  # n x b x h
                atten = tf.reduce_sum(
                    attention_list,
                    0)  # b x h       #  soft-attention weighted sum
                if i > 0: scope.reuse_variables()

                with tf.variable_scope("LSTM3"):
                    output3, state3 = self.lstm3_dropout(
                        tf.concat([atten, current_embed], 1), state3)  # b x h

                output3_2 = tf.tanh(
                    tf.nn.xw_plus_b(
                        tf.concat([output3, atten, current_embed], 1),
                        self.embed_nn_Wp, self.embed_nn_bp))  # b x h
                sent_prev = output3  # b x h
                labels = tf.expand_dims(caption[:, i], 1)  # b x 1
                indices = tf.expand_dims(tf.range(0, self.batch_size, 1),
                                         1)  # b x 1
                concated = tf.concat([indices, labels], 1)  # b x 2
                onehot_labels = tf.sparse_to_dense(
                    concated, tf.stack([self.batch_size,
                                        self.n_words]), 1.0, 0.0)  # b x w
                with tf.device("/cpu:0"):
                    current_embed = tf.nn.embedding_lookup(
                        self.Wemb, caption[:, i])

                logit_words = tf.nn.xw_plus_b(output3_2, self.embed_word_W,
                                              self.embed_word_b)  # b x w
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logit_words, labels=onehot_labels)  # b x 1
                cross_entropy = cross_entropy * caption_mask[:, i]  # b x 1
                loss_caption += tf.reduce_sum(cross_entropy)  # 1

        ## decoding video with attention
        with tf.variable_scope("model") as scope:
            # first write semantic into memory
            with tf.variable_scope("LSTM4"):
                _, state4 = self.lstm4_dropout(output_semantic, state4)
            ## TODO: add attention for video decoding
            scope.reuse_variables()
            for i in xrange(n_video_steps):
                with tf.variable_scope("LSTM4"):
                    output4, state4 = self.lstm4_dropout(
                        image_emb[i, :, :], state4)
                video_prev = tf.nn.xw_plus_b(output4, self.decode_image_W,
                                             self.decode_image_b)  # b x d_im
                euclid_loss = tf.reduce_sum(tf.square(
                    tf.subtract(video_prev, video[:, i, :])),
                                            axis=1,
                                            keep_dims=True)  # b x 1
                euclid_loss = euclid_loss * video_mask[:, i]  # b x 1
                loss_video += tf.reduce_sum(euclid_loss)  # 1

        loss_caption = loss_caption / tf.reduce_sum(caption_mask)
        loss_video = loss_video / tf.reduce_sum(video_mask)

        loss = weight_cap * loss_caption + weight_rbm * loss_rbm + weight_vid * loss_video
        return loss, loss_caption, loss_rbm, loss_video, output_semantic
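The attention block above exponentiates the scores, masks them with video_mask, normalises over time, and takes a weighted sum of the frame features. A numpy sketch of that computation for a single example, with assumed shapes (4 frames, hidden size 8), purely for illustration:

import numpy as np

scores = np.array([0.2, 1.5, -0.3, 0.0])      # one attention score per video frame
video_mask = np.array([1.0, 1.0, 1.0, 0.0])   # last frame is padding
features = np.random.rand(4, 8)                # n frames x hidden dimension

e_hat_exp = np.exp(scores) * video_mask        # masked, unnormalised weights
denom = e_hat_exp.sum()
denom = denom + float(denom == 0)              # regularise a fully-masked row
alphas = e_hat_exp / denom                     # attention weights, sum to 1
attended = (alphas[:, None] * features).sum(axis=0)   # soft-attention weighted sum

print(alphas)            # the padded frame gets weight 0
print(attended.shape)    # (8,)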
Beispiel #47
0
def run_training():

    with tf.Graph().as_default():

        # Extract data from tfrecords
        # data_type_placeholder = tf.placeholder("string")
        data_set_type = ['train', 'validation', 'test']
        data_sets = []
        for i in range(3):
            images, labels, rows = ctt.inputs(data_set_type=data_set_type[i],
                                              batch_size=FLAGS.batch_size,
                                              num_epochs=FLAGS.num_epochs)
            images = tf.sparse_to_dense(images.indices, images.shape, images.values)
            data_sets.append([images, labels, rows])

        # LSTM
        inputs_placeholder = tf.placeholder("float32", [FLAGS.batch_size, None, ctt.FLAGS.feature_col])
        rows_placeholder = tf.placeholder("float32", [FLAGS.batch_size])
        labels_placeholder = tf.placeholder("int32", [FLAGS.batch_size, None])
        max_time_placeholder = tf.placeholder("float32", [])

        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units,
                                                 forget_bias=1.0,
                                                 state_is_tuple=False)

        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers, state_is_tuple=False)

        initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)

        outputs, output_states = tf.nn.dynamic_rnn(cell=cell,
                                                   inputs=inputs_placeholder,
                                                   sequence_length=rows_placeholder,
                                                   initial_state=initial_state,
                                                   dtype=tf.float32,
                                                   swap_memory=False,
                                                   time_major=False,
                                                   scope=None)

        # softmax_w , shape=[num_units, num_class]
        # softmax_w = tf.get_variable("softmax_w", [num_units, num_class], dtype=tf.float32)
        # softmax_w = tf.Variable(tf.truncated_normal([num_units, num_class],
        #                                             stddev=1.0 / math.sqrt(float(num_units))),
        #                         name='output_weights')
        softmax_w = tf.Variable(tf.zeros([num_units, num_class]), name='output_weights')
        softmax_b = tf.Variable(tf.zeros([num_class]), name='output_biases')
        # softmax_b = tf.get_variable("softmax_b", [num_class], dtype=tf.float32)

        # extra_thing
        padding_vec = tf.zeros([1, FLAGS.batch_size], dtype=tf.float32)
        padding_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(padding_vec, [0])

        one_example_loss = 0.0
        for batch in range(FLAGS.batch_size):
            # for batch in range(5):
            # output Layer
            logits = tf.matmul(outputs[batch, :, :], softmax_w) + softmax_b

            # Add to the Graph the loss calculation.
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, labels_placeholder[batch, :], name='xentropy')
            pad_loss_sum = padding_loss * (max_time_placeholder - rows_placeholder[batch]+1)
            one_example_loss += (tf.reduce_sum(cross_entropy, name='slot_loss') - pad_loss_sum) / rows_placeholder[batch]
        loss = one_example_loss / FLAGS.batch_size

        # logits = tf.matmul(outputs[:, -1, :], softmax_w) + softmax_b
        #
        # # Add to the Graph the loss calculation.
        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        #     logits, labels_placeholder, name='xentropy')
        # loss = tf.reduce_mean(cross_entropy, name='slot_loss')

        # Add a scalar summary for the snapshot loss.
        # tf.scalar_summary(loss.op.name, loss)

        # Add to the Graph operations that train the model.
        optimizer = tf.train.AdamOptimizer(learning_rate)
        # Create a variable to track the global step.
        global_step = tf.Variable(0, name='global_step', trainable=False)
        # Use the optimizer to apply the gradients that minimize the loss
        # (and also increment the global step counter) as a single training step.
        train_op = optimizer.minimize(loss, global_step=global_step)

        # evaluation
        correct = tf.nn.in_top_k(logits, labels, 1)
        eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))

        # summary = tf.merge_all_summaries()

        # The op for initializing the variables.
        init_op = tf.group(tf.initialize_all_variables(),
                           tf.initialize_local_variables())

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)

        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize the variables (the trained variables and the
        # epoch counter).

        # Instantiate a SummaryWriter to output summaries and the Graph.
        # summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        sess.run(init_op)

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            step = 0
            while not coord.should_stop():
                start_time = time.time()
                # Run one step of the model.  The return values are
                # the activations from the `train_op` (which is
                # discarded) and the `loss` op.  To inspect the values
                # of your ops or variables, you may include them in
                # the list passed to sess.run() and the value tensors
                # will be returned in the tuple from the call.

                # feature_data, labels_data, sequence_len = sess.run([images, labels, rows],
                #                                                    feed_dict={data_type_placeholder: 'train'})
                feature_data, labels_data, sequence_len = sess.run(data_sets[0])
                # labels_data = tf.matmul(labels_data, np.ones(max(sequence_len), dtype=tf.float64))
                labels_data = np.array(labels_data).reshape(1, FLAGS.batch_size)
                labels_data = labels_data.repeat(max(sequence_len), axis=0).transpose()
                for index in range(FLAGS.batch_size):
                    labels_data[index, sequence_len[index]:] = 0
                # _, loss_value, outputs_value, \
                # softmax_w_value = sess.run([train_op, loss, outputs, softmax_w],
                #                            feed_dict={inputs_placeholder: feature_data,
                #                                       rows_placeholder: sequence_len,
                #                                       labels_placeholder: labels_data})

                _, loss_value = sess.run([train_op, loss],
                                         feed_dict={inputs_placeholder: feature_data,
                                                    rows_placeholder: sequence_len,
                                                    labels_placeholder: labels_data,
                                                    max_time_placeholder: max(sequence_len)})

                # data_sets_value = sess.run(data_sets)
                # _, loss_value = sess.run([train_op, loss],
                #                          feed_dict={inputs_placeholder: data_sets_value.data,
                #                                     rows_placeholder: data_sets_value.rows,
                #                                     labels_placeholder: data_sets_value.target})

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                # if (step % 10 == 0) or (step > 80):
                #  if step < 10:
                if step % 10 == 0:
                    # Print status to stdout.
                    # print labels_data, sequence_len
                    print('Step %d: loss = %.2f(%.3f sec)' % (step, loss_value, duration))
                    # print(outputs_value, softmax_w_value)
                    # summary_writer.add_summary(summary)
                    # summary_writer.flush()

                    # print('Validation Data Eval:')
                    # do_eval(sess,
                    #         eval_correct,
                    #         inputs_placeholder,
                    #         rows_placeholder,
                    #         labels_placeholder,
                    #         data_sets,
                    #         1)
                step += 1

        except tf.errors.OutOfRangeError:
            print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)
        sess.close()
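The reshape/repeat/mask block in the loop above simply broadcasts one scalar label per example across the padded time axis and then zeroes out the steps beyond each sequence's length. A small NumPy sketch of the same manipulation (the batch size, lengths and labels below are made-up illustration values):

import numpy as np

batch_size = 3                       # stands in for FLAGS.batch_size
sequence_len = [4, 2, 3]             # per-example lengths returned by the reader
labels_data = [1, 0, 1]              # one scalar label per example

# Broadcast each label over the padded time axis, then zero out the padding.
labels_data = np.array(labels_data).reshape(1, batch_size)
labels_data = labels_data.repeat(max(sequence_len), axis=0).transpose()
for index in range(batch_size):
    labels_data[index, sequence_len[index]:] = 0

print(labels_data)
# [[1 1 1 1]
#  [0 0 0 0]
#  [1 1 1 0]]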
Beispiel #48
0
def RPNTarget(all_anchors, num_anchors, gt_boxes, im_shape):
    """RPNTarget: Get RPN's classification and regression targets.

    RPNTarget is responsible for:
      * calculating the correct values for both classification and regression
        problems.
      * defining which anchors and target values are going to be used for the
        RPN minibatch.

    For calculating the correct values for classification (i.e. the question
    "does this anchor refer to an object?") and returning an objectness score,
    we calculate the intersection over union (IoU) between the anchor boxes
    and the ground truth boxes, and use it to categorize anchors. When the IoU
    between an anchor and a ground truth box is above a threshold, we mark the
    anchor as an object, i.e. as foreground. When there is no intersection, or
    the IoU value is low, the anchor refers to background.

    For calculating the correct values for the regression (transforming the
    fixed-size anchor into a bounding box that better matches the ground truth
    box), the problem only applies to the anchors that we consider to be
    foreground.

    RPNTarget is also responsible for selecting which of the anchors are going
    to be used for the minibatch. This is a random process with some
    restrictions on the ratio between foreground and background samples.

    For selecting the minibatch, labels are not only set to 0 or 1 (for the
    cases of being background and foreground respectively), but also to -1 for
    the anchors we just want to ignore and not include in the minibatch.

    In summary:
      * 1 is positive
          when GT overlap is >= 0.7 (configurable) or for GT max overlap (one
          anchor)
      * 0 is negative
          when GT overlap is < 0.3 (configurable)
      * -1 is don't care
          useful for subsampling negative labels

    Returns:
        labels: label for each anchor
        bbox_targets: bbox regression values for each anchor
        max_overlaps: max IoU overlap with the ground truth boxes for each anchor
    """

    allowed_border = 0
    # We set clobber_positives to False to make sure that there is always at
    # least one positive anchor per GT box.
    clobber_positives = False
    # We set anchors as positive when the IoU is greater than
    # `positive_overlap`.
    positive_overlap = 0.7
    # We set anchors as negative when the IoU is less than
    # `negative_overlap`.
    negative_overlap = 0.3
    # Fraction of the batch to be foreground labeled anchors.
    foreground_fraction = 0.5
    minibatch_size = 256

    # When choosing random targets use `seed` to replicate behaviour.
    seed = None
    """
    We compare anchors to GT and, using the minibatch size and the different
    config settings (clobber, foreground fraction, etc.), we end up with
    training targets *only* for the elements we want to use in the batch,
    while everything else is ignored.

    Basically, it first generates the targets for all (valid) anchors, and
    then subsamples the positive (foreground) and negative (background) ones
    based on the number of samples of each type that we want.

    Args:
        all_anchors:
            A Tensor with all the bounding boxes coords of the anchors.
            Its shape should be (num_anchors, 4).
        gt_boxes:
            A Tensor with the ground truth bounding boxes of the image of
            the batch being processed. Its shape should be (num_gt, 5).
            The last dimension is used for the label.
        im_shape:
            Shape of the original image (height, width), used to define
            anchor targets with respect to gt_boxes.

    Returns:
        Tuple of the tensors of:
            labels: (1, 0, -1) for each anchor.
                Shape (num_anchors, 1)
            bbox_targets: 4-d bbox targets as specified by the paper.
                Shape (num_anchors, 4)
            max_overlaps: Max IoU overlap with ground truth boxes.
                Shape (num_anchors, 1)
    """
    # Keep only the coordinates of gt_boxes
    gt_boxes = gt_boxes[:, :4]
    all_anchors = all_anchors[:, :4]

    # Only keep anchors inside the image
    (x_min_anchor, y_min_anchor, x_max_anchor,
     y_max_anchor) = tf.unstack(all_anchors, axis=1)

    anchor_filter = tf.logical_and(
        tf.logical_and(tf.greater_equal(x_min_anchor, -allowed_border),
                       tf.greater_equal(y_min_anchor, -allowed_border)),
        tf.logical_and(tf.less(x_max_anchor, im_shape[1] + allowed_border),
                       tf.less(y_max_anchor, im_shape[0] + allowed_border)))

    # We (force) reshape the filter so that we can use it as a boolean mask
    anchor_filter = tf.reshape(anchor_filter, [-1])
    # Filter anchors.
    anchors = tf.boolean_mask(all_anchors,
                              anchor_filter,
                              name='filter_anchors')

    # Generate array with the labels for all_anchors.
    labels = tf.fill((tf.gather(tf.shape(all_anchors), [0])), -1)
    labels = tf.boolean_mask(labels, anchor_filter, name='filter_labels')

    # Intersection over union (IoU) overlap between the anchors and the
    # ground truth boxes.
    overlaps = bbox_overlap_tf(tf.to_float(anchors), tf.to_float(gt_boxes))

    # Generate array with the IoU value of the closest GT box for each
    # anchor.
    max_overlaps = tf.reduce_max(overlaps, axis=1)
    if not clobber_positives:
        # Assign bg labels first so that positive labels can clobber them.
        # First we get an array with True where IoU is less than
        # negative_overlap
        negative_overlap_nonzero = tf.less(max_overlaps, negative_overlap)

        # Finally we set 0 at True indices
        labels = tf.where(condition=negative_overlap_nonzero,
                          x=tf.zeros(tf.shape(labels)),
                          y=tf.to_float(labels))
    # Get the value of the max IoU for the closest anchor for each gt.
    gt_max_overlaps = tf.reduce_max(overlaps, axis=0)

    # Find all the indices that match (at least one, but could be more).
    gt_argmax_overlaps = tf.squeeze(tf.equal(overlaps, gt_max_overlaps))
    gt_argmax_overlaps = tf.where(gt_argmax_overlaps)[:, 0]
    # Eliminate duplicates indices.
    gt_argmax_overlaps, _ = tf.unique(gt_argmax_overlaps)
    # Order the indices for sparse_to_dense compatibility
    gt_argmax_overlaps, _ = tf.nn.top_k(gt_argmax_overlaps,
                                        k=tf.shape(gt_argmax_overlaps)[-1])
    gt_argmax_overlaps = tf.reverse(gt_argmax_overlaps, [0])

    # Foreground label: for each ground truth box, the anchor with the highest
    # overlap. When the argmax matches several anchors we use all of them
    # (for consistency).
    # We set 1 at gt_argmax_overlaps_cond indices
    gt_argmax_overlaps_cond = tf.sparse_to_dense(gt_argmax_overlaps,
                                                 tf.shape(labels,
                                                          out_type=tf.int64),
                                                 True,
                                                 default_value=False)

    labels = tf.where(condition=gt_argmax_overlaps_cond,
                      x=tf.ones(tf.shape(labels)),
                      y=tf.to_float(labels))

    # Foreground label: above threshold Intersection over Union (IoU)
    # First we get an array with True where IoU is greater or equal than
    # positive_overlap
    positive_overlap_inds = tf.greater_equal(max_overlaps, positive_overlap)
    # Finally we set 1 at True indices
    labels = tf.where(condition=positive_overlap_inds,
                      x=tf.ones(tf.shape(labels)),
                      y=labels)

    if clobber_positives:
        # Assign background labels last so that negative labels can clobber
        # positives. First we get an array with True where IoU is less than
        # negative_overlap
        negative_overlap_nonzero = tf.less(max_overlaps, negative_overlap)
        # Finally we set 0 at True indices
        labels = tf.where(condition=negative_overlap_nonzero,
                          x=tf.zeros(tf.shape(labels)),
                          y=labels)

    # Subsample positive labels if we have too many
    def subsample_positive():
        # Shuffle the foreground indices
        disable_fg_inds = tf.random_shuffle(fg_inds, seed=seed)
        # Select the indices that we have to ignore, this is
        # `tf.shape(fg_inds)[0] - num_fg` because we want to get only
        # `num_fg` foreground labels.
        disable_place = (tf.shape(fg_inds)[0] - num_fg)
        disable_fg_inds = disable_fg_inds[:disable_place]
        # Order the indices for sparse_to_dense compatibility
        disable_fg_inds, _ = tf.nn.top_k(disable_fg_inds,
                                         k=tf.shape(disable_fg_inds)[-1])
        disable_fg_inds = tf.reverse(disable_fg_inds, [0])
        disable_fg_inds = tf.sparse_to_dense(disable_fg_inds,
                                             tf.shape(labels,
                                                      out_type=tf.int64),
                                             True,
                                             default_value=False)
        # Put -1 to ignore the anchors in the selected indices
        return tf.where(condition=tf.squeeze(disable_fg_inds),
                        x=tf.to_float(tf.fill(tf.shape(labels), -1)),
                        y=labels)

    num_fg = tf.to_int32(foreground_fraction * minibatch_size)
    # Get foreground indices, get True in the indices where we have a one.
    fg_inds = tf.equal(labels, 1)
    # We get only the indices where we have True.
    fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
    fg_inds_size = tf.size(fg_inds)
    # Condition to check whether we have too many positive labels.
    subsample_positive_cond = fg_inds_size > num_fg
    # Check the condition and subsample positive labels.
    labels = tf.cond(subsample_positive_cond,
                     true_fn=subsample_positive,
                     false_fn=lambda: labels)

    # Subsample negative labels if we have too many
    def subsample_negative():
        # Shuffle the background indices
        disable_bg_inds = tf.random_shuffle(bg_inds, seed=seed)

        # Select the indices that we have to ignore, this is
        # `tf.shape(bg_inds)[0] - num_bg` because we want to get only
        # `num_bg` background labels.
        disable_place = (tf.shape(bg_inds)[0] - num_bg)
        disable_bg_inds = disable_bg_inds[:disable_place]
        # Order the indices for sparse_to_dense compatibility
        disable_bg_inds, _ = tf.nn.top_k(disable_bg_inds,
                                         k=tf.shape(disable_bg_inds)[-1])
        disable_bg_inds = tf.reverse(disable_bg_inds, [0])
        disable_bg_inds = tf.sparse_to_dense(disable_bg_inds,
                                             tf.shape(labels,
                                                      out_type=tf.int64),
                                             True,
                                             default_value=False)
        # Put -1 to ignore the anchors in the selected indices
        return tf.where(condition=tf.squeeze(disable_bg_inds),
                        x=tf.to_float(tf.fill(tf.shape(labels), -1)),
                        y=labels)

    # Recalculate the foreground indices after (maybe) disabling some of them

    # Get foreground indices, get True in the indices where we have a one.
    fg_inds = tf.equal(labels, 1)
    # We get only the indices where we have True.
    fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
    fg_inds_size = tf.size(fg_inds)

    num_bg = tf.to_int32(minibatch_size - fg_inds_size)
    # Get background indices, get True in the indices where we have a zero.
    bg_inds = tf.equal(labels, 0)
    # We get only the indices where we have True.
    bg_inds = tf.squeeze(tf.where(bg_inds), axis=1)
    bg_inds_size = tf.size(bg_inds)
    # Condition to check whether we have too many negative labels.
    subsample_negative_cond = bg_inds_size > num_bg
    # Check the condition and subsample negative labels.
    labels = tf.cond(subsample_negative_cond,
                     true_fn=subsample_negative,
                     false_fn=lambda: labels)

    # Return bbox targets with shape (anchors.shape[0], 4).

    # Find the closest gt box for each anchor.
    argmax_overlaps = tf.argmax(overlaps, axis=1)
    # Eliminate duplicates.
    argmax_overlaps_unique, _ = tf.unique(argmax_overlaps)
    # Filter the gt_boxes.
    # We get only the indices where we have "inside anchors".
    anchor_filter_inds = tf.where(anchor_filter)
    gt_boxes = tf.gather(gt_boxes, argmax_overlaps)

    bbox_targets = encode_tf(anchors, gt_boxes)

    # For the anchors that aren't foreground, we ignore the bbox_targets.
    anchor_foreground_filter = tf.equal(labels, 1)
    bbox_targets = tf.where(condition=anchor_foreground_filter,
                            x=bbox_targets,
                            y=tf.zeros_like(bbox_targets))

    # We scatter the values computed for the "inside anchors" back onto the
    # full set of anchors (for shape compatibility).

    # We complete the missed indices with zeros
    # (because scatter_nd has zeros as default).
    bbox_targets = tf.scatter_nd(indices=tf.to_int32(anchor_filter_inds),
                                 updates=bbox_targets,
                                 shape=tf.shape(all_anchors))

    labels_scatter = tf.scatter_nd(indices=tf.to_int32(anchor_filter_inds),
                                   updates=labels,
                                   shape=[tf.shape(all_anchors)[0]])
    # We have to put -1 at the indices that scatter_nd filled with its
    # default 0, otherwise those anchors would be considered background.
    labels = tf.where(condition=anchor_filter,
                      x=labels_scatter,
                      y=tf.to_float(tf.fill(tf.shape(labels_scatter), -1)))

    max_overlaps = tf.scatter_nd(indices=tf.to_int32(anchor_filter_inds),
                                 updates=max_overlaps,
                                 shape=[tf.shape(all_anchors)[0]])

    return labels, bbox_targets, max_overlaps
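A rough usage sketch for RPNTarget follows; it assumes the helpers referenced above (bbox_overlap_tf, encode_tf) are available in the same module, and the anchor/ground-truth values are made up purely for illustration:

import tensorflow as tf

all_anchors = tf.constant([[10., 10., 60., 60.],
                           [200., 200., 260., 280.],
                           [-20., 5., 30., 40.]])       # (num_anchors, 4)
gt_boxes = tf.constant([[12., 12., 58., 58., 1.]])      # (num_gt, 5), last column is the class
im_shape = tf.constant([300., 300.])                    # (height, width)

labels, bbox_targets, max_overlaps = RPNTarget(
    all_anchors, num_anchors=3, gt_boxes=gt_boxes, im_shape=im_shape)

with tf.Session() as sess:
    # labels is (num_anchors,) with values in {1, 0, -1};
    # bbox_targets is (num_anchors, 4); max_overlaps is (num_anchors,).
    print(sess.run([labels, bbox_targets, max_overlaps]))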
Beispiel #49
0
    def __init__(self, w_in, w_out, sense_dim, embedding_dim, batch_size,
                 context_window, learning_rate, bi_w_in):
        max_context_length = 2 * context_window + 1
        eval_mode = tf.placeholder(tf.bool, shape=[])
        self.eval_mode = eval_mode
        self.bi_info = tf.sparse_placeholder(tf.int32)
        bi_info = tf.sparse_to_dense(self.bi_info.indices,
                                     self.bi_info.dense_shape,
                                     self.bi_info.values)
        self.lengths = tf.placeholder(tf.int32,
                                      [context_window * 2 + batch_size])
        # add self here so we can feed it outside this class
        context_indices = tf.placeholder(
            tf.int32, [context_window * 2 + batch_size, max_context_length])
        self.context_indices = context_indices
        major_weight = tf.placeholder(tf.float32)
        reg_weight = tf.placeholder(tf.float32)
        self.major_weight = major_weight
        self.reg_weight = reg_weight
        embedded_context = self.dense_lookup(w_in, context_indices)
        bi_embedded_context = self.sparse_lookup(bi_w_in, bi_info,
                                                 self.lengths)

        # Combine bilingual contextual information
        embedded_context = tf.cond(
            eval_mode, lambda: tf.identity(embedded_context),
            lambda: tf.add(major_weight * embedded_context,
                           (1 - major_weight) * bi_embedded_context))

        # [(context_window*2+batch_size), sense_dim, embedding_dim]
        embedded_word_output = tf.nn.embedding_lookup(
            w_out, context_indices[:, context_window])

        # shape = [(context_window*2+batch_size), sense_dim, 1]
        sense_score = tf.matmul(embedded_word_output, embedded_context)

        # [(context_window*2+batch_size), sense_dim]
        sense_score = tf.squeeze(sense_score)

        # [context_window*2+batch_size]
        sense_greedy = tf.argmax(sense_score, 1)
        self.sense_greedy = sense_greedy

        target_sense_sampled_indices = tf.placeholder(tf.int32, [batch_size])
        self.target_sense_sampled_indices = target_sense_sampled_indices
        # [batch_size]
        reward_prob = tf.placeholder(tf.float32, [batch_size],
                                     name='reward_logit')
        self.reward_prob = reward_prob

        # [(context_window*2+batch_size), sense_dim]
        sense_prob = tf.nn.softmax(sense_score)
        self.sense_prob = sense_prob
        entropy = -tf.multiply(tf.log(sense_prob + 1e-8), sense_prob)
        entropy = tf.reduce_sum(entropy) * reg_weight
        # [(context_window*2+batch_size)* sense_dim]
        sense_score = tf.reshape(
            sense_score, [(context_window * 2 + batch_size) * sense_dim])
        # [batch_size]
        sense_selected_logit_input = tf.gather(sense_score,
                                               target_sense_sampled_indices)

        # [batch_size, sense_dim]
        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=sense_selected_logit_input, labels=reward_prob))
        cost += entropy
        self.print_cost = cost
        self.print_ent = entropy
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        self.update = optimizer.minimize(cost)
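Note on the entropy term above: since the (non-negative) entropy of sense_prob is added to the minimized cost, the regularizer pushes each context toward a confident, peaked sense selection, with reg_weight controlling its strength. A tiny NumPy sketch of the term for one row of sense_prob (made-up values):

import numpy as np

reg_weight = 0.01
sense_prob = np.array([0.7, 0.2, 0.1])   # softmax over sense_dim senses

entropy = -np.sum(np.log(sense_prob + 1e-8) * sense_prob) * reg_weight
print(entropy)   # ~0.0080; a uniform distribution would give ~0.0110, so peaked selections cost less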
    def train_model(self, sparse_length, k_sparse, debug_level=0):
        print("Running with sparse_length=" + str(sparse_length) +
              " and k_sparse=" + str(k_sparse))
        # This is a bit untidy. Can be cleaned up later
        batch = 50
        signal_dim = self.signal_dim
        #
        if debug_level > 0: print(signal_dim)
        model_components = {}
        x = tf.placeholder(tf.float32, [None, signal_dim[0], signal_dim[1]])
        batch_size = tf.placeholder(tf.int32)
        if debug_level > 0: print(x.shape)
        #
        W = tf.Variable(tf.truncated_normal([signal_dim[1], sparse_length],
                                            stddev=1e-1),
                        name='weights')
        b = tf.Variable(tf.constant(0.0,
                                    shape=[sparse_length],
                                    dtype=tf.float32),
                        trainable=True,
                        name='biases')
        x_2d = tf.reshape(x, [-1, signal_dim[1]])
        z = tf.matmul(x_2d, W) + b
        if debug_level > 0: print(W.shape, b.shape, x_2d.shape, z.shape)
        #
        tao, tao_indices = tf.nn.top_k(z, k=k_sparse, sorted=True)
        indices_range = tf.expand_dims(tf.range(0, batch * signal_dim[0]), 1)
        range_repeated = tf.tile(indices_range, [1, k_sparse])
        if debug_level > 0:
            print(tao, tao_indices, indices_range, range_repeated)
        full_indices = tf.concat([
            tf.expand_dims(range_repeated, 2),
            tf.expand_dims(tao_indices, 2)
        ],
                                 axis=2)
        full_indices = tf.reshape(full_indices, [-1, 2])
        mask = tf.ones(tf.shape(full_indices)[0])
        #mask = tf.SparseTensor(tf.ones(tf.shape(full_indices)[0]),dense_shape=tf.constant([signal_dim[0]*batch,sparse_length]))
        tao_mask = tf.sparse_to_dense(
            full_indices,
            tf.constant([signal_dim[0] * batch, sparse_length]),
            mask,
            validate_indices=False)
        #tao_mask = tf.sparse_to_dense(full_indices,tf.constant([signal_dim[0]*batch,sparse_length]), mask)
        #
        z_tao = tf.multiply(tao_mask, z)
        #
        b_dash = tf.Variable(tf.constant(0.0,
                                         shape=[signal_dim[1]],
                                         dtype=tf.float32),
                             trainable=True,
                             name='biases')
        x_recons = tf.matmul(z_tao, tf.transpose(W)) + b_dash
        #
        error = tf.losses.mean_squared_error(x_2d, x_recons)
        #
        optimizer = tf.train.AdamOptimizer()
        train_step = optimizer.minimize(error)
        #
        sess = tf.Session()
        #if debug_level>0:   sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        sess.run(tf.global_variables_initializer())
        #
        for e in range(1, self.N_epochs):
            sess.run(train_step,
                     feed_dict={
                         x: self.train_set,
                         batch_size: self.train_batch_size
                     })  #### inp should be replaced by batch

            if e % self.VALIDATION_AFTER_EPOCHS == 0:
                err = sess.run(error,
                               feed_dict={
                                   x: self.val_set,
                                   batch_size: self.test_batch_size
                               })  ### inp should be replaced by val_set
                print("Epoch ", e, " has val error : ", err)
        #
        x_recons_data_train = sess.run(x_recons, feed_dict={x: self.train_set})
        x_recons_train_list = np.split(x_recons_data_train,
                                       self.n_samples_test,
                                       axis=0)
        #
        x_recons_data_val = sess.run(x_recons, feed_dict={x: self.val_set})
        x_recons_test_list = np.split(x_recons_data_val,
                                      self.n_samples_test,
                                      axis=0)
        #
        model_components['W'] = sess.run(W)
        model_components['b'] = sess.run(b)
        model_components['tao'] = sess.run(tao, feed_dict={x: self.val_set})
        model_components['tao_indices'] = sess.run(tao_indices,
                                                   feed_dict={x: self.val_set})
        model_components['b_dash'] = sess.run(b_dash)
        model_components['err'] = err
        model_components['x_recons_train_list'] = x_recons_train_list
        model_components['x_recons_test_list'] = x_recons_test_list
        return model_components
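The core of the sparsity mechanism above is the top-k mask: tf.nn.top_k picks the k_sparse largest activations per row, the scattered mask keeps exactly those entries, and everything else is zeroed before the reconstruction x_recons. A small NumPy sketch of the same masking (illustrative values only):

import numpy as np

k_sparse = 2
z = np.array([[0.1, 0.9, 0.3, 0.7],
              [0.5, 0.2, 0.8, 0.4]])

# Keep only the k largest activations per row, zero the rest.
mask = np.zeros_like(z)
top_k_indices = np.argsort(z, axis=1)[:, -k_sparse:]
rows = np.arange(z.shape[0])[:, None]
mask[rows, top_k_indices] = 1.0
z_tao = z * mask

print(z_tao)
# [[0.  0.9 0.  0.7]
#  [0.5 0.  0.8 0. ]]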
Beispiel #51
0
def true_segments_1d(segments,
                     mode=SegmentsMode.CENTERS,
                     max_gap=0,
                     min_length=0,
                     name=None):
    """Labels contiguous True runs in segments.

  Args:
    segments: 1D boolean tensor.
    mode: The SegmentsMode. Returns the start of each segment (STARTS), or the
        rounded center of each segment (CENTERS).
    max_gap: Fill gaps of length at most `max_gap` between true segments. int.
    min_length: Minimum length of a returned segment. int.
    name: Optional name for the op.

  Returns:
    run_centers: int32 tensor. Depending on `mode`, either the start of each
        True run, or the (rounded) center of each True run.
    run_lengths: int32; the lengths of each True run.
  """
    with tf.name_scope(name, "true_segments", [segments]):
        segments = tf.convert_to_tensor(segments, tf.bool)
        run_starts, run_lengths = _segments_1d(segments,
                                               mode=SegmentsMode.STARTS)
        # Take only the True runs. After whichever run is True first, the True runs
        # are every other run.
        first_run = tf.cond(
            # First value is False, or all values are False. Handles empty segments
            # correctly.
            tf.logical_or(tf.reduce_any(segments[0:1]),
                          ~tf.reduce_any(segments)),
            lambda: tf.constant(0),
            lambda: tf.constant(1))

        num_runs = tf.shape(run_starts)[0]
        run_nums = tf.range(num_runs)
        is_true_run = tf.equal(run_nums % 2, first_run % 2)
        # Find gaps between True runs that can be merged.
        is_gap = tf.logical_and(
            tf.not_equal(run_nums % 2, first_run % 2),
            tf.logical_and(tf.greater(run_nums, first_run),
                           tf.less(run_nums, num_runs - 1)))
        fill_gap = tf.logical_and(is_gap, tf.less_equal(run_lengths, max_gap))

        # Segment the consecutive runs of True or False values based on whether they
        # are True, or are a gap of False values that can be bridged. Then, flatten
        # the runs of runs.
        runs_to_merge = tf.logical_or(is_true_run, fill_gap)
        run_of_run_starts, _ = _segments_1d(runs_to_merge,
                                            mode=SegmentsMode.STARTS)

        # Get the start of every new run from the original run starts.
        merged_run_starts = tf.gather(run_starts, run_of_run_starts)
        # Make an array mapping the original runs to their run of runs. Increment
        # the number for every run of run start except for the first one, so that
        # the array has values from 0 to num_run_of_runs.
        merged_run_inds = tf.cumsum(
            tf.sparse_to_dense(
                sparse_indices=tf.cast(run_of_run_starts[1:, None], tf.int64),
                output_shape=tf.cast(num_runs[None], tf.int64),
                sparse_values=tf.ones_like(run_of_run_starts[1:])))
        # Sum the lengths of the original runs that were merged.
        merged_run_lengths = tf.segment_sum(run_lengths, merged_run_inds)

        if mode is SegmentsMode.CENTERS:
            merged_starts_or_centers = (merged_run_starts +
                                        tf.floordiv(merged_run_lengths - 1, 2))
        else:
            merged_starts_or_centers = merged_run_starts

        # If there are no true values, increment first_run to 1, so we will skip
        # the single (false) run.
        first_run += tf.to_int32(tf.logical_not(tf.reduce_any(segments)))

        merged_starts_or_centers = merged_starts_or_centers[first_run::2]
        merged_run_lengths = merged_run_lengths[first_run::2]

        # Only take segments at least min_length long.
        is_long_enough = tf.greater_equal(merged_run_lengths, min_length)
        is_long_enough.set_shape([None])
        merged_starts_or_centers = tf.boolean_mask(merged_starts_or_centers,
                                                   is_long_enough)
        merged_run_lengths = tf.boolean_mask(merged_run_lengths,
                                             is_long_enough)

        return merged_starts_or_centers, merged_run_lengths
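A minimal usage sketch for true_segments_1d, assuming SegmentsMode and _segments_1d from the same module are importable; the input below is made up:

import tensorflow as tf

segments = tf.constant([False, True, True, True, False, False, True, True])
centers, lengths = true_segments_1d(segments, mode=SegmentsMode.CENTERS)

with tf.Session() as sess:
    print(sess.run([centers, lengths]))
    # Two True runs, [1, 2, 3] and [6, 7] -> centers [2, 6], lengths [3, 2]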
Beispiel #52
0
    def build_inference(self, x, flag="train"):
        # Set up the regularizers, one for each of the four parts of the network
        regularizer1 = self.param_dict[
            "regulerizer1"] if flag == "train" else None
        regularizer2 = self.param_dict[
            "regulerizer2"] if flag == "train" else None
        regularizer3 = self.param_dict[
            "regulerizer3"] if flag == "train" else None
        regularizer4 = self.param_dict[
            "regulerizer4"] if flag == "train" else None
        is_train = True if flag == "train" else False
        # First fetch the parameters we need
        hash_size = self.param_dict['hash_size']
        no_hash = self.param_dict["no_hash"]
        embed_size = self.param_dict["embed_size"]
        # Get the activation function according to the config
        act_fn = self.get_activation_func(is_train)
        # Whether to enable mini-batch aware regularization
        is_mba_reg = self.param_dict["is_mba_reg"]
        lambda_reg_mba = self.param_dict["lambda_reg_mba"]
        is_action_mba_reg = self.param_dict["is_action_mba_reg"]

        # Split the input
        x_feature = x[:, :-3]
        x_action_lists = x[:, -3:]

        # First convert the sparse features into indices
        x_sparse = []
        for i in range(len(hash_size)):
            if i in no_hash:
                # These features can be used directly as indices; no conversion needed
                x_i = tf.string_to_number(x_feature[:, i], tf.int32)
                x_sparse.append(x_i)
            else:
                # These features are converted into indices via a hash function
                x_i = tf.string_to_hash_bucket_strong(
                    input=x_feature[:, i],
                    num_buckets=hash_size[i],
                    key=[679362, 964545],
                    name="sparse_feature_{}".format(i))
                x_sparse.append(x_i)
        # Convert the sparse data into embedding vectors
        x_embed = []
        w_action_embed = []
        x_action = []
        indice_sku_cate_brand = []
        sku_cate_brand_index = self.param_dict["sku_cate_brand_index"]
        for i in range(len(embed_size)):
            if embed_size[i] != -1:
                with tf.variable_scope("embedding_{}".format(i)):
                    if hash_size[i] <= 500000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]], regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]))
                    elif hash_size[i] > 500000 and hash_size[i] <= 5000000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(5, 0))
                    elif hash_size[i] > 5000000 and hash_size[i] <= 10000000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(10, 0))
                    elif hash_size[i] > 10000000 and hash_size[i] <= 15000000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(15, 0))
                    elif hash_size[i] > 15000000 and hash_size[i] <= 20000000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(20, 0))
                    else:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(30, 0))
                x_i = tf.nn.embedding_lookup(weights, x_sparse[i])

                if i in sku_cate_brand_index:  # embedding vectors for skuid, cateid, brandid
                    w_action_embed.append(weights)
                    x_action.append(x_i)
                    indice_sku_cate_brand.append(x_sparse[i])
                    if is_train and is_mba_reg and not is_action_mba_reg:
                        # compute mini-batch aware regularization
                        self.calculate_mini_batch_aware_reg(
                            weights, x_sparse[i], lambda_reg_mba)
                else:
                    if is_train and is_mba_reg:
                        # compute mini-batch aware regularization
                        self.calculate_mini_batch_aware_reg(
                            weights, x_sparse[i], lambda_reg_mba)

            else:
                x_i = tf.one_hot(x_sparse[i], depth=hash_size[i])

            x_embed.append(x_i)

            # if i in sku_cate_brand_index: # embedding vectors for skuid, cateid, brandid
            #     with tf.variable_scope("embedding_{}".format(i)):
            #         weights = self.get_weight_variable([hash_size[i], embed_size[i]], regularizer1,
            #                                             self.param_dict["initializer_embedding_w"]([hash_size[i], embed_size[i]]),
            #                                             partitioner=tf.fixed_size_partitioner(20, 0))
            #         w_action_embed.append(weights)
            #         x_i = tf.nn.embedding_lookup(weights, x_sparse[i])
            #         if is_train and is_mba_reg and not is_action_mba_reg:
            #             # compute mini-batch aware regularization
            #             self.calculate_mini_batch_aware_reg(weights, x_sparse[i], lambda_reg_mba)
            #
            #         indice_sku_cate_brand.append(x_sparse[i])
            #         x_embed.append(x_i)
            #         x_action.append(x_i)
            # else:
            #     if embed_size[i] != -1:
            #         with tf.variable_scope("embedding_{}".format(i)):
            #             if i == 0:
            #                 weights = self.get_weight_variable([hash_size[i], embed_size[i]], regularizer1,
            #                                                    self.param_dict["initializer_embedding_w"]([hash_size[i], embed_size[i]]),
            #                                                    partitioner=tf.fixed_size_partitioner(20, 0))
            #             else:
            #                 weights = self.get_weight_variable([hash_size[i], embed_size[i]], regularizer1,
            #                                                    self.param_dict["initializer_embedding_w"]([hash_size[i], embed_size[i]]))
            #             x_i = tf.nn.embedding_lookup(weights, x_sparse[i])
            #             if is_train and is_mba_reg:
            #                 # compute mini-batch aware regularization
            #                 self.calculate_mini_batch_aware_reg(weights, x_sparse[i], lambda_reg_mba)
            #
            #             x_embed.append(x_i)
            #     else:
            #         x_i = tf.one_hot(x_sparse[i], depth=hash_size[i])
            #         x_embed.append(x_i)
        x_embed = tf.concat(x_embed, 1)

        # Model the user's browsing behaviour and build the DIN (Deep Interest Network)
        with tf.name_scope("user_behaviours"):
            x_browse_skus_list = tf.reshape(x_action_lists[:, 0], [
                -1,
            ])
            x_browse_cates_list = tf.reshape(x_action_lists[:, 1], [
                -1,
            ])
            x_browse_brand_list = tf.reshape(x_action_lists[:, 2], [
                -1,
            ])
            browse_lists = [
                x_browse_skus_list, x_browse_cates_list, x_browse_brand_list
            ]
            browse_names = ['skus', 'cates', 'brands']
            browse_nums = self.param_dict["browse_nums"]
            x_action_list_embeds = []
            sum_poolings = []
            x_action_list_masks = []
            for i in range(len(browse_names)):
                # for i in [0]:
                with tf.name_scope("user_browse_{}_embedding".format(
                        browse_names[i])):
                    browse_w_embed = w_action_embed[i]
                    # x_ad_embedded = x_action[i]
                    x_browse_action = browse_lists[
                        i]  # shape of x_browse_action is [?,]
                    x_browse_action_list = tf.string_split(
                        x_browse_action, "#")
                    x_browse_action_list_indices = tf.sparse_to_dense(
                        x_browse_action_list.indices,
                        # x_browse_action_list.dense_shape,
                        [x_browse_action_list.dense_shape[0], browse_nums[i]],
                        tf.string_to_hash_bucket_strong(
                            x_browse_action_list.values,
                            num_buckets=browse_w_embed.get_shape()[0].value,
                            key=[679362, 964545],
                            name="sparse_user_browse_{}".format(
                                browse_names[i])),
                        -1)
                    indice_mask = tf.reshape(
                        tf.not_equal(x_browse_action_list_indices, -1),
                        [-1, browse_nums[i]])
                    x_action_list_masks.append(indice_mask)
                    x_action_list_embed = tf.reshape(
                        tf.nn.embedding_lookup(browse_w_embed,
                                               x_browse_action_list_indices),
                        [
                            -1, browse_nums[i],
                            browse_w_embed.get_shape()[1].value
                        ])
                    if is_train and is_action_mba_reg:
                        # compute mini-batch aware regularization
                        indice_action = tf.concat([
                            tf.string_to_hash_bucket_strong(
                                x_browse_action_list.values,
                                num_buckets=browse_w_embed.get_shape()
                                [0].value,
                                key=[679362, 964545]), indice_sku_cate_brand[i]
                        ], 0)
                        self.calculate_mini_batch_aware_reg(
                            browse_w_embed, indice_action, lambda_reg_mba)
                    x_action_list_embeds.append(x_action_list_embed)

            with tf.name_scope("activation_unit"):
                act_unit_hidden_layers = self.param_dict[
                    "act_unit_hidden_layers"]
                action_indexs = self.param_dict["action_indexs"]
                # for i in range(len(x_action_list_embeds)):
                for i in action_indexs:
                    x_action_list_embed = x_action_list_embeds[i]
                    x_ad_embedded = x_action[i]
                    indice_mask = x_action_list_masks[i]
                    # Outer product: flatten the Cartesian-product matrix into a vector
                    # out_product_list = tf.map_fn(lambda action_emb: tf.reshape(tf.matmul(tf.expand_dims(action_emb, 2), tf.expand_dims(x_ad_embedded, 1)), [-1, x_ad_embedded.shape[1].value ** 2]),
                    #                              tf.transpose(x_action_list_embed, [1, 0, 2]))

                    # Approximate outer product: subtract the vectors, then concat with the element-wise product

                    x_action_list_embed_new = tf.transpose(
                        x_action_list_embed, [1, 0, 2])

                    concat_list = [
                        tf.concat([
                            x_action_list_embed_new[ii],
                            x_action_list_embed_new[ii] - x_ad_embedded,
                            x_action_list_embed_new[ii] * x_ad_embedded,
                            x_ad_embedded
                        ], 1)
                        for ii in range(x_action_list_embed_new.shape[0].value)
                    ]

                    act_unit_in = concat_list[0].shape[1].value
                    act_in = concat_list
                    with tf.variable_scope("activation_unit_{}_list".format(
                            browse_names[i])):
                        for ii in range(len(act_unit_hidden_layers)):
                            weights_act_unit = self.get_weight_variable(
                                [act_unit_in, act_unit_hidden_layers[ii]],
                                regularizer3,
                                self.param_dict["initializer_act_unit_w"](
                                    [act_unit_in, act_unit_hidden_layers[ii]]),
                                name='_act_unit_w_{}'.format(ii))
                            biases_act_unit = tf.get_variable(
                                "biases_{}_act_unit".format(ii),
                                [act_unit_hidden_layers[ii]],
                                initializer=tf.constant_initializer(0.0),
                                dtype=tf.float32)

                            act_out = list(
                                map(
                                    lambda act_in_i: act_fn(
                                        tf.matmul(act_in_i[0], weights_act_unit
                                                  ) + biases_act_unit,
                                        name="act_func_{}_{}".format(
                                            ii, act_in_i[1])),
                                    zip(act_in, range(len(act_in)))))

                            # act_out = [tf.expand_dims(act_fn(tf.matmul(act_in[ii], weights_act_unit) + biases_act_unit, name="act_func_{}_{}".format(i, ii)), 0)
                            #                 for ii in range(act_in.shape[0].value)]
                            act_in = act_out
                            act_unit_in = act_in[0].shape[1].value
                        act_output_in = act_in
                        act_output_unit = act_unit_in
                        weights_act_unit_output = self.get_weight_variable(
                            [act_output_unit, 1],
                            regularizer3,
                            self.param_dict["initializer_act_unit_w"](
                                [act_output_unit, 1]),
                            name='_act_unit_output_w')
                        biases_act_unit_output = tf.get_variable(
                            "biases_act_unit_output", [1],
                            initializer=tf.constant_initializer(0.0),
                            dtype=tf.float32)

                        act_output_out = tf.concat(
                            list(
                                map(
                                    lambda act_output_i: tf.expand_dims(
                                        tf.matmul(act_output_i,
                                                  weights_act_unit_output) +
                                        biases_act_unit_output, 0),
                                    act_output_in)), 0)
                        # act_output_out = tf.concat([tf.expand_dims(tf.matmul(act_output_in[iii], weights_act_unit_output) + biases_act_unit_output, 0) for iii in range(act_output_in.shape[0].value)], 0)
                    active_weight_score = tf.transpose(act_output_out,
                                                       [1, 0, 2])
                    # Set the weights of missing behaviours to 0.0
                    padding = tf.zeros_like(active_weight_score)
                    active_weight_score_t = tf.where(
                        tf.expand_dims(indice_mask, 2), active_weight_score,
                        padding)
                    with tf.name_scope("weight_sum_pooling"):
                        sum_pooling = tf.reduce_sum(
                            x_action_list_embed * active_weight_score_t, 1)
                    sum_poolings.append(sum_pooling)
            x_deep_in = tf.concat([x_embed, tf.concat(sum_poolings, 1)], 1)

        # Build the deep module
        with tf.name_scope("deep_network"):
            deep_layers = self.param_dict["deep_layers"]
            for i in range(len(deep_layers)):
                with tf.variable_scope("dnn_layer_{}".format(i)):
                    weights = self.get_weight_variable(
                        [x_deep_in.shape[1].value, deep_layers[i]],
                        regularizer2, self.param_dict["initializer_dnn_w"](
                            [x_deep_in.shape[1].value, deep_layers[i]]))
                    biases = tf.get_variable(
                        "biases", [deep_layers[i]],
                        initializer=tf.constant_initializer(0.0),
                        dtype=tf.float32)
                    layer_i = act_fn(tf.matmul(x_deep_in, weights) + biases,
                                     name="deep_mlp_{}".format(i))
                    x_deep_in = layer_i

        # Build the fully connected output module
        x_fc_in = x_deep_in
        with tf.name_scope("fc_layers"):
            fc_layers = self.param_dict['fc_layers']
            for i in range(len(fc_layers)):
                with tf.variable_scope("fc_layers_{}".format(i)):
                    weights = self.get_weight_variable(
                        [x_fc_in.shape[1].value, fc_layers[i]], regularizer4,
                        self.param_dict["initializer_fc_w"](
                            [x_fc_in.shape[1].value, fc_layers[i]]))
                    biases = tf.get_variable(
                        "biases", [fc_layers[i]],
                        initializer=tf.constant_initializer(0.0),
                        dtype=tf.float32)
                    layer_i = tf.nn.sigmoid(
                        tf.matmul(x_fc_in, weights) + biases)
                    x_fc_in = layer_i
        logit = x_fc_in
        return logit
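The user-behaviour branch above keeps reusing one pattern: split a '#'-joined ID string, hash the pieces, densify them with -1 as the padding value, and derive a mask from the -1 entries so padded slots can later be zeroed out. A stripped-down sketch of just that pattern (the bucket size and sample strings are made up, and tf.string_to_hash_bucket_fast stands in for the keyed hashing used above):

import tensorflow as tf

browse_num = 4                       # max number of browsed items kept per user
num_buckets = 1000                   # stand-in for the embedding table size

raw = tf.constant(["sku1#sku2#sku3", "sku9"])
split = tf.string_split(raw, "#")
indices = tf.sparse_to_dense(
    split.indices,
    [split.dense_shape[0], browse_num],
    tf.string_to_hash_bucket_fast(split.values, num_buckets),
    -1)                              # -1 marks the empty slots
mask = tf.not_equal(indices, -1)     # True where there is a real behaviour

with tf.Session() as sess:
    ids, m = sess.run([indices, mask])
    print(ids)                       # e.g. [[h1 h2 h3 -1], [h9 -1 -1 -1]]
    print(m)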
                             trainable=False)

embed = tf.nn.embedding_lookup(embeddings, questions)

# Define weights
weights = {
    # Hidden layer weights => 2*n_hidden because of forward + backward cells
    'out': tf.Variable(tf.random_normal([2 * FLAGS.n_hidden, FLAGS.n_classes]))
}
biases = {'out': tf.Variable(tf.random_normal([FLAGS.n_classes]))}
pred = model(FLAGS.n_hidden, embed, weights, biases, FLAGS.attention_size)

indices = tf.expand_dims(tf.range(0, FLAGS.batch_size, 1), 1)
concated = tf.concat([indices, labels], 1)
labels = tf.sparse_to_dense(concated,
                            tf.stack([FLAGS.batch_size, FLAGS.n_classes]), 1.0,
                            0.0)

# Define loss and optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=labels))
global_step = tf.Variable(0, trainable=False)
# optimizer = tf.train.AdamOptimizer(
#     learning_rate=cyclic_learning_rate(global_step, learning_rate=FLAGS.learning_rate)).minimize(cost)
# Pass global_step to minimize() so it is incremented and the exponential decay takes effect.
optimizer = tf.train.AdamOptimizer(learning_rate=tf.train.exponential_decay(
    FLAGS.learning_rate, global_step, 100000, 0.96)).minimize(cost, global_step=global_step)

# Evaluate model
tags = tf.argmax(labels, 1)
y_pred_cls = tf.argmax(tf.nn.softmax(pred), 1)
correct_pred = tf.equal(tf.argmax(pred, 1), tags)
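The indices/concat/sparse_to_dense dance above (and in the captioning examples below) is a manual one-hot encoding of the label ids; on TF versions that provide it, tf.one_hot builds the same dense matrix. A small equivalence sketch with made-up batch size and labels:

import tensorflow as tf

batch_size, n_classes = 4, 3
labels = tf.constant([[2], [0], [1], [2]], dtype=tf.int32)   # shape (batch_size, 1)

indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
concated = tf.concat([indices, labels], 1)
onehot_sparse = tf.sparse_to_dense(concated,
                                   tf.stack([batch_size, n_classes]), 1.0, 0.0)

onehot_builtin = tf.one_hot(tf.squeeze(labels, 1), depth=n_classes)

with tf.Session() as sess:
    a, b = sess.run([onehot_sparse, onehot_builtin])
    print((a == b).all())   # True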
Beispiel #54
0
    def _create_network(self):
        # Initialize autoencode network weights and biases
        network_weights = self._initialize_weights(**self.network_architecture)
        start_token_tensor = tf.constant(
            (np.zeros([self.batch_size, binary_dim])).astype(np.float32),
            dtype=tf.float32)
        self.network_weights = network_weights
        seqlen = tf.cast(tf.reduce_sum(self.mask, reduction_indices=-1),
                         tf.int32)

        KLD_penalty = tf.tanh(tf.cast(self.timestep, tf.float32) / 1600.0)

        # Use recognition network to determine mean and
        # (log) variance of Gaussian distribution in latent
        # space
        if not same_embedding:
            input_embedding, input_embedding_KLD_loss = self._get_input_embedding(
                [
                    network_weights['variational_encoding'],
                    network_weights['biases_variational_encoding']
                ], network_weights['input_meaning'])
        else:
            input_embedding, input_embedding_KLD_loss = self._get_input_embedding(
                [
                    network_weights['variational_encoding'],
                    network_weights['biases_variational_encoding']
                ], network_weights['LSTM'])

        state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)

        loss = 0
        self.debug = 0
        probs = []
        with tf.variable_scope("RNN"):
            for i in range(self.network_architecture['maxlen']):
                if i > 0:

                    # current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:,i-1]) + self.embedding_bias
                    if form3:
                        current_embedding, KLD_loss = self._get_word_embedding(
                            [network_weights['LSTM']],
                            network_weights['input_meaning'],
                            self.caption_placeholder[:, i - 1, :],
                            logit=True)
                    elif form2:
                        current_embedding, KLD_loss = self._get_word_embedding(
                            [
                                network_weights['variational_encoding'],
                                network_weights['biases_variational_encoding']
                            ],
                            network_weights['LSTM'],
                            self.caption_placeholder[:, i - 1, :],
                            logit=True)
                    else:
                        current_embedding, KLD_loss = self._get_word_embedding(
                            [
                                network_weights['variational_encoding'],
                                network_weights['biases_variational_encoding']
                            ], network_weights['LSTM'],
                            self.caption_placeholder[:, i - 1])
                    if transfertype2:
                        current_embedding = tf.stop_gradient(current_embedding)
                    loss += tf.reduce_sum(
                        KLD_loss * self.mask[:, i]) * KLD_penalty
                else:
                    current_embedding = input_embedding
                if i > 0:
                    tf.get_variable_scope().reuse_variables()

                out, state = self.lstm(current_embedding, state)

                if i > 0:
                    if not form2:
                        labels = tf.expand_dims(self.caption_placeholder[:, i],
                                                1)
                        ix_range = tf.range(0, self.batch_size, 1)
                        ixs = tf.expand_dims(ix_range, 1)
                        concat = tf.concat([ixs, labels], 1)
                        onehot = tf.sparse_to_dense(
                            concat, tf.stack([self.batch_size, self.n_words]),
                            1.0, 0.0)
                    else:
                        onehot = self.caption_placeholder[:, i, :]

                    logit = tf.matmul(
                        out, network_weights['LSTM']['encoding_weight']
                    ) + network_weights['LSTM']['encoding_bias']
                    if not use_ctc:
                        if form2:
                            # best_word=tf.nn.softmax(logit)

                            # best_word=tf.round(best_word)
                            # all_the_f_one_h.append(best_word)
                            xentropy = tf.nn.sigmoid_cross_entropy_with_logits(
                                logits=logit, labels=onehot)
                            xentropy = tf.reduce_sum(xentropy,
                                                     reduction_indices=-1)
                        else:
                            xentropy = tf.nn.softmax_cross_entropy_with_logits(
                                logits=logit, labels=onehot)

                        xentropy = xentropy * self.mask[:, i]
                        xentropy = tf.reduce_sum(xentropy)
                        self.debug += xentropy
                        loss += xentropy

                    else:
                        probs.append(tf.expand_dims(tf.nn.sigmoid(logit), 1))
            if not use_ctc:
                loss_ctc = 0
                self.debug = self.debug / tf.reduce_sum(self.mask[:, 1:])
            else:
                probs = tf.concat(probs, axis=1)
                self.debug = probs[0, 2]
                probs = ctc_loss.get_output_probabilities(
                    probs, self.caption_placeholder[:, 1:, :])
                loss_ctc = ctc_loss.loss(
                    probs, self.caption_placeholder[:, 1:, :],
                    self.network_architecture['maxlen'] - 2, self.batch_size,
                    seqlen - 1)

            # self.debug=tf.reduce_sum(input_embedding_KLD_loss)/self.batch_size*KLD_penalty+loss_ctc
            loss = (loss / tf.reduce_sum(self.mask[:, 1:])) + tf.reduce_sum(
                input_embedding_KLD_loss
            ) / self.batch_size * KLD_penalty + loss_ctc

            self.loss = loss
    def build_model(self, video, video_mask, caption, caption_1, caption_mask):
        caption_mask = tf.cast(caption_mask, tf.float32)
        video_mask = tf.cast(video_mask, tf.float32)
        # for decoding
        video_flat = tf.reshape(video, [-1, self.dim_image]) # (b x nv) x d
        image_emb = tf.nn.xw_plus_b( video_flat, self.encode_image_W, self.encode_image_b) # (b x nv) x h
        image_emb = tf.reshape(image_emb, [self.batch_size, self.n_video_steps, self.dim_hidden]) # b x nv x h

        c_init = tf.zeros([self.batch_size, self.dim_hidden]) # b x h
        m_init = tf.zeros([self.batch_size, self.dim_hidden]) # b x h
        state2 = (c_init, m_init) # 2 x b x h

        ######## Encoding Stage #########
        # encoding video
        # mean pooling && mapping into (-1, 1) range
        output1 = tf.nn.tanh(tf.reduce_mean(image_emb, axis=1)) # b x h
        # encoding sentence
        with tf.variable_scope("model") as scope:
            for i in xrange(self.n_caption_steps):
                if i > 0: scope.reuse_variables()
                with tf.variable_scope("LSTM2"):
                    with tf.device(cpu_device):
                        current_embed = tf.nn.embedding_lookup(self.Wemb, caption_1[:,i]) # b x h
                    output2, state2 = self.lstm2_dropout(current_embed, state2) # b x h
        ######## Encoding Stage #########


        ######## Semantic Learning Stage ########
        input_state = tf.concat([output1, output2], 1) # b x (2 * h)
        loss_latent, output_semantic = self.vae(input_state)
        ######## Semantic Learning Stage ########

        ####### tied loss ##########
        sh_pred = tf.tanh(tf.nn.xw_plus_b(output1, self.sv_W, self.s_b)) # b x h
        loss_tied_1 = tf.reduce_sum(tf.square(tf.subtract(output2, sh_pred)))
        vh_pred = tf.tanh(tf.nn.xw_plus_b(output2, self.sv_W, self.v_b)) # b x h
        loss_tied_2 = tf.reduce_sum(tf.square(tf.subtract(output1, vh_pred)))
        loss_tied = loss_tied_1 + loss_tied_2
        tf.summary.scalar('loss_tied_1', loss_tied_1)
        tf.summary.scalar('loss_tied_2', loss_tied_2)
        tf.summary.histogram('vh_pred', vh_pred)
        tf.summary.histogram('sh_pred', sh_pred)
        ####### tied loss ##########

        ######## Decoding Stage ##########
        state3 = (c_init, m_init) # 2 x b x h
        state4 = (c_init, m_init) # 2 x b x h
        current_embed = tf.zeros([self.batch_size, self.dim_hidden]) # b x h
        video_prev = tf.zeros([self.batch_size, self.dim_hidden])

        loss_caption = 0.0
        loss_video = 0.0

        ## decoding sentence without attention
        with tf.variable_scope("model") as scope:
            with tf.variable_scope("LSTM3"):
                _, state3 = self.lstm3_dropout(output_semantic, state3) # b x h
            for i in xrange(n_caption_steps):
                scope.reuse_variables()
                with tf.variable_scope("LSTM3"):
                    output3, state3 = self.lstm3_dropout(current_embed, state3) # b x h
                labels = tf.expand_dims(caption[:,i], 1) # b x 1
                indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1) # b x 1
                concated = tf.concat([indices, labels], 1) # b x 2
                onehot_labels = tf.sparse_to_dense(concated,
                    tf.stack([self.batch_size, self.n_words]), 1.0, 0.0) # b x w
                with tf.device(cpu_device):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:,i])
                logit_words = tf.nn.xw_plus_b(output3, self.embed_word_W, self.embed_word_b) # b x w
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = logit_words,
                    labels = onehot_labels) # b x 1
                cross_entropy = cross_entropy * caption_mask[:,i] # b x 1
                loss_caption += tf.reduce_sum(cross_entropy) # 1

        ## decoding video without attention
        with tf.variable_scope("model") as scope:
            ## TODO: add attention for video decoding
            ## write into memory first
            with tf.variable_scope("LSTM4"):
                _, state4 = self.lstm4_dropout(output_semantic, state4)
            for i in xrange(self.n_video_steps):
                scope.reuse_variables()
                with tf.variable_scope("LSTM4"):
                    output4, state4 = self.lstm4_dropout(video_prev, state4)
                decode_image = tf.nn.xw_plus_b(output4, self.decode_image_W, self.decode_image_b) # b x d_im
                video_prev = image_emb[:, i, :] # b x h
                euclid_loss = tf.reduce_sum(tf.square(tf.subtract(decode_image, video[:,i,:])),
                    axis=1, keep_dims=True) # b x 1
                euclid_loss = euclid_loss * video_mask[:, i] # b x 1
                loss_video += tf.reduce_sum(euclid_loss) # 1

        loss_caption = loss_caption / tf.reduce_sum(caption_mask)
        loss_video = loss_video / tf.reduce_sum(video_mask)

        loss = tf.constant(caption_weight) * loss_caption + tf.constant(video_weight) * loss_video + \
            tf.constant(latent_weight) * loss_latent + tf.constant(tied_weight) * loss_tied
        return loss, loss_caption, loss_tied, loss_latent, loss_video, output_semantic, output1, output2
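A hedged aside (not part of the original model, hypothetical sizes): the decoding loops above build one-hot targets by concatenating batch indices with the word ids and densifying them through tf.sparse_to_dense. On TF 1.x the same b x w target matrix can be produced directly with tf.one_hot, which avoids the manual index bookkeeping:

import tensorflow as tf

batch_size, n_words = 32, 5000                      # hypothetical sizes
word_ids = tf.placeholder(tf.int32, [batch_size])   # e.g. caption[:, i]
onehot_labels = tf.one_hot(word_ids, depth=n_words,
                           on_value=1.0, off_value=0.0)  # b x w, same result as the sparse_to_dense pattern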
Beispiel #56
0
def build_input(dataset, data_path, batch_size, mode):
  """Build CIFAR image and labels.

  Args:
    dataset: Either 'cifar10' or 'cifar100'.
    data_path: Filename for raw_data.
    batch_size: Input batch size.
    mode: Either 'train' or 'eval'.
  Returns:
    images: Batches of images. [batch_size, image_size, image_size, 3]
    labels: Batches of labels. [batch_size, num_classes]
  Raises:
    ValueError: when the specified dataset is not supported.
  """
  image_size = 32
  if dataset == 'cifar10':
    label_bytes = 1
    label_offset = 0
    num_classes = 10
  elif dataset == 'cifar100':
    label_bytes = 1
    label_offset = 1
    num_classes = 100
  else:
    raise ValueError('Not supported dataset %s' % dataset)

  depth = 3
  image_bytes = image_size * image_size * depth
  record_bytes = label_bytes + label_offset + image_bytes

  data_files = tf.gfile.Glob(data_path)
  file_queue = tf.train.string_input_producer(data_files, shuffle=True)
  # Read examples from files in the filename queue.
  reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
  _, value = reader.read(file_queue)

  # Convert these examples to dense labels and processed images.
  record = tf.reshape(tf.decode_raw(value, tf.uint8), [record_bytes])
  label = tf.cast(tf.slice(record, [label_offset], [label_bytes]), tf.int32)
  # Convert from string to [depth * height * width] to [depth, height, width].
  depth_major = tf.reshape(tf.slice(record, [label_bytes], [image_bytes]),
                           [depth, image_size, image_size])
  # Convert from [depth, height, width] to [height, width, depth].
  image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

  if mode == 'train':
    image = tf.image.resize_image_with_crop_or_pad(
        image, image_size+4, image_size+4)
    image = tf.random_crop(image, [image_size, image_size, 3])
    image = tf.image.random_flip_left_right(image)
    # Brightness/saturation/contrast provides small gains .2%~.5% on cifar.
    # image = tf.image.random_brightness(image, max_delta=63. / 255.)
    # image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
    # image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
    image = tf.image.per_image_standardization(image)

    example_queue = tf.RandomShuffleQueue(
        capacity=16 * batch_size,
        min_after_dequeue=8 * batch_size,
        dtypes=[tf.float32, tf.int32],
        shapes=[[image_size, image_size, depth], [1]])
    num_threads = 16
  else:
    image = tf.image.resize_image_with_crop_or_pad(
        image, image_size, image_size)
    image = tf.image.per_image_standardization(image)

    example_queue = tf.FIFOQueue(
        3 * batch_size,
        dtypes=[tf.float32, tf.int32],
        shapes=[[image_size, image_size, depth], [1]])
    num_threads = 1

  example_enqueue_op = example_queue.enqueue([image, label])
  tf.train.add_queue_runner(tf.train.queue_runner.QueueRunner(
      example_queue, [example_enqueue_op] * num_threads))

  # Read 'batch' labels + images from the example queue.
  images, labels = example_queue.dequeue_many(batch_size)
  labels = tf.reshape(labels, [batch_size, 1])
  indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
  labels = tf.sparse_to_dense(
      tf.concat([indices, labels], 1),
      [batch_size, num_classes], 1.0, 0.0)

  assert len(images.get_shape()) == 4
  assert images.get_shape()[0] == batch_size
  assert images.get_shape()[-1] == 3
  assert len(labels.get_shape()) == 2
  assert labels.get_shape()[0] == batch_size
  assert labels.get_shape()[1] == num_classes

  # Display the training images in the visualizer.
  tf.summary.image('images', images)
  return images, labels
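A minimal usage sketch for build_input (hypothetical file path; assumes the TF 1.x queue-runner input pipeline): the returned images/labels tensors are backed by queues, so the queue runners have to be started before batches can be dequeued.

import tensorflow as tf

images, labels = build_input('cifar10', '/tmp/cifar10/data_batch*', 128, 'train')
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    image_batch, label_batch = sess.run([images, labels])  # [128, 32, 32, 3], [128, 10]
    coord.request_stop()
    coord.join(threads)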
Beispiel #57
0
tf.get_variable_scope().reuse_variables()
accuracy_logits = inference(validate_batch_ids, validate_batch_values)
validate_softmax = tf.nn.softmax(accuracy_logits)
validate_batch_labels = tf.to_int64(validate_batch_labels)
correct_prediction = tf.equal(tf.argmax(validate_softmax, 1),
                              validate_batch_labels)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Compute auc
validate_batch_labels = tf.cast(validate_batch_labels, tf.int32)
sparse_labels = tf.reshape(validate_batch_labels, [-1, 1])
derived_size = tf.shape(validate_batch_labels)[0]
indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
concated = tf.concat(1, [indices, sparse_labels])
outshape = tf.pack([derived_size, LABEL_SIZE])
new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
_, auc_op = tf.contrib.metrics.streaming_auc(validate_softmax,
                                             new_validate_batch_labels)

# Define inference op
sparse_index = tf.placeholder(tf.int64)
sparse_ids = tf.placeholder(tf.int64)
sparse_values = tf.placeholder(tf.float32)
sparse_shape = tf.placeholder(tf.int64)
inference_ids = tf.SparseTensor(sparse_index, sparse_ids, sparse_shape)
inference_values = tf.SparseTensor(sparse_index, sparse_values, sparse_shape)
inference_logits = inference(inference_ids, inference_values)
inference_softmax = tf.nn.softmax(inference_logits)
inference_op = tf.argmax(inference_softmax, 1)

# Initialize saver and summary
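The fragment above targets the pre-1.0 API (axis-first tf.concat, tf.pack) and, like the other examples, builds one-hot labels through tf.sparse_to_dense. A hedged evaluation sketch (assuming TF 0.12 or later; num_validation_batches is hypothetical): tf.contrib.metrics.streaming_auc keeps its running totals in local variables, so those must be initialized and the returned update op run once per validation batch before the accumulated AUC is meaningful.

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    for _ in range(num_validation_batches):  # hypothetical number of batches
        sess.run(auc_op)                     # updates the streaming true/false positive counts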
Beispiel #58
0
    def build_model(self):
        video = tf.placeholder(
            tf.float32,
            [self.batch_size, self.n_lstm_steps, self.dim_image])  # b x n x d
        video_mask = tf.placeholder(
            tf.float32, [self.batch_size, self.n_lstm_steps])  # b x n

        caption = tf.placeholder(tf.int32,
                                 [self.batch_size, n_caption_step])  # b x 16
        caption_mask = tf.placeholder(
            tf.float32, [self.batch_size, n_caption_step])  # b x 16

        video_flat = tf.reshape(video, [-1, self.dim_image])  # (b x n) x d
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W,
                                    self.encode_image_b)  # (b x n) x h
        image_emb = tf.reshape(
            image_emb,
            [self.batch_size, self.n_lstm_steps, self.dim_hidden])  # b x n x h
        image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x h

        state1 = tf.zeros([self.batch_size, self.lstm3.state_size])  # b x s
        h_prev = tf.zeros([self.batch_size, self.dim_hidden])  # b x h

        loss_caption = 0.0

        current_embed = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        brcst_w = tf.tile(tf.expand_dims(self.embed_att_w, 0),
                          [self.n_lstm_steps, 1, 1])  # n x h x 1
        image_part = tf.batch_matmul(
            image_emb,
            tf.tile(
                tf.expand_dims(self.embed_att_Ua, 0),
                [self.n_lstm_steps, 1, 1])) + self.embed_att_ba  # n x b x h
        for i in range(n_caption_step):
            e = tf.tanh(tf.matmul(h_prev, self.embed_att_Wa) +
                        image_part)  # n x b x h
            e = tf.batch_matmul(e, brcst_w)  # unnormalized relevance score
            e = tf.reduce_sum(e, 2)  # n x b
            e_hat_exp = tf.mul(tf.transpose(video_mask), tf.exp(e))  # n x b
            denomin = tf.reduce_sum(e_hat_exp, 0)  # b
            denomin = denomin + tf.to_float(tf.equal(
                denomin, 0))  # regularize denominator
            alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2),
                             [1, 1, self.dim_hidden
                              ])  # n x b x h  # normalize to obtain alpha
            attention_list = tf.mul(alphas, image_emb)  # n x b x h
            atten = tf.reduce_sum(
                attention_list,
                0)  # b x h       #  soft-attention weighted sum
            if i > 0: tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM3"):
                output1, state1 = self.lstm3_dropout(
                    tf.concat(1, [atten, current_embed]), state1)  # b x h

            output2 = tf.tanh(
                tf.nn.xw_plus_b(tf.concat(1, [output1, atten, current_embed]),
                                self.embed_nn_Wp, self.embed_nn_bp))  # b x h
            h_prev = output1  # b x h
            labels = tf.expand_dims(caption[:, i], 1)  # b x 1
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1),
                                     1)  # b x 1
            concated = tf.concat(1, [indices, labels])  # b x 2
            onehot_labels = tf.sparse_to_dense(
                concated, tf.pack([self.batch_size,
                                   self.n_words]), 1.0, 0.0)  # b x w
            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:,
                                                                          i])

            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W,
                                          self.embed_word_b)  # b x w
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logit_words, onehot_labels)  # b x 1
            cross_entropy = cross_entropy * caption_mask[:, i]  # b x 1
            loss_caption += tf.reduce_sum(cross_entropy)  # 1

        loss_caption = loss_caption / tf.reduce_sum(caption_mask)
        loss = loss_caption
        return loss, video, video_mask, caption, caption_mask
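This example is written against the pre-1.0 API (tf.batch_matmul, tf.mul, tf.pack and axis-first tf.concat were all renamed or reordered in TF 1.0). The core of its loop is a masked soft attention over the n encoded frames; a minimal standalone sketch of just that computation, with hypothetical shapes:

import tensorflow as tf

n, b, h = 20, 16, 256                            # hypothetical sizes
scores = tf.placeholder(tf.float32, [n, b])      # unnormalized relevance e
mask = tf.placeholder(tf.float32, [n, b])        # transposed video_mask
frames = tf.placeholder(tf.float32, [n, b, h])   # image_emb, n x b x h

e_hat = mask * tf.exp(scores)                    # zero out padded time steps
denom = tf.reduce_sum(e_hat, 0)                  # b
denom = denom + tf.to_float(tf.equal(denom, 0))  # guard against division by zero
alphas = tf.expand_dims(e_hat / denom, 2)        # n x b x 1, broadcasts over h
atten = tf.reduce_sum(alphas * frames, 0)        # b x h soft-attention weighted sum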
Beispiel #59
0
    def build_model(self):
        video = tf.placeholder(
            tf.float32,
            [self.batch_size, self.n_video_lstm_step, self.dim_image])
        video_mask = tf.placeholder(tf.float32,
                                    [self.batch_size, self.n_video_lstm_step])

        caption = tf.placeholder(
            tf.int32, [self.batch_size, self.n_caption_lstm_step + 1])
        caption_mask = tf.placeholder(
            tf.float32, [self.batch_size, self.n_caption_lstm_step + 1])

        video_flat = tf.reshape(video, [-1, self.dim_image])
        image_emb = tf.nn.xw_plus_b(
            video_flat, self.encode_image_W,
            self.encode_image_b)  # (batch_size*n_lstm_steps, dim_hidden)
        image_emb = tf.reshape(
            image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])

        state1 = tf.zeros([self.batch_size, self.lstm1.state_size])
        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])
        padding = tf.zeros([self.batch_size, self.dim_hidden])

        probs = []
        loss = 0.0

        ##############################  Encoding Stage ##################################
        h_list = []  # collect the LSTM1 state of every encoding step
        for i in range(0, self.n_video_lstm_step):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(image_emb[:, i, :], state1)
                h_list.append(state1)
            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1),
                                             state2)
        h_list = tf.stack(h_list, axis=1)  # stack once the encoding loop has finished
        ############################# Decoding Stage ######################################
        for i in range(0, self.n_caption_lstm_step
                       ):  ## Phase 2 => only generate captions
            #if i == 0:
            #    current_embed = tf.zeros([self.batch_size, self.dim_hidden])
            #else:
            with tf.device("/gpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:,
                                                                          i])

            tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(padding, state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(
                    tf.concat([current_embed, output1], 1), state2)

            labels = tf.expand_dims(caption[:, i + 1], 1)
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
            concated = tf.concat([indices, labels], 1)  # sparse indices, b x 2
            onehot_labels = tf.sparse_to_dense(
                concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)
            #acquire output
            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W,
                                          self.embed_word_b)
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logit_words, labels=onehot_labels)
            cross_entropy = cross_entropy * caption_mask[:, i]
            probs.append(logit_words)

            current_loss = tf.reduce_sum(cross_entropy) / self.batch_size
            loss = loss + current_loss

        return loss, video, video_mask, caption, caption_mask, probs
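A minimal training-step sketch for this model (hypothetical constructor, optimizer and batch data; not part of the original class): the returned placeholders are fed with a batch of video features, the corresponding mask, and the caption ids (with one extra leading token) plus their mask.

model = Video_Caption_Generator(...)             # hypothetical constructor call
loss, video, video_mask, caption, caption_mask, probs = model.build_model()
train_op = tf.train.AdamOptimizer(0.0001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, cur_loss = sess.run([train_op, loss], feed_dict={
        video: video_feats,              # [batch_size, n_video_lstm_step, dim_image]
        video_mask: video_feats_mask,    # [batch_size, n_video_lstm_step]
        caption: caption_ids,            # [batch_size, n_caption_lstm_step + 1]
        caption_mask: caption_ids_mask,  # [batch_size, n_caption_lstm_step + 1]
    })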
Beispiel #60
0
    def build_model(self):
        word_vectors = tf.placeholder(
            tf.float32,
            [self.batch_size, self.n_encode_lstm_step, self.dim_wordvec])

        caption = tf.placeholder(
            tf.int32, [self.batch_size, self.n_decode_lstm_step + 1])
        caption_mask = tf.placeholder(
            tf.float32, [self.batch_size, self.n_decode_lstm_step + 1])

        word_vectors_flat = tf.reshape(word_vectors, [-1, self.dim_wordvec])
        wordvec_emb = tf.nn.xw_plus_b(
            word_vectors_flat, self.encode_vector_W, self.encode_vector_b
        )  # (batch_size*n_encode_lstm_step, dim_hidden)
        wordvec_emb = tf.reshape(
            wordvec_emb,
            [self.batch_size, self.n_encode_lstm_step, self.dim_hidden])

        reward = tf.placeholder(tf.float32,
                                [self.batch_size, self.n_decode_lstm_step])

        state1 = tf.zeros([self.batch_size, self.lstm1.state_size])
        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])
        padding = tf.zeros([self.batch_size, self.dim_hidden])

        entropies = []
        loss = 0.
        pg_loss = 0.  # policy gradient loss

        ##############################  Encoding Stage ##################################
        for i in range(0, self.n_encode_lstm_step):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(wordvec_emb[:, i, :], state1)
                # states.append(state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1),
                                             state2)

        ############################# Decoding Stage ######################################
        for i in range(0, self.n_decode_lstm_step):
            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:,
                                                                          i])

            tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(padding, state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(
                    tf.concat([current_embed, output1], 1), state2)

            labels = tf.expand_dims(caption[:, i + 1], 1)
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
            concated = tf.concat([indices, labels], 1)
            onehot_labels = tf.sparse_to_dense(
                concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W,
                                          self.embed_word_b)

            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logit_words, labels=onehot_labels)
            cross_entropy = cross_entropy * caption_mask[:, i]
            entropies.append(cross_entropy)
            pg_cross_entropy = cross_entropy * reward[:, i]

            pg_current_loss = tf.reduce_sum(pg_cross_entropy) / self.batch_size
            pg_loss = pg_loss + pg_current_loss

        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            train_op = tf.train.AdamOptimizer(self.lr).minimize(pg_loss)

        input_tensors = {
            'word_vectors': word_vectors,
            'caption': caption,
            'caption_mask': caption_mask,
            'reward': reward
        }

        feats = {'entropies': entropies}

        return train_op, pg_loss, input_tensors, feats
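A minimal usage sketch (hypothetical constructor and batch data): the reward placeholder scales each decoded word's cross entropy, so the policy-gradient loss only pushes on words whose step received a non-zero reward, and running train_op applies one Adam update of that loss.

model = PolicyGradientCaptionModel(...)           # hypothetical constructor call
train_op, pg_loss, input_tensors, feats = model.build_model()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, cur_pg_loss = sess.run([train_op, pg_loss], feed_dict={
        input_tensors['word_vectors']: batch_word_vectors,  # b x n_encode_lstm_step x dim_wordvec
        input_tensors['caption']: batch_caption_ids,        # b x (n_decode_lstm_step + 1)
        input_tensors['caption_mask']: batch_caption_mask,  # b x (n_decode_lstm_step + 1)
        input_tensors['reward']: batch_rewards,             # b x n_decode_lstm_step
    })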