def test_sequence_accuracy_identical_samples(self):
        """Sequence accuracy must be exactly 1.0 when predictions match labels."""
        ground_truth = tf.convert_to_tensor(self._fake_labels())

        # Feed the same tensor as both prediction and ground truth.
        accuracy_op = metrics.sequence_accuracy(ground_truth, ground_truth,
                                                self.rej_char)
        with self.initialized_session() as sess:
            result = sess.run(accuracy_op)

        self.assertAlmostEqual(result, 1.0)
    def test_sequence_accuracy_one_char_difference(self):
        """Corrupting one char of one sequence drops accuracy by 1/batch_size."""
        ground_truth_np = self._fake_labels()
        ground_truth_tf = tf.convert_to_tensor(ground_truth_np)
        # BUG FIX: ((0, 0)) is just the tuple (0, 0) — parentheses alone do not
        # make a tuple-of-tuples. The trailing comma makes bad_indexes a
        # one-element sequence of (row, col) pairs, which is what the intent
        # ("1 of 4 sequences is incorrect") requires.
        prediction_tf = tf.convert_to_tensor(
            self._incorrect_copy(ground_truth_np, bad_indexes=((0, 0),)))

        accuracy_tf = metrics.sequence_accuracy(prediction_tf, ground_truth_tf,
                                                self.rej_char)
        with self.initialized_session() as sess:
            accuracy_np = sess.run(accuracy_tf)

        # 1 of batch_size sequences is incorrect.
        self.assertAlmostEqual(accuracy_np, 1.0 - 1.0 / self.batch_size)
Example #3
0
def main(_):
    """Builds the attention-OCR training graph and runs the training loop."""
    prepare_training_dir()

    dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
    print(dataset)
    model = common_flags.create_model(dataset.num_char_classes,
                                      dataset.max_sequence_length,
                                      dataset.num_of_views, dataset.null_code)
    hparams = get_training_hparams()

    # With ps_tasks == 0 everything is placed on the local device; with
    # multiple (non-local) replicas the ReplicaDeviceSetter spreads the
    # variables across the parameter servers.
    device_setter = tf.train.replica_device_setter(FLAGS.ps_tasks,
                                                   merge_devices=True)
    with tf.device(device_setter):
        data = data_provider.get_data(
            dataset,
            FLAGS.batch_size,
            augment=hparams.use_augment_input,
            central_crop_size=common_flags.get_crop_size(),
            use_default_augment=hparams.use_default_augment)

        endpoints = model.create_base(data.images, data.labels_one_hot)
        total_loss = model.create_loss(data, endpoints)

        # Streaming (running-average) accuracy metrics for the training loop.
        char_accuracy_op = metrics.char_accuracy(
            endpoints.predicted_chars,
            data.labels,
            streaming=True,
            rej_char=model._params.null_code)
        sequence_accuracy_op = metrics.sequence_accuracy(
            endpoints.predicted_chars,
            data.labels,
            streaming=True,
            rej_char=model._params.null_code)

        model.create_summaries(data,
                               endpoints,
                               dataset.charset,
                               is_training=True)
        init_fn = model.create_init_fn_to_restore(FLAGS.checkpoint,
                                                  FLAGS.checkpoint_inception)
        if FLAGS.show_graph_stats:
            logging.info('Total number of weights in the graph: %s',
                         calculate_graph_metrics())
        train(total_loss, init_fn, char_accuracy_op, sequence_accuracy_op,
              hparams)
Example #4
0
  def create_summaries(self, data, endpoints, charset, is_training):
    """Creates all summaries for the model.

    Args:
      data: InputEndpoints namedtuple.
      endpoints: OutputEndpoints namedtuple.
      charset: A dictionary mapping character codes to unicode characters.
        Use the one provided by dataset.charset.
      is_training: If True, summaries are prefixed for the training job,
        otherwise for evaluation.

    Returns:
      A list of evaluation ops for the eval job; None when training.
    """
    # is_training is fixed for the lifetime of this call, so the prefix can
    # be computed once instead of inside every sname() invocation.
    prefix = 'train' if is_training else 'eval'

    def sname(label):
      return '%s/%s' % (prefix, label)

    max_outputs = 4
    # TODO(gorban): uncomment, when tf.summary.text released.
    # charset_mapper = CharsetMapper(charset)
    # pr_text = charset_mapper.get_text(
    #     endpoints.predicted_chars[:max_outputs,:])
    # tf.summary.text(sname('text/pr'), pr_text)
    # gt_text = charset_mapper.get_text(data.labels[:max_outputs,:])
    # tf.summary.text(sname('text/gt'), gt_text)
    tf.summary.image(sname('image'), data.images, max_outputs=max_outputs)

    if is_training:
      tf.summary.image(
        sname('image/orig'), data.images_orig, max_outputs=max_outputs)
      for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
      return None

    # Evaluation job: collect streaming metrics as (value, update_op) pairs.
    names_to_values = {}
    names_to_updates = {}

    char_value, char_update = metrics.char_accuracy(
        endpoints.predicted_chars,
        data.labels,
        streaming=True,
        rej_char=self._params.null_code)
    names_to_values['CharacterAccuracy'] = char_value
    names_to_updates['CharacterAccuracy'] = char_update

    # Sequence accuracy computed by cutting sequence at the first null char.
    seq_value, seq_update = metrics.sequence_accuracy(
        endpoints.predicted_chars,
        data.labels,
        streaming=True,
        rej_char=self._params.null_code)
    names_to_values['SequenceAccuracy'] = seq_value
    names_to_updates['SequenceAccuracy'] = seq_update

    for name, value in names_to_values.items():
      summary_name = 'eval/' + name
      tf.summary.scalar(summary_name, tf.Print(value, [value], summary_name))
    return list(names_to_updates.values())
Example #5
0
    def convolutional_attention_network(self,
                                        vocab_size,
                                        max_seq=25,
                                        batch_size=8):
        """Builds the full convolutional-attention OCR training graph.

        Args:
            vocab_size: Number of output character classes.
            max_seq: Maximum decoded sequence length.
            batch_size: Static batch size baked into the placeholders.

        Returns:
            A tuple of graph nodes: (inputs, output, length, loss, optimizer,
            output_probabilities, summary_op, init, word_acc).
        """
        # Placeholders: fixed-size 128x400 RGB images, integer label ids,
        # and per-example sequence lengths.
        inputs = tf.placeholder(tf.float32, [batch_size, 128, 400, 3])
        output = tf.placeholder(tf.int32, [batch_size, max_seq])
        length = tf.placeholder(tf.int32, [batch_size])
        resnet_34 = ResNet(34, 10)

        def resnet_34_backbone(x):
            # CNN feature extractor; print() logs the tensor at graph-build time.
            out = resnet_34.network(x)
            print(out)
            return out

        feature_map_resnet = resnet_34_backbone(
            inputs)  # feature map from the ResNet-34 backbone
        feature_map = transform_dimension(feature_map_resnet, 1024)
        # Global representation: 6 bottleneck residual blocks over the RAW
        # backbone features (note: feature_map_resnet, not the transformed
        # feature_map — presumably intentional; verify against the paper).
        for i in range(6):
            global_representation = bottle_resblock(
                feature_map_resnet if i == 0 else global_representation,
                512,
                scope='bottle_resblock_' + str(i))
        global_representation = global_avg_pooling(global_representation)
        global_representation = fully_conneted(global_representation, 512)

        ##########################################################DECODER########################################
        def decoder_embedding(y, vocab_size, embed_size=512, shifted=True):
            # NOTE(review): embeddings come from a tf.random_normal op, not a
            # tf.Variable — they are re-sampled on every session.run and are
            # not trainable. Confirm whether tf.get_variable was intended.
            embeddings = tf.random_normal(shape=(vocab_size, embed_size))
            embedded = tf.nn.embedding_lookup(embeddings, y)
            return embedded

        def positional_encoding(x):
            # Sinusoidal positional encoding added to the embeddings
            # (sin on even indices, cos on odd indices).
            seq_len, dim = x.get_shape().as_list()[-2:]
            encoded_vec = np.array([
                pos / np.power(10000, 2 * i / dim) for pos in range(seq_len)
                for i in range(dim)
            ])
            encoded_vec[::2] = np.sin(encoded_vec[::2])
            encoded_vec[1::2] = np.cos(encoded_vec[1::2])
            encoded_vec_tensor = tf.convert_to_tensor(encoded_vec.reshape(
                [seq_len, dim]),
                                                      dtype=tf.float32)
            return tf.add(x, encoded_vec_tensor)

        def layer_norm(x):
            return tf.contrib.layers.layer_norm(x)

        y = decoder_embedding(output, vocab_size)

        # Shift target embeddings one step right (teacher forcing), as in the
        # official Transformer implementation: pad a zero step at the front
        # and drop the last step.
        y = tf.pad(y, [[0, 0], [1, 0], [0, 0]
                       ])[:, :-1, :]  #shift right from official transformer
        y = positional_encoding(y)  #(bs, seq_len, 512)

        # Concatenate the global image representation onto every time step.
        decoder_input = []
        for i in range(y.get_shape().as_list()[1]):
            decoder_input.append(
                tf.concat([global_representation, y[:, i, :]],
                          1))  #(bs, 1024) per time step
        decoder_input = tf.stack(decoder_input, 1)  #(bs, seq_len, 1024)

        ####MASKED SELF ATTENTION###
        masked_self_attention = Attention(dropout=0)
        decoder_output = masked_self_attention.multi_head(
            decoder_input, decoder_input, decoder_input)
        norm_1 = layer_norm(decoder_output)
        # Residual connection around the normalized attention output.
        decoder_output = decoder_input + norm_1

        ###2D self attention###
        two_D_attention = Attention(masked=False, dropout=0)
        # Flatten the encoder feature map so it can serve as the attention
        # keys/values; the last dim must match the decoder width.
        enc_reshape = tf.reshape(feature_map, [
            decoder_output.get_shape().as_list()[0], -1,
            decoder_output.get_shape().as_list()[-1]
        ])
        decoder_output_2 = two_D_attention.multi_head(decoder_output,
                                                      enc_reshape, enc_reshape)
        norm_2 = layer_norm(decoder_output_2)
        decoder_output = decoder_output + norm_2

        def position_wise_feed_forward_network(x):  #using conv1D
            # First linear (kernel size 1 conv == per-position dense, 2048 units)
            linear_1 = tf.layers.conv1d(x, 2048, 1)
            # ReLU operation
            relu_1 = tf.nn.relu(linear_1)
            # Second linear projecting back to the input width
            linear_2 = tf.layers.conv1d(relu_1, x.get_shape().as_list()[-1], 1)
            # NOTE(review): keep_prob=1 makes this dropout a no-op.
            return tf.nn.dropout(linear_2, 1)

        pwff = position_wise_feed_forward_network(decoder_output)
        norm_3 = layer_norm(pwff)
        decoder_output = decoder_output + norm_3

        # Project decoder states to per-character logits.
        output_probabilities = tf.layers.dense(decoder_output, vocab_size)

        loss = self._compute_loss(output_probabilities, output, length,
                                  batch_size)
        ids, log_probs, scores = self.char_predictions(output_probabilities,
                                                       vocab_size, max_seq)
        # rej_char=0: character id 0 is treated as the null/padding char.
        char_acc = char_accuracy(ids, output, 0)
        word_acc = sequence_accuracy(ids, output, 0)

        with tf.name_scope('summaries'):
            tf.summary.scalar("loss", loss, collections=["train_summary"])
            tf.summary.scalar("character accuracy",
                              char_acc,
                              collections=["train_summary"])
            tf.summary.scalar("word accuracy",
                              word_acc,
                              collections=["train_summary"])

        summary_op = tf.summary.merge_all(key='train_summary')

        optimizer = tf.train.AdadeltaOptimizer(learning_rate=1).minimize(loss)

        init = tf.global_variables_initializer()
        return inputs, output, length, loss, optimizer, output_probabilities, summary_op, init, word_acc
Example #6
0
  def create_summaries(self, data, endpoints, charset, is_training):
    """Creates all summaries for the model.

    Args:
      data: InputEndpoints namedtuple.
      endpoints: OutputEndpoints namedtuple.
      charset: A dictionary with mapping between character codes and
        unicode characters. Use the one provided by a dataset.charset.
      is_training: If True will create summary prefixes for training job,
        otherwise - for evaluation.

    Returns:
      A list of evaluation ops
    """

    def sname(label):
      prefix = 'train' if is_training else 'eval'
      return '%s/%s' % (prefix, label)

    max_outputs = 4
    # TODO(gorban): uncomment, when tf.summary.text released.
    # charset_mapper = CharsetMapper(charset)
    # pr_text = charset_mapper.get_text(
    #     endpoints.predicted_chars[:max_outputs,:])
    # tf.summary.text(sname('text/pr'), pr_text)
    # gt_text = charset_mapper.get_text(data.labels[:max_outputs,:])
    # tf.summary.text(sname('text/gt'), gt_text)
    tf.summary.image(sname('image'), data.images, max_outputs=max_outputs)

    if is_training:
      tf.summary.image(
        sname('image/orig'), data.images_orig, max_outputs=max_outputs)
      for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
      return None

    else:
      names_to_values = {}
      names_to_updates = {}

      # Each metric is a (value, update_op) pair.
      def use_metric(name, value_update_tuple):
        names_to_values[name] = value_update_tuple[0]
        names_to_updates[name] = value_update_tuple[1]

      use_metric('CharacterAccuracy',
                 metrics.char_accuracy(
                   endpoints.predicted_chars,
                   data.labels,
                   streaming=True,
                   rej_char=self._params.null_code))
      # Sequence accuracy computed by cutting sequence at the first null char
      use_metric('SequenceAccuracy',
                 metrics.sequence_accuracy(
                   endpoints.predicted_chars,
                   data.labels,
                   streaming=True,
                   rej_char=self._params.null_code))

      # BUG FIX: dict.iteritems() was removed in Python 3 (and this file uses
      # Python 3 print() calls elsewhere); use items(), matching the sibling
      # copy of this method.
      for name, value in names_to_values.items():
        summary_name = 'eval/' + name
        tf.summary.scalar(summary_name, tf.Print(value, [value], summary_name))
      # Return a concrete list: dict.values() is a lazy view in Python 3.
      return list(names_to_updates.values())