Example #1
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 target_vocab_size,
                 maximum_position_encoding,
                 rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        self.pos_encoding = utils.positional_encoding(
            maximum_position_encoding, d_model)
        # two parallel stacks of decoder layers (presumably one per encoder
        # stream: SBT and graph)
        self.dec_layers1 = [
            DecoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]
        self.dec_layers2 = [
            DecoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]
        # self.dec_layers3 = [DecoderLayer(d_model, num_heads, dff, rate)
        #                    for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)
        self.dropout1 = tf.keras.layers.Dropout(rate)
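These snippets reference utils.positional_encoding without defining it. Since the classes otherwise follow the TensorFlow Transformer tutorial, here is a minimal sketch of the standard sinusoidal encoding (an assumption, not the repository's actual code):

import numpy as np
import tensorflow as tf

def get_angles(pos, i, d_model):
    # frequency decreases geometrically with the embedding dimension
    return pos / np.power(10000, (2 * (i // 2)) / np.float32(d_model))

def positional_encoding(position, d_model):
    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                            np.arange(d_model)[np.newaxis, :], d_model)
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])  # even indices: sin
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])  # odd indices: cos
    return tf.cast(angle_rads[np.newaxis, ...], dtype=tf.float32)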
Example #2
    def train_step(self, sbts_train, nodes_train, edges_train, comms_train):
        # teacher forcing: the decoder input is the target shifted right,
        # and tar_real holds the next-token targets
        tar_inp = comms_train[:, :-1]
        tar_real = comms_train[:, 1:]
        sbt_padding_mask, node_padding_mask, look_ahead_mask = utils.create_masks(
            sbts_train, nodes_train, tar_inp)

        with tf.GradientTape() as tape:
            predictions, attention_weights, _, _ = self.transformer(
                sbts_train, nodes_train, edges_train, tar_inp, True,
                sbt_padding_mask, node_padding_mask, look_ahead_mask)
            loss = utils.loss_function(tar_real, predictions)
        gradients = tape.gradient(loss, self.transformer.trainable_variables)
        self.optimizer.apply_gradients(
            zip(gradients, self.transformer.trainable_variables))
        self.train_loss(loss)
        self.train_accuracy(tar_real, predictions)
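utils.loss_function is likewise not shown. A minimal sketch, assuming the usual padding-masked sparse categorical cross-entropy with pad id 0:

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    # zero out loss contributions from padding positions
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)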
Example #3
    def val_step(self, sbts_val, nodes_val, edges_val, comms_val, val_bleu):
        current_batch_size = comms_val.shape[0]
        tar_reals = comms_val[:, 1:]
        _, _, comms_dic = get_dicts(self.sub_data_folder)
        # add start tag as the first input for the decoder
        decoder_input = [len(comms_dic.word_index) + 1] * current_batch_size
        outputs = tf.expand_dims(decoder_input, 1)
        for i in range(MAX_LENGTH_COMM - 1):
            sbt_padding_mask, node_padding_mask, look_ahead_mask = utils.create_masks(
                sbts_val, nodes_val, outputs)
            predictions, attention_weights, _, _ = self.transformer(
                sbts_val, nodes_val, edges_val, outputs, False,
                sbt_padding_mask, node_padding_mask, look_ahead_mask)

            predictions = predictions[:, -1:, :]
            predicted_ids = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

            outputs = tf.concat([outputs, predicted_ids], axis=-1)
            # stop early once every sequence in the batch has emitted the end tag
            count_stopped = 0
            for output in outputs:
                if len(comms_dic.word_index) + 2 in output.numpy().tolist():
                    count_stopped += 1
            if count_stopped == current_batch_size:
                break
        candidates = EvaluationMetrics.remove_pad(
            outputs.numpy().tolist(),
            len(comms_dic.word_index) + 2, "candidates")
        refs = EvaluationMetrics.remove_pad(tar_reals.numpy().tolist(),
                                            len(comms_dic.word_index) + 2,
                                            "references")
        for ref, candi in zip(refs, candidates):
            val_bleu.append(
                EvaluationMetrics.smoothing1_sentence_bleu(ref, candi))
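utils.create_masks returns one padding mask per encoder input plus a combined look-ahead mask for the decoder. A sketch under the assumption that 0 is the pad id (the helper names below are hypothetical):

def create_padding_mask(seq):
    # 1.0 marks pad positions; extra dims broadcast over attention logits
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch, 1, 1, seq_len)

def create_look_ahead_mask(size):
    # upper-triangular ones hide future positions from the decoder
    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)

def create_masks(sbts, nodes, tar):
    sbt_padding_mask = create_padding_mask(sbts)
    node_padding_mask = create_padding_mask(nodes)
    # combine target padding with the look-ahead mask for self-attention
    look_ahead_mask = tf.maximum(create_padding_mask(tar),
                                 create_look_ahead_mask(tf.shape(tar)[1]))
    return sbt_padding_mask, node_padding_mask, look_ahead_mask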
Example #4
    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]
        q = self.wq(q)  # (batch_size, seq_len, d_model)
        k = self.wk(k)  # (batch_size, seq_len, d_model)
        v = self.wv(v)  # (batch_size, seq_len, d_model)

        q = self.split_heads(
            q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
        k = self.split_heads(
            k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
        v = self.split_heads(
            v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)

        # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
        # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)

        scaled_attention, attention_weights = utils.scaled_dot_product_attention(
            q, k, v, mask)

        scaled_attention = tf.transpose(
            scaled_attention,
            perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)

        concat_attention = tf.reshape(
            scaled_attention,
            (batch_size, -1, self.d_model))  # (batch_size, seq_len_q, d_model)

        output = self.dense(
            concat_attention)  # (batch_size, seq_len_q, d_model)

        return output, attention_weights
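utils.scaled_dot_product_attention is presumably the standard attention kernel; a sketch matching the shape comments above:

def scaled_dot_product_attention(q, k, v, mask):
    matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
    if mask is not None:
        # push masked positions to -inf before the softmax
        scaled_attention_logits += (mask * -1e9)
    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)
    return output, attention_weights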
Example #5
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(EncoderLayer, self).__init__()

        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = utils.point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)
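utils.point_wise_feed_forward_network is most likely the usual position-wise two-layer MLP; a sketch:

def point_wise_feed_forward_network(d_model, dff):
    return tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),  # (batch, seq_len, dff)
        tf.keras.layers.Dense(d_model)                  # (batch, seq_len, d_model)
    ])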
Example #6
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
                 asthop, maximum_position_encoding, rate=0.1):
        super(EncoderGraph, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model,
                                                   name="graph_embed")
        self.pos_encoding = utils.positional_encoding(
            maximum_position_encoding, self.d_model)
        self.gcn_layer = GCNLayer(d_model)
        self.asthop = asthop
        self.enc_layers = [
            EncoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(rate)
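GCNLayer is not defined in these snippets, and asthop presumably controls how many graph-convolution hops are applied. A hypothetical single-hop sketch, assuming edges arrives as a (batch, nodes, nodes) adjacency matrix:

class GCNLayer(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super(GCNLayer, self).__init__()
        self.dense = tf.keras.layers.Dense(d_model)

    def call(self, nodes, edges):
        # aggregate neighbour features through the adjacency matrix,
        # then project: H' = ReLU(A . H . W)
        return tf.nn.relu(self.dense(tf.matmul(edges, nodes)))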
Example #7
    def evaluate_attn(self, idx):
        """
        Plot the attention weights of the test batch at the designated idx.
        :param idx: index of the test batch to inspect
        :return: None; the attention heatmaps are plotted directly
        """
        test_set = test_data_prepare(self.sub_data_folder)
        ckpt = tf.train.Checkpoint(transformer=self.transformer,
                                   optimizer=self.optimizer)
        checkpoint_path = "./checkpoints_4/" + self.sub_data_folder
        ckpt_manager = tf.train.CheckpointManager(ckpt,
                                                  checkpoint_path,
                                                  max_to_keep=10)
        # if a checkpoint exists, restore the latest one
        if ckpt_manager.latest_checkpoint:
            print("ckpt: ", ckpt_manager.latest_checkpoint)
            ckpt.restore(ckpt_manager.latest_checkpoint)
            print('Latest checkpoint restored!!')
        else:
            print("cannot load latest model!!")
        for batch, (srcs_test, nodes_test, edges_test,
                    comms_test) in enumerate(test_set):

            if idx == batch:
                current_batch_size = comms_test.shape[0]
                srcs_dic, nodes_dic, comms_dic = get_dicts(
                    self.sub_data_folder)
                # add start tag as the first input for the decoder
                decoder_input = [len(comms_dic.word_index) + 1] * current_batch_size
                outputs = tf.expand_dims(decoder_input, 1)
                # attn_graph = -1
                # attn_sbt = -1

                for i in range(MAX_LENGTH_COMM - 1):
                    src_padding_mask, node_padding_mask, look_ahead_mask = utils.create_masks(
                        srcs_test, nodes_test, outputs)
                    predictions, attention_weights, sbt_attn, graph_attn = self.transformer(
                        srcs_test, nodes_test, edges_test, outputs, False,
                        src_padding_mask, node_padding_mask, look_ahead_mask)
                    # for v in self.transformer.trainable_variables:
                    #     if v.name == "transformer_yz/encoder_graph/graph_embed/embeddings:0":
                    #         v[0]
                    predictions = predictions[:, -1:, :]
                    predicted_ids = tf.cast(tf.argmax(predictions, axis=-1),
                                            tf.int32)

                    outputs = tf.concat([outputs, predicted_ids], axis=-1)

                    count_stopped = 0
                    for output in outputs:
                        if len(comms_dic.word_index) + 2 in output.numpy().tolist():
                            count_stopped += 1
                    if count_stopped == current_batch_size:
                        break
                print(edges_test)
                candidates = EvaluationMetrics.remove_pad(
                    outputs.numpy().tolist(),
                    len(comms_dic.word_index) + 2, "candidates")
                reverse_word_map_comms = dict(
                    map(reversed, comms_dic.word_index.items()))
                reverse_word_map_nodes = dict(
                    map(reversed, nodes_dic.word_index.items()))
                reverse_word_map_srcs = dict(
                    map(reversed, srcs_dic.word_index.items()))
                candidates = self.sequence_to_text(reverse_word_map_comms,
                                                   candidates[0])
                _nodes_test = self.sequence_to_text(reverse_word_map_nodes,
                                                    nodes_test.numpy()[0])
                _srcs_test = self.sequence_to_text(reverse_word_map_srcs,
                                                   srcs_test.numpy()[0][1:-1])
                print(_nodes_test)
                # self.plot_single(sbt_attn, _srcs_test, _srcs_test, "srcs")
                # self.plot_attention_weights(attention_weights['decoder_layer1_graph_comm'], _nodes_test, candidates, "nodes")
                # self.plot_attention_weights(attention_weights['decoder_layer1_sbt_comm'], _srcs_test, candidates, "srcs")
                self.plot_std_attn(
                    attention_weights['decoder_layer1_graph_comm'],
                    _nodes_test, candidates, "nodes")
                self.plot_std_attn(
                    attention_weights['decoder_layer1_sbt_comm'], _srcs_test,
                    candidates, "srcs")
Example #8
    def evaluate_example(self):
        content = ""
        test_set = test_data_prepare(self.sub_data_folder)
        ckpt = tf.train.Checkpoint(transformer=self.transformer,
                                   optimizer=self.optimizer)
        checkpoint_path = "./checkpoints_4/" + self.sub_data_folder
        ckpt_manager = tf.train.CheckpointManager(ckpt,
                                                  checkpoint_path,
                                                  max_to_keep=10)
        # if a checkpoint exists, restore the latest one
        if ckpt_manager.latest_checkpoint:
            print("ckpt: ", ckpt_manager.latest_checkpoint)
            ckpt.restore(ckpt_manager.latest_checkpoint)
            print('Latest checkpoint restored!!')
        else:
            print("cannot load latest model!!")
        for batch, (srcs_test, nodes_test, edges_test,
                    comms_test) in enumerate(test_set):
            current_batch_size = comms_test.shape[0]
            tar_reals = comms_test[:, 1:]
            srcs_dic, nodes_dic, comms_dic = get_dicts(self.sub_data_folder)
            # add start tag as the first input for the decoder
            decoder_input = [len(comms_dic.word_index) + 1] * current_batch_size
            outputs = tf.expand_dims(decoder_input, 1)
            for i in range(MAX_LENGTH_COMM - 1):
                src_padding_mask, node_padding_mask, look_ahead_mask = utils.create_masks(
                    srcs_test, nodes_test, outputs)
                predictions, attention_weights, _, _ = self.transformer(
                    srcs_test, nodes_test, edges_test, outputs, False,
                    src_padding_mask, node_padding_mask, look_ahead_mask)

                predictions = predictions[:, -1:, :]
                predicted_ids = tf.cast(tf.argmax(predictions, axis=-1),
                                        tf.int32)

                outputs = tf.concat([outputs, predicted_ids], axis=-1)
                count_stopped = 0
                for output in outputs:
                    if len(comms_dic.word_index) + 2 in output.numpy().tolist():
                        count_stopped += 1
                if count_stopped == current_batch_size:
                    break
            candidates = EvaluationMetrics.remove_pad(
                outputs.numpy().tolist(),
                len(comms_dic.word_index) + 2, "candidates")
            refs = EvaluationMetrics.remove_pad(tar_reals.numpy().tolist(),
                                                len(comms_dic.word_index) + 2,
                                                "references")

            reverse_word_map_comms = dict(
                map(reversed, comms_dic.word_index.items()))
            reverse_word_map_srcs = dict(
                map(reversed, srcs_dic.word_index.items()))

            content += (str(batch) + ":\n")
            content += ("src: " + " ".join(
                self.sequence_to_text(reverse_word_map_srcs,
                                      srcs_test.numpy()[0][1:-1])) + "\n")
            content += ("candidate: " + " ".join(
                self.sequence_to_text(reverse_word_map_comms, candidates[0])) +
                        "\n")
            content += ("ref: " + " ".join(
                self.sequence_to_text(reverse_word_map_comms, refs[0][0])) +
                        "\n\n")
            print(batch)
        with open("./final results/all_results_wo_gru.txt",
                  "w",
                  encoding="utf-8") as fw:
            fw.write(content)
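EvaluationMetrics.remove_pad is called with the end-tag id (len(word_index) + 2) and a mode string. A plausible sketch that truncates at the end tag and wraps references for sentence-level BLEU (the exact behavior is an assumption):

class EvaluationMetrics:
    @staticmethod
    def remove_pad(sequences, end_id, mode):
        cleaned = []
        for seq in sequences:
            if mode == "candidates":
                seq = seq[1:]  # drop the start tag prepended during decoding
            # cut everything from the end tag onwards
            cleaned.append(seq[:seq.index(end_id)] if end_id in seq else seq)
        # sentence_bleu expects each reference as a list of reference token lists
        return [[s] for s in cleaned] if mode == "references" else cleaned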