Example #1
def gen_y_test(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Dataset functions
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    # Merge the entity and relation embeddings into one lookup table.
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')
    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab,
                                     vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab,
                                       vector, padding=args.padding)

    print('Loading datasets.')
    # Save y_test to disk.
    test2 = Data(args.test_data, input_vocab, output_vocab_entity,
                 output_vocab_relation)
    test2.load()
    target_list1 = test2.targets1
    path = './results/y_test'
    with open(path, 'w') as f:
        for i, target in enumerate(target_list1):
            f.write(str(i) + '\t' + target + '\n')
    print('y_test written to file')
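The helper loadvector is defined elsewhere in this project and is not shown in the excerpt. A minimal sketch of what it is assumed to do, reading a word2vec-style text file with one "token v1 v2 ..." line per entry (the real implementation may differ):

def loadvector(path):
    # Hypothetical sketch: parse "token v1 v2 ..." lines into {token: [floats]}.
    vectors = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            if len(parts) < 2:
                continue
            vectors[parts[0]] = [float(x) for x in parts[1:]]
    return vectors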
Example #2
    def __init__(self, padding=None):
        """
            Visualizes attention maps
            :param padding: the padding to use for the sequences.
        """
        self.padding = padding
        self.input_vocab = Vocabulary('./data/human_vocab.json',
                                      padding=padding)
        self.output_vocab = Vocabulary('./data/machine_vocab.json',
                                       padding=padding)
Example #3
    def __init__(self, opts):
        self.opts = opts
        self.src_length = opts.sequence_length
        self.tgt_length = 11  # YYYY-MM-DD<eot>
        self.host_embeddings = opts.host_embeddings
        self.input_vocab = Vocabulary("./data/human_vocab.json",
                                      padding=self.src_length)
        self.output_vocab = Vocabulary("./data/machine_vocab.json",
                                       padding=self.tgt_length)
        self.src_vocab_size = self.input_vocab.size()
        self.tgt_vocab_size = self.output_vocab.size()
Example #4
def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    # Dataset functions
    input_vocab = Vocabulary('./data/human_vocab.json', padding=args.padding)
    output_vocab = Vocabulary('./data/machine_vocab.json',
                              padding=args.padding)

    print('Loading datasets.')

    training = Data(args.training_data, input_vocab, output_vocab)
    validation = Data(args.validation_data, input_vocab, output_vocab)
    training.load()
    validation.load()
    training.transform()
    validation.transform()

    print('Datasets Loaded.')
    print('Compiling Model.')
    model = simpleNMT(pad_length=args.padding,
                      n_chars=input_vocab.size(),
                      n_labels=output_vocab.size(),
                      embedding_learnable=False,
                      encoder_units=256,
                      decoder_units=256,
                      trainable=True,
                      return_probabilities=False)

    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', all_acc])
    print('Model Compiled.')
    print('Training. Ctrl+C to end early.')

    try:
        model.fit_generator(generator=training.generator(args.batch_size),
                            steps_per_epoch=100,
                            validation_data=validation.generator(args.batch_size),
                            validation_steps=100,
                            callbacks=[cp],
                            workers=1,
                            verbose=1,
                            epochs=args.epochs)

    except KeyboardInterrupt:
        print('Model training stopped early.')

    print('Model training complete.')

    run_examples(model, input_vocab, output_vocab)
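Note that Model.fit_generator is deprecated in TensorFlow 2.x Keras, where Model.fit accepts Python generators directly. Under that assumption, the training call above could be written as:

# TF 2.x equivalent sketch: pass the generators straight to fit().
model.fit(training.generator(args.batch_size),
          steps_per_epoch=100,
          validation_data=validation.generator(args.batch_size),
          validation_steps=100,
          callbacks=[cp],
          verbose=1,
          epochs=args.epochs)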
Example #5
    def __init__(self,
                 padding=None,
                 input_vocab=SAMPLE_HUMAN_VOCAB,
                 output_vocab=SAMPLE_MACHINE_VOCAB):
        """
            Visualizes attention maps
            :param padding: the padding to use for the sequences.
            :param input_vocab: the location of the input human
                                vocabulary file
            :param output_vocab: the location of the output
                                 machine vocabulary file
        """
        self.padding = padding
        self.input_vocab = Vocabulary(input_vocab, padding=padding)
        self.output_vocab = Vocabulary(output_vocab, padding=padding)
Example #6
def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Dataset functions
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')
    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab,
                                     vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab,
                                       vector, padding=args.padding)

    print('Loading datasets.')

    training = Data(args.training_data, input_vocab, output_vocab_entity,
                    output_vocab_relation)
    validation = Data(args.validation_data, input_vocab, output_vocab_entity,
                      output_vocab_relation)
    test = Data(args.test_data, input_vocab, output_vocab_entity,
                output_vocab_relation)
    training.load()
    validation.load()
    test.load()
    training.transform(vector)
    validation.transform(vector)
    test.transform(vector)

    print('Datasets Loaded.')
    print('Compiling Model.')
    model = simpleNMT2(pad_length=args.padding,
                       n_chars=100,
                       entity_labels=output_vocab_entity.size(),
                       relation_labels=output_vocab_relation.size(),
                       dim=100,
                       embedding_learnable=False,
                       encoder_units=args.units,
                       decoder_units=args.units,
                       trainable=True,
                       return_probabilities=False)

    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('Model Compiled.')
    print('Training. Ctrl+C to end early.')

    try:
        hist = model.fit([training.inputs1, training.inputs2, training.inputs3,
                          training.inputs4, training.inputs5],
                         [training.targets1],
                         epochs=args.epochs,
                         batch_size=args.batch_size,
                         validation_split=0.05)

    except KeyboardInterrupt:
        print('Model training stopped early.')
    model.save('./savemodel/model1.h5')
    print('Model training complete.')
Example #7
def testmodel(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Dataset functions
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')
    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab,
                                     vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab,
                                       vector, padding=args.padding)

    print('Loading datasets.')
    test = Data(args.test_data, input_vocab, output_vocab_entity,
                output_vocab_relation)
    test.load()
    test.transform(vector)

    print('Test Datasets Loaded.')

    model = load_model('./savemodel/model1.h5',
                       custom_objects={'AttentionLayer': AttentionLayer})
    print('Model Loaded. Start test.')
    prediction = model.predict([test.inputs1, test.inputs2, test.inputs3])

    # ./results/y_pre
    p_prediction1 = list(prediction.flatten())
    num_relation = output_vocab_relation.size()
    # Regroup the flat prediction vector into one row of
    # [label index, probability] pairs per test example.
    prediction_list1 = [[0 for col in range(num_relation)]
                        for row in range(len(p_prediction1) // num_relation)]
    for i in range(len(p_prediction1)):
        j = i // num_relation
        k = i % num_relation
        prediction_list1[j][k] = [k, p_prediction1[i]]
    pretarget1 = []
    pretarget2 = []
    for i in range(len(prediction_list1)):
        templist1 = prediction_list1[i]
        # Sort the labels by predicted probability, highest first.
        templist1.sort(key=takeSecond, reverse=True)
        templist11 = output_vocab_relation.int_to_string(templist1)
        pretarget1.append(templist11[:5])  # keep the top-5 relations
        pretarget2.append(templist1)
    listinfile(pretarget1, './results/y_pre1')
    listinfile(pretarget2, './results/y_pre2')
    print('y_pre1 written to file')
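The helpers takeSecond and listinfile come from the surrounding project and are not shown here. Plausible sketches, assuming takeSecond is a sort key over [label, probability] pairs and listinfile dumps one row per line:

def takeSecond(elem):
    # Sort key: the probability in a [label, probability] pair.
    return elem[1]

def listinfile(rows, path):
    # Write one tab-separated row per line, prefixed by its index.
    with open(path, 'w') as f:
        for i, row in enumerate(rows):
            f.write(str(i) + '\t' + '\t'.join(str(x) for x in row) + '\n')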
Example #8
    def __init__(self,
                 padding=None,
                 input_vocab=SAMPLE_HUMAN_VOCAB,
                 output_vocab=SAMPLE_MACHINE_VOCAB):
        """
            Visualizes attention maps
            :param padding: the padding to use for the sequences.
            :param input_vocab: the location of the input human
                                vocabulary file
            :param output_vocab: the location of the output
                                 machine vocabulary file
        """
        self.padding = padding
        self.input_vocab = Vocabulary(
            input_vocab, padding=padding)
        self.output_vocab = Vocabulary(
            output_vocab, padding=padding)
Example #9
def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    # Dataset functions
    input_vocab = Vocabulary('./data/human_vocab.json', padding=args.padding)
    output_vocab = Vocabulary('./data/machine_vocab.json',
                              padding=args.padding)

    print('Loading datasets.')

    training = Data(args.training_data, input_vocab, output_vocab)
    validation = Data(args.validation_data, input_vocab, output_vocab)
    training.load()
    validation.load()
    training.transform()
    validation.transform()

    print('Datasets Loaded.')
    print('Compiling Model.')
    model = simpleNMT(pad_length=args.padding,
                      n_chars=input_vocab.size(),
                      n_labels=output_vocab.size(),
                      embedding_learnable=False,
                      encoder_units=256,
                      decoder_units=256,
                      trainable=True,
                      return_probabilities=False)

    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', all_acc])
    print('Model Compiled.')
    print('Training. Ctrl+C to end early.')

    try:
        kwargs = dict(generator=training.generator(args.batch_size),
                      steps_per_epoch=100,
                      validation_data=validation.generator(args.batch_size),
                      validation_steps=100,
                      callbacks=[cp],
                      workers=1,
                      verbose=1,
                      epochs=args.epochs)
        model.fit_generator(**kwargs)

    except KeyboardInterrupt:
        print('Model training stopped early.')

    print('Model training complete.')

    run_examples(model, input_vocab, output_vocab)
Example #10
class Visualizer(object):

    def __init__(self,
                 padding=None,
                 input_vocab=SAMPLE_HUMAN_VOCAB,
                 output_vocab=SAMPLE_MACHINE_VOCAB):
        """
            Visualizes attention maps
            :param padding: the padding to use for the sequences.
            :param input_vocab: the location of the input human
                                vocabulary file
            :param output_vocab: the location of the output
                                 machine vocabulary file
        """
        self.padding = padding
        self.input_vocab = Vocabulary(
            input_vocab, padding=padding)
        self.output_vocab = Vocabulary(
            output_vocab, padding=padding)

    def set_models(self, pred_model, proba_model):
        """
            Sets the models to use
            :param pred_model: the prediction model
            :param proba_model: the model that outputs the activation maps
        """
        self.pred_model = pred_model
        self.proba_model = proba_model

    def attention_map(self, text):
        """
            Plots the attention map for the given text.
        """
        # encode the string
        d = self.input_vocab.string_to_int(text)

        # get the output sequence
        predicted_text = run_example(
            self.pred_model, self.input_vocab, self.output_vocab, text)

        text_ = list(text) + ['<eot>'] + ['<unk>'] * self.input_vocab.padding
        # get the lengths of the string
        input_length = len(text)+1
        output_length = predicted_text.index('<eot>')+1
        # get the activation map
        activation_map = np.squeeze(self.proba_model.predict(np.array([d])))[
            0:output_length, 0:input_length]

        # import seaborn as sns
        plt.clf()
        f = plt.figure(figsize=(8, 8.5))
        ax = f.add_subplot(1, 1, 1)

        # add image
        i = ax.imshow(activation_map, interpolation='nearest', cmap='gray')

        # add colorbar
        cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
        cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
        cbar.ax.set_xlabel('Probability', labelpad=2)

        # add labels
        ax.set_yticks(range(output_length))
        ax.set_yticklabels(predicted_text[:output_length])

        ax.set_xticks(range(input_length))
        ax.set_xticklabels(text_[:input_length], rotation=45)

        ax.set_xlabel('Input Sequence')
        ax.set_ylabel('Output Sequence')

        # add grid and legend
        ax.grid()
        # ax.legend(loc='best')

        f.savefig(os.path.join(HERE, 'attention_maps', text.replace('/', '')+'.pdf'), bbox_inches='tight')
        f.show()
Example #11
class Visualizer(object):

    def __init__(self,
                 padding=None,
                 input_vocab=SAMPLE_HUMAN_VOCAB,
                 output_vocab=SAMPLE_MACHINE_VOCAB):
        """
            Visualizes attention maps
            :param padding: the padding to use for the sequences.
            :param input_vocab: the location of the input human
                                vocabulary file
            :param output_vocab: the location of the output 
                                 machine vocabulary file
        """
        self.padding = padding
        self.input_vocab = Vocabulary(
            input_vocab, padding=padding)
        self.output_vocab = Vocabulary(
            output_vocab, padding=padding)

    def set_models(self, pred_model, proba_model):
        """
            Sets the models to use
            :param pred_model: the prediction model
            :param proba_model: the model that outputs the activation maps
        """
        self.pred_model = pred_model
        self.proba_model = proba_model

    def attention_map(self, text):
        """
            Plots the attention map for the given text.
        """
        # encode the string
        d = self.input_vocab.string_to_int(text)

        # get the output sequence
        predicted_text = run_example(
            self.pred_model, self.input_vocab, self.output_vocab, text)

        text_ = list(text) + ['<eot>'] + ['<unk>'] * self.input_vocab.padding
        # get the lengths of the string
        input_length = len(text)+1
        output_length = predicted_text.index('<eot>')+1
        # get the activation map
        activation_map = np.squeeze(self.proba_model.predict(np.array([d])))[
            0:output_length, 0:input_length]

        # import seaborn as sns
        plt.clf()
        f = plt.figure(figsize=(8, 8.5))
        ax = f.add_subplot(1, 1, 1)

        # add image
        i = ax.imshow(activation_map, interpolation='nearest', cmap='gray')
        
        # add colorbar
        cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
        cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
        cbar.ax.set_xlabel('Probability', labelpad=2)

        # add labels
        ax.set_yticks(range(output_length))
        ax.set_yticklabels(predicted_text[:output_length])
        
        ax.set_xticks(range(input_length))
        ax.set_xticklabels(text_[:input_length], rotation=45)
        
        ax.set_xlabel('Input Sequence')
        ax.set_ylabel('Output Sequence')

        # add grid and legend
        ax.grid()
        # ax.legend(loc='best')

        f.savefig(os.path.join(HERE, 'attention_maps', text.replace('/', '')+'.pdf'), bbox_inches='tight')
        f.show()
Example #12
class Nmt(object):
    def __init__(self, opts):
        self.opts = opts
        self.src_length = opts.sequence_length
        self.tgt_length = 11  # YYYY-MM-DD<eot>
        self.host_embeddings = opts.host_embeddings
        self.input_vocab = Vocabulary("./data/human_vocab.json",
                                      padding=self.src_length)
        self.output_vocab = Vocabulary("./data/machine_vocab.json",
                                       padding=self.tgt_length)
        self.src_vocab_size = self.input_vocab.size()
        self.tgt_vocab_size = self.output_vocab.size()

    def _build_dataset(self):
        self.start_id = start_id(self.output_vocab)
        self.end_id = end_id(self.output_vocab)
        data_file = ("./data/validation.csv"
                     if self.opts.infer else "./data/training.csv")
        data = Data(data_file, self.input_vocab, self.output_vocab)
        data.load()
        transform(data)
        vocab = (self.input_vocab, self.output_vocab)
        self.generator = DataGenerator(data, vocab, self.opts, self.start_id,
                                       self.end_id)
        items = next(self.generator)
        output_types = {i: tf.dtypes.as_dtype(items[i].dtype) for i in items}
        output_shapes = {i: tf.TensorShape(items[i].shape) for i in items}
        total_bytes = 0
        for i in items:
            total_bytes += items[i].nbytes
        dataset = tf.data.Dataset.from_generator(self.generator,
                                                 output_types=output_types,
                                                 output_shapes=output_shapes)
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset,
                                                       "InfeedQueue",
                                                       replication_factor=1)
        data_init = infeed_queue.initializer

        return dataset, infeed_queue, data_init, vocab

    def infer(self):
        with tf.device("cpu"):
            dataset, infeed_queue, data_init, vocab = self._build_dataset()
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
                feed_name="outfeed")
        if self.host_embeddings:
            src_embedding = Nmt._build_embedding(
                self.src_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="source_embedding",
            )
            tgt_embedding = Nmt._build_embedding(
                self.tgt_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="tgt_embedding",
            )

        def build_common(src_embedding, tgt_embedding, source):
            input_, encoder_outputs, encoder_state = self._build_encoder(
                src_embedding, source)
            samples, logits = self._build_decoder(encoder_outputs,
                                                  encoder_state,
                                                  tgt_embedding,
                                                  None,
                                                  train=False)
            outfeed = outfeed_queue.enqueue({"samples": samples})
            return outfeed

        def build_infer(source):
            src_embedding = Nmt._build_embedding(
                self.src_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="source_embedding",
            )
            tgt_embedding = Nmt._build_embedding(
                self.tgt_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="tgt_embedding",
            )
            return build_common(src_embedding, tgt_embedding, source)

        def build_infer_host_embeddings(source):
            nonlocal src_embedding, tgt_embedding
            return build_common(src_embedding, tgt_embedding, source)

        with ipu_scope("/device:IPU:0"):
            build = build_infer_host_embeddings if self.host_embeddings else build_infer
            batch = ipu_compiler.compile(lambda: loops.repeat(
                1, build, infeed_queue=infeed_queue, inputs=[]))

        # Create a restoring object
        saver = tf.train.Saver()

        ipu_options = util.get_config(report_n=0)
        utils.configure_ipu_system(ipu_options)
        session = tf.Session()
        checkpoint = CHECKPOINT_FILE + ("host_ckpt" if
                                        self.opts.host_embeddings else "ckpt")
        saver.restore(session, checkpoint)
        session.run(data_init)
        if self.host_embeddings:
            batch = [
                batch,
                src_embedding(1, 1, False),
                tgt_embedding(1, 1, False)
            ]
        result_queue = outfeed_queue.dequeue()
        # Run a dummy value to force the graph compilation
        session.run(batch)
        result = session.run(result_queue)
        predictions = result["samples"]
        print_data(self.generator.query, vocab[0], predictions, vocab[1])

        while True:
            session.run(batch)
            result = session.run(result_queue)
            predictions = result["samples"]
            print_data(self.generator.query, vocab[0], predictions, vocab[1])
            if not self.opts.interact:
                break

    def train(self):
        with tf.device("cpu"):
            dataset, infeed_queue, data_init, vocab = self._build_dataset()
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
                feed_name="outfeed")
        if self.host_embeddings:
            src_embedding = Nmt._build_embedding(
                self.src_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="source_embedding",
            )
            tgt_embedding = Nmt._build_embedding(
                self.tgt_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="tgt_embedding",
            )

        def build_common(src_embedding, tgt_embedding, source, target, label,
                         mask):
            nonlocal outfeed_queue
            input_, encoder_outputs, encoder_state = self._build_encoder(
                src_embedding, source)
            samples, logits = self._build_decoder(encoder_outputs,
                                                  encoder_state,
                                                  tgt_embedding,
                                                  target,
                                                  train=True)
            loss = self._build_optimiser(logits, label, mask)
            outfeed = outfeed_queue.enqueue({"loss": loss, "logits": logits})
            return outfeed

        def build_train(source, target, label, mask):
            src_embedding = Nmt._build_embedding(
                self.src_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="source_embedding",
            )
            tgt_embedding = Nmt._build_embedding(
                self.tgt_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="tgt_embedding",
            )
            return build_common(src_embedding, tgt_embedding, source, target,
                                label, mask)

        def build_train_host_embeddings(source, target, label, mask):
            nonlocal src_embedding, tgt_embedding
            return build_common(src_embedding, tgt_embedding, source, target,
                                label, mask)

        with ipu_scope("/device:IPU:0"):
            build = build_train_host_embeddings if self.host_embeddings else build_train
            batch = ipu_compiler.compile(lambda: loops.repeat(
                self.opts.batches_per_step,
                build,
                infeed_queue=infeed_queue,
                inputs=[],
            ))

        # Create a restoring object
        saver = tf.train.Saver()

        if self.opts.save_graph:
            # Dump the graph to a logdir
            writer = tf.summary.FileWriter(
                os.path.join("./logs", "NMT",
                             time.strftime("%Y%m%d_%H%M%S_%Z")))
            writer.add_graph(tf.get_default_graph())

        ipu_options = util.get_config(report_n=0)
        utils.configure_ipu_system(ipu_options)
        session = tf.Session()
        checkpoint = CHECKPOINT_FILE + ("host_ckpt" if
                                        self.opts.host_embeddings else "ckpt")
        if self.opts.ckpt:
            saver.restore(session, checkpoint)
        else:
            utils.move_variable_initialization_to_cpu()
            session.run(tf.global_variables_initializer())
        session.run(data_init)
        print("Init done.")
        if self.host_embeddings:
            batch = [
                batch,
                src_embedding(self.opts.batches_per_step, 1),
                tgt_embedding(self.opts.batches_per_step, 1),
            ]
        result_queue = outfeed_queue.dequeue()
        session.run(batch)  # Warmup
        best_loss = float("Inf")
        for e in range(self.opts.iterations):
            start = time.time()
            session.run(batch)
            result = session.run(result_queue)
            l = result["loss"]
            avg_loss = np.mean(l)
            duration = (time.time() - start) / self.opts.batches_per_step

            print(
                "Step: {:>5}. Average Loss {:.3}. Items/sec {:.4}. Tokens/sec {}"
                .format(
                    (e + 1),
                    avg_loss,
                    self.opts.batch_size / duration,
                    self.opts.batch_size *
                    (self.src_length + self.tgt_length) / duration,
                ))
            if avg_loss < best_loss:
                best_loss = avg_loss
                saver.save(session, checkpoint)

    @staticmethod
    def _build_embedding(vocab_size,
                         embedding_size,
                         host_embeddings,
                         name="embedding"):
        if host_embeddings:
            embedding = embedding_ops.create_host_embedding(
                name,
                shape=[vocab_size, embedding_size],
                dtype=DTYPE,
                optimizer_spec=embedding_ops.HostEmbeddingOptimizerSpec(0.03),
                initializer=tf.initializers.random_uniform(maxval=1.0,
                                                           dtype=DTYPE),
            )
        else:
            with tf.variable_scope("embedding", dtype=DTYPE,
                                   use_resource=True) as scope:
                # Random embedding
                embedding = tf.get_variable(
                    name,
                    [vocab_size, embedding_size],
                    scope.dtype,
                    initializer=tf.initializers.random_uniform(
                        maxval=1.0, dtype=scope.dtype),
                    trainable=True,
                )
        return embedding

    @staticmethod
    def _build_cell(num_units, num_layers):
        if num_layers == 1:
            return tf.contrib.rnn.BasicLSTMCell(num_units,
                                                forget_bias=forget_bias,
                                                state_is_tuple=False)
        cell_list = []
        for i in range(num_layers):
            cell_list.append(
                tf.contrib.rnn.BasicLSTMCell(num_units,
                                             forget_bias=forget_bias,
                                             state_is_tuple=False))
        return tf.contrib.rnn.MultiRNNCell(cell_list)

    def _build_encoder(self, embedding, source):
        with tf.variable_scope("input", dtype=DTYPE, use_resource=True):
            if self.host_embeddings:
                encoder_emb_inp = embedding.lookup(source)
            else:
                encoder_emb_inp = tf.nn.embedding_lookup(embedding, source)

        with tf.variable_scope("encoder", dtype=DTYPE,
                               use_resource=True) as scope:  # use resource
            dtype = scope.dtype
            cell = Nmt._build_cell(self.opts.num_units, self.opts.num_layers)

            if self.opts.bi:
                outputs, states = tf.nn.bidirectional_dynamic_rnn(
                    cell,
                    Nmt._build_cell(self.opts.num_units, self.opts.num_layers),
                    encoder_emb_inp,
                    dtype=dtype,
                    time_major=time_major,
                    swap_memory=False,
                )
                encoder_outputs = tf.add_n(outputs)
                encoder_state = states[0] + states[1]
            else:
                encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                    cell,
                    encoder_emb_inp,
                    dtype=dtype,
                    time_major=time_major,
                    swap_memory=False,
                )

        return source, encoder_outputs, encoder_state

    def _build_decoder(self,
                       encoder_outputs,
                       encoder_state,
                       embedding,
                       target=None,
                       train=False):
        with tf.variable_scope("decoder", dtype=DTYPE,
                               use_resource=True) as decoder_scope:
            dtype = decoder_scope.dtype
            tgt_length = self.src_length * 2
            decoder_num_units = self.opts.num_units
            atten_num_units = self.opts.num_units

            # RNN Cell
            cell = Nmt._build_cell(decoder_num_units, self.opts.num_layers)
            initial_state = encoder_state

            # Attention wrapper
            if self.opts.attention:
                cell = self._build_attention(encoder_outputs, cell)
                initial_state = tf.contrib.seq2seq.AttentionWrapperState(
                    cell_state=encoder_state,
                    attention=tf.zeros([self.opts.batch_size, atten_num_units],
                                       dtype),
                    time=tf.constant(0, tf.int32),
                    alignments=tf.zeros(
                        [self.opts.batch_size, self.src_length], dtype),
                    alignment_history=(),
                    attention_state=tf.zeros(
                        [self.opts.batch_size, self.src_length], dtype),
                )

            # Projection Layer
            projection_layer = tf.layers.Dense(units=self.tgt_vocab_size,
                                               use_bias=False,
                                               name="projection")

            if train:
                tgt_length = self.tgt_length
                if self.host_embeddings:
                    decoder_emb_inp = embedding.lookup(target)
                else:
                    decoder_emb_inp = tf.nn.embedding_lookup(embedding, target)

                helper = TrainingHelperNoCond(
                    decoder_emb_inp,
                    np.full([self.opts.batch_size], tgt_length,
                            dtype=np.int32),
                    time_major=time_major,
                )
            else:
                # Inference
                tgt_sos_id = self.start_id
                tgt_eos_id = self.end_id

                start_tokens = np.full([self.opts.batch_size],
                                       tgt_sos_id,
                                       dtype=np.int32)
                end_token = tgt_eos_id
                if self.host_embeddings:
                    helper = GreedyEmbeddingHelperNoCond(
                        lambda i: embedding.lookup(i), start_tokens, end_token)
                else:
                    helper = GreedyEmbeddingHelperNoCond(
                        embedding, start_tokens, end_token)

            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell,
                helper,
                initial_state=initial_state,
                output_layer=projection_layer
                if not train else None,  # applied per timestep
            )

            # Dynamic decoding
            outputs, final_context_state, _ = dynamic_decode(  # Contains the XLA check
                decoder,
                maximum_iterations=tgt_length,  # Required for static TensorArrays
                output_time_major=time_major,
                swap_memory=False,
                scope=decoder_scope,
            )

            if train:
                # Specify dynamic shapes to avoid Assert
                logits = outputs.rnn_output
                logits.set_shape(
                    [tgt_length, self.opts.batch_size, atten_num_units])
                logits = projection_layer(logits)
                return outputs.sample_id, logits
            else:
                samples = outputs.sample_id
                samples.set_shape([tgt_length, self.opts.batch_size])
                return samples, outputs.rnn_output

    def _build_attention(self, encoder_outputs, decoder_cell):
        with tf.variable_scope("attention", dtype=DTYPE,
                               use_resource=True) as scope:
            # Attention is batch major
            inputs = tf.transpose(encoder_outputs, [1, 0, 2])

            if self.opts.attention == "luong":
                attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                    self.opts.num_units,
                    inputs,
                    dtype=scope.dtype,
                )
            else:
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.opts.num_units,
                    inputs,
                    dtype=scope.dtype,
                )

            return AttentionWrapperNoAssert(decoder_cell, attention_mechanism)

    def _build_optimiser(self, logits, labels, mask):
        with tf.variable_scope("loss", use_resource=True):
            # Logits is dynamic so an Assert is added to check shapes
            crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits)
            train_loss = tf.reduce_sum(crossent * mask) / self.opts.batch_size

        # Calculate and clip gradients
        params = tf.trainable_variables()
        gradients = tf.gradients(train_loss, params)
        clipped_gradients = [
            grad if grad is None else tf.clip_by_norm(grad, max_gradient_norm)
            for grad in gradients
        ]

        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        update_step = optimizer.apply_gradients(zip(clipped_gradients, params))
        with tf.control_dependencies([update_step]):
            mean_loss = tf.reduce_mean(train_loss, name="train_loss")
        return mean_loss
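A minimal driver for the Nmt class above; the opts fields mirror the attributes the class reads, but the values here are illustrative assumptions, not the project's defaults:

# Hypothetical driver; field names follow the opts attributes used by Nmt.
from argparse import Namespace

opts = Namespace(sequence_length=20, host_embeddings=False, infer=False,
                 embedding_size=32, num_units=512, num_layers=1, bi=False,
                 attention="luong", batch_size=1, batches_per_step=100,
                 iterations=5000, save_graph=False, ckpt=False,
                 interact=False)
Nmt(opts).train()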
Example #13
class Visualizer(object):

    def __init__(self,
                 padding=None,
                 input_vocab=SAMPLE_HUMAN_VOCAB,
                 output_vocab=SAMPLE_MACHINE_VOCAB):
        """
            Visualizes attention maps
            :param padding: the padding to use for the sequences.
            :param input_vocab: the location of the input human
                                vocabulary file
            :param output_vocab: the location of the output 
                                 machine vocabulary file
        """
        self.padding = padding
        self.input_vocab = Vocabulary(
            input_vocab, padding=padding)
        self.output_vocab = Vocabulary(
            output_vocab, padding=padding)

    def set_models(self, pred_model, proba_model):
        """
            Sets the models to use
            :param pred_model: the prediction model
            :param proba_model: the model that outputs the activation maps
        """
        self.pred_model = pred_model
        self.proba_model = proba_model

    def attention_map(self, text):
        """
            Plots the attention map for the given text.
        """
        # encode the string
        d = self.input_vocab.string_to_int(text)
        print('d: ', d)

        # get the output sequence
        predicted_text = run_example(self.pred_model, self.input_vocab, self.output_vocab, text)
        print('predicted_text: ', predicted_text)

        text_ = list(text) + ['<eot>'] + ['<unk>'] * self.input_vocab.padding
        # get the lengths of the string
        input_length = len(text)+1
        output_length = predicted_text.index('<eot>')+1
        # get the activation map
        activation_map = np.squeeze(
            self.proba_model.predict(np.array([d])))[0:output_length,
                                                     0:input_length]
        print('activation_map: ', activation_map)
        # activation_map holds one row of per-input-character attention
        # probabilities for each output character.

        # import seaborn as sns
        plt.clf()
        f = plt.figure(figsize=(8, 8.5))
        ax = f.add_subplot(1, 1, 1)

        # add image
        i = ax.imshow(activation_map, interpolation='nearest', cmap='gray')  # render the attention weights in grayscale
        
        # add colorbar
        cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
        cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
        cbar.ax.set_xlabel('Probability', labelpad=2)

        # add labels
        ax.set_yticks(range(output_length))
        ax.set_yticklabels(predicted_text[:output_length])
        
        ax.set_xticks(range(input_length))
        ax.set_xticklabels(text_[:input_length], rotation=45)
        
        ax.set_xlabel('Input Sequence')
        ax.set_ylabel('Output Sequence')

        # add grid and legend
        ax.grid()
        # ax.legend(loc='best')

        f.savefig(os.path.join(HERE, 'attention_maps', text.replace('/', '')+'.pdf'), bbox_inches='tight')
        f.show()
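For completeness, a hedged usage sketch of the Visualizer: the prediction model emits output tokens, while the probability model (built with return_probabilities=True) emits the attention activations. The padding value and weights path are placeholders:

# Hypothetical usage; simpleNMT arguments follow the training examples above.
viz = Visualizer(padding=50)
pred_model = simpleNMT(pad_length=50,
                       n_chars=viz.input_vocab.size(),
                       n_labels=viz.output_vocab.size(),
                       trainable=False,
                       return_probabilities=False)
proba_model = simpleNMT(pad_length=50,
                        n_chars=viz.input_vocab.size(),
                        n_labels=viz.output_vocab.size(),
                        trainable=False,
                        return_probabilities=True)
pred_model.load_weights('weights.h5')   # placeholder checkpoint path
proba_model.load_weights('weights.h5')
viz.set_models(pred_model, proba_model)
viz.attention_map('Saturday 9 May 2018')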