# Example #1
# 0
    def train_model(self, train_data, epoch_number=1):
        '''Train the underlying Tensorflow graph variables.

        Keyword arguments:
            self -- SRL model
            train_data -- training data, list of
                          preprocessing.SentenceData
                          instances.
            epoch_number -- number of epochs used
                            during training.
        '''
        # Begin training
        with self._get_session() as session:
            # initialize graph variables
            self.init.run()

            # generate stream of batched training data
            batch_generator = Batch_Generator(train_data, self.batch_size)

            # if possible restore the variables' values from a previous session;
            # a failed restore (e.g. no checkpoint yet) is non-fatal
            try:
                self.saver.restore(session, self.savefile)
            except Exception as exp:
                # Python 3 exceptions have no `.message` attribute — the
                # original `print(exp.message)` raised AttributeError here.
                print(exp)
            writer = tf.summary.FileWriter(self.savefile, graph=self.graph)

            # start a new epoch -----------------------------------------------
            for epoch in range(epoch_number):
                print("Epoch number: "+str(epoch+1))

                # start a new iteration in the epoch --------------------------
                for step in tqdm(range(len(batch_generator))):
                    # get batch data
                    batch = batch_generator.generate_next_batch()
                    feed_dict = self._get_feed_dict(batch)

                    # record a summary every 50 steps (the original condition
                    # `step % 50` was truthy on every step NOT divisible by 50)
                    if step % 50 == 0 and self.savefile is not None and self.profile_data:
                        summ = session.run(self.summary, feed_dict=feed_dict)
                        writer.add_summary(summ, step)

                    # optimize weights
                    session.run(self.optimizer, feed_dict=feed_dict)
                    # end of iteration -----------------------------------------

                # saving graph variables after epoch
                print('saving graph variables ...')
                self.saver.save(session, self.savefile)
def create_prediction_file(input_file,
                           output_file,
                           model,
                           batch_size,
                           classes,
                           embedding_data):
    '''Create a prediction file, containing the predicted semantic roles for the input sentences.

    This function creates an output file (in the CoNLL 2009 format) containing the
    predicted semantic roles for the sentences in the input file.

    Keyword arguments:
        input_file -- name of file containing the
            sentences for the SRL task.

        output_file -- name of the file that will
            contain the predicted roles.

        model -- instance of srl_models.Model used
            for the prediction.

        batch_size -- batch size used by the model

        classes -- class inventory exposing an `i2c`
            index-to-label mapping.

        embedding_data -- 4-tuple (wordembeddings, posembeddings,
            depembeddings, predembeddings) used to digest sentences.
    '''

    from utils import Batch_Generator
    wordembeddings, posembeddings, depembeddings, predembeddings = embedding_data

    def i2roles(array_like):
        '''Map an array of class indices to role labels, preserving shape.'''
        # hoisted out of the per-sentence loop: depends only on `classes`
        array = np.array(array_like)
        flat = np.reshape(array, [-1])
        return np.reshape([classes.i2c[i] for i in flat], array.shape)

    # load input data and stream predictions sentence by sentence
    with open(input_file, 'r', encoding='utf-8') as infile, \
         open(output_file, 'w', encoding='utf-8') as outfile, \
         model._get_session() as session:
        # restore trained variables
        model.saver.restore(session, model.savefile)
        fileisover = False
        while not fileisover:
            sentence_data, fileisover = SentenceData.get_sentence_data(infile)

            if sentence_data:
                digested_data = sentence_data.digest(
                    classes=classes,
                    wordembeddings=wordembeddings,
                    posembeddings=posembeddings,
                    depembeddings=depembeddings,
                    predembeddings=predembeddings)

                # placeholder role matrix: one row per predicate,
                # one column per token
                digested_data.roles = np.zeros([digested_data.predicate_count(),
                                                len(digested_data)], dtype=np.int32)

                if digested_data.predicate_count():
                    generator = Batch_Generator([digested_data], batch_size)
                    total_predictions = []
                    for _ in range(len(generator)):
                        batch = generator.generate_next_batch()
                        predictions = model.predict(session, batch)
                        # NOTE(review): endindex is the same for every batch;
                        # if predicate_count > batch_size, truncating each
                        # later batch to this width looks suspicious — verify.
                        endindex = min(generator.batch_size,
                                       digested_data.predicate_count())
                        predictions = predictions.transpose()[:, :endindex]
                        total_predictions.extend(i2roles(predictions))
                    sentence_data.roles = total_predictions
                outfile.write(str(sentence_data)+"\n")
# Example #3
# 0
    def evaluate_model(self, eval_data):
        '''Evaluate the neural model with respect to some evaluation data.

        This method returns the model's performance. A prediction counts
        as positive when its class index is non-zero (0 = no role).

        Keyword arguments:
            self -- SRL model
            eval_data -- data used for evaluation; must be compatible
                         with Batch_Generator (presumably a list of
                         digested sentence instances — verify against
                         preprocessing).

        Returns:
            Precision
            Recall
            F1 measure
        '''

        # Begin evaluation
        true_positive, true_negative, false_positive, false_negative = 0, 0, 0, 0
        # given   -- number of non-null roles predicted by the model
        # present -- number of non-null roles in the gold data
        given, present = 0, 0

        with self._get_session() as session:
            # restore trained variables
            self.saver.restore(session, self.savefile)
            batch_generator = Batch_Generator(eval_data, self.batch_size)

            for _ in tqdm(range(len(batch_generator))):
                batch = batch_generator.generate_next_batch()
                roles = batch.roles
                seq_lens = batch.sequence_lengths
                feed_dict = self._get_feed_dict(batch)

                # get logits and reduce over the class axis
                logits = session.run(self.logits, feed_dict=feed_dict)
                predictions = np.argmax(logits, axis=2)

                # compare each prediction with the gold role, only up to
                # the true sequence length (ignore padding positions)
                for i in range(len(seq_lens)):
                    for j in range(seq_lens[i]):
                        role_is_null = roles[i][j] == 0
                        pred_is_null = predictions[i][j] == 0

                        if predictions[i][j] == roles[i][j]:
                            if role_is_null:
                                true_negative += 1
                            else:
                                true_positive += 1
                        elif pred_is_null:
                            false_negative += 1
                        else:
                            false_positive += 1

                        if not pred_is_null:
                            given += 1

                        if not role_is_null:
                            present += 1

            # guard the denominators: the original raised ZeroDivisionError
            # when no role was predicted (given == 0), no role was present
            # (present == 0), or both metrics were zero
            precision = true_positive/given if given else 0.0
            recall = true_positive/present if present else 0.0
            f_measure = (2*precision*recall/(precision+recall)
                         if precision + recall else 0.0)

            return precision, recall, f_measure