Example #1
import unittest

from input.dataset import Dataset


class DatasetTest(unittest.TestCase):

    # Hypothetical fixture location; the original snippet only shows
    # that TEST_FOLDER is defined on the test class.
    TEST_FOLDER = "tests/data"

    def test_create_from_files(self):
        """Test dataset creation from files."""
        feature_file = "{}/{}".format(self.TEST_FOLDER, "train.feat")
        text_file = "{}/{}".format(self.TEST_FOLDER, "train.text")

        dataset = Dataset.create_dataset_from_files(feature_file, text_file)
        self.assertEqual(len(dataset.documents), 4)
        self.assertEqual(len(dataset.documents[0].X),
                         len(dataset.documents[0].Y))
        self.assertEqual(len(dataset.documents[0].X),
                         len(dataset.documents[0].text))
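To run the test standalone, a __main__ guard can be appended at module level (or use unittest discovery, e.g. python -m unittest):

if __name__ == "__main__":
    unittest.main()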
Example #2
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Train et evaluate a dependency model."""

from input.dataset import Dataset
from model.parser_model import ParserModel

path = "/Users/alicia/Documents/01-Projets/04-Research/01-Dependency-parsing"

train_dataset = Dataset.create_dataset_from_files(path + "/data/train.feat",
                                                  path + "/data/train.text")
dev_dataset = Dataset.create_dataset_from_files(path + "/data/dev.feat",
                                                path + "/data/dev.text")
test_dataset = Dataset.create_dataset_from_files(path + "/data/test.feat",
                                                 path + "/data/test.text")

# Each entry of X is one feature vector, so its length gives the input size.
n_features = len(train_dataset.documents[0].X[0])

parser_model = ParserModel(n_features,
                           dropout_prob=0.6,
                           learning_rate=0.00001,
                           batch_size=5,
                           hidden_size=100,
                           model_folder=path + "/data/models")
# Train on the training set, tracking loss on the dev set;
# the third argument is presumably the number of epochs.
dev_loss = parser_model.train(train_dataset, dev_dataset, 400)

# predict() returns one set of predictions per test document.
Y = parser_model.predict(test_dataset)
for index, document in enumerate(test_dataset.documents):
    print("Accuracy: {}".format(document.compute_accuracy(Y[index])))