Esempio n. 1
0
def get_train_data_from_lang(lang):
    """Return the parsed training sentences for the requested language.

    The matching ``dataset.get_<language>_train_corpus()`` accessor is
    called and its ``parsed_sents()`` result is returned.

    Args:
        lang: One of ``'swedish'``, ``'danish'`` or ``'english'``.

    Raises:
        ValueError: If ``lang`` is not one of the supported languages.
    """
    # (language, name of the accessor on ``dataset``). The accessor is looked
    # up lazily so only the corpus that was actually requested is touched.
    corpus_getters = (
        ('swedish', 'get_swedish_train_corpus'),
        ('danish', 'get_danish_train_corpus'),
        ('english', 'get_english_train_corpus'),
    )
    for language, getter_name in corpus_getters:
        if lang == language:
            load_corpus = getattr(dataset, getter_name)
            return load_corpus().parsed_sents()

    message = "Please don't use {}, only use english, swedish or danish"
    raise ValueError(message.format(lang))
Esempio n. 2
0
import random
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == "__main__":
    # Script entry point: parse the Swedish test corpus with a model loaded
    # from disk, write the parses to test.conll and print the evaluation.
    data = dataset.get_swedish_train_corpus().parsed_sents()
    # Seed the RNG before sampling so the 200-sentence draw is reproducible.
    random.seed(1234)
    # NOTE: subdata is only referenced by the commented-out training code below.
    subdata = random.sample(data, 200)

    try:
        # tp = TransitionParser(Transition, FeatureExtractor)
        # tp.train(subdata)
        # tp.save('swedish.model')

        testdata = dataset.get_swedish_test_corpus().parsed_sents()
        # Training is disabled above, so the parser comes from a saved model file.
        tp = TransitionParser.load("badfeatures.model")

        parsed = tp.parse(testdata)

        # Dump every parse to test.conll, each followed by a newline.
        with open("test.conll", "w") as f:
            for p in parsed:
                # presumably 10 selects the 10-column CoNLL layout -- TODO confirm
                f.write(p.to_conll(10).encode("utf-8"))
                f.write("\n")

        ev = DependencyEvaluator(testdata, parsed)
        # The first two values returned by ev.eval() are printed as LAS and UAS.
        print "LAS: {} \nUAS: {}".format(*ev.eval())

        # parsing arbitrary sentences (english):
        # NOTE(review): the snippet stops here; the except/finally clause that
        # must close the try block above is not visible in this excerpt.
Esempio n. 3
0
#import os
#os.chdir("/home/sidvash/NLP_coursera/Assignment1/code/")
import random
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == '__main__':
    # Script entry point: train a parser on a sample of the Swedish training
    # corpus, save it, reload it, parse the test corpus and write test.conll.
    data = dataset.get_swedish_train_corpus().parsed_sents()
    # Seed the RNG before sampling so the 200-sentence draw is reproducible.
    random.seed(1234)
    subdata = random.sample(data, 200)

    # NOTE(review): no except/finally clause for this try is visible in the
    # excerpt; the snippet appears to be cut off after the file is written.
    try:

        tp = TransitionParser(Transition, FeatureExtractor)

        # Train on the sampled sentences and save the result to swedish.model.
        tp.train(subdata)
        tp.save('swedish.model')

        testdata = dataset.get_swedish_test_corpus().parsed_sents()
        # Replace the in-memory parser with the one loaded back from the saved file.
        tp = TransitionParser.load('swedish.model')

        parsed = tp.parse(testdata)

        # Dump every parse to test.conll, each followed by a newline.
        with open('test.conll', 'w') as f:
            for p in parsed:
                # presumably 10 selects the 10-column CoNLL layout -- TODO confirm
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')
Esempio n. 4
0
import matplotlib.pyplot as plt
from providedcode.dataset import get_swedish_train_corpus
import networkx as nx
import random

if __name__ == '__main__':
    # Draw the dependency graph of one randomly chosen sentence from the
    # Swedish training corpus and show it in a matplotlib window.
    sentences = get_swedish_train_corpus().parsed_sents()
    sentence = random.choice(sentences)
    graph, node_labels = sentence.nx_graph()

    # One layout is computed up front and shared by all three drawing calls
    # so that nodes, labels and edges line up.
    layout = nx.spring_layout(graph)
    nx.draw_networkx_nodes(graph, layout, node_size=1000)
    nx.draw_networkx_labels(graph, layout, node_labels)
    nx.draw_networkx_edges(graph, layout, edge_color='k', width=1)

    plt.show()