Esempio n. 1
0
def get_train_data_from_lang(lang):
    """Return the parsed training sentences for the requested language.

    Args:
        lang: Language name; one of 'swedish', 'danish' or 'english'.

    Returns:
        Whatever ``parsed_sents()`` yields for the matching training corpus
        obtained from the ``dataset`` module.

    Raises:
        ValueError: If ``lang`` is not one of the supported languages.
    """
    # Each supported language is paired with the name of the ``dataset``
    # function that loads its training corpus. The loader is resolved only
    # after a match, so an unsupported language never touches ``dataset``.
    corpus_getters = (
        ('swedish', 'get_swedish_train_corpus'),
        ('danish', 'get_danish_train_corpus'),
        ('english', 'get_english_train_corpus'),
    )
    for language, getter_name in corpus_getters:
        if lang == language:
            corpus = getattr(dataset, getter_name)()
            return corpus.parsed_sents()

    message = "Please don't use {}, only use english, swedish or danish"
    raise ValueError(message.format(lang))
Esempio n. 2
0
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == '__main__':

    #traindata = dataset.get_swedish_train_corpus().parsed_sents()
    traindata = dataset.get_english_train_corpus().parsed_sents()
    #traindata = dataset.get_danish_train_corpus().parsed_sents()

    try:

        tp = TransitionParser(Transition, FeatureExtractor)
        tp.train(traindata)
	#tp.save('swedish.model')
        #tp.save('english.model')
###	tp.save('danish.model')

	#labeleddata = dataset.get_swedish_dev_corpus().parsed_sents()
        labeleddata = dataset.get_english_dev_corpus().parsed_sents()
	#labeleddata = dataset.get_danish_dev_corpus().parsed_sents()
        
	#blinddata = dataset.get_swedish_dev_blind_corpus().parsed_sents()
	blinddata = dataset.get_english_dev_blind_corpus().parsed_sents()
	#blinddata = dataset.get_danish_dev_blind_corpus().parsed_sents()
        #tp = TransitionParser.load('badfeatures.model')

        parsed = tp.parse(blinddata)
Esempio n. 3
0
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == '__main__':
    # traindata = dataset.get_swedish_train_corpus().parsed_sents()
    traindata = dataset.get_english_train_corpus().parsed_sents()

    try:

        tp = TransitionParser(Transition, FeatureExtractor)
        tp.train(traindata)

        # tp.save('swedish.model')
        # labeleddata = dataset.get_swedish_dev_corpus().parsed_sents()
        # blinddata = dataset.get_swedish_dev_blind_corpus().parsed_sents()

        tp.save('english.model')
        labeleddata = dataset.get_english_dev_corpus().parsed_sents()
        blinddata = dataset.get_english_dev_blind_corpus().parsed_sents()

        #tp = TransitionParser.load('badfeatures.model')

        # parsed = tp.parse(labeleddata)
        parsed = tp.parse(blinddata)

        with open('test.conll', 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))