Esempio n. 1
0
def get_data_from_lang(lang):
    """Return the parsed sentences of the test corpus for ``lang``.

    Supported values are ``'swedish'``, ``'danish'`` and ``'english'``.

    Raises:
        ValueError: if ``lang`` is not one of the supported languages.
    """
    # Loaders are wrapped in lambdas so ``dataset`` is only touched for the
    # language that was actually requested.
    corpus_loaders = (
        ('swedish', lambda: dataset.get_swedish_test_corpus()),
        ('danish', lambda: dataset.get_danish_test_corpus()),
        ('english', lambda: dataset.get_english_test_corpus()),
    )
    for language, load_corpus in corpus_loaders:
        if lang == language:
            return load_corpus().parsed_sents()
    raise ValueError("Please don't use {}, only use english, swedish or danish".format(lang))
Esempio n. 2
0
def evaluate_parse(partIdx):
    """Score the saved parser model that belongs to one assignment part.

    ``partIdx`` selects the language: 1 = english, 2 = danish, 3 = swedish.
    The model is loaded from ``<language>.model`` in the current directory,
    run on that language's test corpus, and the two values returned by
    ``DependencyEvaluator.eval()`` are printed as UAS and LAS.

    Returns:
        ``(min(las, 0.7) / 0.7) ** 2`` -- the score grows with LAS and is
        capped at 1.0 once LAS reaches 0.7.  ``None`` is returned when
        ``partIdx`` is not 1, 2 or 3.

    Side effects:
        Prints progress and results.  If the model file is missing, prints a
        hint and terminates the process via ``sys.exit(0)``.
    """
    # One row per part instead of three copy-pasted branches.  The loaders
    # are lambdas so no corpus is touched until it is actually needed.
    parts = (
        (3, 'swedish', lambda: dataset.get_swedish_test_corpus()),
        (1, 'english', lambda: dataset.get_english_test_corpus()),
        (2, 'danish', lambda: dataset.get_danish_test_corpus()),
    )
    for idx, language, load_corpus in parts:
        if partIdx == idx:
            break
    else:
        # Unknown part: nothing to evaluate.
        return None

    model_file = language + '.model'
    print('Evaluating your {} model ... '.format(language))

    # Check for the model before loading the corpus so a missing model
    # fails fast.  The exit status stays 0, as before.
    if not os.path.exists('./' + model_file):
        print('No model. Please save your model as {} at current directory '
              'before submission.'.format(model_file))
        sys.exit(0)

    testdata = load_corpus().parsed_sents()
    tp = TransitionParser.load(model_file)
    parsed = tp.parse(testdata)
    uas, las = DependencyEvaluator(testdata, parsed).eval()
    # Single-argument print calls give the same output on Python 2 and 3.
    print('UAS: {}'.format(uas))
    print('LAS: {}'.format(las))
    return (min(las, 0.7) / 0.7) ** 2
Esempio n. 3
0
def evaluate_parse(partIdx):
  """Score the saved parser model that belongs to one assignment part.

  ``partIdx`` selects the language: 1 = english, 2 = danish, 3 = swedish.
  The model is loaded from ``<language>.model`` in the current directory,
  run on that language's test corpus, and the two values returned by
  ``DependencyEvaluator.eval()`` are printed as UAS and LAS.

  Returns:
    ``(min(las, 0.7) / 0.7) ** 2`` -- the score grows with LAS and is
    capped at 1.0 once LAS reaches 0.7.  ``None`` is returned when
    ``partIdx`` is not 1, 2 or 3.

  Side effects:
    Prints progress and results.  If the model file is missing, prints a
    hint and terminates the process via ``sys.exit(0)``.
  """
  # One row per part instead of three copy-pasted branches.  The loaders
  # are lambdas so no corpus is touched until it is actually needed.
  parts = (
    (3, 'swedish', lambda: dataset.get_swedish_test_corpus()),
    (1, 'english', lambda: dataset.get_english_test_corpus()),
    (2, 'danish', lambda: dataset.get_danish_test_corpus()),
  )
  selected = None
  for idx, language, load_corpus in parts:
    if partIdx == idx:
      selected = (language, load_corpus)
      break
  if selected is None:
    # Unknown part: nothing to evaluate.
    return None
  language, load_corpus = selected

  model_file = language + '.model'
  print('Evaluating your {} model ... '.format(language))

  # Check for the model before loading the corpus so a missing model
  # fails fast.  The exit status stays 0, as before.
  if not os.path.exists('./' + model_file):
    print('No model. Please save your model as {} at current directory '
          'before submission.'.format(model_file))
    sys.exit(0)

  testdata = load_corpus().parsed_sents()
  tp = TransitionParser.load(model_file)
  parsed = tp.parse(testdata)
  uas, las = DependencyEvaluator(testdata, parsed).eval()
  # Single-argument print calls give the same output on Python 2 and 3.
  print('UAS: {}'.format(uas))
  print('LAS: {}'.format(las))
  return (min(las, 0.7) / 0.7) ** 2
Esempio n. 4
0
        # # SE
        # tp = TransitionParser(Transition, FeatureExtractor)
        # tp.train(SE_subdata)
        # tp.save('swedish.model')
        # SE_testdata = dataset.get_swedish_test_corpus().parsed_sents()
        # SE_tp = TransitionParser.load('swedish.model')
        # SE_parsed = SE_tp.parse(SE_testdata)
        #
        # DK: train a fresh parser on DK_subdata, save it as 'danish.model',
        # then parse the Danish test corpus.
        # NOTE(review): DK_subdata is defined outside this excerpt --
        # presumably a sample of the Danish training corpus; confirm.
        tp = TransitionParser(Transition, FeatureExtractor)
        print('Training...')
        tp.train(DK_subdata)
        print('Ok. Saving the model...')
        tp.save('danish.model')
        print('Ok. Parsing the test corpus...')
        DK_testdata = dataset.get_danish_test_corpus().parsed_sents()
        # Reloading from disk is disabled; the in-memory parser that was just
        # trained is used for parsing instead.
        #DK_tp = TransitionParser.load('danish.model')
        DK_parsed = tp.parse(DK_testdata)
        print('Ok.')


        # with open('english.conll', 'w') as f:
        #     for p in EN_parsed:
        #         f.write(p.to_conll(10).encode('utf-8'))
        #         f.write('\n')
        #
        # ev = DependencyEvaluator(EN_testdata, EN_parsed)
        # print('Evaluating EN model...')
        # print "LAS: {} \nUAS: {}".format(*ev.eval())

        with open('danish.conll', 'w') as f:
Esempio n. 5
0
    # Training corpus; the commented-out line is an alternative language.
    #data = dataset.get_korean_train_corpus().parsed_sents()
    data = dataset.get_danish_train_corpus().parsed_sents()

    # Fixed seed so the same 200 training sentences are drawn on every run.
    random.seed(1234)
    subdata = random.sample(data, 200)

    # NOTE(review): the handler for this try block lies outside this excerpt.
    try:
        # Train on the sample and persist the model.
        tp = TransitionParser(Transition, FeatureExtractor)
        tp.train(subdata)
        #tp.save('swedish.model')
        #tp.save('korean.model')
        tp.save('danish.model')

        #testdata = dataset.get_swedish_test_corpus().parsed_sents()
        #testdata = dataset.get_korean_test_corpus().parsed_sents()
        testdata = dataset.get_danish_test_corpus().parsed_sents()

        # Reload the model from disk, so the saved file (not the in-memory
        # parser) is what gets evaluated.
        #tp = TransitionParser.load('swedish.model')
        #tp = TransitionParser.load('korean.model')
        tp = TransitionParser.load('danish.model')

        parsed = tp.parse(testdata)

        # Dump every parsed sentence to test.conll, one to_conll(10) block
        # per sentence followed by a newline.
        with open('test.conll', 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')

        # eval() yields two values, printed here as UAS then LAS.
        ev = DependencyEvaluator(testdata, parsed)
        print "UAS: {} \nLAS: {}".format(*ev.eval())