예제 #1
0
def evaluate_parse(partIdx):
    if partIdx == 3:
        print 'Evaluating your swedish model ... '
        testdata = dataset.get_swedish_test_corpus().parsed_sents()
        if not os.path.exists('./swedish.model'):
            print 'No model. Please save your model as swedish.model at current directory before submission.'
            sys.exit(0)
        tp = TransitionParser.load('swedish.model')
        parsed = tp.parse(testdata)
        ev = DependencyEvaluator(testdata, parsed)
        uas, las = ev.eval()
        print 'UAS:', uas
        print 'LAS:', las
        swed_score = (min(las, 0.7) / 0.7)**2
        return swed_score

    if partIdx == 1:
        print 'Evaluating your english model ... '
        testdata = dataset.get_english_test_corpus().parsed_sents()
        if not os.path.exists('./english.model'):
            print 'No model. Please save your model as english.model at current directory before submission.'
            sys.exit(0)
        tp = TransitionParser.load('english.model')
        parsed = tp.parse(testdata)
        ev = DependencyEvaluator(testdata, parsed)
        uas, las = ev.eval()
        print 'UAS:', uas
        print 'LAS:', las
        eng_score = (min(las, 0.7) / 0.7)**2
        return eng_score

    if partIdx == 2:
        print 'Evaluating your danish model ... '
        testdata = dataset.get_danish_test_corpus().parsed_sents()
        if not os.path.exists('./danish.model'):
            print 'No model. Please save your model danish.model at current directory before submission.'
            sys.exit(0)
        tp = TransitionParser.load('danish.model')
        parsed = tp.parse(testdata)
        ev = DependencyEvaluator(testdata, parsed)
        uas, las = ev.eval()
        print 'UAS:', uas
        print 'LAS:', las
        dan_score = (min(las, 0.7) / 0.7)**2
        return dan_score
예제 #2
0
def verify_lang_data(model, conll_output):
    try:
        lang = extract_lang_from_model_name(model)
        testdata = get_data_from_lang(lang)
        tp = TransitionParser.load(model)

        parsed = tp.parse(testdata)

        with open(conll_output, 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')

        ev = DependencyEvaluator(testdata, parsed)
        uas, las = ev.eval()
        print "\n=====Prediction of {}.model===== \nUAS: {} \nLAS: {}".format(lang, uas, las)
        return las
        pass
    except ValueError as e:
        print(e)
예제 #3
0
        tp.save('english.model')
        labeleddata = dataset.get_english_dev_corpus().parsed_sents()
        blinddata = dataset.get_english_dev_blind_corpus().parsed_sents()

        #tp = TransitionParser.load('badfeatures.model')

        # parsed = tp.parse(labeleddata)
        parsed = tp.parse(blinddata)

        with open('test.conll', 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')

        ev = DependencyEvaluator(labeleddata, parsed)
        print "UAS: {} \nLAS: {}".format(*ev.eval())

        # parsing arbitrary sentences (english):
        # sentence = DependencyGraph.from_sentence('Hi, this is a test')

        # tp = TransitionParser.load('english.model')
        # parsed = tp.parse([sentence])
        # print parsed[0].to_conll(10).encode('utf-8')
    except NotImplementedError:
        print """
        This file is currently broken! We removed the implementation of Transition
        (in transition.py), which tells the transitionparser how to go from one
        Configuration to another Configuration. This is an essential part of the
        arc-eager dependency parsing algorithm, so you should probably fix that :)
예제 #4
0
        tp = TransitionParser(Transition, FeatureExtractor)

        tp.train(subdata)
        tp.save('swedish.model')

        testdata = dataset.get_swedish_test_corpus().parsed_sents()
        tp = TransitionParser.load('swedish.model')

        parsed = tp.parse(testdata)

        with open('test.conll', 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')

        ev = DependencyEvaluator(testdata, parsed)
        print "UAS: {} \nLAS: {}".format(*ev.eval())

        # parsing arbitrary sentences (swedish):
        # sentence = DependencyGraph.from_sentence('Hi, this is a test')

        # tp = TransitionParser.load('swedish.model')
        # parsed = tp.parse([sentence])
        # print parsed[0].to_conll(10).encode('utf-8')
    except NotImplementedError:
        print
        """
        This file is currently broken! We removed the implementation of Transition
        (in transition.py), which tells the transitionparser how to go from one
        Configuration to another Configuration. This is an essential part of the
        arc-eager dependency parsing algorithm, so you should probably fix that :)
예제 #5
0
        # with open('english.conll', 'w') as f:
        #     for p in EN_parsed:
        #         f.write(p.to_conll(10).encode('utf-8'))
        #         f.write('\n')
        #
        # ev = DependencyEvaluator(EN_testdata, EN_parsed)
        # print('Evaluating EN model...')
        # print "LAS: {} \nUAS: {}".format(*ev.eval())

        with open('danish.conll', 'w') as f:
            for p in DK_parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')

        ev = DependencyEvaluator(DK_testdata, DK_parsed)
        print('Evaluating DK model...')
        print "LAS: {} \nUAS: {}".format(*ev.eval())

        # parsing arbitrary sentences (english):
        # sentence = DependencyGraph.from_sentence('Hi, this is a test')

        # tp = TransitionParser.load('english.model')
        # parsed = tp.parse([sentence])
        # print parsed[0].to_conll(10).encode('utf-8')
    except NotImplementedError:
        print """
        This file is currently broken! We removed the implementation of Transition
        (in transition.py), which tells the transitionparser how to go from one
        Configuration to another Configuration. This is an essential part of the
        arc-eager dependency parsing algorithm, so you should probably fix that :)