Exemple #1
0
    'lbranch': LBranch,
    'upcfg': UPCFG
}


if __name__ == '__main__':
    # Script entry point: read the training corpus, train the parser model
    # selected with -m, and pickle it to the path given with -o.
    opts = docopt(__doc__)

    print('Loading corpus ...')
    PATH = "./../../ancora-3.0.1es"
    # Raw string: the regex escapes (\.) must not be interpreted as Python
    # string escapes. The resulting pattern is identical to the original.
    files = r'CESS-CAST-(A|AA|P)/.*\.tbf\.xml'
    corpus = SimpleAncoraCorpusReader(PATH, files)

    print('Training model ...')
    m = opts['-m']  # chosen model (key into `models`)
    n = opts['-n']  # horizontal Markovization order; None when not given
    if (n is not None) and (m == "upcfg"):
        # horzMarkov is only passed for the 'upcfg' model, and only when an
        # order was supplied on the command line.
        model = models[m](corpus.parsed_sents(), horzMarkov=int(n))
    else:
        model = models[m](corpus.parsed_sents())

    print('Saving ...')
    filename = opts['-o']
    # The context manager closes the file even if pickling fails.
    with open(filename, 'wb') as f:
        pickle.dump(model, f)
Exemple #2
0
    'lbranch': LBranch,
    'upcfg': UPCFG
}


if __name__ == '__main__':
    # Script entry point: read the training corpus, train the parser model
    # selected with -m (optionally using -n for 'upcfg'), and pickle it to
    # the path given with -o.
    opts = docopt(__doc__)

    print('\nLoading corpus...')
    # Raw string: the regex escapes (\.) must not be interpreted as Python
    # string escapes. The resulting pattern is identical to the original.
    files = r'CESS-CAST-(A|AA|P)/.*\.tbf\.xml'
    corpus = SimpleAncoraCorpusReader('corpus/ancora-2.0/', files)

    print('Training model...')
    om, on = opts['-m'], opts['-n']
    if om == 'upcfg':
        # -n is optional; None is forwarded as-is when it was not supplied.
        n = None if on is None else int(on)
        print('UPCFG model selected n={}.'.format(n))
        model = models[om](corpus.parsed_sents(), horzMarkov=n)
    elif om in ['flat', 'rbranch', 'lbranch']:
        print(om + ' model selected.')
        model = models[om](corpus.parsed_sents())
    else:
        print('Bad model type.')
        # Exit with a non-zero status so callers can detect the failure.
        # SystemExit is raised directly rather than through the
        # site-provided exit() helper, which is not always available.
        raise SystemExit(1)

    print('Saving...\n')
    filename = opts['-o']
    # The context manager closes the file even if pickling fails.
    with open(filename, 'wb') as f:
        pickle.dump(model, f)
Exemple #3
0
    sys.stdout.flush()


if __name__ == '__main__':
    # Script entry point: load a pickled parser model (-i), parse every
    # sentence of the evaluation corpus and compare the resulting spans
    # against the gold trees.
    opts = docopt(__doc__)

    print('Loading model...')
    filename = opts['-i']
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this with model files from a trusted source.
    # The context manager closes the file even if unpickling fails.
    with open(filename, 'rb') as f:
        model = pickle.load(f)

    print('Loading corpus...')
    # Raw string: the regex escapes (\.) must not be interpreted as Python
    # string escapes. The resulting pattern is identical to the original.
    files = r'3LB-CAST/.*\.tbf\.xml'
    corpus = SimpleAncoraCorpusReader('ancora/ancora-2.0/', files)
    # Materialized so the number of sentences is known up front.
    parsed_sents = list(corpus.parsed_sents())

    print('Parsing...')
    hits, total_gold, total_model = 0, 0, 0
    n = len(parsed_sents)
    format_str = '{:3.1f}% ({}/{}) (P={:2.2f}%, R={:2.2f}%, F1={:2.2f}%)'
    progress(format_str.format(0.0, 0, n, 0.0, 0.0, 0.0))
    for i, gold_parsed_sent in enumerate(parsed_sents):
        # presumably (word, tag) pairs taken from the gold tree -- confirm
        tagged_sent = gold_parsed_sent.pos()

        # parse
        model_parsed_sent = model.parse(tagged_sent)

        # compute labeled scores
        gold_spans = spans(gold_parsed_sent, unary=False)
        model_spans = spans(model_parsed_sent, unary=False)
Exemple #4
0
from docopt import docopt
import pickle

from corpus.ancora import SimpleAncoraCorpusReader

from parsing.baselines import Flat, RBranch, LBranch


# Baseline parser classes, looked up by model name.
models = dict(
    flat=Flat,
    rbranch=RBranch,
    lbranch=LBranch,
)


if __name__ == '__main__':
    # Script entry point: read the training corpus, train the baseline
    # model selected with -m, and pickle it to the path given with -o.
    opts = docopt(__doc__)

    # Validate the model name before the corpus is read, so a typo gives a
    # clear message instead of a KeyError traceback later on.
    model_name = opts['-m']
    if model_name not in models:
        raise SystemExit('Unknown model type: {!r}'.format(model_name))

    print('Loading corpus...')
    # Raw string: the regex escapes (\.) must not be interpreted as Python
    # string escapes. The resulting pattern is identical to the original.
    files = r'CESS-CAST-(A|AA|P)/.*\.tbf\.xml'
    corpus = SimpleAncoraCorpusReader('ancora/ancora-2.0/', files)

    print('Training model...')
    model = models[model_name](corpus.parsed_sents())

    print('Saving...')
    filename = opts['-o']
    # The context manager closes the file even if pickling fails.
    with open(filename, 'wb') as f:
        pickle.dump(model, f)