Esempio n. 1
0
def parse(sentences, weights):
    """Parse every sentence with ``eisner.parse`` and score the predicted heads.

    Args:
        sentences: Sequence of sentences. Each sentence is an iterable of word
            objects exposing ``id``, ``form``, ``lemma``, ``cpostag``,
            ``postag``, ``feats``, ``head`` and ``deprel``.
        weights: Model weights, passed to ``eisner.parse`` unchanged.

    Returns:
        A ``(results, accuracy)`` tuple. ``results`` contains one list per
        sentence of tab-separated token lines whose head column is the
        predicted head. ``accuracy`` is the fraction of scored tokens whose
        predicted head equals ``word.head``, or ``0.0`` when no token was
        scored (e.g. ``sentences`` is empty).
    """
    results = []
    # Only the per-sentence edge features are needed; the feature dictionary
    # returned alongside them is not used here.
    edge_feats_list, _featdict = c.extract(sentences)

    size = len(sentences)
    correct = 0
    total = 0

    pairs = zip(sentences, edge_feats_list)
    for count, (sentence, edge_feats) in enumerate(pairs, start=1):
        Log.i("Parsing sentence: %d of %d" % (count, size))
        heads = eisner.parse(sentence, weights, edge_feats)
        result = []
        for index, word in enumerate(sentence):
            if index == 0:
                # Position 0 is neither emitted nor scored
                # (presumably the artificial root token -- TODO confirm).
                continue
            predicted_head = heads[index]
            token = [
                str(word.id),
                word.form,
                word.lemma,
                word.cpostag,
                word.postag,
                word.feats,
                str(predicted_head),
                word.deprel,
            ]
            if predicted_head == word.head:
                correct += 1
            total += 1
            result.append("\t".join(token))
        results.append(result)

    # Guard against ZeroDivisionError when nothing was scored, and convert to
    # float *before* dividing so the ratio is never truncated.
    accuracy = float(correct) / total if total else 0.0
    return results, accuracy
Esempio n. 2
0
def main(args):
    """Train a model from a CoNLL file and save the result.

    Args:
        args: Argument list. ``args[0]`` is the path of the training file and
            ``args[1]`` is the number of training iterations (converted with
            ``int``). With fewer than two arguments a warning is logged and
            nothing else happens.
    """
    if len(args) < 2:
        Log.w("Arguments Error")
        return

    train_path, raw_iterations = args[0], args[1]
    n_iterations = int(raw_iterations)

    Log.i("Training file: %s" % train_path)
    Log.i("----------------")

    corpus = c.readconllfile(train_path)
    feature_dict, learned_weights = train(corpus, n_iterations)
    c.save(feature_dict, learned_weights)
Esempio n. 3
0
def main(args):
    """Parse a CoNLL file with a saved model, print the result and log accuracy.

    Args:
        args: Argument list. ``args[0]`` is the path of the CoNLL file to
            parse and ``args[1]`` is the path of the model file. With fewer
            than two arguments a warning is logged and nothing else happens.
    """
    if len(args) < 2:
        Log.w("Arguments Error")
        return

    conll_path, model_path = args[0], args[1]

    Log.i("Training file: %s" % conll_path)
    Log.i("----------------")
    corpus = c.readconllfile(conll_path)

    Log.i("Model file: %s" % model_path)
    Log.i("----------------")
    # The loaded feature dictionary is not needed for parsing; only the
    # weights are passed on.
    _feature_dict, model_weights = c.load(model_path)

    parsed_sentences, accuracy = parse(corpus, model_weights)

    # One token line per row, with an empty line closing each sentence.
    for token_lines in parsed_sentences:
        for token_line in token_lines:
            print(token_line)
        print()

    Log.i("[DONE] accuracy: {:.2%}".format(accuracy))
Esempio n. 4
0
def train(sentences, iteration=10):
    """Learn a weight vector by repeatedly parsing and updating on each sentence.

    Args:
        sentences: Training sentences, passed to ``c.extract`` and
            ``eisner.parse``.
        iteration: Number of passes over ``sentences`` (default 10).

    Returns:
        A ``(featdict, weights)`` tuple: the feature dictionary produced by
        ``c.extract`` and the final weights. The weights start as a zero
        vector with one entry per feature-dictionary entry.
    """
    edge_feats_list, featdict = c.extract(sentences)
    weights = np.zeros(len(featdict))
    size = len(sentences)

    for epoch in range(1, iteration + 1):
        Log.i("Training iteration: %d of %d" % (epoch, iteration))
        pairs = zip(sentences, edge_feats_list)
        for position, (sentence, edge_feats) in enumerate(pairs, start=1):
            Log.i("\tTraining sentence: %d of %d" % (position, size))
            # Parse with the current weights, then let ``update`` produce the
            # weights used for the next sentence.
            predicted = eisner.parse(sentence, weights, edge_feats)
            weights = update(weights, predicted, sentence, edge_feats)
            # Debugging aid: print ``predicted`` and, for every
            # (feat, index) in featdict.items(), index, feat, weights[index].

    return featdict, weights