def parse(sentences, weights):
    """Parse every sentence and score the predicted heads.

    Features are extracted for all sentences with ``c.extract``; each
    sentence is then decoded by ``eisner.parse`` using ``weights``. For
    every entry of a sentence except the one at index 0, a tab-separated
    line is built from the word's fields, with the predicted head in
    place of ``word.head``.

    Args:
        sentences: Sized collection of sentences; each sentence is an
            iterable of word objects exposing ``id``, ``form``, ``lemma``,
            ``cpostag``, ``postag``, ``feats``, ``head`` and ``deprel``.
        weights: Weight vector handed through to ``eisner.parse``.

    Returns:
        A ``(results, accuracy)`` tuple. ``results`` holds one list of
        output lines per sentence. ``accuracy`` is the fraction of scored
        tokens whose predicted head equals ``word.head``, or ``0.0`` when
        no token was scored.
    """
    results = []
    # The feature dictionary returned by the extractor is not needed here.
    edge_feats_list, _ = c.extract(sentences)
    size = len(sentences)
    correct = 0
    total = 0
    for count, (sentence, edge_feats) in enumerate(
            zip(sentences, edge_feats_list), 1):
        Log.i("Parsing sentence: %d of %d" % (count, size))
        result = []
        heads = eisner.parse(sentence, weights, edge_feats)
        for index, word in enumerate(sentence):
            # Index 0 is never emitted or scored (presumably the artificial
            # root entry -- confirm against the reader).
            if index == 0:
                continue
            token = [
                str(word.id),
                word.form,
                word.lemma,
                word.cpostag,
                word.postag,
                word.feats,
                str(heads[index]),
                word.deprel,
            ]
            if heads[index] == word.head:
                correct += 1
            total += 1
            result.append("\t".join(token))
        results.append(result)
    # Guard against an empty evaluation set, and convert before dividing so
    # the ratio is a true fraction even under integer-division semantics.
    accuracy = float(correct) / total if total else 0.0
    return results, accuracy
def main(args):
    """Training entry point: read a CoNLL file, train, and save the model.

    Args:
        args: Sequence of command-line arguments; ``args[0]`` is the path
            of the training file and ``args[1]`` is the number of training
            iterations (converted with ``int``).

    Logs a warning and returns without doing anything when fewer than two
    arguments are supplied.
    """
    if len(args) < 2:
        Log.w("Arguments Error")
        return

    path, iteration = args[0], int(args[1])

    for message in ("Training file: %s" % path, "----------------"):
        Log.i(message)

    corpus = c.readconllfile(path)
    model_features, model_weights = train(corpus, iteration)
    c.save(model_features, model_weights)
def main(args):
    """Parsing entry point: load a model, parse a CoNLL file, print results.

    Args:
        args: Sequence of command-line arguments; ``args[0]`` is the path
            of the CoNLL file to parse and ``args[1]`` is the model file
            passed to ``c.load``.

    Each output line is printed to stdout, with an empty line after every
    sentence; the accuracy returned by ``parse`` is logged at the end.
    Logs a warning and returns early when fewer than two arguments are
    supplied.
    """
    if len(args) < 2:
        Log.w("Arguments Error")
        return

    separator = "----------------"
    path, mfile = args[0], args[1]

    # NOTE(review): the label says "Training file" although this file is
    # the one being parsed -- confirm whether the wording is intentional.
    Log.i("Training file: %s" % path)
    Log.i(separator)
    sentences = c.readconllfile(path)

    Log.i("Model file: %s" % mfile)
    Log.i(separator)
    _featdict, weights = c.load(mfile)

    parsed, accuracy = parse(sentences, weights)
    for sentence_lines in parsed:
        for line in sentence_lines:
            print(line)
        # Empty line terminates the sentence block.
        print()

    Log.i("[DONE] accuracy: {:.2%}".format(accuracy))
def train(sentences, iteration=10):
    """Learn a weight vector by repeatedly parsing and updating.

    Features are extracted once with ``c.extract``. The weights start as a
    zero vector with one entry per feature in the returned dictionary. In
    every iteration each sentence is parsed with the current weights by
    ``eisner.parse`` and the prediction is passed to ``update``, whose
    return value becomes the new weight vector.

    Args:
        sentences: Sized collection of training sentences.
        iteration: Number of passes over ``sentences`` (default 10).

    Returns:
        A ``(featdict, weights)`` tuple: the feature dictionary from
        ``c.extract`` and the final weight vector.
    """
    edge_feats_list, featdict = c.extract(sentences)
    weights = np.zeros(len(featdict))
    size = len(sentences)

    for epoch in range(1, iteration + 1):
        Log.i("Training iteration: %d of %d" % (epoch, iteration))
        pairs = zip(sentences, edge_feats_list)
        for count, (sentence, edge_feats) in enumerate(pairs, 1):
            Log.i("\tTraining sentence: %d of %d" % (count, size))
            predicts = eisner.parse(sentence, weights, edge_feats)
            weights = update(weights, predicts, sentence, edge_feats)
            # Debug aid: print `predicts`, then for each (feat, index) in
            # featdict.items() print index, feat and weights[index].

    return featdict, weights