def main():
    """Load the inputs named on the command line and hand them to `test`.

    Everything is read from the namespace returned by `process_commands()`;
    nothing is returned.
    """
    args = process_commands()

    # Inputs are loaded in this fixed order.
    linkings = corpus.load_linking(args.linking)
    arguments = argument.ArgumentFile(args.argument)
    test_arguments = argument.ArgumentFile(args.argument_test)
    corpus_file = corpus.CorpusFile(
        args.corpus,
        args.corpus_pos,
        args.corpus_parse,
        args.corpus_dep,
    )
    fhelper = corpus.FoldsHelper(args.folds)

    # Boundaries are only kept when requested AND the training and test
    # argument paths are the same.
    same_argument_source = args.argument == args.argument_test
    keep_boundary = args.keep_boundary and same_argument_source

    options = {
        'use_baseline': args.use_baseline,
        'use_feature': args.select,
        'reverse_select': args.reverse_select,
        'rstats': args.rstats,
        'threshold': args.threshold,
    }
    test(
        fhelper,
        arguments,
        test_arguments,
        corpus_file,
        linkings,
        args.train,
        args.test,
        args.model,
        args.crfsuite,
        args.log,
        keep_boundary,
        **options
    )
def main():
    """Extract features and write them to the requested output file(s).

    Two independent outputs are supported: `args.output` (regular features)
    and `args.perfect_output` (features extracted with `perfect=ltruth` and
    `args.output_ambig` passed as an extra positional argument).
    """
    args = process_commands()

    # load data
    ltruth = linkage.LinkageFile(args.linkage)
    truth = ltruth.all_words()
    detector = linkage.LinkageDetector(args.connective)
    corpus_file = corpus.CorpusFile(
        args.corpus, args.corpus_pos, args.corpus_parse)
    vectors = corpus.VectorFile(args.vector)

    def export(path, *extra, **options):
        # One extract-then-write round; `extra` / `options` carry whatever
        # differs between the regular and the perfect run.
        candidates, labels, feats = get_features(
            detector, corpus_file, vectors, truth, *extra,
            select=args.select,
            reverse_select=args.reverse_select,
            **options)
        output_file(path, candidates, labels, feats)

    if args.output:
        export(args.output)

    if args.perfect_output:
        export(args.perfect_output, args.output_ambig, perfect=ltruth)
def main():
    """Load detector, corpus and ground truth, then run `stat_all_detect`.

    The argument truth is initialised against the corpus before statistics
    are collected. Count outputs go to the paths given by
    `args.output_count` and `args.output_cnnct_count`.
    """
    args = process_commands()

    connective_detector = linkage.LinkageDetector(args.connective)
    corpus_file = corpus.CorpusFile(args.corpus)
    linkage_truth = linkage.LinkageFile(args.linkage)

    argument_truth = argument.ArgumentFile(args.argument)
    argument_truth.init_truth(corpus_file)

    stat_all_detect(
        connective_detector,
        corpus_file,
        linkage_truth,
        argument_truth,
        args.output_count,
        args.output_cnnct_count,
    )
def preprocess(args):
    """Write one space-joined line per EDU and record the label order.

    For every `(label, tokens)` entry in `corpus_file.corpus`, each
    `(start, end)` span listed in `corpus_file.edu_corpus[label]` is written
    to `args.output` as the tokens of that span joined by single spaces.
    The labels, in iteration order, are then written one per line to
    `args.label`.
    """
    corpus_file = corpus.CorpusFile(args.corpus)
    ordered_labels = []

    with open(args.output, 'w') as edu_out:
        for label, tokens in corpus_file.corpus.items():
            ordered_labels.append(label)
            spans = corpus_file.edu_corpus[label]
            edu_out.writelines(
                ' '.join(tokens[start:end]) + '\n' for start, end in spans
            )

    with open(args.label, 'w') as label_out:
        label_out.writelines(label + '\n' for label in ordered_labels)
def main():
    """Extract linkage features and write regular and/or perfect outputs.

    `args.output` triggers a regular extraction; `args.perfect_output`
    triggers one with `perfect=True`. After each write, `check_accuracy` is
    run on the extracted features and labels when `args.check_accuracy` is
    set.
    """
    args = process_commands()

    # loading data
    truth = linkage.LinkageFile(args.linkage)
    # NOTE(review): `fhelper` is not referenced again in this function.
    fhelper = corpus.FoldsHelper(args.folds)
    detector = linkage.LinkageDetector(args.connective)
    vectors = corpus.VectorFile(args.vector)
    corpus_file = corpus.CorpusFile(
        args.corpus, args.corpus_pos, args.corpus_parse)

    def export(path, **extra):
        # Extract, write, and optionally check one feature set.
        candidates, labels, feats = get_linkage_features(
            corpus_file, detector, vectors, truth,
            select=args.select,
            reverse_select=args.reverse_select,
            select_cnnct=args.select_cnnct,
            **extra)
        output_file(path, candidates, labels, feats)
        if args.check_accuracy:
            check_accuracy(feats, labels)

    print('process file')
    if args.output:
        export(args.output)

    # extract perfect features for sense experiments
    if args.perfect_output:
        print('process perfect file')
        export(args.perfect_output, perfect=True)
def postprocess(args):
    """Regroup blank-line-delimited input chunks into one row per label.

    Labels are read (stripped) from `args.label`. For each label, one chunk
    of `args.input` is consumed per entry in `corpus_file.edu_corpus[label]`;
    a chunk is the run of lines up to the next line that is exactly a
    newline (or end of file). The stripped lines of a chunk are joined with
    '@@@@', and the chunks of a label are written to `args.output` as a
    tab-separated row prefixed by the label.
    """
    corpus_file = corpus.CorpusFile(args.corpus)

    with open(args.label) as label_in:
        labels = [raw.strip() for raw in label_in]

    def next_chunk(stream):
        # Consume lines up to (and including) the next bare newline.
        collected = []
        for raw in stream:
            if raw == '\n':
                break
            collected.append(raw.strip())
        return collected

    with open(args.input) as chunk_in, open(args.output, 'w') as out:
        for label in labels:
            edus = [
                '@@@@'.join(next_chunk(chunk_in))
                for _ in corpus_file.edu_corpus[label]
            ]
            out.write('{}\t{}\n'.format(label, '\t'.join(edus)))
def main():
    """Run cross-validation for either the ranking or the pipeline model.

    Without `args.pipeline` the ranking model (B2) is evaluated. With
    `args.pipeline` and without `args.perfect` the pipeline model (B1) is
    evaluated, using a cut based on per-word probabilities. With both
    flags set nothing is evaluated.
    """
    args = process_commands()

    def split_feature_key(x):
        return tuple(x.split('-'))

    corpus_file = corpus.CorpusFile(
        args.corpus, args.corpus_pos, args.corpus_parse)
    fhelper = corpus.FoldsHelper(args.folds)
    truth = linkage.LinkageFile(args.linkage)
    words = truth.all_words()
    detector = linkage.LinkageDetector(args.connective)
    feature_tbl = features.load_features_table(
        args.linkage_features, split_feature_key)
    linkage_counts, lcdict = count_linkage(args.linkage)
    linkage_probs = load_linkage_probs(args.linkage_probs)
    linkage_class = load_linkage_probs(args.linkage_class)
    word_ambig = evaluate.WordAmbig(args.word_ambig)
    ranking_probs = compute_ranking_probs(linkage_probs)

    def cut_by_truth(x, _):
        # Cut when any (x[0], word) pair is absent from the truth words.
        return any((x[0], w) not in words for w in x[1])

    def cut_by_class_prob(x, _):
        return linkage_class[x] < args.threshold

    def run_cross_validation(cut, **extra):
        # Arguments shared by both model variants live here; `extra`
        # carries what only one of them passes.
        cross_validation(
            corpus_file,
            fhelper,
            feature_tbl,
            truth,
            detector,
            linkage_counts,
            lcdict,
            ranking_probs,
            word_ambig,
            cut=cut,
            words=words,
            perfect=args.perfect,
            count_path=args.word_count,
            greedy=args.greedy,
            rank=args.rank,
            predict_sstats=args.predict_sense,
            predict_wstats=args.predict_wstats,
            **extra
        )

    # The B2 model
    if not args.pipeline:
        print('===== ranking model =====')
        run_cross_validation(
            cut_by_truth if args.perfect else cut_by_class_prob,
            arg_output=args.arg_output,
        )
        return

    if args.perfect:
        return

    word_probs, _word_truth = load_word_probs(args.word_probs)

    def cut_by_word_prob(x, _):
        # cut by word probs
        # or linkage_class[x] < args.threshold
        return any(word_probs[(x[0], w)] < args.threshold for w in x[1])

    # The B1 model
    print('\n===== pipeline model =====')
    run_cross_validation(cut_by_word_prob)