Example #1
0
def page_coverage(args):
    """Compute and print page-level retrieval coverage for a split.

    Coverage for one claim = |top-3 predicted pages ∩ gold pages| / |gold pages|.
    Prints two averages: over verifiable claims only (SUPPORTS/REFUTES) and
    over all claims.

    Args:
        args: parsed CLI namespace; reads ``args.split``, ``args.input_path``
            and ``args.count`` (number of retrieved pages, the ``p{k}`` in the
            predictions filename).
    """
    print("Page coverage...")
    coverage = []      # verifiable (SUPPORTS/REFUTES) claims only
    coverage_all = []  # every claim, including NOT ENOUGH INFO
    in_path = 'data/{}.jsonl'.format(args.split)
    annotation_processor = AnnotationProcessor(in_path)
    annotation_by_id = {el.get_id(): el for el in annotation_processor}

    # NOTE(review): the original used
    # "data/{1}.pages.p{2}.jsonl".format(args.input_path, split, k),
    # which silently dropped args.input_path and depended on the module-level
    # globals `split`/`k`.  Use the same "{0}/{1}.pages.p{2}.jsonl" layout as
    # the writer side and read everything from `args`.
    pages_path = "{0}/{1}.pages.p{2}.jsonl".format(args.input_path, args.split,
                                                   args.count)
    with open(pages_path, "r") as f:
        for line in f:
            js = json.loads(line)
            anno = annotation_by_id[js['id']]  # don't shadow builtin `id`
            docs_gold = list(
                set([clean_title(t) for t in anno.get_titles(flat=True)]))
            docs_predicted = [
                clean_title(t[0]) for t in js['predicted_pages'][:3]
            ]
            # The ratio is identical in both verdict branches; verifiable
            # claims additionally contribute to `coverage`.
            coverage_ele = len(set(docs_predicted)
                               & set(docs_gold)) / len(docs_gold)
            if anno.get_verdict() in ['SUPPORTS', 'REFUTES']:
                coverage.append(coverage_ele)
            coverage_all.append(coverage_ele)

    print(average(coverage))
    print(average(coverage_all))
Example #2
0
def main():
    """Parse CLI arguments and run the claim-only (hypothesis-only) bias model
    on the train/dev annotation splits under ``--input_path``."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_path', type=str, help='/path/to/data', default=None)
    parser.add_argument('--wiki_path', type=str)
    parser.add_argument('--model_path', type=str)
    args = parser.parse_args()

    # Derived paths are stashed on `args` so downstream code sees them.
    args.data_path_train = os.path.join(args.input_path, 'train.jsonl')
    args.data_path_dev = os.path.join(args.input_path, 'dev.jsonl')

    annotations_train = list(AnnotationProcessor(args.data_path_train))
    annotations_dev = list(AnnotationProcessor(args.data_path_dev))
    claim_hypothesis_only_bias(annotations_train, annotations_dev, args)
def main():
    """Parse CLI arguments and train the claim+evidence verdict predictor,
    optionally resampling NOT-ENOUGH-INFO instances in the train split."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_path', type=str, help='/path/to/data')
    parser.add_argument('--sample_nei', action='store_true', default=False)
    parser.add_argument('--model_path', type=str, help='/path/to/data')
    parser.add_argument('--wiki_path', type=str, help='/path/to/data')
    args = parser.parse_args()

    # Derived paths are stashed on `args` so downstream code sees them.
    args.train_data_path = os.path.join(args.input_path, 'train.jsonl')
    args.dev_data_path = os.path.join(args.input_path, 'dev.jsonl')

    init_db(args.wiki_path)
    annotations_train = list(
        AnnotationProcessor(args.train_data_path, has_content=True))
    if args.sample_nei:
        annotations_train = sample_nei_instances(annotations_train)
    annotations_dev = list(
        AnnotationProcessor(args.dev_data_path, has_content=True))

    claim_evidence_predictor(annotations_train, annotations_dev, args)
def main():
    """Parse CLI arguments and predict verdicts for a small dev sample
    (first 20 annotations) from ``--input_path``."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_path', type=str, help='/path/to/data')
    parser.add_argument('--model_path', type=str, help='/path/to/data')
    parser.add_argument('--wiki_path', type=str)
    args = parser.parse_args()

    processor = AnnotationProcessor(args.input_path)  # has_content disabled
    init_db(args.wiki_path)
    annotations_dev = list(processor)[:20]  # cap at 20 for a quick run

    logger.info('Start predicting verdicts...')
    claim_evidence_predictor(annotations_dev, args)
Example #5
0
def main():
    """Parse CLI arguments and extract evidence cells from tables for the
    annotations at ``--input_path``."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_path', type=str, help='/path/to/data')
    parser.add_argument('--model_path', type=str, help='/path/to/data')
    parser.add_argument('--trivial_baseline', action='store_true',
                        default=False)
    parser.add_argument('--max_sent', type=int, default=5,
                        help='/path/to/data')
    parser.add_argument('--wiki_path', type=str)
    args = parser.parse_args()

    annotations = list(AnnotationProcessor(args.input_path))
    logger.info('Start extracting cells from Tables...')
    extract_cells_from_tables(annotations, args)
Example #6
0
    # NOTE(review): fragment — the enclosing `def` header is outside this
    # view and the trailing `with` block is truncated before its body, so
    # the code is left byte-identical and only commented.
    parser.add_argument('--max_tabs', type=int, default=1)
    parser.add_argument('--all', type=int, default=0)
    args = parser.parse_args()
    split = args.split

    # q = 0
    # q_all = 0
    # score = 0
    # score_all = 0
    # Table-retrieval predictions for this split / page / table budget.
    in_path = 'data/{0}.tables.not_precomputed.p{1}.t{2}.jsonl'.format(
        split, args.max_page, args.max_tabs)

    coverage = []
    coverage_all = []
    # in_path = 'data/annotations/{0}.sentences.not_precomputed.p{1}.s{2}.jsonl'.format(split, args.max_page, args.max_sent)
    annotation_processor = AnnotationProcessor('data/{}.jsonl'.format(
        args.split))
    # --all == 0 restricts evaluation to claims whose evidence is entirely
    # table-based; otherwise every claim that has evidence is kept.
    if args.all == 0:
        annotation_by_id = {
            el.get_id(): el
            for el in annotation_processor
            if el.has_evidence() and el.get_evidence_type(
                flat=True) == EvidenceType.TABLE
        }
    else:
        annotation_by_id = {
            el.get_id(): el
            for el in annotation_processor if el.has_evidence()
        }

    with open(in_path, "r") as f:
        for idx, line in enumerate(f):
def evidence_coverage(args):
    """Compute and print evidence-level (sentence/table) retrieval coverage.

    Coverage for one claim = |predicted evidence ∩ gold evidence| / |gold|,
    where evidence elements are normalized to ``<page>_sentence_<i>`` and
    ``<page>_table_<t>`` identifiers.  Prints the average over verifiable
    claims (SUPPORTS/REFUTES) and over all claims.

    Args:
        args: parsed CLI namespace; reads ``args.split``, ``args.all``,
            ``args.max_page``, ``args.max_sent`` and ``args.max_tabs``.
    """
    print('Evidence coverage...')
    coverage = []      # verifiable (SUPPORTS/REFUTES) claims only
    coverage_all = []  # every claim kept by the filter below
    # in_path = 'data/annotations/{0}.sentences.not_precomputed.p{1}.s{2}.jsonl'.format(split, args.max_page, args.max_sent)
    annotation_processor = AnnotationProcessor('data/{}.jsonl'.format(
        args.split))
    # Keyed by POSITION in the annotations file (not annotation id): the
    # combined predictions file is aligned with the annotations by line order.
    if args.all == 0:
        # Sentence-only evaluation: keep claims whose evidence is all sentences.
        annotation_by_id = {
            i: el
            for i, el in enumerate(annotation_processor)
            if el.has_evidence() and el.get_evidence_type(
                flat=True) == EvidenceType.SENTENCE
        }
    else:
        annotation_by_id = {
            i: el
            for i, el in enumerate(annotation_processor) if el.has_evidence()
        }

    with open(
            'data/{}.combined.not_precomputed.p{}.s{}.t{}.jsonl'.format(
                args.split, args.max_page, args.max_sent, args.max_tabs),
            "r") as f:
        for idx, line in enumerate(f):
            if idx == 0:
                continue  # first line is a header record, not a prediction
            js = json.loads(line)
            anno_idx = idx - 1  # line order, offset by the skipped header
            if anno_idx not in annotation_by_id:
                continue
            anno = annotation_by_id[anno_idx]
            gold_evidence = list(set(anno.get_evidence(flat=True)))
            # Gold sentence ids, e.g. "Page_sentence_3".
            docs_gold_s = set(
                clean_title(t) for t in gold_evidence if '_sentence_' in t)
            # Gold table ids: map every cell id to its table,
            # "<page>_table_<t>"; header-cell ids carry one extra segment.
            docs_gold_t = set(
                clean_title(ev.split('_')[0]) + '_table_' +
                ev.split('_')[3 if '_header_cell_' in ev else 2]
                for ev in gold_evidence if '_cell_' in ev)
            docs_gold = docs_gold_s | docs_gold_t
            if len(docs_gold) == 0:
                continue
            docs_gold = set(clean_title(doc).strip() for doc in docs_gold)

            predicted_sentences = [
                clean_title(ele[0]) + '_sentence_' + ele[1].split('_')[1]
                for ele in js['predicted_evidence']
                if ele[1].split('_')[0] == 'sentence'
            ]
            predicted_tables = [
                clean_title(ele[0]) + '_table_' + ele[1].split('_')[1]
                for ele in js['predicted_evidence']
                if ele[1].split('_')[0] == 'table'
            ]
            docs_predicted = set(predicted_sentences + predicted_tables)
            if args.all == 0:
                # Sentence-only evaluation: drop predicted tables as well.
                docs_predicted = [
                    ele for ele in docs_predicted if '_sentence_' in ele
                ]
            # The ratio is identical in both verdict branches; verifiable
            # claims additionally contribute to `coverage`.
            coverage_ele = len(set(docs_predicted)
                               & set(docs_gold)) / len(docs_gold)
            if anno.get_verdict() in ['SUPPORTS', 'REFUTES']:
                coverage.append(coverage_ele)
            coverage_all.append(coverage_ele)
    print(average(coverage))
    print(average(coverage_all))
    # NOTE(review): removed stray `return zip(doc_names, doc_scores)` —
    # neither name was defined in this function, so reaching it raised
    # NameError; the function now returns None like page_coverage.


# NOTE(review): script entry point — truncated in this view (the retrieval
# loop continues past the last visible line), so code is left byte-identical
# and only commented.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str)
    parser.add_argument('--count', type=int, default=1)
    parser.add_argument('--db', type=str)
    parser.add_argument('--data_path', type=str)
    parser.add_argument('--model', type=str, default=None)
    args = parser.parse_args()

    # k = number of candidate pages to retrieve per claim.
    k = args.count
    split = args.split
    # TF-IDF document ranker loaded from the serialized model path.
    ranker = retriever.get_class('tfidf')(tfidf_path=args.model)
    annotation_processor = AnnotationProcessor("{}/{}.jsonl".format(
        args.data_path, args.split))
    db = DocDB(args.db)
    # All page titles present in the wiki DB; used to filter claim entities.
    document_titles = set(db.get_doc_ids())

    with open("{0}/{1}.pages.p{2}.jsonl".format(args.data_path, args.split, k),
              "w+") as f2:
        annotations = [annotation for annotation in annotation_processor]
        for i, annotation in enumerate(tqdm(annotations)):
            js = {}
            # js['id'] = annotation.get_id()
            js['claim'] = annotation.get_claim()
            # Keep only entities that exist as page titles in the DB.
            entities = [el[0] for el in annotation.get_claim_entities()]
            entities = [ele for ele in entities if ele in document_titles]
            if len(entities) < args.count:
                # Back off to TF-IDF retrieval when entity linking yields
                # fewer than k candidate pages.
                pages = list(
                    process(ranker, annotation.get_claim(), k=args.count))
Example #9
0
    # NOTE(review): fragment — the enclosing definition's header is above
    # this view and the loop body continues past the last visible line, so
    # code is left byte-identical and only commented.
    parser.add_argument('--mode', type=str)
    parser.add_argument('--count', type=int, default=1)
    args = parser.parse_args()
    split = args.split
    k = args.count

    page_coverage(args)

    # NOTE(review): everything below sys.exit() is dead code.
    sys.exit()

    q = 0
    q_all = 0
    score = 0
    score_all = 0
    in_path = 'data/{0}.jsonl'.format(split)
    annotation_processor = AnnotationProcessor(in_path)
    annotation_by_id = {el.get_id(): el for el in annotation_processor}

    # NOTE(review): "{1}"/"{2}" with only two .format() arguments would raise
    # IndexError if this line were ever reached (it never is, see sys.exit()).
    with open("data/{1}.pages.p{2}.jsonl".format(split, k), "r") as f:
        for idx, line in enumerate(f):
            js = json.loads(line)
            id = js['id']
            anno = annotation_by_id[id]
            docs_gold = list(set(anno.get_titles(flat=True)))
            docs_predicted = [t[0] for t in js['predicted_pages']]
            if anno.get_verdict() in ['SUPPORTS', 'REFUTES']:
                for p in docs_gold:
                    q += 1
                    if p in docs_predicted:
                        score += (1 / (docs_predicted.index(p) + 1)
                                  )  #mean reciprocal rank