def test_assembly_cycle():
    """Assemble the refinement-cycle test corpus and check the result.

    874 is a refinement of -534, so after assembly the first statement
    must be supported by exactly the second one.
    """
    input_stmts = stmts_from_json_file(
        os.path.join(HERE, 'data', 'compositional_refinement_cycle_test.json'))
    assembled = AssemblyPipeline(comp_assembly_json).run(input_stmts)
    assert assembled[0].supported_by == [assembled[1]]
description='Choose a corpus for live curation.') parser.add_argument('--json') parser.add_argument('--raw_json') parser.add_argument('--pickle') parser.add_argument('--corpus_id', default='1') parser.add_argument('--host', default='0.0.0.0') parser.add_argument('--port', default=8001, type=int) parser.add_argument('--aws-cred', type=str, default='default', help='The name of the credential set to use when ' 'connecting to AWS services. If the name is not ' 'found in your AWS config, `[default]` is used.') args = parser.parse_args() # Load the corpus if args.json: stmts = stmts_from_json_file(args.json) elif args.pickle: with open(args.pickle, 'rb') as fh: stmts = pickle.load(fh) if args.raw_json: raw_stmts = stmts_from_json_file(args.raw_json) else: raw_stmts = None logger.info('Loaded corpus %s with %d statements.' % (args.corpus_id, len(stmts))) curator.corpora[args.corpus_id] = Corpus(stmts, raw_stmts, args.aws_cred) # Run the app app.run(host=args.host, port=args.port, threaded=False)
def test_compositional_refinement_polarity_bug():
    """Regression test: run assembly on the missing-refinement corpus.

    After assembly, the first statement must be supported by exactly
    the second one.
    """
    input_stmts = stmts_from_json_file(
        os.path.join(HERE, 'data', 'test_missing_refinement.json'))
    assembled = AssemblyPipeline(comp_assembly_json).run(input_stmts)
    assert assembled[0].supported_by == [assembled[1]]
"""
Methods to poke around at INDRA statements and generate a sheet for
comparing the flat and compositional grounders.
"""
import csv
from indra.statements import stmts_from_json_file
from tqdm import tqdm

# INDRA statements from flat ontology
STMTS_FLAT = stmts_from_json_file('statements.json', format='jsonl')
# does not exist yet!
#stmts_comp = stmts_from_json_file('statements_comp.json', format='jsonl')


def make_comparison_sheet(flat_statements, comp_statements):
    """Makes a tsv sheet to diff flat and compositional groundings.

    Only the first 500 statements of each list are considered.
    """
    header = ["Entity Text", "Grounding", "Confidence"]
    with open('wm_comparison_sheet.tsv', 'wt') as fh:
        writer = csv.writer(fh, delimiter='\t')
        writer.writerow(header)
        for flat_stmt in flat_statements[:500]:
            # Text/groundings for the flat subject/object
            flat_subj_text = get_text(flat_stmt.subj)
            flat_subj_grounding = get_groundings(flat_stmt.subj)[0]
            flat_obj_text = get_text(flat_stmt.obj)
            flat_obj_grounding = get_groundings(flat_stmt.obj)[0]
            for comp_stmt in comp_statements[:500]:
                # Text/groundings for the compositional subject/object
                # TODO: get into compositional tuples!
                comp_subj_text = get_text(comp_stmt.subj)
def is_intervention(grounding): return True if 'interventions' in grounding else False def get_text(ev, idx): return ev.annotations['agents']['raw_text'][idx] def get_stmt_ev_pairs(stmts): pairs = [] for stmt in stmts: pairs += [(stmt, e) for e in stmt.evidence] return pairs stmts = stmts_from_json_file('data/dart-20200102-grounding-curation-stmts-grounding.json') stmts = ac.filter_belief(stmts, 0.8) pairs = get_stmt_ev_pairs(stmts) stmt_groups = {'': pairs} nsample = 1000 idx = 1 all_fields = [] for key, pairs in stmt_groups.items(): sample = [random.choice(pairs) for _ in range(nsample)] for stmt, ev in sample: fields = {'IDX': idx, 'UUID': stmt.uuid, 'SUBJ_GROUNDING': get_grounding(stmt.subj)[0], 'SUBJ_GROUNDING_SCORE': '%.3f' % get_grounding(stmt.subj)[1], 'SUBJ_TEXT': get_text(ev, 0),
wmg = wmg.replace('wm/concept/causal_factor/', '') return wmg def get_text(ev, idx): return ev.annotations['agents']['raw_text'][idx] def get_eidos_stmt_ev_pairs(stmts): pairs = [] for stmt in stmts: pairs += [(stmt, e) for e in stmt.evidence if e.source_api == 'eidos'] return pairs stmts = stmts_from_json_file('data/dart-20191223-stmts-grounding.json') stmts = ac.filter_belief(stmts, 0.8) pairs = get_eidos_stmt_ev_pairs(stmts) subj_undef = [s for s in pairs if filter_subj_undef(s)] obj_undef = [s for s in pairs if filter_obj_undef(s)] both_undef = [s for s in pairs if filter_both_undef(s)] stmt_groups = {'SUBJ': subj_undef, 'OBJ': obj_undef, 'BOTH': both_undef} nsample = 500 idx = 1 all_fields = [] for key, pairs in stmt_groups.items(): sample = [random.choice(pairs) for _ in range(nsample)] for stmt, ev in sample: fields = { 'IDX': idx,
logger.warning('%s is not in the corpus.' % uuid) continue stmt.belief = correct belief_dict = {st.uuid: st.belief for st in stmts} return jsonify(belief_dict) if __name__ == '__main__': # Process arguments parser = argparse.ArgumentParser( description='Choose a corpus for live curation.') parser.add_argument('--json') parser.add_argument('--pickle') parser.add_argument('--corpus_id', default='1') parser.add_argument('--host', default='0.0.0.0') parser.add_argument('--port', default=8001, type=int) args = parser.parse_args() # Load the corpus if args.json: stmts = stmts_from_json_file(args.json) elif args.pickle: with open(args.pickle, 'rb') as fh: stmts = pickle.load(fh) logger.info('Loaded corpus %s with %d statements.' % (args.corpus_id, len(stmts))) corpora[args.corpus_id] = Corpus(stmts) # Run the app app.run(host=args.host, port=args.port)
if __name__ == '__main__':
    # Process arguments
    parser = argparse.ArgumentParser(
        description='Choose a corpus for live curation.')
    parser.add_argument('--json')
    parser.add_argument('--raw_json')
    parser.add_argument('--pickle')
    parser.add_argument('--corpus_id', default='1')
    parser.add_argument('--host', default='0.0.0.0')
    parser.add_argument('--port', default=8001, type=int)
    args = parser.parse_args()

    # Load the corpus; --json takes precedence over --pickle.
    if args.json:
        stmts = stmts_from_json_file(args.json)
    elif args.pickle:
        # NOTE(review): pickle.load executes arbitrary code embedded in the
        # file -- only point --pickle at trusted inputs.
        with open(args.pickle, 'rb') as fh:
            stmts = pickle.load(fh)
    else:
        # Previously `stmts` stayed unbound here and the script crashed
        # later with a NameError; fail fast with a usable message instead.
        parser.error('One of --json or --pickle is required to load a corpus.')
    if args.raw_json:
        raw_stmts = stmts_from_json_file(args.raw_json)
    else:
        raw_stmts = None
    logger.info('Loaded corpus %s with %d statements.' %
                (args.corpus_id, len(stmts)))
    curator.corpora[args.corpus_id] = Corpus(stmts, raw_stmts)

    # Run the app
    app.run(host=args.host, port=args.port, threaded=False)