Beispiel #1
0
def test_assembly_cycle():
    """Check that a refinement cycle in the test corpus is resolved by assembly."""
    # 874 is a refinement of -534
    json_path = os.path.join(
        HERE, 'data', 'compositional_refinement_cycle_test.json')
    input_stmts = stmts_from_json_file(json_path)
    assembled = AssemblyPipeline(comp_assembly_json).run(input_stmts)
    # After assembly, the second statement must support the first.
    assert assembled[0].supported_by == [assembled[1]]
Beispiel #2
0
        # NOTE(review): fragment — the argparse.ArgumentParser(...) call this
        # keyword argument belongs to opens above the visible region.
        description='Choose a corpus for live curation.')
    parser.add_argument('--json')
    parser.add_argument('--raw_json')
    parser.add_argument('--pickle')
    parser.add_argument('--corpus_id', default='1')
    parser.add_argument('--host', default='0.0.0.0')
    parser.add_argument('--port', default=8001, type=int)
    parser.add_argument('--aws-cred', type=str, default='default',
                        help='The name of the credential set to use when '
                             'connecting to AWS services. If the name is not '
                             'found in your AWS config, `[default]`  is used.')
    args = parser.parse_args()

    # Load the corpus: assembled statements come from --json or --pickle.
    # NOTE(review): if neither --json nor --pickle is supplied, `stmts` is
    # never bound and the logger.info call below raises NameError — consider
    # calling parser.error() in an else branch.
    if args.json:
        stmts = stmts_from_json_file(args.json)
    elif args.pickle:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load trusted pickle files.
        with open(args.pickle, 'rb') as fh:
            stmts = pickle.load(fh)
    # Optional raw (pre-assembly) statements accompanying the corpus.
    if args.raw_json:
        raw_stmts = stmts_from_json_file(args.raw_json)
    else:
        raw_stmts = None

    logger.info('Loaded corpus %s with %d statements.' %
                (args.corpus_id, len(stmts)))
    # Register the corpus with the curator under the given id so the
    # web app can serve it.
    curator.corpora[args.corpus_id] = Corpus(stmts, raw_stmts, args.aws_cred)

    # Run the app
    app.run(host=args.host, port=args.port, threaded=False)
Beispiel #3
0
def test_compositional_refinement_polarity_bug():
    """Regression test: assembly must link the second statement as support."""
    data_file = os.path.join(HERE, 'data', 'test_missing_refinement.json')
    raw_stmts = stmts_from_json_file(data_file)
    assembled = AssemblyPipeline(comp_assembly_json).run(raw_stmts)
    assert assembled[0].supported_by == [assembled[1]]
"""
Methods to poke around at INDRA statements and generate a sheet for
comparing the flat and compositional grounders.
"""
import csv
from indra.statements import stmts_from_json_file
from tqdm import tqdm

# INDRA statements from flat ontology
# NOTE(review): loaded at import time from a hard-coded relative path —
# assumes 'statements.json' (JSONL format) exists in the working directory.
STMTS_FLAT = stmts_from_json_file('statements.json', format='jsonl')

# does not exist yet!
#stmts_comp = stmts_from_json_file('statements_comp.json', format='jsonl')


def make_comparison_sheet(flat_statements, comp_statements):
    """Makes a tsv sheet to diff flat and compositional groundings."""
    header = ["Entity Text", "Grounding", "Confidence"]
    with open('wm_comparison_sheet.tsv', 'wt') as out_file:
        tsv_writer = csv.writer(out_file, delimiter='\t')
        tsv_writer.writerow(header)
        # Only the first 500 statements of each corpus are considered; the
        # nested loop below makes this an O(500*500) pass.
        for statement in flat_statements[:500]:
            # get text/groundings for flat subject/object
            # NOTE(review): get_text/get_groundings helpers are defined
            # elsewhere; the one-argument call here does not match the
            # two-argument get_text(ev, idx) seen later in this file —
            # confirm which helper is intended.
            flat_subj_text = get_text(statement.subj)
            flat_subj_grounding = get_groundings(statement.subj)[0]
            flat_obj_text = get_text(statement.obj)
            flat_obj_grounding = get_groundings(statement.obj)[0]
            for statement2 in comp_statements[:500]:
                # get text/groundings for compositional subject/object
                # TODO: get into compositional tuples!
                comp_subj_text = get_text(statement2.subj)
                # NOTE(review): the function body appears truncated at this
                # point in the available source.
def is_intervention(grounding):
    """Return True if the grounding refers to an intervention concept.

    Parameters
    ----------
    grounding : str or collection
        A grounding entry; membership of the literal ``'interventions'``
        is tested with ``in`` (substring test for strings, element test
        for containers).

    Returns
    -------
    bool
        True if ``'interventions'`` occurs in `grounding`.
    """
    # `True if cond else False` around a boolean expression is redundant:
    # the `in` operator already returns a bool.
    return 'interventions' in grounding


def get_text(ev, idx):
    """Return the raw agent text at position *idx* from an evidence's annotations."""
    raw_texts = ev.annotations['agents']['raw_text']
    return raw_texts[idx]


def get_stmt_ev_pairs(stmts):
    """Flatten statements into a list of (statement, evidence) pairs."""
    # One pair per evidence object, preserving statement order.
    return [(stmt, ev) for stmt in stmts for ev in stmt.evidence]


# NOTE(review): top-level sampling script; `random`, `ac` and the
# get_grounding/get_text helpers are presumably imported/defined outside
# this fragment — verify before running standalone.
stmts = stmts_from_json_file('data/dart-20200102-grounding-curation-stmts-grounding.json')
stmts = ac.filter_belief(stmts, 0.8)
pairs = get_stmt_ev_pairs(stmts)

# A single unnamed group holding every (statement, evidence) pair.
stmt_groups = {'': pairs}
nsample = 1000
idx = 1
all_fields = []
for key, pairs in stmt_groups.items():
    # Sampling with replacement: random.choice can pick the same pair twice.
    sample = [random.choice(pairs) for _ in range(nsample)]
    for stmt, ev in sample:
        fields = {'IDX': idx,
                  'UUID': stmt.uuid,
                  'SUBJ_GROUNDING': get_grounding(stmt.subj)[0],
                  'SUBJ_GROUNDING_SCORE': '%.3f' % get_grounding(stmt.subj)[1],
                  'SUBJ_TEXT': get_text(ev, 0),
    # NOTE(review): fragment — the enclosing def is outside this view.
    # Strips the WM ontology prefix from a grounding string before returning.
    wmg = wmg.replace('wm/concept/causal_factor/', '')
    return wmg


def get_text(ev, idx):
    """Look up the idx-th raw agent text recorded on an evidence annotation."""
    agents_annotation = ev.annotations['agents']
    return agents_annotation['raw_text'][idx]


def get_eidos_stmt_ev_pairs(stmts):
    """Collect (statement, evidence) pairs restricted to Eidos-sourced evidence."""
    collected = []
    for statement in stmts:
        for evid in statement.evidence:
            # Keep only evidence extracted by the Eidos reader.
            if evid.source_api == 'eidos':
                collected.append((statement, evid))
    return collected


# NOTE(review): top-level sampling script fragment; filter_subj_undef /
# filter_obj_undef / filter_both_undef, `random` and `ac` are defined or
# imported outside this view.
stmts = stmts_from_json_file('data/dart-20191223-stmts-grounding.json')
stmts = ac.filter_belief(stmts, 0.8)
pairs = get_eidos_stmt_ev_pairs(stmts)
# Partition the pairs by which argument lacks a grounding.
subj_undef = [s for s in pairs if filter_subj_undef(s)]
obj_undef = [s for s in pairs if filter_obj_undef(s)]
both_undef = [s for s in pairs if filter_both_undef(s)]

stmt_groups = {'SUBJ': subj_undef, 'OBJ': obj_undef, 'BOTH': both_undef}
nsample = 500
idx = 1
all_fields = []
for key, pairs in stmt_groups.items():
    # Sampling with replacement from each group.
    sample = [random.choice(pairs) for _ in range(nsample)]
    for stmt, ev in sample:
        fields = {
            'IDX': idx,
Beispiel #7
0
                # NOTE(review): fragment — the enclosing function and loop
                # headers are outside this view; appears to update beliefs
                # for curated statements, skipping UUIDs absent from the
                # corpus.
                logger.warning('%s is not in the corpus.' % uuid)
                continue
            stmt.belief = correct
        # Map every statement's UUID to its (possibly updated) belief and
        # return it as the JSON response (jsonify suggests a Flask endpoint
        # — confirm against the surrounding code).
        belief_dict = {st.uuid: st.belief for st in stmts}
        return jsonify(belief_dict)


if __name__ == '__main__':
    # Process arguments
    parser = argparse.ArgumentParser(
        description='Choose a corpus for live curation.')
    parser.add_argument('--json')
    parser.add_argument('--pickle')
    parser.add_argument('--corpus_id', default='1')
    parser.add_argument('--host', default='0.0.0.0')
    parser.add_argument('--port', default=8001, type=int)
    args = parser.parse_args()

    # Load the corpus from whichever source was supplied. Without the
    # else-guard, omitting both --json and --pickle left `stmts` unbound
    # and crashed below with a NameError instead of a clear usage message.
    if args.json:
        stmts = stmts_from_json_file(args.json)
    elif args.pickle:
        # NOTE: pickle.load executes arbitrary code on load; only open
        # trusted pickle files.
        with open(args.pickle, 'rb') as fh:
            stmts = pickle.load(fh)
    else:
        parser.error('One of --json or --pickle must be given.')
    logger.info('Loaded corpus %s with %d statements.' %
                (args.corpus_id, len(stmts)))
    # Register the corpus under its id so the web app can serve it.
    corpora[args.corpus_id] = Corpus(stmts)

    # Run the app
    app.run(host=args.host, port=args.port)
Beispiel #8
0
if __name__ == '__main__':
    # Process arguments
    parser = argparse.ArgumentParser(
        description='Choose a corpus for live curation.')
    parser.add_argument('--json')
    parser.add_argument('--raw_json')
    parser.add_argument('--pickle')
    parser.add_argument('--corpus_id', default='1')
    parser.add_argument('--host', default='0.0.0.0')
    parser.add_argument('--port', default=8001, type=int)
    args = parser.parse_args()

    # Load the assembled corpus; fail fast with a usage message when
    # neither source is provided (previously this fell through and the
    # logger call below raised NameError on the unbound `stmts`).
    if args.json:
        stmts = stmts_from_json_file(args.json)
    elif args.pickle:
        # NOTE: pickle.load executes arbitrary code on load; only open
        # trusted pickle files.
        with open(args.pickle, 'rb') as fh:
            stmts = pickle.load(fh)
    else:
        parser.error('One of --json or --pickle must be given.')
    # Optional raw (pre-assembly) statements accompanying the corpus.
    if args.raw_json:
        raw_stmts = stmts_from_json_file(args.raw_json)
    else:
        raw_stmts = None

    logger.info('Loaded corpus %s with %d statements.' %
                (args.corpus_id, len(stmts)))
    # Register the corpus with the curator under the given id.
    curator.corpora[args.corpus_id] = Corpus(stmts, raw_stmts)

    # Run the app
    app.run(host=args.host, port=args.port, threaded=False)