Example #1
def test_human_only():
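    # The 'human_only' filter is expected to keep agents that are grounded
    # to human proteins or left ungrounded, and to drop non-human
    # groundings: P15056 is human BRAF, P28028 is mouse Braf (behavior
    # inferred from the assertion counts below).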
    im = IncrementalModel()
    stmt1 = Phosphorylation(None, Agent('BRAF', db_refs={'UP': 'P15056'}))
    stmt2 = Phosphorylation(None, Agent('BRAF', db_refs={'UP': 'P28028'}))
    stmt3 = Phosphorylation(None, Agent('BRAF', db_refs={'HGNC': 'BRAF'}))
    stmt4 = Phosphorylation(None, Agent('BRAF', db_refs={}))
    im.add_statements('12345', [stmt1], filters=['human_only'])
    assert(len(im.get_statements()) == 1)
    im.add_statements('12346', [stmt2], filters=['human_only'])
    assert(len(im.get_statements()) == 1)
    im.add_statements('12346', [stmt3], filters=['human_only'])
    assert(len(im.get_statements()) == 2)
    im.add_statements('12346', [stmt4], filters=['human_only'])
    assert(len(im.get_statements()) == 3)
Example #2
def summarize_helper(model_path):
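    """Load the pickled model under model_path and echo basic statistics."""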
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    inc_model_file = os.path.join(model_path, 'model.pkl')
    model = IncrementalModel(inc_model_file)
    stmts = model.get_statements()
    click.echo('Number of statements: {}'.format(len(stmts)))
    agents = model.get_model_agents()
    click.echo('Number of agents: {}'.format(len(agents)))
Example #3
def test_grounding_none_agent():
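    # stmts2 is a module-level fixture defined elsewhere in the test module;
    # judging by the test name and the assertion, it holds a statement with
    # a None agent that should survive the 'grounding' filter.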
    im = IncrementalModel()
    im.add_statements('12345', stmts2, filters=['grounding'])
    assert(len(im.get_statements()) == 1)
Example #4
def test_grounding_all():
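    # Both members of the Complex carry a grounding (UP and HGNC), so the
    # statement is expected to pass the 'grounding' filter.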
    im = IncrementalModel()
    stmt = Complex([Agent('A', db_refs={'UP': 'ABCD'}), 
                   Agent('B', db_refs={'HGNC': '1234'})])
    im.add_statements('12345', [stmt], filters=['grounding'])
    assert(len(im.get_statements()) == 1)
Example #5
def test_add_stmts_prior_all():
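    # stmts is a module-level fixture (not shown). 'prior_all' presumably
    # admits only statements whose agents all appear in the prior, so
    # stmts[1] is expected to be dropped here.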
    im = IncrementalModel()
    im.stmts['prior'] = [stmts[0]]
    im.add_statements('12345', [stmts[1]], filters=['prior_all'])
    assert(len(im.get_statements()) == 1)
Example #6
def test_add_stmts_model_all():
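    # 'model_all' presumably filters against agents already present in the
    # model, so the second statement is expected to be dropped.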
    im = IncrementalModel()
    im.add_statements('12345', [stmts[0]])
    im.add_statements('23456', [stmts[1]], filters=['model_all'])
    assert(len(im.get_statements()) == 1)
Example #7
def test_add_stmts_blank_noprior2():
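    # With no prior set on a blank model, the 'prior_all' filter is
    # expected to let both statements through.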
    im = IncrementalModel()
    im.add_statements('12345', stmts, filters=['prior_all'])
    assert(len(im.get_statements()) == 2)
Example #8
def test_add_stmts_blank_emptyfilter():
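    # An empty filter list should apply no filtering at all.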
    im = IncrementalModel()
    im.add_statements('12345', stmts, filters=[])
    assert(len(im.get_statements()) == 2)
Example #9
def test_add_stmts_blank():
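    # Omitting the filters argument adds all statements unconditionally.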
    im = IncrementalModel()
    im.add_statements('12345', stmts)
    assert(len(im.get_statements()) == 2)
Example #10
def run_machine(model_path,
                pmids,
                belief_threshold,
                search_genes=None,
                ndex_cred=None,
                twitter_cred=None):
    start_time_local = datetime.datetime.now(tzlocal.get_localzone())
    date_str = make_date_str()

    # Save PMIDs in file and send for remote reading
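    # aws_available is assumed to be a module-level flag indicating whether
    # remote (AWS Batch) reading is configured.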
    if aws_available:
        pmid_fname = 'pmids-%s.txt' % date_str
        all_pmids = []
        for v in pmids.values():
            all_pmids += v
        all_pmids = list(set(all_pmids))

        with open(pmid_fname, 'wt') as fh:
            for pmid in all_pmids:
                fh.write('%s\n' % pmid)
        # Submit reading
        job_list = submit_reading('rasmachine', pmid_fname, ['reach'])

        # Wait for reading to complete
        wait_for_complete('run_reach_queue',
                          job_list,
                          idle_log_timeout=600,
                          kill_on_log_timeout=True)

    # Load the model
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    inc_model_file = os.path.join(model_path, 'model.pkl')
    model = IncrementalModel(inc_model_file)
    # Include search genes as prior genes
    if search_genes:
        model.prior_genes = search_genes
    stats = {}
    logger.info(time.strftime('%c'))
    logger.info('Preassembling original model.')
    model.preassemble(filters=global_filters)
    logger.info(time.strftime('%c'))

    # Original statistics
    stats['orig_stmts'] = len(model.get_statements())
    stats['orig_assembled'] = len(model.assembled_stmts)
    orig_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
                                      belief_threshold)
    orig_stmts = ac.filter_top_level(orig_stmts)
    stats['orig_final'] = len(orig_stmts)
    logger.info('%d final statements' % len(orig_stmts))

    # Extend the model with PMIDs
    logger.info('----------------')
    logger.info(time.strftime('%c'))
    logger.info('Extending model.')
    stats['new_papers'], stats['new_abstracts'], stats['existing'] = \
        extend_model(model_path, model, pmids, start_time_local)
    # Having added new statements, we preassemble the model
    model.preassemble(filters=global_filters)

    # New statistics
    stats['new_stmts'] = len(model.get_statements())
    stats['new_assembled'] = len(model.assembled_stmts)
    new_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
                                     belief_threshold)
    new_stmts = ac.filter_top_level(new_stmts)
    stats['new_final'] = len(new_stmts)
    logger.info('%d final statements' % len(new_stmts))

    check_pmids(model.get_statements())

    # Save model
    logger.info(time.strftime('%c'))
    logger.info('Saving model')
    model.save(inc_model_file)
    logger.info(time.strftime('%c'))

    # Save a time stamped version of the pickle for backup/diagnostic purposes
    if not aws_available:
        inc_model_bkp_file = os.path.join(model_path,
                                          'model-%s.pkl' % date_str)
        model.save(inc_model_bkp_file)
    else:
        key = 'rasmachine/%s/model-%s.pkl' % (model_path.replace('/', '_'),
                                              date_str)
        s3 = boto3.client('s3')
        s3.upload_file(inc_model_file, 'bigmech', key)

    # Upload the new, final statements to NDEx
    if ndex_cred:
        upload_new_ndex(model_path, new_stmts, ndex_cred)

    # Print and tweet the status message
    logger.info('--- Final statistics ---')
    for k, v in sorted(stats.items(), key=lambda x: x[0]):
        logger.info('%s: %s' % (k, v))
    logger.info('------------------------')

    msg_str = make_status_message(stats)
    if msg_str is not None:
        logger.info('Status message: %s' % msg_str)
        if twitter_cred:
            logger.info('Now tweeting: %s' % msg_str)
            twitter_client.update_status(msg_str, twitter_cred)
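A minimal invocation sketch (an assumption, not taken from the source): judging by the loop over pmids.values() above, pmids maps search terms to lists of PMID strings.

# Hypothetical usage of run_machine; the model directory and the PMID
# mapping below are illustrative placeholders only.
pmids = {'BRAF': ['12345', '23456'], 'MAP2K1': ['23456']}
run_machine('models/rasmodel', pmids, belief_threshold=0.95)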
Example #11
def test_add_stmts_blank():
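    # After preassembly the two distinct statements should remain separate,
    # so assembled_stmts has the same length as the raw statements.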
    im = IncrementalModel()
    im.add_statements('12345', stmts)
    assert len(im.get_statements()) == 2
    im.preassemble()
    assert len(im.assembled_stmts) == 2
Example #12
def run_machine(model_path, pmids, belief_threshold, search_genes=None,
                ndex_cred=None, twitter_cred=None, grounding_map=None):
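    # Same pipeline as Example #10, except that a grounding_map is threaded
    # through both preassemble() calls below.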
    start_time_local = datetime.datetime.now(tzlocal.get_localzone())
    date_str = make_date_str()

    # Save PMIDs in file and send for remote reading
    if aws_available:
        pmid_fname = 'pmids-%s.txt' % date_str
        all_pmids = []
        for v in pmids.values():
            all_pmids += v
        all_pmids = list(set(all_pmids))

        with open(pmid_fname, 'wt') as fh:
            for pmid in all_pmids:
                fh.write('%s\n' % pmid)
        # Submit reading
        job_list = submit_reading('rasmachine', pmid_fname, ['reach'])

        # Wait for reading to complete
        wait_for_complete('run_reach_queue', job_list, idle_log_timeout=600,
                          kill_on_log_timeout=True)

    # Load the model
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    inc_model_file = os.path.join(model_path, 'model.pkl')
    model = IncrementalModel(inc_model_file)
    # Include search genes as prior genes
    if search_genes:
        model.prior_genes = search_genes
    stats = {}
    logger.info(time.strftime('%c'))
    logger.info('Preassembling original model.')
    model.preassemble(filters=global_filters, grounding_map=grounding_map)
    logger.info(time.strftime('%c'))

    # Original statistics
    stats['orig_stmts'] = len(model.get_statements())
    stats['orig_assembled'] = len(model.assembled_stmts)
    orig_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
                                      belief_threshold)
    orig_stmts = ac.filter_top_level(orig_stmts)
    stats['orig_final'] = len(orig_stmts)
    logger.info('%d final statements' % len(orig_stmts))

    # Extend the model with PMIDs
    logger.info('----------------')
    logger.info(time.strftime('%c'))
    logger.info('Extending model.')
    stats['new_papers'], stats['new_abstracts'], stats['existing'] = \
        extend_model(model_path, model, pmids, start_time_local)
    # Having added new statements, we preassemble the model
    model.preassemble(filters=global_filters, grounding_map=grounding_map)

    # New statistics
    stats['new_stmts'] = len(model.get_statements())
    stats['new_assembled'] = len(model.assembled_stmts)
    new_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
                                     belief_threshold)
    new_stmts = ac.filter_top_level(new_stmts)
    stats['new_final'] = len(new_stmts)
    logger.info('%d final statements' % len(new_stmts))

    check_pmids(model.get_statements())

    # Save model
    logger.info(time.strftime('%c'))
    logger.info('Saving model')
    model.save(inc_model_file)
    logger.info(time.strftime('%c'))

    # Save a time stamped version of the pickle for backup/diagnostic purposes
    if not aws_available:
        inc_model_bkp_file = os.path.join(model_path,
                                          'model-%s.pkl' % date_str)
        model.save(inc_model_bkp_file)
    else:
        key = 'rasmachine/%s/model-%s.pkl' % (model_path.replace('/', '_'),
                                              date_str)
        s3 = boto3.client('s3')
        s3.upload_file(inc_model_file, 'bigmech', key)

    # Upload the new, final statements to NDEx
    if ndex_cred:
        upload_new_ndex(model_path, new_stmts, ndex_cred)

    # Print and tweet the status message
    logger.info('--- Final statistics ---')
    for k, v in sorted(stats.items(), key=lambda x: x[0]):
        logger.info('%s: %s' % (k, v))
    logger.info('------------------------')

    msg_str = make_status_message(stats)
    if msg_str is not None:
        logger.info('Status message: %s' % msg_str)
        if twitter_cred:
            logger.info('Now tweeting: %s' % msg_str)
            twitter_client.update_status(msg_str, twitter_cred)
Example #13
    # Load the model
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    inc_model_file = os.path.join(model_path, model_name, 'model.pkl')
    model = IncrementalModel(inc_model_file)
    # Include search genes as prior genes
    model.prior_genes = search_genes
    stats = {}
    logger.info(time.strftime('%c'))
    logger.info('Preassembling original model.')
    model.preassemble(filters=global_filters)
    logger.info(time.strftime('%c'))

    # Original statistics
    stats['orig_stmts'] = len(model.get_statements())
    stats['orig_assembled'] = len(model.assembled_stmts)
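    # Keep statements with at least one BioPAX/BEL (database) evidence
    # unconditionally, and reading-only statements only if they exceed the
    # belief threshold.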
    db_stmts = ac.filter_evidence_source(model.assembled_stmts,
                                         ['biopax', 'bel'],
                                         policy='one')
    no_db_stmts = ac.filter_evidence_source(model.assembled_stmts,
                                            ['biopax', 'bel'],
                                            policy='none')
    no_db_stmts = ac.filter_belief(no_db_stmts, belief_threshold)
    orig_stmts = db_stmts + no_db_stmts
    stats['orig_final'] = len(orig_stmts)
    logger.info('%d final statements' % len(orig_stmts))

    # Extend the model with PMIDs
    logger.info('----------------')
    logger.info(time.strftime('%c'))