Code example #1
def filter(stmts, cutoff, filename):
    stmts = ac.filter_belief(stmts, cutoff)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    #stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, filename)
    return stmts
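A minimal usage sketch for the helper above (assumptions: ac is INDRA's assemble_corpus module, as in the rest of these examples, and raw_stmts.pkl is a made-up file name; note that the helper shadows Python's built-in filter()):

from indra.tools import assemble_corpus as ac

# Load previously pickled Statements, then keep only high-belief,
# top-level, direct ones and dump the result to a new pickle
stmts = ac.load_statements('raw_stmts.pkl')
stmts = filter(stmts, 0.95, 'filtered_stmts.pkl')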
Code example #2
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements."""
        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(stmts)
        stmts = ac.filter_no_hypothesis(stmts)
        if not self.assembly_config.get('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if self.assembly_config.get('standardize_names'):
            ac.standardize_names_groundings(stmts)
        if self.assembly_config.get('filter_ungrounded'):
            score_threshold = self.assembly_config.get('score_threshold')
            stmts = ac.filter_grounded_only(stmts,
                                            score_threshold=score_threshold)
        if self.assembly_config.get('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if self.assembly_config.get('merge_deltas'):
            stmts = ac.merge_deltas(stmts)
        relevance_policy = self.assembly_config.get('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        if not self.assembly_config.get('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not self.assembly_config.get('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)
        # Use WM hierarchies and belief scorer for WM preassembly
        preassembly_mode = self.assembly_config.get('preassembly_mode')
        if preassembly_mode == 'wm':
            hierarchies = get_wm_hierarchies()
            belief_scorer = get_eidos_scorer()
            stmts = ac.run_preassembly(stmts,
                                       return_toplevel=False,
                                       belief_scorer=belief_scorer,
                                       hierarchies=hierarchies)
        else:
            stmts = ac.run_preassembly(stmts, return_toplevel=False)
        belief_cutoff = self.assembly_config.get('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if self.assembly_config.get('filter_direct'):
            stmts = ac.filter_direct(stmts)
            stmts = ac.filter_enzyme_kinase(stmts)
            stmts = ac.filter_mod_nokinase(stmts)
            stmts = ac.filter_transcription_factor(stmts)

        if self.assembly_config.get('mechanism_linking'):
            ml = MechLinker(stmts)
            ml.gather_explicit_activities()
            ml.reduce_activities()
            ml.gather_modifications()
            ml.reduce_modifications()
            ml.gather_explicit_activities()
            ml.replace_activations()
            ml.require_active_forms()
            stmts = ml.statements

        self.assembled_stmts = stmts
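For reference, a hypothetical assembly_config illustrating the keys run_assembly reads (key names are taken from the code above; the values are made-up examples):

assembly_config = {
    'skip_map_grounding': False,   # run ac.map_grounding
    'standardize_names': True,     # run ac.standardize_names_groundings
    'filter_ungrounded': True,     # run ac.filter_grounded_only
    'score_threshold': 0.7,        # passed to ac.filter_grounded_only
    'merge_groundings': False,
    'merge_deltas': False,
    'filter_relevance': None,      # or a policy name for filter_relevance
    'skip_filter_human': False,    # run ac.filter_human_only
    'skip_map_sequence': False,    # run ac.map_sequence
    'preassembly_mode': 'wm',      # use WM hierarchies and the Eidos scorer
    'belief_cutoff': 0.8,          # passed to ac.filter_belief
    'filter_direct': False,        # apply the direct/enzyme/TF filters
    'mechanism_linking': False,    # run the MechLinker steps
}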
Code example #3
File: test_assemble_corpus.py Project: adarshp/indra
def test_belief_cut_plus_filter_top():
    st1 = Phosphorylation(None, Agent('a'))
    st2 = Phosphorylation(Agent('b'), Agent('a'))
    st1.supports = [st2]
    st2.supported_by = [st1]
    st1.belief = 0.9
    st2.belief = 0.1
    st_high_belief = ac.filter_belief([st1, st2], 0.5)
    st_top_level = ac.filter_top_level(st_high_belief)
    assert len(st_top_level) == 1
Code example #4
File: assemble_cx.py Project: jmuhlich/indra
def assemble_cx(stmts, out_file):
    """Return a CX assembler."""
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.strip_agent_context(stmts)
    ca = CxAssembler()
    ca.add_statements(stmts)
    model = ca.make_model()
    ca.save_model(out_file)
    return ca
Code example #5
File: test_belief_sklearn.py Project: steppi/indra
def test_set_hierarchy_probs_specific_false():
    # Get probs for a set of statements, and a belief engine instance
    be, test_stmts_copy, prior_probs = setup_belief(
        include_more_specific=False)
    # Set beliefs on the flattened statements
    top_level = ac.filter_top_level(test_stmts_copy)
    be.set_hierarchy_probs(test_stmts_copy)
    # Compare hierarchy probs to prior probs
    for stmt, prior_prob in zip(test_stmts_copy, prior_probs):
        # Because we haven't included any supports, the beliefs should
        # not have changed
        assert stmt.belief == prior_prob
Code example #6
def assemble_cx(stmts, out_file_prefix, network_type):
    """Return a CX assembler."""
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    if network_type == 'direct':
        stmts = ac.filter_direct(stmts)

    out_file = '%s_%s.cx' % (out_file_prefix, network_type)

    ca = CxAssembler()
    ca.add_statements(stmts)
    model = ca.make_model()
    ca.save_model(out_file)
    return ca
Code example #7
File: assemble_model.py Project: steppi/indra_apps
def run_assembly(stmts, save_file):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one')
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, save_file)
    return stmts
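A hypothetical driver for run_assembly above (assumptions: gene_names is the module-level gene list the original script defines elsewhere, and stmts.pkl is a made-up input file):

from indra.tools import assemble_corpus as ac

gene_names = ['MAPK1', 'MAPK3']          # hypothetical gene list
stmts = ac.load_statements('stmts.pkl')  # previously pickled Statements
stmts = run_assembly(stmts, 'assembled_stmts.pkl')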
Code example #8
File: test_belief_sklearn.py Project: steppi/indra
def test_set_hierarchy_probs():
    # Get probs for a set of statements, and a belief engine instance
    be, test_stmts_copy, prior_probs = setup_belief(include_more_specific=True)
    # Set beliefs on the flattened statements
    top_level = ac.filter_top_level(test_stmts_copy)
    be.set_hierarchy_probs(test_stmts_copy)
    # Compare hierarchy probs to prior probs
    for stmt, prior_prob in zip(test_stmts_copy, prior_probs):
        # Check that the top-level statements' beliefs have not changed
        if stmt in top_level:
            assert stmt.belief == prior_prob
        # We expect the belief to change if including more evidence
        else:
            assert stmt.belief != prior_prob
Code example #9
File: assemble_pybel.py Project: pupster90/indra
def assemble_pybel(stmts, out_file_prefix):
    """Return a PyBEL Assembler"""
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)

    pba = PybelAssembler(stmts,
                         name='INDRA/REACH Korkut Model',
                         description='Automatically assembled model of '
                         'cancer signaling.',
                         version='0.0.10')
    pba.make_model()
    pybel.to_bel_path(pba.model, out_file_prefix + '.bel')
    with open(out_file_prefix, 'wt') as f:
        pybel.to_json_file(pba.model, f)
    url = 'https://pybel.scai.fraunhofer.de/api/receive'
    headers = {'content-type': 'application/json'}
    requests.post(url, json=pybel.to_json(pba.model), headers=headers)
    return pba
Code example #10
File: assemble_pysb.py Project: jmuhlich/indra
def assemble_pysb(stmts, data_genes, out_file):
    """Return an assembled PySB model."""
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.reduce_activities(stmts)
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    # Add observables
    o = Observable('MAPK1p', model.monomers['MAPK1'](T185='p', Y187='p'))
    model.add_component(o)
    o = Observable('MAPK3p', model.monomers['MAPK3'](T202='p', Y204='p'))
    model.add_component(o)
    o = Observable('GSK3Ap', model.monomers['GSK3A'](S21='p'))
    model.add_component(o)
    o = Observable('GSK3Bp', model.monomers['GSK3B'](S9='p'))
    model.add_component(o)
    o = Observable('RPS6p', model.monomers['RPS6'](S235='p'))
    model.add_component(o)
    o = Observable('EIF4EBP1p', model.monomers['EIF4EBP1'](S65='p'))
    model.add_component(o)
    o = Observable('JUNp', model.monomers['JUN'](S73='p'))
    model.add_component(o)
    o = Observable('FOXO3p', model.monomers['FOXO3'](S315='p'))
    model.add_component(o)
    o = Observable('AKT1p', model.monomers['AKT1'](S473='p'))
    model.add_component(o)
    o = Observable('AKT2p', model.monomers['AKT2'](S474='p'))
    model.add_component(o)
    o = Observable('AKT3p', model.monomers['AKT3'](S='p'))
    model.add_component(o)
    o = Observable('ELK1', model.monomers['ELK1'](S383='p'))
    model.add_component(o)
    # Set context
    pa.set_context('SKMEL28_SKIN')
    pa.save_model(out_file)

    ke = KappaExporter(model)
    # Derive the Kappa output file name from out_file
    base_file, _ = os.path.splitext(out_file)
    with open('%s.ka' % base_file, 'wb') as fh:
        fh.write(ke.export().encode('utf-8'))

    return model
Code example #11
def run_preassembly(statements, hierarchies):
    print('%d total statements' % len(statements))
    # Filter to grounded only
    statements = ac.filter_grounded_only(statements, score_threshold=0.4)
    # Make a Preassembler with the Eidos and TRIPS ontology
    pa = Preassembler(hierarchies, statements)
    # Make a BeliefEngine and run combine duplicates
    be = BeliefEngine()
    unique_stmts = pa.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    be.set_prior_probs(unique_stmts)
    # Run combine related
    related_stmts = pa.combine_related(return_toplevel=False)
    be.set_hierarchy_probs(related_stmts)
    # Filter to top-level Statements
    top_stmts = ac.filter_top_level(related_stmts)
    print('%d top-level statements' % len(top_stmts))
    return top_stmts
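A sketch of how run_preassembly above might be invoked (assumption: an older INDRA release where the default hierarchies were exposed as a module-level dict in indra.preassembler.hierarchy_manager; statements is a list of extracted INDRA Statements):

from indra.preassembler.hierarchy_manager import hierarchies

top_stmts = run_preassembly(statements, hierarchies)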
Code example #12
def run_preassembly(statements, hierarchies):
    print('%d total statements' % len(statements))
    # Map groundings onto the ontology
    statements = map_onto(statements)
    ac.dump_statements(statements, 'pi_mtg_demo_unfiltered.pkl')
    # Filter to grounded only
    statements = ac.filter_grounded_only(statements, score_threshold=0.7)

    #statements = ac.filter_by_db_refs(statements, 'UN',
    #    ['conflict', 'food_security', 'precipitation'], policy='one',
    #    match_suffix=True)
    statements = ac.filter_by_db_refs(
        statements,
        'UN', [
            'conflict', 'food_security', 'flooding', 'food_production',
            'human_migration', 'drought', 'food_availability', 'market',
            'food_insecurity'
        ],
        policy='all',
        match_suffix=True)
    assume_polarity(statements)
    statements = filter_has_polarity(statements)

    # Make a Preassembler with the Eidos and TRIPS ontology
    pa = Preassembler(hierarchies, statements)
    # Make a BeliefEngine and run combine duplicates
    be = BeliefEngine()
    unique_stmts = pa.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    be.set_prior_probs(unique_stmts)
    # Run combine related
    related_stmts = pa.combine_related(return_toplevel=False)
    be.set_hierarchy_probs(related_stmts)
    #related_stmts = ac.filter_belief(related_stmts, 0.8)
    # Filter to top-level Statements
    top_stmts = ac.filter_top_level(related_stmts)

    pa.stmts = top_stmts
    print('%d top-level statements' % len(top_stmts))
    conflicts = pa.find_contradicts()
    top_stmts = remove_contradicts(top_stmts, conflicts)

    ac.dump_statements(top_stmts, 'pi_mtg_demo.pkl')

    return top_stmts
Code example #13
def preprocess_stmts(stmts, data_genes):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_mutation_status(stmts,
                                      {'BRAF': [('V', '600', 'E')]}, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    af_stmts = ac.filter_by_type(ml.statements, ActiveForm)
    non_af_stmts = ac.filter_by_type(ml.statements, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(af_stmts)
    stmts = af_stmts + non_af_stmts
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
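        # Stop once a pass removes no further statements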
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    return stmts
Code example #14
def assemble_sif(stmts, data, out_file):
    """Return an assembled SIF."""
    # Filter for high-belief statements
    stmts = ac.filter_belief(stmts, 0.99)
    stmts = ac.filter_top_level(stmts)
    # Filter for Activation / Inhibition
    stmts_act = ac.filter_by_type(stmts, Activation)
    stmts_inact = ac.filter_by_type(stmts, Inhibition)
    stmts = stmts_act + stmts_inact
    # Get the Ras227 gene list and filter statements
    ras_genes = process_data.get_ras227_genes()
    ras_genes = [x for x in ras_genes if x not in ['YAP1']]
    stmts = ac.filter_gene_list(stmts, ras_genes, 'all')
    # Get the drugs inhibiting their targets as INDRA
    # statements
    def get_drug_statements():
        drug_targets = process_data.get_drug_targets()
        drug_stmts = []
        for dn, tns in drug_targets.items():
            da = Agent(dn + ':Drugs')
            for tn in tns:
                ta = Agent(tn)
                drug_stmt = Inhibition(da, ta)
                drug_stmts.append(drug_stmt)
        return drug_stmts
    drug_stmts = get_drug_statements()
    stmts = stmts + drug_stmts
    # Because of a bug in CNO, node names containing AND
    # need to be replaced
    def rename_and_nodes(st):
        for s in st:
            for a in s.agent_list():
                if a is not None:
                    if a.name.find('AND') != -1:
                        a.name = a.name.replace('AND', 'A_ND')
    rename_and_nodes(stmts)
    # Rewrite statements to replace genes with their corresponding
    # antibodies when possible
    stmts = rewrite_ab_stmts(stmts, data)
    def filter_ab_edges(st, policy='all'):
        st_out = []
        for s in st:
            if policy == 'all':
                all_ab = True
                for a in s.agent_list():
                    if a is not None:
                        if a.name.find('_p') == -1 and \
                           a.name.find('Drugs') == -1:
                            all_ab = False
                            break
                if all_ab:
                    st_out.append(s)
            elif policy == 'one':
                any_ab = False
                for a in s.agent_list():
                    if a is not None and a.name.find('_p') != -1:
                        any_ab = True
                        break
                if any_ab:
                    st_out.append(s)
        return st_out
    stmts = filter_ab_edges(stmts, 'all')
    # Get a list of the AB names that end up being covered in the prior network
    # This is important because other ABs need to be removed from the
    # MIDAS file for it to work.
    def get_ab_names(st):
        prior_abs = set()
        for s in st:
            for a in s.agent_list():
                if a is not None:
                    if a.name.find('_p') != -1:
                        prior_abs.add(a.name)
        return sorted(list(prior_abs))
    pkn_abs = get_ab_names(stmts)
    print('Boolean PKN contains these antibodies: %s' % ', '.join(pkn_abs))
    # Make the SIF model
    sa = SifAssembler(stmts)
    sa.make_model(use_name_as_key=True)
    sif_str = sa.print_model()
    with open(out_file, 'wb') as fh:
        fh.write(sif_str.encode('utf-8'))
    # Make the MIDAS data file used for training the model
    midas_data = process_data.get_midas_data(data, pkn_abs)
    return sif_str
Code example #15
    plt.plot(lengths, norm_node_counts, color='blue', alpha=0.8, label='Nodes')
    plt.legend(loc='upper left', fontsize=pf.fontsize, frameon=False)
    ax = plt.gca()
    pf.format_axis(ax)


if __name__ == '__main__':
    source = sys.argv[2]
    target = sys.argv[3]
    if len(sys.argv) > 4:
        max_depth = int(sys.argv[4])
    stmts = ac.load_statements(sys.argv[1])
    print(len(stmts))
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = [s for s in stmts if s.agent_list()[0]]
    print(len(stmts))

    #ppa = PysbPreassembler(stmts)
    #ppa.replace_activities()
    #stmts = ppa.statements

    #g = stmts_to_digraph(stmts)
    g = stmts_to_pybel_graph(stmts)
    scc_lens = [len(s) for s in nx.strongly_connected_components(g)]
    scc_lens.sort(reverse=True)
    print("Largest strongly connected components: %s" % str(scc_lens[0:3]))
Code example #16
File: machine.py Project: johnbachman/indra
def run_machine(model_path, pmids, belief_threshold, search_genes=None,
                ndex_cred=None, twitter_cred=None, grounding_map=None):
    start_time_local = datetime.datetime.now(tzlocal.get_localzone())
    date_str = make_date_str()

    # Save PMIDs in file and send for remote reading
    if aws_available:
        pmid_fname = 'pmids-%s.txt' % date_str
        all_pmids = []
        for v in pmids.values():
            all_pmids += v
        all_pmids = list(set(all_pmids))

        with open(pmid_fname, 'wt') as fh:
            for pmid in all_pmids:
                fh.write('%s\n' % pmid)
        # Submit reading
        job_list = submit_reading('rasmachine', pmid_fname, ['reach'])

        # Wait for reading to complete
        wait_for_complete('run_reach_queue', job_list, idle_log_timeout=600,
                          kill_on_log_timeout=True)

    # Load the model
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    inc_model_file = os.path.join(model_path, 'model.pkl')
    model = IncrementalModel(inc_model_file)
    # Include search genes as prior genes
    if search_genes:
        model.prior_genes = search_genes
    stats = {}
    logger.info(time.strftime('%c'))
    logger.info('Preassembling original model.')
    model.preassemble(filters=global_filters, grounding_map=grounding_map)
    logger.info(time.strftime('%c'))

    # Original statistics
    stats['orig_stmts'] = len(model.get_statements())
    stats['orig_assembled'] = len(model.assembled_stmts)
    orig_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
                                      belief_threshold)
    orig_stmts = ac.filter_top_level(orig_stmts)
    stats['orig_final'] = len(orig_stmts)
    logger.info('%d final statements' % len(orig_stmts))

    # Extend the model with PMIDs
    logger.info('----------------')
    logger.info(time.strftime('%c'))
    logger.info('Extending model.')
    stats['new_papers'], stats['new_abstracts'], stats['existing'] = \
        extend_model(model_path, model, pmids, start_time_local)
    # Having added new statements, we preassemble the model
    model.preassemble(filters=global_filters, grounding_map=grounding_map)

    # New statistics
    stats['new_stmts'] = len(model.get_statements())
    stats['new_assembled'] = len(model.assembled_stmts)
    new_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
                                     belief_threshold)
    new_stmts = ac.filter_top_level(new_stmts)
    stats['new_final'] = len(new_stmts)
    logger.info('%d final statements' % len(new_stmts))

    check_pmids(model.get_statements())

    # Save model
    logger.info(time.strftime('%c'))
    logger.info('Saving model')
    model.save(inc_model_file)
    logger.info(time.strftime('%c'))

    # Save a time stamped version of the pickle for backup/diagnostic purposes
    if not aws_available:
        inc_model_bkp_file = os.path.join(model_path,
                                          'model-%s.pkl' % date_str)
        model.save(inc_model_bkp_file)
    else:
        key = 'rasmachine/%s/model-%s.pkl' % (model_path.replace('/', '_'),
                                              date_str)
        s3 = boto3.client('s3')
        s3.upload_file(inc_model_file, 'bigmech', key)

    # Upload the new, final statements to NDEx
    if ndex_cred:
        upload_new_ndex(model_path, new_stmts, ndex_cred)

    # Print and tweet the status message
    logger.info('--- Final statistics ---')
    for k, v in sorted(stats.items(), key=lambda x: x[0]):
        logger.info('%s: %s' % (k, v))
    logger.info('------------------------')

    msg_str = make_status_message(stats)
    if msg_str is not None:
        logger.info('Status message: %s' % msg_str)
        if twitter_cred:
            logger.info('Now tweeting: %s' % msg_str)
            twitter_client.update_status(msg_str, twitter_cred)
Code example #17
def test_filter_top_level():
    st_out = ac.filter_top_level([st14, st15])
    assert len(st_out) == 1
Code example #18
File: machine.py Project: djmilstein/indra
def run_machine(model_path,
                pmids,
                belief_threshold,
                search_genes=None,
                ndex_cred=None,
                twitter_cred=None):
    start_time_local = datetime.datetime.now(tzlocal.get_localzone())
    date_str = make_date_str()

    # Save PMIDs in file and send for remote reading
    if aws_available:
        pmid_fname = 'pmids-%s.txt' % date_str
        all_pmids = []
        for v in pmids.values():
            all_pmids += v
        all_pmids = list(set(all_pmids))

        with open(pmid_fname, 'wt') as fh:
            for pmid in all_pmids:
                fh.write('%s\n' % pmid)
        # Submit reading
        job_list = submit_reading('rasmachine', pmid_fname, ['reach'])

        # Wait for reading to complete
        wait_for_complete('run_reach_queue',
                          job_list,
                          idle_log_timeout=600,
                          kill_on_log_timeout=True)

    # Load the model
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    inc_model_file = os.path.join(model_path, 'model.pkl')
    model = IncrementalModel(inc_model_file)
    # Include search genes as prior genes
    if search_genes:
        model.prior_genes = search_genes
    stats = {}
    logger.info(time.strftime('%c'))
    logger.info('Preassembling original model.')
    model.preassemble(filters=global_filters)
    logger.info(time.strftime('%c'))

    # Original statistics
    stats['orig_stmts'] = len(model.get_statements())
    stats['orig_assembled'] = len(model.assembled_stmts)
    orig_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
                                      belief_threshold)
    orig_stmts = ac.filter_top_level(orig_stmts)
    stats['orig_final'] = len(orig_stmts)
    logger.info('%d final statements' % len(orig_stmts))

    # Extend the model with PMIDs
    logger.info('----------------')
    logger.info(time.strftime('%c'))
    logger.info('Extending model.')
    stats['new_papers'], stats['new_abstracts'], stats['existing'] = \
        extend_model(model_path, model, pmids, start_time_local)
    # Having added new statements, we preassemble the model
    model.preassemble(filters=global_filters)

    # New statistics
    stats['new_stmts'] = len(model.get_statements())
    stats['new_assembled'] = len(model.assembled_stmts)
    new_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
                                     belief_threshold)
    new_stmts = ac.filter_top_level(new_stmts)
    stats['new_final'] = len(new_stmts)
    logger.info('%d final statements' % len(new_stmts))

    check_pmids(model.get_statements())

    # Save model
    logger.info(time.strftime('%c'))
    logger.info('Saving model')
    model.save(inc_model_file)
    logger.info(time.strftime('%c'))

    # Save a time stamped version of the pickle for backup/diagnostic purposes
    if not aws_available:
        inc_model_bkp_file = os.path.join(model_path,
                                          'model-%s.pkl' % date_str)
        model.save(inc_model_bkp_file)
    else:
        key = 'rasmachine/%s/model-%s.pkl' % (model_path.replace(
            '/', '_'), date_str)
        s3 = boto3.client('s3')
        s3.upload_file(inc_model_file, 'bigmech', key)

    # Upload the new, final statements to NDEx
    if ndex_cred:
        upload_new_ndex(model_path, new_stmts, ndex_cred)

    # Print and tweet the status message
    logger.info('--- Final statistics ---')
    for k, v in sorted(stats.items(), key=lambda x: x[0]):
        logger.info('%s: %s' % (k, v))
    logger.info('------------------------')

    msg_str = make_status_message(stats)
    if msg_str is not None:
        logger.info('Status message: %s' % msg_str)
        if twitter_cred:
            logger.info('Now tweeting: %s' % msg_str)
            twitter_client.update_status(msg_str, twitter_cred)
Code example #19
def assemble_pysb(stmts, data_genes, contextualize=False):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    stmts = normalize_active_forms(ml.statements)
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
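        # Stop once a pass removes no further statements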
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))


    # Add drug target Statements
    drug_target_stmts = get_drug_target_statements()
    stmts += drug_target_stmts

    # Just generate the generic model
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as f:
        pickle.dump(model, f)

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        if cell_line not in cell_lines_no_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts
        pa = PysbAssembler()
        pa.add_statements(stmtsc)
        model = pa.make_model()
        if cell_line not in cell_lines_no_data:
            contextualize_model(model, cell_line, data_genes)
        ac.dump_statements(stmtsc, prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as f:
            pickle.dump(model, f)