Code example #1
def get_reader_sites(input_file):
    input_stmts = ac.load_statements(input_file)
    readers = ('reach', 'sparser', 'rlimsp')
    pm = ProtMapper(use_cache=True, cache_path=CACHE_PATH)
    sites_by_reader = {}
    # For all readers
    for reader in readers:
        sites = []
        # Filter to stmts for this reader
        reader_stmts = [
            s for s in input_stmts if s.evidence[0].source_api == reader
        ]
        for s in reader_stmts:
            up_id = s.sub.db_refs.get('UP')
            # Filter to stmts with substrate UP ID, residue and position
            if up_id is None or s.residue is None or s.position is None:
                continue
            if s.residue not in ('S', 'T', 'Y'):
                continue
            site = (up_id, s.residue, s.position)
            # Get the mapped site for the residue
            ms = pm.map_to_human_ref(up_id, 'uniprot', s.residue, s.position)
            sites.append(ms)
        # Group, tabulate frequency
        site_ctr = Counter(sites)
        # Store in dict
        sites_by_reader[reader] = site_ctr
    # Save sites
    with open('output/reader_sites.pkl', 'wb') as f:
        pickle.dump(sites_by_reader, f)
    # Save cache
    pm.save_cache()
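
For context, a minimal sketch of the imports and constants this snippet appears to assume; the module aliases and the CACHE_PATH value are guesses inferred from the calls above, not part of the original file:

import pickle
from collections import Counter

from protmapper import ProtMapper  # provides map_to_human_ref and save_cache, used above
import indra.tools.assemble_corpus as ac

CACHE_PATH = 'output/protmapper_cache.pkl'  # hypothetical cache location

# Hypothetical invocation on a pickle of reader-derived statements:
# get_reader_sites('output/reader_stmts.pkl')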
Code example #2
def main(args):
    # This file takes about 32 GB to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'

    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)

    # Expand families, fix grounding errors and run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
                    assemble_corpus.map_grounding(
                        assemble_corpus.expand_families(stmts_raw)))

    # Default filtering: specific (unique) genes that are grounded.
    stmts_filtered = assemble_corpus.filter_grounded_only(
                         assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))
    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)

    binary_stmts = [s for s in stmts_filtered
                    if len(s.agent_list()) == 2
                    and s.agent_list()[0] is not None]
    rows = []
    for s in binary_stmts:
        rows.append([ag.name for ag in s.agent_list()])

    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        wrtr = csv.writer(csvfile, delimiter='\t')
        for row in rows:
            wrtr.writerow(row)
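
As a usage hint, here is one hedged way the `main(args)` above could be wired up with argparse; the flag spellings are assumptions based only on the attribute names the function reads (`infile`, `outfile`, `human_only`, `filter_direct`):

import argparse
import csv
from indra.tools import assemble_corpus

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Filter INDRA statements and export a simple .sif network.')
    parser.add_argument('--infile', help='Input statement pickle')
    parser.add_argument('--outfile', help='Output .sif file')
    parser.add_argument('--human-only', dest='human_only', action='store_true',
                        help='Keep only statements about human genes')
    parser.add_argument('--filter-direct', dest='filter_direct', action='store_true',
                        help='Keep only direct interactions')
    main(parser.parse_args())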
Code example #3
def combine_all_stmts(pkl_list, output_file):
    all_stmts = []
    for pkl_file in pkl_list:
        all_stmts.extend(ac.load_statements(pkl_file))
    ac.dump_statements(all_stmts, output_file)
    stmt_json = stmts_to_json(all_stmts)
    output_json = f"{output_file.rsplit('.', maxsplit=1)[0]}.json"
    with open(output_json, 'wt') as f:
        json.dump(stmt_json, f, indent=2)
    return all_stmts
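
A brief, hedged sketch of the imports and a call this helper assumes; the file names are placeholders:

import json
import indra.tools.assemble_corpus as ac
from indra.statements import stmts_to_json

# Merge several per-source pickles into one pickle plus a JSON dump
# (hypothetical file names)
# all_stmts = combine_all_stmts(['reach_stmts.pkl', 'sparser_stmts.pkl'],
#                               'combined_stmts.pkl')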
Code example #4
File: incremental_model.py Project: lijielife/indra
    def load_prior(self, prior_fname):
        """Load a set of prior statements from a pickle file.

        The prior statements have a special key in the stmts dictionary
        called "prior".

        Parameters
        ----------
        prior_fname : str
            The name of the pickle file containing the prior Statements.
        """
        self.stmts['prior'] = ac.load_statements(prior_fname)
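
Assuming the enclosing class is INDRA's `IncrementalModel`, a minimal hedged usage sketch might look like this (the prior file name is a placeholder):

from indra.tools.incremental_model import IncrementalModel

im = IncrementalModel()
im.load_prior('prior_stmts.pkl')  # stored under the special 'prior' key
print(len(im.stmts['prior']))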
Code example #6
File: expression_prior.py Project: steppi/indra_apps
def get_indra_expression():
    #inc_stmts = by_gene_role_type(stmt_type='IncreaseAmount')
    #dec_stmts = by_gene_role_type(stmt_type='DecreaseAmount')
    #stmts = inc_stmts + dec_stmts
    #ac.dump_statements(stmts, 'indra_regulate_amount_stmts.pkl')
    #stmts = ac.load_statements('indra_regulate_amount_stmts.pkl')
    #stmts = ac.map_grounding(stmts)
    # Expand families before site mapping
    #stmts = ac.expand_families(stmts)
    #stmts = ac.filter_grounded_only(stmts)
    #stmts = ac.map_sequence(stmts)
    #stmts = ac.run_preassembly(stmts, poolsize=4,
    #                           save='indra_regulate_amount_pre.pkl')
    stmts = ac.load_statements('indra_regulate_amount_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts)
    stmts = [s for s in stmts if s.agent_list()[0] is not None]
    return stmts
Code example #7
File: phospho_prior.py Project: kkaris/sitemapper
def get_indra_reg_act_stmts():
    try:
        stmts = ac.load_statements('sources/indra_reg_act_stmts.pkl')
        return stmts
    except Exception:
        # No cached pickle available yet; rebuild from the database below
        pass
    stmts = []
    for stmt_type in ('Activation', 'Inhibition', 'ActiveForm'):
        print("Getting %s statements from INDRA DB" % stmt_type)
        stmts += by_gene_role_type(stmt_type=stmt_type)
    stmts = ac.map_grounding(stmts, save='sources/indra_reg_act_gmap.pkl')
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.run_preassembly(stmts,
                               poolsize=4,
                               save='sources/indra_reg_act_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_reg_act_stmts.pkl')
    return stmts
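
The try/except at the top of this function implements a simple cache-or-rebuild pattern. A generic, standalone sketch of that pattern (the helper name and paths are hypothetical):

import os
import indra.tools.assemble_corpus as ac

def load_or_build(cache_path, build_fn):
    """Return cached statements if present; otherwise build, cache and return them."""
    if os.path.exists(cache_path):
        return ac.load_statements(cache_path)
    stmts = build_fn()
    ac.dump_statements(stmts, cache_path)
    return stmts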
Code example #8
def test_dump_stmts():
    ac.dump_statements([st1], '_test.pkl')
    st_loaded = ac.load_statements('_test.pkl')
    assert len(st_loaded) == 1
    assert st_loaded[0].equals(st1)
Code example #9
File: test_assemble_corpus.py Project: adarshp/indra
def test_load_stmts():
    with open('_test.pkl', 'wb') as fh:
        pickle.dump([st1], fh)
    st_loaded = ac.load_statements('_test.pkl')
    assert len(st_loaded) == 1
    assert st_loaded[0].equals(st1)
Code example #10
File: emmaa_update.py Project: steppi/covid-19
                        '--ctd_stmts',
                        help='Path to CTD statements pkl file',
                        required=True)
    parser.add_argument('-f',
                        '--output_file',
                        help='Output file for combined pkl',
                        required=True)
    args = parser.parse_args()

    # Load everything
    logger.info('Loading statements from pickle files')
    with open(args.old_mm, 'rb') as f:
        old_mm_emmaa_stmts = pickle.load(f)
        old_mm_stmts = [es.stmt for es in old_mm_emmaa_stmts]
    if args.new_cord:
        new_cord_stmts = ac.load_statements(args.new_cord)
    else:
        new_cord_stmts = None
    drug_stmts = ac.load_statements(args.drug_stmts)
    gordon_stmts = ac.load_statements(args.gordon_stmts)
    virhostnet_stmts = ac.load_statements(args.virhostnet_stmts)
    ctd_stmts = ac.load_statements(args.ctd_stmts)

    other_stmts = drug_stmts + gordon_stmts + virhostnet_stmts + ctd_stmts

    combined_stmts = make_model_stmts(old_mm_stmts, other_stmts,
                                      new_cord_stmts)

    # Dump new pickle
    ac.dump_statements(combined_stmts, args.output_file)
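
The fragment above begins partway through the argument parser. A hedged reconstruction of the earlier `add_argument` calls it implies, based only on the attribute names used below (`old_mm`, `new_cord`, `drug_stmts`, `gordon_stmts`, `virhostnet_stmts`); the flag spellings and help texts are guesses, not the original code:

import argparse

parser = argparse.ArgumentParser(description='Combine statement sources into one pickle.')
parser.add_argument('--old_mm', help='Path to old EMMAA model pickle', required=True)
parser.add_argument('--new_cord', help='Path to new CORD-19 statements pickle', required=False)
parser.add_argument('--drug_stmts', help='Path to drug statements pickle', required=True)
parser.add_argument('--gordon_stmts', help='Path to Gordon et al. statements pickle', required=True)
parser.add_argument('--virhostnet_stmts', help='Path to VirHostNet statements pickle', required=True)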
Code example #11
File: phospho_prior.py Project: kkaris/sitemapper
def get_phosphosite_stmts():
    stmts = ac.load_statements('sources/phosphosite_stmts.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    return stmts
Code example #12
def main(args):

    global any_expl, any_expl_not_sr, common_parent, ab_expl_count, \
        directed_im_expl_count, both_im_dir_expl_count, \
        any_axb_non_sr_expl_count, sr_expl_count, \
        shared_regulator_only_expl_count, explanations_of_pairs, unexplained, \
        explained_nested_dict, id1, id2, nested_dict_statements, dataset_dict, \
        avg_corr, dir_node_set, nx_dir_graph, explained_set, part_of_explained,\
        sr_explanations, any_expl_ign_sr

    if args.cell_line_filter and len(args.cell_line_filter) <= 2:
        logger.info('Filtering to provided cell lines in correlation '
                    'calculations.')
        cell_lines = _parse_cell_filter(*args.cell_line_filter)
        assert len(cell_lines) > 0
    elif args.cell_line_filter and len(args.cell_line_filter) > 2:
        sys.exit('Argument --cell-line-filter only takes one or two arguments')
    # Filtering requested together with RNAi data, but no cell line
    # translation dictionary was provided
    elif args.cell_line_filter and len(args.cell_line_filter) == 1 and \
            args.rnai_data_file:
        sys.exit('Need a translation dictionary if RNAi data is provided and '
                 'filter is requested')
    else:
        # Should be empty only when --cell-line-filter is not provided
        logger.info('No cell line filter provided. Using all cell lines in '
                    'correlation calculations.')
        cell_lines = []

    # Parse "explained genes"
    if args.explained_set and len(args.explained_set) == 2:
        explained_set = _parse_explained_genes(
            gene_set_file=args.explained_set[0],
            check_column=args.explained_set[1])
        logger.info('Loading "explained pairs."')
    elif args.explained_set and len(args.explained_set) != 2:
        sys.exit('Argument --explained-set takes exactly two arguments: '
                 '--explained-set <file> <column name>')

    # Check if belief dict is provided
    if not args.belief_score_dict and not args.nested_dict_in:
        logger.error('Belief dict must be provided through the `-b ('
                     '--belief-score-dict)` argument if no nested dict '
                     'of statements with belief score is provided through the '
                     '`-ndi (--nested-dict-in)` argument.')
        raise FileNotFoundError

    # Get dict of {hash: belief score}
    belief_dict = None  # ToDo use api to query belief scores if not loaded
    if args.belief_score_dict:
        if args.belief_score_dict.endswith('.json'):
            belief_dict = _json_open(args.belief_score_dict)
        elif args.belief_score_dict.endswith('.pkl'):
            belief_dict = _pickle_open(args.belief_score_dict)

    args_dict = _arg_dict(args)
    npairs = 0

    filter_settings = {
        'gene_set_filter':
        args.gene_set_filter,
        'strict':
        args.strict,
        'cell_line_filter':
        cell_lines,
        'cell_line_translation_dict':
        _pickle_open(args.cell_line_filter[1])
        if args.cell_line_filter and len(args.cell_line_filter) == 2 else None,
        'margin':
        args.margin,
        'filter_type': (args.filter_type if args.filter_type else None)
    }

    output_settings = {
        'dump_unique_pairs': args.dump_unique_pairs,
        'outbasename': args.outbasename
    }

    # Parse CRISPR and/or RNAi data
    if args_dict.get('crispr') or args_dict.get('rnai'):
        if not filter_settings['filter_type'] and \
            args.crispr_data_file and \
                args.rnai_data_file:
            logger.info('No merge filter set. Output will be intersection of '
                        'the two data sets.')
        elif filter_settings.get('filter_type'):
            logger.info('Using filter type "%s"' %
                        filter_settings['filter_type'])
        master_corr_dict, all_hgnc_ids, stats_dict = \
            dnf.get_combined_correlations(dict_of_data_sets=args_dict,
                                          filter_settings=filter_settings,
                                          output_settings=output_settings)

        # Count pairs in merged correlation dict and dump it
        npairs = dnf._dump_master_corr_dict_to_pairs_in_csv(
            fname=args.outbasename + '_merged_corr_pairs.csv',
            nest_dict=master_corr_dict)

        if args.gene_set_filter:
            gene_filter_list = None
            if args_dict.get('crispr') and not args_dict.get('rnai'):
                gene_filter_list = dnf._read_gene_set_file(
                    gf=filter_settings['gene_set_filter'],
                    data=pd.read_csv(args_dict['crispr']['data'],
                                     index_col=0,
                                     header=0))
            elif args_dict.get('rnai') and not args_dict.get('crispr'):
                gene_filter_list = dnf._read_gene_set_file(
                    gf=filter_settings['gene_set_filter'],
                    data=pd.read_csv(args_dict['rnai']['data'],
                                     index_col=0,
                                     header=0))
            elif args_dict.get('crispr') and args_dict.get('rnai'):
                gene_filter_list = \
                    set(dnf._read_gene_set_file(
                        gf=filter_settings['gene_set_filter'],
                        data=pd.read_csv(args_dict['crispr']['data'],
                                         index_col=0, header=0))) & \
                    set(dnf._read_gene_set_file(
                        gf=filter_settings['gene_set_filter'],
                        data=pd.read_csv(args_dict['rnai']['data'],
                                         index_col=0, header=0)))
            assert gene_filter_list is not None

        else:
            gene_filter_list = None
    else:
        stats_dict = None

    # LOADING INDRA STATEMENTS
    # Get statements (as a set) from a file or from the database that contain
    # any gene from the provided list, unless a pre-calculated nested dict
    # and/or a pre-calculated directed graph is already being loaded.

    if not (args.light_weight_stmts or args.nested_dict_in):
        if args.statements_in:  # Get statements from file
            stmts_all = set(ac.load_statements(args.statements_in))
        # Use api to get statements. _NOT_ the same as querying for each ID
        else:
            if args.gene_set_filter:
                stmts_all = dnf.dbc_load_statements(gene_filter_list)
            else:
                # if there is no gene set file, restrict to gene ids in
                # input data
                stmts_all = dnf.dbc_load_statements(list(all_hgnc_ids))

        # Dump statements to pickle file if output name has been given
        if args.statements_out:
            logger.info('Dumping read raw statements')
            ac.dump_statements(stmts=stmts_all, fname=args.statements_out)

    # Get nested dicts from statements
    if args.light_weight_stmts:
        hash_df = pd.read_csv(args.light_weight_stmts, delimiter='\t')
        nested_dict_statements = dnf.nested_hash_dict_from_pd_dataframe(
            hash_df)
    elif args.nested_dict_in:
        nested_dict_statements = _pickle_open(args.nested_dict_in)
    else:
        nested_dict_statements = dnf.dedupl_nested_dict_gen(
            stmts_all, belief_dict)
        if args.nested_dict_out:
            _dump_it_to_pickle(fname=args.nested_dict_out,
                               pyobj=nested_dict_statements)

    # Get directed simple graph
    if args.directed_graph_in:
        with open(args.directed_graph_in, 'rb') as rpkl:
            nx_dir_graph = pkl.load(rpkl)
    else:
        # Create directed graph from statement dict
        nx_dir_graph = dnf.nx_directed_graph_from_nested_dict_2layer(
            nest_d=nested_dict_statements, belief_dict=belief_dict)
        # Save as pickle file
        if args.directed_graph_out:
            _dump_it_to_pickle(fname=args.directed_graph_out,
                               pyobj=nx_dir_graph)
    dir_node_set = set(nx_dir_graph.nodes)

    # LOOP THROUGH THE UNIQUE CORRELATION PAIRS, MATCH WITH INDRA NETWORK
    any_expl = 0  # Count if any explanation per (A,B) correlation found
    any_expl_not_sr = 0  # Count any explanation, excluding when shared
    # regulator is the only explanation
    any_expl_ign_sr = 0  # Count any explanation, ignoring shared regulator
    # explanations
    common_parent = 0  # Count if common parent found per set(A,B)
    part_of_explained = 0  # Count pairs that are part of the "explained set"
    ab_expl_count = 0  # Count A-B/B-A as one per set(A,B)
    directed_im_expl_count = 0  # Count any A->X->B,B->X->A as one per set(A,B)
    any_axb_non_sr_expl_count = 0  # Count if shared target found per set(A,B)
    sr_expl_count = 0  # Count if shared regulator found per set(A,B)
    shared_regulator_only_expl_count = 0  # Count if only shared regulator found
    explanations_of_pairs = []  # Saves all non shared regulator explanations
    sr_explanations = []  # Saves all shared regulator explanations
    unexplained = []  # Unexplained correlations
    skipped = 0

    # The explained nested dict: (1st key = subj, 2nd key = obj, 3rd key =
    # connection type or correlation).
    #
    # directed: any A->B or B->A
    # undirected: any of complex, selfmodification, parent
    # x_is_intermediary: A->X->B or B->X->A
    # x_is_downstream: A->X<-B
    # x_is_upstream: A<-X->B
    #
    # d[subj][obj] = {correlation: {gene_set1: corr, gene_set2: corr, ...},
    #                 directed: [(stmt/stmt hash, belief score)],
    #                 undirected: [(stmt/stmt hash, belief score)],
    #                 common_parents: [list of parents]
    #                 x_is_intermediary: [(X, belief rank)],
    #                 x_is_downstream: [(X, belief rank)],
    #                 x_is_upstream: [(X, belief rank)]}
    #
    # Then in javascript you can for example do:
    # if SUBJ_is_subj_dict.obj.direct.length <-- should return zero if []
    #
    # Used to get: directed graph
    # 1. all nodes of directed graph -> 1st dropdown
    # 2. dir -> undir graph -> jsons to check all corr neighbors -> 2nd dropdown
    # 3. jsons to check if connection is direct or intermediary

    # Using the following loop structure for counter variables:
    # a = 2
    # def for_loop_body(args):
    #     global a
    #     a += 1
    # # Then loop like:
    # if dict:
    #     for pairs in dict:
    #         for_loop_body(args)
    # elif random:
    #     for random pair:
    #         for_loop_body(args)

    explained_nested_dict = dnf.create_nested_dict()

    # Loop rnai and/or crispr only
    if (args_dict.get('rnai') or args_dict.get('crispr')) and \
            not args.brca_dependencies:
        logger.info('Gene pairs generated from DepMap knockout screening data '
                    'sets')
        logger.info('Looking for connections between %i pairs' %
                    (npairs if npairs > 0 else args.max_pairs))
        for outer_id, do in master_corr_dict.items():
            for inner_id, dataset_dict in do.items():
                if len(dataset_dict.keys()) == 0:
                    skipped += 1
                    if args.verbosity:
                        logger.info('Skipped outer_id=%s and inner_id=%s' %
                                    (outer_id, inner_id))
                    continue

                id1, id2 = outer_id, inner_id
                loop_body(args)

    # Loop rnai and/or crispr AND BRCA cell line dependencies
    elif (args_dict.get('rnai') or args_dict.get('crispr')) and \
            args.brca_dependencies:
        logger.info('Gene pairs generated from combined knockout screens. '
                    'Output data will include BRCA cell line dependency\n'
                    'data as well as correlation data from knockout screens.')
        logger.info('Looking for connections between %i pairs' %
                    (npairs if npairs > 0 else args.max_pairs))

        # Load BRCA dependency data
        brca_data_set = pd.read_csv(args.brca_dependencies, header=0)
        depend_in_breast_genes = brca_data_set.drop(
            axis=1, labels=['Url Label',
                            'Type'])[brca_data_set['Type'] == 'gene']
        genes = set(depend_in_breast_genes['Gene/Compound'].values)

        for outer_id, do in master_corr_dict.items():
            for inner_id, knockout_dict in do.items():
                if len(knockout_dict.keys()) == 0:
                    skipped += 1
                    if args.verbosity:
                        logger.info('Skipped outer_id=%s and inner_id=%s' %
                                    (outer_id, inner_id))
                    continue

                id1, id2 = outer_id, inner_id
                dataset_dict = {}
                gene1_data = []
                gene2_data = []

                # Get BRCA dep data
                if id1 in genes:
                    for row in depend_in_breast_genes[
                            depend_in_breast_genes['Gene/Compound'] ==
                            id1].iterrows():
                        gene1_data.append(
                            (row[1]['Dataset'], row[1]['T-Statistic'],
                             row[1]['P-Value']))
                if id2 in genes:
                    for row in depend_in_breast_genes[
                            depend_in_breast_genes['Gene/Compound'] ==
                            id2].iterrows():
                        gene2_data.append(
                            (row[1]['Dataset'], row[1]['T-Statistic'],
                             row[1]['P-Value']))

                dataset_dict[id1] = gene1_data
                dataset_dict[id2] = gene2_data

                dataset_dict['crispr'] = (knockout_dict['crispr']
                                          if knockout_dict.get('crispr') else
                                          None)
                dataset_dict['rnai'] = (knockout_dict['rnai']
                                        if knockout_dict.get('rnai') else None)

                if id1 not in genes and id2 not in genes:
                    dataset_dict = knockout_dict

                # Run loop body
                loop_body(args)

    # loop brca dependency ONLY
    elif args.brca_dependencies and not \
            (args_dict.get('rnai') or args_dict.get('crispr')):
        logger.info(
            'Gene pairs generated from BRCA gene enrichment data only.')
        brca_data_set = pd.read_csv(args.brca_dependencies, header=0)
        depend_in_breast_genes = brca_data_set.drop(
            axis=1, labels=['Url Label',
                            'Type'])[brca_data_set['Type'] == 'gene']
        genes = set(depend_in_breast_genes['Gene/Compound'].values)
        npairs = len(list(itt.combinations(genes, 2)))
        logger.info('Looking for connections between %i pairs' %
                    (npairs if npairs > 0 else args.max_pairs))
        for id1, id2 in itt.combinations(genes, 2):
            gene1_data = []
            gene2_data = []
            # For each non-diagonal pair in file, insert in dataset_dict:
            # geneA, geneB,
            # dataset for A, dataset for B,
            # T-stat for A, T-stat for B,
            # P-value for A, P-value for B
            for row in depend_in_breast_genes[
                    depend_in_breast_genes['Gene/Compound'] == id1].iterrows():
                gene1_data.append((row[1]['Dataset'], row[1]['T-Statistic'],
                                   row[1]['P-Value']))

            for row in depend_in_breast_genes[
                    depend_in_breast_genes['Gene/Compound'] == id2].iterrows():
                gene2_data.append((row[1]['Dataset'], row[1]['T-Statistic'],
                                   row[1]['P-Value']))
            # dataset_dict = {id1:
            #                 [(dataset1, T-stat1, P-value1),
            #                  (dataset2, T-stat2, P-value2)],
            #                 id2:
            #                  [(..., ...)],
            #                  ...}
            dataset_dict = {id1: gene1_data, id2: gene2_data}
            loop_body(args)

    # loop random pairs from data set
    elif args_dict.get('sampling_gene_file'):
        logger.info('Gene pairs generated at random from %s' %
                    args_dict['sampling_gene_file'])
        with open(args_dict['sampling_gene_file'], 'r') as fi:
            rnd_gene_set = [l.strip() for l in fi.readlines()]

        npairs = args.max_pairs
        dataset_dict = None
        logger.info('Looking for connections between %i pairs' %
                    (npairs if npairs > 0 else args.max_pairs))
        for _ in range(npairs):
            id1, id2 = _rnd_pair_gen(rnd_gene_set)
            assert not isinstance(id1, list)
            loop_body(args)

    long_string = ''
    long_string += '-' * 63 + '\n'
    long_string += 'Summary for matching INDRA network to correlation pairs:'\
                   + '\n\n'
    long_string += '> Total number of correlation pairs checked: %i' % npairs\
                   + '\n'
    if args.verbosity:
        long_string += '> Skipped %i empty doublets in corr dict\n' % skipped

    long_string += '> Total correlations unexplained: %i' % len(unexplained)\
                   + '\n'
    long_string += '> Total correlations explained: %i' % any_expl + '\n'
    long_string += '> Total correlations explained, ignoring shared ' \
                   'regulator: %i' % any_expl_ign_sr + '\n'
    long_string += '> Total correlations explained, excluding shared ' \
                   'regulator (total - shared only): %i' % \
                   (any_expl - shared_regulator_only_expl_count) + '\n'
    long_string += '>    %i correlations have an explanation involving a ' \
                   'common parent' % common_parent + '\n'
    if args.explained_set:
        long_string += '>    %i gene pairs were considered explained as part ' \
                       'of the "explained set"' % part_of_explained + '\n'
    long_string += '>    %i explanations involving direct connection or ' \
                   'complex' % ab_expl_count + '\n'
    long_string += '>    %i correlations have a directed explanation ' \
                   'involving an intermediate node (A->X->B/A<-X<-B)' \
                   % directed_im_expl_count + '\n'
    long_string += '>    %i correlations have an explanation involving an ' \
                   'intermediate node excluding shared regulators' % \
                   any_axb_non_sr_expl_count + '\n'
    long_string += '>    %i correlations have an explanation involving a ' \
                   'shared regulator (A<-X->B)' % sr_expl_count + '\n'
    long_string += '>    %i correlations have shared regulator as only ' \
                   'explanation' % shared_regulator_only_expl_count + '\n\n'

    if stats_dict and (stats_dict.get('rnai') or stats_dict.get('crispr')):
        long_string += 'Statistics of input data:' + '\n\n'
    if stats_dict and stats_dict.get('rnai'):
        long_string += '  RNAi data ' + '\n'
        long_string += ' -----------' + '\n'
        long_string += '> mean: %f\n' % stats_dict['rnai']['mean']
        long_string += '> SD: %f\n' % stats_dict['rnai']['sigma']
        long_string += '> lower bound: %.3f*SD = %.4f\n' % (
            args_dict['rnai']['ll'],
            args_dict['rnai']['ll'] * stats_dict['rnai']['sigma'])
        if args_dict['rnai']['ul']:
            long_string += '> upper bound: %.3f*SD = %.4f\n\n' % (
                args_dict['rnai']['ul'],
                args_dict['rnai']['ul'] * stats_dict['rnai']['sigma'])
    if stats_dict and stats_dict.get('crispr'):
        long_string += '  CRISPR data ' + '\n'
        long_string += ' -------------' + '\n'
        long_string += '> mean: %f\n' % stats_dict['crispr']['mean']
        long_string += '> SD: %f\n' % stats_dict['crispr']['sigma']
        long_string += '> lower bound: %.3f*SD = %.4f\n' % (
            args_dict['crispr']['ll'],
            args_dict['crispr']['ll'] * stats_dict['crispr']['sigma'])
        if args_dict['crispr']['ul']:
            long_string += '> upper bound: %.3f*SD = %.4f\n\n' % (
                args_dict['crispr']['ul'],
                args_dict['crispr']['ul'] * stats_dict['crispr']['sigma'])
    long_string += '-' * 63 + '\n\n'

    logger.info('\n' + long_string)

    # Here create directed graph from explained nested dict
    nx_expl_dir_graph = dnf.nx_directed_graph_from_nested_dict_3layer(
        nest_d=explained_nested_dict)

    if not args.no_web_files:
        # 'explained_nodes' are used to produce first drop down
        explained_nodes = list(nx_expl_dir_graph.nodes)
        logger.info('Dumping json "explainable_ids.json" for first dropdown.')
        _dump_it_to_json(args.outbasename + '_explainable_ids.json',
                         explained_nodes)

        # Get undir graph and save each neighbor lookup as json for 2nd dropdown
        nx_expl_undir_graph = nx_expl_dir_graph.to_undirected()
        dnf.nx_undir_to_neighbor_lookup_json(
            expl_undir_graph=nx_expl_undir_graph, outbasename=args.outbasename)

    # The easiest way to check whether pairs are explained is to loop over
    # the explained dict. Skip shared regulators.
    _dump_nest_dict_to_csv(fname=args.outbasename +
                           '_explained_correlations.csv',
                           nested_dict=explained_nested_dict,
                           header=['gene1', 'gene2', 'meta_data'],
                           excl_sr=True)

    _dump_it_to_pickle(fname=args.outbasename + '_explained_nest_dict.pkl',
                       pyobj=explained_nested_dict)
    headers = ['subj', 'obj', 'type', 'X', 'meta_data']
    _dump_it_to_csv(fname=args.outbasename + '_explanations_of_pairs.csv',
                    pyobj=explanations_of_pairs,
                    header=headers)
    _dump_it_to_csv(fname=args.outbasename +
                    '_explanations_of_shared_regulators.csv',
                    pyobj=sr_explanations,
                    header=headers)
    _dump_it_to_csv(fname=args.outbasename + '_unexpl_correlations.csv',
                    pyobj=unexplained,
                    header=headers[:-2])
    with open(args.outbasename + '_script_summary.txt', 'w') as fo:
        fo.write(long_string)
    return 0
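
The "Using the following loop structure" comment inside main() describes a pattern in which a single helper mutates module-level counters while being driven by several different pair generators. A minimal, self-contained illustration of that pattern with hypothetical names:

explained = 0
unexplained_pairs = []

def loop_body(id1, id2, connected):
    # The helper mutates module-level state so several loops can share it
    global explained
    if connected:
        explained += 1
    else:
        unexplained_pairs.append((id1, id2))

for id1, id2, connected in [('A', 'B', True), ('B', 'C', False)]:
    loop_body(id1, id2, connected)

print(explained, unexplained_pairs)  # -> 1 [('B', 'C')]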
Code example #13
                                       username=ndex_cred['user'],
                                       password=ndex_cred['password'])
    gene_names = [
        hgnc_client.get_hgnc_name(ag.db_refs['HGNC'])
        for ag in ncp.get_agents()
    ]
    """
    # Get PMIDs for reading
    entrez_pmids = get_pmids(gene_names)
    network_pmids = ncp.get_pmids()
    pmids = list(set(entrez_pmids + network_pmids))
    save_pmids_for_reading(pmids, 'dna_damage_pmids.txt')
    """

    # Build the model
    prior_stmts = build_prior(gene_names, 'prior_stmts.pkl')
    reach_stmts = ac.load_statements('reach_stmts.pkl')
    stmts = ncp.statements + reach_stmts + prior_stmts
    stmts = run_assembly(stmts, 'unfiltered_assembled_stmts.pkl')

    # Filter the statements at different levels
    ids_cutoffs = (('4e26a4f0-9388-11e7-a10d-0ac135e8bacf',
                    0.90), ('527fecf7-9388-11e7-a10d-0ac135e8bacf', 0.95),
                   ('2f0e17bc-9387-11e7-a10d-0ac135e8bacf', 0.99))

    for net_id, cutoff in ids_cutoffs:
        stmts_filt = filter(stmts, cutoff, 'stmts_%.2f.pkl' % cutoff)
        cxa = assemble_cx(stmts_filt, 'dna_damage_%.2f.cx' % cutoff)
        cx_str = cxa.print_cx()
        ndex_client.update_network(cx_str, net_id, ndex_cred)
Code example #14
File: assemble_model.py Project: steppi/indra_apps
def get_reach_output(path):
    stmts = ac.load_statements(path)
    return stmts
Code example #15
File: assemble_db_models.py Project: pupster90/indra
        model_types = sys.argv[1:]
        if 'all' in model_types:
            assemble_models = ['pysb', 'sif', 'cx']
        else:
            assemble_models = sys.argv[1:]

    print('Assembling the following model types: %s' % \
          ', '.join(assemble_models))
    print('##############')

    outf = 'output/'
    data = process_data.read_data(process_data.data_file)
    data_genes = process_data.get_all_gene_names(data)
    reassemble = True
    if not reassemble:
        stmts = ac.load_statements(pjoin(outf, 'preassembled_db.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        #reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        #reach_stmts = ac.filter_no_hypothesis(reach_stmts)
        extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
        #reading_stmts = reach_stmts + extra_stmts
        #reading_stmts = ac.map_grounding(reading_stmts,
        #                                save=pjoin(outf, 'gmapped_reading.pkl'))
        #stmts = prior_stmts + reading_stmts + extra_stmts
        stmts = prior_stmts + extra_stmts

        stmts = ac.filter_grounded_only(stmts)
Code example #16
    gene_names = process_data.get_gene_names(data)

    # If generic assembly needs to be done (instead of just loading the result)
    # set this to True
    reassemble = False

    # The file in which the preassembled statements will be saved
    pre_stmts_file = prefixed_pkl('preassembled')
    if reassemble:
        # Load various files that were previously produced
        sources = [
            'indradb', 'trips', 'bel', 'biopax', 'phosphosite', 'r3', 'sparser'
        ]
        stmts = []
        for source in sources:
            stmts += ac.load_statements(prefixed_pkl(source))
        stmts = ac.filter_no_hypothesis(stmts)
        # Fix grounding and filter to grounded entities and for proteins,
        # filter to the human ones
        stmts = ac.map_grounding(stmts)
        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_human_only(stmts)
        # Combinatorially expand protein families
        stmts = ac.expand_families(stmts)
        # Apply a strict filter to statements based on the gene names
        stmts = ac.filter_gene_list(stmts, gene_names, 'all')
        # Fix errors in references to protein sequences
        stmts = ac.map_sequence(stmts)
        # Run preassembly and save result
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        ac.dump_statements(stmts, pre_stmts_file)
Code example #17
File: assemble_models.py Project: jmuhlich/indra
    stmts = trips_stmts + sparser_stmts + r3_stmts
    return stmts

def get_prior_genes(fname):
    """Get the list of prior genes."""
    with open(fname, 'rt') as fh:
        genes = fh.read().strip().split('\n')
        return genes

if __name__ == '__main__':
    outf = 'output/'
    data = process_data.read_data(process_data.data_file)
    data_genes = process_data.get_all_gene_names(data)
    reassemble = False
    if not reassemble:
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reading_stmts = ac.map_grounding(reading_stmts,
                                    save=pjoin(outf, 'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
Code example #18
    statements' db_refs dictionary.
    """.rstrip()

    parser = argparse.ArgumentParser(description=doc)
    parser.add_argument('--input', '-i', type=str, required=True,
                        help='Pickle file with a dictionary mapping each ' +
                        'pmid to a list of INDRA statements',
                        dest='input_file')
    parser.add_argument('--output', '-o', type=str, required=True,
                        help='Output csv test file containing the extracted ' +
                        ' grounding map',
                        dest='output_file')
    args = parser.parse_args()

    # Load the statements from the pickle
    statement_list = ac.load_statements(args.input_file)

    # Make a dictionary mapping the raw text mention to db_refs
    logger.info('Extracting grounding information')
    text_to_refs = {}
    counter = 0
    percent_done = 0
    start_time = time.time()
    for statement in statement_list:
        for a in statement.agent_list():
            db_refs = copy.copy(a.db_refs)
            text = db_refs.pop('TEXT', None)

            # Convert HGNC ids to names
            if 'HGNC' in db_refs and string_is_integer(db_refs['HGNC']):
                db_refs['HGNC'] = get_hgnc_name(db_refs['HGNC'])
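
The script above stops right after HGNC normalization. A hedged sketch of how the collected `text_to_refs` mapping might then be written to the output CSV mentioned in the `--output` help text; the column layout and the helper name are assumptions, not from the original script:

import csv

def write_grounding_map(text_to_refs, output_file):
    # One row per raw text mention: the text followed by namespace:id pairs
    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f)
        for text, db_refs in sorted(text_to_refs.items()):
            writer.writerow([text] + ['%s:%s' % (ns, ref)
                                      for ns, ref in sorted(db_refs.items())])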
Code example #19
def main(args):
    uniq_pairs, all_hgnc_ids, fsort_corrs = \
            get_correlations(args.ceres_file, args.geneset_file,
                             args.corr_file, args.strict,
                             args.outbasename, args.recalc, args.ll, args.ul)

    # Get statements from file or from database that contain any gene from
    # provided list as set
    if args.statements_in:  # Get statements from file
        stmts_all = set(ac.load_statements(args.statements_in))
    else:  # Use api to get statements. NOT the same as querying for each ID
        if args.geneset_file:
            stmts_all = dnf.dbc_load_statements(gene_filter_list)
        else:
            # if there is no gene set file, restrict to gene ids in
            # correlation data
            stmts_all = dnf.dbc_load_statements(list(all_hgnc_ids))

    # Dump statements to pickle file if output name has been given
    if args.statements_out:
        ac.dump_statements(stmts=stmts_all, fname=args.statements_out)

    # Get nested dicts from statements
    nested_dict_statements = dnf.nested_dict_gen(stmts_all)

    # Loop through the unique pairs
    dir_conn_pairs = []
    dir_neg_conn_pairs = []
    unexplained = []
    npairs = len(uniq_pairs)

    f_con = open(args.outbasename + '_connections_latex.tex', 'w')

    f_neg_c = open(args.outbasename + '_neg_conn_latex.tex', 'w')

    logger.info('Looking for connections between %i pairs' % npairs)
    for pair in uniq_pairs:
        pl = list(pair)
        for li in pl:
            if _is_float(li):
                correlation = li
                fmt_corr = '{0:.04}'.format(correlation)
                break
        pl.remove(correlation)
        id1, id2 = pl

        forward_fail = False
        backward_fail = False

        if (nested_dict_statements.get(id1) and
                nested_dict_statements.get(id1).get(id2)) or \
                (nested_dict_statements.get(id2) and
                 nested_dict_statements.get(id2).get(id1)):
            new_pair = r'\section{{{}, {}: {}}}'.format(id1, id2, fmt_corr) \
                 +'\n'+ \
                 r'See correlation plot \href{{' \
                 r'https://depmap.org/portal/interactive/?xDataset=Avana' \
                 r'&xFeature={}&yDataset=Avana&yFeature={}&colorDataset=' \
                 r'lineage&colorFeature=all&filterDataset=context' \
                 r'&filterFeature=&regressionLine=false&statisticsTable=false' \
                 r'&associationTable=true&plotOnly=false}}{{here}}'.format(
                     id1, id2) + '\n\n'
            f_con.write(new_pair)
            if correlation < 0:
                f_neg_c.write(new_pair)

        # nested_dict_statements.get(id1).get(id2) raises AttributeError
        # if nested_dict_statements.get(id1) returns None

        ev_fltr = 0

        # Checks subj=id1, obj=id2
        if nested_dict_statements.get(id1) and \
                nested_dict_statements.get(id1).get(id2):
            stmts = nested_dict_statements[id1][id2]
            logger.info('Found connection between %s and %s' % (id1, id2))
            dir_conn_pairs.append((id1, id2, correlation, stmts))
            output = dnf.latex_output(subj=id1,
                                      obj=id2,
                                      corr=correlation,
                                      ev_len_fltr=ev_fltr,
                                      stmts=stmts,
                                      ignore_str='parent')
            f_con.write(output)

            if correlation < 0:
                dir_neg_conn_pairs.append((id1, id2, correlation, stmts))
                f_neg_c.write(output)
        else:
            forward_fail = True

        # Checks subj=id2, obj=id1
        if nested_dict_statements.get(id2) and \
                nested_dict_statements.get(id2).get(id1):
            stmts = nested_dict_statements[id2][id1]
            logger.info('Found connection between %s and %s' % (id2, id1))
            dir_conn_pairs.append((id2, id1, correlation, stmts))
            output = dnf.latex_output(subj=id2,
                                      obj=id1,
                                      corr=correlation,
                                      ev_len_fltr=ev_fltr,
                                      stmts=stmts,
                                      ignore_str='parent')
            f_con.write(output)

            if correlation < 0:
                dir_neg_conn_pairs.append((id2, id1, correlation, stmts))
                f_neg_c.write(output)

        else:
            backward_fail = True

        # If both failed, count as unexplained
        if forward_fail and backward_fail:
            unexplained.append([id1, id2, correlation])

    with open(args.outbasename + '_connections.csv', 'w', newline='') as csvf:
        wrtr = csv.writer(csvf, delimiter=',')
        wrtr.writerows(dir_conn_pairs)

    with open(args.outbasename + '_neg_conn.csv', 'w', newline='') as csvf:
        wrtr = csv.writer(csvf, delimiter=',')
        wrtr.writerows(dir_neg_conn_pairs)

    with open(args.outbasename + '_unexplained.csv', 'w', newline='') as csvf:
        wrtr = csv.writer(csvf, delimiter=',')
        wrtr.writerows(unexplained)

    f_con.close()
    f_neg_c.close()
Code example #20
File: phospho_prior.py Project: kkaris/sitemapper
    reg_stmts = act_stmts + inh_stmts
    reg_stmts = [s for s in reg_stmts if s.subj is not None]
    reg_stmts = ac.filter_genes_only(reg_stmts, specific_only=True)
    """

    #indra_stmts = get_indra_phos_stmts()
    """
    indra_stmts = ac.load_statements('sources/indra_phos_stmts.pkl')
    syn_stmts = load_statements_from_synapse(synapse_id='syn10998244')
    pc_stmts = load_pc_phos()
    omni_stmts = get_omnipath_stmts()
    phos_stmts = get_phosphosite_stmts()
    all_stmts = syn_stmts + omni_stmts + phos_stmts + indra_stmts + pc_stmts
    ac.dump_statements(all_stmts, 'sources/all_stmts.pkl')
    """
    all_stmts = ac.load_statements('sources/all_stmts.pkl')
    nsprior = to_nonspec_prior(all_stmts)
    nsprior_filename = 'priors/indra_nkconf2_combined_prot_spec.txt'
    save_gene_prior(nsprior, nsprior_filename)
    syn = synapseclient.login()
    syn_file = synapseclient.File(nsprior_filename, parent='syn11272284')
    syn.store(syn_file)

    all_kinases = [k for kin_list in nsprior.values() for k in kin_list]
    kin_ctr = Counter(all_kinases)
    kin_ctr = sorted([(k, v) for k, v in kin_ctr.items()],
                     key=lambda x: x[1],
                     reverse=True)

    default_prior_list = [t[0] for t in kin_ctr[0:200]]
    default_prior_filename = 'priors/indra_nkconf2_combined_default200.txt'
Code example #21
                        type=str,
                        required=True,
                        help='Pickle file with a dictionary mapping each ' +
                        'pmid to a list of INDRA statements',
                        dest='input_file')
    parser.add_argument('--output',
                        '-o',
                        type=str,
                        required=True,
                        help='Output csv test file containing the extracted ' +
                        ' grounding map',
                        dest='output_file')
    args = parser.parse_args()

    # Load the statements from the pickle
    statement_list = ac.load_statements(args.input_file)

    # Make a dictionary mapping the raw text mention to db_refs
    logger.info('Extracting grounding information')
    text_to_refs = {}
    counter = 0
    percent_done = 0
    start_time = time.time()
    for statement in statement_list:
        for a in statement.agent_list():
            db_refs = copy.copy(a.db_refs)
            text = db_refs.pop('TEXT', None)

            # Convert HGNC ids to names
            if 'HGNC' in db_refs and string_is_integer(db_refs['HGNC']):
                db_refs['HGNC'] = get_hgnc_name(db_refs['HGNC'])
Code example #22
        csvwriter.writerows(interactome_rows)
    with open(prize_outpath, 'wt') as f:
        csvwriter = csv.writer(f, delimiter='\t')
        csvwriter.writerows(prize_rows)

    return


if __name__ == "__main__":
    stmts = "../work/phospho_stmts.pkl"
    prize_outpath = "../work/pybel_prize.tsv"
    interactome_path = "../work/big_pybel_interactome2.tsv"
    site_file = "../work/gsea_sites.rnk"
    # Load the statements linking kinases/regulators to phospho sites
    # in the data
    stmts = ac.load_statements(stmts)

    # Employ filters to reduce network size
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts)
    # In this data, statements of these types will not act on
    # a short enough timescale to play a meaningful role
    stmts = ac.filter_by_type(stmts, DecreaseAmount, invert=True)
    stmts = ac.filter_by_type(stmts, IncreaseAmount, invert=True)
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_enzyme_kinase(stmts)

    # Assemble a pybel graph from statements
    pba = PybelAssembler(stmts)
    pb_graph = make_model(pba)
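
A minimal sketch of the INDRA imports this filtering-and-assembly snippet appears to rely on; the `make_model(pba)` call suggests a local wrapper in the original script, so only the statement types and assembler are listed, and they should be treated as assumptions:

import indra.tools.assemble_corpus as ac
from indra.statements import DecreaseAmount, IncreaseAmount, Complex
from indra.assemblers.pybel import PybelAssembler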
Code example #23
File: check_pysb_model.py Project: pupster90/indra
    for drug, stmtd in data_stmts.items():
        print(drug)
        for ab in stmtd.keys():
            print('-'+ ab)

    agent_obs = list(itertools.chain.from_iterable(ab_map.values()))
    # Here we need to cross-reference the antibody map with the data values
    agent_data = {}
    for drug_name, values in data_values.items():
        agent_data[drug_name] = {}
        for ab_name, value in values.items():
            agents = ab_map[ab_name]
            for agent in agents:
                agent_data[drug_name][agent] = value

    base_stmts = ac.load_statements('output/korkut_model_pysb_before_pa.pkl')
    for st in base_stmts:
        st.uuid = str(st.uuid)

    """
    # Merge the sources of statements
    # stmts = manual_stmts + base_stmts
    stmts = base_stmts
    #stmts = manual_stmts

    # Assemble model
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()

    with open('korkut_pysb.pkl', 'wb') as f:
Code example #24
File: rank_docs.py Project: kkaris/covid-19
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Generate ranked lists of COVID docs for curation.')
    parser.add_argument('-i',
                        '--input_file',
                        help='Name of stmt pkl file',
                        required=True)
    parser.add_argument('-o',
                        '--output_base',
                        help='Basename for output files.',
                        required=True)
    args = parser.parse_args()

    # Load statements and filter to grounded only
    stmts = ac.load_statements(args.input_file)
    stmts = ac.filter_grounded_only(stmts)

    # Sort by TextRefs
    by_tr, no_tr = stmts_by_text_refs(stmts)

    # Combine duplicates in each statement list
    by_tr_pa = {}
    for tr, stmt_list in by_tr.items():
        pa = Preassembler(bio_ontology, stmt_list)
        uniq_stmts = pa.combine_duplicates()
        by_tr_pa[tr] = uniq_stmts

    # Filter to MESH term for "Coronavirus"
    mesh_id = 'D017934'
    mesh_children = get_mesh_children(mesh_id)
Code example #26
from indra.util import _require_python3
from indra.assemblers.sif import SifAssembler
import indra.tools.assemble_corpus as ac

stmts = ac.load_statements('output/preassembled.pkl')
stmts = ac.filter_belief(stmts, 0.95)
stmts = ac.filter_direct(stmts)
sa = SifAssembler(stmts)
sa.make_model(True, True, False)
sa.set_edge_weights('support_all')
fname = 'model_high_belief_v2.sif'
with open(fname, 'wt') as fh:
    for s, t, d in sa.graph.edges(data=True):
        source = sa.graph.nodes[s]['name']
        target = sa.graph.nodes[t]['name']
        fh.write('%s %f %s\n' % (source, d['weight'], target))
Code example #27
def test_dump_stmts():
    ac.dump_statements([st1], '_test.pkl')
    st_loaded = ac.load_statements('_test.pkl')
    assert (len(st_loaded) == 1)
    assert (st_loaded[0].equals(st1))
Code example #28
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model to S3 with config as YAML and JSON
    emmaa_model.save_to_s3()
    s3_client = boto3.client('s3')
    save_config_to_s3(model_name, config_dict)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Create and upload an EMMAA model from INDRA Statements.')
    parser.add_argument('-m', '--model_name', help='Model name', required=True)
    parser.add_argument('-s',
                        '--stmt_pkl',
                        help='Statement pickle file',
                        required=True)
    parser.add_argument('-n',
                        '--ndex_id',
                        help='NDEx ID. If not given, a new NDEx network will '
                        'be created. If given, will update the NDEx '
                        'network.',
                        required=False)
    args = parser.parse_args()

    # Load the statements
    indra_stmts = ac.load_statements(args.stmt_pkl)

    # Create the model
    create_upload_model(args.model_name, indra_stmts, args.ndex_id)
Code example #29
                        required=True)
    args = parser.parse_args()

    # Load model statements and tests
    model_stmts, _ = get_assembled_statements('covid19')
    curated_tests, _ = load_tests_from_s3('covid19_curated_tests')
    if isinstance(curated_tests, dict):  # if descriptions were added
        curated_tests = curated_tests['tests']
    mitre_tests, _ = load_tests_from_s3('covid19_mitre_tests')
    if isinstance(mitre_tests, dict):  # if descriptions were added
        mitre_tests = mitre_tests['tests']
    all_test_stmts = [test.stmt for test in curated_tests] + \
        [test.stmt for test in mitre_tests]

    # Load CTD statements
    chem_dis_stmts = ac.load_statements(args.chemical_disease)
    chem_gene_stmts = ac.load_statements(args.chemical_gene)
    gene_dis_stmts = ac.load_statements(args.gene_disease)
    all_ctd_stmts = chem_dis_stmts + chem_gene_stmts + gene_dis_stmts

    # Collect most frequent gene groundings for model statements and
    # chemical groundings for test statements
    model_gene_groundings = get_groundings(model_stmts, 'HGNC', cutoff=100)
    chem_test_groundings = get_groundings(all_test_stmts, 'CHEBI', None)
    gene_chem_groundings = model_gene_groundings + chem_test_groundings
    gene_chem_groundings = set(gene_chem_groundings)
    # Filter ctd statements to those having matching genes and chemicals
    gene_chem_stmts = filter_by_groundings(all_ctd_stmts, gene_chem_groundings,
                                           'all')
    # Filter ctd statements to those having matching diseases
    mesh_groundings = set([('MESH', dis) for dis in diseases])
Code example #30
        model_types = sys.argv[1:]
        if 'all' in model_types:
            assemble_models = ['pysb', 'sif', 'cx']
        else:
            assemble_models = sys.argv[1:]

    print('Assembling the following model types: %s' % \
          ', '.join(assemble_models))
    print('##############')

    outf = 'output/'
    data = process_data.read_data(process_data.data_file)
    data_genes = process_data.get_all_gene_names(data)
    reassemble = False
    if not reassemble:
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reach_stmts = ac.filter_no_hypothesis(reach_stmts)
        #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
        extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
        reading_stmts = reach_stmts + extra_stmts
        reading_stmts = ac.map_grounding(reading_stmts,
                                         save=pjoin(outf,
                                                    'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts + extra_stmts
Code example #31
    rows = []
    for kinase, sites in regulons.items():
        rows.append([kinase, 'Description'] + [s for s in sites])
    with open(filename, 'wt') as f:
        csvwriter = csv.writer(f, delimiter='\t')
        csvwriter.writerows(rows)


if __name__ == '__main__':
    reload = False
    if reload:
        phos_stmts = \
                get_phosphorylation_stmts('../work/gsea_sites.rnk')
        ac.dump_statements(phos_stmts, '../work/phospho_stmts.pkl')
    else:
        phos_stmts = ac.load_statements('../work/phospho_stmts.pkl')

    regulons_from_stmts(phos_stmts, '../work/kinase_regulons.gmt')

    #kinases = get_kinase_counts(phos_stmts)

    target_list = get_stmt_subject_object(phos_stmts, 'SUBJECT')

    # Get all Tubulin child nodes as the source list
    source_list = [('FPLX', 'Tubulin')]
    tubulin_ag = Agent('Tubulin', db_refs={'FPLX': 'Tubulin'})
    ex = Expander(bio_ontology)
    for ag_ns, ag_id in ex.get_children(tubulin_ag, ns_filter=None):
        #if ag_ns == 'HGNC':
        #    ag_id = hgnc_client.get_hgnc_id(ag_id)
        source_list.append((ag_ns, ag_id))
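
For reference, a hedged sketch of the imports the child-expansion step above seems to assume; these are standard INDRA locations, but they are listed here as assumptions since the original file header is not shown:

import csv
import indra.tools.assemble_corpus as ac
from indra.statements import Agent
from indra.ontology.bio import bio_ontology
from indra.tools.expand_families import Expander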
Code example #32
def test_load_stmts():
    with open('_test.pkl', 'wb') as fh:
        pickle.dump([st1], fh, protocol=2)
    st_loaded = ac.load_statements('_test.pkl')
    assert (len(st_loaded) == 1)
    assert (st_loaded[0].equals(st1))
Code example #33
             norm_uuid_counts,
             color='orange',
             alpha=0.8,
             label='Statements')
    plt.plot(lengths, norm_node_counts, color='blue', alpha=0.8, label='Nodes')
    plt.legend(loc='upper left', fontsize=pf.fontsize, frameon=False)
    ax = plt.gca()
    pf.format_axis(ax)


if __name__ == '__main__':
    source = sys.argv[2]
    target = sys.argv[3]
    if len(sys.argv) > 4:
        max_depth = int(sys.argv[4])
    stmts = ac.load_statements(sys.argv[1])
    print(len(stmts))
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = [s for s in stmts if s.agent_list()[0]]
    print(len(stmts))
    from util import pkldump
    import ipdb
    ipdb.set_trace()

    #ppa = PysbPreassembler(stmts)
    #ppa.replace_activities()
    #stmts = ppa.statements

    #g = stmts_to_digraph(stmts)
Code example #34
    print("  Mapped:  %d (%0.1f)" % (n_map, pct(n_map, n)))
    print("%% Mapped:  %0.1f" % pct(n_map, n_inv))
    print()
    print("Total site occurrences: %d" % f)
    print("  Valid:   %d (%0.1f)" % (f_val, pct(f_val, f)))
    print("  Invalid: %d (%0.1f)" % (f_inv, pct(f_inv, f)))
    print("  Mapped:  %d (%0.1f)" % (f_map, pct(f_map, f)))
    print("Pct occurrences mapped: %0.1f" % pct(f_map, f_inv))
    print()
    # Sample 100 invalid-unmapped (by unique sites)
    # Sample 100 invalid-mapped (by unique sites)


if __name__ == '__main__':
    outf = '../phase3_eval/output'
    prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    site_info = map_statements(prior_stmts,
                               source='prior',
                               outfile='prior_sites.csv')

    #reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
    #stmts = prior_stmts
    #stmts = reach_stmts
    #stmts = ac.map_grounding(stmts, save=pjoin(outf, 'gmapped_stmts.pkl'))
    #stmts = ac.load_statements(pjoin(outf, 'gmapped_stmts.pkl'))

    sys.exit()
    """
    valid, sites, sm = get_incorrect_sites(do_methionine_offset=True,
                                 do_orthology_mapping=True,
                                 do_isoform_mapping=True)
Code example #35
File: run_ras_boolnet.py Project: johnbachman/indra
    else:
        on_nodes = on
    coll = boolean2.util.Collector()
    bn_str = boolean2.modify_states(bn_str, turnon=on, turnoff=off)
    model = boolean2.Model(text=bn_str, mode='async')
    for i in range(nsim):
        model.initialize()
        model.iterate(steps=nsteps)
        coll.collect(states=model.states, nodes=model.nodes)
    avgs = coll.get_averages(normalize=True)
    return avgs


if __name__ == '__main__':
    # Build Boolean net for basic pathway
    st = ac.load_statements('ras_pathway.pkl')
    sa = SifAssembler(st)
    sa.make_model(use_name_as_key=True)
    sa.save_model('ras_pathway.sif')
    bn_str = sa.print_boolean_net('ras_pathway_bn.txt')

    # Build Boolean net for extended pathway
    st_ext = ac.load_statements('ras_pathway_extension.pkl')
    sa = SifAssembler(st + st_ext)
    sa.make_model(use_name_as_key=True)
    sa.save_model('ras_pathway_extension.sif')
    bn_str = sa.print_boolean_net('ras_pathway_extension_bn.txt')

    # Condition 1
    off = []
    on = ['GROWTH-FACTOR']