Esempio n. 1
0
def biopax_process_pc_neighborhood():
    """Run a PathwayCommons neighborhood query and return INDRA Statements.

    The request body is read as UTF-8 encoded JSON and its ``genes`` entry
    (``None`` when absent) is handed to ``biopax.process_pc_neighborhood``.
    The resulting processor is converted to the response by
    ``_stmts_from_proc``. An ``OPTIONS`` request is answered with an empty
    dict without reading the body.
    """
    # Answer OPTIONS requests right away; the body is never touched.
    if request.method == 'OPTIONS':
        return {}
    payload = json.loads(request.body.read().decode('utf-8'))
    processor = biopax.process_pc_neighborhood(payload.get('genes'))
    return _stmts_from_proc(processor)
Esempio n. 2
0
def biopax_process_pc_neighborhood():
    """Handle a PathwayCommons neighborhood request, return INDRA Statements.

    For any method other than ``OPTIONS`` the UTF-8 request body is parsed as
    JSON, the value stored under ``genes`` is looked up (``None`` if missing)
    and passed to ``biopax.process_pc_neighborhood``; the processor is then
    turned into the response via ``_stmts_from_proc``. ``OPTIONS`` requests
    get an empty dict.
    """
    if request.method != 'OPTIONS':
        raw_text = request.body.read().decode('utf-8')
        gene_names = json.loads(raw_text).get('genes')
        return _stmts_from_proc(biopax.process_pc_neighborhood(gene_names))
    # OPTIONS: nothing to process, reply with an empty payload.
    return {}
Esempio n. 3
0
File: api.py Progetto: budakn/INDRA
def biopax_process_pc_neighborhood():
    """Run a PathwayCommons neighborhood query and return INDRA Statements.

    The UTF-8 request body is parsed as JSON and its ``genes`` entry is
    passed to ``biopax.process_pc_neighborhood``.

    Returns
    -------
    dict
        ``{}`` for an ``OPTIONS`` request. Otherwise a dict with a single
        ``'statements'`` key holding ``stmts_to_json(bp.statements)``, or an
        empty list when the processor or its statement list is falsy.
    """
    if request.method == 'OPTIONS':
        return {}
    payload = json.loads(request.body.read().decode('utf-8'))
    processor = biopax.process_pc_neighborhood(payload.get('genes'))
    # A missing processor and an empty statement list give the same reply.
    if not processor or not processor.statements:
        return {'statements': []}
    return {'statements': stmts_to_json(processor.statements)}
Esempio n. 4
0
File: api.py Progetto: steppi/indra
    def post(self):
        """Query the PathwayCommons neighborhood and return INDRA Statements.

        Parameters
        ----------
        genes : list
            HGNC gene symbols whose neighborhood should be searched.
            Examples: ['BRAF'], ['BRAF', 'MAP2K1']

        Returns
        -------
        statements : list[indra.statements.Statement.to_json()]
            The INDRA Statements that were extracted.
        """
        # The gene list comes from the JSON request body (None if not given).
        gene_symbols = request.json.get('genes')
        processor = biopax.process_pc_neighborhood(gene_symbols)
        return _stmts_from_proc(processor)
Esempio n. 5
0
    def get_biopax_stmts(self,
                         filter=False,
                         query='pathsbetween',
                         database_filter=None):
        """Get relevant statements from Pathway Commons.

        Performs a "paths between" (or "neighborhood") query for the genes in
        :py:attr:`gene_list` and uses the results to build statements. When
        :py:attr:`basename` is set, this function caches two files: the list
        of statements built from the query, which is cached in
        `<basename>_biopax_stmts.pkl`, and the OWL model returned by the
        Pathway Commons Web API, which is cached in
        `<basename>_pc_pathsbetween.owl` (the same file name is used for both
        query types). If these cached files are found, then the results are
        returned based on the cached file and Pathway Commons is not queried
        again.

        Parameters
        ----------
        filter : Optional[bool]
            If True, the statements are passed through
            ``ac.filter_gene_list`` with :py:attr:`gene_list`, using policy
            'one' when more than one gene is given and 'all' otherwise.
            Default is False. Note that statements loaded from the pickle
            cache are returned as stored, without applying this filter.
        query : Optional[str]
            Defines what type of query is executed. The two options are
            'pathsbetween' which finds paths between the given list of genes
            and only works if more than 1 gene is given, and 'neighborhood'
            which searches the immediate neighborhood of each given gene.
            Note that for pathsbetween queries with more than 60 genes, the
            query will be executed in multiple blocks for scalability.
        database_filter: Optional[list[str]]
            A list of PathwayCommons databases to include in the query.

        Returns
        -------
        list of :py:class:`indra.statements.Statement`
            List of INDRA statements extracted from Pathway Commons. An empty
            list is returned for an invalid query type or when no processor
            could be obtained.
        """
        use_cache = self.basename is not None
        # If we're using a cache, initialize the appropriate filenames
        if use_cache:
            biopax_stmt_path = '%s_biopax_stmts.pkl' % self.basename
            biopax_ras_owl_path = '%s_pc_pathsbetween.owl' % self.basename
        # Check for cached Biopax stmt file at the given path; if it's there,
        # return the statements from the cache. isfile (rather than exists)
        # keeps a directory with the same name from being treated as a cache.
        if use_cache and os.path.isfile(biopax_stmt_path):
            logger.info("Loading Biopax statements from %s", biopax_stmt_path)
            # NOTE: unpickling runs arbitrary code from the file; the cache
            # location must only ever contain files written by this method.
            with open(biopax_stmt_path, 'rb') as f:
                return pickle.load(f)
        # Check for cached file before querying Pathway Commons Web API
        if use_cache and os.path.isfile(biopax_ras_owl_path):
            logger.info("Loading Biopax from OWL file %s",
                        biopax_ras_owl_path)
            bp = biopax.process_owl(biopax_ras_owl_path)
        # OWL file not found; do query and save to file
        else:
            if (len(self.gene_list) < 2) and (query == 'pathsbetween'):
                logger.warning('Using neighborhood query for one gene.')
                query = 'neighborhood'
            if query == 'pathsbetween':
                # Large gene lists are queried in blocks of 60 genes
                block_size = 60 if len(self.gene_list) > 60 else None
                bp = biopax.process_pc_pathsbetween(
                    self.gene_list,
                    database_filter=database_filter,
                    block_size=block_size)
            elif query == 'neighborhood':
                bp = biopax.process_pc_neighborhood(
                    self.gene_list, database_filter=database_filter)
            else:
                logger.error('Invalid query type: %s', query)
                return []
            # Save the model if we're caching and actually got a processor
            if use_cache and bp is not None:
                bp.save_model(biopax_ras_owl_path)
        # NOTE(review): the biopax API appears to be able to return None
        # (e.g. for a failed query) -- confirm. Bail out here instead of
        # failing with an AttributeError on bp.statements below.
        if bp is None:
            logger.error('Could not obtain a BioPAX processor; '
                         'returning no statements.')
            return []
        # Save statements to pickle file if we're caching
        if use_cache:
            with open(biopax_stmt_path, 'wb') as f:
                pickle.dump(bp.statements, f)
        # Optionally filter the statements against our gene set
        if filter:
            policy = 'one' if len(self.gene_list) > 1 else 'all'
            return ac.filter_gene_list(bp.statements, self.gene_list, policy)
        return bp.statements
Esempio n. 6
0
if __name__ == '__main__':
    # Entities to collect coverage statistics for, keyed by group name.
    # The OrderedDict is built from a list of pairs rather than a dict
    # literal: a dict literal may lose its order (on interpreters without
    # ordered dicts) before OrderedDict ever sees it.
    all_genes = OrderedDict([
        ('signaling', ['MAPK1', 'AKT1', 'JAK1', 'GNAS', 'CTNNB1']),
        ('genereg', ['MYC', 'TP53', 'STAT3', 'FOXO3', 'JUN']),
        ('metabolism', ['IDH1', 'PFKL', 'DHFR', 'GLUL', 'NOS1',
                        'CHEBI:20506', 'CHEBI:28300', 'CHEBI:16084',
                        'CHEBI:32816', 'CHEBI:16480']),
    ])

    # stats[group][gene][source] -> (num_all, num_extracted)
    stats = {group: {g: {} for g in genes}
             for group, genes in all_genes.items()}
    for group, genes in all_genes.items():
        for gene in genes:
            print('%s\n======' % gene)
            # BEL: query the neighborhood of the entity
            belp = bel.process_ndex_neighborhood([gene])
            num_all, num_extracted = get_bel_statistics(belp)
            stats[group][gene]['bel'] = (num_all, num_extracted)
            print(num_all, num_extracted)
            # BioPAX: CHEBI entries are queried by their identifiers.org URL,
            # everything else by the name as given
            if gene.startswith('CHEBI:'):
                bpx_query = 'http://identifiers.org/chebi/' + gene
            else:
                bpx_query = gene
            biopp = biopax.process_pc_neighborhood([bpx_query])
            num_all, num_extracted = get_biopax_statistics(biopp)
            print(num_all, num_extracted)
            stats[group][gene]['biopax'] = (num_all, num_extracted)

    # Persist the raw numbers, then print the summary
    with open('db_coverage_stats.json', 'w') as fh:
        json.dump(stats, fh)

    print_stats(stats)
Esempio n. 7
0
                     'genereg': ['MYC', 'TP53', 'STAT3', 'FOXO3', 'JUN'],
                     'metabolism': ['IDH1', 'PFKL', 'DHFR', 'GLUL', 'NOS1',
                                    'CHEBI:20506', 'CHEBI:28300', 'CHEBI:16084',
                                    'CHEBI:32816', 'CHEBI:16480']})

    stats = {
        group: {g: {}
                for g in genes}
        for group, genes in all_genes.items()
    }
    for group, genes in all_genes.items():
        for gene in genes:
            print('%s\n======' % gene)
            belp = bel.process_ndex_neighborhood([gene])
            num_all, num_extracted = get_bel_statistics(belp)
            stats[group][gene]['bel'] = (num_all, num_extracted)
            print(num_all, num_extracted)
            if gene.startswith('CHEBI:'):
                bpx_query = 'http://identifiers.org/chebi/' + gene
            else:
                bpx_query = gene
            biopp = biopax.process_pc_neighborhood([bpx_query])
            num_all, num_extracted = get_biopax_statistics(biopp)
            print(num_all, num_extracted)
            stats[group][gene]['biopax'] = (num_all, num_extracted)

    with open('db_coverage_stats.json', 'w') as fh:
        json.dump(stats, fh)

    print_stats(stats)
Esempio n. 8
0
    def get_biopax_stmts(self, filter=False, query='pathsbetween',
                         database_filter=None):
        """Get relevant statements from Pathway Commons.

        Performs a "paths between" (or "neighborhood") query for the genes in
        :py:attr:`gene_list` and uses the results to build statements. When
        :py:attr:`basename` is set, this function caches two files: the list
        of statements built from the query, which is cached in
        `<basename>_biopax_stmts.pkl`, and the OWL model returned by the
        Pathway Commons Web API, which is cached in
        `<basename>_pc_pathsbetween.owl` (the same file name is used for both
        query types). If these cached files are found, then the results are
        returned based on the cached file and Pathway Commons is not queried
        again.

        Parameters
        ----------
        filter : Optional[bool]
            If True, the statements are passed through
            ``ac.filter_gene_list`` with :py:attr:`gene_list`, using policy
            'one' when more than one gene is given and 'all' otherwise.
            Default is False. Note that statements loaded from the pickle
            cache are returned as stored, without applying this filter.
        query : Optional[str]
            Defines what type of query is executed. The two options are
            'pathsbetween' which finds paths between the given list of genes
            and only works if more than 1 gene is given, and 'neighborhood'
            which searches the immediate neighborhood of each given gene.
            Note that for pathsbetween queries with more than 60 genes, the
            query will be executed in multiple blocks for scalability.
        database_filter: Optional[list[str]]
            A list of PathwayCommons databases to include in the query.

        Returns
        -------
        list of :py:class:`indra.statements.Statement`
            List of INDRA statements extracted from Pathway Commons. An empty
            list is returned for an invalid query type or when no processor
            could be obtained.
        """
        caching = self.basename is not None
        # If we're using a cache, initialize the appropriate filenames
        if caching:
            biopax_stmt_path = '%s_biopax_stmts.pkl' % self.basename
            biopax_ras_owl_path = '%s_pc_pathsbetween.owl' % self.basename
        # Check for cached Biopax stmt file at the given path
        # if it's there, return the statements from the cache
        if caching and os.path.isfile(biopax_stmt_path):
            logger.info("Loading Biopax statements from %s", biopax_stmt_path)
            # NOTE: unpickling runs arbitrary code from the file; the cache
            # location must only ever contain files written by this method.
            with open(biopax_stmt_path, 'rb') as f:
                bp_statements = pickle.load(f)
            return bp_statements
        # Check for cached file before querying Pathway Commons Web API
        if caching and os.path.isfile(biopax_ras_owl_path):
            logger.info("Loading Biopax from OWL file %s", biopax_ras_owl_path)
            bp = biopax.process_owl(biopax_ras_owl_path)
        # OWL file not found; do query and save to file
        else:
            if (len(self.gene_list) < 2) and (query == 'pathsbetween'):
                logger.warning('Using neighborhood query for one gene.')
                query = 'neighborhood'
            if query == 'pathsbetween':
                # Large gene lists are queried in blocks of 60 genes
                if len(self.gene_list) > 60:
                    block_size = 60
                else:
                    block_size = None
                bp = biopax.process_pc_pathsbetween(
                    self.gene_list,
                    database_filter=database_filter,
                    block_size=block_size)
            elif query == 'neighborhood':
                bp = biopax.process_pc_neighborhood(
                    self.gene_list,
                    database_filter=database_filter)
            else:
                logger.error('Invalid query type: %s', query)
                return []
        # NOTE(review): the biopax API appears to be able to return None
        # (e.g. for a failed query) -- confirm. Stop here instead of failing
        # with an AttributeError on bp.save_model / bp.statements below.
        if bp is None:
            logger.error('Could not obtain a BioPAX processor; '
                         'returning no statements.')
            return []
        if caching:
            # Save the model only if it was freshly queried, i.e. the OWL
            # cache file does not exist yet
            if not os.path.isfile(biopax_ras_owl_path):
                bp.save_model(biopax_ras_owl_path)
            # Save statements to pickle file
            with open(biopax_stmt_path, 'wb') as f:
                pickle.dump(bp.statements, f)
        # Optionally filter the statements against our gene set
        if filter:
            policy = 'one' if len(self.gene_list) > 1 else 'all'
            stmts = ac.filter_gene_list(bp.statements, self.gene_list, policy)
        else:
            stmts = bp.statements
        return stmts