Example #1
0
def test_get_protein_expression_cell_type_missing():
    """Check that an unknown cell line maps to None next to a known one."""
    genes = ['EGFR']
    cell_lines = ['BT20_BREAST', 'XYZ']
    protein_amounts = context_client.get_protein_expression(genes, cell_lines)
    # The known cell line is present and carries a large EGFR amount
    assert 'BT20_BREAST' in protein_amounts
    assert protein_amounts['BT20_BREAST']['EGFR'] > 10000
    # The unknown cell line is still a key, but has no data
    assert 'XYZ' in protein_amounts
    assert protein_amounts['XYZ'] is None
Example #2
0
def test_get_protein_expression():
    """Check that EGFR expression in BT20_BREAST is returned and is large."""
    res = context_client.get_protein_expression(['EGFR'], ['BT20_BREAST'])
    assert res is not None
    # Result is keyed by cell line first, then by gene
    by_gene = res.get('BT20_BREAST')
    assert by_gene is not None
    amount = by_gene.get('EGFR')
    assert amount is not None
    assert amount > 1000
    assert unicode_strs(res)
Example #3
0
    def set_context(self, cell_type):
        """Set protein expression data and mutational status as node attribute

        This method uses :py:mod:`indra.databases.context_client` to get
        protein expression levels and mutational status for a given cell type
        and set a node attribute for proteins accordingly.

        If neither expression nor mutation data could be obtained, nothing
        is added to the network. If only one of the two is available, the
        attributes for the available one are still set.

        Parameters
        ----------
        cell_type : str
            Cell type name for which expression levels are queried.
            The cell type name follows the CCLE database conventions.
            Example: LOXIMVI_SKIN, BT20_BREAST
        """
        node_names = [node['n'] for node in self.cx['nodes']]
        res_expr = context_client.get_protein_expression(
            node_names, [cell_type])
        res_mut = context_client.get_mutations(node_names, [cell_type])
        # Both results are keyed by cell type; the entry can be missing or
        # None when the cell type is unknown
        res_expr = res_expr.get(cell_type)
        res_mut = res_mut.get(cell_type)
        if not res_expr:
            logger.warning(
                'Could not get protein expression for %s cell type.',
                cell_type)

        if not res_mut:
            logger.warning(
                'Could not get mutational status for %s cell type.',
                cell_type)

        if not res_expr and not res_mut:
            return

        # Only one of the two lookups may have failed; substitute an empty
        # mapping so the per-node lookups below do not fail on None
        res_expr = res_expr or {}
        res_mut = res_mut or {}

        self.cx['networkAttributes'].append({
            'n': 'cellular_context',
            'v': cell_type
        })
        counter = 0
        for node in self.cx['nodes']:
            amount = res_expr.get(node['n'])
            mut = res_mut.get(node['n'])
            if amount is not None:
                self.cx['nodeAttributes'].append({
                    'po': node['@id'],
                    'n': 'expression_amount',
                    'v': int(amount)
                })
            if mut is not None:
                # Any non-empty mutation entry counts as mutated
                self.cx['nodeAttributes'].append({
                    'po': node['@id'],
                    'n': 'is_mutated',
                    'v': 1 if mut else 0
                })
            if mut is not None or amount is not None:
                counter += 1
        logger.info('Set context for %d nodes.' % counter)
Example #4
0
 def get_expression(self, genes, cell_line):
     """Return expression amounts for genes in one cell line.

     Locally held data in ``self.expr`` is used when it has an entry for
     the cell line; otherwise the query is forwarded to the context client.
     Genes without a local value map to None. The result is keyed by cell
     line, then by gene.
     """
     if cell_line not in self.expr:
         return context_client.get_protein_expression(genes, [cell_line])
     # Start every requested gene at None, then fill in the local values
     amounts = dict.fromkeys(genes)
     amounts.update((gene, self.expr[cell_line].get(gene)) for gene in genes)
     return {cell_line: amounts}
Example #5
0
 def get_expression(self, genes, cell_line):
     """Look up expression amounts for the given genes in a cell line.

     If ``self.expr`` holds data for the cell line, it is the only source
     consulted and genes absent from it are reported as None. Otherwise
     the context client is queried. Returns a dict keyed by cell line
     whose value is a dict keyed by gene.
     """
     # Guard clause: nothing held locally, so defer to the remote client
     if cell_line not in self.expr:
         return context_client.get_protein_expression(genes, [cell_line])

     per_gene = {name: None for name in genes}
     for name in genes:
         per_gene[name] = self.expr[cell_line].get(name)
     return {cell_line: per_gene}
Example #6
0
    def set_CCLE_context(self, cell_types):
        """Set context of all nodes and node members from CCLE.

        Expression amounts and mutations for ``self._gene_names`` are
        queried from the context client for the given cell types. The
        result is stored in ``self._context['CCLE']`` as a dict with keys
        ``'bin_expression'`` (binned expression for each bin count from
        3 to 9) and ``'mutation'`` (1/0 flags per cell line and gene).

        Parameters
        ----------
        cell_types : list
            Cell type names passed on to the context client.
        """
        self.get_gene_names()

        # Get expression and mutations from context client
        exp_values = \
            context_client.get_protein_expression(self._gene_names, cell_types)
        mut_values = \
            context_client.get_mutations(self._gene_names, cell_types)

        # Make a dict of presence/absence of mutations
        muts = {cell_line: {} for cell_line in cell_types}
        for cell_line, entries in mut_values.items():
            # A cell line without data has None instead of a dict
            if entries is not None:
                for gene, mutations in entries.items():
                    muts[cell_line][gene] = 1 if mutations else 0

        # Create bins for the exp values
        # because colorbrewer only does 3-9 bins and I don't feel like
        # reinventing color scheme theory, this will only bin 3-9 bins
        def bin_exp(expression_dict):
            """Return {n_bins: binned copy of expression_dict} for 3-9 bins.

            Amounts are binned on a log10 scale. A gene whose amount is
            None gets bin index n_bins. A cell line whose entry is None
            (no data at all) is left as None.
            """
            amounts = [val
                       for genes in expression_dict.values()
                       if genes is not None
                       for val in genes.values()
                       if val is not None]
            # The log transform does not depend on the bin count
            log_amounts = np.log10(amounts)
            # Keep the upper edge of every bin, dropping the leftmost bound
            thr_dict = {n_bins: np.histogram(log_amounts, n_bins)[1][1:]
                        for n_bins in range(3, 10)}
            # this dict isn't yet binned, that happens in the loop
            binned_dict = {x: deepcopy(expression_dict) for x in range(3, 10)}
            for n_bins, binned in binned_dict.items():
                for genes in binned.values():
                    if genes is None:
                        # No data for this cell line, nothing to bin
                        continue
                    for gene, amount in genes.items():
                        # last bin is reserved for None
                        if amount is None:
                            genes[gene] = n_bins
                            continue
                        val = np.log10(amount)
                        for thr_idx, thr in enumerate(thr_dict[n_bins]):
                            if val <= thr:
                                genes[gene] = thr_idx
                                break
            return binned_dict

        binned_exp = bin_exp(exp_values)

        context = {'bin_expression': binned_exp, 'mutation': muts}
        self._context['CCLE'] = context
Example #7
0
    def set_CCLE_context(self, cell_types):
        """Set context of all nodes and node members from CCLE.

        Queries the context client for expression amounts and mutations of
        ``self._gene_names`` in the given cell types, then stores a dict
        with the keys ``'bin_expression'`` and ``'mutation'`` in
        ``self._context['CCLE']``.

        Parameters
        ----------
        cell_types : list
            Cell type names passed on to the context client.
        """
        self.get_gene_names()

        # Get expression and mutations from context client
        exp_values = \
            context_client.get_protein_expression(self._gene_names, cell_types)
        mut_values = \
            context_client.get_mutations(self._gene_names, cell_types)

        # Make a dict of presence/absence of mutations
        muts = {cell_line: {} for cell_line in cell_types}
        for cell_line, entries in mut_values.items():
            if entries is None:
                # No mutation data for this cell line
                continue
            for gene, mutations in entries.items():
                muts[cell_line][gene] = 1 if mutations else 0

        # Create bins for the exp values
        # because colorbrewer only does 3-9 bins and I don't feel like
        # reinventing color scheme theory, this will only bin 3-9 bins
        def bin_exp(expression_dict):
            """Bin log10 expression amounts for every bin count in 3-9.

            Returns a dict keyed by bin count whose values are copies of
            expression_dict with each amount replaced by its bin index.
            Genes with a None amount get index n_bins; cell lines whose
            entry is None are kept as None instead of raising.
            """
            d = expression_dict
            known_values = []
            for line in d:
                # The client reports None for a cell line it has no data on
                if d[line] is None:
                    continue
                for gene in d[line]:
                    val = d[line][gene]
                    if val is not None:
                        known_values.append(val)
            log_values = np.log10(known_values)
            thr_dict = {}
            for n_bins in range(3, 10):
                # Drop the leftmost bound, keep each bin's upper edge
                thr_dict[n_bins] = np.histogram(log_values, n_bins)[1][1:]
            # this dict isn't yet binned, that happens in the loop
            binned_dict = {x: deepcopy(expression_dict) for x in range(3, 10)}
            for n_bins in binned_dict:
                for line in binned_dict[n_bins]:
                    line_data = binned_dict[n_bins][line]
                    if line_data is None:
                        continue
                    for gene in line_data:
                        # last bin is reserved for None
                        if line_data[gene] is None:
                            line_data[gene] = n_bins
                        else:
                            val = np.log10(line_data[gene])
                            for thr_idx, thr in enumerate(thr_dict[n_bins]):
                                if val <= thr:
                                    line_data[gene] = thr_idx
                                    break
            return binned_dict
        binned_exp = bin_exp(exp_values)

        context = {'bin_expression': binned_exp,
                   'mutation': muts}
        self._context['CCLE'] = context
Example #8
0
    def set_context(self, cell_type):
        """Set protein expression data and mutational status as node attribute

        This method uses :py:mod:`indra.databases.context_client` to get
        protein expression levels and mutational status for a given cell type
        and set a node attribute for proteins accordingly.

        Nothing is added when both lookups come back empty. When just one
        of them is empty, a warning is logged for it and the attributes
        from the other lookup are still set.

        Parameters
        ----------
        cell_type : str
            Cell type name for which expression levels are queried.
            The cell type name follows the CCLE database conventions.
            Example: LOXIMVI_SKIN, BT20_BREAST
        """
        node_names = [node['n'] for node in self.cx['nodes']]
        res_expr = context_client.get_protein_expression(node_names,
                                                         [cell_type])
        res_mut = context_client.get_mutations(node_names,
                                               [cell_type])
        # Pick out the entry for the requested cell type; it may be
        # absent or None
        res_expr = res_expr.get(cell_type)
        res_mut = res_mut.get(cell_type)
        if not res_expr:
            msg = 'Could not get protein expression for %s cell type.' % \
                  cell_type
            logger.warning(msg)

        if not res_mut:
            msg = 'Could not get mutational status for %s cell type.' % \
                  cell_type
            logger.warning(msg)

        if not res_expr and not res_mut:
            return

        # One of the two may still be None or empty here; fall back to an
        # empty dict so that .get() below works for the missing one
        if not res_expr:
            res_expr = {}
        if not res_mut:
            res_mut = {}

        self.cx['networkAttributes'].append({'n': 'cellular_context',
                                             'v': cell_type})
        counter = 0
        for node in self.cx['nodes']:
            amount = res_expr.get(node['n'])
            mut = res_mut.get(node['n'])
            if amount is not None:
                node_attribute = {'po': node['@id'],
                                  'n': 'expression_amount',
                                  'v': int(amount)}
                self.cx['nodeAttributes'].append(node_attribute)
            if mut is not None:
                # Any truthy mutation entry is flagged as mutated
                is_mutated = 1 if mut else 0
                node_attribute = {'po': node['@id'],
                                  'n': 'is_mutated',
                                  'v': is_mutated}
                self.cx['nodeAttributes'].append(node_attribute)
            if mut is not None or amount is not None:
                counter += 1
        logger.info('Set context for %d nodes.' % counter)
Example #9
0
if __name__ == '__main__':
    # Run run_task1.py before running this one
    # NOTE(review): both pickle files are presumably outputs of that earlier
    # step; pickle must only ever be loaded from trusted files.
    with open(prefixed_pkl('pysb_stmts'), 'rb') as stmts_file:
        stmts = pickle.load(stmts_file)
    with open('scored_paths.pkl', 'rb') as paths_file:
        scored_paths, model = pickle.load(paths_file)

    # Merge path groups and their path details over all cell lines and drugs
    all_groups = set()
    all_path_details = {}
    for drug_dict in scored_paths.values():
        for paths in drug_dict.values():
            groups, path_details = group_scored_paths(paths, model, stmts)
            for pg, path_list in path_details.items():
                if pg not in all_path_details:
                    all_path_details[pg] = path_list
                else:
                    all_path_details[pg] |= path_list
            all_groups |= groups

    # The first element of each tuple in a group is used as the gene name
    gene_names = {tup[0] for group in all_groups for tup in group}
    cell_lines_skin = ['%s_SKIN' % cl for cl in cell_lines]
    protein_data = get_protein_expression(gene_names, cell_lines_skin)

    # Best score among all paths belonging to each group
    top_scores = {
        group: max(score for _, score in all_path_details[group])
        for group in all_groups
    }

    print_dict(top_scores)
Example #10
0
    def set_context(self, *args, **kwargs):
        """Set protein expression and mutation data as node attributes

        This method uses :py:mod:`indra.databases.context_client` to get
        protein expression levels and mutations for a given cell type and
        sets ``expression`` (log10 of the amount, or None) and ``mutation``
        on each node and on each member of a node with members. Nodes with
        members and nodes whose name starts with 'Group' get expression
        None and mutation 0 at the node level.

        The method returns without changing any node if no cell type is
        given or if either the expression or the mutation query returns
        an empty result. All arguments are read from keyword arguments;
        positional arguments are not used.

        Parameters
        ----------
        cell_type : str
            Cell type name for which expression levels are queried.
            The cell type name follows the CCLE database conventions.
            Example: LOXIMVI_SKIN, BT20_BREAST

        bin_expression : bool
            If True, the log10 expression values of all nodes and members
            are put into bins and stored as ``bin_expression``. An
            additional bin (index n_bins) is used to indicate that the
            context_client returned None. Color scales are stored in
            ``self._exp_colorscale`` and ``self._mut_colorscale``.

        n_bins : int
            If specified, split the expression levels into the given number
            of bins, clamped to the range 3-9. If not specified, or not an
            int, default will be 5.
        """
        cell_type = kwargs.get('cell_type')
        if not cell_type:
            logger.warning('No cell type given.')
            return

        # Collect all gene names in network
        gene_names = []
        for node in self._nodes:
            members = node['data'].get('members')
            if members:
                gene_names += list(members.keys())
            else:
                # Nodes named 'Group...' are not queried as genes
                if node['data']['name'].startswith('Group'):
                    continue
                gene_names.append(node['data']['name'])

        # Get expression and mutation from context client
        # NOTE(review): the reshaping below indexes each value by cell_type,
        # i.e. it assumes results keyed by gene and then by cell type, and
        # cell_type is passed as a bare string. Verify against the
        # context_client version in use.
        exp = context_client.get_protein_expression(gene_names, cell_type)
        mut = context_client.get_mutations(gene_names, cell_type)
        if not exp:
            logger.warning('Could not get context for %s cell type.' %
                           cell_type)
            return
        else:
            exp = {k: v[cell_type] for k, v in exp.items()}
        if not mut:
            logger.warning('Could not get mutations for %s cell type.' %
                           cell_type)
            return
        else:
            mut = {k: v[cell_type] for k, v in mut.items()}

        # Get expression and mutation for specific gene
        def get_expr_mut(name, expr_data, mut_data):
            # Expression is reported on a log10 scale; None means no data
            amount = expr_data.get(name)
            if amount is None:
                expression = None
            else:
                expression = np.log10(amount)
            # Missing mutation data is treated as not mutated (0)
            mutation = mut_data.get(name)
            if mutation is not None:
                mutation = int(mutation)
            else:
                mutation = 0
            return expression, mutation

        # Set node properties for expression and mutation
        for node in self._nodes:
            members = node['data'].get('members')
            if members:
                for member in members.keys():
                    expression, mutation = get_expr_mut(member, exp, mut)
                    node['data']['members'][member]['expression'] = expression
                    node['data']['members'][member]['mutation'] = mutation
                # The node itself carries no values, only its members do
                node['data']['expression'] = None
                node['data']['mutation'] = 0
            else:
                if node['data']['name'].startswith('Group'):
                    node['data']['expression'] = None
                    node['data']['mutation'] = 0
                else:
                    expression, mutation = get_expr_mut(
                        node['data']['name'], exp, mut)
                    node['data']['expression'] = expression
                    node['data']['mutation'] = mutation

        # Binning for the purpose of assigning colors
        if kwargs.get('bin_expression'):
            # how many bins? If not specified, set to 5
            n_bins = 5
            # The bin count is read from the 'n_bins' keyword argument;
            # values that are not exactly of type int are ignored
            user_bins = kwargs.get('n_bins')
            if type(user_bins) == int:
                n_bins = user_bins
                if n_bins > 9:
                    n_bins = 9
                    logger.info('Only 9 bins allowed. Setting n_bins = 9.')
                if n_bins < 3:
                    n_bins = 3
                    logger.info('Need at least 3 bin. Setting n_bins = 3.')
            # Create color scale for unmutated gene expression
            # these hex values match the colorbrewer2 9-class Greens palette
            wt_hexes = [
                '#f7fcf5', '#e5f5e0', '#c7e9c0', '#a1d99b', '#74c476',
                '#41ab5d', '#238b45', '#006d2c', '#00441b'
            ]
            exp_wt_colorscale = _build_color_scale(wt_hexes, n_bins)
            # tack on a gray for no expression data
            exp_wt_colorscale.append('#bdbdbd')
            self._exp_colorscale = exp_wt_colorscale
            # create color scale for mutated gene expression
            # these hex values match the colorbrewer2 9-class Oranges palette
            mut_hexes = [
                '#fff5eb', '#fee6ce', '#fdd0a2', '#fdae6b', '#fd8d3c',
                '#f16913', '#d94801', '#a63603', '#7f2704'
            ]
            exp_mut_colorscale = _build_color_scale(mut_hexes, n_bins)
            # tack on a gray for no expression data
            exp_mut_colorscale.append('#bdbdbd')
            self._mut_colorscale = exp_mut_colorscale
            # capture the expression levels of every gene in nodes
            exp_lvls = [n['data'].get('expression') for n in self._nodes]
            # capture the expression levels of every gene in family members
            m_exp_lvls = []
            for n in self._nodes:
                if n['data'].get('members'):
                    members = n['data']['members']
                    for m in members:
                        m_exp_lvls.append(members[m]['expression'])
            # combine node expressions and family expressions
            exp_lvls = exp_lvls + m_exp_lvls
            # get rid of None gene expressions
            exp_lvls = [x for x in exp_lvls if x is not None]
            # bin expression levels into n equal-width bins
            # bin index n_bins is reserved for None
            # this returns the bounds of each bin. so n_bins+1 bounds.
            # get rid of first value which is the leftmost bound
            bin_thr = np.histogram(exp_lvls, n_bins)[1][1:]
            # iterate over nodes
            for n in self._nodes:
                # if node has members set member bin_expression values
                if n['data'].get('members'):
                    members = n['data']['members']
                    for m in members:
                        # if expression is None, set to bin index n_bins
                        if members[m]['expression'] is None:
                            members[m]['bin_expression'] = n_bins
                        else:
                            # first upper bound that is >= the value
                            for thr_idx, thr in enumerate(bin_thr):
                                if members[m]['expression'] <= thr:
                                    members[m]['bin_expression'] = thr_idx
                                    break
                # set bin_expression for the node itself
                if n['data']['expression'] is None:
                    n['data']['bin_expression'] = n_bins
                else:
                    for thr_idx, thr in enumerate(bin_thr):
                        if n['data']['expression'] <= thr:
                            n['data']['bin_expression'] = thr_idx
                            break
Example #11
0
    def set_CCLE_context(self, cell_types):
        """Set context of all nodes and node members from CCLE.

        Expression amounts and mutations for ``self._gene_names`` are
        queried from the context client in chunks of 10 cell types, and
        the result is stored in ``self._context['CCLE']`` as a dict with
        keys ``'bin_expression'`` and ``'mutation'``, both keyed by cell
        line. The ``cell_types`` argument is not modified.

        Parameters
        ----------
        cell_types : list
            Cell type names passed on to the context client.
        """
        self.get_gene_names()
        gene_names = self._gene_names
        exp = {}
        mut = {}

        # context_client gives back a dict with genes as keys.
        # prefer lines keys, so this will need to be transposed
        def transpose_context(context_dict):
            """Turn {gene: {line: value}} into {line: {gene: value}}."""
            # An empty (or missing) result has nothing to transpose
            if not context_dict:
                return {}
            d_genes = list(context_dict)
            # The lines of the first gene are taken as the lines of all genes
            d_lines = list(context_dict[d_genes[0]])
            return {line: {gene: context_dict[gene][line]
                           for gene in d_genes}
                    for line in d_lines}

        # access the context service in chunks of cell types.
        # it will timeout if queried with larger chunks.
        # Work on a copy and slice by index so that the caller's list is
        # left untouched.
        chunk_size = 10
        all_cell_types = list(cell_types)
        for start in range(0, len(all_cell_types), chunk_size):
            cell_types_chunk = all_cell_types[start:start + chunk_size]
            exp_temp = context_client.get_protein_expression(gene_names,
                                                             cell_types_chunk)
            exp.update(transpose_context(exp_temp))
            mut_temp = context_client.get_mutations(gene_names,
                                                    cell_types_chunk)
            mut.update(transpose_context(mut_temp))

        # create bins for the exp values
        # because colorbrewer only does 3-9 bins and I don't feel like
        # reinventing color scheme theory, this will only bin 3-9 bins
        def bin_exp(expression_dict):
            """Return {n_bins: binned copy of expression_dict} for 3-9 bins.

            Amounts are binned on a log10 scale; a None amount gets the
            bin index n_bins.
            """
            amounts = [val
                       for genes in expression_dict.values()
                       for val in genes.values()
                       if val is not None]
            log_amounts = np.log10(amounts)
            # Keep the upper edge of every bin, dropping the leftmost bound
            thr_dict = {n_bins: np.histogram(log_amounts, n_bins)[1][1:]
                        for n_bins in range(3, 10)}
            # this dict isn't yet binned, that happens in the loop
            binned_dict = {x: deepcopy(expression_dict) for x in range(3, 10)}
            for n_bins, binned in binned_dict.items():
                for genes in binned.values():
                    for gene, amount in genes.items():
                        # last bin is reserved for None
                        if amount is None:
                            genes[gene] = n_bins
                            continue
                        val = np.log10(amount)
                        for thr_idx, thr in enumerate(thr_dict[n_bins]):
                            if val <= thr:
                                genes[gene] = thr_idx
                                break
            return binned_dict
        binned_exp = bin_exp(exp)
        context = {'bin_expression': binned_exp,
                   'mutation': mut}
        self._context['CCLE'] = context
Example #12
0
def test_get_protein_expression():
    """Check that a single gene / cell line query returns a large amount."""
    res = context_client.get_protein_expression('EGFR', 'BT20_BREAST')
    assert res is not None
    # Here the result is keyed by gene first, then by cell line
    by_cell_line = res.get('EGFR')
    assert by_cell_line is not None
    amount = by_cell_line.get('BT20_BREAST')
    assert amount is not None
    assert amount > 1000
Example #13
0
def test_get_protein_expression_gene_missing():
    """Check that an unknown gene maps to None next to a known one."""
    genes = ['EGFR', 'XYZ']
    cell_lines = ['BT20_BREAST']
    protein_amounts = context_client.get_protein_expression(genes, cell_lines)
    assert 'BT20_BREAST' in protein_amounts
    # The known gene has a large amount, the unknown one has no value
    assert protein_amounts['BT20_BREAST']['EGFR'] > 10000
    assert protein_amounts['BT20_BREAST']['XYZ'] is None
Example #14
0
    def set_context(self, *args, **kwargs):
        """Set protein expression and mutation data as node attributes

        This method uses :py:mod:`indra.databases.context_client` to get
        protein expression levels and mutations for a given cell type and
        sets ``expression`` (log10 of the amount, or None) and ``mutation``
        on each node and on each member of a node with members. Nodes with
        members and nodes whose name starts with 'Group' get expression
        None and mutation 0 at the node level.

        The method returns without changing any node if no cell type is
        given or if either the expression or the mutation query returns
        an empty result. All arguments are read from keyword arguments;
        positional arguments are not used.

        Parameters
        ----------
        cell_type : str
            Cell type name for which expression levels are queried.
            The cell type name follows the CCLE database conventions.
            Example: LOXIMVI_SKIN, BT20_BREAST

        bin_expression : bool
            If True, the log10 expression values of all nodes and members
            are put into bins and stored as ``bin_expression``. An
            additional bin (index n_bins) is used to indicate that the
            context_client returned None. Color scales are stored in
            ``self._exp_colorscale`` and ``self._mut_colorscale``.

        n_bins : int
            If specified, split the expression levels into the given number
            of bins, clamped to the range 3-9. If not specified, or not an
            int, default will be 5.
        """
        cell_type = kwargs.get('cell_type')
        if not cell_type:
            logger.warning('No cell type given.')
            return

        # Collect all gene names in network
        gene_names = []
        for node in self._nodes:
            members = node['data'].get('members')
            if members:
                gene_names += list(members.keys())
            else:
                # Nodes named 'Group...' are not queried as genes
                if node['data']['name'].startswith('Group'):
                    continue
                gene_names.append(node['data']['name'])

        # Get expression and mutation from context client
        # NOTE(review): the reshaping below indexes each value by cell_type,
        # i.e. it assumes results keyed by gene and then by cell type, and
        # cell_type is passed as a bare string. Verify against the
        # context_client version in use.
        exp = context_client.get_protein_expression(gene_names, cell_type)
        mut = context_client.get_mutations(gene_names, cell_type)
        if not exp:
            logger.warning('Could not get context for %s cell type.' %
                           cell_type)
            return
        else:
            exp = {k: v[cell_type] for k, v in exp.items()}
        if not mut:
            logger.warning('Could not get mutations for %s cell type.' %
                           cell_type)
            return
        else:
            mut = {k: v[cell_type] for k, v in mut.items()}

        # Get expression and mutation for specific gene
        def get_expr_mut(name, expr_data, mut_data):
            # Expression is reported on a log10 scale; None means no data
            amount = expr_data.get(name)
            if amount is None:
                expression = None
            else:
                expression = np.log10(amount)
            # Missing mutation data is treated as not mutated (0)
            mutation = mut_data.get(name)
            if mutation is not None:
                mutation = int(mutation)
            else:
                mutation = 0
            return expression, mutation

        # Set node properties for expression and mutation
        for node in self._nodes:
            members = node['data'].get('members')
            if members:
                for member in members.keys():
                    expression, mutation = get_expr_mut(member, exp, mut)
                    node['data']['members'][member]['expression'] = expression
                    node['data']['members'][member]['mutation'] = mutation
                # The node itself carries no values, only its members do
                node['data']['expression'] = None
                node['data']['mutation'] = 0
            else:
                if node['data']['name'].startswith('Group'):
                    node['data']['expression'] = None
                    node['data']['mutation'] = 0
                else:
                    expression, mutation = get_expr_mut(node['data']['name'],
                                                        exp, mut)
                    node['data']['expression'] = expression
                    node['data']['mutation'] = mutation

        # Binning for the purpose of assigning colors
        if kwargs.get('bin_expression'):
            # how many bins? If not specified, set to 5
            n_bins = 5
            # The bin count is read from the 'n_bins' keyword argument;
            # values that are not exactly of type int are ignored
            user_bins = kwargs.get('n_bins')
            if type(user_bins) == int:
                n_bins = user_bins
                if n_bins > 9:
                    n_bins = 9
                    logger.info('Only 9 bins allowed. Setting n_bins = 9.')
                if n_bins < 3:
                    n_bins = 3
                    logger.info('Need at least 3 bin. Setting n_bins = 3.')
            # Create color scale for unmutated gene expression
            # these hex values match the colorbrewer2 9-class Greens palette
            wt_hexes = ['#f7fcf5', '#e5f5e0', '#c7e9c0', '#a1d99b', '#74c476',
                        '#41ab5d', '#238b45', '#006d2c', '#00441b']
            exp_wt_colorscale = _build_color_scale(wt_hexes, n_bins)
            # tack on a gray for no expression data
            exp_wt_colorscale.append('#bdbdbd')
            self._exp_colorscale = exp_wt_colorscale
            # create color scale for mutated gene expression
            # these hex values match the colorbrewer2 9-class Oranges palette
            mut_hexes = ['#fff5eb', '#fee6ce', '#fdd0a2', '#fdae6b', '#fd8d3c',
                         '#f16913', '#d94801', '#a63603', '#7f2704']
            exp_mut_colorscale = _build_color_scale(mut_hexes, n_bins)
            # tack on a gray for no expression data
            exp_mut_colorscale.append('#bdbdbd')
            self._mut_colorscale = exp_mut_colorscale
            # capture the expression levels of every gene in nodes
            exp_lvls = [n['data'].get('expression') for n in self._nodes]
            # capture the expression levels of every gene in family members
            m_exp_lvls = []
            for n in self._nodes:
                if n['data'].get('members'):
                    members = n['data']['members']
                    for m in members:
                        m_exp_lvls.append(members[m]['expression'])
            # combine node expressions and family expressions
            exp_lvls = exp_lvls + m_exp_lvls
            # get rid of None gene expressions
            exp_lvls = [x for x in exp_lvls if x is not None]
            # bin expression levels into n equal-width bins
            # bin index n_bins is reserved for None
            # this returns the bounds of each bin. so n_bins+1 bounds.
            # get rid of first value which is the leftmost bound
            bin_thr = np.histogram(exp_lvls, n_bins)[1][1:]
            # iterate over nodes
            for n in self._nodes:
                # if node has members set member bin_expression values
                if n['data'].get('members'):
                    members = n['data']['members']
                    for m in members:
                        # if expression is None, set to bin index n_bins
                        if members[m]['expression'] is None:
                            members[m]['bin_expression'] = n_bins
                        else:
                            # first upper bound that is >= the value
                            for thr_idx, thr in enumerate(bin_thr):
                                if members[m]['expression'] <= thr:
                                    members[m]['bin_expression'] = thr_idx
                                    break
                # set bin_expression for the node itself
                if n['data']['expression'] is None:
                    n['data']['bin_expression'] = n_bins
                else:
                    for thr_idx, thr in enumerate(bin_thr):
                        if n['data']['expression'] <= thr:
                            n['data']['bin_expression'] = thr_idx
                            break