def test_get_mutations():
    """Check that BRAF in A375_SKIN is reported with the V600E mutation."""
    mutations = context_client.get_mutations(['BRAF'], ['A375_SKIN'])
    assert mutations is not None
    # The result is keyed by cell line first, then by gene
    by_gene = mutations.get('A375_SKIN')
    assert by_gene is not None
    braf_mutations = by_gene.get('BRAF')
    assert braf_mutations is not None
    assert braf_mutations == ['V600E']
    assert unicode_strs(mutations)
def test_get_mutations():
    """Check that BRAF is flagged as mutated (1.0) in A375_SKIN."""
    mutations = context_client.get_mutations('BRAF', 'A375_SKIN')
    assert mutations is not None
    # The result is keyed by gene first, then by cell line
    by_cell_line = mutations.get('BRAF')
    assert by_cell_line is not None
    status = by_cell_line.get('A375_SKIN')
    assert status is not None
    assert status == 1.0
    assert unicode_strs(mutations)
def set_context(self, cell_type):
    """Set protein expression data and mutational status as node attribute

    This method uses :py:mod:`indra.databases.context_client` to get
    protein expression levels and mutational status for a given cell type
    and set a node attribute for proteins accordingly.

    A warning is logged for each kind of data (expression, mutation) that
    could not be obtained. If neither is available the network is left
    untouched; if only one is available, the attributes for that kind of
    data are still set.

    Parameters
    ----------
    cell_type : str
        Cell type name for which expression levels are queried.
        The cell type name follows the CCLE database conventions.
        Example: LOXIMVI_SKIN, BT20_BREAST
    """
    node_names = [node['n'] for node in self.cx['nodes']]
    res_expr = context_client.get_protein_expression(node_names,
                                                     [cell_type])
    res_mut = context_client.get_mutations(node_names, [cell_type])
    res_expr = res_expr.get(cell_type)
    res_mut = res_mut.get(cell_type)
    if not res_expr:
        msg = 'Could not get protein expression for %s cell type.' % \
              cell_type
        logger.warning(msg)

    if not res_mut:
        msg = 'Could not get mutational status for %s cell type.' % \
              cell_type
        logger.warning(msg)

    if not res_expr and not res_mut:
        return

    # Only one of the two may be missing at this point; normalize it to an
    # empty dict so that the per-node lookups below do not fail on None.
    res_expr = res_expr or {}
    res_mut = res_mut or {}

    self.cx['networkAttributes'].append({'n': 'cellular_context',
                                         'v': cell_type})
    counter = 0
    for node in self.cx['nodes']:
        amount = res_expr.get(node['n'])
        mut = res_mut.get(node['n'])
        if amount is not None:
            self.cx['nodeAttributes'].append({'po': node['@id'],
                                              'n': 'expression_amount',
                                              'v': int(amount)})
        if mut is not None:
            # Any non-empty mutation entry counts as mutated
            self.cx['nodeAttributes'].append({'po': node['@id'],
                                              'n': 'is_mutated',
                                              'v': 1 if mut else 0})
        if mut is not None or amount is not None:
            counter += 1
    logger.info('Set context for %d nodes.' % counter)
def set_CCLE_context(self, cell_types):
    """Set context of all nodes and node members from CCLE.

    Expression amounts and mutations are fetched from the context client
    for the genes in ``self._gene_names`` and stored under
    ``self._context['CCLE']`` as a dict with two keys:

    - ``'bin_expression'``: for each bin count from 3 to 9, a copy of the
      expression dict in which every amount is replaced by its bin index
      (missing amounts get the extra index ``n_bins``).
    - ``'mutation'``: for each cell line, a dict mapping gene to 1 if any
      mutation was reported and 0 otherwise.

    Parameters
    ----------
    cell_types : list
        Cell line names to query.
    """
    self.get_gene_names()

    # Get expression and mutations from context client
    exp_values = \
        context_client.get_protein_expression(self._gene_names, cell_types)
    mut_values = \
        context_client.get_mutations(self._gene_names, cell_types)

    # Make a dict of presence/absence of mutations
    muts = {cell_line: {} for cell_line in cell_types}
    for cell_line, entries in mut_values.items():
        if entries is not None:
            for gene, mutations in entries.items():
                muts[cell_line][gene] = 1 if mutations else 0

    # Create bins for the exp values
    # because colorbrewer only does 3-9 bins and I don't feel like
    # reinventing color scheme theory, this will only bin 3-9 bins
    def bin_exp(expression_dict):
        # Pool all available amounts over every cell line so that the
        # thresholds are shared across the whole data set
        amounts = []
        for genes in expression_dict.values():
            if genes is None:
                # No data for this cell line (mirrors the guard used for
                # the mutation entries above)
                continue
            for val in genes.values():
                if val is not None:
                    amounts.append(val)
        # The log transform does not depend on the bin count: do it once
        log_amounts = np.log10(amounts)
        # np.histogram returns n_bins + 1 edges; drop the leftmost one so
        # that each remaining edge is the upper bound of a bin
        thr_dict = {n_bins: np.histogram(log_amounts, n_bins)[1][1:]
                    for n_bins in range(3, 10)}

        binned_dict = {}
        for n_bins, thresholds in thr_dict.items():
            binned = deepcopy(expression_dict)
            for genes in binned.values():
                if genes is None:
                    # Cell lines without data are kept as None
                    continue
                for gene, amount in genes.items():
                    # last bin is reserved for None
                    if amount is None:
                        genes[gene] = n_bins
                        continue
                    val = np.log10(amount)
                    for thr_idx, thr in enumerate(thresholds):
                        if val <= thr:
                            genes[gene] = thr_idx
                            break
            binned_dict[n_bins] = binned
        return binned_dict

    binned_exp = bin_exp(exp_values)

    context = {'bin_expression': binned_exp,
               'mutation': muts}
    self._context['CCLE'] = context
def set_context(self, cell_type):
    """Set protein expression data and mutational status as node attribute

    This method uses :py:mod:`indra.databases.context_client` to get
    protein expression levels and mutational status for a given cell type
    and set a node attribute for proteins accordingly.

    A warning is logged for each kind of data (expression, mutation) that
    could not be obtained. If neither is available the network is left
    untouched; if only one is available, the attributes for that kind of
    data are still set.

    Parameters
    ----------
    cell_type : str
        Cell type name for which expression levels are queried.
        The cell type name follows the CCLE database conventions.
        Example: LOXIMVI_SKIN, BT20_BREAST
    """
    node_names = [node['n'] for node in self.cx['nodes']]
    res_expr = context_client.get_protein_expression(node_names,
                                                     [cell_type])
    res_mut = context_client.get_mutations(node_names, [cell_type])
    res_expr = res_expr.get(cell_type)
    res_mut = res_mut.get(cell_type)
    if not res_expr:
        msg = 'Could not get protein expression for %s cell type.' % \
              cell_type
        logger.warning(msg)

    if not res_mut:
        msg = 'Could not get mutational status for %s cell type.' % \
              cell_type
        logger.warning(msg)

    if not res_expr and not res_mut:
        return

    # Exactly one of the two can still be missing here; fall back to an
    # empty dict so the lookups in the node loop cannot hit None.
    if not res_expr:
        res_expr = {}
    if not res_mut:
        res_mut = {}

    self.cx['networkAttributes'].append({'n': 'cellular_context',
                                         'v': cell_type})
    counter = 0
    for node in self.cx['nodes']:
        amount = res_expr.get(node['n'])
        mut = res_mut.get(node['n'])
        if amount is not None:
            node_attribute = {'po': node['@id'],
                              'n': 'expression_amount',
                              'v': int(amount)}
            self.cx['nodeAttributes'].append(node_attribute)
        if mut is not None:
            # Any non-empty mutation entry counts as mutated
            is_mutated = 1 if mut else 0
            node_attribute = {'po': node['@id'],
                              'n': 'is_mutated',
                              'v': is_mutated}
            self.cx['nodeAttributes'].append(node_attribute)
        if mut is not None or amount is not None:
            counter += 1
    logger.info('Set context for %d nodes.' % counter)
def get_mutations(self, gene, cell_line):
    """Return the mutations of a gene in a cell line.

    Cell lines present in ``self.mut`` are answered from that mapping
    (an empty list is reported for genes it does not contain); any other
    cell line is looked up through the context client.

    The result has the form ``{cell_line: {gene: mutations}}``.
    """
    if cell_line not in self.mut:
        return context_client.get_mutations([gene], [cell_line])
    known_mutations = self.mut[cell_line]
    return {cell_line: {gene: known_mutations.get(gene, [])}}
def set_context(self, *args, **kwargs):
    """Set protein expression and mutation data as node attributes

    This method uses :py:mod:`indra.databases.context_client` to get
    protein expression levels and mutations for a given cell type and set
    node attributes for proteins accordingly. Each node (and each member
    of a node with members) gets an ``expression`` entry (log10 of the
    amount, or None) and a ``mutation`` entry (int, 0 if unknown).

    If no cell type is given, or no expression or mutation data could be
    obtained, a warning is logged and the nodes are left unchanged.

    Parameters
    ----------
    cell_type : str
        Cell type name for which expression levels are queried.
        The cell type name follows the CCLE database conventions.
        Example: LOXIMVI_SKIN, BT20_BREAST
    bin_expression : bool
        If True, the gene expression will be put into bins based on
        all gene expression values, and a ``bin_expression`` entry is set
        next to each ``expression`` entry. An additional bin (index
        ``n_bins``) is used to indicate that the context_client returned
        None.
    n_bins : int
        If specified, split the expression levels into the given number
        of bins; values outside the 3-9 range are clamped to it. If not
        specified, default will be 5.
    """
    cell_type = kwargs.get('cell_type')
    if not cell_type:
        logger.warning('No cell type given.')
        return

    # Collect all gene names in network
    gene_names = []
    for node in self._nodes:
        data = node['data']
        members = data.get('members')
        if members:
            gene_names += list(members.keys())
        elif not data['name'].startswith('Group'):
            gene_names.append(data['name'])

    # Get expression and mutation from context client
    exp = context_client.get_protein_expression(gene_names, cell_type)
    mut = context_client.get_mutations(gene_names, cell_type)
    if not exp:
        logger.warning('Could not get context for %s cell type.' %
                       cell_type)
        return
    # Results are keyed by gene, then cell type; keep only this cell type
    exp = {k: v[cell_type] for k, v in exp.items()}
    if not mut:
        logger.warning('Could not get mutations for %s cell type.' %
                       cell_type)
        return
    mut = {k: v[cell_type] for k, v in mut.items()}

    # Get expression and mutation for specific gene
    def get_expr_mut(name, expr_data, mut_data):
        amount = expr_data.get(name)
        expression = None if amount is None else np.log10(amount)
        mutation = mut_data.get(name)
        mutation = 0 if mutation is None else int(mutation)
        return expression, mutation

    # Set node properties for expression and mutation
    for node in self._nodes:
        data = node['data']
        members = data.get('members')
        if members:
            for member in members.keys():
                expression, mutation = get_expr_mut(member, exp, mut)
                members[member]['expression'] = expression
                members[member]['mutation'] = mutation
            # The node holding the members carries no data of its own
            data['expression'] = None
            data['mutation'] = 0
        elif data['name'].startswith('Group'):
            data['expression'] = None
            data['mutation'] = 0
        else:
            expression, mutation = get_expr_mut(data['name'], exp, mut)
            data['expression'] = expression
            data['mutation'] = mutation

    # Binning for the purpose of assigning colors
    if kwargs.get('bin_expression'):
        # how many bins? If not specified, set to 5
        n_bins = 5
        user_bins = kwargs.get('n_bins')
        # bool is a subclass of int; exclude it so that True/False keep
        # falling back to the default
        if isinstance(user_bins, int) and not isinstance(user_bins, bool):
            n_bins = user_bins
            if n_bins > 9:
                n_bins = 9
                logger.info('Only 9 bins allowed. Setting n_bins = 9.')
            if n_bins < 3:
                n_bins = 3
                logger.info('Need at least 3 bin. Setting n_bins = 3.')

        # Create color scale for unmutated gene expression
        # feed in hex values from colorbrewer2 9-class Greens
        wt_hexes = ['#f7fcf5', '#e5f5e0', '#c7e9c0', '#a1d99b', '#74c476',
                    '#41ab5d', '#238b45', '#006d2c', '#00441b']
        exp_wt_colorscale = _build_color_scale(wt_hexes, n_bins)
        # tack on a gray for no expression data
        exp_wt_colorscale.append('#bdbdbd')
        self._exp_colorscale = exp_wt_colorscale

        # create color scale for mutated gene expression
        # feed in hex values from colorbrewer2 9-class Oranges
        mut_hexes = ['#fff5eb', '#fee6ce', '#fdd0a2', '#fdae6b', '#fd8d3c',
                     '#f16913', '#d94801', '#a63603', '#7f2704']
        exp_mut_colorscale = _build_color_scale(mut_hexes, n_bins)
        # tack on a gray for no expression data
        exp_mut_colorscale.append('#bdbdbd')
        self._mut_colorscale = exp_mut_colorscale

        # capture the expression levels of every gene in nodes
        exp_lvls = [n['data'].get('expression') for n in self._nodes]
        # add the expression levels of every gene in family members
        for n in self._nodes:
            members = n['data'].get('members')
            if members:
                exp_lvls += [members[m]['expression'] for m in members]
        # get rid of None gene expressions
        exp_lvls = [x for x in exp_lvls if x is not None]

        # bin expression levels into n equally sized bins
        # bin n+1 reserved for None
        # this returns the bounds of each bin. so n_bins+1 bounds.
        # get rid of first value which is the leftmost bound
        bin_thr = np.histogram(exp_lvls, n_bins)[1][1:]

        # Set bin_expression on a node's data dict or a member dict
        def set_bin(entry):
            level = entry['expression']
            # if expression is None, set to bin index n_bins
            if level is None:
                entry['bin_expression'] = n_bins
                return
            # otherwise use the first bin whose upper bound covers it
            for thr_idx, thr in enumerate(bin_thr):
                if level <= thr:
                    entry['bin_expression'] = thr_idx
                    break

        for n in self._nodes:
            data = n['data']
            # if node has members set member bin_expression values
            for member in (data.get('members') or {}).values():
                set_bin(member)
            # set bin_expression for the node itself
            set_bin(data)
def set_CCLE_context(self, cell_types):
    """Set context of all nodes and node members from CCLE.

    Expression amounts and mutations are fetched from the context client
    for the genes in ``self._gene_names`` and stored under
    ``self._context['CCLE']`` as a dict with two keys:

    - ``'bin_expression'``: for each bin count from 3 to 9, a copy of the
      expression dict (keyed by cell line, then gene) in which every
      amount is replaced by its bin index (missing amounts get the extra
      index ``n_bins``).
    - ``'mutation'``: the mutation data keyed by cell line, then gene.

    Parameters
    ----------
    cell_types : list
        Cell line names to query. The list is not modified.
    """
    self.get_gene_names()
    gene_names = self._gene_names
    exp = {}
    mut = {}

    # context_client gives back a dict with genes as keys.
    # prefer lines keys, so this will need to be transposed
    def transpose_context(context_dict):
        if not context_dict:
            # Nothing came back: there is no first gene to read lines from
            return {}
        genes = list(context_dict)
        lines = list(context_dict[genes[0]])
        return {line: {gene: context_dict[gene][line] for gene in genes}
                for line in lines}

    # access the context service in chunks of cell types.
    # it will timeout if queried with larger chunks.
    chunk_size = 10
    # Work on a copy so that the caller's list is left intact
    pending = list(cell_types)
    for start in range(0, len(pending), chunk_size):
        cell_types_chunk = pending[start:start + chunk_size]
        exp_temp = context_client.get_protein_expression(gene_names,
                                                         cell_types_chunk)
        exp.update(transpose_context(exp_temp))
        mut_temp = context_client.get_mutations(gene_names,
                                                cell_types_chunk)
        mut.update(transpose_context(mut_temp))

    # create bins for the exp values
    # because colorbrewer only does 3-9 bins and I don't feel like
    # reinventing color scheme theory, this will only bin 3-9 bins
    def bin_exp(expression_dict):
        # Pool all available amounts over every cell line so that the
        # thresholds are shared across the whole data set
        amounts = []
        for genes in expression_dict.values():
            for val in genes.values():
                if val is not None:
                    amounts.append(val)
        # The log transform does not depend on the bin count: do it once
        log_amounts = np.log10(amounts)
        # np.histogram returns n_bins + 1 edges; drop the leftmost one so
        # that each remaining edge is the upper bound of a bin
        thr_dict = {n_bins: np.histogram(log_amounts, n_bins)[1][1:]
                    for n_bins in range(3, 10)}

        binned_dict = {}
        for n_bins, thresholds in thr_dict.items():
            binned = deepcopy(expression_dict)
            for genes in binned.values():
                for gene, amount in genes.items():
                    # last bin is reserved for None
                    if amount is None:
                        genes[gene] = n_bins
                        continue
                    val = np.log10(amount)
                    for thr_idx, thr in enumerate(thresholds):
                        if val <= thr:
                            genes[gene] = thr_idx
                            break
            binned_dict[n_bins] = binned
        return binned_dict

    binned_exp = bin_exp(exp)

    context = {'bin_expression': binned_exp,
               'mutation': mut}
    self._context['CCLE'] = context
def test_get_mutations_cell_type_missing():
    """Check that an unknown cell type gets an entry without mutations."""
    known_line, unknown_line = 'A375_SKIN', 'XYZ'
    result = context_client.get_mutations(['BRAF'],
                                          [known_line, unknown_line])
    # The known cell line is unaffected by the unknown one
    assert known_line in result
    assert result[known_line]['BRAF'] == ['V600E']
    # The unknown cell line is present but reports nothing for the gene
    assert unknown_line in result
    assert not result[unknown_line]['BRAF']
def test_get_mutations_gene_missing():
    """Check that an unknown gene gets an entry without mutations."""
    cell_line = 'A375_SKIN'
    result = context_client.get_mutations(['BRAF', 'XYZ'], [cell_line])
    assert cell_line in result
    # The known gene is unaffected by the unknown one
    assert result[cell_line]['BRAF'] == ['V600E']
    # The unknown gene is present but reports nothing
    assert not result[cell_line]['XYZ']
def set_context(self, *args, **kwargs):
    """Set protein expression and mutation data as node attributes

    This method uses :py:mod:`indra.databases.context_client` to get
    protein expression levels and mutations for a given cell type and set
    node attributes for proteins accordingly. Each node (and each member
    of a node with members) gets an ``expression`` entry (log10 of the
    amount, or None) and a ``mutation`` entry (int, 0 if unknown).

    If no cell type is given, or no expression or mutation data could be
    obtained, a warning is logged and the nodes are left unchanged.

    Parameters
    ----------
    cell_type : str
        Cell type name for which expression levels are queried.
        The cell type name follows the CCLE database conventions.
        Example: LOXIMVI_SKIN, BT20_BREAST
    bin_expression : bool
        If True, the gene expression will be put into bins based on
        all gene expression values, and a ``bin_expression`` entry is set
        next to each ``expression`` entry. An additional bin (index
        ``n_bins``) is used to indicate that the context_client returned
        None.
    n_bins : int
        If specified, split the expression levels into the given number
        of bins; values outside the 3-9 range are clamped to it. If not
        specified, default will be 5.
    """
    cell_type = kwargs.get('cell_type')
    if not cell_type:
        logger.warning('No cell type given.')
        return

    # Collect all gene names in network
    gene_names = []
    for node in self._nodes:
        data = node['data']
        members = data.get('members')
        if members:
            gene_names += list(members.keys())
        elif not data['name'].startswith('Group'):
            gene_names.append(data['name'])

    # Get expression and mutation from context client
    exp = context_client.get_protein_expression(gene_names, cell_type)
    mut = context_client.get_mutations(gene_names, cell_type)
    if not exp:
        logger.warning('Could not get context for %s cell type.' %
                       cell_type)
        return
    # Results are keyed by gene, then cell type; keep only this cell type
    exp = {k: v[cell_type] for k, v in exp.items()}
    if not mut:
        logger.warning('Could not get mutations for %s cell type.' %
                       cell_type)
        return
    mut = {k: v[cell_type] for k, v in mut.items()}

    # Get expression and mutation for specific gene
    def get_expr_mut(name, expr_data, mut_data):
        amount = expr_data.get(name)
        expression = None if amount is None else np.log10(amount)
        mutation = mut_data.get(name)
        mutation = 0 if mutation is None else int(mutation)
        return expression, mutation

    # Set node properties for expression and mutation
    for node in self._nodes:
        data = node['data']
        members = data.get('members')
        if members:
            for member in members.keys():
                expression, mutation = get_expr_mut(member, exp, mut)
                members[member]['expression'] = expression
                members[member]['mutation'] = mutation
            # The node holding the members carries no data of its own
            data['expression'] = None
            data['mutation'] = 0
        elif data['name'].startswith('Group'):
            data['expression'] = None
            data['mutation'] = 0
        else:
            expression, mutation = get_expr_mut(data['name'], exp, mut)
            data['expression'] = expression
            data['mutation'] = mutation

    # Binning for the purpose of assigning colors
    if kwargs.get('bin_expression'):
        # how many bins? If not specified, set to 5
        n_bins = 5
        user_bins = kwargs.get('n_bins')
        # bool is a subclass of int; exclude it so that True/False keep
        # falling back to the default
        if isinstance(user_bins, int) and not isinstance(user_bins, bool):
            n_bins = user_bins
            if n_bins > 9:
                n_bins = 9
                logger.info('Only 9 bins allowed. Setting n_bins = 9.')
            if n_bins < 3:
                n_bins = 3
                logger.info('Need at least 3 bin. Setting n_bins = 3.')

        # Create color scale for unmutated gene expression
        # feed in hex values from colorbrewer2 9-class Greens
        wt_hexes = ['#f7fcf5', '#e5f5e0', '#c7e9c0', '#a1d99b', '#74c476',
                    '#41ab5d', '#238b45', '#006d2c', '#00441b']
        exp_wt_colorscale = _build_color_scale(wt_hexes, n_bins)
        # tack on a gray for no expression data
        exp_wt_colorscale.append('#bdbdbd')
        self._exp_colorscale = exp_wt_colorscale

        # create color scale for mutated gene expression
        # feed in hex values from colorbrewer2 9-class Oranges
        mut_hexes = ['#fff5eb', '#fee6ce', '#fdd0a2', '#fdae6b', '#fd8d3c',
                     '#f16913', '#d94801', '#a63603', '#7f2704']
        exp_mut_colorscale = _build_color_scale(mut_hexes, n_bins)
        # tack on a gray for no expression data
        exp_mut_colorscale.append('#bdbdbd')
        self._mut_colorscale = exp_mut_colorscale

        # capture the expression levels of every gene in nodes
        exp_lvls = [n['data'].get('expression') for n in self._nodes]
        # add the expression levels of every gene in family members
        for n in self._nodes:
            members = n['data'].get('members')
            if members:
                exp_lvls += [members[m]['expression'] for m in members]
        # get rid of None gene expressions
        exp_lvls = [x for x in exp_lvls if x is not None]

        # bin expression levels into n equally sized bins
        # bin n+1 reserved for None
        # this returns the bounds of each bin. so n_bins+1 bounds.
        # get rid of first value which is the leftmost bound
        bin_thr = np.histogram(exp_lvls, n_bins)[1][1:]

        # Set bin_expression on a node's data dict or a member dict
        def set_bin(entry):
            level = entry['expression']
            # if expression is None, set to bin index n_bins
            if level is None:
                entry['bin_expression'] = n_bins
                return
            # otherwise use the first bin whose upper bound covers it
            for thr_idx, thr in enumerate(bin_thr):
                if level <= thr:
                    entry['bin_expression'] = thr_idx
                    break

        for n in self._nodes:
            data = n['data']
            # if node has members set member bin_expression values
            for member in (data.get('members') or {}).values():
                set_bin(member)
            # set bin_expression for the node itself
            set_bin(data)