def my_gene_sets():
    """Build a GeneSetCollection holding two small test gene sets.

    Both gene sets use the source ``'TestSource'`` and the collection
    ``'TestCollection'``; they differ in ID, name, genes and description.
    """
    shared = dict(source='TestSource', collection='TestCollection')
    first = GeneSet(
        'GeneSet1', 'First gene set', ['a', 'b', 'd'],
        description='The first test GeneSet.', **shared)
    second = GeneSet(
        'GeneSet2', 'Second gene set', ['a', 'c', 'd'],
        description='The second test GeneSet.', **shared)
    return GeneSetCollection([first, second])
def my_gene_sets(my_gene_set):
    """Return a list of three GeneSets derived from ``my_gene_set``.

    Every entry reuses the name and genes of ``my_gene_set``; its ID is the
    ID of ``my_gene_set`` with ``1``, ``2`` or ``3`` appended.
    """
    return [
        GeneSet(my_gene_set.id + str(number),
                my_gene_set.name,
                my_gene_set.genes)
        for number in range(1, 4)
    ]
def my_gene_set(my_genes):
    """Create a test GeneSet from ``my_genes``.

    The source, collection and description arguments are all filled in
    with fixed test strings.
    """
    return GeneSet(
        'TestID',
        'TestName',
        my_genes,
        source='TestSource',
        collection='TestCollection',
        description='Test GeneSet.',
    )
def my_rank_based_result(my_matrix, my_v):
    """Build a RankBasedGSEResult from the nonzero positions of ``my_v``.

    The genes of ``my_matrix`` found at the nonzero positions of ``my_v``
    form a gene set with ID ``'Random1'``. The test result is obtained from
    ``get_xlmhg_test_result`` with ``X = 1`` and ``L = N``, where ``N`` is
    ``my_v.size``.
    """
    # NOTE(review): the uint16 cast is presumably the index dtype that
    # get_xlmhg_test_result expects -- confirm against its documentation.
    hit_positions = np.uint16(np.nonzero(my_v)[0])
    hit_genes = [my_matrix.genes[pos] for pos in hit_positions]

    # The gene set receives its own copy of the gene list.
    gene_set = GeneSet(id='Random1', name='Random gene Set 1',
                       genes=list(hit_genes))

    total = my_v.size
    x_param = 1
    l_param = total
    outcome = get_xlmhg_test_result(total, hit_positions, x_param, l_param)

    return RankBasedGSEResult(
        gene_set, total, hit_positions, hit_genes, x_param, l_param,
        outcome.stat, outcome.cutoff, outcome.pval)
def my_uninteresting_gene_set(my_ranked_genes):
    """Create a GeneSet from the last five entries of ``my_ranked_genes``."""
    tail_genes = my_ranked_genes[-5:]
    return GeneSet('BoringID', 'boring gene set', tail_genes)
def my_gene_set(my_ranked_genes, my_v):
    """Create a GeneSet from the genes at the nonzero positions of ``my_v``.

    Each nonzero position of ``my_v`` is used as an index into
    ``my_ranked_genes``.
    """
    nonzero_positions = np.nonzero(my_v)[0]
    selected = []
    for position in nonzero_positions:
        selected.append(my_ranked_genes[position])
    return GeneSet('TestID', 'TestName', selected)
def get_gene_sets(self, min_genes=None, max_genes=None):
    """Return the set of annotated genes for each GO term.

    Terms without any genes, and terms whose number of genes violates
    ``min_genes`` / ``max_genes``, are skipped. When several of the
    remaining terms have exactly the same set of genes, a term is excluded
    if any of the other terms in that group is one of its descendants.

    Parameters
    ----------
    min_genes: int, optional
        Exclude GO terms with fewer than this number of genes.
    max_genes: int, optional
        Exclude GO terms with more than this number of genes.

    Returns
    -------
    GeneSetCollection
        A gene set "database" with one gene set for each GO term.

    Raises
    ------
    ValueError
        If ``self.terms`` or ``self.annotations`` is empty.
    """
    if not self.terms:
        raise ValueError('You need to first parse both an OBO file and '
                         'a gene association file!')

    if not self.annotations:
        raise ValueError('You need to first parse a gene association '
                         'file!')

    all_term_ids = sorted(self.terms.keys())

    # go over all GO terms and get associated genes
    logger.info('Obtaining GO term associations...')

    term_genes = OrderedDict()  # term ID => genes, in sorted ID order
    geneset_terms = {}  # genes => IDs of all selected terms with these genes
    for id_ in all_term_ids:
        tg = self.get_goterm_genes(id_)
        assert isinstance(tg, frozenset)
        c = len(tg)
        if c == 0:
            continue

        too_few = min_genes is not None and c < min_genes
        too_many = max_genes is not None and c > max_genes
        if too_few or too_many:
            # term doesn't meet min/max number of genes criteria
            continue

        # for finding redundant terms (use set of genes as key)
        geneset_terms.setdefault(tg, []).append(id_)
        term_genes[id_] = tg

    selected = len(term_genes)
    affected = 0
    excl = 0
    gene_sets = []
    for id_, tg in term_genes.items():
        term = self.terms[id_]

        # check if there are redundant terms
        same_genes = geneset_terms[tg]
        if len(same_genes) > 1:
            affected += 1
            # check if this term is an ancestor of any of them
            # if so, exclude it
            descendants = term.descendants
            if any(other_id != id_ and other_id in descendants
                   for other_id in same_genes):
                excl += 1
                continue

        # if the term is not redundant with any other term,
        # or if it isn't the ancestor of any redundant term,
        # add its gene set to the list
        gs = GeneSet(id_, term.name, tg, source='GO',
                     collection=term.domain_short,
                     description=term.definition)
        gene_sets.append(gs)

    D = GeneSetCollection(gene_sets)
    logger.info('# terms selected intially: %d', selected)
    logger.info('# terms with redundant gene sets: %d', affected)
    logger.info('# terms excluded due to redundancy: %d', excl)
    logger.info('# terms retained: %d', D.n)

    return D
def test_list(my_gene_set):
    """Check that a GeneSet survives a ``to_list`` / ``from_list`` round trip.

    The list form must be a ``list`` with exactly six entries, and the
    GeneSet rebuilt from it must compare equal to the original.
    """
    as_list = my_gene_set.to_list()
    assert isinstance(as_list, list)
    assert len(as_list) == 6

    restored = GeneSet.from_list(as_list)
    assert restored == my_gene_set
def my_gene_set2(my_genes):
    """Create a test GeneSet from ``my_genes`` using only required arguments.

    Only the ID, name and genes are passed, so this is a gene set with all
    optional attributes set to None.
    """
    return GeneSet('TestID', 'TestName', my_genes)