def get_gene_sets(gene_set_names: list, organism: str = ORGANISM, go_slims: bool = False,
                  set_sizes: tuple = (-np.inf, np.inf), reference: set = None) -> dict:
    """
    Get all gene sets of the requested ontologies, filtered by size.

    :param gene_set_names: Names of ontologies for which to get gene sets
        (as returned by list_gene_sets)
    :param organism: organism id
    :param go_slims: If the ontology type (first element of a gene set name) is 'GO',
        keep only gene sets whose id is in the 'goslim_generic' slims subset
    :param set_sizes: Keep only gene sets whose size is greater or equal to the 1st
        element of set_sizes and smaller or equal to the 2nd element of set_sizes,
        default is -inf,inf
    :param reference: Collection (set, list, tuple, ...) of gene EIDs to use as a
        reference set. When given, the size of a gene set is the number of its genes
        that are also present in reference.
    :return: Dict whose keys are ontology names and whose values are lists of the
        GeneSet objects that passed the filters
    """
    min_size, max_size = set_sizes[0], set_sizes[1]

    # Normalise once: `set & list` raises TypeError, so a reference passed as a
    # list (as the documentation allows) must be converted before intersecting.
    reference_genes = None if reference is None else set(reference)

    slims = None
    if go_slims:
        anno = go.Annotations(organism)
        anno._ensure_ontology()
        anno._ontology.set_slims_subset('goslim_generic')
        slims = anno._ontology.slims_subset

    gene_set_ontology = dict()
    for gene_set_name in gene_set_names:
        gene_sets = load_gene_sets(gene_set_name, str(organism))

        if reference_genes is None:
            gene_sets = [gene_set for gene_set in gene_sets
                         if min_size <= len(gene_set.genes) <= max_size]
        else:
            gene_sets = [gene_set for gene_set in gene_sets
                         if min_size <= len(gene_set.genes & reference_genes) <= max_size]

        # Slim filtering applies only to GO ontologies.
        if go_slims and gene_set_name[0] == 'GO':
            gene_sets = [gene_set for gene_set in gene_sets if gene_set.gs_id in slims]

        gene_set_ontology[gene_set_name] = gene_sets
    return gene_set_ontology
def go_gene_sets(tax_id: str) -> None:
    """
    Build GO gene sets for one organism and write them out in GMT format.

    The ontology and the annotations are read from files under ``data_path``.
    Every ontology term that has at least one annotated gene becomes a GeneSet.
    The resulting sets are split by hierarchy and each group is written to its
    own file under ``{data_path}/gene_sets/``.

    :param tax_id: taxonomy id of the organism
    """
    domain = 'go'
    ontology = go.Ontology(filename=f'{data_path}/{domain}/gene_ontology.obo')
    annotations = go.Annotations(tax_id, filename=f'{data_path}/{domain}/{tax_id}.tab', ontology=ontology)

    # Collect a gene set for each term that has annotated genes.
    non_empty_sets = []
    for term in ontology.terms.values():
        term_genes = annotations.get_genes_by_go_term(term.id)
        if len(term_genes) > 0:
            non_empty_sets.append(
                GeneSet(
                    gs_id=term.id,
                    name=term.name,
                    genes=set(term_genes),
                    hierarchy=('GO', term.namespace),
                    organism=tax_id,
                    link=f'http://amigo.geneontology.org/amigo/term/{term.id}',
                )
            )

    # One output file per hierarchy group.
    for group in GeneSets(non_empty_sets).split_by_hierarchy():
        group_hierarchy = group.common_hierarchy()
        group.to_gmt_file_format(f'{data_path}/gene_sets/{filename(group_hierarchy, tax_id)}')
def go_gene_sets(org):
    """
    Return the GO gene sets of an organism.

    One GeneSet is created for every ontology term that has at least one
    annotated gene; terms without genes are skipped.

    :param org: organism id passed to go.Annotations
    :return: GeneSets holding the created gene sets
    """
    ontology = go.Ontology()
    annotations = go.Annotations(org, ontology=ontology)

    collected = []
    for term_id, term in ontology.terms.items():
        term_genes = annotations.get_genes_by_go_term(term_id)
        hierarchy = ('GO', term.namespace)
        if len(term_genes) == 0:
            continue
        collected.append(
            GeneSet(
                gs_id=term_id,
                name=term.name,
                genes=term_genes,
                hierarchy=hierarchy,
                organism=org,
                link=GO_TERM_LINK.format(term_id),
            )
        )

    return GeneSets(collected)
def Load(self):
    """
    Load the ontology and the selected annotations, then refresh the evidence check boxes.

    The ontology is created only if it has not been loaded yet. Annotations are
    reloaded only when the selected taxonomy id differs from the one currently
    loaded. Each evidence check box is enabled if at least one annotation has
    that evidence type, and its label shows the annotation and gene counts.
    """
    selected = self.available_annotations[self.annotation_index]

    if self.ontology is None:
        self.ontology = go.Ontology()

    if selected.taxid != self.loaded_annotation_code:
        # Drop the old annotations and collect garbage before loading the new ones.
        self.annotations = None
        gc.collect()
        self.annotations = go.Annotations(selected.taxid)
        self.loaded_annotation_code = selected.taxid

    # Tally annotations and distinct genes per evidence type.
    annotation_counts = defaultdict(int)
    genes_by_evidence = defaultdict(set)
    for annotation in self.annotations.annotations:
        evidence = annotation.evidence
        annotation_counts[evidence] += 1
        genes_by_evidence[evidence].add(annotation.gene_id)

    for evidence_type in go.evidenceTypesOrdered:
        checkbox = self.evidenceCheckBoxDict[evidence_type]
        n_annotations = annotation_counts[evidence_type]
        n_genes = len(genes_by_evidence[evidence_type])
        checkbox.setEnabled(bool(n_annotations))
        checkbox.setText(evidence_type + ": %i annots(%i genes)" % (n_annotations, n_genes))
def GO_enrichment(entrez_ids: list, organism: int = ORGANISM, fdr=0.25, slims: bool = True,
                  aspect: str = None) -> OrderedDict:
    """
    Calculate ontology enrichment for a list of genes.

    :param entrez_ids: Entrez IDs of the gene group to be analysed for enrichment
    :param organism: organism ID
    :param fdr: Threshold passed to go.filter_by_p_value for retention of enriched gene sets
    :param slims: Passed to get_enriched_terms as slims_only (from Orange Annotations)
    :param aspect: Which GO aspect to use. From Orange Annotations: None: all,
        'Process', 'Function', 'Component'
    :return: OrderedDict: key ontology term name, value the second element of the
        term's enrichment result (presumably the FDR-adjusted p-value - confirm
        against go.Annotations.get_enriched_terms). Sorted by value in ascending order.
    """
    annotations = go.Annotations(organism)
    enriched = annotations.get_enriched_terms(entrez_ids, slims_only=slims, aspect=aspect)
    retained = go.filter_by_p_value(enriched, fdr)

    value_by_term = {}
    for go_id, result in retained.items():
        # The first annotation carrying exactly this GO id supplies the term name.
        matching = next(
            (annotation for annotation in annotations.get_annotations_by_go_id(go_id)
             if annotation.go_id == go_id),
            None,
        )
        if matching is not None:
            value_by_term[matching.go_term] = result[1]

    return OrderedDict(sorted(value_by_term.items(), key=lambda item: item[1]))
from orangecontrib.bioinformatics import go

ontology = go.Ontology()
annotations = go.Annotations("4932", ontology=ontology)

# keys are symbol names, values are Entrez IDs
genes_ids = {
    'Yta7p': '853186',
    'RPN2': '854735',
    'RPT2': '851557',
}


def _print_terms_below(results, threshold):
    """Print name, p-value and genes of every term whose p-value is below threshold."""
    for term_id, (term_genes, term_p_value, _) in results.items():
        if term_p_value < threshold:
            print(ontology[term_id].name + " with p-value: %.4f " % term_p_value + ", ".join(term_genes))


# Enrichment over all terms
res = annotations.get_enriched_terms(genes_ids.values())
print(res)
print("Enriched terms:")
_print_terms_below(res, 0.05)

# And again for slims
annotations.ontology.set_slims_subset('goslim_yeast')
res = annotations.get_enriched_terms(genes_ids.values(), slims_only=True)
print("\n\nEnriched slim terms:")
_print_terms_below(res, 0.2)