Example #1
0
    def test_gene_sets(self):
        """Exercise GeneSets on a collection built from three gene sets.

        The test asserts that asking the collection for a single common
        organism or hierarchy raises GeneSetException, that more than one
        hierarchy is reported, and that splitting by hierarchy produces
        fewer groups than there are gene sets.
        """
        # First gene set is built entirely from the fixture attributes.
        from_fixture = GeneSet(
            gs_id=self.test_gs_id,
            name=self.test_name,
            genes=self.test_genes,
            hierarchy=self.test_hierarchy,
            organism=self.test_organism,
            link='',
        )

        # The other two differ only in id and name; everything else is shared.
        shared_fields = dict(hierarchy=('Test', 'test'), organism='3702')
        second = GeneSet(gs_id='test2', name='test_name2', **shared_fields)
        third = GeneSet(gs_id='test3', name='test_name3', **shared_fields)

        collection = GeneSets([from_fixture, second, third])
        self.assertIsNotNone(collection)

        # No single organism / hierarchy is expected for this collection.
        with self.assertRaises(GeneSetException):
            collection.common_org()
        with self.assertRaises(GeneSetException):
            collection.common_hierarchy()

        hierarchy_count = len(collection.hierarchies())
        self.assertGreater(hierarchy_count, 1)

        groups = collection.split_by_hierarchy()
        self.assertLess(len(groups), len(collection))
Example #2
0
def go_gene_sets(tax_id: str) -> None:
    """Build GO-based gene sets for ``tax_id`` and write them out per hierarchy.

    Loads the ontology and the organism's annotations from ``data_path``,
    creates one GeneSet for every ontology term that has at least one
    annotated gene, then splits the resulting collection by hierarchy and
    passes each group to ``to_gmt_file_format`` with a path under
    ``{data_path}/gene_sets/``.

    Args:
        tax_id: Organism identifier used to pick the annotation file and
            stored as the ``organism`` of every generated gene set.
    """
    domain = 'go'
    ontology = go.Ontology(filename=f'{data_path}/{domain}/gene_ontology.obo')
    annotations = go.Annotations(
        tax_id,
        filename=f'{data_path}/{domain}/{tax_id}.tab',
        ontology=ontology,
    )

    def to_gene_set(term: go.Term) -> Optional[GeneSet]:
        """Return a GeneSet for ``term``, or None when it has no genes."""
        term_genes = annotations.get_genes_by_go_term(term.id)

        # Terms without any annotated gene produce no gene set.
        if len(term_genes) == 0:
            return None

        return GeneSet(
            gs_id=term.id,
            name=term.name,
            genes=set(term_genes),
            hierarchy=('GO', term.namespace),
            organism=tax_id,
            link=f'http://amigo.geneontology.org/amigo/term/{term.id}',
        )

    # Convert every term first, then keep only the ones that yielded a set.
    converted = [to_gene_set(term) for term in ontology.terms.values()]
    non_empty = []
    for candidate in converted:
        if candidate is not None:
            non_empty.append(candidate)
    gene_sets = GeneSets(non_empty)

    for gs_group in gene_sets.split_by_hierarchy():
        hierarchy = gs_group.common_hierarchy()
        output_path = f'{data_path}/gene_sets/{filename(hierarchy, tax_id)}'
        gs_group.to_gmt_file_format(output_path)