def test_get_genes(self):
    """Check gene extraction after adding the example members.

    Verifies the length of the ``get_genes()`` return value, the length of
    the ``genes`` attribute, and that ``genes`` equals every example member
    except the last one.
    """
    expected_count = 3
    community = BOCC()
    community.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)

    returned_genes = community.get_genes()
    self.assertEqual(
        len(returned_genes), expected_count, 'Number of genes should be 3')

    stored_genes = community.genes
    self.assertEqual(
        len(stored_genes), expected_count, 'Number of genes should be 3')

    expected_genes = EXAMPLE_MEMBERS[:-1]
    self.assertEqual(
        community.genes, expected_genes,
        'Given and recieved genes do not match')
def test_go_enrichment(self):
    """Check the first row of the GO enrichment result for the example community.

    The result must be non-empty. Row 0 is checked by position: column 6
    must hold the GO id ``'GO:0006649'`` and column 5 the expected numeric
    score (presumably a p-value -- confirm against ``go_enrichment``).
    """
    c = BOCC()
    c.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    df = c.go_enrichment()

    self.assertNotEqual(df.shape[0], 0, 'Results should not be empty')
    self.assertEqual(
        df.iloc[0, 6], 'GO:0006649', 'Results to not match expected')
    # The score is a computed float: compare with a tolerance so that
    # last-bit differences between platforms or library versions do not
    # cause spurious failures.
    self.assertAlmostEqual(
        df.iloc[0, 5], 0.0002912762741932548, places=12,
        msg='Results to not match expected')
def test_get_ts(self):
    """Check that each tissue-specificity result has one entry per gene.

    ``get_gene_tissue_specificities()`` returns an indexable result; its
    first three elements must each have the same length as ``get_genes()``.
    """
    community = BOCC()
    community.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    community.get_genes()
    specificities = community.get_gene_tissue_specificities()

    failure_messages = (
        'Length of genes and tissue specific information should match',
        'Length of genes and cell-type group information should match',
        'Length of genes and cell-type specific information should match',
    )
    for position, message in enumerate(failure_messages):
        self.assertEqual(
            len(specificities[position]),
            len(community.get_genes()),
            message)
def test_reset(self):
    """Check that ``reset()`` sets the ``genes`` attribute back to ``None``.

    ``genes`` must first hold 3 entries after ``get_genes()`` is called.
    """
    community = BOCC()
    community.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    community.get_genes()

    gene_count = len(community.genes)
    self.assertEqual(gene_count, 3, 'Number of genes should be 3')

    community.reset()
    genes_after_reset = community.genes
    self.assertEqual(genes_after_reset, None, 'Genes should be set to None')
def test_disease_associated_genes(self):
    """Check that disease associations are returned once per gene.

    The result of ``get_diseases_associated_with_genes()`` must have the
    same length as the list returned by ``get_genes()``.
    """
    c = BOCC()
    c.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    c.get_genes()
    stuff = c.get_diseases_associated_with_genes()
    # The original message was copied from the tissue-specificity test and
    # described the wrong data; it now names what is actually compared.
    self.assertEqual(
        len(stuff), len(c.get_genes()),
        'Length of genes and disease association information should match')
def test_blank(self):
    """Check the state of a freshly constructed ``BOCC`` object.

    ``name`` must be ``None``, ``members`` must be empty, and ``members``
    and ``types`` must have the same length.
    """
    fresh = BOCC()

    self.assertEqual(
        fresh.name, None, 'Name should be None when first initialized')

    member_count = len(fresh.members)
    self.assertEqual(
        member_count, 0, 'Members should be initialized as an empty list')

    self.assertEqual(
        len(fresh.members), len(fresh.types),
        'length of members and types should always be equal')
def test_get_summary_stats(self):
    """Check the first row of the summary statistics for the example community.

    ``get_summary_stats`` is called with the MyGene2 gene/HPO family file
    and is expected to return a DataFrame. Integer-valued columns are
    compared exactly; ratio and threshold columns are floats and are
    compared with a tolerance.
    """
    c = BOCC()
    c.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    df = c.get_summary_stats('Data/mygene2_gene_hpo_family.tsv')
    # Fetch the first row once instead of re-indexing for every assertion.
    row = df.iloc[0, :]

    self.assertEqual(
        row['cluster_size'], 4, 'Cluster size does not match expectation')
    self.assertEqual(
        row['num_sig_go_enrichment_terms'], 0,
        'wrong number of expected significant terms')

    # Float-valued statistics: exact equality is brittle (1 / 3 in
    # particular depends on how the value was computed), so allow for
    # rounding error.
    self.assertAlmostEqual(
        row['gene_ratio'], 0.75, msg='gene ratio not as expected')
    self.assertAlmostEqual(
        row['HPO_ratio'], 0.25, msg='HPO ratio not as expected')
    self.assertAlmostEqual(
        row['go_sig_threshold'], 0.05,
        msg='wrong threshold for significance')
    self.assertAlmostEqual(
        row['max_norm_cell_type_specificity'], 1 / 3,
        msg='wrong max norm cell type specificity')
    # This message previously said "cell type", copied from the check above.
    self.assertAlmostEqual(
        row['max_norm_disease_specificity'], 3 / 3,
        msg='wrong max norm disease specificity')
def test_get_disease_counts(self):
    """Check the disease counts for the example community.

    ``get_disease_counts()`` must return 6 entries, and the entry keyed
    ``'Retinitis pigmentosa'`` must equal 3.
    """
    community = BOCC()
    community.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    community.get_genes()
    disease_counts = community.get_disease_counts()

    association_total = len(disease_counts)
    self.assertEqual(
        association_total, 6,
        'Number of returned associations does not match expectation')

    rp_count = disease_counts['Retinitis pigmentosa']
    self.assertEqual(
        rp_count, 3,
        'All gene in the community (3) should be associated with RP')
def test_summarize_disease_associations(self):
    """Check the second element of the disease association summary.

    Element 1 of ``summarize_disease_associations()`` must equal the
    number of genes returned by ``get_genes()``.
    """
    community = BOCC()
    community.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    summary = community.summarize_disease_associations()

    max_association_count = summary[1]
    gene_total = len(community.get_genes())
    self.assertEqual(
        max_association_count,
        gene_total,
        'The max association count should not be larger than the number of genes in the com'
    )
from BOCC import BOCC
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd

# Cluster files to process, one per community-detection run. The file names
# suggest one algorithm each (cesna, greedy, infomap, walktrap).
com_files = ['cesna_coms_june_22_2021.txt',
             'greedy_coms_june_22_2021.txt',
             'infomap_coms_june_22_2021.txt',
             'walktrap_coms_june_22_2021_filtered.txt']

# Load the first file and compute the MyGene2 stats for its first cluster.
# NOTE(review): `c`, `t` and `fam` are rebound inside the loop below, so this
# looks like leftover exploratory code -- confirm before removing.
c = BOCC.load_clusters(com_files[0])
t = c[0].mygene2_stats('Data/mygene2_gene_hpo_family.tsv')
fam = t[-1]

# Parallel lists with one entry per (file, cluster, family key):
# x: total pairs
# y: num rediscovered
# color: algo
total_num_pairs_per_fam = []
num_rediscovered_per_fam = []
algo = []
# NOTE(review): `families` and `com_id` are never appended to in the visible
# code -- presumably filled further down the script; verify.
families = []
com_id = []

for file in com_files:
    coms = BOCC.load_clusters(file)
    for c in coms:
        t = c.mygene2_stats('Data/mygene2_gene_hpo_family.tsv')
        # The last element of the stats result is a mapping whose values
        # carry 'pairs' and 'not_pairs' collections.
        fam = t[-1]
        for key in fam.keys():
            x = fam[key]
            # Total = rediscovered pairs plus pairs that were not rediscovered.
            total_num_pairs_per_fam.append(len(x['pairs']) + len(x['not_pairs']))
            num_rediscovered_per_fam.append(len(x['pairs']))
            # The source file name is recorded as the algorithm label.
            algo.append(file)
def test_add_members(self):
    """Check that ``add_members`` keeps ``members`` and ``types`` in step.

    Covers three cases on one object: the initial add with explicit types,
    a second add with explicit types, and an add without types. Then every
    tab-separated line of ``SMALL_TEST_COMS`` is loaded into a fresh
    ``BOCC`` object (first field skipped) and the member count is checked.
    """
    c = BOCC()
    c.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    self.assertEqual(len(c.members), len(EXAMPLE_MEMBERS),
                     'Number of members should be 4')
    self.assertEqual(len(c.types), len(EXAMPLE_TYPES),
                     'Number of types should be 4')

    # add more to the members
    c.add_members(['gene1', 'HP:000007'], ['gene', 'hpo'])
    self.assertEqual(len(c.members), 6, 'Number of members should be 6')
    self.assertEqual(len(c.types), 6, 'Number of types should be 6')

    # add more but without types listed
    c.add_members(['gene2', 'HP:000008'])
    self.assertEqual(len(c.members), 8, 'Number of members should be 8')
    self.assertEqual(len(c.types), 8, 'Number of types should be 8')

    # Use a context manager so the file handle is closed even when an
    # assertion fails part-way through the file.
    with open(SMALL_TEST_COMS, 'r') as handle:
        for line in handle:
            community = BOCC()
            row = line.strip().split('\t')
            # The first field is skipped; the remaining fields are members.
            community.add_members(row[1:])
            self.assertEqual(
                len(community.members), len(row) - 1,
                'Number of members in BOCC obj doesn\'t match expectation')
def test_summarize_cell_type_specificity(self):
    """Check the second element of the cell-type specificity summary.

    Element 1 of ``summarize_cell_type_specificity()`` must equal 1 for
    the example community.
    """
    c = BOCC()
    c.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)
    stuff = c.summarize_cell_type_specificity()
    # The original message said "should not be 1", contradicting the
    # assertion; it now states the expectation actually being checked.
    self.assertEqual(stuff[1], 1, 'The max association count should be 1')
def test_get_cell_type_counts(self):
    """Check the cell-type count for cone photoreceptor cells.

    The entry keyed ``'Cone photoreceptor cells'`` in the result of
    ``get_cell_type_counts()`` must equal 1 for the example community.
    """
    community = BOCC()
    community.add_members(EXAMPLE_MEMBERS, EXAMPLE_TYPES)

    cell_type_counts = community.get_cell_type_counts()
    cone_count = cell_type_counts['Cone photoreceptor cells']

    self.assertEqual(
        cone_count, 1, 'There should be 1 Cone cell occurrence')