def test_get_distinct(self):
    """should return list of strings"""
    # variation db: at least one of the listed consequence-type
    # combinations must be among the distinct values
    db = Database(account=account, release=Release, species='human',
                  db_type='variation')
    tn, tc = 'variation_feature', 'consequence_type'
    expected = set((('3PRIME_UTR', 'ESSENTIAL_SPLICE_SITE'),
                    ('3PRIME_UTR', 'SPLICE_SITE'),
                    ('5PRIME_UTR', 'ESSENTIAL_SPLICE_SITE')))
    # assertNotEqual/assertEqual replace the deprecated assertNotEquals/
    # assertEquals aliases, which no longer exist in Python 3.12+
    self.assertNotEqual(set(db.getDistinct(tn, tc)) & expected, set())

    # core db: every listed biotype must be among the distinct values
    db = Database(account=account, release=Release, species='human',
                  db_type='core')
    tn, tc = 'gene', 'biotype'
    expected = set(['protein_coding', 'pseudogene', 'processed_transcript',
                    'Mt_tRNA', 'Mt_rRNA', 'IG_V_gene', 'IG_J_gene',
                    'IG_C_gene', 'IG_D_gene', 'miRNA', 'misc_RNA',
                    'snoRNA', 'snRNA', 'rRNA'])
    got = set(db.getDistinct(tn, tc))
    # comparing the set difference (rather than lengths) makes a failure
    # report exactly which expected values were missing
    self.assertEqual(expected - got, set())

    # compara db: every listed homology description must be present
    db = Database(account=account, release=Release, db_type='compara')
    got = set(db.getDistinct('homology', 'description'))
    expected = set(['apparent_ortholog_one2one', 'between_species_paralog',
                    'ortholog_many2many', 'ortholog_one2many',
                    'ortholog_one2one', 'within_species_paralog'])
    self.assertEqual(expected - got, set())
def test_get_distinct(self):
    """should return list of strings"""
    # variation: the distinct consequence types must overlap the known ones
    variation = Database(account=account, release=Release, species='human',
                         db_type='variation')
    known_consequences = {
        '3_prime_UTR_variant',
        'splice_acceptor_variant',
        '5_prime_UTR_variant',
    }
    consequences = variation.getDistinct('variation_feature',
                                         'consequence_types')
    self.assertNotEqual(set(consequences) & known_consequences, set())

    # core: the distinct gene biotypes must overlap the known ones
    core = Database(account=account, release=Release, species='human',
                    db_type='core')
    known_biotypes = {
        'protein_coding', 'pseudogene', 'processed_transcript',
        'Mt_tRNA', 'Mt_rRNA',
        'IG_V_gene', 'IG_J_gene', 'IG_C_gene', 'IG_D_gene',
        'miRNA', 'misc_RNA', 'snoRNA', 'snRNA', 'rRNA',
    }
    biotypes = set(core.getDistinct('gene', 'biotype'))
    self.assertNotEqual(biotypes & known_biotypes, set())

    # compara: every known homology description must be returned
    compara = Database(account=account, release=Release, db_type='compara')
    descriptions = set(compara.getDistinct('homology', 'description'))
    known_descriptions = {
        'gene_split', 'alt_allele', 'other_paralog',
        'ortholog_one2many', 'ortholog_one2one',
        'within_species_paralog', 'ortholog_many2many',
    }
    shared = descriptions & known_descriptions
    self.assertEqual(len(shared), len(known_descriptions))
def test_get_table_row_counts(self):
    """should return correct row counts for some tables"""
    # Minimum row counts keyed by '<db name>.<table name>'. Plain int
    # literals are used: the 'L' long suffix is a SyntaxError on Python 3
    # and the values compare identically on Python 2.
    expect = {'homo_sapiens_core_76_38.analysis': 61,
              'homo_sapiens_core_76_38.seq_region': 55616,
              'homo_sapiens_core_76_38.assembly': 102090}
    human = Database(account=account, release=Release, species='human',
                     db_type='core')
    # only the table part of each key is passed to the query
    table_names = [n.split('.')[1] for n in expect]
    got = dict(human.getTablesRowCount(table_names).getRawData())
    for dbname in expect:
        # counts are lower bounds, so larger values also pass
        self.assertTrue(got[dbname] >= expect[dbname],
                        '%s: got %s rows, expected at least %s'
                        % (dbname, got[dbname], expect[dbname]))
def test_get_table_row_counts(self):
    """should return correct row counts for some tables"""
    # Exact row counts keyed by "<db name>.<table name>". Plain int
    # literals are used: the "L" long suffix is a SyntaxError on Python 3
    # and the values compare identically on Python 2.
    expect = {
        "homo_sapiens_core_56_37a.analysis": 57,
        "homo_sapiens_core_56_37a.seq_region": 55604,
        "homo_sapiens_core_56_37a.assembly": 102068,
        "homo_sapiens_core_56_37a.qtl": 0,
    }
    human = Database(account=account, release=Release, species="human",
                     db_type="core")
    # only the table part of each key is passed to the query
    table_names = [n.split(".")[1] for n in expect]
    got = dict(human.getTablesRowCount(table_names).getRawData())
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+
    self.assertEqual(got, expect)
def _connect_db(self, db_type): connection = dict(account=self._account, release=self.Release, species=self.Species, pool_recycle=self._pool_recycle) if self._core_db is None and db_type == 'core': self._core_db = Database(db_type='core', **connection) gen_rel = self.CoreDb.db_name.GeneralRelease gen_rel = int(re.findall(r'^\d+', str(gen_rel))[0]) self._gen_release = gen_rel elif self._var_db is None and db_type == 'variation': self._var_db = Database(db_type='variation', **connection) elif self._other_db is None and db_type == 'otherfeatures': self._other_db = Database(db_type='otherfeatures', **connection)
def test_get_table_row_counts(self):
    """should return correct row counts for some tables"""
    # Exact row counts keyed by '<db name>.<table name>'. Plain int
    # literals are used: the 'L' long suffix is a SyntaxError on Python 3
    # and the values compare identically on Python 2.
    expect = {
        'homo_sapiens_core_56_37a.analysis': 57,
        'homo_sapiens_core_56_37a.seq_region': 55604,
        'homo_sapiens_core_56_37a.assembly': 102068,
        'homo_sapiens_core_56_37a.qtl': 0,
    }
    human = Database(account=account, release=Release, species='human',
                     db_type='core')
    # only the table part of each key is passed to the query
    table_names = [n.split('.')[1] for n in expect]
    got = dict(human.getTablesRowCount(table_names).getRawData())
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+
    self.assertEqual(got, expect)
def test_get_table_row_counts(self):
    """should return correct row counts for some tables"""
    # lower bounds on the row count, keyed by '<db name>.<table name>'
    minimum_counts = dict([
        ('homo_sapiens_core_76_38.analysis', 61),
        ('homo_sapiens_core_76_38.seq_region', 55616),
        ('homo_sapiens_core_76_38.assembly', 102090),
    ])
    human = Database(account=account, release=Release, species='human',
                     db_type='core')

    # the query takes bare table names, i.e. the part after the dot
    tables = []
    for qualified_name in minimum_counts:
        tables.append(qualified_name.split('.')[1])

    row_counts = human.getTablesRowCount(tables)
    observed = dict(row_counts.getRawData())
    for qualified_name, minimum in minimum_counts.items():
        self.assertTrue(observed[qualified_name] >= minimum)
def _connect_db(self): # TODO can the connection be all done in init? connection = dict(account=self._account, release=self.Release, pool_recycle=self._pool_recycle) if self._compara_db is None: self._compara_db = Database(db_type='compara', division=self.division, **connection)
def test_get_distinct(self):
    """should return list of strings"""
    # variation db: at least one of the listed consequence types must be
    # among the distinct values
    db = Database(account=account, release=Release, species='human',
                  db_type='variation')
    tn, tc = 'variation_feature', 'consequence_types'
    expected = set(('3_prime_UTR_variant', 'splice_acceptor_variant',
                    '5_prime_UTR_variant'))
    got = db.getDistinct(tn, tc)
    # assertNotEqual/assertEqual replace the deprecated assertNotEquals/
    # assertEquals aliases, which no longer exist in Python 3.12+
    self.assertNotEqual(set(got) & expected, set())

    # core db: at least one of the listed biotypes must be present
    db = Database(account=account, release=Release, species='human',
                  db_type='core')
    tn, tc = 'gene', 'biotype'
    expected = set(['protein_coding', 'pseudogene', 'processed_transcript',
                    'Mt_tRNA', 'Mt_rRNA', 'IG_V_gene', 'IG_J_gene',
                    'IG_C_gene', 'IG_D_gene', 'miRNA', 'misc_RNA',
                    'snoRNA', 'snRNA', 'rRNA'])
    got = set(db.getDistinct(tn, tc))
    self.assertNotEqual(got & expected, set())

    # compara db: every listed homology description must be present
    db = Database(account=account, release=Release, db_type='compara')
    got = set(db.getDistinct('homology', 'description'))
    expected = set([u'gene_split', u'alt_allele', u'other_paralog',
                    u'ortholog_one2many', u'ortholog_one2one',
                    u'within_species_paralog', u'ortholog_many2many'])
    # comparing the set difference (rather than lengths) makes a failure
    # report exactly which expected values were missing
    self.assertEqual(expected - got, set())
def test_get_distinct(self):
    """should return list of strings"""
    # variation db: at least one of the listed consequence-type
    # combinations must be among the distinct values
    db = Database(account=account, release=Release, species="human",
                  db_type="variation")
    tn, tc = "variation_feature", "consequence_type"
    expected = set(
        (
            ("3PRIME_UTR", "ESSENTIAL_SPLICE_SITE"),
            ("3PRIME_UTR", "SPLICE_SITE"),
            ("5PRIME_UTR", "ESSENTIAL_SPLICE_SITE"),
        )
    )
    # assertNotEqual/assertEqual replace the deprecated assertNotEquals/
    # assertEquals aliases, which no longer exist in Python 3.12+
    self.assertNotEqual(set(db.getDistinct(tn, tc)) & expected, set())

    # core db: every listed biotype must be among the distinct values
    db = Database(account=account, release=Release, species="human",
                  db_type="core")
    tn, tc = "gene", "biotype"
    expected = set(
        [
            "protein_coding",
            "pseudogene",
            "processed_transcript",
            "Mt_tRNA",
            "Mt_rRNA",
            "IG_V_gene",
            "IG_J_gene",
            "IG_C_gene",
            "IG_D_gene",
            "miRNA",
            "misc_RNA",
            "snoRNA",
            "snRNA",
            "rRNA",
        ]
    )
    got = set(db.getDistinct(tn, tc))
    # comparing the set difference (rather than lengths) makes a failure
    # report exactly which expected values were missing
    self.assertEqual(expected - got, set())

    # compara db: every listed homology description must be present
    db = Database(account=account, release=Release, db_type="compara")
    got = set(db.getDistinct("homology", "description"))
    expected = set(
        [
            "apparent_ortholog_one2one",
            "between_species_paralog",
            "ortholog_many2many",
            "ortholog_one2many",
            "ortholog_one2one",
            "within_species_paralog",
        ]
    )
    self.assertEqual(expected - got, set())
def test_table_has_column(self):
    """return correct values for whether a Table has a column"""
    account = get_ensembl_account(release=Release)
    table = 'transcript_variation'
    # the test expects release 61 to have only the first column name and
    # release 62 to have only the second
    old_column, new_column = 'peptide_allele_string', 'pep_allele_string'

    var61, var62 = [
        Database(account=account, release=number, species='human',
                 db_type='variation')
        for number in (61, 62)
    ]

    self.assertTrue(var61.tableHasColumn(table, old_column))
    self.assertFalse(var61.tableHasColumn(table, new_column))
    self.assertTrue(var62.tableHasColumn(table, new_column))
    self.assertFalse(var62.tableHasColumn(table, old_column))
def test_table_has_column(self):
    """return correct values for whether a Table has a column"""
    account = get_ensembl_account(release=Release)

    def variation_db(release_number):
        # human variation database for one specific release
        return Database(account=account, release=release_number,
                        species='human', db_type='variation')

    var61 = variation_db(61)
    var62 = variation_db(62)

    # (assertion, database, column) triples, checked in order
    checks = (
        (self.assertTrue, var61, 'peptide_allele_string'),
        (self.assertFalse, var61, 'pep_allele_string'),
        (self.assertTrue, var62, 'pep_allele_string'),
        (self.assertFalse, var62, 'peptide_allele_string'),
    )
    for verify, database, column in checks:
        verify(database.tableHasColumn('transcript_variation', column))
def test_connect(self):
    """creating the human core Database and getting its gene table runs"""
    # no assertions: the test passes as long as neither call raises
    human = Database(account=account, release=Release, species='human',
                     db_type='core')
    human.getTable('gene')
def test_connect(self):
    """creating the human core Database and getting its gene table runs"""
    options = {
        "account": account,
        "release": Release,
        "species": "human",
        "db_type": "core",
    }
    # no assertions: the test passes as long as neither call raises
    Database(**options).getTable("gene")