def delete_genes_test_data():
    """
    Remove every Gene record from the test database.

    The genes are deleted one instance at a time through the session:
    a bulk delete of all Gene instances doesn't delete the associated
    records in table 'gene_ontology_map'.
    """
    for gene in SESSION.query(Gene).all():
        SESSION.delete(gene)
def delete_test_ontology_terms_data():
    """
    Remove every OntologyTerm record from the test database.

    The terms are deleted one instance at a time through the session:
    a bulk delete of all OntologyTerm instances doesn't delete the
    associated records in table 'on_pairs'.
    """
    for term in SESSION.query(OntologyTerm).all():
        SESSION.delete(term)
def test_get_cytogenetic_band_neg(self):
    """
    NEGATIVE CASE: querying cytogenetic bands for a taxon ID that is not
    in the test data (Zebrafish - 7955) must return no record.
    """
    missing_taxon_id = 7955

    band_query = SESSION.query(CytogeneticBand)
    band_query = band_query.filter(
        CytogeneticBand.taxon_id == missing_taxon_id)

    self.assertIsNone(band_query.first())
def test_get_all_synteny_blocks(self):
    """
    Test getting back all SyntenicBlock entries.

    Checks that the number of stored blocks matches the number of rows in
    SYNTENY_BLOCKS_DATA and that each serialized block ID is contained in
    the ID field (index 9) of the test data row at the same position.
    """
    blocks = SESSION.query(SyntenicBlock).all()

    # assertEqual reports both lengths when the counts differ
    self.assertEqual(len(blocks), len(SYNTENY_BLOCKS_DATA))

    for i, block in enumerate(blocks):
        serialized = marshal(block, BLOCKS_SCHEMA)
        # assertIn reports the offending ID and the expected value on failure
        self.assertIn(serialized['id'], SYNTENY_BLOCKS_DATA[i][9])
def test_get_all_loci(self):
    """
    Positive case: test getting back all QTL records.

    Checks that the number of stored Feature records matches the number of
    rows in QTLS_DATA and that each serialized record ID is contained in
    the ID field (index 11) of the test data row at the same position.
    """
    loci = SESSION.query(Feature).all()

    # NOTE: assertTrue(len(loci), len(QTLS_DATA)) would treat the second
    # argument as the failure message and never compare the two counts;
    # assertEqual performs the intended comparison.
    self.assertEqual(len(loci), len(QTLS_DATA))

    for i, locus in enumerate(loci):
        serialized = marshal(locus, QTLS_SCHEMA)
        self.assertIn(serialized['id'], QTLS_DATA[i][11])
def get_genes_by_species(species_id):
    """
    Query the database for the Gene objects that belong to one species.

    :param species_id: NCBI species ID, such as 9606 (H. sapiens),
        10090 (M. musculus), etc.
    :return: a list of Gene objects whose taxon_id equals species_id,
        or an empty list
    """
    return SESSION.query(Gene).filter_by(taxon_id=species_id).all()
def get_all_genes():
    """
    Query the database for every available Gene object.

    :return: a list of Gene objects or an empty list
    """
    return SESSION.query(Gene).all()
def get_all_snps():
    """
    Query the database for every available 'SNP Variant' object.

    :return: a list of 'SnpVariant' objects or an empty list
        (if none exist)
    """
    return SESSION.query(SnpVariant).all()
def get_all_bands():
    """
    Query the database for every available 'Cytogenetic Band' object.

    :return: a list of 'Cytogenetic Band' objects or an empty list
    """
    return SESSION.query(CytogeneticBand).all()
def test_get_synteny_block_by_species_ids(self):
    """
    Test that a block can be looked up by its reference and comparison
    species IDs, using the first row of the test data.
    """
    first_row = SYNTENY_BLOCKS_DATA[0]
    ref_taxonid = first_row[0]
    comp_taxonid = first_row[4]

    species_match = and_(SyntenicBlock.ref_taxonid == ref_taxonid,
                         SyntenicBlock.comp_taxonid == comp_taxonid)
    block = SESSION.query(SyntenicBlock).filter(species_match).first()

    self.assertIsNotNone(block)
    self.assertEqual(block.id, first_row[9])
def test_get_cytogenetic_band(self):
    """
    POSITIVE CASE: Test getting back all cytogenetic bands.

    Checks that the number of stored bands matches the number of rows in
    CYTOGENETIC_BAND_DATA and that each serialized band location is
    contained in the location field (index 7) of the test data row at the
    same position.
    """
    cytogenetic_band = SESSION.query(CytogeneticBand).all()

    # assertEqual reports both lengths when the counts differ
    self.assertEqual(len(cytogenetic_band), len(CYTOGENETIC_BAND_DATA))

    for i, band in enumerate(cytogenetic_band):
        logging.debug(i)
        serialized = marshal(band, CYTOGENETIC_BAND_SCHEMA)
        # assertIn reports the offending location on failure
        self.assertIn(serialized['location'], CYTOGENETIC_BAND_DATA[i][7])
def get_homologs_by_species_ids_and_reference_chromosome(
        ref_taxonid, comp_taxonid, chromosome):
    """
    Return the comparison species genes that are homologs of the reference
    species genes located on the specified chromosome.

    :param ref_taxonid: species ID, such as 9606 (H. sapiens),
        10090 (M. musculus), etc.
    :param comp_taxonid: species ID, such as 9606 (H. sapiens),
        10090 (M. musculus), etc.
    :param chromosome: reference species chromosome ID
    :return: homologs - a list of Gene objects or an empty list; the IDs
        are sorted before being chunked so the chunks are queried in
        ascending ID order (assumes the database orders Gene.id the same
        way Python does)
    """
    # select all reference species genes,
    # located on the specified chromosome
    genes_list = SESSION.query(Gene) \
        .filter(and_(Gene.chr == chromosome,
                     Gene.taxon_id == ref_taxonid)) \
        .all()

    # collect the (unique) IDs of all homologs
    # that belong to the comparison species
    homolog_ids = {
        h.id
        for g in genes_list
        for h in g.homologs
        if h.taxon_id == comp_taxonid
    }

    # the maximum number of host parameters in a single
    # SQL statement in SQLite is 999. Chunk the data so that
    # the request does not result in
    # 'sqlite.OperationalError: too many SQL variables'
    sqlite_max_variable_num = 999
    # each query below also binds comp_taxonid, so keep
    # the number of IDs per chunk one below the limit
    chunk_size = sqlite_max_variable_num - 1

    # sort the IDs before chunking: a set has no defined order, so without
    # sorting, the per-chunk 'ORDER BY' would not give a stable overall order
    ordered_ids = sorted(homolog_ids)
    chunks = [
        ordered_ids[x:x + chunk_size]
        for x in range(0, len(ordered_ids), chunk_size)
    ]

    # homologs list
    homologs = []
    for chunk in chunks:
        # select all (homolog) genes: these are all comparison species
        # genes, which are located on various chromosomes and are homologs
        # to all reference species genes, located on the specified chromosome
        # TODO: [1/3/2020 gik] consider using the Homolog table for the comparison genes information, rather than Gene
        # TODO: [1/3/2020 gik] it is possible that the Gene table doesn't have entry for the comparison gene(s)
        query = SESSION \
            .query(Gene) \
            .filter(and_(Gene.taxon_id == comp_taxonid,
                         Gene.id.in_(chunk))) \
            .order_by(Gene.id)
        homologs.extend(query.all())

    return homologs
def test_query_nonexistent_reference_species(self):
    """
    Test that querying synteny blocks for a reference species that is not
    in the test data returns nothing.
    """
    # 7227 is Drosophila melanogaster's NCBI species taxonomy ID
    ref_taxonid = 7227
    comp_taxonid = SYNTENY_BLOCKS_DATA[4][4]

    species_match = and_(SyntenicBlock.ref_taxonid == ref_taxonid,
                         SyntenicBlock.comp_taxonid == comp_taxonid)
    block = SESSION.query(SyntenicBlock).filter(species_match).first()

    self.assertIsNone(block)
def get_snps_by_species_and_trait(species_id, trait_id):
    """
    Query the database for the 'SNP Variant' objects of the specified
    species and trait.

    :param species_id: NCBI assigned species ID, such as 9606 (H. sapiens),
        10090 (M. musculus), etc.
    :param trait_id: SNP source (such as EBI) assigned trait ID
        (e.g. 0001360 for Type 2 Diabetes), etc.
    :return: a list of 'SnpVariant' objects or an empty list
        (if none exist)
    """
    criteria = {'taxon_id': species_id, 'trait_id': trait_id}
    return SESSION.query(SnpVariant).filter_by(**criteria).all()
def get_blocks_by_species_ids(ref_taxonid, comp_taxonid):
    """
    Query the database for the SyntenicBlock objects of a specific
    reference and comparison species pair.

    :param ref_taxonid: NCBI species ID, such as 9606 (H. sapiens),
        10090 (M. musculus), etc.
    :param comp_taxonid: NCBI species ID, such as 9606 (H. sapiens),
        10090 (M. musculus), etc.
    :return: a list of SyntenicBlock objects or an empty list
    """
    species_match = and_(SyntenicBlock.ref_taxonid == ref_taxonid,
                         SyntenicBlock.comp_taxonid == comp_taxonid)
    return SESSION.query(SyntenicBlock).filter(species_match).all()
def test_get_synteny_blocks_by_taxon_ids_chromosome(self):
    """
    Test that a block can be looked up by its reference and comparison
    species IDs together with the reference species chromosome, using the
    fifth row of the test data.
    """
    data_row = SYNTENY_BLOCKS_DATA[4]
    ref_taxonid = data_row[0]
    ref_chromosome = data_row[1]
    comp_taxonid = data_row[4]

    criteria = and_(SyntenicBlock.ref_taxonid == ref_taxonid,
                    SyntenicBlock.comp_taxonid == comp_taxonid,
                    SyntenicBlock.ref_chr == ref_chromosome)
    block = SESSION.query(SyntenicBlock).filter(criteria).first()

    self.assertIsNotNone(block)
    self.assertEqual(block.id, data_row[9])
def get_bands_by_species_and_chromosome(species_id, chromosome):
    """
    Query the database for the 'Cytogenetic Band' objects of a specific
    species and chromosome.

    :param species_id: NCBI species ID, such as 9606 (H. sapiens),
        10090 (M. musculus), etc.
    :param chromosome: species chromosome ID
    :return: a list of 'Cytogenetic Band' objects or an empty list
    """
    criteria = {'taxon_id': species_id, 'chr': chromosome}
    return SESSION.query(CytogeneticBand).filter_by(**criteria).all()
def test_get_homologs_by_species_ids_and_reference_chromosome(self):
    """
    Test that a homolog can be looked up by reference and comparison
    species IDs together with the reference species chromosome.
    """
    # first homolog entry of the eighth gene in the test data
    homolog_row = GENES_DATA[7][10][0]
    ref_taxonid = homolog_row[2]
    ref_chromosome = homolog_row[3]
    comp_taxonid = homolog_row[8]

    criteria = and_(Homolog.ref_taxon_id == ref_taxonid,
                    Homolog.ref_seq_id == ref_chromosome,
                    Homolog.taxon_id == comp_taxonid)
    homolog = SESSION.query(Homolog).filter(criteria).first()

    self.assertIsNotNone(homolog)
    self.assertEqual(homolog.id, GENES_DATA[0][0])
def test_query_nonexistent_reference_species(self):
    """
    Test that querying homologs for a reference species that is not in
    the test data returns nothing.
    """
    # 7227 is Drosophila melanogaster's NCBI species taxonomy ID
    ref_taxonid = 7227
    homolog_row = GENES_DATA[7][10][0]
    ref_chromosome = homolog_row[3]
    comp_taxonid = homolog_row[8]

    criteria = and_(Homolog.ref_taxon_id == ref_taxonid,
                    Homolog.ref_seq_id == ref_chromosome,
                    Homolog.taxon_id == comp_taxonid)
    homolog = SESSION.query(Homolog).filter(criteria).first()

    self.assertIsNone(homolog)
def test_get_loci_when_valid_species_id(self):
    """
    Positive case: test getting back all QTL records for a specific
    valid species.

    The species ID is taken from the first row of QTLS_DATA; every record
    returned for that species must have an ID listed in the test data
    (index 11) for the same species.
    """
    species_id = QTLS_DATA[0][0]
    expected_loci_ids = [
        locus[11] for locus in QTLS_DATA if locus[0] == species_id
    ]

    loci = SESSION.query(Feature) \
        .filter(and_(Feature.taxon_id == species_id,
                     Feature.type == 'QTL')) \
        .all()

    # .all() always returns a list, so an 'is not None' check could never
    # fail; require at least one record so that an empty result does not
    # pass the test vacuously
    self.assertTrue(loci, "no QTL records returned for a valid species")

    for locus in loci:
        serialized = marshal(locus, QTLS_SCHEMA)
        self.assertIn(serialized["id"], expected_loci_ids)
def get_genes_by_species_chromosome_position(species_id,
                                             chromosome,
                                             block_start,
                                             block_end):
    """
    Query the database for the Gene objects of a specific species and
    chromosome whose span overlaps the given range on the chromosome.

    :param species_id: NCBI species ID, such as 9606 (H. sapiens),
        10090 (M. musculus), etc.
    :param chromosome: species chromosome ID
    :param block_start: synteny block start position
    :param block_end: synteny block end position
    :return: a list of Gene objects or an empty list
    """
    # a gene is selected when it starts no later than the block end
    # and ends no earlier than the block start
    overlaps_block = and_(Gene.taxon_id == species_id,
                          Gene.chr == chromosome,
                          Gene.start <= block_end,
                          Gene.end >= block_start)
    return SESSION.query(Gene).filter(overlaps_block).all()
def test_get_metadata_by_ontology_term_id(self):
    """
    Positive case: test selecting a term and its descendants based on a
    specific term ID - such as GO:0046983.

    The expected descendant IDs are read from the first matching row of
    ONTOLOGY_TERMS_DATA (descendants at index 5); the serialized term must
    list exactly that many descendants, each with an expected ID.
    """
    term_id = 'GO:0046983'

    # ontology term descendant IDs from test data:
    # those would have been entered in the DB by the model
    data_row = next(
        (row for row in ONTOLOGY_TERMS_DATA if row[0] == term_id), None)
    expected_descendant_ids = (
        [descendant[0] for descendant in data_row[5]]
        if data_row is not None else []
    )

    term = SESSION.query(OntologyTerm) \
        .filter_by(id=term_id).all()
    serialized = marshal(term,
                         ontologies_controller.ONT_TERM_METADATA_SCHEMA)

    self.assertEqual(len(serialized), 1)
    self.assertEqual(len(serialized[0]["descendants"]),
                     len(expected_descendant_ids))
    for d in serialized[0]["descendants"]:
        # assertIn reports the unexpected ID on failure
        self.assertIn(d["id"], expected_descendant_ids)
def test_get_terms_by_ontology_prefix(self):
    """
    Positive case: test selecting ontology terms with a specific prefix -
    such as GO (Gene Ontology).

    The expected IDs are the 'GO:' terms of ONTOLOGY_TERMS_DATA together
    with their descendants (index 5); the query must return exactly that
    many terms, each with an expected ID.
    """
    # GO ontology term ids from test data
    expected_term_ids = []
    for term in ONTOLOGY_TERMS_DATA:
        if term[0].startswith('GO:'):
            expected_term_ids.append(term[0])
            # check and add descendants:
            # those would have been entered in the DB by the model
            expected_term_ids.extend(d[0] for d in term[5])

    # remove duplicated ids (keeping first-seen order):
    # test data shouldn't have any, but just in case
    expected_term_ids = list(dict.fromkeys(expected_term_ids))

    terms = SESSION.query(OntologyTerm) \
        .filter(OntologyTerm.id.like('GO%')).all()

    self.assertEqual(len(terms), len(expected_term_ids))
    for term in terms:
        serialized = marshal(term, ontologies_controller.ONT_TERMS_SCHEMA)
        # assertIn reports the unexpected ID on failure
        self.assertIn(serialized["id"], expected_term_ids)
def delete_blocks_test_data():
    """
    Delete all SyntenicBlock records with a single bulk query delete.
    """
    all_blocks = SESSION.query(SyntenicBlock)
    all_blocks.delete()
def delete_homologs_test_data():
    """
    Delete all Homolog records with a single bulk query delete.
    """
    all_homologs = SESSION.query(Homolog)
    all_homologs.delete()
def delete_qtls_test_data():
    """
    Delete all Feature records with a single bulk query delete.
    """
    all_features = SESSION.query(Feature)
    all_features.delete()
def delete_exons_test_data():
    """
    Delete all Exon records with a single bulk query delete.
    """
    all_exons = SESSION.query(Exon)
    all_exons.delete()
def delete_cytogenetic_band_test_data():
    """
    Delete all CytogeneticBand records with a single bulk query delete.
    """
    all_bands = SESSION.query(CytogeneticBand)
    all_bands.delete()