Beispiel #1
0
def delete_genes_test_data():
    """
    Removes all Gene records through the session, one instance at a time.

    The per-instance delete is deliberate: according to the original author's
    note, a bulk delete of all Gene instances leaves the associated records
    in table 'gene_ontology_map' behind, while deleting each instance
    individually removes them as well.
    """
    for gene_record in SESSION.query(Gene).all():
        SESSION.delete(gene_record)
Beispiel #2
0
def delete_test_ontology_terms_data():
    """
    Removes all OntologyTerm records through the session, one instance at a time.

    The per-instance delete is deliberate: according to the original author's
    note, a bulk delete of all OntologyTerm instances leaves the associated
    records in table 'on_pairs' behind, while deleting each instance
    individually removes them as well.
    """
    for ontology_term in SESSION.query(OntologyTerm).all():
        SESSION.delete(ontology_term)
 def test_get_cytogenetic_band_neg(self):
     """ NEGATIVE CASE: querying bands for taxon ID 7955 (Zebrafish) is expected to return nothing. """
     # Zebrafish taxon ID; the test expects no band stored for it
     absent_taxonid = 7955
     band_query = SESSION.query(CytogeneticBand).filter(
         CytogeneticBand.taxon_id == absent_taxonid)
     # first() yields None when the query matches no row
     self.assertIsNone(band_query.first())
Beispiel #4
0
    def test_get_all_synteny_blocks(self):
        """ Test getting back all SyntenicBlock entries """
        blocks = SESSION.query(SyntenicBlock).all()
        # assertEqual reports both lengths on failure (assertTrue(a == b) only says "False is not true")
        self.assertEqual(len(blocks), len(SYNTENY_BLOCKS_DATA))

        # records are compared positionally against the test data rows
        for i, block in enumerate(blocks):
            serialized = marshal(block, BLOCKS_SCHEMA)
            # same 'in' containment check as before, with a descriptive failure message
            self.assertIn(serialized['id'], SYNTENY_BLOCKS_DATA[i][9])
    def test_get_all_loci(self):
        """
        Positive case: test getting back all Feature records and matching them against the QTL test data.
        """
        loci = SESSION.query(Feature).all()
        # the previous assertTrue(len(loci), len(QTLS_DATA)) used the second
        # argument as the failure message, so the lengths were never compared
        self.assertEqual(len(loci), len(QTLS_DATA))

        # records are compared positionally against the test data rows
        for i, locus in enumerate(loci):
            serialized = marshal(locus, QTLS_SCHEMA)
            # same 'in' containment check as before, with a descriptive failure message
            self.assertIn(serialized['id'], QTLS_DATA[i][11])
def get_genes_by_species(species_id):
    """
    Queries the database for the Gene objects whose taxon ID equals the given species ID.

    :param species_id: NCBI species ID, such as 9606 (H. sapiens), 10090 (M. musculus), etc.
    :return: genes - a list of Gene objects or an empty list
    """
    return SESSION.query(Gene).filter_by(taxon_id=species_id).all()
def get_all_genes():
    """
    Queries the database for every available Gene object (no filtering applied).

    :return: genes - a list of Gene objects or an empty list
    """
    return SESSION.query(Gene).all()
def get_all_snps():
    """
    Queries the database for every available 'SNP Variant' object (no filtering applied).

    :return: snps - a list of 'SnpVariant' objects or an empty list (if none exist)
    """
    return SESSION.query(SnpVariant).all()
Beispiel #9
0
def get_all_bands():
    """
    Queries the database for every available 'Cytogenetic Band' object (no filtering applied).

    :return: bands - a list of 'Cytogenetic Band' objects or an empty list
    """
    return SESSION.query(CytogeneticBand).all()
Beispiel #10
0
    def test_get_synteny_block_by_species_ids(self):
        """ Test getting back an entry by reference and comparison species IDs """
        # the first test data row supplies both the lookup values and the expected id
        first_row = SYNTENY_BLOCKS_DATA[0]
        ref_taxonid, comp_taxonid = first_row[0], first_row[4]

        species_match = and_(SyntenicBlock.ref_taxonid == ref_taxonid,
                             SyntenicBlock.comp_taxonid == comp_taxonid)
        block = SESSION.query(SyntenicBlock).filter(species_match).first()

        self.assertIsNotNone(block)
        self.assertEqual(block.id, first_row[9])
    def test_get_cytogenetic_band(self):
        """ POSITIVE CASE: Test getting back all cytogenetic bands. """

        bands = SESSION.query(CytogeneticBand).all()

        # assertEqual reports both lengths on failure (assertTrue(a == b) only says "False is not true")
        self.assertEqual(len(bands), len(CYTOGENETIC_BAND_DATA))

        # records are compared positionally against the test data rows
        for i, band in enumerate(bands):
            logging.debug(i)
            serialized = marshal(band, CYTOGENETIC_BAND_SCHEMA)
            # same 'in' containment check as before, with a descriptive failure message
            self.assertIn(serialized['location'], CYTOGENETIC_BAND_DATA[i][7])
Beispiel #12
0
def get_homologs_by_species_ids_and_reference_chromosome(
        ref_taxonid, comp_taxonid, chromosome):
    """
    Returns the comparison species Gene objects that are homologs of the reference species genes
    located on the specified chromosome.

    :param ref_taxonid: species ID, such as 9606 (H. sapiens), 10090 (M. musculus), etc.
    :param comp_taxonid: species ID, such as 9606 (H. sapiens), 10090 (M. musculus), etc.
    :param chromosome: reference species chromosome ID
    :return: homologs - a list of Gene objects or an empty list
    """
    # select all reference species genes,
    # located on the specified chromosome
    genes_list = SESSION.query(Gene) \
        .filter(and_(Gene.chr == chromosome,
                     Gene.taxon_id == ref_taxonid)) \
        .all()

    # collect the distinct IDs of all homologs that belong to the comparison
    # species; sorting them makes the chunk boundaries (and therefore the
    # order of the returned list) deterministic instead of depending on
    # set iteration order
    homolog_ids = sorted({
        h.id
        for g in genes_list
        for h in g.homologs
        if h.taxon_id == comp_taxonid
    })

    # the maximum number of host parameters in a single
    # SQL statement in SQLite is 999. Chunk the data so that
    # the request does not result in 'sqlite.OperationalError: too many SQL variables'
    sqlite_max_variable_num = 999
    # one parameter slot is left for the comp_taxonid value bound in the same statement
    chunk_size = sqlite_max_variable_num - 1

    # homologs list
    homologs = []

    for start in range(0, len(homolog_ids), chunk_size):
        chunk = homolog_ids[start:start + chunk_size]
        # select all (homolog) genes: these are all comparison species
        # genes, which are located on various chromosomes and are homologs
        # to all reference species genes, located on the specified chromosome
        # TODO: [1/3/2020 gik] consider using the Homolog table for the comparison genes information, rather than Gene
        # TODO: [1/3/2020 gik] it is possible that the Gene table doesn't have entry for the comparison gene(s)
        query = SESSION \
            .query(Gene) \
            .filter(and_(Gene.taxon_id == comp_taxonid, Gene.id.in_(chunk)))\
            .order_by(Gene.id)

        homologs.extend(query.all())

    return homologs
Beispiel #13
0
    def test_query_nonexistent_reference_species(self):
        """ Test that querying synteny blocks for a non-existent reference species has no result """

        # 7227 is Drosophila melanogaster's NCBI species taxonomy ID
        ref_taxonid = 7227
        comp_taxonid = SYNTENY_BLOCKS_DATA[4][4]

        species_match = and_(SyntenicBlock.ref_taxonid == ref_taxonid,
                             SyntenicBlock.comp_taxonid == comp_taxonid)
        block = SESSION.query(SyntenicBlock).filter(species_match).first()

        # first() yields None when the query matches no row
        self.assertIsNone(block)
def get_snps_by_species_and_trait(species_id, trait_id):
    """
    Queries the database for the 'SNP Variant' objects matching both the given species and trait.

    :param species_id: NCBI assigned species ID, such as 9606 (H. sapiens), 10090 (M. musculus), etc.
    :param trait_id: SNP source (such as EBI) assigned trait ID (e.g. 0001360 for Type 2 Diabetes), etc.
    :return: snps - a list of 'SnpVariant' objects or an empty list (if none exist)
    """
    matching_snps = SESSION.query(SnpVariant).filter_by(
        taxon_id=species_id,
        trait_id=trait_id,
    )
    return matching_snps.all()
def get_blocks_by_species_ids(ref_taxonid, comp_taxonid):
    """
    Returns the SyntenicBlock objects whose reference and comparison species match the given IDs.

    :param ref_taxonid: NCBI species ID, such as 9606 (H. sapiens), 10090 (M. musculus), etc.
    :param comp_taxonid: NCBI species ID, such as 9606 (H. sapiens), 10090 (M. musculus), etc.
    :return: blocks - a list of SyntenicBlock objects or an empty list
    """
    # both species conditions must hold for a block to be selected
    species_match = and_(SyntenicBlock.ref_taxonid == ref_taxonid,
                         SyntenicBlock.comp_taxonid == comp_taxonid)
    return SESSION.query(SyntenicBlock).filter(species_match).all()
Beispiel #16
0
    def test_get_synteny_blocks_by_taxon_ids_chromosome(self):
        """ Test getting back an entry by reference and comparison species IDs, and a reference species chromosome """
        # the fifth test data row supplies both the lookup values and the expected id
        data_row = SYNTENY_BLOCKS_DATA[4]
        ref_taxonid, ref_chromosome = data_row[0], data_row[1]
        comp_taxonid = data_row[4]

        criteria = and_(SyntenicBlock.ref_taxonid == ref_taxonid,
                        SyntenicBlock.comp_taxonid == comp_taxonid,
                        SyntenicBlock.ref_chr == ref_chromosome)
        block = SESSION.query(SyntenicBlock).filter(criteria).first()

        self.assertIsNotNone(block)
        self.assertEqual(block.id, data_row[9])
Beispiel #17
0
def get_bands_by_species_and_chromosome(species_id, chromosome):
    """
    Queries the database for the 'Cytogenetic Band' objects matching both the given species and chromosome.

    :param species_id: NCBI species ID, such as 9606 (H. sapiens), 10090 (M. musculus), etc.
    :param chromosome: reference species chromosome ID
    :return: bands - a list of 'Cytogenetic Band' objects or an empty list
    """
    return SESSION.query(CytogeneticBand) \
        .filter_by(taxon_id=species_id, chr=chromosome) \
        .all()
Beispiel #18
0
    def test_get_homologs_by_species_ids_and_reference_chromosome(self):
        """ Test getting back a homolog by reference and comparison species IDs, and by reference species chromosome. """

        # lookup values come from the first entry nested at GENES_DATA[7][10]
        homolog_row = GENES_DATA[7][10][0]
        ref_taxonid = homolog_row[2]
        ref_chromosome = homolog_row[3]
        comp_taxonid = homolog_row[8]

        criteria = and_(Homolog.ref_taxon_id == ref_taxonid,
                        Homolog.ref_seq_id == ref_chromosome,
                        Homolog.taxon_id == comp_taxonid)
        homolog = SESSION.query(Homolog).filter(criteria).first()

        self.assertIsNotNone(homolog)
        self.assertEqual(homolog.id, GENES_DATA[0][0])
Beispiel #19
0
    def test_query_nonexistent_reference_species(self):
        """ Test that querying homologs for a non-existent reference species has no result """

        # 7227 is Drosophila melanogaster's NCBI species taxonomy ID
        ref_taxonid = 7227

        # remaining lookup values come from the first entry nested at GENES_DATA[7][10]
        homolog_row = GENES_DATA[7][10][0]
        ref_chromosome = homolog_row[3]
        comp_taxonid = homolog_row[8]

        criteria = and_(Homolog.ref_taxon_id == ref_taxonid,
                        Homolog.ref_seq_id == ref_chromosome,
                        Homolog.taxon_id == comp_taxonid)
        homolog = SESSION.query(Homolog).filter(criteria).first()

        # first() yields None when the query matches no row
        self.assertIsNone(homolog)
    def test_get_loci_when_valid_species_id(self):
        """
        Positive case: test getting back all QTL records for a specific valid species.
        """
        # the species of the first test data row; ids of all rows for that
        # species form the set of acceptable results
        species_id = QTLS_DATA[0][0]
        expected_loci_ids = [
            locus[11] for locus in QTLS_DATA if locus[0] == species_id
        ]

        loci = SESSION.query(Feature)\
            .filter(and_(Feature.taxon_id == species_id, Feature.type == 'QTL')).all()
        # Query.all() returns a list, so the previous assertIsNotNone check
        # could never fail; a positive case must return at least one record
        self.assertTrue(loci, "no QTL records returned for a species present in the test data")

        for locus in loci:
            serialized = marshal(locus, QTLS_SCHEMA)
            # assertIn reports the offending id on failure
            self.assertIn(serialized["id"], expected_loci_ids)
def get_genes_by_species_chromosome_position(species_id, chromosome,
                                             block_start, block_end):
    """
    Selects the Gene objects of a species that sit on the given chromosome and overlap the given range.

    :param species_id: NCBI species ID, such as 9606 (H. sapiens), 10090 (M. musculus), etc.
    :param chromosome: species chromosome ID
    :param block_start: synteny block start position
    :param block_end: synteny block end position
    :return: genes - a list of Gene objects or an empty list
    """
    # a gene is selected when it starts no later than the range end
    # and ends no earlier than the range start
    in_range = and_(Gene.taxon_id == species_id, Gene.chr == chromosome,
                    Gene.start <= block_end, Gene.end >= block_start)
    return SESSION.query(Gene).filter(in_range).all()
Beispiel #22
0
    def test_get_metadata_by_ontology_term_id(self):
        """ positive case: test selecting term and its descendants based on specific term id - such as GO:0046983 """

        # ontology term descendant ids from test data: taken from the
        # first test data entry whose id is GO:0046983
        expected_descendant_ids = []

        for entry in ONTOLOGY_TERMS_DATA:
            if entry[0] == 'GO:0046983':
                # check and add descendants:
                # those would have been entered in the DB by the model
                expected_descendant_ids = [descendant[0] for descendant in entry[5]]
                break

        # .all() returns a list, so the marshalled result is a list as well
        terms = SESSION.query(OntologyTerm) \
            .filter_by(id='GO:0046983').all()

        serialized = marshal(terms, ontologies_controller.ONT_TERM_METADATA_SCHEMA)
        self.assertEqual(len(serialized), 1)
        self.assertEqual(len(serialized[0]["descendants"]), len(expected_descendant_ids))

        for d in serialized[0]["descendants"]:
            # assertIn reports the offending id on failure
            self.assertIn(d["id"], expected_descendant_ids)
Beispiel #23
0
    def test_get_terms_by_ontology_prefix(self):
        """ positive case: test selecting ontology terms with specific prefix - such as GO (Gene Ontology). """

        # GO ontology term ids from test data
        expected_term_ids = []

        for entry in ONTOLOGY_TERMS_DATA:
            if entry[0].startswith('GO:'):
                expected_term_ids.append(entry[0])
                # check and add descendants:
                # those would have been entered in the DB by the model
                expected_term_ids.extend(d[0] for d in entry[5])

        # remove duplicated ids: test data shouldn't have any, but just in case
        expected_term_ids = list(dict.fromkeys(expected_term_ids))

        terms = SESSION.query(OntologyTerm).\
            filter(OntologyTerm.id.like('GO%')).all()

        self.assertEqual(len(terms), len(expected_term_ids))
        for term in terms:
            serialized = marshal(term, ontologies_controller.ONT_TERMS_SCHEMA)
            # assertIn reports the offending id on failure
            self.assertIn(serialized["id"], expected_term_ids)
Beispiel #24
0
def delete_blocks_test_data():
    """
    Deletes all SyntenicBlock records with a single query-level (bulk) delete.
    """
    all_blocks = SESSION.query(SyntenicBlock)
    all_blocks.delete()
Beispiel #25
0
def delete_homologs_test_data():
    """
    Deletes all Homolog records with a single query-level (bulk) delete.
    """
    all_homologs = SESSION.query(Homolog)
    all_homologs.delete()
Beispiel #26
0
def delete_qtls_test_data():
    """
    Deletes all Feature records with a single query-level (bulk) delete.
    """
    all_features = SESSION.query(Feature)
    all_features.delete()
Beispiel #27
0
def delete_exons_test_data():
    """
    Deletes all Exon records with a single query-level (bulk) delete.
    """
    all_exons = SESSION.query(Exon)
    all_exons.delete()
Beispiel #28
0
def delete_cytogenetic_band_test_data():
    """
    Deletes all CytogeneticBand records with a single query-level (bulk) delete.
    """
    all_bands = SESSION.query(CytogeneticBand)
    all_bands.delete()