def test_ucsc_refseq():
    """
    Testing with a small RefSeq GTF file downloaded from
    http://genome.ucsc.edu/cgi-bin/hgTables

    Checks the total number of genes and transcripts loaded from the GTF,
    then checks that genes_at_locus(1, 67092176) reports exactly the two
    expected gene IDs.
    """
    genome = Genome("GRCh38", gtf_path_or_url=UCSC_REFSEQ_PATH)
    genome.install()

    eq_(len(genome.genes()), 2)
    eq_(len(genome.transcripts()), 2)

    genes_at_locus = genome.genes_at_locus(1, 67092176)
    eq_(len(genes_at_locus), 2)

    # Compare as sets so the check does not depend on the order in which
    # the genes are returned.
    ids = {gene.id for gene in genes_at_locus}
    eq_({"NM_001276352", "NR_075077"}, ids)
def test_mouse_ENSMUSG00000017167():
    """
    Build a mouse Genome from cropped Ensembl release 81 files for the
    single gene ENSMUSG00000017167 and check its transcripts and sequences.

    GTF cropped from
        ftp://ftp.ensembl.org/pub/release-81/gtf/mus_musculus/
        Mus_musculus.GRCm38.81.gtf.gz
    via:
        grep "ENSMUSG00000017167" Mus_musculus.GRCm38.81.gtf

    Transcript FASTA cropped from
        ftp://ftp.ensembl.org/pub/release-81/fasta/mus_musculus/cdna/
        Mus_musculus.GRCm38.cdna.all.fa.gz
    via:
        grep "ENSMUSG00000017167" Mus_musculus.GRCm38.cdna.all.fa -A 50

    Protein FASTA cropped from
        ftp://ftp.ensembl.org/pub/release-81/fasta/mus_musculus/pep/
        Mus_musculus.GRCm38.pep.all.fa.gz
    via:
        grep "ENSMUSG00000017167" Mus_musculus.GRCm38.pep.all.fa -A 50

    Tested against:
    http://useast.ensembl.org/Mus_musculus/Gene/Summary?db=core;g=ENSMUSG00000017167
    """
    # First 120 characters expected for the coding transcript and its protein.
    expected_transcript_prefix = (
        "GAGAGAAGGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA"
        "GAGAGAGAGAGATTGGGGGTAGGAGAGAGGGAAGGGTGGATAAGGACGGAAAAAAGCTTT"
    )
    expected_protein_prefix = (
        "MMSLRLFSILLATVVSGAWGWGYYGCNEELVGPLYARSLGASSYYGLFTTARFARLHGIS"
        "GWSPRIGDPNPWLQIDLMKKHRIRAVATQGAFNSWDWVTRYMLLYGDRVDSWTPFYQKGH"
    )

    genome = Genome(
        reference_name="GRCm38",
        gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH,
        transcript_fasta_path_or_url=MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH,
        protein_fasta_path_or_url=MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH,
    )
    genome.install()

    # The gene name should resolve to exactly one gene.
    matching_genes = genome.genes_by_name("Cntnap1")
    eq_(len(matching_genes), 1)
    cntnap1 = matching_genes[0]

    # That gene has two transcripts, only one of which is protein coding.
    all_transcripts = cntnap1.transcripts
    eq_(len(all_transcripts), 2)
    coding_transcripts = [
        t for t in all_transcripts if t.biotype == "protein_coding"
    ]
    eq_(len(coding_transcripts), 1)
    coding_transcript = coding_transcripts[0]

    eq_(coding_transcript.sequence[:120], expected_transcript_prefix)
    eq_(coding_transcript.protein_sequence[:120], expected_protein_prefix)
def test_ucsc_gencode():
    """
    Testing with a small GENCODE GTF file downloaded from
    http://genome.ucsc.edu/cgi-bin/hgTables

    Checks the total gene and transcript counts, looks one gene up by ID
    (expecting its name and biotype to be None), and checks the first
    gene / transcript returned by two locus queries.
    """
    genome = Genome("GRCh38", gtf_path_or_url=UCSC_GENCODE_PATH)
    genome.install()

    eq_(len(genome.genes()), 7)
    eq_(len(genome.transcripts()), 7)

    # Lookup by ID: this gene is expected to have no name and no biotype.
    gene = genome.gene_by_id("uc001aak.4")
    eq_(gene.id, "uc001aak.4")
    eq_(gene.name, None)
    eq_(gene.biotype, None)

    # Locus queries: only the first returned element is checked.
    genes_found = genome.genes_at_locus(1, 17369)
    eq_(genes_found[0].id, "uc031tla.1")

    transcripts_found = genome.transcripts_at_locus(1, 30564)
    eq_(transcripts_found[0].id, "uc057aty.1")