Ejemplo n.º 1
0
def test_ucsc_refseq():
    """
    Check a Genome built from a small RefSeq GTF file that was downloaded
    from the UCSC table browser (http://genome.ucsc.edu/cgi-bin/hgTables).

    The file is expected to yield two genes and two transcripts in total,
    and both genes are expected at position 67092176 of contig 1.
    """
    refseq_genome = Genome("GRCh38", gtf_path_or_url=UCSC_REFSEQ_PATH)
    refseq_genome.install()

    # Whole-annotation counts.
    all_genes = refseq_genome.genes()
    eq_(len(all_genes), 2)
    all_transcripts = refseq_genome.transcripts()
    eq_(len(all_transcripts), 2)

    # Locus query: compare IDs as a set so result order does not matter.
    overlapping = refseq_genome.genes_at_locus(1, 67092176)
    eq_(len(overlapping), 2)
    observed_ids = {hit.id for hit in overlapping}
    eq_({"NM_001276352", "NR_075077"}, observed_ids)
Ejemplo n.º 2
0
def test_mouse_ENSMUSG00000017167():
    """
    Check gene, transcript and sequence lookups for mouse gene
    ENSMUSG00000017167 (Cntnap1) using cropped Ensembl release 81 files.

    Provenance of the fixture files:

    * GTF: cropped from ftp://ftp.ensembl.org/pub/release-81/gtf/
      mus_musculus/Mus_musculus.GRCm38.81.gtf.gz via:
      grep "ENSMUSG00000017167" Mus_musculus.GRCm38.81.gtf

    * Transcript FASTA: cropped from ftp://ftp.ensembl.org/pub/release-81/
      fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz via:
      grep "ENSMUSG00000017167" Mus_musculus.GRCm38.cdna.all.fa -A 50

    * Protein FASTA: cropped from ftp://ftp.ensembl.org/pub/release-81/
      fasta/mus_musculus/pep/Mus_musculus.GRCm38.pep.all.fa.gz via:
      grep "ENSMUSG00000017167" Mus_musculus.GRCm38.pep.all.fa -A 50

    Expected values were checked against:
    http://useast.ensembl.org/Mus_musculus/Gene/Summary?db=core;g=ENSMUSG00000017167
    """
    # First 120 characters expected for the coding transcript's sequence
    # and for its protein sequence, respectively.
    expected_cdna_prefix = (
        "GAGAGAAGGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA"
        "GAGAGAGAGAGATTGGGGGTAGGAGAGAGGGAAGGGTGGATAAGGACGGAAAAAAGCTTT"
    )
    expected_protein_prefix = (
        "MMSLRLFSILLATVVSGAWGWGYYGCNEELVGPLYARSLGASSYYGLFTTARFARLHGIS"
        "GWSPRIGDPNPWLQIDLMKKHRIRAVATQGAFNSWDWVTRYMLLYGDRVDSWTPFYQKGH"
    )

    mouse_genome = Genome(
        reference_name="GRCm38",
        gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH,
        transcript_fasta_path_or_url=MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH,
        protein_fasta_path_or_url=MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH,
    )
    mouse_genome.install()

    # Exactly one gene should carry the name "Cntnap1".
    matching_genes = mouse_genome.genes_by_name("Cntnap1")
    eq_(len(matching_genes), 1)
    cntnap1 = matching_genes[0]

    # The gene has two transcripts, only one of which is protein coding.
    all_transcripts = cntnap1.transcripts
    eq_(len(all_transcripts), 2)
    coding_transcripts = [
        t for t in all_transcripts if t.biotype == "protein_coding"
    ]
    eq_(len(coding_transcripts), 1)
    coding_transcript = coding_transcripts[0]

    eq_(coding_transcript.sequence[:120], expected_cdna_prefix)
    eq_(coding_transcript.protein_sequence[:120], expected_protein_prefix)
Ejemplo n.º 3
0
def test_ucsc_gencode():
    """
    Check a Genome built from a small GENCODE GTF file that was downloaded
    from the UCSC table browser (http://genome.ucsc.edu/cgi-bin/hgTables).

    The file is expected to yield seven genes and seven transcripts. Gene
    "uc001aak.4" is expected to have neither a name nor a biotype, and the
    first results of two locus queries on contig 1 are checked by ID.
    """
    gencode_genome = Genome("GRCh38", gtf_path_or_url=UCSC_GENCODE_PATH)
    gencode_genome.install()

    # Whole-annotation counts.
    all_genes = gencode_genome.genes()
    eq_(len(all_genes), 7)
    all_transcripts = gencode_genome.transcripts()
    eq_(len(all_transcripts), 7)

    # Lookup by ID; name and biotype are expected to be None for this gene.
    looked_up = gencode_genome.gene_by_id("uc001aak.4")
    eq_(looked_up.id, "uc001aak.4")
    eq_(looked_up.name, None)
    eq_(looked_up.biotype, None)

    # Locus queries: only the first returned element is checked.
    genes_here = gencode_genome.genes_at_locus(1, 17369)
    first_gene = genes_here[0]
    eq_(first_gene.id, "uc031tla.1")

    transcripts_here = gencode_genome.transcripts_at_locus(1, 30564)
    first_transcript = transcripts_here[0]
    eq_(first_transcript.id, "uc057aty.1")