Beispiel #1
0
def test_dicts():
    """Two readers created for the same file must not share ``uids``."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_ends_at_directive, file=handle)
        # Flush first: the readers are only handed the file name.
        handle.flush()
        path = handle.name
        first_reader = GFFReader(path)
        second_reader = GFFReader(path)

        # Changing one reader's uid collection must leave the other untouched.
        first_reader.uids.add("Test")

        assert first_reader.uids != second_reader.uids
Beispiel #2
0
def test_ids_are_unique():
    """Reading the ``gff_non_unique_ids`` content must raise an exception."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_non_unique_ids, file=handle)
        handle.flush()
        with pytest.raises(Exception):
            # Both construction and iteration happen inside the raises block.
            reader = GFFReader(handle.name)
            for _gene in reader:
                pass
Beispiel #3
0
def test_wrong_number_of_fields():
    """Reading the ``gff_wrong_number_of_fields`` content must raise IOError."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_wrong_number_of_fields, file=handle)
        handle.flush()
        with pytest.raises(IOError):
            # Both construction and iteration happen inside the raises block.
            reader = GFFReader(handle.name)
            for _gene in reader:
                pass
Beispiel #4
0
def test_ends_mid_comment():
    """The last gene read from ``gff_ends_at_comment`` must start at 2."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_ends_at_comment, file=handle)
        handle.flush()
        # Only the most recently yielded gene is kept.
        for gene in GFFReader(handle.name):
            last_gene = gene
    assert last_gene.start == 2
Beispiel #5
0
def test_gene_parented():
    """Reading ``gff_gene_parented_CDS`` must end on a truthy record."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_gene_parented_CDS, file=handle)
        handle.flush()
        reader = GFFReader(handle.name)
        # Exhaust the reader; the loop variable keeps the final record.
        for last_gene in reader:
            pass
        assert last_gene
Beispiel #6
0
def test_invalid_mid_file():
    """Reading ``gff_not_valid_file_content`` must raise AssertionError."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_not_valid_file_content, file=handle)
        handle.flush()
        with pytest.raises(AssertionError):
            # Both construction and iteration happen inside the raises block.
            reader = GFFReader(handle.name)
            for _gene in reader:
                pass
Beispiel #7
0
def test_with_construct():
    """GFFReader works as a context manager and supports ``next()``."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_content, file=handle)
        handle.flush()
        with GFFReader(handle.name) as reader:
            # Only the first record is pulled from the reader.
            first_gene = next(reader)
    assert first_gene.start == 2
Beispiel #8
0
def test_gff_gnomon():
    """Check the coding-transcript tallies and BED output for ncbi_annot.gff.

    A transcript is counted only when it has at least one CDS exon, and a
    gene is counted only when it owns at least one such transcript.
    """
    num_genes = 0
    total_transcripts = 0
    total_exons = 0
    monoexonic_cds = 0
    monoexonic = 0
    for gene in GFFReader("tests/unit/data/ncbi_annot.gff"):
        num_transcripts = 0
        # The transcript ids are not needed, so iterate over the values only.
        for mrna in gene.mrnas.values():
            num_coding_exons = len(mrna.cds_exons)
            if num_coding_exons == 0:
                # Transcripts without CDS exons are ignored entirely.
                continue
            total_exons += len(mrna.exons)
            num_transcripts += 1
            if num_coding_exons == 1:
                monoexonic_cds += 1
            if len(mrna.exons) == 1:
                monoexonic += 1
        if num_transcripts > 0:
            num_genes += 1
            total_transcripts += num_transcripts
    assert num_genes == 3
    assert total_transcripts == 3
    assert monoexonic_cds == 2
    assert monoexonic == 2
    assert total_exons == 11

    # ``gene`` is still bound to the last gene yielded by the reader.
    s = gene.mrnas["rna-XM_003689506.3"].to_bed(cds_only=True)
    bed_line = "NW_003789112.1\t4846\t25552\trna-XM_003689506.3\t0\t-\t4846\t25552\t0,0,0\t9\t38,131,245,228,185,186,198,115,315\t0,548,1486,3275,5756,6363,7036,16397,20391"
    # Only the first twelve tab-separated columns are compared.
    assert s.split('\t')[:12] == bed_line.split('\t')[:12]
Beispiel #9
0
def test_cds_w_no_phase():
    """No CDS exon of the last gene read may have '.' as its phase."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as tmp:
        print(gff_CDS_w_no_phase, file=tmp)
        tmp.flush()
        # Exhaust the reader; the loop variable keeps the final gene.
        for last_gene in GFFReader(tmp.name):
            pass

        # Walk every CDS exon of every transcript lazily, in order.
        cds_exons = (
            cds
            for mrna in last_gene.mrnas.values()
            for cds in mrna.cds_exons
        )
        for cds in cds_exons:
            assert cds.phase != '.'
Beispiel #10
0
def test_with_directives():
    """Check the second and the last gene read from a file with directives."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_content_with_directives, file=handle)
        handle.flush()
        for position, gene in enumerate(GFFReader(handle.name)):
            # Remember the gene at index 1, i.e. the second one yielded.
            if position == 1:
                second_gene = gene
    assert second_gene.start == 3306
    # ``gene`` still refers to the last gene yielded by the reader.
    assert gene.uid == "AL1G10030.v2.1"
Beispiel #11
0
def test_eden():
    """``gff_file_eden`` must yield one gene with three mRNAs."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_eden, file=handle)
        handle.flush()
        gene_count = 0
        for gene in GFFReader(handle.name):
            gene_count += 1
    assert gene_count == 1
    assert gene.uid == "gene00001"
    assert len(gene.mrnas) == 3
    # Inspect the first CDS exon of the first mRNA in iteration order.
    first_mrna_id = next(iter(gene.mrnas.keys()))
    first_cds = gene.mrnas[first_mrna_id].cds_exons[0]
    assert first_cds.uid == "cds00001"
Beispiel #12
0
def test_ends_at_directive():
    """Check start and attributes of the last gene in a directive-terminated file."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_ends_at_directive, file=handle)
        handle.flush()
        # Only the most recently yielded gene is kept.
        for gene in GFFReader(handle.name):
            last_gene = gene
    assert last_gene.start == 2
    # Both attribute checks target the first mRNA in iteration order.
    first_mrna_id = next(iter(last_gene.mrnas.keys()))
    first_mrna = last_gene.mrnas[first_mrna_id]
    assert first_mrna.attr['Alias'] == ['gene1', 'gene2', 'gnee3']
    assert first_mrna.attr['Ontology_term'] == ['t1', 't2']
    assert last_gene.attr['Dbxref'] == ['db1']
Beispiel #13
0
def test_int_exon_mitochondrial_gff():
    """Check exon uids of the CYTB and ND1 genes in int_mitochondrial.gff.

    The exons of both transcripts must carry a uid starting with 'virt_'
    (presumably marking exons synthesised by the reader -- confirm against
    GFFReader), while the ND1 CDS exon must not.
    """
    genes = {
        gene.uid: gene
        for gene in GFFReader("tests/unit/data/int_mitochondrial.gff")
    }

    cytb = genes['gene-CYTB']
    assert cytb is not None
    assert len(cytb.mrnas) > 0
    cytb_mrna = cytb.mrnas['gene-CYTB']
    assert len(cytb_mrna.exons) > 0
    assert cytb_mrna.exons[0].uid.startswith('virt_')

    nd1_mrna = genes['gene-ND1'].mrnas['gene-ND1']
    assert nd1_mrna.exons[0].uid.startswith('virt_')
    # This check appeared twice in the original; once is sufficient.
    assert not nd1_mrna.cds_exons[0].uid.startswith('virt_')
Beispiel #14
0
def test_eden_integer_chr():
    """Same checks as for the eden content, using ``gff_file_eden_integerableChr``.

    Also calls ``print_gff`` and prints the CDS-only BED form of every mRNA;
    that output is not asserted on.
    """
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_eden_integerableChr, file=handle)
        handle.flush()
        gene_count = 0
        for gene in GFFReader(handle.name):
            gene_count += 1
    assert gene_count == 1
    assert gene.uid == "gene00001"
    assert len(gene.mrnas) == 3
    # Inspect the first CDS exon of the first mRNA in iteration order.
    first_mrna_id = next(iter(gene.mrnas.keys()))
    first_cds = gene.mrnas[first_mrna_id].cds_exons[0]
    assert first_cds.uid == "cds00001"
    gene.print_gff()
    for transcript in gene.mrnas.values():
        bed_text = transcript.to_bed(cds_only=True)
        print(bed_text)
    def test_check_splicing_sites(self):
        """Check short-intron and non-canonical splicing counts on refseq fixtures.

        Each fixture is a ``<stem>.gff`` / ``<stem>.fa`` pair under
        ``tests/unit/data/refseq/``. The ``<stem>.fa.fai`` file that exists
        once the FASTA has been opened is removed again after every call,
        even if the call raises.
        """
        data_dir = "tests/unit/data/refseq/"

        def splicing_stats(stem, mrna_id, *limits):
            """Run check_splicing_sites on one fixture and remove its .fai file."""
            genes = list(GFFReader(data_dir + stem + ".gff"))
            fasta_path = data_dir + stem + ".fa"
            genome = pyfaidx.Fasta(fasta_path)
            try:
                return check_splicing_sites(genome,
                                            genes[0].mrnas[mrna_id],
                                            *limits)
            finally:
                # Clean up the index file so a failure does not leave it behind.
                os.remove(fasta_path + ".fai")

        # Only this fixture passes the extra fourth argument (59).
        _, short_intron, nc_splicing, _ = splicing_stats(
            "pstrand_c", "rna-XM_625167.6.m1", 200000, 59)
        assert nc_splicing == 0
        assert short_intron == 1

        _, short_intron, nc_splicing, _ = splicing_stats(
            "mstrand_c", "rna-XM_392640.7.m1", 200000)
        assert nc_splicing == 0
        assert short_intron == 0

        _, _, nc_splicing, _ = splicing_stats(
            "pstrand_nc", "rna-XM_006567179.3.m1", 200000)
        assert nc_splicing == 1

        _, _, nc_splicing, _ = splicing_stats(
            "mstrand_nc", "rna-XM_026443867.1.m1", 200000)
        assert nc_splicing == 1
    def test_get_spliced_cds_seq(self):
        """The last codon returned for each refseq fixture must be a stop codon.

        Each fixture is a ``<stem>.gff`` / ``<stem>.fa`` pair under
        ``tests/unit/data/refseq/``. The ``<stem>.fa.fai`` file that exists
        once the FASTA has been opened is removed again after every call,
        even if the call raises.
        """
        data_dir = "tests/unit/data/refseq/"
        fixtures = (
            ("mstrand_e", "rna-XM_033456960.1.m1"),
            ("pstrand_e", "rna-XM_033441273.1.m1"),
            ("mstrand_e2", "rna-XM_392669.6.m1"),
        )
        for stem, mrna_id in fixtures:
            genes = list(GFFReader(data_dir + stem + ".gff"))
            fasta_path = data_dir + stem + ".fa"
            genome = pyfaidx.Fasta(fasta_path)
            try:
                # Only the fourth returned value (the last codon) is checked.
                _, _, _, last_codon = get_spliced_cds_seq(
                    genome, genes[0].mrnas[mrna_id], True)
            finally:
                # Clean up the index file so a failure does not leave it behind.
                os.remove(fasta_path + ".fai")
            # The message names the fixture that failed.
            assert last_codon in stop_codons, stem
Beispiel #17
0
def test_for_construct():
    """Iterate a reader inside ``with`` and check the second and last genes."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_content, file=handle)
        handle.flush()
        n_genes = 0
        with GFFReader(handle.name) as parser:
            for gene in parser:
                # Remember the gene at index 1, i.e. the second one yielded.
                if n_genes == 1:
                    second_gene = gene
                n_genes += 1
        assert n_genes == 3
        assert second_gene.start == 3306
        # ``gene`` still refers to the last gene yielded by the reader.
        assert gene.uid == "AL1G10030.v2.1"
        last_mrna = gene.mrnas["AL1G10030.t1.v2.1"]
        assert last_mrna.attr["Note"] == "cov:100|id:97.65"
        assert last_mrna.fp_utr == 7449
        assert last_mrna.tp_utr == 7610
def test_bter_CDS():
    """CDS-only BED output from the GFF must match the coding BED file.

    Transcripts are paired positionally; BED columns 0-3 and 5-11 are
    compared, so column 4 is left out.
    """
    genes_bed = list(BEDReader("tests/unit/data/bter_top.coding.bed"))
    genes_gff = list(GFFReader("tests/unit/data/bter_top.gff"))

    for gene_bed, gene_gff in zip(genes_bed, genes_gff):
        mrna_pairs = zip(gene_bed.mrnas.values(), gene_gff.mrnas.values())
        for mrna_bed, mrna_gff in mrna_pairs:
            bed_fields = mrna_bed.to_bed().split('\t')
            gff_fields = mrna_gff.to_bed(cds_only=True).split('\t')

            expected = bed_fields[:4] + bed_fields[5:12]
            observed = gff_fields[:4] + gff_fields[5:12]
            assert expected == observed
def test_bter():
    """Genes read from bter_top.bed and bter_top.gff must be equivalent.

    The gene lists must compare equal, and for positionally paired
    transcripts BED columns 0-3 and 5-11 must agree (column 4 is left out).
    """
    genes_bed = list(BEDReader("tests/unit/data/bter_top.bed"))
    genes_gff = list(GFFReader("tests/unit/data/bter_top.gff"))

    assert genes_bed == genes_gff

    for gene_bed, gene_gff in zip(genes_bed, genes_gff):
        mrna_pairs = zip(gene_bed.mrnas.values(), gene_gff.mrnas.values())
        for mrna_bed, mrna_gff in mrna_pairs:
            bed_fields = mrna_bed.to_bed().split('\t')
            gff_fields = mrna_gff.to_bed().split('\t')

            expected = bed_fields[:4] + bed_fields[5:12]
            observed = gff_fields[:4] + gff_fields[5:12]
            assert expected == observed
Beispiel #20
0
def test_insufficient_permissions_file():
    """Creating a reader for a file with mode 000 must raise PermissionError.

    The superuser is not subject to file permission bits, so no error would
    be raised there; the test is skipped when the effective uid is 0.
    """
    # ``os.geteuid`` does not exist on every platform, hence the guard.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("root bypasses file permission checks")
    with tempfile.NamedTemporaryFile("wt") as tmp:
        os.chmod(tmp.name, 0o000)
        with pytest.raises(PermissionError):
            GFFReader(tmp.name)
Beispiel #21
0
def test_empty_file():
    """Reading a freshly created, empty temporary file must raise EmptyFileError."""
    with tempfile.NamedTemporaryFile() as empty_tmp:
        with pytest.raises(EmptyFileError):
            # Both construction and iteration happen inside the raises block.
            reader = GFFReader(empty_tmp.name)
            for _gene in reader:
                pass
Beispiel #22
0
def test_invalid_file():
    """Creating a reader for "foo" (expected not to exist) must raise FileNotFoundError."""
    missing_path = "foo"
    with pytest.raises(FileNotFoundError):
        GFFReader(missing_path)