Ejemplo n.º 1
0
class TestCodonVariant:
    """Tests for the CSV row produced by ``CodonVariant.to_csv_entry``."""

    @classmethod
    def setup_class(cls):
        """Create the offset and the CodonVariant shared by every test.

        This is a classmethod, so the fixtures are stored on the class and
        are reachable from the test methods through ``self``.
        """
        cls.offset = 1269
        cls.variant = CodonVariant(chrom="hxb2_pol",
                                   pos=1,
                                   gene="gag",
                                   nt_start_gene=1309,
                                   nt_end_gene=2841,
                                   nt_start=2077,
                                   nt_end=2079,
                                   ref_codon="ata",
                                   mutant_codon="aAa",
                                   ref_aa="I",
                                   mutant_aa="K",
                                   coverage=563,
                                   mutant_freq=1.60,
                                   mutant_type="S",
                                   ns_count=1.0000,
                                   s_count=1.5000)

    def test_to_csv_entry(self):
        """The CSV row must carry all four coordinates shifted by the offset."""
        # Gene start/end and codon start/end are each expected to be the
        # fixture value plus the offset passed to to_csv_entry.
        expected = (
            "gag,%i-%i,%i,%i,ata,aAa,I,K,563,1.60,S,1.0000,1.5000\n" %
            (1309 + self.offset, 2841 + self.offset, 2077 + self.offset,
             2079 + self.offset))

        assert self.variant.to_csv_entry(self.offset) == expected
Ejemplo n.º 2
0
 def setup_class(self):
     """Store the coordinate offset and one CodonVariant fixture."""
     # Keyword arguments for the fixture variant, collected in one place.
     variant_fields = {
         "chrom": "hxb2_pol",
         "pos": 1,
         "gene": "gag",
         "nt_start_gene": 1309,
         "nt_end_gene": 2841,
         "nt_start": 2077,
         "nt_end": 2079,
         "ref_codon": "ata",
         "mutant_codon": "aAa",
         "ref_aa": "I",
         "mutant_aa": "K",
         "coverage": 563,
         "mutant_freq": 1.60,
         "mutant_type": "S",
         "ns_count": 1.0000,
         "s_count": 1.5000,
     }

     self.offset = 1269
     self.variant = CodonVariant(**variant_fields)
Ejemplo n.º 3
0
    def setup_class(self):
        """Parse the reference FASTA, create the collection and add two
        variants to it (one for "gag", one for "tat").
        """
        self.reference = TEST_PATH + "/data/hxb2_pol.fas"
        self.references = parse_references_from_fasta(self.reference)
        self.variant_collection = CodonVariantCollection(self.references)
        self.offset = 1269

        # Keyword arguments that are identical for both fixture variants.
        shared_fields = dict(
            chrom="hxb2_pol",
            ref_codon="ata",
            ref_aa="I",
            mutant_aa="K",
            coverage=563,
            mutant_freq=1.60,
            mutant_type="S",
            ns_count=1.0000,
            s_count=1.5000)

        # Each variant is built first and stored afterwards, keyed as
        # variants[gene][position key][mutant codon].
        gag_variant = CodonVariant(
            pos=1,
            gene="gag",
            nt_start_gene=1309,
            nt_end_gene=2841,
            nt_start=2077,
            nt_end=2079,
            mutant_codon="aTa",
            **shared_fields)
        self.variant_collection.variants['gag']['3']['aTa'] = gag_variant

        tat_variant = CodonVariant(
            pos=2,
            gene="tat",
            nt_start_gene=3309,
            nt_end_gene=4841,
            nt_start=4000,
            nt_end=4002,
            mutant_codon="aAa",
            **shared_fields)
        self.variant_collection.variants['tat']['10']['aAa'] = tat_variant
def parse_codon_variants(csv, references):
    """Parse a codon variants csv and build a codon variants object.

    Lines starting with "#" are treated as comments and skipped, and so are
    blank (or whitespace-only) lines. Every other line must hold exactly 13
    comma-separated fields, in this order: gene, gene range written as
    "start-end", nt start, nt end, ref codon, mutant codon, ref AA,
    mutant AA, coverage, mutant frequency, mutant type, NS count, S count.

    Args:
        csv: Path of the codon variants csv file to read.
        references: Passed unchanged to ``CodonVariantCollection``.

    Returns:
        The CodonVariantCollection, with each parsed variant stored at
        ``variants[gene][pos][mutant_codon]`` where ``pos`` is the integer
        ``nt_start - gene_start``.

    Raises:
        ValueError: If a data line does not have exactly 13 fields, if the
            gene range is not of the form "start-end", or if a numeric
            field cannot be converted.
    """

    variant_collect = CodonVariantCollection(references)

    # The context manager closes the file, so no explicit close() is needed.
    with open(csv, "r") as f:
        for line in f:
            record = line.rstrip()

            # Skip comment lines, and blank lines that would otherwise
            # break the 13-way unpacking below.
            if not record or record.startswith("#"):
                continue

            (
                gene, gene_start_end, nt_start,
                nt_end, ref_codon, mutant_codon,
                ref_aa, mutant_aa, coverage,
                mutant_freq, mutant_type,
                ns_count, s_count
            ) = record.split(",")

            gene_start, gene_end = gene_start_end.split('-')

            # Position of the codon relative to the start of its gene.
            pos = int(nt_start) - int(gene_start)

            # The file has no chromosome column; the gene name is used
            # for both chrom and gene.
            variant = CodonVariant(
                chrom=gene,
                pos=pos,
                gene=gene,
                nt_start_gene=int(gene_start),
                nt_end_gene=int(gene_end),
                nt_start=int(nt_start),
                nt_end=int(nt_end),
                ref_codon=ref_codon,
                mutant_codon=mutant_codon,
                ref_aa=ref_aa,
                mutant_aa=mutant_aa,
                coverage=int(coverage),
                mutant_freq=float(mutant_freq),
                mutant_type=mutant_type,
                ns_count=float(ns_count),
                s_count=float(s_count))

            variant_collect.variants[gene][pos][mutant_codon] = variant

    return variant_collect
Ejemplo n.º 5
0
    def test_valid_csv_file(self):
        """Tests to make sure that a valid codon variant csv file is properly
        parsed into a CodonVariantCollection object.

        The test builds an expected collection in memory, writes it to a
        temporary csv file, parses that file with parse_codon_variants and
        compares every parsed variant with its in-memory counterpart.
        """

        reference = TEST_PATH + "/data/hxb2_pol.fas"
        rs = parse_references_from_fasta(reference)

        # Expected collection, filled by the loop below.
        var_obj = CodonVariantCollection(rs)

        for i in range(0, 30):
            variant = CodonVariant(chrom="hxb2_pol",
                                   pos=i,
                                   gene="gag",
                                   nt_start_gene=1309 + i,
                                   nt_end_gene=2841 + i,
                                   nt_start=2077 + i,
                                   nt_end=2079 + i,
                                   ref_codon="ata",
                                   mutant_codon="aAa",
                                   ref_aa="I",
                                   mutant_aa="K",
                                   coverage=563 + i,
                                   mutant_freq=1.60 + i,
                                   mutant_type="S",
                                   ns_count=1.0000,
                                   s_count=1.5000)

            # NOTE(review): nt_start and nt_start_gene both grow by i, so
            # pos is 2077 - 1309 = 768 on every iteration. Assuming
            # `variants` behaves like a nested dict, each iteration
            # overwrites the same entry and only the i == 29 variant
            # remains - confirm whether 30 distinct entries were intended.
            pos = int(variant.nt_start) - int(variant.nt_start_gene)
            var_obj.variants["gag"][pos]["aAa"] = variant

        valid_csv = TEST_PATH + "/data/valid_csv.csv"

        with open(valid_csv, "w+") as f:
            # NOTE(review): the header string has no trailing "\n", so the
            # first data row written below is appended to the "#" line. If
            # the parser skips lines starting with "#", that row is never
            # parsed - confirm and add the newline if so.
            f.write("#gene,nt position (gene),nt start position,"
                    "nt end position,ref codon,mutant codon,ref AA,mutant AA,"
                    "coverage,mutant frequency,mutant type,NS count,S count")

            # One csv row per stored variant.
            for gene in var_obj.variants:
                for pos in var_obj.variants[gene]:
                    for codon in var_obj.variants[gene][pos]:
                        variant = var_obj.variants[gene][pos][codon]

                        f.write(
                            "%s,%i-%i,%i,%i,%s,%s,%s,%s,%i,%.2f,%s,%0.4f,%0.4f\n"
                            % (variant.gene, variant.nt_start_gene,
                               variant.nt_end_gene, variant.nt_start,
                               variant.nt_end, variant.ref_codon,
                               variant.mutant_codon, variant.ref_aa,
                               variant.mutant_aa, variant.coverage,
                               variant.mutant_freq, variant.mutant_type,
                               variant.ns_count, variant.s_count))

        parsed_codon_variants = parse_codon_variants(valid_csv, rs)

        # NOTE(review): the assertions only run for variants that were
        # actually parsed; if the parsed collection is empty this loop body
        # never executes and the test passes without checking anything.
        for gene in parsed_codon_variants.variants:
            for pos in parsed_codon_variants.variants[gene]:
                for codon in parsed_codon_variants.variants[gene][pos]:
                    parsed_variant = parsed_codon_variants.variants[gene][pos][
                        codon]
                    variant = var_obj.variants[gene][pos][codon]

                    assert parsed_variant.chrom == variant.chrom
                    assert parsed_variant.nt_start_gene == variant.nt_start_gene
                    assert parsed_variant.nt_end_gene == variant.nt_end_gene
                    assert parsed_variant.nt_start == variant.nt_start
                    assert parsed_variant.nt_end == variant.nt_end
                    assert parsed_variant.ref_codon == variant.ref_codon
                    assert parsed_variant.mutant_codon == variant.mutant_codon
                    assert parsed_variant.ref_aa == variant.ref_aa
                    assert parsed_variant.mutant_aa == variant.mutant_aa
                    assert parsed_variant.coverage == variant.coverage
                    assert parsed_variant.mutant_freq == variant.mutant_freq
                    assert parsed_variant.mutant_type == variant.mutant_type
                    assert parsed_variant.ns_count == variant.ns_count
                    assert parsed_variant.s_count == variant.s_count

        # Remove the temporary csv; this line is not reached when an
        # assertion above fails, so the file is left behind in that case.
        os.remove(valid_csv)