Exemple #1
0
    def test_sv_out_of_range(self):
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr19   88013   .       CTT     C       20      PASS    .       GT      0/1\n
chr19   89272   .       C       T       20      PASS    .       GT      0/1\n
chr19   269852  .       A       AAAAGAAAGGCATGACCTATCCACCCATGCCACCTGGATGGACCTCACAGGCACACTGCTTCATGAGAGAG 20      PASS    .       GT      1/1
"""

        pred_io = StringIO.StringIO(pred_str)
        pred_vcf = vcf.Reader(pred_io)
        stat_reporter = evaluate_low_memory(self.true_vars, pred_vcf, sv_eps, sv_eps, get_reference(),50,50, {'chr19':0,None:100})

        self.truePositive(stat_reporter,VARIANT_TYPE.SNP)
        self.trueNegative(stat_reporter,VARIANT_TYPE.INDEL_INS)
        self.truePositive(stat_reporter,VARIANT_TYPE.INDEL_DEL)

        sv_ins_stats = stat_reporter(VARIANT_TYPE.SV_INS)

        self.assertEqual(sv_ins_stats['num_true'],1)
        self.assertEqual(sv_ins_stats['num_pred'],1)
        self.assertEqual(sv_ins_stats['good_predictions'],0)
        self.assertEqual(sv_ins_stats['intersect_bad'],0)
        self.assertEqual(sv_ins_stats['false_negatives'],1)
        self.assertEqual(sv_ins_stats['nrd_total'],0)
        self.assertEqual(sv_ins_stats['nrd_wrong'],0)

        self.trueNegative(stat_reporter,VARIANT_TYPE.SV_DEL)
Exemple #2
0
    def test_false_neg_snp(self):
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr19   88013   .       CTT     C       20      PASS    .       GT      0/1\n
chr19   269751  .       A       AAAAGAAAGGCATGACCTATCCACCCATGCCACCTGGATGGACCTCACAGGCACACTGCTTCATGAGAGAG 20      PASS    .       GT      1/1
"""

        pred_io = StringIO.StringIO(pred_str)
        pred_vcf = vcf.Reader(pred_io)

        stat_reporter = evaluate_low_memory(self.true_vars, pred_vcf, sv_eps, sv_eps, get_reference(),50,50, {'chr19':0,None:100})

        self.falseNegative(stat_reporter,VARIANT_TYPE.SNP)
        self.trueNegative(stat_reporter,VARIANT_TYPE.INDEL_INS)
        self.truePositive(stat_reporter,VARIANT_TYPE.INDEL_DEL)
        self.truePositive(stat_reporter,VARIANT_TYPE.SV_INS)
        self.trueNegative(stat_reporter,VARIANT_TYPE.SV_DEL)
Exemple #3
0
    def test_perfect(self):
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr19   88013   .       CTT     C       20      PASS    .       GT      0/1\n
chr19   89272   .       C       T       20      PASS    .       GT      0/1\n
chr19   269751  .       A       AAAAGAAAGGCATGACCTATCCACCCATGCCACCTGGATGGACCTCACAGGCACACTGCTTCATGAGAGAG 20      PASS    .       GT      1/1
"""

        pred_io = StringIO.StringIO(pred_str)
        pred_vcf = vcf.Reader(pred_io)
#def evaluate_low_memory(true_iter,pred_iter,eps,eps_bp,ref,window,max_indel_len,contig_lookup,writer=None,known_fp_iter=None):
        stat_reporter = evaluate_low_memory(self.true_vars, pred_vcf, sv_eps, sv_eps, get_reference(), 50, 50, {'chr19':0,None:100})

        self.truePositive(stat_reporter,VARIANT_TYPE.SNP)
        self.trueNegative(stat_reporter,VARIANT_TYPE.INDEL_INS)
        self.truePositive(stat_reporter,VARIANT_TYPE.INDEL_DEL)
        self.truePositive(stat_reporter,VARIANT_TYPE.SV_INS)
        self.trueNegative(stat_reporter,VARIANT_TYPE.SV_DEL)