コード例 #1
0
    def testRectify2(self):
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       GC     G       20      PASS    .       GT      1/1\n
chr2   6   .       G      A       20      PASS    .       GT      1/1\n
"""
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       GCCG     GCA       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str, 'chr2')
        pred_vars = vcf_to_ChromVariants(pred_str, 'chr2')
        gtdict = _genotype_concordance_dict()
        cvs = ChromVariantStats(true_vars, pred_vars, [], [3, 6], [3], gtdict)
        cvs.rectify(get_reference(), 100)
        self.assertEqual(cvs.num_pred[VARIANT_TYPE.INDEL_OTH], 1)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_OTH], 1)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_OTH], 0)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.INDEL_OTH], 0)
        self.assertEqual(cvs.num_pred[VARIANT_TYPE.SNP], 0)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP], 0)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.SNP], 0)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.SNP], 0)
        self.assertEqual(cvs.num_pred[VARIANT_TYPE.INDEL_DEL], 0)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.INDEL_DEL], 0)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_DEL], 0)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_DEL], 0)
コード例 #2
0
    def testTruePosRectify(self):
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   3   .       TC     T       20      PASS    .       GT      1/1\n
chr4   5   .       TC     T       20      PASS    .       GT      1/1\n
chr4   8   .       C      T       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   4   .       C     T       20      PASS    .       GT      1/1\n
chr4   5   .       TC    T       20      PASS    .       GT      1/1\n
chr4   7   .       TC    T       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str, 'chr4')
        pred_vars = vcf_to_ChromVariants(pred_str, 'chr4')
        gtdict = _genotype_concordance_dict()
        cvs = ChromVariantStats(true_vars, pred_vars, [5], [4, 7], [3, 8],
                                gtdict)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.SNP], 0)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.SNP], 1)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP], 1)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_DEL], 1)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.INDEL_DEL], 1)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_DEL], 1)
        cvs.rectify(get_reference(), 100)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.SNP], 0)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP], 0)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.SNP], 1)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_DEL], 0)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_DEL], 2)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.INDEL_DEL], 0)
コード例 #3
0
    def testInit(self):
        # test counts of false positive, false negative, true positive
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   5   .       C     T       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   7   .       G     C       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str, 'chr2')
        pred_vars = vcf_to_ChromVariants(pred_str, 'chr2')
        gtdict = _genotype_concordance_dict()
        gtdict[VARIANT_TYPE.SNP][GENOTYPE_TYPE.HOM_VAR][
            GENOTYPE_TYPE.HOM_VAR] += 1
        cvs = ChromVariantStats(true_vars, pred_vars, [3], [7], [5], gtdict)
        self.assertEqual(cvs.num_true[VARIANT_TYPE.SNP], 2)
        self.assertEqual(cvs.num_pred[VARIANT_TYPE.SNP], 2)
        self.assertEqual(len(cvs.false_positives.all_locations), 1)
        self.assertEqual(len(cvs.false_negatives.all_locations), 1)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.SNP], 1)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.SNP], 1)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP], 1)
コード例 #4
0
    def testRectify(self):
        # rectify CVS with a rescue-able indel
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   2   .       TGC     TAT       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   4   .       C     T       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str, 'chr2')
        pred_vars = vcf_to_ChromVariants(pred_str, 'chr2')
        gtdict = _genotype_concordance_dict(
        )  # leave empty, we aren't testing this yet
        cvs = ChromVariantStats(true_vars, pred_vars, [], [3, 4], [2], gtdict)
        # before rectify, no true positives
        self.assertTrue(all(map(lambda x: x == 0, cvs.num_tp.itervalues())))
        # one false negative indel
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_OTH], 1)
        # two false positives SNPs
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP], 2)
        cvs.rectify(get_reference(), 100)
        # after rectify, one true positive indel
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_OTH], 1)
        # no false positives or false negatives
        self.assertTrue(all(map(lambda x: x == 0, cvs.num_fp.itervalues())))
        self.assertTrue(all(map(lambda x: x == 0, cvs.num_fn.itervalues())))
コード例 #5
0
ファイル: eval_helper.py プロジェクト: amplab/smash
    def testTruePosRectify(self):
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   3   .       TC     T       20      PASS    .       GT      1/1\n
chr4   5   .       TC     T       20      PASS    .       GT      1/1\n
chr4   8   .       C      T       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   4   .       C     T       20      PASS    .       GT      1/1\n
chr4   5   .       TC    T       20      PASS    .       GT      1/1\n
chr4   7   .       TC    T       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str,'chr4')
        pred_vars = vcf_to_ChromVariants(pred_str,'chr4')
        gtdict = _genotype_concordance_dict()
        cvs = ChromVariantStats(true_vars,pred_vars,[5],[4,7],[3,8],gtdict)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.SNP],0)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.SNP],1)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP],1)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_DEL],1)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.INDEL_DEL],1)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_DEL],1)
        cvs.rectify(get_reference(),100)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.SNP],0)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP],0)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.SNP],1)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_DEL],0)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_DEL],2)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.INDEL_DEL],0)
コード例 #6
0
ファイル: eval_helper.py プロジェクト: amplab/smash
    def testRectify2(self):
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       GC     G       20      PASS    .       GT      1/1\n
chr2   6   .       G      A       20      PASS    .       GT      1/1\n
"""
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       GCCG     GCA       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str,'chr2')
        pred_vars = vcf_to_ChromVariants(pred_str,'chr2')
        gtdict = _genotype_concordance_dict()
        cvs = ChromVariantStats(true_vars, pred_vars, [], [3,6], [3],gtdict)
        cvs.rectify(get_reference(),100)
        self.assertEqual(cvs.num_pred[VARIANT_TYPE.INDEL_OTH],1)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_OTH],1)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_OTH],0)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.INDEL_OTH],0)
        self.assertEqual(cvs.num_pred[VARIANT_TYPE.SNP],0)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP],0)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.SNP],0)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.SNP],0)
        self.assertEqual(cvs.num_pred[VARIANT_TYPE.INDEL_DEL],0)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.INDEL_DEL],0)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_DEL],0)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_DEL],0)
コード例 #7
0
ファイル: eval_helper.py プロジェクト: amplab/smash
    def testRectify(self):
        # rectify CVS with a rescue-able indel
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   2   .       TGC     TAT       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   4   .       C     T       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str,'chr2')
        pred_vars = vcf_to_ChromVariants(pred_str,'chr2')
        gtdict = _genotype_concordance_dict() # leave empty, we aren't testing this yet
        cvs = ChromVariantStats(true_vars,pred_vars,[],[3,4],[2],gtdict)
        # before rectify, no true positives
        self.assertTrue(all(map(lambda x: x == 0,cvs.num_tp.itervalues())))
        # one false negative indel
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_OTH],1)
        # two false positives SNPs
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP],2)
        cvs.rectify(get_reference(),100)
        # after rectify, one true positive indel
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_OTH],1)
        # no false positives or false negatives
        self.assertTrue(all(map(lambda x: x == 0, cvs.num_fp.itervalues())))
        self.assertTrue(all(map(lambda x: x ==0, cvs.num_fn.itervalues())))
コード例 #8
0
ファイル: eval_helper.py プロジェクト: amplab/smash
    def testInit(self):
        # test counts of false positive, false negative, true positive
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   5   .       C     T       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   7   .       G     C       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str,'chr2')
        pred_vars = vcf_to_ChromVariants(pred_str,'chr2')
        gtdict = _genotype_concordance_dict()
        gtdict[VARIANT_TYPE.SNP][GENOTYPE_TYPE.HOM_VAR][GENOTYPE_TYPE.HOM_VAR] += 1
        cvs = ChromVariantStats(true_vars,pred_vars,[3],[7],[5],gtdict)
        self.assertEqual(cvs.num_true[VARIANT_TYPE.SNP],2)
        self.assertEqual(cvs.num_pred[VARIANT_TYPE.SNP],2)
        self.assertEqual(len(cvs.false_positives.all_locations),1)
        self.assertEqual(len(cvs.false_negatives.all_locations),1)
        self.assertEqual(cvs.num_tp[VARIANT_TYPE.SNP],1)
        self.assertEqual(cvs.num_fn[VARIANT_TYPE.SNP],1)
        self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP],1)
コード例 #9
0
    def testAggregate(self):
        # build two ChromVariantStats objects
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   5   .       C     T       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   7   .       G     C       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str, 'chr2')
        pred_vars = vcf_to_ChromVariants(pred_str, 'chr2')
        gtdict = _genotype_concordance_dict()  # leave empty for now
        cvs2 = chrom_evaluate_variants(true_vars, pred_vars, 100, 100,
                                       get_reference(), 50)
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr3   3   .       G     A       20      PASS    .       GT      1/1\n
chr3   5   .       C     T       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr3   3   .       G     A       20      PASS    .       GT      1/1\n
chr3   4   .       T     A       20      PASS    .       GT      1/1\n
chr3   7   .       G     C       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str, 'chr3')
        pred_vars = vcf_to_ChromVariants(pred_str, 'chr3')
        cvs3 = chrom_evaluate_variants(true_vars, pred_vars, 100, 100,
                                       get_reference(), 50)
        #cvs5 = ChromVariantStats(true_vars,pred_vars,[31],[49,79],[52],_genotype_concordance_dict())
        aggregator, errors = _aggregate([cvs2, cvs3])
        # test some sums
        self.assertEqual(cvs2.num_true[VARIANT_TYPE.SNP], 2)
        self.assertEqual(cvs3.num_true[VARIANT_TYPE.SNP], 2)
        self.assertEqual(aggregator(VARIANT_TYPE.SNP)['num_true'], 4)
        self.assertEqual(cvs2.num_tp[VARIANT_TYPE.SNP], 1)
        self.assertEqual(cvs3.num_tp[VARIANT_TYPE.SNP], 1)
        self.assertEqual(aggregator(VARIANT_TYPE.SNP)['good_predictions'], 2)
コード例 #10
0
ファイル: variants.py プロジェクト: chapmanb/smash
    def testAggregate(self):
        # build two ChromVariantStats objects
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   5   .       C     T       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   7   .       G     C       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str,'chr2')
        pred_vars = vcf_to_ChromVariants(pred_str,'chr2')
        gtdict = _genotype_concordance_dict() # leave empty for now
        cvs2 = chrom_evaluate_variants(true_vars,pred_vars,100,100,get_reference(),50)
        true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr3   3   .       G     A       20      PASS    .       GT      1/1\n
chr3   5   .       C     T       20      PASS    .       GT      1/1\n
"""
        pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr3   3   .       G     A       20      PASS    .       GT      1/1\n
chr3   4   .       T     A       20      PASS    .       GT      1/1\n
chr3   7   .       G     C       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(true_str,'chr3')
        pred_vars = vcf_to_ChromVariants(pred_str,'chr3')
        cvs3 = chrom_evaluate_variants(true_vars,pred_vars,100,100,get_reference(),50)
        #cvs5 = ChromVariantStats(true_vars,pred_vars,[31],[49,79],[52],_genotype_concordance_dict())
        aggregator,errors = _aggregate([cvs2,cvs3])
        # test some sums
        self.assertEqual(cvs2.num_true[VARIANT_TYPE.SNP],2)
        self.assertEqual(cvs3.num_true[VARIANT_TYPE.SNP],2)
        self.assertEqual(aggregator(VARIANT_TYPE.SNP)['num_true'],4)
        self.assertEqual(cvs2.num_tp[VARIANT_TYPE.SNP],1)
        self.assertEqual(cvs3.num_tp[VARIANT_TYPE.SNP],1)
        self.assertEqual(aggregator(VARIANT_TYPE.SNP)['good_predictions'],2)