コード例 #1
0
 def test_to_vcf_row_instantiated_variant_numeric_chrom(self):
     """REF allele is present in both textual forms of a variant.

     The variant is created with an integer chromosome (``1``); its
     REF sequence must show up in the stringified writer row as well
     as in ``str(variant)``.
     """
     vrt = variant.VariantFactory().from_edit(1, 2093, 'TGG', 'CCC')
     rendered = (str(self.writer.get_row(vrt)), str(vrt))
     for text in rendered:
         self.assertIn('TGG', text)
コード例 #2
0
    def test_get_vcf_row_instantiated_variant(self):
        """Check the rows ``self.writer.get_row`` builds for factory variants.

        Covers a bare variant, a ``GenomVariant`` carrying id/filter/qual
        attributes, a deletion-like edit and an edit for which the writer
        is expected to raise ``ValueError``.
        """
        vf = variant.VariantFactory()
        mnp = vf.from_edit('chr24', 2093, 'TGG', 'CCC')

        # Bare variant: position 2093 is reported as POS 2094 and the
        # ID, QUAL, FILTER and INFO columns are rendered as '.'.
        plain_row = self.writer.get_row(mnp)
        self.assertEqual(plain_row.REF, 'TGG')
        self.assertEqual(plain_row.POS, 2094)
        self.assertEqual(str(plain_row), 'chr24\t2094\t.\tTGG\tCCC\t.\t.\t.')

        # Attributes attached through GenomVariant fill ID, QUAL and FILTER.
        attrib = {'id': 'vrtid', 'filter': 'LOWQUAL', 'qual': 100}
        annotated_row = self.writer.get_row(GenomVariant(mnp, attrib=attrib))
        self.assertEqual(
            str(annotated_row),
            'chr24\t2094\tvrtid\tTGG\tCCC\t100\tLOWQUAL\t.')

        shortening = vf.from_edit('chr20', 1253922, 'TGT', 'G')
        self.assertEqual(str(self.writer.get_row(shortening)),
                         'chr20\t1253923\t.\tTGT\tG\t.\t.\t.')

        # This edit comes from a factory built without a reference; the
        # writer must refuse it with a "Reference is required" error.
        needs_reference = vf.from_edit('chr1', 13957, 'TCCCCCA', 'TCCCCA')
        with self.assertRaises(ValueError) as raised:
            self.writer.get_row(needs_reference)
        self.assertIn('Reference is required', raised.exception.args[0])
コード例 #3
0
 def test_haplotype_edit_equality(self):
     """``Haplotype.edit_equal`` is true for identical edits only.

     Two haplotypes built from the same pair of variants compare
     edit-equal; replacing one member by an edit with another ALT
     makes the comparison false.
     """
     vf = variant.VariantFactory()
     shared = vf.from_edit('chr24', 2093, 'TGG', 'CCC')
     tt_to_gg = vf.from_edit('chr24', 2098, 'TT', 'GG')
     tt_to_cc = vf.from_edit('chr24', 2098, 'TT', 'CC')

     build = variant.Haplotype.from_variants
     hap = build([shared, tt_to_gg])
     hap_twin = build([shared, tt_to_gg])
     hap_other = build([shared, tt_to_cc])

     self.assertTrue(hap.edit_equal(hap_twin))
     self.assertFalse(hap.edit_equal(hap_other))
コード例 #4
0
 def test_ambig_difference_snp_in_locus(self):
     """``comm`` with ``match_ambig=True`` finds nothing in common here.

     Set 1 holds a single CA insertion; set 2 holds a SNP and a T
     insertion, both given at position 10044.
     """
     #         10043
     # Ref     TC-ACA--G
     # v1 s1         CA
     # v1 s2    G
     # v2 s2     T
     vf = variant.VariantFactory(reference=self.chr24, normindel=True)

     ca_insertion = vf.from_edit('chr24', 10047, 'A', 'ACA')
     first = VariantSet.from_variants([ca_insertion])

     snp = vf.from_edit('chr24', 10044, 'C', 'G')
     t_insertion = vf.from_edit('chr24', 10044, 'C', 'CT')
     second = VariantSet.from_variants([snp, t_insertion])

     shared = list(first.comm(second, match_ambig=True).iter_vrt())
     self.assertEqual(len(shared), 0)
コード例 #5
0
 def test_from_variants_to_records(self):
     """``VariantSet.to_records`` returns 4 records with the expected fields.

     The set is built from an insertion, a deletion and a haplotype
     made of two SNPs given at the same position.
     """
     vf = variant.VariantFactory(reference=self.chr24, normindel=True)

     snps = [vf.from_edit('chr24', 1207, 'G', alt) for alt in ('C', 'T')]
     hap = Haplotype.from_variants(snps)

     insertion = vf.from_edit('chr24', 10043, 'T', 'TCA')
     deletion = vf.from_edit('chr24', 10045, 'ACA', 'A')
     records = VariantSet.from_variants(
         [insertion, deletion, hap]).to_records()

     expected_fields = [
         'chrom', 'start', 'end', 'ref', 'alt', 'vartype', 'phase_group',
         'attrib',
     ]
     self.assertEqual(records.shape, (4, ))
     self.assertEqual(list(records.dtype.fields), expected_fields)
コード例 #6
0
    def test_strip_order_dependent_Ambig(self):
        """``diff`` of two CA insertions depends on ``match_ambig``.

        With ``match_ambig=True`` the difference is empty; with
        ``match_ambig=False`` one variant remains.
        """
        #    10043
        # R  T--CA--CAG
        # v1 TCACA--CAG
        # v2 T--CACACAG
        reference_path = pkg_file('genomvar.test', 'data/chr24.fna')
        vf = variant.VariantFactory(reference=reference_path, normindel=True)

        ins_at_10043 = vf.from_edit('chr24', 10043, 'T', 'TCA')
        ins_at_10045 = vf.from_edit('chr24', 10045, 'A', 'ACA')
        first = VariantSet.from_variants([ins_at_10043])
        second = VariantSet.from_variants([ins_at_10045])

        # (match_ambig flag, expected number of variants left in the diff)
        for flag, expected in ((True, 0), (False, 1)):
            remaining = list(first.diff(second, match_ambig=flag).iter_vrt())
            self.assertEqual(len(remaining), expected)
コード例 #7
0
 def test_ambig_difference_different_ambig(self):
     """``diff`` with ``match_ambig=True`` is empty for the sets below.

     Each set holds one CA insertion and one CA deletion; the two
     sets give them at different positions (see the diagram).
     """
     #         10043
     # Ref     T--CA--CA--G
     # v1 s1   T--CA--CACAG ins CA right
     # v2 s1   T------CA--G del CA left
     # v1 s2   TCACA--CA--G ins CA left
     # v2 s2   T--CA------G del CA right
     vf = variant.VariantFactory(reference=self.chr24, normindel=True)

     ins_right = vf.from_edit('chr24', 10047, 'A', 'ACA')
     del_left = vf.from_edit('chr24', 10043, 'TCA', 'T')
     first = VariantSet.from_variants([ins_right, del_left])

     ins_left = vf.from_edit('chr24', 10043, 'T', 'TCA')
     del_right = vf.from_edit('chr24', 10045, 'ACA', 'A')
     second = VariantSet.from_variants([ins_left, del_right])

     leftover = list(first.diff(second, match_ambig=True).iter_vrt())
     self.assertEqual(len(leftover), 0)
コード例 #8
0
    def test_no_ovlp(self):
        """``no_ovlp`` splits the variants below into chunks of 1, 2 and 1.

        The chunk sizes are collected first and compared as a whole
        list, so the test fails (instead of passing vacuously) when
        ``no_ovlp`` yields fewer chunks than expected.
        """
        # CHROM  POS   REF  ALT
        # 1      2093  TGG  CCC
        # 1      2098  TT   GG
        # 1      2098  TT   CG   (same position as the previous edit)
        # 2      3200  G    GG
        factory = variant.VariantFactory()
        variants = [
            factory.from_edit('1', 2093, 'TGG', 'CCC'),
            factory.from_edit('1', 2098, 'TT', 'GG'),
            factory.from_edit('1', 2098, 'TT', 'CG'),
            factory.from_edit('2', 3200, 'G', 'GG'),
        ]

        chunk_sizes = [len(chunk) for chunk in no_ovlp(variants)]
        self.assertEqual(chunk_sizes, [1, 2, 1])
コード例 #9
0
 def setUp(self):
     """Create the chr24 reference and two variant factories.

     ``self.nvf`` is bound to the reference with ``normindel=True``;
     ``self.svf`` is a plain factory that does not normalize indels.
     """
     fasta_path = pkg_file(__name__, 'data/chr24.fna')
     self.chr24 = Reference(fasta_path)
     # Reference-backed factory with indel normalization switched on
     self.nvf = variant.VariantFactory(self.chr24, normindel=True)
     # Factory not normalizing indels
     self.svf = variant.VariantFactory()
コード例 #10
0
 def test_min_mnps(self):
     """``nof_snp_vrt`` reports 5 for three AAA substitutions at one locus.

     NOTE(review): presumably 5 is the number of distinct single-base
     changes across the three ALT alleles (G/C at the first base, G at
     the second, G/C at the third) -- confirm against ``nof_snp_vrt``.
     """
     vf = variant.VariantFactory()
     alts = ('GGG', 'GGC', 'CGG')
     mnps = [vf.from_edit('1', 1, 'AAA', alt) for alt in alts]
     self.assertEqual(nof_snp_vrt(mnps), 5)
コード例 #11
0
from genomvar import OverlappingHaplotypeVars,\
    UnsortedVariantFileError,VCFSampleMismatch,NoIndexFoundError
from genomvar.vcf import VCFRow, VCFReader, RESERVED_FORMAT
from genomvar import variant
from genomvar.test import MyTestCase

# Representation of an example used here a lot (example1.vcf)
#
# REF      AG    T|   T|  C    G     TGG   TT    G|      T    T      CACAGTTCCAC
#          22    154  165 453  1206  2093  2099  3200    4754 6145   10044
# varset1  A     TT   TG  CT   C     CCC   GG    GG      TCG  C      T----------
#          AGG   TT   TG       T     CCC   GG    GG      T    G      G----------
#          AG                        --------            ------ phased
# FILT           h    h;n                  n     n

# Module-level variant factory created without arguments (no reference is
# passed); presumably shared by the tests in this module -- TODO confirm.
factory = variant.VariantFactory()


class TestVariantSetCase(MyTestCase):
    def test_empty_vcf(self):
        """A VCF stream holding only '#' header lines gives an empty set.

        The leading header lines of ``example1.vcf`` are copied into an
        in-memory buffer, which is then parsed with
        ``VariantSet.from_vcf``; the result must contain no unit variants.
        """
        vcf_path = pkg_file('genomvar.test', 'data/example1.vcf')
        with open(vcf_path) as source:
            header_lines = itertools.takewhile(
                lambda line: line.startswith('#'), source)
            # Join inside the ``with`` so the file is still open while
            # the lazy iterator is consumed.
            header_only = io.StringIO(''.join(header_lines))
        variant_set = VariantSet.from_vcf(header_only)
        self.assertEqual(variant_set.nof_unit_vrt(), 0)

    def test_random_sample(self):
        vs = VariantSet.from_vcf(pkg_file('genomvar.test',
                                          'data/example1.vcf'))