Example #1
0
    def _get_sample_variant_1(self):
        """Build the first sample variant together with its VCF record line.

        The record exercises:
          * more than one alternate allele,
          * unphased genotypes for both samples,
          * more than one name in the ID column.

        Returns:
          A ``(variant, vcf_line)`` tuple: the expected ``Variant`` object and
          the raw VCF text line it corresponds to.
        """
        vcf_line = (
            '20	1234	rs123;rs2	C	A,T	50	PASS	AF=0.5,0.1;NS=1	'
            'GT:GQ	0/0:48	1/0:20\n'
        )

        # INFO column: AF carries one value per alternate, NS a single value.
        info = {
            'AF': VariantInfo(data=[0.5, 0.1], field_count='A'),
            'NS': VariantInfo(data=1, field_count='1'),
        }
        variant = Variant(
            reference_name='20',
            start=1233,
            end=1234,
            reference_bases='C',
            alternate_bases=['A', 'T'],
            names=['rs123', 'rs2'],
            quality=50,
            filters=['PASS'],
            info=info,
        )

        # One call per sample column, in the order they appear in the line.
        sample_calls = (
            VariantCall(name='Sample1', genotype=[0, 0], info={'GQ': 48}),
            VariantCall(name='Sample2', genotype=[1, 0], info={'GQ': 20}),
        )
        for call in sample_calls:
            variant.calls.append(call)
        return variant, vcf_line
Example #2
0
 def test_info_numbers_and_types(self):
   """Read records whose INFO fields cover several Number/Type declarations.

   The headers declare INFO fields with ``Number`` set to ``A``, ``G``,
   ``R``, ``0`` and ``.`` and ``Type`` set to String, Integer, Character,
   Flag and Float. The expected variants carry the matching ``field_count``
   (``None`` for ``Number=.``) and ``True`` for the flag field. The second
   record omits the ``HA`` and ``HF`` fields entirely.
   """
   info_definitions = [
       '##INFO=<ID=HA,Number=A,Type=String,Description="StringInfo_A">\n',
       '##INFO=<ID=HG,Number=G,Type=Integer,Description="IntInfo_G">\n',
       '##INFO=<ID=HR,Number=R,Type=Character,Description="ChrInfo_R">\n',
       '##INFO=<ID=HF,Number=0,Type=Flag,Description="FlagInfo">\n',
       '##INFO=<ID=HU,Number=.,Type=Float,Description="FloatInfo_variable">\n',
   ]
   records = [
       '19	2	.	A	T,C	.	.	HA=a1,a2;HG=1,2,3;HR=a,b,c;HF;HU=0.1	GT	1/0	0/1\n',
       '19	124	.	A	T	.	.	HG=3,4,5;HR=d,e;HU=1.1,1.2	GT	0/0	0/1',
   ]

   # Expected result for the first record (two alternates, all five fields).
   first_info = {
       'HA': VariantInfo(data=['a1', 'a2'], field_count='A'),
       'HG': VariantInfo(data=[1, 2, 3], field_count='G'),
       'HR': VariantInfo(data=['a', 'b', 'c'], field_count='R'),
       'HF': VariantInfo(data=True, field_count='0'),
       'HU': VariantInfo(data=[0.1], field_count=None),
   }
   first_expected = Variant(
       reference_name='19',
       start=1,
       end=2,
       reference_bases='A',
       alternate_bases=['T', 'C'],
       info=first_info)
   for sample, genotype in (('Sample1', [1, 0]), ('Sample2', [0, 1])):
     first_expected.calls.append(VariantCall(name=sample, genotype=genotype))

   # Expected result for the second record (one alternate, three fields).
   second_info = {
       'HG': VariantInfo(data=[3, 4, 5], field_count='G'),
       'HR': VariantInfo(data=['d', 'e'], field_count='R'),
       'HU': VariantInfo(data=[1.1, 1.2], field_count=None),
   }
   second_expected = Variant(
       reference_name='19',
       start=123,
       end=124,
       reference_bases='A',
       alternate_bases=['T'],
       info=second_info)
   for sample, genotype in (('Sample1', [0, 0]), ('Sample2', [0, 1])):
     second_expected.calls.append(VariantCall(name=sample, genotype=genotype))

   # The custom INFO headers take the place of the first stock header line.
   file_lines = info_definitions + _SAMPLE_HEADER_LINES[1:] + records
   read_data = self._create_temp_file_and_read_records(file_lines)

   self.assertEqual(2, len(read_data))
   self._assert_variants_equal([first_expected, second_expected], read_data)
Example #3
0
    def _get_sample_variant_3(self):
        """Build the third sample variant together with its VCF record line.

        The record exercises:
          * a symbolic alternate allele (``<SYMBOLIC>``),
          * a phased genotype for the first sample,
          * no call at all for the second sample (``.:.``).

        Returns:
          A ``(variant, vcf_line)`` tuple: the expected ``Variant`` object and
          the raw VCF text line it corresponds to.
        """
        # VCF data lines are tab-delimited, and that includes the separator
        # between the two sample columns. The tabs are spelled as explicit
        # escapes so that a stray space cannot hide among them.
        vcf_line = ('19\t12\t.\tC\t<SYMBOLIC>\t49\tq10\tAF=0.5\tGT:GQ\t'
                    '0|1:45\t.:.\n')
        variant = Variant(
            reference_name='19',
            start=11,
            end=12,
            reference_bases='C',
            alternate_bases=['<SYMBOLIC>'],
            quality=49,
            filters=['q10'],
            info={'AF': VariantInfo(data=[0.5], field_count='A')})
        # Sample1 is phased ("0|1"), so it carries the default phaseset value.
        variant.calls.append(
            VariantCall(name='Sample1',
                        genotype=[0, 1],
                        phaseset=DEFAULT_PHASESET_VALUE,
                        info={'GQ': 45}))
        # Sample2 has no call: a single missing genotype and a missing GQ.
        variant.calls.append(
            VariantCall(name='Sample2',
                        genotype=[MISSING_GENOTYPE_VALUE],
                        info={'GQ': None}))
        return variant, vcf_line
Example #4
0
    def _get_sample_variant_2(self):
        """Build the second sample variant together with its VCF record line.

        The record exercises:
          * a reference made of several bases (``GTC``),
          * no alternate allele (``.``),
          * a phased genotype for the first sample,
          * more than one filter,
          * a missing FORMAT value (``GQ`` of the second sample).

        Returns:
          A ``(variant, vcf_line)`` tuple: the expected ``Variant`` object and
          the raw VCF text line it corresponds to.
        """
        vcf_line = (
            '19	123	rs1234	GTC	.	40	q10;s50	NS=2	GT:GQ	1|0:48	0/1:.\n'
        )

        info = {'NS': VariantInfo(data=2, field_count='1')}
        variant = Variant(
            reference_name='19',
            start=122,
            end=125,
            reference_bases='GTC',
            alternate_bases=[],
            names=['rs1234'],
            quality=40,
            filters=['q10', 's50'],
            info=info,
        )

        # Sample1 is phased ("1|0"); Sample2 is unphased and has no GQ value.
        phased_call = VariantCall(
            name='Sample1',
            genotype=[1, 0],
            phaseset=DEFAULT_PHASESET_VALUE,
            info={'GQ': 48},
        )
        unphased_call = VariantCall(
            name='Sample2',
            genotype=[0, 1],
            info={'GQ': None},
        )
        for call in (phased_call, unphased_call):
            variant.calls.append(call)
        return variant, vcf_line
Example #5
0
 def test_no_samples(self):
   """Read a record from a file whose column header lists no samples.

   The column header stops at ``INFO`` (no ``FORMAT`` or sample columns), so
   the single expected variant carries no calls.
   """
   columns_without_samples = '#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO\n'
   record = '19	123	.	G	A	.	PASS	AF=0.2'

   expected = Variant(
       reference_name='19',
       start=122,
       end=123,
       reference_bases='G',
       alternate_bases=['A'],
       filters=['PASS'],
       info={'AF': VariantInfo(data=[0.2], field_count='A')})

   # The last stock header line is swapped for the sample-less column header
   # (presumably it is the stock "#CHROM ..." line -- confirm against
   # _SAMPLE_HEADER_LINES).
   file_lines = _SAMPLE_HEADER_LINES[:-1] + [columns_without_samples, record]
   read_data = self._create_temp_file_and_read_records(file_lines)

   self.assertEqual(1, len(read_data))
   self.assertEqual(expected, read_data[0])