コード例 #1
0
ファイル: test_wecall_reduce.py プロジェクト: dylex/wecall
    def test_should_obtain_correct_vcf_header_on_reduce(self):
        """Check the header of the reduced VCF built from two input VCFs.

        Two VCFs are produced (stem "ab" on contig "2", stem "aa" on
        contig "1"). The expected header is the header of the "aa" file with
        contig '2' set to the length recorded in the "ab" file's header.
        """
        reference_sequence = "AAAAAAAAAAACGCACCCCCCATAAAAAAAATTTTTTTTTTT"

        # The "ab" file is generated first, the "aa" file second.
        temp_vcf_filename_b = self.__run_wecall_variant_caller(
            "2",
            reference_sequence,
            ["............T........................C...."],
            vcf_stem="ab",
        )
        temp_vcf_filename_a = self.__run_wecall_variant_caller(
            "1",
            reference_sequence,
            ["...................T......................"],
            vcf_stem="aa",
        )

        # Capture both input headers before the reduce step runs.
        with VCFReaderContextManager(temp_vcf_filename_a) as reader_a, \
                VCFReaderContextManager(temp_vcf_filename_b) as reader_b:
            reader_a.read_header()
            header_a = reader_a.header

            reader_b.read_header()
            header_b = reader_b.header

        self.__run_wecall_reduce()

        with VCFReaderContextManager(self.final_vcf_location) as final_vcf:
            final_vcf.read_header()

            # header_a is updated in place and then used as the expectation.
            contig_2_length = header_b.get_contig('2').length
            header_a.set_contig('2', contig_2_length)
            self.assertEqual(final_vcf.header, header_a)
コード例 #2
0
ファイル: test_wecall_reduce.py プロジェクト: dylex/wecall
    def test_should_derive_use_lexigraphical_order_of_vcfs_for_reduce(self):
        """Check that reduce emits records in the order of the VCF stems.

        The "ab" file is produced before the "aa" file, yet the reduced
        output is expected to hold the "aa" records followed by the "ab"
        records, three records in total.
        """
        reference_sequence = "AAAAAAAAAAACGCACCCCCCATAAAAAAAATTTTTTTTTTT"

        temp_vcf_filename_b = self.__run_wecall_variant_caller(
            "2",
            reference_sequence,
            ["............T........................C...."],
            vcf_stem="ab",
        )
        temp_vcf_filename_a = self.__run_wecall_variant_caller(
            "1",
            reference_sequence,
            ["...................T......................"],
            vcf_stem="aa",
        )

        with VCFReaderContextManager(temp_vcf_filename_a) as reader_a, \
                VCFReaderContextManager(temp_vcf_filename_b) as reader_b:
            # "aa" is lexicographically less than "ab", so its records
            # come first in the expected output.
            reference_records = list(reader_a.read_records())
            reference_records.extend(reader_b.read_records())

        self.__run_wecall_reduce()

        with VCFReaderContextManager(self.final_vcf_location) as final_vcf:
            final_records = list(final_vcf.read_records())

            self.assertEqual(len(final_records), 3)
            self.assertEqual(final_records, reference_records)
コード例 #3
0
ファイル: test_vcf_schema.py プロジェクト: dylex/wecall
    def __run_small_variant_caller(self, refcalls, format):
        """Run the variant caller on a tiny sample and return both schemas.

        A sample bank is built from the reference "T" with a single sample
        "TEST" whose sequence is ".". The caller is run with an emptied
        configuration plus the ``outputRefCalls`` and ``outputFormat``
        options, and the header of the resulting VCF is read back.

        Args:
            refcalls: Value passed to the ``outputRefCalls`` option and to
                ``wecall_schema`` as ``add_ref_calls``.
            format: VCF format version; appended to ``"VCF"`` for the
                ``outputFormat`` option and forwarded to ``wecall_schema``.

        Returns:
            A ``(expected_schema, actual_schema)`` tuple, where the expected
            schema comes from ``wecall_schema`` and the actual schema is the
            value returned by ``read_header()`` on the output VCF.
        """
        sample_bank = SampleBank("T")
        sample_bank.add_sample_name("TEST").add_sequence(".")

        variant_caller_builder = VariantCallerBuilderFromSampleBank(
            sample_bank, self.work_dir)
        variant_caller_builder.configuration = {}  # clear config.
        variant_caller = variant_caller_builder.build()
        variant_caller.add_additional_command('outputRefCalls', refcalls)
        variant_caller.add_additional_command('outputFormat',
                                              "VCF{}".format(format))
        variant_caller.run()

        with VCFReaderContextManager(variant_caller.output_vcf) as vcf_file:
            actual_schema = vcf_file.read_header()

        # The reference name is the reference file's base name without its
        # extension.
        reference = os.path.splitext(
            os.path.basename(variant_caller_builder.wecall_input_data.
                             reference_filename))[0]
        expected_schema = wecall_schema(
            # '%F' is a platform-specific strftime extension; spell out the
            # equivalent ISO date format so this works on every platform.
            file_date=datetime.datetime.today().strftime('%Y-%m-%d'),
            reference=reference,
            contigs={
                sample_bank.reference.chrom: {
                    "length": sample_bank.reference.length_minus_deletions()
                }
            },
            add_ref_calls=refcalls,
            format=format)

        return expected_schema, actual_schema
コード例 #4
0
 def test_read_variant_from_vcf(self):
     """Check the variant of the first record in ``vcf_example.vcf``."""
     example_path = os.path.join(self.data_dir, "vcf_example.vcf")
     with VCFReaderContextManager(example_path) as vcf_handler:
         # Only the first record is pulled from the reader.
         first_record = next(vcf_handler.read_records())
         first_variant = first_record.variant
         self.assertEqual(first_variant.chrom, "20")
         self.assertEqual(first_variant.pos_from, 9)
         self.assertEqual(first_variant.ref, "CT")
         self.assertEqual(first_variant.alt, "C")
コード例 #5
0
ファイル: test_wecall_reduce.py プロジェクト: dylex/wecall
    def test_should_reduce_a_wecall_produced_vcf_to_a_valid_vcf(self):
        """Check that reducing a single VCF keeps its header and records.

        One VCF is produced on contig "1"; after the reduce step the final
        VCF is expected to have an equal header and the same single record.
        """
        input_vcf_path = self.__run_wecall_variant_caller(
            "1",
            "AAAAAAAAAAACGCACCCCCCATAAAAAAAATTTTTTTTTTT",
            ["...................T......................"],
        )

        # Snapshot the input before reduce runs.
        with VCFReaderContextManager(input_vcf_path) as input_vcf:
            expected_header = input_vcf.header
            expected_records = list(input_vcf.read_records())

        self.__run_wecall_reduce()

        with VCFReaderContextManager(self.final_vcf_location) as final_vcf:
            self.assertEqual(final_vcf.header, expected_header)
            actual_records = list(final_vcf.read_records())

            self.assertEqual(len(actual_records), 1)
            self.assertEqual(actual_records, expected_records)
コード例 #6
0
    def __init__(self, test_case, path):
        """Load the VCF at ``path`` and sanity-check its genotype likelihoods.

        Args:
            test_case: Object providing ``assertTrue``; used to report
                failures.
            path: Location of the VCF file; it must already exist.

        The header and all records are read and stored on the instance, and
        every 'GL' value of every sample is asserted to be ``<= 0.0``.
        """
        self.__test_case = test_case
        self.__path = path
        test_case.assertTrue(os.path.exists(path))

        with VCFReaderContextManager(path) as vcf_reader:
            self.__schema = vcf_reader.read_header()
            self.__records = list(vcf_reader.read_records())

        # ensure genotype likelihoods are within range throughout
        for vcf_record in self.__records:
            sample_info = vcf_record.sample_info
            for name in sample_info.get_sample_names():
                try:
                    likelihoods = sample_info.get_field(name, 'GL')
                    for likelihood in likelihoods:
                        test_case.assertTrue(likelihood <= 0.0)
                except KeyError:
                    # A KeyError here is ignored and the sample is skipped.
                    pass
コード例 #7
0
ファイル: test_parser.py プロジェクト: dylex/wecall
    def test_reads_simple_file(self):
        """Write three variants to a VCF and check they are read back in order."""
        filename = os.path.join(self.work_dir, "test.vcf")

        # (chrom, pos, ref, alt) for each variant, in write order.
        variant_fields = (
            ("1", 1, "A", "T"),
            ("2", 1, "A", "T"),
            ("10", 1, "A", "T"),
        )

        with VCFWriterContextManager(filename) as left_vcf:
            for fields in variant_fields:
                left_vcf.write_variant(Variant(*fields))

        # The expected variants are built separately from the written ones.
        expected_variants = [Variant(*fields) for fields in variant_fields]

        actual_variants = []
        with VCFReaderContextManager(filename) as vcf_reader:
            for record in vcf_reader.read_records():
                actual_variants.append(record.variant)

        self.assertEqual(expected_variants, actual_variants)
コード例 #8
0
    def test_should_write_missing_values_in_sample_data(self):
        """Check that a sample with no data is written as missing values.

        The first record of ``vcf_example.vcf`` gets new sample data for
        three samples, of which only "sample1" and "sample2" are populated.
        The formatted row is expected to end with "./.:.:." for "sample3".
        """
        with VCFReaderContextManager(
                os.path.join(self.data_dir, "vcf_example.vcf")) as vcf_handler:
            first_record = next(vcf_handler.read_records())

        sample_data = SampleData(['GT', 'PL', 'GQ'],
                                 ['sample1', 'sample2', 'sample3'])

        sample_data.add_sample_data("sample1", "GT", GenotypeCall("1|0"))
        sample_data.add_sample_data("sample1", "PL", [3000, 0, 3000])
        sample_data.add_sample_data("sample1", "GQ", [1000])

        sample_data.add_sample_data("sample2", "GT", GenotypeCall("1|1"))
        sample_data.add_sample_data("sample2", "PL", [2000, 0, 1000])
        sample_data.add_sample_data("sample2", "GQ", [3])

        # "sample3" is intentionally left without any data.
        first_record.sample_info = sample_data

        vcf_string = vcf_row_from_record(first_record)
        expected_vcf_string = "20	10	.	CT	C	3000	PASS	PP=3000;DP=250;DPR=140;DPF=110;VC=100;VCR=49;VCF=51;ABPV=0.2;SBPV=0.3;MQ=70.0;BR=31.0;QD=None	GT:PL:GQ	1|0:3000,0,3000:1000	1|1:2000,0,1000:3	./.:.:."  # noqa
        self.assertEqual(expected_vcf_string, vcf_string)
コード例 #9
0
    def test_read_record_line(self):
        """Check every parsed field of the first record in ``vcf_example.vcf``."""
        example_path = os.path.join(self.data_dir, "vcf_example.vcf")
        with VCFReaderContextManager(example_path) as vcf_handler:
            record = next(vcf_handler.read_records())

            # Fixed columns and derived record properties.
            self.assertEqual(record.chrom, "20")
            self.assertEqual(record.pos_from, 9)
            self.assertEqual(record.ids, set())
            self.assertEqual(record.ref, "CT")
            self.assertEqual(record.alt, "C")
            self.assertEqual(record.quality, 3000)
            self.assertEqual(record.filters, set())
            self.assertEqual(record.passes_filter, True)
            self.assertEqual(record.from_multi_alt, False)
            self.assertEqual(record.type, variant.TYPE_DEL)

            # INFO entries, checked in this order.
            expected_info = (
                ('PP', [3000]),
                ('DP', [250]),
                ('VC', [100]),
                ('ABPV', [0.2]),
                ('SBPV', [0.3]),
                ('MQ', [70]),
                ('QD', [None]),
            )
            for info_key, expected_value in expected_info:
                self.assertEqual(record.info[info_key], expected_value)

            # Per-sample data.
            self.assertTrue(record.sample_info.has_sample("sample1"))
            self.assertEqual(record.genotypes, {
                "sample1": GenotypeCall("1|0"),
                "sample2": GenotypeCall("1|1")
            })

            sample_info = record.sample_info
            self.assertEqual(
                sample_info.get_field("sample1", 'GT'), GenotypeCall("1|0"))
            self.assertEqual(
                sample_info.get_field("sample1", 'PL'), [3000, 0, 3000])
            self.assertEqual(
                sample_info.get_field("sample1", "GQ"), [1000])
コード例 #10
0
 def __get_example_schema(self, filename):
     """Return the header of the VCF called ``filename`` in ``self.data_dir``."""
     vcf_path = os.path.join(self.data_dir, filename)
     with VCFReaderContextManager(vcf_path) as vcf_handler:
         # The header is parsed first and then taken from the reader.
         vcf_handler.read_header()
         schema = vcf_handler.header
         return schema