Ejemplo n.º 1
0
    def testOverwriteAnnotationsSupported(self):
        """Check that VcfInputMutationCreator honours the factory's overwrite setting (white box testing).

        Mutations produced through a default MutationDataFactory must have a true
        ``_new_required``; mutations produced through a factory created with
        ``allow_overwriting=True`` must have a false ``_new_required``.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.trailing_whitespace_in_alleles.vcf")

        # Both creators read the same file; only the factory configuration differs.
        strict_creator = VcfInputMutationCreator(vcf_path, MutationDataFactory())
        lenient_creator = VcfInputMutationCreator(vcf_path, MutationDataFactory(allow_overwriting=True))

        for strict_mut in strict_creator.createMutations():
            self.assertTrue(strict_mut._new_required)

        for lenient_mut in lenient_creator.createMutations():
            self.assertFalse(lenient_mut._new_required)
Ejemplo n.º 2
0
    def test_mutation_combiner(self):
        """Combine two adjacent SNPs and verify both the merged attributes and the merged annotations."""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeValue", "value1", "INPUT", "STRING", "a value")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeValue", "value2", tags=["IT"])
        second.createAnnotation("AnotherValue", "5")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # Expected outcome: one mutation spanning 100-101 with concatenated alleles,
        # the two differing "SomeValue" values joined by "|".
        wanted = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation(
            "SomeValue", "value1|value2", "INPUT", "STRING", "a value", tags=["IT"])
        wanted.createAnnotation("AnotherValue", "5")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Ejemplo n.º 3
0
 def _onp_ordered_combiner_test(self, inputs, expected):
     """Feed ``inputs`` through an OnpQueue and compare the combined mutations with ``expected``.

     Both arguments are converted with ``_tuples_to_MutationData``; the comparison itself is
     delegated to ``_assert_mutation_lists_equal``.
     """
     source_iter = iter(self._tuples_to_MutationData(inputs))
     wanted = self._tuples_to_MutationData(expected)
     queue = OnpQueue(source_iter, MutationDataFactory())
     combined = list(queue.get_combined_mutations())
     self._assert_mutation_lists_equal(wanted, combined)
Ejemplo n.º 4
0
    def test_mutation_combiner_identical_annotation(self):
        """Annotation values that are identical across the combined mutations must appear once, not joined with |."""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SampleName", "John Doe")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SampleName", "John Doe")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # The shared "SampleName" value is expected unchanged on the combined mutation.
        wanted = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SampleName", "John Doe")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Ejemplo n.º 5
0
    def testAnnotationRoundTripEmpty(self):
        """Annotate a VCF with no datasources, then read the annotated file back.

        The run spec points at a datasource directory name that is not expected to exist.
        The test passes when the re-read file yields at least one mutation.
        """
        source_vcf = os.path.join("testdata", "m2_support", "NA12878.ob_filtered.vcf")
        target_vcf = os.path.join("out", "test_round_trip_empty_annotated.vcf")

        other_opts = {OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True}

        run_spec = RunSpecificationFactory.create_run_spec(
            "VCF", "VCF", source_vcf, target_vcf,
            datasource_dir="THIS_DIR_DOES_NOT_EXIST__",
            genomeBuild="hg19",
            other_opts=other_opts)

        annotator = Annotator()
        annotator.initialize(run_spec)
        written_path = annotator.annotate()

        # Read back whatever file the annotator reports having written.
        reader = VcfInputMutationCreator(
            written_path,
            MutationDataFactory(allow_overwriting=True),
            other_options=other_opts)
        reloaded = list(reader.createMutations())
        self.assertTrue(len(reloaded) > 0)
Ejemplo n.º 6
0
    def test_mutation_combiner_ordering(self):
        """Combined annotation values must keep the order of the mutations they came from."""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeDepth", "2")
        first.createAnnotation("AnotherDepth", "1")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeDepth", "1")
        second.createAnnotation("AnotherDepth", "2")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # Values are expected as "<first>|<second>", i.e. not sorted.
        wanted = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SomeDepth", "2|1")
        wanted.createAnnotation("AnotherDepth", "1|2")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Ejemplo n.º 7
0
 def __init__(self, sourceFilename, mutation_data_factory, configFile="", genomeBuild="hg19", other_options=None):
     """
     Constructor

     Stores the mutation data factory on the instance. When ``mutation_data_factory`` is None,
     an INFO message is logged and a default ``MutationDataFactory()`` is created instead.
     The remaining parameters are not used by the statements visible here.
     """
     factory = mutation_data_factory
     if factory is None:
         logging.getLogger(__name__).info("No mutation data factory provided, using default settings.")
         factory = MutationDataFactory()
     self._mutation_data_factory = factory
Ejemplo n.º 8
0
    def test_tnp_blank_snp(self):
        """Test a harder scenario for ONP combination.

        Six single-base mutations carrying phasing annotations are queued. mut4 has a
        different phasing_id and mut6 sits on a different chromosome. The alt alleles of
        the combined mutations must match ``gt_alts`` exactly, both in content and in count.
        """
        mut1 = MutationData(chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        mut1.createAnnotation("phasing_id", "value1", "INPUT")
        mut1.createAnnotation("phasing_genotype", "0|1", "INPUT")

        mut2 = MutationData(chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        mut2.createAnnotation("phasing_id", "value1", "INPUT")
        mut2.createAnnotation("phasing_genotype", "0|1", "INPUT")

        mut3 = MutationData(chr=1, start=102, end=102, ref_allele="C", alt_allele="T")
        mut3.createAnnotation("phasing_id", "value1", "INPUT")
        mut3.createAnnotation("phasing_genotype", "0|1", "INPUT")

        # Note the differing ID in mut4
        mut4 = MutationData(chr=1, start=103, end=103, ref_allele="C", alt_allele="T")
        mut4.createAnnotation("phasing_id", "value2", "INPUT")
        mut4.createAnnotation("phasing_genotype", "0|1", "INPUT")

        mut5 = MutationData(chr=1, start=104, end=104, ref_allele="C", alt_allele="T")
        mut5.createAnnotation("phasing_id", "value1", "INPUT")
        mut5.createAnnotation("phasing_genotype", "0|1", "INPUT")

        # Note separate chromosome for mut6
        mut6 = MutationData(chr=2, start=105, end=105, ref_allele="C", alt_allele="T")
        mut6.createAnnotation("phasing_id", "value1", "INPUT")
        mut6.createAnnotation("phasing_genotype", "0|1", "INPUT")

        gt_alts = ["ATT", "T", "T", "T"]
        mutations = [mut1, mut2, mut3, mut4, mut5, mut6]
        mdf = MutationDataFactory()
        queue = OnpQueue(mutations, mdf)

        # Compare the complete lists: an index-based loop would pass silently if the
        # queue produced fewer combined mutations than expected.
        combined_alts = [mut.alt_allele for mut in queue.get_combined_mutations()]
        self.assertEqual(gt_alts, combined_alts)
Ejemplo n.º 9
0
    def test_indel(self):
        """Test indel not used in onp combination no matter what the phasing info.

        Six mutations sharing the same phasing annotations are queued; mut4 is an insertion
        (ref "-"). The alt alleles of the combined mutations must match ``gt_alts`` exactly,
        both in content and in count.
        """
        mut1 = MutationData(chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        mut1.createAnnotation("phasing_id", "value1", "INPUT")
        mut1.createAnnotation("phasing_genotype", "0|1", "INPUT")

        mut2 = MutationData(chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        mut2.createAnnotation("phasing_id", "value1", "INPUT")
        mut2.createAnnotation("phasing_genotype", "0|1", "INPUT")

        mut3 = MutationData(chr=1, start=102, end=102, ref_allele="C", alt_allele="T")
        mut3.createAnnotation("phasing_id", "value1", "INPUT")
        mut3.createAnnotation("phasing_genotype", "0|1", "INPUT")

        # Indel
        mut4 = MutationData(chr=1, start=103, end=104, ref_allele="-", alt_allele="TT")
        mut4.createAnnotation("phasing_id", "value1", "INPUT")
        mut4.createAnnotation("phasing_genotype", "0|1", "INPUT")

        mut5 = MutationData(chr=1, start=104, end=104, ref_allele="C", alt_allele="T")
        mut5.createAnnotation("phasing_id", "value1", "INPUT")
        mut5.createAnnotation("phasing_genotype", "0|1", "INPUT")

        mut6 = MutationData(chr=1, start=105, end=105, ref_allele="C", alt_allele="T")
        mut6.createAnnotation("phasing_id", "value1", "INPUT")
        mut6.createAnnotation("phasing_genotype", "0|1", "INPUT")

        gt_alts = ["ATT", "TT", "TT"]
        mutations = [mut1, mut2, mut3, mut4, mut5, mut6]
        mdf = MutationDataFactory()
        queue = OnpQueue(mutations, mdf)

        # Compare the complete lists: an index-based loop would pass silently if the
        # queue produced fewer combined mutations than expected.
        combined_alts = [mut.alt_allele for mut in queue.get_combined_mutations()]
        self.assertEqual(gt_alts, combined_alts)
Ejemplo n.º 10
0
 def _onp_unordered_combiner_test(self, inputs, expected):
     """Run tuple-described mutations through the ONP combiner and check them against ``expected``.

     ``inputs`` and ``expected`` are both converted with ``_tuples_to_MutationData``; the
     combined results are checked with ``assert_mutations_match_expected``.
     """
     source_iter = iter(self._tuples_to_MutationData(inputs))
     wanted = self._tuples_to_MutationData(expected)
     queue = OnpQueue(source_iter, MutationDataFactory())
     combined = list(queue.get_combined_mutations())
     self.assert_mutations_match_expected(wanted, combined)
Ejemplo n.º 11
0
    def testFailureWithSpanningDeletion(self):
        """Iterate every mutation of a VCF with a spanning deletion while alternates are not being ignored.

        NOTE(review): this method makes no assertion itself; presumably the expected failure
        is enforced outside the lines shown here (e.g. by a decorator) -- confirm.
        """
        vcf_path = os.path.join("testdata", "simple_vcf_spanning_deletion.vcf")
        creator = VcfInputMutationCreator(vcf_path, MutationDataFactory(allow_overwriting=True))

        # Consume the whole iterable so every mutation is actually created.
        ctr = sum(1 for _ in creator.createMutations())
Ejemplo n.º 12
0
    def test_annotation_overwriting_on(self):
        """A factory built with allow_overwriting=True must produce mutations whose annotations can be re-created.

        Creating "blah" a second time must not raise, and the stored value must be the latest one.
        """
        factory = MutationDataFactory(allow_overwriting=True)
        mutation = factory.create()

        # The first pass creates the annotation, the second pass overwrites it.
        for value in ("123", "456"):
            mutation.createAnnotation("blah", value)
            self.assertTrue(mutation['blah'] == value)
Ejemplo n.º 13
0
    def testSimpleRoundTripWithoutAnnotating(self):
        """Read a VCF, render it unchanged to a new VCF, and read the rendered file back.

        The test passes when the second read yields at least one mutation.
        """
        other_opts = {OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True}

        source_vcf = os.path.join("testdata", "m2_support", "NA12878.ob_filtered.vcf")
        first_reader = VcfInputMutationCreator(
            source_vcf,
            MutationDataFactory(allow_overwriting=True),
            other_options=other_opts)
        loaded = list(first_reader.createMutations())

        # Write the mutations straight back out without annotating them.
        rendered_vcf = os.path.join("out", "test_round_trip.vcf")
        renderer = VcfOutputRenderer(rendered_vcf, otherOptions=other_opts)
        renderer.renderMutations(loaded)

        second_reader = VcfInputMutationCreator(
            rendered_vcf,
            MutationDataFactory(allow_overwriting=True),
            other_options=other_opts)
        reloaded = list(second_reader.createMutations())
        self.assertTrue(len(reloaded) > 0)
Ejemplo n.º 14
0
    def testSuccesseWithSpanningDeletion(self):
        """With IS_SKIP_ALTS enabled, a VCF containing a spanning deletion must yield exactly one mutation."""
        vcf_path = os.path.join("testdata", "simple_vcf_spanning_deletion.vcf")

        skip_alts_options = {InputMutationCreatorOptions.IS_SKIP_ALTS: True}
        creator = VcfInputMutationCreator(
            vcf_path,
            MutationDataFactory(allow_overwriting=True),
            other_options=skip_alts_options)

        # Count the mutations by exhausting the iterable.
        ctr = sum(1 for _ in creator.createMutations())
        self.assertTrue(ctr == 1, "There should only have been one mutation seen, instead saw: " + str(ctr))
Ejemplo n.º 15
0
    def initializeMutFromAttributes(chr,
                                    start,
                                    end,
                                    ref_allele,
                                    alt_allele,
                                    build,
                                    mutation_data_factory=None):
        """Create a mutation from basic attributes and attach a preceding-bases annotation.

        The mutation is built by ``mutation_data_factory`` (a default ``MutationDataFactory()``
        when None), with chr, start, end and build passed as strings. The variant type is then
        inferred from the mutation's ref and alt alleles:

        * xNP variants get an empty preceding-bases annotation.
        * Deletions and insertions have their alleles, start and end replaced by the values
          returned from the corresponding ``MutUtils.retrievePrecedingBasesFor...`` helper
          (with "-" as the alt or ref allele respectively), and the returned preceding bases
          are stored as the annotation.

        Returns the mutation.
        """
        if mutation_data_factory is None:
            mutation_data_factory = MutationDataFactory()
        mut = mutation_data_factory.create(str(chr), str(start), str(end),
                                           ref_allele, alt_allele, str(build))
        variant_type = TranscriptProviderUtils.infer_variant_type(mut.ref_allele, mut.alt_allele)

        if TranscriptProviderUtils.is_xnp(variant_type):  # Snps and other xNPs
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                                 annotationValue="")

        if variant_type == VariantClassification.VT_DEL:  # deletion
            preceding_bases, new_ref, new_start, new_end = \
                MutUtils.retrievePrecedingBasesForDeletions(mut)
            new_alt = "-"
        elif variant_type == VariantClassification.VT_INS:  # insertion
            preceding_bases, new_alt, new_start, new_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            new_ref = "-"
        else:
            # Neither deletion nor insertion: nothing further to adjust.
            return mut

        # Each value is written both as an attribute and through item assignment,
        # in the order ref_allele, alt_allele, start, end.
        updates = (("ref_allele", new_ref),
                   ("alt_allele", new_alt),
                   ("start", new_start),
                   ("end", new_end))
        for field_name, field_value in updates:
            setattr(mut, field_name, field_value)
            mut[field_name] = field_value

        mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                             annotationValue=preceding_bases)
        return mut
Ejemplo n.º 16
0
    def test_annotation_overwriting_off(self):
        """A factory built with allow_overwriting=False must produce mutations that reject duplicate annotations.

        The first createAnnotation call succeeds; repeating it for the same name must raise
        DuplicateAnnotationException.
        """
        factory = MutationDataFactory(allow_overwriting=False)
        mutation = factory.create()

        mutation.createAnnotation("blah", "123")
        self.assertTrue(mutation['blah'] == "123")

        try:
            mutation.createAnnotation("blah", "456")
        except DuplicateAnnotationException:
            saw_duplicate_error = True
        else:
            saw_duplicate_error = False

        self.assertTrue(
            saw_duplicate_error,
            "DuplicateAnnotationException should have been seen, but wasn't")
Ejemplo n.º 17
0
 def initialize(self, run_spec):
     """ Given a RunSpecification instance, initialize self properly.  Do not start annotation.

     Copies the input creator, output renderer, manual and default annotations, datasources,
     multicore settings, skip-no-alts flag, annotating type and annotation-overwriting flag
     from ``run_spec`` onto this instance, resets the cache statistics, initializes the cache
     manager and creates the mutation data factory used by this instance.

     :param run_spec: run specification providing the attributes and getters read below.
     """
     self.setInputCreator(run_spec.inputCreator)
     self.setOutputRenderer(run_spec.outputRenderer)
     self.setManualAnnotations(run_spec.manualAnnotations)
     self.setDefaultAnnotations(run_spec.defaultAnnotations)
     self._datasources = run_spec.datasources
     self.setIsMulticore(run_spec.get_is_multicore())
     self.setNumCores(run_spec.get_num_cores())
     # Cache hit/miss counters start from zero on every initialization.
     self._cache_stats = {"miss": 0, "hit":0}
     self._is_skip_no_alts = run_spec.get_is_skip_no_alts()
     self.initialize_cache_manager(run_spec)
     self.set_annotating_type(run_spec.annotating_type)
     # Look up the annotation function for the configured type; _annotate_mut is the fallback
     # when the type has no entry in ANNOTATING_FUNC_DICT.
     self._annotate_func_ptr = Annotator.ANNOTATING_FUNC_DICT.get(self._annotating_type, _annotate_mut)
     self._is_allow_annotation_overwriting = run_spec.is_allow_annotation_overwriting
     # The factory is configured with the same overwrite setting as the run specification.
     self._mutation_data_factory = MutationDataFactory(allow_overwriting=self._is_allow_annotation_overwriting)
Ejemplo n.º 18
0
    def create_run_spec_given_datasources(input_format, output_format, input_filename, output_filename, global_annotations=None,
                        datasource_list=None, genomeBuild="hg19", is_multicore=False, num_cores=4,
                        default_annotations=None, cache_url=None, read_only_cache=True,
                        tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None, annotating_type=None):
        """Same as create_run_spec, but a list of datasource instances can be used.  Typically, this method is only called
        by automated tests.

        The parameters are validated first; every validation message is logged at its own level
        and a RunSpecificationException is raised for the first message whose level is ERROR or
        CRITICAL. Afterwards the input creator and output renderer are created and a
        RunSpecification is initialized from them and the remaining arguments.

        Note that ``other_opts`` is updated in place when a dict is passed in.

        :return: the initialized RunSpecification
        """
        # Replace missing optional containers with fresh empty ones.
        if global_annotations is None:
            global_annotations = dict()
        if default_annotations is None:
            default_annotations = dict()
        if datasource_list is None:
            datasource_list = []
        if other_opts is None:
            other_opts = dict()

        # For TCGAMAF input the reannotate option ends up switched on whenever it is absent or falsy.
        if input_format == "TCGAMAF":
            if not other_opts.get(OptionConstants.REANNOTATE_TCGA_MAF_COLS, False):
                other_opts[OptionConstants.REANNOTATE_TCGA_MAF_COLS] = True

        other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

        # Step 0 Validate given parameters and log messages.  If an error or critical is found, throw an exception.
        fatal_levels = (logging.ERROR, logging.CRITICAL)
        messages = RunSpecificationFactory._validate_run_spec_parameters(
            input_format, output_format, input_filename, output_filename, global_annotations,
            datasource_list, genomeBuild, is_multicore, num_cores,
            default_annotations, cache_url, read_only_cache,
            tx_mode, is_skip_no_alts, other_opts, annotating_type)
        for message in messages:
            logging.getLogger(__name__).log(message.level, message.message)
            if message.level in fatal_levels:
                raise RunSpecificationException(message.message)

        # Step 1 Initialize input and output
        is_allow_annotation_overwriting = other_opts.get(OptionConstants.ALLOW_ANNOTATION_OVERWRITING, False)
        mutation_data_factory = MutationDataFactory(is_allow_annotation_overwriting)

        input_creator = OncotatorCLIUtils.create_input_creator(
            input_filename, input_format, mutation_data_factory, genomeBuild, other_opts)
        output_renderer = OncotatorCLIUtils.create_output_renderer(
            output_filename, output_format, other_opts)

        run_spec = RunSpecification()
        run_spec.initialize(
            input_creator,
            output_renderer,
            manualAnnotations=global_annotations,
            datasources=datasource_list,
            isMulticore=is_multicore,
            numCores=num_cores,
            defaultAnnotations=default_annotations,
            cacheUrl=cache_url,
            read_only_cache=read_only_cache,
            is_skip_no_alts=is_skip_no_alts,
            annotating_type=annotating_type,
            is_allow_annotation_overwriting=is_allow_annotation_overwriting)
        return run_spec
Ejemplo n.º 19
0
 def test_mutation_combiner_no_mut(self):
     """An empty list of mutations must combine to None."""
     factory = MutationDataFactory()
     combined = OnpQueue._combine_mutations([], factory)
     self.assertIsNone(combined)