コード例 #1
0
ファイル: AnnotatorTest.py プロジェクト: Yixf-Self/oncotator
    def testManualAnnotations(self):
        """Check that manual (override) annotations show up on every rendered row.

        The annotator is handed a dict of manual annotations, run over a maflite
        input, and the resulting TSV is read back.  For each overridden key, every
        row must hold exactly the override value (not blank, not "__UNKNOWN__").
        """
        overrides = {'source': 'Capture', 'status': 'Somatic', 'phase': 'Phase_I', 'sequencer': 'Illumina GAIIx'}

        annotator = Annotator()
        annotator.setManualAnnotations(overrides)
        maflite_input = MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt')
        tsv_renderer = SimpleOutputRenderer("out/testManualAnnotationsFile.tsv")
        annotator.setInputCreator(maflite_input)
        annotator.setOutputRenderer(tsv_renderer)

        rendered_path = annotator.annotate()

        # The rendered file must have content before it is worth parsing.
        rendered_size = os.stat(rendered_path).st_size
        self.assertTrue(rendered_size > 0, "Generated TSV file (" + rendered_path + ") is empty.")

        # Rows are numbered from 1 for the failure messages.
        for row_num, row in enumerate(GenericTsvReader(rendered_path), 1):
            where = str(row_num)
            for key in overrides:
                self.assertTrue(row[key] != "__UNKNOWN__",
                                "__UNKNOWN__ value seen on line " + where + ", when it should be populated: " + key)
                self.assertTrue(row[key] != "",
                                "Blank value seen on line " + where + ", when it should be populated: " + key)
                self.assertTrue(row[key] == overrides[key],
                                "Value for " + key + " on line " + where + " did not match override: " + str(
                                    row[key]) + " <> " + str(overrides[key]))
コード例 #2
0
    def testAnnotationWithMafliteWithTrailingSpaces(self):
        """
        Tests the ability to annotate a VCF file that contains trailing spaces in ref and alt alleles.

        Despite the "Maflite" in the method name, both the input and the output of this run are VCF.
        The rendered output is checked for the expected variant lines, in which the ref and alt
        columns are each followed directly by a tab.
        """
        db_dir = self.config.get('DEFAULT', "dbDir")
        inputFilename = os.path.join(
            "testdata", "vcf", "example.trailing_whitespace_in_alleles.vcf")
        outputFilename = os.path.join(
            "out", "example.trailing_whitespace_in_alleles.vcf")

        annotator = Annotator()
        # Kept as a function-local import, exactly as the test had it.
        from oncotator.utils.RunSpecification import RunSpecification
        run_spec = RunSpecificationFactory.create_run_spec(
            "VCF",
            "VCF",
            inputFilename,
            outputFilename,
            datasource_dir=db_dir,
            annotating_type=RunSpecification.ANNOTATE_MUTATIONS,
            other_opts={'vcf_out_infer_genotypes': False})
        annotator.initialize(run_spec)
        annotator.annotate()

        # Read the rendered VCF through a context manager so the file handle is
        # closed deterministically instead of being left to the garbage collector.
        with open(outputFilename) as vcf_fp:
            vcf_data = vcf_fp.read()

        # Each expected line starts after a newline and has tab-terminated alleles.
        expected_lines = (
            '\n1\t14907\t.\tA\tG\t',
            '\n1\t14930\trs150145850\tA\tG\t',
            '\n1\t14933\trs138566748\tG\tA\t',
            '\n1\t14948\trs148911281\tG\tA\t',
        )
        for expected in expected_lines:
            self.assertIn(expected, vcf_data)
コード例 #3
0
    def test_full_seg_file_annotations(self):
        """Test that we can read in a seg file, do a proper full annotation, and output as SIMPLE_TSV.

        Verifies that the required seg columns appear in the output header and that
        every output row has non-blank ``start``/``end`` values and a ``genes`` column.
        """
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_full_seg_file_annotations.tsv"
        db_dir = self.config.get('DEFAULT', "dbDir")
        # Remove any stale output so the checks below only see this run's result.
        if os.path.exists(output_filename):
            os.remove(output_filename)

        annotator = Annotator()
        run_spec = RunSpecificationFactory.create_run_spec(
            "SEG_FILE",
            "SIMPLE_TSV",
            inputFilename,
            output_filename,
            datasource_dir=db_dir,
            annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
        annotator.initialize(run_spec)
        annotator.annotate()

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
        headers = output_reader.getFieldNames()
        for rcol in required_cols:
            # assertIn reports both the missing column and the header on failure.
            self.assertIn(rcol, headers)

        for line_dict in output_reader:
            self.assertIsNotNone(line_dict['start'])
            self.assertNotEqual(line_dict['start'].strip(), "")
            self.assertIsNotNone(line_dict['end'])
            self.assertNotEqual(line_dict['end'].strip(), "")
            self.assertIn("genes", line_dict)
            # NOTE(review): str.split always returns at least one element, so this
            # assertion cannot fail.  It is kept as-is because it is unclear whether
            # rows with an empty "genes" value are legitimate -- confirm before tightening.
            self.assertTrue(len(line_dict["genes"].split(",")) > 0)
コード例 #4
0
ファイル: AnnotatorTest.py プロジェクト: ihuerga/oncotator
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec.

        A single SNV is annotated against the datasources found in the configured
        dbDir, and the result must carry a non-None "gene" annotation.
        """

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationData()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        annotated = annotator.annotate_mutations([m])
        # The builtin next() works on both Python 2 and Python 3 iterators, unlike
        # the Python 2-only .next() method.
        m2 = next(annotated)
        self.assertIsNotNone(m2.get("gene", None))
コード例 #5
0
    def testFullSnpVcf(self):
        """ Perform test of a SNP call stats (maflite) all the way through TCGA VCF creation.

        Checks that the output file was created and that the VCF records read back
        from it have a QUAL value.
        """
        outputFilename = "out/TCGAVCFTest.snp.vcf"
        callStatsIn = MafliteInputMutationCreator(
            "testdata/Test.call_stats.trim.txt")
        vcfOR = TcgaVcfOutputRenderer(outputFilename)
        datasources = self._createDatasourcesForTesting()

        annotator = Annotator()
        annotator.setInputCreator(callStatsIn)
        annotator.setOutputRenderer(vcfOR)
        annotator.setManualAnnotations(self._createManualAnnotations())
        for ds in datasources:
            annotator.addDatasource(ds)
        annotator.annotate()

        self.assertTrue(os.path.exists(outputFilename))

        # NOTE(review): the number of records checked below is paced by the *indel*
        # maflite, although the VCF was rendered from the call stats file above.
        # This looks like a copy/paste leftover -- confirm which input was intended.
        maflite_ic = MafliteInputMutationCreator(
            "testdata/maflite/Patient0.indel.maf.txt")
        muts = maflite_ic.createMutations()

        # The context manager closes the VCF handle even if an assertion fails.
        with open(outputFilename, 'r') as vcf_fp:
            vcf_reader = vcf.Reader(vcf_fp)
            for _ in muts:
                # Builtin next() instead of the Python 2-only .next() method.
                rec = next(vcf_reader)

                # All records should have QUAL with a value (i.e. NOT ".")
                self.assertIsNotNone(rec.QUAL)
コード例 #6
0
    def test_overwriting_muts(self):
        """Annotate with overwriting allowed, using a datasource that collides with an input annotation.

        The input already carries a "Who" annotation that the gene datasource also
        tries to write.  With ALLOW_ANNOTATION_OVERWRITING enabled the run must
        complete, and no output row may have 'TJ_Data_Who' equal to "Tromokratis".
        """
        gene_ds = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/")
        maflite_path = "testdata/maflite/who_alt1_vs_alt2.maflite"
        annotated_path = "out/who_alt1_vs_alt2.maf.annotated"

        run_options = {
            OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True,
            OptionConstants.NO_PREPEND: True
        }

        run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
            "MAFLITE",
            "TCGAMAF",
            maflite_path,
            annotated_path,
            datasource_list=[gene_ds],
            other_opts=run_options)

        annotator = Annotator()
        annotator.initialize(run_spec)
        annotator.annotate()

        # A missing column is treated as blank, which also passes the check.
        for row in GenericTsvReader(annotated_path):
            who = row.get('TJ_Data_Who', "")
            self.assertTrue(who != "Tromokratis")
コード例 #7
0
    def testCreationAndAnnotation(self):
        """Create the datasources, annotate a tiny maflite, and check the natural-variation column.

        Exactly one row is expected in the output, and its
        "UniProt_NatVar_natural_variations" value must contain the two expected
        "|"-separated entries, in any order.
        """
        target_path = 'out/genericGeneProteinPositionTest.out.tsv'

        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        natvar_ds = DatasourceFactory.createDatasource("testdata/simple_uniprot_natvar/simple_uniprot_natvar.config", "testdata/simple_uniprot_natvar/")

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite_natvar.maf.tsv'))
        annotator.setOutputRenderer(SimpleOutputRenderer(target_path))
        for datasource in (transcript_ds, natvar_ds):
            annotator.addDatasource(datasource)
        rendered_path = annotator.annotate()

        # Make sure that some values were populated
        self.assertTrue(os.path.exists(rendered_path))
        reader = GenericTsvReader(rendered_path)

        column = "UniProt_NatVar_natural_variations"
        # Order of the "|"-separated entries is not significant, so compare sorted.
        expected_entries = sorted("R -> RR (in EDMD2).|R -> Q (in EDMD2).".split("|"))

        row_count = 0
        for row_count, row in enumerate(reader, 1):
            seen_entries = sorted(row[column].split("|"))
            self.assertTrue(seen_entries == expected_entries, "Annotation value did not match: " + row[column])

        self.assertTrue(row_count == 1, "Number of mutations incorrect (1): " + str(row_count) )
コード例 #8
0
    def testBasicAnnotation(self):
        """Annotate from a generic transcript-keyed TSV datasource and check the output headers.

        Only the presence of the two refseq test columns in the rendered header is
        verified; row contents are not inspected.
        """
        # A transcript-providing datasource is added ahead of the transcript TSV datasource.
        transcript_provider = TestUtils.createTranscriptProviderDatasource(
            config=self.config)
        transcript_tsv_ds = DatasourceFactory.createDatasource(
            "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config",
            "testdata/small_transcript_tsv_ds/")
        target_path = 'out/genericTranscriptTest.out.tsv'

        annotator = Annotator()
        maflite_input = MafliteInputMutationCreator(
            'testdata/maflite/Patient0.snp.maf.txt')
        annotator.setInputCreator(maflite_input)
        annotator.setOutputRenderer(SimpleOutputRenderer(target_path))
        annotator.addDatasource(transcript_provider)
        annotator.addDatasource(transcript_tsv_ds)
        rendered_path = annotator.annotate()

        header_fields = GenericTsvReader(rendered_path).getFieldNames()
        for column in ("refseq_test_mRNA_Id", "refseq_test_prot_Id"):
            self.assertTrue(
                column in header_fields,
                column + " not found in headers: " + str(header_fields))
コード例 #9
0
    def testFullIndelVcf(self):
        """ Perform test of a Indel maflite all the way through TCGA VCF creation.

        After rendering, every input mutation must be found in the VCF: deletions
        at (maflite start - 1), all other variants at the unchanged maflite start.
        """
        outputFilename = "out/TCGAVCFTest.indel.vcf"
        callStatsIn = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt")
        vcfOR = TcgaVcfOutputRenderer(outputFilename)
        datasources = self._createDatasourcesForTesting()

        annotator = Annotator()
        annotator.setInputCreator(callStatsIn)
        annotator.setOutputRenderer(vcfOR)
        annotator.setManualAnnotations(self._createManualAnnotations())
        for ds in datasources:
            annotator.addDatasource(ds)
        annotator.annotate()

        self.assertTrue(os.path.exists(outputFilename))

        # Check that the deletions have position decremented by one from what is present in the maflite
        #  Checking that 1	36643701 in the maflite (a deletion) becomes 1	36643700 in the vcf, but that the others are
        #  the same.
        maflite_ic = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt")
        muts = maflite_ic.createMutations()

        # Collect the positions inside a context manager so the VCF handle is closed
        # promptly; a set gives constant-time membership checks below.
        with open(outputFilename, 'r') as vcf_fp:
            vcf_pos = set(int(rec.POS) for rec in vcf.Reader(vcf_fp))

        for m in muts:
            # If the variant is a deletion, then the vcf position should be the same as maflite minus one.  Otherwise, the same.
            is_variant_deletion = m.alt_allele in ("", "-", ".")
            if is_variant_deletion:
                self.assertTrue((int(m.start) - 1) in vcf_pos, "Deletion was not correct for " + m.chr + ":" + m.start)
            else:
                self.assertTrue(int(m.start) in vcf_pos, "Insertion was not correct for " + m.chr + ":" + m.start)
0
    def test_rendering_with_exons(self):
        """Test that we can render a seg file that includes exons at end points.

        Every output row must have a non-blank ``segment_start``; rows whose
        ``segment_end_gene`` is MAPK1 must report ``segment_end_exon`` as "8+".
        """
        inputFilename = "testdata/seg/Middle_of_exon.seg.txt"
        output_filename = "out/test_exon_seg2.gene_list.tsv"
        db_dir = self.config.get('DEFAULT', "dbDir")
        # Remove any stale output so the checks below only see this run's result.
        if os.path.exists(output_filename):
            os.remove(output_filename)

        annotator = Annotator()
        # NOTE(review): confirm the keyword spelling (datasourceDir vs datasource_dir)
        # matches the create_run_spec signature of the factory version in use.
        run_spec = RunSpecificationFactory.create_run_spec(
            "SEG_FILE",
            "GENE_LIST",
            inputFilename,
            output_filename,
            datasourceDir=db_dir,
            annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
        annotator.initialize(run_spec)
        annotator.annotate()

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        for line_dict in output_reader:
            self.assertIsNotNone(line_dict['segment_start'])
            self.assertNotEqual(line_dict['segment_start'].strip(), "")
            if line_dict['segment_end_gene'] == "MAPK1":
                end_exon = line_dict['segment_end_exon'].strip()
                self.assertEqual(
                    end_exon, "8+",
                    "Should have been 8+, but saw: %s" % end_exon)
コード例 #11
0
    def test_basic_rendering(self):
        """Test that we can render a basic seg file as a gene list.

        Every output row must have non-blank segment start/end values, a non-empty
        ``gene``, a numeric non-zero ``segment_num_probes`` and sample "Patient0".
        """
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_basic_rendering.gene_list.tsv"
        db_dir = self.config.get('DEFAULT', "dbDir")
        # Remove any stale output so the checks below only see this run's result.
        if os.path.exists(output_filename):
            os.remove(output_filename)

        annotator = Annotator()
        # NOTE(review): confirm the keyword spelling (datasourceDir vs datasource_dir)
        # matches the create_run_spec signature of the factory version in use.
        run_spec = RunSpecificationFactory.create_run_spec(
            "SEG_FILE",
            "GENE_LIST",
            inputFilename,
            output_filename,
            datasourceDir=db_dir,
            annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
        annotator.initialize(run_spec)
        annotator.annotate()

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        for line_dict in output_reader:
            self.assertIsNotNone(line_dict['segment_start'])
            self.assertNotEqual(line_dict['segment_start'].strip(), "")
            self.assertIsNotNone(line_dict['segment_end'])
            self.assertNotEqual(line_dict['segment_end'].strip(), "")
            self.assertIn("gene", line_dict)
            self.assertTrue(len(line_dict["gene"]) > 0)
            # float() raises on non-numeric text; assertTrue additionally rejects zero.
            self.assertTrue(float(line_dict["segment_num_probes"]))
            self.assertEqual(line_dict['sample'], "Patient0")
コード例 #12
0
    def test_no_overwriting_muts(self):
        """Annotating must raise when overwriting is disallowed and the input already has the value.

        The input carries a "Who" annotation that the gene datasource also tries to
        write; with ALLOW_ANNOTATION_OVERWRITING set to False, annotate() is
        expected to raise DuplicateAnnotationException.
        """
        gene_ds = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/")
        maflite_path = "testdata/maflite/who_alt1_vs_alt2.maflite"
        annotated_path = "out/who_alt1_vs_alt2.maf.annotated"

        run_options = {
            OptionConstants.ALLOW_ANNOTATION_OVERWRITING: False,
            OptionConstants.NO_PREPEND: True
        }

        run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
            "MAFLITE",
            "TCGAMAF",
            maflite_path,
            annotated_path,
            datasource_list=[gene_ds],
            other_opts=run_options)

        annotator = Annotator()
        annotator.initialize(run_spec)

        with self.assertRaises(DuplicateAnnotationException):
            annotator.annotate()
コード例 #13
0
    def testAnnotationRoundTripEmpty(self):
        """Round-trip a VCF through annotation with a nonexistent datasource dir.

        The VCF is annotated and written out, then the written file is parsed again
        as VCF input; the second parse must yield at least one mutation.
        """
        source_vcf = os.path.join("testdata", "m2_support", "NA12878.ob_filtered.vcf")
        target_vcf = os.path.join("out", "test_round_trip_empty_annotated.vcf")

        other_opts = {OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True}

        # The datasource directory deliberately does not exist.
        run_spec = RunSpecificationFactory.create_run_spec(
            "VCF",
            "VCF",
            source_vcf,
            target_vcf,
            datasource_dir="THIS_DIR_DOES_NOT_EXIST__",
            genomeBuild="hg19",
            other_opts=other_opts)

        annotator = Annotator()
        annotator.initialize(run_spec)
        annotated_filename = annotator.annotate()

        # Re-read what was just written, using the same options.
        reparsed_input = VcfInputMutationCreator(
            annotated_filename,
            MutationDataFactory(allow_overwriting=True),
            other_options=other_opts)
        reparsed_muts = list(reparsed_input.createMutations())
        self.assertTrue(len(reparsed_muts) > 0)
コード例 #14
0
    def testDuplicateAnnotation(self):
        """
        Tests that the duplicate annotations are parsed correctly.

        The rendered TSV must expose both "variant_status" and "sample_variant_status",
        in the header and in the first row, with values "2" and "0" respectively.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.duplicate_annotation.vcf")
        tsv_path = os.path.join("out", "example.duplicate_annotation.out.tsv")

        vcf_input = VcfInputMutationCreator(vcf_path)
        vcf_input.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path)

        annotator = Annotator()
        annotator.setInputCreator(vcf_input)
        annotator.setOutputRenderer(tsv_renderer)
        annotator.annotate()

        reader = GenericTsvReader(tsv_path)
        header_fields = reader.getFieldNames()
        self.assertTrue("variant_status" in header_fields, "variant_status field is missing in the header.")
        self.assertTrue("sample_variant_status" in header_fields, "sample_variant_status is missing in the header.")

        # Only the first data row is inspected.
        first_row = reader.next()
        self.assertTrue("variant_status" in first_row, "variant_status field is missing in the row.")
        self.assertTrue("sample_variant_status" in first_row, "sample_variant_status is missing in the row.")

        self.assertEqual("2", first_row["variant_status"], "Incorrect value of variant_status.")
        self.assertEqual("0", first_row["sample_variant_status"], "Incorrect value of sample_variant_status")
コード例 #15
0
    def testSNPsAndIndelStartAndEndPos(self):
        """
        Tests that the start and end positions of SNPs and Indels are parsed as defined by the NCI's MAF specification
        (https://wiki.nci.nih.gov/display/TCGA/Mutation+Annotation+Format+(MAF)+Specification).

        Three known start positions are looked up in the rendered TSV and their end
        positions compared against the expected values; other rows are ignored.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.snps.indels.vcf")
        tsv_path = os.path.join("out", "example.snps.indels.out.tsv")

        vcf_input = VcfInputMutationCreator(vcf_path)
        vcf_input.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path)

        annotator = Annotator()
        annotator.setInputCreator(vcf_input)
        annotator.setOutputRenderer(tsv_renderer)
        annotator.annotate()

        # Expected "end" value keyed by the row's "start" value.
        expected_end_by_start = {
            "16890445": "16890445",
            "154524458": "154524459",
            "114189432": "114189433",
        }

        for row in GenericTsvReader(tsv_path):
            expected_end = expected_end_by_start.get(row['start'])
            if expected_end is None:
                # Not one of the positions under test.
                continue
            self.assertEqual(row["end"], expected_end,
                             "The value should be %s but it was %s." % (expected_end, row["end"]))
コード例 #16
0
ファイル: AnnotatorTest.py プロジェクト: ihuerga/oncotator
 def _simple_annotate(self, is_skip_no_alts):
     """Annotate two identical SNVs with no datasources and return how many come back.

     The first mutation is additionally given the annotation
     alt_allele_seen="False"; ``is_skip_no_alts`` is forwarded to the run
     specification unchanged.
     """
     run_spec = RunSpecification()
     run_spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)

     # Initialize the annotator with the runspec
     annotator = Annotator()
     annotator.initialize(run_spec)

     def make_snv():
         # Both mutations share the same coordinates and alleles.
         snv = MutationData()
         snv.chr = "1"
         snv.start = "12941796"
         snv.end = "12941796"
         snv.alt_allele = "G"
         snv.ref_allele = "T"
         return snv

     flagged = make_snv()
     flagged.createAnnotation("alt_allele_seen", "False")
     plain = make_snv()

     annotated = annotator.annotate_mutations([flagged, plain])
     # Consume the result and count the mutations it produced.
     return sum(1 for _ in annotated)
コード例 #17
0
ファイル: AnnotatorTest.py プロジェクト: ihuerga/oncotator
    def testBlankAnnotatorInit(self):
        """ Test an extremely simple scenario, where no additional annotations are needed.  I.e. no data sources.

        The tiny maflite input is rendered to a simple TSV and the number of lines
        in the output file is compared against the expected total.
        """
        self.logger.info("Starting Blank Annotator Init Test...")

        inputCreator = MafliteInputMutationCreator(
            'testdata/maflite/tiny_maflite.maf.txt')
        outputRenderer = SimpleOutputRenderer(
            "out/testBlankAnnotatorTestFile.tsv")

        # Initialize the Annotator with the input creator and output renderer built above.
        annotator = Annotator()
        annotator.setInputCreator(inputCreator)
        annotator.setOutputRenderer(outputRenderer)
        testOutputFilename = annotator.annotate()

        # Test that file exists and that it has correct # of lines
        # (variants + one header line + two comment lines).
        numSamples = 1
        numExtraLines = 3  # one for header, two for comment lines
        numDoubleLines = 0  # Number of lines with two alt alleles
        numVariants = 9
        gt = numSamples * numVariants + numDoubleLines * numSamples + numExtraLines

        # open() replaces the Python 2-only file() builtin, and the context manager
        # closes the handle even if reading raises.
        with open(testOutputFilename, 'r') as fp:
            ctr = sum(1 for _ in fp)

        self.assertEqual(
            ctr, gt, "Number of lines read was not correct: " + str(ctr) +
            " -- should have been: " + str(gt))
コード例 #18
0
 def _annotate_m2_vcf(self, input_vcf_file, output_tcgamaf_file):
     """Convert ``input_vcf_file`` (VCF) into ``output_tcgamaf_file`` (TCGAMAF) via the annotator.

     Tumor and normal barcodes are supplied as global annotations, since this
     conversion needs them specified manually.
     """
     barcode_overrides = {
         'tumor_barcode': 'Patient0-Tumor',
         'normal_barcode': 'Patient0-Normal'
     }
     conversion_opts = {
         OptionConstants.COLLAPSE_FILTER_COLS: True,
         OptionConstants.NO_PREPEND: True,
         OptionConstants.SPLIT_ALLELIC_DEPTH: False,
         OptionConstants.INFER_ONPS: True
     }

     annotator = Annotator()
     # Per the original author's note, "." serves as a datasource dir without
     # datasources in order to speed the run up.
     run_spec = RunSpecificationFactory.create_run_spec(
         "VCF",
         "TCGAMAF",
         input_vcf_file,
         output_tcgamaf_file,
         datasource_dir=".",
         global_annotations=barcode_overrides,
         is_skip_no_alts=True,
         other_opts=conversion_opts)
     annotator.initialize(run_spec)
     annotator.annotate()
コード例 #19
0
    def _annotateTest(self,
                      inputFilename,
                      outputFilename,
                      datasource_dir,
                      inputFormat="MAFLITE",
                      outputFormat="TCGAMAF",
                      default_annotations=TCGA_MAF_DEFAULTS,
                      override_annotations=None,
                      is_skip_no_alts=False,
                      other_opts=None):
        """Build a run spec from the arguments, run the annotator, and return annotate()'s result.

        ``override_annotations`` and ``other_opts`` fall back to fresh empty dicts
        when None; ``override_annotations`` is passed on as the run spec's
        ``global_annotations``.
        """
        self.logger.info("Initializing Annotator...")

        global_annotations = dict() if override_annotations is None else override_annotations
        extra_opts = dict() if other_opts is None else other_opts

        annotator = Annotator()
        spec_options = dict(
            default_annotations=default_annotations,
            datasource_dir=datasource_dir,
            global_annotations=global_annotations,
            is_skip_no_alts=is_skip_no_alts,
            other_opts=extra_opts)
        run_spec = RunSpecificationFactory.create_run_spec(
            inputFormat, outputFormat, inputFilename, outputFilename,
            **spec_options)
        annotator.initialize(run_spec)

        self.logger.info("Annotation starting...")
        result = annotator.annotate()
        return result
コード例 #20
0
ファイル: OnpCombinerTest.py プロジェクト: ihuerga/oncotator
 def test_rendering_combined_to_tsv(self):
     """Smoke test: render a maflite to SIMPLE_TSV with ONP inference enabled, without crashing."""
     maflite_path = os.path.join("testdata", "maflite", "onp_combination.maf.txt")
     tsv_path = os.path.join("out", "onp_combination.tsv")
     onp_opts = {OptionConstants.INFER_ONPS: True}

     run_spec = RunSpecificationFactory.create_run_spec(
         "MAFLITE", "SIMPLE_TSV", maflite_path, tsv_path, other_opts=onp_opts)

     # No assertions follow: the test passes as long as nothing raises.
     annotator = Annotator()
     annotator.initialize(run_spec)
     annotator.annotate()
コード例 #21
0
ファイル: AnnotatorTest.py プロジェクト: Yixf-Self/oncotator
 def testVersionHeader(self):
     """Check that building the annotator's header string works and mentions the expected names.

     Only a transcript-provider datasource is attached; no input or output is
     initialized.  The string is checked minimally: it must mention either
     "Gaf " or "GENCODE", and the word "Oncotator".
     """
     annotator = Annotator()
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotator.addDatasource(transcript_ds)

     header = annotator.createHeaderString()

     mentions_transcript_source = ("Gaf " in header) or ("GENCODE" in header)
     self.assertTrue(mentions_transcript_source, "Could not find Gaf or GENCODE version in header string.")
     self.assertTrue("Oncotator" in header, "Could not find the word Oncotator in header string.")
コード例 #22
0
    def testBasicAnnotation(self):
        ''' Annotate from a basic tsv gene file.  Use the Gaf to annotate before trying the tsv -- required since the gene annotation must be populated.
        Using trimmed CancerGeneCensus as basis for this test.

        Checks that the CGC_Abridged columns appear in the output header and that
        every row whose gene is in the known gene list has a non-blank
        'CGC_Abridged_GeneID'.  At least one such row must exist.
        '''

        # cut -f 1 oncotator/test/testdata/small_tsv_ds/CancerGeneCensus_Table_1_full_2012-03-15_trim.txt | egrep -v Symbol | sed -r "s/^/'/g" | sed ':a;N;$!ba;s/\n/,/g' | sed -r "s/,'/','/g"
        genesAvailable = [
            'ABL1', 'ABL2', 'ACSL3', 'AF15Q14', 'AF1Q', 'AF3p21', 'AF5q31',
            'AKAP9', 'AKT1', 'AKT2', 'ALDH2', 'ALK', 'ALO17', 'APC',
            'ARHGEF12', 'ARHH', 'ARID1A', 'ARID2', 'ARNT', 'ASPSCR1', 'ASXL1',
            'ATF1', 'ATIC', 'ATM', 'ATRX', 'BAP1', 'BCL10', 'BCL11A', 'BCL11B'
        ]

        # We need a gaf data source to annotate gene

        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            config=self.config)
        geneDS = DatasourceFactory.createDatasource(
            "testdata/small_tsv_ds/small_tsv_ds.config",
            "testdata/small_tsv_ds/")
        outputFilename = 'out/genericGeneTest.out.tsv'

        annotator = Annotator()
        annotator.setInputCreator(
            MafliteInputMutationCreator(
                'testdata/maflite/Patient0.snp.maf.txt'))
        annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
        annotator.addDatasource(gafDatasource)
        annotator.addDatasource(geneDS)
        annotator.annotate()

        # Check that there were actual annotations performed.
        tsvReader = GenericTsvReader(outputFilename)

        fields = tsvReader.getFieldNames()
        # The failure message now names the column that is actually checked.
        self.assertIn(
            'CGC_Abridged_Other Syndrome/Disease', fields,
            "'CGC_Abridged_Other Syndrome/Disease' was not present in the header")
        self.assertIn(
            'CGC_Abridged_Mutation Type', fields,
            "'CGC_Abridged_Mutation Type' was not present in the header")

        linesThatShouldBeAnnotated = 0
        # Rows are numbered from 1 for the failure message.
        for ctr, lineDict in enumerate(tsvReader, 1):
            self.assertIn('gene', lineDict)
            if lineDict['gene'] in genesAvailable:
                self.assertNotEqual(
                    lineDict['CGC_Abridged_GeneID'], '',
                    "'CGC_Abridged_GeneID' was missing on a row that should have been populated.  Line: "
                    + str(ctr))
                linesThatShouldBeAnnotated += 1

        # Guard against a vacuous pass: the input must contain at least one known gene.
        self.assertGreater(linesThatShouldBeAnnotated, 0,
                           "Bad data -- cannot test missed detects.")
コード例 #23
0
    def testNumberGRenderingOfRandomVcf(self):
        """Smoke test: render the number_g.random VCF as a simple TSV without raising."""
        vcf_path = os.path.join("testdata", "vcf", "number_g.random.vcf")
        tsv_path = os.path.join("out", "number_g.random.out.tsv")

        vcf_input = VcfInputMutationCreator(vcf_path)
        vcf_input.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path)

        # No assertions follow: the test passes as long as nothing raises.
        annotator = Annotator()
        annotator.setInputCreator(vcf_input)
        annotator.setOutputRenderer(tsv_renderer)
        annotator.annotate()
コード例 #24
0
ファイル: AnnotatorTest.py プロジェクト: Yixf-Self/oncotator
    def test_querying_transcripts_by_genes(self):
        """Check that transcripts can be retrieved for a set of genes (MAPK1 and PIK3CA).

        More than three transcripts are expected in total.
        """
        db_dir = self._determine_db_dir()
        annotator = Annotator()
        for datasource in DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False):
            annotator.addDatasource(datasource)

        # Step 1 get all of the relevant transcripts
        gene_names = ["MAPK1", "PIK3CA"]
        transcripts = annotator.retrieve_transcripts_by_genes(gene_names)
        transcript_count = len(transcripts)
        self.assertTrue(transcript_count > 3)
コード例 #25
0
ファイル: AnnotatorTest.py プロジェクト: Yixf-Self/oncotator
    def test_simple_transcript_annotation(self):
        """Test the backend of the web api call /transcript/: look up a transcript by id.

        The transcript "ENST00000215832.6" must be found and must report MAPK1 as its gene.
        """
        # http://www.broadinstitute.org/oncotator/transcript/ENST00000215832.6/
        db_dir = self._determine_db_dir()
        annotator = Annotator()
        for datasource in DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False):
            annotator.addDatasource(datasource)

        transcript = annotator.retrieve_transcript_by_id("ENST00000215832.6")
        self.assertTrue(transcript is not None)
        gene_name = transcript.get_gene()
        self.assertTrue(gene_name == "MAPK1")
コード例 #26
0
ファイル: OnpCombinerTest.py プロジェクト: ihuerga/oncotator
 def test_single_sample_onp_combiner(self):
     """Smoke test: create an ONP-combined TCGA MAF from a single-sample maflite without crashing."""
     maflite_path = 'testdata/maflite/onp.singlesample.maf.txt'
     maf_path = 'out/testSingleSampleOnpCombiner.maf'

     unit_test_config = TestUtils.createUnitTestConfig()
     default_db_dir = unit_test_config.get('DEFAULT',"dbDir")
     onp_opts = {OptionConstants.INFER_ONPS: True}

     run_spec = RunSpecificationFactory.create_run_spec(
         "MAFLITE", "TCGAMAF", maflite_path, maf_path,
         datasourceDir=default_db_dir,
         other_opts=onp_opts)

     # No assertions follow: the test passes as long as nothing raises.
     annotator = Annotator()
     annotator.initialize(run_spec)
     annotator.annotate()
コード例 #27
0
ファイル: AnnotatorTest.py プロジェクト: Yixf-Self/oncotator
    def test_simple_genes_by_gene_annotation(self):
        """Test the backend of the web api call /gene/: annotate a gene from its transcripts.

        Transcripts for MAPK1 are retrieved, then annotated as genes; the resulting
        mapping must contain exactly one key.
        """
        # http://www.broadinstitute.org/oncotator/gene/MAPK1/
        db_dir = self._determine_db_dir()
        annotator = Annotator()
        for datasource in DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False):
            annotator.addDatasource(datasource)

        transcripts = annotator.retrieve_transcripts_by_genes(["MAPK1"])
        self.assertTranscriptsFound(transcripts)

        annotated_genes = annotator.annotate_genes_given_txs(transcripts)
        gene_keys = annotated_genes.keys()
        self.assertTrue(len(gene_keys) == 1)
コード例 #28
0
    def testSwitchedFieldsWithExampleVcf(self):
        """
        Tests whether the switched tags are ignored.

        As visible here, the test only wires a VCF input creator and a TSV renderer
        into an Annotator.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.bad.switched.fields.vcf")
        tsv_path = os.path.join("out", "example.switched.out.tsv")

        vcf_input = VcfInputMutationCreator(vcf_path)
        vcf_input.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path, [])

        # NOTE(review): annotate() is never invoked and nothing is asserted in the
        # visible body -- confirm whether the remainder of this test was lost.
        annotator = Annotator()
        annotator.setInputCreator(vcf_input)
        annotator.setOutputRenderer(tsv_renderer)
コード例 #29
0
    def testAnnotationWithNoSampleNameExampleVcf(self):
        """
        Runs the annotator on a VCF file that has no samples and renders it as a simple TSV.

        No assertions are made on the output in the visible body; the test passes
        as long as annotation completes without raising.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.sampleName.removed.vcf")
        tsv_path = os.path.join("out", "example.sampleName.removed.out.tsv")

        vcf_input = VcfInputMutationCreator(vcf_path)
        tsv_renderer = SimpleOutputRenderer(tsv_path)

        annotator = Annotator()
        annotator.setInputCreator(vcf_input)
        annotator.setOutputRenderer(tsv_renderer)
        annotator.annotate()
コード例 #30
0
    def testAnnotationWithDuplicateValuesInVcf(self):
        """
        Tests the ability to parse a VCF in which fields from different sections share the same name.

        NOTE(review): the original description listed "INFO, FILTER, and INFO"; the
        repeated INFO presumably meant FORMAT -- confirm against the test data.
        The test passes as long as annotation completes without raising.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.duplicate_fields.vcf")
        tsv_path = os.path.join("out", "example.duplicate_fields2.tsv")

        vcf_input = VcfInputMutationCreator(vcf_path)
        vcf_input.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path, [])

        annotator = Annotator()
        annotator.setInputCreator(vcf_input)
        annotator.setOutputRenderer(tsv_renderer)
        annotator.annotate()