コード例 #1
0
    def testCreateDatasourceWithMissingValues(self):
        """Create an indexed_tsv datasource from the ``.missing.txt`` ESP input and check its config.

        Verifies the ``src_file`` option of the ``general`` section and the
        ``data_types`` entries written for the EA_GTC and DP annotation columns.
        """
        outputDir = "out"
        inputPath = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.missing.txt")
        cfgPath = os.path.join("out", "esp_coverage.missing.config")
        expectedSrcFile = "ESP6500SI-V2.chr1.snps_indels.head.25.missing.tabix_indexed.txt.gz"
        dsType = "indexed_tsv"
        dsTitle = "ESP"
        dsVersion = "6500SI-V2"
        matchMode = "overlap"
        indexNames = "CHROM,POS,POS"
        annotationNames = "EA_GTC,DP"

        creator = TabixIndexedTsvDatasourceCreator()
        indexCols = DatasourceInstallUtils.getIndexCols(dsType, indexNames)
        creator.createDatasource(outputDir, inputPath, indexNames, cfgPath, dsType, dsTitle,
                                 dsVersion, matchMode, annotationNames, indexCols)

        parser = ConfigUtils.createConfigParser(cfgPath)

        actualSrcFile = parser.get("general", "src_file")
        self.assertEqual(actualSrcFile, expectedSrcFile,
                         "Expected data source src_file is %s but was %s."
                         % (expectedSrcFile, actualSrcFile))

        # (column, expected data type, failure-message template)
        expectedTypes = (
            ("EA_GTC", "Float", "Expected EA_GTC data type is %s but was %s."),
            ("DP", "Integer", "Expected DP data type is %s but was %s."),
        )
        for column, expectedType, template in expectedTypes:
            actualType = parser.get("data_types", column)
            self.assertEqual(actualType, expectedType, template % (expectedType, actualType))
コード例 #2
0
    def testCreateGPTsvConfigFile(self):
        """Write a gp_tsv config with GenericTsvDatasourceCreator and verify its ``general`` section.

        Each expected option must be present and must hold the value that was
        handed to ``_createConfigFile``.
        """
        cfgPath = "out/ccle_by_gp.config"
        srcFile = "ccle_results_by_pos.hg19.import.txt"
        dsType = "gp_tsv"
        dsTitle = "CCLE_By_GP"
        dsVersion = "09292010"
        positionCols = "chr,start,end"

        creator = GenericTsvDatasourceCreator()
        indexCols = DatasourceInstallUtils.getIndexCols("gp_tsv", positionCols)
        creator._createConfigFile(configFilename=cfgPath,
                                  baseDSFile=srcFile,
                                  ds_name=dsTitle,
                                  ds_type=dsType,
                                  ds_version=dsVersion,
                                  indexCols=indexCols)

        parser = ConfigUtils.createConfigParser(cfgPath)
        self.assertTrue(parser.has_section("general"), "general section is missing.")

        # Presence checks: (option, message shown when the option is absent).
        requiredOptions = (
            ("type", "type option is missing in general section."),
            ("src_file", "src_file option is missing in general section."),
            ("title", "title option is missing in general section."),
            ("version", "version option is missing in general section."),
            ("genomic_position_cols",
             "genomic_position_cols option is missing in general section."),
        )
        for option, missingMsg in requiredOptions:
            self.assertTrue(parser.has_option("general", option), missingMsg)

        # Value checks: (option, expected value, failure-message template).
        expectedValues = (
            ("type", dsType, "Expected data source type is %s but was %s."),
            ("src_file", srcFile, "Expected data source src_file is %s but was %s."),
            ("title", dsTitle, "Expected data source title is %s but was %s."),
            ("version", dsVersion, "Expected data source version is %s but was %s."),
            ("genomic_position_cols", positionCols,
             "Expected data source genomic_position_cols is %s but was %s."),
        )
        for option, expected, template in expectedValues:
            actual = parser.get("general", option)
            self.assertEqual(actual, expected, template % (expected, actual))
コード例 #3
0
    def getGeneTsvConfigFile(self):
        """Write a gene_tsv config with GenericTsvDatasourceCreator and verify its ``general`` section.

        Each expected option must be present and must hold the value that was
        handed to ``_createConfigFile``.
        """
        # NOTE(review): the name has no ``test`` prefix, so default unittest
        # discovery presumably never runs this method -- confirm whether that
        # is intentional.
        cfgPath = "out/simple_uniprot.config"
        srcFile = "simple_uniprot.out.2011_09.tsv"
        dsType = "gene_tsv"
        dsTitle = "UniProt"
        dsVersion = "2011_09"
        geneCol = "gene"

        creator = GenericTsvDatasourceCreator()
        indexCols = DatasourceInstallUtils.getIndexCols("gene_tsv", geneCol)
        creator._createConfigFile(configFilename=cfgPath,
                                  baseDSFile=srcFile,
                                  ds_name=dsTitle,
                                  ds_type=dsType,
                                  ds_version=dsVersion,
                                  indexCols=indexCols)

        parser = ConfigUtils.createConfigParser(cfgPath)
        self.assertTrue(parser.has_section("general"), "general section is missing.")

        # Presence checks: (option, message shown when the option is absent).
        requiredOptions = (
            ("type", "type option is missing in general section."),
            ("src_file", "src_file option is missing in general section."),
            ("title", "title option is missing in general section."),
            ("version", "version option is missing in general section."),
            ("gene_col", "gene_col option is missing in general section."),
        )
        for option, missingMsg in requiredOptions:
            self.assertTrue(parser.has_option("general", option), missingMsg)

        # Value checks: (option, expected value, failure-message template).
        expectedValues = (
            ("type", dsType, "Expected data source type is %s but was %s."),
            ("src_file", srcFile, "Expected data source src_file is %s but was %s."),
            ("title", dsTitle, "Expected data source title is %s but was %s."),
            ("version", dsVersion, "Expected data source version is %s but was %s."),
            ("gene_col", geneCol, "Expected data source gene_col is %s but was %s."),
        )
        for option, expected, template in expectedValues:
            actual = parser.get("general", option)
            self.assertEqual(actual, expected, template % (expected, actual))
コード例 #4
0
    def testCreateDatasourceWithMissingAnnotationColumns(self):
        """Expect ``createDatasource`` to raise ValueError for the requested annotation columns.

        The annotation list asks for EA_GTC, DP and ESP_DBSNP; presumably
        ESP_DBSNP is the column absent from the input -- confirm against the
        test data.
        """
        inputPath = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.missing.txt")
        cfgPath = os.path.join("out", "esp_coverage.missing_annotation_cols.config")
        outputDir = "out"
        dsType = "indexed_tsv"
        dsTitle = "ESP"
        dsVersion = "6500SI-V2"
        matchMode = "overlap"
        indexNames = "CHROM,POS,POS"
        annotationNames = "EA_GTC,DP,ESP_DBSNP"

        creator = TabixIndexedTsvDatasourceCreator()
        # The index-column lookup stays inside the context manager, so a
        # ValueError raised there satisfies the assertion as well.
        with self.assertRaises(ValueError):
            indexCols = DatasourceInstallUtils.getIndexCols(dsType, indexNames)
            creator.createDatasource(outputDir, inputPath, indexNames, cfgPath, dsType,
                                     dsTitle, dsVersion, matchMode, annotationNames, indexCols)
コード例 #5
0
    def testCreateDatasourceWithMissingAnnotationColumns(self):
        """Creating the datasource with these annotation columns must raise ValueError.

        The annotation list asks for EA_GTC, DP and ESP_DBSNP; presumably
        ESP_DBSNP is the column absent from the input -- confirm against the
        test data.
        """
        dsFile = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")
        destDir = "out"
        indexColumnNames = "CHROM,POS,POS"
        dataSourceType = "indexed_tsv"
        dataSourceName = "ESP"
        dataSourceVersion = "6500SI-V2"
        dataSourceMatchMode = "overlap"
        annotationColumnNames = "EA_GTC,DP,ESP_DBSNP"
        configFilename = os.path.join("out", "esp_coverage.missing.config")

        datasourceBuilder = TabixIndexedTsvDatasourceCreator()
        # assertRaises (rather than try/except-pass) makes the test fail when
        # no ValueError is raised, instead of passing vacuously.
        with self.assertRaises(ValueError):
            datasourceBuilder.createDatasource(destDir, dsFile, indexColumnNames, configFilename, dataSourceType,
                                               dataSourceName, dataSourceVersion, dataSourceMatchMode,
                                               annotationColumnNames,
                                               DatasourceInstallUtils.getIndexCols(dataSourceType, indexColumnNames))
コード例 #6
0
    def testCreateDatasourceWithMissingColumns(self):
        """Run ``createDatasource`` on the ESP input, tolerating InputMismatchException.

        NOTE(review): the exception is swallowed and nothing is asserted, so
        this test passes whether or not InputMismatchException is raised --
        confirm whether an assertRaises check was intended.
        """
        inputPath = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")
        cfgPath = os.path.join("out", "esp_coverage.missing.config")
        outputDir = "out"
        dsType = "indexed_tsv"
        dsTitle = "ESP"
        dsVersion = "6500SI-V2"
        matchMode = "overlap"
        indexNames = "CHROM,POS,POS"
        annotationNames = "EA_GTC,DP"

        creator = TabixIndexedTsvDatasourceCreator()
        try:
            # Looked up inside the try so that an InputMismatchException from
            # the index-column lookup is tolerated too.
            indexCols = DatasourceInstallUtils.getIndexCols(dsType, indexNames)
            creator.createDatasource(outputDir, inputPath, indexNames, cfgPath, dsType,
                                     dsTitle, dsVersion, matchMode, annotationNames, indexCols)
        except InputMismatchException:
            pass
コード例 #7
0
    def testCreateGPTsvConfigFile(self):
        """Generate a gp_tsv config file and check the options of its ``general`` section.

        All options are first checked for presence, then compared against the
        values supplied to ``_createConfigFile``.
        """
        cfgPath = "out/ccle_by_gp.config"
        srcFile = "ccle_results_by_pos.hg19.import.txt"
        dsType = "gp_tsv"
        dsTitle = "CCLE_By_GP"
        dsVersion = "09292010"
        positionCols = "chr,start,end"

        creator = GenericTsvDatasourceCreator()
        creator._createConfigFile(
            configFilename=cfgPath,
            baseDSFile=srcFile,
            ds_name=dsTitle,
            ds_type=dsType,
            ds_version=dsVersion,
            indexCols=DatasourceInstallUtils.getIndexCols("gp_tsv", positionCols))

        parser = ConfigUtils.createConfigParser(cfgPath)
        self.assertTrue(parser.has_section("general"), "general section is missing.")

        # One row per option: (name, expected value, missing message, mismatch template).
        checks = (
            ("type", dsType,
             "type option is missing in general section.",
             "Expected data source type is %s but was %s."),
            ("src_file", srcFile,
             "src_file option is missing in general section.",
             "Expected data source src_file is %s but was %s."),
            ("title", dsTitle,
             "title option is missing in general section.",
             "Expected data source title is %s but was %s."),
            ("version", dsVersion,
             "version option is missing in general section.",
             "Expected data source version is %s but was %s."),
            ("genomic_position_cols", positionCols,
             "genomic_position_cols option is missing in general section.",
             "Expected data source genomic_position_cols is %s but was %s."),
        )

        # All presence checks run before any value check.
        for option, _, missingMsg, _ in checks:
            self.assertTrue(parser.has_option("general", option), missingMsg)

        for option, expected, _, template in checks:
            actual = parser.get("general", option)
            self.assertEqual(actual, expected, template % (expected, actual))
コード例 #8
0
    def getGeneTsvConfigFile(self):
        """Generate a gene_tsv config file and check the options of its ``general`` section.

        All options are first checked for presence, then compared against the
        values supplied to ``_createConfigFile``.
        """
        # NOTE(review): the name has no ``test`` prefix, so default unittest
        # discovery presumably never runs this method -- confirm whether that
        # is intentional.
        cfgPath = "out/simple_uniprot.config"
        srcFile = "simple_uniprot.out.2011_09.tsv"
        dsType = "gene_tsv"
        dsTitle = "UniProt"
        dsVersion = "2011_09"
        geneCol = "gene"

        creator = GenericTsvDatasourceCreator()
        creator._createConfigFile(
            configFilename=cfgPath,
            baseDSFile=srcFile,
            ds_name=dsTitle,
            ds_type=dsType,
            ds_version=dsVersion,
            indexCols=DatasourceInstallUtils.getIndexCols("gene_tsv", geneCol))

        parser = ConfigUtils.createConfigParser(cfgPath)
        self.assertTrue(parser.has_section("general"), "general section is missing.")

        # One row per option: (name, expected value, missing message, mismatch template).
        checks = (
            ("type", dsType,
             "type option is missing in general section.",
             "Expected data source type is %s but was %s."),
            ("src_file", srcFile,
             "src_file option is missing in general section.",
             "Expected data source src_file is %s but was %s."),
            ("title", dsTitle,
             "title option is missing in general section.",
             "Expected data source title is %s but was %s."),
            ("version", dsVersion,
             "version option is missing in general section.",
             "Expected data source version is %s but was %s."),
            ("gene_col", geneCol,
             "gene_col option is missing in general section.",
             "Expected data source gene_col is %s but was %s."),
        )

        # All presence checks run before any value check.
        for option, _, missingMsg, _ in checks:
            self.assertTrue(parser.has_option("general", option), missingMsg)

        for option, expected, _, template in checks:
            actual = parser.get("general", option)
            self.assertEqual(actual, expected, template % (expected, actual))
コード例 #9
0
    def testCreateDatasource(self):
        """Create an indexed_tsv datasource from the ESP input and verify the generated config.

        Checks that the ``general`` and ``data_types`` sections exist, that
        every expected ``general`` option is present with the expected value,
        and that the data types written for EA_GTC and DP match.
        """
        inputPath = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")
        outputDir = "out"
        cfgPath = "out/esp_coverage.config"
        expectedSrcFile = "ESP6500SI-V2.chr1.snps_indels.head.25.tabix_indexed.txt.gz"
        indexNames = "CHROM,POS,POS,REF,ALT"
        allColumns = "CHROM,POS,REF,ALT,DBSNP,EA_AC,AA_AC,TAC,MAF,GTS,EA_GTC,AA_GTC,GTC,DP,FG,GM,AA,AAC,PP,CDP,PH,CP,CG,GL,GS,CA,EXOME_CHIP,GWAS_PUBMED"
        annotationNames = "DBSNP,EA_GTC,DP"
        dsType = "indexed_tsv"
        dsTitle = "ESP"
        dsVersion = "6500SI-V2"
        matchMode = "overlap"

        creator = TabixIndexedTsvDatasourceCreator()
        indexCols = DatasourceInstallUtils.getIndexCols(dsType, indexNames)
        creator.createDatasource(outputDir, inputPath, indexNames, cfgPath, dsType,
                                 dsTitle, dsVersion, matchMode, annotationNames, indexCols)

        parser = ConfigUtils.createConfigParser(cfgPath)
        self.assertTrue(parser.has_section("general"), "general section is missing.")
        self.assertTrue(parser.has_section("data_types"), "data_types section is missing.")

        # Presence checks: (option, message shown when the option is absent).
        requiredOptions = (
            ("type", "type option is missing in general section."),
            ("src_file", "src_file option is missing in general section."),
            ("title", "title option is missing in general section."),
            ("version", "version option is missing in general section."),
            ("column_names", "column_names option is missing in general section."),
            ("annotation_column_names",
             "annotation_column_names option is missing in general section."),
            ("match_mode", "match_mode option is missing in general section"),
            ("index_column_names", "index_column_names option is missing in general section."),
        )
        for option, missingMsg in requiredOptions:
            self.assertTrue(parser.has_option("general", option), missingMsg)

        # Value checks: (section, option, expected value, failure-message template).
        expectedValues = (
            ("general", "type", dsType, "Expected data source type is %s but was %s."),
            ("general", "src_file", expectedSrcFile,
             "Expected data source src_file is %s but was %s."),
            ("general", "title", dsTitle, "Expected data source title is %s but was %s."),
            ("general", "version", dsVersion, "Expected data source version is %s but was %s."),
            ("general", "column_names", allColumns,
             "Expected data source column names is %s but was %s."),
            ("general", "annotation_column_names", annotationNames,
             "Expected data source annotation column names is %s but was %s."),
            ("general", "match_mode", matchMode,
             "Expected data source match mode is %s but was %s."),
            ("general", "index_column_names", indexNames,
             "Expected data source index column names is %s but was %s."),
            ("data_types", "EA_GTC", "String", "Expected EA_GTC data type is %s but was %s."),
            ("data_types", "DP", "Integer", "Expected DP data type is %s but was %s."),
        )
        for section, option, expected, template in expectedValues:
            actual = parser.get(section, option)
            self.assertEqual(actual, expected, template % (expected, actual))
コード例 #10
0
    def testCreateConfigFile(self):
        """Write an indexed_tsv config with TabixIndexedTsvDatasourceCreator and verify it.

        Checks that the ``general`` and ``data_types`` sections exist and that
        every expected ``general`` option is present with the value that was
        handed to ``_createConfigFile``.
        """
        cfgPath = os.path.join("out", "esp_coverage.config")
        srcFile = "ESP6500SI-V2.coverage.txt.gz"
        dsType = "indexed_tsv"
        dsTitle = "ESP"
        dsVersion = "6500SI-V2"
        matchMode = "overlap"
        indexNames = "Chromosome,Position,Position"
        allColumns = "Chromosome,Position,TotalSamplesCovered,AvgSampleReadDepth,TotalEAsamplesCovered,AvgEAsampleReadDepth,TotalAAsamplesCovered,AvgAAsampleReadDepth"
        annotationNames = "TotalSamplesCovered,AvgSampleReadDepth,AvgEAsampleReadDepth,TotalAAsamplesCovered,AvgAAsampleReadDepth"

        creator = TabixIndexedTsvDatasourceCreator()
        indexCols = DatasourceInstallUtils.getIndexCols(dsType, indexNames)
        creator._createConfigFile(configFilename=cfgPath,
                                  baseDSFile=srcFile,
                                  ds_type=dsType,
                                  ds_name=dsTitle,
                                  ds_version=dsVersion,
                                  column_names=allColumns,
                                  annotation_column_names=annotationNames,
                                  ds_match_mode=matchMode,
                                  indexCols=indexCols)

        parser = ConfigUtils.createConfigParser(cfgPath)
        self.assertTrue(parser.has_section("general"), "general section is missing.")
        self.assertTrue(parser.has_section("data_types"), "data_types section is missing.")

        # Presence checks: (option, message shown when the option is absent).
        requiredOptions = (
            ("type", "type option is missing in general section."),
            ("src_file", "src_file option is missing in general section."),
            ("title", "title option is missing in general section."),
            ("version", "version option is missing in general section."),
            ("column_names", "column_names option is missing in general section."),
            ("annotation_column_names",
             "annotation_column_names option is missing in general section."),
            ("match_mode", "match_mode option is missing in general section"),
            ("index_column_names", "index_column_names option is missing in general section."),
        )
        for option, missingMsg in requiredOptions:
            self.assertTrue(parser.has_option("general", option), missingMsg)

        # Value checks: (option, expected value, failure-message template).
        expectedValues = (
            ("type", dsType, "Expected data source type is %s but was %s."),
            ("src_file", srcFile, "Expected data source src_file is %s but was %s."),
            ("title", dsTitle, "Expected data source title is %s but was %s."),
            ("version", dsVersion, "Expected data source version is %s but was %s."),
            ("column_names", allColumns, "Expected data source column names is %s but was %s."),
            ("annotation_column_names", annotationNames,
             "Expected data source annotation column names is %s but was %s."),
            ("match_mode", matchMode, "Expected data source match mode is %s but was %s."),
            ("index_column_names", indexNames,
             "Expected data source index column names is %s but was %s."),
        )
        for option, expected, template in expectedValues:
            actual = parser.get("general", option)
            self.assertEqual(actual, expected, template % (expected, actual))
コード例 #11
0
    def testCreateDatasource(self):
        """Create an indexed_tsv datasource in a fresh directory and verify files, config and annotation.

        Steps:
          1. Recreate the destination directory and build the datasource.
          2. Check that the tabix-indexed file and its ``.tbi`` index exist.
          3. Check sections, options and values of the generated config.
          4. Load the datasource from the config and annotate one mutation,
             checking the ESP_DBSNP, ESP_EA_GTC and ESP_DP annotations.
        """
        dsFile = os.path.join("testdata",
                              "ESP6500SI-V2.chr1.snps_indels.head.25.txt")

        # Never point destDir at "out/" itself: the directory is wiped with
        # rmtree below before being recreated.
        destDir = "out/create_ds_test/"

        if os.path.exists(destDir):
            shutil.rmtree(destDir)
        os.makedirs(destDir)
        datasourceFilename = "ESP6500SI-V2.chr1.snps_indels.head.25.tabix_indexed.txt.gz"
        indexColumnNames = "CHROM,POS,POS,REF,ALT"
        columnNames = "CHROM,POS,REF,ALT,DBSNP,EA_AC,AA_AC,TAC,MAF,GTS,EA_GTC,AA_GTC,GTC,DP,FG,GM,AA,AAC,PP,CDP,PH,CP,CG,GL,GS,CA,EXOME_CHIP,GWAS_PUBMED"
        configFilename = "out/esp_coverage.config"
        dataSourceType = "indexed_tsv"
        dataSourceName = "ESP"
        dataSourceVersion = "6500SI-V2"
        dataSourceMatchMode = "overlap"
        annotationColumnNames = "DBSNP,EA_GTC,DP"

        datasourceBuilder = TabixIndexedTsvDatasourceCreator()
        datasourceBuilder.createDatasource(
            destDir, dsFile, indexColumnNames, configFilename, dataSourceType,
            dataSourceName, dataSourceVersion, dataSourceMatchMode,
            annotationColumnNames,
            DatasourceInstallUtils.getIndexCols(dataSourceType,
                                                indexColumnNames))

        # Build paths with os.path.join instead of string concatenation.
        indexedFile = os.path.join(destDir, datasourceFilename)
        self.assertTrue(os.path.exists(indexedFile))
        self.assertTrue(os.path.exists(indexedFile + ".tbi"))

        configParser = ConfigUtils.createConfigParser(configFilename)
        self.assertTrue(configParser.has_section("general"),
                        "general section is missing.")
        self.assertTrue(configParser.has_section("data_types"),
                        "data_types section is missing.")

        # Presence checks: (option, message shown when the option is absent).
        requiredOptions = (
            ("type", "type option is missing in general section."),
            ("src_file", "src_file option is missing in general section."),
            ("title", "title option is missing in general section."),
            ("version", "version option is missing in general section."),
            ("column_names",
             "column_names option is missing in general section."),
            ("annotation_column_names",
             "annotation_column_names option is missing in general section."),
            ("match_mode", "match_mode option is missing in general section"),
            ("index_column_names",
             "index_column_names option is missing in general section."),
        )
        for option, missingMsg in requiredOptions:
            self.assertTrue(configParser.has_option("general", option),
                            missingMsg)

        # Value checks: (section, option, expected value, message template).
        expectedValues = (
            ("general", "type", dataSourceType,
             "Expected data source type is %s but was %s."),
            ("general", "src_file", datasourceFilename,
             "Expected data source src_file is %s but was %s."),
            ("general", "title", dataSourceName,
             "Expected data source title is %s but was %s."),
            ("general", "version", dataSourceVersion,
             "Expected data source version is %s but was %s."),
            ("general", "column_names", columnNames,
             "Expected data source column names is %s but was %s."),
            ("general", "annotation_column_names", annotationColumnNames,
             "Expected data source annotation column names is %s but was %s."),
            ("general", "match_mode", dataSourceMatchMode,
             "Expected data source match mode is %s but was %s."),
            ("general", "index_column_names", indexColumnNames,
             "Expected data source index column names is %s but was %s."),
            ("data_types", "EA_GTC", "String",
             "Expected EA_GTC data type is %s but was %s."),
            ("data_types", "DP", "Integer",
             "Expected DP data type is %s but was %s."),
        )
        for section, option, expected, template in expectedValues:
            actual = configParser.get(section, option)
            self.assertEqual(actual, expected, template % (expected, actual))

        # Reuse destDir rather than repeating the directory literal.
        ds = DatasourceFactory.createDatasourceFromConfigParser(
            configParser, destDir)
        mut = MutationData(chr="1",
                           start="69428",
                           end="69428",
                           ref_allele="T",
                           alt_allele="G")
        mut2 = ds.annotate_mutation(mut)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(mut2["ESP_DBSNP"], "dbSNP_134")
        self.assertEqual(mut2["ESP_EA_GTC"], "92,129,3203")
        self.assertEqual(mut2["ESP_DP"], "110")
コード例 #12
0
    def testCreateDatasource(self):
        """Create a tabix-indexed TSV datasource and verify its files, config and annotations.

        The test builds a datasource from the ESP test file into a scratch directory and then
        checks, in order:
          * the datasource file and its ".tbi" index exist in the destination directory,
          * the generated config has the "general" and "data_types" sections and every
            expected option in "general",
          * each "general" option holds the value passed to the creator,
          * the recorded data types for the EA_GTC and DP columns,
          * a datasource built from that config annotates a known mutation with the
            expected DBSNP, EA_GTC and DP values.
        """
        dsFile = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")

        # Never specify "out/" here: destDir is deleted and recreated just below, so pointing
        # it at "out/" would remove everything already stored under that directory.
        destDir = "out/create_ds_test/"

        if os.path.exists(destDir):
            shutil.rmtree(destDir)
        os.makedirs(destDir)
        datasourceFilename = "ESP6500SI-V2.chr1.snps_indels.head.25.tabix_indexed.txt.gz"
        indexColumnNames = "CHROM,POS,POS,REF,ALT"
        columnNames = "CHROM,POS,REF,ALT,DBSNP,EA_AC,AA_AC,TAC,MAF,GTS,EA_GTC,AA_GTC,GTC,DP,FG,GM,AA,AAC,PP,CDP,PH,CP,CG,GL,GS,CA,EXOME_CHIP,GWAS_PUBMED"
        configFilename = "out/esp_coverage.config"
        dataSourceType = "indexed_tsv"
        dataSourceName = "ESP"
        dataSourceVersion = "6500SI-V2"
        dataSourceMatchMode = "overlap"
        annotationColumnNames = "DBSNP,EA_GTC,DP"

        datasourceBuilder = TabixIndexedTsvDatasourceCreator()
        datasourceBuilder.createDatasource(destDir, dsFile, indexColumnNames, configFilename, dataSourceType,
                                           dataSourceName, dataSourceVersion, dataSourceMatchMode,
                                           annotationColumnNames,
                                           DatasourceInstallUtils.getIndexCols(dataSourceType, indexColumnNames))

        # Both the datasource file and its ".tbi" companion must be present in destDir.
        datasourcePath = os.path.join(destDir, datasourceFilename)
        self.assertTrue(os.path.exists(datasourcePath))
        self.assertTrue(os.path.exists(datasourcePath + ".tbi"))

        configParser = ConfigUtils.createConfigParser(configFilename)
        for section in ("general", "data_types"):
            self.assertTrue(configParser.has_section(section), "%s section is missing." % section)

        # (option name, human-readable label used in the failure message, expected value)
        expectedGeneralOptions = [
            ("type", "type", dataSourceType),
            ("src_file", "src_file", datasourceFilename),
            ("title", "title", dataSourceName),
            ("version", "version", dataSourceVersion),
            ("column_names", "column names", columnNames),
            ("annotation_column_names", "annotation column names", annotationColumnNames),
            ("match_mode", "match mode", dataSourceMatchMode),
            ("index_column_names", "index column names", indexColumnNames),
        ]

        # Check presence of every option first so a missing option is reported as such
        # before any value comparison is attempted.
        for option, _, _ in expectedGeneralOptions:
            self.assertTrue(configParser.has_option("general", option),
                            "%s option is missing in general section." % option)

        for option, label, expected in expectedGeneralOptions:
            actual = configParser.get("general", option)
            self.assertEqual(actual, expected,
                             "Expected data source %s is %s but was %s." % (label, expected, actual))

        for column, expectedType in (("EA_GTC", "String"), ("DP", "Integer")):
            actualType = configParser.get("data_types", column)
            self.assertEqual(actualType, expectedType,
                             "Expected %s data type is %s but was %s." % (column, expectedType, actualType))

        # Load the datasource back from the generated config and annotate a known variant.
        ds = DatasourceFactory.createDatasourceFromConfigParser(configParser, destDir)
        mut = MutationData(chr="1", start="69428", end="69428", ref_allele="T", alt_allele="G")
        mut2 = ds.annotate_mutation(mut)
        # assertEqual rather than the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(mut2["ESP_DBSNP"], "dbSNP_134")
        self.assertEqual(mut2["ESP_EA_GTC"], "92,129,3203")
        self.assertEqual(mut2["ESP_DP"], "110")