Example #1
0
    def gen_sql_heatmap(self, id_table):
        #scan the children
        # XXX Handling of sql for children is broken if the child may appear
        # as part of multiple merge objects, such as TrackGenomic and TrackClinical.
        # A disgusting workaround for clinicalMatrix is to prevent the TrackGenomic from calling
        # it for gen_sql.
        clinical = self.members.pop("clinicalMatrix")
        for line in CGData.CGMergeObject.sql_pass(self, id_table, method="heatmap"):
            yield line
        self.members["clinicalMatrix"] = clinical

        gmatrix = self.members[ 'genomicMatrix' ]
        pmap = self.members[ 'probeMap' ].get( assembly="hg18" ) # BUG: hard coded to only producing HG18 tables
        if pmap is None:
            CGData.error("Missing HG18 %s" % ( self.members[ 'probeMap'].get_name() ))
            return
        
        table_base = self.get_name()
        CGData.log("Writing Track %s" % (table_base))
        
        clinical_table_base =  self.members[ "clinicalMatrix" ].get_name()

        yield "INSERT into raDb( name, sampleTable, clinicalTable, columnTable, aliasTable, shortLabel, longLabel, expCount, dataType, platform, profile, security) VALUES ( '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%d', '%s', '%s', '%s', '%s');\n" % \
            ( "genomic_" + table_base, "sample_" + table_base,
                "clinical_" + clinical_table_base, "clinical_" + clinical_table_base + "_colDb",
                "genomic_" + table_base + "_alias",
                sql_fix(gmatrix.attrs['shortTitle']),
                sql_fix(gmatrix.attrs['longTitle']),
                len(gmatrix.get_sample_list()),
                self.format,
                gmatrix.attrs[':dataSubType'],
                'localDb',
                'public',
                )
        
        # write out the sample table
        yield "drop table if exists sample_%s;" % ( table_base )
        yield """
CREATE TABLE sample_%s (
    id           int,
    sampleName   varchar(255)
) engine 'MyISAM';
""" % ( table_base )

        from CGData.ClinicalMatrix import sortedSamples
        for sample in sortedSamples(gmatrix.get_sample_list()):
	    yield "INSERT INTO sample_%s VALUES( %d, '%s' );\n" % ( table_base, id_table.get( clinical_table_base + ':sample_id', sample), sample )

        
        yield "drop table if exists genomic_%s_alias;" % ( table_base )
        yield """
CREATE TABLE genomic_%s_alias (
    name        varchar(255),
    alias         varchar(255)
) engine 'MyISAM';
""" % ( table_base )

        for pset in pmap:
            for probe in pset:
                for alias in probe.aliases:
                    yield "insert into genomic_%s_alias( name, alias ) values( '%s', '%s' );\n" % (table_base, sql_fix(probe.name), sql_fix(alias))

        # write out the BED table
        yield "drop table if exists %s;" % ( "genomic_" + table_base )
        yield CREATE_BED % ( "genomic_" + table_base + "_tmp")
        
        sample_ids = []
        samples = gmatrix.get_sample_list()

        # sort samples by sample_id, and retain the sort order for application to the genomic data, below
        tmp=sorted(zip(samples, range(len(samples))), cmp=lambda x,y: id_table.get(clinical_table_base + ':sample_id', x[0]) - id_table.get( clinical_table_base + ':sample_id', y[0]))
        samples, order = map(lambda t: list(t), zip(*tmp))

        for sample in samples:
            sample_ids.append( str( id_table.get( clinical_table_base + ':sample_id', sample ) ) )
        
        exp_ids = ','.join( sample_ids )
        missingProbeCount = 0
        for probe_name in gmatrix.get_probe_list():
            # get the genomic data and rearrange to match the sample_id order
            tmp = gmatrix.get_row_vals( probe_name )
            row = map(lambda i: tmp[order[i]], range(len(tmp)))

            pset = pmap.get( probe_name )
            if pset is not None:
                for probe in pset:
                    istr = "insert into %s(chrom, chromStart, chromEnd, strand,  name, expCount, expIds, expScores) values ( '%s', '%s', '%s', '%s', '%s', '%s', '%s', %s );\n" % \
                            ( "genomic_%s_tmp" % (table_base), probe.chrom, probe.chrom_start, probe.chrom_end, probe.strand, sql_fix(probe_name), len(sample_ids), exp_ids, self.scores(row) )
                    yield istr
            else:
                missingProbeCount += 1
        yield "create table genomic_%s like genomic_%s_tmp;" % (table_base, table_base)
        yield "insert into genomic_%s select * from genomic_%s_tmp order by chrom, chromStart;" % (table_base, table_base)
        yield "drop table genomic_%s_tmp;" % table_base
        CGData.log("%s Missing probes %d" % (table_base, missingProbeCount))
Example #2
0
    def gen_sql_heatmap(self, id_table):
        #scan the children
        # XXX Handling of sql for children is broken if the child may appear
        # as part of multiple merge objects, such as TrackGenomic and TrackClinical.
        # A disgusting workaround for clinicalMatrix is to prevent the TrackGenomic from calling
        # it for gen_sql.
        clinical = self.members.pop("clinicalMatrix")
        for line in CGData.CGMergeObject.sql_pass(self, id_table, method="heatmap"):
            yield line
        self.members["clinicalMatrix"] = clinical

        gmatrix = self.members[ 'genomicMatrix' ]
        pmap = self.members[ 'probeMap' ].lookup( assembly="hg18" ) # BUG: hard coded to only producing HG18 tables
        if pmap is None:
            CGData.error("Missing HG18 %s" % ( self.members[ 'probeMap'].get_name() ))
            return
        
        table_base = self.get_name()
        CGData.log("Writing Track %s" % (table_base))
        
        clinical_table_base =  self.members[ "clinicalMatrix" ].get_name()

        other = {}
        for attr in ['wrangler', 'wrangling_procedure', 'url', 'citation', 'description']:
            if attr in gmatrix:
                other[attr] = gmatrix[attr]
        if 'dataProducer' in gmatrix:
            other['author_list'] = gmatrix['dataProducer']
        if 'articleTitle' in gmatrix:
            other['article_title'] = gmatrix['articleTitle']
        
        other['version'] = gmatrix.get('version', "")
        datetime.datetime.strptime(other['version'], "%Y-%m-%d") #if the version isn't properly formatted, though exception
        
        if 'owner' in gmatrix:
            other['owner'] = gmatrix['owner']
        other['colNormalization'] = gmatrix.get('colNormalization', False)
        if not isinstance(other['colNormalization'], bool):
            other['colNormalization']  = False
        other['redistribution'] = gmatrix.get('redistribution', False)
        if not isinstance(other['redistribution'], bool):
            other['redistribution']  = False
        other['security'] = gmatrix.get('security', "public")
        if other['security'] not in [ "public", "private" ]:
            other['security'] = "public"

        yield "DELETE from raDb where name = '%s';\n" % ("genomic_" + table_base)
        yield "INSERT into raDb( name, sampleTable, clinicalTable, columnTable, aliasTable, shortLabel, longLabel, expCount, dataType, platform, profile, security, priority, gain, groupName, wrangler, url, article_title, citation, author_list, wrangling_procedure, other) VALUES ( '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%d', '%s', '%s', '%s', '%s', %f, %f, '%s', %s, %s, %s, %s, %s, %s, '%s');\n" % \
            ( "genomic_" + table_base, "sample_" + table_base,
                "clinical_" + clinical_table_base, "colDb",
                "genomic_" + table_base + "_alias",
                sql_fix(gmatrix['shortTitle']),
                sql_fix(gmatrix['longTitle']),
                len(gmatrix.get_sample_list()),
                self.format,
                dataSubTypeMap[gmatrix[':dataSubType']] if gmatrix[':dataSubType'] in dataSubTypeMap else gmatrix[':dataSubType'],
                'localDb',
                'public',
                float(gmatrix.get('priority', 1.0)),
                float(gmatrix.get('gain', 1.0)),
                sql_fix(gmatrix.get('groupTitle', 'Misc.')),
                "'%s'"%sql_fix(gmatrix['wrangler']) if 'wrangler' in gmatrix else '\N',
                "'%s'"%sql_fix(gmatrix['url']) if 'url' in gmatrix else '\N',
                "'%s'"%sql_fix(gmatrix['articleTitle']) if 'articleTitle' in gmatrix else '\N',
                "'%s'"%sql_fix(gmatrix['citation']) if 'citation' in gmatrix else '\N',
                "'%s'"%sql_fix(gmatrix['dataProducer']) if 'dataProducer' in gmatrix else '\N',
                "'%s'"%sql_fix(gmatrix['wrangling_procedure']) if 'wrangling_procedure' in gmatrix else '\N',
                sql_fix(json.dumps(other)),
                )
        
        # write out the sample table
        yield "drop table if exists sample_%s;" % ( table_base )
        yield """
CREATE TABLE sample_%s (
    id           int,
    sampleName   varchar(255)
) engine 'MyISAM';
""" % ( table_base )

        from CGData.ClinicalMatrix import sortedSamples
        for sample in sortedSamples(gmatrix.get_sample_list()):
            yield "INSERT INTO sample_%s VALUES( %d, '%s' );\n" % ( table_base, id_table.get( clinical_table_base + ':sample_id', sample), sql_fix(sample) )

        
        yield "drop table if exists genomic_%s_alias;" % ( table_base )
        yield """
CREATE TABLE genomic_%s_alias (
    name        varchar(255),
    alias         varchar(255)
) engine 'MyISAM';
""" % ( table_base )

        for probe in pmap.get_probes():
            for alias in probe.aliases:
                yield "insert into genomic_%s_alias( name, alias ) values( '%s', '%s' );\n" % (table_base, sql_fix(probe.name), sql_fix(alias))

        # write out the BED table
        yield "drop table if exists %s;" % ( "genomic_" + table_base )
        yield CREATE_BED % ( "genomic_" + table_base + "_tmp")
        
        sample_ids = []
        samples = gmatrix.get_sample_list()

        # sort samples by sample_id, and retain the sort order for application to the genomic data, below
        tmp=sorted(zip(samples, range(len(samples))), cmp=lambda x,y: id_table.get(clinical_table_base + ':sample_id', x[0]) - id_table.get( clinical_table_base + ':sample_id', y[0]))
        samples, order = map(lambda t: list(t), zip(*tmp))

        for sample in samples:
            sample_ids.append( str( id_table.get( clinical_table_base + ':sample_id', sample ) ) )
        
        exp_ids = ','.join( sample_ids )
        missingProbeCount = 0
        for probe_name in gmatrix.get_probe_list():
            # get the genomic data and rearrange to match the sample_id order
            tmp = gmatrix.get_row_vals( probe_name )
            row = map(lambda i: tmp[order[i]], range(len(tmp)))

            pset = pmap.lookup( probe_name )
            if pset is not None:
                for probe in pset:
                    istr = "insert into %s(chrom, chromStart, chromEnd, strand,  name, expCount, expIds, expScores) values ( '%s', '%s', '%s', '%s', '%s', '%s', '%s', %s );\n" % \
                            ( "genomic_%s_tmp" % (table_base), probe.chrom, probe.chrom_start-1, probe.chrom_end, probe.strand, sql_fix(probe_name), len(sample_ids), exp_ids, self.scores(row) )
                    yield istr
            else:
                missingProbeCount += 1
        yield "# sort file by chrom position\n"
        yield "create table genomic_%s like genomic_%s_tmp;\n" % (table_base, table_base)
        yield "insert into genomic_%s select * from genomic_%s_tmp order by chrom, chromStart;\n" % (table_base, table_base)
        yield "drop table genomic_%s_tmp;\n" % table_base
        CGData.log("%s Missing probes %d" % (table_base, missingProbeCount))
Example #3
0
    def gen_sql_heatmap(self, id_table):
        # scan the children
        # XXX Handling of sql for children is broken if the child may appear
        # as part of multiple merge objects, such as TrackGenomic and TrackClinical.
        # A disgusting workaround for clinicalMatrix is to prevent the TrackGenomic from calling
        # it for gen_sql.
        clinical = self.members.pop("clinicalMatrix")
        for line in CGData.CGMergeObject.sql_pass(self, id_table, method="heatmap"):
            yield line
        self.members["clinicalMatrix"] = clinical

        gmatrix = self.members["genomicMatrix"]
        pmap = self.members["probeMap"].lookup(assembly="hg18")  # BUG: hard coded to only producing HG18 tables
        if pmap is None:
            CGData.error("Missing HG18 %s" % (self.members["probeMap"].get_name()))
            return

        table_base = self.get_name()
        CGData.log("Writing Track %s" % (table_base))

        clinical_table_base = self.members["clinicalMatrix"].get_name()

        other = {}
        for attr in ["wrangler", "wrangling_procedure", "url", "citation", "description"]:
            if attr in gmatrix:
                other[attr] = gmatrix[attr]
        if "dataProducer" in gmatrix:
            other["author_list"] = gmatrix["dataProducer"]
        if "articleTitle" in gmatrix:
            other["article_title"] = gmatrix["articleTitle"]

        other["version"] = gmatrix.get("version", "")
        datetime.datetime.strptime(
            other["version"], "%Y-%m-%d"
        )  # if the version isn't properly formatted, though exception

        if "owner" in gmatrix:
            other["owner"] = gmatrix["owner"]
        other["colNormalization"] = gmatrix.get("colNormalization", False)
        if not isinstance(other["colNormalization"], bool):
            other["colNormalization"] = False
        other["redistribution"] = gmatrix.get("redistribution", False)
        if not isinstance(other["redistribution"], bool):
            other["redistribution"] = False
        other["security"] = gmatrix.get("security", "public")
        if other["security"] not in ["public", "private"]:
            other["security"] = "public"

        yield "DELETE from raDb where name = '%s';\n" % ("genomic_" + table_base)
        yield "INSERT into raDb( name, sampleTable, clinicalTable, columnTable, aliasTable, shortLabel, longLabel, expCount, dataType, platform, profile, security, priority, gain, groupName, wrangler, url, article_title, citation, author_list, wrangling_procedure, other) VALUES ( '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%d', '%s', '%s', '%s', '%s', %f, %f, '%s', %s, %s, %s, %s, %s, %s, '%s');\n" % (
            "genomic_" + table_base,
            "sample_" + table_base,
            "clinical_" + clinical_table_base,
            "colDb",
            "genomic_" + table_base + "_alias",
            sql_fix(gmatrix["shortTitle"]),
            sql_fix(gmatrix["longTitle"]),
            len(gmatrix.get_sample_list()),
            self.format,
            dataSubTypeMap[gmatrix[":dataSubType"]]
            if gmatrix[":dataSubType"] in dataSubTypeMap
            else gmatrix[":dataSubType"],
            "localDb",
            "public",
            float(gmatrix.get("priority", 1.0)),
            float(gmatrix.get("gain", 1.0)),
            sql_fix(gmatrix.get("groupTitle", "Misc.")),
            "'%s'" % sql_fix(gmatrix["wrangler"]) if "wrangler" in gmatrix else "\N",
            "'%s'" % sql_fix(gmatrix["url"]) if "url" in gmatrix else "\N",
            "'%s'" % sql_fix(gmatrix["articleTitle"]) if "articleTitle" in gmatrix else "\N",
            "'%s'" % sql_fix(gmatrix["citation"]) if "citation" in gmatrix else "\N",
            "'%s'" % sql_fix(gmatrix["dataProducer"]) if "dataProducer" in gmatrix else "\N",
            "'%s'" % sql_fix(gmatrix["wrangling_procedure"]) if "wrangling_procedure" in gmatrix else "\N",
            sql_fix(json.dumps(other)),
        )

        # write out the sample table
        yield "drop table if exists sample_%s;" % (table_base)
        yield """
CREATE TABLE sample_%s (
    id           int,
    sampleName   varchar(255)
) engine 'MyISAM';
""" % (
            table_base
        )

        from CGData.ClinicalMatrix import sortedSamples

        for sample in sortedSamples(gmatrix.get_sample_list()):
            yield "INSERT INTO sample_%s VALUES( %d, '%s' );\n" % (
                table_base,
                id_table.get(clinical_table_base + ":sample_id", sample),
                sql_fix(sample),
            )

        yield "drop table if exists genomic_%s_alias;" % (table_base)
        yield """
CREATE TABLE genomic_%s_alias (
    name        varchar(255),
    alias         varchar(255)
) engine 'MyISAM';
""" % (
            table_base
        )

        for probe in pmap.get_probes():
            for alias in probe.aliases:
                yield "insert into genomic_%s_alias( name, alias ) values( '%s', '%s' );\n" % (
                    table_base,
                    sql_fix(probe.name),
                    sql_fix(alias),
                )

        # write out the BED table
        yield "drop table if exists %s;" % ("genomic_" + table_base)
        yield CREATE_BED % ("genomic_" + table_base + "_tmp")

        sample_ids = []
        samples = gmatrix.get_sample_list()

        # sort samples by sample_id, and retain the sort order for application to the genomic data, below
        tmp = sorted(
            zip(samples, range(len(samples))),
            cmp=lambda x, y: id_table.get(clinical_table_base + ":sample_id", x[0])
            - id_table.get(clinical_table_base + ":sample_id", y[0]),
        )
        samples, order = map(lambda t: list(t), zip(*tmp))

        for sample in samples:
            sample_ids.append(str(id_table.get(clinical_table_base + ":sample_id", sample)))

        exp_ids = ",".join(sample_ids)
        missingProbeCount = 0
        for probe_name in gmatrix.get_probe_list():
            # get the genomic data and rearrange to match the sample_id order
            tmp = gmatrix.get_row_vals(probe_name)
            row = map(lambda i: tmp[order[i]], range(len(tmp)))

            pset = pmap.lookup(probe_name)
            if pset is not None:
                for probe in pset:
                    istr = (
                        "insert into %s(chrom, chromStart, chromEnd, strand,  name, expCount, expIds, expScores) values ( '%s', '%s', '%s', '%s', '%s', '%s', '%s', %s );\n"
                        % (
                            "genomic_%s_tmp" % (table_base),
                            probe.chrom,
                            probe.chrom_start - 1,
                            probe.chrom_end,
                            probe.strand,
                            sql_fix(probe_name),
                            len(sample_ids),
                            exp_ids,
                            self.scores(row),
                        )
                    )
                    yield istr
            else:
                missingProbeCount += 1
        yield "# sort file by chrom position\n"
        yield "create table genomic_%s like genomic_%s_tmp;\n" % (table_base, table_base)
        yield "insert into genomic_%s select * from genomic_%s_tmp order by chrom, chromStart;\n" % (
            table_base,
            table_base,
        )
        yield "drop table genomic_%s_tmp;\n" % table_base
        CGData.log("%s Missing probes %d" % (table_base, missingProbeCount))
Example #4
0
    def gen_sql_heatmap(self, id_table, opts):
        """Yield the SQL statements that load this genomic track as a heatmap.

        Statements are produced in this order: the "heatmap" SQL pass of the
        member objects (clinicalMatrix excluded), the statements that refresh
        the raDb registration row, the sample_<track> table, the
        genomic_<track>_alias table and finally the genomic_<track> BED table,
        which is filled through a genomic_<track>_tmp table and then copied
        ordered by chrom, chromStart.  Dots in the track and clinical matrix
        names are replaced by underscores in every table name.

        Only the hg18 probe map is used.  When the probeMap member has none,
        the problem is reported through CGData.error and nothing beyond the
        member pass is generated.

        id_table -- object whose get(namespace, name) gives the numeric id of
                    a sample; ids are formatted with %d and used as the sort
                    key for the per-probe score columns.
        opts     -- container queried with ``in`` and ``[]``.  A true
                    'save-ds' carries the existing raDb downSampleTable value
                    over into the new row (NULL is written otherwise); a true
                    'no-genomic-matrix' stops after the raDb row.

        Raises ValueError (from datetime.strptime) when the selected version
        string is missing or not formatted as YYYY-MM-DD.
        """
        # XXX Handling of sql for children is broken if the child may appear
        # as part of multiple merge objects, such as TrackGenomic and TrackClinical.
        # The workaround is to hide clinicalMatrix from the member pass.  The
        # try/finally puts it back even if the pass raises or the consumer
        # stops iterating before the pass is finished.
        clinical = self.members.pop("clinicalMatrix")
        try:
            for line in CGData.CGMergeObject.sql_pass(self, id_table, method="heatmap"):
                yield line
        finally:
            self.members["clinicalMatrix"] = clinical

        gmatrix = self.members['genomicMatrix']
        probe_map = self.members['probeMap']
        pmap = probe_map.lookup(assembly="hg18")  # BUG: hard coded to only producing HG18 tables
        if pmap is None:
            CGData.error("Missing HG18 %s" % (probe_map.get_name()))
            return

        savedownsample = 'save-ds' in opts and opts['save-ds']

        table_base = self.get_name().replace(".", "_")
        CGData.log("Writing Track %s" % (table_base))

        clinical_table_base = self.members["clinicalMatrix"].get_name().replace(".", "_")
        # namespace under which sample ids are looked up in id_table
        id_namespace = clinical_table_base + ':sample_id'

        def bool_attr(key):
            # matrix flag, forced to False unless it is a real boolean
            value = gmatrix.get(key, False)
            return value if isinstance(value, bool) else False

        def quoted_or_null(key):
            # quoted, escaped attribute value, or \N (NULL) when it is absent
            if key in gmatrix:
                return "'%s'" % sql_fix(gmatrix[key])
            return '\\N'

        # free-form metadata stored as JSON in the raDb "other" column
        other = {}
        for attr in ['wrangler', 'wrangling_procedure', 'url', 'citation', 'description']:
            if attr in gmatrix:
                other[attr] = gmatrix[attr]
        if 'dataProducer' in gmatrix:
            other['author_list'] = gmatrix['dataProducer']
        if 'articleTitle' in gmatrix:
            other['article_title'] = gmatrix['articleTitle']

        # The recorded version is the clinical one only when its date is
        # strictly later than the genomic date; otherwise the genomic version
        # is used.  makeDate is defined elsewhere -- presumably it returns
        # None for a version it cannot parse; verify against its definition.
        cVersion = self.members['clinicalMatrix'].get('version', "")
        gVersion = gmatrix.get('version', "")
        dG = makeDate(gVersion)
        dC = makeDate(cVersion)
        if dC is not None and dG < dC:
            other['version'] = cVersion
        else:
            other['version'] = gVersion
        # if the version isn't properly formatted, throw an exception
        datetime.datetime.strptime(other['version'], "%Y-%m-%d")

        if 'owner' in gmatrix:
            other['owner'] = gmatrix['owner']
        other['colNormalization'] = bool_attr('colNormalization')
        other['redistribution'] = bool_attr('redistribution')
        security = gmatrix.get('security', "public")
        if security not in ["public", "private"]:
            security = "public"

        if savedownsample:
            # remember the current downSampleTable before the row is deleted
            yield "SET @ds=(SELECT downSampleTable FROM raDb WHERE name = '%s');\n" % ("genomic_" + table_base)
        yield "DELETE from raDb where name = '%s';\n" % ("genomic_" + table_base)
        yield "INSERT into raDb( name, downSampleTable, sampleTable, clinicalTable, columnTable, aliasTable, shortLabel, longLabel, expCount, dataType, platform, profile, security, priority, gain, groupName, wrangler, url, article_title, citation, author_list, wrangling_procedure, other) VALUES ( '%s', %s, '%s', '%s', '%s', '%s', '%s', '%s', '%d', '%s', '%s', '%s', '%s', %f, %f, '%s', %s, %s, %s, %s, %s, %s, '%s');\n" % (
            "genomic_" + table_base,
            "@ds" if savedownsample else "NULL",
            "sample_" + table_base,
            "clinical_" + clinical_table_base,
            "colDb",
            "genomic_" + table_base + "_alias",
            sql_fix(gmatrix['shortTitle']),
            sql_fix(gmatrix['longTitle']),
            len(gmatrix.get_sample_list()),
            self.format,
            dataSubTypeMap[gmatrix[':dataSubType']] if gmatrix[':dataSubType'] in dataSubTypeMap else gmatrix[':dataSubType'],
            'localDb',
            security,
            float(gmatrix.get('priority', 1.0)),
            float(gmatrix.get('gain', 1.0)),
            sql_fix(gmatrix.get('groupTitle', 'Misc.')),
            quoted_or_null('wrangler'),
            quoted_or_null('url'),
            quoted_or_null('articleTitle'),
            quoted_or_null('citation'),
            quoted_or_null('dataProducer'),
            quoted_or_null('wrangling_procedure'),
            sql_fix(json.dumps(other)),
        )

        if 'no-genomic-matrix' in opts and opts['no-genomic-matrix']:
            return

        # write out the sample table
        yield "drop table if exists sample_%s;" % (table_base)
        yield """
        CREATE TABLE sample_%s (
        id           int,
        sampleName   varchar(255)
        ) engine 'MyISAM';
        """ % (table_base)

        from CGData.ClinicalMatrix import sortedSamples
        for sample in sortedSamples(gmatrix.get_sample_list()):
            yield "INSERT INTO sample_%s VALUES( %d, '%s' );\n" % (
                table_base, id_table.get(id_namespace, sample), sql_fix(sample))

        # write out the alias table
        yield "drop table if exists genomic_%s_alias;" % (table_base)
        yield """
        CREATE TABLE genomic_%s_alias (
        name        varchar(255),
        alias         varchar(255)
        ) engine 'MyISAM';
        """ % (table_base)

        for probe in pmap.get_probes():
            for alias in probe.aliases:
                yield "insert into genomic_%s_alias( name, alias ) values( '%s', '%s' );\n" % (
                    table_base, sql_fix(probe.name), sql_fix(alias))

        # write out the BED table
        yield "drop table if exists %s;" % ("genomic_" + table_base)
        yield CREATE_BED % ("genomic_" + table_base + "_tmp")

        # Sort samples by sample_id and remember each sample's original column
        # so the genomic rows can be rearranged the same way below.  A key
        # function keeps the stable ordering of the former cmp-based sort,
        # looks every id up only once, and also copes with an empty sample list.
        samples = gmatrix.get_sample_list()
        by_id = sorted(enumerate(samples),
                       key=lambda pair: id_table.get(id_namespace, pair[1]))
        order = [column for column, _ in by_id]
        sample_ids = [str(id_table.get(id_namespace, name)) for _, name in by_id]

        exp_ids = ','.join(sample_ids)
        tmp_table = "genomic_%s_tmp" % (table_base)
        missingProbeCount = 0
        for probe_name in gmatrix.get_probe_list():
            # get the genomic data and rearrange to match the sample_id order
            values = gmatrix.get_row_vals(probe_name)
            row = [values[order[i]] for i in range(len(values))]

            pset = pmap.lookup(probe_name)
            if pset is None:
                # probe has no hg18 mapping: counted and reported at the end
                missingProbeCount += 1
                continue
            for probe in pset:
                # NOTE(review): the bin is computed from the unshifted
                # chrom_start while chromStart is written as chrom_start - 1;
                # confirm which coordinate Binner.calcBin expects.
                yield "insert into %s(bin, chrom, chromStart, chromEnd, strand,  name, expCount, expIds, expScores) values ( %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', %s );\n" % (
                    tmp_table, Binner.calcBin(probe.chrom_start, probe.chrom_end),
                    probe.chrom, probe.chrom_start - 1, probe.chrom_end,
                    probe.strand, sql_fix(probe_name), len(sample_ids), exp_ids,
                    self.scores(row))

        yield "# sort file by chrom position\n"
        yield "create table genomic_%s like genomic_%s_tmp;\n" % (table_base, table_base)
        yield "insert into genomic_%s(bin, chrom, chromStart, chromEnd, strand,  name, expCount, expIds, expScores) select bin, chrom, chromStart, chromEnd, strand,  name, expCount, expIds, expScores from genomic_%s_tmp order by chrom, chromStart;\n" % (table_base, table_base)
        yield "drop table genomic_%s_tmp;\n" % table_base
        CGData.log("%s Missing probes %d" % (table_base, missingProbeCount))