Example #1
0
if link.has_key("dbN") :
	dbN = link.getvalue("dbN")
else :
	dbN = "tcga1"

if link.has_key("sId") :
	mode = 'samp'
	sId = link.getvalue("sId") ## new ID  coming
elif link.has_key("dType"):
	mode = 'type'
	dType = link.getvalue("dType")
else :
	mode = 'samp'
	sId = "TCGA-06-5411"

new_id=mycgi.get_new_id(sId)
old_id=mycgi.get_old_id(new_id)

dTypeH = {
	'Fusion': (1,'nPos>10'),
	'ExonSkipping': (2,'nPos>20'),
	'3pDeletion': (3,'nReads_w and nReads/nReads_w>=50')
	}

specL = [
#	('Mutation', ["concat(strand,chrom,':',chrSta,'-',chrEnd) coord_hg19", "ref", "alt", "n_nReads_ref", "n_nReads_alt", "nReads_ref", "nReads_alt", \
	('Mutation', ["concat(chrom,':',chrSta,'-',chrEnd) coord_hg19", "ref", "alt", "n_nReads_ref", "n_nReads_alt", "nReads_ref", "nReads_alt", \
		"r_nReads_ref", "r_nReads_alt","if(count is NULL,'0',count) nIRCRb", "gene_symL", "ch_dna", "ch_aa", "ch_type", "cosmic", "mutsig", "if(census is NULL,'',census) census"], 't_mut', 'True', 'gene_symL,chrSta'),
	('Mutation_CS', ["concat(chrom,':',chrSta,'-',chrEnd) coord_hg19", "ref", "alt", "n_nReads_ref", "n_nReads_alt", "nReads_ref", "nReads_alt", \
		"r_nReads_ref", "r_nReads_alt","ifnull(count,'0') nIRCRb", "strand_cnt", "gene_sym", "ch_dna", "ch_aa", "ch_type", "cosmic", "mutsig", "ifnull(census,'')"], 't_mut_cs', 'True', 'gene_sym,chrSta'),
	('Fusion', ["loc1 coord1", "loc2 coord2", "gene_sym1", "gene_sym2", "frame", "ftype", "exon1", "exon2", "nPos","nReads","nReads_w1","nReads_w2"], 'splice_fusion_AF', 'nPos>2', 'nPos desc'),
Example #2
0
def main(dbN, geneN):

    (con, cursor) = mycgi.connectDB(db=dbN)

    # prep RNA-Seq data availability table
    cursor.execute(
        'create temporary table t_avail_RNASeq as select distinct samp_id from rpkm_gene_expr'
    )

    # prep exonSkip info
    cursor.execute(
        'select delExons,frame,loc1,loc2, count(*) cnt from splice_skip where gene_sym = "%s" and nPos>=5 group by delExons order by count(*) desc'
        % geneN)
    results = cursor.fetchall()

    conditionL_exonSkip = []

    for (delExons, frame, loc1, loc2, cnt) in results:

        if ':Y' in frame:
            frame_code = 'in'
        elif ':N' in frame:
            frame_code = 'off'
        else:
            frame_code = 'utr'

        conditionL_exonSkip.append( [
         ('nReads', 'splice_skip', 'loc1="%s" and loc2="%s" and nPos>=5' % (loc1,loc2), '%3d', '%s<br><sup>(n=%s, %s)</sup>' % (delExons.split(',')[0], cnt,frame_code),), \
			#			('avg(nReads)', 'splice_normal', 'loc1="%s" or loc2="%s"' % (loc1,loc2), '%d') ])

         ('sum(nReads)', 'splice_normal', 'loc1="%s"' % (loc1,), '%d') ])

    # prep mutation info


#		where gene_symL like "%s%s%s" and ch_type != "synonymous_variant" and ch_type != "nc_transcript_variant,synonymous_variant" and ch_type != "intron_variant,synonymous_variant" \
    if dbN == 'CancerSCAN':
        cursor.execute('create temporary table t_mut as \
			select concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt) as ch_pos, ch_dna,ch_aa,ch_type,cosmic from mutation_cs \
			where find_in_set("%s",gene_sym) > 0 and ch_type != "synonymous_variant" and ch_type != "nc_transcript_variant,synonymous_variant" and ch_type != "intron_variant,synonymous_variant" \
			and ch_type != "nc_transcript_variant" and ch_type != "intron_variant,nc_transcript_variant" and ch_type != "intron_variant" and ch_type != "Substitution - coding silent"\
			and nReads_alt<>2 order by ch_type desc' % (geneN))
    else:
        cursor.execute('create temporary table t_mut as \
			select concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt) as ch_pos, ch_dna,ch_aa,ch_type,cosmic from mutation_rxsq \
			where find_in_set("%s",gene_symL) > 0 and ch_type != "synonymous_variant" and ch_type != "nc_transcript_variant,synonymous_variant" and ch_type != "intron_variant,synonymous_variant" \
			and ch_type != "nc_transcript_variant" and ch_type != "intron_variant,nc_transcript_variant" and ch_type != "intron_variant" and ch_type != "Substitution - coding silent"\
			and nReads_alt<>2 order by ch_type desc' % (geneN))

    cursor.execute(
        'select *,count(*) cnt from t_mut group by ch_pos order by count(*) desc, cosmic desc'
    )
    results = cursor.fetchall()

    conditionL_mutation = []

    for (ch_pos, ch_dna, ch_aa, ch_type, cosmic, cnt) in results:

        ch_aa = ch_aa.replace(',', '<br>')

        if (dbN == 'CancerSCAN' and 'cosmic' in cosmic) or (dbN != 'CancerSCAN'
                                                            and cosmic):
            cosmic_fmt = '<font color="red">%s</font><br><sup>(n=%d, %s)</sup>'

        else:
            cosmic_fmt = '%s<br><sup>(n=%d, %s)</sup>'

        if ch_aa:
            cnd = ch_aa
        elif ch_dna:
            cnd = ch_dna
        else:
            cnd = ch_pos

        if dbN == 'CancerSCAN':
            conditionL_mutation.append([
             ('nReads_alt', 'mutation_cs', 'nReads_alt<>2 and concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt)="%s"' % ch_pos, '%d', cosmic_fmt % (cnd, cnt, mutation_map(ch_type, dbN))), \
             ('nReads_ref', 'mutation_cs', 'nReads_alt<>2 and concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt)="%s"' % ch_pos, '%d') ])
        else:
            conditionL_mutation.append( [
             ('nReads_alt,r_nReads_alt', 'mutation_rxsq', 'nReads_alt<>2 and concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt)="%s"' % ch_pos, '%d', cosmic_fmt % (cnd, cnt, mutation_map(ch_type, dbN))), \
             ('nReads_ref,r_nReads_ref', 'mutation_rxsq', 'nReads_alt<>2 and concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt)="%s"' % ch_pos, '%d') ])

    # prep fusion table
    cursor.execute('create temporary table t_fusion as \
		select samp_id,locate(":Y",frame)>1 frame,count(nPos) nEvents \
		from splice_fusion where nPos>=2 and (find_in_set("%s",gene_sym1) or find_in_set("%s",gene_sym2)) group by samp_id, locate(":Y",frame)>1'
                   % (geneN, geneN))

    # prep eiJunc info
    cursor.execute(
        'select loc,juncAlias, count(*) cnt from splice_eiJunc where gene_sym="%s" and nReads>=10 group by loc'
        % geneN)
    results = cursor.fetchall()

    conditionL_eiJunc = []

    for (loc, juncAlias, cnt) in results:
        conditionL_eiJunc.append([
            ('nReads', 'splice_eiJunc', 'loc="%s" and nReads>=10' % loc, '%3d',
             '%s<br><sup>(n=%s)</sup>' % (juncAlias, cnt)),
            ('sum(nReads)', 'splice_normal', 'loc1="%s"' % (loc, ), '%d')
        ])

    # outlier
    outlier_sId = []

    if dbN in ['ircr1', 'tcga1', 'ccle1']:
        cursor.execute(
            'select samp_id, expr_MAD from array_gene_expr_MAD mad, gene_expr_stat stat where mad.gene_sym = stat.gene_sym and (mad.expr_MAD >= stat.q75 + 3*(stat.q75 - stat.q25) or mad.expr_MAD <= stat.q25 - 3*(stat.q75 - stat.q25)) and mad.gene_sym = "%s"'
            % geneN)
        results3 = cursor.fetchall()

        for i in range(len(results3)):
            outlier_sId.append(results3[i][0])

    ## corrected CN value available?
    cncorr_sId = []
    if dbN in ['ircr1']:
        cursor.execute(
            'select distinct samp_id from xsq_purity where tumor_frac != "ND"')
        resultsCorr = cursor.fetchall()
        for i in range(len(resultsCorr)):
            cncorr_sId.append(resultsCorr[i][0])
    normal_sId = []
    if dbN in ['ircr1']:
        cursor.execute('select distinct samp_id from xsq_purity')
        tttt = cursor.fetchall()
        for i in range(len(tttt)):
            normal_sId.append(tttt[i][0])

    conditionL = conditionL_preH[
        dbN] + conditionL_mutation + conditionL_fusion + conditionL_exonSkip + conditionL_eiJunc

    print '<p><h4>%s status of %s panel <small><a href="http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s">[GeneCard]</a> <a href="http://www.ncbi.nlm.nih.gov/pubmed/?term=%s">[PubMed]</a></small></h4></p>' % (
        geneN, mycgi.db2dsetN(dbN), geneN, geneN)

    # census
    cursor.execute(
        'select tumor_soma, tumor_germ, syndrome, mut_type from common.census where gene_sym="%s"'
        % geneN)
    census = cursor.fetchall()

    print('\n<font size=2> <table border="1" cellpadding="0" cellspacing="0">')
    print(
        '<tr>\n<td rowspan=2>Census</td>\n<td>tumor_soma</td>\n<td>tumor_germ</td>\n<td>syndrome</td>\n<td>mut_type</td>\n</tr>\n'
    )

    if len(census) != 0:
        print(
            '<tr>\n<td>%s</td>\n<td>%s</td>\n<td>%s</td>\n<td>%s</td>\n</tr>\n'
            % (census[0][0], census[0][1], census[0][2], census[0][3]))
    else:
        print('<tr>\n<td></td>\n<td></td>\n<td></td>\n<td></td>\n</tr>\n')

    # drugbank
    cursor.execute('select drug from common.drugbank where gene_sym="%s"' %
                   geneN)
    drug = [x for (x, ) in cursor.fetchall()]

    print('\n<font size=2> <table border="1" cellpadding="0" cellspacing="0">')
    print('<br><tr>\n<td>Drug</td>\n')
    print('<td>%s</td>\n</tr>\n' % ('</br>\n'.join(drug)))

    # pathway
    cursor.execute(
        'select biocarta_id, biocarta_desc from common.biocarta where gene_sym="%s"'
        % geneN)
    biocarta = cursor.fetchall()

    cursor.execute(
        'select kegg_id, kegg_desc from common.kegg where gene_sym="%s"' %
        geneN)
    kegg = cursor.fetchall()

    cursor.execute('select go_id, go_desc from common.go where gene_sym="%s"' %
                   geneN)
    go = cursor.fetchall()

    print('\n<font size=2> <table border="1" cellpadding="0" cellspacing="0">')
    print('<br><tr>\n<td>Biocarta</td>\n<td>KEGG</td>\n<td>GO</td>\n</tr>\n')
    print('<tr><td><div style="width:100%; height:50px; overflow:auto">')
    for (id, desc) in biocarta:
        print(
            '<a href="http://cgap.nci.nih.gov/Pathways/BioCarta/%s">%s</br>' %
            (id, desc))
    print('</td>\n<td><div style="width:100%; height:50px; overflow:auto">')
    for (id, desc) in kegg:
        print(
            '<a href="http://www.genome.jp/dbget-bin/www_bget?pathway+%s">%s</br>'
            % (id, desc))
    print('</td>\n<td><div style="width:100%; height:50px; overflow:auto">')
    for (id, desc) in go:
        print(
            '<a href="http://amigo.geneontology.org/cgi-bin/amigo/term_details?term=GO:%s">%s</br>'
            % (id, desc))
    print('</div></td>\n</tr>\n')

    if dbN == 'CancerSCAN':
        cursor.execute(
            '''CREATE TEMPORARY TABLE t_id AS SELECT DISTINCT samp_id FROM mutation_cs UNION SELECT DISTINCT samp_id FROM cs_cn'''
        )
    elif dbN == 'tcga1':
        cursor.execute('create temporary table t_id as \
		select distinct samp_id from array_gene_expr union select distinct samp_id from array_cn union select distinct samp_id from splice_normal union select distinct samp_id from mutation_rxsq union select distinct samp_id from rpkm_gene_expr'
                       )
    else:
        cursor.execute('create temporary table t_id as \
		select distinct samp_id from array_gene_expr union select distinct samp_id from array_cn union select distinct samp_id from splice_normal union select distinct samp_id from mutation_rxsq union select distinct samp_id from rpkm_gene_expr union select distinct samp_id from xsq_cn'
                       )

    cursor.execute('alter table t_id add index (samp_id)')

    cursor.execute(
        'create temporary table t_expr as select * from array_gene_expr where gene_sym="%s"'
        % geneN)
    #cursor.execute('create temporary table t_cn as select * from xsq_cn where gene_sym="%s"' % geneN)

    cursor.execute('alter table t_expr add index (samp_id,gene_sym)')
    #cursor.execute('alter table t_cn add index (samp_id,gene_sym)')

    cursor.execute(
        'select samp_id from t_id left join t_expr using (samp_id) order by z_score desc'
    )
    #cursor.execute('select samp_id from t_id left join t_cn using (samp_id) order by value_log2 desc')

    results = cursor.fetchall()

    numTotSamp = len(results)

    print('\n<font size=3> <table border="1" cellpadding="0" cellspacing="0">')

    # header: row1
    print '<br><tr>\n<td rowspan=2><div class="verticaltext" align="middle">samples<br><sup>n=%s</sup></div></td>' % numTotSamp,

    for i in range(len(conditionL)):

        row = conditionL[i]

        if type(row) == list:
            row = row[0]

        if i < len(conditionL_preH[dbN]):
            if ('tag' in row[1]) or ('t_avail' in row[1]) or (
                    'subtype' in row[1]) or ('purity' in row[1]):
                cursor.execute('select count(*) from %s where %s' %
                               (row[1], row[2]))
            elif ('loh' in row[1]):
                cursor.execute(
                    'select count(distinct samp_id) from xsq_purity'
                )  # of samples for which purity pipeline was applied = # of samples with matched blood exome
            else:
                cursor.execute(
                    'select count(*) from %s where %s and gene_sym ="%s"' %
                    (row[1], row[2], geneN))

            count = cursor.fetchone()
            if 'MAD' in row[4]:
                print(
                    '<td rowspan=2 align="middle"><div class="verticaltext">%s, n=%s)</sup></div></td>'
                    % (row[-1], count[0]))
            else:
                print(
                    '<td rowspan=2 align="middle"><div class="verticaltext">%s<br><sup>(n=%s)</sup></div></td>'
                    % (row[-1], count[0]))
        else:
            if i == len(conditionL_preH[dbN]) and len(conditionL_mutation) > 0:
                print('<td align="middle" colspan=%s>mutation (mt/wt)</td>' %
                      len(conditionL_mutation))
            elif i == len(conditionL_preH[dbN]) + len(conditionL_mutation):
                print(
                    '<td align="middle" colspan=%s><a href="ircr_samp.py?dbN=%s&dType=Fusion">fusion</a></td>'
                    % (len(conditionL_fusion), dbN))
            elif i == len(
                    conditionL_preH[dbN]) + len(conditionL_mutation) + len(
                        conditionL_fusion) and len(conditionL_exonSkip) > 0:
                print(
                    '<td align="middle" colspan=%s><a href="ircr_samp.py?dbN=%s&dType=ExonSkipping">exonSkip</a> (mt/wt)</td>'
                    % (len(conditionL_exonSkip), dbN))
            elif i == len(conditionL_preH[dbN]) + len(
                    conditionL_mutation) + len(conditionL_fusion) + len(
                        conditionL_exonSkip) and len(conditionL_eiJunc) > 0:
                print(
                    '<td align="middle" colspan=%s><a href="ircr_samp.py?dbN=%s&dType=3pDeletion">3p deletion</a> (mt/wt)</td>'
                    % (len(conditionL_eiJunc), dbN))

    print('\n</tr>\n')

    # header: row2
    print '<tr>\n',

    for i in range(len(conditionL)):

        row = conditionL[i]

        if type(row) == list:
            row = row[0]

        if i < len(conditionL_preH[dbN]):
            pass
        else:
            print(
                '<td height="100"><div class="verticaltext" align="middle">%s</div></td>'
                % row[-1])

    print('\n</tr>\n')

    for (sId, ) in results:

        print '<tr>',
        #		print '<td nowrap><a href="ircr_samp.py?dbN=%s&sId=%s">%s</td>' % (dbN,sId,sId),
        new_id = mycgi.get_new_id(sId)
        print '<td nowrap><a href="ircr_samp.py?dbN=%s&sId=%s">%s</td>' % (
            dbN, new_id, new_id),

        d_flag = None
        r_flag = None

        for row in conditionL:

            outlier = None

            if type(row) == list:
                row_wt = row[1]
                row = row[0]
            else:
                row_wt = None

            (col, tbl, cnd, fmt) = row[:4]

            cursor.execute('show columns from %s like "gene_sym"' % tbl)

            if cursor.fetchone():
                cnd += ' and gene_sym="%s"' % geneN

            if tbl == 'mutation_rxsq':
                cursor.execute(
                    'select distinct %s from %s where samp_id="%s" and (%s)' %
                    (col, tbl, sId, cnd))
            else:
                cursor.execute(
                    'select %s from %s where samp_id="%s" and (%s)' %
                    (col, tbl, sId, cnd))

            results2 = cursor.fetchall()

            if len(results2) > 1:
                print sId
                raise Exception

            if type(row) == tuple and row[-1] == 'RSq':

                if len(results2) > 0:
                    r_flag = True
                else:
                    r_flag = False

            if type(row) == tuple and row[-1] == 'XSq':

                if len(results2) > 0:
                    d_flag = True
                else:
                    d_flag = False

            if sId not in normal_sId:  # flag for matched normal
                p_flag = False
            else:
                p_flag = True

            if type(row) == tuple and row[-1] == 'expr<br><sup>(MAD':

                if sId in outlier_sId:
                    outlier = True
                else:
                    outlier = False

            if len(results2) == 1:  # and results2[0][0] not in (0,'0'):

                value = ('%s' % fmt) % results2[0][0]

                if len(results2[0]) >= 2:
                    n_value = ('%s' % fmt) % results2[0][1]
                else:
                    n_value = 0

                if row_wt:

                    (col, tbl, cnd, fmt) = row_wt[:4]

                    if tbl == 'mutation_rxsq':
                        cursor.execute(
                            'select distinct %s from %s where samp_id="%s" and %s'
                            % (col, tbl, sId, cnd))
                    else:
                        cursor.execute(
                            'select %s from %s where samp_id="%s" and %s' %
                            (col, tbl, sId, cnd))

                    results_wt = cursor.fetchone()

                    n_count_wt = 0

                    if results_wt[0]:
                        count_wt = ('%s' % fmt) % results_wt[0]
                    else:
                        count_wt = 0

                    if len(results_wt) >= 2:
                        count_wt = ('%s' % fmt) % results_wt[0]
                        n_count_wt = ('%s' % fmt) % results_wt[1]

                    if row[1] == 'mutation_rxsq':

                        #						if int(n_value)!=0 and int(n_count_wt)!=0 and int(value)!=0 and int(count_wt)!=0:
                        if int(n_value) != 0 and int(value) != 0:
                            if int(value) > (int(value) + int(count_wt)
                                             ) * cutoff and int(n_value) > (
                                                 int(n_value) +
                                                 int(n_count_wt)) * cutoff:
                                print '<td><font color=red><b>%s</b></font><sub>/%s</sub>,<font color=468847><b>%s</b><sub>/%s</sub></font></td>' % (
                                    value, count_wt, n_value, n_count_wt),
                            elif int(n_value) > (int(n_value) +
                                                 int(n_count_wt)) * cutoff:
                                print '<td>%s<sub>/%s</sub>,<font color=468847><b>%s</b><sub>/%s</sub></font></td>' % (
                                    value, count_wt, n_value, n_count_wt),
                            elif int(value) > (int(value) +
                                               int(count_wt)) * cutoff:
                                print '<td><font color=red><b>%s</b></font><sub>/%s</sub>,<font color=468847>%s<sub>/%s</sub></font></td>' % (
                                    value, count_wt, n_value, n_count_wt),
                            else:
                                print '<td>%s<sub>/%s</sub>,<font color=468847>%s<sub>/%s</sub></font></td>' % (
                                    value, count_wt, n_value, n_count_wt),

                        elif int(value) == 0 and int(
                                count_wt) == 0:  ## no exome
                            if int(n_value) > (int(n_value) +
                                               int(n_count_wt)) * cutoff:
                                print '<td><font color=468847><b>%s</b><sub>/%s</sub></font></td>' % (
                                    n_value, n_count_wt),
                            else:
                                print '<td><font color=468847>%s<sub>/%s</sub></font></td>' % (
                                    n_value, n_count_wt),
                        elif int(n_value) == 0 and int(
                                n_count_wt) == 0:  ## no normal
                            if int(value) > (int(value) +
                                             int(count_wt)) * cutoff:
                                print '<td><font color=red><b>%s</b></font><sub>/%s</sub></td>' % (
                                    value, count_wt),
                            else:
                                print '<td>%s<sub>/%s</sub></td>' % (value,
                                                                     count_wt),

                    else:
                        if int(value) > (int(value) + int(count_wt)) * cutoff:
                            tmp = row[4].split('<br>')[0].split('/')
                            if len(tmp) > 1 and tmp[0] == tmp[1]:
                                print '<td>%s<sub>/%s</sub></td>' % (value,
                                                                     count_wt),
                            else:
                                print '<td><font color=red><b>%s</b></font><sub>/%s</sub></td>' % (
                                    value, count_wt),
                        else:
                            print '<td>%s<sub>/%s</sub></td>' % (value,
                                                                 count_wt),

                else:
                    if row[1] == 't_fusion':
                        html_content = ""
                        if row[4] == 'in':
                            print '<a name="%s"></a>' % sId
                            html_content = mycgi.compose_fusion_table(
                                cursor, dbN, geneN, sId, "in")
                            print '''
									<td><div class="tooltip_content">%s</div><div class="tooltip_link"><a href="#current">%s</a></div></td>
									''' % (html_content, value)
                        else:
                            html_content = mycgi.compose_fusion_table(
                                cursor, dbN, geneN, sId, "off")
                            print '''
									<td><div class="tooltip_content">%s</div><div class="tooltip_link"><a href="#current">%s</a></div></td>
									''' % (html_content, value)
                    elif outlier:
                        print '<td><font color=red><b>%s</b></font></td>' % value

                    elif tbl == 'array_cn' or tbl == 'cs_cn':  ##aCGH
                        (cl, bL) = cn_format(value)
                        print '<td><font color=%s>%s%s%s</font></td>' % (
                            cl, bL[0], value, bL[1])
                    elif tbl == 'xsq_cn':
                        (cl1, bL1) = cn_format(value)

                        if sId in cncorr_sId:
                            cursor.execute(
                                'select %s from xsq_cn_corr where samp_id="%s" and (%s)'
                                % (col, sId, cnd))
                            val_corr = ('%s' % fmt) % cursor.fetchone()[0]
                            (cl2, bL2) = cn_format(val_corr)
                            print '<td><font color=%s>%s%s%s</font>,<font color=%s>%s%s%s</font></td>' % (
                                cl1, bL1[0], value, bL1[1], cl2, bL2[0],
                                val_corr, bL2[1])
                        else:
                            print '<td><font color=%s>%s%s%s</font></td>' % (
                                cl1, bL1[0], value, bL1[1])
                    else:
                        print '<td>%s</td>' % value

            else:
                #grey out
                if (r_flag == False
                        and row[1] in ('splice_skip', 't_fusion',
                                       'splice_eiJunc', 'rpkm_gene_expr')
                    ) or (r_flag == False and d_flag == False
                          and row[1] in ('mutation_rxsq')) or (
                              (d_flag == False or p_flag == False)
                              and row[1] in ('xsq_loh')):
                    print '<td bgcolor=silver></td>'
                else:
                    print '<td></td>'

        print '</tr>'

    print('\n</table> </font>\n')
    return
Example #3
0
File: ircr.py Project: SMC1/JK1
def main(dbN, geneN):

    (con, cursor) = mycgi.connectDB(db=dbN)

    # prep RNA-Seq data availability table
    cursor.execute("create temporary table t_avail_RNASeq as select distinct samp_id from rpkm_gene_expr")

    # prep exonSkip info
    cursor.execute(
        'select delExons,frame,loc1,loc2, count(*) cnt from splice_skip where gene_sym = "%s" and nPos>=5 group by delExons order by count(*) desc'
        % geneN
    )
    results = cursor.fetchall()

    conditionL_exonSkip = []

    for (delExons, frame, loc1, loc2, cnt) in results:

        if ":Y" in frame:
            frame_code = "in"
        elif ":N" in frame:
            frame_code = "off"
        else:
            frame_code = "utr"

        conditionL_exonSkip.append(
            [
                (
                    "nReads",
                    "splice_skip",
                    'loc1="%s" and loc2="%s" and nPos>=5' % (loc1, loc2),
                    "%3d",
                    "%s<br><sup>(n=%s, %s)</sup>" % (delExons.split(",")[0], cnt, frame_code),
                ),
                # 			('avg(nReads)', 'splice_normal', 'loc1="%s" or loc2="%s"' % (loc1,loc2), '%d') ])
                ("sum(nReads)", "splice_normal", 'loc1="%s"' % (loc1,), "%d"),
            ]
        )

        # prep mutation info
    # 		where gene_symL like "%s%s%s" and ch_type != "synonymous_variant" and ch_type != "nc_transcript_variant,synonymous_variant" and ch_type != "intron_variant,synonymous_variant" \
    if dbN == "CancerSCAN":
        cursor.execute(
            'create temporary table t_mut as \
			select concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt) as ch_pos, ch_dna,ch_aa,ch_type,cosmic from mutation_cs \
			where find_in_set("%s",gene_sym) > 0 and ch_type != "synonymous_variant" and ch_type != "nc_transcript_variant,synonymous_variant" and ch_type != "intron_variant,synonymous_variant" \
			and ch_type != "nc_transcript_variant" and ch_type != "intron_variant,nc_transcript_variant" and ch_type != "intron_variant" and ch_type != "Substitution - coding silent"\
			and nReads_alt<>2 order by ch_type desc'
            % (geneN)
        )
    else:
        cursor.execute(
            'create temporary table t_mut as \
			select concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt) as ch_pos, ch_dna,ch_aa,ch_type,cosmic from mutation_rxsq \
			where find_in_set("%s",gene_symL) > 0 and ch_type != "synonymous_variant" and ch_type != "nc_transcript_variant,synonymous_variant" and ch_type != "intron_variant,synonymous_variant" \
			and ch_type != "nc_transcript_variant" and ch_type != "intron_variant,nc_transcript_variant" and ch_type != "intron_variant" and ch_type != "Substitution - coding silent"\
			and nReads_alt<>2 order by ch_type desc'
            % (geneN)
        )

    cursor.execute("select *,count(*) cnt from t_mut group by ch_pos order by count(*) desc, cosmic desc")
    results = cursor.fetchall()

    conditionL_mutation = []

    for (ch_pos, ch_dna, ch_aa, ch_type, cosmic, cnt) in results:

        ch_aa = ch_aa.replace(",", "<br>")

        if (dbN == "CancerSCAN" and "cosmic" in cosmic) or (dbN != "CancerSCAN" and cosmic):
            cosmic_fmt = '<font color="red">%s</font><br><sup>(n=%d, %s)</sup>'

        else:
            cosmic_fmt = "%s<br><sup>(n=%d, %s)</sup>"

        if ch_aa:
            cnd = ch_aa
        elif ch_dna:
            cnd = ch_dna
        else:
            cnd = ch_pos

        if dbN == "CancerSCAN":
            conditionL_mutation.append(
                [
                    (
                        "nReads_alt",
                        "mutation_cs",
                        'nReads_alt<>2 and concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt)="%s"'
                        % ch_pos,
                        "%d",
                        cosmic_fmt % (cnd, cnt, mutation_map(ch_type, dbN)),
                    ),
                    (
                        "nReads_ref",
                        "mutation_cs",
                        'nReads_alt<>2 and concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt)="%s"'
                        % ch_pos,
                        "%d",
                    ),
                ]
            )
        else:
            conditionL_mutation.append(
                [
                    (
                        "nReads_alt,r_nReads_alt",
                        "mutation_rxsq",
                        'nReads_alt<>2 and concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt)="%s"'
                        % ch_pos,
                        "%d",
                        cosmic_fmt % (cnd, cnt, mutation_map(ch_type, dbN)),
                    ),
                    (
                        "nReads_ref,r_nReads_ref",
                        "mutation_rxsq",
                        'nReads_alt<>2 and concat(substring(chrom,4,4),":",cast(chrSta as char),ref,">",alt)="%s"'
                        % ch_pos,
                        "%d",
                    ),
                ]
            )

            # prep fusion table
    cursor.execute(
        'create temporary table t_fusion as \
		select samp_id,locate(":Y",frame)>1 frame,count(nPos) nEvents \
		from splice_fusion where nPos>=2 and (find_in_set("%s",gene_sym1) or find_in_set("%s",gene_sym2)) group by samp_id, locate(":Y",frame)>1'
        % (geneN, geneN)
    )

    # prep eiJunc info
    cursor.execute(
        'select loc,juncAlias, count(*) cnt from splice_eiJunc where gene_sym="%s" and nReads>=10 group by loc' % geneN
    )
    results = cursor.fetchall()

    conditionL_eiJunc = []

    for (loc, juncAlias, cnt) in results:
        conditionL_eiJunc.append(
            [
                (
                    "nReads",
                    "splice_eiJunc",
                    'loc="%s" and nReads>=10' % loc,
                    "%3d",
                    "%s<br><sup>(n=%s)</sup>" % (juncAlias, cnt),
                ),
                ("sum(nReads)", "splice_normal", 'loc1="%s"' % (loc,), "%d"),
            ]
        )

        # outlier
    outlier_sId = []

    if dbN in ["ircr1", "tcga1", "ccle1"]:
        cursor.execute(
            'select samp_id, expr_MAD from array_gene_expr_MAD mad, gene_expr_stat stat where mad.gene_sym = stat.gene_sym and (mad.expr_MAD >= stat.q75 + 3*(stat.q75 - stat.q25) or mad.expr_MAD <= stat.q25 - 3*(stat.q75 - stat.q25)) and mad.gene_sym = "%s"'
            % geneN
        )
        results3 = cursor.fetchall()

        for i in range(len(results3)):
            outlier_sId.append(results3[i][0])

            ## corrected CN value available?
    cncorr_sId = []
    if dbN in ["ircr1"]:
        cursor.execute('select distinct samp_id from xsq_purity where tumor_frac != "ND"')
        resultsCorr = cursor.fetchall()
        for i in range(len(resultsCorr)):
            cncorr_sId.append(resultsCorr[i][0])
    normal_sId = []
    if dbN in ["ircr1"]:
        cursor.execute("select distinct samp_id from xsq_purity")
        tttt = cursor.fetchall()
        for i in range(len(tttt)):
            normal_sId.append(tttt[i][0])

    conditionL = (
        conditionL_preH[dbN] + conditionL_mutation + conditionL_fusion + conditionL_exonSkip + conditionL_eiJunc
    )

    print '<p><h4>%s status of %s panel <small><a href="http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s">[GeneCard]</a> <a href="http://www.ncbi.nlm.nih.gov/pubmed/?term=%s">[PubMed]</a></small></h4></p>' % (
        geneN,
        mycgi.db2dsetN(dbN),
        geneN,
        geneN,
    )

    # census
    cursor.execute('select tumor_soma, tumor_germ, syndrome, mut_type from common.census where gene_sym="%s"' % geneN)
    census = cursor.fetchall()

    print ('\n<font size=2> <table border="1" cellpadding="0" cellspacing="0">')
    print (
        "<tr>\n<td rowspan=2>Census</td>\n<td>tumor_soma</td>\n<td>tumor_germ</td>\n<td>syndrome</td>\n<td>mut_type</td>\n</tr>\n"
    )

    if len(census) != 0:
        print (
            "<tr>\n<td>%s</td>\n<td>%s</td>\n<td>%s</td>\n<td>%s</td>\n</tr>\n"
            % (census[0][0], census[0][1], census[0][2], census[0][3])
        )
    else:
        print ("<tr>\n<td></td>\n<td></td>\n<td></td>\n<td></td>\n</tr>\n")

        # drugbank
    cursor.execute('select drug from common.drugbank where gene_sym="%s"' % geneN)
    drug = [x for (x,) in cursor.fetchall()]

    print ('\n<font size=2> <table border="1" cellpadding="0" cellspacing="0">')
    print ("<br><tr>\n<td>Drug</td>\n")
    print ("<td>%s</td>\n</tr>\n" % ("</br>\n".join(drug)))

    # pathway
    cursor.execute('select biocarta_id, biocarta_desc from common.biocarta where gene_sym="%s"' % geneN)
    biocarta = cursor.fetchall()

    cursor.execute('select kegg_id, kegg_desc from common.kegg where gene_sym="%s"' % geneN)
    kegg = cursor.fetchall()

    cursor.execute('select go_id, go_desc from common.go where gene_sym="%s"' % geneN)
    go = cursor.fetchall()

    print ('\n<font size=2> <table border="1" cellpadding="0" cellspacing="0">')
    print ("<br><tr>\n<td>Biocarta</td>\n<td>KEGG</td>\n<td>GO</td>\n</tr>\n")
    print ('<tr><td><div style="width:100%; height:50px; overflow:auto">')
    for (id, desc) in biocarta:
        print ('<a href="http://cgap.nci.nih.gov/Pathways/BioCarta/%s">%s</br>' % (id, desc))
    print ('</td>\n<td><div style="width:100%; height:50px; overflow:auto">')
    for (id, desc) in kegg:
        print ('<a href="http://www.genome.jp/dbget-bin/www_bget?pathway+%s">%s</br>' % (id, desc))
    print ('</td>\n<td><div style="width:100%; height:50px; overflow:auto">')
    for (id, desc) in go:
        print ('<a href="http://amigo.geneontology.org/cgi-bin/amigo/term_details?term=GO:%s">%s</br>' % (id, desc))
    print ("</div></td>\n</tr>\n")

    if dbN == "CancerSCAN":
        cursor.execute(
            """CREATE TEMPORARY TABLE t_id AS SELECT DISTINCT samp_id FROM mutation_cs UNION SELECT DISTINCT samp_id FROM cs_cn"""
        )
    elif dbN == "tcga1":
        cursor.execute(
            "create temporary table t_id as \
		select distinct samp_id from array_gene_expr union select distinct samp_id from array_cn union select distinct samp_id from splice_normal union select distinct samp_id from mutation_rxsq union select distinct samp_id from rpkm_gene_expr"
        )
    else:
        cursor.execute(
            "create temporary table t_id as \
		select distinct samp_id from array_gene_expr union select distinct samp_id from array_cn union select distinct samp_id from splice_normal union select distinct samp_id from mutation_rxsq union select distinct samp_id from rpkm_gene_expr union select distinct samp_id from xsq_cn"
        )

    cursor.execute("alter table t_id add index (samp_id)")

    cursor.execute('create temporary table t_expr as select * from array_gene_expr where gene_sym="%s"' % geneN)
    # cursor.execute('create temporary table t_cn as select * from xsq_cn where gene_sym="%s"' % geneN)

    cursor.execute("alter table t_expr add index (samp_id,gene_sym)")
    # cursor.execute('alter table t_cn add index (samp_id,gene_sym)')

    cursor.execute("select samp_id from t_id left join t_expr using (samp_id) order by z_score desc")
    # cursor.execute('select samp_id from t_id left join t_cn using (samp_id) order by value_log2 desc')

    results = cursor.fetchall()

    numTotSamp = len(results)

    print ('\n<font size=3> <table border="1" cellpadding="0" cellspacing="0">')

    # header: row1
    print '<br><tr>\n<td rowspan=2><div class="verticaltext" align="middle">samples<br><sup>n=%s</sup></div></td>' % numTotSamp,

    for i in range(len(conditionL)):

        row = conditionL[i]

        if type(row) == list:
            row = row[0]

        if i < len(conditionL_preH[dbN]):
            if ("tag" in row[1]) or ("t_avail" in row[1]) or ("subtype" in row[1]) or ("purity" in row[1]):
                cursor.execute("select count(*) from %s where %s" % (row[1], row[2]))
            elif "loh" in row[1]:
                cursor.execute(
                    "select count(distinct samp_id) from xsq_purity"
                )  # of samples for which purity pipeline was applied = # of samples with matched blood exome
            else:
                cursor.execute('select count(*) from %s where %s and gene_sym ="%s"' % (row[1], row[2], geneN))

            count = cursor.fetchone()
            if "MAD" in row[4]:
                print (
                    '<td rowspan=2 align="middle"><div class="verticaltext">%s, n=%s)</sup></div></td>'
                    % (row[-1], count[0])
                )
            else:
                print (
                    '<td rowspan=2 align="middle"><div class="verticaltext">%s<br><sup>(n=%s)</sup></div></td>'
                    % (row[-1], count[0])
                )
        else:
            if i == len(conditionL_preH[dbN]) and len(conditionL_mutation) > 0:
                print ('<td align="middle" colspan=%s>mutation (mt/wt)</td>' % len(conditionL_mutation))
            elif i == len(conditionL_preH[dbN]) + len(conditionL_mutation):
                print (
                    '<td align="middle" colspan=%s><a href="ircr_samp.py?dbN=%s&dType=Fusion">fusion</a></td>'
                    % (len(conditionL_fusion), dbN)
                )
            elif (
                i == len(conditionL_preH[dbN]) + len(conditionL_mutation) + len(conditionL_fusion)
                and len(conditionL_exonSkip) > 0
            ):
                print (
                    '<td align="middle" colspan=%s><a href="ircr_samp.py?dbN=%s&dType=ExonSkipping">exonSkip</a> (mt/wt)</td>'
                    % (len(conditionL_exonSkip), dbN)
                )
            elif (
                i
                == len(conditionL_preH[dbN])
                + len(conditionL_mutation)
                + len(conditionL_fusion)
                + len(conditionL_exonSkip)
                and len(conditionL_eiJunc) > 0
            ):
                print (
                    '<td align="middle" colspan=%s><a href="ircr_samp.py?dbN=%s&dType=3pDeletion">3p deletion</a> (mt/wt)</td>'
                    % (len(conditionL_eiJunc), dbN)
                )

    print ("\n</tr>\n")

    # header: row2
    print "<tr>\n",

    for i in range(len(conditionL)):

        row = conditionL[i]

        if type(row) == list:
            row = row[0]

        if i < len(conditionL_preH[dbN]):
            pass
        else:
            print ('<td height="100"><div class="verticaltext" align="middle">%s</div></td>' % row[-1])

    print ("\n</tr>\n")

    for (sId,) in results:

        print "<tr>",
        # 		print '<td nowrap><a href="ircr_samp.py?dbN=%s&sId=%s">%s</td>' % (dbN,sId,sId),
        new_id = mycgi.get_new_id(sId)
        print '<td nowrap><a href="ircr_samp.py?dbN=%s&sId=%s">%s</td>' % (dbN, new_id, new_id),

        d_flag = None
        r_flag = None

        for row in conditionL:

            outlier = None

            if type(row) == list:
                row_wt = row[1]
                row = row[0]
            else:
                row_wt = None

            (col, tbl, cnd, fmt) = row[:4]

            cursor.execute('show columns from %s like "gene_sym"' % tbl)

            if cursor.fetchone():
                cnd += ' and gene_sym="%s"' % geneN

            if tbl == "mutation_rxsq":
                cursor.execute('select distinct %s from %s where samp_id="%s" and (%s)' % (col, tbl, sId, cnd))
            else:
                cursor.execute('select %s from %s where samp_id="%s" and (%s)' % (col, tbl, sId, cnd))

            results2 = cursor.fetchall()

            if len(results2) > 1:
                print sId
                raise Exception

            if type(row) == tuple and row[-1] == "RSq":

                if len(results2) > 0:
                    r_flag = True
                else:
                    r_flag = False

            if type(row) == tuple and row[-1] == "XSq":

                if len(results2) > 0:
                    d_flag = True
                else:
                    d_flag = False

            if sId not in normal_sId:  # flag for matched normal
                p_flag = False
            else:
                p_flag = True

            if type(row) == tuple and row[-1] == "expr<br><sup>(MAD":

                if sId in outlier_sId:
                    outlier = True
                else:
                    outlier = False

            if len(results2) == 1:  # and results2[0][0] not in (0,'0'):

                value = ("%s" % fmt) % results2[0][0]

                if len(results2[0]) >= 2:
                    n_value = ("%s" % fmt) % results2[0][1]
                else:
                    n_value = 0

                if row_wt:

                    (col, tbl, cnd, fmt) = row_wt[:4]

                    if tbl == "mutation_rxsq":
                        cursor.execute('select distinct %s from %s where samp_id="%s" and %s' % (col, tbl, sId, cnd))
                    else:
                        cursor.execute('select %s from %s where samp_id="%s" and %s' % (col, tbl, sId, cnd))

                    results_wt = cursor.fetchone()

                    n_count_wt = 0

                    if results_wt[0]:
                        count_wt = ("%s" % fmt) % results_wt[0]
                    else:
                        count_wt = 0

                    if len(results_wt) >= 2:
                        count_wt = ("%s" % fmt) % results_wt[0]
                        n_count_wt = ("%s" % fmt) % results_wt[1]

                    if row[1] == "mutation_rxsq":

                        # 						if int(n_value)!=0 and int(n_count_wt)!=0 and int(value)!=0 and int(count_wt)!=0:
                        if int(n_value) != 0 and int(value) != 0:
                            if (
                                int(value) > (int(value) + int(count_wt)) * cutoff
                                and int(n_value) > (int(n_value) + int(n_count_wt)) * cutoff
                            ):
                                print "<td><font color=red><b>%s</b></font><sub>/%s</sub>,<font color=468847><b>%s</b><sub>/%s</sub></font></td>" % (
                                    value,
                                    count_wt,
                                    n_value,
                                    n_count_wt,
                                ),
                            elif int(n_value) > (int(n_value) + int(n_count_wt)) * cutoff:
                                print "<td>%s<sub>/%s</sub>,<font color=468847><b>%s</b><sub>/%s</sub></font></td>" % (
                                    value,
                                    count_wt,
                                    n_value,
                                    n_count_wt,
                                ),
                            elif int(value) > (int(value) + int(count_wt)) * cutoff:
                                print "<td><font color=red><b>%s</b></font><sub>/%s</sub>,<font color=468847>%s<sub>/%s</sub></font></td>" % (
                                    value,
                                    count_wt,
                                    n_value,
                                    n_count_wt,
                                ),
                            else:
                                print "<td>%s<sub>/%s</sub>,<font color=468847>%s<sub>/%s</sub></font></td>" % (
                                    value,
                                    count_wt,
                                    n_value,
                                    n_count_wt,
                                ),

                        elif int(value) == 0 and int(count_wt) == 0:  ## no exome
                            if int(n_value) > (int(n_value) + int(n_count_wt)) * cutoff:
                                print "<td><font color=468847><b>%s</b><sub>/%s</sub></font></td>" % (
                                    n_value,
                                    n_count_wt,
                                ),
                            else:
                                print "<td><font color=468847>%s<sub>/%s</sub></font></td>" % (n_value, n_count_wt),
                        elif int(n_value) == 0 and int(n_count_wt) == 0:  ## no normal
                            if int(value) > (int(value) + int(count_wt)) * cutoff:
                                print "<td><font color=red><b>%s</b></font><sub>/%s</sub></td>" % (value, count_wt),
                            else:
                                print "<td>%s<sub>/%s</sub></td>" % (value, count_wt),

                    else:
                        if int(value) > (int(value) + int(count_wt)) * cutoff:
                            tmp = row[4].split("<br>")[0].split("/")
                            if len(tmp) > 1 and tmp[0] == tmp[1]:
                                print "<td>%s<sub>/%s</sub></td>" % (value, count_wt),
                            else:
                                print "<td><font color=red><b>%s</b></font><sub>/%s</sub></td>" % (value, count_wt),
                        else:
                            print "<td>%s<sub>/%s</sub></td>" % (value, count_wt),

                else:
                    if row[1] == "t_fusion":
                        html_content = ""
                        if row[4] == "in":
                            print '<a name="%s"></a>' % sId
                            html_content = mycgi.compose_fusion_table(cursor, dbN, geneN, sId, "in")
                            print """
									<td><div class="tooltip_content">%s</div><div class="tooltip_link"><a href="#current">%s</a></div></td>
									""" % (
                                html_content,
                                value,
                            )
                        else:
                            html_content = mycgi.compose_fusion_table(cursor, dbN, geneN, sId, "off")
                            print """
									<td><div class="tooltip_content">%s</div><div class="tooltip_link"><a href="#current">%s</a></div></td>
									""" % (
                                html_content,
                                value,
                            )
                    elif outlier:
                        print "<td><font color=red><b>%s</b></font></td>" % value

                    elif tbl == "array_cn" or tbl == "cs_cn":  ##aCGH
                        (cl, bL) = cn_format(value)
                        print "<td><font color=%s>%s%s%s</font></td>" % (cl, bL[0], value, bL[1])
                    elif tbl == "xsq_cn":
                        (cl1, bL1) = cn_format(value)

                        if sId in cncorr_sId:
                            cursor.execute('select %s from xsq_cn_corr where samp_id="%s" and (%s)' % (col, sId, cnd))
                            val_corr = ("%s" % fmt) % cursor.fetchone()[0]
                            (cl2, bL2) = cn_format(val_corr)
                            print "<td><font color=%s>%s%s%s</font>,<font color=%s>%s%s%s</font></td>" % (
                                cl1,
                                bL1[0],
                                value,
                                bL1[1],
                                cl2,
                                bL2[0],
                                val_corr,
                                bL2[1],
                            )
                        else:
                            print "<td><font color=%s>%s%s%s</font></td>" % (cl1, bL1[0], value, bL1[1])
                    else:
                        print "<td>%s</td>" % value

            else:
                # grey out
                if (
                    (r_flag == False and row[1] in ("splice_skip", "t_fusion", "splice_eiJunc", "rpkm_gene_expr"))
                    or (r_flag == False and d_flag == False and row[1] in ("mutation_rxsq"))
                    or ((d_flag == False or p_flag == False) and row[1] in ("xsq_loh"))
                ):
                    print "<td bgcolor=silver></td>"
                else:
                    print "<td></td>"

        print "</tr>"

    print ("\n</table> </font>\n")
    return