def main(outFileName,dbNL,dTypeL,outDirName,outFileN):
    """Dump per-locus Infinium27k values for the 'P' and 'R' groups and run the KS-test R script.

    For every database in dbNL, every data type in dTypeL and every distinct
    (loc, geneName) pair of tcga1.methyl_pId, outFileName is rewritten with
    one row per sample, then unpaired_gene_methyl_ks.r is run with outDirName
    and outFileN; its output is appended to outDirName/error_kstest.txt.
    """
    rowFmt = '%s\t%s\t%s\t%s\t%s\t%s\t%s\n'
    # statement that builds the temporary table t_recur, per database
    recurSqlH = {
        'ircr1': 'create temporary table t_recur select distinct samp_id from sample_tag where substring(tag,1,6)="pair_P"',
        'tcga1': 'create temporary table t_recur select distinct samp_id from sample_tag where tag="Recur"',
    }
    # 'P' rows are the samples NOT listed in t_recur, 'R' rows are the listed ones
    groupL = (('P', 'not'), ('R', ''))

    for dbN in dbNL:
        (con,cursor) = mymysql.connectDB(db=dbN)
        if dbN in recurSqlH:
            cursor.execute(recurSqlH[dbN])

        cursor.execute('SELECT distinct loc,geneName FROM tcga1.methyl_pId where platform="Infinium27k"')
        locGeneL = cursor.fetchall()

        for dType in dTypeL:
            for (loc,geneN) in locGeneL:
                # the data file is overwritten for every locus; the R script reads it right away
                outFile = open(outFileName,'w')
                outFile.write(rowFmt % ('dbT','dType','geneN','PR','sId','val','loc'))
                for (PR, negate) in groupL:
                    cursor.execute('select pId,%s from %s where platform="Infinium27k" and loc="%s" and geneName="%s" and pId %s in (select samp_id from t_recur)' % (dTypeH[dType][1],dTypeH[dType][0],loc,geneN,negate))
                    for (sId,val) in cursor.fetchall():
                        outFile.write(rowFmt % (dbH[dbN],dType,geneN,PR,sId,val,loc))
                outFile.close()
                os.system('Rscript %s/PrimRecur/unpaired_gene_methyl_ks.r %s %s &>> %s/error_kstest.txt' % (mysetting.SRC_HOME,outDirName,outFileN,outDirName))

        con.close()
def main(geneN1, geneN2, altType1='rpkm', altType2='rpkm', dset='tcga1'):
    """Export paired values of two gene features and plot their correlation.

    The values are exported through the mysql command-line client to
    /var/www/html/tmp/correlation.txt, every line is copied to
    correlation_idh1.txt with a fourth column ('mut' when the sample carries
    an IDH1 R132 change in mutation_normal, otherwise 'na'), and
    correlation_idh1.r is run on the result.

    Returns True when the export or the R script exited with a non-zero status.
    """
    tblN1,valN1,featN1,lab1 = altTypeH[altType1]
    tblN2,valN2,featN2,lab2 = altTypeH[altType2]

    con,cursor = mymysql.connectDB(db=dset)
    try:
        cursor.execute('select samp_id from mutation_normal where gene_symL="IDH1" and ch_aa like "%sR132%s"' % ('%','%'))
        # set: membership is tested once per exported line
        idh1 = set(x[0] for x in cursor.fetchall())
    finally:
        con.close()

    ret1 = os.system('''(echo "SELECT t1.samp_id,t1.%s %s,t2.%s %s FROM %s t1, %s t2 where t1.%s='%s' and t2.%s='%s' and t1.samp_id=t2.samp_id" | mysql %s -u cancer --password=cancer > /var/www/html/tmp/correlation.txt) &> /var/www/html/tmp/correaltion.err''' %
        (valN1,geneN1, valN2,geneN2, tblN1,tblN2, featN1,geneN1, featN2,geneN2, dset))

    with open('/var/www/html/tmp/correlation.txt') as f:
        with open('/var/www/html/tmp/correlation_idh1.txt','w') as fo:
            for line in f:
                # strip only the newline, so a final line without one keeps its last character
                (sId, t1, t2) = line.rstrip('\n').split('\t')
                if sId in idh1:
                    status = 'mut'
                else:
                    status = 'na'
                fo.write('%s\t%s\t%s\t%s\n' % (sId,t1,t2,status))

    ret2 = os.system('Rscript correlation_idh1.r "%s" "%s" "%s" png &>> /var/www/html/tmp/correaltion.err' % (dsetH[dset],lab1,lab2))

    return ret1!=0 or ret2!=0
def post_s_rsq2expr(baseDir, server='smc1', dbN='ihlee_test'):
    """Build the expression files of one RNA-Seq sample and load them into rpkm_gene_expr.

    baseDir is the sample directory; its last path component is the sample
    name. The sample's existing rows are replaced, the rpkm_gene_expr_lg2
    view is rebuilt, an "RNA-Seq" tag is added to sample_tag when missing and
    boxplot_expr_cs_gene.main is called for the sample.
    """
    sampN = baseDir.split('/')[-1]
    sid = sampN[:-4].replace('-','_').replace('.','_')  # drop '_RSq'

    # these two databases keep their files in the shared results directory
    if dbN in ['ihlee_test','ircr1']:
        exprDir = '/EQL1/NSL/RNASeq/results/expression'
    else:
        exprDir = baseDir
    gctFileN = '%s/%s.gct' % (exprDir, sampN)
    datFileN = '%s/%s.dat' % (exprDir, sampN)

    print('%s %s' % (sampN, gctFileN))
    rpkm_process.rpkm_process(inputDirN=baseDir, filePattern='*.rpkm', sampRegex=r'(.*)_RSq\.rpkm', outputFileN=gctFileN)

    ## prep
    prepDB_rpkm_gene_expr.main(inGctFileName=gctFileN, geneList=[], samplePrefix='', outDatFileName=datFileN)

    ## import
    dbConf = mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbConf['user'],passwd=dbConf['passwd'],db=dbN,host=dbConf['host'])

    cursor.execute('DELETE FROM rpkm_gene_expr WHERE samp_id="%s"' % sid)
    cursor.execute('LOAD DATA LOCAL INFILE "%s" INTO TABLE rpkm_gene_expr' % datFileN)

    cursor.execute('DROP VIEW IF EXISTS rpkm_gene_expr_lg2')
    cursor.execute('CREATE VIEW rpkm_gene_expr_lg2 AS SELECT samp_id,gene_sym,log2(rpkm+1) AS lg2_rpkm FROM rpkm_gene_expr')

    ## make sure sample_tag records that this sample has RNA-Seq
    cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
    if not cursor.fetchall():
        cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)

    ## draw boxplot
    boxplot_expr_cs_gene.main(sid, '/EQL1/NSL/RNASeq/results/expression')
def main(inDir, outDir, pbs=False, server='smc1'):
    """Run cn_corr.py on every .ngCGH file of inDir, using tumor purity from xsq_purity.

    inDir   -- directory scanned for *.ngCGH files
    outDir  -- directory receiving <sample>.corr.ngCGH and <sample>.cn_corr.qlog
    pbs     -- submit each command through qsub instead of running it directly
    server  -- key of mysetting.mysqlH holding the connection settings

    Files whose name does not follow <id>_<T..>_<XX>.ngCGH, and samples with
    no purity row or a purity of 'ND', are reported and skipped.
    """
    inFileNL = [x for x in os.listdir(inDir) if re.match(r'.*\.ngCGH', x)]
    print('Files: %s' % inFileNL)

    dbConf = mysetting.mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbConf['user'], passwd=dbConf['passwd'], db='ircr1', host=dbConf['host'])

    for inFileN in inFileNL:
        sampN = re.match(r'(.*)\.ngCGH', inFileN).group(1)

        nameMatch = re.match(r'(.*)_(T.{,2}.*)_[STKN]{2}\.ngCGH', inFileN)
        if nameMatch is None:
            # previously an AttributeError on None.groups()
            print('Skipping %s: unexpected file name' % inFileN)
            continue
        (sid, tag) = nameMatch.groups()
        if tag != 'T':
            sid = '%s_%s' % (sid, tag)

        cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % sid)
        result = cursor.fetchall()
        if len(result) < 1 or result[0][0] == 'ND':
            # previously an IndexError / ValueError; 'ND' is the same marker the other purity consumers test for
            print('Skipping %s: no tumor purity for %s' % (inFileN, sid))
            continue
        purity = int(result[0][0])

        iprefix = '%s/%s' % (inDir,sampN)
        oprefix = '%s/%s' % (outDir,sampN)
        cmd = '/usr/bin/python %s/NGS/copynumber/cn_corr.py -i %s.ngCGH -o %s.corr.ngCGH -p %s' % (mysetting.SRC_HOME, iprefix, oprefix, purity)
        log = '%s.cn_corr.qlog' % (oprefix)
        print(cmd)
        if pbs:
            os.system('echo "%s" | qsub -N %s -o %s -j oe' % (cmd, sampN, log))
        else:
            os.system('(%s) &> %s' % (cmd, log))
def post_xsq2purity(outFileN, server='smc1', dbN='ircr1'):
    """Rebuild xsq_purity from the *tumor_frac.txt files and flag the matching sample tags.

    The tumor_frac files under mysetting.wxsPurityDir are piped through
    prepDB_xsq_purity.py into outFileN, which is loaded with
    mymysql.reset_table. Every sample of xsq_purity tagged exactly "XSeq_SS"
    then has that tag rewritten to "XSeq_SS,N".
    """
    os.system('cat %s/*/*tumor_frac.txt | /usr/bin/python %s/Integration/prepDB_xsq_purity.py > %s' % (
        mysetting.wxsPurityDir, mysetting.SRC_HOME, outFileN))

    mymysql.reset_table(tableN='xsq_purity', dataFileN=outFileN,
                        user=mysetting.mysqlH[server]['user'],
                        passwd=mysetting.mysqlH[server]['passwd'],
                        db=dbN, host=mysetting.mysqlH[server]['host'])

    # add normal if missed
    (con, cursor) = mymysql.connectDB(user=mysetting.mysqlH[server]['user'],
                                      passwd=mysetting.mysqlH[server]['passwd'],
                                      db=dbN, host=mysetting.mysqlH[server]['host'])

    cursor.execute('''SELECT DISTINCT samp_id FROM %s.xsq_purity''' % dbN)
    purityIdL = [row[0] for row in cursor.fetchall()]

    cursor.execute('''SELECT DISTINCT samp_id FROM %s.sample_tag WHERE tag = "XSeq_SS"''' % dbN)
    plainTagL = [row[0] for row in cursor.fetchall()]

    for sid in purityIdL:
        if sid not in plainTagL:
            continue
        print(sid)
        cursor.execute('''UPDATE %s.sample_tag SET samp_id="%s", tag="XSeq_SS,N" WHERE samp_id="%s" and tag="XSeq_SS"''' % (dbN, sid, sid))
def main(geneN,dType,dbN='ircr1',outFileDir=None):
    """Write the per-pair difference of one gene's value between paired samples.

    Samples carrying a "pair_R..." tag are looked up; for each, the sample
    tagged "pair_P:<that id>" is its partner. When both have a value for geneN
    in the table selected by dType, partner value minus sample value is kept.

    Output (to <outFileDir>/<geneN>_<dType>_<dbH[dbN]>_paired.dst2, or to
    stdout when outFileDir is not given) is three lines: a header with the
    number of pairs, the comma-separated differences, and the comma-separated
    pair ids. Samples with no partner row are skipped.
    """
    if outFileDir:
        outFile = open('%s/%s_%s_%s_paired.dst2' % (outFileDir,geneN,dType,dbH[dbN]),'w')
    else:
        outFile = sys.stdout

    try:
        (con,cursor) = mymysql.connectDB(db=dbN)
        try:
            tblN, valN = dTypeH[dType][0], dTypeH[dType][1]

            cursor.execute('select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R"')
            sIdL_prim = [x for (x,) in cursor.fetchall()]

            vL = []
            sIdL_pair = []
            for sId_p in sIdL_prim:
                cursor.execute('select samp_id from sample_tag where tag="pair_P:%s"' % sId_p)
                pairRow = cursor.fetchone()
                if pairRow is None:
                    # no partner recorded; unpacking None used to raise TypeError
                    continue
                (sId_r,) = pairRow

                cursor.execute('select %s from %s where gene_sym="%s" and samp_id="%s"' % (valN,tblN,geneN,sId_p))
                r_p = cursor.fetchone()
                cursor.execute('select %s from %s where gene_sym="%s" and samp_id="%s"' % (valN,tblN,geneN,sId_r))
                r_r = cursor.fetchone()

                if r_p and r_r:
                    vL.append("%.2f" % (r_r[0]-r_p[0],))
                    sIdL_pair.append((sId_p,sId_r))
        finally:
            con.close()

        outFile.write('%s-%s-%s\t%s\n' % (geneN,dType,dbN,len(sIdL_pair)))
        outFile.write(','.join(vL)+'\n')
        outFile.write(','.join(['%s_%s' % (x,y) for (x,y) in sIdL_pair])+'\n')
    finally:
        # close only what this function opened; stdout stays open
        if outFile is not sys.stdout:
            outFile.close()
def main():
    """Print, per methylation platform/locus, the correlation between methylation fraction and MGMT expression.

    For every distinct (platform, loc) of methyl_pId the fraction is paired
    with log2(rpkm+1) of MGMT from rpkm_gene_expr. One tab-separated line
    (platform, loc, number of samples, Pearson r) is printed per locus, in
    descending order of r. Loci with no paired rows are skipped.
    """
    con,cursor = mymysql.connectDB(db='tcga1')

    cursor.execute('SELECT distinct platform,loc FROM methyl_pId')
    results1 = cursor.fetchall()

    output = []

    for (plat,loc) in results1:

        cursor.execute('select fraction,log2(rpkm+1) from methyl_pId, rpkm_gene_expr where platform="%s" and loc="%s" and gene_sym="MGMT" and pId=samp_id' % (plat,loc))
        results2 = cursor.fetchall()

        if not results2:
            # zip(*[]) yields nothing to unpack; there is no correlation to report
            continue

        methyl,expr = zip(*results2)

        r = numpy.corrcoef(methyl,expr)[0,1]

        output.append((plat,loc,len(methyl),r))

    # highest correlation first; a stable key sort keeps ties in query order
    output.sort(key=lambda rec: rec[-1], reverse=True)

    for (plat,loc,n,r) in output:
        print('%s\t%s\t%s\t%.2f' % (plat,loc,n,r))
def prep_single(outFileN, server='smc1', dbN='ircr1'):
    """Tag samples that have a cosmic.dat file and build the mutscan data file.

    *cosmic.dat files are collected from every directory of
    mysetting.wxsMutscanDirL (one and two levels deep, skipping paths that
    contain '_B_'). Each sample id derived from a file name that has no
    "XSeq_..." tag yet gets an "XSeq_<platform>" tag, once per sample id.
    All collected files are then piped through prepDB_mutscan.py into outFileN.
    """
    (con, cursor) = mymysql.connectDB(user=mysetting.mysqlH[server]['user'],
                                      passwd=mysetting.mysqlH[server]['passwd'],
                                      db=dbN, host=mysetting.mysqlH[server]['host'])

    cosmicL = []
    for dirN in mysetting.wxsMutscanDirL:
        cosmicL += [x for x in glob('%s/*/*cosmic.dat' % dirN) + glob('%s/*cosmic.dat' % dirN)
                    if '_B_' not in x]

    cursor.execute('SELECT DISTINCT samp_id FROM sample_tag WHERE tag LIKE "XSeq_%%"')
    taggedS = set(res[0] for res in cursor.fetchall())

    for cosmic in cosmicL:
        (sid, postfix, platform) = re.match('(.*)_([XT].{,2})_([STKN]{2})_cosmic.dat', os.path.basename(cosmic)).groups()
        if postfix not in ['T', 'RSq']:
            sid = '%s_%s' % (sid, postfix)
        if sid in taggedS:
            continue
        print('%s %s' % (sid, cosmic))
        tag = 'XSeq_%s' % platform
        cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="%s"' % (sid, tag))
        # remember the insert so a second file of the same sample does not add a duplicate tag row
        taggedS.add(sid)

    cmd = 'cat %s | /usr/bin/python %s/Integration/prepDB_mutscan.py > %s' % (
        ' '.join(cosmicL), mysetting.SRC_HOME, outFileN)
    os.system(cmd)
def post_rsq2eiJunc(dirN, server='smc1', dbN='ihlee_test', sampL=[]):
    """Filter, convert and load the exon-intron junction data of every sample directory under dirN.

    dirN   -- directory holding one sub-directory per sample
    server -- key of mysqlH with the connection settings
    dbN    -- target database; for 'ihlee_test' and 'ircr1' the intermediate
              files go under BASE/eiJunc, otherwise into the sample directory
    sampL  -- when not empty, only sample ids listed here are processed

    After all samples are loaded, makeDB_splice_AF.eiJunc is run once.
    """
    dbConf = mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbConf['user'],passwd=dbConf['passwd'],db=dbN,host=dbConf['host'])

    sampNL = [x for x in os.listdir(dirN) if os.path.isdir(dirN + '/' + x)]

    for sampN in sampNL:
        baseDir = dirN + '/' + sampN
        sid = sampN[:-4].replace('.','_').replace('-','_')  ## RNASeq sample has '***_RSq'
        if sampL != [] and sid not in sampL:
            continue

        print('%s %s' % (sampN, sid))

        ## make sure sample_tag records that this sample has RNA-Seq
        cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
        if not cursor.fetchall():
            cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)

        ei_dat = glob('%s/%s*ei.dat' % (baseDir, sampN))[0]

        # filtered text file, then the database-ready .dat file
        if dbN in ['ihlee_test','ircr1']:
            splice_eiJunc_txt = '%s/eiJunc/splice_eiJunc_%s_ft.txt' % (BASE, sampN)
            splice_eiJunc_dat = '%s/eiJunc/splice_eiJunc_%s.dat' % (BASE, sampN)
        else:
            splice_eiJunc_txt = '%s/splice_eiJunc_%s_ft.txt' % (baseDir, sampN)
            splice_eiJunc_dat = '%s/splice_eiJunc_%s.dat' % (baseDir, sampN)

        ei_junc_filter.main(overlap=10, minNReads=1, inFileN=ei_dat, outFileN=splice_eiJunc_txt)
        prepDB_splice_eiJunc.main(minNReads=1, sampNamePat=RSQPattern, geneList=[], inFileN=splice_eiJunc_txt, outFileN=splice_eiJunc_dat)

        cursor.execute('DELETE FROM splice_eiJunc WHERE samp_id="%s"' % sid)
        cursor.execute('LOAD DATA LOCAL INFILE "%s" IGNORE INTO TABLE splice_eiJunc' % splice_eiJunc_dat)

    makeDB_splice_AF.eiJunc(dbN=dbN, cursor=cursor)
def load_mutation_all(inFileN, server='smc1', dbN='ircr1'):
    """Recreate mutation_normal from inFileN, filling in COSMIC annotation.

    The table is dropped and recreated, inFileN is loaded into a temporary
    copy, and that copy is left-joined with the cosmic table. Rows with no
    COSMIC match are inserted with an empty cosmic column; rows with a match
    are inserted with ch_aa and ch_type taken from the COSMIC columns.
    """
    dbConf = mysetting.mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbConf['user'],passwd=dbConf['passwd'],db=dbN,host=dbConf['host'])

    createStmt = (
        ' CREATE TABLE mutation_normal ('
        ' samp_id varchar(63) NOT NULL,'
        ' chrom varchar(10) NOT NULL,'
        ' chrSta int unsigned NOT NULL,'
        ' chrEnd int unsigned NOT NULL,'
        ' ref varchar(63) NOT NULL,'
        ' alt varchar(63) NOT NULL,'
        ' n_nReads_ref mediumint unsigned NOT NULL,'
        ' n_nReads_alt mediumint unsigned NOT NULL,'
        ' nReads_ref mediumint unsigned NOT NULL,'
        ' nReads_alt mediumint unsigned NOT NULL,'
        ' strand char(1) NOT NULL,'
        ' gene_symL varchar(63),'
        ' ch_dna varchar(127),'
        ' ch_aa varchar(63),'
        ' ch_type varchar(127),'
        ' cosmic text,'
        ' mutsig text,'
        ' index (samp_id,gene_symL),'
        ' index (samp_id,chrom,chrSta,chrEnd),'
        ' index (samp_id,chrom,chrSta,ref,alt),'
        ' index (samp_id,chrom,chrSta,chrEnd,ref,alt)'
        ' ) '
    )

    # the statements run strictly in this order
    stmtL = (
        'DROP TABLE IF EXISTS mutation_normal',
        createStmt,
        'CREATE TEMPORARY TABLE tmp LIKE mutation_normal',
        'LOAD DATA LOCAL INFILE "%s" INTO TABLE tmp' % inFileN,
        # loaded rows joined with their COSMIC annotation, where there is one
        'CREATE TEMPORARY TABLE t2 SELECT tmp.samp_id,tmp.chrom,tmp.chrSta,tmp.chrEnd,tmp.ref,tmp.alt,tmp.n_nReads_ref,tmp.n_nReads_alt,tmp.nReads_ref,tmp.nReads_alt,tmp.strand,tmp.gene_symL,tmp.ch_dna,tmp.ch_aa,tmp.ch_type,cosmic.ch_aaL AS cosmic,cosmic.ch_typeL AS cosmic_type,tmp.mutsig '
        'FROM tmp LEFT JOIN cosmic ON tmp.chrom=cosmic.chrom AND tmp.chrSta=cosmic.chrSta AND tmp.chrEnd=cosmic.chrEnd AND tmp.ref=cosmic.ref AND tmp.alt=cosmic.alt AND tmp.gene_symL=cosmic.gene_symL',
        # rows without a COSMIC match
        'INSERT INTO mutation_normal SELECT samp_id,chrom,chrSta,chrEnd,ref,alt,n_nReads_ref,n_nReads_alt,nReads_ref,nReads_alt,strand,gene_symL,ch_dna,ch_aa,ch_type,"" AS cosmic,mutsig FROM t2 WHERE cosmic IS NULL',
        # rows with a COSMIC match
        'INSERT INTO mutation_normal SELECT samp_id,chrom,chrSta,chrEnd,ref,alt,n_nReads_ref,n_nReads_alt,nReads_ref,nReads_alt,strand,gene_symL,ch_dna,cosmic AS ch_aa,cosmic_type AS ch_type,cosmic,mutsig FROM t2 WHERE cosmic IS NOT NULL',
    )
    for stmt in stmtL:
        cursor.execute(stmt)
def prep_somatic_new(outFileN, server='smc1', dbN='ircr1'):
    """Annotate the somatic mutation calls, build the mutation data file and flag the sample tags.

    VEP is run over /EQL3/pipeline/somatic_mutation/*S, the resulting
    *filter_vep.dat files are piped through prepDB_mutation_xsq2mut_tmp.py
    into outFileN, and every sample id in the first column of outFileN gets
    ",N" appended to its "XSeq_..." tag (or a new "XSeq_SS,N" tag when it has
    none). A tag that already carries the N flag is left alone, so the
    function can be re-run. Exits with status 1 when a sample has more than
    one "XSeq_..." tag.
    """
    ## run VEP
    vep_batch.main(glob('/EQL3/pipeline/somatic_mutation/*S'), postfixL=['.mutect_rerun_filter.vcf','.mutect_filter.vcf','.mutect_pair_filter.vcf','.indels_pair_filter.vcf'], fork=True)

    ## make table
    DIR='/EQL3/pipeline/somatic_mutation'
    cmd = 'cat %s/*/*filter_vep.dat | /usr/bin/python %s/Integration/prepDB_mutation_xsq2mut_tmp.py > %s' % (DIR, mysetting.SRC_HOME, outFileN)
    os.system(cmd)

    ## update tag
    sidL = [x.rstrip() for x in os.popen('cut -f 1 %s | sort | uniq' % outFileN).readlines()]
    (con, cursor) = mymysql.connectDB(user=mysetting.mysqlH[server]['user'],passwd=mysetting.mysqlH[server]['passwd'],db=dbN,host=mysetting.mysqlH[server]['host'])

    for sid in sidL:
        cursor.execute('SELECT samp_id,tag FROM sample_tag WHERE samp_id="%s" AND tag LIKE "XSeq_%%"' % sid)
        results = cursor.fetchall()

        if len(results) > 1:
            sys.stderr.write('Duplication in sample_tag: %s\n' % sid)
            sys.exit(1)

        if len(results) < 1:
            cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="XSeq_SS,N"' % sid)
            continue

        old_tag = results[0][1]
        if 'N' in old_tag.split(',')[1:]:
            # already flagged; appending again would produce "...,N,N"
            continue
        new_tag = '%s,N' % old_tag
        cursor.execute('UPDATE sample_tag SET samp_id="%s", tag="%s" WHERE samp_id="%s" AND tag="%s"' % (sid,new_tag, sid,old_tag))
def main(inDir, outDir, cnDir, pbs=False, server='smc1'):
    """Run mut_clonality.py on every .mutect file of inDir that has a purity estimate.

    inDir  -- directory scanned for *.mutect files (*.union_pos.mutect excluded)
    outDir -- directory receiving <sample>.mutect_cl.dat and <sample>.mutect_cl.log
    cnDir  -- directory holding <sample>/<sample>.ngCGH.seg
    pbs    -- submit each command through qsub instead of running it directly
    server -- key of mysetting.mysqlH holding the connection settings

    Samples with no purity row or a purity of 'ND' are skipped, and so are
    samples whose .mutect_cl.dat already exists. Exits with status 1 when the
    segmentation file of a sample with purity is missing.
    """
    inFileNL = [x for x in os.listdir(inDir)
                if re.match(r'(.*)\.mutect$', x) and not re.match(r'(.*)\.union_pos\.mutect$', x)]
    print('Files: %s' % inFileNL)

    dbConf = mysetting.mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbConf['user'], passwd=dbConf['passwd'], db='ircr1', host=dbConf['host'])

    for inFileN in inFileNL:
        sampN = re.match(r'(.*)\.mutect', inFileN).group(1)
        (sid, postfix) = re.match(r'(.*)_(T.{,2})_[STKN]{2}\.mutect', inFileN).groups()
        if postfix != 'T':
            sid = '%s_%s' % (sid, postfix)

        cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % sid)
        result = cursor.fetchall()
        if len(result) < 1 or result[0][0] == 'ND':
            continue
        purity = int(result[0][0])

        iprefix = '%s/%s' % (inDir, sampN)
        oprefix = '%s/%s' % (outDir, sampN)
        segFile = '%s/%s/%s.ngCGH.seg' % (cnDir, sampN, sampN)

        # The two conditions are tested separately: an output that already exists
        # must not be reported as a missing segmentation file.
        if not os.path.isfile(segFile):
            print("Missing copy number segmentation file!")
            sys.exit(1)
        if os.path.isfile('%s.mutect_cl.dat' % (oprefix)):
            continue

        cmd = '/usr/bin/python %s/NGS/mutation/mut_clonality.py -s %s -i %s.mutect -o %s.mutect_cl.dat -p %s' % (mysetting.SRC_HOME, segFile, iprefix, oprefix, purity)
        log = '%s.mutect_cl.log' % (oprefix)
        if pbs:
            os.system('echo "%s" | qsub -N %s -o %s -j oe' % (cmd, sampN, log))
        else:
            os.system('(%s) &> %s' % (cmd, log))
def main(dbN='ircr1', cursor=None):
    """Rebuild mutation_rxsq as the union of mutation_normal and mutation_rsq.

    dbN    -- database to connect to when no cursor is given
    cursor -- existing cursor to use; a new connection is opened when None

    The two tables are matched on (samp_id, chrom, chrSta, ref, alt); rows
    present on only one side are kept and their missing read counts are set
    to 0. The result is grouped by (samp_id, chrom, chrSta, ref, alt, ch_aa)
    and indexed.
    """
    if cursor is None:
        (con,cursor) = mymysql.connectDB(db=dbN)

    cursor.execute('drop table if exists mutation_rxsq')
    # a t_m left behind by an interrupted run on this connection would make the create fail
    cursor.execute('drop temporary table if exists t_m')

    matchOn = 'on n.samp_id = r.samp_id and n.chrom=r.chrom and n.chrSta=r.chrSta and n.ref=r.ref and n.alt=r.alt'

    # left join + right join, combined with union, gives a full outer join of the two tables
    cursor.execute(
        'create temporary table t_m as '
        'select n.samp_id,n.chrom,n.chrSta,n.chrEnd,n.ref,n.alt,n.n_nReads_ref,n.n_nReads_alt,n.nReads_ref,n.nReads_alt,r.r_nReads_ref,r.r_nReads_alt,n.strand,n.gene_symL,n.ch_dna,n.ch_aa,n.ch_type,n.cosmic,n.mutsig '
        'from mutation_normal n left join mutation_rsq r '
        + matchOn + ' '
        'union '
        'select r.samp_id,r.chrom,r.chrSta,r.chrEnd,r.ref,r.alt,n.n_nReads_ref,n.n_nReads_alt,n.nReads_ref,n.nReads_alt,r.r_nReads_ref,r.r_nReads_alt,r.strand,r.gene_symL,r.ch_dna,r.ch_aa,r.ch_type,r.cosmic,r.mutsig '
        'from mutation_normal n right join mutation_rsq r '
        + matchOn)

    cursor.execute(
        'create table mutation_rxsq as '
        'select * from (select * from t_m order by ch_dna desc) as i group by samp_id,chrom,chrSta,ref,alt,ch_aa')

    # read counts missing on one side of the join become 0
    cursor.execute('update mutation_rxsq set r_nReads_ref = 0, r_nReads_alt = 0 where r_nReads_ref is null')
    cursor.execute('update mutation_rxsq set n_nReads_ref = 0, n_nReads_alt = 0 where n_nReads_ref is null')
    cursor.execute('update mutation_rxsq set nReads_ref = 0, nReads_alt = 0 where nReads_ref is null')

    for indexCols in ('samp_id,gene_symL',
                      'samp_id,chrom,chrSta,chrEnd',
                      'samp_id,chrom,chrSta,ref,alt',
                      'samp_id,chrom,chrSta,chrEnd,ref,alt'):
        cursor.execute('alter table mutation_rxsq add index (%s)' % indexCols)

    cursor.execute('drop temporary table if exists t_m')
def post_s_rsq2mut(baseDir, server='smc1', dbN='ihlee_test'):
    """Convert and load the splice_cosmic data of one RNA-Seq sample into mutation_rsq.

    baseDir is the sample directory; its last path component is the sample
    name. Nothing is loaded when <baseDir>/<sample>_splice_cosmic.dat does not
    exist. Otherwise the sample's rows in mutation_rsq are replaced and an
    "RNA-Seq" tag is added to sample_tag when missing.
    """
    sampN = baseDir.split('/')[-1]
    sid = sampN[:-4].replace('.', '_').replace('-', '_')

    print('%s %s' % (sampN, sid))

    cosmicDatFileN = '%s/%s_splice_cosmic.dat' % (baseDir, sampN)

    # these two databases keep their .dat files in the shared results directory
    if dbN in ['ihlee_test', 'ircr1']:
        datDir = '/EQL1/NSL/RNASeq/results/mutation'
    else:
        datDir = baseDir
    datFileN = '%s/%s.dat' % (datDir, sampN)

    if not os.path.isfile(cosmicDatFileN):
        return

    prepDB_mutscan.main(sampNamePat=('(.*)_(RSq)', ''), geneList=[], inFileN=cosmicDatFileN, outFileN=datFileN)

    ## import
    dbConf = mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbConf['user'], passwd=dbConf['passwd'], db=dbN, host=dbConf['host'])

    cursor.execute('DELETE FROM mutation_rsq WHERE samp_id="%s"' % sid)
    cursor.execute('LOAD DATA LOCAL INFILE "%s" INTO TABLE mutation_rsq' % datFileN)

    ## make sure sample_tag records that this sample has RNA-Seq
    cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
    if not cursor.fetchall():
        cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)
def main(outFileName):
    """Write a table of paired values for every data type in dTypeL and gene in geneL.

    Samples of ircr1 carrying a "pair_R..." tag (S520 and S042 excluded) are
    looked up; for each, the sample tagged "pair_P:<that id>" is its partner.
    One tab-separated row (dType, geneN, sId_p, sId_r, val_p, val_r) is
    written to outFileName whenever both samples have a value. Samples with
    no partner row are skipped.
    """
    (con,cursor) = mymysql.connectDB(db='ircr1')
    outFile = open(outFileName,'w')

    try:
        cursor.execute('select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R" and samp_id!="S520" and samp_id!="S042"')
        sIdL_prim = [x for (x,) in cursor.fetchall()]

        # The partner does not depend on dType or gene: resolve it once per sample
        # instead of once per dType x gene x sample.
        pairL = []
        for sId_p in sIdL_prim:
            cursor.execute('select samp_id from sample_tag where tag="pair_P:%s"' % sId_p)
            pairRow = cursor.fetchone()
            if pairRow is None:
                # no partner recorded; unpacking None used to raise TypeError
                continue
            pairL.append((sId_p, pairRow[0]))

        outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % ('dType','geneN','sId_p','sId_r','val_p','val_r'))

        for dType in dTypeL:
            (tbl,col_name,col_val) = dTypeH[dType]

            for geneN in geneL:

                for (sId_p,sId_r) in pairL:

                    cursor.execute('select %s from %s where %s="%s" and samp_id="%s"' % (col_val,tbl,col_name,geneN,sId_p))
                    r_p = cursor.fetchone()

                    cursor.execute('select %s from %s where %s="%s" and samp_id="%s"' % (col_val,tbl,col_name,geneN,sId_r))
                    r_r = cursor.fetchone()

                    if r_p and r_r:
                        outFile.write('%s\t%s\t%s\t%s\t%.2f\t%.2f\n' % (dType,geneN,sId_p,sId_r,r_p[0],r_r[0]))
    finally:
        outFile.close()
        con.close()
def main(locFileName):
    """Rebuild methyl_gene from the loci of locFileName whose r is at most -0.25.

    locFileName is a tab-separated file with the columns platform, gene name,
    loc, n and r. For every qualifying line the matching rows of methyl are
    averaged per pId and stored in methyl_gene together with n and r. The
    table is dropped first and indexed at the end; when no line qualifies it
    is not recreated.
    """
    con,cursor = mymysql.connectDB(db='tcga1')

    cursor.execute('drop table if exists methyl_gene')

    aggSql = 'select platform,pId,geneName,loc,sum(fraction)/count(fraction) fraction, n, r from t_methyl group by pId'

    # methyl_gene was just dropped, so the first qualifying locus creates it and later
    # ones insert. An explicit flag replaces a bare "except:" that took any failure of
    # the create as "table already exists".
    tableCreated = False

    with open(locFileName) as locFile:
        for line in locFile:

            (plat, gN, loc, n, r) = line.rstrip('\n').split('\t')

            if r != '-nan' and float(r) <= -0.25:

                cursor.execute(
                    'create temporary table t_methyl as '
                    'select * from methyl where geneName="%s" and platform="%s" and loc="%s"' % (gN,plat,loc))

                cursor.execute('alter table t_methyl add column r float, add column n smallint unsigned')
                cursor.execute('update t_methyl set n=%s, r=%s' % (int(n),float(r)))

                if tableCreated:
                    cursor.execute('insert into methyl_gene ' + aggSql)
                else:
                    cursor.execute('create table methyl_gene as ' + aggSql)
                    tableCreated = True

                cursor.execute('drop table t_methyl')

    if tableCreated:
        cursor.execute('alter table methyl_gene add index (geneName)')
        cursor.execute('alter table methyl_gene add index (pId)')
def has_acgh(samp_id, dbN='ircr1'):
    """Return True when array_cn of database dbN has at least one row for samp_id."""
    (con, cursor) = mymysql.connectDB(db=dbN)
    try:
        cursor.execute('select count(distinct samp_id) from array_cn where samp_id="%s"' % samp_id)
        return cursor.fetchone()[0] > 0
    finally:
        # each call opens its own connection; release it
        con.close()
def main(dataN='TCGA_GBM', query=None):
    """Write the survival table for one gene feature and run the distribution and survival R scripts.

    dataN -- not used by this function
    query -- required; "geneN:altType:feature:cutoff" (exactly four
             ':'-separated fields; feature is not used here)

    Samples are labelled by comparing their value with the bottom and top
    percentile thresholds, and the table is written to
    /var/www/html/survival/survival.mvc.

    Returns True when either R script exited with a non-zero status.
    Raises ValueError when query is missing or does not have four fields.
    """
    # query has a default only because a parameter without one may not follow dataN
    if query is None:
        raise ValueError('query is required: "geneN:altType:feature:cutoff"')

    con, cursor = mymysql.connectDB(db='tcga1')

    geneN, altType, feature, cutoff = query.split(':')

    # NOTE(review): the text format of the cutoff field is not shown anywhere in this
    # code. It is assumed to be "low,high" in percent (e.g. "50,50"), or one number
    # used for both ends -- confirm against the caller.
    def _num(txt):
        try:
            return int(txt)
        except ValueError:
            return float(txt)
    cutoffL = [_num(c) for c in cutoff.split(',')]
    if len(cutoffL) == 1:
        cutoffL = cutoffL * 2
    cutoff = (cutoffL[0], cutoffL[1])

    cursor.execute('create temporary table t1 select samp_id pId, %s value from %s where gene_sym="%s"' %
                   (altTypeH[altType][1],altTypeH[altType][0],geneN))

    cursor.execute('select value from t1')
    valueL = [v for (v, ) in cursor.fetchall()]

    recordL = mymysql.dictSelect(
        "SELECT pId,days_followup time,if(days_death is not null,1,0) event,%s value "
        "FROM clinical join %s on pId=samp_id and gene_sym='%s'" %
        (altTypeH[altType][1], altTypeH[altType][0], geneN), cursor)

    threshold = (mymath.percentile(valueL, cutoff[0]),
                 mymath.percentile(valueL, 100 - cutoff[1]))

    colN = ['pId', 'time', 'event', 'value', 'label', 'priority']

    with open('/var/www/html/survival/survival.mvc', 'w') as outFile:
        outFile.write('\t'.join(colN) + '\n')

        for r in recordL:

            if r['value'] < threshold[0]:
                label = '"%s %s < B%s%% (%.2f)"' % (geneN, altType, cutoff[0], threshold[0])
                priority = '1'
            elif r['value'] >= threshold[1]:
                label = '"%s %s > T%s%% (%.2f)"' % (geneN, altType, cutoff[1], threshold[1])
                priority = '2'
            else:
                label = '"%s %s Middle"' % (geneN, altType)
                priority = '9'

            outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' %
                          (r['pId'], r['time'], r['event'], r['value'], label, priority))

    ret1 = os.system('Rscript distribution.r /var/www/html/survival/survival.mvc &> /var/www/html/survival/error_distr.txt')
    ret2 = os.system('Rscript survival.r /var/www/html/survival/survival.mvc &> /var/www/html/survival/error_surv.txt')

    return ret1 != 0 or ret2 != 0
def post_xsq2cn(outFileN, platform='', server='smc1', dbN='ircr1'):
    """Rebuild the gene-level copy-number table of one platform and tag its samples.

    outFileN -- data file written by prepDB_xsq_cn.py and loaded into the table
    platform -- 'SS' (table xsq_cn, files under mysetting.wxsCNADir) or
                'CS' (table cs_cn, files under mysetting.CScnaDir)
    server   -- key of mysetting.mysqlH holding the connection settings
    dbN      -- target database

    Exits with status 1, before anything is written, when platform is neither
    'SS' nor 'CS'. Samples of the table with no "XSeq_<platform>..." tag get
    an "XSeq_<platform>" tag.
    """
    # Validate first: outFileN used to be overwritten by the shell pipeline before an
    # illegal platform was rejected.
    tableH = {'SS': 'xsq_cn', 'CS': 'cs_cn'}
    if platform not in tableH:
        sys.stderr.write('illegal platform name: %s\n' % platform)
        sys.exit(1)
    tableN = tableH[platform]

    cnaDir = mysetting.CScnaDir if platform == 'CS' else mysetting.wxsCNADir
    cmd = 'cat %s/*/*%s.cn_gene.dat | /usr/bin/python %s/Integration/prepDB_xsq_cn.py > %s' % (cnaDir, platform, mysetting.SRC_HOME, outFileN)
    os.system(cmd)

    dbConf = mysetting.mysqlH[server]
    mymysql.reset_table(tableN=tableN, dataFileN=outFileN, user=dbConf['user'], passwd=dbConf['passwd'], db=dbN, host=dbConf['host'])

    ## add samp_id if missing
    (con, cursor) = mymysql.connectDB(user=dbConf['user'], passwd=dbConf['passwd'], db=dbN, host=dbConf['host'])

    cursor.execute('SELECT DISTINCT samp_id FROM %s' % tableN)
    sIdL = [x for (x,) in cursor.fetchall()]

    cursor.execute('SELECT DISTINCT samp_id FROM sample_tag WHERE tag like "XSeq_%s%%"' % platform)
    refS = set(x for (x,) in cursor.fetchall())

    for sid in sIdL:
        if sid not in refS:
            cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="XSeq_%s"' % (sid, platform))
def fusion_s(sid, dbN='ircr1', cursor=None):
    """Append the fusion rows of one sample, with their nReads_w1/nReads_w2 counts, to splice_fusion_AF.

    sid    -- sample id whose rows are processed
    dbN    -- database to connect to when no cursor is given
    cursor -- existing cursor to use; a new connection is opened when None

    The sample's rows of splice_fusion are left-joined with its rows of
    splice_normal_loc1 (on loc1) and splice_normal_loc2 (on loc2), and the
    result is inserted into splice_fusion_AF.
    """
    if cursor is None:
        (con, cursor) = mymysql.connectDB(db=dbN)

    # temporary tables left behind by an interrupted call on this connection would make the creates fail
    cursor.execute('DROP TEMPORARY TABLE IF EXISTS t_m,af_m,loc1,loc2')

    cursor.execute('CREATE TEMPORARY TABLE t_m AS SELECT * FROM splice_fusion WHERE samp_id="%s"' % sid)
    cursor.execute('ALTER TABLE t_m ADD INDEX (samp_id,loc1)')
    cursor.execute('ALTER TABLE t_m ADD INDEX (samp_id,loc2)')

    cursor.execute('CREATE TEMPORARY TABLE loc1 AS SELECT * FROM splice_normal_loc1 WHERE samp_id="%s"' % sid)
    cursor.execute('ALTER TABLE loc1 ADD INDEX (samp_id,loc1)')

    cursor.execute('CREATE TEMPORARY TABLE loc2 AS SELECT * FROM splice_normal_loc2 WHERE samp_id="%s"' % sid)
    cursor.execute('ALTER TABLE loc2 ADD INDEX (samp_id,loc2)')

    cursor.execute(
        'CREATE TEMPORARY TABLE af_m AS '
        'SELECT t_m.samp_id,loc1,loc2,gene_sym1,gene_sym2,ftype,exon1,exon2,frame,nReads,nPos,nReads_w1,nReads_w2 FROM t_m '
        'LEFT JOIN loc1 t_w1 USING (samp_id,loc1) LEFT JOIN loc2 t_w2 USING (samp_id,loc2)')

    cursor.execute('ALTER TABLE splice_fusion_AF DISABLE KEYS')
    cursor.execute('INSERT INTO splice_fusion_AF SELECT * FROM af_m')
    cursor.execute('ALTER TABLE splice_fusion_AF ENABLE KEYS')

    cursor.execute('DROP TEMPORARY TABLE IF EXISTS t_m,af_m,loc1,loc2')
def main():
    """Print, per methylation platform/locus, the correlation between EGFR methylation and EGFR expression.

    The methylation fractions of tcga1.methyl rows with TN="T" and
    geneName="EGFR" are averaged per (platform, pId, loc) and paired with
    log2(rpkm+1) of EGFR from rpkm_gene_expr. One tab-separated line
    (platform, loc, number of samples, Pearson r) is printed per locus, in
    descending order of r. Loci with no paired rows are skipped.
    """
    con,cursor = mymysql.connectDB(db='tcga1')

    cursor.execute(
        'create temporary table t_EGFR as '
        'select platform,pId,geneName,loc,sum(fraction)/count(fraction) fraction from tcga1.methyl where TN="T" and geneName = "EGFR" '
        'group by platform, pId, loc')

    cursor.execute('alter table t_EGFR add index (geneName)')
    cursor.execute('alter table t_EGFR add index (pId)')

    cursor.execute('SELECT distinct platform,loc FROM t_EGFR')
    results1 = cursor.fetchall()

    output = []

    for (plat,loc) in results1:

        cursor.execute('select fraction,log2(rpkm+1) from t_EGFR, rpkm_gene_expr where platform="%s" and loc="%s" and gene_sym="EGFR" and pId=samp_id' % (plat,loc))
        results2 = cursor.fetchall()

        if not results2:
            # zip(*[]) yields nothing to unpack; there is no correlation to report
            continue

        methyl,expr = zip(*results2)

        r = numpy.corrcoef(methyl,expr)[0,1]

        output.append((plat,loc,len(methyl),r))

    # highest correlation first; a stable key sort keeps ties in query order
    output.sort(key=lambda rec: rec[-1], reverse=True)

    for (plat,loc,n,r) in output:
        print('%s\t%s\t%s\t%.3f' % (plat,loc,n,r))
def main(outFileName,dbNL,dTypeL,geneNL):
    """Write the values of the given genes for the 'P' and 'R' sample groups of each database.

    For every database in dbNL a temporary table t_recur is built from
    sample_tag; samples not listed in it are written as 'P' rows, listed ones
    as 'R' rows. One tab-separated row (dbT, dType, geneN, PR, sId, val) is
    written to outFileName per sample, data type and gene.
    """
    rowFmt = '%s\t%s\t%s\t%s\t%s\t%s\n'
    # group label and the SQL word placed in front of "in (select ... t_recur)"
    groupL = (('P', 'not'), ('R', ''))

    outFile = open(outFileName,'w')
    outFile.write(rowFmt % ('dbT','dType','geneN','PR','sId','val'))

    for dbN in dbNL:

        (con,cursor) = mymysql.connectDB(db=dbN)

        if dbN == 'ircr1':
            cursor.execute('create temporary table t_recur select distinct samp_id from sample_tag where substring(tag,1,6)="pair_P"')
        elif dbN == 'tcga1':
            cursor.execute('create temporary table t_recur select distinct samp_id from sample_tag where tag="Recur"')

        for dType in dTypeL:
            for geneN in geneNL:
                for (PR, negate) in groupL:
                    cursor.execute('select samp_id,%s from %s where gene_sym="%s" and samp_id %s in (select samp_id from t_recur)' % (dTypeH[dType][1],dTypeH[dType][0],geneN,negate))
                    for (sId,val) in cursor.fetchall():
                        outFile.write(rowFmt % (dbH[dbN],dType,geneN,PR,sId,val))

        con.close()

    outFile.close()
def post_rsq2fusion(dirN, server='smc1', dbN='ihlee_test', sampL=[]):
    """Summarize, convert and load the fusion calls of every sample directory under dirN.

    dirN   -- directory holding one sub-directory per sample
    server -- key of mysqlH with the connection settings
    dbN    -- target database; for 'ihlee_test' and 'ircr1' the intermediate
              files go under BASE/fusion, otherwise into the sample directory
    sampL  -- when not empty, only sample ids listed here are processed

    After all samples are loaded, rows whose two gene symbols both start with
    "HLA-" are deleted from splice_fusion and makeDB_splice_AF.fusion is run.
    """
    dbConf = mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbConf['user'],passwd=dbConf['passwd'],db=dbN,host=dbConf['host'])

    sampNL = [x for x in os.listdir(dirN) if os.path.isdir(dirN + '/' + x)]

    for sampN in sampNL:
        baseDir = dirN + '/' + sampN
        sid = sampN[:-4].replace('.','_').replace('-','_')  ## RNASeq sample has '***_RSq'
        if sampL != [] and sid not in sampL:
            continue

        print('%s %s' % (sampN, sid))

        ## make sure sample_tag records that this sample has RNA-Seq
        cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
        if not cursor.fetchall():
            cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)

        fusion_report_annot = glob('%s/%s*_splice_transloc_annot1.report_annot.txt' % (baseDir, sampN))[0]

        # summary text file, then the database-ready .dat file
        if dbN in ['ihlee_test','ircr1']:
            splice_fusion_txt = '%s/fusion/splice_fusion_%s.txt' % (BASE, sampN)
            splice_fusion_dat = '%s/fusion/splice_fusion_%s.dat' % (BASE, sampN)
        else:
            splice_fusion_txt = '%s/splice_fusion_%s.txt' % (baseDir, sampN)
            splice_fusion_dat = '%s/splice_fusion_%s.dat' % (baseDir, sampN)

        fusion_summarize.fusion_summarize_s(inputFileN=fusion_report_annot, minNPos=1, outFileN=splice_fusion_txt)
        prepDB_splice_fusion.main(inGctFileName=splice_fusion_txt, minNPos=1, sampNamePat=RSQPattern, geneList=[], outFileN=splice_fusion_dat)

        cursor.execute('DELETE FROM splice_fusion WHERE samp_id="%s"' % sid)
        cursor.execute('LOAD DATA LOCAL INFILE "%s" IGNORE INTO TABLE splice_fusion' % splice_fusion_dat)

    cursor.execute('DELETE FROM splice_fusion WHERE gene_sym1 LIKE "HLA-%" AND gene_sym2 LIKE "HLA-%"')
    makeDB_splice_AF.fusion(dbN=dbN, cursor=cursor)
def make_mutation_rxsq_cs(dbN='CancerSCAN', cursor=None):
    """Rebuild mutation_rxsq as the union of mutation_cs and mutation_rsq.

    dbN    -- database to connect to when no cursor is given
    cursor -- existing cursor to use; a new connection is opened when None

    The two tables are matched on (samp_id, chrom, chrSta, chrEnd, ref, alt);
    rows present on only one side are kept and their missing read counts are
    set to 0. The result is grouped by (samp_id, chrom, chrSta, ref, alt,
    ch_aa) and indexed.
    """
    if cursor is None:
        (con,cursor) = mymysql.connectDB(db=dbN)

    cursor.execute('''DROP TABLE IF EXISTS mutation_rxsq''')
    # a t_m left behind by an interrupted run on this connection would make the create fail
    cursor.execute('''DROP TEMPORARY TABLE IF EXISTS t_m''')

    matchOn = 'ON n.samp_id=r.samp_id AND n.chrom=r.chrom AND n.chrSta=r.chrSta AND n.chrEnd=r.chrEnd AND n.ref=r.ref AND n.alt=r.alt'

    # LEFT JOIN + RIGHT JOIN, combined with UNION, gives a full outer join of the two tables.
    # The first SELECT names the columns of the UNION, so the ifnull() expression is
    # aliased to "strand"; unaliased, the column was named after the expression text.
    cursor.execute(
        '''CREATE TEMPORARY TABLE t_m AS '''
        '''SELECT n.samp_id,n.chrom,n.chrSta,n.chrEnd,n.ref,n.alt,n.n_nReads_ref,n.n_nReads_alt,n.nReads_ref,n.nReads_alt,r.r_nReads_ref,r.r_nReads_alt,ifnull(r.strand,'') strand,n.gene_sym,n.ch_dna,n.ch_aa,n.ch_type,concat(n.cosmic,",",n.tcga) cosmic,'' mutsig '''
        '''FROM mutation_cs n LEFT JOIN mutation_rsq r '''
        + matchOn + ' '
        '''UNION '''
        '''SELECT r.samp_id,r.chrom,r.chrSta,r.chrEnd,r.ref,r.alt,n.n_nReads_ref,n.n_nReads_alt,n.nReads_ref,n.nReads_alt,r.r_nReads_ref,r.r_nReads_alt,r.strand,r.gene_symL gene_sym,r.ch_dna,r.ch_aa,r.ch_type,r.cosmic,r.mutsig '''
        '''FROM mutation_cs n RIGHT JOIN mutation_rsq r '''
        + matchOn)

    cursor.execute(
        '''CREATE TABLE mutation_rxsq AS '''
        '''SELECT * FROM (SELECT * FROM t_m ORDER BY ch_dna desc) AS i GROUP BY samp_id,chrom,chrSta,ref,alt,ch_aa''')

    # read counts missing on one side of the join become 0
    cursor.execute('''UPDATE mutation_rxsq SET r_nReads_ref=0, r_nReads_alt=0 WHERE r_nReads_ref IS NULL''')
    cursor.execute('''UPDATE mutation_rxsq SET n_nReads_ref=0, n_nReads_alt=0 WHERE n_nReads_ref IS NULL''')
    cursor.execute('''UPDATE mutation_rxsq SET nReads_ref=0, nReads_alt=0 WHERE nReads_ref IS NULL''')

    for indexCols in ('samp_id,gene_sym',
                      'samp_id,chrom,chrSta,chrEnd',
                      'samp_id,chrom,chrSta,ref,alt',
                      'samp_id,chrom,chrSta,chrEnd,ref,alt'):
        cursor.execute('''ALTER TABLE mutation_rxsq ADD INDEX (%s)''' % indexCols)

    cursor.execute('''DROP TEMPORARY TABLE IF EXISTS t_m''')
def main(dataN='TCGA_GBM', endPoint='death', geneN='MGMT', altType='methyl', cutoff=(50,50)):
    """Write the survival table for one gene feature, excluding IDH1 R132 samples, and run the R scripts.

    dataN, endPoint -- not used by this function
    geneN           -- feature value to select (matched against altTypeH[altType][2])
    altType         -- key of altTypeH giving (table, value column, feature column)
    cutoff          -- (bottom, top) percentages; samples below the bottom
                       percentile get priority 1, samples at or above the
                       (100 - top) percentile get priority 2, the rest 9

    The table goes to /var/www/html/tmp/survival.mvc; distribution.r and
    survival.r (png, then pdf) are run on it.

    Returns True when distribution.r or the png run of survival.r exited with
    a non-zero status; the status of the pdf run is not checked.
    """
    colN = ['pId','time','event','value','label','priority']

    con,cursor = mymysql.connectDB(db='tcga1')

    cursor.execute('select samp_id from mutation_normal where gene_symL="IDH1" and ch_aa like "%sR132%s"' % ('%','%'))
    # set: membership is tested once per clinical record
    idh1 = set(x[0] for x in cursor.fetchall())

    recordL = mymysql.dictSelect(
        "SELECT pId,days_followup time,if(days_death is not null,1,0) event,%s value "
        "FROM clinical join %s on pId=samp_id and %s='%s'" % (altTypeH[altType][1],altTypeH[altType][0],altTypeH[altType][2],geneN), cursor)

    # drop the IDH1-mutated samples
    recordL = [r for r in recordL if r['pId'] not in idh1]

    valueL = [r['value'] for r in recordL]

    threshold = (mymath.percentile(valueL,cutoff[0]), mymath.percentile(valueL,100-cutoff[1]))

    with open('/var/www/html/tmp/survival.mvc','w') as outFile:
        outFile.write('\t'.join(colN)+'\n')

        for r in recordL:

            if r['value'] < threshold[0]:
                label = '"%s %s < B%s%% (%.2f)"' % (geneN,altType,cutoff[0],threshold[0])
                priority = '1'
            elif r['value'] >= threshold[1]:
                label = '"%s %s > T%s%% (%.2f)"' % (geneN,altType,cutoff[1],threshold[1])
                priority = '2'
            else:
                label = '"%s %s Middle"' % (geneN,altType)
                priority = '9'

            outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (r['pId'], r['time'], r['event'], r['value'], label, priority))

    ret1 = os.system('Rscript distribution.r /var/www/html/tmp/survival.mvc &> /var/www/html/tmp/error_surv.txt')
    ret2 = os.system('Rscript survival.r /var/www/html/tmp/survival.mvc png &>> /var/www/html/tmp/error_surv.txt')
    os.system('Rscript survival.r /var/www/html/tmp/survival.mvc pdf &>> /var/www/html/tmp/error_surv.txt')

    return ret1!=0 or ret2!=0
def batch(outDir):
    """Call main(sid, outDir) for every distinct samp_id of rpkm_gene_expr, printing each id first."""
    (con, cursor) = mymysql.connectDB()
    cursor.execute('SELECT DISTINCT samp_id FROM rpkm_gene_expr')
    for (sid, ) in cursor.fetchall():
        print(sid)
        main(sid, outDir)
def has_exome(samp_id, dbN='ircr1'):
    """Return True when samp_id has a sample_tag starting with "XSeq_TS" or "XSeq_SS" in database dbN."""
    (con, cursor) = mymysql.connectDB(db=dbN)
    try:
        cursor.execute('select count(distinct samp_id) from sample_tag where (tag like "XSeq_TS%%" or tag like "XSeq_SS%%") and samp_id="%s"' % samp_id)
        (count,) = cursor.fetchall()[0]
        return int(count) > 0
    finally:
        # each call opens its own connection; release it
        con.close()
def batch(outDir):
    """Run main() once per distinct sample id stored in rpkm_gene_expr.

    The sample id is echoed to stdout ahead of each main(sid, outDir) call.
    """
    (con, cursor) = mymysql.connectDB()
    cursor.execute('SELECT DISTINCT samp_id FROM rpkm_gene_expr')

    # Flatten the one-column result rows into a plain list of ids.
    sampleIdL = [sampleId for (sampleId,) in cursor.fetchall()]

    for sid in sampleIdL:
        print(sid)
        main(sid, outDir)
def has_rsq(samp_id, dbN='ircr1'):
    """Return True if sample_tag has the tag "RNA-Seq" for samp_id.

    Parameters:
        samp_id: sample identifier interpolated into the query.
        dbN: database name passed to mymysql.connectDB.
    """
    (con, cursor) = mymysql.connectDB(db=dbN)
    try:
        cursor.execute('select count(distinct samp_id) from sample_tag where tag="RNA-Seq" and samp_id="%s"' % samp_id)
        # count(...) always yields exactly one row.
        (cnt,) = cursor.fetchone()
    finally:
        # A connection is opened on every call, so release it every time.
        con.close()

    return int(cnt) > 0
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1'):
    """Launch pipe_s_xsq2cnCorr.py for each .ngCGH input that qualifies.

    A project directory is created (world-writable) if it does not exist.
    For every input file the sample id is derived from the file name, its
    tumor_frac is looked up in ircr1.xsq_purity, and the per-sample pipeline
    command is run either through qsub (pbs=True) or directly via os.system.

    Parameters:
        inputFilePathL: paths of *.ngCGH files.
        projectN: project name; used for the directory and passed as -p.
        clean: not read by this function (the command always receives False).
        pbs: submit through qsub when true, otherwise run in the foreground.
        server: used as the qsub queue name and passed as -s.
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    # The log location is the same as the storage location here.
    apacheBase = storageBase

    if glob(storageBase + projectN):
        print('File directory: already exists')
    else:
        os.system('mkdir %s/%s; \
            chmod a+w %s/%s' % (storageBase, projectN, storageBase, projectN))
        print('File directory: created')

    if glob(apacheBase + projectN):
        print('Log directory: already exists')
    else:
        os.system('mkdir %s/%s; \
            chmod a+w %s/%s' % (apacheBase, projectN, apacheBase, projectN))
        print('Log directory: created')

    # NOTE(review): the connection always uses the 'smc1' credentials,
    # regardless of the server argument -- confirm this is intended.
    (con, cursor) = mymysql.connectDB(user=mysqlH['smc1']['user'], passwd=mysqlH['smc1']['passwd'], db='ircr1', host=mysqlH['smc1']['host'])

    for inputFileP in inputFilePathL:
        inputFileN = inputFileP.split('/')[-1]
        sampN = re.match('(.*)\.ngCGH', inputFileN).group(1)
        # The tag is the X/C/T-prefixed token (up to 3 chars) after the sample id.
        (sid, tag) = re.match('(.*)_([XCT].{0,2})_.*\.ngCGH', inputFileN).groups()
        # Any tag other than plain 'T' becomes part of the sample id.
        if tag != 'T':
            sid = '%s_%s' % (sid, tag)

        # if sid not in ['IRCR_GBM13_352_T02_C01']:
        #     continue

        cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % (sid))
        results = cursor.fetchall()
        if len(results) > 0 and results[0][0] != 'ND':  ## Of samples for which purity was calculated
            if any(sid in x for x in os.listdir('/EQL3/pipeline/CNA_corr')):  # only those for which corrected cn were not calculated, yet
                continue

            # NOTE(review): the original indentation was lost; the launch code
            # is assumed to sit inside the purity check -- confirm.
            print sid
            cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2cnCorr.py -i %s -n %s -p %s -c %s -s %s' % (mysetting.SRC_HOME, inputFileP, sampN, projectN, False, server)
            print cmd
            if pbs:
                # The queued job logs into the per-sample directory.
                log = '%s/%s.Xsq_cnCorr.qlog' % (storageBase + projectN + '/' + sampN, sampN)
                os.system('echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log))
            else:
                # The foreground run logs stderr directly under the project directory.
                log = '%s/%s.Xsq_cnCorr.qlog' % (storageBase + projectN, sampN)
                os.system('(%s) 2> %s' % (cmd, log))
def retSig_CNA(mode='up', th=1.0, dbN='ircr1'):
    """Tally, per gene, the primary/recurrent pairs that retain or lose a CNA.

    For every sample with a "Pair_R:<rid>" tag, genes passing the log2
    threshold are selected for the sample and for its partner <rid>.  A gene
    called in the tagged sample is counted as 'retain' when it is also called
    in the partner and as 'vanish' otherwise.  The tallies are handed to
    print_mutSig together with a uniform per-pair weight of 0.5.

    Parameters:
        mode: 'up' selects value_log2 >= th, 'dn' selects value_log2 <= th.
        th: log2 threshold interpolated into the query.
        dbN: database name passed to mymysql.connectDB.

    Raises:
        ValueError: if mode is neither 'up' nor 'dn'.  Previously such a value
            only failed later with a NameError on the undefined gene list.
    """
    cmpH = {'up': '>=', 'dn': '<='}
    if mode not in cmpH:
        raise ValueError('mode must be "up" or "dn": %r' % (mode,))
    # One query template serves both directions and both samples of a pair.
    geneQuery = 'select distinct gene_sym from %s where value_log2 ' + cmpH[mode] + ' %s and samp_id="%s"'

    geneH = {}
    fracH = {}

    (con,cursor) = mymysql.connectDB(db=dbN)
    try:
        cursor.execute('select distinct samp_id from sample_tag where tag like "Pair_R:%%"')
        sidL = [x for (x,) in cursor.fetchall()]
        sidL.sort()

        for sid in sidL:
            if sid in ['S042']:  ## consent form issue
                continue
            if '_X' in sid:
                continue

            cursor.execute('select distinct tag from sample_tag where tag like "Pair_R:%%" and samp_id="%s"' % sid)
            rid = cursor.fetchone()[0].split(':')[-1]
            pair = '%s:%s' % (sid, rid)

            # Prefer array CGH when both samples have it, otherwise exome CN.
            if has_acgh(sid) and has_acgh(rid):
                tblN = 'array_cn'
            elif has_exome(sid) and has_exome(rid):
                tblN = 'xsq_cn'
            else:
                continue

            cursor.execute(geneQuery % (tblN, th, sid))
            p_geneL = [item[0] for item in cursor.fetchall()]
            cursor.execute(geneQuery % (tblN, th, rid))
            r_geneL = [item[0] for item in cursor.fetchall()]

            if len(p_geneL) < 1:
                continue

            # Kept as a set so the membership test below is O(1).
            shared_geneS = set(p_geneL) & set(r_geneL)

            # fracH[pair] = float(len(shared_geneS))/float(len(p_geneL))
            fracH[pair] = 0.5  ## uniform

            for gene in p_geneL:
                entry = geneH.setdefault(gene, {'retain': [], 'vanish': [], 'p_cnt': 0})
                if gene in shared_geneS:  ## retained
                    entry['retain'].append(pair)
                else:  ## vanished
                    entry['vanish'].append(pair)
                entry['p_cnt'] += 1
    finally:
        con.close()

    print_mutSig(geneH, fracH)
def has_exome(samp_id, dbN='ircr1'):
    """Return True if sample_tag has an XSeq_TS* or XSeq_SS* tag for samp_id.

    Parameters:
        samp_id: sample identifier interpolated into the query.
        dbN: database name passed to mymysql.connectDB.
    """
    (con, cursor) = mymysql.connectDB(db=dbN)
    try:
        cursor.execute(
            'select count(distinct samp_id) from sample_tag where (tag like "XSeq_TS%%" or tag like "XSeq_SS%%") and samp_id="%s"'
            % samp_id)
        # count(...) always yields exactly one row.
        (cnt, ) = cursor.fetchone()
    finally:
        # A connection is opened on every call, so release it every time.
        con.close()

    return int(cnt) > 0
def main(outFileName,dbName):
    """Write paired primary/recurrent values for every data type and gene.

    Primary samples are those with a pair_R tag that also appear in the
    hard-coded sample list of the query below.  For each of them the matching
    recurrent sample is looked up, and for every (dType, gene) in the
    module-level dTypeL / geneL the value is fetched for both samples.  A row
    is kept only when both values exist.  A 'CNA' row replaces the 'CNX' row
    with the same gene and sample pair.

    Parameters:
        outFileName: path of the tab-separated output table.
        dbName: database name passed to mymysql.connectDB.
    """
    (con,cursor) = mymysql.connectDB(db=dbName)
    try:
        cursor.execute('select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R" and \
            samp_id!="S042" and samp_id not like "%_X" and substring(samp_id,length(samp_id)-1)!="_2" and \
            find_in_set(samp_id,"S437,S586,S023,S697,S372,S538,S458,S453,S428,S460,S768,S780,S640,S096,S671,S592,S572,S520,S1A,S2A,S3A,S4A,S5A,S6A,S7A,S8A,S9A,S10A,S11A,S12A,S13A,S14A,S722,S171,S121,S652,S752,S386")>=1')
        sIdL_prim = [x for (x,) in cursor.fetchall()]
        print(sIdL_prim)

        # primary sample id -> recurrent sample id.  The lookup does not depend
        # on dType or gene, so it is run once per primary sample.
        pairH = {}
        resultL = []

        for dType in dTypeL:
            for geneN in geneL:
                for sId_p in sIdL_prim:
                    if sId_p not in pairH:
                        cursor.execute('select t1.samp_id from sample_tag t1 where t1.tag="pair_P:%s" and \
                            "%s" in (select t2.samp_id from sample_tag t2 where t2.tag=concat("pair_R:",t1.samp_id))' % (sId_p,sId_p))
                        (pairH[sId_p],) = cursor.fetchone()
                    sId_r = pairH[sId_p]

                    # if sId_p=='S520':
                    #     sId_r = 'S602'

                    cursor.execute('select %s from %s where gene_sym="%s" and samp_id="%s"' % (dTypeH[dType][1],dTypeH[dType][0],geneN,sId_p))
                    r_p = cursor.fetchone()
                    cursor.execute('select %s from %s where gene_sym="%s" and samp_id="%s"' % (dTypeH[dType][1],dTypeH[dType][0],geneN,sId_r))
                    r_r = cursor.fetchone()

                    if r_p and r_r:
                        resultL.append((dType,geneN,sId_p,sId_r,r_p[0],r_r[0]))
    finally:
        con.close()

    # Real lists (not filter objects) because resultL_oth is mutated below.
    resultL_cna = [x for x in resultL if x[0]=='CNA']
    resultL_oth = [x for x in resultL if x[0]!='CNA']

    for r in resultL_cna:
        # A CNA row supersedes the CNX row for the same gene and sample pair.
        overlap = [x for x in resultL_oth if x[0]=='CNX' and x[1:4]==r[1:4]]
        if overlap:
            resultL_oth.remove(overlap[0])
        resultL_oth.append(r)

    with open(outFileName,'w') as outFile:
        outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % ('dType','geneN','sId_p','sId_r','val_p','val_r'))
        for r in resultL_oth:
            outFile.write('%s\t%s\t%s\t%s\t%.2f\t%.2f\n' % r)
def main(geneN, dType, dbN, outFileDir=None, pairedOnly=False):
    """Write the primary and recurrent value lists of one gene in dst2 form.

    Recurrent samples are those tagged pair_P* (ircr1) or "Recur" (tcga1).
    Primary values come from samples outside that set, or, when pairedOnly is
    set for ircr1, from samples tagged pair_R*.  Each group is emitted as a
    header line with its size, a comma-joined value line and a blank line.

    Parameters:
        geneN: gene symbol interpolated into the queries.
        dType: key into the module-level dTypeH mapping (table, value column).
        dbN: 'ircr1' or 'tcga1'.
        outFileDir: directory for the .dst2 file; stdout is used when falsy.
        pairedOnly: restrict ircr1 primaries to samples with a pair_R tag.

    Raises:
        Exception: if dbN is neither 'ircr1' nor 'tcga1'.
    """
    if outFileDir:
        if pairedOnly and dbN == 'ircr1':
            outFile = open('%s/%s_%s_%s_paired.dst2' % (outFileDir, geneN, dType, dbH[dbN]), 'w')
        else:
            outFile = open('%s/%s_%s_%s.dst2' % (outFileDir, geneN, dType, dbH[dbN]), 'w')
    else:
        outFile = sys.stdout

    con = None
    try:
        (con, cursor) = mymysql.connectDB(db=dbN)

        if dbN == 'ircr1':
            cursor.execute('create temporary table t_paired_prim select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R"')
            cursor.execute('create temporary table t_recur select distinct samp_id from sample_tag where substring(tag,1,6)="pair_P"')
        elif dbN == 'tcga1':
            cursor.execute('create temporary table t_recur select distinct samp_id from sample_tag where tag="Recur"')
        else:
            raise Exception('unsupported database: %s' % dbN)

        valueQuery = 'select %s from %s where gene_sym="%s" and samp_id ' % (dTypeH[dType][1], dTypeH[dType][0], geneN)

        if pairedOnly and dbN == 'ircr1':
            cursor.execute(valueQuery + 'in (select samp_id from t_paired_prim)')
        else:
            cursor.execute(valueQuery + 'not in (select samp_id from t_recur)')
        prim = [str(x) for (x, ) in cursor.fetchall()]

        cursor.execute(valueQuery + 'in (select samp_id from t_recur)')
        recur = [str(x) for (x, ) in cursor.fetchall()]

        outFile.write('%s-%s-%s-Prim\t%s\n' % (geneN, dType, dbN, len(prim)))
        outFile.write(','.join(prim) + '\n')
        outFile.write('\n')

        outFile.write('%s-%s-%s-Recur\t%s\n' % (geneN, dType, dbN, len(recur)))
        outFile.write(','.join(recur) + '\n')
        outFile.write('\n')
    finally:
        if con is not None:
            con.close()
        # Only close what this function opened; stdout belongs to the caller.
        if outFile is not sys.stdout:
            outFile.close()
def post_rsq2skip(dirN, server='smc1', dbN='ihlee_test', sampL=[]):
    """Load per-sample normal-splice and exon-skip results into the database.

    Every sub-directory of dirN is treated as one RNA-Seq sample.  For each
    selected sample the report files are converted by the prepDB_* /
    exonSkip_summarize helpers and loaded: normal-splice rows go into a
    temporary table, exon-skip rows replace the sample's rows in splice_skip.
    After all samples are processed the temporary rows are appended to
    splice_normal, the per-location read sums (loc1 / loc2) are rebuilt for
    the loaded samples, and makeDB_splice_AF.skip is invoked.

    Parameters:
        dirN: directory whose sub-directories are the sample folders.
        server: key into mysqlH for the connection credentials.
        dbN: target database; 'ihlee_test' and 'ircr1' write their
            intermediate files under BASE instead of the sample directory.
        sampL: when non-empty, only sample ids in this list are processed.
            The default list is never modified here.
    """
    (con, cursor) = mymysql.connectDB(user=mysqlH[server]['user'],passwd=mysqlH[server]['passwd'],db=dbN,host=mysqlH[server]['host'])

    # Force samp_id to char(63) on the three normal-splice tables.
    cursor.execute('ALTER TABLE splice_normal CHANGE COLUMN samp_id samp_id char(63)')
    cursor.execute('ALTER TABLE splice_normal_loc1 CHANGE COLUMN samp_id samp_id char(63)')
    cursor.execute('ALTER TABLE splice_normal_loc2 CHANGE COLUMN samp_id samp_id char(63)')

    # Staging table that accumulates the rows of every sample loaded below.
    cursor.execute('CREATE TEMPORARY TABLE splice_normal_tmp LIKE splice_normal')

    sampNL = filter(lambda x: os.path.isdir(dirN + '/' + x), os.listdir(dirN))

    for sampN in sampNL:
        baseDir = dirN + '/' + sampN
        # Strip the 4-character suffix and normalise '.' and '-' to '_'.
        sid = sampN[:-4].replace('.','_').replace('-','_') ## RNASeq sample has '***_RSq'
        if sampL != [] and sid not in sampL:
            continue

        print sampN, sid

        ## make sure to update sample_tag that this sample has RNA-Seq
        cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
        results = cursor.fetchall()
        if len(results) < 1:
            cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)

        # glob(...)[0] raises IndexError when the report file is missing.
        normal_report = glob('%s/%s*normal_report.txt' % (baseDir, sampN))[0]
        if dbN in ['ihlee_test','ircr1']:
            splice_normal = '%s/exonSkip_normal/splice_normal_%s.dat' % (BASE, sampN)
        else:
            splice_normal = '%s/splice_normal_%s.dat' % (baseDir, sampN)
        prepDB_splice_normal.main(sampNamePat=RSQPattern, inFileN=normal_report, outFileN=splice_normal)
        cursor.execute('LOAD DATA LOCAL INFILE "%s" INTO TABLE splice_normal_tmp' % splice_normal)

        skip_report_annot = glob('%s/%s*_splice_exonSkip_report_annot.txt' % (baseDir, sampN))[0]
        if dbN in ['ihlee_test','ircr1']:
            splice_skip_txt = '%s/exonSkip/splice_skip_%s.txt' % (BASE, sampN)
        else:
            splice_skip_txt = '%s/splice_skip_%s.txt' % (baseDir, sampN)
        exonSkip_summarize.exonSkip_summarize_s(inFileN=skip_report_annot, minPos=1, outFileN=splice_skip_txt)

        if dbN in ['ihlee_test','ircr1']:
            splice_skip_dat = '%s/exonSkip/splice_skip_%s.dat' % (BASE, sampN)
        else:
            splice_skip_dat = '%s/splice_skip_%s.dat' % (baseDir, sampN)
        prepDB_splice_skip.main(inFileName=splice_skip_txt, minNPos=1, sampNamePat=RSQPattern, geneList=[], outFileName=splice_skip_dat)

        # Replace this sample's exon-skip rows.
        cursor.execute('DELETE FROM splice_skip WHERE samp_id="%s"' % sid)
        cursor.execute('LOAD DATA LOCAL INFILE "%s" IGNORE INTO TABLE splice_skip' % splice_skip_dat)

    # NOTE(review): unlike loc1/loc2 and splice_skip, existing splice_normal
    # rows of the loaded samples are not deleted before the insert -- confirm
    # that re-running a sample cannot duplicate rows.
    cursor.execute('ALTER TABLE splice_normal DISABLE KEYS')
    cursor.execute('INSERT INTO splice_normal SELECT * FROM splice_normal_tmp')
    cursor.execute('ALTER TABLE splice_normal ENABLE KEYS')

    # Rebuild the per-loc1 read sums for the samples that were just loaded.
    cursor.execute('ALTER TABLE splice_normal_loc1 DISABLE KEYS')
    cursor.execute('DELETE FROM splice_normal_loc1 WHERE samp_id in (SELECT DISTINCT samp_id FROM splice_normal_tmp)')
    cursor.execute('INSERT INTO splice_normal_loc1 SELECT samp_id,loc1,sum(nReads) nReads_w1 FROM splice_normal_tmp GROUP BY samp_id,loc1')
    cursor.execute('ALTER TABLE splice_normal_loc1 ENABLE KEYS')

    # Same for the per-loc2 read sums.
    cursor.execute('ALTER TABLE splice_normal_loc2 DISABLE KEYS')
    cursor.execute('DELETE FROM splice_normal_loc2 WHERE samp_id in (SELECT DISTINCT samp_id FROM splice_normal_tmp)')
    cursor.execute('INSERT INTO splice_normal_loc2 SELECT samp_id,loc2,sum(nReads) nReads_w2 FROM splice_normal_tmp GROUP BY samp_id,loc2')
    cursor.execute('ALTER TABLE splice_normal_loc2 ENABLE KEYS')

    makeDB_splice_AF.skip(dbN=dbN, cursor=cursor)

    cursor.execute('DROP TEMPORARY TABLE IF EXISTS splice_normal_tmp')
def coverage_calc_batch(inputDirNL,outputDirN,pbs=False,refFileName='/data1/Sequence/ucsc_hg19/annot/refFlat_exon_autosome_NM_merged.txt'):
    """Run samtools depth plus a depth histogram for the selected BAM files.

    Samples are limited to those present in sample_tag_paperfreeze (plus S641
    and S140) that also occur among the input directories.  Each matching BAM
    is processed once per sample name, either through qsub (pbs=True) or in
    the foreground with stderr captured in a .qlog file.

    Parameters:
        inputDirNL: sample directories whose last component ends in _T_TS/_T_SS.
        outputDirN: directory receiving the depth, histogram and log files.
        pbs: submit the command through qsub when true.
        refFileName: region file passed to ``samtools depth -b``.
    """
    con,cursor = mymysql.connectDB(db='ircr1')

    cursor.execute('select distinct samp_id from sample_tag_paperfreeze')
    frozenL = [row[0] for row in cursor.fetchall()] + ['S641','S140']

    inputFileNL = [re.match('.*\/(S[0-9]{1}.*S$)', dirN).group(1) for dirN in inputDirNL]
    inDirSampNameL = [re.match('(S.*)_T_[TS]S$', fileN).group(1) for fileN in inputFileNL]

    # Keep only frozen samples that are actually present on disk, sorted.
    sampNameL = sorted(set(frozenL).intersection(inDirSampNameL))

    print('Samples: %s (%s)' % (sampNameL,len(sampNameL)))

    fileNameL = []
    for dirN in inputDirNL:
        dirSampN = re.match('(S.*)_T_[TS]S$',dirN.split('/')[-1]).group(1)
        if dirSampN in sampNameL:
            fileNameL.extend(glob(dirN+'/*_[TB]_[TS]S*.bam'))
    fileNameL.sort()

    procL = []  # sample names already handled

    for fileN in fileNameL:
        sampN = re.match('(.*).recal.bam',fileN.split('/')[-1]).group(1)

        if sampN in procL:
            continue

        print(sampN)

        depthCmd = 'samtools depth -b %s %s > %s/%s.recal.depth.txt' % \
            (refFileName,fileN, outputDirN,sampN)
        log = '%s/%s.depth.qlog' % (outputDirN,sampN)

        if pbs:
            # Single-quoted awk program: the whole command is wrapped in
            # double quotes by the echo below.
            fullCmd = "%s; awk '{cnt[\$3]+=1}END{for (x in cnt){print x,cnt[x]}}' %s/%s.recal.depth.txt | sort -n -k1 > %s/%s.recal.depth_hash.txt" % \
                (depthCmd, outputDirN,sampN, outputDirN,sampN)
            os.system('echo "%s" | qsub -N %s -o %s -j oe' % (fullCmd,sampN,log))
        else:
            fullCmd = '%s; awk "{cnt[\$3]+=1}END{for (x in cnt){print x,cnt[x]}}" %s/%s.recal.depth.txt | sort -n -k1 > %s/%s.recal.depth_hash.txt' % \
                (depthCmd, outputDirN,sampN, outputDirN,sampN)
            os.system('(%s) 2> %s' % (fullCmd, log))

        procL.append(sampN)
def main(dataN='TCGA_GBM', endPoint='death', geneN='EGFR', altType='2-7', cutoff=(50,50)):
    """Write a survival table keyed on the EGFR exon 2-7 skipping fraction.

    For every sample in splice_normal the fraction nReads/(nReads+nReads_w1)
    is taken from splice_skip_AF rows with gene_sym "EGFR" and delExons like
    "%2-7%" (0 when no such row exists) and joined with the clinical table.
    Samples whose pId is in the module-level CIMP collection are left out.
    The table is written to /var/www/html/tmp/survival.mvc and passed to
    distribution.r and survival.r.

    Parameters:
        dataN, endPoint, cutoff: not read by this function.
        geneN, altType: only used to build the group labels; the SQL filter is
            hard-coded to EGFR / 2-7.

    Returns:
        True when either R script exited with a non-zero status.
    """
    colN = ['pId','time','event','value','label','priority']

    con,cursor = mymysql.connectDB(db='tcga1')
    try:
        cursor.execute('create temporary table t1 select distinct samp_id pId from splice_normal')
        cursor.execute('create temporary table t2 \
            select pId, nReads/(nReads+nReads_w1) af from t1 left join splice_skip_AF on pId=samp_id and gene_sym="EGFR" and delExons like "%2-7%"')
        # Samples without a matching skip row get a fraction of 0.
        cursor.execute('update t2 set af=0 where af is null')

        recordL = mymysql.dictSelect("SELECT pId,days_followup time,if(days_death is not null,1,0) event, af value \
            FROM clinical join t2 using (pId)", cursor)
    finally:
        # Everything needed has been fetched; do not leave the connection open.
        con.close()

    # Both cut points are equal, so the 'Middle' branch below cannot be
    # reached for ordinary numeric values; it is kept for symmetry.
    threshold = (0.01,0.01)

    # The with-block guarantees the table is flushed and closed before the R
    # scripts read it, even if a record fails to format.
    with open('/var/www/html/tmp/survival.mvc','w') as outFile:
        outFile.write('\t'.join(colN)+'\n')

        for r in recordL:
            r['value'] = float(r['value'])

            if r['value'] < threshold[0]:
                label = '"%s %s < %.2f"' % (geneN,altType,threshold[0])
                priority = '1'
            elif r['value'] >= threshold[1]:
                label = '"%s %s > %.2f"' % (geneN,altType,threshold[1])
                priority = '2'
            else:
                label = '"%s %s Middle"' % (geneN,altType)
                priority = '9'

            if r['pId'] not in CIMP:
                outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (r['pId'], r['time'], r['event'], r['value'], label, priority))

    ret1 = os.system('Rscript distribution.r /var/www/html/tmp/survival.mvc png &> /var/www/html/tmp/error_distr.txt')
    ret2 = os.system('Rscript survival.r /var/www/html/tmp/survival.mvc png &> /var/www/html/tmp/error_surv.txt')

    return ret1!=0 or ret2!=0
def main(inFileName, outFileName):
    """Combine paired primary/recurrent values with treatment intervals.

    The input is a tab-separated file with one header line followed by
    ``sId_p<TAB>chemo<TAB>RT`` rows.  'NA' and values above 1000 are written
    as 1000.  For every primary sample the recurrent partner is looked up by
    its "pair_P:<sId_p>" tag, and for every gene / data type in the
    module-level geneL / dTypeL a row is written when both samples have a
    value.

    Parameters:
        inFileName: path of the treatment table.
        outFileName: path of the tab-separated output table.
    """
    def capped(field):
        # 'NA' and anything above 1000 collapse to 1000.
        if field == 'NA':
            return 1000
        return min(int(field), 1000)

    (con, cursor) = mymysql.connectDB(db='ircr1')
    try:
        with open(inFileName) as inFile:
            with open(outFileName, 'w') as outFile:
                inFile.readline()  # skip the header line

                outFile.write('\t'.join(('dType', 'geneN', 'sId_p', 'sId_r', 'val_p', 'val_r', 'chemo', 'RT', 'either')) + '\n')

                for line in inFile:
                    # Strip only the line terminator: slicing off the last
                    # character would truncate a final line without a newline.
                    (sId_p, chemo, RT) = line.rstrip('\r\n').split('\t')
                    chemo = capped(chemo)
                    RT = capped(RT)

                    # The partner depends only on the primary sample, so look
                    # it up once per input row rather than per gene/data type.
                    cursor.execute('select samp_id from sample_tag where tag="pair_P:%s"' % sId_p)
                    (sId_r, ) = cursor.fetchone()

                    for geneN in geneL:
                        for dType in dTypeL:
                            cursor.execute('select %s from %s where gene_sym="%s" and samp_id="%s"' % (dTypeH[dType][1], dTypeH[dType][0], geneN, sId_p))
                            r_p = cursor.fetchone()
                            cursor.execute('select %s from %s where gene_sym="%s" and samp_id="%s"' % (dTypeH[dType][1], dTypeH[dType][0], geneN, sId_r))
                            r_r = cursor.fetchone()

                            if r_p and r_r:
                                outFile.write('%s\t%s\t%s\t%s\t%.2f\t%.2f\t%d\t%d\t%d\n' % (dType, geneN, sId_p, sId_r, r_p[0], r_r[0], chemo, RT, min(chemo, RT)))
    finally:
        con.close()
def main(newDBN='', server='smc1'):
    """Create database newDBN with empty copies of most ircr1 tables.

    The database is created if missing, all privileges on it are granted to
    'cancer'@'localhost', and every ircr1 table is cloned with CREATE TABLE
    ... LIKE, except census, cosmic, rpkm_gene_expr_lg2 and any table whose
    name contains 'bak'.

    Parameters:
        newDBN: name of the database to create.
        server: key into mysqlH used for the host name.
    """
    # NOTE(review): the credentials are hard-coded in this call.
    (con, cursor) = mymysql.connectDB(user='******', passwd='123456', host=mysqlH[server]['host'])

    cursor.execute('CREATE DATABASE IF NOT EXISTS %s' % newDBN)
    cursor.execute("GRANT ALL ON %s.* TO 'cancer'@'localhost'" % newDBN)
    cursor.execute('USE %s' % newDBN)

    cursor.execute('show tables from ircr1')
    skipL = ['census','cosmic','rpkm_gene_expr_lg2']
    tableL = [tblN for (tblN,) in cursor.fetchall() if tblN not in skipL and 'bak' not in tblN]

    for tblN in tableL:
        cursor.execute('CREATE TABLE IF NOT EXISTS %s LIKE ircr1.%s' % (tblN, tblN))
def prep_somatic(outFileN, server='smc1', dbN='ircr1'):
    """Build the mutect mutation data file and mark samples as normal-matched.

    All *mutect_vep.dat files under mysetting.wxsMutectDir are piped through
    prepDB_mutation_mutect.py into outFileN.  Then, for every such file whose
    sample does not yet carry an 'N' entry in its XSeq_ tag, the tag is
    extended with ',N' (existing sample) or a new 'XSeq_<platform>,N' tag is
    inserted (new sample).

    Parameters:
        outFileN: path receiving the concatenated, converted mutation data.
        server: key into mysetting.mysqlH for the connection credentials.
        dbN: database name.
    """
    # ##VEP mutect
    # vep_mutect_batch.main([mysetting.wxsMutectDir])

    dbConf = mysetting.mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbConf['user'], passwd=dbConf['passwd'], db=dbN, host=dbConf['host'])

    cursor.execute('SELECT DISTINCT samp_id,tag FROM sample_tag WHERE tag LIKE "XSeq_%%"')

    # Split the tagged samples by whether their tag already lists 'N'.
    singleL = []
    somaticL = []
    for row in cursor.fetchall():
        pl_typeL = re.match('XSeq_(.*)', row[1]).group(1).split(',')
        if 'N' in pl_typeL:
            somaticL.append(row[0])
        else:
            singleL.append(row[0])

    os.system('cat %s/*mutect_vep.dat | /usr/bin/python %s/Integration/prepDB_mutation_mutect.py > %s' % (mysetting.wxsMutectDir, mysetting.SRC_HOME, outFileN))

    for mutectFileN in glob('%s/*mutect_vep.dat' % mysetting.wxsMutectDir):
        (sid, postfix, platform) = re.match('(.*)_([XT].{,2})_([STKN]{2}).mutect_vep.dat', os.path.basename(mutectFileN)).groups()

        # Any postfix other than plain 'T' becomes part of the sample id.
        if postfix != 'T':
            sid = '%s_%s' % (sid, postfix)

        if sid in somaticL:
            continue

        if sid in singleL:  ## previously analyzed without matched normal
            cursor.execute('SELECT samp_id,tag FROM sample_tag WHERE samp_id="%s" AND tag LIKE "XSeq_%%"' % sid)
            tagRowL = cursor.fetchall()
            if len(tagRowL) > 1:
                sys.stderr.write('Duplication in sample_tag: %s\n' % sid)
                sys.exit(1)
            newTag = '%s,N' % tagRowL[0][1]
            cursor.execute('UPDATE sample_tag SET samp_id="%s", tag="%s" WHERE samp_id="%s" AND tag LIKE "XSeq_%%"' % (sid, newTag, sid))
        else:  ## brand new sample
            newTag = 'XSeq_%s,N' % platform
            cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="%s"' % (sid, newTag))
def main(outDirName):
    """Write one table of paired primary/recurrent values per data type.

    Primary samples are those with a pair_R tag, excluding S042, ids ending
    in "_X" and the module-level exeSampL.  For each data type in dTypeL a
    file ``paired_df_<dType>.txt`` is written holding, for every primary
    sample and its recurrent partner, the values of all genes present in
    both samples.

    Parameters:
        outDirName: directory receiving the paired_df_*.txt files.
    """
    (con, cursor) = mymysql.connectDB(db='ircr1')
    try:
        cursor.execute('select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R" and samp_id!="S042" and samp_id not like "%_X"')
        sIdL_prim = [x for (x, ) in cursor.fetchall() if x not in exeSampL]

        for dType in dTypeL:
            print(dType)

            # The with-block closes the table even if a query below fails.
            with open('%s/paired_df_%s.txt' % (outDirName, dType), 'w') as outFile:
                outFile.write('%s\t%s\t%s\t%s\t%s\n' % ('sId_p', 'sId_r', 'geneN', 'val_p', 'val_r'))

                for sId_p in sIdL_prim:
                    print('\t%s' % sId_p)

                    # The join expands into one samp_id!="..." clause per
                    # entry of the module-level sId_r1.
                    cursor.execute('select samp_id from sample_tag where tag="pair_P:%s" and samp_id!="%s"' % (sId_p, ('" and samp_id!="').join(sId_r1)))
                    (sId_r, ) = cursor.fetchone()

                    # Rebuild the two per-sample gene tables and join them.
                    cursor.execute('drop table if exists tP')
                    cursor.execute('drop table if exists tR')

                    cursor.execute('create temporary table tP select gene_sym,%s vP from %s where samp_id="%s"' % (dTypeH[dType][1], dTypeH[dType][0], sId_p))
                    cursor.execute('alter table tP add index (gene_sym)')

                    cursor.execute('create temporary table tR select gene_sym,%s vR from %s where samp_id="%s"' % (dTypeH[dType][1], dTypeH[dType][0], sId_r))
                    cursor.execute('alter table tR add index (gene_sym)')

                    cursor.execute('select gene_sym,vP,vR from tP join tR using (gene_sym)')

                    for (geneN, vP, vR) in cursor.fetchall():
                        outFile.write('%s\t%s\t%s\t%.2f\t%.2f\n' % (sId_p, sId_r, geneN, vP, vR))
    finally:
        con.close()
def post_xsq2purity(outFileN, server='smc1', dbN='ircr1'):
    """Reload xsq_purity and flag purity-analysed samples as normal-matched.

    All */*tumor_frac.txt files under mysetting.wxsPurityDir are converted by
    prepDB_xsq_purity.py into outFileN, which replaces the xsq_purity table.
    Every sample in xsq_purity whose sample_tag is exactly "XSeq_SS" then has
    that tag rewritten to "XSeq_SS,N".

    Parameters:
        outFileN: path of the generated data file.
        server: key into mysetting.mysqlH for the connection credentials.
        dbN: database name.
    """
    os.system('cat %s/*/*tumor_frac.txt | /usr/bin/python %s/Integration/prepDB_xsq_purity.py > %s' % (mysetting.wxsPurityDir, mysetting.SRC_HOME, outFileN))

    dbConf = mysetting.mysqlH[server]
    mymysql.reset_table(tableN='xsq_purity', dataFileN=outFileN, user=dbConf['user'], passwd=dbConf['passwd'], db=dbN, host=dbConf['host'])

    # add normal if missed
    (con, cursor) = mymysql.connectDB(user=dbConf['user'], passwd=dbConf['passwd'], db=dbN, host=dbConf['host'])

    cursor.execute('''SELECT DISTINCT samp_id FROM %s.xsq_purity''' % dbN)
    purityIdL = [row[0] for row in cursor.fetchall()]

    cursor.execute('''SELECT DISTINCT samp_id FROM %s.sample_tag WHERE tag = "XSeq_SS"''' % dbN)
    untaggedIdL = [row[0] for row in cursor.fetchall()]

    # Only samples present in both lists need their tag rewritten.
    for sid in [s for s in purityIdL if s in untaggedIdL]:
        print(sid)
        cursor.execute('''UPDATE %s.sample_tag SET samp_id="%s", tag="XSeq_SS,N" WHERE samp_id="%s" and tag="XSeq_SS"''' % (dbN, sid, sid))
def main(outFileName, dbNL, dTypeL, outDirName, outFileN):
    """Dump primary vs. recurrent methylation values per locus and run the KS script.

    For each database a temporary table of recurrent sample ids is built.
    Then, for every data type and every Infinium27k (loc, geneName) pair, the
    values of non-recurrent ('P') and recurrent ('R') samples are written to
    outFileName and unpaired_gene_methyl_ks.r is invoked.

    Parameters:
        outFileName: path of the table; it is rewritten for every locus.
        dbNL: database names; 'ircr1' and 'tcga1' are recognised.
        dTypeL: keys into the module-level dTypeH mapping (table, value column).
        outDirName: passed to the R script and used for error_kstest.txt.
        outFileN: passed to the R script as its second argument.
    """
    for dbN in dbNL:
        (con, cursor) = mymysql.connectDB(db=dbN)

        # NOTE(review): for any other dbN no t_recur table is created and the
        # value queries below would fail -- confirm callers only pass these two.
        if dbN == 'ircr1':
            cursor.execute('create temporary table t_recur select distinct samp_id from sample_tag where substring(tag,1,6)="pair_P"')
        elif dbN == 'tcga1':
            cursor.execute('create temporary table t_recur select distinct samp_id from sample_tag where tag="Recur"')

        # The locus list always comes from tcga1, whichever database is open.
        cursor.execute('SELECT distinct loc,geneName FROM tcga1.methyl_pId where platform="Infinium27k"')
        results1 = cursor.fetchall()

        for dType in dTypeL:
            for (loc, geneN) in results1:
                # NOTE(review): the original indentation was lost; the file is
                # assumed to be rewritten and the R script run once per locus.
                outFile = open(outFileName, 'w')
                outFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % ('dbT', 'dType', 'geneN', 'PR', 'sId', 'val', 'loc'))

                for PR in ('P', 'R'):
                    # 'P' selects samples NOT in t_recur, 'R' those in it.
                    cursor.execute('select pId,%s from %s where platform="Infinium27k" and loc="%s" and geneName="%s" and pId %s in (select samp_id from t_recur)' % (dTypeH[dType][1], dTypeH[dType][0], loc, geneN, 'not' if PR == 'P' else ''))
                    results = cursor.fetchall()

                    for (sId, val) in results:
                        outFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (dbH[dbN], dType, geneN, PR, sId, val, loc))

                outFile.close()

                # stdout and stderr of the script are appended to error_kstest.txt.
                os.system('Rscript %s/PrimRecur/unpaired_gene_methyl_ks.r %s %s &>> %s/error_kstest.txt' % (mysetting.SRC_HOME, outDirName, outFileN, outDirName))

        con.close()
def main():
    """Print the methylation/expression correlation of every probe of every gene.

    For each gene in tcga1.methyl_pId and each of its (platform, loc) probes,
    the correlation coefficient between the methylation fraction and
    log2(rpkm+1) over the shared samples is computed.  Per gene, the probes
    are printed in descending order of correlation as
    gene, platform, loc, sample count, r.
    """
    con, cursor = mymysql.connectDB(db='tcga1')

    cursor.execute('select distinct geneName from methyl_pId where geneName <>""')
    results = cursor.fetchall()

    for (gN, ) in results:
        cursor.execute('SELECT distinct platform,loc FROM methyl_pId where geneName ="%s"' % gN)
        results1 = cursor.fetchall()

        # Both temporary tables depend only on the gene, so they are built
        # once per gene instead of once per probe.
        cursor.execute('create temporary table t_rpkm as select samp_id,log2(rpkm+1) as rpkm_log from rpkm_gene_expr where gene_sym="%s"' % gN)
        cursor.execute('create temporary table t_methyl as select * from tcga1.methyl_pId where geneName="%s"' % gN)

        output = []

        try:
            for (plat, loc) in results1:
                cursor.execute('select fraction,rpkm_log from t_methyl, t_rpkm where platform="%s" and loc="%s" and pId=samp_id' % (plat, loc))
                results2 = cursor.fetchall()

                if len(results2) == 0:
                    continue

                methyl, expr = zip(*results2)
                r = numpy.corrcoef(methyl, expr)[0, 1]

                output.append((plat, loc, len(methyl), r))
        finally:
            cursor.execute('drop table t_rpkm,t_methyl')

        # Descending by r; key/reverse keeps the stable order of ties that the
        # former cmp-based sort produced.
        output.sort(key=lambda x: x[-1], reverse=True)

        for (plat, loc, n, r) in output:
            print('%s\t%s\t%s\t%s\t%.2f' % (gN, plat, loc, n, r))
def main():
    """Print, per platform and gene, the probe with the lowest correlation.

    For each platform in tcga1.methyl and each gene measured on it, the
    correlation coefficient between the methylation fraction and
    log2(rpkm+1) is computed for every probe location.  The probes are sorted
    in descending order of correlation and the last one is printed as
    platform, gene, loc, sample count, r.  Genes without any probe that
    shares samples with the expression data are skipped.
    """
    con,cursor = mymysql.connectDB(db='tcga1')

    cursor.execute('select distinct platform from methyl')
    platformL = cursor.fetchall()

    for (platform,) in platformL:
        cursor.execute('select distinct geneName from methyl where geneName <>"" and platform="%s"' % platform)
        results = cursor.fetchall()

        for (gN,) in results:
            cursor.execute('SELECT distinct loc FROM methyl where geneName ="%s" and platform="%s"' % (gN,platform))
            results1 = cursor.fetchall()

            # Both temporary tables depend only on gene and platform, so they
            # are built once per gene instead of once per probe.
            cursor.execute('create temporary table t_rpkm as select samp_id,log2(rpkm+1) as rpkm_log from rpkm_gene_expr where gene_sym="%s"' % gN)
            cursor.execute('create temporary table t_methyl as select * from methyl where geneName="%s" and platform="%s"' % (gN,platform))

            output = []

            try:
                for (loc,) in results1:
                    cursor.execute('select fraction,rpkm_log from t_methyl, t_rpkm where loc="%s" and pId=samp_id' % (loc))
                    results2 = cursor.fetchall()

                    if len(results2) == 0:
                        continue

                    methyl,expr = zip(*results2)
                    r = numpy.corrcoef(methyl,expr)[0,1]

                    output.append((platform,loc,len(methyl),r))
            finally:
                cursor.execute('drop table t_rpkm,t_methyl')

            # Explicit emptiness test instead of a bare except around the
            # indexing, which would also hide unrelated errors.
            if not output:
                continue

            # Descending by r; key/reverse keeps the stable order of ties that
            # the former cmp-based sort produced.
            output.sort(key=lambda x: x[-1], reverse=True)

            (plat, loc, n, r) = output[-1]

            print('%s\t%s\t%s\t%s\t%.2f' % (plat,gN,loc,n,r))
def load_mutation_all(inFileN, server='smc1', dbN='ircr1'):
    """Recreate mutation_normal from a data file, annotated with COSMIC.

    The table is dropped and created anew, the file is loaded into a
    temporary copy, and each row is left-joined to the cosmic table on
    position, alleles and gene.  Rows without a COSMIC match are inserted
    with an empty cosmic field; rows with a match are inserted with ch_aa and
    ch_type taken from the COSMIC entry.

    Parameters:
        inFileN: path of the data file for LOAD DATA LOCAL INFILE.
        server: key into mysetting.mysqlH for the connection credentials.
        dbN: database name.
    """
    (con, cursor) = mymysql.connectDB(user=mysetting.mysqlH[server]['user'], passwd=mysetting.mysqlH[server]['passwd'], db=dbN, host=mysetting.mysqlH[server]['host'])

    # NOTE(review): the existing table is dropped before the new data is
    # loaded, so a failure further down leaves the table empty or missing.
    cursor.execute('DROP TABLE IF EXISTS mutation_normal')

    stmt = '''
    CREATE TABLE mutation_normal (
        samp_id varchar(63) NOT NULL,
        chrom varchar(10) NOT NULL,
        chrSta int unsigned NOT NULL,
        chrEnd int unsigned NOT NULL,
        ref varchar(63) NOT NULL,
        alt varchar(63) NOT NULL,
        n_nReads_ref mediumint unsigned NOT NULL,
        n_nReads_alt mediumint unsigned NOT NULL,
        nReads_ref mediumint unsigned NOT NULL,
        nReads_alt mediumint unsigned NOT NULL,
        strand char(1) NOT NULL,
        gene_symL varchar(63),
        ch_dna varchar(127),
        ch_aa varchar(63),
        ch_type varchar(127),
        cosmic text,
        mutsig text,
        index (samp_id,gene_symL),
        index (samp_id,chrom,chrSta,chrEnd),
        index (samp_id,chrom,chrSta,ref,alt),
        index (samp_id,chrom,chrSta,chrEnd,ref,alt)
    )
    '''

    cursor.execute(stmt)

    # Stage the raw file in a temporary table with the same layout.
    cursor.execute('CREATE TEMPORARY TABLE tmp LIKE mutation_normal')
    cursor.execute('LOAD DATA LOCAL INFILE "%s" INTO TABLE tmp' % inFileN)

    # t2 = staged rows plus the COSMIC amino-acid change / type where the
    # position, alleles and gene all match (NULL otherwise).
    cursor.execute('CREATE TEMPORARY TABLE t2 SELECT tmp.samp_id,tmp.chrom,tmp.chrSta,tmp.chrEnd,tmp.ref,tmp.alt,tmp.n_nReads_ref,tmp.n_nReads_alt,tmp.nReads_ref,tmp.nReads_alt,tmp.strand,tmp.gene_symL,tmp.ch_dna,tmp.ch_aa,tmp.ch_type,cosmic.ch_aaL AS cosmic,cosmic.ch_typeL AS cosmic_type,tmp.mutsig FROM tmp LEFT JOIN cosmic ON tmp.chrom=cosmic.chrom AND tmp.chrSta=cosmic.chrSta AND tmp.chrEnd=cosmic.chrEnd AND tmp.ref=cosmic.ref AND tmp.alt=cosmic.alt AND tmp.gene_symL=cosmic.gene_symL')

    # Rows without a COSMIC match keep their own annotation; cosmic becomes "".
    cursor.execute('INSERT INTO mutation_normal SELECT samp_id,chrom,chrSta,chrEnd,ref,alt,n_nReads_ref,n_nReads_alt,nReads_ref,nReads_alt,strand,gene_symL,ch_dna,ch_aa,ch_type,"" AS cosmic,mutsig FROM t2 WHERE cosmic IS NULL')

    # Rows with a COSMIC match take ch_aa / ch_type from the COSMIC entry.
    cursor.execute('INSERT INTO mutation_normal SELECT samp_id,chrom,chrSta,chrEnd,ref,alt,n_nReads_ref,n_nReads_alt,nReads_ref,nReads_alt,strand,gene_symL,ch_dna,cosmic AS ch_aa,cosmic_type AS ch_type,cosmic,mutsig FROM t2 WHERE cosmic IS NOT NULL')
def main(newDBN='', server='smc1'):
    """Set up database newDBN as a structural clone of ircr1.

    Creates the database when absent, grants all privileges on it to
    'cancer'@'localhost', switches to it, and issues CREATE TABLE ... LIKE
    for every ircr1 table other than census, cosmic, rpkm_gene_expr_lg2 and
    tables with 'bak' in their name.

    Parameters:
        newDBN: name of the database to create.
        server: key into mysqlH used for the host name.
    """
    # NOTE(review): the credentials are hard-coded in this call.
    (con, cursor) = mymysql.connectDB(user='******', passwd='123456', host=mysqlH[server]['host'])

    cursor.execute('CREATE DATABASE IF NOT EXISTS %s' % newDBN)
    cursor.execute("GRANT ALL ON %s.* TO 'cancer'@'localhost'" % newDBN)
    cursor.execute('USE %s' % newDBN)

    cursor.execute('show tables from ircr1')

    excludedL = ['census', 'cosmic', 'rpkm_gene_expr_lg2']
    tableL = []
    for (name, ) in cursor.fetchall():
        if name in excludedL or 'bak' in name:
            continue
        tableL.append(name)

    for table in tableL:
        cursor.execute('CREATE TABLE IF NOT EXISTS %s LIKE ircr1.%s' % (table, table))