Ejemplo n.º 1
0
def main(outFileName,dbNL,dTypeL,outDirName,outFileN):
	"""Run the unpaired KS-test R script for every Infinium27k locus.

	For each database in dbNL, each data type in dTypeL and each distinct
	(loc, geneName) of tcga1.methyl_pId on the Infinium27k platform, the values
	of the samples outside t_recur ('P') and inside t_recur ('R') are written
	to outFileName, then unpaired_gene_methyl_ks.r is run through Rscript.

	outFileName -- tab-delimited table; rewritten from scratch for every locus
	dbNL -- database names; t_recur is only created for 'ircr1' and 'tcga1'
	dTypeL -- keys of dTypeH; entry [0] is the table, entry [1] the value column
	outDirName, outFileN -- forwarded to the R script; the Rscript output is
		appended to <outDirName>/error_kstest.txt
	"""

	rowFmt = '%s\t%s\t%s\t%s\t%s\t%s\t%s\n'

	for dbN in dbNL:

		(con,cursor) = mymysql.connectDB(db=dbN)

		# try/finally: the connection is released even when a query fails
		try:

			if dbN == 'ircr1':
				cursor.execute('create temporary table t_recur select distinct samp_id from sample_tag where substring(tag,1,6)="pair_P"')
			elif dbN == 'tcga1':
				cursor.execute('create temporary table t_recur select distinct samp_id from sample_tag where tag="Recur"')

			cursor.execute('SELECT distinct loc,geneName FROM tcga1.methyl_pId where platform="Infinium27k"')
			results1 = cursor.fetchall()

			for dType in dTypeL:

				for (loc,geneN) in results1:

					# 'with' guarantees the table is flushed and closed before Rscript runs
					with open(outFileName,'w') as outFile:

						outFile.write(rowFmt % ('dbT','dType','geneN','PR','sId','val','loc'))

						for PR in ('P','R'):

							# 'P' selects samples NOT in t_recur, 'R' the ones in it
							cursor.execute('select pId,%s from %s where platform="Infinium27k" and loc="%s" and geneName="%s" and pId %s in (select samp_id from t_recur)' % (dTypeH[dType][1],dTypeH[dType][0],loc,geneN,'not' if PR=='P' else ''))

							for (sId,val) in cursor.fetchall():
								outFile.write(rowFmt % (dbH[dbN],dType,geneN,PR,sId,val,loc))

					os.system('Rscript %s/PrimRecur/unpaired_gene_methyl_ks.r %s %s &>> %s/error_kstest.txt' % (mysetting.SRC_HOME,outDirName,outFileN,outDirName))

		finally:
			con.close()
Ejemplo n.º 2
0
def main(geneN1, geneN2, altType1='rpkm', altType2='rpkm', dset='tcga1'):
	"""Export the paired values of two gene features, mark IDH1 R132 mutants and plot.

	geneN1, geneN2 -- feature values to correlate
	altType1, altType2 -- keys of altTypeH, giving (table, value column,
		feature column, label) for each gene
	dset -- database name; also the key of dsetH passed to the R script

	The mysql client writes /var/www/html/tmp/correlation.txt; every line of it
	is copied to correlation_idh1.txt with a fourth column 'mut' or 'na', and
	correlation_idh1.r is run on the result.

	Returns True when the export or the R script exited with a non-zero status.
	"""

	tblN1,valN1,featN1,lab1 = altTypeH[altType1]
	tblN2,valN2,featN2,lab2 = altTypeH[altType2]

	con,cursor = mymysql.connectDB(db=dset)

	try:
		cursor.execute('select samp_id from mutation_normal where gene_symL="IDH1" and ch_aa like "%sR132%s"' % ('%','%'))
		# a set: membership is tested once per exported row below
		idh1 = set(x[0] for x in cursor.fetchall())
	finally:
		# nothing else uses this connection; the export below goes through the mysql client
		con.close()

	# NOTE(review): the gene names are interpolated into a shell command line. If they can
	# originate from an untrusted request they must be validated before reaching this call.
	ret1 = os.system('''(echo "SELECT t1.samp_id,t1.%s %s,t2.%s %s FROM %s t1, %s t2 where t1.%s='%s' and t2.%s='%s' and t1.samp_id=t2.samp_id" | mysql %s -u cancer --password=cancer > /var/www/html/tmp/correlation.txt) &> /var/www/html/tmp/correaltion.err''' % \
		(valN1,geneN1, valN2,geneN2, tblN1,tblN2, featN1,geneN1, featN2,geneN2, dset))

	with open('/var/www/html/tmp/correlation.txt') as f:
		with open('/var/www/html/tmp/correlation_idh1.txt','w') as fo:

			for line in f:
				# strip only the line terminator; slicing off the last character
				# would eat data when the final line has no newline
				(sId, t1, t2) = line.rstrip('\n').split('\t')
				fo.write('%s\t%s\t%s\t%s\n' % (sId,t1,t2,'mut' if sId in idh1 else 'na'))

	ret2 = os.system('Rscript correlation_idh1.r "%s" "%s" "%s" png &>> /var/www/html/tmp/correaltion.err' % (dsetH[dset],lab1,lab2))

	return ret1!=0 or ret2!=0
Ejemplo n.º 3
0
def post_s_rsq2expr(baseDir, server='smc1', dbN='ihlee_test'):
	"""Build the expression table of one RNA-Seq sample and load it into rpkm_gene_expr.

	baseDir -- sample directory; its last path component is the sample name
	server -- key of mysqlH selecting the MySQL account and host
	dbN -- target database; for 'ihlee_test' and 'ircr1' the .gct/.dat files go
		to /EQL1/NSL/RNASeq/results/expression, otherwise into baseDir
	"""
	sampN = baseDir.split('/')[-1]
	# sample id = sample name without its last four characters ('_RSq'),
	# with '-' and '.' replaced by '_'
	sid = sampN[:-4].replace('-','_').replace('.','_')

	if dbN in ['ihlee_test','ircr1']:
		exprDirN = '/EQL1/NSL/RNASeq/results/expression'
	else:
		exprDirN = baseDir
	gctFileN = '%s/%s.gct' % (exprDirN, sampN)
	datFileN = '%s/%s.dat' % (exprDirN, sampN)
	print('%s %s' % (sampN, gctFileN))

	# merge the per-sample *.rpkm files into a .gct, then turn it into a loadable .dat
	rpkm_process.rpkm_process(inputDirN=baseDir, filePattern='*.rpkm', sampRegex=r'(.*)_RSq\.rpkm', outputFileN=gctFileN)
	prepDB_rpkm_gene_expr.main(inGctFileName=gctFileN, geneList=[], samplePrefix='', outDatFileName=datFileN)

	# replace the sample's rows and rebuild the log2 view
	acct = mysqlH[server]
	(con, cursor) = mymysql.connectDB(user=acct['user'],passwd=acct['passwd'],db=dbN,host=acct['host'])
	for stmt in (
		'DELETE FROM rpkm_gene_expr WHERE samp_id="%s"' % sid,
		'LOAD DATA LOCAL INFILE "%s" INTO TABLE rpkm_gene_expr' % datFileN,
		'DROP VIEW IF EXISTS rpkm_gene_expr_lg2',
		'CREATE VIEW rpkm_gene_expr_lg2 AS SELECT samp_id,gene_sym,log2(rpkm+1) AS lg2_rpkm FROM rpkm_gene_expr',
	):
		cursor.execute(stmt)

	# tag the sample as having RNA-Seq data unless it already carries the tag
	cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
	if not cursor.fetchall():
		cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)

	# draw the box plot for this sample
	boxplot_expr_cs_gene.main(sid, '/EQL1/NSL/RNASeq/results/expression')
Ejemplo n.º 4
0
def main(inDir, outDir, pbs=False, server='smc1'):
	"""Run cn_corr.py on every .ngCGH file of inDir, passing each sample's tumor fraction.

	inDir -- directory holding the <sample>.ngCGH inputs
	outDir -- directory receiving <sample>.corr.ngCGH and <sample>.cn_corr.qlog
	pbs -- when true the command is submitted through qsub, otherwise it is
		run directly with its output redirected to the log file
	server -- key of mysetting.mysqlH selecting the MySQL account and host

	Files whose name does not follow <id>_<T...>_<platform>.ngCGH, and samples
	without a usable tumor_frac in xsq_purity, are reported and skipped.
	"""

	inFileNL = [x for x in os.listdir(inDir) if re.match(r'.*\.ngCGH', x)]

	print('Files: %s' % inFileNL)

	acct = mysetting.mysqlH[server]
	(con, cursor) = mymysql.connectDB(user=acct['user'], passwd=acct['passwd'], db='ircr1', host=acct['host'])
	for inFileN in inFileNL:
		sampN = re.match(r'(.*)\.ngCGH', inFileN).group(1)
		nameMatch = re.match(r'(.*)_(T.{,2}.*)_[STKN]{2}\.ngCGH', inFileN)
		if nameMatch is None:
			# previously an AttributeError that stopped the whole batch
			print('Skipping %s: unexpected file name' % inFileN)
			continue
		(sid, tag) = nameMatch.groups()
		if tag != 'T':
			sid = '%s_%s' % (sid, tag)
		cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % sid)
		result = cursor.fetchall()
		# no row raised IndexError before, and an 'ND' value cannot be converted to int
		if len(result) == 0 or result[0][0] == 'ND':
			print('Skipping %s: no tumor fraction for %s' % (inFileN, sid))
			continue
		purity = int(result[0][0])

		iprefix = '%s/%s' % (inDir,sampN)
		oprefix = '%s/%s' % (outDir,sampN)
		cmd = '/usr/bin/python %s/NGS/copynumber/cn_corr.py -i %s.ngCGH -o %s.corr.ngCGH -p %s' % (mysetting.SRC_HOME, iprefix, oprefix, purity)
		log = '%s.cn_corr.qlog' % (oprefix)
		print(cmd)
		if pbs:
			os.system('echo "%s" | qsub -N %s -o %s -j oe' % (cmd, sampN, log))
		else:
			os.system('(%s) &> %s' % (cmd, log))
Ejemplo n.º 5
0
def post_xsq2purity(outFileN, server='smc1', dbN='ircr1'):
    """Rebuild xsq_purity from the tumor_frac.txt files and update matching sample tags.

    outFileN -- file receiving the output of prepDB_xsq_purity.py; it is then
        handed to mymysql.reset_table for table xsq_purity
    server -- key of mysetting.mysqlH selecting the MySQL account and host
    dbN -- database holding xsq_purity and sample_tag
    """
    os.system(
        'cat %s/*/*tumor_frac.txt | /usr/bin/python %s/Integration/prepDB_xsq_purity.py > %s'
        % (mysetting.wxsPurityDir, mysetting.SRC_HOME, outFileN))

    acct = mysetting.mysqlH[server]
    mymysql.reset_table(tableN='xsq_purity', dataFileN=outFileN,
                        user=acct['user'], passwd=acct['passwd'],
                        db=dbN, host=acct['host'])

    # add normal if missed
    (con, cursor) = mymysql.connectDB(user=acct['user'], passwd=acct['passwd'],
                                      db=dbN, host=acct['host'])

    cursor.execute('''SELECT DISTINCT samp_id FROM %s.xsq_purity''' % dbN)
    purityIdL = [row for (row, ) in cursor.fetchall()]

    cursor.execute(
        '''SELECT DISTINCT samp_id FROM %s.sample_tag WHERE tag = "XSeq_SS"'''
        % dbN)
    taggedIdL = [row for (row, ) in cursor.fetchall()]

    for sid in purityIdL:
        if sid not in taggedIdL:
            continue
        # a sample with a purity estimate gets its plain "XSeq_SS" tag rewritten to "XSeq_SS,N"
        print(sid)
        cursor.execute(
            '''UPDATE %s.sample_tag SET samp_id="%s", tag="XSeq_SS,N" WHERE samp_id="%s" and tag="XSeq_SS"'''
            % (dbN, sid, sid))
Ejemplo n.º 6
0
def main(geneN,dType,dbN='ircr1',outFileDir=None):
	"""Write the recurrent-minus-primary differences of one gene for all sample pairs.

	geneN -- gene symbol matched against gene_sym
	dType -- key of dTypeH; entry [0] is the table, entry [1] the value column
	dbN -- database to query; also the key of dbH used in the file name
	outFileDir -- directory for <geneN>_<dType>_<dbH[dbN]>_paired.dst2;
		sys.stdout is used when it is empty or None

	Output is three lines: '<geneN>-<dType>-<dbN>\\t<number of pairs>', the
	comma-separated differences (two decimals), and the comma-separated
	'<primary>_<recurrent>' sample ids. Pairs lacking a value on either side
	are left out.
	"""

	if outFileDir:
		outFile = open('%s/%s_%s_%s_paired.dst2' % (outFileDir,geneN,dType,dbH[dbN]),'w')
	else:
		outFile = sys.stdout

	valueSql = 'select %s from %s where gene_sym="%s" and samp_id="%s"'

	try:

		(con,cursor) = mymysql.connectDB(db=dbN)

		vL = []; sIdL_pair = []

		try:

			cursor.execute('select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R"')
			sIdL_prim = [x for (x,) in cursor.fetchall()]

			for sId_p in sIdL_prim:

				cursor.execute('select samp_id from sample_tag where tag="pair_P:%s"' % sId_p)
				pairRow = cursor.fetchone()

				# fetchone() returns None when no sample carries the partner tag;
				# such a primary is skipped like one that has no value
				if pairRow is None:
					continue

				(sId_r,) = pairRow

				cursor.execute(valueSql % (dTypeH[dType][1],dTypeH[dType][0],geneN,sId_p))
				r_p = cursor.fetchone()

				cursor.execute(valueSql % (dTypeH[dType][1],dTypeH[dType][0],geneN,sId_r))
				r_r = cursor.fetchone()

				if r_p and r_r:
					vL.append("%.2f" % (r_r[0]-r_p[0],))
					sIdL_pair.append((sId_p,sId_r))

		finally:
			con.close()

		outFile.write('%s-%s-%s\t%s\n' % (geneN,dType,dbN,len(sIdL_pair)))
		outFile.write(','.join(vL)+'\n')
		outFile.write(','.join(['%s_%s' % (x,y) for (x,y) in sIdL_pair])+'\n')

	finally:
		# the file opened above used to be left open; stdout must stay usable
		if outFile is not sys.stdout:
			outFile.close()
Ejemplo n.º 7
0
def main():
	"""Print, per methylation platform and locus, the correlation with MGMT expression.

	For every distinct (platform, loc) of methyl_pId the methylation fraction is
	paired with log2(rpkm+1) of MGMT for the same sample and their correlation
	coefficient is computed. Rows are printed as
	'platform\\tloc\\tn\\tr' in descending order of r.
	"""

	con,cursor = mymysql.connectDB(db='tcga1')

	output = []

	try:

		cursor.execute('SELECT distinct platform,loc FROM methyl_pId')
		results1 = cursor.fetchall()

		for (plat,loc) in results1:

			cursor.execute('select fraction,log2(rpkm+1) from methyl_pId, rpkm_gene_expr where platform="%s" and loc="%s" and gene_sym="MGMT" and pId=samp_id' % (plat,loc))
			results2 = cursor.fetchall()

			# with no matched sample zip(*results2) has nothing to unpack
			# and there is no correlation to report
			if not results2:
				continue

			methyl,expr = zip(*results2)

			r = numpy.corrcoef(methyl,expr)[0,1]

			output.append((plat,loc,len(methyl),r))

	finally:
		con.close()

	# highest correlation first; equal values keep their query order
	output.sort(key=lambda rec: rec[-1], reverse=True)

	for (plat,loc,n,r) in output:
		print('%s\t%s\t%s\t%.2f' % (plat,loc,n,r))
Ejemplo n.º 8
0
def prep_single(outFileN, server='smc1', dbN='ircr1'):
    """Tag samples that have cosmic.dat files and concatenate those files into outFileN.

    outFileN -- file receiving the output of prepDB_mutscan.py
    server -- key of mysetting.mysqlH selecting the MySQL account and host
    dbN -- database holding sample_tag

    Files whose path contains '_B_' are ignored. A sample that has no
    "XSeq_*" tag yet gets "XSeq_<platform>", taken from the file name.
    """
    acct = mysetting.mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=acct['user'],
                                      passwd=acct['passwd'],
                                      db=dbN,
                                      host=acct['host'])
    cosmicL = []
    for dirN in mysetting.wxsMutscanDirL:
        found = glob('%s/*/*cosmic.dat' % dirN) + glob('%s/*cosmic.dat' % dirN)
        cosmicL += [x for x in found if '_B_' not in x]

    cursor.execute(
        'SELECT DISTINCT samp_id FROM sample_tag WHERE tag LIKE "XSeq_%%"')
    taggedIds = set(res[0] for res in cursor.fetchall())

    # (sample, tag) rows added by this run; a sample with several cosmic files
    # of the same platform used to get the same row inserted once per file
    inserted = set()
    for cosmic in cosmicL:
        (sid, postfix,
         platform) = re.match('(.*)_([XT].{,2})_([STKN]{2})_cosmic.dat',
                              os.path.basename(cosmic)).groups()
        if postfix not in ['T', 'RSq']:
            sid = '%s_%s' % (sid, postfix)
        if sid in taggedIds:
            continue
        tag = 'XSeq_%s' % platform
        if (sid, tag) in inserted:
            continue
        print('%s %s' % (sid, cosmic))
        cursor.execute(
            'INSERT INTO sample_tag SET samp_id="%s", tag="%s"' %
            (sid, tag))
        inserted.add((sid, tag))

    cmd = 'cat %s | /usr/bin/python %s/Integration/prepDB_mutscan.py > %s' % (
        ' '.join(cosmicL), mysetting.SRC_HOME, outFileN)
    os.system(cmd)
Ejemplo n.º 9
0
def post_rsq2eiJunc(dirN, server='smc1', dbN='ihlee_test', sampL=[]):
	"""Filter, convert and load the exon-intron junction data of every sample under dirN.

	dirN -- directory whose sub-directories are named after the samples
	server -- key of mysqlH selecting the MySQL account and host
	dbN -- target database; for 'ihlee_test' and 'ircr1' the intermediate files
		go to BASE/eiJunc, otherwise into the sample directory
	sampL -- when not empty, only these sample ids are processed
	"""
	acct = mysqlH[server]
	(con, cursor) = mymysql.connectDB(user=acct['user'],passwd=acct['passwd'],db=dbN,host=acct['host'])

	sampNL = [x for x in os.listdir(dirN) if os.path.isdir(dirN + '/' + x)]
	for sampN in sampNL:
		baseDir = dirN + '/' + sampN
		# RNA-Seq sample names end in '_RSq'; drop it and normalise separators
		sid = sampN[:-4].replace('.','_').replace('-','_')
		if not (sampL == [] or sid in sampL):
			continue
		print('%s %s' % (sampN, sid))

		# make sure sample_tag records that this sample has RNA-Seq
		cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
		if not cursor.fetchall():
			cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)

		ei_dat = glob('%s/%s*ei.dat' % (baseDir, sampN))[0]

		# both intermediate files share one prefix that depends on the database
		if dbN in ['ihlee_test','ircr1']:
			outPrefix = '%s/eiJunc/splice_eiJunc_%s' % (BASE, sampN)
		else:
			outPrefix = '%s/splice_eiJunc_%s' % (baseDir, sampN)
		splice_eiJunc_txt = outPrefix + '_ft.txt'
		splice_eiJunc_dat = outPrefix + '.dat'

		ei_junc_filter.main(overlap=10, minNReads=1, inFileN=ei_dat, outFileN=splice_eiJunc_txt)
		prepDB_splice_eiJunc.main(minNReads=1, sampNamePat=RSQPattern, geneList=[], inFileN=splice_eiJunc_txt, outFileN=splice_eiJunc_dat)

		# replace the sample's rows
		cursor.execute('DELETE FROM splice_eiJunc WHERE samp_id="%s"' % sid)
		cursor.execute('LOAD DATA LOCAL INFILE "%s" IGNORE INTO TABLE splice_eiJunc' % splice_eiJunc_dat)

	makeDB_splice_AF.eiJunc(dbN=dbN, cursor=cursor)
Ejemplo n.º 10
0
def load_mutation_all(inFileN, server='smc1', dbN='ircr1'):
	"""Recreate table mutation_normal and fill it from inFileN, annotated with cosmic.

	inFileN -- data file loaded with LOAD DATA LOCAL INFILE
	server -- key of mysetting.mysqlH selecting the MySQL account and host
	dbN -- database in which mutation_normal is dropped and rebuilt
	"""
	(con, cursor) = mymysql.connectDB(user=mysetting.mysqlH[server]['user'],passwd=mysetting.mysqlH[server]['passwd'],db=dbN,host=mysetting.mysqlH[server]['host'])

	# the table is rebuilt from scratch on every call
	cursor.execute('DROP TABLE IF EXISTS mutation_normal')
	stmt = '''
	CREATE TABLE mutation_normal (
		samp_id varchar(63) NOT NULL,
		chrom varchar(10) NOT NULL,
		chrSta int unsigned NOT NULL,
		chrEnd int unsigned NOT NULL,
		ref varchar(63) NOT NULL,
		alt varchar(63) NOT NULL,
		n_nReads_ref mediumint unsigned NOT NULL,
		n_nReads_alt mediumint unsigned NOT NULL,
		nReads_ref mediumint unsigned NOT NULL,
		nReads_alt mediumint unsigned NOT NULL,
		strand char(1) NOT NULL,
		gene_symL varchar(63),
		ch_dna varchar(127),
		ch_aa varchar(63),
		ch_type varchar(127),
		cosmic text,
		mutsig text,
		index (samp_id,gene_symL),
		index (samp_id,chrom,chrSta,chrEnd),
		index (samp_id,chrom,chrSta,ref,alt),
		index (samp_id,chrom,chrSta,chrEnd,ref,alt)
	)
	'''
	cursor.execute(stmt)
	# stage the file in a temporary table with the same layout
	cursor.execute('CREATE TEMPORARY TABLE tmp LIKE mutation_normal')
	cursor.execute('LOAD DATA LOCAL INFILE "%s" INTO TABLE tmp' % inFileN)
	# left join against cosmic on position, alleles and gene; unmatched rows get NULL cosmic columns
	cursor.execute('CREATE TEMPORARY TABLE t2 SELECT tmp.samp_id,tmp.chrom,tmp.chrSta,tmp.chrEnd,tmp.ref,tmp.alt,tmp.n_nReads_ref,tmp.n_nReads_alt,tmp.nReads_ref,tmp.nReads_alt,tmp.strand,tmp.gene_symL,tmp.ch_dna,tmp.ch_aa,tmp.ch_type,cosmic.ch_aaL AS cosmic,cosmic.ch_typeL AS cosmic_type,tmp.mutsig FROM tmp LEFT JOIN cosmic ON tmp.chrom=cosmic.chrom AND tmp.chrSta=cosmic.chrSta AND tmp.chrEnd=cosmic.chrEnd AND tmp.ref=cosmic.ref AND tmp.alt=cosmic.alt AND tmp.gene_symL=cosmic.gene_symL')
	# rows without a cosmic match keep their own ch_aa/ch_type and get an empty cosmic value
	cursor.execute('INSERT INTO mutation_normal SELECT samp_id,chrom,chrSta,chrEnd,ref,alt,n_nReads_ref,n_nReads_alt,nReads_ref,nReads_alt,strand,gene_symL,ch_dna,ch_aa,ch_type,"" AS cosmic,mutsig FROM t2 WHERE cosmic IS NULL')
	# rows with a cosmic match take ch_aa and ch_type from the cosmic annotation
	cursor.execute('INSERT INTO mutation_normal SELECT samp_id,chrom,chrSta,chrEnd,ref,alt,n_nReads_ref,n_nReads_alt,nReads_ref,nReads_alt,strand,gene_symL,ch_dna,cosmic AS ch_aa,cosmic_type AS ch_type,cosmic,mutsig FROM t2 WHERE cosmic IS NOT NULL')
Ejemplo n.º 11
0
def prep_somatic_new(outFileN, server='smc1', dbN='ircr1'):
	"""Run VEP on the somatic calls, build the mutation table file and update sample tags.

	outFileN -- file receiving the output of prepDB_mutation_xsq2mut_tmp.py
	server -- key of mysetting.mysqlH selecting the MySQL account and host
	dbN -- database holding sample_tag

	Every sample id found in the first column of outFileN gets ',N' appended
	to its "XSeq_*" tag, or a new "XSeq_SS,N" tag when it has none. The process
	exits with status 1 when a sample carries more than one "XSeq_*" tag.
	"""
	## run VEP
	vep_batch.main(glob('/EQL3/pipeline/somatic_mutation/*S'), postfixL=['.mutect_rerun_filter.vcf','.mutect_filter.vcf','.mutect_pair_filter.vcf','.indels_pair_filter.vcf'], fork=True)

	## make table
	DIR='/EQL3/pipeline/somatic_mutation'
	cmd = 'cat %s/*/*filter_vep.dat | /usr/bin/python %s/Integration/prepDB_mutation_xsq2mut_tmp.py > %s' % (DIR, mysetting.SRC_HOME, outFileN)
	os.system(cmd)

	## update tag
	sidL = [x.rstrip() for x in os.popen('cut -f 1 %s | sort | uniq' % outFileN).readlines()]
	acct = mysetting.mysqlH[server]
	(con, cursor) = mymysql.connectDB(user=acct['user'],passwd=acct['passwd'],db=dbN,host=acct['host'])
	for sid in sidL:
		cursor.execute('SELECT samp_id,tag FROM sample_tag WHERE samp_id="%s" AND tag LIKE "XSeq_%%"' % sid)
		results = cursor.fetchall()

		if len(results) > 1:
			sys.stderr.write('Duplication in sample_tag: %s\n' % sid)
			sys.exit(1)

		if len(results) == 0:
			cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="XSeq_SS,N"' % sid)
			continue

		old_tag = results[0][1]
		# already marked: appending again would grow the tag to '...,N,N' on every re-run
		if old_tag.endswith(',N'):
			continue
		new_tag = '%s,N' % old_tag
		cursor.execute('UPDATE sample_tag SET samp_id="%s", tag="%s" WHERE samp_id="%s" AND tag="%s"' % (sid,new_tag, sid,old_tag))
Ejemplo n.º 12
0
def main(inDir, outDir, cnDir, pbs=False, server='smc1'):
	"""Run mut_clonality.py on every .mutect file of inDir that has a purity estimate.

	inDir -- directory holding <sample>.mutect (*.union_pos.mutect is ignored)
	outDir -- directory receiving <sample>.mutect_cl.dat and <sample>.mutect_cl.log
	cnDir -- directory holding <sample>/<sample>.ngCGH.seg
	pbs -- when true the command is submitted through qsub, otherwise it is
		run directly with its output redirected to the log file
	server -- key of mysetting.mysqlH selecting the MySQL account and host

	Samples without a tumor_frac row, with tumor_frac 'ND', or whose output
	file already exists are skipped. A missing segmentation file stops the
	process with exit status 1.
	"""

	inFileNL = [x for x in os.listdir(inDir) if re.match(r'(.*)\.mutect$', x) and not re.match(r'(.*)\.union_pos\.mutect$', x)]

	print('Files: %s' % inFileNL)

	acct = mysetting.mysqlH[server]
	(con, cursor) = mymysql.connectDB(user=acct['user'], passwd=acct['passwd'], db='ircr1', host=acct['host'])
	for inFileN in inFileNL:
		sampN = re.match(r'(.*)\.mutect', inFileN).group(1)
		nameMatch = re.match(r'(.*)_(T.{,2})_[STKN]{2}\.mutect', inFileN)
		if nameMatch is None:
			# previously an AttributeError that stopped the whole batch
			print('Skipping %s: unexpected file name' % inFileN)
			continue
		(sid, postfix) = nameMatch.groups()
		if postfix != 'T':
			sid = '%s_%s' % (sid, postfix)
		cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % sid)
		result = cursor.fetchall()
		if len(result) == 0 or result[0][0] == 'ND':
			continue
		purity = int(result[0][0])
		iprefix = '%s/%s' % (inDir, sampN)
		oprefix = '%s/%s' % (outDir, sampN)
		segFile = '%s/%s/%s.ngCGH.seg' % (cnDir, sampN, sampN)

		# an existing result means the sample is done; it used to fall into the
		# "missing segmentation file" branch below and abort the remaining samples
		if os.path.isfile('%s.mutect_cl.dat' % (oprefix)):
			continue

		if not os.path.isfile(segFile):
			print("Missing copy number segmentation file!")
			sys.exit(1)

		cmd = '/usr/bin/python %s/NGS/mutation/mut_clonality.py -s %s -i %s.mutect -o %s.mutect_cl.dat -p %s' % (mysetting.SRC_HOME, segFile, iprefix, oprefix, purity)
		log = '%s.mutect_cl.log' % (oprefix)

		if pbs:
			os.system('echo "%s" | qsub -N %s -o %s -j oe' % (cmd, sampN, log))
		else:
			os.system('(%s) &> %s' % (cmd, log))
Ejemplo n.º 13
0
def main(dbN='ircr1', cursor=None):
	"""Rebuild table mutation_rxsq from mutation_normal and mutation_rsq.

	dbN -- database to connect to when no cursor is supplied
	cursor -- cursor to run the statements on; when None a new connection is
		opened through mymysql.connectDB
	"""
	if cursor == None:
		(con,cursor) = mymysql.connectDB(db=dbN)

	cursor.execute('drop table if exists mutation_rxsq')

	# left join UNION right join: every mutation present in either table; the
	# read-count columns of the side without a matching row are NULL
	cursor.execute('create temporary table t_m as \
		select n.samp_id,n.chrom,n.chrSta,n.chrEnd,n.ref,n.alt,n.n_nReads_ref,n.n_nReads_alt,n.nReads_ref,n.nReads_alt,r.r_nReads_ref,r.r_nReads_alt,n.strand,n.gene_symL,n.ch_dna,n.ch_aa,n.ch_type,n.cosmic,n.mutsig \
		from mutation_normal n left join mutation_rsq r \
		on n.samp_id = r.samp_id and n.chrom=r.chrom and n.chrSta=r.chrSta and n.ref=r.ref and n.alt=r.alt \
		union \
		select r.samp_id,r.chrom,r.chrSta,r.chrEnd,r.ref,r.alt,n.n_nReads_ref,n.n_nReads_alt,n.nReads_ref,n.nReads_alt,r.r_nReads_ref,r.r_nReads_alt,r.strand,r.gene_symL,r.ch_dna,r.ch_aa,r.ch_type,r.cosmic,r.mutsig \
		from mutation_normal n right join mutation_rsq r \
		on n.samp_id = r.samp_id and n.chrom=r.chrom and n.chrSta=r.chrSta and n.ref=r.ref and n.alt=r.alt')

	# one row per (samp_id,chrom,chrSta,ref,alt,ch_aa)
	# NOTE(review): which row of a group survives presumably relies on MySQL keeping the
	# first row of the ordered subquery -- confirm for the server version in use
	cursor.execute('create table mutation_rxsq as \
		select * from (select * from t_m order by ch_dna desc) as i group by samp_id,chrom,chrSta,ref,alt,ch_aa')

	# read counts missing on one side of the join become 0
	cursor.execute('update mutation_rxsq set r_nReads_ref = 0, r_nReads_alt = 0 where r_nReads_ref is null')
	cursor.execute('update mutation_rxsq set n_nReads_ref = 0, n_nReads_alt = 0 where n_nReads_ref is null')
	cursor.execute('update mutation_rxsq set nReads_ref = 0, nReads_alt = 0 where nReads_ref is null')

	cursor.execute('alter table mutation_rxsq add index (samp_id,gene_symL)')
	cursor.execute('alter table mutation_rxsq add index (samp_id,chrom,chrSta,chrEnd)')
	cursor.execute('alter table mutation_rxsq add index (samp_id,chrom,chrSta,ref,alt)')
	cursor.execute('alter table mutation_rxsq add index (samp_id,chrom,chrSta,chrEnd,ref,alt)')
	
	cursor.execute('drop temporary table if exists t_m')
Ejemplo n.º 14
0
def post_s_rsq2mut(baseDir, server='smc1', dbN='ihlee_test'):
    """Convert one RNA-Seq sample's cosmic mutation file and load it into mutation_rsq.

    baseDir -- sample directory; its last path component is the sample name
    server -- key of mysqlH selecting the MySQL account and host
    dbN -- target database; for 'ihlee_test' and 'ircr1' the .dat file goes to
        /EQL1/NSL/RNASeq/results/mutation, otherwise into baseDir

    Nothing is done when <baseDir>/<sample>_splice_cosmic.dat does not exist.
    """
    sampN = baseDir.split('/')[-1]
    sid = sampN[:-4].replace('.', '_').replace('-', '_')
    print('%s %s' % (sampN, sid))

    cosmicDatFileN = '%s/%s_splice_cosmic.dat' % (baseDir, sampN)
    datDirN = baseDir
    if dbN in ['ihlee_test', 'ircr1']:
        datDirN = '/EQL1/NSL/RNASeq/results/mutation'
    datFileN = '%s/%s.dat' % (datDirN, sampN)

    if not os.path.isfile(cosmicDatFileN):
        return

    prepDB_mutscan.main(sampNamePat=('(.*)_(RSq)', ''),
                        geneList=[],
                        inFileN=cosmicDatFileN,
                        outFileN=datFileN)

    # replace the sample's rows
    acct = mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=acct['user'],
                                      passwd=acct['passwd'],
                                      db=dbN,
                                      host=acct['host'])
    cursor.execute('DELETE FROM mutation_rsq WHERE samp_id="%s"' % sid)
    cursor.execute('LOAD DATA LOCAL INFILE "%s" INTO TABLE mutation_rsq' %
                   datFileN)

    # make sure sample_tag records that this sample has RNA-Seq
    cursor.execute(
        'SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
    if not cursor.fetchall():
        cursor.execute(
            'INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)
Ejemplo n.º 15
0
def main(outFileName):
	"""Write the primary and recurrent values of every gene and data type for all sample pairs.

	outFileName -- tab-delimited output with the columns
		dType, geneN, sId_p, sId_r, val_p, val_r

	Data types come from dTypeL, genes from geneL; dTypeH[dType] gives
	(table, name column, value column). Pairs lacking a value on either side
	are left out.
	"""

	(con,cursor) = mymysql.connectDB(db='ircr1')

	try:

		cursor.execute('select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R" and samp_id!="S520" and samp_id!="S042"')
		sIdL_prim = [x for (x,) in cursor.fetchall()]

		# the partner of a primary depends on neither dType nor gene: look it up
		# once instead of once per (dType, gene) combination
		pairL = []

		for sId_p in sIdL_prim:

			cursor.execute('select samp_id from sample_tag where tag="pair_P:%s"' % sId_p)
			pairRow = cursor.fetchone()

			# fetchone() returns None when no sample carries the partner tag
			if pairRow is not None:
				pairL.append((sId_p,pairRow[0]))

		with open(outFileName,'w') as outFile:

			outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % ('dType','geneN','sId_p','sId_r','val_p','val_r'))

			for dType in dTypeL:

				(tbl,col_name,col_val) = dTypeH[dType]

				for geneN in geneL:

					for (sId_p,sId_r) in pairL:

						cursor.execute('select %s from %s where %s="%s" and samp_id="%s"' % (col_val,tbl,col_name,geneN,sId_p))
						r_p = cursor.fetchone()

						cursor.execute('select %s from %s where %s="%s" and samp_id="%s"' % (col_val,tbl,col_name,geneN,sId_r))
						r_r = cursor.fetchone()

						if r_p and r_r:
							outFile.write('%s\t%s\t%s\t%s\t%.2f\t%.2f\n' % (dType,geneN,sId_p,sId_r,r_p[0],r_r[0]))

	finally:
		con.close()
Ejemplo n.º 16
0
def main(locFileName):
	"""Rebuild table methyl_gene from the loci listed in locFileName.

	locFileName -- tab-delimited file with the columns platform, gene, loc, n, r

	Only lines whose r is not '-nan' and is <= -0.25 are used. For each of them
	the matching rows of methyl are averaged per pId and stored in methyl_gene
	together with n and r.
	"""

	# statement text, embedded tabs included, is exactly what was sent before
	aggSql = '\t\t\t\t\tselect platform,pId,geneName,loc,sum(fraction)/count(fraction) fraction, n, r from t_methyl group by pId'

	con,cursor = mymysql.connectDB(db='tcga1')

	# the table was created inside a bare try/except that treated ANY failure as
	# "table already exists"; an explicit flag lets real errors surface
	created = False

	with open(locFileName) as locFile:

		cursor.execute('drop table if exists methyl_gene')

		for line in locFile:

			(plat, gN, loc, n, r) = line.rstrip('\n').split('\t')

			if not (r != '-nan' and float(r) <= -0.25):
				continue

			cursor.execute('create temporary table t_methyl as ' \
				'\t\t\t\tselect * from methyl where geneName="%s" and platform="%s" and loc="%s"' % (gN,plat,loc))

			cursor.execute('alter table t_methyl add column r float, add column n smallint unsigned')
			cursor.execute('update t_methyl set n=%s, r=%s' % (int(n),float(r)))

			if created:
				cursor.execute('insert into methyl_gene ' + aggSql)
			else:
				cursor.execute('create table methyl_gene as ' + aggSql)
				created = True

			cursor.execute('drop table t_methyl')

	if not created:
		# no locus qualified, so there is no table to index
		print('methyl_gene was not created: no locus with r <= -0.25 in %s' % locFileName)
		return

	cursor.execute('alter table methyl_gene add index (geneName)')
	cursor.execute('alter table methyl_gene add index (pId)')
Ejemplo n.º 17
0
def has_acgh(samp_id, dbN='ircr1'):
	"""Return True when table array_cn of database dbN has at least one row for samp_id."""
	(con, cursor) = mymysql.connectDB(db=dbN)
	try:
		cursor.execute('select count(distinct samp_id) from array_cn where samp_id="%s"' % samp_id)
		return cursor.fetchone()[0] > 0
	finally:
		# every call opens its own connection; close it instead of leaking one per lookup
		con.close()
Ejemplo n.º 18
0
def main(dataN='TCGA_GBM', query=None):
    """Write the survival table for one gene alteration and run the two R scripts.

    dataN -- not used by this function
    query -- 'geneN:altType:feature:cutoff'; altType is a key of altTypeH,
        whose entry [0] is the table and entry [1] the value column

    Each record is labelled low (value below the lower percentile threshold),
    high (value at or above the upper one) or middle, and written to
    /var/www/html/survival/survival.mvc.

    Returns True when either R script exited with a non-zero status.
    Raises TypeError when query is missing and ValueError when it does not
    have four ':'-separated fields or its cutoff is not numeric.
    """
    # 'query' had no default after a defaulted parameter, which is a syntax
    # error; the default keeps positional calls working and is rejected here
    if query is None:
        raise TypeError(
            'main() requires a query of the form geneN:altType:feature:cutoff')

    geneN, altType, feature, cutoff = query.split(':')

    def _number(text):
        value = float(text)
        return int(value) if value.is_integer() else value

    # NOTE(review): the cutoff field arrives as text but is used as two numeric
    # percentiles below. It is assumed to look like 'low,high' (a single number
    # is used for both ends) -- confirm against the callers.
    cutoff = [_number(c) for c in cutoff.split(',')]
    if len(cutoff) == 1:
        cutoff = cutoff * 2

    valueCol = altTypeH[altType][1]
    tableN = altTypeH[altType][0]

    con, cursor = mymysql.connectDB(db='tcga1')

    try:
        # the statement used '% value', which is not a valid format specifier
        cursor.execute('create temporary table t1 select samp_id pId, %s value from %s where gene_sym="%s"' % \
         (valueCol, tableN, geneN))

        cursor.execute('select value from t1')
        valueL = [v for (v, ) in cursor.fetchall()]

        recordL = mymysql.dictSelect(
            ("SELECT pId,days_followup time,if(days_death is not null,1,0) event,%s value "
             "\t\tFROM clinical join %s on pId=samp_id and gene_sym='%s'") %
            (valueCol, tableN, geneN), cursor)
    finally:
        con.close()

    threshold = (mymath.percentile(valueL, cutoff[0]),
                 mymath.percentile(valueL, 100 - cutoff[1]))

    colN = ['pId', 'time', 'event', 'value', 'label', 'priority']

    with open('/var/www/html/survival/survival.mvc', 'w') as outFile:

        outFile.write('\t'.join(colN) + '\n')

        for r in recordL:

            if r['value'] < threshold[0]:
                label = '"%s %s < B%s%% (%.2f)"' % (geneN, altType, cutoff[0],
                                                    threshold[0])
                priority = '1'
            elif r['value'] >= threshold[1]:
                label = '"%s %s > T%s%% (%.2f)"' % (geneN, altType, cutoff[1],
                                                    threshold[1])
                priority = '2'
            else:
                label = '"%s %s Middle"' % (geneN, altType)
                priority = '9'

            outFile.write(
                '%s\t%s\t%s\t%s\t%s\t%s\n' %
                (r['pId'], r['time'], r['event'], r['value'], label, priority))

    ret1 = os.system(
        'Rscript distribution.r /var/www/html/survival/survival.mvc &> /var/www/html/survival/error_distr.txt'
    )
    ret2 = os.system(
        'Rscript survival.r /var/www/html/survival/survival.mvc &> /var/www/html/survival/error_surv.txt'
    )

    return ret1 != 0 or ret2 != 0
Ejemplo n.º 19
0
def post_xsq2cn(outFileN, platform='', server='smc1', dbN='ircr1'):
	"""Rebuild the copy-number table of one platform and tag samples that lack its tag.

	outFileN -- file receiving the output of prepDB_xsq_cn.py; it is then
		handed to mymysql.reset_table
	platform -- 'SS' (table xsq_cn, files under mysetting.wxsCNADir) or
		'CS' (table cs_cn, files under mysetting.CScnaDir)
	server -- key of mysetting.mysqlH selecting the MySQL account and host
	dbN -- database holding the copy-number table and sample_tag

	Any other platform writes a message to stderr and exits with status 1.
	"""
	# validate first: the export command used to run, and overwrite outFileN,
	# before an unsupported platform was rejected
	if platform == 'SS':
		tableN = 'xsq_cn'
	elif platform == 'CS':
		tableN = 'cs_cn'
	else:
		sys.stderr.write('illegal platform name: %s\n' % platform)
		sys.exit(1)

	if platform == 'CS':
		cnaDir = mysetting.CScnaDir
	else:
		cnaDir = mysetting.wxsCNADir
	cmd = 'cat %s/*/*%s.cn_gene.dat | /usr/bin/python %s/Integration/prepDB_xsq_cn.py > %s' % (cnaDir,platform, mysetting.SRC_HOME, outFileN)
	os.system(cmd)

	acct = mysetting.mysqlH[server]
	mymysql.reset_table(tableN=tableN, dataFileN=outFileN, user=acct['user'],passwd=acct['passwd'],db=dbN, host=acct['host'])

	## add samp_id if missing
	(con, cursor) = mymysql.connectDB(user=acct['user'], passwd=acct['passwd'], db=dbN, host=acct['host'])
	cursor.execute('SELECT DISTINCT samp_id FROM %s' % tableN)
	sIdL = [x for (x,) in cursor.fetchall()]
	cursor.execute('SELECT DISTINCT samp_id FROM sample_tag WHERE tag like "XSeq_%s%%"' % platform)
	taggedIds = set(x for (x,) in cursor.fetchall())
	for sid in sIdL:
		if sid not in taggedIds:
			cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="XSeq_%s"' % (sid, platform))
Ejemplo n.º 20
0
def fusion_s(sid, dbN='ircr1', cursor=None):
    """Append one sample's fusions, joined with its normal-read tables, to splice_fusion_AF.

    sid -- sample id whose rows are copied from splice_fusion,
        splice_normal_loc1 and splice_normal_loc2
    dbN -- database to connect to when no cursor is supplied
    cursor -- cursor to run the statements on; when None a new connection is
        opened through mymysql.connectDB
    """
    if cursor is None:
        (con, cursor) = mymysql.connectDB(db=dbN)

    # executed strictly in this order: per-sample working copies with their
    # indexes, the joined result, the bulk insert, then the cleanup
    statementL = [
        'CREATE TEMPORARY TABLE t_m AS SELECT * FROM splice_fusion WHERE samp_id="%s"' % sid,
        'ALTER TABLE t_m ADD INDEX (samp_id,loc1)',
        'ALTER TABLE t_m ADD INDEX (samp_id,loc2)',
        'CREATE TEMPORARY TABLE loc1 AS SELECT * FROM splice_normal_loc1 WHERE samp_id="%s"' % sid,
        'ALTER TABLE loc1 ADD INDEX (samp_id,loc1)',
        'CREATE TEMPORARY TABLE loc2 AS SELECT * FROM splice_normal_loc2 WHERE samp_id="%s"' % sid,
        'ALTER TABLE loc2 ADD INDEX (samp_id,loc2)',
        ('CREATE TEMPORARY TABLE af_m AS '
         '\t\tSELECT t_m.samp_id,loc1,loc2,gene_sym1,gene_sym2,ftype,exon1,exon2,frame,nReads,nPos,nReads_w1,nReads_w2 FROM t_m '
         '\t\tLEFT JOIN loc1 t_w1 USING (samp_id,loc1) LEFT JOIN loc2 t_w2 USING (samp_id,loc2)'),
        'ALTER TABLE splice_fusion_AF DISABLE KEYS',
        'INSERT INTO splice_fusion_AF SELECT * FROM af_m',
        'ALTER TABLE splice_fusion_AF ENABLE KEYS',
        'DROP TEMPORARY TABLE IF EXISTS t_m,af_m,loc1,loc2',
    ]
    for statement in statementL:
        cursor.execute(statement)
Ejemplo n.º 21
0
def main():
	"""Print, per methylation platform and locus, the correlation with EGFR expression.

	Tumor ('T') methylation fractions of EGFR are averaged per platform, pId
	and loc into a temporary table. For every distinct (platform, loc) the
	fraction is paired with log2(rpkm+1) of EGFR for the same sample and their
	correlation coefficient is computed. Rows are printed as
	'platform\\tloc\\tn\\tr' in descending order of r.
	"""

	con,cursor = mymysql.connectDB(db='tcga1')

	output = []

	try:

		cursor.execute('create temporary table t_EGFR as '
			'\t\tselect platform,pId,geneName,loc,sum(fraction)/count(fraction) fraction from tcga1.methyl where TN="T" and geneName = "EGFR" '
			'\t\tgroup by platform, pId, loc')

		cursor.execute('alter table t_EGFR add index (geneName)')
		cursor.execute('alter table t_EGFR add index (pId)')

		cursor.execute('SELECT distinct platform,loc FROM t_EGFR')
		results1 = cursor.fetchall()

		for (plat,loc) in results1:

			cursor.execute('select fraction,log2(rpkm+1) from t_EGFR, rpkm_gene_expr where platform="%s" and loc="%s" and gene_sym="EGFR" and pId=samp_id' % (plat,loc))
			results2 = cursor.fetchall()

			# with no matched sample zip(*results2) has nothing to unpack
			# and there is no correlation to report
			if not results2:
				continue

			methyl,expr = zip(*results2)

			r = numpy.corrcoef(methyl,expr)[0,1]

			output.append((plat,loc,len(methyl),r))

	finally:
		con.close()

	# highest correlation first; equal values keep their query order
	output.sort(key=lambda rec: rec[-1], reverse=True)

	for (plat,loc,n,r) in output:
		print('%s\t%s\t%s\t%.3f' % (plat,loc,n,r))
Ejemplo n.º 22
0
def main(outFileName,dbNL,dTypeL,geneNL):
	"""Write the per-sample values of the given genes, split into 'P' and 'R' samples.

	outFileName -- tab-delimited output with the columns
		dbT, dType, geneN, PR, sId, val
	dbNL -- database names; t_recur is only created for 'ircr1' and 'tcga1'
	dTypeL -- keys of dTypeH; entry [0] is the table, entry [1] the value column
	geneNL -- gene symbols matched against gene_sym
	"""

	rowFmt = '%s\t%s\t%s\t%s\t%s\t%s\n'

	# statement that fills t_recur, per database
	recurSqlH = {
		'ircr1': 'create temporary table t_recur select distinct samp_id from sample_tag where substring(tag,1,6)="pair_P"',
		'tcga1': 'create temporary table t_recur select distinct samp_id from sample_tag where tag="Recur"',
	}

	outFile = open(outFileName,'w')
	outFile.write(rowFmt % ('dbT','dType','geneN','PR','sId','val'))

	for dbN in dbNL:

		(con,cursor) = mymysql.connectDB(db=dbN)

		if dbN in recurSqlH:
			cursor.execute(recurSqlH[dbN])

		for dType in dTypeL:

			for geneN in geneNL:

				# 'P' rows are the samples NOT in t_recur, 'R' rows the ones in it
				for (PR,negate) in (('P','not'),('R','')):

					cursor.execute('select samp_id,%s from %s where gene_sym="%s" and samp_id %s in (select samp_id from t_recur)' % (dTypeH[dType][1],dTypeH[dType][0],geneN,negate))

					for (sId,val) in cursor.fetchall():
						outFile.write(rowFmt % (dbH[dbN],dType,geneN,PR,sId,val))

		con.close()

	outFile.close()
Ejemplo n.º 23
0
def post_rsq2fusion(dirN, server='smc1', dbN='ihlee_test', sampL=[]):
	"""Summarise, convert and load the fusion calls of every sample under dirN.

	dirN -- directory whose sub-directories are named after the samples
	server -- key of mysqlH selecting the MySQL account and host
	dbN -- target database; for 'ihlee_test' and 'ircr1' the intermediate files
		go to BASE/fusion, otherwise into the sample directory
	sampL -- when not empty, only these sample ids are processed
	"""
	acct = mysqlH[server]
	(con, cursor) = mymysql.connectDB(user=acct['user'],passwd=acct['passwd'],db=dbN,host=acct['host'])

	sampNL = [x for x in os.listdir(dirN) if os.path.isdir(dirN + '/' + x)]
	for sampN in sampNL:
		baseDir = dirN + '/' + sampN
		# RNA-Seq sample names end in '_RSq'; drop it and normalise separators
		sid = sampN[:-4].replace('.','_').replace('-','_')
		if not (sampL == [] or sid in sampL):
			continue
		print('%s %s' % (sampN, sid))

		# make sure sample_tag records that this sample has RNA-Seq
		cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
		if not cursor.fetchall():
			cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)

		fusion_report_annot = glob('%s/%s*_splice_transloc_annot1.report_annot.txt' % (baseDir, sampN))[0]

		# both intermediate files share one prefix that depends on the database
		if dbN in ['ihlee_test','ircr1']:
			outPrefix = '%s/fusion/splice_fusion_%s' % (BASE, sampN)
		else:
			outPrefix = '%s/splice_fusion_%s' % (baseDir, sampN)
		splice_fusion_txt = outPrefix + '.txt'
		splice_fusion_dat = outPrefix + '.dat'

		fusion_summarize.fusion_summarize_s(inputFileN=fusion_report_annot, minNPos=1, outFileN=splice_fusion_txt)
		prepDB_splice_fusion.main(inGctFileName=splice_fusion_txt, minNPos=1, sampNamePat=RSQPattern, geneList=[], outFileN=splice_fusion_dat)

		# replace the sample's rows, then drop fusions between two HLA- genes
		cursor.execute('DELETE FROM splice_fusion WHERE samp_id="%s"' % sid)
		cursor.execute('LOAD DATA LOCAL INFILE "%s" IGNORE INTO TABLE splice_fusion' % splice_fusion_dat)
		cursor.execute('DELETE FROM splice_fusion WHERE gene_sym1 LIKE "HLA-%" AND gene_sym2 LIKE "HLA-%"')

	makeDB_splice_AF.fusion(dbN=dbN, cursor=cursor)
Ejemplo n.º 24
0
def make_mutation_rxsq_cs(dbN='CancerSCAN', cursor=None):
	"""Rebuild table mutation_rxsq from mutation_cs and mutation_rsq.

	dbN -- database to connect to when no cursor is supplied
	cursor -- cursor to run the statements on; when None a new connection is
		opened through mymysql.connectDB
	"""
	if cursor == None:
		(con,cursor) = mymysql.connectDB(db=dbN)
	
	cursor.execute('''DROP TABLE IF EXISTS mutation_rxsq''')
	
	# left join UNION right join: every mutation present in either table; the
	# read-count columns of the side without a matching row are NULL.
	# mutation_cs rows get cosmic and tcga concatenated as cosmic and an empty mutsig.
	cursor.execute('''CREATE TEMPORARY TABLE t_m AS \
		SELECT n.samp_id,n.chrom,n.chrSta,n.chrEnd,n.ref,n.alt,n.n_nReads_ref,n.n_nReads_alt,n.nReads_ref,n.nReads_alt,r.r_nReads_ref,r.r_nReads_alt,ifnull(r.strand,''),n.gene_sym,n.ch_dna,n.ch_aa,n.ch_type,concat(n.cosmic,",",n.tcga) cosmic,'' mutsig \
		FROM mutation_cs n LEFT JOIN mutation_rsq r \
		ON n.samp_id=r.samp_id AND n.chrom=r.chrom AND n.chrSta=r.chrSta AND n.chrEnd=r.chrEnd AND n.ref=r.ref AND n.alt=r.alt\
		UNION \
		SELECT r.samp_id,r.chrom,r.chrSta,r.chrEnd,r.ref,r.alt,n.n_nReads_ref,n.n_nReads_alt,n.nReads_ref,n.nReads_alt,r.r_nReads_ref,r.r_nReads_alt,r.strand,r.gene_symL gene_sym,r.ch_dna,r.ch_aa,r.ch_type,r.cosmic,r.mutsig \
		FROM mutation_cs n RIGHT JOIN mutation_rsq r \
		ON n.samp_id=r.samp_id AND n.chrom=r.chrom AND n.chrSta=r.chrSta AND n.chrEnd=r.chrEnd AND n.ref=r.ref AND n.alt=r.alt
		''')
	# one row per (samp_id,chrom,chrSta,ref,alt,ch_aa)
	# NOTE(review): which row of a group survives presumably relies on MySQL keeping the
	# first row of the ordered subquery -- confirm for the server version in use
	cursor.execute('''CREATE TABLE mutation_rxsq AS \
		SELECT * FROM (SELECT * FROM t_m ORDER BY ch_dna desc) AS i GROUP BY samp_id,chrom,chrSta,ref,alt,ch_aa''')
	# read counts missing on one side of the join become 0
	cursor.execute('''UPDATE mutation_rxsq SET r_nReads_ref=0, r_nReads_alt=0 WHERE r_nReads_ref IS NULL''')
	cursor.execute('''UPDATE mutation_rxsq SET n_nReads_ref=0, n_nReads_alt=0 WHERE n_nReads_ref IS NULL''')
	cursor.execute('''UPDATE mutation_rxsq SET nReads_ref=0, nReads_alt=0 WHERE nReads_ref IS NULL''')

	cursor.execute('''ALTER TABLE mutation_rxsq ADD INDEX (samp_id,gene_sym)''')
	cursor.execute('''ALTER TABLE mutation_rxsq ADD INDEX (samp_id,chrom,chrSta,chrEnd)''')
	cursor.execute('''ALTER TABLE mutation_rxsq ADD INDEX (samp_id,chrom,chrSta,ref,alt)''')
	cursor.execute('''ALTER TABLE mutation_rxsq ADD INDEX (samp_id,chrom,chrSta,chrEnd,ref,alt)''')

	cursor.execute('''DROP TEMPORARY TABLE IF EXISTS t_m''')
Ejemplo n.º 25
0
def main(dataN='TCGA_GBM', endPoint='death', geneN='MGMT', altType='methyl', cutoff=(50,50)):
	"""Write the survival table for one gene feature and run the R plotting scripts.

	Samples with an IDH1 R132 mutation are excluded. The remaining samples are
	labelled by comparing their value with the cutoff[0]-th percentile and the
	(100-cutoff[1])-th percentile of all values, and the table is written to
	/var/www/html/tmp/survival.mvc.

	dataN, endPoint -- accepted for interface compatibility; not used here
	geneN           -- feature name matched against the altTypeH key column
	altType         -- key into altTypeH (table, value column, feature column)
	cutoff          -- (bottom percent, top percent)

	Returns True when distribution.r or the png survival.r run exits non-zero.
	"""
	colN = ['pId','time','event','value','label','priority']

	con,cursor = mymysql.connectDB(db='tcga1')
	try:
		cursor.execute('select samp_id from mutation_normal where gene_symL="IDH1" and ch_aa like "%sR132%s"' % ('%','%'))
		# a set makes the exclusion test below O(1) per record
		idh1 = set(x[0] for x in cursor.fetchall())

		recordL = mymysql.dictSelect("SELECT pId,days_followup time,if(days_death is not null,1,0) event,%s value \
			FROM clinical join %s on pId=samp_id and %s='%s'" % (altTypeH[altType][1],altTypeH[altType][0],altTypeH[altType][2],geneN), cursor)
	finally:
		con.close()

	recordL = [r for r in recordL if r['pId'] not in idh1]

	valueL = [r['value'] for r in recordL]

	threshold = (mymath.percentile(valueL,cutoff[0]), mymath.percentile(valueL,100-cutoff[1]))

	with open('/var/www/html/tmp/survival.mvc','w') as outFile:

		outFile.write('\t'.join(colN)+'\n')

		for r in recordL:

			if r['value'] < threshold[0]:
				label = '"%s %s < B%s%% (%.2f)"' % (geneN,altType,cutoff[0],threshold[0])
				priority = '1'
			elif r['value'] >= threshold[1]:
				label = '"%s %s > T%s%% (%.2f)"' % (geneN,altType,cutoff[1],threshold[1])
				priority = '2'
			else:
				label = '"%s %s Middle"' % (geneN,altType)
				priority = '9'

			outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (r['pId'], r['time'], r['event'], r['value'], label, priority))

	ret1 = os.system('Rscript distribution.r /var/www/html/tmp/survival.mvc &> /var/www/html/tmp/error_surv.txt')
	ret2 = os.system('Rscript survival.r /var/www/html/tmp/survival.mvc png &>> /var/www/html/tmp/error_surv.txt')
	# the pdf rendering is best effort: its exit status is not part of the result
	os.system('Rscript survival.r /var/www/html/tmp/survival.mvc pdf &>> /var/www/html/tmp/error_surv.txt')

	return ret1!=0 or ret2!=0
Ejemplo n.º 26
0
def batch(outDir):
    """Call main() once for every distinct samp_id in rpkm_gene_expr.

    outDir is handed to main() unchanged; each sample id is printed
    before it is processed.
    """
    (con, cursor) = mymysql.connectDB()
    cursor.execute('SELECT DISTINCT samp_id FROM rpkm_gene_expr')
    for (sampId, ) in cursor.fetchall():
        print(sampId)
        main(sampId, outDir)
Ejemplo n.º 27
0
def has_exome(samp_id, dbN='ircr1'):
	"""Return True when samp_id has a tag starting with XSeq_TS or XSeq_SS in sample_tag.

	samp_id -- sample identifier to look up
	dbN     -- database to query
	"""
	(con, cursor) = mymysql.connectDB(db=dbN)
	try:
		cursor.execute('select count(distinct samp_id) from sample_tag where (tag like "XSeq_TS%%" or tag like "XSeq_SS%%") and samp_id="%s"' % samp_id)
		(cnt,) = cursor.fetchone()
	finally:
		# a fresh connection is opened on every call, so release it here
		con.close()
	return int(cnt) > 0
Ejemplo n.º 28
0
def batch(outDir):
	"""Run main(sample, outDir) for each distinct sample of rpkm_gene_expr.

	The sample id is echoed to stdout before each run.
	"""
	(con, cursor) = mymysql.connectDB()
	cursor.execute('SELECT DISTINCT samp_id FROM rpkm_gene_expr')
	sampIdL = [row[0] for row in cursor.fetchall()]
	for sampId in sampIdL:
		print(sampId)
		main(sampId, outDir)
Ejemplo n.º 29
0
def has_rsq(samp_id, dbN='ircr1'):
	"""Return True when samp_id carries the "RNA-Seq" tag in sample_tag.

	samp_id -- sample identifier to look up
	dbN     -- database to query
	"""
	(con, cursor) = mymysql.connectDB(db=dbN)
	try:
		cursor.execute('select count(distinct samp_id) from sample_tag where tag="RNA-Seq" and samp_id="%s"' % samp_id)
		(cnt,) = cursor.fetchone()
	finally:
		# a fresh connection is opened on every call, so release it here
		con.close()
	return int(cnt) > 0
Ejemplo n.º 30
0
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1'):
    """Launch pipe_s_xsq2cnCorr.py for every .ngCGH input that has a purity estimate.

    A sample is processed when xsq_purity holds a tumor_frac other than 'ND'
    for it and no entry of /EQL3/pipeline/CNA_corr contains its sample id.

    inputFilePathL -- paths of *.ngCGH files
    projectN       -- project name; its directory is created if missing
    clean          -- not used here (the pipeline is always started with -c False)
    pbs            -- submit through qsub instead of running in place
    server         -- passed to the pipeline (-s) and used as the qsub queue
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN,
                                                         mkdir=False)) + '/'
    apacheBase = storageBase

    for (baseDir, label) in ((storageBase, 'File'), (apacheBase, 'Log')):
        if glob(baseDir + projectN):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' %
                      (baseDir, projectN, baseDir, projectN))
            print('%s directory: created' % label)

    (con, cursor) = mymysql.connectDB(user=mysqlH['smc1']['user'],
                                      passwd=mysqlH['smc1']['passwd'],
                                      db='ircr1',
                                      host=mysqlH['smc1']['host'])
    try:
        for inputFileP in inputFilePathL:

            inputFileN = inputFileP.split('/')[-1]
            sampN = re.match(r'(.*)\.ngCGH', inputFileN).group(1)
            (sid, tag) = re.match(r'(.*)_([XCT].{0,2})_.*\.ngCGH',
                                  inputFileN).groups()
            # plain tumor samples keep the bare id; other tags become a suffix
            if tag != 'T':
                sid = '%s_%s' % (sid, tag)

            cursor.execute(
                'SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % (sid))
            results = cursor.fetchall()
            # only samples for which purity was calculated
            if not results or results[0][0] == 'ND':
                continue
            # skip samples whose corrected copy number already exists
            if any(sid in x for x in os.listdir('/EQL3/pipeline/CNA_corr')):
                continue

            print(sid)
            cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2cnCorr.py -i %s -n %s -p %s -c %s -s %s' % (
                mysetting.SRC_HOME, inputFileP, sampN, projectN, False, server)
            print(cmd)
            if pbs:
                log = '%s/%s.Xsq_cnCorr.qlog' % (storageBase + projectN + '/' +
                                                 sampN, sampN)
                os.system('echo "%s" | qsub -q %s -N %s -o %s -j oe' %
                          (cmd, server, sampN, log))
            else:
                log = '%s/%s.Xsq_cnCorr.qlog' % (storageBase + projectN, sampN)
                os.system('(%s) 2> %s' % (cmd, log))
    finally:
        con.close()
Ejemplo n.º 31
0
def retSig_CNA(mode='up', th=1.0, dbN='ircr1'):
	"""Collect, per gene, the primary/recurrent pairs that retain or lose a copy-number change.

	For every sample tagged "Pair_R:<rid>" the genes passing the threshold in
	the primary sample are compared with those passing it in the recurrent
	sample; the per-gene tallies are handed to print_mutSig().

	mode -- 'up' selects value_log2 >= th, 'dn' selects value_log2 <= th
	th   -- log2 threshold
	dbN  -- database to query

	Raises ValueError for any other mode.
	"""
	cmpOpH = {'up': '>=', 'dn': '<='}
	if mode not in cmpOpH:
		raise ValueError('mode must be "up" or "dn": %r' % (mode,))
	geneQuery = 'select distinct gene_sym from %s where value_log2 ' + cmpOpH[mode] + ' %s and samp_id="%s"'

	(con,cursor) = mymysql.connectDB(db=dbN)
	try:
		cursor.execute('select distinct samp_id from sample_tag where tag like "Pair_R:%%"')
		sidL = sorted(x for (x,) in cursor.fetchall())

		geneH = {}
		fracH = {}
		for sid in sidL:
			if sid in ['S042']: ## consent form issue
				continue
			if '_X' in sid:
				continue
			cursor.execute('select distinct tag from sample_tag where tag like "Pair_R:%%" and samp_id="%s"' % sid)
			rid = cursor.fetchone()[0].split(':')[-1]

			pair = '%s:%s' % (sid, rid)

			# both samples of a pair must come from the same platform
			if has_acgh(sid) and has_acgh(rid):
				tblN = 'array_cn'
			elif has_exome(sid) and has_exome(rid):
				tblN = 'xsq_cn'
			else:
				continue

			cursor.execute(geneQuery % (tblN, th, sid))
			p_geneL = [item[0] for item in cursor.fetchall()]
			cursor.execute(geneQuery % (tblN, th, rid))
			r_geneS = set(item[0] for item in cursor.fetchall())

			if len(p_geneL) < 1:
				continue
			fracH[pair] = 0.5 ## uniform

			for gene in p_geneL:
				entry = geneH.setdefault(gene, {'retain': [], 'vanish': [], 'p_cnt': 0})
				# present in the recurrent sample too -> retained, else vanished
				entry['retain' if gene in r_geneS else 'vanish'].append(pair)
				entry['p_cnt'] += 1
	finally:
		con.close()
	print_mutSig(geneH, fracH)
Ejemplo n.º 32
0
def has_exome(samp_id, dbN='ircr1'):
    """Return True when samp_id has a tag starting with XSeq_TS or XSeq_SS in sample_tag.

    samp_id -- sample identifier to look up
    dbN     -- database to query
    """
    (con, cursor) = mymysql.connectDB(db=dbN)
    try:
        cursor.execute(
            'select count(distinct samp_id) from sample_tag where (tag like "XSeq_TS%%" or tag like "XSeq_SS%%") and samp_id="%s"'
            % samp_id)
        (cnt, ) = cursor.fetchone()
    finally:
        # a fresh connection is opened on every call, so release it here
        con.close()
    return int(cnt) > 0
Ejemplo n.º 33
0
def main(outFileName,dbName):
	"""Write a table of primary vs. recurrent values for every data type, gene and sample pair.

	Primary samples are taken from a fixed list of paired samples; for each one
	the recurrent partner is looked up through the pair_P/pair_R tags. A 'CNA'
	record replaces the 'CNX' record of the same gene and pair.

	outFileName -- tab-separated output file
	dbName      -- database to query
	"""
	(con,cursor) = mymysql.connectDB(db=dbName)

	try:
		cursor.execute('select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R" and '
			'samp_id!="S042" and samp_id not like "%_X" and substring(samp_id,length(samp_id)-1)!="_2" and '
			'find_in_set(samp_id,"S437,S586,S023,S697,S372,S538,S458,S453,S428,S460,S768,S780,S640,S096,S671,S592,S572,S520,S1A,S2A,S3A,S4A,S5A,S6A,S7A,S8A,S9A,S10A,S11A,S12A,S13A,S14A,S722,S171,S121,S652,S752,S386")>=1')
		sIdL_prim = [x for (x,) in cursor.fetchall()]

		print(sIdL_prim)

		# the recurrent partner depends on the primary sample only, so it is
		# resolved once per sample rather than once per (dType, gene, sample)
		pairH = {}
		for sId_p in sIdL_prim:
			cursor.execute('select t1.samp_id from sample_tag t1 where t1.tag="pair_P:%s" and '
				'"%s" in (select t2.samp_id from sample_tag t2 where t2.tag=concat("pair_R:",t1.samp_id))' % (sId_p,sId_p))
			(pairH[sId_p],) = cursor.fetchone()

		valQuery = 'select %s from %s where gene_sym="%s" and samp_id="%s"'
		resultL = []

		for dType in dTypeL:
			for geneN in geneL:
				for sId_p in sIdL_prim:
					sId_r = pairH[sId_p]

					cursor.execute(valQuery % (dTypeH[dType][1],dTypeH[dType][0],geneN,sId_p))
					r_p = cursor.fetchone()

					cursor.execute(valQuery % (dTypeH[dType][1],dTypeH[dType][0],geneN,sId_r))
					r_r = cursor.fetchone()

					if r_p and r_r:
						resultL.append((dType,geneN,sId_p,sId_r,r_p[0],r_r[0]))
	finally:
		con.close()

	resultL_cna = [x for x in resultL if x[0]=='CNA']
	resultL_oth = [x for x in resultL if x[0]!='CNA']

	for r in resultL_cna:
		# drop the first CNX record of the same gene and pair, then add the CNA one
		for x in resultL_oth:
			if x[0]=='CNX' and x[1:4]==r[1:4]:
				resultL_oth.remove(x)
				break
		resultL_oth.append(r)

	with open(outFileName,'w') as outFile:
		outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % ('dType','geneN','sId_p','sId_r','val_p','val_r'))
		for r in resultL_oth:
			outFile.write('%s\t%s\t%s\t%s\t%.2f\t%.2f\n' % r)
Ejemplo n.º 34
0
def main(geneN, dType, dbN, outFileDir=None, pairedOnly=False):
    """Write the primary and recurrent value lists of one gene in .dst2 layout.

    geneN      -- gene symbol
    dType      -- key into dTypeH (table name, value column)
    dbN        -- 'ircr1' or 'tcga1'
    outFileDir -- directory for the output file; stdout is used when empty
    pairedOnly -- with dbN 'ircr1', restrict primaries to paired samples

    Raises ValueError for any other dbN.
    """
    if dbN not in ('ircr1', 'tcga1'):
        # checked first so no output file or connection is created for it
        raise ValueError('unsupported database: %r' % (dbN, ))

    pairedPrim = pairedOnly and dbN == 'ircr1'

    if outFileDir:
        outFile = open(
            '%s/%s_%s_%s%s.dst2' % (outFileDir, geneN, dType, dbH[dbN],
                                    '_paired' if pairedPrim else ''), 'w')
    else:
        outFile = sys.stdout

    try:
        (con, cursor) = mymysql.connectDB(db=dbN)
        try:
            if dbN == 'ircr1':
                cursor.execute(
                    'create temporary table t_paired_prim select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R"'
                )
                cursor.execute(
                    'create temporary table t_recur select distinct samp_id from sample_tag where substring(tag,1,6)="pair_P"'
                )
            else:
                cursor.execute(
                    'create temporary table t_recur select distinct samp_id from sample_tag where tag="Recur"'
                )

            if pairedPrim:
                cursor.execute(
                    'select %s from %s where gene_sym="%s" and samp_id in (select samp_id from t_paired_prim)'
                    % (dTypeH[dType][1], dTypeH[dType][0], geneN))
            else:
                cursor.execute(
                    'select %s from %s where gene_sym="%s" and samp_id not in (select samp_id from t_recur)'
                    % (dTypeH[dType][1], dTypeH[dType][0], geneN))
            prim = [str(x) for (x, ) in cursor.fetchall()]

            cursor.execute(
                'select %s from %s where gene_sym="%s" and samp_id in (select samp_id from t_recur)'
                % (dTypeH[dType][1], dTypeH[dType][0], geneN))
            recur = [str(x) for (x, ) in cursor.fetchall()]
        finally:
            con.close()

        for (label, valueL) in (('Prim', prim), ('Recur', recur)):
            outFile.write('%s-%s-%s-%s\t%s\n' %
                          (geneN, dType, dbN, label, len(valueL)))
            outFile.write(','.join(valueL) + '\n')
            outFile.write('\n')
    finally:
        # never close the interpreter's stdout
        if outFile is not sys.stdout:
            outFile.close()
Ejemplo n.º 35
0
def post_rsq2skip(dirN, server='smc1', dbN='ihlee_test', sampL=[]):
	"""Load normal-splice and exon-skipping results of each sample directory into the database.

	dirN   -- directory whose sub-directories are the per-sample result folders
	server -- key into mysqlH for the connection settings
	dbN    -- target database
	sampL  -- when not empty, only these sample ids are processed
	"""
	dbCfg = mysqlH[server]
	(con, cursor) = mymysql.connectDB(user=dbCfg['user'],passwd=dbCfg['passwd'],db=dbN,host=dbCfg['host'])
	for tblN in ('splice_normal', 'splice_normal_loc1', 'splice_normal_loc2'):
		cursor.execute('ALTER TABLE %s CHANGE COLUMN samp_id samp_id char(63)' % tblN)
	cursor.execute('CREATE TEMPORARY TABLE splice_normal_tmp LIKE splice_normal')

	# for these databases the intermediate files are written under BASE
	useBase = dbN in ['ihlee_test','ircr1']

	sampNL = [entry for entry in os.listdir(dirN) if os.path.isdir(dirN + '/' + entry)]
	for sampN in sampNL:
		baseDir = dirN + '/' + sampN
		sid = sampN[:-4].replace('.','_').replace('-','_') ## RNASeq sample has '***_RSq'
		if sampL != [] and sid not in sampL:
			continue
		print('%s %s' % (sampN, sid))

		## make sure to update sample_tag that this sample has RNA-Seq
		cursor.execute('SELECT * FROM sample_tag WHERE samp_id="%s" AND tag="RNA-Seq"' % sid)
		if len(cursor.fetchall()) < 1:
			cursor.execute('INSERT INTO sample_tag SET samp_id="%s", tag="RNA-Seq"' % sid)

		## splice_normal: report -> .dat -> temporary table
		normal_report = glob('%s/%s*normal_report.txt' % (baseDir, sampN))[0]
		splice_normal = ('%s/exonSkip_normal/splice_normal_%s.dat' % (BASE, sampN)) if useBase \
			else ('%s/splice_normal_%s.dat' % (baseDir, sampN))
		prepDB_splice_normal.main(sampNamePat=RSQPattern, inFileN=normal_report, outFileN=splice_normal)
		cursor.execute('LOAD DATA LOCAL INFILE "%s" INTO TABLE splice_normal_tmp' % splice_normal)

		## splice_skip: annotated report -> summary .txt -> .dat -> table
		skip_report_annot = glob('%s/%s*_splice_exonSkip_report_annot.txt' % (baseDir, sampN))[0]
		splice_skip_txt = ('%s/exonSkip/splice_skip_%s.txt' % (BASE, sampN)) if useBase \
			else ('%s/splice_skip_%s.txt' % (baseDir, sampN))
		exonSkip_summarize.exonSkip_summarize_s(inFileN=skip_report_annot, minPos=1, outFileN=splice_skip_txt)
		splice_skip_dat = ('%s/exonSkip/splice_skip_%s.dat' % (BASE, sampN)) if useBase \
			else ('%s/splice_skip_%s.dat' % (baseDir, sampN))
		prepDB_splice_skip.main(inFileName=splice_skip_txt, minNPos=1, sampNamePat=RSQPattern, geneList=[], outFileName=splice_skip_dat)
		cursor.execute('DELETE FROM splice_skip WHERE samp_id="%s"' % sid)
		cursor.execute('LOAD DATA LOCAL INFILE "%s" IGNORE INTO TABLE splice_skip' % splice_skip_dat)

	cursor.execute('ALTER TABLE splice_normal DISABLE KEYS')
	cursor.execute('INSERT INTO splice_normal SELECT * FROM splice_normal_tmp')
	cursor.execute('ALTER TABLE splice_normal ENABLE KEYS')
	# refresh the per-locus read sums of the samples that were just loaded
	for k in ('1', '2'):
		locTbl = 'splice_normal_loc' + k
		cursor.execute('ALTER TABLE %s DISABLE KEYS' % locTbl)
		cursor.execute('DELETE FROM %s WHERE samp_id in (SELECT DISTINCT samp_id FROM splice_normal_tmp)' % locTbl)
		cursor.execute('INSERT INTO %s SELECT samp_id,loc%s,sum(nReads) nReads_w%s FROM splice_normal_tmp GROUP BY samp_id,loc%s' % (locTbl, k, k, k))
		cursor.execute('ALTER TABLE %s ENABLE KEYS' % locTbl)
	makeDB_splice_AF.skip(dbN=dbN, cursor=cursor)
	cursor.execute('DROP TEMPORARY TABLE IF EXISTS splice_normal_tmp')
Ejemplo n.º 36
0
def coverage_calc_batch(inputDirNL,outputDirN,pbs=False,refFileName='/data1/Sequence/ucsc_hg19/annot/refFlat_exon_autosome_NM_merged.txt'):
	"""Run samtools depth, plus a depth histogram, on the BAM files of paper-freeze samples.

	inputDirNL  -- sample directories named <sample>_T_TS or <sample>_T_SS
	outputDirN  -- directory receiving *.recal.depth.txt, *.recal.depth_hash.txt and logs
	pbs         -- submit each command through qsub instead of running it in place
	refFileName -- region file handed to samtools depth -b
	"""
	con,cursor = mymysql.connectDB(db='ircr1')
	try:
		cursor.execute('select distinct samp_id from sample_tag_paperfreeze')
		sampNameL = [x[0] for x in cursor.fetchall()]
	finally:
		con.close()
	sampNameL = sampNameL + ['S641','S140']

	inputFileNL = [re.match(r'.*/(S[0-9]{1}.*S$)', x).group(1) for x in inputDirNL]
	inDirSampNameL = [re.match(r'(S.*)_T_[TS]S$', x).group(1) for x in inputFileNL]

	sampNameS = set(sampNameL).intersection(inDirSampNameL)
	sampNameL = sorted(sampNameS)

	print('Samples: %s (%s)' % (sampNameL,len(sampNameL)))

	fileNameL = []

	for fileN in inputDirNL:

		sampN = re.match(r'(S.*)_T_[TS]S$',fileN.split('/')[-1]).group(1)

		if sampN not in sampNameS:
			continue

		fileNameL.extend(glob(fileN+'/*_[TB]_[TS]S*.bam'))

	# one sort after collecting gives the same order as sorting after every addition
	fileNameL.sort()

	procS = set()

	for fileN in fileNameL:

		sampN = re.match(r'(.*)\.recal\.bam',fileN.split('/')[-1]).group(1)

		# only the first BAM of each sample name is processed
		if sampN in procS:
			continue

		print(sampN)

		cmd = 'samtools depth -b %s %s > %s/%s.recal.depth.txt' % \
			(refFileName,fileN, outputDirN,sampN)
		log = '%s/%s.depth.qlog' % (outputDirN,sampN)

		if pbs:
			# the awk program is single-quoted here because the whole command is echoed inside double quotes
			cmd = "%s; awk '{cnt[\\$3]+=1}END{for (x in cnt){print x,cnt[x]}}' %s/%s.recal.depth.txt | sort -n -k1 > %s/%s.recal.depth_hash.txt" % \
				(cmd, outputDirN,sampN, outputDirN,sampN)
			os.system('echo "%s" | qsub -N %s -o %s -j oe' % (cmd,sampN,log))
		else:
			cmd = '%s; awk "{cnt[\\$3]+=1}END{for (x in cnt){print x,cnt[x]}}" %s/%s.recal.depth.txt | sort -n -k1 > %s/%s.recal.depth_hash.txt' % \
				(cmd, outputDirN,sampN, outputDirN,sampN)
			os.system('(%s) 2> %s' % (cmd, log))

		procS.add(sampN)
Ejemplo n.º 37
0
def main(dataN='TCGA_GBM', endPoint='death', geneN='EGFR', altType='2-7', cutoff=(50,50)):
	"""Write the survival table for the EGFR exon 2-7 skipping allele fraction and plot it.

	The allele fraction is nReads/(nReads+nReads_w1) from splice_skip_AF, 0 for
	samples without such a record; samples listed in CIMP are left out of the
	table written to /var/www/html/tmp/survival.mvc.

	geneN, altType          -- used for the row labels only; the query itself
	                           is fixed to EGFR and "2-7"
	dataN, endPoint, cutoff -- accepted for interface compatibility; not used here

	Returns True when either R script exits non-zero.
	"""
	colN = ['pId','time','event','value','label','priority']

	con,cursor = mymysql.connectDB(db='tcga1')
	try:
		cursor.execute('create temporary table t1 select distinct samp_id pId from splice_normal')

		cursor.execute('create temporary table t2 '
			'select pId, nReads/(nReads+nReads_w1) af from t1 left join splice_skip_AF on pId=samp_id and gene_sym="EGFR" and delExons like "%2-7%"')

		cursor.execute('update t2 set af=0 where af is null')

		recordL = mymysql.dictSelect("SELECT pId,days_followup time,if(days_death is not null,1,0) event, af value "
			"FROM clinical join t2 using (pId)", cursor)
	finally:
		con.close()

	threshold = (0.01,0.01)

	with open('/var/www/html/tmp/survival.mvc','w') as outFile:

		outFile.write('\t'.join(colN)+'\n')

		for r in recordL:

			r['value'] = float(r['value'])

			if r['value'] < threshold[0]:
				label = '"%s %s < %.2f"' % (geneN,altType,threshold[0])
				priority = '1'
			elif r['value'] >= threshold[1]:
				label = '"%s %s > %.2f"' % (geneN,altType,threshold[1])
				priority = '2'
			else:
				# only reachable when the two thresholds differ
				label = '"%s %s Middle"' % (geneN,altType)
				priority = '9'

			if r['pId'] not in CIMP:
				outFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (r['pId'], r['time'], r['event'], r['value'], label, priority))

	ret1 = os.system('Rscript distribution.r /var/www/html/tmp/survival.mvc png &> /var/www/html/tmp/error_distr.txt')
	ret2 = os.system('Rscript survival.r /var/www/html/tmp/survival.mvc png &> /var/www/html/tmp/error_surv.txt')

	return ret1!=0 or ret2!=0
Ejemplo n.º 38
0
def main(inFileName, outFileName):
    """Join per-sample treatment intervals with primary/recurrent values of every gene and data type.

    inFileName  -- tab-separated file with a header line and the columns
                   primary sample id, chemo, RT ('NA' or an integer)
    outFileName -- tab-separated output file

    chemo and RT are capped at 1000, 'NA' counts as 1000, and the 'either'
    column is the smaller of the two.
    """

    def capped(field):
        # 'NA' is treated like the cap itself
        if field == 'NA':
            return 1000
        return min(int(field), 1000)

    valQuery = 'select %s from %s where gene_sym="%s" and samp_id="%s"'

    (con, cursor) = mymysql.connectDB(db='ircr1')
    try:
        with open(inFileName) as inFile:
            with open(outFileName, 'w') as outFile:
                inFile.readline()  # skip the header line
                outFile.write('\t'.join(('dType', 'geneN', 'sId_p', 'sId_r',
                                         'val_p', 'val_r', 'chemo', 'RT',
                                         'either')) + '\n')

                for line in inFile:
                    # strip only the line terminator, so a final line without
                    # a newline keeps its last character
                    line = line.rstrip('\r\n')
                    if not line:
                        continue

                    (sId_p, chemo, RT) = line.split('\t')
                    chemo = capped(chemo)
                    RT = capped(RT)

                    # the recurrent partner depends on the sample only
                    cursor.execute(
                        'select samp_id from sample_tag where tag="pair_P:%s"'
                        % sId_p)
                    (sId_r, ) = cursor.fetchone()

                    for geneN in geneL:
                        for dType in dTypeL:
                            cursor.execute(valQuery %
                                           (dTypeH[dType][1], dTypeH[dType][0],
                                            geneN, sId_p))
                            r_p = cursor.fetchone()

                            cursor.execute(valQuery %
                                           (dTypeH[dType][1], dTypeH[dType][0],
                                            geneN, sId_r))
                            r_r = cursor.fetchone()

                            if r_p and r_r:
                                outFile.write(
                                    '%s\t%s\t%s\t%s\t%.2f\t%.2f\t%d\t%d\t%d\n' %
                                    (dType, geneN, sId_p, sId_r, r_p[0],
                                     r_r[0], chemo, RT, min(chemo, RT)))
    finally:
        con.close()
Ejemplo n.º 39
0
def main(newDBN='', server='smc1'):
	"""Create database newDBN with empty copies of the ircr1 tables.

	Tables census, cosmic and rpkm_gene_expr_lg2, and any table whose name
	contains 'bak', are not copied. All privileges on the new database are
	granted to 'cancer'@'localhost'.

	newDBN -- name of the database to create
	server -- key into mysqlH giving the host to connect to
	"""
	(con, cursor) = mymysql.connectDB(user='******', passwd='123456', host=mysqlH[server]['host'])

	for stmt in ('CREATE DATABASE IF NOT EXISTS %s', "GRANT ALL ON %s.* TO 'cancer'@'localhost'", 'USE %s'):
		cursor.execute(stmt % newDBN)

	cursor.execute('show tables from ircr1')
	tableL = [tblN for (tblN,) in cursor.fetchall()
		if tblN not in ['census','cosmic','rpkm_gene_expr_lg2'] and 'bak' not in tblN]

	for tblN in tableL:
		cursor.execute('CREATE TABLE IF NOT EXISTS %s LIKE ircr1.%s' % (tblN, tblN))
Ejemplo n.º 40
0
def prep_somatic(outFileN, server='smc1', dbN='ircr1'):
    """Build the mutect mutation data file and mark the samples as having a matched normal.

    outFileN -- file receiving the output of prepDB_mutation_mutect.py
    server   -- key into mysetting.mysqlH for the connection settings
    dbN      -- database holding sample_tag

    For every *mutect_vep.dat file the sample's XSeq tag gets ',N' appended,
    or a new 'XSeq_<platform>,N' tag is inserted for an unknown sample.
    Samples whose tag already lists N are left untouched.
    """
    dbCfg = mysetting.mysqlH[server]
    (con, cursor) = mymysql.connectDB(user=dbCfg['user'],
                                      passwd=dbCfg['passwd'],
                                      db=dbN,
                                      host=dbCfg['host'])
    cursor.execute(
        'SELECT DISTINCT samp_id,tag FROM sample_tag WHERE tag LIKE "XSeq_%%"')

    # split the known exome samples by whether their tag already lists N
    singleL = []
    somaticL = []
    for (sampId, sampTag) in cursor.fetchall():
        plTypeL = re.match('XSeq_(.*)', sampTag).group(1).split(',')
        (somaticL if 'N' in plTypeL else singleL).append(sampId)

    os.system(
        'cat %s/*mutect_vep.dat | /usr/bin/python %s/Integration/prepDB_mutation_mutect.py > %s'
        % (mysetting.wxsMutectDir, mysetting.SRC_HOME, outFileN))

    for mutect in glob('%s/*mutect_vep.dat' % mysetting.wxsMutectDir):
        (sid, postfix, platform) = re.match(
            '(.*)_([XT].{,2})_([STKN]{2}).mutect_vep.dat',
            os.path.basename(mutect)).groups()
        if postfix != 'T':
            sid = '%s_%s' % (sid, postfix)

        if sid in somaticL:
            continue

        if sid not in singleL:
            ##brand new sample
            tag = 'XSeq_%s,N' % platform
            cursor.execute(
                'INSERT INTO sample_tag SET samp_id="%s", tag="%s"' %
                (sid, tag))
            continue

        ##previously analyzed without matched normal
        cursor.execute(
            'SELECT samp_id,tag FROM sample_tag WHERE samp_id="%s" AND tag LIKE "XSeq_%%"'
            % sid)
        tagRowL = cursor.fetchall()
        if len(tagRowL) > 1:
            sys.stderr.write('Duplication in sample_tag: %s\n' % sid)
            sys.exit(1)
        tag = '%s,N' % tagRowL[0][1]
        cursor.execute(
            'UPDATE sample_tag SET samp_id="%s", tag="%s" WHERE samp_id="%s" AND tag LIKE "XSeq_%%"'
            % (sid, tag, sid))
Ejemplo n.º 41
0
def main(outDirName):
    """Write one paired_df_<dType>.txt per data type with primary and recurrent values of every shared gene.

    outDirName -- directory receiving the output files

    Primary samples are those tagged pair_R*, minus S042, ids ending in _X
    and the ids in exeSampL; the recurrent partner is the sample tagged
    pair_P:<primary> that is not listed in sId_r1.
    """
    (con, cursor) = mymysql.connectDB(db='ircr1')

    cursor.execute(
        'select distinct samp_id from sample_tag where substring(tag,1,6)="pair_R" and samp_id!="S042" and samp_id not like "%_X"'
    )
    primL = [sampId for (sampId, ) in cursor.fetchall()
             if sampId not in exeSampL]

    for dType in dTypeL:

        print(dType)

        outFile = open('%s/paired_df_%s.txt' % (outDirName, dType), 'w')
        outFile.write('\t'.join(('sId_p', 'sId_r', 'geneN', 'val_p',
                                 'val_r')) + '\n')

        for sId_p in primL:

            print('\t%s' % sId_p)

            cursor.execute(
                'select samp_id from sample_tag where tag="pair_P:%s" and samp_id!="%s"'
                % (sId_p, ('" and samp_id!="').join(sId_r1)))
            (sId_r, ) = cursor.fetchone()

            for tmpTbl in ('tP', 'tR'):
                cursor.execute('drop table if exists %s' % tmpTbl)

            # one indexed temporary table per member of the pair
            for (tmpTbl, alias, sampId) in (('tP', 'vP', sId_p),
                                            ('tR', 'vR', sId_r)):
                cursor.execute(
                    'create temporary table %s select gene_sym,%s %s from %s where samp_id="%s"'
                    % (tmpTbl, dTypeH[dType][1], alias, dTypeH[dType][0],
                       sampId))
                cursor.execute('alter table %s add index (gene_sym)' % tmpTbl)

            cursor.execute(
                'select gene_sym,vP,vR from tP join tR using (gene_sym)')

            for (geneN, vP, vR) in cursor.fetchall():
                outFile.write('%s\t%s\t%s\t%.2f\t%.2f\n' %
                              (sId_p, sId_r, geneN, vP, vR))

        outFile.close()

    con.close()
Ejemplo n.º 42
0
def post_xsq2purity(outFileN, server='smc1', dbN='ircr1'):
	"""Reload xsq_purity from the tumor_frac files and retag the samples found there.

	outFileN -- data file produced by prepDB_xsq_purity.py and loaded into xsq_purity
	server   -- key into mysetting.mysqlH for the connection settings
	dbN      -- database holding xsq_purity and sample_tag

	Every sample in xsq_purity that is tagged "XSeq_SS" gets the tag
	"XSeq_SS,N" instead; its id is printed.
	"""
	os.system('cat %s/*/*tumor_frac.txt | /usr/bin/python %s/Integration/prepDB_xsq_purity.py > %s' % (mysetting.wxsPurityDir, mysetting.SRC_HOME, outFileN))

	dbCfg = mysetting.mysqlH[server]
	mymysql.reset_table(tableN='xsq_purity', dataFileN=outFileN, user=dbCfg['user'],passwd=dbCfg['passwd'],db=dbN, host=dbCfg['host'])

	# add normal if missed
	(con, cursor) = mymysql.connectDB(user=dbCfg['user'], passwd=dbCfg['passwd'], db=dbN, host=dbCfg['host'])
	cursor.execute('''SELECT DISTINCT samp_id FROM %s.xsq_purity''' % dbN)
	puritySidL = [row[0] for row in cursor.fetchall()]
	cursor.execute('''SELECT DISTINCT samp_id FROM %s.sample_tag WHERE tag = "XSeq_SS"''' % dbN)
	untaggedS = set(row[0] for row in cursor.fetchall())

	for sid in puritySidL:
		if sid not in untaggedS:
			continue
		print(sid)
		cursor.execute('''UPDATE %s.sample_tag SET samp_id="%s", tag="XSeq_SS,N" WHERE samp_id="%s" and tag="XSeq_SS"''' % (dbN, sid, sid))
Ejemplo n.º 43
0
def main(outFileName, dbNL, dTypeL, outDirName, outFileN):
    """Run the unpaired primary-vs-recurrent methylation KS test for every Infinium27k locus.

    outFileName -- scratch table, rewritten for each locus
    dbNL        -- databases to process
    dTypeL      -- keys into dTypeH (table name, value column)
    outDirName  -- first argument of the R script; also receives error_kstest.txt
    outFileN    -- second argument of the R script
    """
    # statement building t_recur, the set of recurrent samples, per database
    recurStmtH = {
        'ircr1':
        'create temporary table t_recur select distinct samp_id from sample_tag where substring(tag,1,6)="pair_P"',
        'tcga1':
        'create temporary table t_recur select distinct samp_id from sample_tag where tag="Recur"',
    }
    rowFmt = '%s\t%s\t%s\t%s\t%s\t%s\t%s\n'

    for dbN in dbNL:

        (con, cursor) = mymysql.connectDB(db=dbN)

        if dbN in recurStmtH:
            cursor.execute(recurStmtH[dbN])

        cursor.execute(
            'SELECT distinct loc,geneName FROM tcga1.methyl_pId where platform="Infinium27k"'
        )
        locusL = cursor.fetchall()

        for dType in dTypeL:
            for (loc, geneN) in locusL:

                outFile = open(outFileName, 'w')
                outFile.write(rowFmt % ('dbT', 'dType', 'geneN', 'PR', 'sId',
                                        'val', 'loc'))

                # P: samples outside t_recur, R: samples inside it
                for (PR, negate) in (('P', 'not'), ('R', '')):
                    cursor.execute(
                        'select pId,%s from %s where platform="Infinium27k" and loc="%s" and geneName="%s" and pId %s in (select samp_id from t_recur)'
                        % (dTypeH[dType][1], dTypeH[dType][0], loc, geneN,
                           negate))
                    for (sId, val) in cursor.fetchall():
                        outFile.write(rowFmt % (dbH[dbN], dType, geneN, PR,
                                                sId, val, loc))

                outFile.close()

                os.system(
                    'Rscript %s/PrimRecur/unpaired_gene_methyl_ks.r %s %s &>> %s/error_kstest.txt'
                    % (mysetting.SRC_HOME, outDirName, outFileN, outDirName))

        con.close()
Ejemplo n.º 44
0
def main():
    """Print, per gene, the methylation/expression correlation of each locus, highest first.

    For every gene in methyl_pId the methylation fraction of each
    (platform, loc) is correlated with log2(rpkm+1) of the same samples.
    Output columns: gene, platform, loc, number of samples, r.
    """
    con, cursor = mymysql.connectDB(db='tcga1')
    try:
        cursor.execute(
            'select distinct geneName from methyl_pId where geneName <>""')
        results = cursor.fetchall()

        for (gN, ) in results:

            cursor.execute(
                'SELECT distinct platform,loc FROM methyl_pId where geneName ="%s"'
                % gN)
            results1 = cursor.fetchall()
            if not results1:
                continue

            # both temporary tables depend on the gene only, so they are
            # built once per gene rather than once per locus
            cursor.execute(
                'create temporary table t_rpkm as select samp_id,log2(rpkm+1) as rpkm_log from rpkm_gene_expr where gene_sym="%s"'
                % gN)
            cursor.execute(
                'create temporary table t_methyl as select * from tcga1.methyl_pId where geneName="%s"'
                % gN)

            output = []

            for (plat, loc) in results1:

                cursor.execute(
                    'select fraction,rpkm_log from t_methyl, t_rpkm where platform="%s" and loc="%s" and pId=samp_id'
                    % (plat, loc))
                results2 = cursor.fetchall()

                if len(results2) == 0:
                    continue

                methyl, expr = zip(*results2)

                r = numpy.corrcoef(methyl, expr)[0, 1]

                output.append((plat, loc, len(methyl), r))

            cursor.execute('drop table t_rpkm,t_methyl')

            # descending by correlation; ties keep their query order
            output.sort(key=lambda rec: rec[-1], reverse=True)

            for (plat, loc, n, r) in output:
                print('%s\t%s\t%s\t%s\t%.2f' % (gN, plat, loc, n, r))
    finally:
        con.close()
Ejemplo n.º 45
0
def main():
	"""Print, per platform and gene, the locus with the lowest methylation/expression correlation.

	For every (platform, gene) in methyl the methylation fraction of each loc
	is correlated with log2(rpkm+1) of the same samples; the entry that sorts
	last in descending order of r is printed.
	Output columns: platform, gene, loc, number of samples, r.
	"""
	con,cursor = mymysql.connectDB(db='tcga1')
	try:
		cursor.execute('select distinct platform from methyl')
		platformL = cursor.fetchall()

		for (platform,) in platformL:

			cursor.execute('select distinct geneName from methyl where geneName <>"" and platform="%s"' % platform)
			results = cursor.fetchall()

			for (gN,) in results:

				cursor.execute('SELECT distinct loc FROM methyl where geneName ="%s" and platform="%s"' % (gN,platform))
				results1 = cursor.fetchall()
				if not results1:
					continue

				# both temporary tables are the same for every loc of this
				# gene and platform, so build them once
				cursor.execute('create temporary table t_rpkm as select samp_id,log2(rpkm+1) as rpkm_log from rpkm_gene_expr where gene_sym="%s"' % gN)
				cursor.execute('create temporary table t_methyl as select * from methyl where geneName="%s" and platform="%s"' % (gN,platform))

				output = []

				for (loc,) in results1:

					cursor.execute('select fraction,rpkm_log from t_methyl, t_rpkm where loc="%s" and pId=samp_id' % (loc))
					results2 = cursor.fetchall()

					if len(results2) == 0:
						continue

					methyl,expr = zip(*results2)

					r = numpy.corrcoef(methyl,expr)[0,1]

					output.append((platform,loc,len(methyl),r))

				cursor.execute('drop table t_rpkm,t_methyl')

				# nothing to report when no locus had matching samples
				if not output:
					continue

				# descending by correlation; the last entry has the lowest r
				output.sort(key=lambda rec: rec[-1], reverse=True)
				(plat, loc, n, r) = output[-1]

				print('%s\t%s\t%s\t%s\t%.2f' % (plat,gN,loc,n,r))
	finally:
		con.close()
Ejemplo n.º 46
0
def load_mutation_all(inFileN, server='smc1', dbN='ircr1'):
    """Recreate the mutation_normal table and fill it from a data file.

    inFileN -- data file loaded with LOAD DATA LOCAL INFILE
    server  -- key into mysetting.mysqlH for the connection settings
    dbN     -- database in which the table is rebuilt

    The rows are joined with the cosmic table on variant coordinates,
    alleles and gene symbol before they are inserted.
    """
    (con,
     cursor) = mymysql.connectDB(user=mysetting.mysqlH[server]['user'],
                                 passwd=mysetting.mysqlH[server]['passwd'],
                                 db=dbN,
                                 host=mysetting.mysqlH[server]['host'])

    # start from an empty table: the previous content is discarded
    cursor.execute('DROP TABLE IF EXISTS mutation_normal')
    stmt = '''
	CREATE TABLE mutation_normal (
		samp_id varchar(63) NOT NULL,
		chrom varchar(10) NOT NULL,
		chrSta int unsigned NOT NULL,
		chrEnd int unsigned NOT NULL,
		ref varchar(63) NOT NULL,
		alt varchar(63) NOT NULL,
		n_nReads_ref mediumint unsigned NOT NULL,
		n_nReads_alt mediumint unsigned NOT NULL,
		nReads_ref mediumint unsigned NOT NULL,
		nReads_alt mediumint unsigned NOT NULL,
		strand char(1) NOT NULL,
		gene_symL varchar(63),
		ch_dna varchar(127),
		ch_aa varchar(63),
		ch_type varchar(127),
		cosmic text,
		mutsig text,
		index (samp_id,gene_symL),
		index (samp_id,chrom,chrSta,chrEnd),
		index (samp_id,chrom,chrSta,ref,alt),
		index (samp_id,chrom,chrSta,chrEnd,ref,alt)
	)
	'''
    cursor.execute(stmt)
    # stage the raw file in a temporary table with the same layout
    cursor.execute('CREATE TEMPORARY TABLE tmp LIKE mutation_normal')
    cursor.execute('LOAD DATA LOCAL INFILE "%s" INTO TABLE tmp' % inFileN)
    # t2: staged rows plus the cosmic annotation (NULL when no cosmic row matches)
    cursor.execute(
        'CREATE TEMPORARY TABLE t2 SELECT tmp.samp_id,tmp.chrom,tmp.chrSta,tmp.chrEnd,tmp.ref,tmp.alt,tmp.n_nReads_ref,tmp.n_nReads_alt,tmp.nReads_ref,tmp.nReads_alt,tmp.strand,tmp.gene_symL,tmp.ch_dna,tmp.ch_aa,tmp.ch_type,cosmic.ch_aaL AS cosmic,cosmic.ch_typeL AS cosmic_type,tmp.mutsig FROM tmp LEFT JOIN cosmic ON tmp.chrom=cosmic.chrom AND tmp.chrSta=cosmic.chrSta AND tmp.chrEnd=cosmic.chrEnd AND tmp.ref=cosmic.ref AND tmp.alt=cosmic.alt AND tmp.gene_symL=cosmic.gene_symL'
    )
    # rows without a cosmic match keep their own ch_aa/ch_type and get an empty cosmic column
    cursor.execute(
        'INSERT INTO mutation_normal SELECT samp_id,chrom,chrSta,chrEnd,ref,alt,n_nReads_ref,n_nReads_alt,nReads_ref,nReads_alt,strand,gene_symL,ch_dna,ch_aa,ch_type,"" AS cosmic,mutsig FROM t2 WHERE cosmic IS NULL'
    )
    # rows with a cosmic match take ch_aa and ch_type from the cosmic annotation
    cursor.execute(
        'INSERT INTO mutation_normal SELECT samp_id,chrom,chrSta,chrEnd,ref,alt,n_nReads_ref,n_nReads_alt,nReads_ref,nReads_alt,strand,gene_symL,ch_dna,cosmic AS ch_aa,cosmic_type AS ch_type,cosmic,mutsig FROM t2 WHERE cosmic IS NOT NULL'
    )
Ejemplo n.º 47
0
def main(newDBN='', server='smc1'):
    """Create database newDBN and clone the structure of the ircr1 tables into it.

    newDBN -- name of the database to create
    server -- key into mysqlH giving the host to connect to

    Tables census, cosmic and rpkm_gene_expr_lg2, and tables whose name
    contains 'bak', are skipped. All privileges on the new database are
    granted to 'cancer'@'localhost'.
    """
    (con, cursor) = mymysql.connectDB(user='******',
                                      passwd='123456',
                                      host=mysqlH[server]['host'])

    cursor.execute('CREATE DATABASE IF NOT EXISTS %s' % newDBN)
    cursor.execute("GRANT ALL ON %s.* TO 'cancer'@'localhost'" % newDBN)
    cursor.execute('USE %s' % newDBN)

    cursor.execute('show tables from ircr1')
    srcTableL = [row[0] for row in cursor.fetchall()]

    skipL = ['census', 'cosmic', 'rpkm_gene_expr_lg2']
    for tblN in srcTableL:
        if tblN in skipL or 'bak' in tblN:
            continue
        cursor.execute('CREATE TABLE IF NOT EXISTS %s LIKE ircr1.%s' %
                       (tblN, tblN))