def main(trioFileN, projectN, tidL=[], clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_xsq2phylotree.py once per trio that has a normal and a primary sample.

    For every selected trio the first normal BAM, the first primary BAM and
    (when recurrent ids are present) all recurrent BAMs are handed to the
    per-sample script as one comma-separated ``-i`` argument.

    Args:
        trioFileN: trio description file, passed to ``mypipe.read_trio``.
        projectN: project name; directory name under the storage base and
            value of the child's ``-p`` option.
        tidL: trio ids to process; an empty list selects every trio.  The
            default list object is shared between calls; this function
            itself only reads it.
        clean: accepted but not used here.
            NOTE(review): the child is always started with ``-c False``;
            confirm whether ``clean`` was meant to be forwarded.
        pbs: when true the command is piped to ``qsub`` on queue ``server``;
            otherwise it is run in the foreground with stderr sent to a log.
        server: value of ``-s`` and the qsub queue name.
        genome: value of ``-g``.
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase

    # Both bases are the same path here, so the "Log" pass normally just
    # reports that the directory already exists.
    for label, base in (('File', storageBase), ('Log', apacheBase)):
        if glob(base + projectN):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (base, projectN, base, projectN))
            print('%s directory: created' % label)

    bamDirL = mysetting.wxsBamDirL
    trioH = mypipe.read_trio(trioFileN, bamDirL, tidL)

    ## assume 1 primary & normal per trio
    for tid in trioH:
        if tidL != [] and tid not in tidL:
            continue

        trio = trioH[tid]
        if trio['Normal'] == [] or trio['prim_id'] == []:
            continue

        # A set drops duplicate BAM paths shared between roles.
        bamS = set([trio['Normal'][0], trio['Primary'][0]])
        if trio['recur_id'] != []:
            bamS.update(trio['Recurrent'])

        sampN = trio['prim_id'][0]

        # Sorted so the generated command line is the same on every run.
        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2phylotree.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, ','.join(sorted(bamS)), sampN, projectN, False, server, genome)
        print(cmd)

        if pbs:
            # Queued runs log inside the sample's own sub-directory.
            log = '%s/%s.Xsq2phylotree.qlog' % (storageBase + projectN + '/' + sampN, sampN)
            os.system('echo "%s" | qsub -q %s -N x2phylotree_%s -o %s -j oe' % (cmd, server, sampN, log))
        else:
            log = '%s/%s.Xsq2phylotree.qlog' % (storageBase + projectN, sampN)
            os.system('(%s) 2> %s' % (cmd, log))
def _submit_xsq2somatic(tumor, normal, sampN, projectN, storageBase, pbs, server, genome):
    """Build, print and launch one pipe_s_xsq2somatic.py run for a tumor/normal BAM pair."""
    cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -j %s -n %s -p %s -c %s -s %s -g %s' % (
        mysetting.SRC_HOME, tumor, normal, sampN, projectN, False, server, genome)
    print(cmd)

    if pbs:
        # Queued runs log inside the sample's own sub-directory.
        log = '%s/%s.Xsq2somatic.qlog' % (storageBase + projectN + '/' + sampN, sampN)
        os.system('echo "%s" | qsub -q %s -N x2somatic_%s -o %s -j oe' % (cmd, server, sampN, log))
    else:
        log = '%s/%s.Xsq2somatic.qlog' % (storageBase + projectN, sampN)
        os.system('(%s) 2> %s' % (cmd, log))


def main(trioFileN, projectN, tidL=[], clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_xsq2somatic.py for the primary and every recurrent tumor of each trio.

    Each tumor BAM is paired with the trio's first normal BAM.  Trios
    without a normal BAM are skipped.

    Args:
        trioFileN: trio description file, passed to ``mypipe.read_trio``.
        projectN: project name; directory name under the storage base and
            value of the child's ``-p`` option.
        tidL: trio ids to process; an empty list selects every trio.  The
            default list object is shared between calls; this function
            itself only reads it.
        clean: accepted but not used here.
            NOTE(review): the child is always started with ``-c False``;
            confirm whether ``clean`` was meant to be forwarded.
        pbs: when true each command is piped to ``qsub`` on queue ``server``;
            otherwise it is run in the foreground with stderr sent to a log.
        server: value of ``-s`` and the qsub queue name.
        genome: value of ``-g``.
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase

    # Both bases are the same path here, so the "Log" pass normally just
    # reports that the directory already exists.
    for label, base in (('File', storageBase), ('Log', apacheBase)):
        if glob(base + projectN):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (base, projectN, base, projectN))
            print('%s directory: created' % label)

    bamDirL = mysetting.wxsBamDirL
    trioH = mypipe.read_trio(trioFileN, bamDirL, tidL)

    ## assume 1 primary & normal per trio
    for tid in trioH:
        if tidL != [] and tid not in tidL:
            continue

        trio = trioH[tid]
        if trio['Normal'] == []:
            continue

        if trio['prim_id'] != []:  ##primary
            sampN = trio['prim_id'][0]
            print('%s %s' % (tid, trio['Primary']))
            tumor = trio['Primary'][0]
            normal = trio['Normal'][0]
            _submit_xsq2somatic(tumor, normal, sampN, projectN, storageBase, pbs, server, genome)

        if trio['recur_id'] != []:  ##recurrent
            normal = trio['Normal'][0]
            # Recurrent ids and BAMs are matched by position.
            for sampN, tumor in zip(trio['recur_id'], trio['Recurrent']):
                _submit_xsq2somatic(tumor, normal, sampN, projectN, storageBase, pbs, server, genome)
def _find_files(searchDir, namePattern):
    """Return the paths printed by ``find searchDir -name 'namePattern'``, newline-stripped."""
    # The pattern is single-quoted so the shell passes the wildcard through
    # to find instead of expanding it against the current directory.
    pipe = os.popen("find %s -name '%s'" % (searchDir, namePattern))
    try:
        return [line.rstrip() for line in pipe.readlines()]
    finally:
        pipe.close()


def _locate_pileup_proc(sampN):
    """Return a glob for sampN's per-chromosome .pileup_proc files, or '' when none are found."""
    for searchDir in mysetting.wxsPileupProcDirL:
        fileL = _find_files(searchDir, '%s*chr*.pileup_proc' % sampN)
        if fileL:
            # NOTE(review): if the hits span several directories, which one
            # is used depends on set ordering.
            procDir = list(set(map(os.path.dirname, fileL)))[0]
            return '%s/%s*chr*.pileup_proc' % (procDir, sampN)
    return ''


def _submit_xsq2purity(sampN, mutscanN, projectN, storageBase, clean, pbs, server, genome):
    """Locate one tumor sample's inputs and launch pipe_s_xsq2purity.py for it.

    Exits the process with status 1 when either the .pileup_proc files or
    the .ngCGH.seg file of the sample cannot be found.
    """
    procN = _locate_pileup_proc(sampN)
    if procN == '':  ## .pileup_proc not found
        sys.stderr.write('Can\'t find .pileup_proc\n')
        sys.exit(1)

    segL = _find_files(mysetting.wxsCNADir, '%s*.ngCGH.seg' % sampN)
    if not segL:  ## .ngCGH.seg not found
        sys.stderr.write('Can\'t find .ngCGH.seg for %s\n' % sampN)
        sys.exit(1)
    cnN = segL[0]

    # procN is a glob; it stays quoted so the child script receives it unexpanded.
    cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2purity.py -i \'%s\' -j %s -k %s -n %s -p %s -c %s -s %s -g %s' % (
        mysetting.SRC_HOME, procN, mutscanN, cnN, sampN, projectN, clean, server, genome)
    print(sampN)
    print('%s %s %s' % (procN, mutscanN, cnN))

    if pbs:
        # Queued runs log inside the sample's own sub-directory.
        log = '%s/%s.Xsq2purity.qlog' % (storageBase + projectN + '/' + sampN, sampN)
        os.system('echo "%s" | qsub -q %s -N x2purity_%s -o %s -j oe' % (cmd, server, sampN, log))
    else:
        log = '%s/%s.Xsq2purity.qlog' % (storageBase + projectN, sampN)
        os.system('(%s) 2> %s' % (cmd, log))


def main(trioFileN, projectN, clean=False, pbs=False, server='smc1', genome='hg19', sampL=[]):
    """Launch pipe_s_xsq2purity.py for the primary and recurrent tumors of each trio.

    A trio is processed only if it has a normal sample id and a .mutscan
    file for that id can be found under ``mysetting.wxsMutscanDirL``; a
    missing .mutscan is reported on stderr and the trio is skipped.

    Args:
        trioFileN: trio description file, passed to ``mypipe.read_trio``.
        projectN: project name; directory name under the storage base and
            value of the child's ``-p`` option.
        clean: forwarded to the child script as ``-c``.
        pbs: when true each command is piped to ``qsub`` on queue ``server``;
            otherwise it is run in the foreground with stderr sent to a log.
        server: value of ``-s`` and the qsub queue name.
        genome: value of ``-g``.
        sampL: tumor sample ids to process; an empty list selects all.  The
            default list object is shared between calls; it is only read.
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase

    # Both bases are the same path here, so the "Log" pass normally just
    # reports that the directory already exists.
    for label, base in (('File', storageBase), ('Log', apacheBase)):
        if glob(base + projectN):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (base, projectN, base, projectN))
            print('%s directory: created' % label)

    bamDirL = mysetting.wxsBamDirL
    trioH = mypipe.read_trio(trioFileN, bamDirL)

    ## assume 1 primary & normal per trio
    for tid in trioH:
        trio = trioH[tid]

        ## must have normal sample
        if trio['norm_id'] == []:
            continue
        norm_id = trio['norm_id'][0]
        # (A disabled workaround used to swap S567_B_SS / S567_T_SS for the
        # mutscan and pileup_proc lookups.)

        mutscanN = ''
        for searchDir in mysetting.wxsMutscanDirL:
            mutscanL = _find_files(searchDir, '%s*.mutscan' % norm_id)
            if mutscanL:
                mutscanN = mutscanL[0]
                break

        if mutscanN == '':  ## .mutscan not found
            print(norm_id)
            sys.stderr.write('Can\'t find .mutscan\n')
            continue

        if trio['prim_id'] != []:
            sampN = trio['prim_id'][0]
            if sampL == [] or sampN in sampL:
                _submit_xsq2purity(sampN, mutscanN, projectN, storageBase, clean, pbs, server, genome)

        if trio['recur_id'] != []:
            # Only as many recurrent ids as there are recurrent BAMs are used.
            for sampN, _bam in zip(trio['recur_id'], trio['Recurrent']):
                if sampL == [] or sampN in sampL:
                    _submit_xsq2purity(sampN, mutscanN, projectN, storageBase, clean, pbs, server, genome)
#!/usr/bin/python import sys, os, re import mysetting, mybasic mybasic.add_module_path(['NGS/pipeline','NGS/mutation']) import mutect_batch, somaticindeldetector_batch import mypipe bamDirL = mysetting.wxsBamDirL trioH = mypipe.read_trio('/EQL1/NSL/clinical/trio_info.txt', bamDirL) #for tid in sorted(trioH.keys()): # if tid not in ['59','60','61']: # continue # print tid, trioH[tid]['prim_id'], trioH[tid]['recur_id'] # for role in ['Normal','Primary','Recurrent']: # print role,trioH[tid][role] #sys.exit(1) outDir='/EQL3/pipeline/somatic_mutect' ## assume 1 primary & normal per trio for tid in trioH: if trioH[tid]['norm_id'] == []: continue if tid not in ['63']: continue norm = trioH[tid]['norm_id'][0]
colL = line.rstrip().split('\t') rm = re.match('(chr[^:]*):([0-9]*)~([0-9]*)', colL[idxH['locus']]) (chr,chrSta,chrEnd) = rm.groups() ref = colL[idxH['ref']] alt = colL[idxH['alt']] if (chr,chrSta,chrEnd,ref,alt) not in annotH: annotH[(chr,chrSta,chrEnd,ref,alt)] = {} for col in ['gene_symL','ch_dna','ch_aa','ch_type','cosmic','mutsig']: annotH[(chr,chrSta,chrEnd,ref,alt)][col] = colL[idxH[col]] return annotH ### until it is merged into pipeline import mybasic mybasic.add_module_path(['NGS/pipeline']) import mypipe trioH = mypipe.read_trio(bamDirL=mysetting.wxsBamDirL) pairH = {} for tid in trioH: if trioH[tid]['recur_id'] != []: pid = trioH[tid]['prim_id'][0][:-5] pairH[pid] = map(lambda x: x[:-5], trioH[tid]['recur_id']) #### #(con,cursor) = mymysql.connectDB(db='ircr1') #tag = 'pair_R:%' #cursor.execute('select distinct samp_id from sample_tag where tag like "%s"' % tag) #sIdL_p = [x for (x,) in cursor.fetchall()] # #tag = 'XSeq%%,N' #cursor.execute('select distinct samp_id from sample_tag where tag like "%s"' % tag) #wxsL = [x for (x,) in cursor.fetchall()] #
def _new_signature_table():
    """Return a zeroed table keyed by (mutation, context) for ref C/T and all flanking bases."""
    baseL = ['A', 'C', 'G', 'T']
    datH = {}
    for ref in ['C', 'T']:
        for alt in baseL:
            if alt == ref:
                continue
            mut = '%s>%s' % (ref, alt)
            for a in baseL:
                for b in baseL:
                    datH[(mut, a + ref + b)] = {'prim': 0.0, 'recur': 0.0, 'delta': 0.0}
    return datH


def _read_signature_fractions(fileN, datH, role):
    """Store column 3 / column 5 of every data row of fileN in datH[(mut, context)][role].

    The first line of the file is discarded (presumably a header).  Columns
    1 and 2 (0-based) are used as mutation and context.  A row whose
    (mutation, context) is not in datH raises KeyError, as before.
    """
    print(fileN)
    with open(fileN, 'r') as inFile:
        inFile.readline()
        for line in inFile:
            colL = line.rstrip().split('\t')
            mut = colL[1]
            context = colL[2]
            datH[(mut, context)][role] = float(colL[3]) / float(colL[5])


def diff_batch(trioFileN, tidL=[]):
    """Run the mutation-signature diff plot for every primary/recurrent pair of the selected trios.

    Trios lacking a normal BAM, a primary id or a recurrent id are skipped.
    For each recurrent sample whose own ``mutation_signature.txt`` and the
    primary's both exist under /EQL3/pipeline/somatic_mutation, both files
    are parsed and the R plotting script is called with the two sample ids.

    Args:
        trioFileN: trio description file, passed to ``mypipe.read_trio``.
        tidL: trio ids to process; an empty list selects every trio.  The
            default list object is shared between calls; this function
            itself only reads it.
    """
    bamDirL = mysetting.wxsBamDirL
    trioH = mypipe.read_trio(trioFileN, bamDirL, tidL)

    for tid in trioH:
        if tidL != [] and tid not in tidL:
            continue

        trio = trioH[tid]
        if trio['Normal'] == [] or trio['prim_id'] == [] or trio['recur_id'] == []:
            continue

        print('%s %s %s' % (tid, trio['prim_id'], trio['recur_id']))

        pid = trio['prim_id'][0]
        p_file = '/EQL3/pipeline/somatic_mutation/%s/%s.mutation_signature.txt' % (pid, pid)

        for rid in trio['recur_id']:
            r_file = '/EQL3/pipeline/somatic_mutation/%s/%s.mutation_signature.txt' % (rid, rid)
            if not (os.path.isfile(p_file) and os.path.isfile(r_file)):
                continue

            # A fresh table per pair, so values from a previous recurrence
            # cannot leak into this one.
            datH = _new_signature_table()
            _read_signature_fractions(p_file, datH, 'prim')
            _read_signature_fractions(r_file, datH, 'recur')

            # NOTE: writing <rid>.mutation_signature_diff.txt (prim/recur/delta
            # per context) from datH is currently disabled; the table is
            # parsed but not written out by this function.

            # NOTE(review): the plot is run only when both signature files
            # exist -- confirm this matches the intended nesting.
            os.system('Rscript %s/NGS/mutation/mutect_mutation_signature_diff_plot.R %s %s' % (mysetting.SRC_HOME, pid, rid))