def main(args):
    """Merge the read-pair and BED outputs of two input runs.

    Both inputs (``args.inDir1`` and ``args.inDir2``) are sub-directories of
    ``args.outDirName``.  Their ``.readpairs.txt`` and ``.reads.bed`` files are
    merged with ``mergeChrPosFiles`` into ``args.outDirName/args.outBaseName``,
    and a config file describing the merged run is written there.

    Args:
        args: object providing the attributes ``outDirName``, ``outBaseName``,
            ``inDir1``, ``inDir2``, ``configFileName`` and ``overwrite``.
    """
    outDir = args.outDirName
    outBase = args.outBaseName
    mergedPrefix = outDir + "/" + outBase + "/"
    prefix1 = outDir + "/" + args.inDir1 + "/"
    prefix2 = outDir + "/" + args.inDir2 + "/"

    # debugging info
    # NOTE(review): the log file lives inside the output directory but is
    # configured before prepOutDir() runs -- presumably the logs directory
    # already exists at this point; confirm against the caller.
    logfile = "%slogs/%d.%s.mergepairs.log" % (mergedPrefix, os.getpid(), outBase)
    logging.basicConfig(format='%(asctime)s %(message)s',
                        filename=logfile,
                        level=logging.DEBUG)
    logging.info("\ninDir1=%s\ninDir2=%s\noutBaseName=%s\nconfigFileName=%s",
                 args.inDir1, args.inDir2, outBase, args.configFileName)

    # create output directory
    pickreads.prepOutDir(outBase, outDir, args.overwrite)

    # make sure input sources exist
    peakparser.checkOutDir(args.inDir1, outDir)
    peakparser.checkOutDir(args.inDir2, outDir)

    # make sure config files exist
    configPath1 = prefix1 + args.configFileName
    configPath2 = prefix2 + args.configFileName
    pickreads.checkfile(configPath1)
    pickreads.checkfile(configPath2)

    # read parameters for both inputs
    configDict1 = peakparser.readConfig(configPath1, args.inDir1, outDir)
    configDict2 = peakparser.readConfig(configPath2, args.inDir2, outDir)

    # NOTE(review): only the first input's insertSize/readLength feed maxDist;
    # the second input's values are not consulted -- confirm this is intended.
    maxDist = int(configDict1['insertSize']) + 2 * int(configDict1['readLength'])

    # merge readfiles
    outReadFileName = mergedPrefix + outBase + ".readpairs.txt"
    readFileName1 = prefix1 + args.inDir1 + ".readpairs.txt"
    readFileName2 = prefix2 + args.inDir2 + ".readpairs.txt"
    logging.info("merging readfiles (%s, %s)", readFileName1, readFileName2)
    mergeChrPosFiles(readFileName1, readFileName2, outReadFileName, maxDist)

    # merge bedfiles
    outBedFileName = mergedPrefix + outBase + ".reads.bed"
    bedFileName1 = prefix1 + args.inDir1 + ".reads.bed"
    bedFileName2 = prefix2 + args.inDir2 + ".reads.bed"
    logging.info("merging bedfiles (%s,%s)", bedFileName1, bedFileName2)
    mergeChrPosFiles(bedFileName1, bedFileName2, outBedFileName, maxDist)

    # write new config file
    configPath = mergedPrefix + args.configFileName
    # The merged config starts from the first input's settings; configDict is
    # the same object as configDict1, which is not used again afterwards.
    configDict = configDict1
    configDict['bamFileName1'] = configDict1['bamFileName']
    configDict['bamFileName2'] = configDict2['bamFileName']
    configDict['merged'] = 'True'
    configDict['outBaseName'] = outBase
    configDict['outDirName'] = outDir
    configDict['readFileName'] = outReadFileName
    # the single-input key is superseded by bamFileName1 / bamFileName2
    del configDict['bamFileName']

    # 'with' guarantees the handle is closed even if a write fails;
    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    with open(configPath, 'w') as configFile:
        for key, value in configDict.items():
            configFile.write(key + "=" + value + "\n")
def main(args):
    """Merge the read-pair files of every sample named in a sample list.

    Each non-comment line of ``args.sampleListFile`` must hold three
    whitespace-separated fields: a BAM name, the sample's sub-directory under
    ``args.outDirName``, and a reference genome.  Only the sub-directory is
    used here; its config file and ``.readpairs.txt`` file are located from it.
    All read-pair files are merged with ``mergeChrPosFiles`` and a config file
    for the merged run is written to ``args.outDirName/args.outBaseName``.

    Args:
        args: object providing the attributes ``outDirName``, ``outBaseName``,
            ``sampleListFile``, ``configFileName`` and ``overwrite``.

    Raises:
        ValueError: if a sample line does not have exactly three fields, or if
            the sample list contains no samples at all.
    """
    outDir = args.outDirName
    outBase = args.outBaseName

    # create output directory
    pickreads.prepOutDir(outBase, outDir, args.overwrite)

    pickreads.checkfile(args.sampleListFile)

    readFileNames = []
    bamFileNames = []
    insertSizes = []
    readLengths = []
    lastConfig = None

    # 'with' closes the sample list even if a sample fails validation.
    with open(args.sampleListFile, 'r') as sampleList:
        for sampleLine in sampleList:
            # skip comment lines, and blank lines that would otherwise break
            # the three-field unpacking below
            if sampleLine.startswith("#") or not sampleLine.strip():
                continue

            # only the sub-directory is needed; the unpacking still enforces
            # the three-column layout
            (_sampleBam, sampleSubDir, _refGenome) = sampleLine.strip().split()

            peakparser.checkOutDir(sampleSubDir, outDir)
            sampleConfigPath = outDir + "/" + sampleSubDir + "/" + args.configFileName
            pickreads.checkfile(sampleConfigPath)

            sampleConfig = peakparser.readConfig(sampleConfigPath, sampleSubDir, outDir)
            lastConfig = sampleConfig
            insertSizes.append(int(sampleConfig['insertSize']))
            readLengths.append(int(sampleConfig['readLength']))
            readFileNames.append(
                outDir + "/" + sampleSubDir + "/" + sampleSubDir + ".readpairs.txt")
            bamFileNames.append(sampleConfig['bamFileName'])

    if lastConfig is None:
        # Previously this surfaced as an opaque "max() arg is an empty
        # sequence" ValueError; keep the exception type, improve the message.
        raise ValueError("no samples found in " + args.sampleListFile)

    maxDist = max(insertSizes) + 2 * max(readLengths)

    # merge readfiles
    outReadFileName = outDir + "/" + outBase + "/" + outBase + ".readpairs.txt"
    mergeChrPosFiles(readFileNames, outReadFileName, maxDist)

    # write new config file
    configPath = outDir + "/" + outBase + "/" + args.configFileName
    # The merged config is based on the last sample's settings.
    configDict = lastConfig
    # one numbered key per input BAM: bamFileName0, bamFileName1, ...
    for bfnum, bamFileName in enumerate(bamFileNames):
        configDict["bamFileName" + str(bfnum)] = bamFileName
    configDict['merged'] = 'True'
    configDict['outBaseName'] = outBase
    configDict['outDirName'] = outDir
    configDict['readFileName'] = outReadFileName
    # the single-input key is superseded by the numbered keys above
    del configDict['bamFileName']

    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    with open(configPath, 'w') as configFile:
        for key, value in configDict.items():
            configFile.write(key + "=" + value + "\n")
def main(args):
    """Merge the read-pair and BED outputs of two input runs.

    Both inputs (``args.inDir1`` and ``args.inDir2``) are sub-directories of
    ``args.outDirName``.  Their ``.readpairs.txt`` and ``.reads.bed`` files are
    merged with ``mergeChrPosFiles`` into ``args.outDirName/args.outBaseName``,
    and a config file describing the merged run is written there.

    Args:
        args: object providing the attributes ``outDirName``, ``outBaseName``,
            ``inDir1``, ``inDir2``, ``configFileName`` and ``overwrite``.
    """
    outDir = args.outDirName
    outBase = args.outBaseName
    mergedPrefix = outDir + "/" + outBase + "/"
    prefix1 = outDir + "/" + args.inDir1 + "/"
    prefix2 = outDir + "/" + args.inDir2 + "/"

    # debugging info
    # NOTE(review): the log file lives inside the output directory but is
    # configured before prepOutDir() runs -- presumably the logs directory
    # already exists at this point; confirm against the caller.
    logfile = "%slogs/%d.%s.mergepairs.log" % (mergedPrefix, os.getpid(), outBase)
    logging.basicConfig(format='%(asctime)s %(message)s',
                        filename=logfile,
                        level=logging.DEBUG)
    logging.info("\ninDir1=%s\ninDir2=%s\noutBaseName=%s\nconfigFileName=%s",
                 args.inDir1, args.inDir2, outBase, args.configFileName)

    # create output directory
    pickreads.prepOutDir(outBase, outDir, args.overwrite)

    # make sure input sources exist
    peakparser.checkOutDir(args.inDir1, outDir)
    peakparser.checkOutDir(args.inDir2, outDir)

    # make sure config files exist
    configPath1 = prefix1 + args.configFileName
    configPath2 = prefix2 + args.configFileName
    pickreads.checkfile(configPath1)
    pickreads.checkfile(configPath2)

    # read parameters for both inputs
    configDict1 = peakparser.readConfig(configPath1, args.inDir1, outDir)
    configDict2 = peakparser.readConfig(configPath2, args.inDir2, outDir)

    # NOTE(review): only the first input's insertSize/readLength feed maxDist;
    # the second input's values are not consulted -- confirm this is intended.
    maxDist = int(configDict1['insertSize']) + 2 * int(configDict1['readLength'])

    # merge readfiles
    outReadFileName = mergedPrefix + outBase + ".readpairs.txt"
    readFileName1 = prefix1 + args.inDir1 + ".readpairs.txt"
    readFileName2 = prefix2 + args.inDir2 + ".readpairs.txt"
    logging.info("merging readfiles (%s, %s)", readFileName1, readFileName2)
    mergeChrPosFiles(readFileName1, readFileName2, outReadFileName, maxDist)

    # merge bedfiles
    outBedFileName = mergedPrefix + outBase + ".reads.bed"
    bedFileName1 = prefix1 + args.inDir1 + ".reads.bed"
    bedFileName2 = prefix2 + args.inDir2 + ".reads.bed"
    logging.info("merging bedfiles (%s,%s)", bedFileName1, bedFileName2)
    mergeChrPosFiles(bedFileName1, bedFileName2, outBedFileName, maxDist)

    # write new config file
    configPath = mergedPrefix + args.configFileName
    # The merged config starts from the first input's settings; configDict is
    # the same object as configDict1, which is not used again afterwards.
    configDict = configDict1
    configDict['bamFileName1'] = configDict1['bamFileName']
    configDict['bamFileName2'] = configDict2['bamFileName']
    configDict['merged'] = 'True'
    configDict['outBaseName'] = outBase
    configDict['outDirName'] = outDir
    configDict['readFileName'] = outReadFileName
    # the single-input key is superseded by bamFileName1 / bamFileName2
    del configDict['bamFileName']

    # 'with' guarantees the handle is closed even if a write fails;
    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    with open(configPath, 'w') as configFile:
        for key, value in configDict.items():
            configFile.write(key + "=" + value + "\n")