def runAlignMol() : parser = argparse.ArgumentParser(description=description) parser.add_argument('-q', dest='queryDir', help='Path to merged cmap to align molecules (-b) to OR alignmol dir from Pipeline for merge (if latter, no alignments are performed), required', type=str) parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file, required if aligning molecules', type=str) #parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file OR path to dir containing split bnx pieces, required if aligning molecules', type=str) #I should add the split feature; for now, just do single bnx parser.add_argument('-a', dest='optArguments', help='Path to optArguments.xml (optional, default optArguments_human.xml in Pipeline dir if found, otherwise required)', default="", type=str) parser.add_argument('-r', help='If this flag is used, alignmolvref arguments are used, otherwise alignmol arguments are used (default alignmol; optional)', dest='ref', action='store_true') parser.add_argument('-o', dest='outputDir', help='output dir (optional, defaults to sub-dir of input map dir called "alignmol")', default="", type=str) parser.add_argument('-t', dest='RefAligner', help='Path to RefAligner or dir containing it (required)', type=str) parser.add_argument('-T', dest='numThreads', help='Total number of threads (cores) to use (optional, default 4)', default=4, type=int) parser.add_argument('-j', dest='maxthreads', help='Threads per Job, -maxthreads (non-cluster only;optional, default 4)', default=4, type=int) parser.add_argument('-e', dest='errFile', help='.err file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise', default="", type=str) parser.add_argument('-E', dest='errbinFile', help='.errbin file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise 
parameters)--should be from autoNoise', default="", type=str) parser.add_argument('-p', dest='pipelineDir', help='Pipeline dir (optional, defaults to script dir, or current directory)', default="", type=str) parser.add_argument('-v', dest='pvalue', help='Alignment pvalue', default="1e-12") result = parser.parse_args() outprefix = "exp_refineFinal1" #this is the default; assume for now #check all Pipeline dependencies if result.pipelineDir : cwd = result.pipelineDir else : cwd = os.path.split(os.path.realpath(__file__))[0] #this is path of this script if not os.path.isfile(os.path.join(cwd,"utilities.py")) : #if still not here, last try is actual cwd cwd = os.getcwd() #still check this below #this is the only one imported here and in runCharacterize if not os.path.isfile(os.path.join(cwd,"utilities.py")): print "ERROR: utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) import utilities as util if not os.path.isfile(os.path.join(cwd,"AlignModule.py")): print "ERROR: AlignModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) import AlignModule as alignmod if not util.checkFile(os.path.join(cwd,"Pipeline.py")): print "ERROR: Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) import Pipeline #input dir if not result.queryDir : print "ERROR: Query (-q) argument not supplied." sys.exit(1) qrypath = os.path.realpath(result.queryDir) if util.checkDir(qrypath, checkWritable=False, makeIfNotExist=False) : #output elsewhere so not writeable is ok runaligns = False elif util.checkCmap(qrypath) : runaligns = True else : print "ERROR: Query argument ("+qrypath+") not found or not a dir or cmap. Check -q argument." 
sys.exit(1) #this check isn't really necessary...make it a warning -- left over from runAlignMerge.py #if not os.path.split(qrypath)[1].endswith("alignmol") : # print "Warning: Query dir ("+qrypath+") does not end with 'alignmol'; please be sure this is a Pipeline alignmol dir\n" #RefAligner -- check for either path to RefAligner, or dir containing it, depending on cluster args rabin = "" #need empty string for generateJobList even though no jobs are run if runaligns : rabin = result.RefAligner #replicate Pipeline behavior: RefAligner is always required if os.path.isdir(rabin) : rabin = os.path.join(rabin, "RefAligner") if not util.checkExecutable(rabin): print "ERROR: RefAligner not found or not executable at", rabin, "\nPlease supply RefAligner dir or full path as -t arg." sys.exit(1) #optargs file optargs = None if runaligns and result.optArguments : #supplied on command line optargs = result.optArguments if not util.checkFile(optargs, ".xml") : print "optArguments path is supplied ("+optargs+") but not found or doesn't end in .xml, check -a argument." sys.exit(1) elif runaligns : #load from Pipeline dir if running alignments optargs = os.path.join(cwd,"optArguments_human.xml") if not util.checkFile(optargs): print "optArguments.xml missing in Pipeline directory ("+cwd+"). Try supplying path explicitly using -a." 
sys.exit(1) #output dir if not result.outputDir : outdir = os.path.join(qrypath, "merge") #should be same as in AlignModule else : outdir = os.path.realpath(result.outputDir) if os.path.isdir(outdir) : if not util.checkDir(outdir) : #check writeable print "\nERROR: Output dir is not writeable:\n", outdir, "\n" sys.exit(1) #this is ok here #elif outdir == contigdir : # print "\nERROR: Output dir cannot be same as input dir:\n", outdir, "\n" # sys.exit(1) print "\nWARNING: Output dir already exists, results will be overwritten:\n", outdir, "\n" elif not util.checkDir(outdir) : #does not exist, make, if False, can't make or not writeable print "\nERROR: Output dir cannot be created or is not writeable:\n", outdir, "\n" sys.exit(1) #bnx file bnxfile = result.bnx if bnxfile : #must check for empty string BEFORE you do realpath, or it returns cwd bnxfile = os.path.realpath(bnxfile) if not util.checkFile(bnxfile, ".bnx") : print "ERROR: bnx file supplied but not found or incorrect suffix:", bnxfile sys.exit(1) elif runaligns : print "ERROR: bnx file not supplied but running alignments; please supply bnx file as -b argument" sys.exit(1) #nthreads nthreads = result.numThreads if nthreads <= 0 : print "ERROR: Number of threads value invalid (must be > 0): %i" % nthreads sys.exit(1) #maxthreads maxthreads = result.maxthreads if maxthreads <= 0 : print "ERROR: Max threads value invalid (must be > 0): %i" % maxthreads sys.exit(1) elif nthreads < maxthreads : print "Warning: num threads (-T: %i) < max threads (-j: %i): increasing num threads to equal max threads\n" % (nthreads, maxthreads) nthreads = maxthreads #pvalue if result.pvalue : #supplied on command line pvalue = result.pvalue else : pvalue = "1e-12" #.errbin file errbinfile = result.errbinFile if errbinfile : errbinfile = os.path.realpath(result.errbinFile) if not util.checkFile(errbinfile, ".errbin") : print "ERROR: errbin file supplied but not found or incorrect suffix:", errbinfile sys.exit(1) #.err file errfile = 
result.errFile if errfile and errbinfile : print "Warning: .err and .errbin arguments supplied; ignoring .err file" errfile = "" elif errfile : errfile = os.path.realpath(result.errFile) if not util.checkFile(errfile, ".err") : print "err file supplied but not found or incorrect suffix:", errfile sys.exit(1) if errfile and not util.checkFile(os.path.join(cwd,"SampleCharModule.py")): print "SampleCharModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) elif errfile : import SampleCharModule as scm doref = result.ref #DONE checking arguments print "Using output dir", outdir if runaligns : print "Aligning", bnxfile, "\nTo", qrypath, "\n" else : print "Merging", qrypath, "\n" startTime = time.time() #time since Epoch memory_log = os.path.join(outdir, "memory_log.txt") util.initMemoryLog(memory_log) varsP = Pipeline.varsPipeline() varsP.RefAlignerBin = rabin varsP.contigFolder = "" #not used but needs to be an attr varsP.outputContigFolder = "" #not used but needs to be a string attr varsP.pipeReportFile = os.path.join(outdir, "alignmol_jobs_log.txt") varsP.infoReportFile = os.path.join(outdir, "alignmol_log.txt") util.InitStatus( os.path.join(outdir, "status.xml") ) if runaligns : varsP.optArgumentsFileIn = optargs varsP.latestMergedCmap = qrypath #if !doref, need this one varsP.ref = qrypath #and if doref, need this one varsP.nThreads = nthreads #necessary otherwise job won't start -- max threads per node varsP.maxthreads = maxthreads #threads per job p = os.path.split(qrypath)[1] varsP.outputContigPrefix = p[:p.rfind(".")] #filename prefix varsP.stdoutlog = True #use -stdout -stderr varsP.memoryLogpath = os.path.join(outdir, "memory_log.txt") varsP.parseArguments() #parses optArgumentsFile varsP.replaceParam("alignmol", "-T", pvalue) varsP.checkDependencies() varsP.RefAlignerBinOrig = rabin varsP.prerunLog() #general information in log -- needed for refaligner_version noisep = {} if errbinfile : noisep = 
{"readparameters": errbinfile} #print "Using noise parameters from "+errbinfile+"\n" #move below elif errfile : noisep = scm.readNoiseParameters(errfile.replace(".err","")) if noisep.has_key('readparameters') : #remove this because it's redundant, and it can cause problems with RefAligner compatibility del noisep['readparameters'] if not noisep : #readNoiseParameters returns empty dict on failure print "ERROR reading noise parameters, check .err file:", errfile sys.exit(1) #redundant with below? print "Using noise parameters from "+errfile+":\n" + " ".join(["-"+str(k)+" "+str(v) for k,v in noisep.iteritems()])+"\n" #some code from SampleCharModule to load args into noise0 infoReport="Loaded noise parameters:\n" klist = ["FP", "FN", "sf", "sd", "sr", "bpp", "readparameters"] #hardcoding parameters is kind of bad, but it fixes the order without using OrderedDict. #noiseargs = self.varsP.argsListed('noise0') #not necessary for v in klist : if not noisep.has_key(v) : continue param=str(noisep[v]) util.LogStatus("parameter", "auto_"+v, param) infoReport+=v+":"+param+"\n" varsP.replaceParam("noise0", "-"+v, param) varsP.updateInfoReport(infoReport + '\n', printalso=True) else : print "Getting file list from", qrypath outFileList = getOutFileList(util, qrypath) if not outFileList : print "ERROR: Query dir ("+qrypath+") does not contain alignmol data. Check -q argument." 
sys.exit(1) else : print "Found", len(outFileList), "alignment results" #end if runaligns amod = alignmod.AlignModule(varsP, doref, outdir, bnxfile) #constructor will call generateJobList if runaligns : amod.runJobs() amod.checkResults() else : amod.outFileList = outFileList p = os.path.split(outFileList[0])[1] if p.count("_") > 1 : #expect something like "EXP_REFINEFINAL1_4" #p = p[:p.rfind("_")+1] #remove integer suffix p = p[:p.rfind("_")] #remove integer suffix (and underscore) #else : # p += "_" #because mrgstr is appended varsP.outputContigPrefix = p if not runaligns or len(amod.jobList) > 0 : amod.getAlignStats() if runaligns : print #copy from Pipeline.py if util.SummarizeErrors(varsP=varsP)==0: varsP.updatePipeReport("Pipeline has successfully completed\n") util.LogStatus("progress", "pipeline", "success") else: varsP.updatePipeReport("Pipeline has completed with errors\n") util.LogStatus("progress", "pipeline", "failure") #BELOW OLD CODE return #in Pipeline, this is called first #print "Calling getAlignStats:" #but it won't work without varsP atm; skip it #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir) #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir) print "Calling mergeMap" print outFileList[0] #, "\n", outputdir #moved above util.logMemory(memory_log, startTime, "mergeMap_start") #mergeMap(self.varsP, self.outFileList, mergepath=self.outputdir) #varsP is optional alignmod.mergeMap(None, outFileList, outputdir) util.logMemory(memory_log, startTime, "mergeMap_end") print "Calling mergeRcmaps" util.logMemory(memory_log, startTime, "mergeRcmaps_start") #mergeRcmaps(outFileList, outdir, varsP=None, splitByContig=None, stageName="alignmol") : alignmod.mergeRcmaps(outFileList, outputdir, splitByContig=True, stageName=outprefix) util.logMemory(memory_log, startTime, "mergeRcmaps_end") print "Calling split_XMap_byContig" 
#split_XMapQcmap_byContig" util.logMemory(memory_log, startTime, "split_XMap_byContig_start") #xmapdict = alignmod.split_XMap_byContig(outFileList, outputdir, stageName=outprefix) #old xmapdict = alignmod.split_XMap_byContig_new(outFileList, outputdir, stageName=outprefix) util.logMemory(memory_log, startTime, "split_XMap_byContig_end") print "Calling split_Qcmap_byContig" util.logMemory(memory_log, startTime, "split_Qcmap_byContig_start") #alignmod.split_Qcmap_byContig(outFileList, outputdir, xmapdict) #old alignmod.split_Qcmap_byContig_new(outFileList, outputdir, xmapdict, stageName=outprefix) #new: better performance util.logMemory(memory_log, startTime, "split_Qcmap_byContig_end") print "AlignMerge successfully completed"
def generateJobList(self):
    """AlignModule.generateJobList: create RefAligner jobs for aligning molecules to contigs.

    Returns None without adding jobs when varsP.RefAlignerBin is empty (the
    runAlignMol merge-only case), or when doref is False and
    varsP.latestMergedCmap is unset or fails util.checkCmap (that error is
    written to the pipe report and logged).

    Otherwise adds, through self.addJob, either a single job for self.bnxin
    (when set) or one job per split bnx piece 1..varsP.nPairwiseJobs. The
    output prefix of every job is appended to self.outFileList.

    Side effect: self.jobargs is set to the stage arguments when
    varsP.argsListed(self.stageName) succeeds; on KeyError the arguments of
    self.argStageName are used for the jobs and self.jobargs is left alone.
    """
    varsP = self.varsP

    #for runAlignMol, this method is called but not used: exit if RefAlignerBin is empty
    if not varsP.RefAlignerBin:
        return

    #the contigs are obtained from varsP.latestMergedCmap--check its validity, a return will mean no jobs, and no jobs is now handled in multiThreadRunJobs.
    if not self.doref and (not varsP.latestMergedCmap or not util.checkCmap(varsP.latestMergedCmap)):
        err = "Error in AlignModule.generateJobList: varsP.latestMergedCmap is not set or not valid cmap; skipping %s" % self.stageName
        varsP.updatePipeReport(err + "\n")
        util.LogError("error", err)
        return

    #Note: noise parameters should be fixed because when bnx is split, -M
    # would find different parameters for different contigs. Use noise0.
    baseargs = [varsP.RefAlignerBin]
    if not self.doref:
        baseargs += ['-ref', varsP.latestMergedCmap] #reference is latest merged cmap
        # Prefix is the file name up to its first "."; partition keeps a
        # dot-less name intact, whereas slicing at find() == -1 dropped its last char.
        mappref = os.path.split(varsP.latestMergedCmap)[1].partition(".")[0]
    else:
        baseargs += ['-ref', varsP.ref]
        mappref = self.stageName #use stageName also for output filename

    noiseargs = varsP.argsListed('noise0')

    try: #argsListed does not check key
        refargs = varsP.argsListed(self.stageName) #'alignmolvref'
    except KeyError: #this is same as old behavior
        refargs = varsP.argsListed(self.argStageName)
    else:
        self.jobargs = refargs

    def addAlignJob(outarg, bnxpath, jobname):
        #build the RefAligner command line for one bnx input and queue it
        self.outFileList.append(outarg) #file prefixes
        jobargs = baseargs + ['-o', outarg, '-i', bnxpath]
        stdoutf = None
        if varsP.stdoutlog: #remember, these must be after -o
            jobargs += ['-f', '-stdout', '-stderr']
            stdoutf = outarg + ".stdout"
        jobargs += ['-maxthreads', str(varsP.maxthreads)]
        #add noise0 before alignmol (stageName) so that the latter can override the former
        jobargs += noiseargs
        jobargs += ['-output-veto-filter', 'intervals.txt$'] #this feature not in old RefAligner
        jobargs += refargs
        self.addJob(mthread.singleJob(jobargs,
                                      jobname,
                                      outarg + ".xmap",
                                      jobname,
                                      maxThreads=varsP.maxthreads,
                                      clusterLogDir=varsP.clusterLogDir,
                                      expectedStdoutFile=stdoutf))

    #single job with bnxin (constructor)
    if self.bnxin:
        addAlignJob(os.path.join(self.alignTarget, mappref), self.bnxin, self.stageName)
        return #and this is the only job

    #loop over the split bnxs, make one job per bnx
    njobs = varsP.nPairwiseJobs
    for idx in range(1, njobs + 1):
        idxstr = "_%s_of_%s" % (idx, njobs)
        # Insert the piece index before the LAST ".bnx" only; str.replace would
        # also rewrite a ".bnx" occurring earlier in the path (e.g. in a dir name).
        head, sep, tail = varsP.bnxFile.rpartition(".bnx")
        bnxpiece = head + idxstr + sep + tail if sep else varsP.bnxFile
        addAlignJob(os.path.join(self.alignTarget, mappref + "_" + str(idx)),
                    bnxpiece,
                    self.stageName + idxstr)
def generateJobList(self):
    """AlignModule.generateJobList: create RefAligner jobs for aligning molecules to contigs.

    Returns None without adding jobs when varsP.RefAlignerBin is empty (the
    runAlignMol merge-only case), or when doref is False and
    varsP.latestMergedCmap is unset or fails util.checkCmap (that error is
    written to the pipe report and logged).

    Otherwise adds, through self.addJob, either a single job for self.bnxin
    (when set) or one job per split bnx piece 1..varsP.nPairwiseJobs. The
    output prefix of every job is appended to self.outFileList.

    Side effect: self.jobargs is set to the stage arguments when
    varsP.argsListed(self.stageName) succeeds; on KeyError the arguments of
    self.argStageName are used for the jobs and self.jobargs is left alone.
    """
    varsP = self.varsP

    #for runAlignMol, this method is called but not used: exit if RefAlignerBin is empty
    if not varsP.RefAlignerBin:
        return

    #the contigs are obtained from varsP.latestMergedCmap--check its validity, a return will mean no jobs, and no jobs is now handled in multiThreadRunJobs.
    if not self.doref and (not varsP.latestMergedCmap or not util.checkCmap(varsP.latestMergedCmap)):
        err = "Error in AlignModule.generateJobList: varsP.latestMergedCmap is not set or not valid cmap; skipping %s" % self.stageName
        varsP.updatePipeReport(err+"\n")
        util.LogError("error", err)
        return

    #Note: noise parameters should be fixed because when bnx is split, -M
    # would find different parameters for different contigs. Use noise0.
    baseargs = [varsP.RefAlignerBin]
    if self.doref:
        baseargs += ['-ref', varsP.ref]
        mappref = self.stageName #use stageName also for output filename
    else:
        baseargs += ['-ref', varsP.latestMergedCmap] #reference is latest merged cmap
        # Keep everything before the first "." of the file name. partition()
        # returns the whole name when there is no dot; the old
        # name[:name.find(".")] sliced at -1 and lost the last character.
        mappref = os.path.split(varsP.latestMergedCmap)[1].partition(".")[0]

    noiseargs = varsP.argsListed('noise0')

    try: #argsListed does not check key
        refargs = varsP.argsListed(self.stageName) #'alignmolvref'
    except KeyError: #this is same as old behavior
        refargs = varsP.argsListed(self.argStageName)
    else:
        self.jobargs = refargs

    def queueJob(outarg, bnxpath, jobname):
        #assemble one RefAligner command line and register it as a job
        self.outFileList.append(outarg) #file prefixes
        jobargs = baseargs + ['-o', outarg, '-i', bnxpath]
        stdoutf = None
        if varsP.stdoutlog: #remember, these must be after -o
            jobargs += ['-f', '-stdout', '-stderr']
            stdoutf = outarg+".stdout"
        jobargs += ['-maxthreads', str(varsP.maxthreads)]
        #add noise0 before alignmol (stageName) so that the latter can override the former
        jobargs += noiseargs
        jobargs += ['-output-veto-filter', 'intervals.txt$'] #this feature not in old RefAligner
        jobargs += refargs
        s1Job = mthread.singleJob(jobargs,
                                  jobname,
                                  outarg+".xmap",
                                  jobname,
                                  maxThreads=varsP.maxthreads,
                                  clusterLogDir=varsP.clusterLogDir,
                                  expectedStdoutFile=stdoutf)
        self.addJob(s1Job)

    #single job with bnxin (constructor)
    if self.bnxin:
        queueJob(os.path.join(self.alignTarget, mappref), self.bnxin, self.stageName)
        return #and this is the only job

    #loop over the split bnxs, make one job per bnx
    total = varsP.nPairwiseJobs
    for idx in range(1, total+1):
        idxstr = "_%s_of_%s" % (idx, total)
        # Only the final ".bnx" (the file suffix) gets the piece index; the old
        # str.replace also altered any earlier ".bnx" in the path.
        head, sep, tail = varsP.bnxFile.rpartition(".bnx")
        if sep:
            bnxpiece = head + idxstr + sep + tail
        else:
            bnxpiece = varsP.bnxFile #no ".bnx" at all: unchanged, as before
        queueJob(os.path.join(self.alignTarget, mappref+"_"+str(idx)),
                 bnxpiece,
                 self.stageName+idxstr)
def runAlignMol(): parser = argparse.ArgumentParser(description=description) parser.add_argument( '-q', dest='queryDir', help= 'Path to merged cmap to align molecules (-b) to OR alignmol dir from Pipeline for merge (if latter, no alignments are performed), required', type=str) parser.add_argument( '-b', dest='bnx', help='Input molecule (.bnx) file, required if aligning molecules', type=str) #parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file OR path to dir containing split bnx pieces, required if aligning molecules', type=str) #I should add the split feature; for now, just do single bnx parser.add_argument( '-a', dest='optArguments', help= 'Path to optArguments.xml (optional, default optArguments_human.xml in Pipeline dir if found, otherwise required)', default="", type=str) parser.add_argument( '-r', help= 'If this flag is used, alignmolvref arguments are used, otherwise alignmol arguments are used (default alignmol; optional)', dest='ref', action='store_true') parser.add_argument( '-o', dest='outputDir', help= 'output dir (optional, defaults to sub-dir of input map dir called "alignmol")', default="", type=str) parser.add_argument( '-t', dest='RefAligner', help='Path to RefAligner or dir containing it (required)', type=str) parser.add_argument( '-T', dest='numThreads', help='Total number of threads (cores) to use (optional, default 4)', default=4, type=int) parser.add_argument( '-j', dest='maxthreads', help= 'Threads per Job, -maxthreads (non-cluster only;optional, default 4)', default=4, type=int) parser.add_argument( '-e', dest='errFile', help= '.err file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise', default="", type=str) parser.add_argument( '-E', dest='errbinFile', help= '.errbin file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be 
supplied for non-noise parameters)--should be from autoNoise', default="", type=str) parser.add_argument( '-p', dest='pipelineDir', help= 'Pipeline dir (optional, defaults to script dir, or current directory)', default="", type=str) result = parser.parse_args() outprefix = "exp_refineFinal1" #this is the default; assume for now #check all Pipeline dependencies if result.pipelineDir: cwd = result.pipelineDir else: cwd = os.path.split( os.path.realpath(__file__))[0] #this is path of this script if not os.path.isfile(os.path.join( cwd, "utilities.py")): #if still not here, last try is actual cwd cwd = os.getcwd() #still check this below #this is the only one imported here and in runCharacterize if not os.path.isfile(os.path.join(cwd, "utilities.py")): print "ERROR: utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) import utilities as util if not os.path.isfile(os.path.join(cwd, "AlignModule.py")): print "ERROR: AlignModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) import AlignModule as alignmod if not util.checkFile(os.path.join(cwd, "Pipeline.py")): print "ERROR: Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) import Pipeline if not util.checkFile(os.path.join(cwd, "mapClasses.py")): print "ERROR: mapClasses.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) import mapClasses as mc #input dir if not result.queryDir: print "ERROR: Query (-q) argument not supplied." sys.exit(1) qrypath = os.path.realpath(result.queryDir) if util.checkDir( qrypath, checkWritable=False, makeIfNotExist=False): #output elsewhere so not writeable is ok runaligns = False elif util.checkCmap(qrypath): runaligns = True else: print "ERROR: Query argument (" + qrypath + ") not found or not a dir or cmap. Check -q argument." 
sys.exit(1) #this check isn't really necessary...make it a warning -- left over from runAlignMerge.py #if not os.path.split(qrypath)[1].endswith("alignmol") : # print "Warning: Query dir ("+qrypath+") does not end with 'alignmol'; please be sure this is a Pipeline alignmol dir\n" #RefAligner -- check for either path to RefAligner, or dir containing it, depending on cluster args rabin = "" #need empty string for generateJobList even though no jobs are run if runaligns: rabin = result.RefAligner #replicate Pipeline behavior: RefAligner is always required if os.path.isdir(rabin): rabin = os.path.join(rabin, "RefAligner") if not util.checkExecutable(rabin): print "ERROR: RefAligner not found or not executable at", rabin, "\nPlease supply RefAligner dir or full path as -t arg." sys.exit(1) #optargs file optargs = None if runaligns and result.optArguments: #supplied on command line optargs = result.optArguments if not util.checkFile(optargs, ".xml"): print "optArguments path is supplied (" + optargs + ") but not found or doesn't end in .xml, check -a argument." sys.exit(1) elif runaligns: #load from Pipeline dir if running alignments optargs = os.path.join(cwd, "optArguments_human.xml") if not util.checkFile(optargs): print "optArguments.xml missing in Pipeline directory (" + cwd + "). Try supplying path explicitly using -a." 
sys.exit(1) #output dir if not result.outputDir: outdir = os.path.join(qrypath, "merge") #should be same as in AlignModule else: outdir = os.path.realpath(result.outputDir) if os.path.isdir(outdir): if not util.checkDir(outdir): #check writeable print "\nERROR: Output dir is not writeable:\n", outdir, "\n" sys.exit(1) #this is ok here #elif outdir == contigdir : # print "\nERROR: Output dir cannot be same as input dir:\n", outdir, "\n" # sys.exit(1) print "\nWARNING: Output dir already exists, results will be overwritten:\n", outdir, "\n" elif not util.checkDir( outdir ): #does not exist, make, if False, can't make or not writeable print "\nERROR: Output dir cannot be created or is not writeable:\n", outdir, "\n" sys.exit(1) #bnx file bnxfile = result.bnx if bnxfile: #must check for empty string BEFORE you do realpath, or it returns cwd bnxfile = os.path.realpath(bnxfile) if not util.checkFile(bnxfile, ".bnx"): print "ERROR: bnx file supplied but not found or incorrect suffix:", bnxfile sys.exit(1) elif runaligns: print "ERROR: bnx file not supplied but running alignments; please supply bnx file as -b argument" sys.exit(1) #nthreads nthreads = result.numThreads if nthreads <= 0: print "ERROR: Number of threads value invalid (must be > 0): %i" % nthreads sys.exit(1) #maxthreads maxthreads = result.maxthreads if maxthreads <= 0: print "ERROR: Max threads value invalid (must be > 0): %i" % maxthreads sys.exit(1) elif nthreads < maxthreads: print "Warning: num threads (-T: %i) < max threads (-j: %i): increasing num threads to equal max threads\n" % ( nthreads, maxthreads) nthreads = maxthreads #.errbin file errbinfile = result.errbinFile if errbinfile: errbinfile = os.path.realpath(result.errbinFile) if not util.checkFile(errbinfile, ".errbin"): print "ERROR: errbin file supplied but not found or incorrect suffix:", errbinfile sys.exit(1) #.err file errfile = result.errFile if errfile and errbinfile: print "Warning: .err and .errbin arguments supplied; ignoring .err 
file" errfile = "" elif errfile: errfile = os.path.realpath(result.errFile) if not util.checkFile(errfile, ".err"): print "err file supplied but not found or incorrect suffix:", errfile sys.exit(1) if errfile and not util.checkFile(os.path.join(cwd, "SampleCharModule.py")): print "SampleCharModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir" sys.exit(1) elif errfile: import SampleCharModule as scm doref = result.ref #DONE checking arguments print "Using output dir", outdir if runaligns: print "Aligning", bnxfile, "\nTo", qrypath, "\n" else: print "Merging", qrypath, "\n" startTime = time.time() #time since Epoch memory_log = os.path.join(outdir, "memory_log.txt") util.initMemoryLog(memory_log) varsP = Pipeline.varsPipeline() varsP.RefAlignerBin = rabin varsP.contigFolder = "" #not used but needs to be an attr varsP.outputContigFolder = "" #not used but needs to be a string attr varsP.pipeReportFile = os.path.join(outdir, "alignmol_jobs_log.txt") varsP.infoReportFile = os.path.join(outdir, "alignmol_log.txt") util.InitStatus(os.path.join(outdir, "status.xml")) if runaligns: varsP.optArgumentsFileIn = optargs varsP.latestMergedCmap = qrypath #if !doref, need this one varsP.ref = qrypath #and if doref, need this one varsP.nThreads = nthreads #necessary otherwise job won't start -- max threads per node varsP.maxthreads = maxthreads #threads per job p = os.path.split(qrypath)[1] varsP.outputContigPrefix = p[:p.rfind(".")] #filename prefix varsP.stdoutlog = True #use -stdout -stderr varsP.sorted_file = bnxfile[:bnxfile.rfind( ".")] #enables the mol fraction align in AlignModule.getAlignStats if qrypath.endswith(".cmap"): #enable the mol stats varsP.totAssemblyLenMb = mc.multiCmap( qrypath, lengthonly=True).totalLength / 1e6 varsP.memoryLogpath = os.path.join(outdir, "memory_log.txt") varsP.parseArguments() #parses optArgumentsFile varsP.checkDependencies() varsP.RefAlignerBinOrig = rabin varsP.prerunLog( ) #general information in log 
-- needed for refaligner_version noisep = {} if errbinfile: noisep = {"readparameters": errbinfile} #print "Using noise parameters from "+errbinfile+"\n" #move below elif errfile: noisep = scm.readNoiseParameters(errfile.replace(".err", "")) if noisep.has_key( 'readparameters' ): #remove this because it's redundant, and it can cause problems with RefAligner compatibility del noisep['readparameters'] if not noisep: #readNoiseParameters returns empty dict on failure print "ERROR reading noise parameters, check .err file:", errfile sys.exit(1) #redundant with below? print "Using noise parameters from " + errfile + ":\n" + " ".join( ["-" + str(k) + " " + str(v) for k, v in noisep.iteritems()]) + "\n" #some code from SampleCharModule to load args into noise0 infoReport = "Loaded noise parameters:\n" klist = [ "FP", "FN", "sf", "sd", "sr", "bpp", "readparameters" ] #hardcoding parameters is kind of bad, but it fixes the order without using OrderedDict. #noiseargs = self.varsP.argsListed('noise0') #not necessary for v in klist: if not noisep.has_key(v): continue param = str(noisep[v]) util.LogStatus("parameter", "auto_" + v, param) infoReport += v + ":" + param + "\n" varsP.replaceParam("noise0", "-" + v, param) varsP.updateInfoReport(infoReport + '\n', printalso=True) else: print "Getting file list from", qrypath outFileList = getOutFileList(util, qrypath) if not outFileList: print "ERROR: Query dir (" + qrypath + ") does not contain alignmol data. Check -q argument." 
sys.exit(1) else: print "Found", len(outFileList), "alignment results" #end if runaligns amod = alignmod.AlignModule( varsP, doref, outdir, bnxfile) #constructor will call generateJobList if runaligns: amod.runJobs() amod.checkResults() else: amod.outFileList = outFileList p = os.path.split(outFileList[0])[1] if p.count("_") > 1: #expect something like "EXP_REFINEFINAL1_4" #p = p[:p.rfind("_")+1] #remove integer suffix p = p[:p.rfind("_")] #remove integer suffix (and underscore) #else : # p += "_" #because mrgstr is appended varsP.outputContigPrefix = p if not runaligns or len(amod.jobList) > 0: amod.getAlignStats() if runaligns: print #copy from Pipeline.py if util.SummarizeErrors(varsP=varsP) == 0: varsP.updatePipeReport("Pipeline has successfully completed\n") util.LogStatus("progress", "pipeline", "success") else: varsP.updatePipeReport("Pipeline has completed with errors\n") util.LogStatus("progress", "pipeline", "failure") #BELOW OLD CODE return #in Pipeline, this is called first #print "Calling getAlignStats:" #but it won't work without varsP atm; skip it #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir) #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir) print "Calling mergeMap" print outFileList[0] #, "\n", outputdir #moved above util.logMemory(memory_log, startTime, "mergeMap_start") #mergeMap(self.varsP, self.outFileList, mergepath=self.outputdir) #varsP is optional alignmod.mergeMap(None, outFileList, outputdir) util.logMemory(memory_log, startTime, "mergeMap_end") print "Calling mergeRcmaps" util.logMemory(memory_log, startTime, "mergeRcmaps_start") #mergeRcmaps(outFileList, outdir, varsP=None, splitByContig=None, stageName="alignmol") : alignmod.mergeRcmaps(outFileList, outputdir, splitByContig=True, stageName=outprefix) util.logMemory(memory_log, startTime, "mergeRcmaps_end") print "Calling split_XMap_byContig" 
#split_XMapQcmap_byContig" util.logMemory(memory_log, startTime, "split_XMap_byContig_start") #xmapdict = alignmod.split_XMap_byContig(outFileList, outputdir, stageName=outprefix) #old xmapdict = alignmod.split_XMap_byContig_new(outFileList, outputdir, stageName=outprefix) util.logMemory(memory_log, startTime, "split_XMap_byContig_end") print "Calling split_Qcmap_byContig" util.logMemory(memory_log, startTime, "split_Qcmap_byContig_start") #alignmod.split_Qcmap_byContig(outFileList, outputdir, xmapdict) #old alignmod.split_Qcmap_byContig_new( outFileList, outputdir, xmapdict, stageName=outprefix) #new: better performance util.logMemory(memory_log, startTime, "split_Qcmap_byContig_end") print "AlignMerge successfully completed"