def runAlignMol() :
    """Command-line entry point: align molecules to a cmap, or process an existing alignmol dir.

    Arguments are read from sys.argv with argparse and validated one by one;
    on the first invalid argument an error is printed and sys.exit(1) is called.

    The query (-q) selects the mode:
      * -q is a cmap file: alignment of the bnx (-b) to that cmap is set up
        through Pipeline.varsPipeline and AlignModule, the jobs are run
        (runJobs, checkResults), and a summary is written with
        util.SummarizeErrors. RefAligner (-t), a bnx (-b) and an optArguments
        file (-a, or optArguments_human.xml in the Pipeline dir) are required.
        The -v value replaces the "-T" parameter of the "alignmol" stage, and
        noise parameters from -E (.errbin) or -e (.err) replace those of the
        "noise0" stage.
      * -q is a directory: no alignments are run; the result list returned by
        getOutFileList is assigned to the AlignModule instance instead.

    In both modes AlignModule.getAlignStats is called last (when aligning,
    only if at least one job was created).

    The Pipeline modules (utilities, AlignModule, Pipeline and, with -e,
    SampleCharModule) are imported inside this function after checking that
    the files exist in the Pipeline dir (-p, else this script's dir, else the
    current working directory).

    Returns None.
    """
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument('-q', dest='queryDir', help='Path to merged cmap to align molecules (-b) to OR alignmol dir from Pipeline for merge (if latter, no alignments are performed), required', type=str)
    parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file, required if aligning molecules', type=str)
    #parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file OR path to dir containing split bnx pieces, required if aligning molecules', type=str) #I should add the split feature; for now, just do single bnx
    parser.add_argument('-a', dest='optArguments', help='Path to optArguments.xml (optional, default optArguments_human.xml in Pipeline dir if found, otherwise required)', default="", type=str)
    parser.add_argument('-r', help='If this flag is used, alignmolvref arguments are used, otherwise alignmol arguments are used (default alignmol; optional)', dest='ref', action='store_true')
    #help text fixed: the default sub-dir used below is "merge", not "alignmol"
    parser.add_argument('-o', dest='outputDir', help='output dir (optional, defaults to sub-dir of query path (-q) called "merge")', default="", type=str)
    parser.add_argument('-t', dest='RefAligner', help='Path to RefAligner or dir containing it (required)', type=str)
    parser.add_argument('-T', dest='numThreads', help='Total number of threads (cores) to use (optional, default 4)', default=4, type=int)
    parser.add_argument('-j', dest='maxthreads', help='Threads per Job, -maxthreads (non-cluster only;optional, default 4)', default=4, type=int)
    parser.add_argument('-e', dest='errFile', help='.err file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise', default="", type=str)
    parser.add_argument('-E', dest='errbinFile', help='.errbin file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise', default="", type=str)
    parser.add_argument('-p', dest='pipelineDir', help='Pipeline dir (optional, defaults to script dir, or current directory)', default="", type=str)
    parser.add_argument('-v', dest='pvalue', help='Alignment pvalue', default="1e-12")
    result = parser.parse_args()

    outprefix = "exp_refineFinal1" #this is the default; assume for now (only referenced by the unreachable legacy code at the end)

    #check all Pipeline dependencies
    if result.pipelineDir :
        cwd = result.pipelineDir
    else :
        cwd = os.path.split(os.path.realpath(__file__))[0] #this is path of this script
        if not os.path.isfile(os.path.join(cwd,"utilities.py")) : #if still not here, last try is actual cwd
            cwd = os.getcwd() #still check this below

    #this is the only one imported here and in runCharacterize
    if not os.path.isfile(os.path.join(cwd,"utilities.py")):
        print "ERROR: utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import utilities as util

    if not os.path.isfile(os.path.join(cwd,"AlignModule.py")):
        print "ERROR: AlignModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import AlignModule as alignmod

    if not util.checkFile(os.path.join(cwd,"Pipeline.py")):
        print "ERROR: Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import Pipeline

    #input dir: a directory means "use existing alignments", a cmap means "run alignments"
    if not result.queryDir :
        print "ERROR: Query (-q) argument not supplied."
        sys.exit(1)
    qrypath = os.path.realpath(result.queryDir)
    if util.checkDir(qrypath, checkWritable=False, makeIfNotExist=False) : #output elsewhere so not writeable is ok
        runaligns = False
    elif util.checkCmap(qrypath) :
        runaligns = True
    else :
        print "ERROR: Query argument ("+qrypath+") not found or not a dir or cmap. Check -q argument."
        sys.exit(1)

    #this check isn't really necessary...make it a warning -- left over from runAlignMerge.py
    #if not os.path.split(qrypath)[1].endswith("alignmol") :
    #    print "Warning: Query dir ("+qrypath+") does not end with 'alignmol'; please be sure this is a Pipeline alignmol dir\n"

    #RefAligner -- check for either path to RefAligner, or dir containing it, depending on cluster args
    rabin = "" #need empty string for generateJobList even though no jobs are run
    if runaligns :
        rabin = result.RefAligner
        #-t has no default, so it is None when omitted; os.path.isdir(None) would raise TypeError
        if not rabin :
            print "ERROR: RefAligner (-t) argument not supplied.\nPlease supply RefAligner dir or full path as -t arg."
            sys.exit(1)
        #replicate Pipeline behavior: RefAligner is always required
        if os.path.isdir(rabin) :
            rabin = os.path.join(rabin, "RefAligner")
        if not util.checkExecutable(rabin):
            print "ERROR: RefAligner not found or not executable at", rabin, "\nPlease supply RefAligner dir or full path as -t arg."
            sys.exit(1)

    #optargs file (only needed when running alignments)
    optargs = None
    if runaligns and result.optArguments : #supplied on command line
        optargs = result.optArguments
        if not util.checkFile(optargs, ".xml") :
            print "optArguments path is supplied ("+optargs+") but not found or doesn't end in .xml, check -a argument."
            sys.exit(1)
    elif runaligns : #load from Pipeline dir if running alignments
        optargs = os.path.join(cwd,"optArguments_human.xml")
        if not util.checkFile(optargs):
            print "optArguments.xml missing in Pipeline directory ("+cwd+"). Try supplying path explicitly using -a."
            sys.exit(1)

    #output dir
    #NOTE(review): when -q is a cmap file the default below is a path underneath a file,
    # so -o looks to be effectively required when aligning -- confirm intended default
    if not result.outputDir :
        outdir = os.path.join(qrypath, "merge") #should be same as in AlignModule
    else :
        outdir = os.path.realpath(result.outputDir)
    if os.path.isdir(outdir) :
        if not util.checkDir(outdir) : #check writeable
            print "\nERROR: Output dir is not writeable:\n", outdir, "\n"
            sys.exit(1)
        #this is ok here
        #elif outdir == contigdir :
        #    print "\nERROR: Output dir cannot be same as input dir:\n", outdir, "\n"
        #    sys.exit(1)
        print "\nWARNING: Output dir already exists, results will be overwritten:\n", outdir, "\n"
    elif not util.checkDir(outdir) : #does not exist, make, if False, can't make or not writeable
        print "\nERROR: Output dir cannot be created or is not writeable:\n", outdir, "\n"
        sys.exit(1)

    #bnx file
    bnxfile = result.bnx
    if bnxfile : #must check for empty string BEFORE you do realpath, or it returns cwd
        bnxfile = os.path.realpath(bnxfile)
        if not util.checkFile(bnxfile, ".bnx") :
            print "ERROR: bnx file supplied but not found or incorrect suffix:", bnxfile
            sys.exit(1)
    elif runaligns :
        print "ERROR: bnx file not supplied but running alignments; please supply bnx file as -b argument"
        sys.exit(1)

    #nthreads
    nthreads = result.numThreads
    if nthreads <= 0 :
        print "ERROR: Number of threads value invalid (must be > 0): %i" % nthreads
        sys.exit(1)

    #maxthreads
    maxthreads = result.maxthreads
    if maxthreads <= 0 :
        print "ERROR: Max threads value invalid (must be > 0): %i" % maxthreads
        sys.exit(1)
    elif nthreads < maxthreads :
        print "Warning: num threads (-T: %i) < max threads (-j: %i): increasing num threads to equal max threads\n" % (nthreads, maxthreads)
        nthreads = maxthreads

    #pvalue: fall back to the default if an empty value was passed explicitly
    if result.pvalue : #supplied on command line
        pvalue = result.pvalue
    else :
        pvalue = "1e-12"

    #.errbin file
    errbinfile = result.errbinFile
    if errbinfile :
        errbinfile = os.path.realpath(result.errbinFile)
        if not util.checkFile(errbinfile, ".errbin") :
            print "ERROR: errbin file supplied but not found or incorrect suffix:", errbinfile
            sys.exit(1)

    #.err file (.errbin takes precedence when both are given)
    errfile = result.errFile
    if errfile and errbinfile :
        print "Warning: .err and .errbin arguments supplied; ignoring .err file"
        errfile = ""
    elif errfile :
        errfile = os.path.realpath(result.errFile)
        if not util.checkFile(errfile, ".err") :
            print "err file supplied but not found or incorrect suffix:", errfile
            sys.exit(1)

    #SampleCharModule is only needed to parse an .err file
    if errfile and not util.checkFile(os.path.join(cwd,"SampleCharModule.py")):
        print "SampleCharModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    elif errfile :
        import SampleCharModule as scm

    doref = result.ref

    #DONE checking arguments

    print "Using output dir", outdir
    if runaligns :
        print "Aligning", bnxfile, "\nTo", qrypath, "\n"
    else :
        print "Merging", qrypath, "\n"

    startTime = time.time() #time since Epoch
    memory_log = os.path.join(outdir, "memory_log.txt")
    util.initMemoryLog(memory_log)

    varsP = Pipeline.varsPipeline()
    varsP.RefAlignerBin        = rabin
    varsP.contigFolder         = "" #not used but needs to be an attr
    varsP.outputContigFolder   = "" #not used but needs to be a string attr
    varsP.pipeReportFile = os.path.join(outdir, "alignmol_jobs_log.txt")
    varsP.infoReportFile = os.path.join(outdir, "alignmol_log.txt")
    util.InitStatus( os.path.join(outdir, "status.xml") )

    if runaligns :
        varsP.optArgumentsFileIn   = optargs
        varsP.latestMergedCmap     = qrypath #if !doref, need this one
        varsP.ref                  = qrypath #and if doref, need this one
        varsP.nThreads             = nthreads #necessary otherwise job won't start -- max threads per node
        varsP.maxthreads           = maxthreads #threads per job
        p = os.path.split(qrypath)[1]
        varsP.outputContigPrefix   = p[:p.rfind(".")] #filename prefix
        varsP.stdoutlog    = True #use -stdout -stderr

        varsP.memoryLogpath  = os.path.join(outdir, "memory_log.txt")
        varsP.parseArguments() #parses optArgumentsFile
        varsP.replaceParam("alignmol", "-T", pvalue)
        varsP.checkDependencies()
        varsP.RefAlignerBinOrig = rabin
        varsP.prerunLog() #general information in log -- needed for refaligner_version

        noisep = {}
        if errbinfile :
            noisep = {"readparameters": errbinfile}
            #print "Using noise parameters from "+errbinfile+"\n" #move below
        elif errfile :
            noisep = scm.readNoiseParameters(errfile.replace(".err",""))
            if 'readparameters' in noisep : #remove this because it's redundant, and it can cause problems with RefAligner compatibility
                del noisep['readparameters']
            if not noisep : #readNoiseParameters returns empty dict on failure
                print "ERROR reading noise parameters, check .err file:", errfile
                sys.exit(1)
            #redundant with below?
            print "Using noise parameters from "+errfile+":\n" + " ".join(["-"+str(k)+" "+str(v) for k,v in noisep.iteritems()])+"\n"

        #some code from SampleCharModule to load args into noise0
        infoReport="Loaded noise parameters:\n"
        klist = ["FP", "FN", "sf", "sd", "sr", "bpp", "readparameters"] #hardcoding parameters is kind of bad, but it fixes the order without using OrderedDict.
        #noiseargs = self.varsP.argsListed('noise0') #not necessary
        for v in klist :
            if v not in noisep :
                continue
            param=str(noisep[v])
            util.LogStatus("parameter", "auto_"+v, param)
            infoReport+=v+":"+param+"\n"
            varsP.replaceParam("noise0", "-"+v, param)
        varsP.updateInfoReport(infoReport + '\n', printalso=True)

    else :
        print "Getting file list from", qrypath
        outFileList = getOutFileList(util, qrypath)
        if not outFileList :
            print "ERROR: Query dir ("+qrypath+") does not contain alignmol data. Check -q argument."
            sys.exit(1)
        else :
            print "Found", len(outFileList), "alignment results"
    #end if runaligns

    amod = alignmod.AlignModule(varsP, doref, outdir, bnxfile) #constructor will call generateJobList

    if runaligns :
        amod.runJobs()
        amod.checkResults() #was tab-indented; now spaces like the rest of the block
    else :
        amod.outFileList = outFileList
        p = os.path.split(outFileList[0])[1]
        if p.count("_") > 1 : #expect something like "EXP_REFINEFINAL1_4"
            #p = p[:p.rfind("_")+1] #remove integer suffix
            p = p[:p.rfind("_")] #remove integer suffix (and underscore)
        #else :
        #    p += "_" #because mrgstr is appended
        varsP.outputContigPrefix = p

    if not runaligns or len(amod.jobList) > 0 :
        amod.getAlignStats()

    if runaligns :
        print
        #copy from Pipeline.py
        if util.SummarizeErrors(varsP=varsP)==0:
            varsP.updatePipeReport("Pipeline has successfully completed\n")
            util.LogStatus("progress", "pipeline", "success")
        else:
            varsP.updatePipeReport("Pipeline has completed with errors\n")
            util.LogStatus("progress", "pipeline", "failure")

    #BELOW OLD CODE
    #Everything after this return is unreachable legacy code kept for reference only.
    #Note it uses 'outputdir', which is never assigned in this function (the live code uses 'outdir').

    return

    #in Pipeline, this is called first
    #print "Calling getAlignStats:" #but it won't work without varsP atm; skip it
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)

    print "Calling mergeMap"
    print outFileList[0] #, "\n", outputdir #moved above
    util.logMemory(memory_log, startTime, "mergeMap_start")
    #mergeMap(self.varsP, self.outFileList, mergepath=self.outputdir) #varsP is optional
    alignmod.mergeMap(None, outFileList, outputdir)
    util.logMemory(memory_log, startTime, "mergeMap_end")

    print "Calling mergeRcmaps"
    util.logMemory(memory_log, startTime, "mergeRcmaps_start")
    #mergeRcmaps(outFileList, outdir, varsP=None, splitByContig=None, stageName="alignmol") :
    alignmod.mergeRcmaps(outFileList, outputdir, splitByContig=True, stageName=outprefix)
    util.logMemory(memory_log, startTime, "mergeRcmaps_end")

    print "Calling split_XMap_byContig" #split_XMapQcmap_byContig"
    util.logMemory(memory_log, startTime, "split_XMap_byContig_start")
    #xmapdict = alignmod.split_XMap_byContig(outFileList, outputdir, stageName=outprefix) #old
    xmapdict = alignmod.split_XMap_byContig_new(outFileList, outputdir, stageName=outprefix)
    util.logMemory(memory_log, startTime, "split_XMap_byContig_end")

    print "Calling split_Qcmap_byContig"
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_start")
    #alignmod.split_Qcmap_byContig(outFileList, outputdir, xmapdict) #old
    alignmod.split_Qcmap_byContig_new(outFileList, outputdir, xmapdict, stageName=outprefix) #new: better performance
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_end")

    print "AlignMerge successfully completed"
Exemple #2
0
    def generateJobList(self):
        """AlignModule.generateJobList: create RefAligner jobs for aligning molecules to contigs.

        Returns without creating jobs when varsP.RefAlignerBin is empty (the
        runAlignMol case) or, unless self.doref is set, when
        varsP.latestMergedCmap is unset or fails util.checkCmap (an error is
        reported in that case). If self.bnxin is set a single job is created for
        it; otherwise one job is created per split bnx piece, numbered
        1..varsP.nPairwiseJobs. Every output prefix is appended to
        self.outFileList and every job is registered with self.addJob.
        """
        varsP = self.varsP

        #for runAlignMol, this method is called but not used: nothing to do without a RefAligner binary
        if not varsP.RefAlignerBin:
            return

        if self.doref:
            refmap = varsP.ref
            mappref = self.stageName  #use stageName also for output filename
        else:
            #contigs come from the latest merged cmap; a return here means no jobs,
            # which is handled in multiThreadRunJobs
            refmap = varsP.latestMergedCmap
            if not refmap or not util.checkCmap(refmap):
                err = "Error in AlignModule.generateJobList: varsP.latestMergedCmap is not set or not valid cmap; skipping %s" % self.stageName
                varsP.updatePipeReport(err + "\n")
                util.LogError("error", err)
                return
            mapname = os.path.split(refmap)[1]
            mappref = mapname[:mapname.find(".")]  #filename up to its first dot
        baseargs = [varsP.RefAlignerBin, '-ref', refmap]

        #Note: noise parameters should be fixed because when bnx is split, -M
        # would find different parameters for different contigs. Use noise0.
        noiseargs = varsP.argsListed('noise0')
        try:  #argsListed does not check key
            refargs = varsP.argsListed(self.stageName)  #'alignmolvref'
        except KeyError:  #no section for this stage: use the argStageName section
            refargs = varsP.argsListed(self.argStageName)
        else:
            self.jobargs = refargs

        def submit(jobname, outarg, bnxpath):
            #build one RefAligner command line and register it as a job
            self.outFileList.append(outarg)  #file prefixes
            jobargs = baseargs + ['-o', outarg, '-i', bnxpath]
            stdoutf = None
            if varsP.stdoutlog:  #remember, these must be after -o
                jobargs += ['-f', '-stdout', '-stderr']
                stdoutf = outarg + ".stdout"
            jobargs += ['-maxthreads', str(varsP.maxthreads)]
            #noise0 goes before the stage args so that the latter can override the former
            jobargs += noiseargs
            jobargs += ['-output-veto-filter', 'intervals.txt$']  #this feature not in old RefAligner
            jobargs += refargs
            self.addJob(
                mthread.singleJob(jobargs,
                                  jobname,
                                  outarg + ".xmap",
                                  jobname,
                                  maxThreads=varsP.maxthreads,
                                  clusterLogDir=varsP.clusterLogDir,
                                  expectedStdoutFile=stdoutf))

        #single job with bnxin (constructor): this is the only job
        if self.bnxin:
            submit(self.stageName,
                   os.path.join(self.alignTarget, mappref),
                   self.bnxin)
            return

        #otherwise one job per split bnx piece
        for idx in range(1, varsP.nPairwiseJobs + 1):
            idxstr = "_%s_of_%s" % (idx, varsP.nPairwiseJobs)
            submit(self.stageName + idxstr,
                   os.path.join(self.alignTarget, mappref + "_" + str(idx)),
                   varsP.bnxFile.replace(".bnx", idxstr + ".bnx"))
    def generateJobList(self):
        """AlignModule.generateJobList: create RefAligner jobs for aligning molecules to contigs.

        No jobs are created when varsP.RefAlignerBin is empty, or (when
        self.doref is false) when varsP.latestMergedCmap is missing or rejected
        by util.checkCmap; the latter case is also reported as an error.
        Otherwise a single job is made for self.bnxin if it is set, else one job
        per split bnx piece (1..varsP.nPairwiseJobs). Output prefixes are
        collected in self.outFileList and jobs are passed to self.addJob.
        """
        params = self.varsP

        #for runAlignMol, this method is called but not used: exit if RefAlignerBin is empty
        if not params.RefAlignerBin:
            return

        #the contigs are obtained from varsP.latestMergedCmap--check its validity;
        # a return will mean no jobs, and no jobs is now handled in multiThreadRunJobs
        usemerged = not self.doref
        if usemerged and not (params.latestMergedCmap
                              and util.checkCmap(params.latestMergedCmap)):
            err = "Error in AlignModule.generateJobList: varsP.latestMergedCmap is not set or not valid cmap; skipping %s" % self.stageName
            params.updatePipeReport(err+"\n")
            util.LogError("error", err)
            return

        #Note: noise parameters should be fixed because when bnx is split, -M
        # would find different parameters for different contigs. Use noise0.

        if usemerged:
            refmap = params.latestMergedCmap #reference is latest merged cmap
            mappref = os.path.basename(refmap)
            mappref = mappref[:mappref.find(".")]
        else:
            refmap = params.ref
            mappref = self.stageName #use stageName also for output filename
        baseargs = [params.RefAlignerBin, '-ref', refmap]

        noiseargs = params.argsListed('noise0')
        try: #argsListed does not check key
            refargs = params.argsListed(self.stageName) #'alignmolvref'
        except KeyError: #stage has no section of its own: fall back to argStageName
            refargs = params.argsListed(self.argStageName)
        else:
            self.jobargs = refargs

        #None stands for the single job with bnxin (constructor); integers are split bnx pieces
        if self.bnxin:
            pieces = [None]
        else:
            pieces = range(1, params.nPairwiseJobs+1)

        for idx in pieces:
            if idx is None:
                jobname = self.stageName
                outarg = os.path.join(self.alignTarget, mappref)
                bnxpath = self.bnxin
            else:
                idxstr = "_%s_of_%s" % (idx, params.nPairwiseJobs)
                jobname = self.stageName+idxstr
                outarg = os.path.join(self.alignTarget, mappref+"_"+str(idx))
                bnxpath = params.bnxFile.replace(".bnx", idxstr+".bnx")
            self.outFileList.append(outarg) #file prefixes

            uselog = params.stdoutlog
            jobargs = baseargs + ['-o', outarg, '-i', bnxpath]
            if uselog: #remember, these must be after -o
                jobargs.extend(['-f', '-stdout', '-stderr'])
            jobargs.extend(['-maxthreads', str(params.maxthreads)])
            #add noise0 before alignmol (stageName) so that the latter can override the former
            jobargs.extend(noiseargs)
            #if idx != 1 : #keep _r for first job only -- copied from SVModule
            #    jobargs.extend( ['-output-veto-filter', '_r.cmap$'] ) #need this for copy number; do NOT veto
            jobargs.extend(['-output-veto-filter', 'intervals.txt$']) #this feature not in old RefAligner
            jobargs.extend(refargs)
            stdoutf = outarg+".stdout" if uselog else None

            s1Job = mthread.singleJob(jobargs, jobname, outarg+".xmap", jobname,
                                      maxThreads=params.maxthreads,
                                      clusterLogDir=params.clusterLogDir,
                                      expectedStdoutFile=stdoutf)
            self.addJob(s1Job)
def runAlignMol():
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        '-q',
        dest='queryDir',
        help=
        'Path to merged cmap to align molecules (-b) to OR alignmol dir from Pipeline for merge (if latter, no alignments are performed), required',
        type=str)
    parser.add_argument(
        '-b',
        dest='bnx',
        help='Input molecule (.bnx) file, required if aligning molecules',
        type=str)
    #parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file OR path to dir containing split bnx pieces, required if aligning molecules', type=str) #I should add the split feature; for now, just do single bnx
    parser.add_argument(
        '-a',
        dest='optArguments',
        help=
        'Path to optArguments.xml (optional, default optArguments_human.xml in Pipeline dir if found, otherwise required)',
        default="",
        type=str)
    parser.add_argument(
        '-r',
        help=
        'If this flag is used, alignmolvref arguments are used, otherwise alignmol arguments are used (default alignmol; optional)',
        dest='ref',
        action='store_true')
    parser.add_argument(
        '-o',
        dest='outputDir',
        help=
        'output dir (optional, defaults to sub-dir of input map dir called "alignmol")',
        default="",
        type=str)
    parser.add_argument(
        '-t',
        dest='RefAligner',
        help='Path to RefAligner or dir containing it (required)',
        type=str)
    parser.add_argument(
        '-T',
        dest='numThreads',
        help='Total number of threads (cores) to use (optional, default 4)',
        default=4,
        type=int)
    parser.add_argument(
        '-j',
        dest='maxthreads',
        help=
        'Threads per Job, -maxthreads (non-cluster only;optional, default 4)',
        default=4,
        type=int)
    parser.add_argument(
        '-e',
        dest='errFile',
        help=
        '.err file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise',
        default="",
        type=str)
    parser.add_argument(
        '-E',
        dest='errbinFile',
        help=
        '.errbin file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise',
        default="",
        type=str)
    parser.add_argument(
        '-p',
        dest='pipelineDir',
        help=
        'Pipeline dir (optional, defaults to script dir, or current directory)',
        default="",
        type=str)
    result = parser.parse_args()

    outprefix = "exp_refineFinal1"  #this is the default; assume for now

    #check all Pipeline dependencies
    if result.pipelineDir:
        cwd = result.pipelineDir
    else:
        cwd = os.path.split(
            os.path.realpath(__file__))[0]  #this is path of this script
        if not os.path.isfile(os.path.join(
                cwd,
                "utilities.py")):  #if still not here, last try is actual cwd
            cwd = os.getcwd()  #still check this below

    #this is the only one imported here and in runCharacterize
    if not os.path.isfile(os.path.join(cwd, "utilities.py")):
        print "ERROR: utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import utilities as util

    if not os.path.isfile(os.path.join(cwd, "AlignModule.py")):
        print "ERROR: AlignModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import AlignModule as alignmod

    if not util.checkFile(os.path.join(cwd, "Pipeline.py")):
        print "ERROR: Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import Pipeline

    if not util.checkFile(os.path.join(cwd, "mapClasses.py")):
        print "ERROR: mapClasses.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import mapClasses as mc

    #input dir
    if not result.queryDir:
        print "ERROR: Query (-q) argument not supplied."
        sys.exit(1)
    qrypath = os.path.realpath(result.queryDir)
    if util.checkDir(
            qrypath, checkWritable=False,
            makeIfNotExist=False):  #output elsewhere so not writeable is ok
        runaligns = False
    elif util.checkCmap(qrypath):
        runaligns = True
    else:
        print "ERROR: Query argument (" + qrypath + ") not found or not a dir or cmap. Check -q argument."
        sys.exit(1)

    #this check isn't really necessary...make it a warning -- left over from runAlignMerge.py
    #if not os.path.split(qrypath)[1].endswith("alignmol") :
    #    print "Warning: Query dir ("+qrypath+") does not end with 'alignmol'; please be sure this is a Pipeline alignmol dir\n"

    #RefAligner -- check for either path to RefAligner, or dir containing it, depending on cluster args
    rabin = ""  #need empty string for generateJobList even though no jobs are run
    if runaligns:
        rabin = result.RefAligner
        #replicate Pipeline behavior: RefAligner is always required
        if os.path.isdir(rabin):
            rabin = os.path.join(rabin, "RefAligner")
        if not util.checkExecutable(rabin):
            print "ERROR: RefAligner not found or not executable at", rabin, "\nPlease supply RefAligner dir or full path as -t arg."
            sys.exit(1)

    #optargs file
    optargs = None
    if runaligns and result.optArguments:  #supplied on command line
        optargs = result.optArguments
        if not util.checkFile(optargs, ".xml"):
            print "optArguments path is supplied (" + optargs + ") but not found or doesn't end in .xml, check -a argument."
            sys.exit(1)
    elif runaligns:  #load from Pipeline dir if running alignments
        optargs = os.path.join(cwd, "optArguments_human.xml")
        if not util.checkFile(optargs):
            print "optArguments.xml missing in Pipeline directory (" + cwd + "). Try supplying path explicitly using -a."
            sys.exit(1)

    #output dir
    if not result.outputDir:
        outdir = os.path.join(qrypath,
                              "merge")  #should be same as in AlignModule
    else:
        outdir = os.path.realpath(result.outputDir)
    if os.path.isdir(outdir):
        if not util.checkDir(outdir):  #check writeable
            print "\nERROR: Output dir is not writeable:\n", outdir, "\n"
            sys.exit(1)
        #this is ok here
        #elif outdir == contigdir :
        #    print "\nERROR: Output dir cannot be same as input dir:\n", outdir, "\n"
        #    sys.exit(1)
        print "\nWARNING: Output dir already exists, results will be overwritten:\n", outdir, "\n"
    elif not util.checkDir(
            outdir
    ):  #does not exist, make, if False, can't make or not writeable
        print "\nERROR: Output dir cannot be created or is not writeable:\n", outdir, "\n"
        sys.exit(1)

    #bnx file
    bnxfile = result.bnx
    if bnxfile:  #must check for empty string BEFORE you do realpath, or it returns cwd
        bnxfile = os.path.realpath(bnxfile)
        if not util.checkFile(bnxfile, ".bnx"):
            print "ERROR: bnx file supplied but not found or incorrect suffix:", bnxfile
            sys.exit(1)
    elif runaligns:
        print "ERROR: bnx file not supplied but running alignments; please supply bnx file as -b argument"
        sys.exit(1)

    #nthreads
    nthreads = result.numThreads
    if nthreads <= 0:
        print "ERROR: Number of threads value invalid (must be > 0): %i" % nthreads
        sys.exit(1)

    #maxthreads
    maxthreads = result.maxthreads
    if maxthreads <= 0:
        print "ERROR: Max threads value invalid (must be > 0): %i" % maxthreads
        sys.exit(1)
    elif nthreads < maxthreads:
        print "Warning: num threads (-T: %i) < max threads (-j: %i): increasing num threads to equal max threads\n" % (
            nthreads, maxthreads)
        nthreads = maxthreads

    #.errbin file
    errbinfile = result.errbinFile
    if errbinfile:
        errbinfile = os.path.realpath(result.errbinFile)
        if not util.checkFile(errbinfile, ".errbin"):
            print "ERROR: errbin file supplied but not found or incorrect suffix:", errbinfile
            sys.exit(1)

    #.err file
    errfile = result.errFile
    if errfile and errbinfile:
        print "Warning: .err and .errbin arguments supplied; ignoring .err file"
        errfile = ""
    elif errfile:
        errfile = os.path.realpath(result.errFile)
        if not util.checkFile(errfile, ".err"):
            print "err file supplied but not found or incorrect suffix:", errfile
            sys.exit(1)

    if errfile and not util.checkFile(os.path.join(cwd,
                                                   "SampleCharModule.py")):
        print "SampleCharModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    elif errfile:
        import SampleCharModule as scm

    doref = result.ref

    #DONE checking arguments

    print "Using output dir", outdir
    if runaligns:
        print "Aligning", bnxfile, "\nTo", qrypath, "\n"
    else:
        print "Merging", qrypath, "\n"

    startTime = time.time()  #time since Epoch
    memory_log = os.path.join(outdir, "memory_log.txt")
    util.initMemoryLog(memory_log)

    varsP = Pipeline.varsPipeline()
    varsP.RefAlignerBin = rabin
    varsP.contigFolder = ""  #not used but needs to be an attr
    varsP.outputContigFolder = ""  #not used but needs to be a string attr
    varsP.pipeReportFile = os.path.join(outdir, "alignmol_jobs_log.txt")
    varsP.infoReportFile = os.path.join(outdir, "alignmol_log.txt")
    util.InitStatus(os.path.join(outdir, "status.xml"))

    if runaligns:
        varsP.optArgumentsFileIn = optargs
        varsP.latestMergedCmap = qrypath  #if !doref, need this one
        varsP.ref = qrypath  #and if doref, need this one
        varsP.nThreads = nthreads  #necessary otherwise job won't start -- max threads per node
        varsP.maxthreads = maxthreads  #threads per job
        p = os.path.split(qrypath)[1]
        varsP.outputContigPrefix = p[:p.rfind(".")]  #filename prefix
        varsP.stdoutlog = True  #use -stdout -stderr
        varsP.sorted_file = bnxfile[:bnxfile.rfind(
            ".")]  #enables the mol fraction align in AlignModule.getAlignStats
        if qrypath.endswith(".cmap"):  #enable the mol stats
            varsP.totAssemblyLenMb = mc.multiCmap(
                qrypath, lengthonly=True).totalLength / 1e6

        varsP.memoryLogpath = os.path.join(outdir, "memory_log.txt")
        varsP.parseArguments()  #parses optArgumentsFile
        varsP.checkDependencies()
        varsP.RefAlignerBinOrig = rabin
        varsP.prerunLog(
        )  #general information in log -- needed for refaligner_version

        noisep = {}
        if errbinfile:
            noisep = {"readparameters": errbinfile}
            #print "Using noise parameters from "+errbinfile+"\n" #move below
        elif errfile:
            noisep = scm.readNoiseParameters(errfile.replace(".err", ""))
            if noisep.has_key(
                    'readparameters'
            ):  #remove this because it's redundant, and it can cause problems with RefAligner compatibility
                del noisep['readparameters']
            if not noisep:  #readNoiseParameters returns empty dict on failure
                print "ERROR reading noise parameters, check .err file:", errfile
                sys.exit(1)
            #possibly redundant with the "Loaded noise parameters" report that is built and printed further below?
            print "Using noise parameters from " + errfile + ":\n" + " ".join(
                ["-" + str(k) + " " + str(v)
                 for k, v in noisep.iteritems()]) + "\n"

        #some code from SampleCharModule to load args into noise0
        infoReport = "Loaded noise parameters:\n"
        klist = [
            "FP", "FN", "sf", "sd", "sr", "bpp", "readparameters"
        ]  #hardcoding parameters is kind of bad, but it fixes the order without using OrderedDict.
        #noiseargs = self.varsP.argsListed('noise0') #not necessary
        for v in klist:
            if not noisep.has_key(v):
                continue
            param = str(noisep[v])
            util.LogStatus("parameter", "auto_" + v, param)
            infoReport += v + ":" + param + "\n"
            varsP.replaceParam("noise0", "-" + v, param)
        varsP.updateInfoReport(infoReport + '\n', printalso=True)

    else:
        print "Getting file list from", qrypath
        outFileList = getOutFileList(util, qrypath)
        if not outFileList:
            print "ERROR: Query dir (" + qrypath + ") does not contain alignmol data. Check -q argument."
            sys.exit(1)
        else:
            print "Found", len(outFileList), "alignment results"
    #end if runaligns

    amod = alignmod.AlignModule(
        varsP, doref, outdir, bnxfile)  #constructor will call generateJobList

    if runaligns:
        amod.runJobs()
        amod.checkResults()
    else:
        amod.outFileList = outFileList
        p = os.path.split(outFileList[0])[1]
        if p.count("_") > 1:  #expect something like "EXP_REFINEFINAL1_4"
            #p = p[:p.rfind("_")+1] #remove integer suffix
            p = p[:p.rfind("_")]  #remove integer suffix (and underscore)
        #else :
        #    p += "_" #because mrgstr is appended
        varsP.outputContigPrefix = p

    if not runaligns or len(amod.jobList) > 0:
        amod.getAlignStats()

    if runaligns:
        print
        #copy from Pipeline.py
        if util.SummarizeErrors(varsP=varsP) == 0:
            varsP.updatePipeReport("Pipeline has successfully completed\n")
            util.LogStatus("progress", "pipeline", "success")
        else:
            varsP.updatePipeReport("Pipeline has completed with errors\n")
            util.LogStatus("progress", "pipeline", "failure")

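    #OLD CODE BELOW: everything after the unconditional return that follows is unreachable and is kept for reference only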

    return

    #in Pipeline, this is called first
    #print "Calling getAlignStats:" #but it won't work without varsP atm; skip it
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)

    print "Calling mergeMap"
    print outFileList[0]  #, "\n", outputdir #moved above
    util.logMemory(memory_log, startTime, "mergeMap_start")
    #mergeMap(self.varsP, self.outFileList, mergepath=self.outputdir) #varsP is optional
    alignmod.mergeMap(None, outFileList, outputdir)
    util.logMemory(memory_log, startTime, "mergeMap_end")

    print "Calling mergeRcmaps"
    util.logMemory(memory_log, startTime, "mergeRcmaps_start")
    #mergeRcmaps(outFileList, outdir, varsP=None, splitByContig=None, stageName="alignmol") :
    alignmod.mergeRcmaps(outFileList,
                         outputdir,
                         splitByContig=True,
                         stageName=outprefix)
    util.logMemory(memory_log, startTime, "mergeRcmaps_end")

    print "Calling split_XMap_byContig"  #split_XMapQcmap_byContig"
    util.logMemory(memory_log, startTime, "split_XMap_byContig_start")
    #xmapdict = alignmod.split_XMap_byContig(outFileList, outputdir, stageName=outprefix) #old
    xmapdict = alignmod.split_XMap_byContig_new(outFileList,
                                                outputdir,
                                                stageName=outprefix)
    util.logMemory(memory_log, startTime, "split_XMap_byContig_end")

    print "Calling split_Qcmap_byContig"
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_start")
    #alignmod.split_Qcmap_byContig(outFileList, outputdir, xmapdict) #old
    alignmod.split_Qcmap_byContig_new(
        outFileList, outputdir, xmapdict,
        stageName=outprefix)  #new: better performance
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_end")

    print "AlignMerge successfully completed"