import os
import sys


def cleaning(programDirectory, previousProcessFiles, processed, account):
    sys.path.append(os.path.join(programDirectory, "modules/cleaning"))
    import cleaningScript
    import common

    # read the process files
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "cleaning")
    for tools in previousProcessFiles:
        print("cleaning: " + tools)
        for sample in previousProcessFiles[tools]["analysed"]:
            if sample in processFiles[tools]["ongoing"]:
                done = common.get_slurm_job_status(int(processFiles[tools]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tools, sample)
            elif sample in processFiles[tools]["failed"]:
                print("sample {0} FAILED".format(sample))
            elif sample in processFiles[tools]["cancelled"]:
                print("sample {0} CANCELLED".format(sample))
            elif sample in processFiles[tools]["timeout"]:
                print("sample {0} TIMEOUT".format(sample))
            elif sample in processFiles[tools]["excluded"]:
                print("sample {0} EXCLUDED".format(sample))
            elif sample in processFiles[tools]["analysed"]:
                print("sample {0} COMPLETE".format(sample))
            else:
                print("submitting: " + sample)
                try:
                    outgoing = cleaningScript.submit2Cleaning(tools, sample, previousProcessFiles, programDirectory, account)
                    processFiles[tools]["ongoing"].update(outgoing)
                except Exception:
                    print("FAILED: was the sample excluded?")
    common.UpdateProcessFiles(processFiles, processed, "cleaning")
    return processFiles
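
# A minimal sketch of the per-tool process-file structure that cleaning(),
# applyFilter(), combine() and buildDatabase() all assume, inferred from the
# keys used in this module. The tool name and field values are hypothetical
# placeholders; the authoritative structure is whatever common.readProcessFiles
# returns.
_EXAMPLE_PROCESS_FILES = {
    "someCaller": {  # one entry per variant caller / tool
        # jobs currently queued or running on slurm; each entry carries at
        # least the slurm job id ("pid") and the path of its output file
        "ongoing": {"sample1": {"pid": "1234567", "outputFile": "sample1.vcf"}},
        "analysed": {},   # jobs that finished successfully
        "failed": {},     # jobs that exited with an error
        "cancelled": {},  # jobs cancelled by the user or the scheduler
        "timeout": {},    # jobs that hit their time limit
        "excluded": {},   # samples deliberately skipped
    },
}
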
def applyFilter(programDirectory, previousProcessFiles, processed, account, frequency_db):
    sys.path.append(os.path.join(programDirectory, "modules/filter"))
    import common
    import filterScripts

    print("applying filter")
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "filter")
    # iterate through every available tool
    for tool in previousProcessFiles:
        print("filtering the output of {0}".format(tool))
        # all samples that have already passed through the filter, in any state
        filtered = (list(processFiles[tool]["analysed"])
                    + list(processFiles[tool]["cancelled"])
                    + list(processFiles[tool]["timeout"])
                    + list(processFiles[tool]["failed"])
                    + list(processFiles[tool]["excluded"]))
        database = list(previousProcessFiles[tool]["analysed"])
        # only run the query when the previous query has finished and new databases
        # have been generated.
        # WARNING: if the user removes samples from the database log file, this
        # condition is always true. TODO: change to a set comparison.
        if processFiles[tool]["ongoing"] == {} and sorted(filtered) != sorted(database):
            add2Ongoing = filterScripts.runScripts(tool, database, previousProcessFiles, programDirectory, account, frequency_db)
            processFiles[tool]["ongoing"].update(add2Ongoing)
        samples = list(processFiles[tool]["ongoing"])
        while samples:
            sample = samples.pop(0)
            try:
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            except Exception:
                print("Warning: unable to get slurm job status for job {0}, please try again".format(processFiles[tool]["ongoing"][sample]["pid"]))
    common.UpdateProcessFiles(processFiles, processed, "filter")
    return processFiles
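
# A sketch of the set comparison suggested by the TODO above (hypothetical; not
# wired into applyFilter yet). Comparing sets instead of sorted lists means that
# samples manually removed from the database log no longer make the rerun
# condition permanently true; only databases the filter has not yet seen
# trigger a new query.
def _filterNeedsRerun(filtered, database):
    # rerun only when the callers produced databases the filter has not processed
    return not set(database).issubset(filtered)
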
def combine(programDirectory, previousProcessFiles, processed, account, bam_files):
    sys.path.append(os.path.join(programDirectory, "modules/combine"))
    import combineScript
    import common

    # read the process files
    processFiles = common.readProcessFiles(["FindSV"], processed, "combine")
    samplesToMerge = {}
    firstTool = True
    # count how many callers have analysed each sample
    for tools in previousProcessFiles:
        for sample in previousProcessFiles[tools]["analysed"]:
            if firstTool:
                samplesToMerge[sample] = 1
            elif sample in samplesToMerge:
                samplesToMerge[sample] += 1
        firstTool = False
    # merge the caller results of each sample that has been analysed by all available callers
    for sample in samplesToMerge:
        if samplesToMerge[sample] == len(previousProcessFiles):
            # check the status of each sample
            if sample in processFiles["FindSV"]["ongoing"]:
                done = common.get_slurm_job_status(int(processFiles["FindSV"]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, "FindSV", sample)
            elif sample in processFiles["FindSV"]["failed"]:
                print("sample {0} FAILED".format(sample))
            elif sample in processFiles["FindSV"]["cancelled"]:
                print("sample {0} CANCELLED".format(sample))
            elif sample in processFiles["FindSV"]["timeout"]:
                print("sample {0} TIMEOUT".format(sample))
            elif sample in processFiles["FindSV"]["excluded"]:
                print("sample {0} EXCLUDED".format(sample))
            elif sample in processFiles["FindSV"]["analysed"]:
                print("sample {0} COMPLETE".format(sample))
            else:
                print("submitting: " + sample)
                combinedProcessFile = {}
                tool = ""
                for tools in previousProcessFiles:
                    tool = tools
                    combinedProcessFile[tools] = previousProcessFiles[tools]["analysed"][sample]
                outgoing = combineScript.submit4combination(tool, sample, combinedProcessFile, programDirectory, account, bam_files[sample]["path"])
                processFiles["FindSV"]["ongoing"].update(outgoing)
    common.UpdateProcessFiles(processFiles, processed, "combine")
    return processFiles
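
# An equivalent, more direct way to compute the samples that every caller has
# analysed, shown for clarity (hypothetical; combine() above uses the counting
# dictionary instead): intersect the per-caller "analysed" keys.
def _samplesReadyToMerge(previousProcessFiles):
    perTool = [set(previousProcessFiles[tools]["analysed"]) for tools in previousProcessFiles]
    return set.intersection(*perTool) if perTool else set()
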
def buildDatabase(programDirectory, previousProcessFiles, processed, account):
    sys.path.append(os.path.join(programDirectory, "modules/database"))
    import common
    import submitToDatabase

    print("constructing databases")
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "database")
    # create a dictionary containing the samples that are yet to be run through the database build
    newsamples = {}
    for tool in previousProcessFiles:
        newsamples[tool] = {}
        for sample in previousProcessFiles[tool]["analysed"]:
            states = ("ongoing", "analysed", "cancelled", "failed", "excluded", "timeout")
            if not any(sample in processFiles[tool][state] for state in states):
                newsamples[tool][sample] = previousProcessFiles[tool]["analysed"][sample]
    # check whether any of the ongoing jobs have finished and update their state
    for tools in processFiles:
        samples = list(processFiles[tools]["ongoing"])
        while samples:
            sample = samples.pop(0)
            try:
                done = common.get_slurm_job_status(int(processFiles[tools]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tools, sample)
            except Exception:
                print("Warning: unable to get slurm job status for job {0}, please try again".format(processFiles[tools]["ongoing"][sample]["pid"]))
    # submit the new samples and add them to ongoing
    print("submitting")
    for tools in newsamples:
        print(tools)
        for sample in newsamples[tools]:
            print("sample: " + sample)
            databaseOutput = submitToDatabase.submit2DB(newsamples, tools, sample, programDirectory, processed, account)
            processFiles[tools]["ongoing"].update({sample: newsamples[tools][sample]})
            processFiles[tools]["ongoing"][sample]["pid"] = databaseOutput[0]
            processFiles[tools]["ongoing"][sample]["outputFile"] = databaseOutput[1]
    common.UpdateProcessFiles(processFiles, processed, "database")
    return processFiles
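
# A minimal usage sketch chaining the four stages above into one polling pass.
# The stage order (cleaning -> combine -> database -> filter) and the argument
# wiring are assumptions about how the surrounding pipeline calls these
# functions; each stage returns its updated process files, which feed the next
# stage as previousProcessFiles.
def _runPipelinePass(programDirectory, callerProcessFiles, processed, account,
                     frequency_db, bam_files):
    cleaned = cleaning(programDirectory, callerProcessFiles, processed, account)
    combined = combine(programDirectory, cleaned, processed, account, bam_files)
    databases = buildDatabase(programDirectory, combined, processed, account)
    return applyFilter(programDirectory, databases, processed, account, frequency_db)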