def cleaning(programDirectory, previousProcessFiles, processed, account):
    """Drive the cleaning step: report/refresh the state of every analysed
    sample per tool, and submit any sample that has not yet entered cleaning.

    programDirectory     -- program root; modules/cleaning is appended to sys.path
    previousProcessFiles -- process-file dict of the previous step
                            ({tool: {state: {sample: info}}})
    processed            -- directory holding the process files
    account              -- slurm account used for job submission

    Returns the updated process-file dictionary for the "cleaning" step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/cleaning"))
    import cleaningScript, common

    # read the process files of the cleaning step
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "cleaning")
    for tool in previousProcessFiles:
        print("cleaning: " + tool)
        for sample in previousProcessFiles[tool]["analysed"]:
            if sample in processFiles[tool]["ongoing"]:
                # poll slurm and move the sample to its final state if the job ended
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            elif sample in processFiles[tool]["failed"]:
                print("sample {0} FAILED".format(sample))
            elif sample in processFiles[tool]["cancelled"]:
                print("sample {0} CANCELLED".format(sample))
            elif sample in processFiles[tool]["timeout"]:
                print("sample {0} TIMEOUT".format(sample))
            elif sample in processFiles[tool]["excluded"]:
                print("sample {0} EXCLUDED".format(sample))
            elif sample in processFiles[tool]["analysed"]:
                print("sample {0} COMPLETE".format(sample))
            else:
                # sample is new: submit it and record the submission as ongoing
                print("submitting: " + sample)
                try:
                    outgoing = cleaningScript.submit2Cleaning(tool, sample, previousProcessFiles, programDirectory, account)
                    processFiles[tool]["ongoing"].update(outgoing)
                except Exception:
                    # best-effort: submission may legitimately fail for excluded samples
                    print("FAILED:was the sample excluded?")
    common.UpdateProcessFiles(processFiles, processed, "cleaning")
    return processFiles
def applyFilter(programDirectory, previousProcessFiles, processed, account, frequency_db):
    """Apply the frequency-database filter to the output of every tool.

    A new filter run is submitted only when no filter job is ongoing for the
    tool AND there are analysed samples that have not yet passed the filter.
    Afterwards the slurm status of every ongoing filter job is polled.

    Returns the updated process-file dictionary for the "filter" step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/filter"))
    import common, filterScripts

    print("applying filter")
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "filter")
    # iterate through every available tool
    for tool in previousProcessFiles:
        print("filtering the output of {0}".format(tool))
        # all samples that already left the filter step, in any final state
        # (list() instead of dict.keys() concatenation also works on Python 3)
        filtered = (list(processFiles[tool]["analysed"])
                    + list(processFiles[tool]["cancelled"])
                    + list(processFiles[tool]["timeout"])
                    + list(processFiles[tool]["failed"])
                    + list(processFiles[tool]["excluded"]))
        database = list(previousProcessFiles[tool]["analysed"])
        # only run the query when the previous query finished and there are
        # newly generated databases.
        # WARNING: if the user removes samples from the database log file the
        # condition is always true. TODO: change to set comparison.
        if processFiles[tool]["ongoing"] == {} and sorted(filtered) != sorted(database):
            add2Ongoing = filterScripts.runScripts(tool, list(previousProcessFiles[tool]["analysed"]), previousProcessFiles, programDirectory, account, frequency_db)
            processFiles[tool]["ongoing"].update(add2Ongoing)
        # iterate over a snapshot: get_process_status may remove entries from "ongoing"
        for sample in list(processFiles[tool]["ongoing"]):
            try:
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            except Exception:
                # bug fix: the original printed the pid of a stale loop variable
                # ("sample" left over from building the work list) instead of
                # the pid of the sample actually being polled
                print("Warning, unable to get slurm job status for job {}, please try again".format(processFiles[tool]["ongoing"][sample]["pid"]))
    common.UpdateProcessFiles(processFiles, processed, "filter")
    return processFiles
def combine(programDirectory, previousProcessFiles, processed, account, bam_files):
    """Merge the caller results of every sample analysed by ALL callers.

    A sample is merged only when each tool in previousProcessFiles lists it as
    analysed. Merge jobs run under the pseudo-tool name "FindSV".

    Returns the updated process-file dictionary for the "combine" step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/combine"))
    import common, combineScript

    # read the process files of the combine step
    processFiles = common.readProcessFiles(["FindSV"], processed, "combine")

    # count, per sample, how many callers analysed it; the first tool seeds the dict
    samplesToMerge = {}
    firstTool = True
    for tool in previousProcessFiles:
        for sample in previousProcessFiles[tool]["analysed"]:
            if firstTool:
                samplesToMerge[sample] = 1
            elif sample in samplesToMerge:
                samplesToMerge[sample] += 1
        firstTool = False

    # merge every sample that has been analysed by all available callers
    for sample in samplesToMerge:
        if samplesToMerge[sample] != len(previousProcessFiles):
            continue
        # check the status of the sample's merge job
        if sample in processFiles["FindSV"]["ongoing"]:
            done = common.get_slurm_job_status(int(processFiles["FindSV"]["ongoing"][sample]["pid"]))
            processFiles = common.get_process_status(done, processFiles, "FindSV", sample)
        elif sample in processFiles["FindSV"]["failed"]:
            print("sample {0} FAILED".format(sample))
        elif sample in processFiles["FindSV"]["cancelled"]:
            print("sample {0} CANCELLED".format(sample))
        elif sample in processFiles["FindSV"]["timeout"]:
            print("sample {0} TIMEOUT".format(sample))
        elif sample in processFiles["FindSV"]["excluded"]:
            print("sample {0} EXCLUDED".format(sample))
        elif sample in processFiles["FindSV"]["analysed"]:
            print("sample {0} COMPLETE".format(sample))
        else:
            print("submitting: " + sample)
            # collect the per-tool "analysed" entry of this sample;
            # "tool" keeps the last tool name, exactly as the original did
            combinedProcessFile = {}
            tool = ""
            for tools in previousProcessFiles:
                tool = tools
                combinedProcessFile[tools] = previousProcessFiles[tools]["analysed"][sample]
            outgoing = combineScript.submit4combination(tool, sample, combinedProcessFile, programDirectory, account, bam_files[sample]["path"])
            processFiles["FindSV"]["ongoing"].update(outgoing)
    common.UpdateProcessFiles(processFiles, processed, "combine")
    return processFiles
def buildDatabase(programDirectory, previousProcessFiles, processed, account):
    """Build the frequency databases: poll ongoing build jobs and submit every
    analysed sample that has not yet been run through the database step.

    Returns the updated process-file dictionary for the "database" step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/database"))
    # time and process are kept for their (possible) import side effects,
    # even though this function does not reference them directly
    import submitToDatabase, common, time, process

    print("constructing databases")
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "database")

    # collect the samples that have not entered the database step in ANY state yet
    states = ("ongoing", "analysed", "cancelled", "failed", "excluded", "timeout")
    newsamples = {}
    for tool in previousProcessFiles:
        newsamples[tool] = {}
        for sample in previousProcessFiles[tool]["analysed"]:
            if all(sample not in processFiles[tool][state] for state in states):
                newsamples[tool][sample] = previousProcessFiles[tool]["analysed"][sample]

    # poll ongoing jobs; iterate over a snapshot since get_process_status may
    # remove finished samples from "ongoing"
    for tool in processFiles:
        for sample in list(processFiles[tool]["ongoing"]):
            try:
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            except Exception:
                print("Warning, unable to get slurm job status for job {}, please try again".format(processFiles[tool]["ongoing"][sample]["pid"]))

    # submit the new samples and record them as ongoing
    print("submitting")
    for tool in newsamples:
        print(tool)
        for sample in newsamples[tool]:
            print("sample:" + sample)
            databaseOutput = submitToDatabase.submit2DB(newsamples, tool, sample, programDirectory, processed, account)
            processFiles[tool]["ongoing"].update({sample: newsamples[tool][sample]})
            processFiles[tool]["ongoing"][sample]["pid"] = databaseOutput[0]
            processFiles[tool]["ongoing"][sample]["outputFile"] = databaseOutput[1]
            # NOTE(review): the original also read ["project"] into an unused
            # local variable; dropped as dead code
    common.UpdateProcessFiles(processFiles, processed, "database")
    return processFiles
def variantCalling(programDirectory, analysis, projectToProcess, working_dir, path_to_bam, available_tools, account, modules, bam_files, exclude, processFiles, processed):
    """Run every available variant caller on every bam file of the project.

    Skips hidden projects (name starting with '.') and excluded projects.
    For each tool/sample pair: report the sample's current state, poll ongoing
    slurm jobs, or submit the sample and record its pid as ongoing.

    Returns the updated process-file dictionary for the "calling" step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/calling"))
    import scripts, common

    project_name = projectToProcess
    if not project_name.startswith('.') and project_name not in exclude:
        local_project_dir = os.path.join(working_dir, project_name)
        if not os.path.isdir(local_project_dir):
            os.makedirs(local_project_dir)
        for tool in available_tools:
            print(tool)
            for sample_name in bam_files:
                if sample_name in processFiles[tool]["analysed"]:
                    # sample state is ANALYSED
                    print("sample {0} ANALYSED".format(sample_name))
                elif sample_name in processFiles[tool]["ongoing"]:
                    # sample state is UNDER_ANALYSIS: poll slurm and, if the
                    # job finished, move the sample out of "ongoing"
                    done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample_name]["pid"]))
                    processFiles = common.get_process_status(done, processFiles, tool, sample_name)
                elif sample_name in processFiles[tool]["failed"]:
                    print("sample {0} FAILED".format(sample_name))
                elif sample_name in processFiles[tool]["cancelled"]:
                    print("sample {0} CANCELLED".format(sample_name))
                elif sample_name in processFiles[tool]["excluded"]:
                    print("sample {0} EXCLUDED".format(sample_name))
                elif sample_name in processFiles[tool]["timeout"]:
                    print("sample {0} TIMEOUT".format(sample_name))
                else:
                    # sample state is NEW: submit it and store the PID as ongoing.
                    # getattr() replaces the original eval() of a hand-built code
                    # string, which would break on paths containing quotes and
                    # executed arbitrary text from the configuration; the same
                    # argument values are passed in the same order
                    caller = getattr(scripts, tool)
                    callerOutput = caller(programDirectory, local_project_dir + "/" + tool, sample_name, bam_files[sample_name]["path"], account, str(modules))
                    processFiles[tool]["ongoing"][sample_name] = {"pid": callerOutput[0], "project": project_name, "outpath": local_project_dir, "outputFile": callerOutput[1]}
                    print("sample {0} LAUNCHED".format(sample_name))
    common.UpdateProcessFiles(processFiles, processed, "calling")
    return processFiles