Example #1
0
def cleaning(programDirectory, previousProcessFiles, processed, account):
    """Drive the cleaning step for every analysed sample of every tool.

    For each sample analysed by the previous step: poll slurm if a cleaning
    job is already ongoing, report the status if the sample is in a terminal
    state, otherwise submit a new cleaning job.

    Args:
        programDirectory: pipeline root; ``modules/cleaning`` is added to sys.path.
        previousProcessFiles: per-tool process dictionaries from the previous
            step; only the ``analysed`` samples are consumed.
        processed: directory holding the process (state) files.
        account: slurm account used when submitting new cleaning jobs.

    Returns:
        The updated per-tool process dictionary for the cleaning step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/cleaning"))
    import cleaningScript
    import common

    # read the process files
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "cleaning")
    # terminal states checked in priority order, and the label printed for each
    terminal_states = (
        ("failed", "FAILED"),
        ("cancelled", "CANCELLED"),
        ("timeout", "TIMEOUT"),
        ("excluded", "EXCLUDED"),
        ("analysed", "COMPLETE"),
    )
    for tool in previousProcessFiles:
        print("cleaning: " + tool)
        for sample in previousProcessFiles[tool]["analysed"]:
            if sample in processFiles[tool]["ongoing"]:
                # a job was submitted earlier: ask slurm whether it finished
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
                continue
            for state, label in terminal_states:
                if sample in processFiles[tool][state]:
                    print("sample {0} {1}".format(sample, label))
                    break
            else:
                print("submitting: " + sample)
                try:
                    outgoing = cleaningScript.submit2Cleaning(tool, sample, previousProcessFiles, programDirectory, account)
                    processFiles[tool]["ongoing"].update(outgoing)
                except Exception:
                    # submission is best-effort; presumably raises for excluded
                    # samples (see message) -- keep processing the rest
                    print("FAILED:was the sample excluded?")

    common.UpdateProcessFiles(processFiles, processed, "cleaning")
    return processFiles
Example #2
0
def applyFilter(programDirectory, previousProcessFiles, processed, account, frequency_db):
    """Run the filtering step on the output of every variant caller.

    Submits filter jobs when the previous filter round is finished and new
    caller results are available, then polls slurm for every ongoing filter
    job and moves finished samples to their terminal state.

    Args:
        programDirectory: pipeline root; ``modules/filter`` is added to sys.path.
        previousProcessFiles: per-tool process dictionaries from the caller step.
        processed: directory holding the process (state) files.
        account: slurm account used for submission.
        frequency_db: frequency database handed through to the filter scripts.

    Returns:
        The updated per-tool process dictionary for the filter step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/filter"))
    import common
    import filterScripts

    print("applying filter")
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "filter")

    # iterate through every available tool
    for tool in previousProcessFiles:
        print("filtering the output of {0}".format(tool))
        # every sample that has already passed through the filter step
        filtered = []
        for state in ("analysed", "cancelled", "timeout", "failed", "excluded"):
            filtered.extend(processFiles[tool][state])
        database = list(previousProcessFiles[tool]["analysed"])
        # only run the query when the previous query was finished and there are
        # newly generated databases.
        # WARNING: if the user removes samples from the database log file, the
        # condition will always be true. TODO: change to set comparison.
        if not processFiles[tool]["ongoing"] and sorted(filtered) != sorted(database):
            add2Ongoing = filterScripts.runScripts(tool, list(previousProcessFiles[tool]["analysed"]), previousProcessFiles, programDirectory, account, frequency_db)
            processFiles[tool]["ongoing"].update(add2Ongoing)

        # poll slurm for each ongoing job; iterate a copy of the keys because
        # get_process_status may remove entries from "ongoing"
        for sample in list(processFiles[tool]["ongoing"]):
            try:
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            except Exception:
                # bug fix: the original indexed "ongoing" with a stale loop
                # variable here, reporting the pid of the wrong sample
                print("Warning, unable to get slurm job status for job {}, please try again".format(processFiles[tool]["ongoing"][sample]["pid"]))

    common.UpdateProcessFiles(processFiles, processed, "filter")
    return processFiles
Example #3
0
def combine(programDirectory, previousProcessFiles, processed, account, bam_files):
    """Merge the caller results of samples analysed by every available caller.

    A sample is eligible for merging once every tool in previousProcessFiles
    lists it as analysed. Eligible samples are polled (if a merge job is
    ongoing), reported (if in a terminal state) or submitted for merging.
    Merged results are tracked under the single pseudo-tool "FindSV".

    Args:
        programDirectory: pipeline root; ``modules/combine`` is added to sys.path.
        previousProcessFiles: per-tool process dictionaries from the callers.
        processed: directory holding the process (state) files.
        account: slurm account used for submission.
        bam_files: per-sample dictionary; the sample's "path" entry is passed
            to the combination script.

    Returns:
        The updated process dictionary for the combine step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/combine"))
    import common
    import combineScript

    # read the process files (single pseudo-tool "FindSV")
    processFiles = common.readProcessFiles(["FindSV"], processed, "combine")

    # count, per sample, how many tools have finished analysing it; only
    # samples seen by the first tool can ever reach the full count
    samplesToMerge = {}
    firstTool = True
    for tool in previousProcessFiles:
        for sample in previousProcessFiles[tool]["analysed"]:
            if firstTool:
                samplesToMerge[sample] = 1
            elif sample in samplesToMerge:
                samplesToMerge[sample] += 1
        firstTool = False

    # terminal states checked in priority order, and the label printed for each
    terminal_states = (
        ("failed", "FAILED"),
        ("cancelled", "CANCELLED"),
        ("timeout", "TIMEOUT"),
        ("excluded", "EXCLUDED"),
        ("analysed", "COMPLETE"),
    )
    # merge the caller results of each sample analysed by all available callers
    for sample in samplesToMerge:
        if samplesToMerge[sample] != len(previousProcessFiles):
            continue
        if sample in processFiles["FindSV"]["ongoing"]:
            # a merge job was submitted earlier: check whether it finished
            done = common.get_slurm_job_status(int(processFiles["FindSV"]["ongoing"][sample]["pid"]))
            processFiles = common.get_process_status(done, processFiles, "FindSV", sample)
            continue
        for state, label in terminal_states:
            if sample in processFiles["FindSV"][state]:
                print("sample {0} {1}".format(sample, label))
                break
        else:
            print("submitting: " + sample)
            # collect each tool's result for this sample; "tool" ends up as
            # the last tool iterated, matching the original call
            combinedProcessFile = {}
            tool = ""
            for current in previousProcessFiles:
                tool = current
                combinedProcessFile[current] = previousProcessFiles[current]["analysed"][sample]
            outgoing = combineScript.submit4combination(tool, sample, combinedProcessFile, programDirectory, account, bam_files[sample]["path"])
            processFiles["FindSV"]["ongoing"].update(outgoing)

    common.UpdateProcessFiles(processFiles, processed, "combine")
    return processFiles
Example #4
0
def buildDatabase(programDirectory, previousProcessFiles, processed, account):
    """Build the variant databases for every newly analysed sample.

    Finds samples analysed upstream that the database step has not seen yet,
    polls slurm for every ongoing database job, then submits the new samples
    and registers them as ongoing.

    Args:
        programDirectory: pipeline root; ``modules/database`` is added to sys.path.
        previousProcessFiles: per-tool process dictionaries from the previous step.
        processed: directory holding the process (state) files.
        account: slurm account used for submission.

    Returns:
        The updated per-tool process dictionary for the database step.
    """
    sys.path.append(os.path.join(programDirectory, "modules/database"))
    # NOTE(review): "time" and "process" look unused in this function -- kept
    # in case importing them has side effects; confirm before removing
    import submitToDatabase, common, time, process
    print("constructing databases")
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "database")

    # samples analysed upstream that are in no state known to this step yet
    known_states = ("ongoing", "analysed", "cancelled", "failed", "excluded", "timeout")
    newsamples = {}
    for tool in previousProcessFiles:
        newsamples[tool] = {}
        for sample in previousProcessFiles[tool]["analysed"]:
            if not any(sample in processFiles[tool][state] for state in known_states):
                newsamples[tool][sample] = previousProcessFiles[tool]["analysed"][sample]

    # check if any ongoing sample has finished; iterate a copy of the keys
    # because get_process_status may remove entries from "ongoing"
    for tool in processFiles:
        for sample in list(processFiles[tool]["ongoing"]):
            try:
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            except Exception:
                print("Warning, unable to get slurm job status for job {}, please try again".format(processFiles[tool]["ongoing"][sample]["pid"]))

    # submit the new samples and add them to ongoing
    print("submitting")
    for tool in newsamples:
        print(tool)
        for sample in newsamples[tool]:
            print("sample:" + sample)
            databaseOutput = submitToDatabase.submit2DB(newsamples, tool, sample, programDirectory, processed, account)
            processFiles[tool]["ongoing"].update({sample: newsamples[tool][sample]})
            processFiles[tool]["ongoing"][sample]["pid"] = databaseOutput[0]
            processFiles[tool]["ongoing"][sample]["outputFile"] = databaseOutput[1]

    common.UpdateProcessFiles(processFiles, processed, "database")
    return processFiles
Example #5
0
def variantCalling(programDirectory, analysis, projectToProcess, working_dir, path_to_bam, available_tools, account, modules, bam_files, exclude, processFiles, processed):
    """Submit and track variant-calling jobs for every sample/tool pair.

    For each available tool and each bam sample: report terminal states, poll
    slurm for ongoing jobs, and launch the tool's caller for new samples.

    Args:
        programDirectory: pipeline root; ``modules/calling`` is added to sys.path.
        projectToProcess: project name; hidden (leading ".") or excluded
            projects are skipped entirely.
        working_dir: directory under which per-project output folders live.
        available_tools: names of caller functions defined in the scripts module.
        account: slurm account used for submission.
        modules: environment modules forwarded (stringified) to each caller.
        bam_files: per-sample dictionary with a "path" entry per sample.
        exclude: dictionary of project names to skip.
        processFiles: per-tool process dictionaries to update in place.
        processed: directory holding the process (state) files.

    Returns:
        The updated process dictionary. NOTE(review): when the project is
        hidden or excluded the function implicitly returns None -- preserved
        from the original; confirm callers handle it.
    """
    sys.path.append(os.path.join(programDirectory, "modules/calling"))
    import scripts
    import common

    project_name = projectToProcess
    if not project_name.startswith('.') and project_name not in exclude:
        local_project_dir = os.path.join(working_dir, project_name)
        if not os.path.isdir(local_project_dir):
            os.makedirs(local_project_dir)
        for tools in available_tools:
            print(tools)
            for sample_name in bam_files:
                if sample_name in processFiles[tools]["analysed"]:
                    # sample state is ANALYSED
                    print("sample {0} ANALYSED".format(sample_name))
                elif sample_name in processFiles[tools]["ongoing"]:
                    # sample state is UNDER_ANALYSIS: poll slurm and, if the
                    # job finished, move it from ongoing to a terminal state
                    done = common.get_slurm_job_status(int(processFiles[tools]["ongoing"][sample_name]["pid"]))
                    processFiles = common.get_process_status(done, processFiles, tools, sample_name)
                elif sample_name in processFiles[tools]["failed"]:
                    print("sample {0} FAILED".format(sample_name))
                elif sample_name in processFiles[tools]["cancelled"]:
                    print("sample {0} CANCELLED".format(sample_name))
                elif sample_name in processFiles[tools]["excluded"]:
                    print("sample {0} EXCLUDED".format(sample_name))
                elif sample_name in processFiles[tools]["timeout"]:
                    print("sample {0} TIMEOUT".format(sample_name))
                else:
                    # sample state is NEW: submit it and store the PID.
                    # Dispatch via getattr instead of building a source string
                    # and eval()-ing it -- same call, but no code-injection
                    # risk and no breakage on quotes in paths
                    caller = getattr(scripts, tools)
                    callerOutput = caller(programDirectory, local_project_dir + "/" + tools, sample_name, bam_files[sample_name]["path"], account, str(modules))
                    processFiles[tools]["ongoing"][sample_name] = {"pid": callerOutput[0], "project": project_name, "outpath": local_project_dir, "outputFile": callerOutput[1]}
                    print("sample {0} LAUNCHED".format(sample_name))

        common.UpdateProcessFiles(processFiles, processed, "calling")
        return processFiles