Beispiel #1
0
def cleaning(programDirectory,previousProcessFiles,processed,account):
    """Drive the cleaning step of the pipeline.

    Polls slurm for every already-submitted (ongoing) sample, reports samples
    that reached a terminal state, and submits every not-yet-processed sample
    to the cleaning script.

    programDirectory     -- pipeline root; modules/cleaning is appended to sys.path
    previousProcessFiles -- per-tool process dictionaries produced by the previous step
    processed            -- process-file location, passed through to common
    account              -- slurm account used for job submission
    Returns the updated processFiles dictionary.
    """
    sys.path.append(os.path.join(programDirectory, "modules/cleaning"))
    import cleaningScript, common

    # read the process files of the cleaning step
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "cleaning")
    for tool in previousProcessFiles:
        print("cleaning: " + tool)
        for sample in previousProcessFiles[tool]["analysed"]:
            if sample in processFiles[tool]["ongoing"]:
                # job was submitted earlier: ask slurm how it went and let
                # get_process_status move the sample to the matching bucket
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            elif sample in processFiles[tool]["failed"]:
                print("sample {0} FAILED".format(sample))
            elif sample in processFiles[tool]["cancelled"]:
                print("sample {0} CANCELLED".format(sample))
            elif sample in processFiles[tool]["timeout"]:
                print("sample {0} TIMEOUT".format(sample))
            elif sample in processFiles[tool]["excluded"]:
                print("sample {0} EXCLUDED".format(sample))
            elif sample in processFiles[tool]["analysed"]:
                print("sample {0} COMPLETE".format(sample))
            else:
                # the sample has never been seen by the cleaning step: submit it
                print("submitting: " + sample)
                try:
                    outgoing = cleaningScript.submit2Cleaning(tool, sample, previousProcessFiles, programDirectory, account)
                    processFiles[tool]["ongoing"].update(outgoing)
                except Exception:
                    # best effort: submission is expected to fail for excluded samples
                    print("FAILED:was the sample excluded?")

    common.UpdateProcessFiles(processFiles, processed, "cleaning")
    return processFiles
Beispiel #2
0
def applyFilter(programDirectory,previousProcessFiles,processed,account,frequency_db):
    """Drive the filter step of the pipeline.

    For every tool, submits the filter scripts when no filter job is ongoing
    and there are newly generated databases, then polls slurm for the status
    of every ongoing filter job.

    programDirectory     -- pipeline root; modules/filter is appended to sys.path
    previousProcessFiles -- per-tool process dictionaries produced by the previous step
    processed            -- process-file location, passed through to common
    account              -- slurm account used for job submission
    frequency_db         -- frequency database handed to the filter scripts
    Returns the updated processFiles dictionary.
    """
    sys.path.append(os.path.join(programDirectory, "modules/filter"))
    import common, filterScripts

    print("applying filter")
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "filter")

    # iterate through every available tool
    for tool in previousProcessFiles:
        print("filtering the output of {0}".format(tool))
        # every sample that has already passed (or permanently left) the filter step;
        # list(...) keeps this working under both py2 lists and py3 dict views
        filtered = (list(processFiles[tool]["analysed"])
                    + list(processFiles[tool]["cancelled"])
                    + list(processFiles[tool]["timeout"])
                    + list(processFiles[tool]["failed"])
                    + list(processFiles[tool]["excluded"]))
        database = list(previousProcessFiles[tool]["analysed"])
        # only run the query when the previous query has finished and there are
        # newly generated databases.
        # WARNING: if the user removes samples from the database log file the
        # condition is always true. TODO: change to set comparison
        if processFiles[tool]["ongoing"] == {} and sorted(filtered) != sorted(database):
            add2Ongoing = filterScripts.runScripts(tool, list(previousProcessFiles[tool]["analysed"]), previousProcessFiles, programDirectory, account, frequency_db)
            processFiles[tool]["ongoing"].update(add2Ongoing)

        # poll slurm for every ongoing job; iterate over a snapshot of the keys
        # because get_process_status may remove entries from "ongoing"
        for sample in list(processFiles[tool]["ongoing"]):
            try:
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            except Exception:
                # BUG FIX: the original formatted this message with a stale loop
                # variable from a previous loop, reporting the wrong job id
                print("Warning, unnable to get slurm job status for job {}, please try again".format(processFiles[tool]["ongoing"][sample]["pid"]))

    common.UpdateProcessFiles(processFiles, processed, "filter")
    return processFiles
Beispiel #3
0
def combine(programDirectory,previousProcessFiles,processed,account,bam_files):
    """Drive the combine step of the pipeline.

    Finds every sample that has been analysed by all available callers,
    polls slurm for ongoing merge jobs, reports terminal states, and
    submits the remaining samples for combination. The combine step tracks
    its own state under the single pseudo-tool name "FindSV".

    programDirectory     -- pipeline root; modules/combine is appended to sys.path
    previousProcessFiles -- per-caller process dictionaries from the previous step
    processed            -- process-file location, passed through to common
    account              -- slurm account used for job submission
    bam_files            -- per-sample dict; bam_files[sample]["path"] is handed to the combiner
    Returns the updated processFiles dictionary.
    """
    sys.path.append(os.path.join(programDirectory, "modules/combine"))
    import common, combineScript

    processFiles = common.readProcessFiles(["FindSV"], processed, "combine")

    # count in how many callers each sample appears; samples are seeded from
    # the first caller, later callers only increment existing entries
    samplesToMerge = {}
    firstTool = True
    for tool in previousProcessFiles:
        for sample in previousProcessFiles[tool]["analysed"]:
            if firstTool:
                samplesToMerge[sample] = 1
            elif sample in samplesToMerge:
                samplesToMerge[sample] += 1
        firstTool = False

    # merge the caller results of each sample analysed by all available callers
    for sample in samplesToMerge:
        if samplesToMerge[sample] != len(previousProcessFiles):
            continue
        # check the status of each sample
        if sample in processFiles["FindSV"]["ongoing"]:
            done = common.get_slurm_job_status(int(processFiles["FindSV"]["ongoing"][sample]["pid"]))
            processFiles = common.get_process_status(done, processFiles, "FindSV", sample)
        elif sample in processFiles["FindSV"]["failed"]:
            print("sample {0} FAILED".format(sample))
        elif sample in processFiles["FindSV"]["cancelled"]:
            print("sample {0} CANCELLED".format(sample))
        elif sample in processFiles["FindSV"]["timeout"]:
            print("sample {0} TIMEOUT".format(sample))
        elif sample in processFiles["FindSV"]["excluded"]:
            print("sample {0} EXCLUDED".format(sample))
        elif sample in processFiles["FindSV"]["analysed"]:
            print("sample {0} COMPLETE".format(sample))
        else:
            print("submitting: " + sample)
            # collect the per-caller "analysed" entry of this sample; `tool`
            # ends up holding the last caller name, matching the original
            combinedProcessFile = {}
            tool = ""
            for caller in previousProcessFiles:
                tool = caller
                combinedProcessFile[caller] = previousProcessFiles[caller]["analysed"][sample]
            outgoing = combineScript.submit4combination(tool, sample, combinedProcessFile, programDirectory, account, bam_files[sample]["path"])
            processFiles["FindSV"]["ongoing"].update(outgoing)

    common.UpdateProcessFiles(processFiles, processed, "combine")
    return processFiles
Beispiel #4
0
def buildDatabase(programDirectory,previousProcessFiles,processed,account):
    """Drive the database-construction step of the pipeline.

    Collects every analysed sample that has not yet entered the database step
    in any state, polls slurm for ongoing database jobs, then submits the new
    samples and records their pid and output file under "ongoing".

    programDirectory     -- pipeline root; modules/database is appended to sys.path
    previousProcessFiles -- per-tool process dictionaries from the previous step
    processed            -- process-file location, passed through to common
    account              -- slurm account used for job submission
    Returns the updated processFiles dictionary.
    """
    sys.path.append(os.path.join(programDirectory, "modules/database"))
    import submitToDatabase, common, time, process

    print("constructing databases")
    processFiles = common.readProcessFiles(previousProcessFiles, processed, "database")

    # samples that are yet to be run through build db, i.e. that appear in no
    # state bucket of the database step
    states = ("ongoing", "analysed", "cancelled", "failed", "excluded", "timeout")
    newsamples = {}
    for tool in previousProcessFiles:
        newsamples[tool] = {}
        for sample in previousProcessFiles[tool]["analysed"]:
            if not any(sample in processFiles[tool][state] for state in states):
                newsamples[tool][sample] = previousProcessFiles[tool]["analysed"][sample]

    # poll slurm for every ongoing job; iterate over a snapshot of the keys
    # because get_process_status may remove entries from "ongoing"
    for tool in processFiles:
        for sample in list(processFiles[tool]["ongoing"]):
            try:
                done = common.get_slurm_job_status(int(processFiles[tool]["ongoing"][sample]["pid"]))
                processFiles = common.get_process_status(done, processFiles, tool, sample)
            except Exception:
                print("Warning, unnable to get slurm job status for job {}, please try again".format(processFiles[tool]["ongoing"][sample]["pid"]))

    # submit the new samples and add them to ongoing
    print("submitting")
    for tool in newsamples:
        print(tool)
        for sample in newsamples[tool]:
            print("sample:" + sample)
            databaseOutput = submitToDatabase.submit2DB(newsamples, tool, sample, programDirectory, processed, account)
            processFiles[tool]["ongoing"].update({sample: newsamples[tool][sample]})
            # submit2DB returns (pid, outputFile); record both on the entry
            processFiles[tool]["ongoing"][sample]["pid"] = databaseOutput[0]
            processFiles[tool]["ongoing"][sample]["outputFile"] = databaseOutput[1]

    common.UpdateProcessFiles(processFiles, processed, "database")
    return processFiles