def QC_tumor_normal(self):
    """QC the tumor and normal runs of a tumor/normal sample with each other,
    merge the passing runs, QC the final merged BAMs together, and update the
    sample status accordingly."""
    # Separate the runs into tumor and normal lists
    normal_runs, tumor_runs = self.getTumor_Normal()

    if self.sample_json['analysis']['settings']['type'] == 'all_tumor_normal':
        # Use the sample_status here to not re-run the QC and to not overwrite run status.
        # The 'sample_status' should be reset to 'pushed' when new runs are pushed.
        #if self.sample_json['sample_status'] != 'pending_merge' and self.sample_json['sample_status'] != 'pending_3x3_review' and self.sample_json['sample_status'] != 'merged':
        # if the user specified the '--pass_fail' option, then run this part still
        if self.sample_json['sample_status'] == 'pushed' or self.options.pass_fail or self.options.qc_all:
            # QC the normal or tumor runs with each other
            self.QC_runs(normal_runs, 'normal_')
            self.QC_runs(tumor_runs, 'tumor_')
            # now QC the tumor and normal runs together.
            self.QC_normal_tumor_runs(normal_runs, tumor_runs)

            # make the merger
            merger = Merger(self.sample_json, self.options.recalc_3x3_tables)

            # Check to see if the normal runs are ready to be merged.
            self.sample_json, merge_normal = merger.check_merge(normal_runs, 'Normal/', 'normal_')
            if merge_normal == True:
                # merge the normal and/or tumor runs. Will only merge the passing runs with each other.
                self.sample_json = merger.merge_runs('normal', 'Normal_', 'normal_')

            # Check to see if the tumor runs are ready to be merged.
            self.sample_json, merge_tumor = merger.check_merge(tumor_runs, 'Tumor/', 'tumor_')
            if merge_tumor == True:
                self.sample_json = merger.merge_runs('tumor', 'Tumor_', 'tumor_')

            # If any runs were merged, QC them. If there is only 1 normal and 1 tumor run, they won't be QC'd again.
            #if normal_merge_dir != '' or tumor_merge_dir != '' or (len(normal_passing_bams) == 1 and len(tumor_passing_bams) == 1):
            # now QC the tumor and normal merged bams together if both normal and tumor runs are ready.
            # To only QC all for the actual merged runs (PNET), change the 'final' part to 'merged'.
            # The 'final_normal_json' and 'final_tumor_json' flags are set by merger.py in the function check_merge, line 157
            #if (merge_normal or merge_tumor) and ('merged_normal_json' in self.sample_json and 'merged_tumor_json' in self.sample_json):
            if 'final_normal_json' in self.sample_json and 'final_tumor_json' in self.sample_json:
                self.sample_json, qc_json = self.qc_run.QC_2Runs(self.sample_json, self.sample_json['final_normal_json'],
                        self.sample_json['final_tumor_json'], 'normal_', 'tumor_', '_merged')
                self.sample_json, merged_perc_avail_bases = self.qc_run.update_3x3_runs_status(self.sample_json,
                        self.sample_json['final_normal_json'], self.sample_json['final_tumor_json'], qc_json)
                # update the merged run status
                merger.update_merged_run_status(self.sample_json['final_normal_json'], merged_perc_avail_bases)
                merger.update_merged_run_status(self.sample_json['final_tumor_json'], merged_perc_avail_bases)

                # cleanup the individual run bam files
                if merged_perc_avail_bases > .9:
                    final_qc_dir = "%s/all%svs%s" % (self.sample_json['qc_folder'],
                            json.load(open(self.sample_json['final_normal_json']))['run_name'],
                            json.load(open(self.sample_json['final_tumor_json']))['run_name'])
                    # annotate the final somatic variants
                    command = "bash %s/Somatic_Variants/somatic_variants.sh %s %s %s" % (
                            self.sample_json['analysis']['software_directory'], final_qc_dir,
                            self.sample_json['sample_name'], self.sample_json['analysis']['software_directory'])
                    if runCommandLine(command) != 0:
                        sys.stderr.write("ERROR: somatic annotation failed!\n")
                    # Cleanup the PTRIM.bam and chr bam files after all of the QC is done.
                    # are there any other files to clean up?
                    self.cleanup_sample.cleanup_runs(self.sample_json['runs'], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                    #self.cleanup_sample.delete_runs(runs, self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                    # Cleanup after the merging QC is done.
                    self.cleanup_sample.cleanup_runs([self.sample_json['final_normal_json'], self.sample_json['final_tumor_json']],
                            self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                    # Set the sample_status
                    self.sample_json['sample_status'] = 'merged_pass'
                else:
                    self.sample_json['sample_status'] = 'awaiting_more_sequencing'
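
# Hedged illustration only, not used by the pipeline: the run-level JSON files
# referenced in these QC methods ('final_normal_json', 'final_tumor_json',
# 'final_json', 'merged_json') are assumed, based solely on the keys read in
# this module, to contain at least fields like the ones below. The real files
# are written elsewhere (merger.py per the comment above); the values here are
# invented for the example.
_EXAMPLE_RUN_JSON = {
    'run_name': 'Normal_Merged',        # used to build the "all<normal>vs<tumor>" QC dir
    'run_folder': '/path/to/run',       # used to glob the final *.vcf
    'pass_fail_merged_status': 'pass',  # gates the final VCF copy and S3 push
}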
def QC_germline(self):
    """QC the runs of a germline sample with each other, merge the passing runs,
    update the sample status, and copy out the final VCF if it passes."""
    # Use the sample_status here to not re-run the QC and to not overwrite run status.
    # The 'sample_status' should be reset to 'pushed' when new runs are pushed.
    #if self.sample_json['sample_status'] != 'pending_merge' and self.sample_json['sample_status'] != 'pending_3x3_review' and self.sample_json['sample_status'] != 'merged':
    # if the user specified the '--pass_fail' option, then run this part still
    if self.sample_json['sample_status'] == 'pushed' or self.options.pass_fail or self.options.qc_all:
        # QC the normal runs with each other
        self.QC_runs(self.sample_json['runs'])

        # what if there is only one run that passes all of the metrics?
        # It should be marked as the 'final_json' and have the 'pass_fail_merged' flag marked as pass.
        # make the merger
        merger = Merger(self.sample_json, self.options.recalc_3x3_tables)

        # Check to see if the normal runs are ready to be merged.
        self.sample_json, merge = merger.check_merge(self.sample_json['runs'])
        if merge != True:
            if 'final_json' in self.sample_json:
                # update the final run status
                merger.update_merged_run_status(self.sample_json['final_json'])
        elif merge == True:
            # merge the normal and/or tumor runs. Will only merge the passing runs with each other.
            self.sample_json = merger.merge_runs('germline')
            # update the merged run status
            merger.update_merged_run_status(self.sample_json['merged_json'])

            if json.load(open(self.sample_json['merged_json']))['pass_fail_merged_status'] == 'pass':
                # Set the sample_status
                self.sample_json['sample_status'] = 'merged_pass'
                # cleanup the individual run bam files
                self.cleanup_sample.cleanup_runs(self.sample_json['runs'], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                # Cleanup the merged dir
                self.cleanup_sample.cleanup_runs([self.sample_json['merged_json']], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
            else:
                self.sample_json['sample_status'] = 'awaiting_more_sequencing'

        # copy the final run's VCF file to the final_dir if it passes the "merged" coverage flag
        if 'final_json' in self.sample_json:
            final_json = json.load(open(self.sample_json['final_json']))
            if final_json['pass_fail_merged_status'] == 'pass':
                final_vcf = glob.glob("%s/*.vcf" % final_json['run_folder'])[0]
                final_project_dir = "/home/ionadmin/jeff/%s_Final_VCFs" % (self.sample_json['project'])
                print "copying %s to %s" % (final_vcf, final_project_dir)
                # check to make sure the final dir exists.
                if not os.path.isdir(final_project_dir):
                    os.mkdir(final_project_dir)
                shutil.copy(final_vcf, "%s/%s.vcf" % (final_project_dir, self.sample_json['sample_name']))
                # now push the sample to s3 storage
                if self.sample_json['project'] == 'Einstein':
                    print "pushing %s to amazon s3 storage" % self.sample_json['sample_name']
                    self.push_sample_to_s3(final_json)
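
# A minimal sketch, not called by the pipeline: QC_germline() above takes the
# first glob match for the final VCF, which raises an IndexError if the run
# folder contains no *.vcf. A hypothetical defensive lookup could look like
# this (the helper name is illustrative, not an existing function in this repo).
import glob

def _find_final_vcf(run_folder):
    """Return the first *.vcf found in run_folder, or None if there is none."""
    matches = glob.glob("%s/*.vcf" % run_folder)
    return matches[0] if matches else None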
def QC_merge_runs(self):
    """QC all of a sample's runs with each other, merge the passing runs, and
    update the sample status. Handles both germline and tumor/normal samples."""
    # if this is a germline sample, QC all of the normal runs with each other.
    if self.sample_json['sample_type'] == 'germline':
        # Use the sample_status here to not re-run the QC and to not overwrite run status.
        # The 'sample_status' should be reset to 'pushed' when new runs are pushed.
        #if self.sample_json['sample_status'] != 'pending_merge' and self.sample_json['sample_status'] != 'pending_3x3_review' and self.sample_json['sample_status'] != 'merged':
        # if the user specified the '--pass_fail' option, then run this part still
        if self.sample_json['sample_status'] == 'pushed' or self.options.pass_fail or self.options.qc_all:
            # QC the normal runs with each other
            self.QC_runs(self.sample_json['runs'])
            # write the sample json file
            write_json(self.sample_json['json_file'], self.sample_json)

            # what if there is only one run that passes all of the metrics?
            # It should be marked as the 'final_json' and have the 'pass_fail_merged' flag marked as pass.
            # make the merger
            merger = Merger(self.sample_json['json_file'])

            # Check to see if the normal runs are ready to be merged.
            merge = merger.check_merge(self.sample_json['runs'])
            if merge == True:
                # merge the normal and/or tumor runs. Will only merge the passing runs with each other.
                merger.merge_runs('germline')
                # load the sample json file because merger edited it.
                self.sample_json = json.load(open(self.sample_json['json_file']))
                # update the merged run status
                merger.update_merged_run_status(self.sample_json['merged_json'])

                if json.load(open(self.sample_json['merged_json']))['pass_fail_merged_status'] == 'pass':
                    # Set the sample_status
                    self.sample_json['sample_status'] = 'merged'
                    # cleanup the individual run bam files
                    self.cleanup_sample.cleanup_runs(self.sample_json['runs'], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                    # Cleanup the merged dir
                    self.cleanup_sample.cleanup_runs([self.sample_json['merged_json']], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                else:
                    self.sample_json['sample_status'] = 'awaiting_more_sequencing'

    # if this is a tumor_normal sample, find the normal and tumor runs, and then QC them with each other.
    elif self.sample_json['sample_type'] == 'tumor_normal':
        # Separate the runs into tumor and normal lists
        normal_runs, tumor_runs = self.getTumor_Normal()

        if self.sample_json['analysis']['settings']['type'] == 'all_tumor_normal':
            # Use the sample_status here to not re-run the QC and to not overwrite run status.
            # The 'sample_status' should be reset to 'pushed' when new runs are pushed.
            #if self.sample_json['sample_status'] != 'pending_merge' and self.sample_json['sample_status'] != 'pending_3x3_review' and self.sample_json['sample_status'] != 'merged':
            # if the user specified the '--pass_fail' option, then run this part still
            if self.sample_json['sample_status'] == 'pushed' or self.options.pass_fail or self.options.qc_all:
                # QC the normal or tumor runs with each other
                self.QC_runs(normal_runs, 'normal_')
                self.QC_runs(tumor_runs, 'tumor_')
                # now QC the tumor and normal runs together.
                self.QC_normal_tumor_runs(normal_runs, tumor_runs)
                # make the excel spreadsheet containing the data and copy it back to the proton
                #self._make_xlsx()
                # write the sample json file
                write_json(self.sample_json['json_file'], self.sample_json)

                # make the merger
                merger = Merger(self.sample_json['json_file'])

                # Check to see if the normal runs are ready to be merged.
                merge_normal = merger.check_merge(normal_runs, 'Normal/', 'normal_')
                if merge_normal == True:
                    # merge the normal and/or tumor runs. Will only merge the passing runs with each other.
                    merger.merge_runs('normal', 'Normal_', 'normal_')

                # Check to see if the tumor runs are ready to be merged.
                merge_tumor = merger.check_merge(tumor_runs, 'Tumor/', 'tumor_')
                if merge_tumor == True:
                    merger.merge_runs('tumor', 'Tumor_', 'tumor_')

                # load the sample json file because merger edited it.
                self.sample_json = json.load(open(self.sample_json['json_file']))

                # If any runs were merged, QC them. If there is only 1 normal and 1 tumor run, they won't be QC'd again.
                #if normal_merge_dir != '' or tumor_merge_dir != '' or (len(normal_passing_bams) == 1 and len(tumor_passing_bams) == 1):
                # only QC all for the actual merged runs for now (PNET).
                # now QC the tumor and normal merged bams together if both normal and tumor runs are ready.
                if (merge_normal or merge_tumor) and ('merged_normal_json' in self.sample_json and 'merged_tumor_json' in self.sample_json):
                    self.sample_json, qc_json = self.qc_run.QC_2Runs(self.sample_json, self.sample_json['merged_normal_json'],
                            self.sample_json['merged_tumor_json'], 'normal_', 'tumor_', '_merged')
                    self.sample_json, merged_perc_avail_bases = self.qc_run.update_3x3_runs_status(self.sample_json,
                            self.sample_json['merged_normal_json'], self.sample_json['merged_tumor_json'], qc_json)
                    # update the merged run status
                    merger.update_merged_run_status(self.sample_json['merged_normal_json'], merged_perc_avail_bases)
                    merger.update_merged_run_status(self.sample_json['merged_tumor_json'], merged_perc_avail_bases)

                    # cleanup the individual run bam files
                    if merged_perc_avail_bases > .9:
                        # Cleanup the PTRIM.bam and chr bam files after all of the QC is done.
                        # are there any other files to clean up?
                        self.cleanup_sample.cleanup_runs(self.sample_json['runs'], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                        #self.cleanup_sample.delete_runs(runs, self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                        # Cleanup after the merging QC is done.
                        self.cleanup_sample.cleanup_runs([self.sample_json['final_normal_json'], self.sample_json['final_tumor_json']],
                                self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
                        # Set the sample_status
                        self.sample_json['sample_status'] = 'merged_pass'
                    else:
                        self.sample_json['sample_status'] = 'awaiting_more_sequencing'

    # print the final status
    if self.no_errors == False or self.qc_run.no_errors == False:
        sys.stderr.write("%s finished with errors. See %s/sge.log for more details" % (self.sample_json['sample_name'], self.sample_json['output_folder']))
        self.sample_json['sample_status'] = 'failed'
        write_json(self.sample_json['json_file'], self.sample_json)
        sys.exit(1)
    else:
        print "%s finished with no errors!" % (self.sample_json['sample_name'])

    # write the sample json file
    write_json(self.sample_json['json_file'], self.sample_json)

    # make the excel spreadsheet containing the data and copy it back to the proton
    self._make_xlsx()
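
# Hedged sketch of the write_json helper called in QC_merge_runs; the real
# helper is defined elsewhere in this codebase and may differ. It is renamed
# here (_write_json_sketch) so it does not shadow the real function; the
# assumption, based only on its call sites, is that it serializes the sample
# dict back to its JSON file.
import json

def _write_json_sketch(filename, data):
    """Write 'data' to 'filename' as pretty-printed JSON."""
    with open(filename, 'w') as out:
        json.dump(data, out, indent=4)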