add_dependencies(sample.jobs['merge_results'], sample.jobs['idr_format_inputs']) if len(sample.replicates) > 1: peakcaller.replicate_scoring('replicate_scoring', sample) add_dependencies(sample.jobs['merge_results'], sample.jobs['replicate_scoring']) if archive_results: add_dependencies(sample.jobs['replicate_scoring'], sample.jobs['archive_sample']) add_dependencies(sample.jobs['idr_format_inputs'], sample.jobs['replicate_scoring']) peakcaller.idr_analysis('idr_analysis', sample) add_dependencies(sample.jobs['idr_format_inputs'], sample.jobs['idr_analysis']) peakcaller.idr_filter('idr_filter', sample) add_dependencies(sample.jobs['idr_analysis'], sample.jobs['idr_filter']) if archive_results: add_dependencies(sample.jobs['idr_filter'], sample.jobs['archive_sample']) # Cross-Correlation Analysis idr.cross_correlation_analysis('cross_correlation_analysis', sample, no_duplicates=no_duplicates, options=xcorrelation_options) add_dependencies(sample.jobs['form_sample_files'], sample.jobs['cross_correlation_analysis']) if archive_results: add_dependencies(sample.jobs['cross_correlation_analysis'], sample.jobs['archive_sample']) add_dependencies(sample.jobs['cross_correlation_analysis'], sample.jobs['peakcaller']) jobs += sample.all_jobs() if emails: jobs.append(peakcaller.mail_results(sample, control, run_name, emails)) jobs.append(peakcaller.cleanup(sample, control)) if SNAP_RUN and sample_conf: snap_job = sjm.Job("SNAP", "bash /srv/gs1/apps/snap_support/production/current/peakseq_report_parser_wrapper.sh production %s >& ~alwon/peakseq_report_out " % sample_conf.path, queue=QUEUE, project=PROJECT, host='localhost', dependencies=sample.all_jobs()) jobs.append(snap_job)
def _strip_suffix(path, suffix):
    """Return *path* without a trailing *suffix*; unchanged when it is absent.

    ``str.rstrip`` strips a *set of characters*, not a suffix, so it cannot be
    used for this (``"Lamb.bam".rstrip(".bam")`` yields ``"L"``).
    """
    if suffix and path.endswith(suffix):
        return path[:-len(suffix)]
    return path


def _normalize_mapped_reads_path(path):
    """Rewrite a legacy mapping-file path from the LIMS to the current BAM path.

    Several early runs (e.g. the beginning of 2012) recorded eland files as
    input. Those files are no longer in the archive; all input should now be
    BAM. If the BAM file does not exist the pipeline will fail later and the
    BAM files must be created in a separate task.
    """
    legacy_prefix = "/opt/scg/scg1_prd_10/"
    if path.startswith(legacy_prefix):
        # Some runs, such as ID 388, have this incorrect path. Slice the
        # prefix off: str.lstrip would also eat leading characters of the
        # remainder that happen to occur in the prefix.
        path = GBSC_DIR + path[len(legacy_prefix):]
    # Accounts for the numerous cases where the mapping file path looks like
    #   .../120501_ROCKFORD_00145_FC64VL1.Eland/L4/..._L4_pf.bam
    # but should lack the ".Eland" part.
    if ".Eland" in path:
        path = path.replace(".Eland", "")
    if "_eland" in os.path.basename(path):
        path = elandToBamFileName(path)
    return path


def _wait_for_forward_reads(frf, frf_progress, max_polls=5, poll_seconds=600,
                            settle_minutes=20):
    """Block until the forward-reads BAM *frf* has been untouched long enough.

    Another project may be generating *frf*; *frf_progress* is its in-progress
    sentinel. Polls up to *max_polls* times, sleeping *poll_seconds* between
    polls, and returns once *frf* is at least *settle_minutes* old.

    Raises:
        Exception: if *frf* cannot be read and the sentinel is missing or
            stale, or if *frf* never settles within the polling budget.
    """
    for _ in range(max_polls):
        try:
            age = getFileAgeMinutes(frf)
        except IOError:
            # NOTE(review): assumes getFileAgeMinutes raises IOError for a
            # missing file - confirm against its definition.
            age = 0
            try:
                progress_age = getFileAgeMinutes(frf_progress)
            except IOError:
                progress_age = 0
            if progress_age >= settle_minutes or progress_age == 0:
                raise Exception(
                    "Expected to find forward reads file {frf} since the progress sentinal file {frf_progress} is present, but unable to do so."
                    .format(frf=frf, frf_progress=frf_progress))
        if age >= settle_minutes:
            return
        time.sleep(poll_seconds)  # ten minutes by default
    raise Exception(
        "Waited too long for BAM file {} from other project to finish to finish being made. Exiting."
        .format(frf))


def main(syapseMode, peakcaller, run_name, control_conf, sample_conf=None,
         print_cmds=False, log_dir=None, no_duplicates=False,
         archive_results=True, emails=None, peakcaller_options=None,
         xcorrelation_options=None, remove_duplicates=False, paired_end=False,
         force=False, rescore_control=0, genome=False, no_control_lock=False):
    """Build and submit the SJM job graph that scores a control and a sample.

    Args:
        syapseMode: Not used by the active code (only by a disabled job).
        peakcaller: Module/object providing the job-building functions
            (form_control_files, run_peakcaller, merge_results, ...).
        run_name: Base name for the generated ``.jobs`` files.
        control_conf: Control configuration, parsed with ``ConfigControl``.
        sample_conf: Optional sample configuration, parsed with
            ``ConfigSample``. When omitted only the control is processed.
        print_cmds: Build the job file and exit with status 1 instead of
            running it.
        log_dir: Directory for logs and job files; defaults to the cwd.
        no_duplicates: Forwarded to the cross-correlation analysis.
        archive_results: Add archiving jobs for control and sample.
        emails: Recipients handed to ``peakcaller.mail_results``.
        peakcaller_options: Options forwarded to ``run_peakcaller``.
        xcorrelation_options: Options forwarded to cross-correlation.
        remove_duplicates: Use ``form_sample_files_nodups``.
        paired_end: Inputs are paired-end BAMs; forward-read-only BAMs are
            created (or awaited) first and substituted as inputs.
        force: Forwarded to the input-check and archive jobs.
        rescore_control: Age in days after which a scored control is rescored;
            0 disables rescoring.
        genome: Overrides the sample configuration's GENOME when truthy.
        no_control_lock: Accepted for backward compatibility; the control-lock
            flow it governed is disabled.

    Raises:
        SystemExit: after building the job file when *print_cmds* is set.
        Exception: when waiting on files produced by another project times out.
    """
    rescore_control = 24 * rescore_control  # convert from days to hours
    script_dir = os.path.dirname(__file__)
    if not emails:
        emails = []
    if not log_dir:
        log_dir = os.getcwd()
    if not peakcaller_options:
        peakcaller_options = {}
    if not xcorrelation_options:
        xcorrelation_options = {}

    control_conf = ConfigControl(control_conf)  # parse fields out of control file
    if sample_conf:
        sample_conf = ConfigSample(sample_conf)
        if genome:
            sample_conf.GENOME = genome

    # Convert legacy eland / mis-rooted paths to current BAM paths.
    control_conf.CONTROL_MAPPED_READS[:] = [
        _normalize_mapped_reads_path(p)
        for p in control_conf.CONTROL_MAPPED_READS
    ]
    if sample_conf:
        # sample_conf.REPLICATES is a list of 1-item lists. Every replicate is
        # written back so the prefix/.Eland fixes are kept, as for controls.
        for idx, rep in enumerate(sample_conf.REPLICATES):
            sample_conf.REPLICATES[idx] = [_normalize_mapped_reads_path(rep[0])]

    # Currently only single-end scoring is supported, so paired-end input is
    # reduced to forward reads on the fly.
    if paired_end:
        controls = list(control_conf.CONTROL_MAPPED_READS)
        replicates = ([rep[0] for rep in sample_conf.REPLICATES]
                      if sample_conf else [])
        forward_ext = "_forwardReads.bam"
        progress_ext = "_forwardReads.bam.encours"
        conversion_jobs = []
        for bam in controls + replicates:
            stem = _strip_suffix(bam, ".bam")
            frf = stem + forward_ext
            frf_progress = stem + progress_ext
            if os.path.exists(frf) or os.path.exists(frf_progress):
                _wait_for_forward_reads(frf, frf_progress)
            else:
                print(
                    "Need to create a single-end reads only file from file {frf}."
                    .format(frf=frf))
                cmd = "samtools view -hbF 0x40 {peFile} > {seFile}".format(
                    peFile=bam, seFile=frf)
                jobname = "toSingleEnd_{0}".format(os.path.basename(bam))
                conversion_jobs.append(
                    sjm.Job(jobname, cmd, modules=["samtools/1.2"],
                            queue=conf.QUEUE, memory="5G",
                            sched_options="-m e"))
        if conversion_jobs:
            submission = sjm.Submission(jobs=conversion_jobs,
                                        log_directory=log_dir,
                                        notify=SJM_NOTIFY)
            sjmfile = os.path.join(log_dir,
                                   run_name + '_MakeSingleEndMappings.jobs')
            print(
                "Removing reverse reads in control and sample BAM files. Commands are in SJM file {sjmfile}"
                .format(sjmfile=sjmfile))
            submission.run(sjmfile, foreground=True)
        control_conf.CONTROL_MAPPED_READS = [
            _strip_suffix(x, ".bam") + forward_ext for x in controls
        ]
        print("Controls: " + " ".join(control_conf.CONTROL_MAPPED_READS) + "\n")
        if sample_conf:
            sample_conf.REPLICATES = [[_strip_suffix(x, ".bam") + forward_ext]
                                      for x in replicates]
            print("Replicates: " +
                  " ".join([x[0] for x in sample_conf.REPLICATES]))

    print(" archive results:  {0}".format(archive_results))
    jobs = []
    sample = None
    if sample_conf:
        sample = Sample(
            sample_conf.RUN_NAME, sample_conf.RESULTS_DIR,
            sample_conf.TEMP_DIR, sample_conf.GENOME,
            [SampleReplicate(i + 1, x)
             for i, x in enumerate(sample_conf.REPLICATES)], sample_conf)
    control = Control(control_conf.RUN_NAME, control_conf.RESULTS_DIR,
                      control_conf.TEMP_DIR, control_conf.GENOME,
                      control_conf.CONTROL_MAPPED_READS, control_conf,
                      peakcaller.NAME)

    # checkControlScored returns a falsy value when the control output does
    # not exist (never scored); otherwise the number of hours, as a float,
    # that the control file has been untouched. Under one hour means another
    # project may still be scoring it, so wait and poll again.
    score_time = False
    for _ in range(5):
        score_time = checkControlScored(control)
        if not score_time or score_time >= 1:
            break
        time.sleep(3600)  # sleep an hour
    else:
        raise Exception(
            "Waited too long for control scoring from other project to finish. Exiting."
        )

    do_rescore = bool(rescore_control and (score_time > rescore_control))
    if (not score_time) or do_rescore:
        # adds job merge_and_filter_reads.py
        peakcaller.form_control_files('form_control_files', control)
        if archive_results:
            peakcaller.archive_control('archive_control', control, force=force)
            add_dependencies(control.jobs['form_control_files'],
                             control.jobs['archive_control'])
        jobs += control.all_jobs()
    # NOTE: the earlier control-lock flow (check_for_control,
    # check_control_inputs, complete_control, remove_lock) was commented out
    # by nathankw on 2014-06-08 and is not reproduced here.

    if sample_conf:
        peakcaller.check_sample_inputs(sample, force=force)
        if remove_duplicates:
            print("rm dups")
            peakcaller.form_sample_files_nodups('form_sample_files', sample)
        else:
            print("no dups")
            peakcaller.form_sample_files('form_sample_files', sample)
        peakcaller.calc_pbc('calc_pbc', control, sample)
        peakcaller.run_peakcaller('peakcaller', control, sample,
                                  peakcaller_options)
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['calc_pbc'])
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['peakcaller'])
        if control.jobs:
            add_dependencies(control.jobs['form_control_files'],
                             sample.jobs['peakcaller'])
        peakcaller.merge_results('merge_results', sample)
        add_dependencies(sample.jobs['peakcaller'],
                         sample.jobs['merge_results'])
        if archive_results:
            peakcaller.archive_sample('archive_sample', sample, control,
                                      force=force)
            add_dependencies(sample.jobs['merge_results'],
                             sample.jobs['archive_sample'])

        # IDR Analysis
        peakcaller.form_idr_inputs('idr_format_inputs', sample)
        add_dependencies(sample.jobs['merge_results'],
                         sample.jobs['idr_format_inputs'])
        if len(sample.replicates) > 1:
            peakcaller.replicate_scoring('replicate_scoring', sample)
            add_dependencies(sample.jobs['merge_results'],
                             sample.jobs['replicate_scoring'])
            if archive_results:
                add_dependencies(sample.jobs['replicate_scoring'],
                                 sample.jobs['archive_sample'])
            add_dependencies(sample.jobs['idr_format_inputs'],
                             sample.jobs['replicate_scoring'])
        peakcaller.idr_analysis('idr_analysis', sample)
        add_dependencies(sample.jobs['idr_format_inputs'],
                         sample.jobs['idr_analysis'])
        peakcaller.idr_filter('idr_filter', sample)
        add_dependencies(sample.jobs['idr_analysis'],
                         sample.jobs['idr_filter'])
        if archive_results:
            add_dependencies(sample.jobs['idr_filter'],
                             sample.jobs['archive_sample'])

        # Cross-Correlation Analysis
        idr.cross_correlation_analysis('cross_correlation_analysis', sample,
                                       no_duplicates=no_duplicates,
                                       options=xcorrelation_options)
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['cross_correlation_analysis'])
        if archive_results:
            add_dependencies(sample.jobs['cross_correlation_analysis'],
                             sample.jobs['archive_sample'])
        add_dependencies(sample.jobs['cross_correlation_analysis'],
                         sample.jobs['peakcaller'])
        jobs += sample.all_jobs()

    print("emails: %s" % (emails,))
    mail_job = peakcaller.mail_results(sample, control, run_name, emails)
    jobs.append(mail_job)
    # NOTE: a follow-up job (setScoringStatusInSyapse.py, dependent on
    # mail_job) that set the Syapse "Scoring Status" to 'Scoring Completed'
    # is disabled. The append of that job is therefore dropped as well; it
    # referenced a `job` variable that is no longer created here.
    # The peakcaller.cleanup(sample, control) job is likewise disabled.

    if SNAP_RUN and sample_conf:
        script_path = os.path.join(
            script_dir,
            "snap_support/production/current/peakseq_report_parser_wrapper.sh")
        snap_cmd = ("bash " + script_path +
                    " production %s >& %s/peakseq_report_out " %
                    (sample_conf.path, sample_conf.RESULTS_DIR))
        snap_job = sjm.Job("SNAP", snap_cmd, queue=QUEUE, project=PROJECT,
                           host='localhost', dependencies=sample.all_jobs(),
                           sched_options='-m e -A chipseq_scoring')
        jobs.append(snap_job)

    if control.jobs:
        peakcaller.prep_control(control)
    # `sample` is None for control-only runs.
    if sample is not None and sample.jobs:
        peakcaller.prep_sample(sample)

    submission = sjm.Submission(jobs, log_directory=log_dir, notify=SJM_NOTIFY)
    if print_cmds:
        submission.build(run_name + '.jobs')
        raise SystemExit(1)
    # log_dir always has a value here (defaulted to the cwd above).
    submission.run(os.path.join(log_dir, run_name + '.jobs'), foreground=True)
def _strip_suffix(path, suffix):
    """Return *path* without a trailing *suffix*; unchanged when it is absent.

    ``str.rstrip`` strips a *set of characters*, not a suffix, so it cannot be
    used for this (``"Lamb.bam".rstrip(".bam")`` yields ``"L"``).
    """
    if suffix and path.endswith(suffix):
        return path[:-len(suffix)]
    return path


def _normalize_mapped_reads_path(path):
    """Rewrite a legacy mapping-file path from the LIMS to the current BAM path.

    Several early runs (e.g. the beginning of 2012) recorded eland files as
    input. Those files are no longer in the archive; all input should now be
    BAM. If the BAM file does not exist the pipeline will fail later and the
    BAM files must be created in a separate task.
    """
    legacy_prefix = "/opt/scg/scg1_prd_10/"
    if path.startswith(legacy_prefix):
        # Some runs, such as ID 388, have this incorrect path. Slice the
        # prefix off: str.lstrip would also eat leading characters of the
        # remainder that happen to occur in the prefix.
        path = GBSC_DIR + path[len(legacy_prefix):]
    # Accounts for the numerous cases where the mapping file path looks like
    #   .../120501_ROCKFORD_00145_FC64VL1.Eland/L4/..._L4_pf.bam
    # but should lack the ".Eland" part.
    if ".Eland" in path:
        path = path.replace(".Eland", "")
    if "_eland" in os.path.basename(path):
        path = elandToBamFileName(path)
    return path


def _wait_for_forward_reads(frf, frf_progress, max_polls=5, poll_seconds=600,
                            settle_minutes=20):
    """Block until the forward-reads BAM *frf* has been untouched long enough.

    Another project may be generating *frf*; *frf_progress* is its in-progress
    sentinel. Polls up to *max_polls* times, sleeping *poll_seconds* between
    polls, and returns once *frf* is at least *settle_minutes* old.

    Raises:
        Exception: if *frf* cannot be read and the sentinel is missing or
            stale, or if *frf* never settles within the polling budget.
    """
    for _ in range(max_polls):
        try:
            age = getFileAgeMinutes(frf)
        except IOError:
            # NOTE(review): assumes getFileAgeMinutes raises IOError for a
            # missing file - confirm against its definition.
            age = 0
            try:
                progress_age = getFileAgeMinutes(frf_progress)
            except IOError:
                progress_age = 0
            if progress_age >= settle_minutes or progress_age == 0:
                raise Exception(
                    "Expected to find forward reads file {frf} since the progress sentinal file {frf_progress} is present, but unable to do so."
                    .format(frf=frf, frf_progress=frf_progress))
        if age >= settle_minutes:
            return
        time.sleep(poll_seconds)  # ten minutes by default
    raise Exception(
        "Waited too long for BAM file {} from other project to finish to finish being made. Exiting."
        .format(frf))


def main(syapseMode, peakcaller, run_name, control_conf, sample_conf=None,
         print_cmds=False, log_dir=None, no_duplicates=False,
         archive_results=True, emails=None, peakcaller_options=None,
         xcorrelation_options=None, remove_duplicates=False, paired_end=False,
         force=False, rescore_control=0, genome=False, no_control_lock=False):
    """Build and submit the SJM job graph that scores a control and a sample.

    Args:
        syapseMode: Not used by the active code (only by a disabled job).
        peakcaller: Module/object providing the job-building functions
            (form_control_files, run_peakcaller, merge_results, ...).
        run_name: Base name for the generated ``.jobs`` files.
        control_conf: Control configuration, parsed with ``ConfigControl``.
        sample_conf: Optional sample configuration, parsed with
            ``ConfigSample``. When omitted only the control is processed.
        print_cmds: Build the job file and exit with status 1 instead of
            running it.
        log_dir: Directory for logs and job files; defaults to the cwd.
        no_duplicates: Forwarded to the cross-correlation analysis.
        archive_results: Add archiving jobs for control and sample.
        emails: Recipients for the results mail job; no mail job is added
            when empty.
        peakcaller_options: Options forwarded to ``run_peakcaller``.
        xcorrelation_options: Options forwarded to cross-correlation.
        remove_duplicates: Use ``form_sample_files_nodups``.
        paired_end: Inputs are paired-end BAMs; forward-read-only BAMs are
            created (or awaited) first and substituted as inputs.
        force: Forwarded to the input-check and archive jobs.
        rescore_control: Age in days after which a scored control is rescored;
            0 disables rescoring.
        genome: Overrides the sample configuration's GENOME when truthy.
        no_control_lock: Accepted for backward compatibility; the control-lock
            flow it governed is disabled.

    Raises:
        SystemExit: after building the job file when *print_cmds* is set.
        Exception: when waiting on files produced by another project times out.
    """
    rescore_control = 24 * rescore_control  # convert from days to hours
    script_dir = os.path.dirname(__file__)
    if not emails:
        emails = []
    if not log_dir:
        log_dir = os.getcwd()
    if not peakcaller_options:
        peakcaller_options = {}
    if not xcorrelation_options:
        xcorrelation_options = {}

    control_conf = ConfigControl(control_conf)  # parse fields out of control file
    if sample_conf:
        sample_conf = ConfigSample(sample_conf)
        if genome:
            sample_conf.GENOME = genome

    # Convert legacy eland / mis-rooted paths to current BAM paths.
    control_conf.CONTROL_MAPPED_READS[:] = [
        _normalize_mapped_reads_path(p)
        for p in control_conf.CONTROL_MAPPED_READS
    ]
    if sample_conf:
        # sample_conf.REPLICATES is a list of 1-item lists. Every replicate is
        # written back so the prefix/.Eland fixes are kept, as for controls.
        for idx, rep in enumerate(sample_conf.REPLICATES):
            sample_conf.REPLICATES[idx] = [_normalize_mapped_reads_path(rep[0])]

    # Currently only single-end scoring is supported, so paired-end input is
    # reduced to forward reads on the fly.
    if paired_end:
        controls = list(control_conf.CONTROL_MAPPED_READS)
        replicates = ([rep[0] for rep in sample_conf.REPLICATES]
                      if sample_conf else [])
        forward_ext = "_forwardReads.bam"
        progress_ext = "_forwardReads.bam.encours"
        conversion_jobs = []
        for bam in controls + replicates:
            stem = _strip_suffix(bam, ".bam")
            frf = stem + forward_ext
            frf_progress = stem + progress_ext
            if os.path.exists(frf) or os.path.exists(frf_progress):
                _wait_for_forward_reads(frf, frf_progress)
            else:
                print(
                    "Need to create a single-end reads only file from file {frf}."
                    .format(frf=frf))
                cmd = "samtools view -hbF 0x40 {peFile} > {seFile}".format(
                    peFile=bam, seFile=frf)
                jobname = "toSingleEnd_{0}".format(os.path.basename(bam))
                conversion_jobs.append(
                    sjm.Job(jobname, cmd, modules=["samtools"],
                            queue=conf.QUEUE, memory="5G",
                            sched_options="-m e"))
        if conversion_jobs:
            submission = sjm.Submission(jobs=conversion_jobs,
                                        log_directory=log_dir,
                                        notify=SJM_NOTIFY)
            sjmfile = os.path.join(log_dir,
                                   run_name + '_MakeSingleEndMappings.jobs')
            print(
                "Removing reverse reads in control and sample BAM files. Commands are in SJM file {sjmfile}"
                .format(sjmfile=sjmfile))
            submission.run(sjmfile, foreground=True)
        control_conf.CONTROL_MAPPED_READS = [
            _strip_suffix(x, ".bam") + forward_ext for x in controls
        ]
        print("Controls: " + " ".join(control_conf.CONTROL_MAPPED_READS) + "\n")
        if sample_conf:
            sample_conf.REPLICATES = [[_strip_suffix(x, ".bam") + forward_ext]
                                      for x in replicates]
            print("Replicates: " +
                  " ".join([x[0] for x in sample_conf.REPLICATES]))

    print(" archive results:  {0}".format(archive_results))
    jobs = []
    sample = None
    if sample_conf:
        sample = Sample(
            sample_conf.RUN_NAME, sample_conf.RESULTS_DIR,
            sample_conf.TEMP_DIR, sample_conf.GENOME,
            [SampleReplicate(i + 1, x)
             for i, x in enumerate(sample_conf.REPLICATES)], sample_conf)
    control = Control(control_conf.RUN_NAME, control_conf.RESULTS_DIR,
                      control_conf.TEMP_DIR, control_conf.GENOME,
                      control_conf.CONTROL_MAPPED_READS, control_conf,
                      peakcaller.NAME)

    # checkControlScored returns a falsy value when the control output does
    # not exist (never scored); otherwise the number of hours, as a float,
    # that the control file has been untouched. Under one hour means another
    # project may still be scoring it, so wait and poll again.
    score_time = False
    for _ in range(5):
        score_time = checkControlScored(control)
        if not score_time or score_time >= 1:
            break
        time.sleep(3600)  # sleep an hour
    else:
        raise Exception(
            "Waited too long for control scoring from other project to finish. Exiting."
        )

    do_rescore = bool(rescore_control and (score_time > rescore_control))
    if (not score_time) or do_rescore:
        # adds job merge_and_filter_reads.py
        peakcaller.form_control_files('form_control_files', control)
        if archive_results:
            peakcaller.archive_control('archive_control', control, force=force)
            add_dependencies(control.jobs['form_control_files'],
                             control.jobs['archive_control'])
        jobs += control.all_jobs()
    # NOTE: the earlier control-lock flow (check_for_control,
    # check_control_inputs, complete_control, remove_lock) was commented out
    # by nathankw on 2014-06-08 and is not reproduced here.

    if sample_conf:
        peakcaller.check_sample_inputs(sample, force=force)
        if remove_duplicates:
            print("rm dups")
            peakcaller.form_sample_files_nodups('form_sample_files', sample)
        else:
            print("no dups")
            peakcaller.form_sample_files('form_sample_files', sample)
        peakcaller.calc_pbc('calc_pbc', control, sample)
        peakcaller.run_peakcaller('peakcaller', control, sample,
                                  peakcaller_options)
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['calc_pbc'])
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['peakcaller'])
        if control.jobs:
            add_dependencies(control.jobs['form_control_files'],
                             sample.jobs['peakcaller'])
        peakcaller.merge_results('merge_results', sample)
        add_dependencies(sample.jobs['peakcaller'],
                         sample.jobs['merge_results'])
        if archive_results:
            peakcaller.archive_sample('archive_sample', sample, control,
                                      force=force)
            add_dependencies(sample.jobs['merge_results'],
                             sample.jobs['archive_sample'])

        # IDR Analysis
        peakcaller.form_idr_inputs('idr_format_inputs', sample)
        add_dependencies(sample.jobs['merge_results'],
                         sample.jobs['idr_format_inputs'])
        if len(sample.replicates) > 1:
            peakcaller.replicate_scoring('replicate_scoring', sample)
            add_dependencies(sample.jobs['merge_results'],
                             sample.jobs['replicate_scoring'])
            if archive_results:
                add_dependencies(sample.jobs['replicate_scoring'],
                                 sample.jobs['archive_sample'])
            add_dependencies(sample.jobs['idr_format_inputs'],
                             sample.jobs['replicate_scoring'])
        peakcaller.idr_analysis('idr_analysis', sample)
        add_dependencies(sample.jobs['idr_format_inputs'],
                         sample.jobs['idr_analysis'])
        peakcaller.idr_filter('idr_filter', sample)
        add_dependencies(sample.jobs['idr_analysis'],
                         sample.jobs['idr_filter'])
        if archive_results:
            add_dependencies(sample.jobs['idr_filter'],
                             sample.jobs['archive_sample'])

        # Cross-Correlation Analysis
        idr.cross_correlation_analysis('cross_correlation_analysis', sample,
                                       no_duplicates=no_duplicates,
                                       options=xcorrelation_options)
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['cross_correlation_analysis'])
        if archive_results:
            add_dependencies(sample.jobs['cross_correlation_analysis'],
                             sample.jobs['archive_sample'])
        add_dependencies(sample.jobs['cross_correlation_analysis'],
                         sample.jobs['peakcaller'])
        jobs += sample.all_jobs()

    if emails:
        print("emails: %s" % (emails,))
        jobs.append(peakcaller.mail_results(sample, control, run_name, emails))
    # NOTE: the peakcaller.cleanup(sample, control) job is disabled, as is a
    # job that set the Syapse scoring-status flag after mail_results.

    if SNAP_RUN and sample_conf:
        script_path = os.path.join(
            script_dir,
            "snap_support/production/current/peakseq_report_parser_wrapper.sh")
        snap_cmd = ("bash " + script_path +
                    " production %s >& %s/peakseq_report_out " %
                    (sample_conf.path, sample_conf.RESULTS_DIR))
        snap_job = sjm.Job("SNAP", snap_cmd, queue=QUEUE, project=PROJECT,
                           host='localhost', dependencies=sample.all_jobs(),
                           sched_options='-m e -A chipseq_scoring')
        jobs.append(snap_job)

    if control.jobs:
        peakcaller.prep_control(control)
    # `sample` is None for control-only runs.
    if sample is not None and sample.jobs:
        peakcaller.prep_sample(sample)

    submission = sjm.Submission(jobs, log_directory=log_dir, notify=SJM_NOTIFY)
    if print_cmds:
        submission.build(run_name + '.jobs')
        raise SystemExit(1)
    # log_dir always has a value here (defaulted to the cwd above).
    submission.run(os.path.join(log_dir, run_name + '.jobs'), foreground=True)