Example #1
0
		add_dependencies(sample.jobs['merge_results'], sample.jobs['idr_format_inputs'])
		if len(sample.replicates) > 1:
			peakcaller.replicate_scoring('replicate_scoring', sample)
			add_dependencies(sample.jobs['merge_results'], sample.jobs['replicate_scoring'])
			if archive_results:
				add_dependencies(sample.jobs['replicate_scoring'], sample.jobs['archive_sample'])
			add_dependencies(sample.jobs['idr_format_inputs'], sample.jobs['replicate_scoring'])
		peakcaller.idr_analysis('idr_analysis', sample)
		add_dependencies(sample.jobs['idr_format_inputs'], sample.jobs['idr_analysis'])
		peakcaller.idr_filter('idr_filter', sample)
		add_dependencies(sample.jobs['idr_analysis'], sample.jobs['idr_filter'])
		if archive_results:
			add_dependencies(sample.jobs['idr_filter'], sample.jobs['archive_sample'])
		
		# Cross-Correlation Analysis
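		# A hedged note on what this step computes (standard ChIP-seq practice, not
		# confirmed by this file): strand cross-correlation of the mapped reads,
		# typically used to estimate fragment length and library quality metrics.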
		idr.cross_correlation_analysis('cross_correlation_analysis', sample, no_duplicates=no_duplicates, options=xcorrelation_options)
		add_dependencies(sample.jobs['form_sample_files'], sample.jobs['cross_correlation_analysis'])
		if archive_results:
			add_dependencies(sample.jobs['cross_correlation_analysis'], sample.jobs['archive_sample'])
		add_dependencies(sample.jobs['cross_correlation_analysis'], sample.jobs['peakcaller'])
			
		jobs += sample.all_jobs()

	if emails:
		jobs.append(peakcaller.mail_results(sample, control, run_name, emails))
	jobs.append(peakcaller.cleanup(sample, control))
	
	if SNAP_RUN and sample_conf:
		snap_job = sjm.Job("SNAP", "bash /srv/gs1/apps/snap_support/production/current/peakseq_report_parser_wrapper.sh production %s >& ~alwon/peakseq_report_out " % sample_conf.path,  queue=QUEUE, project=PROJECT, host='localhost', dependencies=sample.all_jobs())
		jobs.append(snap_job)
				
Example #2
0
def main(syapseMode,
         peakcaller,
         run_name,
         control_conf,
         sample_conf=None,
         print_cmds=False,
         log_dir=None,
         no_duplicates=False,
         archive_results=True,
         emails=None,
         peakcaller_options=None,
         xcorrelation_options=None,
         remove_duplicates=False,
         paired_end=False,
         force=False,
         rescore_control=0,
         genome=False,
         no_control_lock=False):
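    """Build and submit the SJM job graph for one scoring run.

    Notes on selected arguments, inferred from the code below (a summary, not
    authoritative documentation):
      rescore_control -- threshold in days; converted to hours and compared
                         against how long the control output has sat untouched.
      genome          -- if given, overrides sample_conf.GENOME.
      no_control_lock -- disables peakcaller.USE_CONTROL_LOCK for this run.
    """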
    rescore_control = 24 * rescore_control  #convert from days to hours

    scriptDir = os.path.dirname(__file__)
    if not emails:
        emails = []
    if not log_dir:
        log_dir = os.getcwd()
    if not peakcaller_options:
        peakcaller_options = {}
    if not xcorrelation_options:
        xcorrelation_options = {}
    control_conf = ConfigControl(
        control_conf)  #parse fields out of control file
    if sample_conf:
        sample_conf = ConfigSample(sample_conf)
    if genome and sample_conf:  #guard: sample_conf may be None
        sample_conf.GENOME = genome

    ###Several runs from early on, such as in the beginning of 2012, did not have BAM files as input, but instead eland files. These eland file names are in the SNAP lims for many runs. Those eland files are no longer in the archive,
    ### and for these runs, all input should now be bam.  The code below converts an eland file name to a bam file name. If the bam file doesn't exist, then the pipeline will crash and the bam files must be created in
    ### a separate task.
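    ### For illustration, a minimal sketch of the conversion elandToBamFileName is
    ### assumed to perform (the real helper is defined elsewhere; the "_pf.bam"
    ### suffix mapping below is an assumption based on the example path further
    ### down, not confirmed by this file):
    ###
    ###   def elandToBamFileName(path):
    ###       # e.g. .../FC64VL1_L4_eland_extended.txt.gz -> .../FC64VL1_L4_pf.bam
    ###       head = os.path.basename(path).split("_eland")[0]
    ###       return os.path.join(os.path.dirname(path), head + "_pf.bam")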
    count = -1
    for i in control_conf.CONTROL_MAPPED_READS:
        count += 1
        if i.startswith("/opt/scg/scg1_prd_10/"):
            #some runs, such as ID 388, have this incorrect path. Note that
            #str.lstrip() strips a character set, not a prefix, so slice instead.
            i = i[len("/opt/scg/scg1_prd_10/"):]
            i = GBSC_DIR + i
        ##The following check for .Eland in the dirname accounts for the numerous cases where the mapping file path is like:
        ##  /srv/gs1/projects/scg/Archive/IlluminaRuns/2012/may/120501_ROCKFORD_00145_FC64VL1.Eland/L4/120501_ROCKFORD_00145_FC64VL1_L4_pf.bam
        ## and it should instead lack the ".Eland" part (e.g. see http://scg-snap.stanford.edu/api/peakseq_inputs/show?experiment_run_id=380).
        if ".Eland" in i:
            i = i.replace(".Eland", "")
        basename = os.path.basename(i)
        if "_eland" in basename:
            i = elandToBamFileName(i)
        control_conf.CONTROL_MAPPED_READS[count] = i

    if sample_conf:
        replicates = [x[0] for x in sample_conf.REPLICATES
                      ]  #sample_conf.REPLICATES is a list of 1-item lists
        count = -1
        for rep in replicates:
            count += 1
            if rep.startswith("/opt/scg/scg1_prd_10/"):
                rep = rep[len("/opt/scg/scg1_prd_10/"
                              ):]  #slice off the prefix; lstrip() would strip characters
                rep = GBSC_DIR + rep
            if ".Eland" in rep:
                rep = rep.replace(".Eland", "")
            basename = os.path.basename(rep)
            if "_eland" in basename:
                sample_conf.REPLICATES[count] = [elandToBamFileName(rep)]

    ###NW End code to convert file names from eland to bam names.
    ###

    ###Nathaniel Watson. May 12, 2014.
    ### Now, need to check if BAM files exist. If so, good, otherwise, generate BAM files.
    ###Currently, only support for making single-end mappings on the fly
#	if not paired_end:
#		if sample_conf:

    if paired_end:
        controls = control_conf.CONTROL_MAPPED_READS
        all_mapped_reads = controls[:]
        #		print ("Controls: " +  str(controls) + "\n")
        if sample_conf:
            replicates = [x[0] for x in sample_conf.REPLICATES
                          ]  #sample_conf.REPLICATES is a list of 1-item lists
            #			print ("Replicates are: " + str(replicates) + "\n")
            all_mapped_reads.extend(replicates)
        jobs = []
        forwardReadExt = "_forwardReads.bam"
        progressReadExt = "_forwardReads.bam.encours"
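        # The ".encours" ("in progress") suffix appears to act as a sentinel:
        # its presence signals that another run is currently producing the
        # forward-reads BAM, so this run waits instead of regenerating it.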
        for i in all_mapped_reads:
            #str.rstrip(".bam") would strip characters, not the suffix, so slice instead
            frf = i[:-len(".bam")] + forwardReadExt
            frf_progress = i[:-len(".bam")] + progressReadExt
            if os.path.exists(frf) or os.path.exists(frf_progress):
                bamDone = False
                countLimit = 5
                count = 0
                while count < countLimit:
                    count += 1
                    try:
                        age = getFileAgeMinutes(frf)
                    except IOError:
                        age = 0
                        try:
                            progressFileAge = getFileAgeMinutes(frf_progress)
                        except IOError:
                            progressFileAge = 0
                        if (progressFileAge >= 20) or (progressFileAge == 0):
                            raise Exception(
                                "Expected to find forward reads file {frf} since the progress sentinel file {frf_progress} is present, but unable to do so."
                                .format(frf=frf, frf_progress=frf_progress))
                    if age >= 20:
                        bamDone = True
                        break
                    else:
                        #sleep ten minutes
                        time.sleep(600)
                if not bamDone:
                    raise Exception(
                        "Waited too long for BAM file {} from other project to finish being made. Exiting."
                        .format(frf))
            else:
                print(
                    "Need to create a single-end reads only file from file {frf}."
                    .format(frf=frf))
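                # samtools semantics: -h keeps the header, -b emits BAM, and
                # -F 0x40 excludes any alignment whose 0x40 ("first in pair")
                # flag is set, leaving the remaining reads as a single-end file.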
                cmd = "samtools view -hbF 0x40 {peFile} > {seFile}".format(
                    peFile=i, seFile=frf)
                jobname = "toSingleEnd_{0}".format(os.path.basename(i))
                job = sjm.Job(jobname,
                              cmd,
                              modules=["samtools/1.2"],
                              queue=conf.QUEUE,
                              memory="5G",
                              sched_options="-m e")
                jobs.append(job)
        if jobs:
            submission = sjm.Submission(jobs=jobs,
                                        log_directory=log_dir,
                                        notify=SJM_NOTIFY)
            sjmfile = os.path.join(log_dir,
                                   run_name + '_MakeSingleEndMappings.jobs')
            print(
                "Removing reverse reads in control and sample BAM files. Commands are in SJM file {sjmfile}"
                .format(sjmfile=sjmfile))
            try:
                submission.run(sjmfile, foreground=True)
            except subprocess.CalledProcessError:
                raise

        control_conf.CONTROL_MAPPED_READS = [
            x[:-len(".bam")] + forwardReadExt for x in controls
        ]
        if sample_conf:
            sample_conf.REPLICATES = [[x[:-len(".bam")] + forwardReadExt]
                                      for x in replicates]
    print("Controls: " + "  ".join(control_conf.CONTROL_MAPPED_READS) + "\n")
    if sample_conf:
        print("Replicates: " +
              "  ".join([x[0] for x in sample_conf.REPLICATES]))

    print " archive results: ", archive_results
    jobs = []

    sample = None
    if sample_conf:
        sample = Sample(sample_conf.RUN_NAME, sample_conf.RESULTS_DIR,
                        sample_conf.TEMP_DIR, sample_conf.GENOME, [
                            SampleReplicate(i + 1, x)
                            for i, x in enumerate(sample_conf.REPLICATES)
                        ], sample_conf)

    control_lock = peakcaller.USE_CONTROL_LOCK
    if no_control_lock:
        control_lock = False

    control = Control(control_conf.RUN_NAME, control_conf.RESULTS_DIR,
                      control_conf.TEMP_DIR, control_conf.GENOME,
                      control_conf.CONTROL_MAPPED_READS, control_conf,
                      peakcaller.NAME)

    controlScored = False
    countLimit = 5
    count = 0
    while count < countLimit:
        count += 1
        scoreTime = checkControlScored(control)
        if not scoreTime:
            #means it returned False b/c control output file didn't exist, so never scored.
            break
        #otherwise, it returned a number of hours as a float that the control file has been untouched
        elif scoreTime >= 1:
            controlScored = True
            break
        else:
            #sleep an hour
            time.sleep(3600)

    if not controlScored and count == countLimit:
        raise Exception(
            "Waited too long for control scoring from other project to finish. Exiting."
        )
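    # A minimal sketch of the contract checkControlScored is assumed to satisfy
    # (hypothetical, inferred from the comments in the loop above; the real
    # helper and the control output path live elsewhere):
    #
    #   def checkControlScored(control):
    #       path = ...  # the control's scored-output file
    #       if not os.path.exists(path):
    #           return False  #never scored
    #       return (time.time() - os.path.getmtime(path)) / 3600.0  #hours untouched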

    doRescore = False
    if rescore_control and (scoreTime > rescore_control):
        doRescore = True
    #a separate "if" (not "elif"), so that a due rescore actually triggers scoring
    if (not scoreTime) or doRescore:
        peakcaller.form_control_files(
            'form_control_files', control)  #add job merge_and_filter_reads.py
        if archive_results:
            peakcaller.archive_control('archive_control', control, force=force)
            add_dependencies(control.jobs['form_control_files'],
                             control.jobs['archive_control'])
        jobs += control.all_jobs()


###nathankw comment out below on 2014-06-08
#	if not control_scoring.check_for_control(results_dir=control_conf.RESULTS_DIR, peakcaller=control.peakcaller,use_control_lock=control_lock) or rescore_control:
#		try:
#			peakcaller.check_control_inputs(control) #checks that genome and BAMS of control exist
#			peakcaller.form_control_files('form_control_files', control) #add job merge_and_filter_reads.py
#			#The call below peakcaller.complete_control() adds the job complete_control_scoring.py if USE_CONTROL_LOCK is True, whose goal is to run the below command
#			# "UPDATE encode_controls SET ready=1 WHERE name='%s' AND peakcaller='%s'" % (results_dir, peakcaller)
#			#  Which means that the control is now scored.
#			peakcaller.complete_control('complete_control', control)
#			add_dependencies(control.jobs['form_control_files'], control.jobs['complete_control'])
#			if archive_results:
#				peakcaller.archive_control('archive_control', control,force=force)
#				add_dependencies(control.jobs['form_control_files'], control.jobs['archive_control'])
#			jobs += control.all_jobs()
#		except Exception, e:
#			import traceback
# 			print "error detected, removing control lock"
# 			traceback.print_exc()
# 			control_scoring.remove_lock(control.results_dir, control.peakcaller, control_lock)
# 			raise e
#	else:
#		print " Control %s already scored, skipping." % control.run_name

    if sample_conf:
        peakcaller.check_sample_inputs(sample, force=force)

        if remove_duplicates:
            print "removing duplicates"
            peakcaller.form_sample_files_nodups('form_sample_files', sample)
        else:
            print "keeping duplicates"
            peakcaller.form_sample_files('form_sample_files', sample)

        peakcaller.calc_pbc('calc_pbc', control, sample)
        peakcaller.run_peakcaller('peakcaller', control, sample,
                                  peakcaller_options)
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['calc_pbc'])
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['peakcaller'])
        if control.jobs:
            add_dependencies(control.jobs['form_control_files'],
                             sample.jobs['peakcaller'])
        peakcaller.merge_results('merge_results', sample)
        add_dependencies(sample.jobs['peakcaller'],
                         sample.jobs['merge_results'])
        if archive_results:
            peakcaller.archive_sample('archive_sample',
                                      sample,
                                      control,
                                      force=force)
            add_dependencies(sample.jobs['merge_results'],
                             sample.jobs['archive_sample'])

        # IDR Analysis
        peakcaller.form_idr_inputs('idr_format_inputs', sample)
        add_dependencies(sample.jobs['merge_results'],
                         sample.jobs['idr_format_inputs'])
        if len(sample.replicates) > 1:
            peakcaller.replicate_scoring('replicate_scoring', sample)
            add_dependencies(sample.jobs['merge_results'],
                             sample.jobs['replicate_scoring'])
            if archive_results:
                add_dependencies(sample.jobs['replicate_scoring'],
                                 sample.jobs['archive_sample'])
            add_dependencies(sample.jobs['idr_format_inputs'],
                             sample.jobs['replicate_scoring'])
        peakcaller.idr_analysis('idr_analysis', sample)
        add_dependencies(sample.jobs['idr_format_inputs'],
                         sample.jobs['idr_analysis'])
        peakcaller.idr_filter('idr_filter', sample)
        add_dependencies(sample.jobs['idr_analysis'],
                         sample.jobs['idr_filter'])
        if archive_results:
            add_dependencies(sample.jobs['idr_filter'],
                             sample.jobs['archive_sample'])

        # Cross-Correlation Analysis
        idr.cross_correlation_analysis('cross_correlation_analysis',
                                       sample,
                                       no_duplicates=no_duplicates,
                                       options=xcorrelation_options)
        add_dependencies(sample.jobs['form_sample_files'],
                         sample.jobs['cross_correlation_analysis'])
        if archive_results:
            add_dependencies(sample.jobs['cross_correlation_analysis'],
                             sample.jobs['archive_sample'])
        add_dependencies(sample.jobs['cross_correlation_analysis'],
                         sample.jobs['peakcaller'])

        jobs += sample.all_jobs()

    print "emails" % emails
    mail_job = peakcaller.mail_results(sample, control, run_name, emails)
    mail_job_name = mail_job.name
    jobs.append(mail_job)
    #jobs.append(peakcaller.cleanup(sample, control))

    #create job to set the "Scoring Status" attribute of the ChIP Seq Scoring object in Syapse to "Scoring Completed".
    #	cmd = "setScoringStatusInSyapse.py --mode {syapseMode} --name {run_name} --status 'Scoring Completed'".format(syapseMode=syapseMode,run_name=run_name)
    #	set_scoring_status_complete_job_name = "setScoringStatusCompleted"
    #	job = sjm.Job(set_scoring_status_complete_job_name,cmd,modules = ["python/2.7.9","gbsc/encode-scoring/prod"],queue=conf.QUEUE,host="localhost",dependencies=[mail_job],sched_options="-m e")
    #	jobs.append(job)

    if SNAP_RUN and sample_conf:
        scriptPath = os.path.join(
            scriptDir,
            "snap_support/production/current/peakseq_report_parser_wrapper.sh")
        snap_job = sjm.Job("SNAP",
                           "bash " + scriptPath +
                           " production %s >& %s/peakseq_report_out " %
                           (sample_conf.path, sample_conf.RESULTS_DIR),
                           queue=QUEUE,
                           project=PROJECT,
                           host='localhost',
                           dependencies=sample.all_jobs(),
                           sched_options='-m e -A chipseq_scoring')
        jobs.append(snap_job)

    if control.jobs:
        peakcaller.prep_control(control)
    if sample and sample.jobs:
        peakcaller.prep_sample(sample)

    #scoringStatusCmd="setScoringStatusProp.py --syapse-mode {syapseMode} -p scoringStatus --value Scored --unique-id {runName}".format(syapseMode=syapseMode,runName=runName))
    #scoringStatusJobName = run_name + "_setScoringStatusFlagToComplete"
    #job = sjm.Job(name=scoringStatusJobName,commands=scoringStatusCmd,modules=["gbsc/encode/prod"],dependencies=["mail_results"])
    #jobs.append(job)

    submission = sjm.Submission(jobs, log_directory=log_dir, notify=SJM_NOTIFY)
    if print_cmds:
        submission.build(run_name + '.jobs')
        raise SystemExit(1)
    if log_dir:
        submission.run(os.path.join(log_dir, run_name + '.jobs'),
                       foreground=True)
    else:
        submission.run(run_name + '.jobs', foreground=True)
Example #3
0
def main(syapseMode, peakcaller, run_name, control_conf, sample_conf=None, print_cmds=False, log_dir=None, no_duplicates=False, archive_results=True, emails=None, peakcaller_options=None, xcorrelation_options=None, remove_duplicates=False, paired_end=False, force=False, rescore_control=0, genome=False, no_control_lock=False):
	rescore_control = 24 * rescore_control #convert from days to hours

	scriptDir = os.path.dirname(__file__)
	if not emails:
		emails = []
	if not log_dir:
		log_dir = os.getcwd()
	if not peakcaller_options:
		peakcaller_options = {}
	if not xcorrelation_options:
		xcorrelation_options = {}
	control_conf = ConfigControl(control_conf) #parse fields out of control file
	if sample_conf:
		sample_conf = ConfigSample(sample_conf)
	if genome and sample_conf: #guard: sample_conf may be None
		sample_conf.GENOME = genome

	###Several runs from early on, such as in the beginning of 2012, did not have BAM files as input, but instead eland files. These eland file names are in the SNAP lims for many runs. Those eland files are no longer in the archive,
	### and for these runs, all input should now be bam.  The code below converts an eland file name to a bam file name. If the bam file doesn't exist, then the pipeline will crash and the bam files must be created in
	### a separate task.
	count = -1
	for i in control_conf.CONTROL_MAPPED_READS:
		count += 1
		if i.startswith("/opt/scg/scg1_prd_10/"):
			i = i[len("/opt/scg/scg1_prd_10/"):] #slice off the prefix (lstrip() strips characters); some runs, such as ID 388, have this incorrect path
			i = GBSC_DIR + i
		##The following check for .Eland in the dirname accounts for the numerous cases where the mapping file path is like:
		##  /srv/gs1/projects/scg/Archive/IlluminaRuns/2012/may/120501_ROCKFORD_00145_FC64VL1.Eland/L4/120501_ROCKFORD_00145_FC64VL1_L4_pf.bam
		## and it should instead lack the ".Eland" part (e.g. see http://scg-snap.stanford.edu/api/peakseq_inputs/show?experiment_run_id=380).
		if ".Eland" in i:
			i = i.replace(".Eland","")
		basename = os.path.basename(i)
		if "_eland" in basename:
			i = elandToBamFileName(i)
		control_conf.CONTROL_MAPPED_READS[count] = i
		
	if sample_conf:
		replicates = [x[0] for x in sample_conf.REPLICATES] #sample_conf.REPLICATES is a list of 1-item lists
		count = -1
		for rep in replicates:
			count += 1
			if rep.startswith("/opt/scg/scg1_prd_10/"):
				rep = rep[len("/opt/scg/scg1_prd_10/"):] #slice off the prefix; lstrip() would strip characters
				rep = GBSC_DIR + rep
			if ".Eland" in rep:
				rep = rep.replace(".Eland","")
			basename = os.path.basename(rep)
			if "_eland" in basename:
				sample_conf.REPLICATES[count] = [elandToBamFileName(rep)]

	###NW End code to convert file names from eland to bam names.
	###


	###Nathaniel Watson. May 12, 2014.
	### Now, need to check if BAM files exist. If so, good, otherwise, generate BAM files.
	###Currently, only support for making single-end mappings on the fly
#	if not paired_end:
#		if sample_conf:
			
	
	if paired_end:
		controls = control_conf.CONTROL_MAPPED_READS
		all_mapped_reads = controls[:]
#		print ("Controls: " +  str(controls) + "\n")
		if sample_conf:
			replicates = [x[0] for x in sample_conf.REPLICATES] #sample_conf.REPLICATES is a list of 1-item lists
#			print ("Replicates are: " + str(replicates) + "\n")
			all_mapped_reads.extend(replicates) 
		jobs = []
		forwardReadExt = "_forwardReads.bam"
		progressReadExt = "_forwardReads.bam.encours"
		for i in all_mapped_reads:
			#str.rstrip(".bam") would strip characters, not the suffix, so slice instead
			frf = i[:-len(".bam")] + forwardReadExt
			frf_progress = i[:-len(".bam")] + progressReadExt
			if os.path.exists(frf) or os.path.exists(frf_progress):
				bamDone = False
				countLimit = 5
				count = 0
				while count < countLimit:
					count += 1
					try:
						age = getFileAgeMinutes(frf)
					except IOError:
						age = 0
						try:
							progressFileAge = getFileAgeMinutes(frf_progress)
						except IOError:
							progressFileAge = 0
						if (progressFileAge >= 20) or (progressFileAge == 0):
							raise Exception("Expected to find forward reads file {frf} since the progress sentinel file {frf_progress} is present, but unable to do so.".format(frf=frf, frf_progress=frf_progress))
					if age >= 20:
						bamDone = True
						break
					else:
						#sleep ten minutes
						time.sleep(600)
				if not bamDone:
					raise Exception("Waited too long for BAM file {} from other project to finish being made. Exiting.".format(frf))
			else:
				print("Need to create a single-end reads only file from file {frf}.".format(frf=frf))
				cmd = "samtools view -hbF 0x40 {peFile} > {seFile}".format(peFile=i,seFile=frf)
				jobname = "toSingleEnd_{0}".format(os.path.basename(i))
				job = sjm.Job(jobname,cmd,modules = ["samtools"],queue=conf.QUEUE,memory="5G",sched_options="-m e")
				jobs.append(job)
		if jobs:
			submission = sjm.Submission(jobs=jobs,log_directory=log_dir,notify=SJM_NOTIFY)		
			sjmfile = os.path.join(log_dir, run_name + '_MakeSingleEndMappings.jobs')
			print ("Removing reverse reads in control and sample BAM files. Commands are in SJM file {sjmfile}".format(sjmfile=sjmfile))
			try:
				submission.run(sjmfile,foreground=True)
			except subprocess.CalledProcessError:
				raise

		control_conf.CONTROL_MAPPED_READS = [x[:-len(".bam")] + forwardReadExt for x in controls]
		if sample_conf:
			sample_conf.REPLICATES = [ [x[:-len(".bam")] + forwardReadExt] for x in replicates]
	print ("Controls: " + "  ".join(control_conf.CONTROL_MAPPED_READS) + "\n")
	if sample_conf:
		print ("Replicates: " + "  ".join([x[0] for x in sample_conf.REPLICATES]))

	print " archive results: ", archive_results
	jobs = []	

	sample = None
	if sample_conf:
		sample = Sample(sample_conf.RUN_NAME, sample_conf.RESULTS_DIR, sample_conf.TEMP_DIR, sample_conf.GENOME, [SampleReplicate(i+1, x)for i, x in enumerate(sample_conf.REPLICATES)], sample_conf)

	control_lock = peakcaller.USE_CONTROL_LOCK
	if no_control_lock:
		control_lock = False

	control = Control(control_conf.RUN_NAME, control_conf.RESULTS_DIR, control_conf.TEMP_DIR, control_conf.GENOME, control_conf.CONTROL_MAPPED_READS, control_conf, peakcaller.NAME)

	controlScored = False
	countLimit = 5
	count = 0
	while count < countLimit:
		count += 1
		scoreTime = checkControlScored(control)
		if not scoreTime:
			#means it returned False b/c control output file didn't exist, so never scored.
			break
		#otherwise, it returned a number of hours as a float that the control file has been untouched
		elif  scoreTime >= 1:
			controlScored = True
			break
		else:
			#sleep an hour
			time.sleep(3600)
	
	if not controlScored and count == countLimit:
		raise Exception("Waited too long for control scoring from other project to finish. Exiting.")

	doRescore = False
	if rescore_control and (scoreTime > rescore_control):
		doRescore = True
	#a separate "if" (not "elif"), so that a due rescore actually triggers scoring
	if (not scoreTime) or doRescore:
		peakcaller.form_control_files('form_control_files', control) #add job merge_and_filter_reads.py
		if archive_results:
			peakcaller.archive_control('archive_control', control, force=force)
			add_dependencies(control.jobs['form_control_files'], control.jobs['archive_control'])
		jobs += control.all_jobs()

###nathankw comment out below on 2014-06-08
#	if not control_scoring.check_for_control(results_dir=control_conf.RESULTS_DIR, peakcaller=control.peakcaller,use_control_lock=control_lock) or rescore_control:
#		try:
#			peakcaller.check_control_inputs(control) #checks that genome and BAMS of control exist
#			peakcaller.form_control_files('form_control_files', control) #add job merge_and_filter_reads.py
#			#The call below peakcaller.complete_control() adds the job complete_control_scoring.py if USE_CONTROL_LOCK is True, whose goal is to run the below command
#			# "UPDATE encode_controls SET ready=1 WHERE name='%s' AND peakcaller='%s'" % (results_dir, peakcaller)
#			#  Which means that the control is now scored.
#			peakcaller.complete_control('complete_control', control) 
#			add_dependencies(control.jobs['form_control_files'], control.jobs['complete_control'])
#			if archive_results:
#				peakcaller.archive_control('archive_control', control,force=force)
#				add_dependencies(control.jobs['form_control_files'], control.jobs['archive_control'])
#			jobs += control.all_jobs()
#		except Exception, e:
#			import traceback
# 			print "error detected, removing control lock"
# 			traceback.print_exc()
# 			control_scoring.remove_lock(control.results_dir, control.peakcaller, control_lock)
# 			raise e
#	else:
#		print " Control %s already scored, skipping." % control.run_name

	if sample_conf:
		peakcaller.check_sample_inputs(sample,force=force)
		
		if remove_duplicates:
			print "removing duplicates"
			peakcaller.form_sample_files_nodups('form_sample_files', sample)
		else:
			print "keeping duplicates"
			peakcaller.form_sample_files('form_sample_files', sample)
		
		peakcaller.calc_pbc('calc_pbc', control, sample)
		peakcaller.run_peakcaller('peakcaller', control, sample, peakcaller_options)
		add_dependencies(sample.jobs['form_sample_files'], sample.jobs['calc_pbc'])
		add_dependencies(sample.jobs['form_sample_files'], sample.jobs['peakcaller'])
		if control.jobs:
			add_dependencies(control.jobs['form_control_files'], sample.jobs['peakcaller'])
		peakcaller.merge_results('merge_results', sample)
		add_dependencies(sample.jobs['peakcaller'], sample.jobs['merge_results'])
		if archive_results:
			peakcaller.archive_sample('archive_sample', sample, control, force=force)
			add_dependencies(sample.jobs['merge_results'], sample.jobs['archive_sample'])

		# IDR Analysis
		peakcaller.form_idr_inputs('idr_format_inputs', sample)
		add_dependencies(sample.jobs['merge_results'], sample.jobs['idr_format_inputs'])
		if len(sample.replicates) > 1:
			peakcaller.replicate_scoring('replicate_scoring', sample)
			add_dependencies(sample.jobs['merge_results'], sample.jobs['replicate_scoring'])
			if archive_results:
				add_dependencies(sample.jobs['replicate_scoring'], sample.jobs['archive_sample'])
			add_dependencies(sample.jobs['idr_format_inputs'], sample.jobs['replicate_scoring'])
		peakcaller.idr_analysis('idr_analysis', sample)
		add_dependencies(sample.jobs['idr_format_inputs'], sample.jobs['idr_analysis'])
		peakcaller.idr_filter('idr_filter', sample)
		add_dependencies(sample.jobs['idr_analysis'], sample.jobs['idr_filter'])
		if archive_results:
			add_dependencies(sample.jobs['idr_filter'], sample.jobs['archive_sample'])
		
		# Cross-Correlation Analysis
		idr.cross_correlation_analysis('cross_correlation_analysis', sample, no_duplicates=no_duplicates, options=xcorrelation_options)
		add_dependencies(sample.jobs['form_sample_files'], sample.jobs['cross_correlation_analysis'])
		if archive_results:
			add_dependencies(sample.jobs['cross_correlation_analysis'], sample.jobs['archive_sample'])
		add_dependencies(sample.jobs['cross_correlation_analysis'], sample.jobs['peakcaller'])
			
		jobs += sample.all_jobs()

	if emails:
		print "emails" % emails
		jobs.append(peakcaller.mail_results(sample, control, run_name, emails))
	#jobs.append(peakcaller.cleanup(sample, control))
	
	if SNAP_RUN and sample_conf:
		scriptPath = os.path.join(scriptDir,"snap_support/production/current/peakseq_report_parser_wrapper.sh")
		snap_job = sjm.Job("SNAP", "bash " + scriptPath + " production %s >& %s/peakseq_report_out " % (sample_conf.path,sample_conf.RESULTS_DIR),  queue=QUEUE, project=PROJECT, host='localhost', dependencies=sample.all_jobs(), sched_options='-m e -A chipseq_scoring')
		jobs.append(snap_job)
				
	if control.jobs:
		peakcaller.prep_control(control)
	if sample and sample.jobs:
		peakcaller.prep_sample(sample)

	
	#scoringStatusCmd="setScoringStatusProp.py --syapse-mode {syapseMode} -p scoringStatus --value Scored --unique-id {runName}".format(syapseMode=syapseMode,runName=runName))
	#scoringStatusJobName = run_name + "_setScoringStatusFlagToComplete"
	#job = sjm.Job(name=scoringStatusJobName,commands=scoringStatusCmd,modules=["gbsc/encode/prod"],dependencies=["mail_results"])
	#jobs.append(job)

	submission = sjm.Submission(jobs, log_directory=log_dir, notify=SJM_NOTIFY)
	if print_cmds:
		submission.build(run_name + '.jobs') 
		raise SystemExit(1)
	if log_dir:
		submission.run(os.path.join(log_dir, run_name + '.jobs'),foreground=True)
	else:
		submission.run(run_name + '.jobs',foreground=True)