def getReferenceSequence(self, workflow=None, **keywords):
		"""
		2013.3.20 yh_pegasus.registerRefFastaFile() returns a PassingData
		2013.1.25
		"""
		sys.stderr.write("Getting reference sequences ...")
		if workflow is None:
			workflow = self
		refSequence = VervetDB.IndividualSequence.get(self.ref_ind_seq_id)
		refFastaFname = os.path.join(self.data_dir, refSequence.path)
		registerReferenceData = yh_pegasus.registerRefFastaFile(workflow=workflow, refFastaFname=refFastaFname, \
							registerAffiliateFiles=True, \
							input_site_handler=self.input_site_handler,\
							checkAffiliateFileExistence=True)
		
		sys.stderr.write(" %s files.\n"%(len(registerReferenceData.refFastaFList)))
		return registerReferenceData
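	# A minimal usage sketch (hypothetical caller code; the refFastaFList attribute
	#	is on the PassingData that yh_pegasus.registerRefFastaFile() returns above):
	#
	#	registerReferenceData = self.getReferenceSequence()
	#	refFastaF = registerReferenceData.refFastaFList[0]	#the FASTA file itself; the rest of
	#			#refFastaFList are its affiliate files (.fai, .dict, aligner indices, etc.)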
	def run(self):
		"""
		"""
		
		if self.debug:
			import pdb
			pdb.set_trace()
		
		
		db_vervet = self.db_vervet
		if not self.data_dir:
			self.data_dir = db_vervet.data_dir
		
		if not self.local_data_dir:
			self.local_data_dir = db_vervet.data_dir
		"""
		#unless the db_vervet connection code is commented out, schema "genome" won't be on the default search path.
		db_genome = GenomeDB.GenomeDatabase(drivername=self.drivername, username=self.db_user,
						password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema="genome")
		db_genome.setup(create_tables=False)
		chr2size = db_genome.getTopNumberOfChomosomes(contigMaxRankBySize=80000, contigMinRankBySize=1, tax_id=60711, sequence_type_id=9)
		"""
		
		workflow = self.initiateWorkflow()
		
		self.registerJars(workflow)
		self.registerCommonExecutables(workflow)
		self.registerCustomExecutables(workflow)
		
		
		refSequence = VervetDB.IndividualSequence.get(self.ref_ind_seq_id)
		
		refFastaFname = os.path.join(self.data_dir, refSequence.path)
		registerReferenceData = yh_pegasus.registerRefFastaFile(workflow, refFastaFname, registerAffiliateFiles=True, \
						input_site_handler=self.input_site_handler,\
						checkAffiliateFileExistence=True)
		if self.depthFoldChange and self.depthFoldChange>0:
			self.outputAlignmentDepthAndOthersForFilter(db_vervet=db_vervet, outputFname=self.alnStatForFilterFname, \
												ref_ind_seq_id=self.ref_ind_seq_id, \
												foldChange=self.depthFoldChange, minGQ=self.minGQ)
			alnStatForFilterF = self.registerOneInputFile(inputFname=os.path.abspath(self.alnStatForFilterFname))
		else:
			alnStatForFilterF = None
		
		
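		#per-folder pipeline, as laid out below: VCF -> plink TPED/TFAM -> per-site Mendel-error
		#	counts -> keep sites with <=maxMendelError errors -> depth/MAC/MAF/missing-rate filtering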
		if self.vcf1Dir:
			# 2012.5.1 filter only on the 1st vcf folder
			#a relative-path name for vcf1Dir
			vcf1Name = self.findProperVCFDirIdentifier(self.vcf1Dir, defaultName='vcf1')
			inputData = self.registerAllInputFiles(workflow, self.vcf1Dir, input_site_handler=self.input_site_handler, \
												checkEmptyVCFByReading=self.checkEmptyVCFByReading,\
												pegasusFolderName="%s_%s"%(self.pegasusFolderName, vcf1Name), \
												maxContigID=self.maxContigID, \
												minContigID=self.minContigID)
			
			vcf2PlinkJobData = self.addVCF2PlinkJobs(workflow, inputData=inputData, db_vervet=db_vervet, minMAC=None, minMAF=None,\
							maxSNPMissingRate=None, transferOutput=False,\
							maxContigID=self.maxContigID, outputDirPrefix="vcf2plink", outputPedigreeAsTFAM=True,\
							treatEveryOneIndependent=False,\
							returnMode=2, ModifyTPEDRunType=1, chr_id2cumu_chr_start=None)
			mendelJobData = self.addPlinkMendelErrorJobs(inputData=vcf2PlinkJobData, transferOutput=True,\
							maxContigID=self.maxContigID, outputDirPrefix="mendel", locusSamplingRate=0.1, 
							returnMode=2)
			
			locusMendelJobData = mendelJobData.jobDataLs[-1]	#last job from PlinkMendelWorkflow is the locus-mendel merge job.
			keepSNPPosF = File('sitesWithMax%sMendelError.tsv'%(self.maxMendelError))
			outputSitesBelowMaxMendelJob = self.addGenericJob(executable=self.OutputSitesBelowMaxMendelError, inputFile=locusMendelJobData.file, \
					inputArgumentOption="-i", \
					outputFile=keepSNPPosF, outputArgumentOption="-o", \
					parentJobLs=locusMendelJobData.jobLs, extraDependentInputLs=None, extraOutputLs=None, transferOutput=True, \
					extraArguments="-m %s"%(self.maxMendelError), extraArgumentList=None, job_max_memory=2000,  sshDBTunnel=None, \
					key2ObjectForJob=None)
			
			self.addJobsToFilterOneVCFDir(workflow, inputData=inputData, registerReferenceData=registerReferenceData, \
									alnStatForFilterF=alnStatForFilterF, keepSNPPosF=keepSNPPosF, \
									onlyKeepBiAllelicSNP=self.onlyKeepBiAllelicSNP,\
									minMAC=self.minMAC, minMAF=self.minMAF, maxSNPMissingRate=self.maxSNPMissingRate,\
									minDepthPerGenotype=self.minDepthPerGenotype, outputDirPrefix="filter",\
									minNeighborDistance=self.minNeighborDistance, keepSNPPosParentJobLs=[outputSitesBelowMaxMendelJob])
		# Write the DAX to the output file
		with open(self.outputFname, 'w') as outf:
			workflow.writeXML(outf)
	def run(self):
		"""
		"""
		
		if self.debug:
			import pdb
			pdb.set_trace()
		
		#this has to run before the db_vervet connection code. Otherwise schema "genome" won't be on the default search path and its tables will not be visible.
		db_genome = GenomeDB.GenomeDatabase(drivername=self.drivername, db_user=self.db_user,
						db_passwd=self.db_passwd, hostname=self.hostname, dbname=self.dbname, schema="genome")
		db_genome.setup(create_tables=False)
		chr2size = db_genome.getTopNumberOfChomosomes(contigMaxRankBySize=80000, contigMinRankBySize=1, tax_id=60711, \
											sequence_type_id=9)
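		#chr2size: chromosome/contig name -> size; tax_id 60711 is Chlorocebus aethiops (vervet)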
		
		
		db_vervet = self.db_vervet
		if not self.data_dir:
			self.data_dir = db_vervet.data_dir
		
		if not self.local_data_dir:
			self.local_data_dir = db_vervet.data_dir
		"""
		#unless the db_vervet connection code is commented out, schema "genome" won't be on the default search path.
		db_genome = GenomeDB.GenomeDatabase(drivername=self.drivername, username=self.db_user,
						password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema="genome")
		db_genome.setup(create_tables=False)
		chr2size = db_genome.getTopNumberOfChomosomes(contigMaxRankBySize=80000, contigMinRankBySize=1, tax_id=60711, sequence_type_id=9)
		"""
		
		workflow = self.initiateWorkflow()
		
		self.registerJars(workflow)
		self.registerCommonExecutables(workflow)
		self.registerCustomExecutables(workflow)
		
		
		refSequence = VervetDB.IndividualSequence.get(self.ref_ind_seq_id)
		
		refFastaFname = os.path.join(self.data_dir, refSequence.path)
		registerReferenceData = yh_pegasus.registerRefFastaFile(workflow, refFastaFname, registerAffiliateFiles=True, \
						input_site_handler=self.input_site_handler,\
						checkAffiliateFileExistence=True)
		
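		#only the first entry of depthFoldChangeLs is used: the alignment-stat file is
		#	generated once, outside the parameter sweep below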
		depthFoldChange = self.depthFoldChangeLs[0]
		if depthFoldChange>0:
			self.outputAlignmentDepthAndOthersForFilter(db_vervet=db_vervet, outputFname=self.alnStatForFilterFname, \
												ref_ind_seq_id=self.ref_ind_seq_id, \
												foldChange=depthFoldChange, minGQ=self.minGQ)
			alnStatForFilterF = self.registerOneInputFile(inputFname=os.path.abspath(self.alnStatForFilterFname),\
														folderName=self.pegasusFolderName)
		else:
			alnStatForFilterF = None
		if self.keepSNPPosFname:
			keepSNPPosF = self.registerOneInputFile(inputFname=os.path.abspath(self.keepSNPPosFname),\
														folderName=self.pegasusFolderName)
		else:
			keepSNPPosF = None
		# 2012.5.1 filter only on the 1st vcf folder
		inputData = self.registerAllInputFiles(workflow, self.inputDir, input_site_handler=self.input_site_handler, \
									checkEmptyVCFByReading=self.checkEmptyVCFByReading,\
									pegasusFolderName="%s"%(self.pegasusFolderName), maxContigID=self.maxContigID, \
									minContigID=self.minContigID)
		counter = 0
		for minMAC in self.minMACLs:
			for minMAF in self.minMAFLs:
				for maxSNPMissingRate in self.maxSNPMissingRateLs:
					for minDepthPerGenotype in self.minDepthPerGenotypeLs:
						folderSignature='minMAC%s_minMAF%s_maxSNPMissingRate%s_minDepthPerGenotype%s_depthFoldChange%s_keepSNPFile%s_onlyBiAllelic%s'%\
							(minMAC, minMAF, maxSNPMissingRate, minDepthPerGenotype, depthFoldChange, getattr(keepSNPPosF, 'name', None),\
							self.onlyKeepBiAllelicSNP)
						filterJobData = self.addJobsToFilterOneVCFDir(workflow, inputData=inputData, registerReferenceData=registerReferenceData, \
												alnStatForFilterF=alnStatForFilterF, keepSNPPosF=keepSNPPosF, \
												onlyKeepBiAllelicSNP=self.onlyKeepBiAllelicSNP,\
												minMAC=minMAC, minMAF=minMAF, maxSNPMissingRate=maxSNPMissingRate,\
												minDepthPerGenotype=minDepthPerGenotype, transferOutput=False, \
												outputDirPrefix="run%s_%s"%(counter, folderSignature))
						returnData = self.addStatCalculationJobs(workflow=workflow, inputData=filterJobData, registerReferenceData=registerReferenceData, \
												chr2size=chr2size, windowSize=self.windowSize, minChrLengthForPlot=self.minChrLengthForPlot, \
												minChrSize=self.minChrSize, LDWindowSize=self.LDWindowSize, transferOutput=True, \
												outputDirPrefix="run%s_%s"%(counter, folderSignature),\
												samplingRate=self.samplingRate, minSiteGap=30000)
						counter += 1
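		#the four nested loops above enumerate the Cartesian product of the filter parameters;
		#	an equivalent, flatter formulation (a sketch only, not used here):
		#
		#	import itertools
		#	parameterGrid = itertools.product(self.minMACLs, self.minMAFLs, \
		#					self.maxSNPMissingRateLs, self.minDepthPerGenotypeLs)
		#	for counter, (minMAC, minMAF, maxSNPMissingRate, minDepthPerGenotype) in enumerate(parameterGrid):
		#		...	#one filter + stat-calculation sub-workflow per combination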
		# Write the DAX to the output file
		with open(self.outputFname, 'w') as outf:
			workflow.writeXML(outf)
	def run(self):
		"""
		2011-7-11
		"""
		
		if self.debug:
			import pdb
			pdb.set_trace()
		
		db_vervet = VervetDB.VervetDB(drivername=self.drivername, username=self.db_user,
					password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db_vervet.setup(create_tables=False)
		self.db_vervet = db_vervet
		
		if not self.data_dir:
			self.data_dir = db_vervet.data_dir
		
		if not self.local_data_dir:
			self.local_data_dir = db_vervet.data_dir
		
		refName2size = self.getTopNumberOfContigs(self.topNumberOfContigs, contigMinRankBySize=self.contigMinRankBySize)
		#refName2size = set(['Contig149'])	#temporary when testing Contig149
		#refName2size = set(['1MbBAC'])	#temporary when testing the 1Mb-BAC (formerly vervet_path2)
		refNameLs = refName2size.keys()
		
		alignmentLs = self.getAlignments(self.ref_ind_seq_id, ind_seq_id_ls=self.ind_seq_id_ls, ind_aln_id_ls=self.ind_aln_id_ls,\
										alignment_method_id=2, data_dir=self.local_data_dir)
		
		#site id 447 is the VRC site
		alignmentLs = self.filterAlignments(alignmentLs, max_coverage=self.max_coverage, individual_site_id=self.site_id)
		
		workflowName = os.path.splitext(os.path.basename(self.outputFname))[0]
		workflow = self.initiateWorkflow(workflowName)
		
		refSequence = VervetDB.IndividualSequence.get(self.ref_ind_seq_id)
		refFastaFname = os.path.join(self.data_dir, refSequence.path)
		registerReferenceData = yh_pegasus.registerRefFastaFile(workflow, refFastaFname, registerAffiliateFiles=True, \
							input_site_handler=self.input_site_handler,\
							checkAffiliateFileExistence=True)
		
		self.registerJars(workflow)
		
		self.registerCommonExecutables(workflow)
		self.registerCustomExecutables(workflow)
		
		
		if self.run_type==1:	#multi-sample calling
			dirPrefix = ""
			# Add a mkdir job for the call directory.
			callOutputDir = "%scall"%(dirPrefix)
			callOutputDirJob = self.addMkDirJob(workflow, mkdir=workflow.mkdirWrap, outputDir=callOutputDir, \
											namespace=workflow.namespace, version=workflow.version)
			gatkDir = "%sgatk"%(dirPrefix)
			gatkDirJob = self.addMkDirJob(workflow, mkdir=workflow.mkdirWrap, outputDir=gatkDir, \
										namespace=workflow.namespace, version=workflow.version)
			samtoolsDir = "%ssamtools"%(dirPrefix)
			samtoolsDirJob = self.addMkDirJob(workflow, mkdir=workflow.mkdirWrap, outputDir=samtoolsDir, \
											namespace=workflow.namespace, version=workflow.version)
			unionDir = "%sgatk_samtools_union"%(dirPrefix)
			unionDirJob = self.addMkDirJob(workflow, mkdir=workflow.mkdirWrap, outputDir=unionDir, \
									namespace=workflow.namespace, version=workflow.version)
			intersectionDir = "%sgatk_samtools_intersection"%(dirPrefix)
			intersectionDirJob = self.addMkDirJob(workflow, mkdir=workflow.mkdirWrap, outputDir=intersectionDir, \
												namespace=workflow.namespace, version=workflow.version)
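			#one output directory per call set: call/, gatk/ and samtools/ for the per-caller
			#	VCFs, plus the union and the intersection of the GATK and samtools calls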
			
			
			self.addGenotypeCallJobs(workflow, alignmentLs, refName2size, samtools=workflow.samtools, \
					genotyperJava=workflow.genotyperJava, GenomeAnalysisTKJar=workflow.GenomeAnalysisTKJar, \
					addOrReplaceReadGroupsJava=workflow.addOrReplaceReadGroupsJava, AddOrReplaceReadGroupsJar=workflow.AddOrReplaceReadGroupsJar, \
					CreateSequenceDictionaryJava=workflow.CreateSequenceDictionaryJava, CreateSequenceDictionaryJar=workflow.CreateSequenceDictionaryJar, \
					MergeSamFilesJar=workflow.MergeSamFilesJar, \
					BuildBamIndexFilesJava=workflow.BuildBamIndexFilesJava, BuildBamIndexJar=workflow.BuildBamIndexJar, \
					mv=workflow.mv, CallVariantBySamtools=workflow.CallVariantBySamtools,\
					bgzip_tabix=workflow.bgzip_tabix, vcf_convert=workflow.vcf_convert, vcf_isec=workflow.vcf_isec, vcf_concat=workflow.vcf_concat, \
					concatGATK=workflow.concatGATK, concatSamtools=workflow.concatSamtools,\
					genotypeCallByCoverage=workflow.genotypeCallByCoverage, registerReferenceData=registerReferenceData, bamListF=None, \
					callOutputDirJob =callOutputDirJob, gatkDirJob=gatkDirJob, samtoolsDirJob=samtoolsDirJob, unionDirJob=unionDirJob, intersectionDirJob=intersectionDirJob,\
					namespace=workflow.namespace, version=workflow.version, site_handler=self.site_handler, input_site_handler=self.input_site_handler,\
					seqCoverageF=None, \
					needFastaIndexJob=self.needFastaIndexJob, needFastaDictJob=self.needFastaDictJob, \
					chunkSize=2000000, site_type=self.site_type, data_dir=self.data_dir)
		
		# Write the DAX to the output file
		with open(self.outputFname, 'w') as outf:
			workflow.writeXML(outf)
	def run(self):
		"""
		"""
		
		if self.debug:
			import pdb
			pdb.set_trace()
		
		db_vervet = self.db_vervet
		if not self.data_dir:
			self.data_dir = db_vervet.data_dir
		
		if not self.local_data_dir:
			self.local_data_dir = db_vervet.data_dir
		
		# Create an abstract DAG
		workflowName = os.path.splitext(os.path.basename(self.outputFname))[0]
		workflow = self.initiateWorkflow(workflowName)
		
		self.registerJars(workflow)
		self.registerCommonExecutables(workflow)
		self.registerCustomExecutables(workflow)
		
		refSequence = VervetDB.IndividualSequence.get(self.ref_ind_seq_id)
		
		refFastaFname = os.path.join(self.data_dir, refSequence.path)
		registerReferenceData = yh_pegasus.registerRefFastaFile(workflow, refFastaFname, registerAffiliateFiles=True, \
						input_site_handler=self.input_site_handler,\
						checkAffiliateFileExistence=True)
		refFastaFList = registerReferenceData.refFastaFList
		
		self.outputAlignmentDepthAndOthersForFilter(self.alnStatForFilterFname, ref_ind_seq_id=self.ref_ind_seq_id, \
												foldChange=self.depthFoldChange, minGQ=self.minGQ)
		alnStatForFilterF = self.registerOneInputFile(workflow, self.alnStatForFilterFname)
		
		#names to distinguish between vcf1Dir and vcf2Dir
		vcf1Name = self.findProperVCFDirIdentifier(self.vcf1Dir, defaultName='vcf1')
		vcf2Name = self.findProperVCFDirIdentifier(self.vcf2Dir, defaultName='vcf2')
		if vcf2Name==vcf1Name or not vcf2Name:
			vcf2Name = "vcf2"
		
		no_of_jobs = 0
		vcf1DepthFilterDir = "%s_DepthFilter"%(vcf1Name)
		vcf1DepthFilterDirJob = self.addMkDirJob(outputDir=vcf1DepthFilterDir)
		#vcf2DepthFilterDir = "%s_DepthFilter"%(vcf2Name)
		#vcf2DepthFilterDirJob = yh_pegasus.addMkDirJob(workflow, mkdir=workflow.mkdirWrap, outputDir=vcf2DepthFilterDir)
		
		trioInconsistencyDir = "trioInconsistency"
		trioInconsistencyDirJob = self.addMkDirJob(outputDir=trioInconsistencyDir)
		
		
		SNPMismatchStatDir = "SNPMismatchStat"
		SNPMismatchStatDirJob = self.addMkDirJob(outputDir=SNPMismatchStatDir)
		
		input_site_handler = self.input_site_handler
		
		
		#whole genome reduction job.
		wholeGenomeSiteStatFile = File('siteStatAndTrioInconsistency.tsv')
		wholeGenomeSiteStatMergeJob = self.addStatMergeJob(workflow, statMergeProgram=workflow.mergeSameHeaderTablesIntoOne, \
							outputF=wholeGenomeSiteStatFile,transferOutput=False)
		
		wholeGenomeSiteStatBGzipFile = File("%s.gz"%wholeGenomeSiteStatFile.name)
		wholeGenomeSiteStatBGZipTabixJob = self.addBGZIP_tabix_Job(workflow, bgzip_tabix=workflow.bgzip_tabix, \
							parentJob=wholeGenomeSiteStatMergeJob, inputF=wholeGenomeSiteStatFile, \
							outputF=wholeGenomeSiteStatBGzipFile, \
							transferOutput=True, tabixArguments="-s 1 -b 2 -e 2")
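		#tabix arguments: -s 1 => chromosome name in column 1; -b 2 -e 2 => position in column 2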
		no_of_jobs += 5
		
		#read trioInconsistencyByPosistionFname, figure out which contigs it contains, and add one extraction job per contig
		chrLs = self.getChrListInTrioInconsistencyFile(self.tabixPath, self.trioInconsistencyByPosistionFname)
		chr2tabixRetrieveJob = {}
		trioInconsistencyByPosistionF = self.registerOneInputFile(workflow, self.trioInconsistencyByPosistionFname)
		trioInconsistencyByPosistion_tbi_Fname = '%s.tbi'%(self.trioInconsistencyByPosistionFname)
		trioInconsistencyByPosistion_tbi_F = self.registerOneInputFile(workflow, trioInconsistencyByPosistion_tbi_Fname)
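		#the .tbi index is registered alongside the data file so the per-chromosome tabix retrieval jobs below can use it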
		
		for chr in chrLs:
			outputF = File(os.path.join(trioInconsistencyDir, '%s.trioInconsistency.tsv'%chr))
			tabixRetrieveJob = self.addTabixRetrieveJob(workflow, executable=workflow.tabixRetrieve, tabixPath=self.tabixPath, \
							inputF=trioInconsistencyByPosistionF, outputF=outputF, regionOfInterest=chr, includeHeader=True,\
							parentJobLs=[trioInconsistencyDirJob], job_max_memory=100, extraDependentInputLs=[trioInconsistencyByPosistion_tbi_F], \
							transferOutput=False)
			chr2tabixRetrieveJob[chr] = tabixRetrieveJob
			no_of_jobs += 1
		
		counter = 0
		no_of_vcf = 0
		no_of_good_vcf = 0
		for inputFname in os.listdir(self.vcf1Dir):
			counter += 1
			if counter%500==0:
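				#'\x08' is the backspace character: the progress line is redrawn in place on stderr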
				sys.stderr.write("%s %s jobs %s good vcf, %s total vcf, %s total files"%('\x08'*180, no_of_jobs, \
														no_of_good_vcf, no_of_vcf, counter))
			
			vcf1AbsPath = os.path.join(os.path.abspath(self.vcf1Dir), inputFname)
			vcf2AbsPath = os.path.join(os.path.abspath(self.vcf2Dir), inputFname)
			if NextGenSeq.isFileNameVCF(inputFname, includeIndelVCF=False) and not NextGenSeq.isVCFFileEmpty(vcf1AbsPath):
				if not NextGenSeq.isVCFFileEmpty(vcf2AbsPath, checkContent=self.checkEmptyVCFByReading):	#make sure the samtools vcf exists
					no_of_vcf += 1
					chr = self.getChrFromFname(inputFname)
					if not chr or chr not in chr2tabixRetrieveJob:
						continue
					no_of_good_vcf += 1
					#find the contig id and the matching tabix job
					commonPrefix = inputFname.split('.')[0]
					vcf1 = File(os.path.join(vcf1Name, inputFname))	#relative path
					vcf1.absPath = vcf1AbsPath
					self.registerVCFAndItsTabixIndex(workflow, vcf1, input_site_handler)
					vcf2 = File(os.path.join(vcf2Name, inputFname))	#relative path
					vcf2.absPath = vcf2AbsPath
					self.registerVCFAndItsTabixIndex(workflow, vcf2, input_site_handler)
					
					outputSiteStatF = File(os.path.join(vcf1DepthFilterDir, '%s.siteStat.tsv'%(commonPrefix)))
					vcf1FilterByDepthJob = self.addFilterVCFByDepthJob(workflow, FilterVCFByDepthJava=workflow.FilterVCFByDepthJava, \
							GenomeAnalysisTKJar=workflow.GenomeAnalysisTKJar, \
							refFastaFList=refFastaFList, inputVCFF=vcf1, outputVCFF=None, outputSiteStatF=outputSiteStatF,\
							parentJobLs=[vcf1DepthFilterDirJob], \
							alnStatForFilterF=alnStatForFilterF, \
							extraDependentInputLs=[vcf1.tbi_F], onlyKeepBiAllelicSNP=self.onlyKeepBiAllelicSNP)
					
					snpMisMatchStatFile = File(os.path.join(SNPMismatchStatDir, '%s_snpMismatchStat.tsv'%(os.path.splitext(commonPrefix)[0])))
					calculateSNPMismatchRateOfTwoVCFJob = self.addCalculateTwoVCFSNPMismatchRateJob(workflow, \
							executable=workflow.CalculateSNPMismatchRateOfTwoVCF, \
							vcf1=vcf1, vcf2=vcf2, snpMisMatchStatFile=snpMisMatchStatFile, \
							maxSNPMismatchRate=1.0, parentJobLs=[SNPMismatchStatDirJob], \
							job_max_memory=1000, extraDependentInputLs=[], \
							transferOutput=False)
					
					#add a ReduceMatrixByMergeColumnsWithSameKey job
					chrMergingStatF = File('%s_variantSiteStatAndTrioInconsistencyRate.tsv'%(chr))
					chrMergingStatJob = self.addStatMergeJob(workflow, \
									statMergeProgram=workflow.ReduceMatrixByMergeColumnsWithSameKey, \
									outputF=chrMergingStatF, extraArguments='-k 0,1', transferOutput=False)
					tabixRetrieveJob = chr2tabixRetrieveJob[chr]
					self.addInputToStatMergeJob(workflow, statMergeJob=chrMergingStatJob, \
								inputF=tabixRetrieveJob.output, \
								parentJobLs=[tabixRetrieveJob])
					
					self.addInputToStatMergeJob(workflow, statMergeJob=chrMergingStatJob, \
								inputF=outputSiteStatF, \
								parentJobLs=[vcf1FilterByDepthJob])
					self.addInputToStatMergeJob(workflow, statMergeJob=chrMergingStatJob, \
								inputF=snpMisMatchStatFile, \
								parentJobLs=[calculateSNPMismatchRateOfTwoVCFJob])
					
					#add to the whole genome reduction job
					self.addInputToStatMergeJob(workflow, statMergeJob=wholeGenomeSiteStatMergeJob, \
								inputF=chrMergingStatJob.output, \
								parentJobLs=[chrMergingStatJob])
					no_of_jobs += 3
		
		sys.stderr.write("%s %s jobs %s good vcf, %s total vcf, %s total files.\n"%('\x08'*180, no_of_jobs, \
														no_of_good_vcf, no_of_vcf, counter))
		
		# Write the DAX to the output file
		with open(self.outputFname, 'w') as outf:
			workflow.writeXML(outf)
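
# Each run() above ends by serializing the Pegasus ADAG to self.outputFname.
# A minimal sketch of planning and submitting such a DAX afterwards (an assumption:
#	a classic Pegasus 3.x/4.x setup; site names and option spellings vary by installation):
#
#	pegasus-plan --dax my_workflow.xml --sites condorpool --output-site local --submit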