Example #1
0
def main():
	assert (len(argv) == 2), "need the sampleID and nothing else"
	sampleId = argv[1]
	explainFailure = True
	path = "kmer_histograms"

	print sampleId

	# perform haploid fit to the sample (ignoring thge diploid component)

	hFitter = HaploidFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hParamNames = hFitter.paramNames

	hFitParams = hFitter.fit()
	if (hFitParams == None):
		print >>stderr, "haploid: failure or non-convergence"
		print "(haploid: failure or non-convergence)"
		if (explainFailure):
			print "... return code ..."
			print hFitter.retCode
			print "... stdout ..."
			print hFitter.stdout
			print "... stderr ..."
			print hFitter.stderr
	else:
		print params_to_text(hParamNames,hFitParams,prefix="cvrg.haploid:")

	# ask for default values for the hap-hap enrichment model

	hhFitter = EnrichedHapHapFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hhParamNames = hhFitter.paramNames

	hhDefaultParams = hhFitter.default_params()
	if (hhDefaultParams == None):
		print >>stderr, "hap-hap: failed to get default params"
		print "(hap-hap: failed to get default params)"
		if (explainFailure):
			print "... return code ..."
			print hhFitter.retCode
			print "... stdout ..."
			print hhFitter.stdout
			print "... stderr ..."
			print hhFitter.stderr
	else:
		print params_to_text(hhParamNames,hhDefaultParams,prefix="dflt.haphap:")

	assert (hFitParams != None) and (hhDefaultParams != None), \
	       "(no point in trying to fit the hap-hap model)"

	# create an initial vector for the enrichment model, borrowing some
	# elements from the haploid model fit

	hhInitParams = dict(hhDefaultParams)
	hhInitParams["zp.copy.y"] = hFitParams["zp.copy"]
	hhInitParams["p.e"      ] = hFitParams["p.e"]
	hhInitParams["shape.e"  ] = hFitParams["shape.e"]
	hhInitParams["scale.e"  ] = hFitParams["scale.e"]
	hhInitParams["u.y"      ] = hFitParams["u.v"]
	hhInitParams["sd.y"     ] = hFitParams["sd.v"]
	hhInitParams["shape.y"  ] = hFitParams["shape.v"]

	pAuto = 1 - float(hhInitParams["p.y"])
	hhInitParams["u.auto"   ] =         pAuto * float(hhInitParams["u.y"])
	hhInitParams["sd.auto"  ] = sdHom = pAuto * float(hhInitParams["sd.y"])

	# perform hap-hap fit to the mixed components

	hhFitParams = hhFitter.fit(hhInitParams)
	if (hhFitParams == None):
		print >>stderr, "hap-hap: failure or non-convergence"
		print "(hap-hap: failure or non-convergence)"
		print params_to_text(hhParamNames,hhInitParams,prefix="smart.haphap:")
		if (explainFailure):
			print "... return code ..."
			print hhFitter.retCode
			print "... stdout ..."
			print hhFitter.stdout
			print "... stderr ..."
			print hhFitter.stderr
	else:
		print params_to_text(hhParamNames,hhInitParams,hhFitParams,
		                     prefix="smart.haphap:",prefix2="cvrg.haphap:")

	assert (hhFitParams != None), \
	       "(no point in trying to fit the hap-dip model)"

	# ask for default values for the hap-dip enrichment model

	hdFitter = EnrichedHapDipFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hdParamNames = hdFitter.paramNames

	hdDefaultParams = hdFitter.default_params()
	if (hdDefaultParams == None):
		print >>stderr, "hap-dip: failed to get default params"
		print "(hap-dip: failed to get default params)"
		if (explainFailure):
			print "... return code ..."
			print hdFitter.retCode
			print "... stdout ..."
			print hdFitter.stdout
			print "... stderr ..."
			print hdFitter.stderr
	else:
		print params_to_text(hdParamNames,hdDefaultParams,prefix="dflt.hapdip:")

	assert (hdDefaultParams != None), \
	       "(no point in trying to fit the hap-dip model)"

	# read the sample's "cheat" parameters for comparison (usually produced by
	# explore3_hap_dip)

	fitFilename = path+"/"+sampleId+".mixed.fit"

	f = file(fitFilename,"rt")
	hdCheatParams = params_from_text([line for line in f])
	f.close()

	for name in hdDefaultParams:
		assert (name in hdCheatParams), \
		       "parameter \"%s\" missing from %s" % (name,fitFilename)

	for name in hdCheatParams:
		assert (name in hdDefaultParams), \
		       "extra parameter \"%s\" in %s" % (name,fitFilename)

	# create an initial vector for the hap-dip enrichment model, borrowing some
	# elements from the hap-hap model fit

	hdInitParams = dict(hdDefaultParams)
	hdInitParams["zp.copy.y"] = hhFitParams["zp.copy.y"]
	hdInitParams["p.e"      ] = hhFitParams["p.e"]
	hdInitParams["shape.e"  ] = hhFitParams["shape.e"]
	hdInitParams["scale.e"  ] = hhFitParams["scale.e"]
	hdInitParams["p.y"      ] = hhFitParams["p.y"]
	hdInitParams["u.y"      ] = hhFitParams["u.y"]
	hdInitParams["sd.y"     ] = hhFitParams["sd.y"]
	hdInitParams["shape.y"  ] = hhFitParams["shape.y"]

	pAuto = 1 - float(hdInitParams["p.y"])
	pHom  =     float(hdInitParams["p.hom"])
	hdInitParams["u.hom"    ] =         pAuto * pHom * float(hdInitParams["u.y"])
	hdInitParams["sd.hom"   ] = sdHom = pAuto * pHom * float(hdInitParams["sd.y"])
	hdInitParams["var.het"  ] = sdHom * sdHom

	# perform hap-dip fit to the mixed components

	hdFitParams = hdFitter.fit(hdInitParams)
	if (hdFitParams == None):
		print >>stderr, "hap-dip: failure or non-convergence"
		print "(hap-dip: failure or non-convergence)"
		print params_to_text(hdParamNames,hdInitParams,hdCheatParams,
		                     prefix="smart.hapdip:",prefix2="cheat.hapdip:")
		if (explainFailure):
			print "... return code ..."
			print hdFitter.retCode
			print "... stdout ..."
			print hdFitter.stdout
			print "... stderr ..."
			print hdFitter.stderr
	else:
		print params_to_text(hdParamNames,hdInitParams,hdFitParams,
		                     prefix="smart.hapdip:",prefix2="cvrg.hapdip:")
		print params_to_text(hdParamNames,hdCheatParams,prefix="cheat.hapdip:")

	# if convergence failed, try moving the initial parameters toward the
	# cheat parameters in small steps until we get convergence
	# $$$ a binary search would be "better"

	numSteps = 100
	step = 0

	while (hdFitParams == None):
		step += 1
		if (step == numSteps): break
		print >>stderr, "step %d" % step

		hdStepParams = {}
		for name in hdInitParams:
			if (name in ["u.hom","sd.hom","var.het"]): continue
			param = float(hdInitParams[name])
			param += (step * (float(hdCheatParams[name]) - param)) / numSteps
			hdStepParams[name] = param

		pAuto = 1 - float(hdStepParams["p.y"])
		pHom  =     float(hdStepParams["p.hom"])
		hdStepParams["u.hom"    ] =         pAuto * pHom * float(hdStepParams["u.y"])
		hdStepParams["sd.hom"   ] = sdHom = pAuto * pHom * float(hdStepParams["sd.y"])
		hdStepParams["var.het"  ] = sdHom * sdHom

		hdFitParams = hdFitter.fit(hdStepParams)
		if (hdFitParams == None):
			print params_to_text(hdParamNames,hdStepParams,
			                     prefix="step[%d].hapdip:" % step)
			#if (explainFailure):
			#	print "... return code ..."
			#	print hdFitter.retCode
			#	print "... stdout ..."
			#	print hdFitter.stdout
			#	print "... stderr ..."
			#	print hdFitter.stderr
		else:
			print params_to_text(hdParamNames,hdStepParams,hdFitParams,
			                     prefix="step[%d].hapdip:" % step,prefix2="cvrg.hapdip:")
Example #2
0
def main():
	assert (len(argv) == 2), "need the sampleID and nothing else"
	sampleId = argv[1]
	explainFailure = True
	path = "kmer_histograms"

	print sampleId

	# perform haploid fit to the sample (ignoring thge diploid component)

	hFitter = HaploidFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hParamNames = hFitter.paramNames

	hFitParams = hFitter.fit()
	if (hFitParams == None):
		print >>stderr, "haploid: failure or non-convergence"
		print "(haploid: failure or non-convergence)"
		if (explainFailure):
			print "... return code ..."
			print hFitter.retCode
			print "... stdout ..."
			print hFitter.stdout
			print "... stderr ..."
			print hFitter.stderr
	else:
		print params_to_text(hParamNames,hFitParams,prefix="cvrg.haploid:")

	# ask for default values for the enrichment model

	hhFitter = EnrichedHapHapFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hhParamNames = hhFitter.paramNames

	hhDefaultParams = hhFitter.default_params()
	if (hhDefaultParams == None):
		print >>stderr, "hap-hap: failed to get default params"
		print "(hap-hap: failed to get default params)"
		if (explainFailure):
			print "... return code ..."
			print hhFitter.retCode
			print "... stdout ..."
			print hhFitter.stdout
			print "... stderr ..."
			print hhFitter.stderr
	else:
		print params_to_text(hhParamNames,hhDefaultParams,prefix="dflt.haphap:")

	assert (hFitParams != None) and (hhDefaultParams != None), \
	       "(no point in trying to fit the hap-hap model)"

	# create an initial vector for the enrichment model, borrowing some
	# elements from the haploid model fit

	hhInitParams = dict(hhDefaultParams)
	hhInitParams["zp.copy.y"] = hFitParams["zp.copy"]
	hhInitParams["p.e"      ] = hFitParams["p.e"]
	hhInitParams["shape.e"  ] = hFitParams["shape.e"]
	hhInitParams["scale.e"  ] = hFitParams["scale.e"]
	hhInitParams["u.y"      ] = hFitParams["u.v"]
	hhInitParams["sd.y"     ] = hFitParams["sd.v"]
	hhInitParams["shape.y"  ] = hFitParams["shape.v"]

	pAuto = 1 - float(hhInitParams["p.y"])
	hhInitParams["u.auto"   ] =         pAuto * float(hhInitParams["u.y"])
	hhInitParams["sd.auto"  ] = sdHom = pAuto * float(hhInitParams["sd.y"])

	# perform hap-hap fit to the mixed components

	hhFitParams = hhFitter.fit(hhInitParams)
	if (hhFitParams == None):
		print >>stderr, "hap-hap: failure or non-convergence"
		print "(hap-hap: failure or non-convergence)"
		print params_to_text(hhParamNames,hhInitParams,prefix="smart.haphap:")
		if (explainFailure):
			print "... return code ..."
			print hhFitter.retCode
			print "... stdout ..."
			print hhFitter.stdout
			print "... stderr ..."
			print hhFitter.stderr
	else:
		print params_to_text(hhParamNames,hhInitParams,hhFitParams,
		                     prefix="smart.haphap:",prefix2="cvrg.haphap:")