コード例 #1
0
def main():
	assert (len(argv) == 3), "need the sampleID and number of trials, and nothing else"
	sampleId = argv[1]
	numTrials = int(argv[2])

	random_seed("acorn")
	explainFailure = False
	path = "kmer_histograms"

	# ask the curve fitter what the default paramters are

	fitter = EnrichedHapDipFitter(path+"/"+sampleId+".mixed.kmer_dist")
	paramNames = fitter.paramNames

	defaultParams = fitter.default_params()
	if (defaultParams == None):
		print "(failed to get default params)"
		if (explainFailure):
			print "... return code ..."
			print hdFitter.retCode
			print "... stdout ..."
			print hdFitter.stdout
			print "... stderr ..."
			print hdFitter.stderr
		assert (False)

	defaultParams = params_to_float(defaultParams)

	# read the "good" parameters (usually produced by explore3_hap_dip)

	fitFilename = path+"/"+sampleId+".mixed.fit"

	f = file(fitFilename,"rt")
	goodParams = params_from_text([line for line in f])
	f.close()

	for name in defaultParams:
		assert (name in goodParams), \
		       "parameter \"%s\" missing from %s" % (name,fitFilename)

	for name in goodParams:
		assert (name in defaultParams), \
		       "extra parameter \"%s\" in %s" % (name,fitFilename)

	goodParams = params_to_float(goodParams)

	print params_to_text(paramNames,goodParams,defaultParams,
	                     prefix="good:",prefix2="dflt:")

	# run the convergence trials

	convergenceCount = 0
	for trialNumber in xrange(numTrials):
		print "=== trial %d of %d ===" \
		    % (1+trialNumber,numTrials)

		# choose initial params as a random point in hypercube between "good"
		# and "bad"

		initParams = dict(goodParams)
		norm2Init = 0.0
		for (paramIx,name) in enumerate(paramNames):
			step = unit_random()
			initParams[name] += step*(defaultParams[name]-goodParams[name])
			norm2Init += step*step
		normInit = sqrt(norm2Init) / len(paramNames)

		fitter.set_params(initParams)
		fitParams = fitter.fit()
		if (fitParams == None):
			print params_to_text(paramNames,initParams,prefix="init-[%d]:" % trialNumber)
			print "normInit: %.8f" % normInit
			print "(failure or non-convergence)"
			if (explainFailure):
				print "... return code ..."
				print fitter.retCode
				print "... stdout ..."
				print fitter.stdout
				print "... stderr ..."
				print fitter.stderr
			continue

		print params_to_text(paramNames,initParams,fitParams,
		                     prefix="init+[%d]:" % trialNumber,
		                     prefix2="cvrg[%d]:" % trialNumber)
		fitParams = params_to_float(fitParams)
		dGood = vector_distance(fitParams,goodParams)
		print "normInit: %.8f" % normInit
		print "dGood: %.8f" % dGood
		convergenceCount += 1

	print "%d of %d trials converged" % (convergenceCount,numTrials)
コード例 #2
0
def main():
	assert (len(argv) == 2), "need the sampleID and nothing else"
	sampleId = argv[1]
	saveConvergence = True
	explainFailure = True
	path = "kmer_histograms"

	# clear the convergence file, in case we have a failure (we don't want
	# previous results to leak through)

	if (saveConvergence):
		f = file(path+"/"+sampleId+".mixed.fit","wt")
		f.close()

	# perform haploid fit to the haploid component

	hFitter = HaploidFitter(path+"/"+sampleId+".haploid_from_mixed.kmer_dist")
	hParamNames = hFitter.paramNames

	hFitParams = hFitter.fit()
	if (hFitParams == None):
		print "(haploid: failure or non-convergence)"
		if (explainFailure):
			print "... return code ..."
			print hFitter.retCode
			print "... stdout ..."
			print hFitter.stdout
			print "... stderr ..."
			print hFitter.stderr
	else:
		print params_to_text(hParamNames,hFitParams,prefix="cvrg.haploid:")

	# perform diploid fit to the diploid component

	dFitter = DiploidFitter(path+"/"+sampleId+".diploid_from_mixed.kmer_dist")
	dParamNames = dFitter.paramNames

	dFitParams = dFitter.fit()
	if (dFitParams == None):
		print "(diploid: failure or non-convergence)"
		if (explainFailure):
			print "... return code ..."
			print dFitter.retCode
			print "... stdout ..."
			print dFitter.stdout
			print "... stderr ..."
			print dFitter.stderr
	else:
		print params_to_text(dParamNames,dFitParams,prefix="cvrg.diploid:")

	# create an initial vector for the enrichment model, combining elements
	# from the component fits with the usual defaults

	hdFitter = EnrichedHapDipFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hdParamNames = hdFitter.paramNames

	hdDefaultParams = hdFitter.default_params()
	if (hdDefaultParams == None):
		print "(hap-dip: failed to get default params)"
		if (explainFailure):
			print "... return code ..."
			print hdFitter.retCode
			print "... stdout ..."
			print hdFitter.stdout
			print "... stderr ..."
			print hdFitter.stderr
	else:
		print params_to_text(hdParamNames,hdDefaultParams,prefix="dflt.hapdip:")

	assert (hFitParams != None) and (dFitParams != None) and (hdDefaultParams != None), \
	       "(no point in trying to fit the hap-dip model)"

	hdInitParams = {}
	hdInitParams["zp.copy.y"  ] = hFitParams["zp.copy"]
	hdInitParams["zp.copy.hom"] = dFitParams["zp.copy"]
	hdInitParams["zp.copy.het"] = dFitParams["zp.copy.het"]
	hdInitParams["p.e"        ] = hFitParams["p.e"]
	hdInitParams["shape.e"    ] = hFitParams["shape.e"]
	hdInitParams["scale.e"    ] = hFitParams["scale.e"]
	hdInitParams["p.y"        ] = hdDefaultParams["p.y"]
	hdInitParams["u.y"        ] = hFitParams["u.v"]
	hdInitParams["sd.y"       ] = hFitParams["sd.v"]
	hdInitParams["shape.y"    ] = hFitParams["shape.v"]
	hdInitParams["p.hom"      ] = 1 - float(dFitParams["p.d"])
	hdInitParams["u.hom"      ] = dFitParams["u.v"]
	hdInitParams["sd.hom"     ] = dFitParams["sd.v"]
	hdInitParams["var.het"    ] = dFitParams["var.w"]

	# perform hap-dip fit to the mixed components

	hdFitParams = hdFitter.fit(hdInitParams)
	if (hdFitParams == None):
		print "(hap-dip: failure or non-convergence)"
		print params_to_text(hdParamNames,hdInitParams,prefix="init.hapdip:")
		if (explainFailure):
			print "... return code ..."
			print hdFitter.retCode
			print "... stdout ..."
			print hdFitter.stdout
			print "... stderr ..."
			print hdFitter.stderr
	else:
		print params_to_text(hdParamNames,hdInitParams,hdFitParams,
		                     prefix="init.hapdip:",prefix2="cvrg.hapdip:")

	# write the convergence file

	if (saveConvergence):
		f = file(path+"/"+sampleId+".mixed.fit","wt")
		print >>f, params_to_text(hdParamNames,hdFitParams)
		f.close()
コード例 #3
0
def main():
	assert (len(argv) == 2), "need the sampleID and nothing else"
	sampleId = argv[1]
	explainFailure = True
	path = "kmer_histograms"

	print sampleId

	# perform haploid fit to the sample (ignoring thge diploid component)

	hFitter = HaploidFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hParamNames = hFitter.paramNames

	hFitParams = hFitter.fit()
	if (hFitParams == None):
		print >>stderr, "haploid: failure or non-convergence"
		print "(haploid: failure or non-convergence)"
		if (explainFailure):
			print "... return code ..."
			print hFitter.retCode
			print "... stdout ..."
			print hFitter.stdout
			print "... stderr ..."
			print hFitter.stderr
	else:
		print params_to_text(hParamNames,hFitParams,prefix="cvrg.haploid:")

	# ask for default values for the hap-hap enrichment model

	hhFitter = EnrichedHapHapFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hhParamNames = hhFitter.paramNames

	hhDefaultParams = hhFitter.default_params()
	if (hhDefaultParams == None):
		print >>stderr, "hap-hap: failed to get default params"
		print "(hap-hap: failed to get default params)"
		if (explainFailure):
			print "... return code ..."
			print hhFitter.retCode
			print "... stdout ..."
			print hhFitter.stdout
			print "... stderr ..."
			print hhFitter.stderr
	else:
		print params_to_text(hhParamNames,hhDefaultParams,prefix="dflt.haphap:")

	assert (hFitParams != None) and (hhDefaultParams != None), \
	       "(no point in trying to fit the hap-hap model)"

	# create an initial vector for the enrichment model, borrowing some
	# elements from the haploid model fit

	hhInitParams = dict(hhDefaultParams)
	hhInitParams["zp.copy.y"] = hFitParams["zp.copy"]
	hhInitParams["p.e"      ] = hFitParams["p.e"]
	hhInitParams["shape.e"  ] = hFitParams["shape.e"]
	hhInitParams["scale.e"  ] = hFitParams["scale.e"]
	hhInitParams["u.y"      ] = hFitParams["u.v"]
	hhInitParams["sd.y"     ] = hFitParams["sd.v"]
	hhInitParams["shape.y"  ] = hFitParams["shape.v"]

	pAuto = 1 - float(hhInitParams["p.y"])
	hhInitParams["u.auto"   ] =         pAuto * float(hhInitParams["u.y"])
	hhInitParams["sd.auto"  ] = sdHom = pAuto * float(hhInitParams["sd.y"])

	# perform hap-hap fit to the mixed components

	hhFitParams = hhFitter.fit(hhInitParams)
	if (hhFitParams == None):
		print >>stderr, "hap-hap: failure or non-convergence"
		print "(hap-hap: failure or non-convergence)"
		print params_to_text(hhParamNames,hhInitParams,prefix="smart.haphap:")
		if (explainFailure):
			print "... return code ..."
			print hhFitter.retCode
			print "... stdout ..."
			print hhFitter.stdout
			print "... stderr ..."
			print hhFitter.stderr
	else:
		print params_to_text(hhParamNames,hhInitParams,hhFitParams,
		                     prefix="smart.haphap:",prefix2="cvrg.haphap:")

	assert (hhFitParams != None), \
	       "(no point in trying to fit the hap-dip model)"

	# ask for default values for the hap-dip enrichment model

	hdFitter = EnrichedHapDipFitter(path+"/"+sampleId+".mixed.kmer_dist")
	hdParamNames = hdFitter.paramNames

	hdDefaultParams = hdFitter.default_params()
	if (hdDefaultParams == None):
		print >>stderr, "hap-dip: failed to get default params"
		print "(hap-dip: failed to get default params)"
		if (explainFailure):
			print "... return code ..."
			print hdFitter.retCode
			print "... stdout ..."
			print hdFitter.stdout
			print "... stderr ..."
			print hdFitter.stderr
	else:
		print params_to_text(hdParamNames,hdDefaultParams,prefix="dflt.hapdip:")

	assert (hdDefaultParams != None), \
	       "(no point in trying to fit the hap-dip model)"

	# read the sample's "cheat" parameters for comparison (usually produced by
	# explore3_hap_dip)

	fitFilename = path+"/"+sampleId+".mixed.fit"

	f = file(fitFilename,"rt")
	hdCheatParams = params_from_text([line for line in f])
	f.close()

	for name in hdDefaultParams:
		assert (name in hdCheatParams), \
		       "parameter \"%s\" missing from %s" % (name,fitFilename)

	for name in hdCheatParams:
		assert (name in hdDefaultParams), \
		       "extra parameter \"%s\" in %s" % (name,fitFilename)

	# create an initial vector for the hap-dip enrichment model, borrowing some
	# elements from the hap-hap model fit

	hdInitParams = dict(hdDefaultParams)
	hdInitParams["zp.copy.y"] = hhFitParams["zp.copy.y"]
	hdInitParams["p.e"      ] = hhFitParams["p.e"]
	hdInitParams["shape.e"  ] = hhFitParams["shape.e"]
	hdInitParams["scale.e"  ] = hhFitParams["scale.e"]
	hdInitParams["p.y"      ] = hhFitParams["p.y"]
	hdInitParams["u.y"      ] = hhFitParams["u.y"]
	hdInitParams["sd.y"     ] = hhFitParams["sd.y"]
	hdInitParams["shape.y"  ] = hhFitParams["shape.y"]

	pAuto = 1 - float(hdInitParams["p.y"])
	pHom  =     float(hdInitParams["p.hom"])
	hdInitParams["u.hom"    ] =         pAuto * pHom * float(hdInitParams["u.y"])
	hdInitParams["sd.hom"   ] = sdHom = pAuto * pHom * float(hdInitParams["sd.y"])
	hdInitParams["var.het"  ] = sdHom * sdHom

	# perform hap-dip fit to the mixed components

	hdFitParams = hdFitter.fit(hdInitParams)
	if (hdFitParams == None):
		print >>stderr, "hap-dip: failure or non-convergence"
		print "(hap-dip: failure or non-convergence)"
		print params_to_text(hdParamNames,hdInitParams,hdCheatParams,
		                     prefix="smart.hapdip:",prefix2="cheat.hapdip:")
		if (explainFailure):
			print "... return code ..."
			print hdFitter.retCode
			print "... stdout ..."
			print hdFitter.stdout
			print "... stderr ..."
			print hdFitter.stderr
	else:
		print params_to_text(hdParamNames,hdInitParams,hdFitParams,
		                     prefix="smart.hapdip:",prefix2="cvrg.hapdip:")
		print params_to_text(hdParamNames,hdCheatParams,prefix="cheat.hapdip:")

	# if convergence failed, try moving the initial parameters toward the
	# cheat parameters in small steps until we get convergence
	# $$$ a binary search would be "better"

	numSteps = 100
	step = 0

	while (hdFitParams == None):
		step += 1
		if (step == numSteps): break
		print >>stderr, "step %d" % step

		hdStepParams = {}
		for name in hdInitParams:
			if (name in ["u.hom","sd.hom","var.het"]): continue
			param = float(hdInitParams[name])
			param += (step * (float(hdCheatParams[name]) - param)) / numSteps
			hdStepParams[name] = param

		pAuto = 1 - float(hdStepParams["p.y"])
		pHom  =     float(hdStepParams["p.hom"])
		hdStepParams["u.hom"    ] =         pAuto * pHom * float(hdStepParams["u.y"])
		hdStepParams["sd.hom"   ] = sdHom = pAuto * pHom * float(hdStepParams["sd.y"])
		hdStepParams["var.het"  ] = sdHom * sdHom

		hdFitParams = hdFitter.fit(hdStepParams)
		if (hdFitParams == None):
			print params_to_text(hdParamNames,hdStepParams,
			                     prefix="step[%d].hapdip:" % step)
			#if (explainFailure):
			#	print "... return code ..."
			#	print hdFitter.retCode
			#	print "... stdout ..."
			#	print hdFitter.stdout
			#	print "... stderr ..."
			#	print hdFitter.stderr
		else:
			print params_to_text(hdParamNames,hdStepParams,hdFitParams,
			                     prefix="step[%d].hapdip:" % step,prefix2="cvrg.hapdip:")