def get_response_content(fs):
    numpy.set_printoptions(
            linewidth=1000000,
            threshold=1000000,
            )
    out = StringIO()
    #
    args = construct_args()
    #
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    codon_counts = codon_counts[:len(codons)]
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    log_mu = -3.61291826
    log_gtr_exch = numpy.array([
        0.76101439, 1.61870564, 0.2481876, 0.02708148, 1.39976982, 0])
    log_omega = -2.26059034
    d = 5.82284506
    log_kb = -1.58612396
    log_nt_weights = numpy.array([
        -1.01321584, -0.0838657, 0.32300651, 0])
    D = get_sparse_D(
            gtr, compo, log_counts,
            d, log_kb, log_nt_weights)
    print >> out, D
    print >> out, numpy.unique(D)
    #
    return out.getvalue()
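
# The aggregation above (summing subs_counts over both axes to get per-codon
# counts, then normalizing to a distribution v) can be checked on a toy
# matrix.  This is a standalone sketch, not part of the original module; the
# 3x3 matrix and the helper name are arbitrary illustrations.
def _demo_codon_count_aggregation():
    import numpy
    subs_counts = numpy.array([
        [0, 2, 1],
        [3, 0, 4],
        [5, 6, 0],
        ], dtype=float)
    # counts attributed to each state, as both source and destination
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    v = codon_counts / float(numpy.sum(codon_counts))
    print codon_counts   # [ 11.  15.  16.]
    print v              # entries sum to 1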
def get_response_content(fs):
    numpy.set_printoptions(
            linewidth=1000000,
            threshold=1000000,
            )
    out = StringIO()
    #
    args = construct_args()
    #
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    codon_counts = codon_counts[:len(codons)]
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    log_mu = -4.02576875
    log_gtr_exch = numpy.array([
        0.50335873, 1.31231415, 0.3491126, -0.17953527, 1.55231821, 0])
    log_omega = -2.2685352
    #
    d = 2.86523675
    log_kb = -0.63087496
    log_nt_weights = numpy.array([
        -0.12604391, 0.46524165, 0.96822465, 0])
    log_repop = -0.01399715
    #
    D = get_D(
            gtr, compo, log_counts,
            d, log_kb, log_nt_weights, log_repop)
    print >> out, D
    print >> out, numpy.unique(D)
    mask = numpy.sum(gtr, axis=2)
    D = D * mask
    print >> out, 'unique in mask:', numpy.unique(mask)
    print >> out, D
    print >> out, numpy.unique(D)
    #
    return out.getvalue()
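
# Masking sketch: the function above zeroes out entries of D that carry no
# GTR exchangeability by multiplying elementwise with numpy.sum(gtr, axis=2).
# This standalone toy (an arbitrary 2x2x2 'gtr' and 2x2 'D', not the real
# codon arrays) only illustrates the numpy broadcasting involved.
def _demo_gtr_mask():
    import numpy
    gtr = numpy.zeros((2, 2, 2))
    gtr[0, 1, 0] = 1      # pretend the (0, 1) transition is a type-0 exchange
    D = numpy.array([
        [0.0, 3.0],
        [7.0, 0.0],
        ])
    mask = numpy.sum(gtr, axis=2)   # 1 wherever some exchangeability applies
    print mask        # [[ 0.  1.] [ 0.  0.]]
    print D * mask    # keeps only the masked entry D[0, 1]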
def submain_kacser_dominance_gtr(args):
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    for a, b in zip(codons, codon_counts):
        print a, ':', b
    print 'raw codon total:', numpy.sum(codon_counts)
    print 'raw codon counts:', codon_counts
    codon_counts = codon_counts[:len(codons)]
    print 'non-stop codon total:', numpy.sum(codon_counts)
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    # get the minimum expected number of substitutions between codons
    mu_empirical = npcodon.get_lb_expected_subs(ham, subs_counts)
    print 'lower bound on expected mutations per codon site:', mu_empirical
    print
    print 'entropy lower bound on negative log likelihood:',
    print npcodon.get_lb_neg_ll(subs_counts)
    print
    #
    # initialize parameter value guesses
    d = 0.5
    log_kb = 0
    theta = numpy.array([
        d,
        log_kb,
        0,
        0,
        0,
        ], dtype=float)
    boxed_guess = [None]
    fmin_args = (
            mu_empirical,
            subs_counts, log_counts, v,
            gtr, syn, nonsyn, compo, asym_compo,
            boxed_guess,
            )
    f = eval_f_kacser_gtr
    results = scipy.optimize.minimize(
            f,
            theta,
            args=fmin_args,
            method='Nelder-Mead',
            )
    print 'results:', results
    xopt = results.x
    print 'optimal solution vector:', xopt
    print 'exp optimal solution vector:', numpy.exp(xopt)
    print
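
# The Nelder-Mead call above follows the standard scipy.optimize.minimize
# pattern: an objective, an initial theta, extra args, and a result whose .x
# attribute holds the solution.  This is a minimal standalone sketch on a toy
# quadratic objective; the objective, starting point, and offset value are
# arbitrary illustrations, not the codon-model likelihood used above.
def _demo_nelder_mead():
    import numpy
    import scipy.optimize
    def toy_objective(theta, offset):
        # simple convex bowl centered at `offset`
        return numpy.sum((theta - offset) ** 2)
    theta0 = numpy.zeros(3)
    results = scipy.optimize.minimize(
            toy_objective,
            theta0,
            args=(1.5,),
            method='Nelder-Mead',
            )
    print 'optimal solution vector:', results.x   # approximately [1.5 1.5 1.5]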
def submain_constrained_dominance(args):
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna or args.force_mtcode:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    for a, b in zip(codons, codon_counts):
        print a, ':', b
    print 'raw codon total:', numpy.sum(codon_counts)
    print 'raw codon counts:', codon_counts
    codon_counts = codon_counts[:len(codons)]
    print 'non-stop codon total:', numpy.sum(codon_counts)
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    if args.disease == 'genic':
        h = get_fixation_genic
    elif args.disease == 'recessive':
        h = get_fixation_recessive_disease
    elif args.disease == 'dominant':
        h = get_fixation_dominant_disease
    else:
        raise Exception
    #
    # predefine some plausible parameters but not the scaling parameter
    log_mu = 0
    log_g = numpy.zeros(6, dtype=float)
    log_omega = -3
    log_nt_weights = numpy.zeros(4, dtype=float)
    #
    # get the rate matrix associated with the initial guess
    Q = get_Q(
            gtr, syn, nonsyn, compo, asym_compo,
            h,
            log_counts,
            log_mu, log_g, log_omega, log_nt_weights)
    #
    # get the minimum expected number of substitutions between codons
    mu_empirical = npcodon.get_lb_expected_subs(ham, subs_counts)
    mu_implied = -numpy.sum(numpy.diag(Q) * v)
    log_mu = math.log(mu_empirical) - math.log(mu_implied)
    print 'lower bound on expected mutations per codon site:', mu_empirical
    print
    # construct the initial guess
    theta = numpy.array([
        log_mu,
        0, 0, 0, 0, 0,
        log_omega,
        0, 0, 0,
        ])
    #
    # get the log likelihood associated with the initial guess
    fmin_args = (
            subs_counts, log_counts, v,
            h,
            gtr, syn, nonsyn, compo, asym_compo,
            )
    initial_cost = eval_f(theta, *fmin_args)
    print 'negative log likelihood of initial guess:',
    print initial_cost
    print
    print 'entropy bound on negative log likelihood:',
    print npcodon.get_lb_neg_ll(subs_counts)
    print
    #
    # search for the minimum negative log likelihood over multiple parameters
    if args.fmin == 'simplex':
        results = scipy.optimize.fmin(
                eval_f,
                theta,
                args=fmin_args,
                maxfun=10000,
                maxiter=10000,
                xtol=1e-8,
                ftol=1e-8,
                full_output=True,
                )
    elif args.fmin == 'bfgs':
        results = scipy.optimize.fmin_bfgs(
                eval_f,
                theta,
                args=fmin_args,
                maxiter=10000,
                full_output=True,
                )
    elif args.fmin == 'jeffopt':
        results = jeffopt.fmin_jeff_unconstrained(
                eval_f,
                theta,
                args=fmin_args,
                )
    elif args.fmin == 'ncg':
        results = scipy.optimize.fmin_ncg(
                eval_f,
                theta,
                fprime=eval_grad_f,
                fhess=eval_hess_f,
                args=fmin_args,
                avextol=1e-6,
                maxiter=10000,
                full_output=True,
                disp=True,
                retall=True,
                )
    else:
        raise Exception
    print 'results:', results
    xopt = results[0]
    print 'optimal solution vector:', xopt
    print 'exp optimal solution vector:', numpy.exp(xopt)
    print
    print 'inverse of hessian:'
    print scipy.linalg.inv(eval_hess_f(xopt, *fmin_args))
    print
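
# The optimizer dispatch above uses several legacy scipy interfaces; the
# Newton-CG branch additionally needs gradient and hessian callbacks and,
# with full_output=True, returns a tuple whose first element is xopt.  This
# standalone sketch shows that calling convention on a toy quadratic; the
# objective, gradient, and hessian here are arbitrary illustrations, not the
# codon-model eval_f / eval_grad_f / eval_hess_f used above.
def _demo_fmin_ncg():
    import numpy
    import scipy.optimize
    A = numpy.diag([1.0, 2.0, 3.0])
    b = numpy.array([1.0, 1.0, 1.0])
    def toy_f(theta):
        return 0.5 * numpy.dot(theta, numpy.dot(A, theta)) - numpy.dot(b, theta)
    def toy_grad(theta):
        return numpy.dot(A, theta) - b
    def toy_hess(theta):
        return A
    results = scipy.optimize.fmin_ncg(
            toy_f,
            numpy.zeros(3),
            fprime=toy_grad,
            fhess=toy_hess,
            avextol=1e-6,
            full_output=True,
            disp=False,
            )
    xopt = results[0]
    print 'optimal solution vector:', xopt   # approximately [1.0, 0.5, 0.333]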