Ejemplo n.º 1
0
def get_response_content(fs):
    """
    Return a text report about the matrix computed by get_sparse_D.

    The report contains the matrix followed by its unique values.
    The fs argument is not read here; the settings are obtained
    from construct_args().
    """
    # Let numpy print whole arrays without wrapping or summarizing.
    numpy.set_printoptions(linewidth=1000000, threshold=1000000)
    report = StringIO()
    args = construct_args()
    #
    # Choose the genetic code and the matching stop codons.
    if args.mtdna:
        code, stop = npcodon.g_code_mito, npcodon.g_stop_mito
    else:
        code, stop = npcodon.g_code, npcodon.g_stop
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    # The last len(stop) enumerated codons are discarded
    # (presumably these are the stop codons -- confirm in npcodon).
    codons = npcodon.enum_codons(stop)[:-len(stop)]
    ncodons = len(codons)
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    # Add the column sums and the row sums of the substitution count table,
    # then restrict both the sums and the table to the retained codons.
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    column_sums = numpy.sum(subs_counts, axis=0)
    row_sums = numpy.sum(subs_counts, axis=1)
    codon_counts = (column_sums + row_sums)[:ncodons]
    subs_counts = subs_counts[:ncodons, :ncodons]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    # Hard-coded parameter values.
    # Only d, log_kb and log_nt_weights are passed on below.
    log_mu = -3.61291826
    log_gtr_exch = numpy.array(
            [0.76101439, 1.61870564, 0.2481876, 0.02708148, 1.39976982, 0])
    log_omega = -2.26059034
    d = 5.82284506
    log_kb = -1.58612396
    log_nt_weights = numpy.array([-1.01321584, -0.0838657, 0.32300651, 0])
    #
    D = get_sparse_D(gtr, compo, log_counts, d, log_kb, log_nt_weights)
    print >> report, D
    print >> report, numpy.unique(D)
    return report.getvalue()
Ejemplo n.º 2
0
def get_response_content(fs):
    """
    Return a text report about the matrix computed by get_D.

    The report contains the matrix and its unique values, the unique
    values of a mask obtained by summing gtr over its third axis,
    and then the masked matrix together with its unique values.
    The fs argument is not read here; the settings are obtained
    from construct_args().
    """
    # Let numpy print whole arrays without wrapping or summarizing.
    numpy.set_printoptions(
        linewidth=1000000,
        threshold=1000000,
    )
    out = StringIO()
    #
    args = construct_args()
    #
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    # The last len(stop) enumerated codons are discarded
    # (presumably these are the stop codons -- confirm in npcodon).
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    # Add the column sums and the row sums of the substitution count table,
    # then restrict both the sums and the table to the retained codons.
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (numpy.sum(subs_counts, axis=0) +
                    numpy.sum(subs_counts, axis=1))
    codon_counts = codon_counts[:len(codons)]
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    # Hard-coded parameter values.
    # log_mu is a plain float; a stray trailing comma used to turn it
    # into a one-element tuple.
    # log_mu, log_gtr_exch and log_omega are not passed to get_D below.
    log_mu = -4.02576875
    log_gtr_exch = numpy.array(
        [0.50335873, 1.31231415, 0.3491126, -0.17953527, 1.55231821, 0])
    log_omega = -2.2685352
    #
    d = 2.86523675
    log_kb = -0.63087496
    log_nt_weights = numpy.array([-0.12604391, 0.46524165, 0.96822465, 0])
    log_repop = -0.01399715
    #
    D = get_D(gtr, compo, log_counts, d, log_kb, log_nt_weights, log_repop)
    print >> out, D
    print >> out, numpy.unique(D)
    # Multiply D elementwise by the sum of gtr over its third axis.
    mask = numpy.sum(gtr, axis=2)
    D = D * mask
    print >> out, 'unique in mask:', numpy.unique(mask)
    print >> out, D
    print >> out, numpy.unique(D)
    #
    return out.getvalue()
Ejemplo n.º 3
0
def get_response_content(fs):
    """
    Return a text report about the matrix computed by get_sparse_D.

    The parameter values are unpacked from the module-level g_opt_x.
    The report contains the matrix followed by its unique values.
    The fs argument is not read here; the settings are obtained
    from construct_args().
    """
    # Let numpy print whole arrays without wrapping or summarizing.
    numpy.set_printoptions(linewidth=1000000, threshold=1000000)
    report = StringIO()
    args = construct_args()
    #
    # Choose the genetic code and the matching stop codons.
    if args.mtdna:
        code, stop = npcodon.g_code_mito, npcodon.g_stop_mito
    else:
        code, stop = npcodon.g_code, npcodon.g_stop
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    # The last len(stop) enumerated codons are discarded
    # (presumably these are the stop codons -- confirm in npcodon).
    codons = npcodon.enum_codons(stop)[:-len(stop)]
    ncodons = len(codons)
    ts, tv = npcodon.get_ts_tv(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    # Add the column sums and the row sums of the substitution count table,
    # then restrict both the sums and the table to the retained codons.
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    column_sums = numpy.sum(subs_counts, axis=0)
    row_sums = numpy.sum(subs_counts, axis=1)
    codon_counts = (column_sums + row_sums)[:ncodons]
    subs_counts = subs_counts[:ncodons, :ncodons]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    # g_opt_x must hold exactly eight values.
    # The last three are extended with a trailing zero
    # to form the four log nucleotide weights.
    (
            log_mu, log_kappa, log_omega,
            d, log_kb,
            nta, ntc, ntg,
            ) = g_opt_x.tolist()
    log_nt_weights = numpy.array([nta, ntc, ntg, 0])
    D = get_sparse_D(ts, tv, compo, log_counts, d, log_kb, log_nt_weights)
    print >> report, D
    print >> report, numpy.unique(D)
    return report.getvalue()
Ejemplo n.º 4
0
def submain_kacser_dominance_gtr(args):
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    for a, b in zip(codons, codon_counts):
        print a, ':', b
    print 'raw codon total:', numpy.sum(codon_counts)
    print 'raw codon counts:', codon_counts
    codon_counts = codon_counts[:len(codons)]
    print 'non-stop codon total:', numpy.sum(codon_counts)
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    # get the minimum expected number of substitutions between codons
    mu_empirical = npcodon.get_lb_expected_subs(ham, subs_counts)
    print 'lower bound on expected mutations per codon site:', mu_empirical
    print
    print 'entropy lower bound on negative log likelihood:',
    print npcodon.get_lb_neg_ll(subs_counts)
    print
    #
    # initialize parameter value guesses
    d = 0.5
    log_kb = 0
    theta = numpy.array([
        d, log_kb,
        0, 0, 0,
        ], dtype=float)
    boxed_guess = [None]
    fmin_args = (
            mu_empirical, subs_counts, log_counts, v,
            gtr, syn, nonsyn, compo, asym_compo,
            boxed_guess,
            )
    f = eval_f_kacser_gtr
    results = scipy.optimize.minimize(
            f,
            theta,
            args=fmin_args,
            method='Nelder-Mead',
            )
    print 'results:', results
    xopt = results.x
    print 'optimal solution vector:', xopt
    print 'exp optimal solution vector:', numpy.exp(xopt)
    print
Ejemplo n.º 5
0
def get_response_content(fs):
    """
    Return a text report about the matrix computed by get_D.

    The report contains the matrix and its unique values, the unique
    values of a mask obtained by summing gtr over its third axis,
    and then the masked matrix together with its unique values.
    The fs argument is not read here; the settings are obtained
    from construct_args().
    """
    # Let numpy print whole arrays without wrapping or summarizing.
    numpy.set_printoptions(
            linewidth=1000000,
            threshold=1000000,
            )
    out = StringIO()
    #
    args = construct_args()
    #
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    # The last len(stop) enumerated codons are discarded
    # (presumably these are the stop codons -- confirm in npcodon).
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    # Add the column sums and the row sums of the substitution count table,
    # then restrict both the sums and the table to the retained codons.
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    codon_counts = codon_counts[:len(codons)]
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    # Hard-coded parameter values.
    # log_mu is a plain float; a stray trailing comma used to turn it
    # into a one-element tuple.
    # log_mu, log_gtr_exch and log_omega are not passed to get_D below.
    log_mu = -4.02576875
    log_gtr_exch = numpy.array([
        0.50335873,
        1.31231415,
        0.3491126,
        -0.17953527,
        1.55231821,
        0])
    log_omega = -2.2685352
    #
    d = 2.86523675
    log_kb = -0.63087496
    log_nt_weights = numpy.array([
        -0.12604391,
        0.46524165,
        0.96822465,
        0])
    log_repop = -0.01399715
    #
    D = get_D(
            gtr, compo,
            log_counts, d, log_kb, log_nt_weights, log_repop)
    print >> out, D
    print >> out, numpy.unique(D)
    # Multiply D elementwise by the sum of gtr over its third axis.
    mask = numpy.sum(gtr, axis=2)
    D = D * mask
    print >> out, 'unique in mask:', numpy.unique(mask)
    print >> out, D
    print >> out, numpy.unique(D)
    #
    return out.getvalue()
Ejemplo n.º 6
0
def submain_constrained_dominance(args):
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna or args.force_mtcode:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    for a, b in zip(codons, codon_counts):
        print a, ':', b
    print 'raw codon total:', numpy.sum(codon_counts)
    print 'raw codon counts:', codon_counts
    codon_counts = codon_counts[:len(codons)]
    print 'non-stop codon total:', numpy.sum(codon_counts)
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    if args.disease == 'genic':
        h = get_fixation_genic
    elif args.disease == 'recessive':
        h = get_fixation_recessive_disease
    elif args.disease == 'dominant':
        h = get_fixation_dominant_disease
    else:
        raise Exception
    #
    # predefine some plausible parameters but not the scaling parameter
    log_mu = 0
    log_g = numpy.zeros(6, dtype=float)
    log_omega = -3
    log_nt_weights = numpy.zeros(4, dtype=float)
    #
    # get the rate matrix associated with the initial guess
    Q = get_Q(
            gtr, syn, nonsyn, compo, asym_compo,
            h,
            log_counts,
            log_mu, log_g, log_omega, log_nt_weights)
    #
    # get the minimum expected number of substitutions between codons
    mu_empirical = npcodon.get_lb_expected_subs(ham, subs_counts)
    mu_implied = -numpy.sum(numpy.diag(Q) * v)
    log_mu = math.log(mu_empirical) - math.log(mu_implied)
    print 'lower bound on expected mutations per codon site:', mu_empirical
    print
    # construct the initial guess
    theta = numpy.array([
        log_mu,
        0, 0, 0, 0, 0,
        log_omega,
        0, 0, 0,
        ])
    #
    # get the log likelihood associated with the initial guess
    fmin_args = (
            subs_counts, log_counts, v,
            h,
            gtr, syn, nonsyn, compo, asym_compo,
            )
    initial_cost = eval_f(theta, *fmin_args)
    print 'negative log likelihood of initial guess:',
    print initial_cost
    print
    print 'entropy bound on negative log likelihood:',
    print npcodon.get_lb_neg_ll(subs_counts)
    print
    #
    # search for the minimum negative log likelihood over multiple parameters
    if args.fmin == 'simplex':
        results = scipy.optimize.fmin(
                eval_f,
                theta,
                args=fmin_args,
                maxfun=10000,
                maxiter=10000,
                xtol=1e-8,
                ftol=1e-8,
                full_output=True,
                )
    elif args.fmin == 'bfgs':
        results = scipy.optimize.fmin_bfgs(
                eval_f,
                theta,
                args=fmin_args,
                maxiter=10000,
                full_output=True,
                )
    elif args.fmin == 'jeffopt':
        results = jeffopt.fmin_jeff_unconstrained(
                eval_f,
                theta,
                args=fmin_args,
                )
    elif args.fmin == 'ncg':
        results = scipy.optimize.fmin_ncg(
                eval_f,
                theta,
                fprime=eval_grad_f,
                fhess=eval_hess_f,
                args=fmin_args,
                avextol=1e-6,
                maxiter=10000,
                full_output=True,
                disp=True,
                retall=True,
                )
    else:
        raise Exception
    print 'results:', results
    xopt = results[0]
    print 'optimal solution vector:', xopt
    print 'exp optimal solution vector:', numpy.exp(xopt)
    print
    print 'inverse of hessian:'
    print scipy.linalg.inv(eval_hess_f(xopt, *fmin_args))
    print
Ejemplo n.º 7
0
def submain_constrained_dominance(args):
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna or args.force_mtcode:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    ts, tv = npcodon.get_ts_tv(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = numpy.sum(subs_counts, axis=0) + numpy.sum(
            subs_counts, axis=1)
    for a, b in zip(codons, codon_counts):
        print a, ':', b
    print 'raw codon total:', numpy.sum(codon_counts)
    print 'raw codon counts:', codon_counts
    codon_counts = codon_counts[:len(codons)]
    print 'non-stop codon total:', numpy.sum(codon_counts)
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    if args.disease == 'genic':
        h = get_fixation_genic
    elif args.disease == 'recessive':
        h = get_fixation_recessive_disease
    elif args.disease == 'dominant':
        h = get_fixation_dominant_disease
    else:
        raise Exception
    #
    # initialize parameter values
    mu_r = 1.0
    kappa = 2.0
    omega = 0.1
    #pA = 0.25
    #pC = 0.25
    #pG = 0.25
    #pT = 0.25
    theta = numpy.array([mu_r, kappa, omega, 0, 0, 0, 0])
    #
    # adjust the expected rate parameter
    Q = get_Q_slsqp(
            ts, tv, syn, nonsyn, compo, asym_compo,
            h,
            log_counts, v,
            theta)
    expected_rate = -algopy.dot(algopy.diag(Q), v)
    mu_n = 1. / expected_rate
    mu_r = npcodon.get_lb_expected_subs(ham, subs_counts)
    theta = numpy.array([mu_r, kappa, omega, 0, 0, 0, 0])
    #
    # get the log likelihood associated with the initial guess
    fmin_args = (
            subs_counts, log_counts, v,
            h,
            ts, tv, syn, nonsyn, compo, asym_compo,
            )
    initial_cost = eval_f(theta, *fmin_args)
    print 'negative log likelihood of initial guess:',
    print initial_cost
    print
    print 'entropy bound on negative log likelihood:',
    print npcodon.get_lb_neg_ll(subs_counts)
    print
    do_opt(args, eval_f, theta, fmin_args)
Ejemplo n.º 8
0
def submain_unconstrained_dominance(args):
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna or args.force_mtcode:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    ts, tv = npcodon.get_ts_tv(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (
            numpy.sum(subs_counts, axis=0) + numpy.sum(subs_counts, axis=1))
    for a, b in zip(codons, codon_counts):
        print a, ':', b
    print 'raw codon total:', numpy.sum(codon_counts)
    print 'raw codon counts:', codon_counts
    codon_counts = codon_counts[:len(codons)]
    print 'non-stop codon total:', numpy.sum(codon_counts)
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    """
    if args.disease == 'unconstrained':
        if args.integrate == 'quadrature':
            h = get_fixation_unconstrained_quad
        elif args.integrate == 'special':
            h = get_fixation_unconstrained
        else:
            raise Exception
    else:
        raise Exception
    """
    #FIXME: the h parameter is becoming obsolete
    h = None
    # predefine some plausible parameters but not the scaling parameter
    log_mu = 0
    log_kappa = 1
    log_omega = -3
    d = 1.6
    #d = 0.5
    #d = -1.2
    log_nt_weights = numpy.zeros(4)
    #
    # get the rate matrix associated with the initial guess
    Q = get_Q_unconstrained(
            ts, tv, syn, nonsyn, compo, asym_compo,
            h,
            log_counts,
            log_mu, log_kappa, log_omega, d, log_nt_weights)
    #
    # get the minimum expected number of substitutions between codons
    mu_empirical = npcodon.get_lb_expected_subs(ham, subs_counts)
    mu_implied = -numpy.sum(numpy.diag(Q) * v)
    log_mu = math.log(mu_empirical) - math.log(mu_implied)
    print 'lower bound on expected mutations per codon site:', mu_empirical
    print
    # construct the initial guess
    theta = numpy.array([
        log_mu,
        log_kappa,
        log_omega,
        d,
        0,
        0,
        0,
        ])
    #
    # get the log likelihood associated with the initial guess
    fmin_args = (
            subs_counts, log_counts, v,
            h,
            ts, tv, syn, nonsyn, compo, asym_compo,
            )
    initial_cost = eval_f_unconstrained(theta, *fmin_args)
    print 'negative log likelihood of initial guess:',
    print initial_cost
    print
    print 'entropy bound on negative log likelihood:',
    print npcodon.get_lb_neg_ll(subs_counts)
    print
    do_opt(args, eval_f_unconstrained, theta, fmin_args)
Ejemplo n.º 9
0
def submain_constrained_dominance(args):
    #
    # Precompute some ndarrays
    # according to properties of DNA and the genetic code.
    if args.mtdna or args.force_mtcode:
        code = npcodon.g_code_mito
        stop = npcodon.g_stop_mito
    else:
        code = npcodon.g_code
        stop = npcodon.g_stop
    #
    all_codons = npcodon.enum_codons(stop)
    codons = all_codons[:-len(stop)]
    gtr = npcodon.get_gtr(codons)
    syn, nonsyn = npcodon.get_syn_nonsyn(code, codons)
    compo = npcodon.get_compo(codons)
    asym_compo = npcodon.get_asym_compo(codons)
    ham = npcodon.get_hamming(codons)
    #
    subs_counts = yangdata.get_subs_counts_from_data_files(args)
    codon_counts = (numpy.sum(subs_counts, axis=0) +
                    numpy.sum(subs_counts, axis=1))
    for a, b in zip(codons, codon_counts):
        print a, ':', b
    print 'raw codon total:', numpy.sum(codon_counts)
    print 'raw codon counts:', codon_counts
    codon_counts = codon_counts[:len(codons)]
    print 'non-stop codon total:', numpy.sum(codon_counts)
    subs_counts = subs_counts[:len(codons), :len(codons)]
    v = codon_counts / float(numpy.sum(codon_counts))
    log_counts = numpy.log(codon_counts)
    #
    if args.disease == 'genic':
        h = get_fixation_genic
    elif args.disease == 'recessive':
        h = get_fixation_recessive_disease
    elif args.disease == 'dominant':
        h = get_fixation_dominant_disease
    else:
        raise Exception
    #
    # predefine some plausible parameters but not the scaling parameter
    log_mu = 0
    log_g = numpy.zeros(6, dtype=float)
    log_omega = -3
    log_nt_weights = numpy.zeros(4, dtype=float)
    #
    # get the rate matrix associated with the initial guess
    Q = get_Q(gtr, syn, nonsyn, compo, asym_compo, h, log_counts, log_mu,
              log_g, log_omega, log_nt_weights)
    #
    # get the minimum expected number of substitutions between codons
    mu_empirical = npcodon.get_lb_expected_subs(ham, subs_counts)
    mu_implied = -numpy.sum(numpy.diag(Q) * v)
    log_mu = math.log(mu_empirical) - math.log(mu_implied)
    print 'lower bound on expected mutations per codon site:', mu_empirical
    print
    # construct the initial guess
    theta = numpy.array([
        log_mu,
        0,
        0,
        0,
        0,
        0,
        log_omega,
        0,
        0,
        0,
    ])
    #
    # get the log likelihood associated with the initial guess
    fmin_args = (
        subs_counts,
        log_counts,
        v,
        h,
        gtr,
        syn,
        nonsyn,
        compo,
        asym_compo,
    )
    initial_cost = eval_f(theta, *fmin_args)
    print 'negative log likelihood of initial guess:',
    print initial_cost
    print
    print 'entropy bound on negative log likelihood:',
    print npcodon.get_lb_neg_ll(subs_counts)
    print
    #
    # search for the minimum negative log likelihood over multiple parameters
    if args.fmin == 'simplex':
        results = scipy.optimize.fmin(
            eval_f,
            theta,
            args=fmin_args,
            maxfun=10000,
            maxiter=10000,
            xtol=1e-8,
            ftol=1e-8,
            full_output=True,
        )
    elif args.fmin == 'bfgs':
        results = scipy.optimize.fmin_bfgs(
            eval_f,
            theta,
            args=fmin_args,
            maxiter=10000,
            full_output=True,
        )
    elif args.fmin == 'jeffopt':
        results = jeffopt.fmin_jeff_unconstrained(
            eval_f,
            theta,
            args=fmin_args,
        )
    elif args.fmin == 'ncg':
        results = scipy.optimize.fmin_ncg(
            eval_f,
            theta,
            fprime=eval_grad_f,
            fhess=eval_hess_f,
            args=fmin_args,
            avextol=1e-6,
            maxiter=10000,
            full_output=True,
            disp=True,
            retall=True,
        )
    else:
        raise Exception
    print 'results:', results
    xopt = results[0]
    print 'optimal solution vector:', xopt
    print 'exp optimal solution vector:', numpy.exp(xopt)
    print
    print 'inverse of hessian:'
    print scipy.linalg.inv(eval_hess_f(xopt, *fmin_args))
    print