예제 #1
0
파일: 20120201a.py 프로젝트: BIGtigr/xgcode
def do_mut_hyper_2_3(fs, to_gtr):
    """
    Build a text report of three rate matrices related to the 8-state cube.

    The report has three sections, each made of a title line followed by
    the output of get_rate_matrix_summary:
    the mutation matrix from mrate.get_sparse_sequence_rate_matrix(2, 3),
    the matrix returned by to_gtr for a target distribution that gives
    fs.p_mid to the last state and splits the rest evenly over the other
    seven states, and a 7-state reference matrix assembled from the
    off-diagonal rates among the first seven states.

    fs -- object providing the p_mid attribute
    to_gtr -- callable invoked as to_gtr(M, p_target)
    Returns the report text with trailing whitespace stripped.
    """
    out = StringIO()

    def write_section(title, matrix):
        # Layout: title, blank line, summary, two blank lines.
        # Writing through out.write instead of the Python 2 only
        # "print >> out" form keeps the text identical and also runs
        # on Python 3.
        out.write('%s\n\n' % (title,))
        out.write('%s\n\n\n' % (get_rate_matrix_summary(matrix),))

    # define the mutation rate matrix on the cube (2 residues, 3 sites)
    M = mrate.get_sparse_sequence_rate_matrix(2, 3)
    write_section('*** mutation rate matrix (8-state cube) ***', M)
    # down-weight only the last state through the target distribution
    p_other = (1 - fs.p_mid) / 7
    p_target = [p_other] * 7 + [fs.p_mid]
    Q = to_gtr(M, p_target)
    write_section('*** mutation-selection balance ***', Q)
    # define a reference mutation rate matrix with the last corner removed
    R = mrate.get_sparse_sequence_rate_matrix(2, 3)
    nstates = 7
    M = np.zeros((nstates, nstates))
    for i in range(nstates):
        for j in range(nstates):
            if i != j:
                M[i, j] = R[i, j]
    # make each row sum to zero
    M -= np.diag(np.sum(M, axis=1))
    # presumably rescales to unit expected rate -- confirm against mrate
    M /= mrate.Q_to_expected_rate(M)
    write_section(
        '*** reference mutation rate matrix (corner removed) ***', M)
    return out.getvalue().rstrip()
예제 #2
0
파일: 20120201a.py 프로젝트: BIGtigr/xgcode
def do_mut_hyper_2_3_square(fs, to_gtr):
    """
    Build a text report comparing the 8-state cube with a 4-state square.

    The report has three sections, each made of a title line followed by
    the output of get_rate_matrix_summary:
    the mutation matrix from mrate.get_sparse_sequence_rate_matrix(2, 3),
    the matrix returned by to_gtr for a target distribution that gives
    fs.p_mid to each of the last four states and splits the remaining
    mass evenly over the first four, and the reference matrix from
    mrate.get_sparse_sequence_rate_matrix(2, 2).

    fs -- object providing the p_mid attribute
    to_gtr -- callable invoked as to_gtr(M, p_target)
    Returns the report text with trailing whitespace stripped.
    """
    out = StringIO()

    def write_section(title, matrix):
        # Layout: title, blank line, summary, two blank lines.
        # out.write replaces the Python 2 only "print >> out" form;
        # the emitted text is unchanged.
        out.write('%s\n\n' % (title,))
        out.write('%s\n\n\n' % (get_rate_matrix_summary(matrix),))

    # define the mutation rate matrix on the cube (2 residues, 3 sites)
    M = mrate.get_sparse_sequence_rate_matrix(2, 3)
    write_section('*** mutation rate matrix (8-state cube) ***', M)
    # down-weight the last four states through the target distribution;
    # the eight probabilities sum to one by construction
    p_other = (1 - 4 * fs.p_mid) / 4
    p_target = [p_other] * 4 + [fs.p_mid] * 4
    Q = to_gtr(M, p_target)
    write_section('*** mutation-selection balance ***', Q)
    # define a reference mutation rate matrix (2 residues, 2 sites)
    M = mrate.get_sparse_sequence_rate_matrix(2, 2)
    write_section('*** reference mutation rate matrix (square) ***', M)
    return out.getvalue().rstrip()
예제 #3
0
파일: 20120201a.py 프로젝트: BIGtigr/xgcode
def do_mut_hyper_2_2(fs, to_gtr):
    """
    Build a text report comparing the 4-state square with a 3-state path.

    The report has three sections, each made of a title line followed by
    the output of get_rate_matrix_summary:
    the mutation matrix from mrate.get_sparse_sequence_rate_matrix(2, 2),
    the matrix returned by to_gtr for a target distribution that gives
    fs.p_mid to the last state and splits the rest evenly over the other
    three, and the reference matrix from mrate.get_path_rate_matrix(3).

    fs -- object providing the p_mid attribute
    to_gtr -- callable invoked as to_gtr(M, p_target)
    Returns the report text with trailing whitespace stripped.
    """
    out = StringIO()

    def write_section(title, matrix):
        # Layout: title, blank line, summary, two blank lines.
        # out.write replaces the Python 2 only "print >> out" form;
        # the emitted text is unchanged.
        out.write('%s\n\n' % (title,))
        out.write('%s\n\n\n' % (get_rate_matrix_summary(matrix),))

    # define the mutation rate matrix on the square (2 residues, 2 sites)
    M = mrate.get_sparse_sequence_rate_matrix(2, 2)
    write_section('*** mutation rate matrix (4-state square) ***', M)
    # down-weight only the last state through the target distribution
    p_other = (1 - fs.p_mid) / 3
    p_target = (p_other, p_other, p_other, fs.p_mid)
    Q = to_gtr(M, p_target)
    write_section('*** mutation-selection balance ***', Q)
    # define a reference mutation rate matrix
    M = mrate.get_path_rate_matrix(3)
    write_section(
        '*** reference mutation rate matrix (3-state path) ***', M)
    return out.getvalue().rstrip()
예제 #4
0
파일: 20120124a.py 프로젝트: BIGtigr/xgcode
def get_table_string_and_scripts(fs):
    """
    Build the time-statistics table and the R scripts that plot it.

    A mutation rate matrix is built from fs.nresidues and fs.nsites,
    fs.nselections random selection vectors are sampled, and each one is
    combined with the mutation matrix using the Halpern-Bruno formula.
    get_time_point_summary is then evaluated at fs.ntimes evenly spaced
    times from fs.t_low to fs.t_high; each call yields a
    (sign list, statistics) pair.

    fs -- object providing nresidues, nsites, nselections, t_low, t_high,
        ntimes, the variance flags (low_var, medium_var, high_var,
        really_high_var) and the skew flags (neg_skew, no_skew, pos_skew)
    Returns a (table_string, scripts) pair where scripts holds the band,
    proportion and crossing R scripts in that order.
    Raises ValueError if there are more than 256 states or if no variance
    or skew flag is set.
    """
    nstates = fs.nresidues**fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # The variance and skew choices do not change between samples,
    # so resolve them once and fail clearly when none is selected.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError('no selection variance option is set')
    s = math.sqrt(v)
    if fs.neg_skew:
        def draw_selection():
            return -random.expovariate(1 / s)
    elif fs.no_skew:
        def draw_selection():
            return random.gauss(0, s)
    elif fs.pos_skew:
        def draw_selection():
            return random.expovariate(1 / s)
    else:
        raise ValueError('no selection skew option is set')
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        sels = [draw_selection() for i in range(nstates)]
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                # a zero mutation rate stays zero whatever the selection
                if i == j or Q_mut[i, j] == 0:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # limit of log(tau) / (1 - 1 / tau) as tau -> 1;
                    # the formula itself would divide by zero
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1 / tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to 0
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # define the time points;
    # float() avoids floor division under Python 2 if both bounds are ints
    incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + i * incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    pairs = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*pairs)
    # For each selection sample count how often its sign
    # differs between consecutive time points.
    ncrossing_list = []
    for signs in zip(*mi_sign_lists):
        ncrossing_list.append(sum(
            1 for sign_a, sign_b in iterutils.pairwise(signs)
            if sign_a != sign_b))
    # get the R scripts
    scripts = [
        get_r_band_script(nsels, time_stats),
        get_r_prop_script(nsels, time_stats),
        get_r_cross_script(ncrossing_list),
    ]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
예제 #5
0
def get_table_string_and_scripts(fs):
    """
    Build the time-statistics table and the R scripts that plot it.

    A mutation rate matrix is built from fs.nresidues and fs.nsites,
    fs.nselections random selection vectors are sampled, and each one is
    combined with the mutation matrix using the Halpern-Bruno formula.
    get_time_point_summary is then evaluated at fs.ntimes evenly spaced
    times from fs.t_low to fs.t_high; each call yields a
    (sign list, statistics) pair.

    fs -- object providing nresidues, nsites, nselections, t_low, t_high,
        ntimes, the variance flags (low_var, medium_var, high_var,
        really_high_var) and the skew flags (neg_skew, no_skew, pos_skew)
    Returns a (table_string, scripts) pair where scripts holds the band,
    proportion and crossing R scripts in that order.
    Raises ValueError if there are more than 256 states or if no variance
    or skew flag is set.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # The variance and skew choices do not change between samples,
    # so resolve them once and fail clearly when none is selected.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError('no selection variance option is set')
    s = math.sqrt(v)
    if fs.neg_skew:
        def draw_selection():
            return -random.expovariate(1/s)
    elif fs.no_skew:
        def draw_selection():
            return random.gauss(0, s)
    elif fs.pos_skew:
        def draw_selection():
            return random.expovariate(1/s)
    else:
        raise ValueError('no selection skew option is set')
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        sels = [draw_selection() for i in range(nstates)]
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                # a zero mutation rate stays zero whatever the selection
                if i == j or Q_mut[i, j] == 0:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # limit of log(tau) / (1 - 1/tau) as tau -> 1;
                    # the formula itself would divide by zero
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1/tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to 0
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # define the time points;
    # float() avoids floor division under Python 2 if both bounds are ints
    incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + i*incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    pairs = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*pairs)
    # For each selection sample count how often its sign
    # differs between consecutive time points.
    ncrossing_list = []
    for signs in zip(*mi_sign_lists):
        ncrossing_list.append(sum(
            1 for sign_a, sign_b in iterutils.pairwise(signs)
            if sign_a != sign_b))
    # get the R scripts
    scripts = [
            get_r_band_script(nsels, time_stats),
            get_r_prop_script(nsels, time_stats),
            get_r_cross_script(ncrossing_list)]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
예제 #6
0
def get_table_string_and_scripts(fs):
    """
    Build the time-statistics table and the R scripts for the tikz plots.

    The latex documentbody should have a bunch of tikz pieces in it.
    Each tikz piece should have been generated from R.

    A mutation rate matrix is built from fs.nresidues and fs.nsites,
    fs.nselections random selection vectors are sampled, and each one is
    combined with the mutation matrix using the Halpern-Bruno formula.
    get_time_point_summary is then evaluated at fs.ntimes evenly spaced
    times from fs.t_low to fs.t_high.

    fs -- object providing nresidues, nsites, nselections, t_low, t_high,
        ntimes, the variance flags (low_var, medium_var, high_var,
        really_high_var) and the skew flags (neg_skew, no_skew, pos_skew)
    Returns a (table_string, scripts) pair where scripts holds the
    correlation, proportion and information plot scripts in that order.
    Raises ValueError if there are more than 256 states or if no variance
    or skew flag is set.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError("the mutation rate matrix is too big")
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # The variance and skew choices do not change between samples,
    # so resolve them once and fail clearly when none is selected.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError("no selection variance option is set")
    s = math.sqrt(v)
    if fs.neg_skew:
        def draw_selection():
            return -random.expovariate(1 / s)
    elif fs.no_skew:
        def draw_selection():
            return random.gauss(0, s)
    elif fs.pos_skew:
        def draw_selection():
            return random.expovariate(1 / s)
    else:
        raise ValueError("no selection skew option is set")
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        sels = [draw_selection() for i in range(nstates)]
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                # a zero mutation rate stays zero whatever the selection
                if i == j or Q_mut[i, j] == 0:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # limit of log(tau) / (1 - 1 / tau) as tau -> 1;
                    # the formula itself would divide by zero
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1 / tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to 0
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # define the time points;
    # float() avoids floor division under Python 2 if both bounds are ints
    incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + i * incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    time_stats = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    # get the R scripts
    # NOTE: get_r_tikz_mi_plot(nsels, time_stats) is disabled here.
    scripts = [
        get_r_tikz_corr_plot(nsels, time_stats),
        get_r_tikz_prop_plot(nsels, time_stats),
        get_r_tikz_info_plot(nsels, time_stats),
    ]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
예제 #7
0
파일: 20120122a.py 프로젝트: BIGtigr/xgcode
def get_table_string_and_scripts(fs):
    """
    Build the time-statistics table and the R scripts for the tikz plots.

    The latex documentbody should have a bunch of tikz pieces in it.
    Each tikz piece should have been generated from R.

    A mutation rate matrix is built from fs.nresidues and fs.nsites,
    fs.nselections random selection vectors are sampled, and each one is
    combined with the mutation matrix using the Halpern-Bruno formula.
    get_time_point_summary is then evaluated at fs.ntimes evenly spaced
    times from fs.t_low to fs.t_high.

    fs -- object providing nresidues, nsites, nselections, t_low, t_high,
        ntimes, the variance flags (low_var, medium_var, high_var,
        really_high_var) and the skew flags (neg_skew, no_skew, pos_skew)
    Returns a (table_string, scripts) pair where scripts holds the
    correlation, proportion and information plot scripts in that order.
    Raises ValueError if there are more than 256 states or if no variance
    or skew flag is set.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # The variance and skew choices do not change between samples,
    # so resolve them once and fail clearly when none is selected.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError('no selection variance option is set')
    s = math.sqrt(v)
    if fs.neg_skew:
        def draw_selection():
            return -random.expovariate(1/s)
    elif fs.no_skew:
        def draw_selection():
            return random.gauss(0, s)
    elif fs.pos_skew:
        def draw_selection():
            return random.expovariate(1/s)
    else:
        raise ValueError('no selection skew option is set')
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        sels = [draw_selection() for i in range(nstates)]
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                # a zero mutation rate stays zero whatever the selection
                if i == j or Q_mut[i, j] == 0:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # limit of log(tau) / (1 - 1/tau) as tau -> 1;
                    # the formula itself would divide by zero
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1/tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to 0
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # define the time points;
    # float() avoids floor division under Python 2 if both bounds are ints
    incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + i*incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    time_stats = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    # get the R scripts
    # NOTE: get_r_tikz_mi_plot(nsels, time_stats) is disabled here.
    scripts = [
            get_r_tikz_corr_plot(nsels, time_stats),
            get_r_tikz_prop_plot(nsels, time_stats),
            get_r_tikz_info_plot(nsels, time_stats)]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
예제 #8
0
def get_time_stats(fs):
    """
    Summarize sampled mutation-selection processes at evenly spaced times.

    A mutation rate matrix is built from fs.nresidues and fs.nsites,
    fs.nselections random selection vectors are sampled, and each one is
    combined with the mutation matrix using the Halpern-Bruno formula.

    fs -- object providing nresidues, nsites, nselections, t_low, t_high,
        ntimes, the variance flags (low_var, medium_var, high_var,
        really_high_var) and the skew flags (neg_skew, no_skew, pos_skew)
    Returns a list with one get_time_point_summary result for each of the
    fs.ntimes time points between fs.t_low and fs.t_high.
    Raises ValueError if there are more than 256 states or if no variance
    or skew flag is set.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # The variance and skew choices do not change between samples,
    # so resolve them once and fail clearly when none is selected.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError('no selection variance option is set')
    s = math.sqrt(v)
    if fs.neg_skew:
        def draw_selection():
            return -random.expovariate(1/s)
    elif fs.no_skew:
        def draw_selection():
            return random.gauss(0, s)
    elif fs.pos_skew:
        def draw_selection():
            return random.expovariate(1/s)
    else:
        raise ValueError('no selection skew option is set')
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        sels = [draw_selection() for i in range(nstates)]
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                # a zero mutation rate stays zero whatever the selection
                if i == j or Q_mut[i, j] == 0:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # limit of log(tau) / (1 - 1/tau) as tau -> 1;
                    # the formula itself would divide by zero
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1/tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to 0
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # define the time points;
    # float() avoids floor division under Python 2 if both bounds are ints
    incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + i*incr for i in range(fs.ntimes)]
    # compute the statistics
    return [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
예제 #9
0
파일: 20120124b.py 프로젝트: BIGtigr/xgcode
def get_time_stats(fs):
    """
    Summarize sampled mutation-selection processes at evenly spaced times.

    A mutation rate matrix is built from fs.nresidues and fs.nsites,
    fs.nselections random selection vectors are sampled, and each one is
    combined with the mutation matrix using the Halpern-Bruno formula.

    fs -- object providing nresidues, nsites, nselections, t_low, t_high,
        ntimes, the variance flags (low_var, medium_var, high_var,
        really_high_var) and the skew flags (neg_skew, no_skew, pos_skew)
    Returns a list with one get_time_point_summary result for each of the
    fs.ntimes time points between fs.t_low and fs.t_high.
    Raises ValueError if there are more than 256 states or if no variance
    or skew flag is set.
    """
    nstates = fs.nresidues**fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # The variance and skew choices do not change between samples,
    # so resolve them once and fail clearly when none is selected.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError('no selection variance option is set')
    s = math.sqrt(v)
    if fs.neg_skew:
        def draw_selection():
            return -random.expovariate(1 / s)
    elif fs.no_skew:
        def draw_selection():
            return random.gauss(0, s)
    elif fs.pos_skew:
        def draw_selection():
            return random.expovariate(1 / s)
    else:
        raise ValueError('no selection skew option is set')
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        sels = [draw_selection() for i in range(nstates)]
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                # a zero mutation rate stays zero whatever the selection
                if i == j or Q_mut[i, j] == 0:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # limit of log(tau) / (1 - 1 / tau) as tau -> 1;
                    # the formula itself would divide by zero
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1 / tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to 0
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # define the time points;
    # float() avoids floor division under Python 2 if both bounds are ints
    incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + i * incr for i in range(fs.ntimes)]
    # compute the statistics
    return [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
예제 #10
0
파일: 20120127a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Write a text report bounding when one process has more information.

    A mutation rate matrix is built from fs.nresidues and fs.nsites and a
    single mutation-selection matrix is sampled from it with
    sample_rate_matrix. RateProperties is computed for both, and the
    report lists both property summaries followed by three time values:
    a naive crossing time, a "weak inequality" bound and, when the
    numerical solver returns a value, a "stronger inequality" bound.

    fs -- object providing nresidues and nsites, and passed through to
        sample_rate_matrix
    Returns the report text.
    Raises ValueError if there are more than 256 states.
    As a side effect the numpy print line width is set to 200.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # get the random selection matrix which we will use from now on
    Q_sel = sample_rate_matrix(fs, Q_mut)
    mut_info = RateProperties(Q_mut)
    sel_info = RateProperties(Q_sel)
    # Both crossing times share this denominator.
    # NOTE(review): equal lam values make it zero, and the divisions
    # below then raise ZeroDivisionError.
    lam_gap = 2 * abs(mut_info.lam - sel_info.lam)
    # compute the intersection time
    x_time = math.log(2 * nstates - 1) / lam_gap
    # compute the upper bound on the judgement time
    T_second_order = max(
            x_time,
            mut_info.time_to_usefulness,
            sel_info.time_to_usefulness)
    # define the name of the eventually winning process
    if mut_info.relaxation_time > sel_info.relaxation_time:
        x = 'mutation'
        slow_info = mut_info
        fast_info = sel_info
    else:
        x = 'mutation-selection balance'
        slow_info = sel_info
        fast_info = mut_info
    eventual_winner_name = 'the %s process' % x
    # get a more sophisticated bound
    third_order_x_time = ctmcmitaylor.get_sophisticated_time_bound(
            -slow_info.lam,
            -fast_info.lam,
            slow_info.N,
            fast_info.N,
            slow_info.p,
            fast_info.p)
    if third_order_x_time is not None:
        T_third_order = max(
                third_order_x_time,
                mut_info.time_to_uniformity,
                sel_info.time_to_uniformity)
    else:
        T_third_order = None
    # Define a naive crossing time.
    # This is not a bound on the true mutual information doomsday,
    # but it shows a limit of our approach.
    # It is the bound on the spectral taylor approximation
    # given only the second eigenvalues and not the other ones.
    naive_x_time = math.log(nstates - 1) / lam_gap
    # write the report
    np.set_printoptions(linewidth=200)
    out = StringIO()

    def emit(*items):
        # One report line: the items separated by single spaces, then a
        # newline. This matches what the Python 2 "print >> out" form
        # wrote, and also runs on Python 3.
        out.write(' '.join('%s' % (item,) for item in items) + '\n')

    emit('*** mutation rate matrix info ***')
    emit()
    emit(mut_info)
    emit()
    emit()
    emit('*** mutation-selection balance rate matrix info ***')
    emit()
    emit(sel_info)
    emit()
    emit()
    emit('*** note ***')
    emit()
    emit('with the general approach taken here,')
    emit('we will not find an eigenvalue time bound')
    emit('smaller than', naive_x_time)
    emit()
    emit()
    emit('*** weak inequality ***')
    emit()
    emit('When t >', T_second_order, eventual_winner_name)
    emit('has greater mutual information (MI) and approximate MI.')
    emit()
    emit()
    emit('*** stronger inequality ***')
    emit()
    if T_third_order is None:
        emit('the numerical solver failed to converge')
    else:
        emit('When t >', T_third_order, eventual_winner_name)
        emit('has greater mutual information (MI) and approximate MI.')
    return out.getvalue()