コード例 #1
0
ファイル: F84.py プロジェクト: BIGtigr/xgcode
def create_rate_matrix(kappa, nt_distribution):
    """
    Build a 4x4 nucleotide rate matrix from kappa and the stationary frequencies.
    The matrix is scaled so that its expected rate is checked to be 1.0 before returning.
    @param kappa: adjusts for the transition rate differing from the transversion rate
    @param nt_distribution: ordered ACGT nucleotide probabilities
    @return: a rate matrix object with one expected nucleotide substitution per time unit
    """
    states = 'ACGT'
    # sanity checks on the distribution: nonnegative, four entries, sums to one
    assert all(p >= 0 for p in nt_distribution)
    assert len(nt_distribution) == 4
    assert RateMatrix.almost_equal(sum(nt_distribution), 1.0)
    # total purine (A, G) and pyrimidine (C, T) frequencies
    pi_a, pi_c, pi_g, pi_t = nt_distribution
    purine_freq = float(pi_a + pi_g)
    pyrimidine_freq = float(pi_c + pi_t)
    # both nucleotide classes must have mass, and kappa must keep the
    # transition coefficients positive
    assert pi_a + pi_g > 0
    assert pi_c + pi_t > 0
    assert kappa > max(-pyrimidine_freq, -purine_freq)
    # the three contributions to the normalization constant
    pyrimidine_term = 4 * pi_t * pi_c * (1 + kappa / pyrimidine_freq)
    purine_term = 4 * pi_a * pi_g * (1 + kappa / purine_freq)
    cross_term = 4 * pyrimidine_freq * purine_freq
    normalization = pyrimidine_term + purine_term + cross_term
    # halve the constant to correct what might be an error in the paper
    normalization /= 2
    # fill in every (source, sink) entry; the diagonal is overwritten below
    freq_of = dict(zip(states, nt_distribution))
    rates = {}
    for src in states:
        for dst in states:
            pair = (src, dst)
            if pair in g_transitions:
                # transitions get an extra kappa-dependent boost
                scale = 1 + kappa / (freq_of[src] + freq_of[dst])
            else:
                scale = 1.0
            rates[pair] = scale * freq_of[dst] / normalization
    # each diagonal entry is the negated sum of the other entries in its row
    for src in states:
        leaving = [rates[(src, dst)] for dst in states if src != dst]
        rates[(src, src)] = -sum(leaving)
    # convert the dictionary form to row major form and wrap it
    row_major = MatrixUtil.dict_to_row_major(rates, states, states)
    rate_matrix_object = RateMatrix.RateMatrix(row_major, states)
    # confirm that the scaling gives one expected substitution per time unit
    expected_rate = rate_matrix_object.get_expected_rate()
    rate_is_unit = RateMatrix.almost_equal(expected_rate, 1.0)
    assert rate_is_unit, 'the rate is %f but should be 1.0' % expected_rate
    return rate_matrix_object
コード例 #2
0
ファイル: F84.py プロジェクト: argriffing/xgcode
def create_rate_matrix(kappa, nt_distribution):
    """
    Construct the rate matrix over ACGT determined by kappa and the nucleotide frequencies.
    Off-diagonal entries are proportional to the sink frequency, with an extra
    kappa-dependent factor for pairs listed in g_transitions; rows sum to zero.
    @param kappa: adjusts for the transition rate differing from the transversion rate
    @param nt_distribution: ordered ACGT nucleotide probabilities
    @return: a rate matrix object with one expected nucleotide substitution per time unit
    """
    nucleotides = 'ACGT'
    # the distribution must be a proper probability vector of length four
    for prob in nt_distribution:
        assert prob >= 0
    assert len(nt_distribution) == 4
    assert RateMatrix.almost_equal(sum(nt_distribution), 1.0)
    # R and Y are the combined purine and pyrimidine frequencies
    A, C, G, T = nt_distribution
    R, Y = float(A + G), float(C + T)
    # neither class may be empty, and kappa is bounded below by the smaller class
    assert A + G > 0
    assert C + T > 0
    assert kappa > -min(Y, R)
    # normalization constant, halved to correct what might be an error in the paper
    scale = (4*T*C*(1 + kappa/Y) + 4*A*G*(1 + kappa/R) + 4*Y*R) / 2

    def scaled_rate(i, j):
        # rate from nucleotide index i to nucleotide index j; pairs in
        # g_transitions receive the additional kappa term
        boost = 1.0
        if (nucleotides[i], nucleotides[j]) in g_transitions:
            boost = 1 + kappa / (nt_distribution[i] + nt_distribution[j])
        return boost * nt_distribution[j] / scale

    # dictionary rate matrix keyed by (source, sink), filled in row major order
    positions = range(len(nucleotides))
    q = {
        (nucleotides[i], nucleotides[j]): scaled_rate(i, j)
        for i in positions
        for j in positions
    }
    # replace each diagonal entry so that its row sums to zero
    for row in nucleotides:
        others = nucleotides.replace(row, '')
        q[(row, row)] = -sum(q[(row, col)] for col in others)
    # build the rate matrix object from the row major representation
    rate_matrix_object = RateMatrix.RateMatrix(
        MatrixUtil.dict_to_row_major(q, nucleotides, nucleotides), nucleotides)
    # the result must have unit expected rate
    expected_rate = rate_matrix_object.get_expected_rate()
    unit_rate = RateMatrix.almost_equal(expected_rate, 1.0)
    assert unit_rate, 'the rate is %f but should be 1.0' % expected_rate
    return rate_matrix_object