def create_rate_matrix(kappa, nt_distribution):
    """
    Build a nucleotide rate matrix scaled to one expected substitution per time unit.
    Every off-diagonal entry is proportional to the probability of the sink
    nucleotide. Pairs found in g_transitions are additionally multiplied by
    1 + kappa / (sum of the source and sink probabilities).
    Each diagonal entry is the negated sum of the other entries in its row.
    @param kappa: adjusts for the transition rate differing from the transversion rate
    @param nt_distribution: ordered ACGT nucleotide probabilities
    @return: a rate matrix object with one expected nucleotide substitution per time unit
    """
    nucleotides = 'ACGT'
    # check the distribution before unpacking it:
    # nonnegative entries, exactly four of them, summing to one
    assert all(p >= 0 for p in nt_distribution)
    assert len(nt_distribution) == 4
    assert RateMatrix.almost_equal(sum(nt_distribution), 1.0)
    A, C, G, T = nt_distribution
    # combined probability of A and G, and of C and T
    purine_mass = float(A + G)
    pyrimidine_mass = float(C + T)
    # both groups must have positive mass because they are used as divisors,
    # and kappa must exceed the negation of the smaller group mass
    assert A + G > 0
    assert C + T > 0
    assert kappa > max(-pyrimidine_mass, -purine_mass)
    # assemble the normalization constant from its three terms
    pyrimidine_term = 4 * T * C * (1 + kappa / pyrimidine_mass)
    purine_term = 4 * A * G * (1 + kappa / purine_mass)
    cross_term = 4 * pyrimidine_mass * purine_mass
    # the sum is halved to correct what might be an error in the paper
    scale = (pyrimidine_term + purine_term + cross_term) / 2
    # look up probabilities by nucleotide letter instead of by position
    freq = dict(zip(nucleotides, nt_distribution))
    # fill in all sixteen entries; the diagonal is overwritten afterwards
    rates = {}
    for source in nucleotides:
        for sink in nucleotides:
            pair = (source, sink)
            # g_transitions is defined elsewhere in this module;
            # presumably it holds the (source, sink) transition pairs -- TODO confirm
            if pair in g_transitions:
                weight = 1 + kappa / (freq[source] + freq[sink])
            else:
                weight = 1.0
            rates[pair] = weight * freq[sink] / scale
    # set each diagonal entry to the negated sum of the rest of its row
    for source in nucleotides:
        outflow = [rates[(source, sink)] for sink in nucleotides if sink != source]
        rates[(source, source)] = -sum(outflow)
    # convert the dictionary to row major form and wrap it in a rate matrix object
    row_major = MatrixUtil.dict_to_row_major(rates, nucleotides, nucleotides)
    rate_matrix_object = RateMatrix.RateMatrix(row_major, nucleotides)
    # the scaling above is supposed to give an expected rate of exactly one
    expected_rate = rate_matrix_object.get_expected_rate()
    if not RateMatrix.almost_equal(expected_rate, 1.0):
        assert False, 'the rate is %f but should be 1.0' % expected_rate
    return rate_matrix_object
def create_rate_matrix(kappa, nt_distribution):
    """
    Create a rate matrix over the states ACGT from kappa and a stationary-looking distribution.
    The rate from a source to a different sink is
    coefficient * (sink probability) / (normalization constant),
    where the coefficient is 1.0 unless the (source, sink) pair is in g_transitions,
    in which case it is 1 + kappa / (source probability + sink probability).
    Diagonal entries are chosen so that each row sums to zero.
    NOTE(review): the 1 + kappa/Y and 1 + kappa/R factors resemble the F84
    parameterization -- confirm against the paper the original author used.
    @param kappa: adjusts for the transition rate differing from the transversion rate
    @param nt_distribution: ordered ACGT nucleotide probabilities
    @return: a rate matrix object with one expected nucleotide substitution per time unit
    """
    states = 'ACGT'

    # sanity checks on the distribution
    for probability in nt_distribution:
        assert probability >= 0
    assert len(nt_distribution) == 4
    assert RateMatrix.almost_equal(sum(nt_distribution), 1.0)

    # R is the total probability of A and G; Y is the total probability of C and T
    A, C, G, T = nt_distribution
    R = float(A + G)
    Y = float(C + T)

    # R and Y are used as divisors below so neither may be zero;
    # kappa is bounded below by the negation of the smaller of the two
    assert A + G > 0
    assert C + T > 0
    assert kappa > max(-Y, -R)

    # normalization constant, halved to correct what might be an error in the paper
    normalization_constant = (
        4 * T * C * (1 + kappa / Y)
        + 4 * A * G * (1 + kappa / R)
        + 4 * Y * R
    )
    normalization_constant /= 2

    def scaled_rate(source_index, sink_index):
        # rate from the state at source_index to the state at sink_index
        sink_probability = nt_distribution[sink_index]
        # g_transitions comes from the enclosing module;
        # presumably the set of (source, sink) transition pairs -- TODO confirm
        if (states[source_index], states[sink_index]) in g_transitions:
            pair_mass = nt_distribution[source_index] + sink_probability
            coefficient = 1 + kappa / pair_mass
        else:
            coefficient = 1.0
        return coefficient * sink_probability / normalization_constant

    # compute every entry, including placeholder diagonal values
    dict_rate_matrix = {}
    for source_index, source in enumerate(states):
        for sink_index, sink in enumerate(states):
            dict_rate_matrix[(source, sink)] = scaled_rate(source_index, sink_index)

    # replace each diagonal value by the negated sum of the off-diagonal row entries
    for source in states:
        row_total = 0
        for sink in states:
            if source != sink:
                row_total += dict_rate_matrix[(source, sink)]
        dict_rate_matrix[(source, source)] = -row_total

    # build the rate matrix object from the row major form of the dictionary
    rate_matrix_object = RateMatrix.RateMatrix(
        MatrixUtil.dict_to_row_major(dict_rate_matrix, states, states),
        states,
    )

    # verify that the normalization gave one expected substitution per time unit
    expected_rate = rate_matrix_object.get_expected_rate()
    if not RateMatrix.almost_equal(expected_rate, 1.0):
        assert False, 'the rate is %f but should be 1.0' % expected_rate
    return rate_matrix_object