Exemple #1
0
 def testLikelihood(self):
     """
     Smoke test: run both estimation paths end to end without errors.

     The closed form estimates and the numerically optimized estimates are
     each turned into a rate matrix and scored against the same alignment.
     Nothing is asserted about the resulting values.
     """
     # a simple alignment that is not completely degenerate
     first = 'AAAACCCCGGGGTTAA'
     second = 'GAAACCTCGGCGTAAA'
     sequence_pair = (first, second)
     # closed form estimates; these are not necessarily the mle
     closed_form = get_closed_form_estimates((first, second))
     distance_cf, kappa_cf, freq_a, freq_c, freq_g, freq_t = closed_form
     closed_form_distribution = (freq_a, freq_c, freq_g, freq_t)
     closed_form_model = create_rate_matrix(
         kappa_cf, closed_form_distribution)
     log_likelihood_mle = PairLikelihood.get_log_likelihood(
         distance_cf, sequence_pair, closed_form_model)
     # estimates found by letting a numeric optimizer minimize the objective
     objective = Objective((first, second))
     start = list(objective.get_initial_parameters())
     optimum = scipy.optimize.fmin(objective, start, ftol=1e-10, disp=0)
     distance_opt, kappa_opt, weight_c, weight_g, weight_t = optimum
     optimized_distribution = parameters_to_distribution(
         (weight_c, weight_g, weight_t))
     optimized_model = create_rate_matrix(kappa_opt, optimized_distribution)
     log_likelihood_opt = PairLikelihood.get_log_likelihood(
         distance_opt, sequence_pair, optimized_model)
Exemple #2
0
 def testLikelihood(self):
     """
     Run the closed form analysis and the numerically optimized analysis
     on one small alignment; the test passes when neither raises an error.
     """
     # the alignment is simple but not completely degenerate
     sa, sb = 'AAAACCCCGGGGTTAA', 'GAAACCTCGGCGTAAA'
     sequence_pair = (sa, sb)
     # analytical estimates, which are not necessarily the mle
     (distance_cf, kappa_cf,
      a_cf, c_cf, g_cf, t_cf) = get_closed_form_estimates((sa, sb))
     model_cf = create_rate_matrix(kappa_cf, (a_cf, c_cf, g_cf, t_cf))
     log_likelihood_cf = PairLikelihood.get_log_likelihood(
         distance_cf, sequence_pair, model_cf)
     # maximum likelihood estimates from a numeric optimizer
     objective = Objective((sa, sb))
     initial_guess = list(objective.get_initial_parameters())
     theta = scipy.optimize.fmin(
         objective, initial_guess, ftol=1e-10, disp=0)
     distance_ml, kappa_ml, wC_ml, wG_ml, wT_ml = theta
     model_ml = create_rate_matrix(
         kappa_ml, parameters_to_distribution((wC_ml, wG_ml, wT_ml)))
     log_likelihood_ml = PairLikelihood.get_log_likelihood(
         distance_ml, sequence_pair, model_ml)
Exemple #3
0
 def __call__(self, branch_length):
     """
     Objective function for a one dimensional minimizer.
     @param branch_length: the distance between the two aligned sequences
     @return: the negative log likelihood, or infinity when the branch
     length is negative or no log likelihood is available
     """
     # negative branch lengths are never scored
     if branch_length < 0:
         return float('inf')
     score = PairLikelihood.get_log_likelihood(
         branch_length, self.sequence_pair, self.rate_matrix)
     # a missing log likelihood counts as the worst possible value
     return float('inf') if score is None else -score
Exemple #4
0
 def __call__(self, theta):
     """
     Evaluate the objective at one parameter vector.
     @param theta: the vector of estimated parameters, in the order
     (distance, kappa, wC, wG, wT)
     @return: the negative log likelihood to be minimized, or infinity
     when no log likelihood is available for theta
     """
     # unpack the parameters
     distance, kappa, wC, wG, wT = theta
     nt_distribution = parameters_to_distribution((wC, wG, wT))
     # make the rate matrix
     model = create_rate_matrix(kappa, nt_distribution)
     # get the likelihood
     log_likelihood = PairLikelihood.get_log_likelihood(distance, self.sequence_pair, model)
     # a None log likelihood cannot be negated; report the worst possible
     # value so that the minimizer moves away from this point
     if log_likelihood is None:
         return float('inf')
     return -log_likelihood
Exemple #5
0
 def __call__(self, branch_length):
     """
     Score a candidate branch length for a one dimensional minimizer.
     @param branch_length: the distance between the two aligned sequences
     @return: the negative log likelihood; infinity is returned for a
     negative branch length or when the log likelihood is None
     """
     infinity = float('inf')
     # reject negative distances before doing any likelihood work
     if branch_length < 0:
         return infinity
     pair_log_likelihood = PairLikelihood.get_log_likelihood(
             branch_length, self.sequence_pair, self.rate_matrix)
     if pair_log_likelihood is not None:
         return -pair_log_likelihood
     return infinity
Exemple #6
0
 def __call__(self, theta):
     """
     Evaluate the objective at one parameter vector.
     @param theta: the vector of estimated parameters, in the order
     (distance, kappa, wC, wG, wT)
     @return: the negative log likelihood to be minimized, or infinity
     when no log likelihood is available for theta
     """
     # unpack the parameters
     distance, kappa, wC, wG, wT = theta
     nt_distribution = parameters_to_distribution((wC, wG, wT))
     # make the rate matrix
     model = create_rate_matrix(kappa, nt_distribution)
     # get the likelihood
     log_likelihood = PairLikelihood.get_log_likelihood(
         distance, self.sequence_pair, model)
     # a None log likelihood cannot be negated; report the worst possible
     # value so that the minimizer moves away from this point
     if log_likelihood is None:
         return float('inf')
     return -log_likelihood
Exemple #7
0
def get_response_content(fs):
    """
    Report numerically optimized estimates for a pair of aligned sequences.
    @param fs: an object whose fasta attribute holds the alignment text
    @return: the text of the report
    """
    # parse the fasta text into an alignment
    try:
        alignment = Fasta.Alignment(StringIO(fs.fasta))
    except Fasta.AlignmentError as error:
        raise HandlingError('alignment error: ' + str(error))
    # exactly two sequences are required
    if len(alignment.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing the alignment to nucleotides must not change the column count
    columns_before = alignment.get_column_count()
    try:
        alignment.force_nucleotide()
    except Fasta.AlignmentError as error:
        raise HandlingError('nucleotide alignment error: ' + str(error))
    columns_after = alignment.get_column_count()
    if columns_before != columns_after:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # let a numeric optimizer find the maximum likelihood estimates
    objective = F84.Objective(alignment.sequences)
    start = list(objective.get_initial_parameters())
    optimum = scipy.optimize.fmin(objective, start, ftol=1e-10, disp=0)
    distance, kappa, wC, wG, wT = optimum
    nt_distribution = F84.parameters_to_distribution((wC, wG, wT))
    freq_a, freq_c, freq_g, freq_t = nt_distribution
    model = F84.create_rate_matrix(kappa, nt_distribution)
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, alignment.sequences, model)
    # write one labeled line per reported quantity
    rows = (
        ('ML distance:', distance),
        ('ML kappa:', kappa),
        ('ML A frequency:', freq_a),
        ('ML C frequency:', freq_c),
        ('ML G frequency:', freq_g),
        ('ML T frequency:', freq_t),
        ('log likelihood:', log_likelihood),
    )
    report = StringIO()
    for label, value in rows:
        print >> report, label, value
    return report.getvalue()
Exemple #8
0
def get_response_content(fs):
    """
    Build a text report of the estimates found by a numeric optimizer.
    @param fs: an object whose fasta attribute holds the alignment text
    @return: the report as a single string
    """
    # read the alignment
    try:
        alignment = Fasta.Alignment(StringIO(fs.fasta))
    except Fasta.AlignmentError as err:
        raise HandlingError('alignment error: ' + str(err))
    # only a pair of sequences is accepted
    if len(alignment.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # a changed column count means the input had gaps or ambiguous columns
    old_column_count = alignment.get_column_count()
    try:
        alignment.force_nucleotide()
    except Fasta.AlignmentError as err:
        raise HandlingError('nucleotide alignment error: ' + str(err))
    if old_column_count != alignment.get_column_count():
        raise HandlingError(
                'expected a gapless unambiguous nucleotide alignment')
    # minimize the objective numerically, starting from its initial guess
    objective = F84.Objective(alignment.sequences)
    initial_guess = list(objective.get_initial_parameters())
    theta = scipy.optimize.fmin(
            objective, initial_guess, ftol=1e-10, disp=0)
    distance, kappa, wC, wG, wT = theta
    nt_distribution = F84.parameters_to_distribution((wC, wG, wT))
    A, C, G, T = nt_distribution
    rate_matrix = F84.create_rate_matrix(kappa, nt_distribution)
    log_likelihood = PairLikelihood.get_log_likelihood(
            distance, alignment.sequences, rate_matrix)
    # emit the labeled values in a fixed order
    out = StringIO()
    labels = (
            'ML distance:', 'ML kappa:',
            'ML A frequency:', 'ML C frequency:',
            'ML G frequency:', 'ML T frequency:',
            'log likelihood:')
    values = (distance, kappa, A, C, G, T, log_likelihood)
    for label, value in zip(labels, values):
        print >> out, label, value
    return out.getvalue()
Exemple #9
0
def get_response_content(fs):
    """
    Report closed form estimates for a pair of aligned sequences.
    @param fs: an object whose fasta attribute holds the alignment text
    @return: the text of the report
    """
    # parse the fasta lines into an alignment
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as error:
        raise HandlingError('alignment error: ' + str(error))
    # exactly two sequences are required
    if len(alignment.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing the alignment to nucleotides must not change the column count
    columns_before = alignment.get_column_count()
    try:
        alignment.force_nucleotide()
    except Fasta.AlignmentError as error:
        raise HandlingError('nucleotide alignment error: ' + str(error))
    columns_after = alignment.get_column_count()
    if columns_before != columns_after:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # compute the estimates from the closed form formula
    sequence_pair = alignment.sequences
    estimates = F84.get_closed_form_estimates(sequence_pair)
    distance, kappa, freq_a, freq_c, freq_g, freq_t = estimates
    # score the alignment under the estimated model
    model = F84.create_rate_matrix(
        kappa, (freq_a, freq_c, freq_g, freq_t))
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, alignment.sequences, model)
    # write one labeled line per reported quantity
    rows = (
        ('distance:', distance),
        ('kappa:', kappa),
        ('A frequency:', freq_a),
        ('C frequency:', freq_c),
        ('G frequency:', freq_g),
        ('T frequency:', freq_t),
        ('log likelihood:', log_likelihood),
    )
    report = StringIO()
    for label, value in rows:
        print >> report, label, value
    return report.getvalue()
Exemple #10
0
def get_response_content(fs):
    """
    Build a text report of the closed form estimates for two sequences.
    @param fs: an object whose fasta attribute holds the alignment text
    @return: the report as a single string
    """
    # read the alignment
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as err:
        raise HandlingError('alignment error: ' + str(err))
    # only a pair of sequences is accepted
    if len(alignment.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # a changed column count means the input had gaps or ambiguous columns
    old_column_count = alignment.get_column_count()
    try:
        alignment.force_nucleotide()
    except Fasta.AlignmentError as err:
        raise HandlingError('nucleotide alignment error: ' + str(err))
    if old_column_count != alignment.get_column_count():
        raise HandlingError(
                'expected a gapless unambiguous nucleotide alignment')
    # closed form estimates of the distance, kappa and the frequencies
    pair = alignment.sequences
    distance, kappa, A, C, G, T = F84.get_closed_form_estimates(pair)
    # log likelihood of the alignment under those estimates
    frequencies = (A, C, G, T)
    rate_matrix = F84.create_rate_matrix(kappa, frequencies)
    log_likelihood = PairLikelihood.get_log_likelihood(
            distance, alignment.sequences, rate_matrix)
    # emit the labeled values in a fixed order
    out = StringIO()
    labels = (
            'distance:', 'kappa:',
            'A frequency:', 'C frequency:',
            'G frequency:', 'T frequency:',
            'log likelihood:')
    values = (distance, kappa, A, C, G, T, log_likelihood)
    for label, value in zip(labels, values):
        print >> out, label, value
    return out.getvalue()