コード例 #1
0
ファイル: 20080523a.py プロジェクト: argriffing/xgcode
def main():
    """
    Demonstrate MLE site-rate estimation on the brown example dataset.

    Builds a normalized HKY rate matrix with a uniform nucleotide
    distribution, estimates per-site rates on the example tree and
    alignment, and prints a stockholm format summary.
    NOTE: this module uses Python 2 print statements.
    """
    # create the alignment object
    print 'creating the alignment...'
    alignment_string = Fasta.brown_example_alignment.strip()
    alignment = Fasta.Alignment(StringIO(alignment_string))
    # create a tree object
    print 'creating the tree...'
    tree_string = Newick.brown_example_tree
    tree = Newick.parse(tree_string, Newick.NewickTree)
    # create a rate matrix object
    print 'creating the rate matrix object...'
    # uniform nucleotide distribution; kappa is the transition/transversion
    # rate ratio of the HKY85 model
    distribution = {'A': .25, 'C': .25, 'G': .25, 'T': .25}
    kappa = 2.0
    row_major_rate_matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
            distribution, kappa).get_row_major_rate_matrix()
    rate_matrix = RateMatrix.FastRateMatrix(
            row_major_rate_matrix, list('ACGT'))
    # scale the matrix so its expected rate is standardized
    rate_matrix.normalize()
    # get the mle_rates
    print 'getting the mle rates...'
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    print 'mle rates:'
    print mle_rates
    print 'stockholm string:'
    print get_stockholm_string(tree, alignment, mle_rates)
コード例 #2
0
def get_response_content(fs):
    """
    Build the analysis response for a two-category HKY mixture model.
    @param fs: a FieldStorage-like object with the cgi arguments
    @return: the analysis report text terminated by a newline
    """
    # parse the uploaded nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # collect the per-category parameters from the form
    mixture_weights = [fs.weight_a, fs.weight_b]
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # parse one nucleotide distribution per category
    nucleotide_distributions = [
        SnippetUtil.get_distribution(nt_string, 'nucleotide', list('ACGT'))
        for nt_string in (fs.frequency_a, fs.frequency_b)]
    # build one unscaled HKY rate matrix per category
    rate_matrix_objects = [
        RateMatrix.get_unscaled_hky85_rate_matrix(nt_distribution, kappa)
        for nt_distribution, kappa in zip(
            nucleotide_distributions, kappa_values)]
    # turn the raw weights into proportions that sum to one
    weight_sum = sum(mixture_weights)
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # assemble the mixture model and normalize it
    mixture_model = SubModel.MixtureModel(
        mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # run the analysis and return the report
    return do_analysis(mixture_model, nexus.alignment, nexus.tree) + '\n'
コード例 #3
0
 def test_hky_nielsen(self):
     """
     Give modified rejection sampling a chance to fail.

     It should give the same results as vanilla rejection sampling:
     the distribution of the number of events per accepted sampled path
     should not differ significantly between the two samplers.
     """
     # define a normalized HKY rate matrix in dictionary form
     distribution = {'A':.2,'C':.3,'G':.3,'T':.2}
     kappa = 2
     rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(distribution, kappa)
     rate_matrix_object.normalize()
     rate_matrix = rate_matrix_object.get_dictionary_rate_matrix()
     # define the endpoint-conditioned path sampling problem
     path_length = 2
     initial_state = 'A'
     terminal_state = 'C'
     states = 'ACGT'
     iterations = 200
     # draw a fixed number of accepted vanilla rejection samples;
     # None means the sample was rejected, so retry until accepted
     rejection_changes = []
     i = 0
     while i < iterations:
         rejection_events = get_rejection_sample(initial_state, terminal_state, states, path_length, rate_matrix)
         if rejection_events is not None:
             rejection_changes.append(len(rejection_events))
             i += 1
     # draw a fixed number of accepted modified (Nielsen) rejection samples
     nielsen_changes = []
     i = 0
     while i < iterations:
         nielsen_events = get_nielsen_sample(initial_state, terminal_state, states, path_length, rate_matrix)
         if nielsen_events is not None:
             nielsen_changes.append(len(nielsen_events))
             i += 1
     # nonparametric comparison of the two samples of event counts
     t, p = scipy.stats.mannwhitneyu(rejection_changes, nielsen_changes)
     # failIf was deprecated in Python 2.7/3.2 and removed in 3.12;
     # assertFalse is the backward-compatible replacement
     self.assertFalse(p < .001)
コード例 #4
0
ファイル: 20080408b.py プロジェクト: BIGtigr/xgcode
def get_sample_mixture_model():
    """
    @return: a mixture model that is used to generate the default nexus data
    """
    # shared transition/transversion ratio and category weights
    kappa = 2
    category_distribution = [.1, .4, .5]
    # one nucleotide distribution per mixture category
    nt_dicts = [
        dict(zip('ACGT', (.1, .4, .4, .1))),
        dict(zip('ACGT', (.2, .3, .3, .2))),
        dict(zip('ACGT', (.25, .25, .25, .25))),
    ]
    # build an unscaled HKY rate matrix for each category
    rate_matrix_objects = [
        RateMatrix.get_unscaled_hky85_rate_matrix(nt_dict, kappa)
        for nt_dict in nt_dicts]
    # combine the categories into a normalized mixture model
    mixture_model = SubModel.MixtureModel(
        category_distribution, rate_matrix_objects)
    mixture_model.normalize()
    return mixture_model
コード例 #5
0
 def test_hky_uniformization(self):
     """
     Give uniformization a chance to fail.

     It should give the same results as modified rejection sampling:
     the distribution of the number of events per sampled path should
     not differ significantly between the two samplers.
     """
     # define a normalized HKY rate matrix in dictionary form
     distribution = {'A':.2,'C':.3,'G':.3,'T':.2}
     kappa = 2
     rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(distribution, kappa)
     rate_matrix_object.normalize()
     rate_matrix = rate_matrix_object.get_dictionary_rate_matrix()
     # define the endpoint-conditioned path sampling problem
     path_length = 2
     initial_state = 'A'
     terminal_state = 'C'
     states = 'ACGT'
     iterations = 200
     # get the modified rejection sampling changes, where each change is the number of events on a sampled path
     # None means the sample was rejected, so retry until accepted
     nielsen_changes = []
     i = 0
     while i < iterations:
         nielsen_events = get_nielsen_sample(initial_state, terminal_state, states, path_length, rate_matrix)
         if nielsen_events is not None:
             nielsen_changes.append(len(nielsen_events))
             i += 1
     # get the uniformization changes, where each change is the number of events on a sampled path
     uniformization_changes = []
     for i in range(iterations):
         uniformization_events = get_uniformization_sample(initial_state, terminal_state, states, path_length, rate_matrix)
         uniformization_changes.append(len(uniformization_events))
     # see if there is a statistically significant difference between the sampled path lengths
     t, p = scipy.stats.mannwhitneyu(uniformization_changes, nielsen_changes)
     # failIf was deprecated in Python 2.7/3.2 and removed in 3.12;
     # assertFalse is the backward-compatible replacement
     self.assertFalse(p < .001, p)
コード例 #6
0
ファイル: 20080408a.py プロジェクト: argriffing/xgcode
def deserialize_mixture_model(xml_string):
    """
    Convert the xml string to a mixture model.
    @param xml_string: an xml string defining the mixture model
    @return: an unscaled mixture model object
    """
    # parse the xml document; kappa is shared by all categories
    root = ET.parse(StringIO(xml_string)).getroot()
    kappa = float(root.get("kappa"))
    # read the weight and the nucleotide distribution of each category
    category_weights = []
    nt_dicts = []
    for category in root:
        category_weights.append(float(category.get("weight")))
        distribution = category.find("distribution")
        nt_dict = {}
        for terminal in distribution:
            nt_dict[terminal.get("symbol")] = float(terminal.get("weight"))
        # rescale the raw nucleotide weights so they form a distribution
        total = sum(nt_dict.values())
        for nt in nt_dict:
            nt_dict[nt] /= total
        nt_dicts.append(nt_dict)
    # build one unscaled HKY rate matrix per category
    rate_matrix_objects = [
        RateMatrix.get_unscaled_hky85_rate_matrix(nt_dict, kappa)
        for nt_dict in nt_dicts]
    # normalize the category weights into a distribution
    total = float(sum(category_weights))
    category_distribution = [weight / total for weight in category_weights]
    # assemble and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
        category_distribution, rate_matrix_objects)
    mixture_model.normalize()
    return mixture_model
コード例 #7
0
def main():
    """
    Demonstrate MLE site-rate estimation on the brown example dataset.

    Builds a normalized HKY rate matrix with a uniform nucleotide
    distribution, estimates per-site rates on the example tree and
    alignment, and prints a stockholm format summary.
    NOTE: this module uses Python 2 print statements.
    """
    # create the alignment object
    print 'creating the alignment...'
    alignment_string = Fasta.brown_example_alignment.strip()
    alignment = Fasta.Alignment(StringIO(alignment_string))
    # create a tree object
    print 'creating the tree...'
    tree_string = Newick.brown_example_tree
    tree = Newick.parse(tree_string, Newick.NewickTree)
    # create a rate matrix object
    print 'creating the rate matrix object...'
    # uniform nucleotide distribution; kappa is the transition/transversion
    # rate ratio of the HKY85 model
    distribution = {'A': .25, 'C': .25, 'G': .25, 'T': .25}
    kappa = 2.0
    row_major_rate_matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
        distribution, kappa).get_row_major_rate_matrix()
    rate_matrix = RateMatrix.FastRateMatrix(row_major_rate_matrix,
                                            list('ACGT'))
    # scale the matrix so its expected rate is standardized
    rate_matrix.normalize()
    # get the mle_rates
    print 'getting the mle rates...'
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    print 'mle rates:'
    print mle_rates
    print 'stockholm string:'
    print get_stockholm_string(tree, alignment, mle_rates)
コード例 #8
0
ファイル: 20080330a.py プロジェクト: argriffing/xgcode
def get_response_content(fs):
    """
    Build the analysis response for a two-category HKY mixture model.
    @param fs: a FieldStorage-like object with the cgi arguments
    @return: the analysis report text terminated by a newline
    """
    # parse the uploaded nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # gather the per-category parameters from the form
    weights = [fs.weight_a, fs.weight_b]
    kappas = [fs.kappa_a, fs.kappa_b]
    # parse one nucleotide distribution per category
    distributions = []
    for freq_string in (fs.frequency_a, fs.frequency_b):
        distributions.append(SnippetUtil.get_distribution(
                freq_string, 'nucleotide', list('ACGT')))
    # build one unscaled HKY rate matrix per category
    matrices = []
    for d, kappa in zip(distributions, kappas):
        matrices.append(RateMatrix.get_unscaled_hky85_rate_matrix(d, kappa))
    # convert the raw weights into proportions summing to one
    total = sum(weights)
    proportions = [w / total for w in weights]
    # assemble the mixture model and normalize it
    mixture_model = SubModel.MixtureModel(proportions, matrices)
    mixture_model.normalize()
    # run the analysis and return the report
    return do_analysis(mixture_model, nexus.alignment, nexus.tree) + '\n'
コード例 #9
0
def demo_uniformization():
    """
    Print one uniformization sample of an endpoint-conditioned path.

    NOTE: this module uses a Python 2 print statement.
    """
    # define a normalized HKY rate matrix in dictionary form
    distribution = {'A':.2,'C':.3,'G':.3,'T':.2}
    kappa = 2
    rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(distribution, kappa)
    rate_matrix_object.normalize()
    rate_matrix = rate_matrix_object.get_dictionary_rate_matrix()
    # sample a path of this length conditioned on the two endpoint states
    path_length = 2
    initial_state = 'A'
    terminal_state = 'C'
    states = 'ACGT'
    uniformization_events = get_uniformization_sample(initial_state, terminal_state, states, path_length, rate_matrix)
    print uniformization_events
コード例 #10
0
ファイル: 20080201a.py プロジェクト: BIGtigr/xgcode
def get_response_content(fs):
    """
    Render an HKY rate matrix as tab separated text.
    @param fs: a FieldStorage-like object with the cgi arguments
    @return: the rate matrix as four tab separated rows of text
    """
    # get the nucleotide distribution
    d = SnippetUtil.get_distribution(fs.weights, 'nucleotide', list('ACGT'))
    # get the rate matrix defined by the nucleotide distribution and kappa
    rate_object = RateMatrix.get_unscaled_hky85_rate_matrix(d, fs.kappa)
    # optionally normalize the rate matrix before display
    if fs.scaled:
        rate_object.normalize()
    rate_matrix = rate_object.get_dictionary_rate_matrix()
    # show the rate matrix in convenient text form
    out = StringIO()
    for nta in 'ACGT':
        print >> out, '\t'.join(str(rate_matrix[(nta, ntb)]) for ntb in 'ACGT')
    return out.getvalue()
コード例 #11
0
ファイル: 20080201a.py プロジェクト: argriffing/xgcode
def get_response_content(fs):
    """
    Render an HKY rate matrix as tab separated text.
    @param fs: a FieldStorage-like object with the cgi arguments
    @return: the rate matrix as four tab separated rows of text
    """
    # get the nucleotide distribution
    d = SnippetUtil.get_distribution(fs.weights, 'nucleotide', list('ACGT'))
    # get the rate matrix defined by the nucleotide distribution and kappa
    rate_object = RateMatrix.get_unscaled_hky85_rate_matrix(d, fs.kappa)
    # optionally normalize the rate matrix before display
    if fs.scaled:
        rate_object.normalize()
    rate_matrix = rate_object.get_dictionary_rate_matrix()
    # show the rate matrix in convenient text form
    out = StringIO()
    for nta in 'ACGT':
        print >> out, '\t'.join(str(rate_matrix[(nta, ntb)]) for ntb in 'ACGT')
    return out.getvalue()
コード例 #12
0
ファイル: 20080403a.py プロジェクト: argriffing/xgcode
def get_response_content(fs):
    """
    Analyze a two-category HKY mixture model fitted by hyphy.
    @param fs: a FieldStorage-like object with the cgi arguments
    @return: the analysis report text terminated by a newline
    """
    # parse the uploaded nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # read the fitted hyphy variables
    ns = Hyphy.get_hyphy_namespace(StringIO(fs.hyphy))
    # the two mixture weights come from the single hyphy proportion P
    mixture_weights = [ns.P, 1.0 - ns.P]
    # read one nucleotide distribution per category;
    # hyphy names the second category's variables with a "2" suffix
    nucleotide_distributions = []
    for suffix in ("", "2"):
        distribution = {}
        for nt in list("ACGT"):
            var = "eqFreq" + nt + suffix
            distribution[nt] = getattr(ns, var)
        nucleotide_distributions.append(distribution)
    # create one normalized HKY rate matrix per category
    rate_matrix_objects = []
    for nt_distribution in nucleotide_distributions:
        matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                nt_distribution, ns.kappa)
        matrix.normalize()
        rate_matrix_objects.append(matrix)
    # turn the weights into proportions that sum to one
    weight_sum = sum(mixture_weights)
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # rescale each rate matrix by the ratio of the human branch length
    # in its hyphy tree to the human branch length in the nexus tree
    for rate_matrix_object, tree_name in zip(
            rate_matrix_objects, ("givenTree", "otherTree")):
        nexus_tree = nexus.tree
        hyphy_tree = getattr(ns, tree_name)
        try:
            nexus_human_node = nexus_tree.get_unique_node("Human")
        except Newick.NewickSearchError as e:
            raise HandlingError("nexus tree error: %s" % e)
        try:
            hyphy_human_node = hyphy_tree.get_unique_node("HUMAN")
        except Newick.NewickSearchError as e:
            raise HandlingError("hyphy tree error: %s" % e)
        sf = hyphy_human_node.blen / nexus_human_node.blen
        rate_matrix_object.rescale(sf)
    # assemble the mixture model and return the analysis report
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    return do_analysis(mixture_model, nexus.alignment, nexus.tree) + "\n"
コード例 #13
0
ファイル: 20080408b.py プロジェクト: argriffing/xgcode
def get_sample_mixture_model():
    """
    @return: a mixture model that is used to generate the default nexus data
    """
    # shared transition/transversion ratio and category weights
    kappa = 2
    category_distribution = [.1, .4, .5]
    # one nucleotide distribution per mixture category;
    # each is symmetric with A/T and C/G sharing a frequency
    nt_dicts = []
    for at_freq, cg_freq in ((.1, .4), (.2, .3), (.25, .25)):
        nt_dicts.append(
                {'A': at_freq, 'C': cg_freq, 'G': cg_freq, 'T': at_freq})
    # build an unscaled HKY rate matrix for each category
    rate_matrix_objects = []
    for nt_dict in nt_dicts:
        rate_matrix_objects.append(
                RateMatrix.get_unscaled_hky85_rate_matrix(nt_dict, kappa))
    # combine the categories into a normalized mixture model
    mixture_model = SubModel.MixtureModel(
            category_distribution, rate_matrix_objects)
    mixture_model.normalize()
    return mixture_model
コード例 #14
0
ファイル: 20080523a.py プロジェクト: argriffing/xgcode
def get_response_content(fs):
    """
    Compute MLE site rates and report them in stockholm format.
    @param fs: a FieldStorage-like object with the cgi arguments
    @return: a stockholm format string terminated by a newline
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the nucleotide distribution
    nt_distribution = SnippetUtil.get_distribution(
            fs.weights, 'nucleotide', list('ACGT'))
    # parse the nucleotide alignment
    try:
        alignment = Fasta.Alignment(StringIO(fs.alignment))
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build a normalized HKY rate matrix from the distribution and kappa
    row_major = RateMatrix.get_unscaled_hky85_rate_matrix(
            nt_distribution, fs.kappa).get_row_major_rate_matrix()
    rate_matrix = RateMatrix.FastRateMatrix(row_major, list('ACGT'))
    rate_matrix.normalize()
    # estimate the per-site rates and format the response
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    return get_stockholm_string(tree, alignment, mle_rates) + '\n'
コード例 #15
0
def get_response_content(fs):
    """
    Compute MLE site rates and report them in stockholm format.
    @param fs: a FieldStorage-like object with the cgi arguments
    @return: a stockholm format string terminated by a newline
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the nucleotide distribution
    distribution = SnippetUtil.get_distribution(
            fs.weights, 'nucleotide', list('ACGT'))
    # parse the nucleotide alignment
    try:
        alignment = Fasta.Alignment(StringIO(fs.alignment))
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build a normalized HKY rate matrix from the distribution and kappa
    unscaled = RateMatrix.get_unscaled_hky85_rate_matrix(
            distribution, fs.kappa)
    rate_matrix = RateMatrix.FastRateMatrix(
            unscaled.get_row_major_rate_matrix(), list('ACGT'))
    rate_matrix.normalize()
    # estimate the per-site rates and format the response
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    return get_stockholm_string(tree, alignment, mle_rates) + '\n'
コード例 #16
0
ファイル: 20080408a.py プロジェクト: BIGtigr/xgcode
def deserialize_mixture_model(xml_string):
    """
    Convert the xml string to a mixture model.
    @param xml_string: an xml string defining the mixture model
    @return: an unscaled mixture model object
    """
    # parse the xml document and read the shared kappa parameter
    element_tree = ET.parse(StringIO(xml_string))
    root = element_tree.getroot()
    kappa = float(root.get('kappa'))
    # read the weight and the nucleotide distribution of each category
    category_weights = []
    nt_dicts = []
    for category in root:
        category_weights.append(float(category.get('weight')))
        distribution = category.find('distribution')
        raw = {}
        for terminal in distribution:
            raw[terminal.get('symbol')] = float(terminal.get('weight'))
        # rescale the raw weights so they sum to one
        total = sum(raw.values())
        nt_dicts.append(dict((nt, w / total) for nt, w in raw.items()))
    # build one unscaled HKY rate matrix per category
    rate_matrix_objects = []
    for nt_dict in nt_dicts:
        rate_matrix_objects.append(
            RateMatrix.get_unscaled_hky85_rate_matrix(nt_dict, kappa))
    # normalize the category weights and assemble the mixture model
    total = float(sum(category_weights))
    category_distribution = [weight / total for weight in category_weights]
    mixture_model = SubModel.MixtureModel(category_distribution,
                                          rate_matrix_objects)
    mixture_model.normalize()
    return mixture_model
コード例 #17
0
ファイル: 20080329a.py プロジェクト: BIGtigr/xgcode
def get_response(fs):
    """
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair
    """
    # parse the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # get the mixture weights
    mixture_weights = [fs.weight_a, fs.weight_b]
    # get the kappa values
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # get the nucleotide distributions
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    nucleotide_distributions = []
    for nt_string in frequency_strings:
        d = SnippetUtil.get_distribution(nt_string, 'nucleotide', list('ACGT'))
        nucleotide_distributions.append(d)
    # create the nucleotide HKY rate matrix objects
    rate_matrix_objects = []
    for nt_distribution, kappa in zip(nucleotide_distributions, kappa_values):
        rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(
            nt_distribution, kappa)
        rate_matrix_objects.append(rate_matrix_object)
    # create the mixture proportions
    weight_sum = sum(mixture_weights)
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(mixture_proportions,
                                          rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(tree, mixture_model,
                                                     fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the output string
    output_string = ''
    if fs.fasta:
        # the output is the alignment
        arr = []
        for node in tree.gen_tips():
            arr.append(alignment.get_fasta_sequence(node.name))
        alignment_string = '\n'.join(arr)
        output_string = alignment_string
    elif fs.nex:
        # the output is the alignment and the tree
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        # record the per-category parameters as nexus comments
        for i in range(2):
            arr = []
            arr.append('weight: %s' % mixture_weights[i])
            arr.append('kappa: %s' % kappa_values[i])
            nexus.add_comment('category %d: %s' % (i + 1, ', '.join(arr)))
        output_string = str(nexus)
    # define the filename
    if fs.fasta:
        filename_extension = 'fasta'
    elif fs.nex:
        filename_extension = 'nex'
    filename = 'sample.' + fs.fmt
    #TODO use the correct filename extension in the output
    # NOTE(review): filename_extension and filename are computed but never
    # used here; presumably a response header set elsewhere should use
    # filename_extension instead of fs.fmt -- confirm against the caller
    return output_string
コード例 #18
0
ファイル: 20080329a.py プロジェクト: argriffing/xgcode
def get_response(fs):
    """
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair
    """
    # parse the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # get the mixture weights
    mixture_weights = [fs.weight_a, fs.weight_b]
    # get the kappa values
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # get the nucleotide distributions
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    nucleotide_distributions = []
    for nt_string in frequency_strings:
        d = SnippetUtil.get_distribution(nt_string, 'nucleotide', list('ACGT'))
        nucleotide_distributions.append(d)
    # create the nucleotide HKY rate matrix objects
    rate_matrix_objects = []
    for nt_distribution, kappa in zip(nucleotide_distributions, kappa_values):
        rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(
                nt_distribution, kappa)
        rate_matrix_objects.append(rate_matrix_object)
    # create the mixture proportions
    weight_sum = sum(mixture_weights)
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the output string
    output_string = ''
    if fs.fasta:
        # the output is the alignment
        arr = []
        for node in tree.gen_tips():
            arr.append(alignment.get_fasta_sequence(node.name))
        alignment_string = '\n'.join(arr)
        output_string = alignment_string
    elif fs.nex:
        # the output is the alignment and the tree
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        # record the per-category parameters as nexus comments
        for i in range(2):
            arr = []
            arr.append('weight: %s' % mixture_weights[i])
            arr.append('kappa: %s' % kappa_values[i])
            nexus.add_comment('category %d: %s' % (i+1, ', '.join(arr)))
        output_string = str(nexus)
    # define the filename
    if fs.fasta:
        filename_extension = 'fasta'
    elif fs.nex:
        filename_extension = 'nex'
    filename = 'sample.' + fs.fmt
    #TODO use the correct filename extension in the output
    # NOTE(review): filename_extension and filename are computed but never
    # used here; presumably a response header set elsewhere should use
    # filename_extension instead of fs.fmt -- confirm against the caller
    return output_string
コード例 #19
0
ファイル: 20080408b.py プロジェクト: argriffing/xgcode
def get_response(fs):
    """
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair

    Runs hyphy on the uploaded nexus data and reformats its output.
    NOTE: this function uses Python 2 print-to-file statements.
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # move to the data directory
    original_directory = os.getcwd()
    os.chdir(Config.data_path)
    # create the batch file
    # hyphy category variables are named with 1-based numeric suffixes
    category_suffixes = [str(category+1) for category in range(fs.ncategories)]
    hky_hyphy_model = get_hyphy_model_string(hyphy_nexus, fs.ncategories)
    with open(hyphy_bf, 'wt') as fout:
        print >> fout, hky_hyphy_model 
    # create the nexus file
    with open(hyphy_nexus, 'wt') as fout:
        print >> fout, nexus
    # run hyphy
    p = subprocess.Popen([Config.hyphy_exe_path, hyphy_bf],
            close_fds=True, stdout=subprocess.PIPE)
    hyphy_output = p.stdout.read()
    # move back to the original directory
    os.chdir(original_directory)
    # read the hyphy output
    ns = Hyphy.get_hyphy_namespace(StringIO(hyphy_output))
    out = StringIO()
    # optionally show raw hyphy debugging information
    if fs.outdebug:
        print >> out, get_hyphy_debug_info(hyphy_output)
        print >> out, ''
        print >> out, ''
    # optionally show the generated hyphy model
    if fs.outmodel:
        print >> out, 'hyphy model:'
        print >> out, '---------------------------------------'
        print >> out, hky_hyphy_model
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # always show the reformatted hyphy output
    if True:
        print >> out, 'reformatted hyphy output:'
        print >> out, '---------------------------------------'
        # show the log likelihood
        print >> out, 'log likelihood :', ns.lnL
        print >> out, ''
        # show the kappa value
        print >> out, 'kappa :', ns.kappa
        print >> out, ''
        # show the per-category proportions, trees, and frequencies
        category_blocks = []
        for suffix in category_suffixes:
            block = StringIO()
            print >> block, 'mixing proportion :', getattr(ns, 'catFreq'+suffix)
            print >> block, 'tree :', getattr(ns, 'tree'+suffix).get_newick_string()
            for nt in list('ACGT'):
                print >> block, nt, ':', getattr(ns, 'eqFreq'+nt+suffix)
            category_blocks.append(block.getvalue().strip())
        print >> out, '\n\n'.join(category_blocks)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # optionally recompute the likelihood as a sanity check
    if fs.outcheck:
        # get the raw matrices
        matrices = []
        for suffix in category_suffixes:
            nt_dict = {}
            for nt in list('ACGT'):
                nt_dict[nt] = getattr(ns, 'eqFreq'+nt+suffix)
            # renormalize the frequencies reported by hyphy
            total = float(sum(nt_dict.values()))
            nt_dict = dict((k, v/total) for k, v in nt_dict.items())
            matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                    nt_dict, ns.kappa)
            matrices.append(matrix)
        raw_matrix_rates = [matrix.get_expected_rate() for matrix in matrices]
        # normalize the category weights reported by hyphy
        category_weights = []
        for suffix in category_suffixes:
            category_weights.append(getattr(ns, 'catFreq'+suffix))
        total = float(sum(category_weights))
        category_distribution = [weight / total for weight in category_weights]
        mixture_model = SubModel.MixtureModel(category_distribution, matrices)
        raw_mixture_rate = mixture_model.get_expected_rate()
        # rescale the mixture model
        # 0.75 is the expected rate of the initial model
        r1 = 0.75
        scaling_factor = r1
        mixture_model.rescale(scaling_factor)
        recomputed_log_likelihood = PhyLikelihood.get_log_likelihood(
                nexus.tree, nexus.alignment, mixture_model)
        print >> out, 'recomputed likelihood and rates:'
        print >> out, '---------------------------------------'
        print >> out, 'log likelihood :', recomputed_log_likelihood
        print >> out, ''
        print >> out, 'rate :', raw_mixture_rate
        print >> out, ''
        for rate, suffix in zip(raw_matrix_rates, category_suffixes):
            print >> out, 'rate%s : %s' % (suffix, rate)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # return the response
    return out.getvalue()
コード例 #20
0
ファイル: 20080408b.py プロジェクト: BIGtigr/xgcode
def get_response(fs):
    """
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair

    Runs hyphy on the uploaded nexus data and reformats its output.
    NOTE: this function uses Python 2 print-to-file statements.
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # move to the data directory
    original_directory = os.getcwd()
    os.chdir(Config.data_path)
    # create the batch file
    # hyphy category variables are named with 1-based numeric suffixes
    category_suffixes = [
        str(category + 1) for category in range(fs.ncategories)
    ]
    hky_hyphy_model = get_hyphy_model_string(hyphy_nexus, fs.ncategories)
    with open(hyphy_bf, 'wt') as fout:
        print >> fout, hky_hyphy_model
    # create the nexus file
    with open(hyphy_nexus, 'wt') as fout:
        print >> fout, nexus
    # run hyphy
    p = subprocess.Popen([Config.hyphy_exe_path, hyphy_bf],
                         close_fds=True,
                         stdout=subprocess.PIPE)
    hyphy_output = p.stdout.read()
    # move back to the original directory
    os.chdir(original_directory)
    # read the hyphy output
    ns = Hyphy.get_hyphy_namespace(StringIO(hyphy_output))
    out = StringIO()
    # optionally show raw hyphy debugging information
    if fs.outdebug:
        print >> out, get_hyphy_debug_info(hyphy_output)
        print >> out, ''
        print >> out, ''
    # optionally show the generated hyphy model
    if fs.outmodel:
        print >> out, 'hyphy model:'
        print >> out, '---------------------------------------'
        print >> out, hky_hyphy_model
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # always show the reformatted hyphy output
    if True:
        print >> out, 'reformatted hyphy output:'
        print >> out, '---------------------------------------'
        # show the log likelihood
        print >> out, 'log likelihood :', ns.lnL
        print >> out, ''
        # show the kappa value
        print >> out, 'kappa :', ns.kappa
        print >> out, ''
        # show the per-category proportions, trees, and frequencies
        category_blocks = []
        for suffix in category_suffixes:
            block = StringIO()
            print >> block, 'mixing proportion :', getattr(
                ns, 'catFreq' + suffix)
            print >> block, 'tree :', getattr(ns, 'tree' +
                                              suffix).get_newick_string()
            for nt in list('ACGT'):
                print >> block, nt, ':', getattr(ns, 'eqFreq' + nt + suffix)
            category_blocks.append(block.getvalue().strip())
        print >> out, '\n\n'.join(category_blocks)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # optionally recompute the likelihood as a sanity check
    if fs.outcheck:
        # get the raw matrices
        matrices = []
        for suffix in category_suffixes:
            nt_dict = {}
            for nt in list('ACGT'):
                nt_dict[nt] = getattr(ns, 'eqFreq' + nt + suffix)
            # renormalize the frequencies reported by hyphy
            total = float(sum(nt_dict.values()))
            nt_dict = dict((k, v / total) for k, v in nt_dict.items())
            matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                nt_dict, ns.kappa)
            matrices.append(matrix)
        raw_matrix_rates = [matrix.get_expected_rate() for matrix in matrices]
        # normalize the category weights reported by hyphy
        category_weights = []
        for suffix in category_suffixes:
            category_weights.append(getattr(ns, 'catFreq' + suffix))
        total = float(sum(category_weights))
        category_distribution = [weight / total for weight in category_weights]
        mixture_model = SubModel.MixtureModel(category_distribution, matrices)
        raw_mixture_rate = mixture_model.get_expected_rate()
        # rescale the mixture model
        # 0.75 is the expected rate of the initial model
        r1 = 0.75
        scaling_factor = r1
        mixture_model.rescale(scaling_factor)
        recomputed_log_likelihood = PhyLikelihood.get_log_likelihood(
            nexus.tree, nexus.alignment, mixture_model)
        print >> out, 'recomputed likelihood and rates:'
        print >> out, '---------------------------------------'
        print >> out, 'log likelihood :', recomputed_log_likelihood
        print >> out, ''
        print >> out, 'rate :', raw_mixture_rate
        print >> out, ''
        for rate, suffix in zip(raw_matrix_rates, category_suffixes):
            print >> out, 'rate%s : %s' % (suffix, rate)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # return the response
    return out.getvalue()