Python Fasta.create_alignment 예제들, Fasta.create_alignment, barque Python 예제들

예제 #1

0

파일 보기

def get_response_content(fs):
    """
    Sample sequences on a tree and return them as a FASTA string.
    @param fs: an object providing tree (a newick string), order (text with
    one sequence name per line, possibly empty) and length (passed through
    to JC69.sample_sequences)
    @return: the FASTA text of the sampled alignment plus a trailing newline
    @raise HandlingError: if the ordered names do not match the tree tips
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the sequence order if it exists
    ordered_names = Util.get_stripped_lines(fs.order.splitlines())
    if ordered_names:
        # the user supplied names must be exactly the tip names of the tree
        observed_name_set = set(ordered_names)
        expected_name_set = set(node.get_name() for node in tree.gen_tips())
        extra_names = observed_name_set - expected_name_set
        missing_names = expected_name_set - observed_name_set
        if extra_names:
            msg_a = 'the list of ordered names includes these names '
            msg_b = 'not found in the tree: %s' % str(tuple(extra_names))
            raise HandlingError(msg_a + msg_b)
        if missing_names:
            msg_a = 'the tree includes these names not found in the list '
            msg_b = 'of ordered names: %s' % str(tuple(missing_names))
            raise HandlingError(msg_a + msg_b)
    else:
        # no order was given, so fall back to the tip order of the tree;
        # the loop variable must be the tip itself (it was misnamed before,
        # which raised a NameError on this branch)
        ordered_names = [tip.get_name() for tip in tree.gen_tips()]
    # do the sampling
    sampled_sequences = JC69.sample_sequences(tree, ordered_names, fs.length)
    alignment = Fasta.create_alignment(ordered_names, sampled_sequences)
    # return the response
    return alignment.to_fasta_string() + '\n'

예제 #2

0

파일 보기

파일: 20080826a.py 프로젝트: argriffing/xgcode

def get_response_content(fs):
    """
    Sample sequences on a tree and return them as a FASTA string.
    @param fs: an object providing tree (a newick string), order (text with
    one sequence name per line, possibly empty) and length (passed through
    to JC69.sample_sequences)
    @return: the FASTA text of the sampled alignment plus a trailing newline
    @raise HandlingError: if the ordered names do not match the tree tips
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the sequence order if it exists
    ordered_names = Util.get_stripped_lines(fs.order.splitlines())
    if not ordered_names:
        # no order was given, so use the tip order of the tree;
        # the generator previously bound 'name' but read 'tip',
        # which raised a NameError whenever this branch was taken
        ordered_names = [tip.get_name() for tip in tree.gen_tips()]
    else:
        # the user supplied names must be exactly the tip names of the tree
        observed_name_set = set(ordered_names)
        expected_name_set = set(node.get_name() for node in tree.gen_tips())
        extra_names = observed_name_set - expected_name_set
        missing_names = expected_name_set - observed_name_set
        if extra_names:
            msg_a = 'the list of ordered names includes these names '
            msg_b = 'not found in the tree: %s' % str(tuple(extra_names))
            raise HandlingError(msg_a + msg_b)
        if missing_names:
            msg_a = 'the tree includes these names not found in the list '
            msg_b = 'of ordered names: %s' % str(tuple(missing_names))
            raise HandlingError(msg_a + msg_b)
    # do the sampling
    sampled_sequences = JC69.sample_sequences(tree, ordered_names, fs.length)
    alignment = Fasta.create_alignment(ordered_names, sampled_sequences)
    # return the response
    return alignment.to_fasta_string() + '\n'

예제 #3

0

파일 보기

파일: 20081218a.py 프로젝트: argriffing/xgcode

def get_amino_acid_alignment(table):
    """
    Convert a table of per-taxon codons into an amino acid alignment.
    The first row is a header of five fixed columns followed by taxon names.
    Each remaining row supplies one codon per taxon in columns six onward.
    @param table: a table of data in some random format sent by Ferran Casals
    @return: a Fasta amino acid alignment object
    @raise HandlingError: if the table shape or header is unexpected,
    or if a codon is a stop codon or is not recognized
    """
    if len(table) < 2:
        raise HandlingError('the data table should have at least two rows')
    first_row = table[0]
    if len(first_row) < 6:
        raise HandlingError(
                'the first row of the table has %d columns '
                'but at least six were expected' % len(first_row))
    # check the five fixed header columns in order, ignoring case
    expected_headers = (
            ('variant',
                'expected the first column to be the variant'),
            ('chr',
                'expected the second column to be the chromosome'),
            ('position',
                'expected the third column to be the position'),
            ('Amino Acid Change',
                'expected the fourth column to be the amino acid change'),
            ('alleles',
                'expected the fifth column to be the nucleotide change'),
            )
    for observed, (expected, msg) in zip(first_row, expected_headers):
        if observed.upper() != expected.upper():
            raise HandlingError(msg)
    remaining_rows = table[1:]
    for row in remaining_rows:
        if len(row) != len(first_row):
            raise HandlingError(
                    'each row should have the same number of columns')
    # get the ordered taxa
    taxa = first_row[5:]
    if len(set(taxa)) != len(taxa):
        raise HandlingError('the same taxon appears in more than one column')
    # get the sequence of codons for each taxon;
    # the transposed rows are put into a list before slicing because
    # zip is only subscriptable where it returns a list
    codon_sequences = list(zip(*remaining_rows))[5:]
    # convert codon sequences to amino acid sequences
    aa_sequences = []
    for codon_sequence in codon_sequences:
        aa_list = []
        for codon in codon_sequence:
            codon = codon.upper()
            if codon == 'ND':
                # the 'ND' placeholder is written as '-' in the alignment
                aa = '-'
            elif codon in Codon.g_non_stop_codons:
                aa = Codon.g_codon_to_aa_letter[codon]
            elif codon in Codon.g_stop_codons:
                raise HandlingError(
                        'one of the codons is a stop codon: %s' % codon)
            else:
                raise HandlingError(
                        'one of the codons is invalid: %s' % codon)
            aa_list.append(aa)
        aa_sequences.append(''.join(aa_list))
    # return the alignment
    return Fasta.create_alignment(taxa, aa_sequences)

예제 #4

0

파일 보기

파일: 20081218a.py 프로젝트: BIGtigr/xgcode

def get_amino_acid_alignment(table):
    """
    Convert a table of per-taxon codons into an amino acid alignment.
    The first row is a header of five fixed columns followed by taxon names.
    Each remaining row supplies one codon per taxon in columns six onward.
    @param table: a table of data in some random format sent by Ferran Casals
    @return: a Fasta amino acid alignment object
    @raise HandlingError: if the table shape or header is unexpected,
    or if a codon is a stop codon or is not recognized
    """
    if len(table) < 2:
        raise HandlingError('the data table should have at least two rows')
    first_row = table[0]
    if len(first_row) < 6:
        raise HandlingError('the first row of the table has %d columns '
                            'but at least six were expected' % len(first_row))
    # the five fixed header columns are compared case-insensitively
    if first_row[0].upper() != 'variant'.upper():
        raise HandlingError('expected the first column to be the variant')
    if first_row[1].upper() != 'chr'.upper():
        raise HandlingError('expected the second column to be the chromosome')
    if first_row[2].upper() != 'position'.upper():
        raise HandlingError('expected the third column to be the position')
    if first_row[3].upper() != 'Amino Acid Change'.upper():
        raise HandlingError(
            'expected the fourth column to be the amino acid change')
    if first_row[4].upper() != 'alleles'.upper():
        raise HandlingError(
            'expected the fifth column to be the nucleotide change')
    remaining_rows = table[1:]
    if any(len(row) != len(first_row) for row in remaining_rows):
        raise HandlingError(
            'each row should have the same number of columns')
    # get the ordered taxa
    taxa = first_row[5:]
    if len(set(taxa)) != len(taxa):
        raise HandlingError('the same taxon appears in more than one column')
    # get the sequence of codons for each taxon;
    # zip is wrapped in list() so that the slice also works where
    # zip returns an iterator instead of a list
    codon_sequences = list(zip(*remaining_rows))[5:]
    # convert codon sequences to amino acid sequences
    aa_sequences = []
    for codon_sequence in codon_sequences:
        aa_list = []
        for codon in codon_sequence:
            codon = codon.upper()
            if codon == 'ND':
                # the 'ND' placeholder is written as '-' in the alignment
                aa = '-'
            elif codon in Codon.g_non_stop_codons:
                aa = Codon.g_codon_to_aa_letter[codon]
            elif codon in Codon.g_stop_codons:
                raise HandlingError('one of the codons is a stop codon: %s' %
                                    codon)
            else:
                raise HandlingError('one of the codons is invalid: %s' % codon)
            aa_list.append(aa)
        aa_sequences.append(''.join(aa_list))
    # return the alignment
    return Fasta.create_alignment(taxa, aa_sequences)

예제 #5

0

파일 보기

파일: 20120403a.py 프로젝트: BIGtigr/xgcode

def get_response_content(fs):
    """
    Simulate an alignment on a sampled tree and write a BEAST xml document.
    The statements below call several sampling helpers in a fixed order,
    so reordering them may change the sampled results.
    @param fs: an object providing nleaves and nsites
    @return: the text accumulated in the output buffer, which starts with
    an xml declaration and a beast element
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    nvertices = nleaves * 2 - 1
    # nbranches is only referenced by the commented-out optimizer code below
    nbranches = nvertices - 1
    nsites = fs.nsites
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(fs.nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names
    # NOTE(review): zip stops at the shorter input, so leaves beyond
    # len(string.uppercase) get no name and the N[v] lookup below would fail
    N = dict(zip(range(nleaves), string.uppercase[:nleaves]))
    # copy the leaf names before internal vertex names are added to N
    N_leaves = dict(N)
    # get the internal vertex names
    v_to_leaves = R_to_v_to_leaves(R)
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            # name the vertex by joining the sorted names of its leaves
            N[v] = ''.join(sorted(N[leaf] for leaf in leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    # only vertices 0..nleaves-1 are used to build the alignment
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in range(nleaves)]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix() 
    ordered_states = list('ACGT') 
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states) 
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    # this tree uses the unscaled branch lengths B instead of B_subs
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    # write the xml document; the simulation summary goes into xml comments
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(list(string.uppercase[:nleaves]))
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    # NOTE(review): 'leaves' is the loop variable left over from the loop
    # over v_to_leaves above, so it holds the leaves of the last vertex in
    # sorted order and is undefined if v_to_leaves is empty;
    # confirm that this is the intended argument
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # the string literal below is a bare expression, so the mrca subset and
    # tmrcaStatistic output it contains is not executed
    """
    print >> out, '<!--'
    print >> out, 'create a list of taxa for which to constrain the mrca as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_subset_defn(N, v, leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'create a tmrcaStatistic that will record the height as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_stat_defn(N[v])
    """
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()