seqA   MAEIPDETIQ QFMALT---H NIAVQYLSEF GDLNEALNSY YASQTDDIKD RREEAHQFMA
      seqB   MAEIPDATIQ QFMALTNVSH NIAVQY--EF GDLNEALNSY YAYQTDDQKD RREEAHQFMA
      seqC   MAEIPDATIQ ---ALTNVSH NIAVQYLSEF GDLNEALNSY YASQTDDQPD RREEAHQFMA
      seqD   MAEAPDETIQ QFMALTNVSH NIAVQYLSEF GDLNEAL--- ---------- -REEAHQ---
             LTNVSHQFMA LTNVSH
             LTNVSH---- ------
             LTNVSH---- ------
             -------FMA LTNVSH
"""
# Load a tree and link it to an alignment. As usual, 'alignment' can
# be the path to a file or data in text format.
t = PhyloTree("(((seqA,seqB),seqC),seqD);", alignment=fasta_txt, alg_format="fasta")

# We can now access the sequence of every leaf node.
# Each print() receives one pre-formatted string, so the output is the same
# whether the script runs under Python 2 or Python 3.
print("These are the nodes and its sequences:")
for leaf in t.iter_leaves():
    print("%s %s" % (leaf.name, leaf.sequence))
#seqD MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
#seqC MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
#seqA MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH
#seqB MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH
#
# The associated alignment can be changed at any time
t.link_to_alignment(alignment=iphylip_txt, alg_format="iphylip")
# Let's check that sequences have changed
print("These are the nodes and its re-linked sequences:")
for leaf in t.iter_leaves():
    print("%s %s" % (leaf.name, leaf.sequence))
#seqD MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAHQ----------FMALTNVSH
#seqC MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAHQFMALTNVSH----------
#seqA MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAHQFMALTNVSHQFMALTNVSH
예제 #2
0
    def calculate_nodes(self):
        """Score every internal node of the tree and store the results.

        The tree is loaded from ``self.tree_in`` linked to the FASTA
        alignment ``self.align_in`` and rooted on its midpoint outgroup.
        When ``self.position_matrix`` is ``None`` it is built from the
        features returned by ``fp.retrieve_features``, and the leaves that
        call reports for deletion are removed from the tree.

        Every node receives a preorder index in ``_nid``. For each internal
        node the method computes a score (rounded to two decimals) and a
        haplotype and, when ``self.compute_logos == "Y"``, a haplotype
        matrix and the figure of its logomaker logo (``None`` when there
        is no matrix). Each value is attached to the node as a feature and
        stored in a dictionary keyed by a running counter of internal nodes.

        Sets ``self.processed_tree``, ``self.node_scores``,
        ``self.node_haplotypes``, ``self.node_haplotype_matrices`` and
        ``self.node_haplotype_logos``.

        On any error a message is written to stderr and the process exits
        with status 1.
        """
        try:
            tree = PhyloTree(self.tree_in,
                             alignment=self.align_in,
                             alg_format="fasta")
            tree.set_outgroup(tree.get_midpoint_outgroup())

            leaf_deleting_list = set()
            # Identity test on purpose: "== None" may be evaluated
            # element-wise if the cached matrix is array-like, which would
            # make the condition itself raise.
            if self.position_matrix is None:
                uniprot_hit_hash, leaf_deleting_list = fp.retrieve_features(
                    self.study_features, self.table_info, self.min_eval,
                    self.uniprot_info)
                # To refresh the features, the position matrix has to be
                # cleared first (with the update method).
                self.position_matrix = fp.get_positions_matrix(
                    uniprot_hit_hash, tree)

            # Collect the leaves first so that none is deleted while the
            # tree is still being iterated.
            for leaf in list(tree.iter_leaves()):
                if leaf.name in leaf_deleting_list:
                    leaf.delete()

            node_number = 0  # counts internal nodes only
            node_scores = {}
            node_haplotypes = {}
            node_haplotype_matrices = {}
            node_haplotype_logos = {}
            for index, node in enumerate(tree.traverse("preorder")):
                node._nid = index
                if node.is_leaf():
                    continue

                node_sequence_matrix = fp.annotated_sequence_extractor(
                    node, self.position_matrix, self.differentiate_gaps)

                node_score = round(
                    fp.calculate_node_score(node_sequence_matrix,
                                            self.calc_alg), 2)
                node.add_feature("node_score", node_score)
                node_scores[node_number] = node_score

                node_haplotype = fp.haplotype_parse(node_sequence_matrix)
                node.add_feature("node_haplotype", node_haplotype)
                node_haplotypes[node_number] = node_haplotype

                if self.compute_logos == "Y":
                    node_haplotype_matrix = fp.haplotype_matrix_calculator(
                        node_sequence_matrix)
                    node.add_feature("node_haplotype_matrix",
                                     node_haplotype_matrix)
                    node_haplotype_matrices[node_number] = node_haplotype_matrix

                    if node_haplotype_matrix is not None:
                        # Only the figure of the logo is kept.
                        node_haplotype_logo = logomaker.Logo(
                            node_haplotype_matrix,
                            color_scheme="dmslogo_funcgroup",
                            show_spines=False).fig
                    else:
                        node_haplotype_logo = None
                    node.add_feature("node_haplotype_logo",
                                     node_haplotype_logo)
                    node_haplotype_logos[node_number] = node_haplotype_logo

                node_number += 1

            self.processed_tree = tree
            self.node_scores = node_scores
            self.node_haplotypes = node_haplotypes
            self.node_haplotype_matrices = node_haplotype_matrices
            self.node_haplotype_logos = node_haplotype_logos

        except Exception as exc:
            # Report the cause as well, so the failure can be diagnosed;
            # KeyboardInterrupt and SystemExit are left to propagate.
            sys.stderr.write("Error at calculating nodes.\n")
            sys.stderr.write("%s: %s\n" % (type(exc).__name__, exc))
            sys.exit(1)
    # Required inputs: the tree to assess and the file naming the outgroups.
    parser.add_argument(
        "-g", "--gene_tree",
        help="Homolog tree to be assessed.",
        required=True,
    )
    parser.add_argument(
        "-og", "--outgroupf",
        help="Outgroup taxon names, one per line.",
        required=True,
    )

    # Called without any argument: behave as if "-h" had been given.
    if not sys.argv[1:]:
        sys.argv.append("-h")

    args = parser.parse_args()

    # One outgroup name per line, with surrounding whitespace removed.
    with open(args.outgroupf, "r") as ogf:
        og_list = [line.strip() for line in ogf]

    # A leaf's species is the part of its name before the first "@".
    tr = PhyloTree(args.gene_tree,
                   sp_naming_function=lambda node: node.name.split("@")[0])

    # Species of every leaf that is listed as an outgroup (duplicates kept).
    og_in_tr = [
        leaf.species for leaf in tr.iter_leaves() if leaf.species in og_list
    ]

    # Only the first element of the check_monophyly() result is reported.
    monophyly_result = tr.check_monophyly(values=og_in_tr,
                                          target_attr="species")
    print(args.gene_tree + "\t" + str(monophyly_result[0]))

# Load the tree and the FASTA alignment whose sequences are drawn next to it.
t = PhyloTree(tree_input, format=1, quoted_node_names=True)
seqs = SeqGroup(alg, format="fasta")

# One style shared by every node: size 0 and line width 2 in both directions.
nodestyle1 = NodeStyle()
for style_key, style_value in (("size", 0),
                               ("vt_line_width", 2),
                               ("hz_line_width", 2)):
    nodestyle1[style_key] = style_value

for node in t.traverse():
    node.set_style(nodestyle1)

# Background colour used for each residue letter in the sequence faces.
residue_bg_colors = {
    'G': 'Khaki', 'A': 'Khaki', 'S': 'Khaki', 'T': 'Khaki',
    'C': 'LightGreen', 'V': 'LightGreen', 'I': 'LightGreen',
    'L': 'LightGreen', 'P': 'LightGreen', 'F': 'LightGreen',
    'Y': 'LightGreen', 'M': 'YellowGreen', 'W': 'LightGreen',
    'N': 'Thistle', 'Q': 'Thistle', 'H': 'Thistle',
    'D': 'DarkSalmon', 'E': 'DarkSalmon',
    'K': 'SkyBlue', 'R': 'SkyBlue',
    'X': 'Black', '-': 'White',
}

for leaf in t.iter_leaves():
    item = seqs.get_seq(leaf.name)
    # NOTE(review): name_face is not used anywhere in the visible code.
    name_face = AttrFace(item, fsize=24)
    Bars = SequenceFace(
        item,
        seqtype='aa',
        fsize=24,
        # Every face gets its own copy of the colour table.
        bg_colors=dict(residue_bg_colors),
        fg_colors=None,
        codon=None,
        col_w=1.5,
        alt_col_w=3,
        special_col=None,
        interactive=False,
    )
    leaf.add_face(Bars, 2, "aligned")

# Render the same figure once per output file.
for outfile in ("tree_and_alignment.png", "tree_and_alignment.svg"):
    t.render(outfile, h=100, units="mm")

# Second, independent copy of the tree with the same node style applied.
t2 = PhyloTree(tree_input, format=1, quoted_node_names=True)
for node in t2.traverse():
    node.set_style(nodestyle1)
예제 #5
0
sorted_fasta_select.append(
    SeqRecord(record_dict["28377.ENSACAP00000003186"].seq,
              "Anolis carolinensis", '', ''))
sorted_fasta_select.append(
    SeqRecord(record_dict["7955.ENSDARP00000020399"].seq, "Danio rerio", '',
              ''))
count = SeqIO.write(sorted_fasta_select, output_fasta_ordered_select, "fasta")
print("Saved %i records from %s to %s" %
      (count, input_fasta, output_fasta_ordered_select))

# Note: in theory we can sort the fasta records using the link_to_alignment method.
# Our method is more robust.
# tree.link_to_alignment(input_fasta, alg_format="fasta")
sorted_fasta_all = []
# Records are collected in leaf order, starting at the human leaf; every leaf
# visited before it is skipped.
skip = True
for leaf in tree.iter_leaves():
    # The iterative way to get the species name. Not needed since we included this operation in the generator above.
    # taxid = int(leaf.name.split(".",1)[0])
    # species = ncbi.get_taxid_translator([taxid])
    species = leaf.sci_name
    # The name must be spelled out in full: a masked spelling such as
    # "H**o sapiens" never equals a scientific name, so nothing would ever
    # be collected.
    if species == "Homo sapiens":
        skip = False
    if not skip:
        seq = record_dict[leaf.name].seq
        # Single-argument print() calls behave the same on Python 2 and 3.
        print(species)
        print(seq)
        record = SeqRecord(seq, species, '', '')
        sorted_fasta_all.append(record)

count = SeqIO.write(sorted_fasta_all, output_fasta_ordered_all, "fasta")
print("Saved %i records from %s to %s" %
                    "--iterate",
                    help="Number of resamples [1000]",
                    type=int,
                    default=1000)
    args = ap.parse_args()

    # Read the outgroup names, one per line, stripping surrounding whitespace.
    og_list = []
    with open(args.outgroupf, "r") as ogf:
        for line in ogf:
            og_list.append(line.strip())

    # A leaf's species is the part of its name before the first "@".
    tr = PhyloTree(args.tree,
                   sp_naming_function=lambda node: node.name.split("@")[0])

    # Full leaf names whose "@"-prefix is listed as an outgroup.
    og_in_tr = []
    for l in tr.iter_leaves():
        if l.name.split("@")[0] in og_list:
            og_in_tr.append(l.name)

    # Ingroup = every leaf name that was not collected as an outgroup.
    all_l = [b.name for b in tr.iter_leaves()]
    ing = list(set(all_l) - set(og_in_tr))

    # Root on the common ancestor of the outgroup leaves, then keep only the
    # ingroup leaves while preserving branch lengths.
    # NOTE(review): assumes og_in_tr is not empty - confirm how
    # get_common_ancestor() behaves with zero or one leaf name.
    tr.set_outgroup(tr.get_common_ancestor(*og_in_tr))
    tr.prune(ing, preserve_branch_length=True)
    # Same set difference as `ing` above, now stored back into all_l.
    all_l = list(set(all_l) - set(og_in_tr))

    # calc_trlen, calc_sub_trlen and resample are defined outside this view.
    # NOTE(review): presumably total tree length, per-subtree lengths and a
    # resampling of the latter args.iterate times - confirm against the helpers.
    trlen = calc_trlen(tr)

    sub_trlen = calc_sub_trlen(tr)

    resamp_dict = resample(sub_trlen, args.iterate)