seqA MAEIPDETIQ QFMALT---H NIAVQYLSEF GDLNEALNSY YASQTDDIKD RREEAHQFMA seqB MAEIPDATIQ QFMALTNVSH NIAVQY--EF GDLNEALNSY YAYQTDDQKD RREEAHQFMA seqC MAEIPDATIQ ---ALTNVSH NIAVQYLSEF GDLNEALNSY YASQTDDQPD RREEAHQFMA seqD MAEAPDETIQ QFMALTNVSH NIAVQYLSEF GDLNEAL--- ---------- -REEAHQ--- LTNVSHQFMA LTNVSH LTNVSH---- ------ LTNVSH---- ------ -------FMA LTNVSH """ # Load a tree and link it to an alignment. As usual, 'alignment' can # be the path to a file or data in text format. t = PhyloTree("(((seqA,seqB),seqC),seqD);", alignment=fasta_txt, alg_format="fasta") #We can now access the sequence of every leaf node print "These are the nodes and its sequences:" for leaf in t.iter_leaves(): print leaf.name, leaf.sequence #seqD MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH #seqC MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH #seqA MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH #seqB MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH # # The associated alignment can be changed at any time t.link_to_alignment(alignment=iphylip_txt, alg_format="iphylip") # Let's check that sequences have changed print "These are the nodes and its re-linked sequences:" for leaf in t.iter_leaves(): print leaf.name, leaf.sequence #seqD MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAHQ----------FMALTNVSH #seqC MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAHQFMALTNVSH---------- #seqA MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAHQFMALTNVSHQFMALTNVSH
def calculate_nodes(self):
    """Score the internal nodes of the tree and store the results.

    A ``PhyloTree`` is built from ``self.tree_in`` and ``self.align_in``
    (FASTA) and rooted on its midpoint outgroup. When
    ``self.position_matrix`` is ``None`` it is computed through
    ``fp.retrieve_features`` / ``fp.get_positions_matrix``; leaves named in
    the deletion list returned by ``fp.retrieve_features`` are then removed
    from the tree.

    Every node receives a ``_nid`` equal to its preorder index. For each
    non-leaf node a score (rounded to 2 decimals, using ``self.calc_alg``)
    and a haplotype are computed and attached as node features; when
    ``self.compute_logos == "Y"`` a haplotype matrix and a logo figure (or
    ``None`` when there is no matrix) are attached as well.

    Side effects:
        Sets ``self.processed_tree``, ``self.node_scores``,
        ``self.node_haplotypes``, ``self.node_haplotype_matrices`` and
        ``self.node_haplotype_logos`` (and ``self.position_matrix`` when it
        was ``None``).

    Returns:
        None.

    On any ``Exception`` an error message is written to stderr and the
    process exits with status 1.
    """
    try:
        tree = PhyloTree(self.tree_in,
                         alignment=self.align_in,
                         alg_format="fasta")
        tree.set_outgroup(tree.get_midpoint_outgroup())

        leaf_deleting_list = set()
        if self.position_matrix is None:
            uniprot_hit_hash, leaf_deleting_list = fp.retrieve_features(
                self.study_features, self.table_info, self.min_eval,
                self.uniprot_info)
            self.position_matrix = fp.get_positions_matrix(
                uniprot_hit_hash, tree)
        # If we want to update the features, we have to delete the position
        # matrix first (with the update method).

        # Snapshot the leaves before deleting: removing nodes while the
        # iter_leaves() generator is still walking the tree is fragile.
        for leaf in list(tree.iter_leaves()):
            if leaf.name in leaf_deleting_list:
                leaf.delete()

        node_number = 0
        node_scores = {}
        node_haplotypes = {}
        node_haplotype_matrices = {}
        node_haplotype_logos = {}

        for index, node in enumerate(tree.traverse("preorder")):
            node._nid = index
            if node.is_leaf():
                continue

            node_sequence_matrix = fp.annotated_sequence_extractor(
                node, self.position_matrix, self.differentiate_gaps)

            node_score = round(
                fp.calculate_node_score(node_sequence_matrix, self.calc_alg),
                2)
            node.add_feature("node_score", node_score)
            node_scores[node_number] = node_score

            node_haplotype = fp.haplotype_parse(node_sequence_matrix)
            node.add_feature("node_haplotype", node_haplotype)
            node_haplotypes[node_number] = node_haplotype

            if self.compute_logos == "Y":
                node_haplotype_matrix = fp.haplotype_matrix_calculator(
                    node_sequence_matrix)
                node.add_feature("node_haplotype_matrix",
                                 node_haplotype_matrix)
                node_haplotype_matrices[node_number] = node_haplotype_matrix

                if node_haplotype_matrix is not None:
                    node_haplotype_logo = logomaker.Logo(
                        node_haplotype_matrix,
                        color_scheme="dmslogo_funcgroup",
                        show_spines=False).fig
                else:
                    node_haplotype_logo = None
                node.add_feature("node_haplotype_logo", node_haplotype_logo)
                node_haplotype_logos[node_number] = node_haplotype_logo

            # NOTE(review): the counter is advanced once per internal node, so
            # the result dictionaries are keyed 0..k-1 over internal nodes in
            # preorder — confirm this matches how callers look entries up.
            node_number += 1

        self.processed_tree = tree
        self.node_scores = node_scores
        self.node_haplotypes = node_haplotypes
        self.node_haplotype_matrices = node_haplotype_matrices
        self.node_haplotype_logos = node_haplotype_logos
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C and SystemExit are
        # not swallowed, and report what actually went wrong.
        sys.stderr.write("Error at calculating nodes.\n")
        sys.stderr.write("%s: %s\n" % (type(exc).__name__, exc))
        sys.exit(1)
# Command-line options: the gene tree to assess and the file listing the
# outgroup taxa.
parser.add_argument("-g", "--gene_tree",
                    help="Homolog tree to be assessed.",
                    required=True)
parser.add_argument("-og", "--outgroupf",
                    help="Outgroup taxon names, one per line.",
                    required=True)

# When the script is called without any argument, behave as if "-h" was given.
if not sys.argv[1:]:
    sys.argv.append("-h")
args = parser.parse_args()

# One outgroup taxon name per line, with surrounding whitespace stripped.
with open(args.outgroupf, "r") as ogf:
    og_list = [raw_line.strip() for raw_line in ogf]

# The species code of a leaf is the part of its name before the first "@".
tr = PhyloTree(args.gene_tree,
               sp_naming_function=lambda node: node.name.split("@")[0])

# Species of every leaf that belongs to one of the listed outgroup taxa
# (one entry per matching leaf, so repeated species are kept).
og_in_tr = [leaf.species
            for leaf in tr.iter_leaves()
            if leaf.species in og_list]

# check_monophyly returns a tuple; only its first element is reported.
monophyly_result = tr.check_monophyly(values=og_in_tr, target_attr="species")
print(args.gene_tree + "\t" + str(monophyly_result[0]))
# Load the tree and the FASTA alignment that will be drawn next to it.
t = PhyloTree(tree_input, format=1, quoted_node_names=True)
seqs = SeqGroup(alg, format="fasta")

# One shared style for all nodes: node size 0 and line width 2.
nodestyle1 = NodeStyle()
for style_key, style_value in (("size", 0),
                               ("vt_line_width", 2),
                               ("hz_line_width", 2)):
    nodestyle1[style_key] = style_value

for node in t.traverse():
    node.set_style(nodestyle1)

# Background colour per residue symbol; identical for every leaf, so it is
# built once instead of on each loop iteration.
aa_bg_colors = {
    'G': 'Khaki', 'A': 'Khaki', 'S': 'Khaki', 'T': 'Khaki',
    'C': 'LightGreen', 'V': 'LightGreen', 'I': 'LightGreen',
    'L': 'LightGreen', 'P': 'LightGreen', 'F': 'LightGreen',
    'Y': 'LightGreen', 'M': 'YellowGreen', 'W': 'LightGreen',
    'N': 'Thistle', 'Q': 'Thistle', 'H': 'Thistle',
    'D': 'DarkSalmon', 'E': 'DarkSalmon',
    'K': 'SkyBlue', 'R': 'SkyBlue',
    'X': 'Black', '-': 'White',
}

for leaf in t.iter_leaves():
    # Sequence of this leaf, looked up in the alignment by leaf name.
    item = seqs.get_seq(leaf.name)
    # NOTE(review): name_face is built but never attached to the leaf in the
    # visible code — confirm whether it is still needed.
    name_face = AttrFace(item, fsize=24)
    Bars = SequenceFace(item,
                        seqtype='aa',
                        fsize=24,
                        bg_colors=aa_bg_colors,
                        fg_colors=None,
                        codon=None,
                        col_w=1.5,
                        alt_col_w=3,
                        special_col=None,
                        interactive=False)
    # Column 2 of the "aligned" face area.
    leaf.add_face(Bars, 2, "aligned")

# Same picture in two output formats.
for image_path in ("tree_and_alignment.png", "tree_and_alignment.svg"):
    t.render(image_path, h=100, units="mm")

# Second copy of the tree, given the same node style.
t2 = PhyloTree(tree_input, format=1, quoted_node_names=True)
for node in t2.traverse():
    node.set_style(nodestyle1)
# Append the last two hand-picked species to the selected set and write it out.
sorted_fasta_select.append(
    SeqRecord(record_dict["28377.ENSACAP00000003186"].seq,
              "Anolis carolinensis", '', ''))
sorted_fasta_select.append(
    SeqRecord(record_dict["7955.ENSDARP00000020399"].seq,
              "Danio rerio", '', ''))
count = SeqIO.write(sorted_fasta_select, output_fasta_ordered_select, "fasta")
print("Saved %i records from %s to %s" %
      (count, input_fasta, output_fasta_ordered_select))

# Note: in theory we can sort the fasta records using the link_to_alignment
# method. Our method is more robust.
# tree.link_to_alignment(input_fasta, alg_format="fasta")

# Collect records in tree leaf order, starting at the first "Homo sapiens"
# leaf; every leaf before it is skipped.
sorted_fasta_all = []
skip = True
for leaf in tree.iter_leaves():
    # The iterative way to get the species name. Not needed since we included
    # this operation in the generator above.
    # taxid = int(leaf.name.split(".",1)[0])
    # species = ncbi.get_taxid_translator([taxid])
    species = leaf.sci_name
    # The species literal had been mangled to "H**o sapiens", which can never
    # match, so `skip` was never cleared and no record was collected.
    if species == "Homo sapiens":
        skip = False
    if not skip:
        seq = record_dict[leaf.name].seq
        # print() with a single argument behaves the same on Python 2 and 3,
        # and matches the print(...) calls used elsewhere in this script.
        print(species)
        print(seq)
        record = SeqRecord(seq, species, '', '')
        sorted_fasta_all.append(record)

count = SeqIO.write(sorted_fasta_all, output_fasta_ordered_all, "fasta")
# NOTE(review): the argument tuple of this final print was cut off in the
# reviewed excerpt; it is completed here by analogy with the matching print
# for the selected set above — confirm against the full file.
print("Saved %i records from %s to %s" %
      (count, input_fasta, output_fasta_ordered_all))
"--iterate", help="Number of resamples [1000]", type=int, default=1000) args = ap.parse_args() og_list = [] with open(args.outgroupf, "r") as ogf: for line in ogf: og_list.append(line.strip()) tr = PhyloTree(args.tree, sp_naming_function=lambda node: node.name.split("@")[0]) og_in_tr = [] for l in tr.iter_leaves(): if l.name.split("@")[0] in og_list: og_in_tr.append(l.name) all_l = [b.name for b in tr.iter_leaves()] ing = list(set(all_l) - set(og_in_tr)) tr.set_outgroup(tr.get_common_ancestor(*og_in_tr)) tr.prune(ing, preserve_branch_length=True) all_l = list(set(all_l) - set(og_in_tr)) trlen = calc_trlen(tr) sub_trlen = calc_sub_trlen(tr) resamp_dict = resample(sub_trlen, args.iterate)