# Species-name parsing settings taken from the command-line options.
# NOTE(review): presumably extract_spname reads these two names; if this code
# runs inside a function they would need a `global` declaration to be seen
# there -- confirm against the enclosing scope.
SPECIES_NAME_POS = args.species_field
SPECIES_NAME_DELIMITER = args.species_delimiter

# Load the phylomeDB tree given on the command line as a newick file;
# extract_spname is passed as the species-naming callback.
t = PhyloTree(newick, sp_naming_function=extract_spname)

# Optional re-rooting: several names -> root on their common ancestor,
# a single name -> root on the node found by name via the `&` operator.
if args.root:
    outgroup = (
        t.get_common_ancestor(args.root)
        if len(args.root) > 1
        else t & args.root[0]
    )
    t.set_outgroup(outgroup)

# Unless disabled, detect speciation and duplication events using the
# species overlap algorithm used in phylomeDB.
if not args.skip_ortholog_detection:
    t.get_descendant_evol_events()

# Optional text rendering, showing the event-type attribute and node name
# on internal nodes as well as leaves.
if args.ascii:
    print(
        t.get_ascii(
            attributes=[args.evoltype_attr, "name"],
            show_internal=True,
        )
    )

# Optional newick dump carrying the event-type attribute as a feature.
if args.newick:
    print(
        t.write(
            features=[args.evoltype_attr],
            format_root_node=True,
        )
    )

# Optional call to the tree's own show() method.
if args.show:
    t.show()

# Always finish by exporting the tree as orthoXML.
export_as_orthoXML(t, args.database, args.evoltype_attr)
from ete3 import NCBITaxa from Bio import SeqIO from Bio.SeqRecord import SeqRecord input_tree = sys.argv[1] input_fasta = sys.argv[2] output_fasta_ordered_select = sys.argv[3] output_fasta_ordered_all = sys.argv[4] # There's a way to save these extra attributes, but it's a bit awkward (not supported by newick format) # So we fetch them anew each time. ncbi = NCBITaxa() tree = PhyloTree(input_tree, sp_naming_function=lambda name: name.split('.', 1)[0]) tax2names, tax2lineages, tax2rank = tree.annotate_ncbi_taxa() print tree.get_ascii(attributes=["name", "sci_name", "taxid"]) record_dict = SeqIO.to_dict(SeqIO.parse(input_fasta, "fasta")) # H**o sapiens # Macaca mulatta # Canis lupus familiaris # Mus musculus # Gallus gallus # Anolis carolinensis # Danio rerio sorted_fasta_select = [] sorted_fasta_select.append( SeqRecord(record_dict["9606.ENSP00000261448"].seq, "H**o sapiens", '', '')) sorted_fasta_select.append( SeqRecord(record_dict["9544.ENSMMUP00000011753"].seq, "Macaca mulatta", '',