예제 #1
0
    SPECIES_NAME_POS = args.species_field
    SPECIES_NAME_DELIMITER = args.species_delimiter

    # load a phylomeDB Tree provided as a newick file in the command line
    t = PhyloTree(newick, sp_naming_function=extract_spname)

    if args.root:
        if len(args.root) > 1:
            outgroup = t.get_common_ancestor(args.root)
        else:
            outgroup = t & args.root[0]
        t.set_outgroup(outgroup)

    if not args.skip_ortholog_detection:
        # detect speciation and duplication events using the species overlap
        # algorithm used in phylomeDB
        t.get_descendant_evol_events()

    if args.ascii:
        print(
            t.get_ascii(attributes=[args.evoltype_attr, "name"],
                        show_internal=True))

    if args.newick:
        print(t.write(features=[args.evoltype_attr], format_root_node=True))

    if args.show:
        t.show()

    export_as_orthoXML(t, args.database, args.evoltype_attr)
예제 #2
0
from ete3 import NCBITaxa
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

# Positional command-line arguments: the tree input, the FASTA input, and two
# output destinations (presumably file paths -- confirm against how the
# outputs are written further down).
# NOTE(review): `sys` and `PhyloTree` are not imported in the visible part of
# this file -- confirm they are imported above this excerpt.
input_tree = sys.argv[1]
input_fasta = sys.argv[2]
output_fasta_ordered_select = sys.argv[3]
output_fasta_ordered_all = sys.argv[4]

# There's a way to save these extra attributes, but it's a bit awkward (not
# supported by the newick format), so we fetch them anew each time.
ncbi = NCBITaxa()
# The species code of a node is the part of its name before the first '.'.
tree = PhyloTree(input_tree,
                 sp_naming_function=lambda name: name.split('.', 1)[0])
tax2names, tax2lineages, tax2rank = tree.annotate_ncbi_taxa()
# A parenthesised single-argument print is valid on both Python 2 and 3;
# the bare `print expr` statement form is a SyntaxError on Python 3.
print(tree.get_ascii(attributes=["name", "sci_name", "taxid"]))

# Load every FASTA record into a dict so sequences can be looked up by key.
record_dict = SeqIO.to_dict(SeqIO.parse(input_fasta, "fasta"))

# Homo sapiens
# Macaca mulatta
# Canis lupus familiaris
# Mus musculus
# Gallus gallus
# Anolis carolinensis
# Danio rerio
# Records are appended one by one in a fixed, hand-chosen species order; each
# new SeqRecord reuses the sequence of the looked-up entry and gets the
# species name as its identifier, with the remaining two text fields blank.
sorted_fasta_select = []
# Key prefix 9606 is the taxid part of the record name; the identifier was
# previously the mangled text "H**o sapiens" and is restored here.
sorted_fasta_select.append(
    SeqRecord(record_dict["9606.ENSP00000261448"].seq, "Homo sapiens", '', ''))
sorted_fasta_select.append(
    SeqRecord(record_dict["9544.ENSMMUP00000011753"].seq, "Macaca mulatta", '',