def main(newick: str, output_newick: str, samples: Iterable[Sample]):
    """Relabel the nodes of a Newick tree with sample names.

    Reads the first tree from the file ``newick``. Every node whose name
    equals the stringified ``workflow_run_id`` of one of ``samples`` is renamed
    to that sample's ``sample_name``; all other names are left as they are.
    The resulting tree is written to the file ``output_newick``.
    """
    # Keys are stringified because node names read from Newick are compared
    # against them directly.
    run_id_to_name = {}
    for sample in samples:
        run_id_to_name[str(sample["workflow_run_id"])] = sample["sample_name"]

    with open(newick) as src, open(output_newick, "w") as dst:
        tree = next(NewickIO.parse(src))
        for clade in tree.find_clades(order="level"):
            current = clade.name
            clade.name = run_id_to_name.get(current, current)
        NewickIO.write([tree], dst)
Ejemplo n.º 2
0
def nexus_text(obj, colour_branches, colours, **kwargs):
    """Return a Nexus-format string for one tree or an iterable of trees.

    Adapted from Bio.Phylo.NexusIO
    (http://biopython.org/DIST/docs/api/Bio.Phylo.NexusIO-pysrc.html), with
    support for colouring tip names.

    obj:             a tree, or an iterable of trees.
    colour_branches: when true, tip names are emitted unchanged (they are
                     expected to carry the colouring annotation already);
                     otherwise each tip name is passed through
                     ``colour_taxon`` together with ``colours``.
    colours:         forwarded to ``colour_taxon``.
    kwargs:          forwarded to ``NewickIO.Writer.to_strings``.

    NB: compensates for an apparent Biopython bug whereby an additional colon
    is added to confidence values in the output tree strings.
    """
    try:
        trees = list(obj)  # assume iterable
    except TypeError:
        trees = [obj]

    newick_strings = NewickIO.Writer(trees).to_strings(
        plain=False, plain_newick=True, **kwargs)
    nexus_trees = []
    for number, nwk in enumerate(newick_strings, start=1):
        nexus_trees.append(TREE_TEMPLATE % {'index': number, 'tree': nwk})

    # Collect the tip labels of every tree, adding the colour annotation here
    # only when it was not already applied through branch colouring.
    tax_labels = []
    for tree in trees:
        for tip in tree.get_terminals():
            label = str(tip.name)
            if not colour_branches:
                label = colour_taxon(label, colours)
            tax_labels.append(label)

    text = NEX_TEMPLATE % {
        'count': len(tax_labels),
        'labels': ' '.join(tax_labels),   # taxlabels all on one line
        'trees': '\n'.join(nexus_trees),  # trees on separate lines
    }
    # Corrects for the Biopython bug, e.g. ":50.00:" -> "50.00:"
    return re.sub(r':([0-9]{1,3}\.[0-9]{1,3}):', r'\1:', text)
Ejemplo n.º 3
0
    def to_nexus(self, filename):
        """Write the tree to ``filename`` in the Nexus dialect used by BayesTraits.

        Bio.Phylo.NexusIO is deliberately not used, because BayesTraitsV2
        requires a different dialect of the Nexus format: a "Translate" block
        declares a number->taxon mapping, and the tree description refers to
        the taxa by those numbers instead of by their (long) names.
        """
        # Work on a copy so that self.tree keeps its original node names.
        tree = copy.deepcopy(self.tree)
        tips = tree.get_terminals()

        # Number the terminal clades starting from 1.
        names_to_ints = {}
        for number, tip in enumerate(tips, start=1):
            names_to_ints[tip.name] = number

        # Replace each terminal name with its number.
        for tip in tips:
            tip.name = str(names_to_ints[tip.name])

        # Inner nodes carry no name in the output.
        for inner in tree.get_nonterminals():
            inner.name = None

        translate_block = ',\n'.join(
            '%d %s' % (number, taxon)
            for taxon, number in names_to_ints.items())
        tree_string = next(
            NewickIO.Writer([tree]).to_strings(plain=False, plain_newick=True))
        nexus_tree = NEX_TEMPLATE % {
            'translate': translate_block,
            'tree': tree_string,
        }

        with open(filename, 'w') as handle:
            handle.write(nexus_tree)
Ejemplo n.º 4
0
def write_nexus_trees_to_bayestraits(nx, handle, **kwargs):
    """Write the trees of ``nx`` to ``handle`` as Nexus with a translate block.

    Modified from Bio.Phylo.NexusIO.write(): in addition to the trees, the
    entries of ``nx.translate`` are emitted as a translate block.

    nx:      object exposing ``trees`` and a ``translate`` mapping.
    handle:  writable text handle that receives the Nexus text.
    kwargs:  forwarded to ``NewickIO.Writer.to_strings``.

    Returns the number of trees written.
    """
    trees = BioNexusTrees_to_BioPhylo(nx.trees)
    newick_strings = NewickIO.Writer(trees).to_strings(
        plain=False, plain_newick=True, **kwargs
    )
    nexus_trees = []
    for number, nwk in enumerate(newick_strings, start=1):
        nexus_trees.append(TREE_TEMPLATE % {"index": number, "tree": nwk})

    # Each (key, value) item of the translate mapping becomes "<int> <name>".
    translate = []
    for id_name in nx.translate.items():
        translate.append("%d %s" % id_name)

    # Taxa are gathered per tree, so a taxon shared by several trees is listed
    # once for each of them. The original author noted these labels are unused
    # by the BayesTraits output format.
    tax_labels = []
    for nexus_tree in nx.trees:
        tax_labels.extend(nexus_tree.get_taxa())

    text = NEX_TEMPLATE % {
        "count": len(tax_labels),
        "labels": " ".join(tax_labels),
        "trees": "\n".join(nexus_trees),
        "translate": ",\n ".join(translate),
    }
    handle.write(text)
    return len(nexus_trees)
Ejemplo n.º 5
0
def write(obj, handle, **kwargs):
    """Write a new Nexus file containing the given trees.

    A simple Nexus template is filled in with the trees serialized by the
    NewickIO writer plus the taxon labels of all terminal clades.

    obj:     iterable of trees.
    handle:  writable text handle that receives the Nexus text.
    kwargs:  forwarded to ``NewickIO.Writer.to_strings``.

    Returns the number of trees written.
    """
    trees = list(obj)
    newick_strings = NewickIO.Writer(trees).to_strings(
        plain=False, plain_newick=True, **kwargs)

    nexus_trees = []
    for number, nwk in enumerate(newick_strings, start=1):
        nexus_trees.append(TREE_TEMPLATE % {'index': number, 'tree': nwk})

    # One label per terminal clade, tree by tree.
    tax_labels = []
    for tree in trees:
        for tip in tree.get_terminals():
            tax_labels.append(str(tip.name))

    handle.write(NEX_TEMPLATE % {
        'count': len(tax_labels),
        'labels': ' '.join(tax_labels),
        'trees': '\n'.join(nexus_trees),
    })
    return len(nexus_trees)
Ejemplo n.º 6
0
def get_fam(rfid):
    '''Get a family including tree and sequence information
    from an Rfam data dump stored in data/rfam

    inputs:
      rfid:   rfam family id.

    outputs:
      ali:    the first alignment parsed from the family's fasta file.
      tree:   the first tree parsed from the family's seed_tree file.
      info:   the object unpickled from the family's metadata file
              (information parsed from the original stockholm file).

    raises:
      IOError/OSError if one of the three files cannot be opened;
      StopIteration if the alignment or tree file contains no record.
    '''
    meta_path = cfg.dataPath('rfam/family_metas/{0}.pickle'.format(rfid))
    ali_path = cfg.dataPath('rfam/family_alis/{0}.fa'.format(rfid))
    tree_path = cfg.dataPath('rfam/Rfam.seed_tree/{0}.seed_tree'.format(rfid))

    # Context managers close the handles that were previously leaked. The
    # pickle is opened in binary mode, which Python 3 requires.
    with open(meta_path, 'rb') as fmeta, open(ali_path) as fali:
        # next() works on Python 2.6+ and 3, unlike the .next() method.
        ali = next(aio.parse(fali, 'fasta'))
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe as long as the data dump is produced locally and trusted.
        info = pickle.load(fmeta)

    with open(tree_path) as ftree:
        tree = next(nio.parse(ftree))
    return ali, tree, info
Ejemplo n.º 7
0
    def test_phylotree(self):
        """Run the phylotree workflow once and check each of its outputs.

        Checked in order: the set of output keys, the node names in the Newick
        tree, the sample pairs in the SKA distances table, the record IDs in
        the variants FASTA, the NCBI metadata JSON, and that every identifier
        occurs exactly twice in the clustermap SVG text.
        """
        sample_names = []
        for sample in self.common_inputs["samples"]:
            sample_names.append(sample["sample_name"])

        outputs = self.run_miniwdl()["outputs"]

        expected_keys = [
            "phylotree.clustermap_png",
            "phylotree.clustermap_svg",
            "phylotree.ncbi_metadata_json",
            "phylotree.phylotree_newick",
            "phylotree.ska_distances",
            "phylotree.variants",
        ]
        self.assertCountEqual(outputs.keys(), expected_keys)

        # Every named node of the tree must be a sample or an accession.
        with open(outputs["phylotree.phylotree_newick"]) as newick_file:
            tree = next(NewickIO.parse(newick_file))
            named_nodes = []
            for clade in tree.get_terminals() + tree.get_nonterminals():
                if clade.name:
                    named_nodes.append(clade.name)
            self.assertCountEqual(named_nodes,
                                  sample_names + self.accession_ids)

        # The distance table must hold each unordered pair of identifiers.
        identifiers = sorted(sample_names + self.accession_ids)
        with open(outputs["phylotree.ska_distances"]) as distances_file:
            pairs = []
            for row in DictReader(distances_file, delimiter="\t"):
                pairs.append(sorted([row["Sample 1"], row["Sample 2"]]))
            expected = []
            for first in identifiers:
                for second in identifiers:
                    if first < second:
                        expected.append([first, second])
            self.assertCountEqual(pairs, expected)

        with open(outputs["phylotree.variants"]) as variants_file:
            record_ids = [
                record.id for record in SeqIO.parse(variants_file, "fasta")
            ]
            self.assertCountEqual(identifiers, record_ids)

        expected_metadata = {
            "NC_012532.1": {
                "name": "Zika virus, complete genome",
                "country": "Uganda",
            },
            "NC_035889.1": {
                "name":
                "Zika virus isolate ZIKV/H. sapiens/Brazil/Natal/2015, complete genome",
                "country": "Brazil: Rio Grande do Norte, Natal",
                "collection_date": "2015",
            },
        }
        with open(outputs["phylotree.ncbi_metadata_json"]) as metadata_file:
            self.assertEqual(json.load(metadata_file), expected_metadata)

        with open(outputs["phylotree.clustermap_svg"]) as svg_file:
            full_text = "\n".join(svg_file.readlines())
            for name in sample_names + self.accession_ids:
                self.assertEqual(full_text.count(name), 2, name)
Ejemplo n.º 8
0
Archivo: phylo.py Proyecto: xzy3/QuaSim
def read_fasta_or_newick_and_return_tree(path, nwk_path=None, patt=None):
    """Load or build a rooted tree from a FASTA alignment or a Newick file.

    The input kind is chosen from the suffix of ``path.name``:

    * FASTA: the alignment is read, records whose ``get_count(record, patt)``
      is not above ``MIN_COUNT`` are dropped, the global ``NUM_OF_VIRIONS`` is
      set to the summed count of the remaining records, and a tree is built
      with ``build_phylogenetic_tree``. If ``nwk_path`` is given and a tree
      was built, the tree is also written there in Newick format.
    * Newick: the first tree in the file is parsed.

    path:      object with a ``name`` attribute that is also accepted by
               ``AlignIO.read`` / ``NewickIO.parse``.
    nwk_path:  optional destination passed to ``NewickIO.write``.
    patt:      forwarded to ``get_count``.

    Returns the tree, rooted at its midpoint if it was not already rooted, or
    ``None`` when two or fewer sequences remain or no tree could be built.

    Raises ValueError when ``path.name`` has neither a FASTA nor a Newick
    extension (previously this surfaced as an UnboundLocalError).
    """
    global NUM_OF_VIRIONS
    if path.name.endswith(tuple(FASTA_EXTENSIONS)):
        seqs = AlignIO.read(path, FASTA)
        seqs._records = [x for x in seqs if get_count(x, patt) > MIN_COUNT]
        NUM_OF_VIRIONS = int(sum(get_count(x, patt) for x in seqs))

        if len(seqs) <= 2:
            return None
        tree = build_phylogenetic_tree(seqs)
        if tree is None:
            # The builder may yield no tree; report that the same way as the
            # too-few-sequences case instead of failing on tree.rooted below.
            return None
        if nwk_path is not None:
            NewickIO.write([tree], nwk_path)
    elif path.name.endswith(tuple(NEWICK_EXTENSIONS)):
        # next() works on Python 2.6+ and 3, unlike the .next() method.
        tree = next(NewickIO.parse(path))
    else:
        raise ValueError(
            "unsupported file extension: {0}".format(path.name))

    # Root the tree if necessary
    if not tree.rooted:
        tree.root_at_midpoint()

    return tree
Ejemplo n.º 9
0
def readOneTree(stream):
	"""Read one Newick-formatted tree from ``stream``.

	Lines whose first non-blank character is '#' are treated as comments and
	ignored, as are blank lines. The remaining lines are stripped, joined and
	parsed; the first tree found is returned.

	Raises StopIteration if the stream contains no tree.
	"""
	kept = []
	for line in stream.readlines():
		stripped = line.strip()
		# Blank lines used to raise IndexError on stripped[0]; skip them.
		if not stripped or stripped.startswith('#'):
			continue
		kept.append(stripped)
	trees = NewickIO.parse(StringIO(''.join(kept)))
	return next(trees)
Ejemplo n.º 10
0
def readOneTree(stream):
	"""Read one Newick-formatted tree from ``stream``.

	Lines whose first non-blank character is '#' are treated as comments and
	ignored, as are blank lines. The remaining lines are stripped, joined and
	parsed; the first tree found is returned.

	Raises StopIteration if the stream contains no tree.
	"""
	kept = []
	for line in stream.readlines():
		stripped = line.strip()
		# Blank lines used to raise IndexError on stripped[0]; skip them.
		if not stripped or stripped.startswith('#'):
			continue
		kept.append(stripped)
	trees = NewickIO.parse(StringIO(''.join(kept)))
	return next(trees)
Ejemplo n.º 11
0
def main():
    """Command-line entry point: relabel a Newick tree file using a cutoff.

    Positional arguments ``infile`` and ``outfile`` are optional and default
    to stdin and stdout. ``--cutoff`` (int, default 75) is handed to
    ``NewickCutoff``; the trees it reads and relabels are written to the
    output in Newick format.
    """
    prog = sys.argv[0]
    # Adjacent literals are concatenated without a separator, so each piece
    # must carry its own trailing space.
    description = ('Parse newick tree and perform action on '
                   'each non-root node')
    parser = argparse.ArgumentParser(prog=prog, description=description)
    parser.add_argument('infile', nargs='?', type=argparse.FileType(),
                        help='a Newick treefile')
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'),
                        help='changed Newick outfile')
    parser.add_argument('--cutoff', dest='cutoff', nargs='?', type=int, default=75,
                        help='value at or beneath which no inner node '
                             'confidences are shown any more')
    options = parser.parse_args()

    infile = options.infile or sys.stdin
    outfile = options.outfile or sys.stdout
    cutoff = options.cutoff
    newick = NewickCutoff(infile, outfile, cutoff)
    trees = newick.readtrees()
    trees = newick.relabeltree(trees)

    NewickIO.write(trees, outfile)
Ejemplo n.º 12
0
def get_fam(rfid):
    '''Get a family including tree and sequence information
    from an Rfam data dump stored in data/rfam

    inputs:
      rfid:   rfam family id.

    outputs:
      ali:    the first alignment parsed from the family's fasta file.
      tree:   the first tree parsed from the family's seed_tree file.
      info:   the object unpickled from the family's metadata file
              (information parsed from the original stockholm file).

    raises:
      IOError/OSError if one of the three files cannot be opened;
      StopIteration if the alignment or tree file contains no record.
    '''
    meta_path = cfg.dataPath('rfam/family_metas/{0}.pickle'.format(rfid))
    ali_path = cfg.dataPath('rfam/family_alis/{0}.fa'.format(rfid))
    tree_path = cfg.dataPath('rfam/Rfam.seed_tree/{0}.seed_tree'.format(rfid))

    # Context managers close the handles that were previously leaked. The
    # pickle is opened in binary mode, which Python 3 requires.
    with open(meta_path, 'rb') as fmeta, open(ali_path) as fali:
        # next() works on Python 2.6+ and 3, unlike the .next() method.
        ali = next(aio.parse(fali, 'fasta'))
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe as long as the data dump is produced locally and trusted.
        info = pickle.load(fmeta)

    with open(tree_path) as ftree:
        tree = next(nio.parse(ftree))
    return ali, tree, info
                        code)
    if len(mappings) <= 0:
        log.warning("empty mappings file")
    return mappings


def printtrees(trees):
    """Draw every tree in ``trees`` with ``Phylo.draw_ascii``, in order."""
    for current_tree in trees:
        Phylo.draw_ascii(current_tree)


# --- Script body: relabel the trees of a Newick file using a mapping file ---
# Usage (from the argv indices read below): <script> <treefile> <mappingfile>

# NOTE: despite its name, this handler sends log records to stderr, which
# keeps stdout free for the Newick output written at the end.
stdout_handler = logging.StreamHandler(sys.stderr)
handlers = [stdout_handler]
logging.basicConfig(level=logging.INFO,
                    format='[%(levelname)s - %(message)s]',
                    handlers=handlers)
log = logging.getLogger('LOGGER_NAME')

# Second command-line argument: the mappings file.
mappingfile = sys.argv[2]
mappings = readmappings(mappingfile)
# Passed to reportMappings() below; presumably filled in by relabeltree() as a
# module-level side effect -- TODO confirm against relabeltree.
mappedstrings = {}

# First command-line argument: the tree file.
treefile = sys.argv[1]
trees = readtrees(treefile)

trees = relabeltree(trees)
reportMappings(mappedstrings)
# Debugging aid: uncomment to draw the relabelled trees -> printtrees(trees)

# Emit the relabelled trees in Newick format on stdout.
NewickIO.write(trees, sys.stdout)
Ejemplo n.º 14
0
            n.name = 'n{}'.format(i)
    tree_ids = [_.name for _ in tree]
    df = pd.read_csv(params.tab, sep='\t', index_col=0)
    df = df.loc[df.index.isin(tree_ids), :]
    cdf = df[['country', 'host']].groupby(['country']).count().to_dict()['host']
    for c, n in cdf.items():
        print(c, n)

    c2ids = defaultdict(set)
    for t in tree:
        if t.name in df.index:
            c2ids[df.loc[t.name, 'country']].add(t.name)
    to_keep = set()
    for c, ids in c2ids.items():
        if not pd.isna(c):
            if len(ids) <= params.threshold:
                to_keep |= ids
            else:
                to_keep |= set(pd.np.random.choice(list(ids), size=params.threshold, replace=False))

    tree = remove_certain_leaves(tree, lambda _: _.name not in to_keep)
    features = [DATE, DATE_CI]
    nwk = tree.write(format_root_node=True, features=features, format=3)
    write(NewickIO.parse(StringIO(nwk)), params.out_tree, 'nexus')
    with open(params.out_tree, 'r') as f:
        nexus_str = f.read().replace('&&NHX:', '&')
    for feature in features:
        nexus_str = nexus_str.replace(':{}='.format(feature), ',{}='.format(feature))
    with open(params.out_tree, 'w') as f:
        f.write(nexus_str)
Ejemplo n.º 15
0
def _mapto(raxml_dir, pathr, query_id):
    """Return the AN labels of the leaves spanned by a jplace placement.

    Loads ``<raxml_dir>/RAxML_portableTree.<pathr>.jplace``, relabels every
    node of its tree by the edge number that follows it in braces ("R<edge>"),
    looks up the clade for each location of the first placement, and returns
    the original "AN<id>" labels of all leaves under the common ancestor of
    those clades.

    raxml_dir: directory containing the jplace file.
    pathr:     run name embedded in the jplace file name.
    query_id:  unused; kept for interface compatibility.

    Raises StopIteration if a placement location names an edge that is not
    in the tree, and KeyError if a leaf has no matching "AN<id>" label.
    """
    # Imported locally so the block does not rely on NewickIO re-exporting
    # StringIO, which is not part of its public interface.
    from io import StringIO

    classification_file = raxml_dir + '/RAxML_portableTree.' + pathr + '.jplace'

    with open(classification_file) as classification:
        classification_json = json.load(classification)

    # The tree string annotates every branch with its edge number in braces:
    # leaves look like "AN7:1.006{1}", inner nodes like "):0.15{3}". Abridged
    # example: "((AN7:1.006{1},AN8:0.874{2}):0.15{3},AN11:0.844{0});"
    tree_string = classification_json['tree']

    # Two-way lookup between leaf labels ("AN<id>") and edge labels ("R<edge>").
    AN_label = {}
    for an, r in re.findall(r'AN(\d+):\d+\.\d+\{(\d+)\}', tree_string):
        AN_label['AN' + an] = 'R' + r
        AN_label['R' + r] = 'AN' + an

    # Replace "[AN<id>]:<length>{<edge>}" with "R<edge>" so that every node,
    # leaf or inner, is named after its edge number.
    newick_string = re.sub(
        r'(AN\d+)?:\d+\.\d+\{(\d+)\}', r'R\g<2>', tree_string)

    mytree = Phylo.read(StringIO(newick_string), 'newick')

    # Only the first placement is considered. The first field of each location
    # is used as the edge number -- assumes the jplace "fields" list starts
    # with edge_num; TODO confirm.
    locations_ref = classification_json['placements'][0]['p']

    ter = []
    for maploc in locations_ref:
        rloc = 'R' + str(maploc[0])
        node = next(mytree.find_clades(rloc))
        ter.extend(node.get_terminals())

    comonancestor = mytree.common_ancestor(ter)

    # Translate the "R<edge>" leaf names back to their "AN<id>" labels.
    return [AN_label[leaf.name] for leaf in comonancestor.get_terminals()]
Ejemplo n.º 16
0
    if not os.path.isfile(fname):
        raise IOError("# Error: file {} does not exist".format(fname))
    with open(fname, 'r') as inf:
        # Read a FASTA file?
        (headers, seqs) = biofile.readFASTA(inf)
    # Read tree
    tree_fname = os.path.expanduser(options.tree_in_fname)
    if not os.path.isfile(tree_fname):
        raise IOError("# Error: file {} does not exist".format(tree_fname))
    tree_string = ""
    with open(tree_fname, 'r') as inf:
        lines = inf.readlines()
        for line in lines:
            if not line.strip()[0] == '#':
                tree_string += line.strip()
    trees = NewickIO.parse(StringIO(tree_string))
    tree = next(trees)
    # Read mapping file
    map_fname = os.path.expanduser(options.mapping_in_fname)
    if not os.path.isfile(map_fname):
        raise IOError("# Error: file {} does not exist".format(map_fname))
    with open(map_fname, 'r') as inf:
        map_table = util.readTable(inf, header=True)

    # Create mapping
    mapping_dict = dict(zip(map_table['species'],
                            map_table['updated.species']))

    # Update the FASTA headers
    #new_headers = []
    #new_seqs = []
Ejemplo n.º 17
0
import sys

from Bio import Phylo
from Bio.Phylo import NewickIO

# Load every tree from the Newick file named by the first argument.
trees = list(Phylo.parse(sys.argv[1], "newick"))

print("Removing trees that are not bifurcating.")

# Clear the comment and branch length of every internal node of every tree.
for tree in trees:
    for inner in tree.get_nonterminals():
        inner.comment = None
        inner.branch_length = None

# Only bifurcating trees are handed to the writer.
bifurcating = [candidate for candidate in trees if candidate.is_bifurcating()]
writer = NewickIO.Writer(bifurcating)

print()
print("Saving trees as plain newick files (no branch lengths).")
# One tree per line in the file named by the second argument.
with open(sys.argv[2], "w") as out:
    out.writelines(text + "\n" for text in writer.to_strings(plain=True))