def main(): """ Main CLI handler. """ parser = OptionParser(usage=_prog_usage, add_help_option=True, version=_prog_version, description=_prog_description) parser.add_option('-n', '--pop-size', '-N', action='store', dest='pop_size', type='int', default=1, metavar='Ne', help='effective HAPLOID population size (default=%default [assumes edge lengths are in units of Ne])') (opts, args) = parser.parse_args() if len(args) == 0: sys.stderr.write("%s" % parser.get_usage()) sys.exit(1) for a in args: fpath = os.path.expandvars(os.path.expanduser(a)) if not os.path.exists(fpath): sys.stderr.write('File not found: "%s"\n' % fpath) else: sys.stderr.write('Reading: "%s"\n' % fpath) d = datasets.Dataset() ctrees = d.read_trees(open(fpath, "rU"), "NEXUS") for t in ctrees: p = coalescent.log_probability_of_coalescent_tree(t, opts.pop_size) sys.stdout.write("%s\n" % p)
def load_fasta(fpath, schema="DNAFASTA"):
    "Reads a FASTA file and returns a dict mapping taxon labels to sequence strings."
    d = datasets.Dataset()
    data = {}
    d.read(open(fpath, "rU"), schema)
    chars = d.char_blocks[0]
    for t in d.taxa_blocks[0]:
        data[t.label] = chars[t].values_as_string()
    return data
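# A minimal usage sketch for load_fasta() above; "example.fas" is a
# hypothetical input file, and the pre-3.0 DendroPy "datasets" API used
# throughout these snippets is assumed.
def demo_load_fasta():
    seqs = load_fasta("example.fas")
    for label in sorted(seqs):
        sys.stdout.write("%s: %d characters\n" % (label, len(seqs[label])))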
def write_nexus_tree(tree, tree_filepath):
    "Wrapper to write a single tree to a NEXUS file."
    d = datasets.Dataset()
    taxa_block = tree.infer_taxa_block()
    tree_block = d.add_trees_block(taxa_block=taxa_block)
    tree_block.append(tree)
    nw = nexus.NexusWriter()
    _LOG.info('\nWriting "%s"' % os.path.basename(tree_filepath))
    nw.write_dataset(d, open(tree_filepath, 'w'))
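# A minimal sketch exercising write_nexus_tree() above; the tree string and
# output path are placeholders, not part of the original code.
def demo_write_nexus_tree():
    d = datasets.Dataset()
    t = d.trees_from_string("((a:1, b:1):1, c:2)", "newick")[0]
    write_nexus_tree(t, "demo_tree.nex")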
def testSimple1(self):
    d = datasets.Dataset()
    t = d.trees_from_string(
        "((((a:1, b:1):1, c:2):1, d:3, e:3):2, (f:4, g:4):1)",
        "newick")[0]
    i1 = coalescent.coalescence_intervals(t)
    assert i1 == [1.0, 1.0, 1.0, 1.0, 1.0], \
        "intervals found = %s" % ", ".join(str(i) for i in i1)
    i2 = coalescent.num_genes_waiting_times_pairs(t)
    assert i2 == [(7, 1.0), (6, 1.0), (5, 1.0), (3, 1.0), (2, 1.0)]
    # smoke test: should run without raising
    check = coalescent.probability_of_coalescent_tree(t, 10)
def translate_nucleotide_file(infilepath, protfilepath, rna=False):
    infile = open(infilepath, "rU")
    ofile = open(protfilepath, 'w')
    d = datasets.Dataset()
    if rna:
        d.read(infile, "RNAFASTA")
    else:
        d.read(infile, "DNAFASTA")
    chars = d.char_blocks[0]
    for t in d.taxa_blocks[0]:
        s = chars[t]
        # use the matching Biopython alphabet so translate() picks the
        # correct codon table for RNA input
        if rna:
            nucs = Seq(s.values_as_string(), generic_rna)
        else:
            nucs = Seq(s.values_as_string(), generic_dna)
        prots = nucs.translate()
        ofile.write(">%s\n%s\n\n" % (t.label, str(prots)))
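# A minimal sketch of translate_nucleotide_file() above; the file names are
# hypothetical, and the old Biopython Seq/translate API is assumed as in the
# function itself.
def demo_translate_nucleotide_file():
    translate_nucleotide_file("coding_dna.fas", "proteins_from_dna.fas")
    translate_nucleotide_file("coding_rna.fas", "proteins_from_rna.fas", rna=True)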
def main(): """ Main CLI handler. """ parser = OptionParser(usage=_prog_usage, add_help_option=True, version=_prog_version, description=_prog_description) parser.add_option('-n', '--nexus', action='store_const', dest='schema', const='NEXUS', default="NEXUS", help='output in NEXUS format (default)') parser.add_option('-p', '--phylip', action='store_const', dest='schema', const='PHYLIP', help='output in NEXUS format (default)') parser.add_option('-f', '--fasta', action='store_const', dest='schema', const='FASTA', help='output in FASTA format') (opts, args) = parser.parse_args() if len(args) == 0: sys.stderr.write("(reading from standard input)\n") input = sys.stdin else: input = open(args[0], "rU") output = sys.stdout fd = datasets.Dataset() fd.read(input, "DNAFASTA") pattern = re.compile("gi\|.+\|.+\|(.+)\|\S* ([\w\.]+) ([\w\.]+) (\w+).*") for t in fd.taxa_blocks[0]: m = pattern.match(t.label) t.label = m.groups(1)[1] + "_" + m.groups(1)[2] + "_" + m.groups(1)[0] fd.write(output, opts.schema)
def round_trip_tree_file(self, tree_filepath, reader_class, writer_class):
    "Round-trips a treefile."
    reader = reader_class()
    _LOG.info("\nDATA FILE: \"%s\"" % os.path.basename(tree_filepath))
    dataset = reader.read_dataset(file_obj=open(tree_filepath, "r"))
    for tb_idx, trees_block in enumerate(dataset.trees_blocks):
        for t_idx, tree in enumerate(trees_block):
            _LOG.info("*** Tree %d of %d from tree block %d of %d in \"%s\""
                      % (t_idx + 1,
                         len(trees_block),
                         tb_idx + 1,
                         len(dataset.trees_blocks),
                         os.path.basename(tree_filepath)))
            _LOG.debug("\nORIGINAL TREE >>>\n%s\n<<< ORIGINAL TREE" % tree.compose_newick())
            # write ...
            _LOG.info("(writing out)")
            temp_dataset = datasets.Dataset()
            temp_trees_block = trees.TreesBlock(taxa_block=trees_block.taxa_block)
            temp_trees_block.append(tree)
            temp_dataset.add_trees_block(trees_block=temp_trees_block)
            writer = writer_class()
            result1 = StringIO()
            writer.write_dataset(temp_dataset, result1)
            result1 = result1.getvalue()
            _LOG.debug("\nWRITE OUT >>>\n%s\n<<< WRITE OUT" % result1)
            # read back ...
            _LOG.info("(reading back)")
            r2 = StringIO(result1)
            temp_dataset2 = reader.read_dataset(file_obj=r2)
            tree2 = temp_dataset2.trees_blocks[0][0]
            # serialize the reparsed dataset for comparison
            result2 = StringIO()
            writer.write_dataset(temp_dataset2, result2)
            result2 = result2.getvalue()
            _LOG.debug("\nREAD IN >>>\n%s\n<<< READ IN" % result2)
            # compare ...
            _LOG.debug("\nREPARSED TREE >>>\n%s\n<<< REPARSED TREE\n" % tree2.compose_newick())
            assert result1 == result2, \
                "Reparsed tree strings do not match:\n\n" \
                "FIRST >>>\n%s\n<<< FIRST\n\nSECOND >>>\n%s\n<<< SECOND" % (result1, result2)
            _LOG.info("(reparsed tree strings match)")
def generate_dataset(seq_len, tree_model, char_model, mutation_rate=1.0,
                     root_states=None, dataset=None, taxa_block=None, rng=None):
    """
    Wrapper to conveniently generate a Dataset simulated under
    the given tree and character model.

    `seq_len`       : length of sequence (number of characters)
    `tree_model`    : dendropy.trees.Tree object
    `char_model`    : dendropy.charmodels.CharacterModel object
    `mutation_rate` : mutation *modifier* rate (should be 1.0 if branch lengths
                      on tree reflect true expected number of changes)
    `root_states`   : vector of root states (length must equal `seq_len`)
    `dataset`       : a dendropy.datasets.Dataset object; if given, the new
                      dendropy.characters.CharacterBlock object will be added to
                      this (along with a new taxa_block if required), otherwise
                      a new dendropy.datasets.Dataset object will be created
    `taxa_block`    : if given, this will be the taxa manager used; otherwise a
                      new default one will be created
    `rng`           : random number generator; if not given, `GLOBAL_RNG` will
                      be used

    Returns: a dendropy.datasets.Dataset object.
    """
    if dataset is None:
        dataset = datasets.Dataset()
    if taxa_block is not None and taxa_block not in dataset.taxa_blocks:
        taxa_block = dataset.add_taxa_block(taxa_block=taxa_block)
    char_block = generate_characters(seq_len=seq_len,
                                     tree_model=tree_model,
                                     char_model=char_model,
                                     mutation_rate=mutation_rate,
                                     root_states=root_states,
                                     char_block=None,
                                     taxa_block=taxa_block,
                                     rng=rng)  # pass the caller's rng through
    dataset.add_char_block(char_block=char_block)
    return dataset
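# A minimal sketch of generate_dataset() above; the tree string is a
# placeholder, and `char_model` must be constructed by the caller from the
# local dendropy.charmodels API (the exact model class names are not shown
# in these snippets, so none is assumed here).
def demo_generate_dataset(char_model, rng=None):
    d = datasets.Dataset()
    tree_model = d.trees_from_string("((a:0.1, b:0.1):0.2, c:0.3)", "newick")[0]
    return generate_dataset(seq_len=100,
                            tree_model=tree_model,
                            char_model=char_model,
                            rng=rng)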
def main(): """ Main CLI handler. """ parser = OptionParser(usage=_prog_usage, add_help_option=True, version=_prog_version, description=_prog_description) (opts, args) = parser.parse_args() if len(args) == 0: sys.stderr.write("Please specify a Newick/NEXUS file to convert.\n") sys.exit(1) fpath = os.path.expanduser(os.path.expandvars(args[0])) if not os.path.exists(fpath): sys.stderr.write('File not found: %s\n' % fpath) sys.exit(1) d = datasets.Dataset() d.read(open(fpath, "rU"), "nexus") d.write(sys.stdout, "nexml")
def compare_chars(self, src, format, expected):
    """Reads 'src', checks against 'expected'"""
    _LOG.info("Reading %s" % src.name)
    d = datasets.Dataset()
    d.read(src, format)
    taxa_block = d.taxa_blocks[0]
    char_block = d.char_blocks[0]
    assert len(expected) == len(char_block)
    assert len(expected) == len(taxa_block)
    for tax_idx, (exp_taxa, exp_seq) in enumerate(expected):
        taxon = taxa_block[tax_idx]
        label = taxon.label
        # ok, this is ugly, but my nexus parser does not
        # do the "_" => " " conversion in taxlabels (yet)
        # so ...
        assert ((exp_taxa == label)
                or (exp_taxa.replace("_", " ") == label)
                or (exp_taxa.replace(" ", "_") == label)), \
            "(Taxon #%d) %s not eq. %s" % (tax_idx, exp_taxa, label)
        assert len(exp_seq) == len(char_block.matrix[taxon])
        for col_idx, symbol1 in enumerate(exp_seq):
            test_state = char_block.matrix[taxon][col_idx].value
            if char_block.matrix[taxon][col_idx].column_type is not None:
                state_alpha = char_block.matrix[taxon][col_idx].column_type.state_alphabet
            else:
                state_alpha = char_block.default_state_alphabet
            exp_state = state_alpha.state_for_symbol(symbol1)
            assert test_state == exp_state
def main(): """ Main CLI handler. """ parser = OptionParser(usage=_prog_usage, add_help_option=True, version=_prog_version, description=_prog_description) parser.add_option('-s', '--summarize-means', action='store', dest='summarize_means', default=None, metavar='FILENAME', help='summarize means to this file (default="%default")') parser.add_option( '-n', '--pop-size', '-N', action='store', dest='pop_size', type='int', default=1, metavar='Ne', help= 'effective HAPLOID population size (for calculation of expected distribution means; default=%default [assumes edge lengths are in units of Ne])' ) # parser.add_option('-o', '--output-prefix', # action='store', # dest='output_prefix', # default="wt", # metavar='OUTPUT-PREFIX', # help='prefix for output file names (default="%default")') (opts, args) = parser.parse_args() if len(args) == 0: sys.stderr.write("%s" % parser.get_usage()) sys.exit(1) output = sys.stdout output.write("k\twaiting_time\n") coal_frames = {} for a in args: fpath = os.path.expandvars(os.path.expanduser(a)) if not os.path.exists(fpath): sys.stderr.write('File not found: "%s"\n' % fpath) else: sys.stderr.write('Reading: "%s"\n' % fpath) d = datasets.Dataset() ctrees = d.read_trees(open(fpath, "rU"), "NEXUS") for t in ctrees: cf = coalescent.extract_extract_coalescent_frames(t) for k, wt in cf: output.write("%d\t%s\n" % (k, wt)) if k not in coal_frames: coal_frames[k] = [] coal_frames[k].append(wt) if opts.summarize_means is not None: smfile = open( os.path.expandvars(os.path.expanduser(opts.summarize_means)), "w") smfile.write("k\tmean_wt\texpected_wt\n") for k, wt in coal_frames.items(): actual_mean = float(sum(wt)) / len(wt) expected_mean = float( opts.pop_size) / distributions.binomial_coefficient(k, 2) smfile.write("%d\t%s\t%s\n" % (k, actual_mean, expected_mean))
def main_cli():
    description = '%s %s %s' % (_program_name, _program_version, _program_subtitle)
    usage = "%prog [options] <TREES FILE> [<TREES FILE> [<TREES FILE> [...]]]"
    parser = OptionParser(usage=usage,
                          add_help_option=True,
                          version=_program_version,
                          description=description)

    sum_tree_optgroup = OptionGroup(parser, 'Summarization Options')
    parser.add_option_group(sum_tree_optgroup)
    sum_tree_optgroup.add_option('-b', '--burnin',
            action='store',
            dest='burnin',
            type='int',
            default=0,
            help='number of trees to skip from the beginning of *each tree file* when counting support [default=%default]')

    target_tree_optgroup = OptionGroup(parser, 'Target Tree Options')
    parser.add_option_group(target_tree_optgroup)
    target_tree_optgroup.add_option('-t', '--target',
            dest='target_tree_filepath',
            default=None,
            help="path to optional target, model or best topology tree file (Newick or NEXUS format) "
                 + "to which support will be mapped; "
                 + "if not given, then a majority-rule clade consensus tree will be constructed based on "
                 + "all the trees given in the support tree files (except for those discarded as burn-ins), "
                 + "and this will be used as the target tree")
    target_tree_optgroup.add_option('-f', '--min-clade-freq',
            dest='min_clade_freq',
            type='float',
            default=0.50,
            metavar='#.##',
            help="minimum frequency or probability for a clade or a split to be included in the consensus tree, if used [default=%default]")
    target_tree_optgroup.add_option('--no-branch-lengths',
            action='store_true',
            dest='no_branch_lengths',
            default=False,
            help="by default, if using a consensus tree as the target tree, branch lengths will be the mean of the lengths "
                 + "of the given branch across all trees considered; this option forces branch "
                 + "lengths to be unspecified (obviously, this is only applicable if you do not ask the support to be mapped as "
                 + "branch lengths)")

    source_tree_optgroup = OptionGroup(parser, 'Source Tree Options')
    parser.add_option_group(source_tree_optgroup)
    source_tree_optgroup.add_option('--from-newick-stream',
            action='store_true',
            dest='from_newick_stream',
            default=False,
            help="support trees will be streamed in Newick format")
    source_tree_optgroup.add_option('--from-nexus-stream',
            action='store_true',
            dest='from_nexus_stream',
            default=False,
            help="support trees will be streamed in NEXUS format")

    output_tree_optgroup = OptionGroup(parser, 'Output Tree Options')
    parser.add_option_group(output_tree_optgroup)
    output_tree_optgroup.add_option('-l', '--support-as-labels',
            action='store_true',
            dest='support_as_labels',
            default=True,
            help="indicate branch support as internal node labels [default=%default]")
    output_tree_optgroup.add_option('-v', '--support-as-lengths',
            action='store_false',
            dest='support_as_labels',
            default=True,
            help="indicate branch support as branch lengths (otherwise support will be indicated by internal node labels)")
    output_tree_optgroup.add_option('-p', '--percentages',
            action='store_true',
            dest='support_as_percentages',
            default=False,
            help="indicate branch support as percentages (otherwise, will report as proportions by default)")
    output_tree_optgroup.add_option('-d', '--decimals',
            dest='support_label_decimals',
            type='int',
            metavar='#',
            default=2,
            help="number of decimal places in indication of support values [default=%default]")

    output_filepath_optgroup = OptionGroup(parser, 'Output File Options')
    parser.add_option_group(output_filepath_optgroup)
    output_filepath_optgroup.add_option('-o', '--output',
            dest='output_filepath',
            default=None,
            help="path to output file (if not given, will print to standard output)")
    output_filepath_optgroup.add_option('--no-taxa-block',
            action='store_false',
            dest='include_taxa_block',
            default=True,
            help="do not include a taxa block in the output treefile (otherwise will create taxa block by default)")
    output_filepath_optgroup.add_option('--no-meta-comments',
            action='store_false',
            dest='include_meta_comments',
            default=True,
            help="do not include the initial file comment annotating details of the run (included by default)")
    output_filepath_optgroup.add_option('-m', '--additional_comments',
            action='store',
            dest='additional_comments',
            default=None,
            help="additional comments to be added to the summary file")
    output_filepath_optgroup.add_option('--to-newick',
            action='store_true',
            dest='to_newick_format',
            default=False,
            help="save results in NEWICK (PHYLIP) format (default is to save in NEXUS format)")
    output_filepath_optgroup.add_option('--to-phylip',
            action='store_true',
            dest='to_newick_format',
            default=False,
            help="same as --to-newick")
    output_filepath_optgroup.add_option('-r', '--replace',
            action='store_true',
            dest='replace',
            default=False,
            help="replace/overwrite output file without asking if it already exists")

    other_optgroup = OptionGroup(parser, 'Other Options')
    parser.add_option_group(other_optgroup)
    other_optgroup.add_option('-e', '--split-edges',
            dest='split_edges_filepath',
            default=None,
            metavar='FILEPATH',
            help="if specified, a tab-delimited file of splits and their edge "
                 + "lengths across runs will be saved to FILEPATH")

    run_optgroup = OptionGroup(parser, 'Program Run Options')
    parser.add_option_group(run_optgroup)
    run_optgroup.add_option('-q', '--quiet',
            action='store_true',
            dest='quiet',
            default=False,
            help="suppress progress messages")
    run_optgroup.add_option('--ignore-missing-support',
            action='store_true',
            dest='ignore_missing_support',
            default=False,
            help="ignore missing support tree files (at least one must exist!)")
    run_optgroup.add_option('--ignore-missing-target',
            action='store_true',
            dest='ignore_missing_target',
            default=False,
            help="ignore missing target tree file (will construct majority rule consensus tree if missing)")

    (opts, args) = parser.parse_args()
    messenger = Messenger(quiet=opts.quiet)

    # splash
    if not opts.quiet:
        show_splash(dest=sys.stderr, extended=False)

    ###################################################
    # Support file idiot checking

    support_filepaths = []
    if len(args) == 0 and (opts.from_newick_stream or opts.from_nexus_stream):
        if not opts.quiet:
            sys.stderr.write("(reading trees from standard input)")
        support_file_objs = [sys.stdin]
    else:
        missing = False
        for fpath in args:
            fpath = os.path.expanduser(os.path.expandvars(fpath))
            if not os.path.exists(fpath):
                messenger.send_error('Support file not found: "%s"' % fpath)
                missing = True
            else:
                support_filepaths.append(fpath)
        if missing:
            messenger.send("")
            if not opts.ignore_missing_support:
                messenger.send_formatted('Terminating due to missing support files. '
                        + 'Use the "--ignore-missing-support" option to continue even '
                        + 'if some files are missing.', force=True)
                sys.exit(1)
        if len(support_filepaths) == 0:
            messenger.send_formatted("No sources of support specified or could be found. "
                    + "Please provide the path to at least one (valid and existing) file "
                    + "containing non-parametric or MCMC tree samples "
                    + "to summarize.", force=True)
            sys.exit(1)
        support_file_objs = [open(f, "r") for f in support_filepaths]

    ###################################################
    # Lots of other idiot-checking ...

    # target tree
    if opts.target_tree_filepath is not None:
        target_tree_filepath = os.path.expanduser(os.path.expandvars(opts.target_tree_filepath))
        if not os.path.exists(target_tree_filepath):
            messenger.send_error('Target tree file not found: "%s"\n' % target_tree_filepath)
            if opts.ignore_missing_target:
                if not opts.quiet:
                    messenger.send('Will construct and use majority-rule consensus tree instead.\n')
                target_tree_filepath = None
            else:
                sys.exit(1)
    else:
        target_tree_filepath = None

    # output
    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(os.path.expandvars(opts.output_filepath))
        if confirm_overwrite(output_fpath, messenger, opts.replace):
            output_dest = open(output_fpath, "w")
        else:
            sys.exit(1)

    if opts.split_edges_filepath:
        split_edges_filepath = os.path.expanduser(os.path.expandvars(opts.split_edges_filepath))
        if confirm_overwrite(split_edges_filepath, messenger, opts.replace):
            split_edges_dest = open(split_edges_filepath, "w")
        else:
            sys.exit(1)
    else:
        split_edges_dest = None

    ###################################################
    # Main work begins here: Count the splits

    start_time = datetime.datetime.now()
    comments = []
    tsum = treesum.TreeSummarizer()
    tsum.support_as_labels = opts.support_as_labels
    tsum.support_as_percentages = opts.support_as_percentages
    if not opts.support_as_percentages and opts.support_label_decimals < 2:
        messenger.send_error("(WARNING: reporting support as proportions requires at least 2 decimal places)")
        opts.support_label_decimals = 2
    tsum.support_label_decimals = opts.support_label_decimals
    tsum.ignore_node_ages = True  # until a more efficient implementation is developed

    if opts.quiet:
        tsum.verbose = False
        tsum.write_message = None
    else:
        tsum.verbose = True
        tsum.write_message = sys.stderr.write
        tsum.progress_message_prefix = ""
        tsum.progress_message_suffix = "\n"

    messenger.send("### COUNTING SPLITS ###\n")
    if opts.from_newick_stream:
        file_format = "newick"
    elif opts.from_nexus_stream:
        file_format = "nexus"
    else:
        file_format = None
    tree_source = MultiFileTreeIterator(sources=support_file_objs,
            core_iterator=nexus.iterate_over_trees,
            format=file_format,
            from_index=opts.burnin,
            progress_func=tsum.send_progress_message,
            encode_splits=True)
    split_distribution = tsum.count_splits_on_trees(tree_source, trees_splits_encoded=True)
    if split_distribution.taxa_block is None:
        assert(tsum.total_trees_counted == 0)
        # we just produce an empty block so we don't crash as we report nothing of interest
        split_distribution.taxa_block = dendropy.taxa.TaxaBlock()

    report = []
    report.append("%d trees read from %d files." % (tree_source.total_trees_read, len(support_filepaths)))
    report.append("%d trees from each file requested to be ignored for burn-in." % (opts.burnin))
    report.append("%d trees ignored in total." % (tree_source.total_trees_ignored))
    report.append("%d trees considered in total for split support assessment." % (tsum.total_trees_counted))
    n_taxa = len(split_distribution.taxa_block)
    report.append("%d unique taxa across all trees." % n_taxa)
    num_splits, num_unique_splits, num_nt_splits, num_nt_unique_splits = split_distribution.splits_considered()
    report.append("%d unique splits out of %d total splits counted." % (num_unique_splits, num_splits))
    #report.append("%d unique non-trivial splits out of %d total non-trivial splits counted."
    #        % (num_nt_unique_splits, num_nt_splits))
    comments.extend(report)
    messenger.send("---")
    messenger.send_multi(report)
    messenger.send("")

    ###################################################
    # Target tree and mapping

    if opts.support_as_percentages:
        support_units = "Percentage"
    else:
        support_units = "Proportion (frequency or probability)"
    if opts.support_as_labels:
        support_show = "node labels"
    else:
        support_show = "branch lengths"
    support_indication = "%s of support for each split indicated by %s" % (support_units, support_show)

    tt_trees = []
    if target_tree_filepath is not None:
        messenger.send("### MAPPING SUPPORT TO TARGET TREE(S) ###\n")
        tt_dataset = nexus.read_dataset(open(target_tree_filepath, 'r'))
        for tree_block in tt_dataset.trees_blocks:
            for tree in tree_block:
                tsum.map_split_support_to_tree(tree, split_distribution)
                tt_trees.append(tree)
        messenger.send('Parsed "%s": %d tree(s) in file' % (target_tree_filepath, len(tt_trees)))
        comments.append('Split support mapped to trees in:')
        comments.append(' - "%s" (%d trees)' % (os.path.abspath(target_tree_filepath), len(tt_trees)))
        comments.append(support_indication + ".")
    else:
        messenger.send("### CONSTRUCTING CLADE CONSENSUS TREE ###\n")
        if opts.min_clade_freq > 1.0:
            messenger.send("Maximum frequency threshold for clade inclusion is 1.0: reset to 1.0.", force=True)
            min_freq = 1.0
        else:
            min_freq = opts.min_clade_freq
        tt_trees.append(tsum.tree_from_splits(split_distribution,
                min_freq=min_freq,
                include_edge_lengths=not opts.no_branch_lengths))
        report = []
        report.append('Consensus tree (%f clade frequency threshold) constructed from splits.' % min_freq)
        report.append(support_indication + ".")
        messenger.send_multi(report)
        comments.extend(report)
    messenger.send("")

    end_time = datetime.datetime.now()

    ###################################################
    # RESULTS

    messenger.send("### RESULTS ###\n")
    final_run_report = []
    final_run_report.append("Began at: %s." % (start_time.isoformat(' ')))
    final_run_report.append("Ended at: %s." % (end_time.isoformat(' ')))
    # NOTE: assumes a run time of less than 24 hours
    hours, mins, secs = str(end_time - start_time).split(":")
    run_time = "Run time: %s hour(s), %s minute(s), %s second(s)." % (hours, mins, secs)
    final_run_report.append(run_time)

    output_dataset = datasets.Dataset()
    taxa_block = output_dataset.add_taxa_block(taxa_block=split_distribution.taxa_block)
    trees_block = trees.TreesBlock()
    trees_block.taxa_block = taxa_block
    for tree in tt_trees:
        trees_block.append(tree)
    trees_block = output_dataset.add_trees_block(trees_block=trees_block)

    if opts.to_newick_format:
        newick_writer = nexus.NewickWriter()
        newick_writer.write_dataset(output_dataset, output_dest)
    else:
        nexus_writer = nexus.NexusWriter()
        if opts.include_taxa_block:
            nexus_writer.simple = False
        else:
            nexus_writer.simple = True
        if opts.include_meta_comments:
            nexus_writer.comment = []
            try:
                username = getpass.getuser()
            except:
                username = "******"
            nexus_writer.comment.append("%s %s by %s." % (_program_name, _program_version, _program_author))
            nexus_writer.comment.append("Using DendroPy Version %s by Jeet Sukumaran and Mark T. Holder."
                    % dendropy.PACKAGE_VERSION)
            python_version = sys.version.replace("\n", "").replace("[", "(").replace("]", ")")
            nexus_writer.comment.append("Running under Python %s on %s." % (python_version, sys.platform))
            nexus_writer.comment.append("Executed on %s by %s@%s."
                    % (platform.node(), username, socket.gethostname()))
            nexus_writer.comment.append("Basis of split support:")
            for support_file in support_filepaths:
                nexus_writer.comment.append(' - "%s"' % os.path.abspath(support_file))
            nexus_writer.comment.extend(final_run_report)
            nexus_writer.comment.extend(comments)
        if opts.additional_comments:
            nexus_writer.comment.append("\n")
            nexus_writer.comment.append(opts.additional_comments)
        nexus_writer.write_dataset(output_dataset, output_dest)

    if split_edges_dest:
        for split in split_distribution.splits:
            row = []
            row.append(nexus.split_to_newick(split, split_distribution.taxa_block))
            for edge_length in split_distribution.split_edge_lengths[split]:
                row.append("%s" % edge_length)
            split_edges_dest.write("%s\n" % ("\t".join(row)))

    if opts.output_filepath:
        messenger.send('Results written to: "%s".' % (output_fpath))
    messenger.send("")

    ###################################################
    # WRAP UP

    messenger.send("### DONE ###\n")
    messenger.send_multi(final_run_report)
def testCharBlockMerge(self):
    ds1 = datasets.Dataset()
    tb1 = ds1.add_taxa_block(label="Dataset 1, Taxa Block 1")
    for i in range(1, 11):
        tb1.add_taxon(label="T%02d" % i)
    cb1 = ds1.add_char_block(char_block=characters.DnaCharactersBlock(
        label="Dataset 1, Char Block 1"))
    for t in tb1:
        cb1.append_taxon_sequence(t, state_symbols="AAAAAAAAAA")

    ds2 = datasets.Dataset()
    tb2 = ds2.add_taxa_block(label="Dataset 2, Taxa Block 1")
    for i in range(1, 21):
        tb2.add_taxon(label="T%02d" % i)
    cb2 = ds2.add_char_block(char_block=characters.DnaCharactersBlock(
        label="Dataset 2, Char Block 1"))
    for t in tb2:
        cb2.append_taxon_sequence(t, state_symbols="CCCCCCCCCC")

    # extend_characters(): columns are appended for existing taxa only
    ds1b = deepcopy(ds1)
    cb = ds1b.char_blocks[0]
    ntax_pre = len(cb)
    nchars_pre = len(cb.values()[0])
    cb.extend_characters(ds2.char_blocks[0])
    assert len(cb) == ntax_pre, \
        "Number of taxa changed from %d to %d" % (ntax_pre, len(cb))
    for t in cb:
        _LOG.debug("\n%s: %s" % (str(t), cb[t].values_as_string()))
        assert len(cb[t]) == 20, \
            "Data vector is incorrect length (%d):\n%s: %s" \
            % (len(cb[t]), str(t), cb[t].values_as_string())
        assert cb[t].values_as_string() == "AAAAAAAAAACCCCCCCCCC", \
            "Incorrect sequence:\n%s: %s" % (str(t), cb[t].values_as_string())

    # extend(..., overwrite_existing=True): rows for existing taxa are replaced
    ds1b = deepcopy(ds1)
    cb = ds1b.char_blocks[0]
    cb.extend(ds2.char_blocks[0], overwrite_existing=True)
    target_ntax = 20
    assert len(cb) == target_ntax, \
        "Number of rows in character block has not changed to %d (%d)" % (target_ntax, len(cb))
    assert len(cb.taxa_block) == target_ntax, \
        "Number of taxa in taxa block has not changed to %d (%d)" % (target_ntax, len(cb.taxa_block))
    for t in tb2:
        cb_tb_labels = cb.taxa_block.labels()
        assert t.label in cb_tb_labels, \
            "Taxon '%s' not found in taxa block:\n%s" % (str(t), str(cb_tb_labels))
        cb_labels = [x.label for x in cb]
        assert t.label in cb_labels, \
            "Taxon '%s' not found in char block:\n%s" % (str(t), str(cb_labels))
    for t in cb:
        _LOG.debug("\n%s: %s" % (str(t), cb[t].values_as_string()))
        assert len(cb[t]) == 10, \
            "Data vector is incorrect length (%d):\n%s: %s" \
            % (len(cb[t]), str(t), cb[t].values_as_string())
        assert cb[t].values_as_string() == "CCCCCCCCCC", \
            "Incorrect sequence:\n%s: %s" % (str(t), cb[t].values_as_string())

    # extend(..., append_existing=True): rows for existing taxa are appended
    ds1b = deepcopy(ds1)
    cb = ds1b.char_blocks[0]
    cb.extend(ds2.char_blocks[0], append_existing=True)
    target_ntax = 20
    assert len(cb) == target_ntax, \
        "Number of rows in character block has not changed to %d (%d)" % (target_ntax, len(cb))
    assert len(cb.taxa_block) == target_ntax, \
        "Number of taxa in taxa block has not changed to %d (%d)" % (target_ntax, len(cb.taxa_block))
    for t in tb2:
        cb_tb_labels = cb.taxa_block.labels()
        assert t.label in cb_tb_labels, \
            "Taxon '%s' not found in taxa block:\n%s" % (str(t), str(cb_tb_labels))
        cb_labels = [x.label for x in cb]
        assert t.label in cb_labels, \
            "Taxon '%s' not found in char block:\n%s" % (str(t), str(cb_labels))
    for t in cb:
        _LOG.debug("\n%s: %s" % (str(t), cb[t].values_as_string()))
        tnum = int(t.label[-2:])
        if tnum > 10:
            assert len(cb[t]) == 10, \
                "Data vector is incorrect length (%d):\n%s: %s" \
                % (len(cb[t]), str(t), cb[t].values_as_string())
            assert cb[t].values_as_string() == "CCCCCCCCCC", \
                "Incorrect sequence:\n%s: %s" % (str(t), cb[t].values_as_string())
        else:
            assert len(cb[t]) == 20, \
                "Data vector is incorrect length (%d):\n%s: %s" \
                % (len(cb[t]), str(t), cb[t].values_as_string())
            assert cb[t].values_as_string() == "AAAAAAAAAACCCCCCCCCC", \
                "Incorrect sequence:\n%s: %s" % (str(t), cb[t].values_as_string())
# (fragment: only the tail of truncate_and_add_distToRoot() survives in the
# original source; the opening of the function, including the loop that binds
# `node` and its child nodes `ch`, is missing)
            node.report_branching_time = False
        else:
            node.report_branching_time = True
        if ch[0].include or ch[1].include:
            node.include = True
        else:
            node.include = False

    branchingTimes = list()
    sampleTimes = list()
    for node in tree.internal_nodes():
        if node.report_branching_time:
            branchingTimes.append(node.age)
    for node in tree.leaf_iter():
        if node.include:
            sampleTimes.append(node.age)
    return branchingTimes, sampleTimes


if __name__ == '__main__':
    d = datasets.Dataset()
    d.read(open("May09SubsetDates.summary", "rU"), "NEXUS")
    tree = d.trees_blocks[0][0]
    # branching times and sampling times:
    bt, st = truncate_and_add_distToRoot(tree)