Example #1
def main():
    """
    Main CLI handler.
    """
    
    parser = OptionParser(usage=_prog_usage, 
        add_help_option=True, 
        version=_prog_version, 
        description=_prog_description)    
       
    parser.add_option('-n', '--pop-size', '-N',
        action='store',
        dest='pop_size',
        type='int',
        default=1,
        metavar='Ne',
        help='effective HAPLOID population size (default=%default [assumes edge lengths are in units of Ne])')

    (opts, args) = parser.parse_args()
    
    if len(args) == 0:
        sys.stderr.write("%s" % parser.get_usage())
        sys.exit(1)
        
    for a in args:
        fpath = os.path.expandvars(os.path.expanduser(a))
        if not os.path.exists(fpath):
            sys.stderr.write('File not found: "%s"\n' % fpath)
        else:
            sys.stderr.write('Reading: "%s"\n' % fpath)
            d = datasets.Dataset()
            ctrees = d.read_trees(open(fpath, "rU"), "NEXUS")
            for t in ctrees:
                p = coalescent.log_probability_of_coalescent_tree(t, opts.pop_size)
                sys.stdout.write("%s\n" % p)
def load_fasta(fpath, schema="DNAFASTA"):
    d = datasets.Dataset()
    data = {}
    d.read(open(fpath, "rU"), schema)
    chars = d.char_blocks[0]
    for t in d.taxa_blocks[0]:
        data[t.label] = chars[t].values_as_string()
    return data
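
A minimal usage sketch for load_fasta(); the input file name is hypothetical and the default schema assumes DNA sequences:

seqs = load_fasta("example.fasta")  # hypothetical FASTA file
for label, seq in sorted(seqs.items()):
    sys.stdout.write("%s: %d characters\n" % (label, len(seq)))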
def write_nexus_tree(tree, tree_filepath):
    "Wrapper to write a single tree to a NEWICK file."
    d = datasets.Dataset()
    taxa_block = tree.infer_taxa_block()
    tree_block = d.add_trees_block(taxa_block=taxa_block)
    tree_block.append(tree)
    nw = nexus.NexusWriter()
    _LOG.info('\nWriting "%s"' % os.path.basename(tree_filepath))
    nw.write_dataset(d, open(tree_filepath, 'w'))
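
A hedged companion sketch: read the first tree from a hypothetical NEXUS file using the same old-API Dataset/read_trees calls seen above, then write it back out through the wrapper:

d = datasets.Dataset()
tree = d.read_trees(open("input.tre", "rU"), "NEXUS")[0]  # hypothetical file
write_nexus_tree(tree, "output.nex")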
Example #4
 def testSimple1(self):
     d = datasets.Dataset()
     t = d.trees_from_string(
         "((((a:1, b:1):1, c:2):1, d:3, e:3):2, (f:4, g:4):1)", "newick")[0]
     i1 = coalescent.coalescence_intervals(t)
     assert i1 == [1.0, 1.0, 1.0, 1.0,
                   1.0], "intervals found = %s" % ", ".join(str(i) for i in i1)
     i2 = coalescent.num_genes_waiting_times_pairs(t)
     assert i2 == [(7, 1.0), (6, 1.0), (5, 1.0), (3, 1.0), (2, 1.0)]
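     # note: there is no (4, ...) entry above because the node at age 3 is
     # trifurcating, so two coalescences happen at the same instant and the
     # number of genes drops from 5 directly to 3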
     check = coalescent.probability_of_coalescent_tree(t, 10)
def translate_nucleotide_file(infilepath, protfilepath, rna=False):
    infile = open(infilepath)
    ofile = open(protfilepath, 'w')
    d = datasets.Dataset()
    if rna:
        d.read(infile, "RNAFASTA")
    else:
        d.read(infile, "DNAFASTA")
    chars = d.char_blocks[0]
    for t in d.taxa_blocks[0]:
        s = chars[t]
        nucs = Seq(s.values_as_string(), generic_dna)
        prots = nucs.translate()
        ofile.write(">%s\n%s\n\n" % (t.label, prots.tostring()))
Example #6
def main():
    """
    Main CLI handler.
    """

    parser = OptionParser(usage=_prog_usage,
                          add_help_option=True,
                          version=_prog_version,
                          description=_prog_description)

    parser.add_option('-n',
                      '--nexus',
                      action='store_const',
                      dest='schema',
                      const='NEXUS',
                      default="NEXUS",
                      help='output in NEXUS format (default)')

    parser.add_option('-p',
                      '--phylip',
                      action='store_const',
                      dest='schema',
                      const='PHYLIP',
                      help='output in PHYLIP format')

    parser.add_option('-f',
                      '--fasta',
                      action='store_const',
                      dest='schema',
                      const='FASTA',
                      help='output in FASTA format')

    (opts, args) = parser.parse_args()

    if len(args) == 0:
        sys.stderr.write("(reading from standard input)\n")
        input = sys.stdin
    else:
        input = open(args[0], "rU")

    output = sys.stdout

    fd = datasets.Dataset()
    fd.read(input, "DNAFASTA")
    pattern = re.compile(r"gi\|.+\|.+\|(.+)\|\S* ([\w\.]+) ([\w\.]+) (\w+).*")
    for t in fd.taxa_blocks[0]:
        m = pattern.match(t.label)
        t.label = "%s_%s_%s" % (m.group(2), m.group(3), m.group(1))

    fd.write(output, opts.schema)
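
To make the relabelling concrete, here is a hedged walk-through on a made-up GenBank-style label (not from any real dataset). The captured groups are the accession, genus, species, and a leading fragment of the description (unused); the new label becomes genus_species_accession:

label = "gi|2765658|emb|Z78533.1|CIZ78533 Cymbidium irridioides 5.8S rRNA"
m = pattern.match(label)
# m.groups() == ('Z78533.1', 'Cymbidium', 'irridioides', '5')
# new label:    'Cymbidium_irridioides_Z78533.1'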
    def round_trip_tree_file(self, tree_filepath, reader_class, writer_class):
        "Round-trips a treefile."
        reader = reader_class()
        _LOG.info("\nDATA FILE: \"%s\"" % os.path.basename(tree_filepath))
        dataset = reader.read_dataset(file_obj=open(tree_filepath, "r"))
        for tb_idx, trees_block in enumerate(dataset.trees_blocks):
            for t_idx, tree in enumerate(trees_block):

                _LOG.info(
                    "*** Tree %d of %d from tree block %d of %d in \"%s\"" %
                    (t_idx + 1, len(trees_block), tb_idx + 1,
                     len(dataset.trees_blocks),
                     os.path.basename(tree_filepath)))

                _LOG.debug("\nORIGINAL TREE >>>\n%s\n<<< ORIGINAL TREE" %
                           tree.compose_newick())
                # write ...
                _LOG.info("(writing out)")
                temp_dataset = datasets.Dataset()
                temp_trees_block = trees.TreesBlock(
                    taxa_block=trees_block.taxa_block)
                temp_trees_block.append(tree)
                temp_dataset.add_trees_block(trees_block=temp_trees_block)
                writer = writer_class()
                result1 = StringIO()
                writer.write_dataset(temp_dataset, result1)
                result1 = result1.getvalue()
                _LOG.debug("\nWRITE OUT >>>\n%s\n<<< WRITE OUT" % result1)

                # read back ...
                _LOG.info("(reading back)")
                r2 = StringIO(result1)
                temp_dataset2 = reader.read_dataset(file_obj=r2)
                tree2 = temp_dataset2.trees_blocks[0][0]
                result2 = StringIO()
                writer.write_dataset(temp_dataset2, result2)
                result2 = result2.getvalue()
                _LOG.debug("\nREAD IN >>>\n%s\n<<< READ IN" % result2)

                # compare ...
                _LOG.debug("\nREPARSED TREE >>>\n%s\n<<< REPARSED TREE\n" %
                           tree.compose_newick())
                assert result1 == result2, \
                    "Reparsed tree strings do not match:\n\n" \
                    + "FIRST >>>\n%s\n<<< FIRST\n\nSECOND >>>\n%s\n<<< SECOND" \
                    % (result1, result2)
                _LOG.info("(reparsed tree string match)")
Example #8
def generate_dataset(seq_len,
                     tree_model,
                     char_model,
                     mutation_rate=1.0,
                     root_states=None,
                     dataset=None,
                     taxa_block=None,
                     rng=None):
    """
    Wrapper to conveniently generate a Dataset simulated under
    the given tree and character model.
    `seq_len`       : length of sequence (number of characters)
    `tree_model`    : dendropy.trees.Tree object
    `char_model`    : dendropy.charmodels.CharacterModel object
    `mutation_rate` : mutation *modifier* rate (should be 1.0 if branch lengths
                      on tree reflect true expected number of changes)
    `root_states`   : vector of root states (length must equal `seq_len`)
    `dataset`       : a dendropy.datasets.Dataset object. if given, the new
                      dendropy.characters.CharacterBlock object will be added to
                      this (along with a new taxa_block if required); otherwise,
                      a new dendropy.datasets.Dataset object will be created.
    `taxa_block`    : if given, this will be the taxa manager used; otherwise
                      a new default one will be created
    `rng`           : random number generator; if not given, `GLOBAL_RNG` will
                      be used
    Returns: a dendropy.datasets.Dataset object.
    """
    if dataset is None:
        dataset = datasets.Dataset()
    if taxa_block is not None and taxa_block not in dataset.taxa_blocks:
        taxa_block = dataset.add_taxa_block(taxa_block=taxa_block)
    char_block = generate_characters(seq_len=seq_len,
                                     tree_model=tree_model,
                                     char_model=char_model,
                                     mutation_rate=mutation_rate,
                                     root_states=root_states,
                                     char_block=None,
                                     taxa_block=taxa_block,
                                     rng=rng)
    dataset.add_char_block(char_block=char_block)
    return dataset
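
A sketch of calling the wrapper; my_tree and my_char_model are placeholders for the dendropy.trees.Tree and dendropy.charmodels.CharacterModel objects named in the docstring, obtained elsewhere:

ds = generate_dataset(seq_len=1000,
                      tree_model=my_tree,         # placeholder Tree
                      char_model=my_char_model,   # placeholder CharacterModel
                      mutation_rate=1.0)
ds.write(sys.stdout, "NEXUS")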
def main():
    """
    Main CLI handler.
    """

    parser = OptionParser(usage=_prog_usage,
                          add_help_option=True,
                          version=_prog_version,
                          description=_prog_description)

    (opts, args) = parser.parse_args()

    if len(args) == 0:
        sys.stderr.write("Please specify a Newick/NEXUS file to convert.\n")
        sys.exit(1)
    fpath = os.path.expanduser(os.path.expandvars(args[0]))
    if not os.path.exists(fpath):
        sys.stderr.write('File not found: %s\n' % fpath)
        sys.exit(1)
    d = datasets.Dataset()
    d.read(open(fpath, "rU"), "nexus")
    d.write(sys.stdout, "nexml")
Example #10
    def compare_chars(self, src, format, expected):
        """Reads 'src', checks against 'expected'"""
        _LOG.info("Reading %s" % src.name)
        d = datasets.Dataset()
        d.read(src, format)

        taxa_block = d.taxa_blocks[0]
        char_block = d.char_blocks[0]

        assert len(expected) == len(char_block)
        assert len(expected) == len(taxa_block)

        for tax_idx, (exp_taxa, exp_seq) in enumerate(expected):

            taxon = taxa_block[tax_idx]
            label = taxon.label

            # ok, this is ugly, but my nexus parser does not
            # do the "_" => " " conversion in taxlabels (yet)
            # so ...
            assert ((exp_taxa == label) \
                or (exp_taxa.replace("_", " ") == label) \
                or (exp_taxa.replace(" ", "_") == label)), \
                "(Taxon #%d) %s not eq. %s" % (tax_idx, exp_taxa, label)

            assert len(exp_seq) == len(char_block.matrix[taxon])

            for col_idx, symbol1 in enumerate(exp_seq):
                test_state = char_block.matrix[taxon][col_idx].value
                if char_block.matrix[taxon][col_idx].column_type is not None:
                    state_alpha = char_block.matrix[taxon][
                        col_idx].column_type.state_alphabet
                else:
                    state_alpha = char_block.default_state_alphabet
                exp_state = state_alpha.state_for_symbol(symbol1)
                assert test_state == exp_state
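
A hypothetical call for orientation; the file name and the expected (label, sequence) pairs are invented:

expected = [("Taxon_1", "ACGTACGT"),
            ("Taxon_2", "ACGTACGA")]
self.compare_chars(open("chars.nex", "rU"), "NEXUS", expected)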
def main():
    """
    Main CLI handler.
    """

    parser = OptionParser(usage=_prog_usage,
                          add_help_option=True,
                          version=_prog_version,
                          description=_prog_description)

    parser.add_option('-s',
                      '--summarize-means',
                      action='store',
                      dest='summarize_means',
                      default=None,
                      metavar='FILENAME',
                      help='summarize means to this file (default="%default")')

    parser.add_option('-n', '--pop-size', '-N',
                      action='store',
                      dest='pop_size',
                      type='int',
                      default=1,
                      metavar='Ne',
                      help='effective HAPLOID population size (for calculation of expected distribution means; default=%default [assumes edge lengths are in units of Ne])')

    #     parser.add_option('-o', '--output-prefix',
    #         action='store',
    #         dest='output_prefix',
    #         default="wt",
    #         metavar='OUTPUT-PREFIX',
    #         help='prefix for output file names (default="%default")')

    (opts, args) = parser.parse_args()

    if len(args) == 0:
        sys.stderr.write("%s" % parser.get_usage())
        sys.exit(1)

    output = sys.stdout
    output.write("k\twaiting_time\n")

    coal_frames = {}

    for a in args:
        fpath = os.path.expandvars(os.path.expanduser(a))
        if not os.path.exists(fpath):
            sys.stderr.write('File not found: "%s"\n' % fpath)
        else:
            sys.stderr.write('Reading: "%s"\n' % fpath)
            d = datasets.Dataset()
            ctrees = d.read_trees(open(fpath, "rU"), "NEXUS")
            for t in ctrees:
                cf = coalescent.extract_coalescent_frames(t)
                for k, wt in cf:
                    output.write("%d\t%s\n" % (k, wt))
                    if k not in coal_frames:
                        coal_frames[k] = []
                    coal_frames[k].append(wt)

    if opts.summarize_means is not None:
        smfile = open(
            os.path.expandvars(os.path.expanduser(opts.summarize_means)), "w")
        smfile.write("k\tmean_wt\texpected_wt\n")
        for k, wt in coal_frames.items():
            actual_mean = float(sum(wt)) / len(wt)
            expected_mean = float(
                opts.pop_size) / distributions.binomial_coefficient(k, 2)
            smfile.write("%d\t%s\t%s\n" % (k, actual_mean, expected_mean))
Example #12
def main_cli():
    
    description = '%s %s %s' % (_program_name, _program_version, _program_subtitle)
    usage = "%prog [options] <TREES FILE> [<TREES FILE> [<TREES FILE> [...]]]"
    
    parser = OptionParser(usage=usage, add_help_option=True, version = _program_version, description=description)

    sum_tree_optgroup = OptionGroup(parser, 'Summarization Options')    
    parser.add_option_group(sum_tree_optgroup)                      
    sum_tree_optgroup.add_option('-b', '--burnin', 
                        action='store',
                        dest='burnin',
                        type='int', # also 'float', 'string' etc.
                        default=0, 
                        help='number of trees to skip from the beginning of *each tree file* when counting support [default=%default]') 

    target_tree_optgroup = OptionGroup(parser, 'Target Tree Options')    
    parser.add_option_group(target_tree_optgroup)
    target_tree_optgroup.add_option('-t','--target',  
                  dest='target_tree_filepath',
                  default=None,
                  help="path to optional target, model or best topology tree file (Newick or NEXUS format) "
                       + "to which support will be mapped; " 
                       + "if not given, then a majority-rule clade consensus tree will be constructed based on the "
                       + "all the trees given in the support tree files (except for those discarded as burn-ins), "
                       + "and this will be used as the target tree")  
    target_tree_optgroup.add_option('-f', '--min-clade-freq', 
                      dest='min_clade_freq',
                      type='float', 
                      default=0.50,
                      metavar='#.##',
                      help="minimum frequency or probability for a clade or a split to be included in the consensus tree, if used [default=%default]") 
    target_tree_optgroup.add_option('--no-branch-lengths',  
                      action='store_true', 
                      dest='no_branch_lengths',
                      default=False,
                      help="by default, if using a consensus tree as the target tree, branch lengths will be the mean of the lengths " \
                          + "of the given branch across all trees considered; this option forces branch " \
                          + "lengths to be unspecified (obviously, this is only applicable if you do not ask the support to be mapped as "  \
                          + "branch lengths)")

    source_tree_optgroup = OptionGroup(parser, 'Source Tree Options')    
    parser.add_option_group(source_tree_optgroup)          
    source_tree_optgroup.add_option('--from-newick-stream',  
                      action='store_true', 
                      dest='from_newick_stream',
                      default=False,
                      help="support trees will be streamed in Newick format")            
    source_tree_optgroup.add_option('--from-nexus-stream',  
                      action='store_true', 
                      dest='from_nexus_stream',
                      default=False,
                      help="support trees will be streamed in NEXUS format")                      
                            
    output_tree_optgroup = OptionGroup(parser, 'Output Tree Options')    
    parser.add_option_group(output_tree_optgroup)          
    output_tree_optgroup.add_option('-l','--support-as-labels',  
                      action='store_true', 
                      dest='support_as_labels',
                      default=True,
                      help="indicate branch support as internal node labels [default=%default]")            
    output_tree_optgroup.add_option('-v','--support-as-lengths',  
                      action='store_false', 
                      dest='support_as_labels',
                      default=True,
                      help="indicate branch support as branch lengths (otherwise support will be indicated by internal node labels)")   
    output_tree_optgroup.add_option('-p', '--percentages',  
                      action='store_true', 
                      dest='support_as_percentages',
                      default=False,
                      help="indicate branch support as percentages (otherwise, will report as proportions by default)")     
    output_tree_optgroup.add_option('-d', '--decimals', 
                      dest='support_label_decimals',
                      type='int', 
                      metavar='#',
                      default=2,
                      help="number of decimal places in indication of support values [default=%default]")  

    output_filepath_optgroup = OptionGroup(parser, 'Output File Options')    
    parser.add_option_group(output_filepath_optgroup)                      
    output_filepath_optgroup.add_option('-o','--output',  
                  dest='output_filepath',
                  default=None,
                  help="path to output file (if not given, will print to standard output)")                       
    output_filepath_optgroup.add_option('--no-taxa-block',  
                      action='store_false', 
                      dest='include_taxa_block',
                      default=True,
                      help="do not include a taxa block in the output treefile (otherwise will create taxa block by default)")      
    output_filepath_optgroup.add_option('--no-meta-comments',  
                      action='store_false', 
                      dest='include_meta_comments',
                      default=True,
                      help="include initial file comment annotating details of scoring operation")                      
    output_filepath_optgroup.add_option('-m', '--additional_comments',  
                      action='store', 
                      dest='additional_comments',
                      default=None,
                      help="additional comments to be added to the summary file")                                              
    output_filepath_optgroup.add_option('--to-newick', 
                      action='store_true', 
                      dest='to_newick_format',
                      default=False,
                      help="save results in NEWICK (PHYLIP) format (default is to save in NEXUS format)")         
    output_filepath_optgroup.add_option('--to-phylip', 
                      action='store_true', 
                      dest='to_newick_format',
                      default=False,
                      help="same as --newick")
    output_filepath_optgroup.add_option('-r', '--replace', 
                      action='store_true', 
                      dest='replace',
                      default=False,
                      help="replace/overwrite output file without asking if it already exists ")
                      
    other_optgroup = OptionGroup(parser, 'Other Options')    
    parser.add_option_group(other_optgroup)
    
    other_optgroup.add_option('-e','--split-edges',  
                  dest='split_edges_filepath',
                  default=None,
                  metavar='FILEPATH',
                  help="if specified, a tab-delimited file of splits and their edge " \
                    + "lengths across runs will be saved to FILEPATH")
                                              
    run_optgroup = OptionGroup(parser, 'Program Run Options')    
    parser.add_option_group(run_optgroup)         
    run_optgroup.add_option('-q', '--quiet', 
                      action='store_true', 
                      dest='quiet',
                      default=False,
                      help="suppress progress messages") 
    run_optgroup.add_option('--ignore-missing-support', 
                      action='store_true', 
                      dest='ignore_missing_support',
                      default=False,
                      help="ignore missing support tree files (at least one must exist!)") 
    run_optgroup.add_option('--ignore-missing-target', 
                      action='store_true', 
                      dest='ignore_missing_target',
                      default=False,
                      help="ignore missing target tree file (will construct majority rule consensus tree if missing)") 
  
    (opts, args) = parser.parse_args()
    messenger = Messenger(quiet=opts.quiet)
    
    # splash 
    if not opts.quiet:
        show_splash(dest=sys.stderr, extended=False)
                                    
    ###################################################
    # Support file idiot checking
        
    support_filepaths = []     
    if len(args) == 0 and (opts.from_newick_stream or opts.from_nexus_stream):
        if not opts.quiet:
            sys.stderr.write("(reading trees from standard input)")
        support_file_objs = [sys.stdin]           
    else:
        missing = False 
        for fpath in args:
            fpath = os.path.expanduser(os.path.expandvars(fpath))        
            if not os.path.exists(fpath):
                messenger.send_error('Support file not found: "%s"' % fpath)
                missing = True
            else:
                support_filepaths.append(fpath)
        if missing:
            messenger.send("")
            if opts.ignore_missing_support:
                pass
            else:
                messenger.send_formatted('Terminating due to missing support files. '
                       + 'Use the "--ignore-missing-support" option to continue even '
                       + 'if some files are missing.', force=True)
                sys.exit(1)
        if len(support_filepaths) == 0:
            messenger.send_formatted("No sources of support specified or could be found. "
            + "Please provide the path to at least one (valid and existing) file "
            + "containing non-parametric or MCMC tree samples "
            + "to summarize.", force=True)
            sys.exit(1)
            
        support_file_objs = [open(f, "r") for f in support_filepaths]

    ###################################################
    # Lots of other idiot-checking ...
    
    # target tree
    if opts.target_tree_filepath is not None:
        target_tree_filepath = os.path.expanduser(os.path.expandvars(opts.target_tree_filepath))
        if not os.path.exists(target_tree_filepath):
            messenger.send_error('Target tree file not found: "%s"\n' % target_tree_filepath)
            if opts.ignore_missing_target:                
                if not opts.quiet:
                    messenger.send('Will construct and use majority-rule consensus tree instead.\n')
                target_tree_filepath = None
            else:
                sys.exit(1)
    else:
        target_tree_filepath = None
                    
    # output
    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(os.path.expandvars(opts.output_filepath))
        if confirm_overwrite(output_fpath, messenger, opts.replace):
            output_dest = open(output_fpath, "w")
        else:
            sys.exit(1)

    if opts.split_edges_filepath:
        split_edges_filepath = os.path.expanduser(os.path.expandvars(opts.split_edges_filepath))
        if confirm_overwrite(split_edges_filepath, messenger, opts.replace):
            split_edges_dest = open(split_edges_filepath, "w")
        else:
            sys.exit(1)
    else:
        split_edges_dest = None
        
                
    ###################################################
    # Main work begins here: Count the splits
    
    start_time = datetime.datetime.now()
    
    comments = []
    tsum = treesum.TreeSummarizer()
    tsum.support_as_labels = opts.support_as_labels 
    tsum.support_as_percentages = opts.support_as_percentages
    if not opts.support_as_percentages and opts.support_label_decimals < 2:
        messenger.send_error("(WARNING: proportions require that support will be reported to at least 2 decimal places)")
        opts.support_label_decimals = 2
    tsum.support_label_decimals = opts.support_label_decimals
    tsum.ignore_node_ages = True # until a more efficient implementation is developed
    if opts.quiet:
        tsum.verbose = False
        tsum.write_message = None
    else:
        tsum.verbose = True
        tsum.write_message = sys.stderr.write
        tsum.progress_message_prefix = ""
        tsum.progress_message_suffix = "\n"

    messenger.send("### COUNTING SPLITS ###\n")
    if opts.from_newick_stream:
        file_format = "newick"
    elif opts.from_nexus_stream:
        file_format = "nexus"
    else:
        file_format = None
    tree_source = MultiFileTreeIterator(sources=support_file_objs,
                                        core_iterator=nexus.iterate_over_trees, 
                                        format=file_format,
                                        from_index=opts.burnin,
                                        progress_func=tsum.send_progress_message,
                                        encode_splits=True)

    split_distribution = tsum.count_splits_on_trees(tree_source, trees_splits_encoded=True)
    if split_distribution.taxa_block is None:
        assert(tsum.total_trees_counted == 0)
        split_distribution.taxa_block = dendropy.taxa.TaxaBlock() # we just produce an empty block so we don't crash as we report nothing of interest
    report = []
    report.append("%d trees read from %d files." % (tree_source.total_trees_read, len(support_filepaths)))
    report.append("%d trees from each file requested to be ignored for burn-in." % (opts.burnin))
    report.append("%d trees ignored in total." % (tree_source.total_trees_ignored))    
    report.append("%d trees considered in total for split support assessment." % (tsum.total_trees_counted))
    n_taxa = len(split_distribution.taxa_block)
    report.append("%d unique taxa across all trees." % n_taxa)
    num_splits, num_unique_splits, num_nt_splits, num_nt_unique_splits = split_distribution.splits_considered()
    report.append("%d unique splits out of %d total splits counted." % (num_unique_splits, num_splits))
    #report.append("%d unique non-trivial splits out of %d total non-trivial splits counted." % (num_nt_unique_splits, num_nt_splits))
        
    comments.extend(report)
    messenger.send("---")
    messenger.send_multi(report)
    messenger.send("")
    
    ###################################################
    #  Target tree and mapping
    
    if opts.support_as_percentages:
        support_units = "Percentage"
    else:        
        support_units = "Proportion (frequency or probability)"        
    if opts.support_as_labels:
        support_show = "node labels"
    else:
        support_show = "branch lengths"
    support_indication = "%s of support for each split indicated by %s" % (support_units, support_show)      
    
    tt_trees = []
    if target_tree_filepath is not None:
        messenger.send("### MAPPING SUPPORT TO TARGET TREE(S) ###\n")         
        tt_dataset = nexus.read_dataset(open(target_tree_filepath, 'r'))
        for tree_block in tt_dataset.trees_blocks:
            for tree in tree_block:
                tsum.map_split_support_to_tree(tree, split_distribution)
                tt_trees.append(tree)
        messenger.send('Parsed "%s": %d tree(s) in file' % (target_tree_filepath, len(tt_trees)))
        comments.append('Split support mapped to trees in:')
        comments.append('  - "%s" (%d trees)' % (os.path.abspath(target_tree_filepath), len(tt_trees)))
        comments.append(support_indication + ".")
    else:
        messenger.send("### CONSTRUCTING CLADE CONSENSUS TREE ###\n")
        if opts.min_clade_freq > 1.0:
            messenger.send("Maximum frequency threshold for clade inclusion is 1.0: reset to 1.0.", force=True)
            min_freq = 1.0
        else:            
            min_freq = opts.min_clade_freq
        tt_trees.append(tsum.tree_from_splits(split_distribution, 
                                              min_freq=min_freq, 
                                              include_edge_lengths=not opts.no_branch_lengths))
        report = []
        report.append('Consensus tree (%f clade frequency threshold) constructed from splits.' % min_freq)
        report.append(support_indication + ".")
        messenger.send_multi(report)
        comments.extend(report)
    messenger.send("")
                
    end_time = datetime.datetime.now()            
   
    ###################################################
    #  RESULTS    
            
    messenger.send("### RESULTS ###\n")
        
    final_run_report = []    
    final_run_report.append("Began at: %s." % (start_time.isoformat(' ')))
    final_run_report.append("Ended at: %s." % (end_time.isoformat(' ')))
    hours, mins, secs = str(end_time-start_time).split(":")
    run_time = "Run time: %s hour(s), %s minute(s), %s second(s)." % (hours, mins, secs)
    final_run_report.append(run_time)
                                
#     if not opts.output_filepath:
#         messenger.send('\n\n>>>>>>>>>>')
    
    output_dataset = datasets.Dataset()    
    taxa_block = output_dataset.add_taxa_block(taxa_block=split_distribution.taxa_block)
    trees_block = trees.TreesBlock()
    trees_block.taxa_block = taxa_block
    for tree in tt_trees:
        trees_block.append(tree)
    trees_block = output_dataset.add_trees_block(trees_block=trees_block)
        
    if opts.to_newick_format:
        newick_writer = nexus.NewickWriter()
        newick_writer.write_dataset(output_dataset, output_dest)
    else:
        nexus_writer = nexus.NexusWriter()
        if opts.include_taxa_block:
            nexus_writer.simple = False
        else:
            nexus_writer.simple = True 
        if opts.include_meta_comments:
            nexus_writer.comment = []
            try:
                username = getpass.getuser()
            except:
                username = "******"
            nexus_writer.comment.append("%s %s by %s." % (_program_name, _program_version, _program_author))
            nexus_writer.comment.append("Using DendroPy Version %s by Jeet Sukumaran and Mark T. Holder." 
                % dendropy.PACKAGE_VERSION)
            python_version = sys.version.replace("\n", "").replace("[", "(").replace("]",")")            
            nexus_writer.comment.append("Running under Python %s on %s." % (python_version, sys.platform))               
            nexus_writer.comment.append("Executed on %s by %s@%s." % (platform.node(),  username, socket.gethostname()))         
            nexus_writer.comment.append("Basis of split support:")
            for support_file in support_filepaths:
                nexus_writer.comment.append('  - "%s"' % os.path.abspath(support_file))            
            nexus_writer.comment.extend(final_run_report)
            nexus_writer.comment.extend(comments)
        if opts.additional_comments:
            nexus_writer.comment.append("\n")
            nexus_writer.comment.append(opts.additional_comments)
            
        nexus_writer.write_dataset(output_dataset, output_dest)
        
    if split_edges_dest:
        for split in split_distribution.splits:
            row = []
            row.append(nexus.split_to_newick(split, split_distribution.taxa_block))
            for edge_length in split_distribution.split_edge_lengths[split]:
                row.append("%s" % edge_length)
            split_edges_dest.write("%s\n" % ("\t".join(row)))                
        
    if not opts.output_filepath:
        #messenger.send('<<<<<<<<<')     
        pass
    else:
        messenger.send('Results written to: "%s".' % (output_fpath))
    messenger.send("")        
        
    ###################################################
    #  WRAP UP    
    messenger.send("### DONE ###\n")
    messenger.send_multi(final_run_report)        
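
A hypothetical invocation (script and file names are made up): assuming the program is saved as sumtrees.py and run1.t / run2.t contain MCMC tree samples, the following skips the first 200 trees of each file as burn-in, reports support as percentages, and writes the annotated consensus tree to consensus.tre:

python sumtrees.py --burnin 200 --percentages -o consensus.tre run1.t run2.t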
Example #13
    def testCharBlockMerge(self):
        ds1 = datasets.Dataset()
        tb1 = ds1.add_taxa_block(label="Dataset 1, Taxa Block 1")
        for i in range(1, 11):
            tb1.add_taxon(label="T%02d" % i)

        cb1 = ds1.add_char_block(char_block=characters.DnaCharactersBlock(
            label="Dataset 1, Char Block 1"))
        for t in tb1:
            cb1.append_taxon_sequence(t, state_symbols="AAAAAAAAAA")

        ds2 = datasets.Dataset()
        tb2 = ds2.add_taxa_block(label="Dataset 2, Taxa Block 1")
        for i in range(1, 21):
            tb2.add_taxon(label="T%02d" % i)

        cb2 = ds2.add_char_block(char_block=characters.DnaCharactersBlock(
            label="Dataset 2, Char Block 1"))
        for t in tb2:
            cb2.append_taxon_sequence(t, state_symbols="CCCCCCCCCC")

        ds1b = deepcopy(ds1)
        cb = ds1b.char_blocks[0]
        ntax_pre = len(cb)
        nchars_pre = len(cb.values()[0])
        cb.extend_characters(ds2.char_blocks[0])
        assert len(cb) == ntax_pre, \
                    "Number of taxa changed from %d to %d" % (ntax_pre, len(cb))
        for t in cb:
            _LOG.debug("\n%s: %s" \
                % (str(t), cb[t].values_as_string()))
            assert len(cb[t]) == 20, \
                "Data vector is incorrect length (%d):\n%s: %s" \
                % (len(cb[t]), str(t), cb[t].values_as_string())
            assert cb[t].values_as_string() == "AAAAAAAAAACCCCCCCCCC", \
                "Incorrect sequence:\n%s: %s" % (str(t), cb[t].values_as_string())

        ds1b = deepcopy(ds1)
        cb = ds1b.char_blocks[0]
        cb.extend(ds2.char_blocks[0], overwrite_existing=True)
        target_ntax = 20
        assert len(cb) == target_ntax,  \
                    "Number of rows in character block has not changed to %d (%d)" % (target_ntax, len(cb))
        assert len(cb.taxa_block) == target_ntax, \
                    "Number of taxa in taxa block has not changed to %d (%d)" % (target_ntax, len(cb.taxa_block))

        for t in tb2:
            cb_tb_labels = cb.taxa_block.labels()
            assert t.label in cb_tb_labels, \
                "Taxon '%s' not found in taxa block:\n%s" % (str(t), str(cb_tb_labels))
            cb_labels = [taxon.label for taxon in cb]
            assert t.label in cb_labels, \
                "Taxon '%s' not found in char block:\n%s" % (str(t), str(cb_labels))
        for t in cb:
            _LOG.debug("\n%s: %s" \
                % (str(t), cb[t].values_as_string()))
            assert len(cb[t]) == 10, \
                "Data vector is incorrect length (%d):\n%s: %s" \
                % (len(cb[t]), str(t), cb[t].values_as_string())
            assert cb[t].values_as_string() == "CCCCCCCCCC", \
                "Incorrect sequence:\n%s: %s" % (str(t), cb[t].values_as_string())

        ds1b = deepcopy(ds1)
        cb = ds1b.char_blocks[0]
        cb.extend(ds2.char_blocks[0], append_existing=True)
        target_ntax = 20
        assert len(cb) == target_ntax, \
                    "Number of rows in character block has not changed to %d (%d)" % (target_ntax, len(cb))
        assert len(cb.taxa_block) == target_ntax, \
                    "Number of taxa in taxa block has not changed to %d (%d)" % (target_ntax, len(cb.taxa_block))
        for t in tb2:
            cb_tb_labels = cb.taxa_block.labels()
            assert t.label in cb_tb_labels, \
                "Taxon '%s' not found in taxa block:\n%s" % (str(t), str(cb_tb_labels))
            cb_labels = [taxon.label for taxon in cb]
            assert t.label in cb_labels, \
                "Taxon '%s' not found in char block:\n%s" % (str(t), str(cb_labels))
        for t in cb:
            _LOG.debug("\n%s: %s" \
                % (str(t), cb[t].values_as_string()))
            tnum = int(t.label[-2:])
            if tnum > 10:
                assert len(cb[t]) == 10, \
                    "Data vector is incorrect length (%d):\n%s: %s" \
                    % (len(cb[t]), str(t), cb[t].values_as_string())
                assert cb[t].values_as_string() == "CCCCCCCCCC", \
                    "Incorrect sequence:\n%s: %s" % (str(t), cb[t].values_as_string())
            else:
                assert len(cb[t]) == 20, \
                    "Data vector is incorrect length (%d):\n%s: %s" \
                    % (len(cb[t]), str(t), cb[t].values_as_string())
                assert cb[t].values_as_string() == "AAAAAAAAAACCCCCCCCCC", \
                    "Incorrect sequence:\n%s: %s" % (str(t), cb[t].values_as_string())
Example #14
				node.report_branching_time = False
			else: 
				node.report_branching_time = True
			if ch[0].include or ch[1].include:
				node.include = True
			else:
				node.include = False
			#
		#
	branchingTimes = list()
	sampleTimes = list()
	for node in tree.internal_nodes():
		if node.report_branching_time:
			branchingTimes.append(node.age)
	for node in tree.leaf_iter():
		if node.include:
			sampleTimes.append(node.age)
	#
	return branchingTimes, sampleTimes
	#
#

if __name__=='__main__':
	d = datasets.Dataset()
	d.read(open("May09SubsetDates.summary", "rU"), "NEXUS")
	tree = d.trees_blocks[0][0]
	
	# branching times and sampling times: 
	bt,st = truncate_and_add_distToRoot(tree)
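	# hedged continuation (not in the original fragment): report the results
	print("branching times: %s" % bt)
	print("sample times: %s" % st)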