Ejemplo n.º 1
0
def main_cli():
    """
    Command-line entry point for SumLabels.

    Reads the first tree from the target tree file, collects the head-node
    labels of every split found in the source tree files, and writes those
    labels (joined with the separator option) onto the nodes of the matching
    splits of the target tree. The relabelled target tree is then written
    in NEXUS (default) or Newick format to the output file, or to standard
    output if no output file was given.

    Takes no arguments (options are read from the command line by
    `OptionParser.parse_args()`) and returns nothing. Calls `sys.exit(1)`
    when:

      - no source tree files are given, or none of them exist;
      - a source tree file is missing and '--ignore-missing-source' is unset;
      - the target tree file is not specified, not found, or has no trees;
      - the user declines to overwrite an existing output file.
    """

    description = "%s %s %s" % (_program_name, _program_version, _program_subtitle)
    usage = "%prog [options] -t <TARGET-TREE-FILE> <TREES-FILE> [TREES-FILE [TREES-FILE [...]]"

    parser = OptionParser(usage=usage, add_help_option=True, version=_program_version, description=description)

    parser.add_option("-t", "--target",
            dest="target_tree_filepath",
            default=None,
            help="path to file with tree (Newick or NEXUS format) "
            + "to which labels will be written")
    parser.add_option("--preserve-target-labels",
            action="store_true",
            dest="preserve_target_labels",
            default=False,
            help="keep any existing labels on target tree (by default, these will be cleared before writing the new labels)")
    # '--rooted' and '--unrooted' share one destination; None (neither flag
    # given) is passed through unchanged to the tree reader.
    parser.add_option("--rooted",
            action="store_true",
            dest="rooted_trees",
            default=None,
            help="treat trees as rooted")
    parser.add_option("--unrooted",
            action="store_false",
            dest="rooted_trees",
            default=None,
            help="treat trees as unrooted")
    parser.add_option("--ignore-missing-source",
            action="store_true",
            dest="ignore_missing_source",
            default=False,
            help="ignore missing source tree files (at least one must exist!)")
    parser.add_option("-o", "--output",
            dest="output_filepath",
            default=None,
            help="path to output file (if not given, will print to standard output)")
    parser.add_option("-s", "--separator",
            dest="separator",
            default="/",
            help="string to use to separate labels from different source trees (default='%default')")
    parser.add_option("--no-taxa-block",
            action="store_false",
            dest="include_taxa_block",
            default=True,
            help="do not include a taxa block in the output treefile (otherwise will create taxa block by default)")
    parser.add_option("-c", "--additional-comments",
            action="store",
            dest="additional_comments",
            default=None,
            help="additional comments to be added to the summary file")
    parser.add_option("--to-newick",
            action="store_true",
            dest="to_newick_format",
            default=False,
            help="save results in NEWICK (PHYLIP) format (default is to save in NEXUS format)")
    parser.add_option("--to-phylip",
            action="store_true",
            dest="to_newick_format",
            default=False,
            help="same as --to-newick")
    parser.add_option("-r", "--replace",
            action="store_true",
            dest="replace",
            default=False,
            help="replace/overwrite output file without asking if it already exists ")
    parser.add_option("-q", "--quiet",
            action="store_true",
            dest="quiet",
            default=False,
            help="suppress ALL logging, progress and feedback messages")

    (opts, args) = parser.parse_args()
    if opts.quiet:
        messaging_level = ConsoleMessenger.ERROR_MESSAGING_LEVEL
    else:
        messaging_level = ConsoleMessenger.INFO_MESSAGING_LEVEL
    messenger = ConsoleMessenger(name="SumLabels", messaging_level=messaging_level)

    # splash
    if not opts.quiet:
        show_splash(prog_name=_program_name,
                prog_subtitle=_program_subtitle,
                prog_version=_program_version,
                prog_author=_program_author,
                prog_copyright=_program_copyright,
                dest=sys.stderr,
                extended=False)

    ###################################################
    # Source file checking

    if not args:
        messenger.send_info("No sources of input trees specified. "
                + "Please provide the path to at least one (valid and existing) file "
                + "containing tree samples to summarize. See '--help' for other options.")
        sys.exit(1)

    source_filepaths = []
    for fpath in args:
        fpath = os.path.expanduser(os.path.expandvars(fpath))
        if os.path.exists(fpath):
            source_filepaths.append(fpath)
        elif opts.ignore_missing_source:
            messenger.send_warning("Source file not found: '%s'" % fpath)
        else:
            # Name the offending file so the user knows what to fix.
            messenger.send_error("Source file not found: '%s'" % fpath)
            messenger.send_error("Terminating due to missing source files. "
                   + "Use the '--ignore-missing-source' option to continue even "
                   + "if some files are missing.")
            sys.exit(1)
    if not source_filepaths:
        messenger.send_error("No valid sources of input trees specified. "
                + "Please provide the path to at least one (valid and existing) file "
                + "containing trees")
        sys.exit(1)

    ###################################################
    # Other sanity checks

    # target tree
    if opts.target_tree_filepath is None:
        messenger.send_error("Target tree file not specified: use the '-t' or '--target' option to provide path to target tree")
        sys.exit(1)
    target_tree_filepath = os.path.expanduser(os.path.expandvars(opts.target_tree_filepath))
    if not os.path.exists(target_tree_filepath):
        messenger.send_error("Target tree file not found: '%s'" % target_tree_filepath)
        sys.exit(1)

    # output: ask about overwriting up front, but do not open (and thereby
    # truncate) the file until there are results to write, so that a failure
    # while reading the inputs cannot destroy an existing output file.
    if opts.output_filepath is None:
        output_fpath = None
    else:
        output_fpath = os.path.expanduser(os.path.expandvars(opts.output_filepath))
        if not confirm_overwrite(filepath=output_fpath, replace_without_asking=opts.replace):
            sys.exit(1)

    ###################################################
    # Read the trees

    # a single taxon set is shared by the target and all source trees
    master_taxon_set = dendropy.TaxonSet()
    is_rooted = opts.rooted_trees
    messenger.send_info("Reading target tree: '%s'" % target_tree_filepath)
    target_tree = None
    # NOTE(review): "rU" is the Python 2-era universal-newline mode; it is
    # kept as-is here and will need revisiting if this is ported to Python 3.
    with open(target_tree_filepath, "rU") as target_src:
        # only the first tree in the target file is used
        for tree in tree_source_iter(
                target_src,
                schema='nexus/newick',
                taxon_set=master_taxon_set,
                as_rooted=is_rooted):
            target_tree = tree
            break
    if target_tree is None:
        messenger.send_error("No trees found in target tree file: '%s'" % target_tree_filepath)
        sys.exit(1)

    # split -> list of labels found on the head node of that split's edge,
    # accumulated over every source tree
    split_labels = {}
    for src_fpath in source_filepaths:
        messenger.send_info("Reading source tree(s) from: '%s'" % src_fpath)
        with open(src_fpath, "rU") as src:
            for tree in tree_source_iter(
                    src,
                    schema='nexus/newick',
                    taxon_set=master_taxon_set,
                    as_rooted=is_rooted):
                tree.update_splits()
                for split, edge in tree.split_edges.items():
                    label = edge.head_node.label
                    if label:
                        split_labels.setdefault(split, []).append(label)

    ###################################################
    # Map the collected labels onto the target tree

    messenger.send_info("Mapping labels")
    target_tree.update_splits()
    for split, edge in target_tree.split_edges.items():
        node = edge.head_node
        labels = []
        if not opts.preserve_target_labels:
            node.label = None
        elif node.label:
            labels.append(node.label)
        # splits of the target tree not found in any source tree are
        # silently left without new labels
        labels.extend(split_labels.get(split, ()))
        if labels:
            node.label = opts.separator.join(labels)

    ###################################################
    # Write the results

    output_dataset = dendropy.DataSet(dendropy.TreeList([target_tree], taxon_set=master_taxon_set))

    # writer settings common to both output formats
    write_kwargs = dict(
            suppress_rooting=False,
            unquoted_underscores=False,
            preserve_spaces=False,
            store_tree_weights=False,
            suppress_annotations=False,
            annotations_as_nhx=False,
            suppress_item_comments=False,
            suppress_leaf_taxon_labels=False,
            suppress_leaf_node_labels=True,
            suppress_internal_taxon_labels=False,
            suppress_internal_node_labels=False,
            node_label_element_separator=' ',
            node_label_compose_func=None)
    if opts.to_newick_format:
        schema = "newick"
        write_kwargs["suppress_edge_lengths"] = False
    else:
        schema = "nexus"
        try:
            username = getpass.getuser()
        except Exception:
            # best-effort only: fall back to a placeholder user name
            username = "******"
        comment = []
        comment.append("%s %s by %s." % (_program_name, _program_version, _program_author))
        comment.append("Using DendroPy Version %s by Jeet Sukumaran and Mark T. Holder."
            % dendropy.__version__)
        # square brackets in the version string are replaced with
        # parentheses before the text goes into the file comments
        python_version = sys.version.replace("\n", "").replace("[", "(").replace("]", ")")
        comment.append("Running under Python %s on %s." % (python_version, sys.platform))
        comment.append("Executed on %s by %s@%s." % (platform.node(), username, socket.gethostname()))
        if opts.additional_comments:
            comment.append("\n")
            comment.append(opts.additional_comments)
        write_kwargs["simple"] = not opts.include_taxa_block
        write_kwargs["file_comments"] = comment

    if output_fpath is None:
        output_dataset.write(sys.stdout, schema, **write_kwargs)
    else:
        output_dest = open(output_fpath, "w")
        try:
            output_dataset.write(output_dest, schema, **write_kwargs)
        finally:
            output_dest.close()

    if opts.output_filepath:
        messenger.send_info("Results written to: '%s'." % (output_fpath))
Ejemplo n.º 2
0
def main_cli():

    description = "%s %s %s" % (_program_name, _program_version, _program_subtitle)
    usage = "%prog [options] TREES-FILE [TREES-FILE [TREES-FILE [...]]"

    parser = OptionParser(usage=usage, add_help_option=True, version=_program_version, description=description)

    sum_tree_optgroup = OptionGroup(parser, "Source Treatment Options")
    parser.add_option_group(sum_tree_optgroup)
    sum_tree_optgroup.add_option(
        "-b",
        "--burnin",
        action="store",
        dest="burnin",
        type="int",
        default=0,
        help="number of trees to skip from the beginning of *each tree file* when counting support [default=%default]",
    )

    source_tree_optgroup = OptionGroup(parser, "Source Tree Options")
    parser.add_option_group(source_tree_optgroup)
    source_tree_optgroup.add_option(
        "--rooted", action="store_true", dest="rooted_trees", default=None, help="treat trees as rooted"
    )
    source_tree_optgroup.add_option(
        "--unrooted", action="store_false", dest="rooted_trees", default=None, help="treat trees as unrooted"
    )
    source_tree_optgroup.add_option(
        "--ultrametric",
        action="store_true",
        dest="ultrametric_trees",
        default=False,
        help="assume trees are ultrametric (implies '--rooted' ; will result in node ages being summarized; will result in error if trees are not ultrametric)",
    )
    source_tree_optgroup.add_option(
        "--weighted-trees",
        action="store_true",
        dest="weighted_trees",
        default=False,
        help="use weights of trees as indicated by '[&W m/n]' comment to weight contribution of splits found on each tree to overall split frequencies",
    )
    source_tree_optgroup.add_option(
        "--from-newick-stream",
        action="store_true",
        dest="from_newick_stream",
        default=False,
        help="support trees will be streamed in newick format",
    )
    source_tree_optgroup.add_option(
        "--from-nexus-stream",
        action="store_true",
        dest="from_nexus_stream",
        default=False,
        help="support trees will be streamed in NEXUS format",
    )

    target_tree_optgroup = OptionGroup(parser, "Target Tree Options")
    parser.add_option_group(target_tree_optgroup)
    target_tree_optgroup.add_option(
        "-t",
        "--target",
        dest="target_tree_filepath",
        default=None,
        help="path to optional target, model or best topology tree file (Newick or NEXUS format) "
        + "to which support will be mapped; "
        + "if not given, then a majority-rule clade consensus tree will be constructed based on the "
        + "all the trees given in the support tree files (except for those discarded as burn-ins), "
        + "and this will be used as the target tree",
    )
    target_tree_optgroup.add_option(
        "-f",
        "--min-clade-freq",
        dest="min_clade_freq",
        type="float",
        default=0.50,
        metavar="#.##",
        help="minimum frequency or probability for a clade or a split to be "
        + "included in the consensus tree, if used [default=%default]",
    )

    support_summarization_optgroup = OptionGroup(parser, "Support Summarization Options")
    parser.add_option_group(support_summarization_optgroup)
    support_summarization_optgroup.add_option(
        "-l",
        "--support-as-labels",
        action="store_const",
        dest="support_annotation_target",
        default=1,
        const=1,
        help="in addition to node metadata, indicate branch support as internal node labels [default]",
    )
    support_summarization_optgroup.add_option(
        "-v",
        "--support-as-lengths",
        action="store_const",
        dest="support_annotation_target",
        default=1,
        const=2,
        help="in addition to node metadata, indicate branch support as branch lengths",
    )
    support_summarization_optgroup.add_option(
        "-x",
        "--no-support",
        action="store_const",
        dest="support_annotation_target",
        default=1,
        const=0,
        help="""\
do not indicate support with internal node labels or edge lengths
(support will still be indicated as node metadata unless
'--no-summary-metadata' is specified)""",
    )
    support_summarization_optgroup.add_option(
        "-p",
        "--percentages",
        action="store_true",
        dest="support_as_percentages",
        default=False,
        help="indicate branch support as percentages (otherwise, will report as proportions by default)",
    )
    support_summarization_optgroup.add_option(
        "-d",
        "--decimals",
        dest="support_label_decimals",
        type="int",
        metavar="#",
        default=2,
        help="number of decimal places in indication of support values [default=%default]",
    )

    edge_summarization_optgroup = OptionGroup(parser, "Edge Length Summarization Options")
    parser.add_option_group(edge_summarization_optgroup)
    edge_summarization_choices = ["mean-length", "median-length", "mean-age", "median-age", "keep", "unweighted"]
    edge_summarization_optgroup.add_option(
        "-e",
        "--edges",
        type="choice",
        dest="edge_summarization",
        metavar="<%s>" % ("|".join(edge_summarization_choices)),
        choices=edge_summarization_choices,
        default=None,
        help="""\
set edge lengths of target tree(s) to mean/median lengths/ages of
corresponding splits or edges of input trees (note that using 'mean-age' or
'median-age' require rooted ultrametric input trees, and will behave as
if '--ultrametric' and '--with-node-ages' are specified");
default is to 'keep' if target trees are specified
(i.e., target trees will have their branch lengths preserved by default),
'median-age' if no target trees are specified but the '--ultrametric' directive is given
(a consensus tree should be constructed to summarize support and input trees are ultrametric),
and 'mean-length' if no target trees are specified and the '--ultrametric' directive is *not* given
(a consensus tree should be constructed to summarize support and input trees are *not* assumed to be ultrametric),
""",
    )
    edge_summarization_optgroup.add_option(
        "--collapse-negative-edges",
        action="store_true",
        dest="collapse_negative_edges",
        default=False,
        help="(if setting edge lengths) force parent node ages to be at least as old as its oldest child when summarizing node ages",
    )

    other_summarization_optgroup = OptionGroup(parser, "Other Summarization Options")
    parser.add_option_group(other_summarization_optgroup)
    # other_summarization_optgroup.add_option("--with-node-ages",
    #        action="store_true",
    #        dest="calc_node_ages",
    #        default=None,
    #        help="summarize node ages as well as edge lengths (implies '--rooted' and '--ultrametric'; automatically enabled if '--ultrametric' is specified; will result in error if trees are not ultrametric)")
    other_summarization_optgroup.add_option(
        "--trprobs",
        "--calc-tree-probabilities",
        dest="trprobs_filepath",
        default=None,
        metavar="FILEPATH",
        help="if specified, a file listing tree (topologies) and the "
        + "frequencies of their occurrences will be saved to FILEPATH",
    )
    other_summarization_optgroup.add_option(
        "--extract-edges",
        dest="split_edges_filepath",
        default=None,
        metavar="FILEPATH",
        help="if specified, a tab-delimited file of splits and their edge "
        + "lengths across input trees will be saved to FILEPATH",
    )
    other_summarization_optgroup.add_option(
        "--no-node-ages",
        action="store_false",
        dest="calc_node_ages",
        default=None,
        help="do not calculate/summarize node ages, even if '--ultrametric' is specified",
    )
    other_summarization_optgroup.add_option(
        "--no-summary-metadata",
        action="store_true",
        dest="suppress_summary_metadata",
        default=False,
        help="do not annotate nodes with ranges, 5%/95 quartiles, 95% HPD's etc. of edge lengths and node ages",
    )
    other_summarization_optgroup.add_option(
        "--ultrametricity-precision", default=0.0000001, type="float", help="precision when checking ultrametricity"
    )

    output_filepath_optgroup = OptionGroup(parser, "Output File Options")
    parser.add_option_group(output_filepath_optgroup)
    output_filepath_optgroup.add_option(
        "-o",
        "--output",
        dest="output_filepath",
        default=None,
        help="path to output file (if not given, will print to standard output)",
    )
    output_filepath_optgroup.add_option(
        "--no-taxa-block",
        action="store_false",
        dest="include_taxa_block",
        default=True,
        help="do not include a taxa block in the output treefile (otherwise will create taxa block by default)",
    )
    output_filepath_optgroup.add_option(
        "--no-meta-comments",
        action="store_false",
        dest="include_meta_comments",
        default=True,
        help="do not include initial file comment annotating details of scoring operation",
    )
    output_filepath_optgroup.add_option(
        "-c",
        "--additional-comments",
        action="store",
        dest="additional_comments",
        default=None,
        help="additional comments to be added to the summary file",
    )
    output_filepath_optgroup.add_option(
        "--to-newick",
        action="store_true",
        dest="to_newick_format",
        default=False,
        help="save results in NEWICK (PHYLIP) format (default is to save in NEXUS format)",
    )
    output_filepath_optgroup.add_option(
        "--to-phylip", action="store_true", dest="to_newick_format", default=False, help="same as --newick"
    )
    output_filepath_optgroup.add_option(
        "-r",
        "--replace",
        action="store_true",
        dest="replace",
        default=False,
        help="replace/overwrite output file without asking if it already exists ",
    )

    run_optgroup = OptionGroup(parser, "Program Run Options")
    parser.add_option_group(run_optgroup)
    if _MP:
        run_optgroup.add_option(
            "-m",
            "--multiprocessing",
            action="store",
            dest="multiprocess",
            metavar="NUM-PROCESSES",
            default=None,
            help="run in parallel mode with up to a maximum of NUM-PROCESSES processes "
            + "(specify '*' to run in as many processes as there are cores on the "
            + "local machine)",
        )

    run_optgroup.add_option(
        "-g",
        "--log-frequency",
        type="int",
        metavar="LOG-FREQUENCY",
        dest="log_frequency",
        default=500,
        help="tree processing progress logging frequency (default=%default; set to 0 to suppress)",
    )
    run_optgroup.add_option(
        "-q",
        "--quiet",
        action="store_true",
        dest="quiet",
        default=False,
        help="suppress ALL logging, progress and feedback messages",
    )
    run_optgroup.add_option(
        "--ignore-missing-support",
        action="store_true",
        dest="ignore_missing_support",
        default=False,
        help="ignore missing support tree files (at least one must exist!)",
    )
    run_optgroup.add_option(
        "--ignore-missing-target",
        action="store_true",
        dest="ignore_missing_target",
        default=False,
        help="ignore missing target tree file (will construct majority rule consensus tree if missing)",
    )

    (opts, args) = parser.parse_args()
    if opts.quiet:
        messaging_level = ConsoleMessenger.ERROR_MESSAGING_LEVEL
    else:
        messaging_level = ConsoleMessenger.INFO_MESSAGING_LEVEL
    messenger = ConsoleMessenger(name="SumTrees", messaging_level=messaging_level)

    # splash
    if not opts.quiet:
        show_splash(
            prog_name=_program_name,
            prog_subtitle=_program_subtitle,
            prog_version=_program_version,
            prog_author=_program_author,
            prog_copyright=_program_copyright,
            dest=sys.stderr,
            extended=False,
        )

    ###################################################
    # Support file idiot checking

    support_filepaths = []
    if len(args) > 0:
        for fpath in args:
            fpath = os.path.expanduser(os.path.expandvars(fpath))
            if not os.path.exists(fpath):
                if opts.ignore_missing_support:
                    messenger.send_warning("Support file not found: '%s'" % fpath)
                else:
                    messenger.send_error(
                        "Terminating due to missing support files. "
                        + "Use the '--ignore-missing-support' option to continue even "
                        + "if some files are missing."
                    )
                    sys.exit(1)
            else:
                support_filepaths.append(fpath)
        if len(support_filepaths) == 0:
            messenger.send_error(
                "No valid sources of input trees specified. "
                + "Please provide the path to at least one (valid and existing) file "
                + "containing tree samples to summarize."
            )
            sys.exit(1)
    else:
        if not opts.from_newick_stream and not opts.from_nexus_stream:
            messenger.send_info(
                "No sources of input trees specified. "
                + "Please provide the path to at least one (valid and existing) file "
                + "containing tree samples to summarize. See '--help' for other options."
            )
            sys.exit(1)

    ###################################################
    # Lots of other idiot-checking ...

    # target tree
    if opts.target_tree_filepath is not None:
        target_tree_filepath = os.path.expanduser(os.path.expandvars(opts.target_tree_filepath))
        if not os.path.exists(target_tree_filepath):
            if opts.ignore_missing_target:
                if not opts.quiet:
                    messenger.send_warning(
                        "Target tree file not found: '%s': using majority-rule consensus tree instead."
                        % target_tree_filepath
                    )
                target_tree_filepath = None
            else:
                messenger.send_error("Target tree file not found: '%s'" % target_tree_filepath)
                sys.exit(1)
    else:
        target_tree_filepath = None

    ### TODO: these will be command-line options in the future
    ### here we just set it
    assert not hasattr(opts, "outgroup")
    opts.outgroup = None
    assert not hasattr(opts, "root_target")
    opts.root_target = None

    ### TODO: idiot-check edge length summarization
    # edge lengths
    if opts.edge_summarization:
        opts.edge_summarization = opts.edge_summarization.lower()
        if opts.edge_summarization not in edge_summarization_choices:
            messenger.send_error(
                "'%s' is not a valid edge summarization choice; must be one of: %s"
                % (opts.edge_summarization, edge_summarization_choices)
            )
            sys.exit(1)
    if opts.edge_summarization == "mean-age" or opts.edge_summarization == "median-age":
        opts.ultrametric_trees = True
        opts.rooted_trees = True
        if opts.calc_node_ages is None:
            opts.calc_node_ages = True
    else:
        if opts.ultrametric_trees:
            opts.rooted_trees = True
            if opts.calc_node_ages is None:
                opts.calc_node_ages = True
        else:
            if opts.calc_node_ages is True:
                opts.ultrametric_trees = True
                opts.rooted_trees = True
            else:
                opts.calc_node_ages = False

    # output
    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(os.path.expandvars(opts.output_filepath))
        if confirm_overwrite(filepath=output_fpath, replace_without_asking=opts.replace):
            output_dest = open(output_fpath, "w")
        else:
            sys.exit(1)

    if opts.trprobs_filepath:
        trprobs_filepath = os.path.expanduser(os.path.expandvars(opts.trprobs_filepath))
        if confirm_overwrite(filepath=trprobs_filepath, replace_without_asking=opts.replace):
            trprobs_dest = open(trprobs_filepath, "w")
        else:
            sys.exit(1)
        opts.calc_tree_probs = True
    else:
        trprobs_dest = None
        opts.calc_tree_probs = False

    if opts.split_edges_filepath:
        split_edges_filepath = os.path.expanduser(os.path.expandvars(opts.split_edges_filepath))
        if confirm_overwrite(filepath=split_edges_filepath, replace_without_asking=opts.replace):
            split_edges_dest = open(split_edges_filepath, "w")
        else:
            sys.exit(1)
    else:
        split_edges_dest = None

    if opts.from_newick_stream:
        schema = "newick"
    elif opts.from_nexus_stream:
        schema = "nexus"
    else:
        schema = "nexus/newick"

    ###################################################
    # Main work begins here: Count the splits

    start_time = datetime.datetime.now()
    master_split_distribution = None
    if (support_filepaths is not None and len(support_filepaths) > 1) and _MP and opts.multiprocess:
        if opts.multiprocess is not None:
            if opts.multiprocess == "*":
                num_processes = multiprocessing.cpu_count()
            elif opts.multiprocess == "@":
                num_processes = len(support_filepaths)
            else:
                try:
                    num_processes = int(opts.multiprocess)
                except ValueError:
                    messenger.send_error(
                        "'%s' is not a valid number of processes (must be a positive integer)." % opts.multiprocess
                    )
                    sys.exit(1)
            if num_processes <= 0:
                messenger.send_error(
                    "Maximum number of processes set to %d: cannot run SumTrees with less than 1 process"
                    % num_processes
                )
                sys.exit(1)
            if num_processes == 1:
                messenger.send_warning(
                    "Running in parallel processing mode but limited to only 1 process: probably more efficient to run in serial mode!"
                )

        master_split_distribution, master_topology_counter = process_sources_parallel(
            num_processes=num_processes,
            support_filepaths=support_filepaths,
            schema=schema,
            is_rooted=opts.rooted_trees,
            ignore_node_ages=not opts.calc_node_ages,
            ultrametricity_precision=opts.ultrametricity_precision,
            calc_tree_probs=opts.calc_tree_probs,
            weighted_trees=opts.weighted_trees,
            tree_offset=opts.burnin,
            log_frequency=opts.log_frequency,
            messenger=messenger,
        )
    else:
        if _MP and opts.multiprocess is not None and len(support_filepaths) == 1:
            messenger.send_warning(
                "Parallel processing mode requested but only one source specified: defaulting to serial mode."
            )
        if opts.from_newick_stream or opts.from_nexus_stream:
            support_filepaths = None
        master_split_distribution, master_topology_counter = process_sources_serial(
            support_filepaths=support_filepaths,
            schema=schema,
            is_rooted=opts.rooted_trees,
            ignore_node_ages=not opts.calc_node_ages,
            ultrametricity_precision=opts.ultrametricity_precision,
            calc_tree_probs=opts.calc_tree_probs,
            weighted_trees=opts.weighted_trees,
            tree_offset=opts.burnin,
            log_frequency=opts.log_frequency,
            messenger=messenger,
        )

    ###################################################
    # Compose post-counting report

    # if not splits counted or the taxon set was not populated for any reason,
    # we just produce an empty block so we don't crash as we report nothing of interest
    if master_split_distribution.taxon_set is None:
        assert master_split_distribution.total_trees_counted == 0
        master_split_distribution.taxon_set = dendropy.TaxonSet()

    # taxon set to handle target trees
    master_taxon_set = master_split_distribution.taxon_set

    report = []
    report.append(
        "%d trees considered in total for split support assessment." % (master_split_distribution.total_trees_counted)
    )
    if opts.rooted_trees is None:
        report.append("Tree rooting as given by tree statement (defaults to unrooted).")
    elif opts.rooted_trees:
        report.append("Trees treated as rooted.")
    else:
        report.append("Trees treated as unrooted.")
    if opts.ultrametric_trees:
        report.append("Trees are expected to be ultrametric.")
    if opts.weighted_trees:
        report.append("Trees treated as weighted (default weight = 1.0).")
    else:
        report.append("Trees treated as unweighted.")
    n_taxa = len(master_taxon_set)
    report.append("%d unique taxa across all trees." % n_taxa)
    num_splits, num_unique_splits, num_nt_splits, num_nt_unique_splits = master_split_distribution.splits_considered()
    report.append("%d unique splits out of %d total splits counted." % (num_unique_splits, num_splits))
    report.append(
        "%d unique non-trivial splits out of %d total non-trivial splits counted."
        % (num_nt_unique_splits, num_nt_splits)
    )

    comments = []
    comments.extend(report)
    messenger.send_info("Split counting completed:")
    messenger.send_info_lines(report, prefix=" - ")

    ###################################################
    #  Target tree and mapping

    if not opts.support_as_percentages and opts.support_label_decimals < 2:
        messenger.send_warning(
            "Reporting support by proportions require that support will be reported to at least 2 decimal places"
        )
        opts.support_label_decimals = 2

    tsum = treesum.TreeSummarizer()
    tsum.add_node_metadata = not opts.suppress_summary_metadata
    if opts.support_annotation_target == 1:
        tsum.support_as_labels = True
        tsum.support_as_edge_lengths = False
        support_show = "indicated by node labels"
        if tsum.add_node_metadata:
            support_show += " and node metadata"
    elif opts.support_annotation_target == 2:
        tsum.support_as_labels = False
        tsum.support_as_edge_lengths = True
        support_show = "indicated by branch lengths"
        if tsum.add_node_metadata:
            support_show += " and node metadata"
    elif opts.support_annotation_target == 0:
        tsum.support_as_labels = False
        tsum.support_as_edge_lengths = False
        if tsum.add_node_metadata:
            support_show = "indicated by node metadata (only)"
        else:
            support_show = "not indicated"
    else:
        raise Exception("Unexpected value for support annotation target: %s" % opts.support_annotation_target)
    tsum.support_as_percentages = opts.support_as_percentages
    tsum.support_label_decimals = opts.support_label_decimals
    tsum.weighted_splits = opts.weighted_trees

    if opts.support_as_percentages:
        support_units = "Percentage"
    else:
        support_units = "Proportion (frequency or probability)"
    support_summarization = "%s of support for each split %s" % (support_units, support_show)

    tt_trees = []
    support_comment_pattern = re.compile(r"support\s*=\s*[0-9.eE-]+,?", re.I)
    if target_tree_filepath is not None:
        messenger.send_info("Mapping support to target tree ...")
        # if adding node metadata, we extract it from the target tree first
        for tree in tree_source_iter(
            stream=open(target_tree_filepath, "r"),
            schema="nexus/newick",
            taxon_set=master_taxon_set,
            as_rooted=opts.rooted_trees,
            extract_comment_metadata=tsum.add_node_metadata,
        ):
            if opts.root_target:
                if opts.outgroup:
                    pass
                else:
                    tree.root_at_midpoint(splits=True)
            if opts.rooted_trees and not tree.is_rooted:
                messenger.send_error(
                    "Support trees are treated as rooted, but target tree is unrooted. Root target tree(s) and re-run, or run using the '--root-target' flag."
                )
                sys.exit(1)

            # strip out existing support statement
            # if tsum.add_node_metadata:
            #     for nd in tree.postorder_node_iter():
            #         for nd_comment_idx, comment in enumerate(nd.comments):
            #             nd.comments[nd_comment_idx] = support_comment_pattern.sub("", nd.comments[nd_comment_idx])

            stree = tsum.map_split_support_to_tree(tree, master_split_distribution)
            tt_trees.append(stree)
        messenger.send_info("Parsed '%s': %d tree(s) in file" % (target_tree_filepath, len(tt_trees)))
        comments.append("Split support mapped to trees in:")
        comments.append("  - '%s' (%d trees)" % (os.path.abspath(target_tree_filepath), len(tt_trees)))
        if opts.root_target:
            if opts.outgroup:
                comments.append("Target tree(s) rooted using outgroup: %s." % opts.outgroup)
            else:
                comments.append("Target tree(s) rooted at midpoint.")
        comments.append(support_summarization + ".")
    else:
        messenger.send_info("Constructing clade consensus tree ...")
        if opts.min_clade_freq > 1.0:
            messenger.send_warning("Maximum frequency threshold for clade inclusion is 1.0: reset to 1.0.")
            min_freq = 1.0
        else:
            min_freq = opts.min_clade_freq
        stree = tsum.tree_from_splits(master_split_distribution, min_freq=min_freq, include_edge_lengths=False)
        # include_edge_lengths=not opts.no_branch_lengths)
        if opts.root_target:
            stree.reroot_at_midpoint(update_splits=True)
        report = []
        report.append("Consensus tree (%f clade frequency threshold) constructed from splits." % min_freq)
        tt_trees.append(stree)
        if opts.root_target:
            if opts.outgroup:
                report.append("Consensus tree rooted using outgroup: %s." % opts.outgroup)
            else:
                report.append("Consensus tree rooted at midpoint.")
        report.append(support_summarization + ".")
        messenger.send_info_lines(report)
        comments.extend(report)

    if not opts.suppress_summary_metadata:
        messenger.send_info("Summarizing node ages and lengths ...")
        for stree in tt_trees:
            tsum.annotate_nodes_and_edges(tree=stree, split_distribution=master_split_distribution)

    if opts.edge_summarization is None:
        if target_tree_filepath is not None:
            opts.edge_summarization = "keep"
        else:
            if opts.ultrametric_trees:
                opts.edge_summarization = "median-age"
            else:
                opts.edge_summarization = "mean-length"
    if opts.edge_summarization is not None and opts.edge_summarization == "unweighted":
        for stree in tt_trees:
            for edge in stree.postorder_edge_iter():
                edge.length = None
    elif opts.edge_summarization is not None and opts.edge_summarization != "keep":
        if opts.edge_summarization.startswith("mean"):
            summary_func_desc = "mean"
            summarization_func = lambda x: statistics.mean_and_sample_variance(x)[0]
        else:
            summary_func_desc = "median"
            summarization_func = statistics.median
        if opts.edge_summarization.endswith("age"):
            messenger.send_info("Mapping node ages ...")
            comments.append(
                "Setting node ages of output tree(s) to %s ages of corresponding nodes of input trees."
                % summary_func_desc
            )
            if opts.collapse_negative_edges:
                comments.append("Parent node ages coerced to be at least as old as oldest daughter node age.")
                collapse_negative_edges = True
                allow_negative_edges = False
            else:
                comments.append("Parent node ages not adjusted: negative edge lengths allowed.")
                collapse_negative_edges = False
                allow_negative_edges = True
            for stree in tt_trees:
                tsum.summarize_node_ages_on_tree(
                    tree=stree,
                    split_distribution=master_split_distribution,
                    set_edge_lengths=True,
                    collapse_negative_edges=collapse_negative_edges,
                    allow_negative_edges=allow_negative_edges,
                    summarization_func=summarization_func,
                )
        elif opts.edge_summarization.endswith("length"):
            messenger.send_info("Mapping edge lengths ...")
            comments.append(
                "Setting edge lengths of output tree(s) to %s length of corresponding edges of input trees."
                % summary_func_desc
            )
            for stree in tt_trees:
                tsum.summarize_edge_lengths_on_tree(
                    tree=stree, split_distribution=master_split_distribution, summarization_func=summarization_func
                )
    else:
        comments.append("Not setting edge lengths on output tree(s).")

    end_time = datetime.datetime.now()

    ###################################################
    #  RESULTS

    messenger.send_info("Writing results ...")

    final_run_report = []
    final_run_report.append("Began at: %s." % (start_time.isoformat(" ")))
    final_run_report.append("Ended at: %s." % (end_time.isoformat(" ")))
    hours, mins, secs = str(end_time - start_time).split(":")
    run_time = "Run time: %s hour(s), %s minute(s), %s second(s)." % (hours, mins, secs)
    final_run_report.append(run_time)

    output_dataset = dendropy.DataSet(dendropy.TreeList(tt_trees, taxon_set=master_taxon_set))
    if opts.to_newick_format:
        output_dataset.write(
            output_dest,
            "newick",
            suppress_rooting=False,
            suppress_edge_lengths=False,
            unquoted_underscores=False,
            preserve_spaces=False,
            store_tree_weights=False,
            suppress_annotations=False,
            annotations_as_nhx=False,
            suppress_item_comments=False,
            suppress_leaf_taxon_labels=False,
            suppress_leaf_node_labels=True,
            suppress_internal_taxon_labels=False,
            suppress_internal_node_labels=False,
            node_label_element_separator=" ",
            node_label_compose_func=None,
        )
    else:
        if opts.include_taxa_block:
            simple = False
        else:
            simple = True
        if opts.include_meta_comments:
            comment = []
            try:
                username = getpass.getuser()
            except:
                username = "******"
            comment.append("%s %s by %s." % (_program_name, _program_version, _program_author))
            comment.append("Using DendroPy Version %s by Jeet Sukumaran and Mark T. Holder." % dendropy.__version__)
            python_version = sys.version.replace("\n", "").replace("[", "(").replace("]", ")")
            comment.append("Running under Python %s on %s." % (python_version, sys.platform))
            comment.append("Executed on %s by %s@%s." % (platform.node(), username, socket.gethostname()))
            if support_filepaths is not None and len(support_filepaths) > 0:
                comment.append("Basis of split support:")
                for support_file in support_filepaths:
                    comment.append("  - '%s'" % os.path.abspath(support_file))
            else:
                comment.append("Basis of split support: trees read from standard input.")
            comment.extend(final_run_report)
            comment.extend(comments)
        if opts.additional_comments:
            comment.append("\n")
            comment.append(opts.additional_comments)
        output_dataset.write(
            output_dest,
            "nexus",
            simple=simple,
            file_comments=comment,
            suppress_rooting=False,
            suppress_edge_lengths=opts.edge_summarization == "unweighted",
            unquoted_underscores=False,
            preserve_spaces=False,
            store_tree_weights=False,
            suppress_annotations=False,
            annotations_as_nhx=False,
            suppress_item_comments=False,
            suppress_leaf_taxon_labels=False,
            suppress_leaf_node_labels=True,
            suppress_internal_taxon_labels=False,
            suppress_internal_node_labels=False,
            node_label_element_separator=" ",
            node_label_compose_func=None,
        )

    if trprobs_dest:
        messenger.send_info("Writing tree probabilities ...")
        tree_list = dendropy.TreeList(taxon_set=master_split_distribution.taxon_set)
        tree_freqs = master_topology_counter.calc_tree_freqs(tree_list.taxon_set)
        cumulative_prob = 0.0
        for idx, (tree, (count, prop)) in enumerate(tree_freqs.items()):
            tree_list.append(tree)
            cumulative_prob += prop
            tree.probability = prop
            tree.count = count
            tree.cumulative_probability = cumulative_prob
            tree.annotations.add_bound_attribute("count")
            tree.annotations.add_bound_attribute("probability")
            tree.annotations.add_bound_attribute("cumulative_probability")
            tree.label = "Tree%d" % (idx + 1)
        tree_list.write_to_stream(
            trprobs_dest,
            "nexus",
            simple=simple,
            suppress_rooting=True,
            suppress_edge_lengths=True,
            suppress_internal_labels=True,
            unquoted_underscores=False,
            preserve_spaces=False,
            store_tree_weights=False,
            suppress_annotations=False,
            annotations_as_nhx=False,
            suppress_item_comments=True,
            suppress_leaf_taxon_labels=False,
            suppress_leaf_node_labels=True,
            suppress_internal_taxon_labels=False,
            suppress_internal_node_labels=False,
            node_label_element_separator=" ",
            node_label_compose_func=None,
        )

    if split_edges_dest:
        messenger.send_info("Writing split edge lengths ...")
        for split in master_split_distribution.splits:
            row = []
            row.append(master_split_distribution.taxon_set.split_as_newick_string(split))
            for edge_length in master_split_distribution.split_edge_lengths[split]:
                row.append("%s" % edge_length)
            split_edges_dest.write("%s\n" % ("\t".join(row)))

    if not opts.output_filepath:
        pass
    else:
        messenger.send_info("Results written to: '%s'." % (output_fpath))

    ###################################################
    #  WRAP UP
    messenger.send_info("Summarization completed.")
    messenger.send_info_lines(final_run_report)
    messenger.silent = True
Ejemplo n.º 3
0
def main_cli():

    description = "%s %s %s" % (_program_name, _program_version,
                                _program_subtitle)
    usage = "%prog [options] TREES-FILE [TREES-FILE [TREES-FILE [...]]"

    parser = OptionParser(usage=usage,
                          add_help_option=True,
                          version=_program_version,
                          description=description)

    sum_tree_optgroup = OptionGroup(parser, "Source Treatment Options")
    parser.add_option_group(sum_tree_optgroup)
    sum_tree_optgroup.add_option(
        "-b",
        "--burnin",
        action="store",
        dest="burnin",
        type="int",
        default=0,
        help=
        'number of trees to skip from the beginning of *each tree file* when counting support [default=%default]'
    )

    source_tree_optgroup = OptionGroup(parser, "Source Tree Options")
    parser.add_option_group(source_tree_optgroup)
    source_tree_optgroup.add_option("--rooted",
                                    action="store_true",
                                    dest="rooted_trees",
                                    default=None,
                                    help="treat trees as rooted")
    source_tree_optgroup.add_option("--unrooted",
                                    action="store_false",
                                    dest="rooted_trees",
                                    default=None,
                                    help="treat trees as unrooted")
    source_tree_optgroup.add_option(
        "--ultrametric",
        action="store_true",
        dest="ultrametric_trees",
        default=False,
        help=
        "assume trees are ultrametric (implies '--rooted' ; will result in node ages being summarized; will result in error if trees are not ultrametric)"
    )
    source_tree_optgroup.add_option(
        "--weighted-trees",
        action="store_true",
        dest="weighted_trees",
        default=False,
        help=
        "use weights of trees as indicated by '[&W m/n]' comment to weight contribution of splits found on each tree to overall split frequencies"
    )
    source_tree_optgroup.add_option(
        "--from-newick-stream",
        action="store_true",
        dest="from_newick_stream",
        default=False,
        help="support trees will be streamed in newick format")
    source_tree_optgroup.add_option(
        "--from-nexus-stream",
        action="store_true",
        dest="from_nexus_stream",
        default=False,
        help="support trees will be streamed in NEXUS format")

    target_tree_optgroup = OptionGroup(parser, 'Target Tree Options')
    parser.add_option_group(target_tree_optgroup)
    target_tree_optgroup.add_option(
        "-t",
        "--target",
        dest="target_tree_filepath",
        default=None,
        help=
        "path to optional target, model or best topology tree file (Newick or NEXUS format) "
        + "to which support will be mapped; " +
        "if not given, then a majority-rule clade consensus tree will be constructed based on the "
        +
        "all the trees given in the support tree files (except for those discarded as burn-ins), "
        + "and this will be used as the target tree")
    target_tree_optgroup.add_option("-f", "--min-clade-freq",
            dest="min_clade_freq",
            type="float",
            default=0.50,
            metavar="#.##",
            help="minimum frequency or probability for a clade or a split to be "\
                    + "included in the consensus tree, if used [default=%default]")

    support_summarization_optgroup = OptionGroup(
        parser, "Support Summarization Options")
    parser.add_option_group(support_summarization_optgroup)
    support_summarization_optgroup.add_option(
        "-l",
        "--support-as-labels",
        action="store_const",
        dest="support_annotation_target",
        default=1,
        const=1,
        help=
        "in addition to node metadata, indicate branch support as internal node labels [default]"
    )
    support_summarization_optgroup.add_option(
        "-v",
        "--support-as-lengths",
        action="store_const",
        dest="support_annotation_target",
        default=1,
        const=2,
        help=
        "in addition to node metadata, indicate branch support as branch lengths"
    )
    support_summarization_optgroup.add_option("-x",
                                              "--no-support",
                                              action="store_const",
                                              dest="support_annotation_target",
                                              default=1,
                                              const=0,
                                              help="""\
do not indicate support with internal node labels or edge lengths
(support will still be indicated as node metadata unless
'--no-summary-metadata' is specified)""")
    support_summarization_optgroup.add_option(
        "-p",
        "--percentages",
        action="store_true",
        dest="support_as_percentages",
        default=False,
        help=
        "indicate branch support as percentages (otherwise, will report as proportions by default)"
    )
    support_summarization_optgroup.add_option(
        "-d",
        "--decimals",
        dest="support_label_decimals",
        type="int",
        metavar="#",
        default=2,
        help=
        "number of decimal places in indication of support values [default=%default]"
    )

    edge_summarization_optgroup = OptionGroup(
        parser, "Edge Length Summarization Options")
    parser.add_option_group(edge_summarization_optgroup)
    edge_summarization_choices = [
        "mean-length", "median-length", "mean-age", "median-age", "keep",
        "unweighted"
    ]
    edge_summarization_optgroup.add_option(
        "-e",
        "--edges",
        type="choice",
        dest="edge_summarization",
        metavar="<%s>" % ("|".join(edge_summarization_choices)),
        choices=edge_summarization_choices,
        default=None,
        help="""\
set edge lengths of target tree(s) to mean/median lengths/ages of
corresponding splits or edges of input trees (note that using 'mean-age' or
'median-age' require rooted ultrametric input trees, and will behave as
if '--ultrametric' and '--with-node-ages' are specified");
default is to 'keep' if target trees are specified
(i.e., target trees will have their branch lengths preserved by default),
'median-age' if no target trees are specified but the '--ultrametric' directive is given
(a consensus tree should be constructed to summarize support and input trees are ultrametric),
and 'mean-length' if no target trees are specified and the '--ultrametric' directive is *not* given
(a consensus tree should be constructed to summarize support and input trees are *not* assumed to be ultrametric),
""")
    edge_summarization_optgroup.add_option(
        "--collapse-negative-edges",
        action="store_true",
        dest="collapse_negative_edges",
        default=False,
        help=
        "(if setting edge lengths) force parent node ages to be at least as old as its oldest child when summarizing node ages"
    )

    other_summarization_optgroup = OptionGroup(parser,
                                               "Other Summarization Options")
    parser.add_option_group(other_summarization_optgroup)
    #other_summarization_optgroup.add_option("--with-node-ages",
    #        action="store_true",
    #        dest="calc_node_ages",
    #        default=None,
    #        help="summarize node ages as well as edge lengths (implies '--rooted' and '--ultrametric'; automatically enabled if '--ultrametric' is specified; will result in error if trees are not ultrametric)")
    other_summarization_optgroup.add_option("--trprobs", "--calc-tree-probabilities",
            dest="trprobs_filepath",
            default=None,
            metavar="FILEPATH",
            help="if specified, a file listing tree (topologies) and the " \
                    + "frequencies of their occurrences will be saved to FILEPATH")
    other_summarization_optgroup.add_option("--extract-edges",
            dest="split_edges_filepath",
            default=None,
            metavar="FILEPATH",
            help="if specified, a tab-delimited file of splits and their edge " \
                    + "lengths across input trees will be saved to FILEPATH")
    other_summarization_optgroup.add_option(
        "--no-node-ages",
        action="store_false",
        dest="calc_node_ages",
        default=None,
        help=
        "do not calculate/summarize node ages, even if '--ultrametric' is specified"
    )
    other_summarization_optgroup.add_option(
        "--no-summary-metadata",
        action="store_true",
        dest="suppress_summary_metadata",
        default=False,
        help=
        "do not annotate nodes with ranges, 5%/95 quartiles, 95% HPD's etc. of edge lengths and node ages"
    )

    output_filepath_optgroup = OptionGroup(parser, "Output File Options")
    parser.add_option_group(output_filepath_optgroup)
    output_filepath_optgroup.add_option(
        "-o",
        "--output",
        dest="output_filepath",
        default=None,
        help="path to output file (if not given, will print to standard output)"
    )
    output_filepath_optgroup.add_option(
        "--no-taxa-block",
        action="store_false",
        dest="include_taxa_block",
        default=True,
        help=
        "do not include a taxa block in the output treefile (otherwise will create taxa block by default)"
    )
    output_filepath_optgroup.add_option(
        "--no-meta-comments",
        action="store_false",
        dest="include_meta_comments",
        default=True,
        help=
        "do not include initial file comment annotating details of scoring operation"
    )
    output_filepath_optgroup.add_option(
        "-c",
        "--additional-comments",
        action="store",
        dest="additional_comments",
        default=None,
        help="additional comments to be added to the summary file")
    output_filepath_optgroup.add_option(
        "--to-newick",
        action="store_true",
        dest="to_newick_format",
        default=False,
        help=
        "save results in NEWICK (PHYLIP) format (default is to save in NEXUS format)"
    )
    output_filepath_optgroup.add_option("--to-phylip",
                                        action="store_true",
                                        dest="to_newick_format",
                                        default=False,
                                        help="same as --newick")
    output_filepath_optgroup.add_option(
        "-r",
        "--replace",
        action="store_true",
        dest="replace",
        default=False,
        help=
        "replace/overwrite output file without asking if it already exists ")

    run_optgroup = OptionGroup(parser, "Program Run Options")
    parser.add_option_group(run_optgroup)
    if _MP:
        run_optgroup.add_option("-m", "--multiprocessing",
                action="store",
                dest="multiprocess",
                metavar="NUM-PROCESSES",
                default=None,
                help="run in parallel mode with up to a maximum of NUM-PROCESSES processes " \
                        + "(specify '*' to run in as many processes as there are cores on the "\
                        + "local machine)")

    run_optgroup.add_option(
        "-g",
        "--log-frequency",
        type="int",
        metavar="LOG-FREQUENCY",
        dest="log_frequency",
        default=500,
        help=
        "tree processing progress logging frequency (default=%default; set to 0 to suppress)"
    )
    run_optgroup.add_option(
        "-q",
        "--quiet",
        action="store_true",
        dest="quiet",
        default=False,
        help="suppress ALL logging, progress and feedback messages")
    run_optgroup.add_option(
        "--ignore-missing-support",
        action="store_true",
        dest="ignore_missing_support",
        default=False,
        help="ignore missing support tree files (at least one must exist!)")
    run_optgroup.add_option(
        "--ignore-missing-target",
        action="store_true",
        dest="ignore_missing_target",
        default=False,
        help=
        "ignore missing target tree file (will construct majority rule consensus tree if missing)"
    )

    (opts, args) = parser.parse_args()
    if opts.quiet:
        messaging_level = ConsoleMessenger.ERROR_MESSAGING_LEVEL
    else:
        messaging_level = ConsoleMessenger.INFO_MESSAGING_LEVEL
    messenger = ConsoleMessenger(name="SumTrees",
                                 messaging_level=messaging_level)

    # splash
    if not opts.quiet:
        show_splash(prog_name=_program_name,
                    prog_subtitle=_program_subtitle,
                    prog_version=_program_version,
                    prog_author=_program_author,
                    prog_copyright=_program_copyright,
                    dest=sys.stderr,
                    extended=False)

    ###################################################
    # Support file idiot checking

    support_filepaths = []
    if len(args) > 0:
        for fpath in args:
            fpath = os.path.expanduser(os.path.expandvars(fpath))
            if not os.path.exists(fpath):
                if opts.ignore_missing_support:
                    messenger.send_warning("Support file not found: '%s'" %
                                           fpath)
                else:
                    messenger.send_error(
                        "Terminating due to missing support files. " +
                        "Use the '--ignore-missing-support' option to continue even "
                        + "if some files are missing.")
                    sys.exit(1)
            else:
                support_filepaths.append(fpath)
        if len(support_filepaths) == 0:
            messenger.send_error(
                "No valid sources of input trees specified. " +
                "Please provide the path to at least one (valid and existing) file "
                + "containing tree samples to summarize.")
            sys.exit(1)
    else:
        if not opts.from_newick_stream and not opts.from_nexus_stream:
            messenger.send_info(
                "No sources of input trees specified. " +
                "Please provide the path to at least one (valid and existing) file "
                +
                "containing tree samples to summarize. See '--help' for other options."
            )
            sys.exit(1)

    ###################################################
    # Lots of other idiot-checking ...

    # target tree
    if opts.target_tree_filepath is not None:
        target_tree_filepath = os.path.expanduser(
            os.path.expandvars(opts.target_tree_filepath))
        if not os.path.exists(target_tree_filepath):
            if opts.ignore_missing_target:
                if not opts.quiet:
                    messenger.send_warning(
                        "Target tree file not found: '%s': using majority-rule consensus tree instead."
                        % target_tree_filepath)
                target_tree_filepath = None
            else:
                messenger.send_error("Target tree file not found: '%s'" %
                                     target_tree_filepath)
                sys.exit(1)
    else:
        target_tree_filepath = None

    ### TODO: these will be command-line options in the future
    ### here we just set it
    assert not hasattr(opts, 'outgroup')
    opts.outgroup = None
    assert not hasattr(opts, 'root_target')
    opts.root_target = None

    ### TODO: idiot-check edge length summarization
    # edge lengths
    if opts.edge_summarization:
        opts.edge_summarization = opts.edge_summarization.lower()
        if opts.edge_summarization not in edge_summarization_choices:
            messenger.send_error(
                "'%s' is not a valid edge summarization choice; must be one of: %s"
                % (opts.edge_summarization, edge_summarization_choices))
            sys.exit(1)
    if opts.edge_summarization == "mean-age" or opts.edge_summarization == "median-age":
        opts.ultrametric_trees = True
        opts.rooted_trees = True
        if opts.calc_node_ages is None:
            opts.calc_node_ages = True
    else:
        if opts.ultrametric_trees:
            opts.rooted_trees = True
            if opts.calc_node_ages is None:
                opts.calc_node_ages = True
        else:
            if opts.calc_node_ages is True:
                opts.ultrametric_trees = True
                opts.rooted_trees = True
            else:
                opts.calc_node_ages = False

    # output
    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(
            os.path.expandvars(opts.output_filepath))
        if confirm_overwrite(filepath=output_fpath,
                             replace_without_asking=opts.replace):
            output_dest = open(output_fpath, "w")
        else:
            sys.exit(1)

    if opts.trprobs_filepath:
        trprobs_filepath = os.path.expanduser(
            os.path.expandvars(opts.trprobs_filepath))
        if confirm_overwrite(filepath=trprobs_filepath,
                             replace_without_asking=opts.replace):
            trprobs_dest = open(trprobs_filepath, "w")
        else:
            sys.exit(1)
        opts.calc_tree_probs = True
    else:
        trprobs_dest = None
        opts.calc_tree_probs = False

    if opts.split_edges_filepath:
        split_edges_filepath = os.path.expanduser(
            os.path.expandvars(opts.split_edges_filepath))
        if confirm_overwrite(filepath=split_edges_filepath,
                             replace_without_asking=opts.replace):
            split_edges_dest = open(split_edges_filepath, "w")
        else:
            sys.exit(1)
    else:
        split_edges_dest = None

    if opts.from_newick_stream:
        schema = "newick"
    elif opts.from_nexus_stream:
        schema = "nexus"
    else:
        schema = 'nexus/newick'

    ###################################################
    # Main work begins here: Count the splits

    start_time = datetime.datetime.now()
    master_split_distribution = None
    if (support_filepaths is not None and len(support_filepaths) > 1) \
            and _MP \
            and opts.multiprocess:
        if opts.multiprocess is not None:
            if opts.multiprocess == "*":
                num_processes = multiprocessing.cpu_count()
            elif opts.multiprocess == "@":
                num_processes = len(support_filepaths)
            else:
                try:
                    num_processes = int(opts.multiprocess)
                except ValueError:
                    messenger.send_error(
                        "'%s' is not a valid number of processes (must be a positive integer)."
                        % opts.multiprocess)
                    sys.exit(1)
            if num_processes <= 0:
                messenger.send_error(
                    "Maximum number of processes set to %d: cannot run SumTrees with less than 1 process"
                    % num_processes)
                sys.exit(1)
            if num_processes == 1:
                messenger.send_warning(
                    "Running in parallel processing mode but limited to only 1 process: probably more efficient to run in serial mode!"
                )

        master_split_distribution, master_topology_counter = process_sources_parallel(
            num_processes=num_processes,
            support_filepaths=support_filepaths,
            schema=schema,
            is_rooted=opts.rooted_trees,
            ignore_node_ages=not opts.calc_node_ages,
            calc_tree_probs=opts.calc_tree_probs,
            weighted_trees=opts.weighted_trees,
            tree_offset=opts.burnin,
            log_frequency=opts.log_frequency,
            messenger=messenger)
    else:
        if (_MP and opts.multiprocess is not None
                and len(support_filepaths) == 1):
            messenger.send_warning(
                "Parallel processing mode requested but only one source specified: defaulting to serial mode."
            )
        if opts.from_newick_stream or opts.from_nexus_stream:
            support_filepaths = None
        master_split_distribution, master_topology_counter = process_sources_serial(
            support_filepaths=support_filepaths,
            schema=schema,
            is_rooted=opts.rooted_trees,
            ignore_node_ages=not opts.calc_node_ages,
            calc_tree_probs=opts.calc_tree_probs,
            weighted_trees=opts.weighted_trees,
            tree_offset=opts.burnin,
            log_frequency=opts.log_frequency,
            messenger=messenger)

    ###################################################
    # Compose post-counting report

    # if not splits counted or the taxon set was not populated for any reason,
    # we just produce an empty block so we don't crash as we report nothing of interest
    if master_split_distribution.taxon_set is None:
        assert (master_split_distribution.total_trees_counted == 0)
        master_split_distribution.taxon_set = dendropy.TaxonSet()

    # taxon set to handle target trees
    master_taxon_set = master_split_distribution.taxon_set

    report = []
    report.append(
        "%d trees considered in total for split support assessment." %
        (master_split_distribution.total_trees_counted))
    if opts.rooted_trees is None:
        report.append(
            "Tree rooting as given by tree statement (defaults to unrooted).")
    elif opts.rooted_trees:
        report.append("Trees treated as rooted.")
    else:
        report.append("Trees treated as unrooted.")
    if opts.ultrametric_trees:
        report.append("Trees are expected to be ultrametric.")
    if opts.weighted_trees:
        report.append("Trees treated as weighted (default weight = 1.0).")
    else:
        report.append("Trees treated as unweighted.")
    n_taxa = len(master_taxon_set)
    report.append("%d unique taxa across all trees." % n_taxa)
    num_splits, num_unique_splits, num_nt_splits, num_nt_unique_splits = master_split_distribution.splits_considered(
    )
    report.append("%d unique splits out of %d total splits counted." %
                  (num_unique_splits, num_splits))
    report.append(
        "%d unique non-trivial splits out of %d total non-trivial splits counted."
        % (num_nt_unique_splits, num_nt_splits))

    comments = []
    comments.extend(report)
    messenger.send_info("Split counting completed:")
    messenger.send_info_lines(report, prefix=" - ")

    ###################################################
    #  Target tree and mapping

    if not opts.support_as_percentages and opts.support_label_decimals < 2:
        messenger.send_warning(
            "Reporting support by proportions require that support will be reported to at least 2 decimal places"
        )
        opts.support_label_decimals = 2

    tsum = treesum.TreeSummarizer()
    tsum.add_node_metadata = not opts.suppress_summary_metadata
    if opts.support_annotation_target == 1:
        tsum.support_as_labels = True
        tsum.support_as_edge_lengths = False
        support_show = "indicated by node labels"
        if tsum.add_node_metadata:
            support_show += " and node metadata"
    elif opts.support_annotation_target == 2:
        tsum.support_as_labels = False
        tsum.support_as_edge_lengths = True
        support_show = "indicated by branch lengths"
        if tsum.add_node_metadata:
            support_show += " and node metadata"
    elif opts.support_annotation_target == 0:
        tsum.support_as_labels = False
        tsum.support_as_edge_lengths = False
        if tsum.add_node_metadata:
            support_show = "indicated by node metadata (only)"
        else:
            support_show = "not indicated"
    else:
        raise Exception("Unexpected value for support annotation target: %s" %
                        opts.support_annotation_target)
    tsum.support_as_percentages = opts.support_as_percentages
    tsum.support_label_decimals = opts.support_label_decimals
    tsum.weighted_splits = opts.weighted_trees

    if opts.support_as_percentages:
        support_units = "Percentage"
    else:
        support_units = "Proportion (frequency or probability)"
    support_summarization = "%s of support for each split %s" % (support_units,
                                                                 support_show)

    tt_trees = []
    if target_tree_filepath is not None:
        messenger.send_info("Mapping support to target tree ...")
        for tree in tree_source_iter(stream=open(target_tree_filepath, 'r'),
                                     schema="nexus/newick",
                                     taxon_set=master_taxon_set,
                                     as_rooted=opts.rooted_trees):
            if opts.root_target:
                if opts.outgroup:
                    pass
                else:
                    tree.root_at_midpoint(splits=True)
            if opts.rooted_trees and not tree.is_rooted:
                messenger.send_error(
                    "Support trees are treated as rooted, but target tree is unrooted. Root target tree(s) and re-run, or run using the '--root-target' flag."
                )
                sys.exit(1)
            stree = tsum.map_split_support_to_tree(tree,
                                                   master_split_distribution)
            tt_trees.append(stree)
        messenger.send_info("Parsed '%s': %d tree(s) in file" %
                            (target_tree_filepath, len(tt_trees)))
        comments.append("Split support mapped to trees in:")
        comments.append("  - '%s' (%d trees)" %
                        (os.path.abspath(target_tree_filepath), len(tt_trees)))
        if opts.root_target:
            if opts.outgroup:
                comments.append("Target tree(s) rooted using outgroup: %s." %
                                opts.outgroup)
            else:
                comments.append("Target tree(s) rooted at midpoint.")
        comments.append(support_summarization + '.')
    else:
        messenger.send_info("Constructing clade consensus tree ...")
        if opts.min_clade_freq > 1.0:
            messenger.send_warning(
                "Maximum frequency threshold for clade inclusion is 1.0: reset to 1.0."
            )
            min_freq = 1.0
        else:
            min_freq = opts.min_clade_freq
        stree = tsum.tree_from_splits(master_split_distribution,
                                      min_freq=min_freq,
                                      include_edge_lengths=False)
        #include_edge_lengths=not opts.no_branch_lengths)
        if opts.root_target:
            stree.reroot_at_midpoint(update_splits=True)
        report = []
        report.append(
            "Consensus tree (%f clade frequency threshold) constructed from splits."
            % min_freq)
        tt_trees.append(stree)
        if opts.root_target:
            if opts.outgroup:
                report.append("Consensus tree rooted using outgroup: %s." %
                              opts.outgroup)
            else:
                report.append("Consensus tree rooted at midpoint.")
        report.append(support_summarization + ".")
        messenger.send_info_lines(report)
        comments.extend(report)

    if not opts.suppress_summary_metadata:
        messenger.send_info("Summarizing node ages and lengths ...")
        for stree in tt_trees:
            tsum.annotate_nodes_and_edges(
                tree=stree, split_distribution=master_split_distribution)

    if opts.edge_summarization is None:
        if target_tree_filepath is not None:
            opts.edge_summarization = 'keep'
        else:
            if opts.ultrametric_trees:
                opts.edge_summarization = 'median-age'
            else:
                opts.edge_summarization = 'mean-length'
    if opts.edge_summarization is not None and opts.edge_summarization == 'unweighted':
        for stree in tt_trees:
            for edge in stree.postorder_edge_iter():
                edge.length = None
    elif opts.edge_summarization is not None and opts.edge_summarization != 'keep':
        if opts.edge_summarization.startswith('mean'):
            summary_func_desc = "mean"
            summarization_func = lambda x: statistics.mean_and_sample_variance(
                x)[0]
        else:
            summary_func_desc = "median"
            summarization_func = statistics.median
        if opts.edge_summarization.endswith("age"):
            messenger.send_info("Mapping node ages ...")
            comments.append(
                "Setting node ages of output tree(s) to %s ages of corresponding nodes of input trees."
                % summary_func_desc)
            if opts.collapse_negative_edges:
                comments.append(
                    "Parent node ages coerced to be at least as old as oldest daughter node age."
                )
                collapse_negative_edges = True
                allow_negative_edges = False
            else:
                comments.append(
                    "Parent node ages not adjusted: negative edge lengths allowed."
                )
                collapse_negative_edges = False
                allow_negative_edges = True
            for stree in tt_trees:
                tsum.summarize_node_ages_on_tree(
                    tree=stree,
                    split_distribution=master_split_distribution,
                    set_edge_lengths=True,
                    collapse_negative_edges=collapse_negative_edges,
                    allow_negative_edges=allow_negative_edges,
                    summarization_func=summarization_func)
        elif opts.edge_summarization.endswith("length"):
            messenger.send_info("Mapping edge lengths ...")
            comments.append(
                "Setting edge lengths of output tree(s) to %s length of corresponding edges of input trees."
                % summary_func_desc)
            for stree in tt_trees:
                tsum.summarize_edge_lengths_on_tree(
                    tree=stree,
                    split_distribution=master_split_distribution,
                    summarization_func=summarization_func)
    else:
        comments.append("Not setting edge lengths on output tree(s).")

    end_time = datetime.datetime.now()

    ###################################################
    #  RESULTS

    messenger.send_info("Writing results ...")

    final_run_report = []
    final_run_report.append("Began at: %s." % (start_time.isoformat(' ')))
    final_run_report.append("Ended at: %s." % (end_time.isoformat(' ')))
    hours, mins, secs = str(end_time - start_time).split(":")
    run_time = "Run time: %s hour(s), %s minute(s), %s second(s)." % (
        hours, mins, secs)
    final_run_report.append(run_time)

    output_dataset = dendropy.DataSet(
        dendropy.TreeList(tt_trees, taxon_set=master_taxon_set))
    if opts.to_newick_format:
        output_dataset.write(output_dest,
                             "newick",
                             suppress_rooting=False,
                             suppress_edge_lengths=False,
                             unquoted_underscores=False,
                             preserve_spaces=False,
                             store_tree_weights=False,
                             suppress_annotations=False,
                             annotations_as_nhx=False,
                             suppress_item_comments=False,
                             suppress_leaf_taxon_labels=False,
                             suppress_leaf_node_labels=True,
                             suppress_internal_taxon_labels=False,
                             suppress_internal_node_labels=False,
                             node_label_element_separator=' ',
                             node_label_compose_func=None)
    else:
        if opts.include_taxa_block:
            simple = False
        else:
            simple = True
        if opts.include_meta_comments:
            comment = []
            try:
                username = getpass.getuser()
            except:
                username = "******"
            comment.append("%s %s by %s." %
                           (_program_name, _program_version, _program_author))
            comment.append(
                "Using DendroPy Version %s by Jeet Sukumaran and Mark T. Holder."
                % dendropy.__version__)
            python_version = sys.version.replace("\n",
                                                 "").replace("[", "(").replace(
                                                     "]", ")")
            comment.append("Running under Python %s on %s." %
                           (python_version, sys.platform))
            comment.append("Executed on %s by %s@%s." %
                           (platform.node(), username, socket.gethostname()))
            if support_filepaths is not None and len(support_filepaths) > 0:
                comment.append("Basis of split support:")
                for support_file in support_filepaths:
                    comment.append("  - '%s'" % os.path.abspath(support_file))
            else:
                comment.append(
                    "Basis of split support: trees read from standard input.")
            comment.extend(final_run_report)
            comment.extend(comments)
        if opts.additional_comments:
            comment.append("\n")
            comment.append(opts.additional_comments)
        output_dataset.write(
            output_dest,
            "nexus",
            simple=simple,
            file_comments=comment,
            suppress_rooting=False,
            suppress_edge_lengths=opts.edge_summarization == 'unweighted',
            unquoted_underscores=False,
            preserve_spaces=False,
            store_tree_weights=False,
            suppress_annotations=False,
            annotations_as_nhx=False,
            suppress_item_comments=False,
            suppress_leaf_taxon_labels=False,
            suppress_leaf_node_labels=True,
            suppress_internal_taxon_labels=False,
            suppress_internal_node_labels=False,
            node_label_element_separator=' ',
            node_label_compose_func=None)

    if trprobs_dest:
        messenger.send_info("Writing tree probabilities ...")
        tree_list = dendropy.TreeList(
            taxon_set=master_split_distribution.taxon_set)
        tree_freqs = master_topology_counter.calc_tree_freqs(
            tree_list.taxon_set)
        cumulative_prob = 0.0
        for idx, (tree, (count, prop)) in enumerate(tree_freqs.items()):
            tree_list.append(tree)
            cumulative_prob += prop
            tree.probability = prop
            tree.count = count
            tree.cumulative_probability = cumulative_prob
            tree.annotate('count')
            tree.annotate('probability')
            tree.annotate('cumulative_probability')
            tree.label = "Tree%d" % (idx + 1)
        tree_list.write_to_stream(trprobs_dest,
                                  'nexus',
                                  simple=simple,
                                  suppress_rooting=True,
                                  suppress_edge_lengths=True,
                                  suppress_internal_labels=True,
                                  unquoted_underscores=False,
                                  preserve_spaces=False,
                                  store_tree_weights=False,
                                  suppress_annotations=False,
                                  annotations_as_nhx=False,
                                  suppress_item_comments=True,
                                  suppress_leaf_taxon_labels=False,
                                  suppress_leaf_node_labels=True,
                                  suppress_internal_taxon_labels=False,
                                  suppress_internal_node_labels=False,
                                  node_label_element_separator=' ',
                                  node_label_compose_func=None)

    if split_edges_dest:
        messenger.send_info("Writing split edge lengths ...")
        for split in master_split_distribution.splits:
            row = []
            row.append(
                master_split_distribution.taxon_set.split_as_newick_string(
                    split))
            for edge_length in master_split_distribution.split_edge_lengths[
                    split]:
                row.append("%s" % edge_length)
            split_edges_dest.write("%s\n" % ("\t".join(row)))

    if not opts.output_filepath:
        pass
    else:
        messenger.send_info("Results written to: '%s'." % (output_fpath))

    ###################################################
    #  WRAP UP
    messenger.send_info("Summarization completed.")
    messenger.send_info_lines(final_run_report)
    messenger.silent = True
Ejemplo n.º 4
0
def main_cli():
    """Concatenate the trees of one or more tree files into a single output.

    Command-line entry point. Options are parsed with ``optparse``; every
    positional argument is taken as the path of a tree file. Each existing
    file is read through ``tree_source_iter`` and a tree is kept when its
    zero-based index within its file is at least ``--burnin`` and is a
    multiple of ``--stride``. Kept trees are written as Newick strings to the
    output destination (standard output unless ``--output`` is given),
    wrapped in a NEXUS ``Trees`` block unless ``--newick``/``--phylip`` is set.
    In NEXUS mode a per-file sampling report and any ``--additional_comments``
    text are appended as bracketed comments after the block.

    Calls ``sys.exit(1)`` when the stride is not a positive integer, when a
    tree file is missing and ``--ignore-missing-support`` was not given, when
    no usable tree file remains, or when ``confirm_overwrite`` declines to
    overwrite the output file.
    """

    description = '%s %s %s' % (_program_name, _program_version,
                                _program_subtitle)
    usage = "%prog [options] <TREES FILE> [<TREES FILE> [<TREES FILE> [...]]"

    parser = OptionParser(usage=usage,
                          add_help_option=True,
                          version=_program_version,
                          description=description)

    input_optgroup = OptionGroup(parser, 'Input File Options')
    parser.add_option_group(input_optgroup)
    input_optgroup.add_option(
        '-b',
        '--burnin',
        action='store',
        dest='burnin',
        type='int',  # also 'float', 'string' etc.
        default=0,
        help=
        'number of trees to skip from the beginning of *each tree file* when counting support [default=%default]'
    )
    input_optgroup.add_option(
        '-s',
        '--stride',
        action='store',
        dest='stride',
        metavar="STRIDE",
        type='int',  # also 'float', 'string' etc.
        default=1,
        help='resample rate: only include one out of every STRIDE trees')

    output_filepath_optgroup = OptionGroup(parser, 'Output File Options')
    parser.add_option_group(output_filepath_optgroup)
    output_filepath_optgroup.add_option(
        '-o',
        '--output',
        dest='output_filepath',
        default=None,
        help="path to output file (if not given, will print to standard output)"
    )
    output_filepath_optgroup.add_option(
        '--no-taxa-block',
        action='store_false',
        dest='include_taxa_block',
        default=True,
        help=
        "do not include a taxa block in the output treefile (otherwise will create taxa block by default)"
    )
    output_filepath_optgroup.add_option(
        '--no-meta-comments',
        action='store_false',
        dest='include_meta_comments',
        default=True,
        help=
        "include initial file comment annotating details of scoring operation")
    output_filepath_optgroup.add_option(
        '-m',
        '--additional_comments',
        action='store',
        dest='additional_comments',
        default=None,
        help="additional comments to be added to the summary file")
    output_filepath_optgroup.add_option(
        '--newick',
        action='store_true',
        dest='phylip_format',
        default=False,
        help=
        "save results in NEWICK (PHYLIP) format (default is to save in NEXUS format)"
    )
    output_filepath_optgroup.add_option('--phylip',
                                        action='store_true',
                                        dest='phylip_format',
                                        default=False,
                                        help="same as --newick")
    output_filepath_optgroup.add_option(
        '-r',
        '--replace',
        action='store_true',
        dest='replace',
        default=False,
        help=
        "replace/overwrite output file without asking if it already exists ")

    run_optgroup = OptionGroup(parser, 'Program Run Options')
    parser.add_option_group(run_optgroup)
    run_optgroup.add_option('-q',
                            '--quiet',
                            action='store_true',
                            dest='quiet',
                            default=False,
                            help="suppress progress messages")
    run_optgroup.add_option(
        '--ignore-missing-support',
        action='store_true',
        dest='ignore_missing_support',
        default=False,
        help="ignore missing support tree files (at least one must exist!)")

    (opts, args) = parser.parse_args()
    if opts.quiet:
        messaging_level = ConsoleMessenger.ERROR_MESSAGING_LEVEL
    else:
        messaging_level = ConsoleMessenger.INFO_MESSAGING_LEVEL
    messenger = ConsoleMessenger(name='cattrees.py',
                                 messaging_level=messaging_level)

    # splash
    if not opts.quiet:
        show_splash(prog_name=_program_name,
                    prog_subtitle=_program_subtitle,
                    prog_version=_program_version,
                    prog_author=_program_author,
                    prog_copyright=_program_copyright,
                    dest=sys.stderr,
                    extended=False)

    ###################################################
    # Option sanity checking

    # The sampling test below computes ``tree_count % opts.stride``; a stride
    # of zero would raise ZeroDivisionError in the middle of writing output.
    if opts.stride < 1:
        messenger.send_error(
            "Invalid stride %d: the resample rate must be a positive integer."
            % opts.stride)
        sys.exit(1)

    ###################################################
    # Tree file idiot checking

    tree_filepaths = []
    for fpath in args:
        fpath = os.path.expanduser(os.path.expandvars(fpath))
        if not os.path.exists(fpath):
            if opts.ignore_missing_support:
                messenger.send_warning('Tree file not found: "%s"' % fpath)
            else:
                # Name the offending file before giving up.
                messenger.send_error('Tree file not found: "%s"' % fpath)
                messenger.send_error(
                    'Terminating due to missing tree files. ' +
                    'Use the "--ignore-missing-support" option to continue even '
                    + 'if some files are missing.')
                sys.exit(1)
        else:
            tree_filepaths.append(fpath)
    if len(tree_filepaths) == 0:
        messenger.send_error(
            "No sources of trees specified or could be found. " +
            "Please provide the path to at least one (valid and existing) file "
            + "containing trees.")
        sys.exit(1)

    ###################################################
    # Other prepping...

    # output
    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(
            os.path.expandvars(opts.output_filepath))
        if confirm_overwrite(filepath=output_fpath,
                             replace_without_asking=opts.replace):
            output_dest = open(output_fpath, "w")
        else:
            sys.exit(1)

    # Everything that writes to ``output_dest`` runs inside this ``try`` so an
    # output file opened above is closed even if reading a source fails.
    try:

        ###################################################
        # write nexus header if necessary

        if not opts.phylip_format:
            output_dest.write("#NEXUS\n\n")
            output_dest.write("Begin Trees;\n")

        ###################################################
        # Main work begins here

        report = []
        total_trees_added = 0
        for tree_filepath_idx, tree_filepath in enumerate(tree_filepaths):
            messenger.send_info(
                "-- Reading tree source %d of %d: %s" %
                (tree_filepath_idx + 1, len(tree_filepaths), tree_filepath))
            trees_added = 0
            # Tracked separately from the loop variable so a file without any
            # trees reports 0 instead of an unbound or stale ``tree_count``.
            trees_in_file = 0
            # NOTE(review): "rU" is rejected by Python 3.11+; kept because this
            # script appears to target Python 2 -- confirm before porting.
            tree_stream = open(tree_filepath, "rU")
            try:
                for tree_count, tree in enumerate(
                        tree_source_iter(stream=tree_stream,
                                         schema='nexus/newick')):
                    trees_in_file = tree_count + 1
                    if tree_count < opts.burnin or tree_count % opts.stride:
                        continue
                    trees_added += 1
                    if opts.phylip_format:
                        output_dest.write(tree.as_string(schema="newick"))
                    else:
                        output_dest.write(
                            "tree %d = %s" %
                            (trees_added, tree.as_string(schema="newick")))
            finally:
                tree_stream.close()
            total_trees_added += trees_added
            message = (
                "%s: %d trees in file, sampling 1 tree of every %d trees after %d tree burn-in: %d trees added (current total = %d trees)"
                % (tree_filepath, trees_in_file, opts.stride, opts.burnin,
                   trees_added, total_trees_added))
            report.append(message)
            messenger.send_info("   " + message)

        if not opts.phylip_format:
            output_dest.write("End;\n")
            if opts.include_meta_comments:
                output_dest.write("\n")
                output_dest.write("[Total of %d trees sourced from:]\n" %
                                  total_trees_added)
                # ``report`` holds one entry per source file, so it is never
                # empty here.
                maxlen = max(len(tf) for tf in report)
                for tf in report:
                    output_dest.write("[ %s ]\n" % tf.ljust(maxlen))
            if opts.additional_comments:
                # Emit the user's text as a bracketed comment, in the same
                # form as the report lines above.
                output_dest.write("\n")
                output_dest.write("[ %s ]\n" % opts.additional_comments)
    finally:
        if output_dest is not sys.stdout:
            output_dest.close()
Ejemplo n.º 5
0
def main_cli():
    """Concatenate the trees of one or more tree files into a single output.

    Command-line entry point. Options are parsed with ``optparse``; every
    positional argument is taken as the path of a tree file. Each existing
    file is read through ``tree_source_iter`` and a tree is kept when its
    zero-based index within its file is at least ``--burnin`` and is a
    multiple of ``--stride``. Kept trees are written as Newick strings to the
    output destination (standard output unless ``--output`` is given),
    wrapped in a NEXUS ``Trees`` block unless ``--newick``/``--phylip`` is set.
    In NEXUS mode a per-file sampling report and any ``--additional_comments``
    text are appended as bracketed comments after the block.

    Calls ``sys.exit(1)`` when the stride is not a positive integer, when a
    tree file is missing and ``--ignore-missing-support`` was not given, when
    no usable tree file remains, or when ``confirm_overwrite`` declines to
    overwrite the output file.
    """

    description =  '%s %s %s' % (_program_name, _program_version, _program_subtitle)
    usage = "%prog [options] <TREES FILE> [<TREES FILE> [<TREES FILE> [...]]"

    parser = OptionParser(usage=usage, add_help_option=True, version = _program_version, description=description)

    input_optgroup = OptionGroup(parser, 'Input File Options')
    parser.add_option_group(input_optgroup)
    input_optgroup.add_option('-b', '--burnin',
                        action='store',
                        dest='burnin',
                        type='int', # also 'float', 'string' etc.
                        default=0,
                        help='number of trees to skip from the beginning of *each tree file* when counting support [default=%default]')
    input_optgroup.add_option('-s', '--stride',
                        action='store',
                        dest='stride',
                        metavar="STRIDE",
                        type='int', # also 'float', 'string' etc.
                        default=1,
                        help='resample rate: only include one out of every STRIDE trees')

    output_filepath_optgroup = OptionGroup(parser, 'Output File Options')
    parser.add_option_group(output_filepath_optgroup)
    output_filepath_optgroup.add_option('-o','--output',
                  dest='output_filepath',
                  default=None,
                  help="path to output file (if not given, will print to standard output)")
    output_filepath_optgroup.add_option('--no-taxa-block',
                      action='store_false',
                      dest='include_taxa_block',
                      default=True,
                      help="do not include a taxa block in the output treefile (otherwise will create taxa block by default)")
    output_filepath_optgroup.add_option('--no-meta-comments',
                      action='store_false',
                      dest='include_meta_comments',
                      default=True,
                      help="include initial file comment annotating details of scoring operation")
    output_filepath_optgroup.add_option('-m', '--additional_comments',
                      action='store',
                      dest='additional_comments',
                      default=None,
                      help="additional comments to be added to the summary file")
    output_filepath_optgroup.add_option('--newick',
                      action='store_true',
                      dest='phylip_format',
                      default=False,
                      help="save results in NEWICK (PHYLIP) format (default is to save in NEXUS format)")
    output_filepath_optgroup.add_option('--phylip',
                      action='store_true',
                      dest='phylip_format',
                      default=False,
                      help="same as --newick")
    output_filepath_optgroup.add_option('-r', '--replace',
                      action='store_true',
                      dest='replace',
                      default=False,
                      help="replace/overwrite output file without asking if it already exists ")

    run_optgroup = OptionGroup(parser, 'Program Run Options')
    parser.add_option_group(run_optgroup)
    run_optgroup.add_option('-q', '--quiet',
                      action='store_true',
                      dest='quiet',
                      default=False,
                      help="suppress progress messages")
    run_optgroup.add_option('--ignore-missing-support',
                      action='store_true',
                      dest='ignore_missing_support',
                      default=False,
                      help="ignore missing support tree files (at least one must exist!)")

    (opts, args) = parser.parse_args()
    if opts.quiet:
        messaging_level = ConsoleMessenger.ERROR_MESSAGING_LEVEL
    else:
        messaging_level = ConsoleMessenger.INFO_MESSAGING_LEVEL
    messenger = ConsoleMessenger(name='cattrees.py', messaging_level=messaging_level)

    # splash
    if not opts.quiet:
        show_splash(prog_name=_program_name,
        prog_subtitle=_program_subtitle,
        prog_version=_program_version,
        prog_author=_program_author,
        prog_copyright=_program_copyright,
        dest=sys.stderr,
        extended=False)

    ###################################################
    # Option sanity checking

    # The sampling test below computes ``tree_count % opts.stride``; a stride
    # of zero would raise ZeroDivisionError in the middle of writing output.
    if opts.stride < 1:
        messenger.send_error("Invalid stride %d: the resample rate must be a positive integer." % opts.stride)
        sys.exit(1)

    ###################################################
    # Tree file idiot checking

    tree_filepaths = []
    for fpath in args:
        fpath = os.path.expanduser(os.path.expandvars(fpath))
        if not os.path.exists(fpath):
            if opts.ignore_missing_support:
                messenger.send_warning('Tree file not found: "%s"' % fpath)
            else:
                # Name the offending file before giving up.
                messenger.send_error('Tree file not found: "%s"' % fpath)
                messenger.send_error('Terminating due to missing tree files. '
                       + 'Use the "--ignore-missing-support" option to continue even '
                       + 'if some files are missing.')
                sys.exit(1)
        else:
            tree_filepaths.append(fpath)
    if len(tree_filepaths) == 0:
        messenger.send_error("No sources of trees specified or could be found. "
        + "Please provide the path to at least one (valid and existing) file "
        + "containing trees.")
        sys.exit(1)

    ###################################################
    # Other prepping...

    # output
    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(os.path.expandvars(opts.output_filepath))
        if confirm_overwrite(filepath=output_fpath, replace_without_asking=opts.replace):
            output_dest = open(output_fpath, "w")
        else:
            sys.exit(1)

    # Everything that writes to ``output_dest`` runs inside this ``try`` so an
    # output file opened above is closed even if reading a source fails.
    try:

        ###################################################
        # write nexus header if necessary

        if not opts.phylip_format:
            output_dest.write("#NEXUS\n\n")
            output_dest.write("Begin Trees;\n")

        ###################################################
        # Main work begins here

        report = []
        total_trees_added = 0
        for tree_filepath_idx, tree_filepath in enumerate(tree_filepaths):
            messenger.send_info("-- Reading tree source %d of %d: %s" \
                % (tree_filepath_idx+1, len(tree_filepaths), tree_filepath))
            trees_added = 0
            # Tracked separately from the loop variable so a file without any
            # trees reports 0 instead of an unbound or stale ``tree_count``.
            trees_in_file = 0
            # NOTE(review): "rU" is rejected by Python 3.11+; kept because this
            # script appears to target Python 2 -- confirm before porting.
            tree_stream = open(tree_filepath, "rU")
            try:
                for tree_count, tree in enumerate(tree_source_iter(stream=tree_stream, schema='nexus/newick')):
                    trees_in_file = tree_count + 1
                    if tree_count < opts.burnin or tree_count % opts.stride:
                        continue
                    trees_added += 1
                    if opts.phylip_format:
                        output_dest.write(tree.as_string(schema="newick"))
                    else:
                        output_dest.write("tree %d = %s" % (trees_added, tree.as_string(schema="newick")))
            finally:
                tree_stream.close()
            total_trees_added += trees_added
            message = ("%s: %d trees in file, sampling 1 tree of every %d trees after %d tree burn-in: %d trees added (current total = %d trees)" \
                % (tree_filepath, trees_in_file, opts.stride, opts.burnin, trees_added, total_trees_added))
            report.append(message)
            messenger.send_info("   " + message)

        if not opts.phylip_format:
            output_dest.write("End;\n")
            if opts.include_meta_comments:
                output_dest.write("\n")
                output_dest.write("[Total of %d trees sourced from:]\n" % total_trees_added)
                # ``report`` holds one entry per source file, so it is never
                # empty here.
                maxlen = max(len(tf) for tf in report)
                for tf in report:
                    output_dest.write("[ %s ]\n" % tf.ljust(maxlen))
            if opts.additional_comments:
                # Emit the user's text as a bracketed comment, in the same
                # form as the report lines above.
                output_dest.write("\n")
                output_dest.write("[ %s ]\n" % opts.additional_comments)
    finally:
        if output_dest is not sys.stdout:
            output_dest.close()
Ejemplo n.º 6
0
def main_cli():
    """
    Run the label-summarization program from the command line.

    Reads the first tree found in the target tree file, collects the node
    labels found on the head node of each split edge of every tree in the
    source tree files, and writes those labels (joined with the separator
    string) onto the head node of the target-tree edge that has the same
    split. The relabelled target tree is then written in NEXUS (default) or
    Newick format to the output file, or to standard output if no output
    file was given.

    Options are read from the command line by the option parser; the
    function takes no arguments and returns nothing.

    Calls ``sys.exit(1)`` when:

    -   no source tree file is given, or none of those given exists;
    -   a source tree file is missing and ``--ignore-missing-source`` was
        not given;
    -   the target tree file is not specified, does not exist, or yields
        no tree;
    -   ``confirm_overwrite`` does not approve writing to the output path.
    """

    description = "%s %s %s" % (_program_name, _program_version,
                                _program_subtitle)
    usage = "%prog [options] -t <TARGET-TREE-FILE> <TREES-FILE> [TREES-FILE [TREES-FILE [...]]"

    parser = OptionParser(usage=usage,
                          add_help_option=True,
                          version=_program_version,
                          description=description)

    parser.add_option("-t", "--target",
                      dest="target_tree_filepath",
                      default=None,
                      help="path to file with tree (Newick or NEXUS format) "
                           "to which labels will be written")
    parser.add_option("--preserve-target-labels",
                      action="store_true",
                      dest="preserve_target_labels",
                      default=False,
                      help="keep any existing labels on target tree (by default, these will be cleared before writing the new labels)")
    parser.add_option("--rooted",
                      action="store_true",
                      dest="rooted_trees",
                      default=None,
                      help="treat trees as rooted")
    parser.add_option("--unrooted",
                      action="store_false",
                      dest="rooted_trees",
                      default=None,
                      help="treat trees as unrooted")
    parser.add_option("--ignore-missing-source",
                      action="store_true",
                      dest="ignore_missing_source",
                      default=False,
                      help="ignore missing source tree files (at least one must exist!)")
    parser.add_option("-o", "--output",
                      dest="output_filepath",
                      default=None,
                      help="path to output file (if not given, will print to standard output)")
    parser.add_option("-s", "--separator",
                      dest="separator",
                      default="/",
                      help="string to use to separate labels from different source trees (default='%default')")
    parser.add_option("--no-taxa-block",
                      action="store_false",
                      dest="include_taxa_block",
                      default=True,
                      help="do not include a taxa block in the output treefile (otherwise will create taxa block by default)")
    parser.add_option("-c", "--additional-comments",
                      action="store",
                      dest="additional_comments",
                      default=None,
                      help="additional comments to be added to the summary file")
    parser.add_option("--to-newick",
                      action="store_true",
                      dest="to_newick_format",
                      default=False,
                      help="save results in NEWICK (PHYLIP) format (default is to save in NEXUS format)")
    # The help text used to point at a non-existent '--newick' flag.
    parser.add_option("--to-phylip",
                      action="store_true",
                      dest="to_newick_format",
                      default=False,
                      help="same as --to-newick")
    parser.add_option("-r", "--replace",
                      action="store_true",
                      dest="replace",
                      default=False,
                      help="replace/overwrite output file without asking if it already exists ")
    parser.add_option("-q", "--quiet",
                      action="store_true",
                      dest="quiet",
                      default=False,
                      help="suppress ALL logging, progress and feedback messages")

    (opts, args) = parser.parse_args()
    if opts.quiet:
        messaging_level = ConsoleMessenger.ERROR_MESSAGING_LEVEL
    else:
        messaging_level = ConsoleMessenger.INFO_MESSAGING_LEVEL
    messenger = ConsoleMessenger(name="SumLabels",
                                 messaging_level=messaging_level)

    # splash
    if not opts.quiet:
        show_splash(prog_name=_program_name,
                    prog_subtitle=_program_subtitle,
                    prog_version=_program_version,
                    prog_author=_program_author,
                    prog_copyright=_program_copyright,
                    dest=sys.stderr,
                    extended=False)

    ###################################################
    # Source file checking

    if not args:
        # Reported as an error (not info) so that the reason for the non-zero
        # exit status is still shown when running with '--quiet'.
        messenger.send_error(
            "No sources of input trees specified. "
            "Please provide the path to at least one (valid and existing) file "
            "containing tree samples to summarize. See '--help' for other options.")
        sys.exit(1)

    source_filepaths = []
    for fpath in args:
        fpath = os.path.expanduser(os.path.expandvars(fpath))
        if os.path.exists(fpath):
            source_filepaths.append(fpath)
        elif opts.ignore_missing_source:
            messenger.send_warning("Source file not found: '%s'" % fpath)
        else:
            messenger.send_error(
                "Terminating due to missing source files. "
                "Use the '--ignore-missing-source' option to continue even "
                "if some files are missing.")
            sys.exit(1)
    if not source_filepaths:
        messenger.send_error(
            "No valid sources of input trees specified. "
            "Please provide the path to at least one (valid and existing) file "
            "containing trees")
        sys.exit(1)

    ###################################################
    # Target tree and output destination checking

    if opts.target_tree_filepath is None:
        messenger.send_error(
            "Target tree file not specified: use the '-t' or '--target' option to provide path to target tree")
        sys.exit(1)
    target_tree_filepath = os.path.expanduser(
        os.path.expandvars(opts.target_tree_filepath))
    if not os.path.exists(target_tree_filepath):
        messenger.send_error("Target tree file not found: '%s'" %
                             target_tree_filepath)
        sys.exit(1)

    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(
            os.path.expandvars(opts.output_filepath))
        if not confirm_overwrite(filepath=output_fpath,
                                 replace_without_asking=opts.replace):
            sys.exit(1)
        output_dest = open(output_fpath, "w")

    # Everything from here on runs with the output destination open; the
    # 'finally' clause guarantees an output file we opened is closed (and
    # therefore flushed) on success, on error and on sys.exit().
    try:
        # A single taxon set is shared by the target and all source trees so
        # that their splits are comparable.
        master_taxon_set = dendropy.TaxonSet()
        is_rooted = opts.rooted_trees

        messenger.send_info("Reading target tree: '%s'" % target_tree_filepath)
        target_tree = None
        with open(target_tree_filepath, "rU") as target_stream:
            # Only the first tree in the target file is used.
            for tree in tree_source_iter(target_stream,
                                         schema='nexus/newick',
                                         taxon_set=master_taxon_set,
                                         as_rooted=is_rooted):
                target_tree = tree
                break
        if target_tree is None:
            messenger.send_error("No trees found in target tree file: '%s'" %
                                 target_tree_filepath)
            sys.exit(1)

        # Maps each split to the list of labels seen for it, in reading order.
        # Nothing is echoed to standard output here, as that may be the
        # destination of the result tree.
        split_labels = {}
        for src_fpath in source_filepaths:
            messenger.send_info("Reading source tree(s) from: '%s'" % src_fpath)
            with open(src_fpath, "rU") as src_stream:
                for tree in tree_source_iter(src_stream,
                                             schema='nexus/newick',
                                             taxon_set=master_taxon_set,
                                             as_rooted=is_rooted):
                    tree.update_splits()
                    for split, edge in tree.split_edges.items():
                        label = edge.head_node.label
                        if label:
                            split_labels.setdefault(split, []).append(label)

        messenger.send_info("Mapping labels")
        target_tree.update_splits()
        for split, edge in target_tree.split_edges.items():
            labels = []
            if opts.preserve_target_labels:
                if edge.head_node.label:
                    labels.append(edge.head_node.label)
            else:
                edge.head_node.label = None
            # Splits of the target tree absent from all sources add nothing.
            labels.extend(split_labels.get(split, []))
            if labels:
                edge.head_node.label = opts.separator.join(labels)

        output_dataset = dendropy.DataSet(
            dendropy.TreeList([target_tree], taxon_set=master_taxon_set))

        # Writer settings common to both output formats.
        shared_write_kwargs = dict(
            suppress_rooting=False,
            unquoted_underscores=False,
            preserve_spaces=False,
            store_tree_weights=False,
            suppress_annotations=False,
            annotations_as_nhx=False,
            suppress_item_comments=False,
            suppress_leaf_taxon_labels=False,
            suppress_leaf_node_labels=True,
            suppress_internal_taxon_labels=False,
            suppress_internal_node_labels=False,
            node_label_element_separator=' ',
            node_label_compose_func=None)

        if opts.to_newick_format:
            output_dataset.write(output_dest,
                                 "newick",
                                 suppress_edge_lengths=False,
                                 **shared_write_kwargs)
        else:
            simple = not opts.include_taxa_block
            try:
                username = getpass.getuser()
            except Exception:
                # Best effort only: the user name is purely informational.
                username = "******"
            python_version = sys.version.replace("\n", "")
            python_version = python_version.replace("[", "(").replace("]", ")")
            comment = [
                "%s %s by %s." %
                (_program_name, _program_version, _program_author),
                "Using DendroPy Version %s by Jeet Sukumaran and Mark T. Holder." %
                dendropy.__version__,
                "Running under Python %s on %s." %
                (python_version, sys.platform),
                "Executed on %s by %s@%s." %
                (platform.node(), username, socket.gethostname()),
            ]
            if opts.additional_comments:
                comment.append("\n")
                comment.append(opts.additional_comments)
            output_dataset.write(output_dest,
                                 "nexus",
                                 simple=simple,
                                 file_comments=comment,
                                 **shared_write_kwargs)
    finally:
        # Never close standard output; only a file opened by this function.
        if output_dest is not sys.stdout:
            output_dest.close()

    if opts.output_filepath:
        messenger.send_info("Results written to: '%s'." % (output_fpath))