Esempio n. 1
0
def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    """Print a side-by-side ASCII view of every mismatching node pair.

    Each entry of ``difftable`` is unpacked as
    ``(dist, side1, side2, diff, n1, n2)`` and entries are visited in
    descending sort order.  Both nodes are rebuilt as new ``Tree`` objects,
    ladderized, and their leaves relabelled with ``attr1`` / ``attr2``.
    Leaves whose label is found in ``diff`` get a " ***" suffix, and are
    additionally passed through ``color(..., "red")`` when ``usecolor`` is
    true.  The rows are handed to ``print_table`` and a summary line with the
    accumulated distance is logged.

    Returns ``None``.  Nothing is printed or logged for an empty table.
    """
    if not difftable:
        return

    maxcolwidth = 80

    def render(node, attr, diff):
        # Rebuild the node from its written form; the relabelling below is
        # applied to this new object.
        clone = Tree(node.write(features=[attr]))
        clone.ladderize()
        for tip in clone.iter_leaves():
            tip.name = getattr(tip, attr)
            if tip.name not in diff:
                continue
            tip.name += " ***"
            if usecolor:
                tip.name = color(tip.name, "red")

        drawing = clone.get_ascii(show_internal=False, compact=False)
        rows = drawing.split("\n")
        widest = max(len(row) for row in rows)
        if widest <= maxcolwidth:
            return drawing
        # Too wide for the column: drop the left part of every line, as if
        # the drawing had been scrolled to its right margin.
        cut = widest - maxcolwidth + 1
        return "\n".join(row[cut:] for row in rows)

    table_rows = []
    total_dist = 0
    for dist, side1, side2, diff, n1, n2 in sorted(difftable, reverse=True):
        total_dist += dist
        left = render(n1, attr1, diff)
        right = render(n2, attr2, diff)
        counts = "%d vs %d tips\n(%d diffs)" % (len(side1), len(side2),
                                                len(diff))
        table_rows.append(["%0.2g" % dist, counts, left, right])

    print_table(table_rows,
                header=["Dist", "#diffs", "Tree1", "Tree2"],
                max_col_width=maxcolwidth,
                wrap_style="wrap",
                row_line=True)

    log.info("Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d" %
             (total_dist, len(difftable)))
Esempio n. 2
0
def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    """Render the difference table as a table of paired ASCII topologies.

    ``difftable`` rows are unpacked as ``(dist, side1, side2, diff, n1, n2)``
    and processed in descending sort order.  For each row, ``n1`` and ``n2``
    are rebuilt as new ``Tree`` objects, ladderized, and their leaves renamed
    after ``attr1`` and ``attr2`` respectively.  A leaf whose name is in
    ``diff`` is suffixed with " ***" and, if ``usecolor`` is true, passed
    through ``color(..., "red")``.  The table is printed with ``print_table``
    and the summed distance is reported through ``log.info``.

    Returns ``None``; an empty ``difftable`` produces no output at all.
    """
    if not difftable:
        return

    maxcolwidth = 80
    showtable = []
    total_dist = 0
    for dist, side1, side2, diff, n1, n2 in sorted(difftable, reverse=True):
        total_dist += dist

        drawings = []
        for node, attr in ((n1, attr1), (n2, attr2)):
            tree = Tree(node.write(features=[attr]))
            tree.ladderize()
            for leaf in tree.iter_leaves():
                leaf.name = getattr(leaf, attr)
                if leaf.name in diff:
                    leaf.name += " ***"
                    if usecolor:
                        leaf.name = color(leaf.name, "red")

            topo = tree.get_ascii(show_internal=False, compact=False)

            # Over-wide drawings are cut on the left, pretending the view
            # has been scrolled to the right margin.
            topo_lines = topo.split("\n")
            overflow = max(map(len, topo_lines)) - maxcolwidth
            if overflow > 0:
                topo = "\n".join([line[overflow + 1 :] for line in topo_lines])
            drawings.append(topo)

        summary = "%d vs %d tips\n(%d diffs)" % (len(side1), len(side2), len(diff))
        showtable.append(["%0.2g" % dist, summary] + drawings)

    print_table(
        showtable,
        header=["Dist", "#diffs", "Tree1", "Tree2"],
        max_col_width=maxcolwidth,
        wrap_style="wrap",
        row_line=True,
    )

    log.info("Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d" % (total_dist, len(difftable)))
Esempio n. 3
0
def main(argv):

    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("target_trees",
                        metavar='target_trees',
                        type=str,
                        nargs="*",
                        help='a list of target tree files')

    parser.add_argument(
        "--targets_file",
        dest="targets_file",
        type=str,
        help="""path to a file containing target trees, one per line""")

    parser.add_argument("-o",
                        dest="output",
                        type=str,
                        help="""Path to the tab delimited report file""")

    parser.add_argument("-r",
                        dest="reftree",
                        type=str,
                        required=True,
                        help="""Reference tree""")

    parser.add_argument(
        "--outgroup",
        dest="outgroup",
        nargs="+",
        help=
        """outgroup used to root reference and target trees before distance computation"""
    )

    parser.add_argument("--expand_polytomies",
                        dest="polytomies",
                        action="store_true",
                        help="""expand politomies if necessary""")

    parser.add_argument("--unrooted",
                        dest="unrooted",
                        action="store_true",
                        help="""compare trees as unrooted""")

    parser.add_argument(
        "--min_support",
        dest="min_support",
        type=float,
        default=0.0,
        help=
        ("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"
         ))

    parser.add_argument(
        "--extract_species",
        dest="extract_species",
        action="store_true",
        help=
        """When used, reference tree is assumed to contain species names, while target trees as expected to be gene trees. Species name will be extracted from gene tree nodes and treeko will be used if duplication events are found."""
    )

    parser.add_argument("--spname_delimiter",
                        dest="spname_delimiter",
                        type=str,
                        default="_",
                        help=("species code delimiter in node names"))

    parser.add_argument(
        "--spname_field",
        dest="spname_field",
        type=int,
        default=-1,
        help=
        ("position of the species code extracted from node names. -1 = last field"
         ))

    parser.add_argument("--collateral",
                        dest="collateral",
                        action='store_true',
                        help=(""))

    parser.add_argument("--ref_attr",
                        dest="ref_attr",
                        type=str,
                        help=("attribute in ref tree used as leaf name"))

    parser.add_argument("--target_attr",
                        dest="target_attr",
                        type=str,
                        help=("attribute in target tree used as leaf name"))

    args = parser.parse_args(argv)
    print __DESCRIPTION__
    reftree = args.reftree
    if args.targets_file and args.target_trees:
        print >> sys.stderr, 'The use of targets_file and targets at the same time is not supported.'
        sys.exit(1)

    if args.targets_file:
        target_trees = tree_iterator(args.targets_file)
    else:
        target_trees = args.target_trees

    t = Tree(reftree)

    if args.ref_attr:
        for lf in t.iter_leaves():
            lf._origname = lf.name
            if args.ref_attr not in lf.features:
                print lf
            lf.name = getattr(lf, args.ref_attr)

    if args.outgroup:
        if len(args.outgroup) > 1:
            out = t.get_common_ancestor(args.outgroup)
        else:
            out = t.search_nodes(name=args.outgroup[0])[0]
        t.set_outgroup(out)

    ref_names = set(t.get_leaf_names())
    reftree_len = len(t)
    reftree_edges = (reftree_len * 2) - 2
    ncollapsed_branches = len([
        n for n in t.traverse() if n.children and n.support < args.min_support
    ])
    #reftree_edges -= ncollapsed_branches
    #if ncollapsed_branches:
    #    print '%d branches collapsed in reference tree' %ncollapsed_branches

    HEADER = ("target tree", 'dups', 'subtrees', 'used trees', 'treeko', "RF",
              "maxRF", 'normRF', "%reftree", "%genetree", "avgSize", "minSize",
              "common tips", "refSize", "targetSize")
    if args.output:
        OUT = open(args.output, "w")
        print >> OUT, '# ' + ctime()
        print >> OUT, '# ' + ' '.join(sys.argv)
        print >> OUT, '#' + '\t'.join(HEADER)
    else:
        print '# ' + ctime()
        print '# ' + ' '.join(sys.argv)
        COL_WIDTHS = [20, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]
        print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

    prev_tree = None

    for counter, tfile in enumerate(target_trees):
        if args.targets_file:
            seedid, tfile = tfile
        else:
            seedid = None

        if args.extract_species:
            tt = PhyloTree(tfile,
                           sp_naming_function=lambda name: name.split(
                               args.spname_delimiter)[args.spname_field])
        else:
            tt = Tree(tfile)

        if args.target_attr:
            for lf in tt.iter_leaves():
                lf._origname = lf.name
                lf.name = getattr(lf, args.target_attr)

        if args.outgroup:
            if len(args.outgroup) > 1:
                out = tt.get_common_ancestor(args.outgroup)
            else:
                out = tt.search_nodes(name=args.outgroup[0])[0]
            tt.set_outgroup(out)

        if args.target_trees:
            fname = os.path.basename(tfile)
        else:
            fname = '%05d' % counter

        max_size, min_size, avg_size, common = -1, -1, -1, -1
        total_rf, max_rf, norm_rf = -1, -1, -1
        treeko_d = -1
        ref_branches_in_target, target_branches_in_ref = -1, -1
        target_tree_len = -1
        used_subtrees = -1
        if args.extract_species:
            orig_target_size = len(tt)
            ntrees, ndups, sp_trees = tt.get_speciation_trees(
                autodetect_duplications=True, newick_only=True)

            if ntrees < 1000:
                all_rf = []
                ref_found = []
                target_found = []
                tree_sizes = []
                all_max_rf = []
                common_names = 0

                for subtree_nw in sp_trees:
                    if seedid and not args.collateral and (seedid
                                                           not in subtree_nw):
                        continue
                    subtree = PhyloTree(
                        subtree_nw,
                        sp_naming_function=lambda name: name.split(
                            args.spname_delimiter)[args.spname_field])

                    # only necessary if rf function is going to filter by support value. It slows downs the analysis, obviously
                    if args.min_support:
                        subtree_content = subtree.get_cached_content(
                            store_attr='name')
                        for n in subtree.traverse():
                            if n.children:
                                n.support = tt.get_common_ancestor(
                                    subtree_content[n]).support

                    rf, maxr, common, p1, p2, d1, d2 = t.robinson_foulds(
                        subtree,
                        expand_polytomies=args.polytomies,
                        unrooted_trees=args.unrooted,
                        attr_t2='species',
                        min_support_t2=args.min_support)
                    if maxr > 0 and p1 and p2:
                        all_rf.append(rf)
                        tree_sizes.append(len(common))
                        all_max_rf.append(maxr)
                        common_names = max(common_names, len(common))

                        ref_found.append(float(len(p2 & p1)) / reftree_edges)
                        p2bis = set([
                            p for p in (p2 - d2)
                            if len(p[0]) > 1 and len(p[1]) > 1
                        ])  # valid edges in target not leaves
                        if p2bis:
                            incompatible_target_branches = float(
                                len((p2 - d2) - p1))
                            target_found.append(1 -
                                                (incompatible_target_branches /
                                                 (len(p2 - d2))))

                        # valid_target = p2-d2
                        # valid_ref = p1-d1
                        # ref_found.append(float(len(valid_target & valid_ref)) / reftree_edges)

                        # p2bis = set([p for p in (p2-d2) if len(p[0])>1 and len(p[1])>1])
                        # if p2bis-d2:
                        #     incompatible_target_branches = float(len((p2-d2) - p1))
                        #     target_found.append(1 - (incompatible_target_branches / (len(p2-d2))))

                if all_rf:
                    # Treeko speciation distance
                    alld = [(all_rf[i] / float(all_max_rf[i]))
                            for i in xrange(len(all_rf))]
                    a = numpy.sum(
                        [alld[i] * tree_sizes[i] for i in xrange(len(all_rf))])
                    b = float(numpy.sum(tree_sizes))
                    treeko_d = a / b
                    total_rf = numpy.mean(all_rf)
                    norm_rf = numpy.mean([(all_rf[i] / float(all_max_rf[i]))
                                          for i in xrange(len(all_rf))])
                    max_rf = numpy.max(all_max_rf)
                    ref_branches_in_target = numpy.mean(ref_found)
                    target_branches_in_ref = numpy.mean(
                        target_found) if target_found else -1
                    target_tree_len = numpy.mean(tree_sizes)
                    used_subtrees = len(all_rf)
        else:
            target_tree_len = len(tt)
            ndups, ntrees, used_subtrees = 0, 1, 1
            treeko_d = -1
            total_rf, max_rf, common, p1, p2, d1, d2 = tt.robinson_foulds(
                t,
                expand_polytomies=args.polytomies,
                unrooted_trees=args.unrooted)
            common_names = len(common)
            if max_rf:
                norm_rf = total_rf / float(max_rf)
            if p1 and p2:
                sizes = [len(p) for p in p2 ^ p1]
                if sizes:
                    avg_size = sum(sizes) / float(len(sizes))
                    max_size, min_size = max(sizes), min(sizes)
                else:
                    max_size, min_size, avg_size = 0, 0, 0

                ref_branches_in_target = float(len(p2 & p1)) / reftree_edges
                #if p2-d2:
                #    incompatible_target_branches = float(len((p2-d2) - p1))
                #    target_found.append(1 - (incompatible_target_branches / (len(p2-d2))))
            else:
                ref_branches_in_target = 0.0
                target_branches_in_ref = 0.0
                max_size, min_size, avg_size = -1, -1, -1

        if args.output:
            print >> OUT, '\t'.join(
                map(str, (fname, ndups, ntrees, used_subtrees, treeko_d,
                          total_rf, max_rf, norm_rf, ref_branches_in_target,
                          target_branches_in_ref, avg_size, min_size,
                          common_names, reftree_len, target_tree_len)))
        else:
            print_table([
                map(istr,
                    (fname[-30:], ndups, ntrees, used_subtrees, treeko_d,
                     total_rf, max_rf, norm_rf,
                     '%0.4f' % ref_branches_in_target,
                     '%0.4f' % target_branches_in_ref, avg_size, min_size,
                     common_names, reftree_len, target_tree_len))
            ],
                        fix_col_width=COL_WIDTHS,
                        wrap_style='cut')

    if args.output:
        OUT.close()
Esempio n. 4
0
def main(argv):
    """Compare a reference tree with each target tree and print a report.

    ``argv`` is parsed with argparse and the resulting namespace is stored
    in the module-level ``args``.  The reference tree (``-r``) is loaded
    once; every target tree is loaded in turn, optionally has its leaf names
    translated through the ``ncbi`` module, and is compared with
    ``treediff``.  The output format is selected with ``--report``.

    :param argv: list of command-line arguments to parse.
    """
    global args
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("target_trees",
                        type=str,
                        nargs="+",
                        help='a list of target tree files')

    parser.add_argument("-r",
                        dest='reftree',
                        type=str,
                        help='The reference tree to compare with')

    parser.add_argument(
        "--ref_attr",
        dest="ref_attr",
        default="name",
        help=("Defines the attribute in REFERENCE tree that will be used"
              " to perform the comparison"))

    parser.add_argument(
        "--target_attr",
        dest="target_attr",
        default="name",
        help=("Defines the attribute in TARGET tree that will be used"
              " to perform the comparison"))

    # store_false: args.fullsearch is True by default and becomes False when
    # the flag is given; the value is forwarded as treediff's reduce_matrix.
    parser.add_argument(
        "--fullsearch",
        dest="fullsearch",
        action="store_false",
        help=("Enable this option if duplicated attributes (i.e. name)"
              " exist in reference or target trees."))

    parser.add_argument("--quite",
                        dest="quite",
                        action="store_true",
                        help="Do not show process information")

    parser.add_argument("--report",
                        dest="report",
                        choices=["topology", "diffs", "diffs_tab", "summary"],
                        default="topology",
                        help="Different format for the comparison results")

    parser.add_argument(
        "--ncbi",
        dest="ncbi",
        action="store_true",
        help=
        "If enabled, it will use the ETE ncbi_taxonomy module to for ncbi taxid translation"
    )

    parser.add_argument(
        "--color",
        dest="color",
        action="store_true",
        help="If enabled, it will use colors in some of the report")

    args = parser.parse_args(argv)

    if args.quite:
        logging.basicConfig(format='%(message)s', level=logging.WARNING)
    else:
        logging.basicConfig(format='%(message)s', level=logging.INFO)
    # NOTE(review): this binds a local name only; if the intent is to set a
    # module-level ``log``, a ``global log`` declaration is missing - confirm.
    log = logging

    t1 = Tree(args.reftree)
    if args.ncbi:
        from common import ncbi
        ncbi.connect_database()

    for ttree in args.target_trees:
        t2 = Tree(ttree)

        if args.ncbi:
            # Collect the compared attribute values of both trees, translate
            # them, and rename every leaf whose name parses as an integer
            # found in the translation table.
            taxids = set(
                [getattr(leaf, args.ref_attr) for leaf in t1.iter_leaves()])
            taxids.update(
                [getattr(leaf, args.target_attr) for leaf in t2.iter_leaves()])
            taxid2name = ncbi.get_taxid_translator(taxids)
            for leaf in t1.get_leaves() + t2.get_leaves():
                try:
                    leaf.name = taxid2name.get(int(leaf.name), leaf.name)
                except ValueError:
                    # Non-numeric leaf name: keep it unchanged.
                    pass

        difftable = treediff(t1,
                             t2,
                             args.ref_attr,
                             args.target_attr,
                             reduce_matrix=args.fullsearch)
        if args.report == "topology":
            show_difftable_topo(difftable,
                                args.ref_attr,
                                args.target_attr,
                                usecolor=args.color)
        elif args.report == "diffs":
            show_difftable(difftable)
        elif args.report == "diffs_tab":
            show_difftable_tab(difftable)
        elif args.report == "summary":
            # Matches the "summary" value offered by --report.  Only the
            # first two items of the robinson_foulds result are needed.
            rf, rf_max = t1.robinson_foulds(
                t2, attr_t1=args.ref_attr, attr_t2=args.target_attr)[:2]
            show_difftable_summary(difftable, rf, rf_max)
Esempio n. 5
0
def main(argv):
    """Run the tree comparison command line: one report per target tree.

    The parsed argparse namespace is stored in the module-level ``args``.
    The reference tree given with ``-r`` is loaded once.  Each target tree
    is loaded, optionally gets its leaf names translated through the
    ``ncbi`` module, and is compared with ``treediff``; ``--report`` selects
    how the resulting table is shown.

    :param argv: list of command-line arguments to parse.
    """
    global args
    parser = argparse.ArgumentParser(description=__DESCRIPTION__, formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("target_trees", type=str, nargs="+", help="a list of target tree files")

    parser.add_argument("-r", dest="reftree", type=str, help="The reference tree to compare with")

    parser.add_argument(
        "--ref_attr",
        dest="ref_attr",
        default="name",
        help=("Defines the attribute in REFERENCE tree that will be used" " to perform the comparison"),
    )

    parser.add_argument(
        "--target_attr",
        dest="target_attr",
        default="name",
        help=("Defines the attribute in TARGET tree that will be used" " to perform the comparison"),
    )

    # store_false: the attribute defaults to True and is set to False when
    # the flag is present; it is passed on as treediff's reduce_matrix.
    parser.add_argument(
        "--fullsearch",
        dest="fullsearch",
        action="store_false",
        help=("Enable this option if duplicated attributes (i.e. name)" " exist in reference or target trees."),
    )

    parser.add_argument("--quite", dest="quite", action="store_true", help="Do not show process information")

    parser.add_argument(
        "--report",
        dest="report",
        choices=["topology", "diffs", "diffs_tab", "summary"],
        default="topology",
        help="Different format for the comparison results",
    )

    parser.add_argument(
        "--ncbi",
        dest="ncbi",
        action="store_true",
        help="If enabled, it will use the ETE ncbi_taxonomy module to for ncbi taxid translation",
    )

    parser.add_argument(
        "--color", dest="color", action="store_true", help="If enabled, it will use colors in some of the report"
    )

    args = parser.parse_args(argv)

    if args.quite:
        logging.basicConfig(format="%(message)s", level=logging.WARNING)
    else:
        logging.basicConfig(format="%(message)s", level=logging.INFO)
    # NOTE(review): local binding only; a module-level ``log`` would need a
    # ``global log`` declaration to be affected - confirm the intent.
    log = logging

    t1 = Tree(args.reftree)
    if args.ncbi:
        from common import ncbi

        ncbi.connect_database()

    for ttree in args.target_trees:
        t2 = Tree(ttree)

        if args.ncbi:
            # Gather the compared attribute values from both trees, fetch
            # their translations and rename leaves with an integer name that
            # appears in the translation table.
            taxids = set([getattr(leaf, args.ref_attr) for leaf in t1.iter_leaves()])
            taxids.update([getattr(leaf, args.target_attr) for leaf in t2.iter_leaves()])
            taxid2name = ncbi.get_taxid_translator(taxids)
            for leaf in t1.get_leaves() + t2.get_leaves():
                try:
                    leaf.name = taxid2name.get(int(leaf.name), leaf.name)
                except ValueError:
                    # Leaf name is not an integer: leave it as it is.
                    pass

        difftable = treediff(t1, t2, args.ref_attr, args.target_attr, reduce_matrix=args.fullsearch)
        if args.report == "topology":
            show_difftable_topo(difftable, args.ref_attr, args.target_attr, usecolor=args.color)
        elif args.report == "diffs":
            show_difftable(difftable)
        elif args.report == "diffs_tab":
            show_difftable_tab(difftable)
        elif args.report == "summary":
            # Matches the "summary" choice declared for --report.  Only the
            # first two items of the robinson_foulds result are used.
            rf, rf_max = t1.robinson_foulds(t2, attr_t1=args.ref_attr, attr_t2=args.target_attr)[:2]
            show_difftable_summary(difftable, rf, rf_max)
Esempio n. 6
0
def main(argv):
    
    parser = argparse.ArgumentParser(description=__DESCRIPTION__, 
                            formatter_class=argparse.RawDescriptionHelpFormatter)


    parser.add_argument("target_trees", metavar='target_trees', type=str, nargs="*",
                   help='a list of target tree files')
    
    parser.add_argument("--targets_file", dest="targets_file", 
                        type=str, 
                        help="""path to a file containing target trees, one per line""")
    
    parser.add_argument("-o", dest="output", 
                        type=str,
                        help="""Path to the tab delimited report file""")

    parser.add_argument("-r", dest="reftree", 
                        type=str, required=True,
                        help="""Reference tree""")

    parser.add_argument("--outgroup", dest="outgroup", 
                        nargs = "+",
                        help="""outgroup used to root reference and target trees before distance computation""")
  
    parser.add_argument("--expand_polytomies", dest="polytomies", 
                        action = "store_true",
                        help="""expand politomies if necessary""")
  
    parser.add_argument("--unrooted", dest="unrooted", 
                        action = "store_true",
                        help="""compare trees as unrooted""")

    parser.add_argument("--min_support", dest="min_support", 
                        type=float, default=0.0,
                        help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))
    
    parser.add_argument("--extract_species", dest="extract_species", 
                        action = "store_true",
                        help="""When used, reference tree is assumed to contain species names, while target trees as expected to be gene trees. Species name will be extracted from gene tree nodes and treeko will be used if duplication events are found.""")

    parser.add_argument("--spname_delimiter", dest="spname_delimiter", 
                        type=str, default="_",
                        help=("species code delimiter in node names"))
    
    parser.add_argument("--spname_field", dest="spname_field", 
                        type=int, default=-1,
                        help=("position of the species code extracted from node names. -1 = last field"))
    

    parser.add_argument("--collateral", dest="collateral", 
                        action='store_true', 
                        help=(""))

    parser.add_argument("--ref_attr", dest="ref_attr", 
                        type=str, 
                        help=("attribute in ref tree used as leaf name"))
    
    parser.add_argument("--target_attr", dest="target_attr", 
                        type=str, 
                        help=("attribute in target tree used as leaf name"))


    
    args = parser.parse_args(argv)
    print __DESCRIPTION__
    reftree = args.reftree
    if args.targets_file and args.target_trees:
        print >>sys.stderr, 'The use of targets_file and targets at the same time is not supported.'
        sys.exit(1)
        
    if args.targets_file:
        target_trees = tree_iterator(args.targets_file)
    else:
        target_trees = args.target_trees
        
    t = Tree(reftree)

    if args.ref_attr:
        for lf in t.iter_leaves():
            lf._origname = lf.name
            if args.ref_attr not in lf.features:
                print lf
            lf.name = getattr(lf, args.ref_attr)
    
    if args.outgroup:
        if len(args.outgroup) > 1:
            out = t.get_common_ancestor(args.outgroup)
        else:
            out = t.search_nodes(name=args.outgroup[0])[0]
        t.set_outgroup(out)
             
        
    ref_names = set(t.get_leaf_names())
    reftree_len = len(t)
    reftree_edges = (reftree_len*2)-2
    ncollapsed_branches = len([n for n in t.traverse() if n.children and n.support < args.min_support])
    #reftree_edges -= ncollapsed_branches
    #if ncollapsed_branches:
    #    print '%d branches collapsed in reference tree' %ncollapsed_branches
    
    HEADER = ("target tree", 'dups', 'subtrees', 'used trees', 'treeko', "RF", "maxRF", 'normRF', "%reftree", "%genetree", "avgSize", "minSize", "common tips", "refSize", "targetSize")
    if args.output:
        OUT = open(args.output, "w")
        print >>OUT, '# ' + ctime()
        print >>OUT, '# ' + ' '.join(sys.argv) 
        print >>OUT, '#'+'\t'.join(HEADER)
    else:
        print '# ' + ctime()
        print '# ' + ' '.join(sys.argv) 
        COL_WIDTHS = [20, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]
        print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')
                
    prev_tree = None

    for counter, tfile in enumerate(target_trees):
        if args.targets_file:
            seedid, tfile = tfile
        else:
            seedid = None

           
        if args.extract_species:
            tt = PhyloTree(tfile, sp_naming_function = lambda name: name.split(args.spname_delimiter)[args.spname_field])
        else:
            tt = Tree(tfile)

        if args.target_attr:
            for lf in tt.iter_leaves():
                lf._origname = lf.name
                lf.name = getattr(lf, args.target_attr)
            
        if args.outgroup:
            if len(args.outgroup) > 1:
                out = tt.get_common_ancestor(args.outgroup)
            else:
                out = tt.search_nodes(name=args.outgroup[0])[0]
            tt.set_outgroup(out)
        
        if args.target_trees:
            fname = os.path.basename(tfile)
        else:
            fname = '%05d' %counter

        max_size, min_size, avg_size, common = -1, -1, -1, -1
        total_rf, max_rf, norm_rf = -1, -1, -1
        treeko_d = -1
        ref_branches_in_target, target_branches_in_ref = -1, -1
        target_tree_len = -1
        used_subtrees = -1             
        if args.extract_species:
            orig_target_size = len(tt)
            ntrees, ndups, sp_trees = tt.get_speciation_trees(autodetect_duplications=True, newick_only=True)

            if ntrees < 1000:
                all_rf = []
                ref_found = []
                target_found = []
                tree_sizes = []
                all_max_rf = []
                common_names = 0

                for subtree_nw in sp_trees:
                    if seedid and not args.collateral and (seedid not in subtree_nw):
                        continue
                    subtree = PhyloTree(subtree_nw, sp_naming_function = lambda name: name.split(args.spname_delimiter)[args.spname_field])

                    # only necessary if rf function is going to filter by support value. It slows downs the analysis, obviously
                    if args.min_support:
                        subtree_content = subtree.get_cached_content(store_attr='name')
                        for n in subtree.traverse():
                            if n.children:
                                n.support = tt.get_common_ancestor(subtree_content[n]).support
                                
                    rf, maxr, common, p1, p2, d1, d2 = t.robinson_foulds(subtree, expand_polytomies=args.polytomies, unrooted_trees=args.unrooted,
                                                                         attr_t2='species', min_support_t2=args.min_support)
                    if maxr > 0 and p1 and p2:
                        all_rf.append(rf)
                        tree_sizes.append(len(common))
                        all_max_rf.append(maxr)
                        common_names = max(common_names, len(common))
                        
                        ref_found.append(float(len(p2 & p1)) / reftree_edges)
                        p2bis = set([p for p in (p2-d2) if len(p[0])>1 and len(p[1])>1]) # valid edges in target not leaves
                        if p2bis:
                            incompatible_target_branches = float(len((p2-d2) - p1))
                            target_found.append(1 - (incompatible_target_branches / (len(p2-d2))))
                            
                        # valid_target = p2-d2
                        # valid_ref = p1-d1
                        # ref_found.append(float(len(valid_target & valid_ref)) / reftree_edges)
                        
                        # p2bis = set([p for p in (p2-d2) if len(p[0])>1 and len(p[1])>1])
                        # if p2bis-d2:
                        #     incompatible_target_branches = float(len((p2-d2) - p1))
                        #     target_found.append(1 - (incompatible_target_branches / (len(p2-d2))))

                        
                if all_rf:
                    # Treeko speciation distance
                    alld = [(all_rf[i]/float(all_max_rf[i])) for i in xrange(len(all_rf))]
                    a = numpy.sum([alld[i] * tree_sizes[i] for i in xrange(len(all_rf))])
                    b = float(numpy.sum(tree_sizes))
                    treeko_d  = a/b
                    total_rf = numpy.mean(all_rf)                    
                    norm_rf = numpy.mean([(all_rf[i]/float(all_max_rf[i])) for i in xrange(len(all_rf))])
                    max_rf = numpy.max(all_max_rf)
                    ref_branches_in_target = numpy.mean(ref_found)
                    target_branches_in_ref = numpy.mean(target_found) if target_found else -1
                    target_tree_len = numpy.mean(tree_sizes)
                    used_subtrees = len(all_rf)
        else:
            target_tree_len = len(tt)
            ndups, ntrees, used_subtrees = 0, 1, 1
            treeko_d = -1
            total_rf, max_rf, common, p1, p2, d1, d2 = tt.robinson_foulds(t, expand_polytomies=args.polytomies, unrooted_trees=args.unrooted)
            common_names = len(common)
            if max_rf:
                norm_rf = total_rf / float(max_rf)
            if p1 and p2: 
                sizes = [len(p) for p in p2 ^ p1]
                if sizes: 
                    avg_size = sum(sizes) / float(len(sizes))
                    max_size, min_size = max(sizes), min(sizes)
                else:
                    max_size, min_size, avg_size = 0, 0, 0
                    
                ref_branches_in_target = float(len(p2 & p1)) / reftree_edges
                #if p2-d2:
                #    incompatible_target_branches = float(len((p2-d2) - p1))
                #    target_found.append(1 - (incompatible_target_branches / (len(p2-d2))))
            else:
                ref_branches_in_target = 0.0
                target_branches_in_ref = 0.0
                max_size, min_size, avg_size = -1, -1, -1

        if args.output:
            print >>OUT, '\t'.join(map(str, (fname, ndups, ntrees, used_subtrees, treeko_d, total_rf, max_rf, norm_rf, ref_branches_in_target, target_branches_in_ref,
                                             avg_size, min_size, common_names, reftree_len, target_tree_len)))
        else:
            print_table([map(istr, (fname[-30:], ndups, ntrees, used_subtrees, treeko_d, total_rf, max_rf, norm_rf, '%0.4f' %ref_branches_in_target, '%0.4f' %target_branches_in_ref,
                 avg_size, min_size, common_names, reftree_len, target_tree_len))], fix_col_width = COL_WIDTHS, wrap_style='cut')

    if args.output:
        OUT.close()