예제 #1
0
파일: 20081201a.py 프로젝트: BIGtigr/xgcode
 def get_response_lines(self, options):
     """
     Build the lines that form the result of the analysis.
     The original tree, and the reconstructed tree when there is one,
     are listed first if 'show_all' was requested or if any requested
     problem was detected; the detected problems are listed after them.
     @param options: a subset of strings specifying what to show
     @return: a list of lines
     """
     # pair each option with the flag it reports and the message to show
     reportable = (
         ('show_incomplete', 'is_incomplete',
             'the sequential splits defined by the eigenvectors were insufficient to reconstruct the tree'),
         ('show_conflicting', 'is_conflicting',
             'the reconstructed tree has a split that is incompatible with the original tree'),
         ('show_negligible', 'is_negligible',
             'during reconstruction a negligible eigenvector loading was encountered'),
     )
     # a flag is only consulted when its option was requested
     error_lines = [
         message for option, flag_name, message in reportable
         if option in options and getattr(self, flag_name)]
     # without 'show_all' the trees are shown only alongside a problem
     if not ('show_all' in options or error_lines):
         return []
     tree_lines = ['original tree:', NewickIO.get_newick_string(self.tree)]
     if self.reconstructed_tree:
         tree_lines.append('reconstructed tree:')
         tree_lines.append(
             NewickIO.get_newick_string(self.reconstructed_tree))
     return tree_lines + error_lines
예제 #2
0
def do_distance_analysis(X):
    """
    Compare neighbor joining on plain versus squared Euclidean distances.

    For each of the two distance matrices a neighbor joining tree is built,
    written as a newick string, parsed back as a FelTree.NewickTree,
    and the distance matrix implied by the parsed tree is reported.
    @param X: a sequence of points whose pairwise differences can be normed;
    the labels are fixed to '0'..'3' so presumably four points are expected
    @return: the report as a single stripped string
    """
    labels = list('0123')

    def get_tree_string_and_distances(distance_matrix):
        # build the tree, serialize it, and read the distances back
        nj_tree = NeighborJoining.make_tree(distance_matrix, labels)
        newick_string = NewickIO.get_newick_string(nj_tree)
        parsed_tree = NewickIO.parse(newick_string, FelTree.NewickTree)
        implied = np.array(parsed_tree.get_distance_matrix(labels))
        return newick_string, implied

    # the matrix of Euclidean distances and its reconstruction from a tree
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    sqrt_tree_string, D_sqrt_reconstructed = get_tree_string_and_distances(
        D_sqrt)
    # the matrix of squared Euclidean distances and its reconstruction
    D = D_sqrt**2
    tree_string, D_reconstructed = get_tree_string_and_distances(D)
    # each item of the report is printed on its own line
    report_items = [
        'matrix of Euclidean distances between tetrahedron vertices:',
        D_sqrt,
        'neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):',
        sqrt_tree_string,
        'distance matrix implied by this tree:',
        D_sqrt_reconstructed,
        'matrix of squared distances between tetrahedron vertices:',
        D,
        'neighbor joining tree constructed from D = squared Euclidean distances (normal):',
        tree_string,
        'distance matrix implied by this tree:',
        D_reconstructed,
    ]
    out = StringIO()
    for item in report_items:
        print >> out, item
    return out.getvalue().strip()
예제 #3
0
def do_distance_analysis(X):
    """
    Report neighbor joining results for two kinds of distance matrix.

    The pairwise Euclidean distances between the points and the squares of
    those distances are each used to build a neighbor joining tree.
    Each tree is serialized to newick, parsed as a FelTree.NewickTree,
    and the distance matrix implied by the parsed tree is shown.
    @param X: a sequence of points whose pairwise differences can be normed;
    the labels are fixed to '0'..'3' so presumably four points are expected
    @return: the report as a single stripped string
    """
    labels = list("0123")

    def round_trip(distance_matrix):
        # returns the newick string and the distances implied by the tree
        newick_string = NewickIO.get_newick_string(
            NeighborJoining.make_tree(distance_matrix, labels))
        parsed_tree = NewickIO.parse(newick_string, FelTree.NewickTree)
        return newick_string, np.array(
            parsed_tree.get_distance_matrix(labels))

    # plain Euclidean distances come first, then their squares
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    sqrt_tree_string, D_sqrt_reconstructed = round_trip(D_sqrt)
    D = D_sqrt ** 2
    tree_string, D_reconstructed = round_trip(D)
    # pair each caption with the value printed under it
    sections = (
        ("matrix of Euclidean distances between tetrahedron vertices:",
            D_sqrt),
        ("neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):",
            sqrt_tree_string),
        ("distance matrix implied by this tree:",
            D_sqrt_reconstructed),
        ("matrix of squared distances between tetrahedron vertices:",
            D),
        ("neighbor joining tree constructed from D = squared Euclidean distances (normal):",
            tree_string),
        ("distance matrix implied by this tree:",
            D_reconstructed),
    )
    out = StringIO()
    for caption, value in sections:
        print >> out, caption
        print >> out, value
    return out.getvalue().strip()
예제 #4
0
파일: 20090318a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Check the principal coordinate sign split of each supplied tree.

    The tips of each tree are split by the sign of the loadings that
    get_principal_coordinate returns for the distance matrix of the tree,
    and again for the elementwise square of that matrix.  A split that is
    not among the partitions of the tree is reported as an error in the
    first case and as a counterexample in the second case.
    @param fs: its 'trees' attribute holds the newick tree strings,
    apparently one per line
    @return: the text of the report
    @raise HandlingError: when a tree has fewer than four tips,
    an unlabeled tip, or a repeated tip label
    """
    def get_sign_split(names, loadings):
        # partition the names by the sign of the corresponding loading
        nonneg = frozenset(
            name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
            name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # parse the trees and make sure that they conform to the requirements
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                'expected at least four tips but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    out = StringIO()

    def write_finding(headline, tree, part):
        # describe one invalid split and leave a blank line after it
        print >> out, headline
        print >> out, 'tree:', NewickIO.get_newick_string(tree)
        print >> out, 'invalid partition:', partition_to_string(part)
        print >> out

    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        valid_parts = TreeComparison.get_partitions(tree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(names))
        # the split implied by the correct formula is expected to be valid
        part = get_sign_split(names, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            write_finding(
                'error: a partition that was supposed to be valid was found to be invalid',
                tree, part)
        # the split implied by the incorrect formula may be invalid
        part = get_sign_split(names, get_principal_coordinate(D * D))
        if part not in valid_parts:
            ncounterexamples += 1
            write_finding('found a counterexample!', tree, part)
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
예제 #5
0
파일: 20090318a.py 프로젝트: BIGtigr/xgcode
def main():
    filename = 'counterexamples.out'
    fout = open(filename, 'wt')
    print 'Does monotonically transforming the pairwise leaf distances affect the compatibility'
    print 'of the split found using principal coordinate analysis?'
    print 'I am looking through random trees for a tree that is split incompatibly'
    print 'when distances are squared.'
    print 'Use control-c to stop the program when you get bored.'
    try:
        count = 0
        ncounterexamples = 0
        nerrors = 0
        while True:
            count += 1
            # get a random tree
            n_base_leaves = 4
            n_expected_extra_leaves = 1
            expected_branch_length = 1
            tree = TreeSampler.sample_tree(n_base_leaves,
                                           n_expected_extra_leaves,
                                           expected_branch_length)
            # get the set of valid partitions implied by the tree
            valid_parts = TreeComparison.get_partitions(tree)
            ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
            # assert that the partition implied by the correct formula is valid
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            loadings = get_principal_coordinate(D)
            nonneg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                nerrors += 1
                print >> fout, 'error: a partition that was supposed to be valid was found to be invalid'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
            # check the validity of the partition implied by the incorrect formula
            Q = D * D
            loadings = get_principal_coordinate(Q)
            nonneg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                ncounterexamples += 1
                print >> fout, 'found a counterexample!'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
    except KeyboardInterrupt, e:
        print 'trees examined:', count
        print 'errors:', nerrors
        print 'counterexamples:', ncounterexamples
예제 #6
0
def get_response_content(fs):
    """
    Report supplied trees whose sign split is not a split of the tree.

    For each tree the loadings returned by get_principal_coordinate are
    computed for the distance matrix and for its elementwise square.
    In each case the tips are split by the sign of their loading, and
    the split is reported when it is not among the partitions of the tree:
    as an error for the plain matrix, as a counterexample for the square.
    @param fs: its 'trees' attribute holds the newick tree strings,
    apparently one per line
    @return: the text of the report
    @raise HandlingError: when a tree has fewer than four tips,
    an unlabeled tip, or a repeated tip label
    """
    def split_by_sign(names, loadings):
        # names with a nonnegative loading versus names with a negative one
        nonneg = frozenset(
            name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
            name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # read and validate every tree before writing any of the report
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                'expected at least four tips but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    out = StringIO()

    def describe(headline, tree, part):
        # write one finding followed by a blank line
        print >> out, headline
        print >> out, 'tree:', NewickIO.get_newick_string(tree)
        print >> out, 'invalid partition:', partition_to_string(part)
        print >> out

    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        valid_parts = TreeComparison.get_partitions(tree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(names))
        # the correct formula uses the distance matrix itself
        part = split_by_sign(names, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            describe(
                'error: a partition that was supposed to be valid was found to be invalid',
                tree, part)
        # the incorrect formula uses the elementwise squared distances
        part = split_by_sign(names, get_principal_coordinate(D * D))
        if part not in valid_parts:
            ncounterexamples += 1
            describe('found a counterexample!', tree, part)
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
예제 #7
0
def main():
    filename = 'counterexamples.out'
    fout = open(filename, 'wt')
    print 'Does monotonically transforming the pairwise leaf distances affect the compatibility'
    print 'of the split found using principal coordinate analysis?'
    print 'I am looking through random trees for a tree that is split incompatibly'
    print 'when distances are squared.'
    print 'Use control-c to stop the program when you get bored.'
    try:
        count = 0
        ncounterexamples = 0
        nerrors = 0
        while True:
            count += 1
            # get a random tree
            n_base_leaves = 4
            n_expected_extra_leaves = 1
            expected_branch_length = 1
            tree = TreeSampler.sample_tree(n_base_leaves, n_expected_extra_leaves, expected_branch_length)
            # get the set of valid partitions implied by the tree
            valid_parts = TreeComparison.get_partitions(tree)
            ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
            # assert that the partition implied by the correct formula is valid
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            loadings = get_principal_coordinate(D)
            nonneg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                nerrors += 1
                print >> fout, 'error: a partition that was supposed to be valid was found to be invalid'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
            # check the validity of the partition implied by the incorrect formula
            Q = D * D
            loadings = get_principal_coordinate(Q)
            nonneg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                ncounterexamples += 1
                print >> fout, 'found a counterexample!'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
    except KeyboardInterrupt, e:
        print 'trees examined:', count
        print 'errors:', nerrors
        print 'counterexamples:', ncounterexamples
예제 #8
0
def get_response_content(fs):
    """
    Spend a few seconds looking for a sampled tree that gets rejected.

    Trees are sampled until five seconds have elapsed or until
    SeekEigenLacing.rec_eigen returns a false value for a sampled tree.
    A sampled tree for which some harmonic valuation is nearly zero
    at a leaf is counted as too symmetric and is skipped.
    @param fs: provides 'nleaves' and whatever get_flags reads from it
    @return: the text of the report
    """
    flags = get_flags(fs)
    nseconds = 5
    deadline = time.time() + nseconds
    rejected_s = None
    nerrors = 0
    nchecked = 0
    while time.time() < deadline and not rejected_s:
        nchecked += 1
        # Sample a tree and parse its Newick string as a Newick.NewickTree.
        true_s = NewickIO.get_newick_string(
                TreeSampler.sample_tree(fs.nleaves, 0, 1.0))
        true_tree = Newick.parse(true_s, Newick.NewickTree)
        # Vertices are identified by the id of their node object.
        internal_ids = set(id(x) for x in true_tree.gen_internal_nodes())
        leaf_ids = set(id(x) for x in true_tree.gen_tips())
        # Get one valuation of all vertices per index from 1 to nleaves-1.
        valuations = [
                Harmonic.get_harmonic_valuations(true_tree, i)
                for i in range(1, len(leaf_ids))]
        # Skip the tree if any valuation is nearly zero at a leaf.
        if any(abs(v[x]) < 1e-8 for v in valuations for x in leaf_ids):
            nerrors += 1
            continue
        # Keep only the sign at each leaf and use None for internal vertices.
        id_to_val_list = []
        for v in valuations:
            signs = dict((x, -1 if v[x] < 0 else 1) for x in leaf_ids)
            signs.update(dict.fromkeys(internal_ids))
            id_to_val_list.append(signs)
        # Define the topology in a different format.
        id_to_adj = get_id_to_adj(true_tree)
        # Check the tree for self-compatibility under the given conditions.
        if not SeekEigenLacing.rec_eigen(id_to_adj, id_to_val_list, flags):
            rejected_s = true_s
    # make the report
    if rejected_s:
        headline = ['rejected a true tree:', rejected_s]
    else:
        headline = ['no true tree was rejected']
    out = StringIO()
    for line in headline:
        print >> out, line
    print >> out
    print >> out, nchecked, 'trees were sampled total'
    print >> out, nerrors, 'trees were too symmetric'
    return out.getvalue()
예제 #9
0
파일: 20081114a.py 프로젝트: BIGtigr/xgcode
 def __call__(self, tree):
     # get the partitions implied by the tree
     valid_partitions = TreeComparison.get_partitions(tree)
     # Get the partition implied by the Fiedler split
     # of the graph derived from the tree.
     tip_nodes = list(tree.gen_tips())
     D = tree.get_partial_distance_matrix([id(node) for node in tip_nodes])
     y = get_vector(D).tolist()
     name_selection = frozenset(node.get_name()
                                for node, elem in zip(tip_nodes, y)
                                if elem > 0)
     name_complement = frozenset(node.get_name()
                                 for node, elem in zip(tip_nodes, y)
                                 if elem <= 0)
     name_partition = frozenset((name_selection, name_complement))
     if name_partition not in valid_partitions:
         msg = '\n'.join([
             'invalid partition found:', 'tree:',
             NewickIO.get_newick_string(tree), 'invalid partition:',
             name_partition
         ])
         if not self.fout:
             self.fout = open(self.counterexample_filename, 'wt')
         print >> self.fout, msg
         print msg
         self.ncounterexamples += 1
     # do not stop looking, even if a counterexample is found
     return False
예제 #10
0
def get_response_content(fs):
    """
    Build a tree from a distance matrix and return it as a newick string.

    @param fs: provides 'matrix', 'labels', 'njrecourse',
    'halvingrecourse', and a 'getfirst' method
    @return: the newick string of the tree followed by a newline
    @raise HandlingError: when the matrix has fewer than three rows,
    when the number of labels differs from the number of rows,
    or when the tree builder reports a complexity above one million
    """
    D = fs.matrix
    nrows = len(D)
    if nrows < 3:
        raise HandlingError('the matrix should have at least three rows')
    # the labels are given one per line in the order of the matrix rows
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != nrows:
        raise HandlingError(
                'the number of ordered labels should be the same '
                'as the number of rows in the matrix')
    tree_builder = NeighborhoodJoining.TreeBuilder(
            D.tolist(), ordered_labels, Clustering.StoneExactDMS())
    # NOTE(review): the value of this lookup is not used;
    # the call is kept in case getfirst has side effects -- confirm.
    fs.getfirst('recourse')
    # choose the fallback method of the tree builder if one was requested
    if fs.njrecourse:
        fallback_name = 'nj'
    elif fs.halvingrecourse:
        fallback_name = 'halving'
    else:
        fallback_name = None
    if fallback_name is not None:
        tree_builder.set_fallback_name(fallback_name)
    # refuse to start a computation that would take too long
    if tree_builder.get_complexity() > 1000000:
        raise HandlingError('this computation would take too long')
    return NewickIO.get_newick_string(tree_builder.build()) + '\n'
예제 #11
0
 def __call__(self, tree):
     # get the partitions implied by the tree
     valid_partitions = TreeComparison.get_partitions(tree)
     # Get the partition implied by the Fiedler split
     # of the graph derived from the tree.
     tip_nodes = list(tree.gen_tips())
     D = tree.get_partial_distance_matrix(
             [id(node) for node in tip_nodes])
     y = get_vector(D).tolist()
     name_selection = frozenset(node.get_name()
             for node, elem in zip(tip_nodes, y) if elem > 0)
     name_complement = frozenset(node.get_name()
             for node, elem in zip(tip_nodes, y) if elem <= 0)
     name_partition = frozenset((name_selection, name_complement))
     if name_partition not in valid_partitions:
         msg = '\n'.join([
             'invalid partition found:',
             'tree:', NewickIO.get_newick_string(tree),
             'invalid partition:', name_partition])
         if not self.fout:
             self.fout = open(self.counterexample_filename, 'wt')
         print >> self.fout, msg
         print msg
         self.ncounterexamples += 1
     # do not stop looking, even if a counterexample is found
     return False
예제 #12
0
def get_response_content(fs):
    """
    Return the newick string of a tree built from a distance matrix.

    The tree is built by a NeighborhoodJoining.TreeBuilder that uses a
    Clustering.StoneExactDMS splitter, with an optional fallback method.
    @param fs: provides 'matrix', 'labels', 'njrecourse',
    'halvingrecourse', and a 'getfirst' method
    @return: the newick string of the tree followed by a newline
    @raise HandlingError: when the matrix has fewer than three rows,
    when the number of labels differs from the number of rows,
    or when the tree builder reports a complexity above one million
    """
    D = fs.matrix
    nrows = len(D)
    if nrows < 3:
        raise HandlingError('the matrix should have at least three rows')
    # one label is expected for each row of the matrix
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != nrows:
        message = ('the number of ordered labels should be the same '
                   'as the number of rows in the matrix')
        raise HandlingError(message)
    splitter = Clustering.StoneExactDMS()
    tree_builder = NeighborhoodJoining.TreeBuilder(
        D.tolist(), ordered_labels, splitter)
    # NOTE(review): the value of this lookup is not used;
    # the call is kept in case getfirst has side effects -- confirm.
    fs.getfirst('recourse')
    # 'nj' takes precedence over 'halving' when both are requested
    if fs.njrecourse:
        fallback_name = 'nj'
    elif fs.halvingrecourse:
        fallback_name = 'halving'
    else:
        fallback_name = None
    if fallback_name is not None:
        tree_builder.set_fallback_name(fallback_name)
    # refuse to start a computation that would take too long
    if tree_builder.get_complexity() > 1000000:
        raise HandlingError('this computation would take too long')
    tree = tree_builder.build()
    return NewickIO.get_newick_string(tree) + '\n'
예제 #13
0
def get_tikz_lines(newick, eigenvector_index, yaw, pitch):
    """
    Get the lines of a tikz drawing of a tree with valuated vertices.

    The tree is laid out with FastDaylightLayout.StraightBranchLayout,
    and one column of the matrix returned by
    Ftree.TB_to_harmonic_extension, scaled by g_z_scale, gives a value
    for each vertex.
    @param newick: a newick tree string
    @param eigenvector_index: 1 is Fiedler
    @param yaw: passed on to get_v_to_xyz
    @param pitch: passed on to xyz_to_tikz_lines
    @return: the result of xyz_to_tikz_lines
    """
    tree = Newick.parse(newick, SpatialTree.SpatialTree)
    # Name every node after its object id so that layout locations
    # can be matched to vertices of the re-parsed tree by name.
    for node in tree.preorder():
        node.name = 'n' + str(id(node))
    renamed_newick = NewickIO.get_newick_string(tree)
    # do the layout and fit it to the target size
    FastDaylightLayout.StraightBranchLayout().do_layout(tree)
    tree.fit((g_xy_scale, g_xy_scale))
    name_to_location = {}
    for node in tree.preorder():
        name_to_location[node.name] = tree._layout_to_display(node.location)
    T, B, N = FtreeIO.newick_to_TBN(renamed_newick)
    # the leaves come before the internal vertices
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    vertices = leaves + internal
    v_to_location = {}
    for v in vertices:
        v_to_location[v] = name_to_location[N[v]]
    # the first returned value is not needed here
    _w, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    v_to_val = {}
    for i, val in enumerate(V[:, eigenvector_index - 1]):
        v_to_val[vertices[i]] = g_z_scale * val
    v_to_xyz = get_v_to_xyz(yaw, v_to_location, v_to_val)
    # vertices that appear in v_to_xyz only after this call
    # are the intersection vertices
    add_intersection_vertices(T, B, v_to_xyz)
    intersection_vertices = sorted(v for v in v_to_xyz if v not in vertices)
    return xyz_to_tikz_lines(
        T, B, pitch, v_to_xyz, leaves, internal, intersection_vertices)
예제 #14
0
def get_tikz_lines(newick, eigenvector_index, yaw, pitch):
    """
    Draw a tree in tikz with each vertex lifted by a valuation.

    Vertex locations come from a FastDaylightLayout.StraightBranchLayout
    of the tree, and vertex values come from one column of the matrix
    returned by Ftree.TB_to_harmonic_extension, scaled by g_z_scale.
    @param newick: a newick tree string
    @param eigenvector_index: 1 is Fiedler
    @param yaw: passed on to get_v_to_xyz
    @param pitch: passed on to xyz_to_tikz_lines
    @return: the result of xyz_to_tikz_lines
    """
    tree = Newick.parse(newick, SpatialTree.SpatialTree)
    # Rename the nodes by object id; the names tie the layout locations
    # to the vertices of the tree that is re-parsed from the new string.
    for node in tree.preorder():
        node.name = 'n' + str(id(node))
    renamed_newick = NewickIO.get_newick_string(tree)
    # do the layout and fit it to the target size
    layout = FastDaylightLayout.StraightBranchLayout()
    layout.do_layout(tree)
    tree.fit((g_xy_scale, g_xy_scale))
    name_to_location = {}
    for node in tree.preorder():
        location = tree._layout_to_display(node.location)
        name_to_location[node.name] = location
    T, B, N = FtreeIO.newick_to_TBN(renamed_newick)
    # list the leaves first and the internal vertices after them
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    vertices = leaves + internal
    v_to_location = {}
    for v in vertices:
        v_to_location[v] = name_to_location[N[v]]
    # only the second returned value is used
    _w, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    index_to_val = V[:, eigenvector_index-1]
    v_to_val = {}
    for i, val in enumerate(index_to_val):
        v_to_val[vertices[i]] = g_z_scale*val
    v_to_xyz = get_v_to_xyz(yaw, v_to_location, v_to_val)
    # keys added to v_to_xyz by this call are the intersection vertices
    add_intersection_vertices(T, B, v_to_xyz)
    intersection_vertices = sorted(v for v in v_to_xyz if v not in vertices)
    return xyz_to_tikz_lines(
            T, B, pitch, v_to_xyz, leaves, internal, intersection_vertices)
예제 #15
0
 def get_distance_matrix(self, ordered_names=None):
     """
     Get the sum of branch lengths between each pair of tips.
     @param ordered_names: the requested order of the names;
     when it is omitted or empty the order of gen_tips() is used
     @return: a row major distance matrix
     """
     tips = list(self.gen_tips())
     # map the id of each tip to its row and column index
     if ordered_names:
         name_to_index = dict(
             (name, i) for i, name in enumerate(ordered_names))
         index_of = dict((id(tip), name_to_index[tip.name]) for tip in tips)
     else:
         index_of = dict((id(tip), i) for i, tip in enumerate(tips))
     n = len(tips)
     matrix = [[0] * n for _ in range(n)]
     # walk away from each tip, accumulating the distance along the way
     for origin in tips:
         row = matrix[index_of[id(origin)]]
         # each pending entry is (previous node, current node, distance)
         pending = []
         for branch in origin.gen_directed_branches():
             neighbor = branch.get_target()
             assert neighbor
             length = branch.get_undirected_branch().get_branch_length()
             pending.append((origin, neighbor, length))
         while pending:
             came_from, here, distance = pending.pop()
             if here.is_tip():
                 # the walk ends at a tip, where the distance is recorded
                 row[index_of[id(here)]] = distance
                 continue
             # continue along every branch that does not lead back
             for branch in here.gen_exits(came_from):
                 length = branch.get_undirected_branch().get_branch_length()
                 neighbor = branch.get_target()
                 assert neighbor, NewickIO.get_newick_string(self)
                 pending.append((here, neighbor, distance + length))
     return matrix
예제 #16
0
 def get_full_distance_matrix(self, ordered_ids=None):
     """
     Get the sum of branch lengths between each pair of nodes.
     @param ordered_ids: the requested row order by node id;
     when it is omitted or empty the order of preorder() is used
     @return: a row major distance matrix
     """
     nodes = list(self.preorder())
     # map the id of each node to its row and column index
     if ordered_ids:
         index_of = dict((id_, i) for i, id_ in enumerate(ordered_ids))
     else:
         index_of = dict((id(node), i) for i, node in enumerate(nodes))
     n = len(nodes)
     matrix = [[0] * n for _ in range(n)]
     # walk away from each node, recording the distance to each node met
     for origin in nodes:
         row = matrix[index_of[id(origin)]]
         # each pending entry is (previous node, current node, distance)
         pending = []
         for branch in origin.gen_directed_branches():
             neighbor = branch.get_target()
             assert neighbor
             length = branch.get_undirected_branch().get_branch_length()
             pending.append((origin, neighbor, length))
         while pending:
             came_from, here, distance = pending.pop()
             row[index_of[id(here)]] = distance
             # continue along every branch that does not lead back
             for branch in here.gen_exits(came_from):
                 length = branch.get_undirected_branch().get_branch_length()
                 neighbor = branch.get_target()
                 assert neighbor, NewickIO.get_newick_string(self)
                 pending.append((here, neighbor, distance + length))
     return matrix
예제 #17
0
 def get_full_distance_matrix(self, ordered_ids=None):
     """
     Compute the branch length distance between all pairs of nodes.
     @param ordered_ids: the requested row order by node id;
     when it is omitted or empty the order of preorder() is used
     @return: a row major distance matrix
     """
     all_nodes = list(self.preorder())
     # decide which row and column belongs to each node id
     if ordered_ids:
         id_to_index = dict((id_, i) for i, id_ in enumerate(ordered_ids))
     else:
         id_to_index = dict(
             (id(node), i) for i, node in enumerate(all_nodes))
     n = len(all_nodes)
     distance_matrix = [[0] * n for _ in range(n)]
     for start in all_nodes:
         row = distance_matrix[id_to_index[id(start)]]
         # the frontier holds (previous node, current node, distance)
         frontier = []
         for first_branch in start.gen_directed_branches():
             first_target = first_branch.get_target()
             assert first_target
             first_length = (
                 first_branch.get_undirected_branch().get_branch_length())
             frontier.append((start, first_target, first_length))
         while frontier:
             previous, current, distance = frontier.pop()
             # every node that is reached gets its distance recorded
             row[id_to_index[id(current)]] = distance
             # extend the walk along branches that do not lead back
             for next_branch in current.gen_exits(previous):
                 branch_length = (
                     next_branch.get_undirected_branch().get_branch_length())
                 next_target = next_branch.get_target()
                 assert next_target, NewickIO.get_newick_string(self)
                 frontier.append(
                     (current, next_target, distance + branch_length))
     return distance_matrix
예제 #18
0
def get_response_content(fs):
    """
    Sample random trees for a few seconds, looking for a rejected true tree.
    The search stops at the first tree for which SeekEigenLacing.rec_eigen
    gives an empty result, or when the time budget runs out.
    @param fs: provides nleaves and whatever get_flags reads
    @return: a multi-line report string
    """
    flags = get_flags(fs)
    nseconds = 5
    deadline = time.time() + nseconds
    rejected_s = None
    nerrors = 0
    nchecked = 0
    while time.time() < deadline and not rejected_s:
        nchecked += 1
        # Sample a tree and re-read it through its Newick string.
        sampled = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        true_s = NewickIO.get_newick_string(sampled)
        true_tree = Newick.parse(true_s, Newick.NewickTree)
        # Collect the ids of the internal vertices and of the leaves.
        internal_ids = set(id(v) for v in true_tree.gen_internal_nodes())
        leaf_ids = set(id(v) for v in true_tree.gen_tips())
        nleaves = len(leaf_ids)
        # Get the harmonic valuations for all vertices of the tree.
        id_to_full_val_list = []
        for i in range(1, nleaves):
            id_to_full_val_list.append(
                    Harmonic.get_harmonic_valuations(true_tree, i))
        # A near-zero valuation at a leaf means the tree is too symmetric;
        # such a tree is counted and skipped.
        too_symmetric = any(
                abs(valuation[leaf]) < 1e-8
                for valuation in id_to_full_val_list
                for leaf in leaf_ids)
        if too_symmetric:
            nerrors += 1
            continue
        # Keep only the sign at each leaf and mark internal values unknown.
        id_to_val_list = []
        for valuation in id_to_full_val_list:
            signs = dict(
                    (leaf, -1 if valuation[leaf] < 0 else 1)
                    for leaf in leaf_ids)
            signs.update((vertex, None) for vertex in internal_ids)
            id_to_val_list.append(signs)
        # Define the topology in a different format.
        id_to_adj = get_id_to_adj(true_tree)
        # Check the tree for self-compatibility under the given conditions.
        if not SeekEigenLacing.rec_eigen(id_to_adj, id_to_val_list, flags):
            rejected_s = true_s
    # make the report, one entry per output line
    if rejected_s:
        lines = ['rejected a true tree:', rejected_s]
    else:
        lines = ['no true tree was rejected']
    lines.append('')
    lines.append('%s trees were sampled total' % (nchecked,))
    lines.append('%s trees were too symmetric' % (nerrors,))
    return '\n'.join(lines) + '\n'
예제 #19
0
 def get_response_lines(self, options):
     """
     Build the lines that form the result of the analysis.
     @param options: a subset of strings specifying what to show
     @return: a list of lines, the tree preamble followed by the error lines
     """
     # each entry is (option name, flag attribute, message to report)
     checks = (
         ('show_incomplete', 'is_incomplete',
             'the sequential splits defined by the eigenvectors were insufficient to reconstruct the tree'),
         ('show_conflicting', 'is_conflicting',
             'the reconstructed tree has a split that is incompatible with the original tree'),
         ('show_negligible', 'is_negligible',
             'during reconstruction a negligible eigenvector loading was encountered'),
     )
     error_lines = []
     for option, flag_name, message in checks:
         # the flag is only consulted when its option was requested
         if option in options and getattr(self, flag_name):
             error_lines.append(message)
     # the trees are shown only on request or when there is an error to report
     if 'show_all' not in options and not error_lines:
         return error_lines
     preamble_lines = ['original tree:', NewickIO.get_newick_string(self.tree)]
     if self.reconstructed_tree:
         preamble_lines.append('reconstructed tree:')
         preamble_lines.append(NewickIO.get_newick_string(self.reconstructed_tree))
     return preamble_lines + error_lines
예제 #20
0
 def test_contrast_matrix_to_tree(self):
     """
     Round trip the Felsenstein example tree through its contrast matrix.
     The reconstructed tree is only printed as a newick string;
     no assertion is made about it beyond assert_contrast_matrix.
     """
     ordered_names = ('a', 'b', 'c', 'd', 'e')
     original_tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree)
     # go from the tree to its contrast matrix and check the matrix
     C = get_contrast_matrix(original_tree, ordered_names)
     assert_contrast_matrix(C)
     # go back from the contrast matrix to a tree
     rebuilt_tree = contrast_matrix_to_tree(C, ordered_names)
     rebuilt_newick = NewickIO.get_newick_string(rebuilt_tree)
     # show a blank line and then the newick string
     print('')
     print(rebuilt_newick)
예제 #21
0
파일: Contrasts.py 프로젝트: BIGtigr/xgcode
 def test_contrast_matrix_to_tree(self):
     """
     Rebuild the Felsenstein example tree from its contrast matrix.
     The rebuilt tree is printed as a newick string for inspection;
     the only check made here is assert_contrast_matrix.
     """
     ordered_names = ('a', 'b', 'c', 'd', 'e')
     original_tree = NewickIO.parse(
             g_felsenstein_tree_string, FelTree.NewickTree)
     # tree -> contrast matrix, which is checked
     C = get_contrast_matrix(original_tree, ordered_names)
     assert_contrast_matrix(C)
     # contrast matrix -> tree -> newick string
     rebuilt_tree = contrast_matrix_to_tree(C, ordered_names)
     rebuilt_newick = NewickIO.get_newick_string(rebuilt_tree)
     # a blank line is written before the newick string
     print('')
     print(rebuilt_newick)
예제 #22
0
파일: 20081201a.py 프로젝트: BIGtigr/xgcode
 def __init__(self, tree, epsilon):
     """
     Try to reconstruct the tree from the eigenvectors of its Gower matrix.
     The outcome is recorded in the flags is_negligible, is_incomplete and
     is_conflicting, and in reconstructed_tree which stays None when
     the reconstruction raises NegligibleError or IncompleteError.
     @param tree: a newick tree in the felsenstein-inspired format
     @param epsilon: determines whether loadings are considered negligible
     """
     # clear some flags that describe events that occur during reconstruction
     self.is_negligible = False
     self.is_incomplete = False
     self.is_conflicting = False
     # define the trees
     self.tree = tree
     self.reconstructed_tree = None
     # set the threshold for loading negligibility
     self.epsilon = epsilon
     # define some arbitrary ordering of tip names
     self.ordered_names = [node.get_name() for node in tree.gen_tips()]
     # get the distance matrix with respect to this ordering
     D = tree.get_distance_matrix(self.ordered_names)
     # get the Gower doubly centered matrix
     G = MatrixUtil.double_centered(np.array(D))
     # get the eigendecomposition of the Gower matrix
     eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
     eigenvectors = eigenvector_transposes.T
     # Order the (magnitude, eigenvector) pairs by decreasing magnitude.
     # The sort key is the magnitude alone: comparing whole pairs would
     # fall through to comparing numpy arrays whenever two magnitudes tie,
     # and that comparison raises a ValueError.
     self.sorted_eigensystem = sorted(
         ((abs(w), v) for w, v in zip(eigenvalues, eigenvectors)),
         key=lambda pair: pair[0],
         reverse=True)
     # build the tree recursively using the sorted eigensystem
     indices = set(range(len(self.ordered_names)))
     try:
         # try to reconstruct the tree
         root = self._build_tree(indices, 0)
         root.set_branch_length(None)
         output_tree = Newick.NewickTree(root)
         # convert the tree to the FelTree format
         newick_string = NewickIO.get_newick_string(output_tree)
         self.reconstructed_tree = NewickIO.parse(newick_string,
                                                  FelTree.NewickTree)
     except NegligibleError:
         self.is_negligible = True
     except IncompleteError:
         self.is_incomplete = True
     else:
         # compare the splits defined by the reconstructed tree
         # to splits in the original tree
         expected_partitions = TreeComparison.get_nontrivial_partitions(
             self.tree)
         observed_partitions = TreeComparison.get_nontrivial_partitions(
             self.reconstructed_tree)
         # any observed split that the original tree lacks is a conflict
         invalid_partitions = observed_partitions - expected_partitions
         if invalid_partitions:
             self.is_conflicting = True
예제 #23
0
파일: 20110404a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Compare the rejection power of two flag sets on random pairs of trees.
    Tree pairs are sampled for a few seconds, or until a difference is
    found when fs.halt_on_difference is set.
    @param fs: provides nleaves, halt_on_difference and whatever
    get_flags_a and get_flags_b read
    @return: a multi-line report string
    """
    flags_a = get_flags_a(fs)
    flags_b = get_flags_b(fs)
    data = CheckTreeData(flags_a, flags_b)
    nseconds = 5
    tm = time.time()
    while time.time() < tm + nseconds:
        # Sample a pair of Newick trees.
        true_f = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        test_f = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        true_s = NewickIO.get_newick_string(true_f)
        test_s = NewickIO.get_newick_string(test_f)
        true_tree = Newick.parse(true_s, Newick.NewickTree)
        test_tree = Newick.parse(test_s, Newick.NewickTree)
        # Add the pairwise check to the data borg.
        # The flag is reset for every pair so that a CheckTreeError can
        # neither leave it unbound (first pair) nor let the value of
        # an earlier pair halt the search.
        found_difference = False
        try:
            found_difference = check_tree_pair(true_tree, test_tree, data)
        except CheckTreeError as e:
            data.add_error(e)
        # Check to see if we should stop early.
        if found_difference and fs.halt_on_difference:
            break
    # make the report, one entry per output line
    lines = []
    if data.report:
        lines.append('found a difference in rejection power')
        lines.append('')
        lines.append('%s' % (data.report,))
        lines.append('')
    else:
        lines.append('failed to find a difference in rejection power')
        lines.append('')
    lines.append('search summary:')
    m = data.acceptance_matrix
    lines.append('A reject, B reject: %s' % (m[0, 0],))
    lines.append('A reject, B accept: %s' % (m[0, 1],))
    lines.append('A accept, B reject: %s' % (m[1, 0],))
    lines.append('A accept, B accept: %s' % (m[1, 1],))
    lines.append('%s tree symmetry errors' % (data.nerrors,))
    return '\n'.join(lines) + '\n'
예제 #24
0
def get_response_content(fs):
    """
    Search random pairs of trees for a difference in rejection power.
    Two flag sets are compared through check_tree_pair for a few seconds,
    or until a difference is found when fs.halt_on_difference is set.
    @param fs: provides nleaves, halt_on_difference and whatever
    get_flags_a and get_flags_b read
    @return: a multi-line report string
    """
    flags_a = get_flags_a(fs)
    flags_b = get_flags_b(fs)
    data = CheckTreeData(flags_a, flags_b)
    nseconds = 5
    tm = time.time()
    while time.time() < tm + nseconds:
        # Sample a pair of Newick trees.
        true_f = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        test_f = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        true_s = NewickIO.get_newick_string(true_f)
        test_s = NewickIO.get_newick_string(test_f)
        true_tree = Newick.parse(true_s, Newick.NewickTree)
        test_tree = Newick.parse(test_s, Newick.NewickTree)
        # Add the pairwise check to the data borg.
        # Without this reset a CheckTreeError on the first pair would leave
        # the flag unbound, and on a later pair it would keep the value
        # computed for the previous pair.
        found_difference = False
        try:
            found_difference = check_tree_pair(true_tree, test_tree, data)
        except CheckTreeError as e:
            data.add_error(e)
        # Check to see if we should stop early.
        if found_difference and fs.halt_on_difference:
            break
    # make the report, one entry per output line
    if data.report:
        lines = [
            'found a difference in rejection power',
            '',
            '%s' % (data.report,),
            '',
        ]
    else:
        lines = ['failed to find a difference in rejection power', '']
    lines.append('search summary:')
    m = data.acceptance_matrix
    summary = (
        ('A reject, B reject:', m[0, 0]),
        ('A reject, B accept:', m[0, 1]),
        ('A accept, B reject:', m[1, 0]),
        ('A accept, B accept:', m[1, 1]),
    )
    for label, count in summary:
        lines.append('%s %s' % (label, count))
    lines.append('%s tree symmetry errors' % (data.nerrors,))
    return '\n'.join(lines) + '\n'
예제 #25
0
 def _create_trees(self):
     """
     Build self.full_tree and self.pruned_tree from self.newick_string.
     The full tree is a Newick.NewickTree,
     and the pruned tree is a FelTree.NewickTree object.
     """
     plain_tree_class = Newick.NewickTree
     # the full tree is parsed directly from the stored string
     self.full_tree = NewickIO.parse(self.newick_string, plain_tree_class)
     # the pruned tree starts as a second, separate parse that is stripped
     # of its redundant nodes and then re-read in the FelTree format
     scratch_tree = NewickIO.parse(self.newick_string, plain_tree_class)
     remove_redundant_nodes(scratch_tree)
     self.pruned_tree = NewickIO.parse(
             NewickIO.get_newick_string(scratch_tree), FelTree.NewickTree)
예제 #26
0
def get_art(tree):
    """
    Draw a tree as ascii art.
    @param tree: a FelTree
    @return: a multi-line ascii art
    """
    # re-read the tree through its newick string as a plain Newick tree
    simple_tree = NewickIO.parse(
            NewickIO.get_newick_string(tree), Newick.NewickTree)
    # configure the drawer one attribute at a time
    drawer = DrawTree.DrawTree()
    settings = (
        ('use_branch_lengths', True),
        ('force_ultrametric', False),
        ('vertical_spacing', 1),
        ('horizontal_spacing', 1),
    )
    for attribute, value in settings:
        setattr(drawer, attribute, value)
    return drawer.draw(simple_tree)
예제 #27
0
def get_art(tree):
    """
    Render a tree as multi-line ascii art.
    @param tree: a FelTree
    @return: a multi-line ascii art
    """
    # the drawer is given a plain Newick tree that is obtained
    # by writing the input tree out as newick and parsing it again
    as_newick = NewickIO.get_newick_string(tree)
    plain_tree = NewickIO.parse(as_newick, Newick.NewickTree)
    artist = DrawTree.DrawTree()
    # branch lengths are used and the tree is not forced to be ultrametric
    artist.use_branch_lengths, artist.force_ultrametric = True, False
    # use unit spacing in both directions
    artist.vertical_spacing = artist.horizontal_spacing = 1
    return artist.draw(plain_tree)
예제 #28
0
def get_response_content(fs):
    """
    Sample random trees and report them as newick strings.
    @param fs: a FieldStorage object containing the cgi arguments;
    ntrees, leafbase, leafmean and branchmean are read from it
    @return: the response text with one newick string per line
    """
    newick_lines = []
    for _ in range(fs.ntrees):
        # sample a tree and keep its newick string as one output line
        sampled = TreeSampler.sample_tree(
                fs.leafbase, fs.leafmean, fs.branchmean)
        newick_lines.append(NewickIO.get_newick_string(sampled) + '\n')
    return ''.join(newick_lines)
예제 #29
0
def get_response_content(fs):
    """
    Write the newick strings of some randomly sampled trees.
    @param fs: a FieldStorage object containing the cgi arguments;
    ntrees, leafbase, leafmean and branchmean are read from it
    @return: the response text, one newick string per line
    """
    pieces = []
    remaining = fs.ntrees
    while remaining > 0:
        # each sampled tree contributes its newick string and a newline
        tree = TreeSampler.sample_tree(fs.leafbase, fs.leafmean, fs.branchmean)
        pieces.append(NewickIO.get_newick_string(tree))
        pieces.append('\n')
        remaining -= 1
    return ''.join(pieces)
예제 #30
0
def get_response_content(fs):
    """
    Reconstruct a tree from a contrast matrix and report it as newick.
    @param fs: provides contrast_matrix and the multi-line string labels
    @return: the newick string of the reconstructed tree plus a newline
    @raise HandlingError: when the matrix row count and label count differ
    """
    C = fs.contrast_matrix
    # the labels arrive as text with one label per line
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    # validate the input before attempting the reconstruction
    if len(C) != len(ordered_labels):
        raise HandlingError(
                'the number of rows in the contrast matrix '
                'should match the number of labels')
    tree = Contrasts.contrast_matrix_to_tree(C, ordered_labels)
    return NewickIO.get_newick_string(tree) + '\n'
예제 #31
0
 def _create_trees(self):
     """
     Set self.full_tree and self.pruned_tree from self.newick_string.
     The full tree is a Newick.NewickTree,
     and the pruned tree is a FelTree.NewickTree object.
     """
     source_string = self.newick_string
     # the full tree is the stored string parsed as it is
     self.full_tree = NewickIO.parse(source_string, Newick.NewickTree)
     # a second parse of the same string is modified by removing its
     # redundant nodes, written back to newick, and read as a FelTree
     working_tree = NewickIO.parse(source_string, Newick.NewickTree)
     remove_redundant_nodes(working_tree)
     pruned_string = NewickIO.get_newick_string(working_tree)
     self.pruned_tree = NewickIO.parse(pruned_string, FelTree.NewickTree)
예제 #32
0
파일: 20090427a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Turn a contrast matrix and its labels into a newick tree string.
    @param fs: provides contrast_matrix and the multi-line string labels
    @return: the newick string of the reconstructed tree plus a newline
    @raise HandlingError: when the matrix row count and label count differ
    """
    C = fs.contrast_matrix
    label_lines = fs.labels.splitlines()
    ordered_labels = Util.get_stripped_lines(label_lines)
    # reconstruct only when the rows and the labels pair up
    if len(C) == len(ordered_labels):
        rebuilt = Contrasts.contrast_matrix_to_tree(C, ordered_labels)
        return NewickIO.get_newick_string(rebuilt) + '\n'
    problem = ''.join((
        'the number of rows in the contrast matrix ',
        'should match the number of labels'))
    raise HandlingError(problem)
예제 #33
0
 def __init__(self, tree, epsilon):
     """
     Attempt an eigenvector based reconstruction of the given tree.
     The flags is_negligible, is_incomplete and is_conflicting record what
     happened; reconstructed_tree stays None when the reconstruction
     raises NegligibleError or IncompleteError.
     @param tree: a newick tree in the felsenstein-inspired format
     @param epsilon: determines whether loadings are considered negligible
     """
     # clear some flags that describe events that occur during reconstruction
     self.is_negligible = False
     self.is_incomplete = False
     self.is_conflicting = False
     # define the trees
     self.tree = tree
     self.reconstructed_tree = None
     # set the threshold for loading negligibility
     self.epsilon = epsilon
     # define some arbitrary ordering of tip names
     self.ordered_names = [node.get_name() for node in tree.gen_tips()]
     # get the distance matrix with respect to this ordering
     D = tree.get_distance_matrix(self.ordered_names)
     # get the Gower doubly centered matrix
     G = MatrixUtil.double_centered(np.array(D))
     # get the eigendecomposition of the Gower matrix
     eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
     eigenvectors = eigenvector_transposes.T
     # Pair each eigenvector with the magnitude of its eigenvalue and put
     # the largest magnitude first.  Only the magnitude is used as the
     # sort key, because ordering the pairs themselves compares the numpy
     # eigenvectors when two magnitudes are equal and raises a ValueError.
     magnitude_vector_pairs = [
             (abs(w), v) for w, v in zip(eigenvalues, eigenvectors)]
     magnitude_vector_pairs.sort(key=lambda pair: pair[0], reverse=True)
     self.sorted_eigensystem = magnitude_vector_pairs
     # build the tree recursively using the sorted eigensystem
     indices = set(range(len(self.ordered_names)))
     try:
         # try to reconstruct the tree
         root = self._build_tree(indices, 0)
         root.set_branch_length(None)
         output_tree = Newick.NewickTree(root)
         # convert the tree to the FelTree format
         newick_string = NewickIO.get_newick_string(output_tree)
         self.reconstructed_tree = NewickIO.parse(
                 newick_string, FelTree.NewickTree)
     except NegligibleError:
         self.is_negligible = True
     except IncompleteError:
         self.is_incomplete = True
     else:
         # compare the splits defined by the reconstructed tree
         # to splits in the original tree
         expected_partitions = TreeComparison.get_nontrivial_partitions(
                 self.tree)
         observed_partitions = TreeComparison.get_nontrivial_partitions(
                 self.reconstructed_tree)
         # any observed split that the original tree lacks is a conflict
         invalid_partitions = observed_partitions - expected_partitions
         if invalid_partitions:
             self.is_conflicting = True
예제 #34
0
def get_response_content(fs):
    """
    Build a neighbor joining tree from a labeled distance matrix.
    @param fs: provides matrix and the multi-line string labels
    @return: the newick string of the tree plus a newline
    @raise HandlingError: when the matrix has fewer than three rows
    or when the label count differs from the row count
    """
    D = fs.matrix
    nrows = len(D)
    if nrows < 3:
        raise HandlingError('the matrix should have at least three rows')
    # the labels arrive as text with one label per line
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != nrows:
        raise HandlingError(
                'the number of ordered labels should be the same '
                'as the number of rows in the matrix')
    # the tree builder is given the matrix as nested lists
    tree = NeighborJoining.make_tree(D.tolist(), ordered_labels)
    return NewickIO.get_newick_string(tree) + '\n'
예제 #35
0
def get_response_content(fs):
    """
    Report the neighbor joining tree of a labeled distance matrix.
    @param fs: provides matrix and the multi-line string labels
    @return: the newick string of the tree plus a newline
    @raise HandlingError: when the matrix has fewer than three rows
    or when the label count differs from the row count
    """
    D = fs.matrix
    # reject matrices with fewer than three rows
    if not len(D) >= 3:
        raise HandlingError('the matrix should have at least three rows')
    label_lines = fs.labels.splitlines()
    ordered_labels = Util.get_stripped_lines(label_lines)
    # every row of the matrix needs exactly one label
    if len(ordered_labels) != len(D):
        message = ''.join((
            'the number of ordered labels should be the same ',
            'as the number of rows in the matrix'))
        raise HandlingError(message)
    nj_tree = NeighborJoining.make_tree(D.tolist(), ordered_labels)
    newick_string = NewickIO.get_newick_string(nj_tree)
    return newick_string + '\n'
예제 #36
0
def main():
    """
    Run some tree reconstructions from the command line.
    The simulation parameters are printed, every simulation is run,
    and the histogram string of every simulation is printed at the end.
    """
    # each entry is (clustering class, fallback name, description);
    # the 'halving' fallback variants of the exact criterion and of the
    # spectral sign cut are not part of the list
    sim_specs = (
        (Clustering.NeighborJoiningDMS,
            'nj', 'neighbor joining'),
        (Clustering.RandomDMS,
            'nj', 'random partitioning'),
        (Clustering.StoneExactDMS,
            'nj', 'exact criterion with neighbor joining fallback'),
        (Clustering.StoneSpectralSignDMS,
            'nj', 'spectral sign cut with neighbor joining fallback'),
    )
    sims = []
    for clustering_class, fallback, description in sim_specs:
        sims.append(Simulation(clustering_class(), fallback, description))
    # define the simulation parameters
    tree = get_default_original_tree()
    reconstruction_count = 1000
    sequence_length = 100
    step_limit_per_method = 10000000
    # every simulation is handed its own call of get_default_original_tree
    for sim in sims:
        sim.set_original_tree(get_default_original_tree())
        sim.set_reconstruction_count(reconstruction_count)
        sim.set_step_limit(step_limit_per_method)
        sim.set_sequence_length(sequence_length)
    # show the simulation parameters
    print('simulation parameters:')
    print('original tree: %s' % (NewickIO.get_newick_string(tree),))
    print('reconstruction count: %s' % (reconstruction_count,))
    print('sequence length: %s' % (sequence_length,))
    # run the simulations; a HandlingError is reported and the run goes on
    print('running the simulations...')
    for sim in sims:
        print('running "%s"...' % sim.description)
        try:
            sim.run()
        except HandlingError as e:
            print('Error: %s' % (e,))
    # print the simulation data
    print('simulation results:')
    for sim in sims:
        print(sim.description + ':')
        print(sim.get_histogram_string())
예제 #37
0
파일: 20080828a.py 프로젝트: BIGtigr/xgcode
def main():
    """
    Run some tree reconstructions from the command line.
    Four simulations are configured with the same parameters, run in turn,
    and summarized by printing their histogram strings.
    """
    # the descriptions are listed next to the clustering class they go with;
    # all of them use the 'nj' fallback and the 'halving' variants are left out
    described_classes = [
        (Clustering.NeighborJoiningDMS, 'neighbor joining'),
        (Clustering.RandomDMS, 'random partitioning'),
        (Clustering.StoneExactDMS,
         'exact criterion with neighbor joining fallback'),
        (Clustering.StoneSpectralSignDMS,
         'spectral sign cut with neighbor joining fallback'),
    ]
    sims = [
        Simulation(clustering_class(), 'nj', description)
        for clustering_class, description in described_classes
    ]
    # define the simulation parameters
    tree = get_default_original_tree()
    reconstruction_count = 1000
    sequence_length = 100
    step_limit_per_method = 10000000
    # set the simulation parameters, with a separately built tree per simulation
    for sim in sims:
        sim.set_original_tree(get_default_original_tree())
        sim.set_reconstruction_count(reconstruction_count)
        sim.set_step_limit(step_limit_per_method)
        sim.set_sequence_length(sequence_length)
    # show the simulation parameters
    parameter_lines = (
        'simulation parameters:',
        'original tree: %s' % (NewickIO.get_newick_string(tree),),
        'reconstruction count: %s' % (reconstruction_count,),
        'sequence length: %s' % (sequence_length,),
    )
    for line in parameter_lines:
        print(line)
    # run the simulations, reporting a HandlingError without stopping
    print('running the simulations...')
    for sim in sims:
        print('running "%s"...' % sim.description)
        try:
            sim.run()
        except HandlingError as e:
            print('Error: %s' % (e,))
    # print the simulation data
    print('simulation results:')
    for sim in sims:
        print(sim.description + ':')
        print(sim.get_histogram_string())
예제 #38
0
파일: 20091014b.py 프로젝트: BIGtigr/xgcode
def process(nseconds=None):
    """
    Sample branch lengths in search of a sign pattern counterexample.
    A sample is rejected when any MDS coordinate is closer to zero than
    g_epsilon, and accepted otherwise.
    @param nseconds: allow this many seconds to run or None to run forever
    @return: documented as a multi-line string that summarizes the results,
    but the body shown here ends without a return statement
    @raise CounterexampleError: when every coordinate of a sample matches
    its target sign pattern; the message is the newick string of the tree
    NOTE(review): the counters and counterexample_message are never read
    in the visible body; presumably reporting code is missing -- confirm.
    """
    # load the tree
    tree = NewickIO.parse(g_tree_string, FelTree.NewickTree)
    # get the alphabetically ordered tip names
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # initialize the search
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample some random branch lengths
            sample_branch_lengths(tree)
            # get the distance matrix
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            # get the projections onto the MDS axes of the leaves
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # see if the sign pattern matches for each coordinate;
            # a pattern also matches when every sign is flipped
            for v_observed, v_target in zip(X.T, g_target_sign_patterns):
                hadamard_product = v_observed * v_target
                all_positive = all(x > 0 for x in hadamard_product)
                all_negative = all(x < 0 for x in hadamard_product)
                if not (all_positive or all_negative):
                    # the target sign pattern was not met
                    break
            else:
                # the sign pattern matched for each coordinate so we have a counterexample
                msg = NewickIO.get_newick_string(tree)
                raise CounterexampleError(msg)
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        # an interrupt ends the search without an error
        pass
예제 #39
0
def process(nseconds=None):
    """
    Look for branch lengths whose MDS coordinates match the target signs.
    Samples with a coordinate closer to zero than g_epsilon are rejected;
    the remaining samples are counted as accepted.
    @param nseconds: allow this many seconds to run or None to run forever
    @return: documented as a multi-line string that summarizes the results,
    but the body shown here ends without a return statement
    @raise CounterexampleError: when every coordinate of a sample matches
    its target sign pattern; the message is the newick string of the tree
    NOTE(review): the counters and counterexample_message are never read
    in the visible body; presumably reporting code is missing -- confirm.
    """
    # load the tree
    tree = NewickIO.parse(g_tree_string, FelTree.NewickTree)
    # get the alphabetically ordered tip names
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # initialize the search
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample some random branch lengths
            sample_branch_lengths(tree)
            # get the distance matrix
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            # get the projections onto the MDS axes of the leaves
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # see if the sign pattern matches for each coordinate;
            # a globally flipped sign also counts as a match
            for v_observed, v_target in zip(X.T, g_target_sign_patterns):
                hadamard_product = v_observed * v_target
                all_positive = all(x > 0 for x in hadamard_product)
                all_negative = all(x < 0 for x in hadamard_product)
                if not (all_positive or all_negative):
                    # the target sign pattern was not met
                    break
            else:
                # the sign pattern matched for each coordinate so we have a counterexample
                msg = NewickIO.get_newick_string(tree)
                raise CounterexampleError(msg)
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        # stopping the search by hand is not treated as an error
        pass
예제 #40
0
파일: 20091004a.py 프로젝트: BIGtigr/xgcode
def process(ntaxa):
    """
    Report the S matrix and its pseudoinverse for a random weighted tree.
    The numpy print options are changed to use a line width of 200.
    @param ntaxa: use this many taxa per tree
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    # sample an xtree topology
    xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
    # convert the xtree to a FelTree, although I guess this might not be necessary
    tree_string = xtree.get_newick_string()
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get the ordered ids that define the rows of the distance matrix
    ordered_ids = get_ordered_ids(tree)
    # sample random branch lengths
    sample_branch_lengths(tree)
    # get the weighted tree string
    weighted_tree_string = NewickIO.get_newick_string(tree)
    # get the distance matrix relating all vertices
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # create a mass vector that sums to one
    m = np.array([random.randrange(1, 10) for i in range(len(D))], dtype=float)
    m /= sum(m)
    # get the S matrix
    S = edm_to_S(D, m)
    # get the pseudoinverse of S
    S_pinv = np.linalg.pinv(S)
    # make the response: the tree, then one titled section per array,
    # with a blank line after every section
    lines = ['newick tree: %s' % (weighted_tree_string,), '']
    sections = (
        ('m:', m),
        ('D:', D),
        ('S:', S),
        ('pseudoinverse of S:', S_pinv),
    )
    for title, value in sections:
        # wrapping the array in a tuple formats it as a single value
        lines.extend([title, '%s' % (value,), ''])
    return '\n'.join(lines).strip()
예제 #41
0
def get_pruned_tree(tree, names_to_remove):
    """
    Remove the named leaves from a tree and return the result as a FelTree.
    The input tree itself is modified by the pruning.
    @param tree: a Newick tree (not a FelTree)
    @param names_to_remove: a set of names of leaves to remove from the tree
    @return: a FelTree
    """
    # collect the tips first so that the tree is not pruned while traversed
    doomed_tips = []
    for tip in tree.gen_tips():
        if tip.name in names_to_remove:
            doomed_tips.append(tip)
    for tip in doomed_tips:
        tree.prune(tip)
    # merge segmented branches by removing the nodes that have a single child
    single_child_nodes = []
    for node in tree.preorder():
        if node.get_child_count() == 1:
            single_child_nodes.append(node)
    for node in single_child_nodes:
        tree.remove_node(node)
    # convert the tree to the FelTree format through its newick string
    return NewickIO.parse(
            NewickIO.get_newick_string(tree), FelTree.NewickTree)
예제 #42
0
def get_pruned_tree(tree, names_to_remove):
    """
    Prune the named leaves and return a FelTree copy of what remains.
    The tree that is passed in is modified in the process.
    @param tree: a Newick tree (not a FelTree)
    @param names_to_remove: a set of names of leaves to remove from the tree
    @return: a FelTree
    """
    # the matching tips are listed before any of them is pruned
    unwanted_tips = list(
            tip for tip in tree.gen_tips() if tip.name in names_to_remove)
    while unwanted_tips:
        tree.prune(unwanted_tips.pop(0))
    # nodes with exactly one child only segment a branch, so they are
    # listed and then removed to merge the segments
    segment_nodes = list(
            node for node in tree.preorder() if node.get_child_count() == 1)
    while segment_nodes:
        tree.remove_node(segment_nodes.pop(0))
    # the FelTree is read back from the newick string of the pruned tree
    pruned_newick = NewickIO.get_newick_string(tree)
    fel_tree = NewickIO.parse(pruned_newick, FelTree.NewickTree)
    return fel_tree
예제 #43
0
def get_root_augmented_distance_matrix(tree_in, first_taxa, second_taxa):
    """
    Get the distances among two groups of tips and the root of the tree.
    The node ids passed to get_partial_distance_matrix are those of the
    tips named in first_taxa, then those named in second_taxa, then the root.
    @param tree_in: a newick tree
    @param first_taxa: a set of tip names
    @param second_taxa: another set of tip names
    @return: a distance matrix
    """
    # re-read the tree as a FelTree through its newick string
    newick_string = NewickIO.get_newick_string(tree_in)
    tree = NewickIO.parse(newick_string, FelTree.NewickTree)
    # the tips are scanned once per group so that the groups stay contiguous
    ordered_ids = [
            id(tip)
            for taxa in (first_taxa, second_taxa)
            for tip in tree.gen_tips()
            if tip.get_name() in taxa]
    # the root comes last
    ordered_ids.append(id(tree.get_root()))
    return tree.get_partial_distance_matrix(ordered_ids)
예제 #44
0
 def get_distance_matrix(self, ordered_names=None):
     """
     Compute the path length between every pair of tips of the tree.
     Path lengths are sums of the branch lengths reported by the
     undirected branches walked between the two tips.
     @param ordered_names: optional tip names giving the row and column
     order; when omitted or empty the gen_tips order is used
     @return: a row major distance matrix as a list of lists
     """
     # decide which row and column each tip is assigned to
     tip_id_to_index = {}
     if ordered_names:
         tip_name_to_index = {}
         for position, name in enumerate(ordered_names):
             tip_name_to_index[name] = position
         for tip in self.gen_tips():
             tip_id_to_index[id(tip)] = tip_name_to_index[tip.name]
     else:
         for position, tip in enumerate(self.gen_tips()):
             tip_id_to_index[id(tip)] = position
     # the matrix is square with one row per tip
     tip_count = sum(1 for _ in self.gen_tips())
     distance_matrix = [[0] * tip_count for _ in range(tip_count)]
     for origin in self.gen_tips():
         row = distance_matrix[tip_id_to_index[id(origin)]]
         # seed the walk with every branch that leaves the origin tip
         pending = []
         for first_branch in origin.gen_directed_branches():
             neighbor = first_branch.get_target()
             assert neighbor
             first_length = first_branch.get_undirected_branch().get_branch_length()
             pending.append((origin, neighbor, first_length))
         while pending:
             source, target, distance = pending.pop()
             # a tip ends the walk along this path and gets its distance recorded
             if target.is_tip():
                 row[tip_id_to_index[id(target)]] = distance
                 continue
             # otherwise continue through the exits offered relative to the source
             for exit_branch in target.gen_exits(source):
                 step = exit_branch.get_undirected_branch().get_branch_length()
                 next_target = exit_branch.get_target()
                 assert next_target, NewickIO.get_newick_string(self)
                 pending.append((target, next_target, distance + step))
     return distance_matrix
예제 #45
0
파일: 20081114a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Report how two sign-based splits of each input tree compare.
    For every tree, one tip bipartition is derived from the distance matrix
    over all nodes (tips first, then internal nodes) and another from the
    distance matrix over the tips alone.  Each bipartition is checked
    against the partitions returned by TreeComparison.get_partitions,
    and the two bipartitions are compared with each other.
    @param fs: form data; fs.trees holds newick tree strings, one per line
    @return: the report text, ending with the agreement counts
    @raise HandlingError: if a tree has fewer than four tips,
                          an unlabeled tip, or duplicate tip labels
    """
    def split_names(nodes, values):
        # group the node names by whether the paired value is positive
        positive = frozenset(
            node.get_name() for node, elem in zip(nodes, values) if elem > 0)
        nonpositive = frozenset(
            node.get_name() for node, elem in zip(nodes, values) if elem <= 0)
        return frozenset((positive, nonpositive))

    # parse the trees, rejecting any that break the labeling requirements
    trees = []
    for line in iterutils.stripped_lines(fs.trees.splitlines()):
        tree = NewickIO.parse(line, FelTree.NewickTree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(names)
        if ntips < 4:
            raise HandlingError('expected at least four tips '
                                'but found ' + str(ntips))
        for name in names:
            if name is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(names)) < ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # build the report
    out = StringIO()
    nsame = 0
    ndiff = 0
    for tree in trees:
        # the paragraph for this tree is emitted only if something
        # noteworthy happens while analyzing it
        paragraph = StringIO()
        noteworthy = False
        print >> paragraph, NewickIO.get_newick_string(tree)
        # separate the tips from the internal nodes, keeping preorder
        nodes = list(tree.preorder())
        tips = [node for node in nodes if node.is_tip()]
        internals = [node for node in nodes if not node.is_tip()]
        # these are the tip name partitions implied by the topology
        valid_partitions = TreeComparison.get_partitions(tree)
        # split using the matrix over the tips followed by internal nodes;
        # only the leading entries, which belong to the tips, are used
        augmented_ids = [id(node) for node in tips + internals]
        D_full = tree.get_partial_distance_matrix(augmented_ids)
        tip_values = get_vector(D_full).tolist()[:len(tips)]
        partition_a = split_names(tips, tip_values)
        if partition_a not in valid_partitions:
            print >> paragraph, 'augmented distance matrix split fail:',
            print >> paragraph, partition_a
            noteworthy = True
        # split using the matrix over the tips only
        D = tree.get_partial_distance_matrix([id(node) for node in tips])
        partition_b = split_names(tips, get_vector(D).tolist())
        if partition_b not in valid_partitions:
            print >> paragraph, 'not-augmented distance matrix split fail:',
            print >> paragraph, partition_b
            noteworthy = True
        # see whether the two methods agree
        if partition_a != partition_b:
            ndiff += 1
            print >> paragraph, 'this tree was split differently '
            print >> paragraph, 'by the different methods:'
            print >> paragraph, 'augmented distance matrix split:',
            print >> paragraph, partition_a
            print >> paragraph, 'not-augmented distance matrix split:',
            print >> paragraph, partition_b
            noteworthy = True
        else:
            nsame += 1
        # printing the paragraph adds the newline that separates trees
        if noteworthy:
            print >> out, paragraph.getvalue()
    # finish with the summary
    print >> out, 'for this many trees the same split was found:',
    print >> out, nsame
    print >> out, 'for this many trees different splits were found:',
    print >> out, ndiff
    return out.getvalue()
예제 #46
0
def get_response_content(fs):
    """
    Report how two sign-based splits of each input tree compare.
    For every tree, one tip bipartition is derived from the distance matrix
    over all nodes (tips first, then internal nodes) and another from the
    distance matrix over the tips alone.  Each bipartition is checked
    against the partitions returned by TreeComparison.get_partitions,
    and the two bipartitions are compared with each other.
    @param fs: form data; fs.trees holds newick tree strings, one per line
    @return: the report text, ending with the agreement counts
    @raise HandlingError: if a tree has fewer than four tips,
                          an unlabeled tip, or duplicate tip labels
    """
    def split_names(nodes, values):
        # group the node names by whether the paired value is positive
        positive = frozenset(
            node.get_name() for node, elem in zip(nodes, values) if elem > 0)
        nonpositive = frozenset(
            node.get_name() for node, elem in zip(nodes, values) if elem <= 0)
        return frozenset((positive, nonpositive))

    # parse the trees, rejecting any that break the labeling requirements
    trees = []
    for line in iterutils.stripped_lines(fs.trees.splitlines()):
        tree = NewickIO.parse(line, FelTree.NewickTree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(names)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips '
                    'but found ' + str(ntips))
        for name in names:
            if name is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(names)) < ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # build the report
    out = StringIO()
    nsame = 0
    ndiff = 0
    for tree in trees:
        # the paragraph for this tree is emitted only if something
        # noteworthy happens while analyzing it
        paragraph = StringIO()
        noteworthy = False
        print >> paragraph, NewickIO.get_newick_string(tree)
        # separate the tips from the internal nodes, keeping preorder
        nodes = list(tree.preorder())
        tips = [node for node in nodes if node.is_tip()]
        internals = [node for node in nodes if not node.is_tip()]
        # these are the tip name partitions implied by the topology
        valid_partitions = TreeComparison.get_partitions(tree)
        # split using the matrix over the tips followed by internal nodes;
        # only the leading entries, which belong to the tips, are used
        augmented_ids = [id(node) for node in tips + internals]
        D_full = tree.get_partial_distance_matrix(augmented_ids)
        tip_values = get_vector(D_full).tolist()[:len(tips)]
        partition_a = split_names(tips, tip_values)
        if partition_a not in valid_partitions:
            print >> paragraph, 'augmented distance matrix split fail:',
            print >> paragraph, partition_a
            noteworthy = True
        # split using the matrix over the tips only
        D = tree.get_partial_distance_matrix([id(node) for node in tips])
        partition_b = split_names(tips, get_vector(D).tolist())
        if partition_b not in valid_partitions:
            print >> paragraph, 'not-augmented distance matrix split fail:',
            print >> paragraph, partition_b
            noteworthy = True
        # see whether the two methods agree
        if partition_a != partition_b:
            ndiff += 1
            print >> paragraph, 'this tree was split differently '
            print >> paragraph, 'by the different methods:'
            print >> paragraph, 'augmented distance matrix split:',
            print >> paragraph, partition_a
            print >> paragraph, 'not-augmented distance matrix split:',
            print >> paragraph, partition_b
            noteworthy = True
        else:
            nsame += 1
        # printing the paragraph adds the newline that separates trees
        if noteworthy:
            print >> out, paragraph.getvalue()
    # finish with the summary
    print >> out, 'for this many trees the same split was found:',
    print >> out, nsame
    print >> out, 'for this many trees different splits were found:',
    print >> out, ndiff
    return out.getvalue()
예제 #47
0
def process(ntaxa, nseconds):
    """
    Sample random trees until time runs out or a counterexample appears.
    Each accepted sample yields a sign pattern (the orthants of the points
    computed from its distance matrix) and a topology surrogate (its
    distance matrix under unit branch lengths).  A counterexample is a
    sign pattern that recurs with a different topology surrogate.
    @param ntaxa: passed to TreeSampler.sample_agglomerated_tree;
                  presumably the number of taxa in each sampled tree
    @param nseconds: allow this many seconds to run or None to run forever
    @return: a multi-line string that summarizes the results
    @raise CounterexampleError: when a counterexample is found; the visible
                                code does not catch it
    """
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    # each canonical sign pattern remembers the first topology surrogate
    # and the first weighted tree string that produced it
    pattern_to_topo_surrogate = {}
    pattern_to_tree_string = {}
    # NOTE(review): assigned but never read in the visible code
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            # a falsy nseconds (None or 0) disables the time limit
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample an xtree topology
            xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
            # convert the xtree to a FelTree, although I guess this might not be necessary
            tree_string = xtree.get_newick_string()
            tree = NewickIO.parse(tree_string, FelTree.NewickTree)
            # get ordered ids and the number of leaves and some auxiliary variables
            # NOTE(review): nleaves and id_to_index are not used in the visible code
            ordered_ids = get_ordered_ids(tree)
            nleaves = len(list(tree.gen_tips()))
            id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids))
            # force every branch length to be the unit length
            reset_branch_lengths(tree)
            # get the unweighted distance matrix among tips in convenient hashable form
            D_unit = np.array(tree.get_partial_distance_matrix(ordered_ids))
            topo_surrogate = tuple(tuple(row.tolist()) for row in D_unit)
            # sample random branch lengths
            sample_branch_lengths(tree)
            # get the weighted tree string
            weighted_tree_string = NewickIO.get_newick_string(tree)
            # get the distance matrix relating the leaves
            D = np.array(tree.get_partial_distance_matrix(ordered_ids))
            # get the projections onto the MDS axes of the leaves
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # do an orthogonal transformation that puts the first point in the positive orthant
            canonizing_vector = np.array(point_to_orthant(X[0]))
            X *= canonizing_vector
            # get the canonical sign pattern
            sign_pattern = tuple(point_to_orthant(row) for row in X)
            # compare the topo surrogate of this sign pattern to the one in memory
            expected_topo_surrogate = pattern_to_topo_surrogate.get(
                sign_pattern, None)
            # the truthiness test treats an unseen pattern (None) and an
            # empty stored surrogate alike; presumably surrogates are never empty
            if expected_topo_surrogate:
                if topo_surrogate != expected_topo_surrogate:
                    remembered_tree_string = pattern_to_tree_string[
                        sign_pattern]
                    msg = 'these trees have the same sign pattern but different topologies: {%s, %s}' % (
                        weighted_tree_string, remembered_tree_string)
                    raise CounterexampleError(msg)
            else:
                pattern_to_topo_surrogate[sign_pattern] = topo_surrogate
                pattern_to_tree_string[sign_pattern] = weighted_tree_string
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt, e:
        # a keyboard interrupt just ends the sampling loop
        # NOTE(review): no return statement is visible after this handler,
        # so as shown the function returns None instead of the summary
        # string described in the docstring; confirm against the full file
        pass
예제 #48
0
def process(ntaxa, nseconds):
    """
    Sample random trees until time runs out or a counterexample appears.
    Each accepted sample yields a sign pattern (the orthants of the points
    computed from its distance matrix) and a topology surrogate (its
    distance matrix under unit branch lengths).  A counterexample is a
    sign pattern that recurs with a different topology surrogate.
    @param ntaxa: passed to TreeSampler.sample_agglomerated_tree;
                  presumably the number of taxa in each sampled tree
    @param nseconds: allow this many seconds to run or None to run forever
    @return: a multi-line string that summarizes the results
    @raise CounterexampleError: when a counterexample is found; the visible
                                code does not catch it
    """
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    # each canonical sign pattern remembers the first topology surrogate
    # and the first weighted tree string that produced it
    pattern_to_topo_surrogate = {}
    pattern_to_tree_string = {}
    # NOTE(review): assigned but never read in the visible code
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            # a falsy nseconds (None or 0) disables the time limit
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample an xtree topology
            xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
            # convert the xtree to a FelTree, although I guess this might not be necessary
            tree_string = xtree.get_newick_string()
            tree = NewickIO.parse(tree_string, FelTree.NewickTree)
            # get ordered ids and the number of leaves and some auxiliary variables
            # NOTE(review): nleaves and id_to_index are not used in the visible code
            ordered_ids = get_ordered_ids(tree)
            nleaves = len(list(tree.gen_tips()))
            id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids))
            # force every branch length to be the unit length
            reset_branch_lengths(tree)
            # get the unweighted distance matrix among tips in convenient hashable form
            D_unit = np.array(tree.get_partial_distance_matrix(ordered_ids))
            topo_surrogate = tuple(tuple(row.tolist()) for row in D_unit)
            # sample random branch lengths
            sample_branch_lengths(tree)
            # get the weighted tree string
            weighted_tree_string = NewickIO.get_newick_string(tree)
            # get the distance matrix relating the leaves
            D = np.array(tree.get_partial_distance_matrix(ordered_ids))
            # get the projections onto the MDS axes of the leaves
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # do an orthogonal transformation that puts the first point in the positive orthant
            canonizing_vector = np.array(point_to_orthant(X[0]))
            X *= canonizing_vector
            # get the canonical sign pattern
            sign_pattern = tuple(point_to_orthant(row) for row in X)
            # compare the topo surrogate of this sign pattern to the one in memory
            expected_topo_surrogate = pattern_to_topo_surrogate.get(sign_pattern, None)
            # the truthiness test treats an unseen pattern (None) and an
            # empty stored surrogate alike; presumably surrogates are never empty
            if expected_topo_surrogate:
                if topo_surrogate != expected_topo_surrogate:
                    remembered_tree_string = pattern_to_tree_string[sign_pattern]
                    msg = 'these trees have the same sign pattern but different topologies: {%s, %s}' % (weighted_tree_string, remembered_tree_string)
                    raise CounterexampleError(msg)
            else:
                pattern_to_topo_surrogate[sign_pattern] = topo_surrogate
                pattern_to_tree_string[sign_pattern] = weighted_tree_string
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt, e:
        # a keyboard interrupt just ends the sampling loop
        # NOTE(review): no return statement is visible after this handler,
        # so as shown the function returns None instead of the summary
        # string described in the docstring; confirm against the full file
        pass
예제 #49
0
def get_response_content(fs):
    """
    Rebuild a tree from sampled distance matrices and tally the errors.
    A splitter is chosen from the form flags, distance matrices are drawn
    from a DMSampler built on the original tree, and a tree is rebuilt
    from each matrix.  The report lists how often each partition error
    count occurred and, on request, the reconstructed topologies.
    @param fs: form data; reads exact, sign, threshold, nj, random, tree,
               iterations, length, njrecourse, halvingrecourse, showtrees
    @return: the text of the report
    @raise HandlingError: if no splitter is selected, if the requested work
                          is too large, or if the sampler accepted fewer
                          samples than requested
    """
    # read the criterion string, creating the splitter object
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.threshold:
        splitter = Clustering.StoneSpectralThresholdDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # without this branch the first use of splitter below would fail
        # with an UnboundLocalError instead of a reportable error
        raise HandlingError('no bipartition function was selected')
    # read the original tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # Make sure that the splitter object is appropriate for the number
    # of taxa and the number of tree reconstructions.
    ntaxa = len(list(tree.gen_tips()))
    if splitter.get_complexity(ntaxa) * fs.iterations > 1000000:
        msg_a = 'use a faster bipartition function, fewer taxa, '
        msg_b = 'or fewer tree reconstructions'
        raise HandlingError(msg_a + msg_b)
    # Read the recourse string once; it does not change between samples.
    if fs.njrecourse:
        fallback_name = 'nj'
    elif fs.halvingrecourse:
        fallback_name = 'halving'
    else:
        fallback_name = None
    # sample a bunch of sequences
    ordered_names = [node.name for node in tree.gen_tips()]
    sampler = DMSampler(tree, ordered_names, fs.length)
    # simulate a bunch of distance matrices and reconstruct the trees
    mismatch_count_tree_pairs = []
    error_count_histogram = {}
    max_steps = 1000000
    for sequence_list, distance_matrix in sampler.gen_distance_matrices(
            fs.iterations, max_steps):
        # create the tree builder
        tree_builder = NeighborhoodJoining.ValidatingTreeBuilder(
                distance_matrix, ordered_names, splitter)
        # set the fallback method only when a recourse was requested
        if fallback_name is not None:
            tree_builder.set_fallback_name(fallback_name)
        # set parameters of the tree validating tree builder
        tree_builder.set_original_tree(tree)
        # build the tree
        reconstructed_tree = tree_builder.build()
        # note the number of partition errors during the reconstruction
        mismatch_count = tree_builder.get_mismatch_count()
        error_count_histogram[mismatch_count] = (
                error_count_histogram.get(mismatch_count, 0) + 1)
        # If we are saving the reconstructed trees
        # then remove branch lengths and add to the tree list.
        if fs.showtrees:
            for node in reconstructed_tree.preorder():
                node.set_branch_length(None)
            mismatch_count_tree_pairs.append(
                    (mismatch_count, reconstructed_tree))
    # See if we bailed early because
    # the sampling was predicted to take too long.
    if sampler.accepted_sample_count < fs.iterations:
        raise HandlingError(sampler.get_sampling_error_message())
    # define the response
    out = StringIO()
    print >> out, 'partition error count frequencies:'
    # max() raises ValueError on an empty histogram, so an empty run
    # simply lists no frequencies
    if error_count_histogram:
        max_mismatch_count = max(error_count_histogram)
    else:
        max_mismatch_count = -1
    for i in range(max_mismatch_count + 1):
        frequency = error_count_histogram.get(i, 0)
        print >> out, i, ':', frequency
    if fs.showtrees:
        print >> out, ''
        print >> out, 'reconstructed tree topologies with mismatch counts:'
        # sort on the count alone so that ties keep sampling order
        # instead of being broken by comparing tree objects
        ordered_pairs = sorted(
                mismatch_count_tree_pairs, key=lambda pair: pair[0])
        for mismatch_count, reconstructed in ordered_pairs:
            print >> out, NewickIO.get_newick_string(reconstructed),
            print >> out, mismatch_count
    # return the response
    return out.getvalue()
예제 #50
0
파일: 20080828a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Rebuild a tree from sampled distance matrices and tally the errors.
    A splitter is chosen from the form flags, distance matrices are drawn
    from a DMSampler built on the original tree, and a tree is rebuilt
    from each matrix.  The report lists how often each partition error
    count occurred and, on request, the reconstructed topologies.
    @param fs: form data; reads exact, sign, threshold, nj, random, tree,
               iterations, length, njrecourse, halvingrecourse, showtrees
    @return: the text of the report
    @raise HandlingError: if no splitter is selected, if the requested work
                          is too large, or if the sampler accepted fewer
                          samples than requested
    """
    # read the criterion string, creating the splitter object
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.threshold:
        splitter = Clustering.StoneSpectralThresholdDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # without this branch the first use of splitter below would fail
        # with an UnboundLocalError instead of a reportable error
        raise HandlingError('no bipartition function was selected')
    # read the original tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # Make sure that the splitter object is appropriate for the number
    # of taxa and the number of tree reconstructions.
    ntaxa = len(list(tree.gen_tips()))
    if splitter.get_complexity(ntaxa) * fs.iterations > 1000000:
        msg_a = 'use a faster bipartition function, fewer taxa, '
        msg_b = 'or fewer tree reconstructions'
        raise HandlingError(msg_a + msg_b)
    # Read the recourse string once; it does not change between samples.
    if fs.njrecourse:
        fallback_name = 'nj'
    elif fs.halvingrecourse:
        fallback_name = 'halving'
    else:
        fallback_name = None
    # sample a bunch of sequences
    ordered_names = [node.name for node in tree.gen_tips()]
    sampler = DMSampler(tree, ordered_names, fs.length)
    # simulate a bunch of distance matrices and reconstruct the trees
    mismatch_count_tree_pairs = []
    error_count_histogram = {}
    max_steps = 1000000
    for sequence_list, distance_matrix in sampler.gen_distance_matrices(
            fs.iterations, max_steps):
        # create the tree builder
        tree_builder = NeighborhoodJoining.ValidatingTreeBuilder(
            distance_matrix, ordered_names, splitter)
        # set the fallback method only when a recourse was requested
        if fallback_name is not None:
            tree_builder.set_fallback_name(fallback_name)
        # set parameters of the tree validating tree builder
        tree_builder.set_original_tree(tree)
        # build the tree
        reconstructed_tree = tree_builder.build()
        # note the number of partition errors during the reconstruction
        mismatch_count = tree_builder.get_mismatch_count()
        error_count_histogram[mismatch_count] = (
            error_count_histogram.get(mismatch_count, 0) + 1)
        # If we are saving the reconstructed trees
        # then remove branch lengths and add to the tree list.
        if fs.showtrees:
            for node in reconstructed_tree.preorder():
                node.set_branch_length(None)
            mismatch_count_tree_pairs.append(
                (mismatch_count, reconstructed_tree))
    # See if we bailed early because
    # the sampling was predicted to take too long.
    if sampler.accepted_sample_count < fs.iterations:
        raise HandlingError(sampler.get_sampling_error_message())
    # define the response
    out = StringIO()
    print >> out, 'partition error count frequencies:'
    # max() raises ValueError on an empty histogram, so an empty run
    # simply lists no frequencies
    if error_count_histogram:
        max_mismatch_count = max(error_count_histogram)
    else:
        max_mismatch_count = -1
    for i in range(max_mismatch_count + 1):
        frequency = error_count_histogram.get(i, 0)
        print >> out, i, ':', frequency
    if fs.showtrees:
        print >> out, ''
        print >> out, 'reconstructed tree topologies with mismatch counts:'
        # sort on the count alone so that ties keep sampling order
        # instead of being broken by comparing tree objects
        ordered_pairs = sorted(
            mismatch_count_tree_pairs, key=lambda pair: pair[0])
        for mismatch_count, reconstructed in ordered_pairs:
            print >> out, NewickIO.get_newick_string(reconstructed),
            print >> out, mismatch_count
    # return the response
    return out.getvalue()
예제 #51
0
def get_response_content(fs):
    """
    Run three analyses on two subtrees joined by a connecting branch.
    The form supplies two newick subtrees, a two-group split of the tips
    of each subtree, and the length of the branch that joins the subtree
    roots.  After validation the output of do_first_method,
    do_second_method and do_third_method is collected, followed by the
    matrix from Clustering.get_R_balaji and its sums within the four
    taxon blocks.
    @param fs: form data; reads subtree_a, subtree_b, taxa_a1, taxa_a2,
               taxa_b1, taxa_b2 and blen
    @return: the text of the report
    @raise HandlingError: if a taxon list or subtree has duplicates,
                          a subtree has fewer than two tips, or the taxon
                          lists do not partition the tips of their subtree
    """
    # read the values from the form
    subtree_a = NewickIO.parse(fs.subtree_a, Newick.NewickTree)
    taxa_a1 = Util.get_stripped_lines(StringIO(fs.taxa_a1))
    taxa_a2 = Util.get_stripped_lines(StringIO(fs.taxa_a2))
    subtree_b = NewickIO.parse(fs.subtree_b, Newick.NewickTree)
    taxa_b1 = Util.get_stripped_lines(StringIO(fs.taxa_b1))
    taxa_b2 = Util.get_stripped_lines(StringIO(fs.taxa_b2))
    connecting_branch_length = fs.blen
    # no taxon list may repeat a name
    for taxa in (taxa_a1, taxa_a2, taxa_b1, taxa_b2):
        if len(taxa) != len(set(taxa)):
            raise HandlingError('one of the lists of taxa contains duplicates')
    # each subtree needs at least two tips, all named differently
    for subtree in (subtree_a, subtree_b):
        names = [node.get_name() for node in subtree.gen_tips()]
        if len(names) < 2:
            raise HandlingError('each subtree should have at least two tips')
        if len(names) != len(set(names)):
            raise HandlingError('a subtree has duplicate tip names')
    # the two taxon lists of a subtree must partition its tips
    groups = (
            ('A', subtree_a, taxa_a1, taxa_a2),
            ('B', subtree_b, taxa_b1, taxa_b2))
    for tree_name, subtree, first, second in groups:
        tip_names = set(node.get_name() for node in subtree.gen_tips())
        for group_name, taxa in (('1', first), ('2', second)):
            nonsense_names = list(set(taxa) - tip_names)
            if nonsense_names:
                raise HandlingError(
                        'the following taxa in group %s ' % group_name
                        + 'of subtree %s ' % tree_name
                        + 'are not valid tips: %s' % str(nonsense_names))
        if set(first).intersection(second):
            raise HandlingError(
                    'the taxon lists for subtree %s ' % tree_name
                    + 'are not disjoint')
        # a proper subset means that some tip was left out
        if set(first).union(second) < tip_names:
            raise HandlingError(
                    'a tip in subtree %s ' % tree_name
                    + 'is not represented in either of the groups')
    # define the response
    out = StringIO()
    # the first method works on the subtrees before they are joined
    do_first_method(subtree_a, subtree_b, taxa_a1, taxa_a2,
            taxa_b1, taxa_b2, connecting_branch_length, out)
    # hang subtree b off the root of subtree a to form the entire tree
    root_b = subtree_b.get_root()
    root_b.set_branch_length(connecting_branch_length)
    subtree_a.get_root().add_child(root_b)
    # define the order and structure of the distance matrix
    block_structure = get_block_structure(taxa_a1, taxa_a2, taxa_b1, taxa_b2)
    name_order = taxa_a1 + taxa_a2 + taxa_b1 + taxa_b2
    # get the distance matrix from a FelTree copy of the joined tree
    joined_string = NewickIO.get_newick_string(subtree_a)
    fel_tree = NewickIO.parse(joined_string, FelTree.NewickTree)
    D = fel_tree.get_distance_matrix(name_order)
    # get the R matrix
    R = Clustering.get_R_balaji(D)
    # accumulate the elements of R within each pair of blocks
    block_R = [[0]*4 for _ in range(4)]
    for i, block_i in enumerate(block_structure):
        summed_row = block_R[block_i]
        for j, block_j in enumerate(block_structure):
            summed_row[block_j] += R[i][j]
    # show the results from the second method
    do_second_method(fel_tree, taxa_a1, taxa_a2, taxa_b1, taxa_b2, out)
    # the third method needs each subtree with an extra child named
    # 'special' on its root, at half the connecting branch length
    stubbed_trees = [
            NewickIO.parse(fs.subtree_a, Newick.NewickTree),
            NewickIO.parse(fs.subtree_b, Newick.NewickTree)]
    half_length = connecting_branch_length / 2
    for stubbed in stubbed_trees:
        stub = Newick.NewickNode()
        stub.name = 'special'
        stub.blen = half_length
        stubbed.get_root().add_child(stub)
    feltree_m3_a, feltree_m3_b = [
            NewickIO.parse(
                NewickIO.get_newick_string(stubbed), FelTree.NewickTree)
            for stubbed in stubbed_trees]
    # it also needs fresh copies of both subtrees under a new root,
    # each attached by half of the connecting branch length
    fresh_a = NewickIO.parse(fs.subtree_a, Newick.NewickTree)
    fresh_b = NewickIO.parse(fs.subtree_b, Newick.NewickTree)
    new_root = Newick.NewickNode()
    for fresh in (fresh_a, fresh_b):
        fresh.get_root().blen = half_length
    for fresh in (fresh_a, fresh_b):
        new_root.add_child(fresh.get_root())
    tree_m3 = Newick.NewickTree(new_root)
    feltree_m3 = NewickIO.parse(
            NewickIO.get_newick_string(tree_m3), FelTree.NewickTree)
    do_third_method(feltree_m3_a, feltree_m3_b, feltree_m3,
            half_length, taxa_a1, taxa_a2, taxa_b1, taxa_b2, out)
    # show the expected results
    print >> out, 'M:'
    print >> out, MatrixUtil.m_to_string(R)
    print >> out, 'M summed within blocks:'
    print >> out, MatrixUtil.m_to_string(block_R)
    # return the response
    return out.getvalue()