Exemple #1
0
def process(ntaxa, nseconds, branch_length_sampler):
    """
    @param ntaxa: the number of taxa in the sampled trees
    @param nseconds: allow this many seconds to run or None to run forever
    @param branch_length_sampler: a functor that returns a branch length and has a string cast
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    # initialize some state that will be tracked over the entire run
    degenerate_count = 0
    invalid_split_count = 0
    valid_split_count = 0
    spectral_error_count = 0
    atteson_error_count = 0
    counterexample_D = None
    counterexample_tree = None
    # do a bunch of reconstructions from sampled distance matrices
    try:
        while True:
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            # sample the atteson distance matrix
            D = sample_atteson_distance_matrix(tree)
            # assert that the atteson condition is true
            if not BuildTreeTopology.is_atteson(tree, D):
                atteson_error_count += 1
            else:
                try:
                    # see if the eigensplit is in the set of true splits
                    eigensplit = BuildTreeTopology.split_using_eigenvector(D)
                    if eigensplit in true_splits:
                        valid_split_count += 1
                    else:
                        invalid_split_count += 1
                        counterexample_D = D
                        counterexample_tree = tree
                        break
                except BuildTreeTopology.DegenerateSplitException, e:
                    degenerate_count += 1
                except BuildTreeTopology.InvalidSpectralSplitException, e:
                    spectral_error_count += 1
Exemple #2
0
def process(ntaxa, nseconds, branch_length_sampler):
    """
    @param ntaxa: the number of taxa in the sampled trees
    @param nseconds: allow this many seconds to run or None to run forever
    @param branch_length_sampler: a functor that returns a branch length and has a string cast
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    # initialize some state that will be tracked over the entire run
    degenerate_count = 0
    invalid_split_count = 0
    valid_split_count = 0
    spectral_error_count = 0
    atteson_error_count = 0
    counterexample_D = None
    counterexample_tree = None
    # do a bunch of reconstructions from sampled distance matrices
    try:
        while True:
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            # sample the atteson distance matrix
            D = sample_atteson_distance_matrix(tree)
            # assert that the atteson condition is true
            if not BuildTreeTopology.is_atteson(tree, D):
                atteson_error_count += 1
            else:
                try:
                    # see if the eigensplit is in the set of true splits
                    eigensplit = BuildTreeTopology.split_using_eigenvector(D)
                    if eigensplit in true_splits:
                        valid_split_count += 1
                    else:
                        invalid_split_count += 1
                        counterexample_D = D
                        counterexample_tree = tree
                        break
                except BuildTreeTopology.DegenerateSplitException, e:
                    degenerate_count += 1
                except BuildTreeTopology.InvalidSpectralSplitException, e:
                    spectral_error_count += 1
Exemple #3
0
 def do_search(self, nseconds, sampling_function):
     """
     Repeatedly resample the branch lengths of self.tree and split the tree.

     Each attempt assigns new branch lengths, optionally screens the attempt
     using the Fiedler split of the full tree restricted to the first ntips
     indices, and then computes the eigensplit of the distance matrix.
     Rejected attempts increment the matching counter on self and are retried.
     @param nseconds: allowed search time or None
     @param sampling_function: a function that samples a branch length
     @return: True if a tree was found that met the criteria;
     False when the allowed search time has been exceeded
     @raise RuntimeError: if self.is_initialized() is false
     """
     if not self.is_initialized():
         raise RuntimeError("the search was not sufficiently initialized")
     true_splits = self.tree.get_nontrivial_splits()
     start_time = time.time()
     while True:
         # a falsy nseconds (None or 0) disables the time limit
         elapsed_time = time.time() - start_time
         if nseconds and elapsed_time > nseconds:
             return False
         # assign new sampled branch lengths
         for branch in self.tree.get_branches():
             branch.length = sampling_function()
         # get the distance matrix so we can use a library function to get the split
         D = np.array(self.tree.get_distance_matrix())
         ntips = len(D)
         # get the Laplacian matrix of the full tree and the corresponding Fiedler split of the leaves
         if self.force_difference or self.informative_full_split:
             A_aug = np.array(self.tree.get_weighted_adjacency_matrix(self.id_to_index))
             L_aug = Euclid.adjacency_to_laplacian(A_aug)
             v_aug = BuildTreeTopology.laplacian_to_fiedler(L_aug)
             left_aug, right_aug = BuildTreeTopology.eigenvector_to_split(v_aug)
             # Keep only the indices 0..ntips-1 on each side.  The index set
             # is built once instead of calling range() for every element.
             tip_indices = frozenset(range(ntips))
             left = [x for x in left_aug if x in tip_indices]
             right = [x for x in right_aug if x in tip_indices]
             leaf_eigensplit_aug = BuildTreeTopology.make_split(left, right)
             if self.force_difference:
                 # reject attempts whose restricted split equals the desired primary split
                 if leaf_eigensplit_aug == self.desired_primary_split:
                     self.aug_split_collision_count += 1
                     continue
             if self.informative_full_split:
                 # reject attempts where a side of the restricted split has fewer than two elements
                 if min(len(s) for s in leaf_eigensplit_aug) < 2:
                     self.aug_split_degenerate_count += 1
                     continue
         # get the eigensplit
         try:
             eigensplit = BuildTreeTopology.split_using_eigenvector(D)
         # The exception instances are not used, so they are not bound;
         # this spelling is valid on both Python 2 and Python 3.
         except BuildTreeTopology.DegenerateSplitException:
             self.degenerate_primary_split_count += 1
             continue
         except BuildTreeTopology.InvalidSpectralSplitException:
             self.error_primary_split_count += 1
             continue
Exemple #4
0
 def do_search(self, nseconds, sampling_function):
     """
     Repeatedly resample the branch lengths of self.tree and split the tree.

     Each attempt assigns new branch lengths, optionally screens the attempt
     using the Fiedler split of the full tree restricted to the first ntips
     indices, and then computes the eigensplit of the distance matrix.
     Rejected attempts increment the matching counter on self and are retried.
     @param nseconds: allowed search time or None
     @param sampling_function: a function that samples a branch length
     @return: True if a tree was found that met the criteria;
     False when the allowed search time has been exceeded
     @raise RuntimeError: if self.is_initialized() is false
     """
     if not self.is_initialized():
         raise RuntimeError('the search was not sufficiently initialized')
     true_splits = self.tree.get_nontrivial_splits()
     start_time = time.time()
     while True:
         # a falsy nseconds (None or 0) disables the time limit
         elapsed_time = time.time() - start_time
         if nseconds and elapsed_time > nseconds:
             return False
         # assign new sampled branch lengths
         for branch in self.tree.get_branches():
             branch.length = sampling_function()
         # get the distance matrix so we can use a library function to get the split
         D = np.array(self.tree.get_distance_matrix())
         ntips = len(D)
         # get the Laplacian matrix of the full tree and the corresponding Fiedler split of the leaves
         if self.force_difference or self.informative_full_split:
             A_aug = np.array(self.tree.get_weighted_adjacency_matrix(self.id_to_index))
             L_aug = Euclid.adjacency_to_laplacian(A_aug)
             v_aug = BuildTreeTopology.laplacian_to_fiedler(L_aug)
             left_aug, right_aug = BuildTreeTopology.eigenvector_to_split(v_aug)
             # Keep only the indices 0..ntips-1 on each side.  The index set
             # is built once instead of calling range() for every element.
             tip_indices = frozenset(range(ntips))
             left = [x for x in left_aug if x in tip_indices]
             right = [x for x in right_aug if x in tip_indices]
             leaf_eigensplit_aug = BuildTreeTopology.make_split(left, right)
             if self.force_difference:
                 # reject attempts whose restricted split equals the desired primary split
                 if leaf_eigensplit_aug == self.desired_primary_split:
                     self.aug_split_collision_count += 1
                     continue
             if self.informative_full_split:
                 # reject attempts where a side of the restricted split has fewer than two elements
                 if min(len(s) for s in leaf_eigensplit_aug) < 2:
                     self.aug_split_degenerate_count += 1
                     continue
         # get the eigensplit
         try:
             eigensplit = BuildTreeTopology.split_using_eigenvector(D)
         # The exception instances are not used, so they are not bound;
         # this spelling is valid on both Python 2 and Python 3.
         except BuildTreeTopology.DegenerateSplitException:
             self.degenerate_primary_split_count += 1
             continue
         except BuildTreeTopology.InvalidSpectralSplitException:
             self.error_primary_split_count += 1
             continue
Exemple #5
0
    # see if the bottom up reconstruction was successful
    nj_splits = BuildTreeTopology.get_splits(D, BuildTreeTopology.split_nj,
                                             BuildTreeTopology.update_nj)
    nj_success = (nj_splits == true_splits)
    # note the joint results of the two reconstruction methods
    if top_down_success and nj_success:
        incr_attribute(attribute_array, 'nsuccesses.both')
    elif (not top_down_success) and (not nj_success):
        incr_attribute(attribute_array, 'nsuccesses.neither')
    elif top_down_success and (not nj_success):
        incr_attribute(attribute_array, 'nsuccesses.topdown.only')
    elif (not top_down_success) and nj_success:
        incr_attribute(attribute_array, 'nsuccesses.nj.only')
    # characterize the result of the first spectral split
    try:
        eigensplit = BuildTreeTopology.split_using_eigenvector(D)
        if eigensplit in true_splits:
            incr_attribute(attribute_array, 'first.split.informative')
        else:
            incr_attribute(attribute_array, 'first.split.invalid')
    except BuildTreeTopology.DegenerateSplitException, e:
        incr_attribute(attribute_array, 'first.split.uninformative')
    # return the attribute array
    return attribute_array


def process(ntaxa, nseconds, nlengths, nsamples, nj_like,
            branch_length_sampler, use_pbar):
    """
    @param ntaxa: the number of taxa per tree
    @param nseconds: stop after this many seconds
Exemple #6
0
        incr_attribute(attribute_array, 'nsamples.accepted.atteson')
    # see if the bottom up reconstruction was successful
    nj_splits = BuildTreeTopology.get_splits(D, BuildTreeTopology.split_nj, BuildTreeTopology.update_nj)
    nj_success = (nj_splits == true_splits)
    # note the joint results of the two reconstruction methods
    if top_down_success and nj_success:
        incr_attribute(attribute_array, 'nsuccesses.both')
    elif (not top_down_success) and (not nj_success):
        incr_attribute(attribute_array, 'nsuccesses.neither')
    elif top_down_success and (not nj_success):
        incr_attribute(attribute_array, 'nsuccesses.topdown.only')
    elif (not top_down_success) and nj_success:
        incr_attribute(attribute_array, 'nsuccesses.nj.only')
    # characterize the result of the first spectral split
    try:
        eigensplit = BuildTreeTopology.split_using_eigenvector(D)
        if eigensplit in true_splits:
            incr_attribute(attribute_array, 'first.split.informative')
        else:
            incr_attribute(attribute_array, 'first.split.invalid')
    except BuildTreeTopology.DegenerateSplitException, e:
        incr_attribute(attribute_array, 'first.split.uninformative')
    # return the attribute array
    return attribute_array

def process(ntaxa, nseconds, nlengths, nsamples, nj_like, branch_length_sampler, use_pbar):
    """
    @param ntaxa: the number of taxa per tree
    @param nseconds: stop after this many seconds
    @param nlengths: use this many different sequence lengths
    @param nsamples: stop after this many samples per sequence length
Exemple #7
0
def process(ntaxa, nseconds, seqlen, nsamples, branch_length_sampler,
            use_pbar):
    """
    @param ntaxa: the number of taxa per tree
    @param nseconds: stop after this many seconds
    @param seqlen: use this sequence length
    @param nsamples: stop after this many samples per sequence length
    @param branch_length_sampler: this function samples branch lengths independently
    @param use_pbar: True iff a progress bar should be used
    @return: a multi-line string of the contents of an R table
    """
    # initialize the global rejection counts
    nrejected_zero = 0
    nrejected_inf = 0
    nrejected_fail = 0
    naccepted = 0
    # Initialize the accumulation matrix.
    # The rows specify the size of the smaller side of the initial split.
    # The columns specify the compatibility status of the split.
    nsmall_sizes = (ntaxa / 2) + 1
    accum = np.zeros((nsmall_sizes, 2), dtype=np.int)
    # Repeatedly analyze samples.
    # We might have to stop early if we run out of time or if ctrl-c is pressed.
    # If we have to stop early, then show the results of the progress so far.
    termination_reason = 'no reason for termination was given'
    start_time = time.time()
    pbar = Progress.Bar(nsamples) if use_pbar else None
    try:
        for sample_index in range(nsamples):
            # keep trying to get an accepted sample
            while True:
                # check the time
                if nseconds and time.time() - start_time > nseconds:
                    raise TimeoutError()
                # first sample a tree and get its set of informative splits
                tree = TreeSampler.sample_agglomerated_tree(ntaxa)
                true_splits = tree.get_nontrivial_splits()
                # sample the branch lengths
                for branch in tree.get_branches():
                    branch.length = branch_length_sampler()
                # Attempt to sample a distance matrix.
                # If the sample was rejected then note the reason and go back to the drawing board.
                try:
                    D = sample_distance_matrix(tree, seqlen)
                except InfiniteDistanceError as e:
                    nrejected_inf += 1
                    continue
                except ZeroDistanceError as e:
                    nrejected_zero += 1
                    continue
                # Attempt to estimate the primary split of the tree from the distance matrix.
                # If there was a technical failure then note it and go back to the drawing board.
                # Otherwise note the compatibility and balance of the split.
                try:
                    eigensplit = BuildTreeTopology.split_using_eigenvector(D)
                    small_size = min(len(side) for side in eigensplit)
                    if eigensplit in true_splits:
                        compatibility = 1
                    else:
                        compatibility = 0
                except BuildTreeTopology.DegenerateSplitException, e:
                    small_size = 0
                    compatibility = 1
                except BuildTreeTopology.InvalidSpectralSplitException, e:
                    nrejected_fail += 1
                    continue
Exemple #8
0
def process(ntaxa, nseconds, seqlen, nsamples, branch_length_sampler, use_pbar):
    """
    @param ntaxa: the number of taxa per tree
    @param nseconds: stop after this many seconds
    @param seqlen: use this sequence length
    @param nsamples: stop after this many samples per sequence length
    @param branch_length_sampler: this function samples branch lengths independently
    @param use_pbar: True iff a progress bar should be used
    @return: a multi-line string of the contents of an R table
    """
    # initialize the global rejection counts
    nrejected_zero = 0
    nrejected_inf = 0
    nrejected_fail = 0
    naccepted = 0
    # Initialize the accumulation matrix.
    # The rows specify the size of the smaller side of the initial split.
    # The columns specify the compatibility status of the split.
    nsmall_sizes = (ntaxa / 2) + 1
    accum = np.zeros((nsmall_sizes, 2), dtype=np.int)
    # Repeatedly analyze samples.
    # We might have to stop early if we run out of time or if ctrl-c is pressed.
    # If we have to stop early, then show the results of the progress so far.
    termination_reason = 'no reason for termination was given'
    start_time = time.time()
    pbar = Progress.Bar(nsamples) if use_pbar else None
    try:
        for sample_index in range(nsamples):
            # keep trying to get an accepted sample
            while True:
                # check the time
                if nseconds and time.time() - start_time > nseconds:
                    raise TimeoutError()
                # first sample a tree and get its set of informative splits
                tree = TreeSampler.sample_agglomerated_tree(ntaxa)
                true_splits = tree.get_nontrivial_splits()
                # sample the branch lengths
                for branch in tree.get_branches():
                    branch.length = branch_length_sampler()
                # Attempt to sample a distance matrix.
                # If the sample was rejected then note the reason and go back to the drawing board.
                try:
                    D = sample_distance_matrix(tree, seqlen)
                except InfiniteDistanceError as e:
                    nrejected_inf += 1
                    continue
                except ZeroDistanceError as e:
                    nrejected_zero += 1
                    continue
                # Attempt to estimate the primary split of the tree from the distance matrix.
                # If there was a technical failure then note it and go back to the drawing board.
                # Otherwise note the compatibility and balance of the split.
                try:
                    eigensplit = BuildTreeTopology.split_using_eigenvector(D)
                    small_size = min(len(side) for side in eigensplit)
                    if eigensplit in true_splits:
                        compatibility = 1
                    else:
                        compatibility = 0
                except BuildTreeTopology.DegenerateSplitException, e:
                    small_size = 0
                    compatibility = 1
                except BuildTreeTopology.InvalidSpectralSplitException, e:
                    nrejected_fail += 1
                    continue