Exemple #1
0
def get_response_content(fs):
    """
    Build the plain-text report for splitting a distance matrix in two.

    The labels named in the selection define one index set (A) and all of
    the remaining indices define the other (B).  For each side a new
    distance matrix is obtained from BuildTreeTopology.update_generalized_nj
    and the matching labels from SchurAlgebra.vmerge, in both cases by
    passing the index set of the opposite side.
    @param fs: an object providing the matrix, labels and selection
    attributes; labels and selection are text that is read through
    Util.get_stripped_lines
    @return: the report as a single string
    @raise HandlingError: if the number of labels differs from the number
    of rows in the distance matrix
    """
    # read the matrix
    D = np.array(fs.matrix)
    n = len(D)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # the selection is only used for membership tests, once per ordered
    # label, so store it as a set instead of scanning a list each time
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    # validate the input
    if n != len(ordered_labels):
        raise HandlingError(
            'the number of taxon labels should match the number of rows in the distance matrix'
        )
    # get the two sets of indices
    index_set_A = set(i for i, label in enumerate(ordered_labels)
                      if label in selected_labels)
    index_set_B = set(range(n)) - index_set_A
    # get internal values related to the split;
    # the first returned value (R) is not part of the report
    _, alpha, beta, gamma = get_R_alpha_beta_gamma(D, index_set_B)
    # get the two new distance matrices
    D_A = BuildTreeTopology.update_generalized_nj(D, index_set_B)
    D_B = BuildTreeTopology.update_generalized_nj(D, index_set_A)
    # get the names associated with the indices of the new distance matrices
    all_names = [set([name]) for name in ordered_labels]
    D_A_names = [
        set_to_string(x) for x in SchurAlgebra.vmerge(all_names, index_set_B)
    ]
    D_B_names = [
        set_to_string(x) for x in SchurAlgebra.vmerge(all_names, index_set_A)
    ]
    # both sides of the split are reported with the same layout
    sections = (
        ('new distance matrix corresponding to the selected names:',
         D_A, D_A_names),
        ('new distance matrix corresponding to the non-selected names:',
         D_B, D_B_names),
    )
    # show the results
    out = StringIO()
    print >> out, 'alpha:', alpha
    print >> out, 'beta:', beta
    print >> out, 'gamma:', gamma
    for title, matrix, names in sections:
        # a blank line separates each section from what precedes it
        print >> out
        print >> out, title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, 'ordered labels corresponding to this matrix:'
        for name in names:
            print >> out, name
    # return the response
    return out.getvalue()
Exemple #2
0
def get_response_content(fs):
    """
    Build the plain-text report for splitting a distance matrix in two.

    The labels named in the selection define one index set (A) and all of
    the remaining indices define the other (B).  For each side a new
    distance matrix is obtained from BuildTreeTopology.update_generalized_nj
    and the matching labels from SchurAlgebra.vmerge, in both cases by
    passing the index set of the opposite side.
    @param fs: an object providing the matrix, labels and selection
    attributes; labels and selection are text that is read through
    Util.get_stripped_lines
    @return: the report as a single string
    @raise HandlingError: if the number of labels differs from the number
    of rows in the distance matrix
    """
    # read the matrix
    D = np.array(fs.matrix)
    n = len(D)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # the selection is only used for membership tests, once per ordered
    # label, so store it as a set instead of scanning a list each time
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    # validate the input
    if n != len(ordered_labels):
        raise HandlingError("the number of taxon labels should match the number of rows in the distance matrix")
    # get the two sets of indices
    index_set_A = set(i for i, label in enumerate(ordered_labels) if label in selected_labels)
    index_set_B = set(range(n)) - index_set_A
    # get internal values related to the split;
    # the first returned value (R) is not part of the report
    _, alpha, beta, gamma = get_R_alpha_beta_gamma(D, index_set_B)
    # get the two new distance matrices
    D_A = BuildTreeTopology.update_generalized_nj(D, index_set_B)
    D_B = BuildTreeTopology.update_generalized_nj(D, index_set_A)
    # get the names associated with the indices of the new distance matrices
    all_names = [set([name]) for name in ordered_labels]
    D_A_names = [set_to_string(x) for x in SchurAlgebra.vmerge(all_names, index_set_B)]
    D_B_names = [set_to_string(x) for x in SchurAlgebra.vmerge(all_names, index_set_A)]
    # both sides of the split are reported with the same layout
    sections = (
        ("new distance matrix corresponding to the selected names:", D_A, D_A_names),
        ("new distance matrix corresponding to the non-selected names:", D_B, D_B_names),
    )
    # show the results
    out = StringIO()
    print >> out, "alpha:", alpha
    print >> out, "beta:", beta
    print >> out, "gamma:", gamma
    for title, matrix, names in sections:
        # a blank line separates each section from what precedes it
        print >> out
        print >> out, title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, "ordered labels corresponding to this matrix:"
        for name in names:
            print >> out, name
    # return the response
    return out.getvalue()
Exemple #3
0
 def _do_analysis(self, use_generalized_nj):
     """
     Do some splits of the tree.

     Two successive splits are computed, each from the vector returned by
     BuildTreeTopology.laplacian_to_fiedler.  The first split is required
     to put all bacteria on one side.  The second split is computed after
     the bacteria indices have been merged, and is required to put all
     eukaryota on one side.  The results are stored in
     self.first_split_object and self.second_split_object.
     @param use_generalized_nj: True if we use an old method of outgrouping
     @raise HandlingError: if bacteria are found on both sides or on
     neither side of the first split, or if eukaryota are found on both
     sides of the second split
     """
     names = self.pruned_names
     # define the distance matrix
     D = np.array(self.pruned_tree.get_distance_matrix(names))
     # get the primary split of the criterion matrix
     L = Euclid.edm_to_laplacian(D)
     v = BuildTreeTopology.laplacian_to_fiedler(L)
     eigensplit = BuildTreeTopology.eigenvector_to_split(v)
     # assert that the first split cleanly separates the bacteria from the rest
     left_indices, right_indices = eigensplit
     left_domains = self._get_domains([names[x] for x in left_indices])
     right_domains = self._get_domains([names[x] for x in right_indices])
     if ('bacteria' in left_domains) and ('bacteria' in right_domains):
         raise HandlingError('bacteria were not defined by the first split')
     # now we have enough info to define the first supplementary csv file
     self.first_split_object = SupplementarySpreadsheetObject(names, L, v)
     # define the bacteria indices for the second split
     if 'bacteria' in left_domains:
         bacteria_indices = left_indices
     elif 'bacteria' in right_domains:
         bacteria_indices = right_indices
     else:
         # without this branch the code below would fail with an
         # UnboundLocalError instead of a meaningful message
         raise HandlingError(
             'bacteria were not found on either side of the first split')
     # get the secondary split of interest
     if use_generalized_nj:
         D_secondary = BuildTreeTopology.update_generalized_nj(D, bacteria_indices)
         L_secondary = Euclid.edm_to_laplacian(D_secondary)
     else:
         L_secondary = SchurAlgebra.mmerge(L, bacteria_indices)
     full_label_sets = [set([i]) for i in range(len(names))]
     next_label_sets = SchurAlgebra.vmerge(full_label_sets, bacteria_indices)
     v_secondary = BuildTreeTopology.laplacian_to_fiedler(L_secondary)
     eigensplit_secondary = BuildTreeTopology.eigenvector_to_split(v_secondary)
     left_subindices, right_subindices = eigensplit_secondary
     # a label set holding a single index keeps its original name;
     # any other label set is named as the merged bacteria
     pruned_names_secondary = []
     for label_set in next_label_sets:
         if len(label_set) == 1:
             (index,) = label_set
             pruned_names_secondary.append(names[index])
         else:
             pruned_names_secondary.append('all-bacteria')
     # assert that the second split cleanly separates the eukaryota from the rest
     left_subdomains = self._get_domains([pruned_names_secondary[x] for x in left_subindices])
     right_subdomains = self._get_domains([pruned_names_secondary[x] for x in right_subindices])
     if ('eukaryota' in left_subdomains) and ('eukaryota' in right_subdomains):
         raise HandlingError('eukaryota were not defined by the second split')
     # now we have enough info to define the second supplementary csv file
     self.second_split_object = SupplementarySpreadsheetObject(pruned_names_secondary, L_secondary, v_secondary)
Exemple #4
0
 def _do_analysis(self, use_generalized_nj):
     """
     Do some splits of the tree.

     Two successive splits are computed, each from the vector returned by
     BuildTreeTopology.laplacian_to_fiedler.  The first split is required
     to put all bacteria on one side.  The second split is computed after
     the bacteria indices have been merged, and is required to put all
     eukaryota on one side.  The results are stored in
     self.first_split_object and self.second_split_object.
     @param use_generalized_nj: True if we use an old method of outgrouping
     @raise HandlingError: if bacteria are found on both sides or on
     neither side of the first split, or if eukaryota are found on both
     sides of the second split
     """
     names = self.pruned_names
     # define the distance matrix
     D = np.array(self.pruned_tree.get_distance_matrix(names))
     # get the primary split of the criterion matrix
     L = Euclid.edm_to_laplacian(D)
     v = BuildTreeTopology.laplacian_to_fiedler(L)
     eigensplit = BuildTreeTopology.eigenvector_to_split(v)
     # assert that the first split cleanly separates the bacteria from the rest
     left_indices, right_indices = eigensplit
     left_domains = self._get_domains(
         [names[x] for x in left_indices])
     right_domains = self._get_domains(
         [names[x] for x in right_indices])
     if ('bacteria' in left_domains) and ('bacteria' in right_domains):
         raise HandlingError('bacteria were not defined by the first split')
     # now we have enough info to define the first supplementary csv file
     self.first_split_object = SupplementarySpreadsheetObject(
         names, L, v)
     # define the bacteria indices for the second split
     if 'bacteria' in left_domains:
         bacteria_indices = left_indices
     elif 'bacteria' in right_domains:
         bacteria_indices = right_indices
     else:
         # without this branch the code below would fail with an
         # UnboundLocalError instead of a meaningful message
         raise HandlingError(
             'bacteria were not found on either side of the first split')
     # get the secondary split of interest
     if use_generalized_nj:
         D_secondary = BuildTreeTopology.update_generalized_nj(
             D, bacteria_indices)
         L_secondary = Euclid.edm_to_laplacian(D_secondary)
     else:
         L_secondary = SchurAlgebra.mmerge(L, bacteria_indices)
     full_label_sets = [set([i]) for i in range(len(names))]
     next_label_sets = SchurAlgebra.vmerge(full_label_sets,
                                           bacteria_indices)
     v_secondary = BuildTreeTopology.laplacian_to_fiedler(L_secondary)
     eigensplit_secondary = BuildTreeTopology.eigenvector_to_split(
         v_secondary)
     left_subindices, right_subindices = eigensplit_secondary
     # a label set holding a single index keeps its original name;
     # any other label set is named as the merged bacteria
     pruned_names_secondary = []
     for label_set in next_label_sets:
         if len(label_set) == 1:
             (index,) = label_set
             pruned_names_secondary.append(names[index])
         else:
             pruned_names_secondary.append('all-bacteria')
     # assert that the second split cleanly separates the eukaryota from the rest
     left_subdomains = self._get_domains(
         [pruned_names_secondary[x] for x in left_subindices])
     right_subdomains = self._get_domains(
         [pruned_names_secondary[x] for x in right_subindices])
     if ('eukaryota' in left_subdomains) and ('eukaryota'
                                              in right_subdomains):
         raise HandlingError(
             'eukaryota were not defined by the second split')
     # now we have enough info to define the second supplementary csv file
     self.second_split_object = SupplementarySpreadsheetObject(
         pruned_names_secondary, L_secondary, v_secondary)