def get_response_content(fs):
    """
    Split a labeled distance matrix in two and report both reduced matrices.

    The rows of the matrix are partitioned into the indices whose labels
    were selected (A) and the remaining indices (B).  The report lists
    alpha, beta and gamma as computed by get_R_alpha_beta_gamma, followed
    by one reduced matrix and its ordered labels for each side.
    @param fs: object with the attributes matrix, labels and selection
    @return: the text of the report
    @raise HandlingError: when the label count differs from the row count
    """
    # read the matrix
    D = np.array(fs.matrix)
    n = len(D)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # keep the selection as a set so that each membership test below
    # is a hash lookup instead of a scan of the whole selection
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    # validate the input
    if n != len(ordered_labels):
        raise HandlingError(
            'the number of taxon labels should match the number of rows in the distance matrix'
        )
    # get the two sets of indices
    index_set_A = set(
        i for i, label in enumerate(ordered_labels)
        if label in selected_labels)
    index_set_B = set(range(n)) - index_set_A
    # get internal values related to the split; the first value is not reported
    _R, alpha, beta, gamma = get_R_alpha_beta_gamma(D, index_set_B)
    # get the two new distance matrices;
    # the matrix for one side is built by passing the indices of the other side
    # NOTE(review): presumably the passed indices are collapsed into a
    # single entry, as vmerge does for the names below -- confirm
    D_A = BuildTreeTopology.update_generalized_nj(D, index_set_B)
    D_B = BuildTreeTopology.update_generalized_nj(D, index_set_A)
    # get the names associated with the indices of the new distance matrices
    all_names = [set([name]) for name in ordered_labels]
    D_A_names = [
        set_to_string(x)
        for x in SchurAlgebra.vmerge(all_names, index_set_B)
    ]
    D_B_names = [
        set_to_string(x)
        for x in SchurAlgebra.vmerge(all_names, index_set_A)
    ]
    # show the results
    out = StringIO()
    print >> out, 'alpha:', alpha
    print >> out, 'beta:', beta
    print >> out, 'gamma:', gamma
    print >> out
    print >> out, 'new distance matrix corresponding to the selected names:'
    print >> out, MatrixUtil.m_to_string(D_A)
    print >> out
    print >> out, 'ordered labels corresponding to this matrix:'
    for name in D_A_names:
        print >> out, name
    print >> out
    print >> out, 'new distance matrix corresponding to the non-selected names:'
    print >> out, MatrixUtil.m_to_string(D_B)
    print >> out
    print >> out, 'ordered labels corresponding to this matrix:'
    for name in D_B_names:
        print >> out, name
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Split a labeled distance matrix in two and report both reduced matrices.

    The rows of the matrix are partitioned into the indices whose labels
    were selected (A) and the remaining indices (B).  The report lists
    alpha, beta and gamma as computed by get_R_alpha_beta_gamma, followed
    by one reduced matrix and its ordered labels for each side.
    @param fs: object with the attributes matrix, labels and selection
    @return: the text of the report
    @raise HandlingError: when the label count differs from the row count
    """
    # read the matrix
    D = np.array(fs.matrix)
    n = len(D)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # keep the selection as a set so that each membership test below
    # is a hash lookup instead of a scan of the whole selection
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    # validate the input
    if n != len(ordered_labels):
        raise HandlingError("the number of taxon labels should match the number of rows in the distance matrix")
    # get the two sets of indices
    index_set_A = set(i for i, label in enumerate(ordered_labels) if label in selected_labels)
    index_set_B = set(range(n)) - index_set_A
    # get internal values related to the split; the first value is not reported
    _R, alpha, beta, gamma = get_R_alpha_beta_gamma(D, index_set_B)
    # get the two new distance matrices;
    # the matrix for one side is built by passing the indices of the other side
    # NOTE(review): presumably the passed indices are collapsed into a
    # single entry, as vmerge does for the names below -- confirm
    D_A = BuildTreeTopology.update_generalized_nj(D, index_set_B)
    D_B = BuildTreeTopology.update_generalized_nj(D, index_set_A)
    # get the names associated with the indices of the new distance matrices
    all_names = [set([name]) for name in ordered_labels]
    D_A_names = [set_to_string(x) for x in SchurAlgebra.vmerge(all_names, index_set_B)]
    D_B_names = [set_to_string(x) for x in SchurAlgebra.vmerge(all_names, index_set_A)]
    # show the results
    out = StringIO()
    print >> out, "alpha:", alpha
    print >> out, "beta:", beta
    print >> out, "gamma:", gamma
    print >> out
    print >> out, "new distance matrix corresponding to the selected names:"
    print >> out, MatrixUtil.m_to_string(D_A)
    print >> out
    print >> out, "ordered labels corresponding to this matrix:"
    for name in D_A_names:
        print >> out, name
    print >> out
    print >> out, "new distance matrix corresponding to the non-selected names:"
    print >> out, MatrixUtil.m_to_string(D_B)
    print >> out
    print >> out, "ordered labels corresponding to this matrix:"
    for name in D_B_names:
        print >> out, name
    # return the response
    return out.getvalue()
def _do_analysis(self, use_generalized_nj):
    """
    Do some splits of the tree.

    The results are stored on the instance as first_split_object and
    second_split_object.
    @param use_generalized_nj: True if we use an old method of outgrouping
    @raise HandlingError: when bacteria are on both sides or on neither
    side of the first split, or when eukaryota are on both sides of the
    second split
    """
    # define the distance matrix
    D = np.array(self.pruned_tree.get_distance_matrix(self.pruned_names))
    # get the primary split of the criterion matrix
    L = Euclid.edm_to_laplacian(D)
    v = BuildTreeTopology.laplacian_to_fiedler(L)
    eigensplit = BuildTreeTopology.eigenvector_to_split(v)
    # assert that the first split cleanly separates the bacteria from the rest
    left_indices, right_indices = eigensplit
    left_domains = self._get_domains([self.pruned_names[x] for x in left_indices])
    right_domains = self._get_domains([self.pruned_names[x] for x in right_indices])
    if ('bacteria' in left_domains) and ('bacteria' in right_domains):
        raise HandlingError('bacteria were not defined by the first split')
    # now we have enough info to define the first supplementary csv file
    self.first_split_object = SupplementarySpreadsheetObject(self.pruned_names, L, v)
    # define the bacteria indices for the second split
    if 'bacteria' in left_domains:
        bacteria_indices = left_indices
    elif 'bacteria' in right_domains:
        bacteria_indices = right_indices
    else:
        # without this branch the name would be unbound below and the
        # method would die with an UnboundLocalError instead of a
        # readable message
        raise HandlingError('no bacteria were found on either side of the first split')
    # get the secondary split of interest
    if use_generalized_nj:
        D_secondary = BuildTreeTopology.update_generalized_nj(D, bacteria_indices)
        L_secondary = Euclid.edm_to_laplacian(D_secondary)
    else:
        L_secondary = SchurAlgebra.mmerge(L, bacteria_indices)
    full_label_sets = [set([i]) for i in range(len(self.pruned_names))]
    next_label_sets = SchurAlgebra.vmerge(full_label_sets, bacteria_indices)
    v_secondary = BuildTreeTopology.laplacian_to_fiedler(L_secondary)
    eigensplit_secondary = BuildTreeTopology.eigenvector_to_split(v_secondary)
    left_subindices, right_subindices = eigensplit_secondary
    # name each row of the secondary matrix;
    # a label set with more than one index is the merged bacteria group
    pruned_names_secondary = []
    for label_set in next_label_sets:
        if len(label_set) == 1:
            label, = label_set
            pruned_names_secondary.append(self.pruned_names[label])
        else:
            pruned_names_secondary.append('all-bacteria')
    # assert that the second split cleanly separates the eukaryota from the rest
    left_subdomains = self._get_domains([pruned_names_secondary[x] for x in left_subindices])
    right_subdomains = self._get_domains([pruned_names_secondary[x] for x in right_subindices])
    if ('eukaryota' in left_subdomains) and ('eukaryota' in right_subdomains):
        raise HandlingError('eukaryota were not defined by the second split')
    # now we have enough info to define the second supplementary csv file
    self.second_split_object = SupplementarySpreadsheetObject(pruned_names_secondary, L_secondary, v_secondary)
def _do_analysis(self, use_generalized_nj):
    """
    Do some splits of the tree.

    The results are stored on the instance as first_split_object and
    second_split_object.
    @param use_generalized_nj: True if we use an old method of outgrouping
    @raise HandlingError: when bacteria are on both sides or on neither
    side of the first split, or when eukaryota are on both sides of the
    second split
    """
    # define the distance matrix
    D = np.array(self.pruned_tree.get_distance_matrix(self.pruned_names))
    # get the primary split of the criterion matrix
    L = Euclid.edm_to_laplacian(D)
    v = BuildTreeTopology.laplacian_to_fiedler(L)
    eigensplit = BuildTreeTopology.eigenvector_to_split(v)
    # assert that the first split cleanly separates the bacteria from the rest
    left_indices, right_indices = eigensplit
    left_domains = self._get_domains(
        [self.pruned_names[x] for x in left_indices])
    right_domains = self._get_domains(
        [self.pruned_names[x] for x in right_indices])
    if ('bacteria' in left_domains) and ('bacteria' in right_domains):
        raise HandlingError('bacteria were not defined by the first split')
    # now we have enough info to define the first supplementary csv file
    self.first_split_object = SupplementarySpreadsheetObject(
        self.pruned_names, L, v)
    # define the bacteria indices for the second split
    if 'bacteria' in left_domains:
        bacteria_indices = left_indices
    elif 'bacteria' in right_domains:
        bacteria_indices = right_indices
    else:
        # without this branch the name would be unbound below and the
        # method would die with an UnboundLocalError instead of a
        # readable message
        raise HandlingError(
            'no bacteria were found on either side of the first split')
    # get the secondary split of interest
    if use_generalized_nj:
        D_secondary = BuildTreeTopology.update_generalized_nj(
            D, bacteria_indices)
        L_secondary = Euclid.edm_to_laplacian(D_secondary)
    else:
        L_secondary = SchurAlgebra.mmerge(L, bacteria_indices)
    full_label_sets = [set([i]) for i in range(len(self.pruned_names))]
    next_label_sets = SchurAlgebra.vmerge(full_label_sets, bacteria_indices)
    v_secondary = BuildTreeTopology.laplacian_to_fiedler(L_secondary)
    eigensplit_secondary = BuildTreeTopology.eigenvector_to_split(
        v_secondary)
    left_subindices, right_subindices = eigensplit_secondary
    # name each row of the secondary matrix;
    # a label set with more than one index is the merged bacteria group
    pruned_names_secondary = []
    for label_set in next_label_sets:
        if len(label_set) == 1:
            label, = label_set
            pruned_names_secondary.append(self.pruned_names[label])
        else:
            pruned_names_secondary.append('all-bacteria')
    # assert that the second split cleanly separates the eukaryota from the rest
    left_subdomains = self._get_domains(
        [pruned_names_secondary[x] for x in left_subindices])
    right_subdomains = self._get_domains(
        [pruned_names_secondary[x] for x in right_subindices])
    if ('eukaryota' in left_subdomains) and ('eukaryota' in right_subdomains):
        raise HandlingError(
            'eukaryota were not defined by the second split')
    # now we have enough info to define the second supplementary csv file
    self.second_split_object = SupplementarySpreadsheetObject(
        pruned_names_secondary, L_secondary, v_secondary)