def get_response_content(fs):
    """
    Lay out a newick tree and render it as an image.
    @param fs: form data; tree, daylight, curved, arc and imageformat are read
    @return: the value returned by DrawTreeImage.get_tree_image
    @raise HandlingError: for negative branch lengths or a cairo error
    """
    # parse the newick string into a spatial tree and validate it
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        raise HandlingError(
            'drawing a tree with negative branch lengths is not implemented')
    tree.add_branch_lengths()
    # Apply the first requested layout.
    # A RuntimeError raised by either daylight layout is ignored,
    # and execution continues to the drawing step.
    if fs.daylight:
        try:
            FastDaylightLayout.StraightBranchLayout().do_layout(tree)
        except RuntimeError:
            pass
    elif fs.curved:
        try:
            curved_layout = FastDaylightLayout.CurvedBranchLayout()
            curved_layout.set_min_segment_count(400)
            curved_layout.do_layout(tree)
        except RuntimeError:
            pass
    elif fs.arc:
        EqualArcLayout.do_layout(tree)
    # render the tree at a fixed size
    image_size = (640, 480)
    try:
        extension = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, image_size, extension)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def get_response_content(fs):
    """
    Simulate an ancestral alignment under a Jukes-Cantor rate matrix.
    @param fs: form data; tree and fasta are read
    @return: fasta sequences, one per tree node in preorder, newline joined
    @raise HandlingError: for alignment or simulation errors
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the nucleotide alignment
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the jukes cantor rate matrix object over the states ACGT
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_dict, states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # replace the alignment with the simulated ancestral alignment
    try:
        alignment = PhyLikelihood.simulate_ancestral_alignment(
                tree, alignment, jc_model)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # order the output sequences by a preorder traversal of the tree
    sequences = [
            alignment.get_fasta_sequence(node.name)
            for node in tree.preorder()]
    return '\n'.join(sequences) + '\n'
def get_response_content(fs):
    """
    Draw a graph whose layout and colors come from its Laplacian.
    @param fs: form data; graph_data, weighted, total_width, total_height,
    border, flip, imageformat and black are read
    @return: the value returned by get_image_string
    @raise HandlingError: if the drawable area is empty or cairo fails
    """
    # read the graph; only the edges and (optionally) distances are kept
    points, edges = read_points_and_edges(fs.graph_data)
    # weight each edge by inverse euclidean length, or uniformly
    if fs.weighted:
        np_points = [np.array(p) for p in points]
        weights = [
                1.0 / np.linalg.norm(np_points[j] - np_points[i])
                for i, j in edges]
    else:
        weights = [1.0 for _ in edges]
    # make sure something is left to draw on after removing the border
    drawable_width = fs.total_width - 2 * fs.border
    drawable_height = fs.total_height - 2 * fs.border
    if drawable_width < 1 or drawable_height < 1:
        raise HandlingError(
            'the image dimensions do not allow for enough drawable area')
    # derive new point locations from the pseudoinverse of the laplacian
    laplacian = edges_to_laplacian(edges, weights)
    X = Euclid.dccov_to_points(np.linalg.pinv(laplacian))
    if fs.flip:
        points = [(-p[0], p[1]) for p in X]
    else:
        points = [(p[0], p[1]) for p in X]
    # color the points according to their first coordinate
    x_coords, y_coords = zip(*points)
    colors = valuations_to_colors(x_coords)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    info = ImageInfo(fs.total_width, fs.total_height, fs.border, ext)
    try:
        return get_image_string(points, edges, colors, fs.black, info)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def parse_module_lines(lines):
    """
    Parse the module lines of the MMC csv output.
    The first line is skipped.
    Each remaining line must have five comma separated values,
    of which only the first three are used.
    @param lines: lines of the MMC csv output
    @return: (gene labels, module indices, gene indices)
    """
    # do some basic validation
    min_nlines = 3
    if len(lines) < min_nlines:
        raise HandlingError('expected at least %d module lines' % min_nlines)
    rows = []
    for line in lines[1:]:
        values = parse_comma_separated_line(line)
        if len(values) != 5:
            raise HandlingError(
                'expected five comma separated values on each module line')
        gene_label, raw_module_index, raw_gene_index, _, _ = values
        # parse each index as an integer and subtract one from it
        row = [gene_label]
        for raw_index, complaint in (
                (raw_module_index,
                    'expected the module index to be an integer: '),
                (raw_gene_index,
                    'expected the gene index to be an integer: ')):
            try:
                row.append(int(raw_index) - 1)
            except ValueError:
                raise HandlingError(complaint + raw_index)
        rows.append(row)
    # transpose the rows into the three parallel sequences
    return zip(*rows)
def get_response_content(fs):
    """
    Repeatedly halve long branches of a tree by inserting nodes.
    @param fs: form data; tree and segments are read
    @return: the newick string of the modified tree followed by a newline
    @raise HandlingError: if the root has a branch length
    or a non-root node lacks one
    """
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # the longest allowed branch is the total length over the segment count
    min_segment_count = fs.segments
    max_branch_length = tree.get_total_length() / float(min_segment_count)
    # keep making passes over the tree until a pass inserts no node
    while True:
        snapshot = list(tree.preorder())
        for node in snapshot:
            if node is tree.root:
                if node.blen is not None:
                    raise HandlingError(
                        'the root node should not have a branch length')
                continue
            if node.blen is None:
                raise HandlingError(
                    'each non-root node should have a branch length')
            if node.blen > max_branch_length:
                # the inserted node copies the name of the node below it
                new = Newick.NewickNode()
                new.name = node.name
                tree.insert_node(new, node.parent, node, .5)
        if len(snapshot) == len(list(tree.preorder())):
            break
    return tree.get_newick_string() + '\n'
def get_response_content(fs):
    """
    Sample sequences on a tree and return them as a fasta alignment.
    @param fs: form data; tree, order and length are read
    @return: the fasta string of the sampled alignment plus a newline
    @raise HandlingError: if the ordered names do not match the tree tips
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the sequence order if it exists
    ordered_names = Util.get_stripped_lines(fs.order.splitlines())
    if ordered_names:
        # the provided names must be exactly the tip names of the tree
        observed_name_set = set(ordered_names)
        expected_name_set = set(tip.get_name() for tip in tree.gen_tips())
        extra_names = observed_name_set - expected_name_set
        missing_names = expected_name_set - observed_name_set
        if extra_names:
            msg_a = 'the list of ordered names includes these names '
            msg_b = 'not found in the tree: %s' % str(tuple(extra_names))
            raise HandlingError(msg_a + msg_b)
        if missing_names:
            msg_a = 'the tree includes these names not found in the list '
            msg_b = 'of ordered names: %s' % str(tuple(missing_names))
            raise HandlingError(msg_a + msg_b)
    else:
        # No order was provided so fall back to the tip order of the tree.
        # The loop variable must match the name used in the expression;
        # previously the expression referred to an undefined name.
        ordered_names = [tip.get_name() for tip in tree.gen_tips()]
    # do the sampling
    sampled_sequences = JC69.sample_sequences(tree, ordered_names, fs.length)
    alignment = Fasta.create_alignment(ordered_names, sampled_sequences)
    # return the response
    return alignment.to_fasta_string() + '\n'
def get_response_content(fs):
    """
    Validate a labeled distance matrix and draw one iteration.
    @param fs: form data; matrix, labels and iteration are read
    @return: the value returned by get_image_string
    @raise HandlingError: if the matrix, labels or iteration are invalid
    """
    D = fs.matrix
    nrows = len(D)
    if nrows < 3:
        raise HandlingError(
            'the distance matrix should have at least three rows')
    # the labels must be nonempty, unique, and one per matrix row
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    if not ordered_labels:
        raise HandlingError('no ordered labels were provided')
    if len(ordered_labels) != nrows:
        raise HandlingError(
            'the number of ordered labels '
            'should be the same as the number of rows in the matrix')
    if len(set(ordered_labels)) != len(ordered_labels):
        raise HandlingError('the ordered labels must be unique')
    # the iteration index must be in the closed interval [1, nrows - 2]
    min_iteration = 1
    max_iteration = nrows - 2
    iteration = fs.iteration
    if iteration < min_iteration or iteration > max_iteration:
        raise HandlingError(
            'the iteration index ' +
            'should be in [%d, %d]' % (min_iteration, max_iteration))
    return get_image_string(D, ordered_labels, iteration)
def process(args, raw_hud_lines, nseconds=2):
    """
    Select a submatrix of the decoded data and report its separation.
    @param args: an object with nwords and nchars attributes
    @param raw_hud_lines: lines to be decoded by hud.decode
    @param nseconds: passed through to get_selections
    @return: a multi-line report with trailing whitespace removed
    @raise HandlingError: if the data has too few rows or columns
    """
    nwords = args.nwords
    nchars = args.nchars
    names, data = hud.decode(raw_hud_lines)
    out = StringIO()
    # the data must be large enough to take the requested sample
    if len(data) < nwords:
        raise HandlingError(
            'the number of OTUs is smaller than the desired sample')
    if len(data[0]) < nchars:
        raise HandlingError(
            'the number of characters is smaller than the desired sample')
    M = np.array(data)
    # choose the rows and columns
    row_indices, col_indices = get_selections(M, nwords, nchars, nseconds)
    rows_in_order = sorted(row_indices)
    cols_in_order = sorted(col_indices)
    # report the separation of the selection
    separation = get_separation(M, row_indices, col_indices)
    print >> out, 'best separation:', separation
    # report the selected indices
    print >> out, 'selected row indices:', rows_in_order
    print >> out, 'selected column indices:', cols_in_order
    # report the selected values, one named row per line
    for i in rows_in_order:
        row_text = ' '.join(str(M[i, j]) for j in cols_in_order)
        print >> out, names[i] + '\t' + row_text
    return out.getvalue().rstrip()
def get_response_content(fs):
    """
    Analyze an alignment under a three component mixture model.
    @param fs: form data; tree, weight_a..c, matrix_a..c and alignment
    are read
    @return: the string returned by do_analysis followed by a newline
    @raise HandlingError: for a non 4x4 matrix or an alignment error
    """
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # gather the mixture weights and rate matrices in matching order
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    if any(R.shape != (4, 4) for R in matrices):
        raise HandlingError('expected each nucleotide rate matrix to be 4x4')
    # read the nucleotide alignment
    try:
        alignment = Fasta.Alignment(fs.alignment.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # scale the weights so that they sum to one
    weight_sum = sum(weights)
    mixture_proportions = [weight / weight_sum for weight in weights]
    # wrap each matrix in a rate matrix object over the states ACGT
    ordered_states = list('ACGT')
    rate_matrix_objects = [
            RateMatrix.RateMatrix(R.tolist(), ordered_states)
            for R in matrices]
    # build and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    return do_analysis(mixture_model, alignment, tree) + '\n'
def get_response_content(fs):
    """
    Render a codon matrix as an html heat map.
    @param fs: form data; matrix and maxcategories are read
    @return: the html string followed by a newline
    @raise HandlingError: if the matrix shape does not match the codon count
    """
    R = fs.matrix
    nrows, ncols = R.shape
    # the matrix must have one row and one column per non-stop codon
    states = Codon.g_sorted_non_stop_codons
    nstates = len(states)
    if nrows != nstates:
        raise HandlingError('expected %d rows but got %d' % (nstates, nrows))
    if ncols != nstates:
        raise HandlingError(
            'expected %d columns but got %d' % (nstates, ncols))
    # label each codon as amino acid letter, dot, codon, dot
    labels = [
            '%s.%s.' % (Codon.g_codon_to_aa_letter[codon], codon)
            for codon in states]
    # the same labels are used for the rows and the columns
    heatmap = HeatMap.LabeledHeatMap(
            R.tolist(), fs.maxcategories, labels, labels)
    html_string = HeatMap.PreHeatMap(heatmap).get_example_html()
    return html_string + '\n'
def parse_lines(lines):
    """
    Parse city coordinate lines.
    Empty lines are dropped.
    The first remaining line is a header and is skipped.
    Each subsequent line has five whitespace separated values:
    the city name followed by four numbers.
    @param lines: stripped input lines
    @return: (city, lat_deg, lat_min, lon_deg, lon_min) tuples
    """
    nonempty = [line for line in lines if line]
    if not nonempty:
        raise HandlingError('no input was found')
    if len(nonempty) < 2:
        raise HandlingError('expected at least one header and data line')
    rows = []
    for data_line in nonempty[1:]:
        fields = data_line.split()
        if len(fields) != 5:
            raise HandlingError('expected five values per data line')
        # every field after the city name must be readable as a float
        try:
            numbers = tuple(float(field) for field in fields[1:])
        except ValueError:
            raise HandlingError('error reading a value as a number')
        rows.append((fields[0],) + numbers)
    return rows
def get_response_content(fs):
    """
    Build a tree from a distance matrix by neighborhood joining.
    @param fs: form data; matrix, labels, njrecourse and halvingrecourse
    are read
    @return: the newick string of the tree followed by a newline
    @raise HandlingError: for invalid input, an over-long computation,
    or a failure while building the tree
    """
    # read the matrix
    D = fs.matrix
    if len(D) < 3:
        raise HandlingError('the matrix should have at least three rows')
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != len(D):
        msg_a = 'the number of ordered labels should be the same '
        msg_b = 'as the number of rows in the matrix'
        raise HandlingError(msg_a + msg_b)
    # create the tree building object
    splitter = Clustering.StoneExactDMS()
    tree_builder = NeighborhoodJoining.TreeBuilder(
            D.tolist(), ordered_labels, splitter)
    # Set the fallback method in the tree builder.
    # The unused lookup of a raw 'recourse' value through fs.getfirst
    # was removed; only the two flags below decide the fallback.
    if fs.njrecourse:
        tree_builder.set_fallback_name('nj')
    elif fs.halvingrecourse:
        tree_builder.set_fallback_name('halving')
    # assert that the computation will not take too long
    if tree_builder.get_complexity() > 1000000:
        raise HandlingError('this computation would take too long')
    # build the tree, reporting builder failures as handling errors
    try:
        tree = tree_builder.build()
    except NeighborhoodJoining.NeighborhoodJoiningError as e:
        raise HandlingError(e)
    # return the response
    return NewickIO.get_newick_string(tree) + '\n'
def get_response_content(fs):
    """
    Estimate the maximum likelihood distance between two sequences.
    @param fs: form data; fasta, matrix and states are read
    @return: a one line report of the estimated distance
    @raise HandlingError: for an invalid alignment or state list
    """
    # read the alignment, which must be a pair of sequences
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as e:
        raise HandlingError('fasta alignment error: ' + str(e))
    if alignment.get_sequence_count() != 2:
        raise HandlingError('expected a sequence pair')
    # read the rate matrix and its ordered states
    R = fs.matrix
    ordered_states = Util.get_stripped_lines(fs.states.splitlines())
    nstates = len(ordered_states)
    if nstates != len(R):
        raise HandlingError(
            'the number of ordered states must be the same '
            'as the number of rows in the rate matrix')
    if len(set(ordered_states)) != nstates:
        raise HandlingError('the ordered states must be unique')
    # define the objective function of the distance
    rate_matrix_object = RateMatrix.RateMatrix(R.tolist(), ordered_states)
    objective = Objective(alignment.sequences, rate_matrix_object)
    # Use golden section search to find the mle distance.
    # The bracket is just a suggestion.
    mle_distance = optimize.golden(objective, brack=(0.51, 2.01))
    # write the response
    out = StringIO()
    print >> out, 'maximum likelihood distance:', mle_distance
    return out.getvalue()
def get_supplementary_object(fs):
    """
    @param fs: form data; only like_nj is read
    @return: a SupplementaryObject built from the module level tree data
    @raise HandlingError: if the subtree taxa do not partition the full tree
    """
    # extract the tip name sets from the newick tree strings
    archaea_names = newick_string_to_tip_names(g_archaea_data)
    bacteria_names = newick_string_to_tip_names(g_bacteria_data)
    eukaryota_names = newick_string_to_tip_names(g_eukaryota_data)
    all_names = newick_string_to_tip_names(g_full_data)
    # the subtree sizes must add up to the size of the full tree
    nfull = len(all_names)
    ndisjoint = sum(
            len(names) for names in (
                archaea_names, bacteria_names, eukaryota_names))
    if ndisjoint != nfull:
        raise HandlingError(
            'there are %d taxa in the full tree '
            'but %d taxa in its subtrees' % (nfull, ndisjoint))
    # the union of the subtree names must be the full set of names
    if (archaea_names | bacteria_names | eukaryota_names) != all_names:
        raise HandlingError(
            'the set of taxa in the full tree '
            'is not the union of taxa in its subtrees')
    # map each taxon name to its domain, assigning in the original order
    domain_name_pairs = (
            ('archaea', archaea_names),
            ('bacteria', bacteria_names),
            ('eukaryota', eukaryota_names))
    taxon_to_domain = {}
    for domain, names in domain_name_pairs:
        for name in names:
            taxon_to_domain[name] = domain
    taxon_to_domain['all-bacteria'] = 'bacteria'
    # create the supplementary object
    use_generalized_nj = fs.like_nj
    return SupplementaryObject(
            taxon_to_domain, g_full_data, use_generalized_nj)
def get_response_content(fs):
    """
    Simulate a pair of nucleotide sequences under an F84 rate matrix.
    @param fs: form data; A, C, G, T, kappa, distance and length are read
    @return: the simulated pair as a fasta string followed by a newline
    @raise HandlingError: if kappa and the frequencies are incompatible
    """
    # convert the nucleotide weights to probabilities
    nt_weights = [fs.A, fs.C, fs.G, fs.T]
    weight_total = float(sum(nt_weights))
    nt_probs = [weight / weight_total for weight in nt_weights]
    # Assert that the kappa value and the nucleotide
    # probabilities are compatible.
    A, C, G, T = nt_probs
    R = float(A + G)
    Y = float(C + T)
    if R <= 0:
        raise HandlingError('the frequency of a purine must be positive')
    if Y <= 0:
        raise HandlingError('the frequency of a pyrimidine must be positive')
    if fs.kappa <= max(-Y, -R):
        raise HandlingError(
            'kappa must be greater than max(-R, -Y) '
            'where R and Y are the purine and pyrimidine frequencies')
    # create the rate matrix object and simulate the sequence pair
    model = F84.create_rate_matrix(fs.kappa, nt_probs)
    sequence_pair = PairLikelihood.simulate_sequence_pair(
            fs.distance, model, fs.length)
    # write the pair as fasta text, then round-trip it through an alignment
    aln = StringIO()
    for header, sequence in (
            ('>first', sequence_pair[0]),
            ('>second', sequence_pair[1])):
        print >> aln, header
        print >> aln, ''.join(sequence)
    alignment = Fasta.Alignment(StringIO(aln.getvalue()))
    return alignment.to_fasta_string() + '\n'
def get_response_content(fs):
    """
    Report leaf weights for a tree pruned to the selected tips.
    @param fs: form data; tree, selection, stone and thompson are read
    @return: one 'name: weight' line per pair, newline terminated
    @raise HandlingError: for negative branch lengths, invalid selected
    names, a failed node lookup, or no selected weighting method
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    tree.add_branch_lengths()
    if tree.has_negative_branch_lengths():
        msg_a = 'calculating weights for a tree '
        msg_b = 'with negative branch lengths is not implemented'
        raise HandlingError(msg_a + msg_b)
    # get the selected names
    selection = Util.get_stripped_lines(fs.selection.splitlines())
    selected_name_set = set(selection)
    possible_name_set = set(node.get_name() for node in tree.gen_tips())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        msg_a = 'the following selected names are not valid tips: '
        msg_b = str(tuple(extra_names))
        raise HandlingError(msg_a + msg_b)
    # prune every tip that was not selected
    unselected = set(node.name for node in tree.gen_tips()) - set(selection)
    for name in unselected:
        # The exception is referenced through the Newick module,
        # which is the only name for it that this function can rely on.
        try:
            node = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        tree.prune(node)
    # get the weights using the selected method
    if fs.stone:
        name_weight_pairs = LeafWeights.get_stone_weights(tree)
    elif fs.thompson:
        name_weight_pairs = LeafWeights.get_thompson_weights(tree)
    else:
        # without this branch the name below would be unbound
        raise HandlingError('no weighting method was selected')
    # report the weights
    lines = ['%s: %f' % pair for pair in name_weight_pairs]
    return '\n'.join(lines) + '\n'
def get_response_content(fs):
    """
    Evaluate a split of the tips of a tree.
    @param fs: form data; tree and selection are read
    @return: the string form of the evaluation followed by a newline
    @raise HandlingError: if the selection is invalid or degenerate
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # every selected name must be the name of a tip
    selection = Util.get_stripped_lines(fs.selection.splitlines())
    selected_name_set = set(selection)
    possible_name_set = set(node.get_name() for node in tree.gen_tips())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        raise HandlingError(
            'the following selected names ' +
            'are not valid tips: %s' % str(tuple(extra_names)))
    # neither side of the split may be empty
    complement_name_set = possible_name_set - selected_name_set
    if not (selected_name_set and complement_name_set):
        raise HandlingError('the selection is degenerate')
    # encode the split as +1 for selected tips and -1 for the others
    ordered_names = [node.get_name() for node in tree.gen_tips()]
    Y = np.array(
            [1 if name in selected_name_set else -1
                for name in ordered_names])
    # evaluate the quadratic form Y R Y
    D = tree.get_distance_matrix(ordered_names)
    R = Clustering.get_R_balaji(D)
    value = np.dot(np.dot(Y, R), Y.T)
    return str(value) + '\n'
def get_response_content(fs):
    """
    Analyze each of several newick trees and report the results.
    @param fs: form data; trees, epsilon and options are read
    @return: the response lines of each result, with a blank line
    after each result
    @raise HandlingError: for an invalid tree or threshold
    """
    # parse and validate the trees, one per nonempty stripped line
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                'expected at least 4 tips but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # the negligibility threshold must be in the half open interval [0, 1)
    epsilon = fs.epsilon
    if epsilon < 0 or epsilon >= 1:
        raise HandlingError('invalid threshold for negligibility')
    selected_options = fs.options
    # analyze every tree before writing any output
    results = [AnalysisResult(tree, epsilon) for tree in trees]
    # write the response
    out = StringIO()
    for result in results:
        for line in result.get_response_lines(selected_options):
            print >> out, line
        print >> out
    return out.getvalue()
def get_response_content(fs):
    """
    Simulate tree reconstructions and summarize the partition errors.
    @param fs: form data; exact, sign, nj, random, tree, iterations
    and length are read
    @return: the response text
    @raise HandlingError: if no bipartition function is selected
    or if the computation is predicted to take too long
    """
    # create the splitter object for the selected criterion
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # without this branch the splitter name would be unbound below
        raise HandlingError('no bipartition function was selected')
    # read the original tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the maximum number of steps we want
    max_steps = 1000000
    # Make sure that the splitter object is appropriate
    # for the number of taxa and the number of tree reconstructions.
    ntaxa = len(list(tree.gen_tips()))
    if splitter.get_complexity(ntaxa) * fs.iterations > max_steps:
        msg_a = 'use a faster bipartition function, '
        msg_b = 'fewer taxa, or fewer tree reconstructions'
        raise HandlingError(msg_a + msg_b)
    # define the simulation parameters
    sim = Simulation(splitter, 'nj', 'cgi tree building simulation')
    sim.set_original_tree(tree)
    sim.set_step_limit(max_steps)
    # define an arbitrary but consistent ordering of the taxa
    ordered_names = [node.name for node in tree.gen_tips()]
    # attempt to simulate a bunch of distance matrices
    sampler = DMSampler.DMSampler(tree, ordered_names, fs.length)
    distance_matrices = []
    for result in sampler.gen_samples_or_none():
        # if a proposal was accepted then add it to the list
        if result:
            sequence_list, distance_matrix = result
            distance_matrices.append(distance_matrix)
        # If enough accepted samples have been generated then stop sampling.
        # The comparison is <= so that a nonpositive iteration count
        # cannot make the remaining count skip past zero.
        remaining_acceptances = fs.iterations - len(distance_matrices)
        if remaining_acceptances <= 0:
            break
        # If the remaining number of computrons is predicted
        # to be too much then stop.
        remaining = sampler.get_remaining_computrons(remaining_acceptances)
        if remaining > max_steps:
            msg_a = 'this combination of parameters '
            msg_b = 'is predicted to take too long'
            # the message was previously read from an undefined name
            raise HandlingError(msg_a + msg_b)
    sim.run(distance_matrices, ordered_names)
    # define the response
    out = StringIO()
    print >> out, 'partition error count frequencies:'
    print >> out, sim.get_histogram_string()
    print >> out, ''
    print >> out, 'weighted partition errors:', sim.get_deep_loss()
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Check sign partitions of principal coordinates against each tree.
    For each tree the partition from the distance matrix is expected
    to be valid, and the partition from the elementwise squared distance
    matrix is checked as a possible counterexample.
    @param fs: form data; only trees is read
    @return: a report of the errors and counterexamples that were found
    @raise HandlingError: for an invalid tree
    """
    def sign_partition(names, loadings):
        # split the names by the sign of the corresponding loading
        nonneg = frozenset(
                name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
                name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # parse and validate the trees, one per nonempty stripped line
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                'expected at least four tips but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    out = StringIO()
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        # get the partitions implied by the tree
        valid_parts = TreeComparison.get_partitions(tree)
        ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
        # the partition from the distance matrix should be valid
        D = np.array(tree.get_distance_matrix(ordered_tip_names))
        part = sign_partition(ordered_tip_names, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            print >> out, 'error: a partition that was supposed to be valid was found to be invalid'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
        # the partition from the squared distances may be invalid
        Q = D * D
        part = sign_partition(ordered_tip_names, get_principal_coordinate(Q))
        if part not in valid_parts:
            ncounterexamples += 1
            print >> out, 'found a counterexample!'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
    # summarize the counts
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
def get_running_time(self):
    """
    @return: the number of seconds it took to run the simulation
    @raise HandlingError: if the start time or the stop time is not set
    """
    started = self.start_time
    if started is None:
        raise HandlingError('the simulation has not been started')
    stopped = self.stop_time
    if stopped is None:
        raise HandlingError('the simulation was not successfully completed')
    # the running time is the difference of the two recorded times
    return stopped - started
def get_response_content(fs):
    """
    Draw an image from a matrix.
    @param fs: form data; matrix, imageformat, axes, connections
    and vertices are read
    @return: the value returned by get_image
    @raise HandlingError: if the matrix is smaller than 3x3 or cairo fails
    """
    M = fs.matrix
    nrows, ncols = M.shape[0], M.shape[1]
    if nrows < 3 or ncols < 3:
        raise HandlingError('expected at least a 3x3 matrix')
    # draw the image at a fixed size
    image_size = (640, 480)
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return get_image(
                M.tolist(), image_size, ext,
                fs.axes, fs.connections, fs.vertices)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def get_response_content(fs):
    """
    Sample points inside the africa polygon and draw a colored graph.
    @param fs: form data; seed, npoints, axis, total_width, total_height,
    border and imageformat are read
    @return: the value returned by ImgHelper.get_image_string
    @raise ValueError: if the mds axis is negative or too large
    @raise HandlingError: if the drawable area is empty or cairo fails
    """
    # use a fixed seed if requested
    if fs.seed:
        random.seed(fs.seed)
    # limit the number of rejection sampling iterations
    limit = fs.npoints * 100
    # validate input
    if fs.axis < 0:
        raise ValueError('the mds axis must be nonnegative')
    # connect consecutive boundary points into a closed polygon
    nafrica = len(g_africa_poly)
    africa_edges = [(i, (i + 1) % nafrica) for i in range(nafrica)]
    # sample interior points and triangulate them
    points = sample_with_rejection(fs.npoints, g_africa_poly, limit)
    x_list, y_list = zip(*points)
    tri = Triangulation(x_list, y_list)
    # shift the triangulation indices past the boundary point indices
    tri_edges = [
            (i + nafrica, j + nafrica) for i, j in tri.edge_db.tolist()]
    # the boundary points come first in the combined list
    allpoints = g_africa_poly + points
    # refine the list of edges
    tri_edges = list(
            gen_noncrossing_edges(tri_edges, africa_edges, allpoints))
    tri_edges = get_mst(tri_edges, allpoints)
    alledges = africa_edges + tri_edges
    # weight each interior edge by the inverse of its length
    npoints = len(points)
    A = np.zeros((npoints, npoints))
    for ia, ib in tri_edges:
        xa, ya = allpoints[ia]
        xb, yb = allpoints[ib]
        d = math.hypot(xb - xa, yb - ya)
        row, col = ia - nafrica, ib - nafrica
        A[row, col] = 1 / d
        A[col, row] = 1 / d
    # decompose the pseudoinverse of the graph laplacian
    L = Euclid.adjacency_to_laplacian(A)
    ws, vs = EigUtil.eigh(np.linalg.pinv(L))
    if fs.axis >= len(ws):
        raise ValueError('choose a smaller mds axis')
    v = vs[fs.axis]
    # scale the selected vector in place so its largest magnitude is one
    v /= max(np.abs(v))
    # boundary points are small and black; interior points are colored
    colors = [(0, 0, 0)] * nafrica + [get_color(x) for x in v]
    radii = [2] * nafrica + [5 for _ in points]
    # make sure something is left to draw on after removing the border
    width = fs.total_width - 2 * fs.border
    height = fs.total_height - 2 * fs.border
    if width < 1 or height < 1:
        raise HandlingError(
            'the image dimensions do not allow for enough drawable area')
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    try:
        helper = ImgHelper(
                allpoints, alledges,
                fs.total_width, fs.total_height, fs.border)
        return helper.get_image_string(colors, radii, ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def get_response_content(fs):
    """
    Report the distance matrix of a tree, optionally perturbed.
    @param fs: form data; tree, inlabels, strength, perturbed, distance
    and outlabels are read
    @return: the requested paragraphs separated by blank lines
    @raise HandlingError: for too few leaves or mismatched labels
    """
    def render(title, body):
        # write a title line and a body, stripping surrounding whitespace
        paragraph = StringIO()
        print >> paragraph, title
        print >> paragraph, body
        return paragraph.getvalue().strip()

    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    alphabetically_ordered_states = sorted(
            node.name for node in tree.gen_tips())
    n = len(alphabetically_ordered_states)
    if n < 2:
        raise HandlingError('the newick tree should have at least two leaves')
    # use the provided labels only when more than one was provided
    states = Util.get_stripped_lines(StringIO(fs.inlabels))
    if len(states) > 1:
        if set(states) != set(alphabetically_ordered_states):
            raise HandlingError(
                'if ordered labels are provided, '
                'each should correspond to a leaf of the newick tree')
    else:
        states = alphabetically_ordered_states
    # create the distance matrix
    D = tree.get_distance_matrix(states)
    # multiply each distance by a log-normal factor when a strength is given
    if fs.strength:
        P = [row[:] for row in D]
        for i in range(n):
            for j in range(i):
                x = random.normalvariate(0, fs.strength)
                P[i][j] = P[j][i] = D[i][j] * math.exp(x)
    else:
        P = D
    # collect the requested paragraphs
    paragraphs = []
    if fs.perturbed:
        paragraphs.append(render(
            'a perturbed distance matrix:', MatrixUtil.m_to_string(P)))
    if fs.distance:
        paragraphs.append(render(
            'the original distance matrix:', MatrixUtil.m_to_string(D)))
    if fs.outlabels:
        paragraphs.append(render('ordered labels:', '\n'.join(states)))
    return '\n\n'.join(paragraphs) + '\n'
def run(self, distance_matrices, ordered_names):
    """
    This function stores the losses for each reconstruction.
    It sets the start time before the reconstructions
    and sets the stop time only after all of them have succeeded.
    @param distance_matrices: a sequence of distance matrices
    @param ordered_names: order of taxa in the distance matrix
    @raise HandlingError: if the object was already run,
    if no matrices are given, if the names do not match the tree tips,
    or if a tree cannot be built
    """
    if self.start_time is not None:
        msg = 'each simulation object should be run only once'
        raise HandlingError(msg)
    if not distance_matrices:
        # the exception name was previously misspelled
        raise HandlingError('no distance matrices were provided')
    tip_name_set = set(node.name for node in self.original_tree.gen_tips())
    if tip_name_set != set(ordered_names):
        raise HandlingError('leaf name mismatch')
    self.start_time = time.time()
    # Define the reference tree and its maximum cost
    # under different loss functions.
    reference_tree = self.original_tree
    max_error_count = TreeComparison.get_nontrivial_split_count(
            reference_tree)
    max_loss_value = TreeComparison.get_weighted_split_count(
            reference_tree)
    for distance_matrix in distance_matrices:
        # create the tree builder
        tree_builder = NeighborhoodJoining.TreeBuilder(
                distance_matrix, ordered_names, self.splitter)
        # set parameters of the validating tree builder
        tree_builder.set_fallback_name(self.fallback_name)
        # build the tree
        try:
            query_tree = tree_builder.build()
        except NeighborhoodJoining.NeighborhoodJoiningError as e:
            raise HandlingError(e)
        # Note the number and weight of partition errors
        # during the reconstruction.
        error_count = TreeComparison.get_split_distance(
                query_tree, reference_tree)
        loss_value = TreeComparison.get_weighted_split_distance(
                query_tree, reference_tree)
        # make sure that the summary is internally consistent
        assert error_count <= max_error_count, (error_count, max_error_count)
        assert loss_value <= max_loss_value, (loss_value, max_loss_value)
        # save the reconstruction characteristics to use later
        self.error_counts.append(error_count)
        self.loss_values.append(loss_value)
        self.max_error_counts.append(max_error_count)
        self.max_loss_values.append(max_loss_value)
    self.stop_time = time.time()
def get_response_content(fs):
    """
    Remove a uniquely named non-root node from a tree.
    @param fs: form data; tree and node are read
    @return: the newick string of the modified tree followed by a newline
    @raise HandlingError: if the node lookup fails or the node is the root
    """
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # look up the node to remove
    try:
        target = tree.get_unique_node(fs.node)
    except Newick.NewickSearchError as e:
        raise HandlingError(e)
    if target is tree.root:
        raise HandlingError('the root cannot be removed')
    tree.remove_node(target)
    return tree.get_newick_string() + '\n'
def get_response_content(fs):
    """
    Build a neighbor joining tree from a labeled distance matrix.
    @param fs: form data; matrix and labels are read
    @return: the newick string of the tree followed by a newline
    @raise HandlingError: for too few rows or a label count mismatch
    """
    D = fs.matrix
    nrows = len(D)
    if nrows < 3:
        raise HandlingError('the matrix should have at least three rows')
    # there must be one label per matrix row
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != nrows:
        raise HandlingError(
            'the number of ordered labels should be the same '
            'as the number of rows in the matrix')
    tree = NeighborJoining.make_tree(D.tolist(), ordered_labels)
    return NewickIO.get_newick_string(tree) + '\n'
def get_response_content(fs):
    """
    Draw an image of augmented spectra for a tree.
    @param fs: form data; tree_string, imageformat, hticks, border, slow,
    fast and denom are read
    @return: the value returned by create_image
    @raise HandlingError: if fewer than two horizontal ticks are requested
    """
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # the vertex count is computed as in the original but is not used below
    nvertices = sum(1 for _ in tree.preorder())
    nleaves = sum(1 for _ in tree.gen_tips())
    # get the distance matrix for the ordered ids from get_ordered_ids
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # get the image extension
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # get the scaling factors and offsets
    if fs.hticks < 2:
        raise HandlingError(
            'expected at least two ticks on the horizontal axis')
    width, height = physical_size
    xoffset = yoffset = fs.border
    yscale = float(height - 2 * fs.border)
    xscale = (width - 2 * fs.border) / float(fs.hticks - 1)
    # choose the eigendecomposition function
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    # the target eigenvalues come from the tips alone, scaled by denom
    tip_ids = [id(node) for node in tree.gen_tips()]
    D_tips = np.array(tree.get_partial_distance_matrix(tip_ids))
    G_tips = Euclid.edm_to_dccov(D_tips)
    target_ws = scipy.linalg.eigh(G_tips, eigvals_only=True) * fs.denom
    # draw the image
    return create_image(
            ext, physical_size, xscale, yscale, xoffset, yoffset,
            D, nleaves, fs.hticks, fs.denom, fn, target_ws)
def get_response_content(fs):
    """
    Draw eigenvectors derived from a laplacian matrix.
    @param fs: form data; nvertices, naxes, eigenvalue_scaling,
    imageformat and xaxis_length are read
    @return: the value returned by create_image_string
    @raise ValueError: if too many axes are requested for the vertex count
    @raise HandlingError: if cairo fails
    """
    # check input compatibility
    if fs.nvertices < fs.naxes + 1:
        raise ValueError(
            'attempting to plot too many eigenvectors '
            'for the given number of vertices')
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # eigendecompose the matrix returned by Euclid.edm_to_dccov
    L = create_laplacian_matrix(fs.nvertices)
    D = Euclid.laplacian_to_edm(L)
    HSH = Euclid.edm_to_dccov(D)
    W, VT = np.linalg.eigh(HSH)
    V = VT.T.tolist()
    # Sort the (sqrt eigenvalue, vector) pairs in descending order
    # and drop the last pair.
    ranked = sorted(zip(np.sqrt(W), V))
    ranked.reverse()
    ranked = ranked[:-1]
    if fs.eigenvalue_scaling:
        vectors = [np.array(v) * w for w, v in ranked]
    else:
        vectors = [np.array(v) for w, v in ranked]
    X = np.array(zip(*vectors))
    # flip each selected axis so that its first entry is nonnegative
    F = X.T[:fs.naxes]
    for i in range(fs.naxes):
        if F[i][0] < 0:
            F[i] *= -1
    # draw the image
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return create_image_string(ext, physical_size, F, fs.xaxis_length)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def get_response_content(fs):
    """
    Show the eigendecomposition for a tree as an html table.
    @param fs: form data; only tree is read
    @return: an html document containing a labeled table
    @raise HandlingError: if the tree has fewer than two leaves
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = sorted(node.name for node in tree.gen_tips())
    n = len(ordered_names)
    if n < 2:
        raise HandlingError('the newick tree should have at least two leaves')
    # eigendecompose -1/2 times the double centered distance matrix
    D = np.array(tree.get_distance_matrix(ordered_names))
    G = (-0.5) * MatrixUtil.double_centered(D)
    eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
    eigenvectors = eigenvector_transposes.T
    # order the (eigenvalue, eigenvector) pairs from largest to smallest
    sorted_eigensystem = sorted(zip(eigenvalues, eigenvectors))
    sorted_eigensystem.reverse()
    sorted_eigenvalues, sorted_eigenvectors = zip(*sorted_eigensystem)
    # transpose so that each row corresponds to one name
    M = zip(*sorted_eigenvectors)
    # write the html
    out = StringIO()
    for line in (
            '<html>',
            '<body>',
            HtmlTable.get_labeled_table_string(
                sorted_eigenvalues, ordered_names, M),
            '</body>',
            '</html>'):
        print >> out, line
    return out.getvalue()