def diff(self, other_angle, next_stem_length=1):
    '''
    Measure how far apart the hypothetical next stems of two angles are.

    For each angle, the stem start is the cartesian form of
    ``(r1, u1, v1)`` and the stem end is that start shifted by the
    cartesian form of ``(next_stem_length, u, v)``.

    :param other_angle: The other angle stat
    :param next_stem_length: The length of the stem that is attached to
                             this angle.
    :return: The distance between the two stem starts plus the distance
             between the two stem ends.
    '''
    def _start_and_vector(angle):
        # Convert the start position first, then the stem vector.
        start = np.array(
            ftuv.spherical_polar_to_cartesian(
                [angle.r1, angle.u1, angle.v1]))
        stem_vec = np.array(
            ftuv.spherical_polar_to_cartesian(
                [next_stem_length, angle.u, angle.v]))
        return start, stem_vec

    own_start, own_vec = _start_and_vector(self)
    other_start, other_vec = _start_and_vector(other_angle)

    start_gap = ftuv.vec_distance(own_start, other_start)
    end_gap = ftuv.vec_distance(own_start + own_vec,
                                other_start + other_vec)
    return start_gap + end_gap
def test_projection_with_virtual_residues(self):
    """
    A projected virtual residue must be nearer to the projection of its own
    cg element than to the projection of the other element.
    """
    cg = ftmc.CoarseGrainRNA.from_bg_file(
        'test/forgi/threedee/data/1y26.cg')
    first_res = 1
    middle_res = len(cg.seq) // 2
    proj = fpp.Projection2D(
        cg, [1., 1., 1.],
        project_virtual_residues=[first_res, middle_res])
    elem_first = cg.get_node_from_residue_num(first_res)
    elem_middle = cg.get_node_from_residue_num(middle_res)

    # Residue 1: its own element must be the closer one.
    own_dist = ftuv.vec_distance(proj._coords[elem_first][0],
                                 proj.get_vres_by_position(first_res))
    other_dist = ftuv.vec_distance(proj._coords[elem_middle][0],
                                   proj.get_vres_by_position(first_res))
    self.assertLess(
        own_dist, other_dist,
        msg="Projected virtual residue is closer to projection of a cg-element "
        "far away than to the projection of corresponding cg element.")

    # Middle residue: same check with the roles of the elements swapped.
    own_dist = ftuv.vec_distance(proj._coords[elem_middle][0],
                                 proj.get_vres_by_position(middle_res))
    other_dist = ftuv.vec_distance(proj._coords[elem_first][0],
                                   proj.get_vres_by_position(middle_res))
    self.assertLess(
        own_dist, other_dist,
        msg="Projected virtual residue is closer to projection of a cg-element "
        "far away than to the corresponding cg element.")
def cg_dist_sum(cgs, elem1, elem2):
    """
    For every cg in `cgs`, add up the start-to-end distances of two elements.

    :param cgs: An iterable of objects whose ``coords[elem]`` yields a
                ``(start, end)`` pair.
    :param elem1: Name of the first element.
    :param elem2: Name of the second element.
    :return: A list with one summed length per cg, in input order.
    """
    def _span(cg, elem):
        first, last = cg.coords[elem]
        return ftuv.vec_distance(first, last)

    return [_span(cg, elem1) + _span(cg, elem2) for cg in cgs]
def test_virtual_atoms_stem_distance_to_stacked_base(self):
    """The C1' virtual atoms of residues 1 and 2 are between 2 and 10 apart."""
    cg = ftmc.CoarseGrainRNA.from_bg_file(
        'test/forgi/threedee/data/1y26.cg')
    atoms_res1 = cg.virtual_atoms(1)
    atoms_res2 = cg.virtual_atoms(2)

    def _c1_gap():
        return ftuv.vec_distance(atoms_res1["C1'"], atoms_res2["C1'"])

    self.assertLess(_c1_gap(), 10, msg="Virtual atoms too far apart")
    self.assertGreater(_c1_gap(), 2, msg="Virtual atoms too close")
def test_virtual_atoms_stem_distance_to_stacked_base(self):
    """The C1' virtual atoms of residues 1 and 2 are between 2 and 10 apart."""
    cg = ftmc.CoarseGrainRNA('test/forgi/threedee/data/1y26.cg')
    atoms_res1 = cg.virtual_atoms(1)
    atoms_res2 = cg.virtual_atoms(2)

    def _c1_gap():
        return ftuv.vec_distance(atoms_res1["C1'"], atoms_res2["C1'"])

    self.assertLess(_c1_gap(), 10, msg="Virtual atoms too far apart")
    self.assertGreater(_c1_gap(), 2, msg="Virtual atoms too close")
def test_virtual_atoms_stem_distance_to_pairing_partner(self):
    """
    The C1' virtual atoms of residue 1 and its pairing partner are between
    8 and 25 apart.
    """
    cg = ftmc.CoarseGrainRNA.from_bg_file(
        'test/forgi/threedee/data/1y26.cg')
    atoms_res = cg.virtual_atoms(1)
    atoms_partner = cg.virtual_atoms(cg.pairing_partner(1))

    def _c1_gap():
        return ftuv.vec_distance(atoms_res["C1'"], atoms_partner["C1'"])

    self.assertLess(_c1_gap(), 25, msg="Virtual atoms too far apart")
    self.assertGreater(_c1_gap(), 8, msg="Virtual atoms too close")
def test_to_and_from_cgstring_vres(self):
    """Virtual residue positions survive a round trip through the cg string."""
    [cg] = ftmc.CoarseGrainRNA.from_pdb('test/forgi/threedee/data/2mis.pdb')
    cg.add_all_virtual_residues()
    cgstri = cg.to_cg_string()
    self.assertIn("vres", cgstri)

    cg2 = ftmc.CoarseGrainRNA.from_bg_string(cgstri)
    h0_define = cg2.defines["h0"]
    self.assertEqual(len(cg2.vposs["h0"]), h0_define[1] - h0_define[0] + 1)

    # Spot-check one position in a hairpin and one in an interior loop.
    for elem, index in (("h0", 0), ("i0", 2)):
        self.assertLess(
            ftuv.vec_distance(cg.vposs[elem][index], cg2.vposs[elem][index]),
            10**-8)
def test_to_and_from_cgstring_vres(self):
    """Virtual residue positions survive a round trip through the cg string."""
    (cg,) = ftmc.CoarseGrainRNA.from_pdb('test/forgi/threedee/data/2mis.pdb')
    cg.add_all_virtual_residues()
    cgstri = cg.to_cg_string()
    self.assertIn("vres", cgstri)

    cg2 = ftmc.CoarseGrainRNA.from_bg_string(cgstri)
    expected_count = cg2.defines["h0"][1] - cg2.defines["h0"][0] + 1
    self.assertEqual(len(cg2.vposs["h0"]), expected_count)

    tolerance = 10**-8
    hairpin_shift = ftuv.vec_distance(cg.vposs["h0"][0], cg2.vposs["h0"][0])
    self.assertLess(hairpin_shift, tolerance)
    interior_shift = ftuv.vec_distance(cg.vposs["i0"][2], cg2.vposs["i0"][2])
    self.assertLess(interior_shift, tolerance)
def test_line_segment_distance_windschief(self):
    """Two skew ("windschief") segments are 5.2 apart at their closest points."""
    seg_a = (np.array([0., 0., -10.]), np.array([0., 0., 10.]))
    seg_b = (np.array([5.2, -10., 5.]), np.array([5.2, 10., 5.]))
    closest_a, closest_b = ftuv.line_segment_distance(
        seg_a[0], seg_a[1], seg_b[0], seg_b[1])
    self.assertAlmostEqual(ftuv.vec_distance(closest_a, closest_b), 5.2)
def test_line_segment_distance_point_to_line(self):
    """The end point of one segment is 2 away from the other segment."""
    seg_a = (np.array([0., 0., 1.]), np.array([0., 0., 10.]))
    seg_b = (np.array([0., -10., 12.]), np.array([0., 10., 12.]))
    closest_a, closest_b = ftuv.line_segment_distance(
        seg_a[0], seg_a[1], seg_b[0], seg_b[1])
    self.assertAlmostEqual(ftuv.vec_distance(closest_a, closest_b), 2.)
def test_line_segment_distance_parallel(self):
    """Two collinear segments with a gap of 1 between them are 1 apart."""
    seg_a = (np.array([0., 0., 1.]), np.array([0., 0., 10.]))
    seg_b = (np.array([0., 0., 11.]), np.array([0., 0., 20.]))
    closest_a, closest_b = ftuv.line_segment_distance(
        seg_a[0], seg_a[1], seg_b[0], seg_b[1])
    self.assertAlmostEqual(ftuv.vec_distance(closest_a, closest_b), 1.)
def get_flanking_stem_vres_distance(bg, ld):
    '''
    Get the distance between the two virtual residues adjacent to this
    bulge region.

    For each flanking stem, the virtual residue at the last stem position is
    used when the side returned by `get_sides` is 1, otherwise the one at
    position 0. Each position is shifted by 7 times its accompanying vector
    before the distance is measured.

    @param bg: The BulgeGraph data structure
    @param ld: The name of the linking bulge
    @return: The distance, or 0. if the bulge does not have exactly two
             neighbouring elements.
    '''
    if len(bg.edges[ld]) != 2:
        return 0.

    stem_a, stem_b = list(bg.edges[ld])
    (side_a, _) = bg.get_sides(stem_a, ld)
    (side_b, _) = bg.get_sides(stem_b, ld)

    if side_a == 1:
        index_a = bg.stem_length(stem_a) - 1
    else:
        index_a = 0
    (pos_a, vec_a, _, _) = ftug.virtual_res_3d_pos(bg, stem_a, index_a)

    if side_b == 1:
        index_b = bg.stem_length(stem_b) - 1
    else:
        index_b = 0
    (pos_b, vec_b, _, _) = ftug.virtual_res_3d_pos(bg, stem_b, index_b)

    return cuv.vec_distance((pos_a + 7 * vec_a), (pos_b + 7. * vec_b))
def _clashfree_annot_pos(pos, coords):
    """
    Tell whether `pos` keeps clear of all points in `coords`.

    :param pos: The candidate position.
    :param coords: An iterable of positions to test against.
    :return: False as soon as one coordinate is closer than 14 to `pos`,
             True otherwise (including for empty `coords`).
    """
    return not any(ftuv.vec_distance(point, pos) < 14 for point in coords)
def test_line_segment_distance_real_world(self):
    """For a pair of real-world segments, the closest points are less than 25 apart."""
    seg_a = (np.array([0., 0., 1.]),
             np.array([-2.76245752, -6.86976093, 7.54094508]))
    seg_b = (np.array([-27.57744115, 6.96488989, -22.47619655]),
             np.array([-16.93424799, -4.0631445, -16.19822301]))
    closest_a, closest_b = ftuv.line_segment_distance(
        seg_a[0], seg_a[1], seg_b[0], seg_b[1])
    self.assertLess(ftuv.vec_distance(closest_a, closest_b), 25)
def distances(s):
    '''
    Compute the distance array for a shape s.

    :param s: An iterable of points.
    :return: A numpy array holding the distance of every unordered pair of
             points, in the order produced by `itertools.combinations`.
    '''
    pairwise = []
    for first, second in it.combinations(s, r=2):
        pairwise.append(ftuv.vec_distance(first, second))
    return np.array(pairwise)
def _condense_pointWithLine_step(self, cutoff):
    """
    Used by `self.condense(cutoff)` as a single condensation step of a point
    with a line segment.

    Searches for a node that lies closer than `cutoff` to the interior of an
    existing edge. The first such node is merged with its nearest point on
    that edge, and the method returns immediately.

    :param cutoff: Distance below which a node is merged into an edge.
    :returns: True if one merge was performed, False if no node/edge
              combination qualified.
    """
    # i < j makes sure every edge (source, target) is looked at only once.
    for i, source in enumerate(self.proj_graph.nodes()):
        for j, target in enumerate(self.proj_graph.nodes()):
            if j > i and self.proj_graph.has_edge(source, target):
                for k, node in enumerate(self.proj_graph.nodes()):
                    # The end points of the edge itself are not candidates.
                    if k == i or k == j:
                        continue
                    nearest = ftuv.closest_point_on_seg(
                        source, target, node)
                    nearest = tuple(nearest)
                    # Only points inside the segment count; a nearest point
                    # that coincides with an end point is skipped.
                    if nearest == source or nearest == target:
                        continue
                    if (ftuv.vec_distance(nearest, node) < cutoff):
                        newnode = ftuv.middlepoint(node, tuple(nearest))
                        # Split the edge at newnode, reusing the attributes
                        # of the old edge for both halves.
                        # NOTE(review): how the `attr_dict` keyword is
                        # interpreted by add_edge depends on the networkx
                        # version -- confirm.
                        attr_dict = self.proj_graph.adj[source][target]
                        self.proj_graph.remove_edge(source, target)
                        if source != newnode:
                            self.proj_graph.add_edge(
                                source, newnode, attr_dict=attr_dict)
                        if target != newnode:
                            self.proj_graph.add_edge(target, newnode,
                                                     attr_dict=attr_dict)
                        if newnode != node:  # Equality possible because of floating point inaccuracy
                            # Re-attach all edges of the old node to newnode
                            # before dropping the old node.
                            for neighbor in self.proj_graph.adj[node].keys():
                                attr_dict = self.proj_graph.adj[node][neighbor]
                                self.proj_graph.add_edge(newnode, neighbor,
                                                         attr_dict=attr_dict)
                            self.proj_graph.remove_node(node)
                        # The graph changed while being iterated, so stop here.
                        return True
    return False
def get_longest_arm_length(self):
    """
    Get the length of the longest arm.

    An arm is a simple path from a node of `degree!=2` to a node of
    `degree 1`, if all the other nodes on the path have `degree 2`.

    .. note:: This measure is sensitive to the resolution of a projection
              the same way the length of a coastline is sensitive to the
              resolution.

    .. warning:: Whether this code will stay in the library or not depends
                 on future evaluation of the usefulness of this and similar
                 descriptors.

    :returns: The length and a tuple of points
              `(leaf_node, corresponding_branch_point)`
    :raises ValueError: If the projection graph contains no node of degree 1
                        (raised by `max` on the empty result).
    """
    lengths = {}
    target = {}
    # nx.degree returns a plain dict in networkx 1.x but a DegreeView
    # (an iterable of (node, degree) pairs without `.items()`) in
    # networkx >= 2. dict(...) accepts both.
    for leaf, degree in dict(nx.degree(self.proj_graph)).items():
        if degree != 1:
            continue
        lengths[leaf] = 0
        previous = None
        current = leaf
        # Walk away from the leaf until a node of degree != 2 is reached.
        while True:
            successors = [
                x for x in self.proj_graph[current].keys() if x != previous
            ]
            # On a chain of degree-2 nodes there is exactly one way forward.
            assert len(successors) == 1
            successor = successors[0]
            lengths[leaf] += ftuv.vec_distance(current, successor)
            if self.proj_graph.degree(successor) != 2:
                break
            previous = current
            current = successor
        target[leaf] = successor
    best_leaf = max(lengths, key=lambda x: lengths[x])
    return lengths[best_leaf], (best_leaf, target[best_leaf])
def _condense_one(self, cutoff):
    """
    Condenses two adjacent projection points into one.

    The first pair of nodes closer than `cutoff` is replaced by their
    middlepoint, which inherits the edges of both nodes.

    :param cutoff: Distance below which two nodes are merged.
    :returns: True if a condensation was done, False if no condensation is
              possible.
    """
    graph = self.proj_graph
    for idx_a, point_a in enumerate(graph.nodes()):
        for idx_b, point_b in enumerate(graph.nodes()):
            # Visit every unordered pair once; skip pairs that are far apart.
            if idx_b <= idx_a:
                continue
            if not ftuv.vec_distance(point_a, point_b) < cutoff:
                continue

            merged = ftuv.middlepoint(point_a, point_b)
            # Copy the edges of point_a first, then those of point_b. The
            # neighbor list of point_b is snapshotted after point_a's edges
            # were added.
            # NOTE(review): if the two points are connected, that snapshot
            # contains `merged`, which looks like it yields a self-loop on
            # the merged node -- confirm whether this is intended.
            for old_point in (point_a, point_b):
                for neighbor in list(graph.adj[old_point].keys()):
                    graph.add_edge(
                        merged, neighbor,
                        attr_dict=graph.adj[old_point][neighbor])
            # Equality with the merged node can happen because of floating
            # point inaccuracy; in that case the node must be kept.
            for old_point in (point_a, point_b):
                if merged != old_point:
                    graph.remove_node(old_point)
            return True
    return False
def get_some_leaf_leaf_distances(self):
    """
    Get a list of distances between some pairs of leaf nodes.
    The distances are measured in direct line, not along the path.

    Pairs are picked greedily from the longest distance downwards, and
    every leaf is used in at most one pair.

    .. warning:: Whether this code will stay in the library or not depends
                 on future evaluation of the usefulness of this and similar
                 descriptors.

    :returns: a list of floats (lengths in Angstrom)
    """
    leaves = [
        node for node in self.proj_graph.nodes()
        if self.proj_graph.degree(node) == 1
    ]
    candidates = sorted(
        ((ftuv.vec_distance(first, second), first, second)
         for first, second in it.combinations(leaves, 2)),
        key=lambda entry: entry[0],
        reverse=True)

    used = set()
    selected = []
    for distance, first, second in candidates:
        if first not in used and second not in used:
            selected.append(distance)
            used.update((first, second))
    return selected
def test_line_segment_distance_parallel(self):
    """Two collinear segments with a gap of 1 between them are 1 apart."""
    end_points = [
        np.array([0., 0., 1.]),
        np.array([0., 0., 10.]),
        np.array([0., 0., 11.]),
        np.array([0., 0., 20.]),
    ]
    closest_pair = ftuv.line_segment_distance(*end_points)
    self.assertAlmostEqual(ftuv.vec_distance(*closest_pair), 1.)
def distances(s):
    '''
    Compute the distance array for a shape s.

    :param s: An iterable of points.
    :return: A numpy array holding the distance of every unordered pair of
             points, in the order produced by `itertools.combinations`.
    '''
    point_pairs = it.combinations(s, r=2)
    return np.array([ftuv.vec_distance(*pair) for pair in point_pairs])
def test_line_segment_distance_point_to_line(self):
    """The end point of one segment is 2 away from the other segment."""
    end_points = [
        np.array([0., 0., 1.]),
        np.array([0., 0., 10.]),
        np.array([0., -10., 12.]),
        np.array([0., 10., 12.]),
    ]
    closest_pair = ftuv.line_segment_distance(*end_points)
    self.assertAlmostEqual(ftuv.vec_distance(*closest_pair), 2.)
def test_line_segment_distance_real_world(self):
    """For a pair of real-world segments, the closest points are less than 25 apart."""
    end_points = [
        np.array([0., 0., 1.]),
        np.array([-2.76245752, -6.86976093, 7.54094508]),
        np.array([-27.57744115, 6.96488989, -22.47619655]),
        np.array([-16.93424799, -4.0631445, -16.19822301]),
    ]
    closest_pair = ftuv.line_segment_distance(*end_points)
    self.assertLess(ftuv.vec_distance(*closest_pair), 25)
def _condense_one(self, cutoff):
    """
    Condenses two adjacent projection points into one.

    The first pair of nodes closer than `cutoff` is replaced by their
    middlepoint, which inherits the edges of both nodes.

    :param cutoff: Distance below which two nodes are merged.
    :returns: True if a condensation was done, False if no condensation is
              possible.
    """
    def _inherit_edges(heir, old_point):
        # Snapshot the neighbors, because adding edges may alter adjacency.
        for neighbor in list(self.proj_graph.adj[old_point].keys()):
            self.proj_graph.add_edge(
                heir, neighbor,
                attr_dict=self.proj_graph.adj[old_point][neighbor])

    for idx_a, point_a in enumerate(self.proj_graph.nodes()):
        for idx_b, point_b in enumerate(self.proj_graph.nodes()):
            if idx_b <= idx_a:
                continue
            if ftuv.vec_distance(point_a, point_b) < cutoff:
                merged = ftuv.middlepoint(point_a, point_b)
                # NOTE(review): if the two points are connected, point_b's
                # neighbors already contain `merged` at this stage, which
                # looks like it yields a self-loop -- confirm intent.
                _inherit_edges(merged, point_a)
                _inherit_edges(merged, point_b)
                # Equality can happen because of floating point inaccuracy.
                if merged != point_a:
                    self.proj_graph.remove_node(point_a)
                if merged != point_b:
                    self.proj_graph.remove_node(point_b)
                return True
    return False
def get_some_leaf_leaf_distances(self):
    """
    Get a list of distances between some pairs of leaf nodes.
    The distances are measured in direct line, not along the path.

    Pairs are picked greedily from the longest distance downwards, and
    every leaf is used in at most one pair.

    .. warning:: Whether this code will stay in the library or not depends
                 on future evaluation of the usefulness of this and similar
                 descriptors.

    :returns: a list of floats (lengths in Angstrom)
    """
    graph = self.proj_graph
    leaves = [node for node in graph.nodes() if graph.degree(node) == 1]

    ranked = [
        (ftuv.vec_distance(first, second), first, second)
        for first, second in it.combinations(leaves, 2)
    ]
    ranked.sort(key=lambda entry: entry[0], reverse=True)

    taken = set()
    chosen = []
    for distance, first, second in ranked:
        if first in taken or second in taken:
            continue
        chosen.append(distance)
        taken.add(first)
        taken.add(second)
    return chosen
def _get_path_length(self, path):
    """
    Sum the distances between consecutive nodes of a path.

    :param path: a list of nodes
    :returns: The accumulated length; 0 for paths with fewer than two nodes.
    """
    return sum(
        ftuv.vec_distance(path[idx], path[idx + 1])
        for idx in range(len(path) - 1))
def test_line_segment_distance_windschief(self):
    """Two skew ("windschief") segments are 5.2 apart at their closest points."""
    end_points = [
        np.array([0., 0., -10.]),
        np.array([0., 0., 10.]),
        np.array([5.2, -10., 5.]),
        np.array([5.2, 10., 5.]),
    ]
    closest_pair = ftuv.line_segment_distance(*end_points)
    self.assertAlmostEqual(ftuv.vec_distance(*closest_pair), 5.2)
def drmsd(coords1, coords2):
    '''
    Calculate the dRMSD measure.

    This is the root of the mean squared difference between corresponding
    pairwise distances of the two structures.

    @param coords1: The vectors of the 'atoms' in the first structure.
    @param coords2: The vectors of the 'atoms' in the second structure.
    @return: The dRMSD measure.
    '''
    def _pairwise(coords):
        return np.array([ftuv.vec_distance(first, second)
                         for first, second in it.combinations(coords, r=2)])

    delta = _pairwise(coords1) - _pairwise(coords2)
    return math.sqrt(np.mean(delta * delta))
def test_projection_with_virtual_residues(self):
    """
    A projected virtual residue must be nearer to the projection of its own
    cg element than to the projection of the other element.
    """
    cg = ftmc.CoarseGrainRNA.from_bg_file(
        'test/forgi/threedee/data/1y26.cg')
    middle_res = len(cg.seq) // 2
    proj = fpp.Projection2D(cg, [1., 1., 1.],
                            project_virtual_residues=[1, middle_res])
    elem_first = cg.get_node_from_residue_num(1)
    elem_middle = cg.get_node_from_residue_num(middle_res)

    # (residue, its own element, the other element, failure message)
    cases = (
        (1, elem_first, elem_middle,
         "Projected virtual residue is closer to projection of a cg-element "
         "far away than to the projection of corresponding cg element."),
        (middle_res, elem_middle, elem_first,
         "Projected virtual residue is closer to projection of a cg-element "
         "far away than to the corresponding cg element."),
    )
    for residue, own_elem, other_elem, message in cases:
        own_dist = ftuv.vec_distance(proj._coords[own_elem][0],
                                     proj.get_vres_by_position(residue))
        other_dist = ftuv.vec_distance(proj._coords[other_elem][0],
                                       proj.get_vres_by_position(residue))
        self.assertLess(own_dist, other_dist, msg=message)
def is_basepair_pair(res1, res2):
    """
    Decide whether two residues form a base pair.

    All point pairs after the first must be closer than HBOND_CUTOFF, and
    all points together must pass `is_almost_coplanar`.

    :param res1: The first residue, as accepted by `_get_points`.
    :param res2: The second residue, as accepted by `_get_points`.
    :return: True or False.
    """
    pairs = _get_points(res1, res2)
    if not pairs:
        return False

    # pairs[0] is only used for the coplanarity check below.
    too_far = any(
        ftuv.vec_distance(pair[0], pair[1]) >= HBOND_CUTOFF
        for pair in pairs[1:])
    if too_far:
        return False

    all_points = []
    for pair in pairs:
        all_points.extend(pair)
    return True if is_almost_coplanar(*all_points) else False
def bulge_length(bg, ld):
    '''
    Calculate the physical length of a bulge region.

    This is equal to the distance between the ends of the two stems that
    flank the region.

    @param bg: The BulgeGraph data structure
    @param ld: The name of the bulge
    @return: The distance between the two flanking stem ends.
    '''
    neighbors = list(bg.edges[ld])
    stem_a, stem_b = neighbors[0], neighbors[1]
    (side_a, _) = bg.get_sides(stem_a, ld)
    (side_b, _) = bg.get_sides(stem_b, ld)
    end_a = bg.coords[stem_a][side_a]
    end_b = bg.coords[stem_b][side_b]
    return cuv.vec_distance(end_a, end_b)
def test_radius_of_gyration(self):
    """The radius of gyration is positive and below 0.77 times half the largest element distance."""
    cg = ftmc.CoarseGrainRNA('test/forgi/threedee/data/1y26.cg')
    self.check_graph_integrity(cg)
    rog = cg.radius_of_gyration()
    self.assertGreater(rog, 0.)

    start_to_start = (
        ftuv.vec_distance(cg.coords[first][0], cg.coords[second][0])
        for first, second in it.combinations(cg.defines, 2))
    circumcircle_radius = max(start_to_start) / 2
    # NOTE: The ROG is 0.77 times the radius of the circumcircle, for m->inf many points
    #       in a 3D unit sphere with the nth point placed at radius (n/m)**1/3
    self.assertLess(rog, circumcircle_radius * 0.77)
def test_radius_of_gyration(self):
    """The radius of gyration is positive and below 0.77 times half the largest coordinate distance."""
    cg = ftmc.CoarseGrainRNA.from_bg_file(
        'test/forgi/threedee/data/1y26.cg')
    self.check_graph_integrity(cg)
    rog = cg.radius_of_gyration()
    self.assertGreater(rog, 0.)

    point_pairs = it.combinations(cg.coords._coordinates, 2)
    largest_gap = max(
        ftuv.vec_distance(first, second) for first, second in point_pairs)
    circumcircle_radius = largest_gap / 2
    # NOTE: The ROG is 0.77 times the radius of the circumcircle, for m->inf many points
    #       in a 3D unit sphere with the nth point placed at radius (n/m)**1/3
    self.assertLess(rog, circumcircle_radius * 0.77)
def get_total_length(self):
    """
    Returns the sum of the lengths of all edges in the projection graph.

    .. note:: This measure is sensitive to the resolution of a projection.
              In an AFM image, one might not see all cycles.

    .. warning:: Whether this code will stay in the library or not depends
                 on future evaluation of the usefulness of this and similar
                 descriptors.
    """
    return sum(
        ftuv.vec_distance(edge[0], edge[1])
        for edge in self.proj_graph.edges())
def get_leaf_leaf_distances(self):
    """
    Get a list of distances between any pair of leaf nodes.
    The distances are measured in direct line, not along the path.

    .. warning:: Whether this code will stay in the library or not depends
                 on future evaluation of the usefulness of this and similar
                 descriptors.

    :returns: a list of floats (lengths in Angstrom), longest first
    """
    graph = self.proj_graph
    leaves = [node for node in graph.nodes() if graph.degree(node) == 1]
    return sorted(
        (ftuv.vec_distance(first, second)
         for first, second in it.combinations(leaves, 2)),
        reverse=True)
def reconstruct_loop(chain, sm, ld, side=0, samples=40, consider_contacts=True, consider_starting_pos=True):
    '''
    Reconstruct a particular loop.

    The chain should already have the stems reconstructed. A diagnostic
    line is written to stderr, the best loop found by `build_loop` is
    trimmed and added to `chain`.

    @param chain: A Bio.PDB.Chain structure.
    @param sm: A SpatialModel structure
    @param ld: The name of the loop
    @param side: Which side of the loop to reconstruct.
    @param samples: Passed on to `build_loop`.
    @param consider_contacts: Passed on to `build_loop`.
    @param consider_starting_pos: Passed on to `build_loop`.
    @return: None for a placeholder loop, otherwise a tuple
             `((a, b, i1, i2), best_loop_chain, min_r)`.
    '''
    bg = sm.bg
    seq = bg.get_flanking_sequence(ld, side)
    (a, b, i1, i2) = bg.get_flanking_handles(ld, side)
    handles = (a, b, i1, i2)

    if (a, b) == (0, 1):
        # The loop is just a placeholder and doesn't have a length.
        # This should be taken care of in a more elegant manner, but it
        # will have to wait until it causes a problem.
        return None

    # Gather some diagnostic information.
    define = bg.defines[ld]
    nt_span = abs(define[side * 2 + 1] - define[side * 2 + 0])
    end_to_end = cuv.vec_distance(bg.coords[ld][1], bg.coords[ld][0])
    vres_gap = get_flanking_stem_vres_distance(bg, ld)
    atom_gap = 0.
    diagnostics = (ld, len(bg.edges[ld]), nt_span, end_to_end, vres_gap, atom_gap)
    sys.stderr.write("reconstructing %s ([%d], %d, %.2f, %.2f, %.2f):" % diagnostics)

    (best_loop_chain, min_r) = build_loop(chain, seq, handles, bg.seq_length,
                                          samples, consider_contacts,
                                          consider_starting_pos)
    print_alignment_pymol_file(handles)

    ftup.trim_chain(best_loop_chain, i1, i2 + 1)
    sys.stderr.write('\n')

    add_loop_chain(chain, best_loop_chain, handles, bg.seq_length)
    return (handles, best_loop_chain, min_r)
def get_leaf_leaf_distances(self):
    """
    Get a list of distances between any pair of leaf nodes.
    The distances are measured in direct line, not along the path.

    .. warning:: Whether this code will stay in the library or not depends
                 on future evaluation of the usefulness of this and similar
                 descriptors.

    :returns: a list of floats (lengths in Angstrom), longest first
    """
    leaves = []
    for node in self.proj_graph.nodes():
        if self.proj_graph.degree(node) == 1:
            leaves.append(node)

    gaps = [ftuv.vec_distance(*pair) for pair in it.combinations(leaves, 2)]
    gaps.sort(reverse=True)
    return gaps
def get_longest_arm_length(self):
    """
    Get the length of the longest arm.

    An arm is a simple path from a node of `degree!=2` to a node of
    `degree 1`, if all the other nodes on the path have `degree 2`.

    .. note:: This measure is sensitive to the resolution of a projection
              the same way the length of a coastline is sensitive to the
              resolution.

    .. warning:: Whether this code will stay in the library or not depends
                 on future evaluation of the usefulness of this and similar
                 descriptors.

    :returns: The length and a tuple of points
              `(leaf_node, corresponding_branch_point)`
    """
    import networkx as nx

    arm_length = {}
    branch_point = {}
    for leaf, degree in nx.degree(self.proj_graph):
        if degree != 1:
            continue
        arm_length[leaf] = 0
        came_from, here = None, leaf
        # Walk away from the leaf until a node of degree != 2 is reached.
        while True:
            onward = [
                node for node in self.proj_graph[here].keys()
                if node != came_from
            ]
            assert len(onward) == 1
            there = onward[0]
            arm_length[leaf] += ftuv.vec_distance(here, there)
            if self.proj_graph.degree(there) != 2:
                break
            came_from, here = here, there
        branch_point[leaf] = there

    longest_leaf = max(arm_length, key=lambda leaf: arm_length[leaf])
    return arm_length[longest_leaf], (longest_leaf, branch_point[longest_leaf])
def _condense_pointWithLine_step(self, cutoff):
    """
    Used by `self.condense(cutoff)` as a single condensation step of a point
    with a line segment.

    Searches for a node that lies closer than `cutoff` to the interior of an
    existing edge. The first such node is merged with its nearest point on
    that edge, and the method returns immediately.

    :param cutoff: Distance below which a node is merged into an edge.
    :returns: True if one merge was performed, False if no node/edge
              combination qualified.
    """
    # i < j makes sure every edge (source, target) is looked at only once.
    for i, source in enumerate(self.proj_graph.nodes()):
        for j, target in enumerate(self.proj_graph.nodes()):
            if j > i and self.proj_graph.has_edge(source, target):
                for k, node in enumerate(self.proj_graph.nodes()):
                    # The end points of the edge itself are not candidates.
                    if k == i or k == j:
                        continue
                    nearest = ftuv.closest_point_on_seg(
                        source, target, node)
                    nearest = tuple(nearest)
                    # Only points inside the segment count; a nearest point
                    # that coincides with an end point is skipped.
                    if nearest == source or nearest == target:
                        continue
                    if (ftuv.vec_distance(nearest, node) < cutoff):
                        newnode = ftuv.middlepoint(node, tuple(nearest))
                        # Split the edge at newnode, reusing the attributes
                        # of the old edge for both halves.
                        # NOTE(review): how the `attr_dict` keyword is
                        # interpreted by add_edge depends on the networkx
                        # version -- confirm.
                        attr_dict = self.proj_graph.adj[source][target]
                        self.proj_graph.remove_edge(source, target)
                        if source != newnode:
                            self.proj_graph.add_edge(source,
                                                     newnode,
                                                     attr_dict=attr_dict)
                        if target != newnode:
                            self.proj_graph.add_edge(target,
                                                     newnode,
                                                     attr_dict=attr_dict)
                        if newnode != node:  # Equality possible because of floating point inaccuracy
                            # Re-attach all edges of the old node to newnode
                            # before dropping the old node.
                            for neighbor in self.proj_graph.adj[node].keys(
                            ):
                                attr_dict = self.proj_graph.adj[node][
                                    neighbor]
                                self.proj_graph.add_edge(
                                    newnode, neighbor, attr_dict=attr_dict)
                            self.proj_graph.remove_node(node)
                        # The graph changed while being iterated, so stop here.
                        return True
    return False
def update_statistics(self, energy_function, sm, prev_energy, tracking_energies = None, tracked_energies=None):
    '''
    Add a newly sampled structure to the set of statistics.

    Increments the step counter, stores the structure among the
    `save_n_best` lowest-energy structures, updates the extreme rmsd values,
    prints a status line (unless silent), updates the plots and periodically
    saves structures to disk.

    :param energy_function: The energy_function used to evaluate the structure.
    :param sm: The spatial model that was sampled.
    :param prev_energy: The evaluated (accepted) energy of the current step
    :param tracking_energies: The energy_functions which are calculated for
                              statistics, but not used for sampling.
    :param tracked_energies: The energy values of the tracking_energies.
                             If missing, the tracking energies are evaluated
                             here for the status line.
    '''
    self.counter += 1

    # The energy of the original structure is evaluated lazily on the
    # first call.
    if self.energy_orig is None:
        self.energy_orig = 0.
        try:
            self.sm_orig.bg.add_all_virtual_residues()
            self.energy_orig = energy_function.eval_energy(self.sm_orig)
        except KeyError:
            # most likely no native structure was provided
            pass

    energy = prev_energy
    #energy = energy_function.eval_energy(sm, background=True)
    # The ranking below uses the energy without background, which has to be
    # re-evaluated if the energy function uses a background.
    if energy_function.uses_background():
        energy_nobg = energy_function.eval_energy(sm, background=False)
    else:
        energy_nobg=energy

    mcc = None

    if self.centers_orig is not None:
        r = 0.
        if not self.no_rmsd:
            centers_new = ftug.bg_virtual_residues(sm.bg)
            r = cbr.centered_rmsd(self.centers_orig, centers_new)
            #r = cbr.drmsd(self.centers_orig, centers_new)
            cm = self.confusion_matrix_calculator.evaluate(sm.bg)
            mcc = ftme.mcc(cm)
    else:
        # no original coordinates provided so we can't calculate rmsds
        r = 0.

    # `dist` is never reassigned in this method, so the "dist" part of the
    # status line below is never written.
    dist = None
    dist2 = None

    cg = sm.bg
    dists = []

    # Distance between the requested residue pairs, each residue being
    # placed on the axis of its cg element.
    # Note that the loop targets are instance attributes, so self.dist1 and
    # self.dist2 keep the last pair after the loop.
    for (self.dist1, self.dist2) in self.dists:
        node1 = cg.get_node_from_residue_num(self.dist1)
        node2 = cg.get_node_from_residue_num(self.dist2)

        pos1, len1 = cg.get_position_in_element(self.dist1)
        pos2, len2 = cg.get_position_in_element(self.dist2)

        #fud.pv('node1, node2, pos1, pos2')

        vec1 = cg.coords[node1][1] - cg.coords[node1][0]
        vec2 = cg.coords[node2][1] - cg.coords[node2][0]

        #mid1 = (cg.coords[node1][0] + cg.coords[node1][1]) / 2
        #mid2 = (cg.coords[node2][0] + cg.coords[node2][1]) / 2

        mid1 = cg.coords[node1][0] + pos1 * (vec1 / len1)
        mid2 = cg.coords[node2][0] + pos2 * (vec2 / len2)

        #fud.pv('mid1, mid2')

        dists += [ftuv.vec_distance(mid1, mid2)]

    # Keep only the save_n_best structures with the lowest energy. A deep
    # copy is stored, so later changes to sm.bg do not affect saved entries.
    #self.energy_rmsd_structs += [(energy, r, sm.bg)]
    self.energy_rmsd_structs += [(energy_nobg, r, copy.deepcopy(sm.bg))]
    #self.energy_rmsd_structs += [(energy, r, sm.bg.copy())]

    sorted_energies = sorted(self.energy_rmsd_structs, key=lambda x: x[0])
    self.energy_rmsd_structs = sorted_energies[:self.save_n_best]

    if r > self.highest_rmsd:
        self.highest_rmsd = r

    if r < self.lowest_rmsd:
        self.lowest_rmsd = r

    # Energy and rmsd of the best structure seen so far (the rmsd of the
    # lowest-energy structure, not the lowest rmsd).
    lowest_energy = sorted_energies[0][0]
    lowest_rmsd = sorted_energies[0][1]

    '''
    if energy == lowest_energy:
        for key in sm.angle_defs:
            print >>sys.stderr, key, str(sm.angle_defs[key])
    '''

    if not self.silent:
        if self.verbose:
            '''
            for energy_func in energy_function.energies:
                print energy_func.__class__.__name__, energy_func.eval_energy(sm)
            '''

        _, rog=fbe.length_and_rog(sm.bg)
        #output_str = u"native_energy [{:s} {:d}]: {:3d} {:5.03g} {:5.3f} ROG: {:5.3f} | min:
        output_str = u"native_energy [%s %d]: %3d %5.03g %5.3f ROG: %5.3f | min: %5.2f (%5.2f) %5.2f | extreme_rmsds: %5.2f %5.2f (%.2f)" % ( sm.bg.name, sm.bg.seq_length, self.counter, energy, r , rog, lowest_energy, self.energy_orig, lowest_rmsd, self.lowest_rmsd, self.highest_rmsd, energy_nobg)
        output_str += " |"

        # assume that the energy function is a combined energy
        if isinstance(self.energy_function, fbe.CombinedEnergy):
            for e in self.energy_function.iterate_energies():
                if isinstance(e,fbe.DistanceExponentialEnergy):
                    output_str += " [clamp {},{}: {:.1f}]".format(e.from_elem, e.to_elem, e.get_distance(sm))
        # Use the precomputed tracked energy values if given, otherwise
        # evaluate the tracking energies now.
        if tracked_energies and tracking_energies:
            output_str += " | [tracked Energies]"
            for i,e in enumerate(tracking_energies):
                sn=e.shortname()
                # Shorten long names to 12 characters.
                if len(sn)>12:
                    sn=sn[:9]+"..."
                output_str += " [{}]: ".format(sn)
                output_str += "%5.03g" % (tracked_energies[i])
        elif tracking_energies:
            output_str += " | [tracked Energies]"
            for e in tracking_energies:
                sn=e.shortname()
                if len(sn)>12:
                    sn=sn[:9]+"..."
                output_str += " [{}]: ".format(sn)
                output_str += "%5.03g" % (e.eval_energy(sm))
        if dist:
            output_str += " | dist %.2f" % (dist)

        for dist2 in dists:
            if dist2 is not None:
                output_str += " | [dist2: %.2f]" % (dist2)

        if mcc is not None:
            output_str += " | [mcc: %.3f]" % (mcc)

        output_str += " [time: %.1f]" % (time.time() - self.creation_time)

        #Print to both STDOUT and the log file.
        if self.output_file != sys.stdout:
            print (output_str.strip())

        if self.output_file != None:
            print(output_str, file=self.output_file)
            self.output_file.flush()

    self.update_plots(energy, r)

    '''
    if self.counter % 1000 == 0:
        import pdb; pdb.set_trace()
    '''

    # Every 10th step the best structures are saved (unless silent).
    if self.counter % 10 == 0:
        if not self.silent:
            self.save_top(self.save_n_best, counter=self.counter)

    # Every step_save-th step the current structure is written to a file.
    if self.step_save > 0 and self.counter % self.step_save == 0:
        #If a projection match energy was used, save the optimal projection direction to the file.
        if isinstance(self.energy_function, fbe.CombinedEnergy):
            for e in self.energy_function.iterate_energies():
                if hasattr(e, "accepted_projDir"):
                    sm.bg.project_from=e.accepted_projDir
        sm.bg.to_cg_file(os.path.join(cbc.Configuration.sampling_output_dir, 'step%06d.coord' % (self.counter)))
def extend_pk_description(dataset, filename, pk_type, rna, pk, pk_number):
    """
    Append an extended description of the current pseudoknot in the current
    file to `dataset`, e.g. angles and distances between stems.

    One entry per stem of the pseudoknot is appended to the columns of
    `dataset`. Nothing is returned; `dataset` is modified in place.

    :param dataset: Current dataset that will be updated. Its values are
                    appended to, so they presumably are lists -- verify
                    against the caller.
    :param filename: Filename of the current structure
    :param pk_type: Class of the pseudoknot
    :param rna: A forgi CoarseGrainRNA object
    :param pk: Structure of the pseudoknot, a NumberedDotbracket object,
               in a condensed (shadow-like) representation.
               This representation always contains the most 5' basepair.
    :param pk_number: consecutive number of the pseudoknot
    """
    domains = rna.get_domains()
    helices = domains["rods"]  # A list of elements, e.g. ["s0", "i0", "s1"]
    log.debug("Helices: %s", helices)
    #rna.log(logging.WARNING)

    # For every residue of the condensed pseudoknot, look up the cg element
    # at the residue itself (5') and at the matching helix end (3').
    stems_5p = []
    stems_3p = []
    nums = []
    log.debug("pk Residue numbers %s", pk.residue_numbers)
    log.debug("pk helix ends %s", pk.helix_ends)
    for i, resnum in enumerate(pk.residue_numbers):
        num = rna.seq.to_integer(resnum)
        nums.append(num)
        element_5p = rna.get_node_from_residue_num(num)
        stems_5p.append(element_5p)
        num2 = rna.seq.to_integer(pk.helix_ends[i])
        log.debug("num %s nums2 %s", num, num2)
        element_3p =rna.get_node_from_residue_num(num2)
        stems_3p.append(element_3p)
    log.debug("nums %s", nums)

    for i, stem1_5p in enumerate(stems_5p):
        dataset["Filename"].append(filename)
        # NOTE(review): unlike all other columns, "rnaname" is assigned and
        # not appended to -- confirm that this is intended.
        dataset["rnaname"] = rna.name
        dataset["pk_type"].append(pk_type)
        dataset["pk_id"].append(pk_number)
        dataset["angle_nr"].append(i)
        if pk_type == "other":
            dataset["pk_structure"].append(str(pk))
        else:
            dataset["pk_structure"].append("")

        #is this the first occurrence of stem in stems?
        if stems_5p.index(stem1_5p)==i:
            #first occurrence. Strand 0, look at 3' end of helix
            stem1 = stems_3p[i]
            strand = 0
        else:
            assert i>stems_5p.index(stem1_5p)
            stem1 = stem1_5p
            strand = 1
        # The stem after the last one wraps around to the first stem; this
        # pair is marked as lying outside of the pseudoknot.
        try:
            stem2_5p = stems_5p[i+1]
        except IndexError:
            stem2_5p = stems_5p[0]
            outside_pk = True
        else:
            outside_pk = False
        if outside_pk or stems_5p.index(stem2_5p)==i+1:
            #first occurrence
            stem2 = stem2_5p
            strand2 = 0
        else:
            strand2 = 1
            # outside_pk is always False in this branch, so only the else
            # part below can run.
            if outside_pk:
                stem2 = stems_3p[0]
            else:
                stem2 = stems_3p[i+1]
        log.debug("Stem 5' %s, 3' %s, stem1 %s stem2 %s", stems_5p, stems_3p, stem1, stem2)
        # enable stacking analysis via DSSR
        # differentiate between stacking (True), no stacking (False) and breaks
        # within/around the pseudoknot (-1) incl. 'virtual' angles e.g. H-Type angle_type3
        ml_stack=[]
        if rna.dssr:
            # Map every nucleotide to its non-canonical pairing partners.
            nc_bps = list(rna.dssr.noncanonical_pairs())
            nc_dict = defaultdict(list)
            for nt1, nt2, typ in nc_bps:
                nc_dict[nt1].append((nt2, typ))
                nc_dict[nt2].append((nt1, typ))
            stacking_loops = rna.dssr.stacking_loops()
            # State of the backbone walk:
            # start_found counts how often stem1 was passed (the walk starts
            # collecting once it equals strand+1), connection collects the
            # non-stem elements between stem1 and stem2, and branch holds a
            # stem other than stem2 that was entered on the way; elements
            # are skipped until that stem is seen again.
            start_found = 0
            connection = []
            stacking = None
            branch = None
            log.debug("Checking %s and %s for stacking, strand %s", stem1, stem2, strand)
            for elem in rna.iter_elements_along_backbone(): #walk along the backbone
                if start_found == strand+1:
                    if branch:
                        log.debug("in branch: elem %s, branch %s, stacking %s", elem, branch, stacking)
                        if elem == branch:
                            log.debug("End branch at %s", elem)
                            branch = None
                            log.debug("Branch end")
                        continue
                    if elem[0] != "s":
                        connection.append(elem)
                        # A backbone break at the end of the element is
                        # recorded as -1 and is not overwritten by False.
                        if rna.defines[elem] and rna.defines[elem][-1] in rna.backbone_breaks_after:
                            stacking = -1
                        if elem not in stacking_loops and stacking != -1:
                            stacking = False
                    elif elem == stem2:
                        # No break and no non-stacking loop was seen.
                        if stacking is None:
                            stacking = True
                        log.debug("Found second stem, elem %s, stacking %s", elem, stacking)
                        break
                    elif elem[0] == "s" and connection:
                        branch = elem
                        if rna.defines[elem][-1] in rna.backbone_breaks_after:
                            stacking = -1
                    log.debug("elem %s, stacking %s, branch %s", elem, stacking, branch)
                elif elem == stem1:
                    start_found += 1
                    if rna.defines[elem][strand*2+1] in rna.backbone_breaks_after:
                        stacking = -1
                    log.debug("First stem, elem %s, stacking %s", elem, stacking)
            else:
                # The walk ended without reaching stem2.
                log.debug("End iteration, stacking->-1")
                stacking = -1
            log.debug("Finally, stacking = %s", stacking)
            # more detailed stacking (including backbone breaks within and around the pseudoknot)
            dataset["this_loop_stacking_dssr"].append(stacking)
            dataset["connecting_loops"].append(",".join(connection))

            # more general stacking information
            connecting_loops = rna.edges[stem1]&rna.edges[stem2]
            for loop in connecting_loops:
                if loop in stacking_loops:
                    ml_stack.append(loop)
            stacks = rna.dssr.coaxial_stacks()
            log.info("Stacks: %s", stacks)
            for stack in stacks:
                if stem1 in stack and stem2 in stack:
                    # the two stems stack, but we do not specify along which
                    # multiloop segment they stack.
                    dataset["is_stacking_dssr"].append(True)
                    break
            else:
                dataset["is_stacking_dssr"].append(False)
            # Does the connection form base-triples with the stem?
            stem1_triples=0
            stem2_triples=0
            aminors1 = 0
            aminors2 = 0
            aminors = list(rna.dssr.aminor_interactions())
            for elem in connection:
                for nt in rna.define_residue_num_iterator(elem,seq_ids=True):
                    # A nucleotide counts either as A-minor interaction or,
                    # failing that, once per non-canonical pair with a stem.
                    if (nt, stem1) in aminors:
                        aminors1+=1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem1)
                    elif (nt, stem2) in aminors:
                        aminors2+=1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem2)
                    else:
                        for partner, typ in nc_dict[nt]:
                            if rna.get_elem(partner)==stem1:
                                log.debug("base_triple %s, %s: %s-%s (%s)", elem, stem1, nt,partner,typ)
                                stem1_triples+=1
                            elif rna.get_elem(partner)==stem2:
                                log.debug("base_triple %s, %s: %s-%s (%s)", elem, stem2, nt,partner,typ)
                                stem2_triples+=1
            log.debug("%s has a length of %s and %s triples", stem1, rna.stem_length(stem1),stem1_triples)
            log.debug("%s has a length of %s and %s triples", stem2, rna.stem_length(stem2),stem2_triples)
            # The counts are stored relative to the stem length.
            dataset["stem1_basetripleperc_dssr"].append(stem1_triples/rna.stem_length(stem1))
            dataset["stem2_basetripleperc_dssr"].append(stem2_triples/rna.stem_length(stem2))
            dataset["stem1_aminorperc_dssr"].append(aminors1/rna.stem_length(stem1))
            dataset["stem2_aminorperc_dssr"].append(aminors2/rna.stem_length(stem2))
        else:
            # Without DSSR annotation, all DSSR-derived columns are filled
            # with placeholders, so that all columns grow in step.
            dataset["is_stacking_dssr"].append(float("nan"))
            dataset["this_loop_stacking_dssr"].append(float("nan"))
            dataset["connecting_loops"].append("")
            dataset["stem1_basetripleperc_dssr"].append(float("nan"))
            dataset["stem2_basetripleperc_dssr"].append(float("nan"))
            dataset["stem1_aminorperc_dssr"].append(float("nan"))
            dataset["stem2_aminorperc_dssr"].append(float("nan"))

        dataset["stacking_loops"].append(",".join(ml_stack))

        # Geometry of the two stems relative to each other.
        pos1, dir1 = stem_parameters(stem1, rna, not strand)
        pos2, dir2 = stem_parameters(stem2, rna, strand2)
        dataset["stem1"].append(stem1)
        dataset["stem2"].append(stem2)

        dataset["angle_between_stems"].append(ftuv.vec_angle(dir1, dir2))
        dataset["distance_between"].append(ftuv.vec_distance(pos1, pos2))

        # Geometry relative to the stem found by stem_after_next_ml, unless
        # that is stem2 itself or the pair lies outside the pseudoknot.
        next_stem = None
        if not outside_pk:
            next_stem = stem_after_next_ml(rna, nums[i], before=stem2)
            if next_stem==stem2:
                next_stem = None
        if next_stem:
            posN, dirN = stem_parameters(next_stem, rna, 0)
            dataset["angle_to_next"].append(ftuv.vec_angle(dir1, dirN))
            dataset["distance_to_next"].append(ftuv.vec_distance(pos1, posN))
            dataset["next_stem"].append(next_stem)
        else:
            dataset["angle_to_next"].append("")
            dataset["distance_to_next"].append("")
            dataset["next_stem"].append("")
        dataset["outside_pk"].append(outside_pk)
def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """
    Collect summary statistics about a single coarse-grained RNA.

    :param cg: The coarse-grained RNA object that is described.
    :param file_num: Number of the file being processed. It is only used
                     (as an English ordinal, e.g. "3rd") in warning messages.
    :param dist_pais: Iterable of `(from_nt, to_nt)` pairs. For every pair,
                      the distance between the two virtual residues is stored
                      under the key `"distance_<from>_<to>"`. The misspelled
                      name is kept so that existing callers keep working.
    :param angle_pairs: Iterable of `(elem1, elem2)` pairs. For every pair,
                        the angle between the directions of the two elements
                        is stored under the key `"angle_<elem1>_<elem2>"`.
    :return: A dictionary mapping descriptor names to values. Values that
             could not be calculated are stored as NaN.
    """
    # Bug fix: the distance loop below used to iterate over the name
    # `dist_pairs`, which was never bound inside this function, so the
    # argument was ignored (and a NameError was raised unless an unrelated
    # global of that name existed).
    dist_pairs = dist_pais

    def ordinal_suffix():
        # English ordinal suffix for file_num: 1st, 2nd, 3rd, 4th, 11th, ...
        return {1: "st", 2: "nd", 3: "rd"}.get(
            file_num % 10 * (file_num % 100 not in [11, 12, 13]), "th")

    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    # Number of elements per element type (first letter of the element name).
    for letter in "smifth":
        data["num_" + letter] = len([x for x in cg.defines if x[0] == letter])

    multiloops = cg.find_mlonly_multiloops()
    junct3 = 0
    junct4 = 0
    reg = 0
    pk = 0
    op = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            reg += 1
        if "pseudoknot" in descriptors:
            pk += 1
        if "open" in descriptors:
            op += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = op
    data["pseudoknots"] = pk
    data["regular_mls"] = reg
    data["total_mls"] = len(multiloops)
    try:
        data["longest_ml"] = max(len(x) for x in multiloops)
    except ValueError:
        # max() of an empty sequence: there are no multiloops.
        data["longest_ml"] = 0

    # Shape descriptors need 3D coordinates; fall back to NaN without them.
    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        data["rog_fast"] = float("nan")
        data["rog_vres"] = float("nan")
        data["anisotropy_fast"] = float("nan")
        data["anisotropy_vres"] = float("nan")
        data["asphericity_fast"] = float("nan")
        data["asphericity_vres"] = float("nan")
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    for from_nt, to_nt in dist_pairs:
        try:
            dist = ftuv.vec_distance(
                cg.get_virtual_residue(int(from_nt), True),
                cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Deliberately best-effort: one failing pair must not abort the
            # description of the whole structure.
            dist = float("nan")
            log.warning("%d%s File %s: Could not calculate distance between "
                        "%d and %d: %s occurred: %s", file_num,
                        ordinal_suffix(), cg.name, from_nt, to_nt,
                        type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist

    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            # Same best-effort policy as for the distances above.
            angle = float("nan")
            log.warning("%d%s File %s: Could not calculate angle between "
                        "%s and %s: %s occurred: %s", file_num,
                        ordinal_suffix(), cg.name, elem1, elem2,
                        type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle

    # Missing residues, counted over the whole sequence.
    # NOTE(review): the "- 1" presumably removes the one resolved nucleotide
    # that every such slice contains -- confirm against the sequence class.
    missing_5prime = len(cg.seq.with_missing[:1]) - 1
    missing_3prime = len(cg.seq.with_missing[cg.seq_length:]) - 1
    data["missing_residues_5prime"] = missing_5prime
    data["missing_residues_3prime"] = missing_3prime
    data["missing_residues_middle"] = (
        len(cg.seq.with_missing[1:cg.seq_length])
        - len(cg.seq[1:cg.seq_length]))
    data["missing_residues_total"] = (
        len(cg.seq.with_missing[:]) - len(cg.seq[:]))

    # The same counts per chain: at every backbone break, the slice is split
    # at '&' into the part before and the part after the break.
    fp = missing_5prime
    tp = 0
    for bp in cg.backbone_breaks_after:
        around_break = cg.seq.with_missing[bp:bp + 1].split('&')
        fp += len(around_break[1]) - 1
        tp += len(around_break[0]) - 1
    tp += missing_3prime
    data["missing_residues_5prime_chain"] = fp
    data["missing_residues_3prime_chain"] = tp
    data["missing_residues_middle_chain"] = (
        data["missing_residues_total"] - fp - tp)

    incomplete_elem_types = Counter(x[0] for x in cg.incomplete_elements)
    data["s_with_missing"] = incomplete_elem_types["s"]
    data["i_with_missing"] = incomplete_elem_types["i"]
    data["m_with_missing"] = incomplete_elem_types["m"]
    data["h_with_missing"] = incomplete_elem_types["h"]

    # For incomplete stems, record the stretches where both strands have
    # missing residues at corresponding positions.
    mp = ""
    if incomplete_elem_types["s"]:
        for elem in cg.incomplete_elements:
            if elem[0] != "s":
                continue
            for i in range(cg.defines[elem][0], cg.defines[elem][1]):
                left_s = cg.seq.with_missing[i:i + 1]
                if len(left_s) > 2:
                    right_s = cg.seq.with_missing[
                        cg.pairing_partner(i + 1):cg.pairing_partner(i)]
                    if len(right_s) > 2:
                        mp += "{}&{};".format(left_s, right_s)
    data["missing_basepairs"] = mp
    return data
def __init__(self, cg, proj_direction=None, rotation=0,
             project_virtual_atoms=False, project_virtual_residues=[]):
    """
    :param cg: a CoarseGrainRNA object with 3D coordinates for every element

               .. note::
                  The projection is generated from this cg, but it is not
                  associated with it after construction. Thus future
                  changes of the cg are not reflected in the projection.

    :param proj_direction: a cartesian vector (in 3D space) in the direction
                           of projection. The length of this vector is not
                           used. If proj_direction is None, cg.project_from
                           is used. If proj_direction and cg.project_from is
                           None, an error is raised.
    :param rotation: Degrees. Rotate the projection by this amount.
    :param project_virtual_atoms: Passed on to `self._project`. If it is
                           true, the projected virtual atoms are shifted
                           together with the projected elements.
    :param project_virtual_residues: Passed on to `self._project` and stored
                           as `self.virtual_residue_numbers`. If it is
                           non-empty, the projected virtual residues are
                           shifted together with the projected elements.
    :raises ValueError: If neither proj_direction nor cg.project_from is
                           given.
    """
    # NOTE: the default of project_virtual_residues is a shared list object.
    # It is only stored and read in this method, never modified here.

    #: The projected coordinates of all stems
    self._coords = dict()
    self._cross_points = None
    self._proj_graph = None

    # Calculate orthonormal basis of projection plane.
    # Compare to none, because `if np.array:` raises ValueError.
    # The builtin `float` is used as dtype: the alias `np.float` was
    # deprecated in numpy 1.20 and removed in numpy 1.24.
    if proj_direction is not None:
        proj_direction = np.array(proj_direction, dtype=float)
    elif cg.project_from is not None:
        # We make a copy here. In case cg.project_from is modified,
        # we still want to be able to look up from what direction the
        # projection was generated.
        proj_direction = np.array(cg.project_from, dtype=float)
    else:
        raise ValueError(
            "No projection direction given and none present in the cg Object.")
    _, unit_vec1, unit_vec2 = ftuv.create_orthonormal_basis(proj_direction)
    self._unit_vec1 = unit_vec1
    self._unit_vec2 = unit_vec2
    self._proj_direction = proj_direction
    self._virtual_residues = []
    self.virtual_residue_numbers = project_virtual_residues
    self._project(cg, project_virtual_atoms, project_virtual_residues)

    # Rotate and translate projection into a standard orientation
    points = list(self.points)
    v1, v2 = diameter(points)
    #: The longest distance between any two points of the projection.
    self.longest_axis = ftuv.vec_distance(v1, v2)
    v1 = np.array(v1)
    v2 = np.array(v2)
    # Move the midpoint of the longest axis to the origin.
    shift = (v1 + v2) / 2
    for key, edge in self._coords.items():
        self._coords[key] = (edge[0] - shift, edge[1] - shift)
    if project_virtual_atoms:
        self._virtual_atoms = self._virtual_atoms - shift
    if project_virtual_residues:
        self._virtual_residues = self._virtual_residues - shift
    rot = math.atan2(*(v2 - v1))
    rot = math.degrees(rot)
    self.rotate(rot)

    # Center the projection on the mean of all segment end points.
    xmean = np.mean([x[0] for p in self._coords.values() for x in p])
    ymean = np.mean([x[1] for p in self._coords.values() for x in p])
    mean = np.array([xmean, ymean])
    for key, edge in self._coords.items():
        self._coords[key] = (edge[0] - mean, edge[1] - mean)
    # Thanks to numpy broadcasting, this works without a loop.
    if project_virtual_atoms:
        self._virtual_atoms = self._virtual_atoms - mean
    if project_virtual_residues:
        self._virtual_residues = self._virtual_residues - mean

    # From this, further rotate if requested by the user.
    if rotation != 0:
        self.rotate(rotation)
def cg_distance(cgs, elem):
    """
    Return the length of the element `elem` in each of the given structures.

    :param cgs: An iterable of coarse-grained RNAs. `cg.coords[elem]` of each
                of them has to unpack into a start and an end point.
    :param elem: The name of the coarse-grained element to measure.
    :return: A list with one start-to-end distance per entry of `cgs`,
             in the order of `cgs`.
    """
    endpoints = (cg.coords[elem] for cg in cgs)
    return [ftuv.vec_distance(first, last) for first, last in endpoints]
def plot(self, ax=None, show=False, margin=5, linewidth=None,
         add_labels=False, line2dproperties={}, xshift=0, yshift=0,
         show_distances=[], print_distances=False, virtual_atoms=True):
    """
    Plots the 2D projection.

    This uses modified copy-paste code by Syrtis Major (c)2014-2015
    under the BSD 3-Clause license and code from matplotlib under the PSF
    license.

    :param ax: The axes to draw to. You can get it by calling
               `fig, ax=matplotlib.pyplot.subplots()`
    :param show: If true, the matplotlib.pyplot.show() will be called at the
                 end of this function.
    :param margin: A numeric value. The margin around the plotted projection
                   inside the (sub-)plot.
    :param linewidth: The width of the lines projection.
    :param add_labels: Display the name of the corresponding coarse grain
                       element in the middle of each segment in the
                       projection. Either a bool or a set of labels to
                       display.
    :param line2dproperties: A dictionary. Will be passed as `**kwargs` to
                       the constructor of `matplotlib.lines.Line2D`. See
                       http://matplotlib.org/api/lines_api.html#matplotlib.lines.Line2D
                       The dictionary is copied and never modified.
    :param xshift, yshift: Shift the projection by the given amount inside
                       the canvas.
    :param show_distances: A list of tuples of strings, e.g.
                       `[("h1","h8"),("h2","m15")]`. Show the distances
                       between these elements in the plot
    :param print_distances: Bool. Print all distances from show_distances at
                       the side of the plot instead of directly next to the
                       distance
    :param virtual_atoms: Bool. If true and there are projected virtual
                       atoms, draw them as gray circles.
    :return: The return value of `ax.plot()`, or None if `show` is true or
             if plotting is not possible (a warning is issued in that case).
    """
    # In case of ssh without -X option, a TypeError might be raised
    # during the import of pyplot
    # This probably depends on the version of some library.
    # This is also the reason why we import matplotlib only inside the plot
    # function.
    text = []
    try:
        if ax is None or show:
            import matplotlib.pyplot as plt
        import matplotlib.lines as lines
        import matplotlib.transforms as mtransforms
        import matplotlib.text as mtext
        import matplotlib.font_manager as font_manager
    except TypeError as e:
        warnings.warn(
            "Cannot plot projection. Maybe you could not load Gtk "
            "(no X11 server available)? During the import of matplotlib"
            "the following Error occured:\n {}: {}".format(
                type(e).__name__, e))
        return
    except ImportError as e:
        warnings.warn(
            "Cannot import matplotlib. Do you have matplotlib installed? "
            "The following error occured:\n {}: {}".format(
                type(e).__name__, e))
        return
    #try:
    #    import shapely.geometry as sg
    #    import shapely.ops as so
    #except ImportError as e:
    #    warnings.warn("Cannot import shapely. "
    #                  "The following error occured:\n {}: {}".format(type(e).__name__, e))
    #    area=False
    #    #return
    #else:
    #    area=True
    area = False
    polygons = []

    def circles(x, y, s, c='b', ax=None, vmin=None, vmax=None, **kwargs):
        """
        Make a scatter of circles plot of x vs y, where x and y are sequence
        like objects of the same lengths. The size of circles are in data
        scale.

        Parameters
        ----------
        x,y : scalar or array_like, shape (n, )
            Input data
        s : scalar or array_like, shape (n, )
            Radius of circle in data scale (ie. in data unit)
        c : color or sequence of color, optional, default : 'b'
            `c` can be a single color format string, or a sequence of color
            specifications of length `N`, or a sequence of `N` numbers to be
            mapped to colors using the `cmap` and `norm` specified via kwargs.
            Note that `c` should not be a single numeric RGB or
            RGBA sequence because that is indistinguishable from an array of
            values to be colormapped. `c` can be a 2-D array in which the
            rows are RGB or RGBA, however.
        ax : Axes object, optional, default: None
            Parent axes of the plot. It uses gca() if not specified.
        vmin, vmax : scalar, optional, default: None
            `vmin` and `vmax` are used in conjunction with `norm` to normalize
            luminance data. If either are `None`, the min and max of the
            color array is used. (Note if you pass a `norm` instance, your
            settings for `vmin` and `vmax` will be ignored.)

        Returns
        -------
        paths : `~matplotlib.collections.PathCollection`

        Other parameters
        ----------------
        kwargs : `~matplotlib.collections.Collection` properties
            eg. alpha, edgecolors, facecolors, linewidths, linestyles, norm,
            cmap

        Examples
        --------
        a = np.arange(11)
        circles(a, a, a*0.2, c=a, alpha=0.5, edgecolor='none')

        License
        --------
        This function is copied (and potentially modified) from
        http://stackoverflow.com/a/24567352/5069869

        Copyright Syrtis Major, 2014-2015

        This function is under [The BSD 3-Clause License]
        (http://opensource.org/licenses/BSD-3-Clause)
        """
        from matplotlib.patches import Circle
        from matplotlib.collections import PatchCollection
        #import matplotlib.colors as colors
        if ax is None:
            raise TypeError()
        if fus.is_string_type(c):
            color = c  # ie. use colors.colorConverter.to_rgba_array(c)
        else:
            color = None  # use cmap, norm after collection is created
        kwargs.update(color=color)
        if np.isscalar(x):
            patches = [
                Circle((x, y), s),
            ]
        elif np.isscalar(s):
            patches = [Circle((x_, y_), s) for x_, y_ in zip(x, y)]
        else:
            patches = [Circle((x_, y_), s_) for x_, y_, s_ in zip(x, y, s)]
        collection = PatchCollection(patches, **kwargs)
        if color is None:
            collection.set_array(np.asarray(c))
            if vmin is not None or vmax is not None:
                collection.set_clim(vmin, vmax)
        ax.add_collection(collection)
        ax.autoscale_view()
        return collection

    class MyLine(lines.Line2D):
        """
        Copied and modified from
        http://matplotlib.org/examples/api/line_with_text.html,
        which is part of matplotlib 1.5.0 (Copyright (c) 2012-2013 Matplotlib
        Development Team; All Rights Reserved).
        Used under the matplotlib license:
        http://matplotlib.org/users/license.html
        """

        def __init__(self, *args, **kwargs):
            # we'll update the position when the line data is set
            fm = font_manager.FontProperties(size="large", weight="demi")
            self.text = mtext.Text(0, 0, '', fontproperties=fm)
            lines.Line2D.__init__(self, *args, **kwargs)
            # we can't access the label attr until *after* the line is
            # inited
            self.text.set_text(self.get_label())

        def set_figure(self, figure):
            self.text.set_figure(figure)
            lines.Line2D.set_figure(self, figure)

        def set_axes(self, axes):
            self.text.set_axes(axes)
            lines.Line2D.set_axes(self, axes)

        def set_transform(self, transform):
            # 2 pixel offset
            texttrans = transform + mtransforms.Affine2D().translate(2, 2)
            self.text.set_transform(texttrans)
            lines.Line2D.set_transform(self, transform)

        def set_data(self, x, y):
            if len(x):
                self.text.set_position(
                    ((x[0] + x[-1]) / 2, (y[0] + y[-1]) / 2))
            lines.Line2D.set_data(self, x, y)

        def draw(self, renderer):
            # draw my label at the end of the line with 2 pixel offset
            lines.Line2D.draw(self, renderer)
            self.text.draw(renderer)

    # Work on a private copy: the entries added below must neither leak into
    # the shared default argument nor into the dictionary of the caller.
    line2dproperties = copy.copy(line2dproperties)
    if "linewidth" in line2dproperties and linewidth is not None:
        warnings.warn(
            "Got multiple values for 'linewidth' (also present in line2dproperties)"
        )
    if linewidth is not None:
        line2dproperties["linewidth"] = linewidth
    if "solid_capstyle" not in line2dproperties:
        line2dproperties["solid_capstyle"] = "round"

    if ax is None:
        try:
            fig, ax = plt.subplots(1, 1)
        except Exception as e:
            warnings.warn(
                "Cannot create Axes or Figure. You probably have no graphical "
                "display available. The Error was:\n {}: {}".format(
                    type(e).__name__, e))
            return
    lprop = copy.copy(line2dproperties)
    if virtual_atoms and len(self._virtual_atoms) > 0:
        circles(self._virtual_atoms[:, 0], self._virtual_atoms[:, 1],
                c="gray", s=0.7, ax=ax)

    # Default line color, chosen by the first letter of the element name.
    default_colors = {"s": "green", "i": "gold", "h": "blue", "m": "red",
                      "f": "blue", "t": "blue"}
    for label, (s, e) in self._coords.items():
        if "color" not in line2dproperties:
            lprop["color"] = default_colors.get(label[:1], "black")
        if add_labels != False and (add_labels == True or label in add_labels):
            lprop["label"] = label
        else:
            lprop["label"] = ""
        #line=lines.Line2D([s[0], e[0]],[s[1],e[1]], **lprop)
        line = MyLine([s[0] + xshift, e[0] + xshift],
                      [s[1] + yshift, e[1] + yshift], **lprop)
        ax.add_line(line)
        s = s + np.array([xshift, yshift])
        e = e + np.array([xshift, yshift])
        vec = np.array(e) - np.array(s)
        nvec = np.array([vec[1], -vec[0]])
        div = math.sqrt(nvec[0]**2 + nvec[1]**2)
        if div == 0:
            # Zero-length segment. math.sqrt never raises ZeroDivisionError,
            # so the guard has to be an explicit comparison.
            div = 100000
        a = e + nvec * 5 / div
        b = e - nvec * 5 / div
        c = s + nvec * 5 / div
        d = s - nvec * 5 / div
        #For now disabling area representation
        area = False
        if area:
            polygon = sg.Polygon([a, b, d, c])
            polygons.append(polygon)

    for s, e in show_distances:
        # Distances are measured between the midpoints of the two elements.
        st = (self._coords[s][0] + self._coords[s][1]) / 2
        en = (self._coords[e][0] + self._coords[e][1]) / 2
        d = ftuv.vec_distance(st, en)
        if print_distances:
            line = MyLine([st[0] + xshift, en[0] + xshift],
                          [st[1] + yshift, en[1] + yshift],
                          color="orange", linestyle="--")
            text.append("{:3} - {:3}: {:5.2f}".format(s, e, d))
        else:
            line = MyLine([st[0] + xshift, en[0] + xshift],
                          [st[1] + yshift, en[1] + yshift],
                          label=str(round(d, 1)), color="orange",
                          linestyle="--")
        ax.add_line(line)

    ax.axis(self.get_bounding_square(margin))
    fm = font_manager.FontProperties(["monospace"], size="x-small")
    if print_distances:
        ax.text(0.01, 0.05, "\n".join(["Distances:"] + text),
                transform=ax.transAxes, fontproperties=fm)
    if area:
        rnaArea = so.cascaded_union(polygons)
        rnaXs, rnaYs = rnaArea.exterior.xy
        ax.fill(rnaXs, rnaYs, alpha=0.5)
    out = ax.plot()
    if show:
        plt.show()
        return
    return out
def extend_pk_description(dataset, filename, pk_type, rna, pk, pk_number):
    """
    Extend the dataset with a detailed description of the current pseudoknot
    in the current file, e.g. angles and distances between its stems.

    One row is appended per entry of `pk.residue_numbers`; each row
    describes one pair of stems (stem1, stem2) of the pseudoknot. Nothing is
    returned; `dataset` is modified in place.

    :param dataset: Current dataset that will be updated. Its values are
                    expected to support `.append` (e.g. a defaultdict(list)).
    :param filename: Filename of the current structure
    :param pk_type: Class of the pseudoknot
    :param rna: A forgi CoarseGrainRNA object
    :param pk: Structure of the pseudoknot, a NumberedDotbracket object,
               in a condensed (shadow-like) representation.
               This representation always contains the most 5' basepair.
    :param pk_number: consecutive number of the pseudoknot
    """
    domains = rna.get_domains()
    helices = domains["rods"]  # A list of elements, e.g. ["s0", "i0", "s1"]
    log.debug("Helices: %s", helices)
    #rna.log(logging.WARNING)
    # For every residue of the condensed pseudoknot: the element containing
    # the residue itself (5') and the element containing the corresponding
    # helix end (3').
    stems_5p = []
    stems_3p = []
    nums = []
    log.debug("pk Residue numbers %s", pk.residue_numbers)
    log.debug("pk helix ends %s", pk.helix_ends)
    for i, resnum in enumerate(pk.residue_numbers):
        num = rna.seq.to_integer(resnum)
        nums.append(num)
        element_5p = rna.get_node_from_residue_num(num)
        stems_5p.append(element_5p)
        num2 = rna.seq.to_integer(pk.helix_ends[i])
        log.debug("num %s nums2 %s", num, num2)
        element_3p = rna.get_node_from_residue_num(num2)
        stems_3p.append(element_3p)
    log.debug("nums %s", nums)
    for i, stem1_5p in enumerate(stems_5p):
        dataset["Filename"].append(filename)
        # NOTE(review): unlike all other columns, this is an assignment and
        # not an append, so the entry is overwritten on every call.
        # Confirm that this is intended.
        dataset["rnaname"] = rna.name
        dataset["pk_type"].append(pk_type)
        dataset["pk_id"].append(pk_number)
        dataset["angle_nr"].append(i)
        if pk_type == "other":
            dataset["pk_structure"].append(str(pk))
        else:
            dataset["pk_structure"].append("")
        # Is this the first occurrence of the stem in stems_5p?
        if stems_5p.index(stem1_5p) == i:
            # First occurrence. Strand 0, look at 3' end of helix
            stem1 = stems_3p[i]
            strand = 0
        else:
            assert i > stems_5p.index(stem1_5p)
            stem1 = stem1_5p
            strand = 1
        # The partner of the last stem is the first stem again; this pair is
        # flagged as lying outside of the pseudoknot.
        try:
            stem2_5p = stems_5p[i + 1]
        except IndexError:
            stem2_5p = stems_5p[0]
            outside_pk = True
        else:
            outside_pk = False
        if outside_pk or stems_5p.index(stem2_5p) == i + 1:
            # First occurrence
            stem2 = stem2_5p
            strand2 = 0
        else:
            strand2 = 1
            if outside_pk:
                stem2 = stems_3p[0]
            else:
                stem2 = stems_3p[i + 1]
        log.debug("Stem 5' %s, 3' %s, stem1 %s stem2 %s", stems_5p, stems_3p,
                  stem1, stem2)
        # Enable stacking analysis via DSSR.
        # Differentiate between stacking (True), no stacking (False) and
        # breaks within/around the pseudoknot (-1), incl. 'virtual' angles,
        # e.g. H-Type angle_type3
        ml_stack = []
        if rna.dssr:
            # Map every nucleotide to its non-canonical pairing partners.
            nc_bps = list(rna.dssr.noncanonical_pairs())
            nc_dict = defaultdict(list)
            for nt1, nt2, typ in nc_bps:
                nc_dict[nt1].append((nt2, typ))
                nc_dict[nt2].append((nt1, typ))
            stacking_loops = rna.dssr.stacking_loops()
            # Number of times stem1 was encountered during the walk so far.
            start_found = 0
            # The non-stem elements passed on the way from stem1 to stem2.
            connection = []
            stacking = None
            # The stem of a side branch that is currently being skipped.
            branch = None
            log.debug("Checking %s and %s for stacking, strand %s", stem1,
                      stem2, strand)
            for elem in rna.iter_elements_along_backbone(
            ):  # Walk along the backbone
                if start_found == strand + 1:
                    # We are behind the relevant strand of stem1.
                    if branch:
                        # Skip everything until the stem that opened the
                        # side branch is encountered again.
                        log.debug("in branch: elem %s, branch %s, stacking %s",
                                  elem, branch, stacking)
                        if elem == branch:
                            log.debug("End branch at %s", elem)
                            branch = None
                            log.debug("Branch end")
                        continue
                    if elem[0] != "s":
                        connection.append(elem)
                        if rna.defines[elem] and rna.defines[elem][
                                -1] in rna.backbone_breaks_after:
                            stacking = -1
                        if elem not in stacking_loops and stacking != -1:
                            stacking = False
                    elif elem == stem2:
                        # Nothing on the way contradicted stacking.
                        if stacking is None:
                            stacking = True
                        log.debug("Found second stem, elem %s, stacking %s",
                                  elem, stacking)
                        break
                    elif elem[0] == "s" and connection:
                        # A stem other than stem2: start of a side branch.
                        branch = elem
                        if rna.defines[elem][-1] in rna.backbone_breaks_after:
                            stacking = -1
                    log.debug("elem %s, stacking %s, branch %s", elem,
                              stacking, branch)
                elif elem == stem1:
                    start_found += 1
                    if rna.defines[elem][strand * 2 +
                                         1] in rna.backbone_breaks_after:
                        stacking = -1
                    log.debug("First stem, elem %s, stacking %s", elem,
                              stacking)
            else:
                # The walk ended without reaching stem2.
                log.debug("End iteration, stacking->-1")
                stacking = -1
            log.debug("Finally, stacking = %s", stacking)
            # More detailed stacking (including backbone breaks within and
            # around the pseudoknot)
            dataset["this_loop_stacking_dssr"].append(stacking)
            dataset["connecting_loops"].append(",".join(connection))
            # More general stacking information
            connecting_loops = rna.edges[stem1] & rna.edges[stem2]
            for loop in connecting_loops:
                if loop in stacking_loops:
                    ml_stack.append(loop)
            stacks = rna.dssr.coaxial_stacks()
            log.info("Stacks: %s", stacks)
            for stack in stacks:
                if stem1 in stack and stem2 in stack:
                    # the two stems stack, but we do not specify along which
                    # multiloop segment they stack.
                    dataset["is_stacking_dssr"].append(True)
                    break
            else:
                dataset["is_stacking_dssr"].append(False)
            # Does the connection form base-triples with the stem?
            stem1_triples = 0
            stem2_triples = 0
            aminors1 = 0
            aminors2 = 0
            aminors = list(rna.dssr.aminor_interactions())
            for elem in connection:
                for nt in rna.define_residue_num_iterator(elem, seq_ids=True):
                    # A nucleotide is counted either as A-minor interaction
                    # or (otherwise) via its non-canonical pairing partners.
                    if (nt, stem1) in aminors:
                        aminors1 += 1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem1)
                    elif (nt, stem2) in aminors:
                        aminors2 += 1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem2)
                    else:
                        for partner, typ in nc_dict[nt]:
                            if rna.get_elem(partner) == stem1:
                                log.debug("base_triple %s, %s: %s-%s (%s)",
                                          elem, stem1, nt, partner, typ)
                                stem1_triples += 1
                            elif rna.get_elem(partner) == stem2:
                                log.debug("base_triple %s, %s: %s-%s (%s)",
                                          elem, stem2, nt, partner, typ)
                                stem2_triples += 1
            log.debug("%s has a length of %s and %s triples", stem1,
                      rna.stem_length(stem1), stem1_triples)
            log.debug("%s has a length of %s and %s triples", stem2,
                      rna.stem_length(stem2), stem2_triples)
            # Counts are normalized by the length of the respective stem.
            dataset["stem1_basetripleperc_dssr"].append(stem1_triples /
                                                        rna.stem_length(stem1))
            dataset["stem2_basetripleperc_dssr"].append(stem2_triples /
                                                        rna.stem_length(stem2))
            dataset["stem1_aminorperc_dssr"].append(aminors1 /
                                                    rna.stem_length(stem1))
            dataset["stem2_aminorperc_dssr"].append(aminors2 /
                                                    rna.stem_length(stem2))
        else:
            # Without DSSR annotation, the DSSR-derived columns are NaN/empty.
            dataset["is_stacking_dssr"].append(float("nan"))
            dataset["this_loop_stacking_dssr"].append(float("nan"))
            dataset["connecting_loops"].append("")
            dataset["stem1_basetripleperc_dssr"].append(float("nan"))
            dataset["stem2_basetripleperc_dssr"].append(float("nan"))
            dataset["stem1_aminorperc_dssr"].append(float("nan"))
            dataset["stem2_aminorperc_dssr"].append(float("nan"))
        dataset["stacking_loops"].append(",".join(ml_stack))
        # Geometry of the two stems.
        pos1, dir1 = stem_parameters(stem1, rna, not strand)
        pos2, dir2 = stem_parameters(stem2, rna, strand2)
        dataset["stem1"].append(stem1)
        dataset["stem2"].append(stem2)
        dataset["angle_between_stems"].append(ftuv.vec_angle(dir1, dir2))
        dataset["distance_between"].append(ftuv.vec_distance(pos1, pos2))
        # Geometry relative to the stem returned by stem_after_next_ml, if
        # there is one that differs from stem2.
        next_stem = None
        if not outside_pk:
            next_stem = stem_after_next_ml(rna, nums[i], before=stem2)
            if next_stem == stem2:
                next_stem = None
        if next_stem:
            posN, dirN = stem_parameters(next_stem, rna, 0)
            dataset["angle_to_next"].append(ftuv.vec_angle(dir1, dirN))
            dataset["distance_to_next"].append(ftuv.vec_distance(pos1, posN))
            dataset["next_stem"].append(next_stem)
        else:
            dataset["angle_to_next"].append("")
            dataset["distance_to_next"].append("")
            dataset["next_stem"].append("")
        dataset["outside_pk"].append(outside_pk)
def update(self, sm, step):
    """
    Record the current distance between the two watched nucleotides.

    :param sm: An object whose `bg` attribute provides
               `get_virtual_residue`.
    :param step: Not used by this method.
    :return: The distance, formatted as a string like `" 12.34 A"`.
    """
    first_pos = sm.bg.get_virtual_residue(self._nuc1, True)
    second_pos = sm.bg.get_virtual_residue(self._nuc2, True)
    separation = ftuv.vec_distance(first_pos, second_pos)
    # The distance is appended to the first history series.
    self.history[0].append(separation)
    return "{:6.2f} A".format(separation)
def describe_ml_segments(cg):
    """
    Tabulate descriptors of all multiloop segments and interior loops.

    Every loop returned by `cg.find_mlonly_multiloops()` and every interior
    loop of `cg` is visited. For each of its segments whose name starts
    with "m" or "i", one entry is appended to every column of the table.

    :param cg: The coarse-grained RNA to describe.
    :return: A defaultdict(list) mapping column names to lists of equal
             length. If at least one row was added, the numbering columns
             computed by `number_by` are included as well.
    """
    table = defaultdict(list)
    multiloops = cg.find_mlonly_multiloops()
    interior_loops = [[iloop] for iloop in cg.iloop_iterator()]
    for loop in it.chain(multiloops, interior_loops):
        print(loop)
        description = (["interior_loop"] if loop[0][0] == "i" else
                       cg.describe_multiloop(loop))

        # Junction classification; only available if roles can be assigned.
        try:
            j3_roles = cg._assign_loop_roles(loop)
        except ValueError:
            j3_roles = None
        j3_familyFlat = j3_family3D = j3_familyPerp = j3_Delta = None
        if j3_roles:
            j3_familyFlat = cg._junction_family_westhof1(j3_roles)
            j3_family3D = cg._junction_family_3d(j3_roles)
            j3_familyPerp = cg._junction_family_is_perpenticular(j3_roles)
            len_j23 = cg.get_length(j3_roles["J23"])
            j3_Delta = len_j23 - cg.get_length(j3_roles["J31"])

        # The smallest first define-value of all members identifies the loop.
        loop_start = min([float("inf")] +
                         [cg.define_a(member)[0] for member in loop])

        for segment in loop:
            if segment[0] not in "mi":
                continue
            table["loop_start_after"].append(loop_start)
            table["segment_start_after"].append(cg.define_a(segment)[0])
            table["segment"].append(segment)
            table["junction_length"].append(len(loop))
            table["segment_length"].append(cg.get_length(segment))
            if segment[0] == "i":
                short_side, long_side = sorted(
                    cg.get_bulge_dimensions(segment))[:2]
            else:
                short_side, long_side = -1, -1
            table["iloop_length_1"].append(short_side)
            table["iloop_length_2"].append(long_side)

            member_lengths = [cg.get_length(member) for member in loop]
            table["loops_largest_segment_length"].append(max(member_lengths))
            table["loops_shortest_segment_length"].append(min(member_lengths))
            table["sum_of_loops_segment_lengths"].append(sum(member_lengths))
            table["loop_segment_lengths"].append(
                ",".join(str(length) for length in sorted(member_lengths)))

            angle_type = abs(cg.get_angle_type(segment, allow_broken=True))
            table["angle_type"].append(angle_type)

            # Directions of the two connected stems. A stem attached with
            # sides (1, 0) has its direction reversed.
            stem_a, stem_b = cg.connections(segment)
            dir_a = cg.coords.get_direction(stem_a)
            sides_a = cg.get_sides(stem_a, segment)
            if sides_a == (1, 0):
                vec_a = -dir_a
            else:
                assert sides_a == (0, 1)
                vec_a = dir_a
            dir_b = cg.coords.get_direction(stem_b)
            sides_b = cg.get_sides(stem_b, segment)
            if sides_b == (1, 0):
                vec_b = -dir_b
            else:
                assert sides_b == (0, 1)
                vec_b = dir_b
            table["angle_between_stems"].append(ftuv.vec_angle(vec_a, vec_b))

            # Distance of the attached end of one stem from the axis line of
            # the other stem.
            table["offset1"].append(
                ftuv.point_line_distance(cg.coords[stem_a][sides_a[0]],
                                         cg.coords[stem_b][0], dir_b))
            table["offset2"].append(
                ftuv.point_line_distance(cg.coords[stem_b][sides_b[0]],
                                         cg.coords[stem_a][0], dir_a))

            # Distance between the two stem axes, each extended far beyond
            # the end that is attached to the segment.
            near_a = cg.coords[stem_a][sides_a[0]]
            far_a = cg.coords[stem_a][sides_a[1]]
            near_b = cg.coords[stem_b][sides_b[0]]
            far_b = cg.coords[stem_b][sides_b[1]]
            closest_points = ftuv.line_segment_distance(
                near_a, near_a + (near_a - far_a) * 100000,
                near_b, near_b + (near_b - far_b) * 100000)
            table["offset"].append(ftuv.vec_distance(*closest_points))
            table["junction_va_distance"].append(
                ftug.junction_virtual_atom_distance(cg, segment))

            table["is_external_multiloop"].append("open" in description)
            table["is_pseudoknotted_multiloop"].append(
                "pseudoknot" in description)
            table["is_regular_multiloop"].append(
                "regular_multiloop" in description)
            table["is_interior_loop"].append("interior_loop" in description)

            if j3_roles is None:
                elem_role = "?"
            else:
                # Exactly one role has to belong to this segment.
                (elem_role,) = [role for role, member in j3_roles.items()
                                if member == segment]
            table["j3_role"].append(elem_role)
            table["j3_familyFlat"].append(j3_familyFlat)
            table["j3_family3D"].append(j3_family3D)
            table["j3_familyPerp"].append(j3_familyPerp)
            table["j3_Delta_j23_j31"].append(j3_Delta)

            table["dssr_stacking"].append(
                "dssr_stacks" in cg.infos
                and segment in cg.infos["dssr_stacks"])

            # For two consecutive segments of angle type 5: the angle between
            # the two stems that are not shared by both segments, folded
            # into the range [0, pi/2].
            kh_stem_angle = float("nan")
            if angle_type == 5:
                next_ml = cg.get_next_ml_segment(segment)
                if (isinstance(next_ml, str) and next_ml[0] == "m"
                        and abs(cg.get_angle_type(
                            next_ml, allow_broken=True)) == 5):
                    own_stems = cg.edges[segment]
                    next_stems = cg.edges[next_ml]
                    try:
                        outer_a, outer_b = ((own_stems | next_stems) -
                                            (own_stems & next_stems))
                    except ValueError:
                        pass
                    else:
                        kh_stem_angle = ftuv.vec_angle(
                            cg.coords.get_direction(outer_a),
                            cg.coords.get_direction(outer_b))
                        if kh_stem_angle > math.pi / 2:
                            kh_stem_angle = math.pi - kh_stem_angle
            table["kh_stem_angle"].append(kh_stem_angle)

    if table:
        table["pk_number"] = number_by(table, "loop_start_after",
                                       "is_pseudoknotted_multiloop")
        table["loop_number"] = number_by(table, "loop_start_after", None)
        table["reguler_multiloop_number"] = number_by(
            table, "loop_start_after", "is_regular_multiloop")
    return table
def describe_ml_segments(cg):
    """
    Tabulate descriptors of all multiloop segments and interior loops.

    Every loop returned by `cg.find_mlonly_multiloops()` and every interior
    loop of `cg` is visited. For each of its segments whose name starts
    with "m" or "i", one entry is appended to every column of the table.

    :param cg: The coarse-grained RNA to describe.
    :return: A defaultdict(list) mapping column names to lists of equal
             length. If at least one row was added, the numbering columns
             computed by `number_by` are included as well.
    """
    table = defaultdict(list)
    multiloops = cg.find_mlonly_multiloops()
    interior_loops = [[iloop] for iloop in cg.iloop_iterator()]
    for loop in it.chain(multiloops, interior_loops):
        print(loop)
        description = (["interior_loop"] if loop[0][0] == "i" else
                       cg.describe_multiloop(loop))

        # Junction classification; only available if roles can be assigned.
        try:
            j3_roles = cg._assign_loop_roles(loop)
        except ValueError:
            j3_roles = None
        j3_familyFlat = j3_family3D = j3_familyPerp = j3_Delta = None
        if j3_roles:
            j3_familyFlat = cg._junction_family_westhof1(j3_roles)
            j3_family3D = cg._junction_family_3d(j3_roles)
            j3_familyPerp = cg._junction_family_is_perpenticular(j3_roles)
            len_j23 = cg.get_length(j3_roles["J23"])
            j3_Delta = len_j23 - cg.get_length(j3_roles["J31"])

        # The smallest first define-value of all members identifies the loop.
        loop_start = min([float("inf")] +
                         [cg.define_a(member)[0] for member in loop])

        for segment in loop:
            if segment[0] not in "mi":
                continue
            table["loop_start_after"].append(loop_start)
            table["segment_start_after"].append(cg.define_a(segment)[0])
            table["segment"].append(segment)
            table["junction_length"].append(len(loop))
            table["segment_length"].append(cg.get_length(segment))
            if segment[0] == "i":
                short_side, long_side = sorted(
                    cg.get_bulge_dimensions(segment))[:2]
            else:
                short_side, long_side = -1, -1
            table["iloop_length_1"].append(short_side)
            table["iloop_length_2"].append(long_side)

            member_lengths = [cg.get_length(member) for member in loop]
            table["loops_largest_segment_length"].append(max(member_lengths))
            table["loops_shortest_segment_length"].append(min(member_lengths))
            table["sum_of_loops_segment_lengths"].append(sum(member_lengths))
            table["loop_segment_lengths"].append(
                ",".join(str(length) for length in sorted(member_lengths)))

            angle_type = abs(cg.get_angle_type(segment, allow_broken=True))
            table["angle_type"].append(angle_type)

            # Directions of the two connected stems. A stem attached with
            # sides (1, 0) has its direction reversed.
            stem_a, stem_b = cg.connections(segment)
            dir_a = cg.coords.get_direction(stem_a)
            sides_a = cg.get_sides(stem_a, segment)
            if sides_a == (1, 0):
                vec_a = -dir_a
            else:
                assert sides_a == (0, 1)
                vec_a = dir_a
            dir_b = cg.coords.get_direction(stem_b)
            sides_b = cg.get_sides(stem_b, segment)
            if sides_b == (1, 0):
                vec_b = -dir_b
            else:
                assert sides_b == (0, 1)
                vec_b = dir_b
            table["angle_between_stems"].append(ftuv.vec_angle(vec_a, vec_b))

            # Distance of the attached end of one stem from the axis line of
            # the other stem.
            table["offset1"].append(
                ftuv.point_line_distance(cg.coords[stem_a][sides_a[0]],
                                         cg.coords[stem_b][0], dir_b))
            table["offset2"].append(
                ftuv.point_line_distance(cg.coords[stem_b][sides_b[0]],
                                         cg.coords[stem_a][0], dir_a))

            # Distance between the two stem axes, each extended far beyond
            # the end that is attached to the segment.
            near_a = cg.coords[stem_a][sides_a[0]]
            far_a = cg.coords[stem_a][sides_a[1]]
            near_b = cg.coords[stem_b][sides_b[0]]
            far_b = cg.coords[stem_b][sides_b[1]]
            closest_points = ftuv.line_segment_distance(
                near_a, near_a + (near_a - far_a) * 100000,
                near_b, near_b + (near_b - far_b) * 100000)
            table["offset"].append(ftuv.vec_distance(*closest_points))
            table["junction_va_distance"].append(
                ftug.junction_virtual_atom_distance(cg, segment))

            table["is_external_multiloop"].append("open" in description)
            table["is_pseudoknotted_multiloop"].append(
                "pseudoknot" in description)
            table["is_regular_multiloop"].append(
                "regular_multiloop" in description)
            table["is_interior_loop"].append("interior_loop" in description)

            if j3_roles is None:
                elem_role = "?"
            else:
                # Exactly one role has to belong to this segment.
                (elem_role,) = [role for role, member in j3_roles.items()
                                if member == segment]
            table["j3_role"].append(elem_role)
            table["j3_familyFlat"].append(j3_familyFlat)
            table["j3_family3D"].append(j3_family3D)
            table["j3_familyPerp"].append(j3_familyPerp)
            table["j3_Delta_j23_j31"].append(j3_Delta)

            table["dssr_stacking"].append(
                "dssr_stacks" in cg.infos
                and segment in cg.infos["dssr_stacks"])

            # For two consecutive segments of angle type 5: the angle between
            # the two stems that are not shared by both segments, folded
            # into the range [0, pi/2].
            kh_stem_angle = float("nan")
            if angle_type == 5:
                next_ml = cg.get_next_ml_segment(segment)
                if (isinstance(next_ml, str) and next_ml[0] == "m"
                        and abs(cg.get_angle_type(
                            next_ml, allow_broken=True)) == 5):
                    own_stems = cg.edges[segment]
                    next_stems = cg.edges[next_ml]
                    try:
                        outer_a, outer_b = ((own_stems | next_stems) -
                                            (own_stems & next_stems))
                    except ValueError:
                        pass
                    else:
                        kh_stem_angle = ftuv.vec_angle(
                            cg.coords.get_direction(outer_a),
                            cg.coords.get_direction(outer_b))
                        if kh_stem_angle > math.pi / 2:
                            kh_stem_angle = math.pi - kh_stem_angle
            table["kh_stem_angle"].append(kh_stem_angle)

    if table:
        table["pk_number"] = number_by(table, "loop_start_after",
                                       "is_pseudoknotted_multiloop")
        table["loop_number"] = number_by(table, "loop_start_after", None)
        table["reguler_multiloop_number"] = number_by(
            table, "loop_start_after", "is_regular_multiloop")
    return table
def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """
    Collect summary statistics about one coarse-grained RNA in a flat dict.

    :param cg: The coarse-grained RNA to describe.
    :param file_num: Running number of the file this RNA came from.
                     Only used in log messages (printed as "1st", "2nd", ...).
    :param dist_pais: Iterable of `(from_nt, to_nt)` pairs. For each pair the
                      distance between the two virtual residues is stored under
                      the key `distance_<from_nt>_<to_nt>`.
                      (The parameter name is kept as is, so existing callers
                      keep working.)
    :param angle_pairs: Iterable of `(elem1, elem2)` element names. For each
                        pair the angle between the two element directions is
                        stored under the key `angle_<elem1>_<elem2>`.
    :return: A dictionary mapping column names to values. Quantities that
             could not be calculated are stored as `float("nan")`.
    """

    def ordinal_suffix():
        # 11, 12 and 13 take "th": the boolean factor turns the lookup key
        # into 0 for them, which is not in the table.
        key = file_num % 10 * (file_num % 100 not in [11, 12, 13])
        return {1: "st", 2: "nd", 3: "rd"}.get(key, "th")

    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    for letter in "smifth":
        data["num_" + letter] = sum(1 for x in cg.defines if x[0] == letter)

    # Count the multiloops by the descriptions the cg assigns to them.
    multiloops = cg.find_mlonly_multiloops()
    junct3 = 0
    junct4 = 0
    reg = 0
    pk = 0
    op = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            reg += 1
        if "pseudoknot" in descriptors:
            pk += 1
        if "open" in descriptors:
            op += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = op
    data["pseudoknots"] = pk
    data["regular_mls"] = reg
    data["total_mls"] = len(multiloops)
    try:
        data["longest_ml"] = max(len(x) for x in multiloops)
    except ValueError:
        # max() of an empty sequence: there are no multiloops.
        data["longest_ml"] = 0

    # Shape descriptors. If the first one fails, all of them are set to nan.
    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        nan = float("nan")
        data["rog_fast"] = nan
        data["rog_vres"] = nan
        data["anisotropy_fast"] = nan
        data["anisotropy_vres"] = nan
        data["asphericity_fast"] = nan
        data["asphericity_vres"] = nan
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    # Iterate over the parameter. The original body referred to the undefined
    # name `dist_pairs` here.
    for from_nt, to_nt in dist_pais:
        try:
            dist = ftuv.vec_distance(
                cg.get_virtual_residue(int(from_nt), True),
                cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Deliberately broad: a failure for one pair must not abort the
            # description of the whole RNA.
            dist = float("nan")
            # %s instead of %d for the nucleotides: they are converted with
            # int() above, so they may well be strings, and %d would make
            # the logging call itself fail.
            log.warning(
                "%d%s File %s: Could not calculate distance between "
                "%s and %s: %s occurred: %s", file_num, ordinal_suffix(),
                cg.name, from_nt, to_nt, type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist

    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            angle = float("nan")
            log.warning(
                "%d%s File %s: Could not calculate angle between "
                "%s and %s: %s occurred: %s", file_num, ordinal_suffix(),
                cg.name, elem1, elem2, type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle

    # Missing residues relative to the whole sequence.
    data["missing_residues_5prime"] = len(cg.seq.with_missing[:1]) - 1
    data["missing_residues_3prime"] = (
        len(cg.seq.with_missing[cg.seq_length:]) - 1)
    data["missing_residues_middle"] = (
        len(cg.seq.with_missing[1:cg.seq_length])
        - len(cg.seq[1:cg.seq_length]))
    data["missing_residues_total"] = (len(cg.seq.with_missing[:])
                                      - len(cg.seq[:]))

    # Missing residues per chain: every backbone break contributes to the
    # 3' count (part before the '&') and the 5' count (part after the '&').
    fp = len(cg.seq.with_missing[:1]) - 1
    tp = 0
    for bp in cg.backbone_breaks_after:
        around_break = cg.seq.with_missing[bp:bp + 1].split('&')
        fp += len(around_break[1]) - 1
        tp += len(around_break[0]) - 1
    tp += len(cg.seq.with_missing[cg.seq_length:]) - 1
    data["missing_residues_5prime_chain"] = fp
    data["missing_residues_3prime_chain"] = tp
    data["missing_residues_middle_chain"] = (
        data["missing_residues_total"] - fp - tp)

    incomplete_elem_types = Counter(x[0] for x in cg.incomplete_elements)
    data["s_with_missing"] = incomplete_elem_types["s"]
    data["i_with_missing"] = incomplete_elem_types["i"]
    data["m_with_missing"] = incomplete_elem_types["m"]
    data["h_with_missing"] = incomplete_elem_types["h"]

    # Collect the sequence around gaps inside stems where both strands have
    # residues missing, as "left&right;" entries.
    mp = ""
    if incomplete_elem_types["s"]:
        for elem in cg.incomplete_elements:
            if elem[0] != "s":
                continue
            for i in range(cg.defines[elem][0], cg.defines[elem][1]):
                left_s = cg.seq.with_missing[i:i + 1]
                # More than the two flanking nucleotides: something is
                # missing between position i and i+1.
                if len(left_s) > 2:
                    right_s = cg.seq.with_missing[
                        cg.pairing_partner(i + 1):cg.pairing_partner(i)]
                    if len(right_s) > 2:
                        mp += "{}&{};".format(left_s, right_s)
    data["missing_basepairs"] = mp
    return data
def __init__(self, cg, proj_direction=None, rotation=0,
             project_virtual_atoms=False, project_virtual_residues=[]):
    """
    Project a coarse-grained RNA onto a plane and bring the projection
    into a standard position and orientation.

    :param cg: a CoarseGrainRNA object with 3D coordinates for every element

        .. note::
            The projection is generated from this cg, but it is not
            associated with it after construction. Thus future changes
            of the cg are not reflected in the projection.

    :param proj_direction: a carthesian vector (in 3D space) in the
                           direction of projection. The length of this
                           vector is not used. If proj_direction is None,
                           cg.project_from is used.
                           If proj_direction and cg.project_from is None,
                           an error is raised.
    :param rotation: Degrees. Rotate the projection by this amount, after
                     it was brought into the standard orientation.
    :param project_virtual_atoms: Passed on to the internal projection
                                  step. If it is true, the projected virtual
                                  atoms are shifted together with the
                                  projected elements.
    :param project_virtual_residues: The numbers of the residues whose
                                     virtual residues should be projected.
                                     Stored (as a copy) in
                                     `self.virtual_residue_numbers`.
    :raises ValueError: If neither proj_direction nor cg.project_from
                        is given.
    """
    # Work on a private copy. Otherwise every instance created without this
    # argument would store the very same default list object, and the
    # attribute would alias the list of the caller.
    if project_virtual_residues is None:
        project_virtual_residues = []
    else:
        project_virtual_residues = list(project_virtual_residues)

    #: The projected coordinates of all stems
    self._coords = dict()
    self._cross_points = None
    self._proj_graph = None

    # Calculate orthonormal basis of projection plane.
    # Compare to None, because `if np.array:` raises ValueError.
    # The builtin float is used as dtype: the alias `np.float` was
    # deprecated in numpy 1.20 and removed in numpy 1.24.
    if proj_direction is not None:
        proj_direction = np.array(proj_direction, dtype=float)
    elif cg.project_from is not None:
        # We make a copy here. In case cg.project_from is modified,
        # we still want to be able to look up from what direction the
        # projection was generated.
        proj_direction = np.array(cg.project_from, dtype=float)
    else:
        raise ValueError(
            "No projection direction given and none present in the cg Object."
        )
    _, unit_vec1, unit_vec2 = ftuv.create_orthonormal_basis(proj_direction)
    self._unit_vec1 = unit_vec1
    self._unit_vec2 = unit_vec2
    self._proj_direction = proj_direction
    self._virtual_residues = []
    self.virtual_residue_numbers = project_virtual_residues
    self._project(cg, project_virtual_atoms, project_virtual_residues)

    # Rotate and translate projection into a standard orientation
    points = list(self.points)
    v1, v2 = diameter(points)
    #: The longest distance between any two points of the projection.
    self.longest_axis = ftuv.vec_distance(v1, v2)
    v1 = np.array(v1)
    v2 = np.array(v2)

    # First move the midpoint between the two diameter points to the origin.
    shift = (v1 + v2) / 2
    for key, edge in self._coords.items():
        self._coords[key] = (edge[0] - shift, edge[1] - shift)
    # Thanks to numpy broadcasting, this works without a loop.
    if project_virtual_atoms:
        self._virtual_atoms = self._virtual_atoms - shift
    if project_virtual_residues:
        self._virtual_residues = self._virtual_residues - shift

    # Then rotate by the angle derived from the diameter vector.
    rot = math.degrees(math.atan2(*(v2 - v1)))
    self.rotate(rot)

    # Finally center the mean of all element endpoints at the origin.
    xmean = np.mean([x[0] for p in self._coords.values() for x in p])
    ymean = np.mean([x[1] for p in self._coords.values() for x in p])
    mean = np.array([xmean, ymean])
    for key, edge in self._coords.items():
        self._coords[key] = (edge[0] - mean, edge[1] - mean)
    if project_virtual_atoms:
        self._virtual_atoms = self._virtual_atoms - mean
    if project_virtual_residues:
        self._virtual_residues = self._virtual_residues - mean

    # From this, further rotate if requested by the user.
    if rotation != 0:
        self.rotate(rotation)
def plot(self, ax=None, show=False, margin=5, linewidth=None,
         add_labels=False, line2dproperties={}, xshift=0, yshift=0,
         show_distances=[], print_distances=False, virtual_atoms=True):
    """
    Plots the 2D projection.

    This uses modified copy-paste code by Syrtis Major (c)2014-2015
    under the BSD 3-Clause license and code from matplotlib under the PSF license.

    :param ax: The axes to draw to.
               You can get it by calling `fig, ax=matplotlib.pyplot.subplots()`
    :param show: If true, the matplotlib.pyplot.show() will be called at the
                 end of this function.
    :param margin: A numeric value. The margin around the plotted projection
                   inside the (sub-)plot.
    :param linewidth: The width of the lines projection.
    :param add_labels: Display the name of the corresponding coarse grain
                       element in the middle of each segment in the
                       projection. Either a bool or a set of labels to
                       display.
    :param line2dproperties: A dictionary. Will be passed as `**kwargs` to
                             the constructor of `matplotlib.lines.Line2D`.
                             See http://matplotlib.org/api/lines_api.html#matplotlib.lines.Line2D
                             The dictionary is not modified.
    :param xshift, yshift: Shift the projection by the given amount inside
                           the canvas.
    :param show_distances: A list of tuples of strings,
                           e.g. `[("h1","h8"),("h2","m15")]`.
                           Show the distances between these elements in the
                           plot
    :param print_distances: Bool. Print all distances from show_distances at
                            the side of the plot instead of directly next to
                            the distance
    :param virtual_atoms: Bool. If true and projected virtual atoms are
                          present, draw them as gray circles.
    :return: The result of `ax.plot()`, or None if `show` is true or if
             matplotlib could not be imported or no figure could be created.
    """
    # In case of ssh without -X option, a TypeError might be raised
    # during the import of pyplot
    # This probably depends on the version of some library.
    # This is also the reason why we import matplotlib only inside the plot
    # function.
    text = []
    try:
        if ax is None or show:
            import matplotlib.pyplot as plt
        import matplotlib.lines as lines
        import matplotlib.transforms as mtransforms
        import matplotlib.text as mtext
        import matplotlib.font_manager as font_manager
    except TypeError as e:
        warnings.warn("Cannot plot projection. Maybe you could not load Gtk "
                      "(no X11 server available)? During the import of matplotlib "
                      "the following Error occured:\n {}: {}".format(type(e).__name__, e))
        return
    except ImportError as e:
        warnings.warn("Cannot import matplotlib. Do you have matplotlib installed? "
                      "The following error occured:\n {}: {}".format(type(e).__name__, e))
        return

    # NOTE: A shapely-based representation of the covered area used to live
    # here. It had been switched off unconditionally (shapely was never
    # imported), so the unreachable code was removed.

    def circles(x, y, s, c='b', ax=None, vmin=None, vmax=None, **kwargs):
        """
        Make a scatter of circles plot of x vs y, where x and y are sequence
        like objects of the same lengths. The size of circles are in data scale.

        Parameters
        ----------
        x,y : scalar or array_like, shape (n, )
            Input data
        s : scalar or array_like, shape (n, )
            Radius of circle in data scale (ie. in data unit)
        c : color or sequence of color, optional, default : 'b'
            `c` can be a single color format string, or a sequence of color
            specifications of length `N`, or a sequence of `N` numbers to be
            mapped to colors using the `cmap` and `norm` specified via kwargs.
            Note that `c` should not be a single numeric RGB or
            RGBA sequence because that is indistinguishable from an array of
            values to be colormapped.  `c` can be a 2-D array in which the
            rows are RGB or RGBA, however.
        ax : Axes object
            Parent axes of the plot. A TypeError is raised if it is None.
        vmin, vmax : scalar, optional, default: None
            `vmin` and `vmax` are used in conjunction with `norm` to normalize
            luminance data.  If either are `None`, the min and max of the
            color array is used.  (Note if you pass a `norm` instance, your
            settings for `vmin` and `vmax` will be ignored.)

        Returns
        -------
        paths : `~matplotlib.collections.PathCollection`

        Other parameters
        ----------------
        kwargs : `~matplotlib.collections.Collection` properties
            eg. alpha, edgecolors, facecolors, linewidths, linestyles, norm, cmap

        Examples
        --------
        a = np.arange(11)
        circles(a, a, a*0.2, c=a, alpha=0.5, edgecolor='none')

        License
        --------
        This function is copied (and potentially modified) from
        http://stackoverflow.com/a/24567352/5069869

        Copyright Syrtis Major, 2014-2015

        This function is under [The BSD 3-Clause License]
        (http://opensource.org/licenses/BSD-3-Clause)
        """
        from matplotlib.patches import Circle
        from matplotlib.collections import PatchCollection

        if ax is None:
            raise TypeError()

        if fus.is_string_type(c):
            color = c  # ie. use colors.colorConverter.to_rgba_array(c)
        else:
            color = None  # use cmap, norm after collection is created
        kwargs.update(color=color)

        if np.isscalar(x):
            patches = [Circle((x, y), s), ]
        elif np.isscalar(s):
            patches = [Circle((x_, y_), s) for x_, y_ in zip(x, y)]
        else:
            patches = [Circle((x_, y_), s_) for x_, y_, s_ in zip(x, y, s)]
        collection = PatchCollection(patches, **kwargs)

        if color is None:
            collection.set_array(np.asarray(c))
            if vmin is not None or vmax is not None:
                collection.set_clim(vmin, vmax)

        ax.add_collection(collection)
        ax.autoscale_view()
        return collection

    class MyLine(lines.Line2D):
        """
        A line that draws its label as text at the middle of the line.

        Copied and modified from http://matplotlib.org/examples/api/line_with_text.html,
        which is part of matplotlib 1.5.0 (Copyright (c) 2012-2013 Matplotlib Development
        Team; All Rights Reserved).
        Used under the matplotlib license: http://matplotlib.org/users/license.html
        """

        def __init__(self, *args, **kwargs):
            # we'll update the position when the line data is set
            fm = font_manager.FontProperties(size="large", weight="demi")
            self.text = mtext.Text(0, 0, '', fontproperties=fm)

            lines.Line2D.__init__(self, *args, **kwargs)

            # we can't access the label attr until *after* the line is
            # inited
            self.text.set_text(self.get_label())

        def set_figure(self, figure):
            self.text.set_figure(figure)
            lines.Line2D.set_figure(self, figure)

        def set_axes(self, axes):
            self.text.set_axes(axes)
            lines.Line2D.set_axes(self, axes)

        def set_transform(self, transform):
            # 2 pixel offset
            texttrans = transform + mtransforms.Affine2D().translate(2, 2)
            self.text.set_transform(texttrans)
            lines.Line2D.set_transform(self, transform)

        def set_data(self, x, y):
            if len(x):
                # Place the text at the midpoint between first and last point.
                self.text.set_position(
                    ((x[0] + x[-1]) / 2, (y[0] + y[-1]) / 2))
            lines.Line2D.set_data(self, x, y)

        def draw(self, renderer):
            # draw the line first, then the label on top of it
            lines.Line2D.draw(self, renderer)
            self.text.draw(renderer)

    # Work on a private copy of the properties: writing into
    # `line2dproperties` itself would modify the dictionary of the caller or,
    # worse, the shared default argument, so that e.g. a linewidth given in
    # one call would silently be reused by all later calls.
    user_defined_color = "color" in line2dproperties
    lprop = dict(line2dproperties)
    if "linewidth" in lprop and linewidth is not None:
        warnings.warn(
            "Got multiple values for 'linewidth' (also present in line2dproperties)")
    if linewidth is not None:
        lprop["linewidth"] = linewidth
    if "solid_capstyle" not in lprop:
        lprop["solid_capstyle"] = "round"

    if ax is None:
        try:
            fig, ax = plt.subplots(1, 1)
        except Exception as e:
            warnings.warn("Cannot create Axes or Figure. You probably have no graphical "
                          "display available. The Error was:\n {}: {}".format(type(e).__name__, e))
            return

    if virtual_atoms and len(self._virtual_atoms) > 0:
        circles(self._virtual_atoms[:, 0], self._virtual_atoms[:, 1],
                c="gray", s=0.7, ax=ax)

    # Default colors by element type (first letter of the element name).
    default_colors = {"s": "green", "i": "gold", "h": "blue",
                      "m": "red", "f": "blue", "t": "blue"}
    for label, (s, e) in self._coords.items():
        if not user_defined_color:
            lprop["color"] = default_colors.get(label[:1], "black")
        # add_labels is either a bool or a collection of labels to display.
        if add_labels != False and (add_labels == True or label in add_labels):
            lprop["label"] = label
        else:
            lprop["label"] = ""
        line = MyLine([s[0] + xshift, e[0] + xshift],
                      [s[1] + yshift, e[1] + yshift], **lprop)
        ax.add_line(line)

    # Dashed orange lines between the midpoints of the requested elements.
    for s, e in show_distances:
        st = (self._coords[s][0] + self._coords[s][1]) / 2
        en = (self._coords[e][0] + self._coords[e][1]) / 2
        d = ftuv.vec_distance(st, en)
        if print_distances:
            line = MyLine([st[0] + xshift, en[0] + xshift],
                          [st[1] + yshift, en[1] + yshift],
                          color="orange", linestyle="--")
            text.append("{:3} - {:3}: {:5.2f}".format(s, e, d))
        else:
            line = MyLine([st[0] + xshift, en[0] + xshift],
                          [st[1] + yshift, en[1] + yshift],
                          label=str(round(d, 1)), color="orange",
                          linestyle="--")
        ax.add_line(line)

    ax.axis(self.get_bounding_square(margin))
    fm = font_manager.FontProperties(["monospace"], size="x-small")
    if print_distances:
        ax.text(0.01, 0.05, "\n".join(["Distances:"] + text),
                transform=ax.transAxes, fontproperties=fm)
    out = ax.plot()
    if show:
        plt.show()
        return
    return out
def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """
    Collect summary statistics about one coarse-grained RNA in a flat dict.

    :param cg: The coarse-grained RNA to describe.
    :param file_num: Running number of the file this RNA came from.
                     Only used in log messages (printed as "1st", "2nd", ...).
    :param dist_pais: Iterable of `(from_nt, to_nt)` pairs. For each pair the
                      distance between the two virtual residues is stored under
                      the key `distance_<from_nt>_<to_nt>`.
                      (The parameter name is kept as is, so existing callers
                      keep working.)
    :param angle_pairs: Iterable of `(elem1, elem2)` element names. For each
                        pair the angle between the two element directions is
                        stored under the key `angle_<elem1>_<elem2>`.
    :return: A dictionary mapping column names to values. Quantities that
             could not be calculated are stored as `float("nan")`.
    """

    def ordinal_suffix():
        # 11, 12 and 13 take "th": the boolean factor turns the lookup key
        # into 0 for them, which is not in the table.
        key = file_num % 10 * (file_num % 100 not in [11, 12, 13])
        return {1: "st", 2: "nd", 3: "rd"}.get(key, "th")

    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    for letter in "smifth":
        data["num_" + letter] = sum(1 for x in cg.defines if x[0] == letter)

    # Count the multiloops by the descriptions the cg assigns to them.
    multiloops = cg.find_mlonly_multiloops()
    junct3 = 0
    junct4 = 0
    reg = 0
    pk = 0
    op = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            reg += 1
        if "pseudoknot" in descriptors:
            pk += 1
        if "open" in descriptors:
            op += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = op
    data["pseudoknots"] = pk
    data["regular_mls"] = reg
    data["total_mls"] = len(multiloops)
    try:
        data["longest_ml"] = max(len(x) for x in multiloops)
    except ValueError:
        # max() of an empty sequence: there are no multiloops.
        data["longest_ml"] = 0

    # Shape descriptors. If the first one fails, all of them are set to nan.
    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        nan = float("nan")
        data["rog_fast"] = nan
        data["rog_vres"] = nan
        data["anisotropy_fast"] = nan
        data["anisotropy_vres"] = nan
        data["asphericity_fast"] = nan
        data["asphericity_vres"] = nan
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    # Iterate over the parameter. The original body referred to the undefined
    # name `dist_pairs` here.
    for from_nt, to_nt in dist_pais:
        try:
            dist = ftuv.vec_distance(
                cg.get_virtual_residue(int(from_nt), True),
                cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Deliberately broad: a failure for one pair must not abort the
            # description of the whole RNA.
            dist = float("nan")
            # %s instead of %d for the nucleotides: they are converted with
            # int() above, so they may well be strings, and %d would make
            # the logging call itself fail.
            log.warning(
                "%d%s File %s: Could not calculate distance between "
                "%s and %s: %s occurred: %s", file_num, ordinal_suffix(),
                cg.name, from_nt, to_nt, type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist

    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            angle = float("nan")
            log.warning(
                "%d%s File %s: Could not calculate angle between "
                "%s and %s: %s occurred: %s", file_num, ordinal_suffix(),
                cg.name, elem1, elem2, type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle
    return data