def test_angle_stat_get_angle_from_cg(self):
    """Check that the angle stats of "i0" reproduce the stem-stem angle.

    Two stems get hand-picked coordinates; both angle stats returned for
    "i0" must report the angle between the stem axes, which is 135 degrees.
    """
    # NOTE(review): the line breaks inside this literal were restored from a
    # whitespace-collapsed source -- confirm against the original file.
    fa_text = """>1
    AAACCGGGCCCCCCAAUUU
    (((..(((...)))..)))
    """
    cg, = ftmc.CoarseGrainRNA.from_fasta_text(fa_text)

    # Hand-crafted geometry for the two stems and the hairpin.
    cg.coords["s0"] = np.array([0., 0., 0.]), np.array([0., 0., 1.])
    cg.twists["s0"] = np.array([0., -1., 0]), np.array([0., 1., 0.])
    cg.coords["s1"] = np.array([0., 0., 2.]), np.array([0., 1., 3.])
    cg.twists["s1"] = np.array([-1., 0., 0.]), np.array([1., 0., 0.])
    cg.coords["h0"] = np.array([0, 1, 3]), np.array([0, 2, 4])
    cg.add_bulge_coords_from_stems()
    print (cg.coords["i0"])
    print (cg.twists)

    as1, as2 = cg.get_bulge_angle_stats("i0")

    # s0 is traversed end -> start, s1 start -> end.
    s0_backward = cg.coords["s0"][0] - cg.coords["s0"][1]
    s1_forward = cg.coords["s1"][1] - cg.coords["s1"][0]

    self.assertAlmostEqual(as1.get_angle(),
                           ftuv.vec_angle(s0_backward, s1_forward))
    self.assertAlmostEqual(as2.get_angle(),
                           ftuv.vec_angle(s1_forward, s0_backward))
    expected = math.radians(135)
    self.assertAlmostEqual(as1.get_angle(), expected)
    self.assertAlmostEqual(as2.get_angle(), expected)
def test_angle_stat_get_angle_from_cg(self):
    """Angle stats of "i0" must match the angle between the two stem axes.

    The stems are placed by hand so that the expected angle is 135 degrees.
    """
    # NOTE(review): the line breaks inside this literal were restored from a
    # whitespace-collapsed source -- confirm against the original file.
    fa_text = """>1
    AAACCGGGCCCCCCAAUUU
    (((..(((...)))..)))
    """
    cg, = ftmc.CoarseGrainRNA.from_fasta_text(fa_text)

    # (stem, (start, end), (twist at start, twist at end))
    stem_setup = (
        ("s0", ([0., 0., 0.], [0., 0., 1.]), ([0., -1., 0], [0., 1., 0.])),
        ("s1", ([0., 0., 2.], [0., 1., 3.]), ([-1., 0., 0.], [1., 0., 0.])),
    )
    for stem, (start, end), (twist_start, twist_end) in stem_setup:
        cg.coords[stem] = np.array(start), np.array(end)
        cg.twists[stem] = np.array(twist_start), np.array(twist_end)
    cg.coords["h0"] = np.array([0, 1, 3]), np.array([0, 2, 4])
    cg.add_bulge_coords_from_stems()
    print(cg.coords["i0"])
    print(cg.twists)

    as1, as2 = cg.get_bulge_angle_stats("i0")

    # First compare against the angle computed from the coordinates ...
    self.assertAlmostEqual(
        as1.get_angle(),
        ftuv.vec_angle(cg.coords["s0"][0] - cg.coords["s0"][1],
                       cg.coords["s1"][1] - cg.coords["s1"][0]))
    self.assertAlmostEqual(
        as2.get_angle(),
        ftuv.vec_angle(cg.coords["s1"][1] - cg.coords["s1"][0],
                       cg.coords["s0"][0] - cg.coords["s0"][1]))
    # ... then against the known value for this geometry.
    for stat in (as1, as2):
        self.assertAlmostEqual(stat.get_angle(), math.radians(135))
def main():
    """Run ccd() on a random chain and check that its geometry is preserved.

    The number of iterations may be given as the first command line
    argument; without it, 10 iterations are run with distance printing on.
    Bond lengths and bond angles of the chain must be the same before and
    after the call to ccd().
    """

    def bond_angles(chain):
        # Angle between each pair of consecutive bond vectors.
        return [
            vec_angle(chain[i - 1] - chain[i - 2], chain[i] - chain[i - 1])
            for i in range(2, len(chain))
        ]

    def bond_lengths(chain):
        # Distance between each pair of consecutive points.
        return [
            magnitude(chain[i] - chain[i - 1]) for i in range(1, len(chain))
        ]

    # Moving segment
    moving = make_random_chain(20)
    # Fixed segment: last three residues of the moving segment
    # after applying a random rotation/translation
    fixed = rotate_last_three(moving)

    angles1 = bond_angles(moving)
    distances1 = bond_lengths(moving)

    if len(sys.argv) >= 2:
        moving = ccd(moving, fixed, iterations=int(sys.argv[1]),
                     print_d=False)
    else:
        moving = ccd(moving, fixed, 10, True)

    angles2 = bond_angles(moving)
    distances2 = bond_lengths(moving)

    assert allclose(distances1, distances2)
    assert allclose(angles1, angles2)
def main():
    """Close a random chain onto a target with ccd() and verify invariants.

    An optional first command line argument sets the iteration count
    (default: 10 iterations, with distance printing enabled). The function
    asserts that ccd() changes neither bond lengths nor bond angles.
    """

    def geometry(chain):
        # Returns (bond angles, bond lengths) of the chain, in that order.
        n_points = len(chain)
        angles = [vec_angle(chain[k - 1] - chain[k - 2],
                            chain[k] - chain[k - 1])
                  for k in range(2, n_points)]
        lengths = [magnitude(chain[k] - chain[k - 1])
                   for k in range(1, n_points)]
        return angles, lengths

    # Moving segment
    moving = make_random_chain(20)

    # Fixed segment
    # Last three residues of the moving segment
    # after applying a random rotation/translation
    fixed = rotate_last_three(moving)

    angles_before, lengths_before = geometry(moving)

    if len(sys.argv) < 2:
        moving = ccd(moving, fixed, 10, True)
    else:
        n_iterations = int(sys.argv[1])
        moving = ccd(moving, fixed, iterations=n_iterations, print_d=False)

    angles_after, lengths_after = geometry(moving)

    assert allclose(lengths_before, lengths_after)
    assert allclose(angles_before, angles_after)
def verify_virtual_twist_angles(self, cg, s):
    """Assert that consecutive virtual-residue vectors of stem `s` differ
    by an angle between 0.53 and 0.73 (as returned by ftuv.vec_angle).

    The vector compared is the second element of the tuple returned by
    ftug.virtual_res_3d_pos_core. Only indices greater than 1 are checked,
    so the pair (0, 1) is not compared.
    """
    stem_len = cg.stem_length(s)
    for idx in range(stem_len):
        _, twist_vec, _, _ = ftug.virtual_res_3d_pos_core(
            cg.coords[s], cg.twists[s], idx, stem_len)
        if idx > 1:
            step_angle = ftuv.vec_angle(twist_vec, prev_vec)
            self.assertGreater(step_angle, 0.53)
            self.assertLess(step_angle, 0.73)
        prev_vec = twist_vec
def verify_virtual_twist_angles(self, cg, s):
    """Assert that consecutive virtual-residue vectors of stem `s` differ
    by an angle between 0.1 and 0.95 (as returned by ftuv.vec_angle).

    The vector compared is the second element of the tuple returned by
    ftug.virtual_res_3d_pos_core. Only indices greater than 1 are checked,
    so the pair (0, 1) is not compared.
    """
    n_positions = cg.stem_length(s)
    lower_bound, upper_bound = 0.1, 0.95
    for position in range(n_positions):
        core = ftug.virtual_res_3d_pos_core(cg.coords[s], cg.twists[s],
                                            position, n_positions)
        (_pos, current_vec, _vec_l, _vec_r) = core
        if position > 1:
            delta = ftuv.vec_angle(current_vec, prev_vec)
            self.assertGreater(delta, lower_bound)
            self.assertLess(delta, upper_bound)
        prev_vec = current_vec
def main():
    """Print, for every residue of each interior loop and multiloop segment,
    the angle between the two stems connected by that element.

    Expects the path of a PDB file as first positional argument. Output is
    one "residue:angle" line per residue, sorted by residue.
    """
    usage = """
    usage
    """
    # The PDB file is read from args[0], so one positional argument is
    # mandatory; with fewer the help text is printed instead of crashing.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(args[0])
    angles = []

    for loop in it.chain(cg.iloop_iterator(), cg.mloop_iterator()):
        conn = cg.connections(loop)

        # Indices of the stem ends relative to this loop, as given by
        # get_sides; the first stem is read (s1e -> s1b), the second
        # (s2b -> s2e).
        (s1b, s1e) = cg.get_sides(conn[0], loop)
        (s2b, s2e) = cg.get_sides(conn[1], loop)

        angle = ftuv.vec_angle(
            cg.coords[conn[0]][s1b] - cg.coords[conn[0]][s1e],
            cg.coords[conn[1]][s2e] - cg.coords[conn[1]][s2b])

        for rn in cg.define_residue_num_iterator(loop, adjacent=True):
            angles.append((rn, angle))

    for rn, angle in sorted(angles):
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("{}:{}".format(rn, angle))
def get_angle(self):
    '''
    Return the angle between the two connected stems.

    It is computed as the angle between the fixed vector (-1, 0, 0) and
    the cartesian vector obtained from the spherical polar coordinates
    [1, self.u, self.v].
    '''
    reference_axis = np.array([-1., 0., 0.])
    second_stem = ftuv.spherical_polar_to_cartesian([1, self.u, self.v])
    return ftuv.vec_angle(reference_axis, second_stem)
def main():
    """Report the angle between the two stems flanking each interior loop.

    Expects the path of a PDB file (``~`` is expanded) as first positional
    argument and reports every interior loop with its angle via fud.pv.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much
    of a kink they introduce between the two adjacent stems.
    """
    # The PDB file is read from args[0], so one positional argument is
    # mandatory; with fewer the help text is printed instead of crashing.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(op.expanduser(args[0]))
    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        angle = ftuv.vec_angle(
            cg.coords[conn[0]][1] - cg.coords[conn[0]][0],
            cg.coords[conn[1]][1] - cg.coords[conn[1]][0])

        # NOTE(review): fud.pv receives the variable names as a string, so
        # the locals `iloop` and `angle` must keep exactly these names.
        fud.pv('iloop, angle')
def main():
    """Print "residue:angle" for every residue of each interior loop and
    multiloop segment, where angle is the angle between the two stems
    connected by that element.

    Expects the path of a PDB file as first positional argument. The lines
    are printed sorted by residue.
    """
    usage = """
    usage
    """
    # args[0] is required below, so the minimum number of positional
    # arguments is 1 (with 0 the check could never trigger).
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(args[0])
    angles = []

    for loop in it.chain(cg.iloop_iterator(), cg.mloop_iterator()):
        first_stem, second_stem = cg.connections(loop)[:2]

        # Indices of the stem ends relative to this loop, as given by
        # get_sides.
        (s1b, s1e) = cg.get_sides(first_stem, loop)
        (s2b, s2e) = cg.get_sides(second_stem, loop)

        angle = ftuv.vec_angle(
            cg.coords[first_stem][s1b] - cg.coords[first_stem][s1e],
            cg.coords[second_stem][s2e] - cg.coords[second_stem][s2b])

        angles.extend(
            (rn, angle)
            for rn in cg.define_residue_num_iterator(loop, adjacent=True))

    for rn, angle in sorted(angles):
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("{}:{}".format(rn, angle))
def main():
    """Report, for each interior loop, the angle between its two stems.

    Expects the path of a PDB file (``~`` is expanded) as first positional
    argument. Each interior loop and its angle are reported via fud.pv.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much
    of a kink they introduce between the two adjacent stems.
    """
    # args[0] is required below, so the minimum number of positional
    # arguments is 1 (with 0 the check could never trigger).
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    pdb_path = op.expanduser(args[0])
    cg = ftmc.from_pdb(pdb_path)

    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        first_axis = cg.coords[conn[0]][1] - cg.coords[conn[0]][0]
        second_axis = cg.coords[conn[1]][1] - cg.coords[conn[1]][0]
        angle = ftuv.vec_angle(first_axis, second_axis)

        # NOTE(review): fud.pv receives the variable names as a string, so
        # the locals `iloop` and `angle` must keep exactly these names.
        fud.pv('iloop, angle')
def output_all_distances(bg):
    """Print one line for every ordered pair of unconnected elements of `bg`.

    Each line holds: name, first letter and length of both elements, the
    distance between the closest points of their line segments, two
    sequence placeholders ('x'), a long-range flag ("Y"/"N") and the angle
    between the axis of the first element and the connecting vector.

    Pairs that are directly connected, that lack coordinates (KeyError) or
    whose segments are closer than 1e-6 are skipped.
    """
    for (key1, key2) in it.permutations(bg.defines.keys(), 2):
        if bg.has_connection(key1, key2):
            continue

        longrange = "Y" if key2 in bg.longrange[key1] else "N"

        try:
            (i1, i2) = cuv.line_segment_distance(bg.coords[key1][0],
                                                 bg.coords[key1][1],
                                                 bg.coords[key2][0],
                                                 bg.coords[key2][1])
            connection = i2 - i1
            separation = cuv.magnitude(connection)

            # Touching segments have no usable connecting vector.
            if abs(separation) < 0.000001:
                continue

            vec1 = bg.coords[key1][1] - bg.coords[key1][0]
            v = cuv.vec_angle(vec1, connection)
        except KeyError:
            # At least one of the two elements has no coordinates.
            continue

        seq1 = 'x'
        seq2 = 'x'

        # Single-argument call form: valid on both Python 2 and Python 3.
        print("%s %s %d %s %s %d %f %s %s %s %f" % (
            key1, key1[0], bg.get_length(key1),
            key2, key2[0], bg.get_length(key2),
            separation, seq1, seq2, longrange, v))
def get_relative_orientation(cg, l1, l2):
    '''
    Return how l1 is related to l2 in terms of three parameters. l2 should
    be the receptor of a potential A-Minor interaction, whereas l1 should
    be the donor.

        1. The distance between the two elements, as returned by
           ftug.element_distance
        2. The angle between the axis of l2 and the axis of l1
        3. The angle between the minor groove of l2 and the vector
           between the closest points of l1 and l2

    :return: The tuple (dist, angle1, angle2)
    '''
    donor_start, donor_end = cg.coords[l1][0], cg.coords[l1][1]
    receptor_start, receptor_end = cg.coords[l2][0], cg.coords[l2][1]

    # Closest points of the two line segments (i1 on l1, i2 on l2).
    (i1, i2) = ftuv.line_segment_distance(donor_start, donor_end,
                                          receptor_start, receptor_end)
    connection = i2 - i1

    angle1 = ftuv.vec_angle(cg.coords[l2][1] - cg.coords[l2][0],
                            cg.coords[l1][1] - cg.coords[l1][0])

    tw = cg.get_twists(l2)

    if l2[0] == 's':
        stem_len = cg.stem_length(l2)

        # Position of the contact point along the stem, scaled to the
        # number of base pairs.
        covered = ftuv.magnitude(i2 - cg.coords[l2][0])
        full = ftuv.magnitude(cg.coords[l2][1] - cg.coords[l2][0])
        pos = covered / full * stem_len

        vec = ftug.virtual_res_3d_pos_core(cg.coords[l2], cg.twists[l2],
                                           pos, stem_len)[1]
        angle2 = ftuv.vec_angle(vec, connection)
    else:
        # Not a stem: use the mean of the two twist vectors instead.
        angle2 = ftuv.vec_angle((tw[0] + tw[1]) / 2., connection)

    dist = ftug.element_distance(cg, l1, l2)

    return (dist, angle1, angle2)
def main():
    """Report the angle between the two stems flanking each interior loop
    and optionally store the values in a tab-separated file.

    Expects the path of a PDB file (``~`` is expanded) as first positional
    argument. With ``-o FILE``, a header row plus one "iloop, angle" row per
    interior loop is written to FILE, provided at least one loop was found.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much
    of a kink they introduce between the two adjacent stems.
    """
    # The PDB file is read from args[0], so one positional argument is
    # mandatory; with fewer the help text is printed instead of crashing.
    num_args = 1
    parser = OptionParser(usage=usage)
    parser.add_option("-o", "--output", action="store",
                      help="Store data in csv with this filename")

    (options, args) = parser.parse_args()
    data = []

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg, = ftmc.CoarseGrainRNA.from_pdb(op.expanduser(args[0]))
    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        angle = ftuv.vec_angle(
            cg.coords[conn[0]][1] - cg.coords[conn[0]][0],
            cg.coords[conn[1]][1] - cg.coords[conn[1]][0])
        data.append([iloop, angle])

        # NOTE(review): fud.pv receives the variable names as a string, so
        # the locals `iloop` and `angle` must keep exactly these names.
        fud.pv('iloop, angle')

    if options.output and data:
        with open(options.output, 'w') as out_file:
            writer = csv.writer(out_file, delimiter="\t",
                                lineterminator="\n")
            writer.writerow(["iloop", "angle"])
            writer.writerows(data)
def get_angle(self):
    '''
    Return the angle between the two connected stems.

    The second stem is given by the spherical polar coordinates
    [1, self.u, self.v]; the angle is measured against the
    vector (-1, 0, 0).
    '''
    polar = [1, self.u, self.v]
    return ftuv.vec_angle(np.array([-1., 0., 0.]),
                          ftuv.spherical_polar_to_cartesian(polar))
def describe_ml_segments(cg):
    """Collect one row of descriptors per multiloop / interior loop segment.

    Iterates over the loops from cg.find_mlonly_multiloops() followed by
    every interior loop (as a one-element loop) and appends one value per
    column for each segment whose name starts with "m" or "i".

    :param cg: The coarse grained RNA to describe.
    :return: A defaultdict(list) mapping column names to lists of equal
             length. If any segment was found, the numbering columns
             "pk_number", "loop_number" and "reguler_multiloop_number"
             (computed by number_by) are added.
    """

    def oriented_direction(stem, segment):
        # Direction of `stem`, negated if get_sides reports (1, 0) for it
        # with respect to `segment`; the only other accepted value is
        # (0, 1). Also returns the sides tuple.
        direction = cg.coords.get_direction(stem)
        sides = cg.get_sides(stem, segment)
        if sides == (1, 0):
            direction = -direction
        else:
            assert sides == (0, 1)
        return direction, sides

    data = defaultdict(list)
    multiloops = cg.find_mlonly_multiloops()
    interior_loops = [[iloop] for iloop in cg.iloop_iterator()]

    for loop in it.chain(multiloops, interior_loops):
        print(loop)
        if loop[0][0] == "i":
            description = ["interior_loop"]
        else:
            description = cg.describe_multiloop(loop)

        try:
            j3_roles = cg._assign_loop_roles(loop)
        except ValueError:
            j3_roles = None

        j3_familyFlat = j3_family3D = j3_familyPerp = j3_Delta = None
        if j3_roles:
            j3_familyFlat = cg._junction_family_westhof1(j3_roles)
            j3_family3D = cg._junction_family_3d(j3_roles)
            j3_familyPerp = cg._junction_family_is_perpenticular(j3_roles)
            j3_Delta = (cg.get_length(j3_roles["J23"])
                        - cg.get_length(j3_roles["J31"]))

        # Smallest first define value among all segments of the loop.
        loop_start = min([float("inf")]
                         + [cg.define_a(elem)[0] for elem in loop])

        for segment in loop:
            if segment[0] not in "mi":
                continue

            data["loop_start_after"].append(loop_start)
            data["segment_start_after"].append(cg.define_a(segment)[0])
            data["segment"].append(segment)
            data["junction_length"].append(len(loop))
            data["segment_length"].append(cg.get_length(segment))

            if segment[0] == "i":
                dims = list(sorted(cg.get_bulge_dimensions(segment)))
            else:
                dims = [-1, -1]
            data["iloop_length_1"].append(dims[0])
            data["iloop_length_2"].append(dims[1])

            member_lengths = [cg.get_length(elem) for elem in loop]
            data["loops_largest_segment_length"].append(max(member_lengths))
            data["loops_shortest_segment_length"].append(min(member_lengths))
            data["sum_of_loops_segment_lengths"].append(sum(member_lengths))
            data["loop_segment_lengths"].append(
                ",".join(map(str, sorted(member_lengths))))

            angle_type = abs(cg.get_angle_type(segment, allow_broken=True))
            data["angle_type"].append(angle_type)

            s1, s2 = cg.connections(segment)
            vec1, sides1 = oriented_direction(s1, segment)
            vec2, sides2 = oriented_direction(s2, segment)
            data["angle_between_stems"].append(ftuv.vec_angle(vec1, vec2))

            # Stem ends next to the segment (closer) and away from it (far).
            closer1, far1 = cg.coords[s1][sides1[0]], cg.coords[s1][sides1[1]]
            closer2, far2 = cg.coords[s2][sides2[0]], cg.coords[s2][sides2[1]]

            data["offset1"].append(
                ftuv.point_line_distance(closer1, cg.coords[s2][0],
                                         cg.coords.get_direction(s2)))
            data["offset2"].append(
                ftuv.point_line_distance(closer2, cg.coords[s1][0],
                                         cg.coords.get_direction(s1)))

            # Distance between the two segments that start at the closer
            # stem ends and extend 100000 stem lengths away from each stem.
            extension1 = closer1 + (closer1 - far1) * 100000
            extension2 = closer2 + (closer2 - far2) * 100000
            data["offset"].append(
                ftuv.vec_distance(*ftuv.line_segment_distance(
                    closer1, extension1, closer2, extension2)))

            data["junction_va_distance"].append(
                ftug.junction_virtual_atom_distance(cg, segment))
            data["is_external_multiloop"].append("open" in description)
            data["is_pseudoknotted_multiloop"].append(
                "pseudoknot" in description)
            data["is_regular_multiloop"].append(
                "regular_multiloop" in description)
            data["is_interior_loop"].append("interior_loop" in description)

            if j3_roles is None:
                elem_role = "?"
            else:
                # Exactly one role must map to this segment.
                elem_role, = [role for role, elem in j3_roles.items()
                              if elem == segment]
            data["j3_role"].append(elem_role)
            data["j3_familyFlat"].append(j3_familyFlat)
            data["j3_family3D"].append(j3_family3D)
            data["j3_familyPerp"].append(j3_familyPerp)
            data["j3_Delta_j23_j31"].append(j3_Delta)

            dssr_stacking = ("dssr_stacks" in cg.infos
                             and segment in cg.infos["dssr_stacks"])
            data["dssr_stacking"].append(dssr_stacking)

            # Angle between the two outer stems of two consecutive
            # angle-type-5 multiloop segments; nan if not applicable.
            kh_stem_angle = float("nan")
            if angle_type == 5:
                next_ml = cg.get_next_ml_segment(segment)
                if (isinstance(next_ml, str) and next_ml[0] == "m"
                        and abs(cg.get_angle_type(
                            next_ml, allow_broken=True)) == 5):
                    stems1 = cg.edges[segment]
                    stems2 = cg.edges[next_ml]
                    try:
                        # Stems that belong to only one of the two segments.
                        outer1, outer2 = ((stems1 | stems2)
                                          - (stems1 & stems2))
                    except ValueError:
                        pass
                    else:
                        angle = ftuv.vec_angle(
                            cg.coords.get_direction(outer1),
                            cg.coords.get_direction(outer2))
                        # Fold the angle into the range [0, pi/2].
                        if angle > math.pi / 2:
                            angle = math.pi - angle
                        kh_stem_angle = angle
            data["kh_stem_angle"].append(kh_stem_angle)

    if data:
        data["pk_number"] = number_by(data, "loop_start_after",
                                      "is_pseudoknotted_multiloop")
        data["loop_number"] = number_by(data, "loop_start_after", None)
        data["reguler_multiloop_number"] = number_by(
            data, "loop_start_after", "is_regular_multiloop")
    return data
def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """Summarize one coarse grained RNA as a flat dictionary.

    :param cg: The coarse grained RNA to describe.
    :param file_num: Running number of the file; only used in warnings.
    :param dist_pais: Iterable of (from_nt, to_nt) nucleotide pairs for
                      which the virtual residue distance is reported.
                      (The misspelled name is kept for compatibility.)
    :param angle_pairs: Iterable of (elem1, elem2) element pairs for which
                        the angle between their directions is reported.
    :return: A dict with element counts, multiloop statistics, shape
             descriptors (nan without 3D information), the requested
             distances and angles (nan if they cannot be calculated) and
             statistics about missing residues.
    """

    def ordinal_suffix(number):
        # "st"/"nd"/"rd"/"th"; 11, 12 and 13 always get "th".
        return {1: "st", 2: "nd", 3: "rd"}.get(
            number % 10 * (number % 100 not in [11, 12, 13]), "th")

    # The body historically read `dist_pairs`, which is not the name of the
    # parameter; bind it explicitly so that the argument is actually used.
    dist_pairs = dist_pais

    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    for letter in "smifth":
        data["num_" + letter] = len([x for x in cg.defines
                                     if x[0] == letter])

    multiloops = cg.find_mlonly_multiloops()
    junct3 = junct4 = regular = pseudoknots = open_mls = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            regular += 1
        if "pseudoknot" in descriptors:
            pseudoknots += 1
        if "open" in descriptors:
            open_mls += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = open_mls
    data["pseudoknots"] = pseudoknots
    data["regular_mls"] = regular
    data["total_mls"] = len(multiloops)
    try:
        data["longest_ml"] = max(len(x) for x in multiloops)
    except ValueError:
        # max() of an empty sequence: there are no multiloops.
        data["longest_ml"] = 0

    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        # Without 3D information all shape descriptors are undefined.
        data["rog_fast"] = float("nan")
        data["rog_vres"] = float("nan")
        data["anisotropy_fast"] = float("nan")
        data["anisotropy_vres"] = float("nan")
        data["asphericity_fast"] = float("nan")
        data["asphericity_vres"] = float("nan")
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(
            cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    for from_nt, to_nt in dist_pairs:
        try:
            dist = ftuv.vec_distance(
                cg.get_virtual_residue(int(from_nt), True),
                cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Deliberately best-effort: report nan and keep going.
            dist = float("nan")
            # %s instead of %d for the nucleotides: they are converted with
            # int() above and thus may be strings, which %d cannot format.
            log.warning(
                "%d%s File %s: Could not calculate distance between "
                "%s and %s: %s occurred: %s", file_num,
                ordinal_suffix(file_num), cg.name, from_nt, to_nt,
                type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist

    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            # Deliberately best-effort: report nan and keep going.
            angle = float("nan")
            log.warning(
                "%d%s File %s: Could not calculate angle between "
                "%s and %s: %s occurred: %s", file_num,
                ordinal_suffix(file_num), cg.name, elem1, elem2,
                type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle

    # Missing residues relative to the whole molecule.
    data["missing_residues_5prime"] = (len(cg.seq.with_missing[:1]) - 1)
    data["missing_residues_3prime"] = (
        len(cg.seq.with_missing[cg.seq_length:]) - 1)
    data["missing_residues_middle"] = (
        len(cg.seq.with_missing[1:cg.seq_length])
        - len(cg.seq[1:cg.seq_length]))
    data["missing_residues_total"] = (len(cg.seq.with_missing[:])
                                      - len(cg.seq[:]))

    # Missing residues relative to the individual chains: what is missing
    # around a backbone break is split at the '&' into the part before the
    # break (3' end of one chain) and after it (5' end of the next chain).
    fp = len(cg.seq.with_missing[:1]) - 1
    tp = 0
    for bp in cg.backbone_breaks_after:
        around_break = cg.seq.with_missing[bp:bp + 1].split('&')
        fp += len(around_break[1]) - 1
        tp += len(around_break[0]) - 1
    tp += len(cg.seq.with_missing[cg.seq_length:]) - 1
    data["missing_residues_5prime_chain"] = fp
    data["missing_residues_3prime_chain"] = tp
    data["missing_residues_middle_chain"] = (
        data["missing_residues_total"] - fp - tp)

    incomplete_elem_types = Counter(x[0] for x in cg.incomplete_elements)
    data["s_with_missing"] = incomplete_elem_types["s"]
    data["i_with_missing"] = incomplete_elem_types["i"]
    data["m_with_missing"] = incomplete_elem_types["m"]
    data["h_with_missing"] = incomplete_elem_types["h"]

    # Stretches inside incomplete stems where residues are missing on both
    # strands, reported as "left&right;" entries.
    missing_pairs = []
    if incomplete_elem_types["s"]:
        for elem in cg.incomplete_elements:
            if elem[0] != "s":
                continue
            for i in range(cg.defines[elem][0], cg.defines[elem][1]):
                left_s = cg.seq.with_missing[i:i + 1]
                if len(left_s) <= 2:
                    continue
                right_s = cg.seq.with_missing[
                    cg.pairing_partner(i + 1):cg.pairing_partner(i)]
                if len(right_s) > 2:
                    missing_pairs.append(
                        "{}&{};".format(left_s, right_s))
    data["missing_basepairs"] = "".join(missing_pairs)
    return data
def update(self, sm, step):
    """Record the angle between the directions of the two tracked elements.

    The angle (as returned by ftuv.vec_angle) is appended to
    self.history[0]; `step` is not used.

    :return: The angle converted to degrees, formatted as "{:6.2f}".
    """
    coords = sm.bg.coords
    direction1 = coords.get_direction(self._elem1)
    direction2 = coords.get_direction(self._elem2)
    angle = ftuv.vec_angle(direction1, direction2)
    self.history[0].append(angle)
    return "{:6.2f}".format(math.degrees(angle))
def describe_ml_segments(cg):
    """Describe every multiloop and interior loop segment of `cg`.

    The loops returned by cg.find_mlonly_multiloops() are processed first,
    then each interior loop as a loop of its own. For every segment whose
    name starts with "m" or "i", one value is appended to each column.

    :param cg: The coarse grained RNA to describe.
    :return: A defaultdict(list) with one list per column. If it is not
             empty, the columns "pk_number", "loop_number" and
             "reguler_multiloop_number" (computed by number_by) are added.
    """
    data = defaultdict(list)
    loops = cg.find_mlonly_multiloops()
    single_iloops = [[i] for i in cg.iloop_iterator()]
    for loop in it.chain(loops, single_iloops):
        print(loop)
        is_iloop = loop[0][0] == "i"
        description = (["interior_loop"] if is_iloop
                       else cg.describe_multiloop(loop))

        try:
            j3_roles = cg._assign_loop_roles(loop)
        except ValueError:
            j3_roles = None

        if not j3_roles:
            j3_family3D = None
            j3_familyFlat = None
            j3_familyPerp = None
            j3_Delta = None
        else:
            j3_familyFlat = cg._junction_family_westhof1(j3_roles)
            j3_family3D = cg._junction_family_3d(j3_roles)
            j3_familyPerp = cg._junction_family_is_perpenticular(j3_roles)
            len_j23 = cg.get_length(j3_roles["J23"])
            len_j31 = cg.get_length(j3_roles["J31"])
            j3_Delta = len_j23 - len_j31

        # Smallest first define value among all segments of the loop.
        starts = [cg.define_a(member)[0] for member in loop]
        loop_start = min(starts) if starts else float("inf")

        for segment in loop:
            if segment[0] not in "mi":
                continue

            data["loop_start_after"].append(loop_start)
            data["segment_start_after"].append(cg.define_a(segment)[0])
            data["segment"].append(segment)
            data["junction_length"].append(len(loop))
            data["segment_length"].append(cg.get_length(segment))

            dims = [-1, -1]
            if segment[0] == "i":
                dims = list(sorted(cg.get_bulge_dimensions(segment)))
            data["iloop_length_1"].append(dims[0])
            data["iloop_length_2"].append(dims[1])

            lengths = [cg.get_length(member) for member in loop]
            data["loops_largest_segment_length"].append(max(lengths))
            data["loops_shortest_segment_length"].append(min(lengths))
            data["sum_of_loops_segment_lengths"].append(sum(lengths))
            data["loop_segment_lengths"].append(
                ",".join(str(length) for length in sorted(lengths)))

            segment_angle_type = abs(
                cg.get_angle_type(segment, allow_broken=True))
            data["angle_type"].append(segment_angle_type)

            s1, s2 = cg.connections(segment)

            # Negate a stem direction if get_sides reports (1, 0) for it
            # with respect to the segment; otherwise it must be (0, 1).
            vec1 = cg.coords.get_direction(s1)
            sides1 = cg.get_sides(s1, segment)
            if sides1 == (1, 0):
                vec1 = -vec1
            else:
                assert sides1 == (0, 1)
            vec2 = cg.coords.get_direction(s2)
            sides2 = cg.get_sides(s2, segment)
            if sides2 == (1, 0):
                vec2 = -vec2
            else:
                assert sides2 == (0, 1)
            data["angle_between_stems"].append(ftuv.vec_angle(vec1, vec2))

            data["offset1"].append(
                ftuv.point_line_distance(cg.coords[s1][sides1[0]],
                                         cg.coords[s2][0],
                                         cg.coords.get_direction(s2)))
            data["offset2"].append(
                ftuv.point_line_distance(cg.coords[s2][sides2[0]],
                                         cg.coords[s1][0],
                                         cg.coords.get_direction(s1)))

            # Stem ends next to the segment (closer) and away from it (far).
            closer1 = cg.coords[s1][sides1[0]]
            far1 = cg.coords[s1][sides1[1]]
            closer2 = cg.coords[s2][sides2[0]]
            far2 = cg.coords[s2][sides2[1]]

            # Closest points of the two segments that start at the closer
            # stem ends and extend 100000 stem lengths away from each stem.
            nearest_points = ftuv.line_segment_distance(
                closer1, closer1 + (closer1 - far1) * 100000,
                closer2, closer2 + (closer2 - far2) * 100000)
            data["offset"].append(ftuv.vec_distance(*nearest_points))

            data["junction_va_distance"].append(
                ftug.junction_virtual_atom_distance(cg, segment))

            data["is_external_multiloop"].append("open" in description)
            data["is_pseudoknotted_multiloop"].append(
                "pseudoknot" in description)
            data["is_regular_multiloop"].append(
                "regular_multiloop" in description)
            data["is_interior_loop"].append("interior_loop" in description)

            if j3_roles is not None:
                # Exactly one role must map to this segment.
                elem_role, = [item[0] for item in j3_roles.items()
                              if item[1] == segment]
            else:
                elem_role = "?"
            data["j3_role"].append(elem_role)
            data["j3_familyFlat"].append(j3_familyFlat)
            data["j3_family3D"].append(j3_family3D)
            data["j3_familyPerp"].append(j3_familyPerp)
            data["j3_Delta_j23_j31"].append(j3_Delta)

            dssr_stacking = False
            if ("dssr_stacks" in cg.infos
                    and segment in cg.infos["dssr_stacks"]):
                dssr_stacking = True
            data["dssr_stacking"].append(dssr_stacking)

            # Angle between the two outer stems of two consecutive
            # angle-type-5 multiloop segments; nan if not applicable.
            kh_stem_angle = float("nan")
            if segment_angle_type == 5:
                next_ml = cg.get_next_ml_segment(segment)
                next_is_ml = isinstance(next_ml, str) and next_ml[0] == "m"
                if next_is_ml and abs(
                        cg.get_angle_type(next_ml, allow_broken=True)) == 5:
                    stems1 = cg.edges[segment]
                    stems2 = cg.edges[next_ml]
                    try:
                        # Stems that belong to only one of the two segments.
                        stem_a, stem_b = ((stems1 | stems2)
                                          - (stems1 & stems2))
                    except ValueError:
                        pass
                    else:
                        angle = ftuv.vec_angle(
                            cg.coords.get_direction(stem_a),
                            cg.coords.get_direction(stem_b))
                        # Fold the angle into the range [0, pi/2].
                        kh_stem_angle = (math.pi - angle
                                         if angle > math.pi / 2 else angle)
            data["kh_stem_angle"].append(kh_stem_angle)

    if data:
        data["pk_number"] = number_by(data, "loop_start_after",
                                      "is_pseudoknotted_multiloop")
        data["loop_number"] = number_by(data, "loop_start_after", None)
        data["reguler_multiloop_number"] = number_by(
            data, "loop_start_after", "is_regular_multiloop")
    return data
def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """Summarize one coarse grained RNA as a flat dictionary.

    :param cg: The coarse grained RNA to describe.
    :param file_num: Running number of the file; only used in warnings.
    :param dist_pais: Iterable of (from_nt, to_nt) nucleotide pairs for
                      which the virtual residue distance is reported.
                      (The misspelled name is kept for compatibility.)
    :param angle_pairs: Iterable of (elem1, elem2) element pairs for which
                        the angle between their directions is reported.
    :return: A dict with element counts, multiloop statistics, shape
             descriptors (nan without 3D information) and the requested
             distances and angles (nan if they cannot be calculated).
    """

    def ordinal_suffix(number):
        # "st"/"nd"/"rd"/"th"; 11, 12 and 13 always get "th".
        return {1: "st", 2: "nd", 3: "rd"}.get(
            number % 10 * (number % 100 not in [11, 12, 13]), "th")

    # The body historically read `dist_pairs`, which is not the name of the
    # parameter; bind it explicitly so that the argument is actually used.
    dist_pairs = dist_pais

    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    for letter in "smifth":
        data["num_" + letter] = len([x for x in cg.defines
                                     if x[0] == letter])

    multiloops = cg.find_mlonly_multiloops()
    junct3 = junct4 = regular = pseudoknots = open_mls = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            regular += 1
        if "pseudoknot" in descriptors:
            pseudoknots += 1
        if "open" in descriptors:
            open_mls += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = open_mls
    data["pseudoknots"] = pseudoknots
    data["regular_mls"] = regular
    data["total_mls"] = len(multiloops)
    try:
        data["longest_ml"] = max(len(x) for x in multiloops)
    except ValueError:
        # max() of an empty sequence: there are no multiloops.
        data["longest_ml"] = 0

    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        # Without 3D information all shape descriptors are undefined.
        data["rog_fast"] = float("nan")
        data["rog_vres"] = float("nan")
        data["anisotropy_fast"] = float("nan")
        data["anisotropy_vres"] = float("nan")
        data["asphericity_fast"] = float("nan")
        data["asphericity_vres"] = float("nan")
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(
            cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    for from_nt, to_nt in dist_pairs:
        try:
            dist = ftuv.vec_distance(
                cg.get_virtual_residue(int(from_nt), True),
                cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Deliberately best-effort: report nan and keep going.
            dist = float("nan")
            # %s instead of %d for the nucleotides: they are converted with
            # int() above and thus may be strings, which %d cannot format.
            log.warning(
                "%d%s File %s: Could not calculate distance between "
                "%s and %s: %s occurred: %s", file_num,
                ordinal_suffix(file_num), cg.name, from_nt, to_nt,
                type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist

    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            # Deliberately best-effort: report nan and keep going.
            angle = float("nan")
            log.warning(
                "%d%s File %s: Could not calculate angle between "
                "%s and %s: %s occurred: %s", file_num,
                ordinal_suffix(file_num), cg.name, elem1, elem2,
                type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle

    return data
def get_relative_orientation(cg, loop, stem):
    '''
    Return how loop is related to stem in terms of three parameters.

    The stem is the receptor of a potential A-Minor interaction, whereas
    the loop is the donor.

    The 3 parameters are:

        1. Distance between the closest points of the two elements
        2. The angle between the stem and the vector between the two,
           folded into the range [0, pi/2]
        3. The signed angle between the minor groove of the stem and the
           projection of the vector between stem and loop onto the plane
           normal to the stem direction. It is nan if the distance is 0.

    :raises ValueError: If the distance is not 0 and `stem` is not a stem.
    :return: The tuple (dist, angle1, angle2)
    '''
    stem_coords = cg.coords[stem]
    loop_coords = cg.coords[loop]
    point_on_stem, point_on_loop = ftuv.line_segment_distance(
        stem_coords[0], stem_coords[1], loop_coords[0], loop_coords[1])
    conn_vec = point_on_loop - point_on_stem
    dist = ftuv.magnitude(conn_vec)

    stem_dir = cg.coords.get_direction(stem)
    angle1 = ftuv.vec_angle(stem_dir, conn_vec)
    # The direction of the stem vector is irrelevant, so
    # choose the smaller of the two angles between two lines
    if angle1 > np.pi / 2:
        angle1 = np.pi - angle1

    # NOTE(review): the result is not used; the call is kept so that any
    # error raised by this lookup still surfaces as before.
    tw = cg.get_twists(stem)

    if dist == 0:
        # Without a connecting vector the second angle is undefined.
        return dist, angle1, float("nan")

    if stem[0] != 's':
        raise ValueError(
            "The receptor needs to be a stem, not {}".format(stem))

    stem_len = cg.stem_length(stem)
    # Where along the helix our A-residue points to the minor groove.
    # This can be between residues, so it is expressed in floating point
    # nucleotide coordinates: 0.0 means at the first basepair, 1.5 means
    # between the second and the third basepair.
    covered = ftuv.magnitude(point_on_stem - stem_coords[0])
    pos = covered / ftuv.magnitude(stem_dir) * (stem_len - 1)

    # The vector pointing to the minor groove, even if we are not at a
    # virtual residue (pos is a float value)
    virt_twist = ftug.virtual_res_3d_pos_core(stem_coords, cg.twists[stem],
                                              pos, stem_len)[1]
    # The projection of the connection vector onto the plane normal
    # to the stem
    conn_proj = ftuv.vector_rejection(conn_vec, stem_dir)

    try:
        # Note: here the directions of both vectors are well defined,
        # so angles >90 degrees make sense.
        angle2 = ftuv.vec_angle(virt_twist, conn_proj)
    except ValueError:
        if not np.all(virt_twist == 0):
            raise
        angle2 = float("nan")

    # Furthermore, the direction of the second angle is meaningful.
    # The angle is multiplied by the value is_almost_parallel returns for
    # the cross-product of the two vectors and the stem vector.
    cr = np.cross(virt_twist, conn_proj)
    sign = ftuv.is_almost_parallel(cr, stem_dir)
    angle2 *= sign

    return dist, angle1, angle2
def get_relative_orientation(cg, loop, stem):
    '''
    Describe the position of a loop (donor of a potential A-Minor
    interaction) relative to a stem (receptor) by three parameters:

        1. Distance between the closest points of the two elements
        2. The angle between the stem and the vector between the two,
           folded into the range [0, pi/2]
        3. The signed angle between the minor groove of the stem and the
           projection of the vector between stem and loop onto the plane
           normal to the stem direction. It is nan if the distance is 0.

    :raises ValueError: If the distance is not 0 and `stem` is not a stem.
    :return: The tuple (dist, angle1, angle2)
    '''
    closest_points = ftuv.line_segment_distance(
        cg.coords[stem][0], cg.coords[stem][1],
        cg.coords[loop][0], cg.coords[loop][1])
    point_on_stem, point_on_loop = closest_points
    conn_vec = point_on_loop - point_on_stem
    dist = ftuv.magnitude(conn_vec)

    # The direction of the stem vector is irrelevant, so
    # choose the smaller of the two angles between two lines
    angle1 = ftuv.vec_angle(cg.coords.get_direction(stem), conn_vec)
    if angle1 > np.pi / 2:
        angle1 = np.pi - angle1

    # NOTE(review): the result is not used; the call is kept so that any
    # error raised by this lookup still surfaces as before.
    tw = cg.get_twists(stem)

    if dist == 0:
        angle2 = float("nan")
    elif stem[0] != 's':
        raise ValueError(
            "The receptor needs to be a stem, not {}".format(stem))
    else:
        stem_len = cg.stem_length(stem)

        # Where along the helix our A-residue points to the minor groove,
        # in floating point nucleotide coordinates (0.0: at the first
        # basepair, 1.5: between the second and the third basepair).
        fraction = (ftuv.magnitude(point_on_stem - cg.coords[stem][0])
                    / ftuv.magnitude(cg.coords.get_direction(stem)))
        pos = fraction * (stem_len - 1)

        # The vector pointing to the minor groove, even if we are not at
        # a virtual residue (pos is a float value)
        virtual_residue = ftug.virtual_res_3d_pos_core(
            cg.coords[stem], cg.twists[stem], pos, stem_len)
        virt_twist = virtual_residue[1]

        # The projection of the connection vector onto the plane normal
        # to the stem
        conn_proj = ftuv.vector_rejection(conn_vec,
                                          cg.coords.get_direction(stem))
        try:
            # Note: here the directions of both vectors are well defined,
            # so angles >90 degrees make sense.
            angle2 = ftuv.vec_angle(virt_twist, conn_proj)
        except ValueError:
            if np.all(virt_twist == 0):
                angle2 = float("nan")
            else:
                raise

        # Furthermore, the direction of the second angle is meaningful.
        # The angle is multiplied by the value is_almost_parallel returns
        # for the cross-product of the two vectors and the stem vector.
        cr = np.cross(virt_twist, conn_proj)
        angle2 *= ftuv.is_almost_parallel(cr, cg.coords.get_direction(stem))

    return dist, angle1, angle2
def describe_ml_segments(cg):
    """
    Collect descriptors for every multiloop segment of cg.

    For each loop returned by cg.find_mlonly_multiloops(), one entry per
    element whose name starts with "m" is appended to every column.

    :param cg: The coarse grain RNA to describe.
    :return: A defaultdict(list) mapping column names to lists of equal
             length. If at least one column exists, the three numbering
             columns computed by number_by are added as well.
    """
    table = defaultdict(list)

    def _oriented_stem_direction(stem, ml_segment):
        # Direction of the stem, flipped if the stem is attached to the
        # segment with sides (1, 0). The only other accepted value is (0, 1).
        direction = cg.coords.get_direction(stem)
        sides = cg.get_sides(stem, ml_segment)
        if sides == (1, 0):
            return -direction
        assert sides == (0, 1)
        return direction

    for ml in cg.find_mlonly_multiloops():
        ml_description = cg.describe_multiloop(ml)

        # Role assignment may be refused with a ValueError; such loops get
        # no junction-family information.
        try:
            roles = cg.assign_loop_roles(ml)
        except ValueError:
            roles = None

        if roles:
            family_flat = cg.junction_family_westhof1(roles)
            family_3d = cg.junction_family_3d(roles)
            family_perp = cg.junction_family_is_perpenticular(roles)
            delta = (cg.get_length(roles["J23"])
                     - cg.get_length(roles["J31"]))
        else:
            family_flat = family_3d = family_perp = delta = None

        # The smallest first define of any element in the loop.
        first_defines = [cg.define_a(elem)[0] for elem in ml]
        ml_start = min(first_defines) if first_defines else float("inf")

        for elem in ml:
            if elem[0] != "m":
                continue

            table["loop_start_after"].append(ml_start)
            table["segment_start_after"].append(cg.define_a(elem)[0])
            table["segment"].append(elem)
            table["junction_length"].append(len(ml))
            table["segment_length"].append(cg.get_length(elem))

            lengths = [cg.get_length(member) for member in ml]
            table["loops_largest_segment_length"].append(max(lengths))
            table["loops_shortest_segment_length"].append(min(lengths))
            table["sum_of_loops_segment_lengths"].append(sum(lengths))
            table["loop_segment_lengths"].append(
                ",".join(str(length) for length in sorted(lengths)))

            angle_type = cg.get_angle_type(elem, allow_broken=True)
            table["angle_type"].append(abs(angle_type))

            stem_a, stem_b = cg.connections(elem)
            direction_a = _oriented_stem_direction(stem_a, elem)
            direction_b = _oriented_stem_direction(stem_b, elem)
            table["angle_between_stems"].append(
                ftuv.vec_angle(direction_a, direction_b))
            table["junction_va_distance"].append(
                ftug.junction_virtual_atom_distance(cg, elem))

            table["is_external_multiloop"].append("open" in ml_description)
            table["is_pseudoknotted_multiloop"].append(
                "pseudoknot" in ml_description)
            table["is_regular_multiloop"].append(
                "regular_multiloop" in ml_description)

            if roles is None:
                role = "?"
            else:
                # Exactly one role has to map to this segment.
                (role,) = [name for name, member in roles.items()
                           if member == elem]
            table["j3_role"].append(role)
            table["j3_familyFlat"].append(family_flat)
            table["j3_family3D"].append(family_3d)
            table["j3_familyPerp"].append(family_perp)
            table["j3_Delta_j23_j31"].append(delta)

    if table:
        table["pk_number"] = number_by(
            table, "loop_start_after", "is_pseudoknotted_multiloop")
        table["loop_number"] = number_by(table, "loop_start_after", None)
        table["reguler_multiloop_number"] = number_by(
            table, "loop_start_after", "is_regular_multiloop")
    return table
def extend_pk_description(dataset, filename, pk_type, rna, pk, pk_number):
    """
    Extend dataset with a description of the current pseudoknot in the
    current file, e.g. angles between stems.

    One entry per residue number in pk.residue_numbers is added to the
    columns of dataset. Nothing is returned; dataset is modified in place.

    NOTE(review): this file contains a second definition of
    extend_pk_description further down. Assuming both are defined at module
    level, the later one replaces this one when the module is loaded.

    :param dataset: Current dataset that will be updated. Values are added
                    with dataset[key].append(...), so missing keys have to
                    yield a list (presumably a defaultdict(list)).
    :param filename: Filename of the current structure
    :param pk_type: Class of the pseudoknot
    :param rna: A forgi CoarseGrainRNA object
    :param pk: Structure of the pseudoknot, a NumberedDotbracket object,
               in a condensed (shadow-like) representation.
               This representation always contains the most 5' basepair.
    :param pk_number: consecutive number of the pseudoknot
    """
    domains = rna.get_domains()
    helices = domains["rods"]  # A list of elements, e.g. ["s0", "i0", "s1"]
    log.debug("Helices: %s", helices)
    #rna.log(logging.WARNING)
    # For every entry of the pseudoknot, look up the element at the listed
    # residue (stems_5p) and the element at the matching helix end (stems_3p).
    stems_5p = []
    stems_3p = []
    nums = []
    log.debug("pk Residue numbers %s", pk.residue_numbers)
    log.debug("pk helix ends %s", pk.helix_ends)
    for i, resnum in enumerate(pk.residue_numbers):
        num = rna.seq.to_integer(resnum)
        nums.append(num)
        element_5p = rna.get_node_from_residue_num(num)
        stems_5p.append(element_5p)
        num2 = rna.seq.to_integer(pk.helix_ends[i])
        log.debug("num %s nums2 %s", num, num2)
        element_3p = rna.get_node_from_residue_num(num2)
        stems_3p.append(element_3p)
    log.debug("nums %s", nums)

    for i, stem1_5p in enumerate(stems_5p):
        dataset["Filename"].append(filename)
        # NOTE(review): this is an assignment, not an append like all other
        # columns, so the value is overwritten on every iteration and call.
        dataset["rnaname"] = rna.name
        dataset["pk_type"].append(pk_type)
        dataset["pk_id"].append(pk_number)
        dataset["angle_nr"].append(i)
        # The full structure is only stored for pseudoknots of type "other".
        if pk_type == "other":
            dataset["pk_structure"].append(str(pk))
        else:
            dataset["pk_structure"].append("")
        # Is this the first occurrence of stem in stems?
        if stems_5p.index(stem1_5p) == i:
            # First occurrence. Strand 0, look at 3' end of helix
            stem1 = stems_3p[i]
            strand = 0
        else:
            assert i > stems_5p.index(stem1_5p)
            stem1 = stem1_5p
            strand = 1
        # The second stem is the next entry. After the last entry we wrap
        # around to the first one and flag the pair as outside_pk.
        try:
            stem2_5p = stems_5p[i + 1]
        except IndexError:
            stem2_5p = stems_5p[0]
            outside_pk = True
        else:
            outside_pk = False
        if outside_pk or stems_5p.index(stem2_5p) == i + 1:
            # First occurrence
            stem2 = stem2_5p
            strand2 = 0
        else:
            strand2 = 1
            # NOTE(review): outside_pk is always False in this branch, so
            # only the stems_3p[i + 1] assignment can be reached.
            if outside_pk:
                stem2 = stems_3p[0]
            else:
                stem2 = stems_3p[i + 1]
        log.debug("Stem 5' %s, 3' %s, stem1 %s stem2 %s", stems_5p, stems_3p,
                  stem1, stem2)
        # Enable stacking analysis via DSSR:
        # differentiate between stacking (True), no stacking (False) and
        # breaks within/around the pseudoknot (-1),
        # incl. 'virtual' angles e.g. H-Type angle_type3
        ml_stack = []
        if rna.dssr:
            # Map every nucleotide to its non-canonical pairing partners.
            nc_bps = list(rna.dssr.noncanonical_pairs())
            nc_dict = defaultdict(list)
            for nt1, nt2, typ in nc_bps:
                nc_dict[nt1].append((nt2, typ))
                nc_dict[nt2].append((nt1, typ))
            stacking_loops = rna.dssr.stacking_loops()
            # start_found counts how often stem1 was seen. Elements are
            # only evaluated once it was seen strand + 1 times.
            start_found = 0
            connection = []
            stacking = None
            # While branch is set, elements are skipped until the same
            # stem is encountered again.
            branch = None
            log.debug("Checking %s and %s for stacking, strand %s", stem1,
                      stem2, strand)
            for elem in rna.iter_elements_along_backbone():  # walk along the backbone
                if start_found == strand + 1:
                    if branch:
                        log.debug("in branch: elem %s, branch %s, stacking %s",
                                  elem, branch, stacking)
                        if elem == branch:
                            log.debug("End branch at %s", elem)
                            branch = None
                            log.debug("Branch end")
                        continue
                    if elem[0] != "s":
                        connection.append(elem)
                        # A backbone break after the last define of a
                        # connecting element is recorded as -1.
                        if rna.defines[elem] and rna.defines[elem][
                                -1] in rna.backbone_breaks_after:
                            stacking = -1
                        if elem not in stacking_loops and stacking != -1:
                            stacking = False
                    elif elem == stem2:
                        # stacking is still None only if nothing on the
                        # way set it to False or -1.
                        if stacking is None:
                            stacking = True
                        log.debug("Found second stem, elem %s, stacking %s",
                                  elem, stacking)
                        break
                    elif elem[0] == "s" and connection:
                        branch = elem
                        if rna.defines[elem][-1] in rna.backbone_breaks_after:
                            stacking = -1
                    log.debug("elem %s, stacking %s, branch %s", elem,
                              stacking, branch)
                elif elem == stem1:
                    start_found += 1
                    if rna.defines[elem][strand * 2 +
                                         1] in rna.backbone_breaks_after:
                        stacking = -1
                    log.debug("First stem, elem %s, stacking %s", elem,
                              stacking)
            else:
                # The loop ended without reaching stem2.
                log.debug("End iteration, stacking->-1")
                stacking = -1
            log.debug("Finally, stacking = %s", stacking)
            # More detailed stacking (including backbone breaks within and
            # around the pseudoknot)
            dataset["this_loop_stacking_dssr"].append(stacking)
            dataset["connecting_loops"].append(",".join(connection))
            # More general stacking information
            connecting_loops = rna.edges[stem1] & rna.edges[stem2]
            for loop in connecting_loops:
                if loop in stacking_loops:
                    ml_stack.append(loop)
            stacks = rna.dssr.coaxial_stacks()
            log.info("Stacks: %s", stacks)
            for stack in stacks:
                if stem1 in stack and stem2 in stack:
                    # the two stems stack, but we do not specify along which
                    # multiloop segment they stack.
                    dataset["is_stacking_dssr"].append(True)
                    break
            else:
                dataset["is_stacking_dssr"].append(False)
            # Does the connection form base-triples with the stem?
            stem1_triples = 0
            stem2_triples = 0
            aminors1 = 0
            aminors2 = 0
            aminors = list(rna.dssr.aminor_interactions())
            for elem in connection:
                for nt in rna.define_residue_num_iterator(elem, seq_ids=True):
                    # A nucleotide is counted as A-minor partner of stem1 or
                    # stem2; only otherwise its non-canonical pairs are
                    # counted as base triples.
                    if (nt, stem1) in aminors:
                        aminors1 += 1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem1)
                    elif (nt, stem2) in aminors:
                        aminors2 += 1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem2)
                    else:
                        for partner, typ in nc_dict[nt]:
                            if rna.get_elem(partner) == stem1:
                                log.debug("base_triple %s, %s: %s-%s (%s)",
                                          elem, stem1, nt, partner, typ)
                                stem1_triples += 1
                            elif rna.get_elem(partner) == stem2:
                                log.debug("base_triple %s, %s: %s-%s (%s)",
                                          elem, stem2, nt, partner, typ)
                                stem2_triples += 1
            log.debug("%s has a length of %s and %s triples", stem1,
                      rna.stem_length(stem1), stem1_triples)
            log.debug("%s has a length of %s and %s triples", stem2,
                      rna.stem_length(stem2), stem2_triples)
            # The counts are stored relative to the length of the stem.
            dataset["stem1_basetripleperc_dssr"].append(stem1_triples /
                                                        rna.stem_length(stem1))
            dataset["stem2_basetripleperc_dssr"].append(stem2_triples /
                                                        rna.stem_length(stem2))
            dataset["stem1_aminorperc_dssr"].append(aminors1 /
                                                    rna.stem_length(stem1))
            dataset["stem2_aminorperc_dssr"].append(aminors2 /
                                                    rna.stem_length(stem2))
        else:
            # Without DSSR information the DSSR columns are filled with
            # placeholders, so all columns keep the same length.
            dataset["is_stacking_dssr"].append(float("nan"))
            dataset["this_loop_stacking_dssr"].append(float("nan"))
            dataset["connecting_loops"].append("")
            dataset["stem1_basetripleperc_dssr"].append(float("nan"))
            dataset["stem2_basetripleperc_dssr"].append(float("nan"))
            dataset["stem1_aminorperc_dssr"].append(float("nan"))
            dataset["stem2_aminorperc_dssr"].append(float("nan"))
        dataset["stacking_loops"].append(",".join(ml_stack))

        pos1, dir1 = stem_parameters(stem1, rna, not strand)
        pos2, dir2 = stem_parameters(stem2, rna, strand2)
        dataset["stem1"].append(stem1)
        dataset["stem2"].append(stem2)
        dataset["angle_between_stems"].append(ftuv.vec_angle(dir1, dir2))
        dataset["distance_between"].append(ftuv.vec_distance(pos1, pos2))

        # Geometry relative to the stem reported by stem_after_next_ml.
        # It is left empty outside of the pseudoknot or if that stem
        # is stem2 itself.
        next_stem = None
        if not outside_pk:
            next_stem = stem_after_next_ml(rna, nums[i], before=stem2)
            if next_stem == stem2:
                next_stem = None
        if next_stem:
            posN, dirN = stem_parameters(next_stem, rna, 0)
            dataset["angle_to_next"].append(ftuv.vec_angle(dir1, dirN))
            dataset["distance_to_next"].append(ftuv.vec_distance(pos1, posN))
            dataset["next_stem"].append(next_stem)
        else:
            dataset["angle_to_next"].append("")
            dataset["distance_to_next"].append("")
            dataset["next_stem"].append("")
        dataset["outside_pk"].append(outside_pk)
def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """
    Collect descriptors of a whole RNA in a flat dictionary.

    :param cg: The coarse grain RNA to describe.
    :param file_num: Number of the file; only used in warning messages.
    :param dist_pais: Pairs (from_nt, to_nt) of nucleotides. For each pair
                      the distance between the two virtual residues is
                      stored under "distance_{from_nt}_{to_nt}".
                      The parameter name is misspelled, but it is kept so
                      that calls using the keyword continue to work.
    :param angle_pairs: Pairs (elem1, elem2) of element names. For each
                        pair the angle between the two element directions
                        is stored under "angle_{elem1}_{elem2}".
    :return: A dictionary mapping descriptor names to values. Distances
             and angles which could not be calculated are NaN.
    """
    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    elem_type_counts = Counter(x[0] for x in cg.defines)
    for letter in "smifth":
        data["num_" + letter] = elem_type_counts[letter]

    multiloops = cg.find_mlonly_multiloops()
    junct3 = 0
    junct4 = 0
    reg = 0
    pk = 0
    op = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            reg += 1
        if "pseudoknot" in descriptors:
            pk += 1
        if "open" in descriptors:
            op += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = op
    data["pseudoknots"] = pk
    data["regular_mls"] = reg
    data["total_mls"] = len(multiloops)
    try:
        data["longest_ml"] = max(len(x) for x in multiloops)
    except ValueError:
        # max() of an empty sequence: there are no multiloops.
        data["longest_ml"] = 0

    # If the first shape descriptor is unavailable, all shape descriptors
    # are set to NaN.
    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        data["rog_fast"] = float("nan")
        data["rog_vres"] = float("nan")
        data["anisotropy_fast"] = float("nan")
        data["anisotropy_vres"] = float("nan")
        data["asphericity_fast"] = float("nan")
        data["asphericity_vres"] = float("nan")
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    # Use the parameter here. The misspelled signature used to hide that
    # the loop read a different name, `dist_pairs`.
    for from_nt, to_nt in dist_pais:
        try:
            dist = ftuv.vec_distance(
                cg.get_virtual_residue(int(from_nt), True),
                cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Best effort: a failing pair must not abort the description.
            dist = float("nan")
            # %s instead of %d: the nucleotides are converted with int()
            # above, so they are not guaranteed to be integers here.
            log.warning("%d%s File %s: Could not calculate distance between "
                        "%s and %s: %s occurred: %s", file_num,
                        _ordinal_suffix(file_num), cg.name, from_nt, to_nt,
                        type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist

    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            # Best effort: a failing pair must not abort the description.
            angle = float("nan")
            log.warning("%d%s File %s: Could not calculate angle between "
                        "%s and %s: %s occurred: %s", file_num,
                        _ordinal_suffix(file_num), cg.name, elem1, elem2,
                        type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle

    # Missing residues relative to the whole sequence.
    data["missing_residues_5prime"] = len(cg.seq.with_missing[:1]) - 1
    data["missing_residues_3prime"] = (
        len(cg.seq.with_missing[cg.seq_length:]) - 1)
    data["missing_residues_middle"] = (
        len(cg.seq.with_missing[1:cg.seq_length])
        - len(cg.seq[1:cg.seq_length]))
    data["missing_residues_total"] = (
        len(cg.seq.with_missing[:]) - len(cg.seq[:]))

    # Missing residues relative to the chains: at every backbone break the
    # part after the '&' is added to the 5' count and the part before it
    # to the 3' count.
    fp = len(cg.seq.with_missing[:1]) - 1
    tp = 0
    for bp in cg.backbone_breaks_after:
        parts = cg.seq.with_missing[bp:bp + 1].split('&')
        fp += len(parts[1]) - 1
        tp += len(parts[0]) - 1
    tp += len(cg.seq.with_missing[cg.seq_length:]) - 1
    data["missing_residues_5prime_chain"] = fp
    data["missing_residues_3prime_chain"] = tp
    data["missing_residues_middle_chain"] = (
        data["missing_residues_total"] - fp - tp)

    incomplete_elem_types = Counter(x[0] for x in cg.incomplete_elements)
    data["s_with_missing"] = incomplete_elem_types["s"]
    data["i_with_missing"] = incomplete_elem_types["i"]
    data["m_with_missing"] = incomplete_elem_types["m"]
    data["h_with_missing"] = incomplete_elem_types["h"]

    # Within incomplete stems, report the positions where both strands
    # contain more than the two flanking nucleotides.
    missing_basepairs = []
    if incomplete_elem_types["s"]:
        for elem in cg.incomplete_elements:
            if elem[0] != "s":
                continue
            for i in range(cg.defines[elem][0], cg.defines[elem][1]):
                left_s = cg.seq.with_missing[i:i + 1]
                if len(left_s) <= 2:
                    continue
                right_s = cg.seq.with_missing[
                    cg.pairing_partner(i + 1):cg.pairing_partner(i)]
                if len(right_s) > 2:
                    missing_basepairs.append(
                        "{}&{};".format(left_s, right_s))
    data["missing_basepairs"] = "".join(missing_basepairs)
    return data


def _ordinal_suffix(number):
    """Return the English ordinal suffix ("st", "nd", "rd" or "th") of number."""
    # 11, 12 and 13 are multiplied by False, so they fall through to "th".
    return {1: "st", 2: "nd", 3: "rd"}.get(
        number % 10 * (number % 100 not in [11, 12, 13]), "th")
def extend_pk_description(dataset, filename, pk_type, rna, pk, pk_number):
    """
    Extend dataset with a description of the current pseudoknot in the
    current file, e.g. angles between stems.

    One entry per residue number in pk.residue_numbers is appended to every
    column of dataset. Nothing is returned; dataset is modified in place.

    :param dataset: Current dataset that will be updated. Values are added
                    with dataset[key].append(...), so missing keys have to
                    yield a list (presumably a defaultdict(list)).
    :param filename: Filename of the current structure
    :param pk_type: Class of the pseudoknot
    :param rna: A forgi CoarseGrainRNA object
    :param pk: Structure of the pseudoknot, a NumberedDotbracket object,
               in a condensed (shadow-like) representation.
               This representation always contains the most 5' basepair.
    :param pk_number: consecutive number of the pseudoknot
    """
    domains = rna.get_domains()
    helices = domains["rods"]  # A list of elements, e.g. ["s0", "i0", "s1"]
    log.debug("Helices: %s", helices)
    # For every entry of the pseudoknot, look up the element at the listed
    # residue (stems_5p) and the element at the matching helix end (stems_3p).
    stems_5p = []
    stems_3p = []
    nums = []
    log.debug("pk Residue numbers %s", pk.residue_numbers)
    log.debug("pk helix ends %s", pk.helix_ends)
    for i, resnum in enumerate(pk.residue_numbers):
        num = rna.seq.to_integer(resnum)
        nums.append(num)
        element_5p = rna.get_node_from_residue_num(num)
        stems_5p.append(element_5p)
        num2 = rna.seq.to_integer(pk.helix_ends[i])
        log.debug("num %s nums2 %s", num, num2)
        element_3p = rna.get_node_from_residue_num(num2)
        stems_3p.append(element_3p)
    log.debug("nums %s", nums)

    for i, stem1_5p in enumerate(stems_5p):
        dataset["Filename"].append(filename)
        # Appended like every other column, so that each row keeps the name
        # of its own RNA and all columns have the same length.
        dataset["rnaname"].append(rna.name)
        dataset["pk_type"].append(pk_type)
        dataset["pk_id"].append(pk_number)
        dataset["angle_nr"].append(i)
        # The full structure is only stored for pseudoknots of type "other".
        if pk_type == "other":
            dataset["pk_structure"].append(str(pk))
        else:
            dataset["pk_structure"].append("")
        # Is this the first occurrence of stem in stems?
        if stems_5p.index(stem1_5p) == i:
            # First occurrence. Strand 0, look at 3' end of helix
            stem1 = stems_3p[i]
            strand = 0
        else:
            assert i > stems_5p.index(stem1_5p)
            stem1 = stem1_5p
            strand = 1
        # The second stem is the next entry. After the last entry we wrap
        # around to the first one and flag the pair as outside_pk.
        try:
            stem2_5p = stems_5p[i + 1]
        except IndexError:
            stem2_5p = stems_5p[0]
            outside_pk = True
        else:
            outside_pk = False
        if outside_pk or stems_5p.index(stem2_5p) == i + 1:
            # First occurrence
            stem2 = stem2_5p
            strand2 = 0
        else:
            # outside_pk is always False here, so the 3' element of the
            # next entry is the only possible choice.
            strand2 = 1
            stem2 = stems_3p[i + 1]
        log.debug("Stem 5' %s, 3' %s, stem1 %s stem2 %s",
                  stems_5p, stems_3p, stem1, stem2)
        # Enable stacking analysis via DSSR:
        # differentiate between stacking (True), no stacking (False) and
        # breaks within/around the pseudoknot (-1),
        # incl. 'virtual' angles e.g. H-Type angle_type3
        ml_stack = []
        if rna.dssr:
            # Map every nucleotide to its non-canonical pairing partners.
            nc_bps = list(rna.dssr.noncanonical_pairs())
            nc_dict = defaultdict(list)
            for nt1, nt2, typ in nc_bps:
                nc_dict[nt1].append((nt2, typ))
                nc_dict[nt2].append((nt1, typ))
            stacking_loops = rna.dssr.stacking_loops()
            # start_found counts how often stem1 was seen. Elements are
            # only evaluated once it was seen strand + 1 times.
            start_found = 0
            connection = []
            stacking = None
            # While branch is set, elements are skipped until the same
            # stem is encountered again.
            branch = None
            log.debug("Checking %s and %s for stacking, strand %s",
                      stem1, stem2, strand)
            # Walk along the backbone
            for elem in rna.iter_elements_along_backbone():
                if start_found == strand + 1:
                    if branch:
                        log.debug("in branch: elem %s, branch %s, stacking %s",
                                  elem, branch, stacking)
                        if elem == branch:
                            log.debug("End branch at %s", elem)
                            branch = None
                            log.debug("Branch end")
                        continue
                    if elem[0] != "s":
                        connection.append(elem)
                        # A backbone break after the last define of a
                        # connecting element is recorded as -1.
                        if (rna.defines[elem] and rna.defines[elem][-1]
                                in rna.backbone_breaks_after):
                            stacking = -1
                        if elem not in stacking_loops and stacking != -1:
                            stacking = False
                    elif elem == stem2:
                        # stacking is still None only if nothing on the
                        # way set it to False or -1.
                        if stacking is None:
                            stacking = True
                        log.debug("Found second stem, elem %s, stacking %s",
                                  elem, stacking)
                        break
                    elif elem[0] == "s" and connection:
                        branch = elem
                        if rna.defines[elem][-1] in rna.backbone_breaks_after:
                            stacking = -1
                    log.debug("elem %s, stacking %s, branch %s",
                              elem, stacking, branch)
                elif elem == stem1:
                    start_found += 1
                    if (rna.defines[elem][strand * 2 + 1]
                            in rna.backbone_breaks_after):
                        stacking = -1
                    log.debug("First stem, elem %s, stacking %s",
                              elem, stacking)
            else:
                # The loop ended without reaching stem2.
                log.debug("End iteration, stacking->-1")
                stacking = -1
            log.debug("Finally, stacking = %s", stacking)
            # More detailed stacking (including backbone breaks within and
            # around the pseudoknot)
            dataset["this_loop_stacking_dssr"].append(stacking)
            dataset["connecting_loops"].append(",".join(connection))
            # More general stacking information
            connecting_loops = rna.edges[stem1] & rna.edges[stem2]
            for loop in connecting_loops:
                if loop in stacking_loops:
                    ml_stack.append(loop)
            stacks = rna.dssr.coaxial_stacks()
            log.info("Stacks: %s", stacks)
            for stack in stacks:
                if stem1 in stack and stem2 in stack:
                    # the two stems stack, but we do not specify along which
                    # multiloop segment they stack.
                    dataset["is_stacking_dssr"].append(True)
                    break
            else:
                dataset["is_stacking_dssr"].append(False)
            # Does the connection form base-triples with the stem?
            stem1_triples = 0
            stem2_triples = 0
            aminors1 = 0
            aminors2 = 0
            aminors = list(rna.dssr.aminor_interactions())
            for elem in connection:
                for nt in rna.define_residue_num_iterator(elem, seq_ids=True):
                    # A nucleotide is counted as A-minor partner of stem1 or
                    # stem2; only otherwise its non-canonical pairs are
                    # counted as base triples.
                    if (nt, stem1) in aminors:
                        aminors1 += 1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem1)
                    elif (nt, stem2) in aminors:
                        aminors2 += 1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem2)
                    else:
                        for partner, typ in nc_dict[nt]:
                            if rna.get_elem(partner) == stem1:
                                log.debug("base_triple %s, %s: %s-%s (%s)",
                                          elem, stem1, nt, partner, typ)
                                stem1_triples += 1
                            elif rna.get_elem(partner) == stem2:
                                log.debug("base_triple %s, %s: %s-%s (%s)",
                                          elem, stem2, nt, partner, typ)
                                stem2_triples += 1
            log.debug("%s has a length of %s and %s triples",
                      stem1, rna.stem_length(stem1), stem1_triples)
            log.debug("%s has a length of %s and %s triples",
                      stem2, rna.stem_length(stem2), stem2_triples)
            # The counts are stored relative to the length of the stem.
            dataset["stem1_basetripleperc_dssr"].append(
                stem1_triples / rna.stem_length(stem1))
            dataset["stem2_basetripleperc_dssr"].append(
                stem2_triples / rna.stem_length(stem2))
            dataset["stem1_aminorperc_dssr"].append(
                aminors1 / rna.stem_length(stem1))
            dataset["stem2_aminorperc_dssr"].append(
                aminors2 / rna.stem_length(stem2))
        else:
            # Without DSSR information the DSSR columns are filled with
            # placeholders, so all columns keep the same length.
            dataset["is_stacking_dssr"].append(float("nan"))
            dataset["this_loop_stacking_dssr"].append(float("nan"))
            dataset["connecting_loops"].append("")
            dataset["stem1_basetripleperc_dssr"].append(float("nan"))
            dataset["stem2_basetripleperc_dssr"].append(float("nan"))
            dataset["stem1_aminorperc_dssr"].append(float("nan"))
            dataset["stem2_aminorperc_dssr"].append(float("nan"))
        dataset["stacking_loops"].append(",".join(ml_stack))

        pos1, dir1 = stem_parameters(stem1, rna, not strand)
        pos2, dir2 = stem_parameters(stem2, rna, strand2)
        dataset["stem1"].append(stem1)
        dataset["stem2"].append(stem2)
        dataset["angle_between_stems"].append(ftuv.vec_angle(dir1, dir2))
        dataset["distance_between"].append(ftuv.vec_distance(pos1, pos2))

        # Geometry relative to the stem reported by stem_after_next_ml.
        # It is left empty outside of the pseudoknot or if that stem
        # is stem2 itself.
        next_stem = None
        if not outside_pk:
            next_stem = stem_after_next_ml(rna, nums[i], before=stem2)
            if next_stem == stem2:
                next_stem = None
        if next_stem:
            posN, dirN = stem_parameters(next_stem, rna, 0)
            dataset["angle_to_next"].append(ftuv.vec_angle(dir1, dirN))
            dataset["distance_to_next"].append(ftuv.vec_distance(pos1, posN))
            dataset["next_stem"].append(next_stem)
        else:
            dataset["angle_to_next"].append("")
            dataset["distance_to_next"].append("")
            dataset["next_stem"].append("")
        dataset["outside_pk"].append(outside_pk)