def main():
    """Print the kink angle of every interior loop in a PDB structure.

    The path of the PDB file is taken from the first positional command-line
    argument.  For each interior loop, the angle between the direction
    vectors of the two elements it connects is computed and printed.
    Exits with status 1 (after printing the help text) when the PDB file
    argument is missing.
    """
    usage = """ python interior_loop_angles.py pdb_file Iterate over the interior loop angles and calculate how much of a kink they introduce between the two adjacent stems. """
    # args[0] is read unconditionally below, so exactly one positional
    # argument is mandatory.  With a required count of 0 a missing argument
    # surfaced as an IndexError instead of the help text.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(op.expanduser(args[0]))

    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        # Direction of each connected element: second coordinate minus the
        # first (presumably the end points of the stem axis -- confirm).
        first_direction = cg.coords[conn[0]][1] - cg.coords[conn[0]][0]
        second_direction = cg.coords[conn[1]][1] - cg.coords[conn[1]][0]
        angle = ftuv.vec_angle(first_direction, second_direction)
        # NOTE(review): fud.pv appears to evaluate the quoted expression in
        # this frame, so `iloop` and `angle` must keep these exact names.
        fud.pv('iloop, angle')
def calculate_variation(angle_stats, loop_size):
    '''
    Report which angle statistics are available for a loop of a given size.

    The long-term idea is to estimate how much the statistics of a loop can
    vary with its dimensions (small bulges little, large loops a lot), e.g.
    via the volume of the n-dimensional box spanned by the minimum and
    maximum coordinates.  That estimate is not computed here: the function
    only prints, for every angle type present, the lookup key and the number
    of statistics stored under it.

    :param angle_stats: A container keyed by the loop dimensions followed by
                        the angle type; must support ``in`` and indexing.
    :param loop_size: The dimensions of the loop (i.e. (1,3))
    :return: None
    '''
    # Angle types 1-4 distinguish iloop forward/backward and
    # multiloop forward/backward.
    for ang_type in range(1, 5):
        ang_dims = tuple(loop_size) + (ang_type,)

        if ang_dims not in angle_stats:
            continue

        # NOTE(review): fud.pv appears to evaluate these strings in this
        # frame, so `ang_dims` and `angle_stats` must keep their names.
        fud.pv('ang_dims')
        fud.pv('len(angle_stats[ang_dims])')
def calculate_variation(angle_stats, loop_size):
    '''
    Report which angle statistics are available for a loop of a given size.

    The long-term idea is to estimate how much the statistics of a loop can
    vary with its dimensions (small bulges little, large loops a lot), e.g.
    via the volume of the n-dimensional box spanned by the minimum and
    maximum coordinates.  That estimate is not computed here: the function
    only prints, for every angle type present, the lookup key and the number
    of statistics stored under it.

    :param angle_stats: A container keyed by the loop dimensions followed by
                        the angle type; must support ``in`` and indexing.
    :param loop_size: The dimensions of the loop (i.e. (1,3))
    :return: None
    '''
    # Angle types 1-4 distinguish iloop forward/backward and
    # multiloop forward/backward.
    for ang_type in range(1, 5):
        ang_dims = tuple(loop_size) + (ang_type,)

        if ang_dims not in angle_stats:
            continue

        # NOTE(review): fud.pv appears to evaluate these strings in this
        # frame, so `ang_dims` and `angle_stats` must keep their names.
        fud.pv('ang_dims')
        fud.pv('len(angle_stats[ang_dims])')
def main():
    """Print the kink angle of every interior loop in a PDB structure.

    The path of the PDB file is taken from the first positional command-line
    argument.  For each interior loop, the angle between the direction
    vectors of the two elements it connects is computed and printed.
    Exits with status 1 (after printing the help text) when the PDB file
    argument is missing.
    """
    usage = """ python interior_loop_angles.py pdb_file Iterate over the interior loop angles and calculate how much of a kink they introduce between the two adjacent stems. """
    # args[0] is read unconditionally below, so exactly one positional
    # argument is mandatory.  With a required count of 0 a missing argument
    # surfaced as an IndexError instead of the help text.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(op.expanduser(args[0]))

    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        # Direction of each connected element: second coordinate minus the
        # first (presumably the end points of the stem axis -- confirm).
        first_direction = cg.coords[conn[0]][1] - cg.coords[conn[0]][0]
        second_direction = cg.coords[conn[1]][1] - cg.coords[conn[1]][0]
        angle = ftuv.vec_angle(first_direction, second_direction)
        # NOTE(review): fud.pv appears to evaluate the quoted expression in
        # this frame, so `iloop` and `angle` must keep these exact names.
        fud.pv('iloop, angle')
def test_from_pdb(self):
    """Convert the 1MZP test structure to JSON and print the serialized form.

    The test only checks that the conversion and the JSON serialization run
    without raising; no assertion is made on the content.
    """
    # The trailing, never-closed triple quote that followed this body has
    # been dropped: it opened a string literal with no terminator.
    with open('test/data/1MZP.pdb', 'r') as f:
        text = f.read()

    res = forna.pdb_to_json(text, '2ZM5')
    s = json.dumps(res)

    # NOTE(review): fud.pv appears to evaluate the quoted expression in this
    # frame, so the local must stay named `s`.
    fud.pv('s')
def main(args):
    """Evaluate energies once, or run a sampling simulation, as set by *args*.

    The deterministic set-up is performed first and is asserted not to touch
    the global random number generator.  With ``args.eval_energy`` the
    energies are printed and the process exits with status 0.  Otherwise
    both ``random`` and ``numpy.random`` are seeded and ``args.iterations``
    sampler steps are run; the seed is written to the output file and, in
    any case, reported on stderr at the end.

    :param args: Parsed command-line arguments.  Read here: ``eval_energy``,
                 ``seed``, ``exhaustive``, ``new_ml``, ``start_from_scratch``,
                 ``dump_energies`` and ``iterations``.
    """
    # --- Deterministic set-up ------------------------------------------
    # The RNG state is captured before and compared after, purely to verify
    # that the set-up did not consume any random numbers.
    rng_state_before = random.getstate()
    sm, original_sm, ofilename, energy, energies_to_track = setup_deterministic(args)
    assert rng_state_before == random.getstate()

    # NOTE(review): fud.pv appears to evaluate the quoted expressions in this
    # frame; `sm`, `energy`, `energies_to_track` and `track_energy` must keep
    # their names.
    fud.pv("energies_to_track")

    # --- Eval-energy mode ----------------------------------------------
    if args.eval_energy:
        sm.bg.add_all_virtual_residues()
        fud.pv('energy.eval_energy(sm, verbose=True, background=False)')
        if sm.constraint_energy:
            fud.pv('sm.constraint_energy.eval_energy(sm, verbose=True, background=False)')
        if sm.junction_constraint_energy:
            fud.pv('sm.junction_constraint_energy.eval_energy(sm, verbose=True, background=False)')
        for track_energy in energies_to_track:
            fud.pv('track_energy.eval_energy(sm, verbose=True, background=False)')
        sys.exit(0)

    # --- Seed the random number generators -----------------------------
    # No call to random may happen before this point.
    # 4294967295 is the largest seed numpy accepts.
    # NOTE(review): a seed of 0 is falsy and therefore treated like "no seed
    # given" -- confirm whether that is intended.
    chosen_seed = args.seed if args.seed else random.randint(0, 4294967295)
    random.seed(chosen_seed)
    np.random.seed(chosen_seed)

    # --- Sampling, dependent on the seed -------------------------------
    with open_for_out(ofilename) as out_file:
        if isinstance(energy, fbe.CombinedEnergy):
            energies_to_track += energy.uncalibrated_energies
        elif isinstance(energy, fbe.CoarseGrainEnergy):
            energies_to_track += [energy]

        statistics = setup_stat(out_file, sm, args, energies_to_track, original_sm)

        try:
            print("# Random Seed: {}".format(chosen_seed), file=out_file)
            print("# Command: `{}`".format(" ".join(sys.argv)), file=out_file)

            for used_energy in energy.iterate_energies():
                if not isinstance(used_energy, fbe.FPPEnergy):
                    continue
                landmarks = ":".join(",".join(map(str, x)) for x in used_energy.landmarks)
                print("# Used FPP energy with options: --scale {} --ref-img {} "
                      "--fpp-landmarks {}".format(used_energy.scale,
                                                  used_energy.ref_image,
                                                  landmarks),
                      file=out_file)

            if args.exhaustive:
                sampler = fbs.ExhaustiveExplorer(sm, energy, statistics,
                                                 args.exhaustive,
                                                 args.start_from_scratch)
            elif args.new_ml:
                sampler = fbs.ImprovedMultiloopMCMC(
                    sm, energy, statistics,
                    start_from_scratch=args.start_from_scratch,
                    dump_measures=args.dump_energies)
            else:
                sampler = fbs.MCMCSampler(
                    sm, energy, statistics,
                    start_from_scratch=args.start_from_scratch,
                    dump_measures=args.dump_energies)

            for _ in range(args.iterations):
                sampler.step()
        finally:
            # Clean-up: always report the seed so that a run can be repeated.
            print("INFO: Random seed was {}".format(chosen_seed), file=sys.stderr)
def json_to_json(rna_json_str):
    '''
    Round-trip an RNA json string through fasta and bulge graphs.

    The json is converted to one fasta text per molecule, each fasta text is
    turned into a bulge graph and then back into a json fragment; the
    fragments are merged into one json.  Going through this cycle keeps the
    molecule consistent and preserves the positions of the hidden nodes
    after a modification.

    The raw input is also written to the file 'test.out' in the current
    working directory.

    :param rna_json_str: The RNA json as a string.
    :return: A dictionary with the merged 'nodes' and 'links' lists.
    '''
    with open('test.out', 'w') as raw_copy:
        raw_copy.write(rna_json_str)

    (all_fastas, all_xs, all_ys, all_uids, different_tree_links) = json_to_fasta(rna_json_str)

    big_json = {'nodes': [], 'links': []}
    coords_to_index = dict()

    for fasta_text, xs, ys, uids in zip(all_fastas, all_xs, all_ys, all_uids):
        graph = fgb.BulgeGraph()
        graph.from_fasta(fasta_text)
        molecule_json = bg_to_json(graph, xs=xs, ys=ys, uids=uids)

        # Nodes of this molecule are appended behind the ones collected so
        # far, so every node index it uses is shifted by that many places.
        offset = len(big_json['nodes'])

        for link in molecule_json['links']:
            link['source'] += offset
            link['target'] += offset
            big_json['links'].append(link)

        # Remember at which merged index each nucleotide ends up, keyed by
        # its coordinates: links between different molecules are stored by
        # the coordinates of the nodes they connect.
        for position, node in enumerate(molecule_json['nodes']):
            if node['node_type'] == 'nucleotide':
                coords_to_index[(node['x'], node['y'])] = position + offset

        big_json['nodes'].extend(molecule_json['nodes'])

    # Add the links that run between different molecules.
    # NOTE(review): fud.pv appears to evaluate the quoted expressions in this
    # frame, so `dtl`, `n1` and `n2` must keep their names.
    for dtl in different_tree_links:
        fud.pv('dtl')
        n1 = coords_to_index[dtl[0]]
        n2 = coords_to_index[dtl[1]]
        fud.pv('n1,n2')

        big_json['links'].append({'source': n1,
                                  'target': n2,
                                  'link_type': 'basepair',
                                  'value': 1})

    return big_json
def test_angle_stat_difference(self):
    """Check AngleStat.diff for identical stats and for a changed ``u``.

    A third comparison (changed ``u1``) is only printed, not asserted.
    """
    as1 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=1.57, v1=0)

    # Identical statistics: the difference is expected to be zero.
    as2 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=1.57, v1=0)
    identical_diff = as1.diff(as2)
    self.assertTrue(np.allclose([identical_diff], [0]))

    # Only u differs: expected to be sqrt(2) within 1% relative tolerance.
    as2 = ftms.AngleStat(u=0, v=0., r1=1, u1=1.57, v1=0)
    changed_u_diff = as1.diff(as2)
    self.assertTrue(np.allclose([changed_u_diff], [math.sqrt(2)], rtol=0.01))

    # Only u1 differs: the value is printed for inspection.
    # NOTE(review): fud.pv appears to evaluate the quoted expression in this
    # frame, so `as1` and `as2` must keep their names.
    as2 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=0, v1=0)
    fud.pv('as1.diff(as2)')
def test_angle_stat_difference(self):
    """Check AngleStat.diff for identical stats and for a changed ``u``.

    A third comparison (changed ``u1``) is only printed, not asserted.
    """
    as1 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=1.57, v1=0)

    # Identical statistics: the difference is expected to be zero.
    as2 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=1.57, v1=0)
    identical_diff = as1.diff(as2)
    self.assertTrue(np.allclose([identical_diff], [0]))

    # Only u differs: expected to be sqrt(2) within 1% relative tolerance.
    as2 = ftms.AngleStat(u=0, v=0., r1=1, u1=1.57, v1=0)
    changed_u_diff = as1.diff(as2)
    self.assertTrue(np.allclose([changed_u_diff], [math.sqrt(2)], rtol=0.01))

    # Only u1 differs: the value is printed for inspection.
    # NOTE(review): fud.pv appears to evaluate the quoted expression in this
    # frame, so `as1` and `as2` must keep their names.
    as2 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=0, v1=0)
    fud.pv('as1.diff(as2)')
def test_sample_stats(self):
    """Sample statistics for element 't1' of a small structure and print them.

    The structure is built from an inline fasta text.  Nothing is asserted;
    the coarse-grain string and the sampled statistics are only printed.
    """
    # NOTE(review): header, sequence and dot-bracket string are separated by
    # single spaces in this literal -- confirm from_fasta accepts that layout.
    fa_text = """>1 AGAGGUUCUAGCUACACCCUCUAUAAAAAACUAAGG (((((............))))).............. """

    cg = ftmc.CoarseGrainRNA()
    cg.from_fasta(fa_text)

    stats = ftms.get_conformation_stats().sample_stats(cg, 't1')

    # NOTE(review): fud.pv appears to evaluate the quoted expressions in this
    # frame, so `cg` and `stats` must keep their names.
    fud.pv('cg.to_cg_string()')
    fud.pv('stats')
def main():
    """Write the PDB fragment of every structure element into its own file.

    With exactly one positional argument, that file is loaded as a
    coarse-grain structure and one fragment per element is written, after
    which the process exits with status 0.  Otherwise fragments are written
    for every entry of the angle, stem and loop statistics.  Output files
    are named ``<pdb name>_<define joined by '_'>.pdb`` and are placed in
    the directory given by ``--output-dir`` (created if missing).
    """
    usage = './get_stem_fragments.py [temp.comp]'
    # The usage text was built but never handed to the parser.
    parser = OptionParser(usage=usage)

    parser.add_option('-o', '--output-dir', dest='output_dir',
                      default=fbc.Configuration.stem_fragment_dir,
                      help='The directory in which to output all of the fragments',
                      type='str')

    (options, args) = parser.parse_args()

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    if len(args) == 1:
        bg = ttmc.CoarseGrainRNA(args[0])

        for st in bg.elements():
            filename = '%s_%s.pdb' % (bg.name, "_".join(map(str, bg.defines[st])))
            out_file = os.path.join(options.output_dir, filename)

            # The structure is parsed anew for every element, as before
            # (presumably because extracting a fragment may alter it).
            s = PDBParser().get_structure('t', os.path.join(fbc.Configuration.data_base_dir, "%s/temp.pdb" % (bg.name)))
            output_stem_fragment(bg.defines[st], s, out_file)

        sys.exit(0)

    prev_pdb_name = ''

    all_stats = it.chain(cbs.get_angle_stats().values(),
                         cbs.get_stem_stats().values(),
                         cbs.get_loop_stats().values())

    for stat_list in all_stats:
        for ss in stat_list:
            filename = '%s_%s.pdb' % (ss.pdb_name, "_".join(map(str, ss.define)))
            out_file = os.path.join(options.output_dir, filename)

            # Re-parse only when the source structure changes between
            # consecutive statistics.
            if ss.pdb_name != prev_pdb_name:
                # NOTE(review): cud.pv appears to evaluate the quoted
                # expression in this frame, so `ss` must keep its name.
                cud.pv('ss.define, fbc.Configuration.data_base_dir, ss.pdb_name')
                s = PDBParser().get_structure('t', os.path.join(fbc.Configuration.data_base_dir, "%s/temp.pdb" % (ss.pdb_name)))
                prev_pdb_name = ss.pdb_name

            # Single pre-formatted argument: same output under Python 2 and 3.
            print("%s %s" % (out_file, ss.define))
            output_stem_fragment(ss.define, s, out_file)
def main():
    """Print, and optionally save, the kink angle of every interior loop.

    The path of the PDB file is taken from the first positional command-line
    argument.  For each interior loop, the angle between the direction
    vectors of the two elements it connects is computed and printed.  With
    ``-o/--output`` the (loop, angle) pairs are also written to a
    tab-separated file with a header row, provided at least one loop was
    found.  Exits with status 1 (after printing the help text) when the PDB
    file argument is missing.
    """
    usage = """ python interior_loop_angles.py pdb_file Iterate over the interior loop angles and calculate how much of a kink they introduce between the two adjacent stems. """
    # args[0] is read unconditionally below, so exactly one positional
    # argument is mandatory.  With a required count of 0 a missing argument
    # surfaced as an IndexError instead of the help text.
    num_args = 1
    parser = OptionParser(usage=usage)
    parser.add_option("-o", "--output", action="store",
                      help="Store data in csv with this filename")

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    data = []

    # from_pdb is unpacked as a one-element sequence here.
    cg, = ftmc.CoarseGrainRNA.from_pdb(op.expanduser(args[0]))

    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        # Direction of each connected element: second coordinate minus the
        # first (presumably the end points of the stem axis -- confirm).
        first_direction = cg.coords[conn[0]][1] - cg.coords[conn[0]][0]
        second_direction = cg.coords[conn[1]][1] - cg.coords[conn[1]][0]
        angle = ftuv.vec_angle(first_direction, second_direction)

        data.append([iloop, angle])
        # NOTE(review): fud.pv appears to evaluate the quoted expression in
        # this frame, so `iloop` and `angle` must keep these exact names.
        fud.pv('iloop, angle')

    if options.output and data:
        with open(options.output, 'w') as out_file:
            writer = csv.writer(out_file, delimiter="\t", lineterminator="\n")
            writer.writerow(["iloop", "angle"])
            writer.writerows(data)
def main():
    """Cluster loop sizes by the divergence of their helix orientations.

    For every type-2 angle statistic with enough data, the (u, v)
    orientations of the nearest loop sizes are pooled.  Each pair of pooled
    point sets is compared with a symmetric max-of-nearest-neighbour
    distance, the result is clustered with complete linkage and shown as a
    dendrogram.  Afterwards the orientations of two hard-coded loop sizes
    are compared through their 2D histograms and plotted on the unit sphere;
    the figure is saved to ``--fig-name`` when given, shown otherwise.
    """
    usage = """ ./helix_orienation_divergences.py Analyze how much the helix-helix orientations diverge between two data sets. """
    num_args = 0
    # The usage text was built but never handed to the parser.
    parser = OptionParser(usage=usage)

    parser.add_option('-r', '--resolution', dest='resolution', default=10, help="The resolution of the resulting plot", type='int')
    parser.add_option('-a', '--angle', dest='angle', default=0, help="The angle of the camera", type='float')
    parser.add_option('-f', '--fig-name', dest='fig_name', default='', help="The name of the file to save the figure to. If it is not specified, the figure will not be saved", type='str')
    parser.add_option('-i', '--interior_loops', dest='interior_loops', default=False, help='Cluster only the interior loops', action='store_true')
    # Help text used to be a copy of the one for --interior_loops.
    parser.add_option('-m', '--multi_loops', dest='multi_loops', default=False, help='Cluster only the multi loops', action='store_true')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    real_stats = ftms.ConformationStats('fess/stats/real.stats').angle_stats
    # NOTE(review): loaded but not used below; kept so the script still
    # requires (and parses) this file exactly as before.
    sampled_stats = ftms.ConformationStats('fess/stats/temp.stats').angle_stats

    # count how many statistics we have for each statistic type
    stat_counts = c.defaultdict(int)
    for sc in real_stats.keys():
        stat_counts[sc] += len(real_stats[sc])

    # NOTE(review): fud.pv appears to evaluate the quoted expressions in this
    # frame; every local named inside a pv string must keep its name.
    histograms = dict()
    for b in stat_counts.keys():
        if b[2] != 2.:
            # only look at type 2 angles
            continue

        # A dimension of 1000 is what separates multi loops from interior
        # loops in these filters.
        if options.interior_loops and (b[0] == 1000 or b[1] == 1000):
            continue
        if options.multi_loops and (b[0] != 1000 and b[1] != 1000):
            continue

        (selected_sizes, count) = get_nearest_dimension_sizes(b, stat_counts, 1)

        if count < 3:
            continue

        fud.pv('b, selected_sizes')

        # get the statistics that correspond to the selected sizes
        combined_real = []
        for ss in selected_sizes:
            # .values replaces DataFrame.as_matrix(), which newer pandas
            # releases no longer provide.
            combined_real += list(real_stats[ss][['u', 'v']].values)

        histograms[b] = np.array(combined_real)

    dists = []
    named_dists = dict()
    pp_dists = dict()

    for k1, k2 in it.combinations(histograms.keys(), 2):
        # Nearest-neighbour distance of every point to the other set, in
        # both directions; the largest one is the distance of the pair.
        per_point_distances = []

        for p1 in histograms[k1]:
            per_point_distances.append(
                min(ftuv.magnitude(p1 - p2) for p2 in histograms[k2]))

        for p2 in histograms[k2]:
            per_point_distances.append(
                min(ftuv.magnitude(p1 - p2) for p1 in histograms[k1]))

        largest = max(per_point_distances)
        dists.append(largest)
        named_dists[(k1, k2)] = largest
        pp_dists[(k1, k2)] = per_point_distances

    fud.pv('dists')
    Z = sch.complete(dists)
    fud.pv('Z')
    # A real list rather than a dict view is passed as labels.
    sch.dendrogram(Z, labels=list(histograms.keys()), leaf_rotation=90)
    plt.subplots_adjust(bottom=0.25)
    plt.show()

    k1 = (6, 7, 2)
    k2 = (5, 6, 2)

    rs = get_certain_angle_stats(real_stats, k1)
    # NOTE(review): the "sampled" set is also taken from real_stats --
    # confirm that this is intended.
    ss = get_certain_angle_stats(real_stats, k2)

    fud.pv('named_dists[(k1,k2)]')
    fud.pv('pp_dists[(k1,k2)]')

    real_us = rs[['u', 'v']].values
    sampled_us = ss[['u', 'v']].values

    U_r = real_us[:, 0]
    V_r = real_us[:, 1]

    U_s = sampled_us[:, 0]
    V_s = sampled_us[:, 1]

    total_r = len(U_r)
    total_s = len(U_s)

    hr = np.histogram2d(U_r, V_r)
    hs = np.histogram2d(U_s, V_s)

    # Add-one pseudo counts, each histogram normalised by the size of its
    # own data set (the second one used to be divided by total_r).
    pseudo_r = (hr[0] + 1) / total_r
    pseudo_s = (hs[0] + 1) / total_s
    # NOTE(review): there is no logarithm in this expression and the two
    # histograms use their own default bin edges -- confirm this is the
    # intended divergence measure.
    kl = pseudo_r * (pseudo_r / pseudo_s)
    fud.pv('kl')
    fud.pv('sum(map(sum, kl))')

    # Spherical (u, v) -> cartesian coordinates on the unit sphere.
    X_r = np.sin(U_r) * np.cos(V_r)
    Y_r = np.sin(U_r) * np.sin(V_r)
    Z_r = np.cos(U_r)

    r = 1.
    X_s = r * np.sin(U_s) * np.cos(V_s)
    Y_s = r * np.sin(U_s) * np.sin(V_s)
    Z_s = r * np.cos(U_s)

    fud.pv('real_us')

    # Single pre-formatted argument: same output under Python 2 and 3.
    print("%s %s" % (len(real_us), len(sampled_us)))

    fig = plt.figure(figsize=(10, 10))
    ax = Axes3D(fig)

    a = Arrow3D([-1.3, 1.3], [0, 0], [0, 0], mutation_scale=20, lw=5, arrowstyle="-|>", color="g")
    ax.add_artist(a)

    ax.plot(X_r, Y_r, Z_r, 'bo', alpha=0.3)
    ax.plot(X_s, Y_s, Z_s, 'ro', alpha=0.3)

    # Wireframe of the unit sphere as a visual reference.
    u, v = np.mgrid[0:2*np.pi:20j, 0:np.pi:10j]
    x = np.cos(u) * np.sin(v)
    y = np.sin(u) * np.sin(v)
    z = np.cos(v)
    ax.plot_wireframe(x, y, z, color="y")

    ax._axis3don = False
    ax.set_zlim3d(-1, 1)
    ax.w_zaxis.set_major_locator(LinearLocator(6))
    ax.view_init(0, options.angle)

    if options.fig_name != "":
        plt.savefig(options.fig_name, bbox_inches='tight')
    else:
        plt.show()
def main():
    """Display coarse-grain structures in pymol.

    Every positional argument is loaded as a coarse-grain structure and
    added to one pymol scene; all structures after the first are drawn with
    a colour modifier of 0.3.  The scene is written to a temporary script
    that pymol is started with.  With ``--output`` the scene is ray-traced
    to that png file and pymol quits.  Exits with status 1 (after printing
    the help text) when no structure file is given.
    """
    usage = """ ./visualize_cg.py cg_file Display the coarse-grain representation of a structure in pymol. """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-g', '--highlight', dest='highlight', default=None, help="Highlight some elements", type='str')
    parser.add_option('-o', '--output', dest='output', default=None, help="Create a picture of the scene and exit", type='str')
    parser.add_option('-r', '--longrange', dest='longrange', default=False, action='store_true', help="Display long-range interactions")
    parser.add_option('-l', '--loops', dest='loops', default=True, action='store_false', help="Don't display the coarse-grain hairpin loops")
    parser.add_option('-c', '--cones', dest='cones', default=False, action='store_true', help="Display cones that portrude from the stems")
    parser.add_option('-x', '--text', dest='text', default=False, action='store_true', help="Add labels to the figure.")
    parser.add_option('-a', '--align', dest='align', default=False, action='store_true', help='Align all of the structures with the first')
    parser.add_option('-e', '--encompassing-stems', dest='encompassing_stems', default=False, action='store_true', help='Show the big stems that encompass the colinear ones.')
    parser.add_option('-v', '--virtual-atoms', dest='virtual_atoms', default=False, action='store_true', help='Display the virtual atoms')
    parser.add_option('-d', '--distance', dest='distance', default=None, help="Draw the lines between specified virtual residues")
    parser.add_option('-b', '--basis', dest='basis', default=False, action='store_true', help='Display the coordinate basis of each element')
    parser.add_option('', '--batch', dest='batch', default=False, action='store_true', help='Start pymol in batch mode')
    # NOTE(review): nothing in this function turns on virtual atoms when
    # --sidechain-atoms is given; confirm the printer does so itself.
    parser.add_option('', '--sidechain-atoms', dest='sidechain_atoms', default=False, action='store_true', help='Include the sidechain atoms. Automatically enables --virtual-atoms')
    # The help text no longer embeds a backslash line continuation.
    parser.add_option('', '--rainbow', dest='rainbow', default=False, action='store_true',
                      help='Color each of the nucleotide positions (i.e. average atoms) according to the colors of '
                           'the rainbow and their position')
    parser.add_option('', '--only-elements', dest='only_elements', default=None,
                      help='Display only these elements '
                           'element names should be '
                           'separated by commas')
    # A space was missing between the two adjacent literals.
    parser.add_option('', '--color-gradual', dest='color_gradual', default=None,
                      help='Color the specified elements '
                           'gradually from one to the other, example (i1,i4,m1)',
                      type='str')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    pp = cvp.PymolPrinter()
    pp.add_loops = options.loops
    pp.draw_cones = options.cones
    pp.add_longrange = options.longrange
    pp.print_text = options.text
    pp.encompassing_stems = options.encompassing_stems
    pp.virtual_atoms = options.virtual_atoms
    pp.sidechain_atoms = options.sidechain_atoms
    pp.basis = options.basis
    pp.rainbow = options.rainbow

    if options.only_elements is not None:
        pp.only_elements = options.only_elements.split(',')

    cgs = [cmg.CoarseGrainRNA(a) for a in args]

    if options.align:
        align_cgs(cgs)

    if options.color_gradual is not None:
        pp.element_specific_colors = dict()
        import matplotlib.pyplot as plt
        cmap = plt.get_cmap('coolwarm')

        # Everything is black unless it is one of the requested elements.
        for d in cgs[0].defines:
            pp.element_specific_colors[d] = 'black'

        to_color_nodes = options.color_gradual.split(',')
        for i, node in enumerate(to_color_nodes):
            color = cmap(i / float(len(to_color_nodes)))
            # Single pre-formatted argument: same output under Python 2 and 3.
            print("%s %s" % (node, color))
            pp.element_specific_colors[node] = color

    for i, cg in enumerate(cgs):
        if i > 0:
            pp.color_modifier = .3

        pp.coordinates_to_pymol(cg)

    # From here on `cg` is the last structure that was loaded.
    # NOTE(review): fud.pv appears to evaluate the quoted expressions in this
    # frame, so `s` and `dist_pair` must keep their names.

    # highlight things in purple
    if options.highlight is not None:
        for s in options.highlight.split(','):
            fud.pv('s')
            pp.add_twists = False
            pp.add_stem_like(cg, s, color='purple', width=3.)

    # display the distances between nucleotides
    if options.distance is not None:
        virtual_atoms = ftug.virtual_atoms(cg, sidechain=False)

        for dist_pair in options.distance.split(':'):
            fud.pv('dist_pair')
            fr, to = dist_pair.split(',')
            fr = int(fr)
            to = int(to)

            pp.add_dashed(virtual_atoms[fr]["C1'"], virtual_atoms[to]["C1'"], width=1.2)

    # Text mode, so that the str content can be written under Python 3 too.
    with tf.NamedTemporaryFile(mode='w') as f:
        with tf.NamedTemporaryFile(mode='w', suffix='.pml') as f1:
            f.write(pp.pymol_string())
            f.flush()

            commands = ['hide all\n',
                        'run %s\n' % (f.name),
                        'show cartoon, all\n',
                        'bg white\n',
                        'clip slab, 10000\n',
                        'orient\n']

            if options.output is not None:
                commands += ['ray\n',
                             'png %s\n' % (options.output),
                             'quit\n']

            pymol_cmd = ''.join(commands)

            f1.write(pymol_cmd)
            f1.flush()

            print("f1.name: %s" % (f1.name,))

            if options.batch:
                p = sp.Popen(['pymol', '-cq', f1.name], stdout=sp.PIPE, stderr=sp.PIPE)
            else:
                p = sp.Popen(['pymol', f1.name], stdout=sp.PIPE, stderr=sp.PIPE)

            # Both temporary files must outlive the pymol process, hence the
            # wait inside the with-blocks.
            out, err = p.communicate()
            sys.stderr.write("err: %s\n" % (err,))
def predict(bg, energies_to_sample, options):
    """Sample structures for *bg*, or evaluate the energies once and exit.

    The function prepares the output locations, selects the conformation
    statistics to sample from (JAR3D-filtered, an explicit filter file,
    statistics fixed to the structure's own loops, or a plain stats file),
    builds one sampler per energy and runs ``options.iterations`` steps.
    With ``options.eval_energy`` the energies are only evaluated and the
    process exits with status 1.

    :param bg: The coarse-grain structure to sample.  Its ``name``,
               ``defines``, ``hloop_iterator()`` and
               ``get_bulge_angle_stats()`` are used.
    :param energies_to_sample: The energies to create samplers for.  At most
                               three are used, since they are paired with a
                               list of three colours.
    :param options: The parsed command-line options.  ``output_file`` is
                    replaced by an open file object (or ``sys.stdout``) and
                    ``fix_loop`` may be filled in from ``fix_all_loops``.
    """
    # NOTE(review): fud.pv appears to evaluate the quoted expressions in this
    # frame; every local named inside a pv string must keep its name.
    fud.pv('energies_to_sample[0].energies')

    if options.cheating:
        sm = fbm.SpatialModel(bg)
        energies_to_sample = [fbe.CheatingEnergy(sm.bg)]

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # NOTE(review): a file opened here is never closed explicitly.
    if options.output_file is None or options.output_file is sys.stdout:
        options.output_file = sys.stdout
    else:
        options.output_file = open(options.output_file, 'w')

    cbc.Configuration.sampling_output_dir = op.join(options.output_dir, bg.name)

    if options.output_dir_suffix is not None:
        cbc.Configuration.sampling_output_dir = op.join(cbc.Configuration.sampling_output_dir, options.output_dir_suffix)

    if not os.path.exists(cbc.Configuration.sampling_output_dir):
        os.makedirs(cbc.Configuration.sampling_output_dir)

    if options.fix_all_loops:
        options.fix_loop = ','.join([d for d in bg.defines if d[0] == 'i'])

    # ---- choose the conformation statistics ---------------------------
    if options.jared_dir is not None:
        # run the jar3d_annotate script to get a list of potential
        # statistics for each interior loop
        jared_script = op.join(options.jared_dir, 'scripts/annotate_structure.py')
        jared_data = op.join(options.jared_dir, 'JAR3D')

        filtered_stats_fn = op.join(cbc.Configuration.sampling_output_dir,
                                    'filtered.stats')

        cmd = ['python', jared_script, options.bg_filename, '-o', jared_data,
               '-m', '-e', '-d', jared_data]

        fud.pv('cmd')
        p = spr.Popen(cmd, stdout=spr.PIPE)
        out, err = p.communicate()

        with open(filtered_stats_fn, 'w') as filtered_out:
            filtered_out.write(out)

        filtered_stats = ftms.FilteredConformationStats(stats_file=options.stats_file,
                                                        filter_filename=filtered_stats_fn)
        ftms.set_conformation_stats(filtered_stats)
        sys.stderr.write("Using JAR3D filtered stats\n")
    elif options.filtered_stats_file is not None:
        filtered_stats = ftms.FilteredConformationStats(stats_file=options.stats_file,
                                                        filter_filename=options.filtered_stats_file)
        ftms.set_conformation_stats(filtered_stats)
    elif options.fix_loop is not None:
        filtered_stats = ftms.FilteredConformationStats(stats_file=options.stats_file)
        filtered_stats.filtered_stats = col.defaultdict(list)

        for element_to_fix in options.fix_loop.split(','):
            sys.stderr.write("fixing loop %s\n" % (element_to_fix,))
            if element_to_fix[0] != 'i' and element_to_fix[0] != 'm':
                sys.stderr.write("Cannot fix non-interior loop or multi-loop stats, yet!\n")
                sys.exit(1)

            # Restrict the element to the two angle statistics the
            # structure itself provides for it.
            as1, as2 = bg.get_bulge_angle_stats(element_to_fix)

            filtered_stats.filtered_stats[(element_to_fix, as1.ang_type)] += [as1]
            filtered_stats.filtered_stats[(element_to_fix, as2.ang_type)] += [as2]

        ftms.set_conformation_stats(filtered_stats)
        fud.pv('element_to_fix')
    elif options.stats_file is not None:
        cbc.Configuration.stats_file = options.stats_file
        sys.stderr.write("1\n")
        ftms.set_conformation_stats(ftms.ConformationStats(options.stats_file))

    sm = fbm.SpatialModel(bg)

    if options.log_to_file:
        options.output_file = open(op.join(cbc.Configuration.sampling_output_dir, 'log.txt'), 'w')

    # ---- energy evaluation only ---------------------------------------
    if options.eval_energy:
        for stem in sm.bg.stem_iterator():
            cgg.add_virtual_residues(sm.bg, stem)

        for energy in energies_to_sample:
            fud.pv('energy.eval_energy(sm, verbose=True, background=False)')
        sys.exit(1)

    if options.plot:
        plotter = fbs.StatisticsPlotter()
    else:
        plotter = None

    colors = ['g', 'y', 'r']
    samplers = []

    # parse the distances that we want to keep track of
    to_track_dists = []
    if options.dists is not None:
        for distance_pair in options.dists.split(':'):
            # list(...) so a real list is stored under Python 3 as well.
            to_track_dists += [list(map(int, distance_pair.split(',')))]

    # ---- one sampler per energy ---------------------------------------
    # only samples from the first energy will be saved
    silent = False

    for color, energy in zip(colors, energies_to_sample):
        fud.pv('options.no_rmsd')
        stat = fbs.SamplingStatistics(sm, plotter, color, silent=silent,
                                      output_file=options.output_file,
                                      save_n_best=options.save_n_best,
                                      dists=to_track_dists,
                                      save_iterative_cg_measures=options.save_iterative_cg_measures,
                                      no_rmsd=options.no_rmsd)
        stat.step_save = options.step_save

        fud.pv('options.mcmc_sampler')
        if options.mcmc_sampler:
            sm = fbm.SpatialModel(copy.deepcopy(bg))

            # The former placeholder assignments made right before this
            # branch were always overwritten here and have been dropped.
            if options.cheating or options.no_constraint:
                sm.constraint_energy = None
                sm.junction_constraint_energy = None
            else:
                sm.constraint_energy = fbe.CombinedEnergy([fbe.CoarseStemClashEnergy(),
                                                           fbe.StemVirtualResClashEnergy()])
                sm.junction_constraint_energy = fbe.RoughJunctionClosureEnergy()

            if options.track_energies:
                energies_to_track = [fbe.RadiusOfGyrationEnergy()]

                fud.pv('len(list(bg.hloop_iterator()))')
                # Loop-distance energies are only tracked when there is
                # more than one hairpin loop.
                if len(list(bg.hloop_iterator())) > 1:
                    energies_to_track += [fbe.ShortestLoopDistanceEnergy()]
                    for h in bg.hloop_iterator():
                        energies_to_track += [fbe.ShortestLoopDistancePerLoop(h)]

                energies_to_track += [fbe.AMinorEnergy(loop_type='h')]
            else:
                energies_to_track = []

            fud.pv('energies_to_track')
            fud.pv('energy')
            sampler = fbs.MCMCSampler(sm, energy, stat, options.stats_type,
                                      options.no_rmsd,
                                      energies_to_track=energies_to_track)
            sampler.dump_measures = options.dump_energies
            samplers += [sampler]
        else:
            sm = fbm.SpatialModel(copy.deepcopy(bg))
            sm.constraint_energy = fbe.StemVirtualResClashEnergy()
            samplers += [fbs.GibbsBGSampler(sm, energy, stat)]

        silent = True

    fud.pv('samplers')

    # ---- run ----------------------------------------------------------
    for _ in range(options.iterations):
        if options.single_sampler:
            samplers[0].step()
        else:
            for sampler in samplers:
                sampler.step()

    if plotter:
        plotter.finish()
def main():
    """Command-line entry point: build the energies to sample and run the sampler.

    Parses the options, loads one coarse-grain structure per positional
    argument (arguments ending in ``.fa`` are read through
    ``bgs_from_fasta``), builds the list of energies selected by the flags
    and hands every structure to ``predict``.

    If no energy flag is given, a default combined energy (radius of
    gyration, per-loop shortest distance, A-minor and stem clash) is used.
    The process exits with status 1 on a missing argument, an invalid clamp
    specification or a structure without stems.
    """
    parser = optparse.OptionParser()

    parser.add_option('', '--loop-energy', dest='loop_energy', default=False,
                      action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--dump-energies', dest='dump_energies', default=False,
                      action='store_true', help='Dump the energies to file')
    parser.add_option('', '--track-energies', dest='track_energies', default=False,
                      help='Track additional energy for diagnostics', action='store_true')
    parser.add_option('', '--energy-prefactor', dest='energy_prefactor', default=30,
                      help='A multiplier for the energy', type='int')
    parser.add_option('-e', '--energy', dest='energy', default='energies/lrde.energy',
                      help="The energy function to use when evaluating structures")
    parser.add_option('-i', '--iterations', dest='iterations', default=10000,
                      help='Number of structures to generate', type='int')
    parser.add_option('-b', '--best_filename', dest='best_filename', default='best.coord',
                      help="The filename to dump the best (least rmsd structure) into", type='str')
    parser.add_option('-p', '--plot', dest='plot', default=False, action='store_true',
                      help="Plot the energies as they are encountered")
    parser.add_option('-d', '--distance', dest='distance_energy', default=False,
                      action='store_true', help='Use the DistanceEnergy energy')
    parser.add_option('-c', '--clamp', dest='clamp', default=None,
                      help='Clamp two elements together (i.e. add an energy with a target distance of 10 angstroms). The energy should be formatted as p1,p2:p3,p4:p5,p6 where p1 and p2 are clamped, p3 and p4 are clamped and p5 and p6 are clamped.',
                      type='str')
    # NOTE(review): default=True combined with store_true means this flag can
    # never be switched off from the command line.
    parser.add_option('-m', '--mcmc', dest='mcmc_sampler', default=True,
                      action='store_true', help='Sample using the mcmc sampler.')
    parser.add_option('', '--rog', dest='radius_of_gyration', default=False,
                      action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--cylinder-rog', dest='cylinder_radius_of_gyration', default=False,
                      action='store_true',
                      help='Use the cylinder_intersection and radius of gyration energy')
    parser.add_option('', '--aminor-perloop-rog', dest='aminor_perloop_radius_of_gyration',
                      default=False, action='store_true',
                      help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--specific-aminor', dest='specific_aminor', default=None,
                      help='Use the specific aminor energy', type='str')
    parser.add_option('', '--aminor-perloop', dest='aminor_perloop', default=False,
                      action='store_true', help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--aminor-shortestloop', dest='aminor_shortestloop', default=False,
                      action='store_true', help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--aminor-rog', dest='aminor_radius_of_gyration', default=False,
                      action='store_true', help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--aminor', dest='aminor', default=False, action='store_true',
                      help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--cylinder-perloop-rog', dest='cylinder_perloop_radius_of_gyration',
                      default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--cylinder-shortestloop-rog',
                      dest='cylinder_shortestloop_radius_of_gyration', default=False,
                      action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--cylinder-loop-rog', dest='cylinder_loop_radius_of_gyration',
                      default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--loop-rog', dest='loop_radius_of_gyration', default=False,
                      action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--constant-energy', dest='constant_energy', default=False,
                      action='store_true', help='Use a constant energy')
    parser.add_option('', '--random-energy', dest='random_energy', default=False,
                      action='store_true', help='Use a random energy')
    parser.add_option('', '--cylinder-loop', dest='cylinder_loop', default=False,
                      action='store_true', help='Use the radius of gyration energy')
    parser.add_option('-y', '--cylinder-intersection', dest='cyl_intersect', default=False,
                      action='store_true', help='Use the cylinder-intersection energy')
    parser.add_option('-g', '--cheating', dest='cheating', default=False, action='store_true',
                      help='Use the rmsd from the real structure as the energy.')
    parser.add_option('', '--sequence-file', dest='sequence_file', default='',
                      help='The file containing sequence for the structure. To be used with the --secondary-structure flag',
                      type='str')
    parser.add_option('', '--sequence-str', dest='sequence_str', default='',
                      help='The sequence of the structure. To be used with the --secondary-structure flag',
                      type='str')
    parser.add_option('', '--eval-energy', dest='eval_energy', default=False,
                      action='store_true', help='Evaluate the energy of the parameter')
    parser.add_option('', '--output-dir', dest='output_dir', default='.',
                      help='Directory to store the sampled_structures', type='str')
    parser.add_option('', '--output-file', dest='output_file', default=None,
                      help='File to output the information about the sampling to. Defaults to standard out',
                      type=str)
    parser.add_option('', '--log-to-file', dest='log_to_file', default=False,
                      help='Print a log of the output to a file in the directory where the best structures are stored.',
                      action="store_true")
    parser.add_option('', '--save-n-best', dest='save_n_best', default=3,
                      help='Save the best n structures.', type=int)
    parser.add_option('', '--step-save', dest='step_save', default=0,
                      help="Save the structure at every n'th step.", type='int')
    parser.add_option('', '--no-background', dest='background', default=True,
                      action='store_false',
                      help="Don't use the background probability distribution.")
    parser.add_option('', '--stats-file', dest='stats_file',
                      default=fess.data_file('stats/combined.stats'),
                      help='Use a different set of statistics for sampling', type='str')
    parser.add_option('', '--filtered-stats-file', dest='filtered_stats_file', default=None,
                      help='Filter the statistics used for sampling using some other file.',
                      type='str')
    parser.add_option('', '--output-dir-suffix', dest='output_dir_suffix', default=None,
                      help="Specify an addition to the output directory", type='str')
    parser.add_option('', '--stats-type', dest='stats_type', default=None,
                      help="Use these types of statistics.", type='str')
    parser.add_option('', '--single-sampler', dest='single_sampler', default=False,
                      help='Use only a single sampler', action='store_true')
    parser.add_option('', '--no-rmsd', dest='no_rmsd', default=False,
                      help='Refrain from trying to calculate the rmsd.', action='store_true')
    parser.add_option('', '--dists', dest='dists', default=None,
                      help="Calculate the distance between pairs of nucleotides (i.e. 14,96:14,119)",
                      type='str')
    parser.add_option('', '--save-iterative-cg-measures', dest='save_iterative_cg_measures',
                      default=False,
                      help='Save the coarse-grain measures every time the energy function is recalculated',
                      action='store_true')
    parser.add_option('', '--jared-dir', dest='jared_dir', default=None,
                      help='Use JAR3D to predict geometries for the interior loops', type='str')
    parser.add_option('', '--start-at-native', dest='start_at_native', default=False,
                      action='store_true', help='Start at the native conformation')
    parser.add_option('', '--fix-loop', dest='fix_loop', default=None,
                      help='Fix the correct coordinates of a particular loop to the correct ones')
    parser.add_option('', '--fix-all-loops', dest='fix_all_loops', default=False,
                      action='store_true', help='Fix the geometries of all loops in the structure')
    parser.add_option('', '--no-constraint', dest='no_constraint', default=False,
                      action='store_true', help="Don't use a constraint energy")
    parser.add_option('', '--stretch', dest='stretch', default=1.0,
                      help="Stretch RO target distribution.", type='float')

    (options, args) = parser.parse_args()

    fud.pv('options.no_rmsd')
    if len(args) < 1:
        print("Usage: ./gibbs.py temp.comp")
        print("Or ./gibb.py temp.fa. If the extension of the argument file ends in .fa, then treat it as a fasta file.")
        sys.exit(1)

    fud.pv('args')

    # load every structure given on the command line
    bgs = []
    for arg in args:
        if arg[-3:] == '.fa':
            bgs += bgs_from_fasta(arg)
        else:
            bgs += [ftmc.CoarseGrainRNA(arg)]

    if len(bgs) > 1:
        print("WARNING: More than one structure entered... only the first will be bearbeitet",
              file=sys.stderr)

    energies_to_sample = []

    if options.cyl_intersect:
        energies_to_sample += [fbe.CombinedEnergy([], [fbe.CylinderIntersectionEnergy()])]

    if options.radius_of_gyration:
        sse = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        sse.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [sse])]

    if options.constant_energy:
        ce = fbe.ConstantEnergy()
        energies_to_sample += [fbe.CombinedEnergy([], [ce])]

    if options.random_energy:
        # not called `re`, which would shadow the standard-library module name
        random_energy = fbe.RandomEnergy()
        energies_to_sample += [fbe.CombinedEnergy([], [random_energy])]

    if options.loop_energy:
        lle = fbe.ShortestLoopDistanceEnergy()
        energies_to_sample += [fbe.CombinedEnergy([], [lle])]

    if options.aminor_shortestloop:
        nonconstraint = []
        bg = bgs[0]

        nonconstraint += [fbe.ShortestLoopDistanceEnergy()]
        nonconstraint += [fbe.AMinorEnergy(loop_type='h')]
        nonconstraint += [fbe.AMinorEnergy(loop_type='i')]

        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.specific_aminor:
        nonconstraint = [fbe.RadiusOfGyrationEnergy()]
        bg = bgs[0]

        # if we specify all, then we try and maximize the A-Minor interaction
        # potential for all internal and hairpin loops
        if len(options.specific_aminor.split(',')) == 1 and options.specific_aminor == 'all':
            for d in bg.defines:
                if d[0] == 'i' or d[0] == 'h':
                    if 'AA' in "".join(bg.get_define_seq_str(d)):
                        nonconstraint += [fbe.SpecificAMinorEnergy(loop_name=d,
                                                                   energy_prefactor=1)]
                        fud.pv('d')
        else:
            for sa in options.specific_aminor.split(','):
                nonconstraint += [fbe.SpecificAMinorEnergy(loop_name=sa, energy_prefactor=1)]

        for hloop in bg.hloop_iterator():
            if len(list(bg.define_residue_num_iterator(hloop))) > 4:
                fud.pv('hloop')
                nonconstraint += [fbe.ShortestLoopDistancePerLoop(hloop)]

        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.aminor_perloop:
        nonconstraint = []
        bg = bgs[0]

        for hloop in bg.hloop_iterator():
            nonconstraint += [fbe.ShortestLoopDistancePerLoop(hloop)]

        nonconstraint += [fbe.AMinorEnergy(loop_type='h')]
        nonconstraint += [fbe.AMinorEnergy(loop_type='i')]

        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.cylinder_perloop_radius_of_gyration:
        cie = fbe.CylinderIntersectionEnergy()
        rog = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        nonconstraint = [rog, cie]
        bg = bgs[0]

        for hloop in bg.hloop_iterator():
            nonconstraint += [fbe.ShortestLoopDistancePerLoop(hloop)]

        rog.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.cylinder_shortestloop_radius_of_gyration:
        cie = fbe.CylinderIntersectionEnergy()
        lle = fbe.ShortestLoopDistanceEnergy()
        rog = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        rog.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [lle, rog, cie])]

    if options.cylinder_loop_radius_of_gyration:
        cie = fbe.CylinderIntersectionEnergy()
        lle = fbe.LoopLoopEnergy()
        rog = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        rog.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [lle, rog, cie])]

    if options.cylinder_loop:
        # This branch has no radius-of-gyration term, so there is no
        # `background` attribute to configure here (the former assignment to
        # `sse.background` raised NameError unless another flag defined `sse`).
        lle = fbe.LoopLoopEnergy()
        cie = fbe.CylinderIntersectionEnergy()
        energies_to_sample += [fbe.CombinedEnergy([], [cie, lle])]

    if options.aminor:
        ame1 = fbe.AMinorEnergy(loop_type='h')
        ame2 = fbe.AMinorEnergy(loop_type='i')
        energies_to_sample += [fbe.CombinedEnergy([], [ame1, ame2])]

    if options.aminor_radius_of_gyration:
        sse = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        ame1 = fbe.AMinorEnergy(loop_type='h')
        ame2 = fbe.AMinorEnergy(loop_type='i')
        sse.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [sse, ame1, ame2])]

    if options.cylinder_radius_of_gyration:
        sse = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        cie = fbe.CylinderIntersectionEnergy()
        sse.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [sse, cie])]

    if options.loop_radius_of_gyration:
        sse = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        sse.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [sse, fbe.LoopLoopEnergy()])]

    if options.distance_energy:
        # the long-range constraints are taken from the first structure; bind
        # it here so the branch does not rely on an earlier flag having set it
        bg = bgs[0]
        energies_to_sample += [fbe.DistanceEnergy(bg.get_long_range_constraints())]

    if len(energies_to_sample) == 0 or options.aminor_perloop_radius_of_gyration:
        # default energy, also selected explicitly by --aminor-perloop-rog
        rog = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor,
                                         adjustment=options.stretch)
        nonconstraint = [rog]
        bg = bgs[0]

        for hloop in bg.hloop_iterator():
            nonconstraint += [fbe.ShortestLoopDistancePerLoop(hloop)]

        nonconstraint += [fbe.AMinorEnergy(loop_type='h')]
        nonconstraint += [fbe.AMinorEnergy(loop_type='i')]
        nonconstraint += [fbe.StemVirtualResClashEnergy()]

        rog.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.clamp is not None:
        pairs = options.clamp.split(':')
        bg = bgs[0]

        for p in pairs:
            r1, r2 = p.split(',')

            try:
                # initially we assume the clamp targets are residue numbers
                r1 = int(r1)
                r2 = int(r2)

                e1 = bg.get_node_from_residue_num(r1)
                e2 = bg.get_node_from_residue_num(r2)
            except ValueError:
                # ... or they are element names
                e1 = r1
                e2 = r2

            if e1 not in bg.defines or e2 not in bg.defines:
                print("ERROR: Invalid values for clamping", file=sys.stderr)
                sys.exit(1)

            if e1 == e2:
                # a distance restraint between an element and itself is
                # meaningless, so warn and skip it instead of adding it
                print("Can't clamp identical elements", file=sys.stderr)
                continue

            print("clamping {0}, {1}".format(e1, e2), file=sys.stderr)
            # the first energy to sample should be a CombinedEnergy
            energies_to_sample[0].energies += [fbe.DistanceExponentialEnergy(e1, e2, 15., 1.)]

    for bg in bgs:
        options.bg_filename = args[0]
        fud.pv('energies_to_sample')

        if len(list(bg.stem_iterator())) == 0:
            print("Cannot simulate an open chain, the structure needs to have at least one stem",
                  file=sys.stderr)
            sys.exit(1)

        predict(bg, energies_to_sample, options)
def main():
    """Render coarse-grain files as a fornac HTML page printed to stdout.

    Every positional argument is loaded as a coarse-grain structure. For each
    one the extra (distance based) links, the MCC against the first structure
    and the RMSD to the first structure are computed. The structures are then
    ordered either by descending MCC (default) or by ``reorder_structs``
    (``--sort-by pca``) and substituted into ``output_template`` as JSON.
    """
    usage = """
    python cg_to_fornac_html.py file1.cg file2.cg

    Convert coarse grain files to html files using fornac
    to display a 2D version of the structure.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-d', '--distance', dest='distance', default=25,
                      help="Draw links between elements that are within a certain distance from each other",
                      type='float')
    parser.add_option('-b', '--bp-distance', dest='bp_distance', default=16,
                      help="Draw links only between nucleotides which are so many nucleotides apart",
                      type='int')
    parser.add_option('-s', '--sort-by', dest='sort_by', default='mcc',
                      help="What to sort by (options: mcc, pca)", type='string')
    parser.add_option('-n', '--names', dest='names', default=False, action='store_true',
                      help='Add the name of the structure to the display')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    structs = []
    pair_bitmaps = []
    cgs = []
    all_links = []
    mccs = []
    cm = None

    for filename in args:
        cg = ftmc.CoarseGrainRNA(filename)
        cgs += [cg]

        # the first structure is the reference every other one is scored against
        if cm is None:
            cm = ftme.ConfusionMatrix(cg)

        (links, pair_bitmap) = extract_extra_links(
            cg, options.distance, options.bp_distance,
            correct_links=None if len(all_links) == 0 else all_links[0])

        all_links += [links]
        pair_bitmaps += [pair_bitmap]
        mcc = ftme.mcc(cm.evaluate(cg))
        rmsd = ftme.cg_rmsd(cgs[0], cg)

        seq_struct = {"sequence": cg.seq,
                      "structure": cg.to_dotbracket_string(),
                      "extraLinks": links}

        fud.pv('options.names')
        fud.pv('mcc, rmsd')
        if options.names:
            seq_struct['name'] = op.basename(filename) + " ({:.2f},{:.1f})".format(mcc, rmsd)
        else:
            seq_struct['name'] = ''

        structs += [seq_struct]
        mccs += [mcc]

    if options.sort_by == 'pca':
        print("Sorting by pca", file=sys.stderr)
        ix = reorder_structs(pair_bitmaps)
    else:
        print("Sorting by mcc", file=sys.stderr)
        # negate so that the highest MCC comes first
        ix = np.argsort(-np.array(mccs))

    new_array = [structs[x] for x in ix]

    print(output_template.format(json.dumps(new_array)))
def reconstruct_element(cg_to, cg_from, elem_to, elem_from, chain_to, chain_from, close_loop=True, reverse=False):
    '''
    Graft an element (elem_from) of a source chain (chain_from, cg_from) onto
    the target chain (chain_to, cg_to), aligning it on the adjoining elements.

    The neighboring elements need to be present in chain_to so that the
    grafted element can be aligned to their starting and ending positions.
    The dimensions and type of elem_to and elem_from need to be identical.

    @param cg_to: The coarse-grain representation of the target chain
    @param cg_from: The coarse-grain representation of the source chain
    @param elem_to: The element to replace
    @param elem_from: The source element
    @param chain_to: The chain to graft onto
    @param chain_from: The chain to excise from
    @param close_loop: Run the loop closure step after the initial alignment
    @param reverse: Passed through to align_starts
    @return: A list with one aligned (and optionally loop-closed) chain
             per strand of the element
    '''
    # nucleotide ranges (including the adjacent residues) of every strand
    strands_to = cg_to.define_range_iterator(elem_to, adjacent=True, seq_ids=True)
    strands_from = cg_from.define_range_iterator(elem_from, adjacent=True, seq_ids=True)

    # the chains containing the aligned and loop-closed nucleotides
    new_chains = []

    for r1, r2 in zip(strands_to, strands_from):
        excised = ftup.extract_subchain(chain_from, r2[0], r2[1])
        handle = r1 + r2

        align_starts(chain_to, excised, [handle], end=2, reverse=reverse)

        r = 0.
        loop_chain = excised
        if close_loop:
            (r, loop_chain) = align_and_close_loop(cg_to.seq_length, chain_to,
                                                   excised, [handle])
        fud.pv('elem_to, r')
        new_chains.append(loop_chain)

        residue_pairs = zip(cg_to.iterate_over_seqid_range(*r1),
                            cg_from.iterate_over_seqid_range(*r2))
        for position, (res1, res2) in enumerate(residue_pairs):
            # The first nucleotide of a strand belongs to the preceding stem
            # and is left alone, except for 5' unpaired regions, which have
            # no preceding stem.
            if position == 0 and elem_to[0] != 'f':
                continue

            loop_chain[res2].id = res1
            add_residue_to_rosetta_chain(chain_to, loop_chain[res2])

    return new_chains
def main():
    """Print a fornac HTML page showing the given coarse-grain files in 2D.

    Each positional argument is loaded as a coarse-grain structure and scored
    (MCC and RMSD) against the first one. The resulting entries are ordered
    by descending MCC, or by ``reorder_structs`` when ``--sort-by pca`` is
    given, and written to stdout through ``output_template``.
    """
    usage = """
    python cg_to_fornac_html.py file1.cg file2.cg

    Convert coarse grain files to html files using fornac
    to display a 2D version of the structure.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-d', '--distance', dest='distance', default=25, type='float',
                      help="Draw links between elements that are within a certain distance from each other")
    parser.add_option('-b', '--bp-distance', dest='bp_distance', default=16, type='int',
                      help="Draw links only between nucleotides which are so many nucleotides apart")
    parser.add_option('-s', '--sort-by', dest='sort_by', default='mcc', type='string',
                      help="What to sort by (options: mcc, pca)")
    parser.add_option('-n', '--names', dest='names', default=False, action='store_true',
                      help='Add the name of the structure to the display')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    structs, pair_bitmaps, cgs, all_links, mccs = [], [], [], [], []
    reference = None

    for filename in args:
        cg = ftmc.CoarseGrainRNA(filename)
        cgs.append(cg)

        # the first structure read serves as the reference for the scores
        if not reference:
            reference = ftme.AdjacencyCorrelation(cg)

        first_links = all_links[0] if all_links else None
        links, pair_bitmap = extract_extra_links(cg, options.distance,
                                                 options.bp_distance,
                                                 correct_links=first_links)
        all_links.append(links)
        pair_bitmaps.append(pair_bitmap)

        mcc = ftme.mcc(reference.evaluate(cg))
        rmsd = ftme.cg_rmsd(cgs[0], cg)

        seq_struct = {}
        seq_struct["sequence"] = cg.seq
        seq_struct["structure"] = cg.to_dotbracket_string()
        seq_struct["extraLinks"] = links

        fud.pv('options.names')
        fud.pv('mcc, rmsd')

        label = ''
        if options.names:
            label = op.basename(filename) + " ({:.2f},{:.1f})".format(mcc, rmsd)
        seq_struct['name'] = label

        structs.append(seq_struct)
        mccs.append(mcc)

    if options.sort_by == 'pca':
        print("Sorting by pca", file=sys.stderr)
        ix = reorder_structs(pair_bitmaps)
    else:
        print("Sorting by mcc", file=sys.stderr)
        # argsort is ascending, so sort the negated scores
        ix = np.argsort(-np.array(mccs))

    ordered = [structs[x] for x in ix]
    print(output_template.format(json.dumps(ordered)))
def coordinates_to_pymol(self, cg):
    """Translate the elements of a coarse-grain structure into drawing calls.

    Stems are drawn with ``add_stem_like``; every other element becomes a
    labelled segment. Depending on the printer's flags, long-range
    interactions, encompassing cylinders, close stem pairs, virtual atoms,
    element coordinate bases, energy contributions and stem-stem
    orientations are added as well.

    :param cg: The coarse-grain structure to draw. Only elements listed in
               ``self.constraints`` are drawn when that attribute is set.
    """
    loops = set(cg.hloop_iterator())

    for key in cg.coords.keys():
        if self.constraints is not None and key not in self.constraints:
            continue

        (p, n) = cg.coords[key]
        color = self.get_element_color(key)

        if key[0] == 's':
            self.add_stem_like(cg, key)
            self.draw_bounding_boxes(cg, key)
        elif key[0] == 'h':
            if self.add_loops and key in loops:
                self.add_segment(p, n, color, 1.0,
                                 key + " " + str(cg.get_length(key)))
        elif key[0] == 'm':
            twists = cg.get_twists(key)

            # check if the multiloop is longer than one. If it's not, then
            # it has an empty define and its length will be 1
            if len(cg.defines[key]) == 0:
                self.add_segment(p, n, color, 1.0, key + " 1")
            else:
                self.add_segment(p, n, color, 1.0,
                                 key + " " + str(cg.defines[key][1] - cg.defines[key][0] + 1))

            self.add_segment(p, p + 7 * twists[0], 'light gray', 0.3)
            self.add_segment(n, n + 7 * twists[1], 'light gray', 0.3)

            x = (p + n) / 2
            t = ftuv.normalize((twists[0] + twists[1]) / 2.)
            self.add_segment(x, x + 7 * t, 'middle gray', 0.3)
        elif key[0] == 'f':
            if self.visualize_three_and_five_prime:
                self.add_segment(p, n, color, 1.0,
                                 key + " " + str(cg.defines[key][1] - cg.defines[key][0] + 1) + "")
        elif key[0] == 't':
            # NOTE(review): unlike the 'f' case no +1 is added to the length
            # here -- confirm whether that difference is intended.
            if self.visualize_three_and_five_prime:
                self.add_segment(p, n, color, 1.0,
                                 key + " " + str(cg.defines[key][1] - cg.defines[key][0]) + "")
        else:
            self.add_segment(p, n, color, 1.0, key)

    if self.add_longrange:
        for key1 in cg.longrange.keys():
            for key2 in cg.longrange[key1]:
                try:
                    (point1, point2) = cuv.line_segment_distance(cg.coords[key1][0],
                                                                 cg.coords[key1][1],
                                                                 cg.coords[key2][0],
                                                                 cg.coords[key2][1])

                    # draw the interaction as a dashed line
                    dash_length = 0.6
                    gap_length = dash_length * 2
                    direction = ftuv.normalize(point2 - point1)
                    num_dashes = ftuv.magnitude(point2 - point1) / (dash_length + gap_length)
                    fud.pv('num_dashes')

                    for i in range(int(num_dashes)):
                        self.add_segment(point1 + i * (dash_length + gap_length) * direction,
                                         point1 + (i * (dash_length + gap_length) + dash_length) * direction,
                                         "purple", 0.3, "")
                except Exception:
                    # best effort: skip interactions that cannot be drawn, but
                    # let KeyboardInterrupt / SystemExit propagate
                    continue

    if self.encompassing_stems:
        self.add_encompassing_cylinders(cg, 7.)

    if self.max_stem_distances > 0:
        for (s1, s2) in it.permutations(cg.stem_iterator(), r=2):
            (i1, i2) = cuv.line_segment_distance(cg.coords[s1][0],
                                                 cg.coords[s1][1],
                                                 cg.coords[s2][0],
                                                 cg.coords[s2][1])
            if cuv.magnitude(i2 - i1) < self.max_stem_distances:
                self.add_segment(i1, i2, 'cyan', 0.3)

    if self.virtual_atoms:
        va = ftug.virtual_atoms(cg, sidechain=False)
        atom_width = 0.5
        residues = sorted(va.keys())

        cmap = None
        if self.rainbow:
            # set up the colormap once instead of once per atom
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            cmap = plt.get_cmap('gist_rainbow')

        # colors of the atoms by the type of element they belong to
        element_colors = {'s': 'green', 'i': 'yellow', 'm': 'red', 'h': 'blue'}

        for i, r in enumerate(residues):
            for a in va[r].keys():
                if self.rainbow:
                    self.add_sphere(va[r][a],
                                    color_rgb=cmap(i / float(len(residues))),
                                    width=atom_width)
                else:
                    d = cg.get_node_from_residue_num(r)
                    if d[0] in element_colors:
                        self.add_sphere(va[r][a], element_colors[d[0]], width=atom_width)

    if self.basis:
        for d in cg.defines.keys():
            origin, basis = ftug.element_coord_system(cg, d)
            self.add_segment(origin, origin + 7. * basis[1], 'purple', 2.)

    print("energy_function:", self.energy_function, file=sys.stderr)
    # print the contributions of the energy function, if one is specified
    if self.energy_function is not None:
        print("key", file=sys.stderr)
        e_func = self.energy_function
        int_energies = list(e_func.interaction_energy_iter(cg, background=False))

        # max() raises on an empty sequence, so only draw when there is
        # at least one interaction
        if int_energies:
            max_energy = max(int_energies, key=lambda x: x[1])
            print("max_energy:", max_energy, file=sys.stderr)

            for (interaction, energy) in int_energies:
                (p, n) = (cg.get_point(interaction[0]), cg.get_point(interaction[1]))
                # widths are scaled relative to the largest energy
                scaled_energy = - max_energy[1] + energy
                self.add_segment(p, n, 'purple', 3 * np.exp(scaled_energy))

    if self.stem_stem_orientations is not None:
        for (s1, s2) in it.permutations(cg.stem_iterator(), 2):
            # NOTE(review): only pairs involving the element 's65' are drawn;
            # this looks like a leftover debugging filter -- confirm.
            if s1 != 's65' and s2 != 's65':
                continue

            s1_vec = cg.coords[s1][1] - cg.coords[s1][0]
            s2_vec = cg.coords[s2][1] - cg.coords[s2][0]
            (i1, i2) = cuv.line_segment_distance(cg.coords[s1][0],
                                                 cg.coords[s1][1],
                                                 cg.coords[s2][0],
                                                 cg.coords[s2][1])
            i_vec = i2 - i1

            # i_rej will be orthogonal to s1_vec in the direction of s2
            i_rej = cuv.vector_rejection(i_vec, s1_vec)

            # plane_vec will be orthogonal to s1_vec and to the direction of s2
            plane_vec = np.cross(i_rej, s1_vec)

            # s2_proj_in is in the intersection plane
            s2_proj_in = cuv.vector_rejection(s2_vec, plane_vec)

            start_point = cg.coords[s1][0] + 5 * cg.twists[s1][0]
            ortho_offset = cuv.magnitude(i_rej)
            # the small offset keeps the radicand below non-negative
            dist = cuv.magnitude(i_vec) + 0.0001

            lateral_offset = m.sqrt(dist ** 2 - ortho_offset ** 2)

            if lateral_offset > 10:
                continue

            self.add_segment(start_point,
                             start_point + 7 * cuv.normalize(s2_proj_in),
                             'white', 1.5)
def main():
    """Display the coarse-grain representation of one or more structures in pymol.

    The structures named on the command line are converted into a pymol
    script by a ``PymolPrinter``; the script is written to a temporary file
    and pymol is started on it. With ``--output`` the scene is ray-traced,
    saved as a picture and pymol quits.
    """
    usage = """
    ./visualize_cg.py cg_file

    Display the coarse-grain representation of a structure in pymol.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-g', '--highlight', dest='highlight', default=None,
                      help="Highlight some elements", type='str')
    parser.add_option('-o', '--output', dest='output', default=None,
                      help="Create a picture of the scene and exit", type='str')
    parser.add_option('-r', '--longrange', dest='longrange', default=False,
                      action='store_true', help="Display long-range interactions")
    parser.add_option('-l', '--loops', dest='loops', default=True, action='store_false',
                      help="Don't display the coarse-grain hairpin loops")
    parser.add_option('-c', '--cones', dest='cones', default=False, action='store_true',
                      help="Display cones that portrude from the stems")
    parser.add_option('-x', '--text', dest='text', default=False, action='store_true',
                      help="Add labels to the figure.")
    parser.add_option('-a', '--align', dest='align', default=False, action='store_true',
                      help='Align all of the structures with the first')
    parser.add_option('-e', '--encompassing-stems', dest='encompassing_stems', default=False,
                      action='store_true',
                      help='Show the big stems that encompass the colinear ones.')
    parser.add_option('-v', '--virtual-atoms', dest='virtual_atoms', default=False,
                      action='store_true', help='Display the virtual atoms')
    parser.add_option('-d', '--distance', dest='distance', default=None,
                      help="Draw the lines between specified virtual residues")
    parser.add_option('-b', '--basis', dest='basis', default=False, action='store_true',
                      help='Display the coordinate basis of each element')
    parser.add_option('', '--batch', dest='batch', default=False, action='store_true',
                      help='Start pymol in batch mode')
    # NOTE(review): the help text promises that this enables --virtual-atoms,
    # but nothing in this function does so -- confirm against PymolPrinter.
    parser.add_option('', '--sidechain-atoms', dest='sidechain_atoms', default=False,
                      action='store_true',
                      help='Include the sidechain atoms. Automatically enables --virtual-atoms')
    parser.add_option('', '--rainbow', dest='rainbow', default=False, action='store_true',
                      help='Color each of the nucleotide positions (i.e. average atoms) '
                           'according to the colors of the rainbow and their position')
    parser.add_option('', '--only-elements', dest='only_elements', default=None,
                      help='Display only these elements '
                           'element names should be '
                           'separated by commas')
    parser.add_option('', '--color-gradual', dest='color_gradual', default=None,
                      help='Color the specified elements '
                           'gradually from one to the other, example (i1,i4,m1)',
                      type='str')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    pp = cvp.PymolPrinter()
    pp.add_loops = options.loops
    pp.draw_cones = options.cones
    pp.add_longrange = options.longrange
    pp.print_text = options.text
    pp.encompassing_stems = options.encompassing_stems
    pp.virtual_atoms = options.virtual_atoms
    pp.sidechain_atoms = options.sidechain_atoms
    pp.basis = options.basis
    pp.rainbow = options.rainbow

    if options.only_elements is not None:
        pp.only_elements = options.only_elements.split(',')

    cgs = [cmg.CoarseGrainRNA(a) for a in args]

    if options.align:
        align_cgs(cgs)

    if options.color_gradual is not None:
        pp.element_specific_colors = dict()
        import matplotlib.pyplot as plt
        cmap = plt.get_cmap('coolwarm')

        # everything is black unless it is part of the gradient
        for d in cgs[0].defines:
            pp.element_specific_colors[d] = 'black'

        to_color_nodes = options.color_gradual.split(',')
        for i, node in enumerate(to_color_nodes):
            node_color = cmap(i / float(len(to_color_nodes)))
            print(node, node_color)
            pp.element_specific_colors[node] = node_color

    for i, cg in enumerate(cgs):
        if i > 0:
            # every structure after the first is drawn with a modified color
            pp.color_modifier = .3

        pp.coordinates_to_pymol(cg)

    # highlights and distances refer to the last structure that was drawn
    cg = cgs[-1]

    # highlight things in purple
    if options.highlight is not None:
        for s in options.highlight.split(','):
            fud.pv('s')
            pp.add_twists = False
            pp.add_stem_like(cg, s, color='purple', width=3.)

    # display the distances between nucleotides
    if options.distance is not None:
        virtual_atoms = ftug.virtual_atoms(cg, sidechain=False)

        for dist_pair in options.distance.split(':'):
            fud.pv('dist_pair')
            fr, to = dist_pair.split(',')

            fr = int(fr)
            to = int(to)

            pp.add_dashed(virtual_atoms[fr]["C1'"], virtual_atoms[to]["C1'"], width=1.2)

    # text mode, so that the str scripts can be written on Python 2 and 3
    with tf.NamedTemporaryFile(mode='w') as f:
        with tf.NamedTemporaryFile(mode='w', suffix='.pml') as f1:
            f.write(pp.pymol_string())
            f.flush()

            pymol_cmd = 'hide all\n'
            pymol_cmd += 'run %s\n' % (f.name)
            pymol_cmd += 'show cartoon, all\n'
            pymol_cmd += 'bg white\n'
            pymol_cmd += 'clip slab, 10000\n'
            pymol_cmd += 'orient\n'

            if options.output is not None:
                pymol_cmd += 'ray\n'
                pymol_cmd += 'png %s\n' % (options.output)
                pymol_cmd += 'quit\n'

            f1.write(pymol_cmd)
            f1.flush()

            print("f1.name:", f1.name)
            if options.batch:
                p = sp.Popen(['pymol', '-cq', f1.name], stdout=sp.PIPE, stderr=sp.PIPE)
            else:
                p = sp.Popen(['pymol', f1.name], stdout=sp.PIPE, stderr=sp.PIPE)

            # pymol has to finish before the temporary files are removed
            out, err = p.communicate()
            print("err:", err, file=sys.stderr)
def json_to_fasta(rna_json_str):
    '''
    Convert an RNA json (as returned by fasta_to_json) back into fasta
    strings, one per molecule, which can later be used to create a
    BulgeGraph and then another json.

    Nodes are grouped into molecules ("trees") by following 'backbone' and
    'label_link' links; nodes of type 'pseudo' are ignored. 'basepair' and
    'pseudoknot' links whose two ends lie in the same molecule contribute to
    that molecule's pair table; those that span two molecules are reported
    separately by the coordinates of their end points.

    :param rna_json_str: A json string representation of an RNA as returned
                         by fasta_to_json. It must contain a 'nodes' and a
                         'links' list.
    :return: A tuple (all_fastas, all_xs, all_ys, all_uids,
             different_tree_links). The first four are lists with one entry
             per molecule: the fasta string, the x coordinates and the y
             coordinates (nucleotides first, then labels) and the nucleotide
             uids. different_tree_links is a list of
             ((from_x, from_y), (to_x, to_y)) tuples, one for each pairing
             link that connects two different molecules.
    '''
    rna_json = json.loads(rna_json_str)

    # per-molecule collections, keyed by the frozenset of the molecule's nodes
    pair_list = col.defaultdict(list)
    node_list = col.defaultdict(list)
    label_list = col.defaultdict(list)

    # make dictionaries hashable, it's ok here because it will only be used
    # for the nodes and the links and their values don't change
    class hashabledict(dict):
        def __hash__(self):
            return hash(tuple(sorted(self.items())))

    hashable_links = [hashabledict(l) for l in rna_json['links']]
    hashable_nodes = [hashabledict(n) for n in rna_json['nodes']]

    # nodes which have not been assigned to a molecule yet
    all_nodes = set(n for n in hashable_nodes if n['node_type'] != 'pseudo')

    # undirected adjacency over the links that hold a molecule together
    links_dict = col.defaultdict(list)
    for link in hashable_links:
        if link['link_type'] in ('backbone', 'label_link'):
            source = hashabledict(link['source'])
            target = hashabledict(link['target'])
            links_dict[source].append(target)
            links_dict[target].append(source)

    # calculate the list of trees in the forest of RNA molecules
    # trees correspond to individual molecules
    # different trees do not share backbone bonds
    nodes_to_trees = dict()
    while all_nodes:
        # if there's nodes left, then there's a new tree to be made
        to_visit = [next(iter(all_nodes))]
        curr_tree = set()

        while to_visit:
            curr_node = to_visit.pop()
            if curr_node not in all_nodes:
                # The same node can be queued more than once (duplicate
                # links or cycles); it was handled on its first pop.
                continue

            all_nodes.remove(curr_node)
            curr_tree.add(curr_node)

            # queue the neighbors which haven't been seen yet
            to_visit.extend(neighbor for neighbor in links_dict[curr_node]
                            if neighbor in all_nodes)

        # freeze the finished tree once so that it can be used as a
        # dictionary key without being rebuilt for every lookup
        tree_key = frozenset(curr_tree)
        for member in curr_tree:
            nodes_to_trees[member] = tree_key

    different_tree_links = []
    for link in rna_json['links']:
        # only consider base-pair links
        if link['link_type'] not in ('basepair', 'pseudoknot'):
            continue

        from_node = hashabledict(link['source'])
        to_node = hashabledict(link['target'])
        from_tree = nodes_to_trees[from_node]
        to_tree = nodes_to_trees[to_node]

        if from_tree == to_tree:
            # record the pair in both directions
            from_id = int(from_node['id'])
            to_id = int(to_node['id'])
            pair_list[from_tree] += [(from_id, to_id), (to_id, from_id)]
        else:
            sys.stderr.write("Different trees\n")
            different_tree_links += [((from_node['x'], from_node['y']),
                                      (to_node['x'], to_node['y']))]

    # list the nodes in each molecule
    for node in hashable_nodes:
        if node['node_type'] == 'nucleotide':
            node_list[nodes_to_trees[node]] += [
                (node['id'], node['name'], node['x'], node['y'],
                 node['struct_name'], node['uid'])
            ]

        if node['node_type'] == 'label':
            sys.stderr.write("adding label\n")
            label_list[nodes_to_trees[node]] += [(node['x'], node['y'])]

    all_fastas = []
    all_xs = []
    all_ys = []
    all_uids = []

    # NOTE: the names seq, key, label_list, all_xs and all_fastas are looked
    # up by name in the fud.pv debug calls below, so they must not be renamed.
    for key in node_list:
        pair_table = fus.tuples_to_pairtable(pair_list[key],
                                             len(node_list[key]))
        dotbracket = fus.pairtable_to_dotbracket(pair_table)

        seq = "".join(n[1] for n in node_list[key])

        fud.pv('seq')
        fud.pv('len(seq)')
        fud.pv('len(label_list[key])')

        # nucleotide coordinates first, followed by the label coordinates
        all_xs += [[n[2] for n in node_list[key]] +
                   [n[0] for n in label_list[key]]]
        all_ys += [[n[3] for n in node_list[key]] +
                   [n[1] for n in label_list[key]]]
        fud.pv('len(all_xs[-1])')

        all_uids += [[n[5] for n in node_list[key]]]

        # the struct_name of the molecule's first nucleotide is the header
        all_fastas += [
            ">{}\n{}\n{}".format(node_list[key][0][4], seq, dotbracket)
        ]
        fud.pv('all_fastas')

    return (all_fastas, all_xs, all_ys, all_uids, different_tree_links)
def _nearest_distances(points, others):
    '''
    Return, for each point in points, the distance to its closest point in
    others.
    '''
    return [min(ftuv.magnitude(p - q) for q in others) for p in points]


def main():
    '''
    Cluster the (u, v) helix orientations of the real angle statistics by
    loop dimension and plot two hard-coded dimensions on the unit sphere.

    For every selected type-2 statistic the (u, v) angles of the nearest
    dimension sizes are pooled. The pooled point sets are compared pairwise
    by the largest nearest-neighbor distance (taken in both directions) and
    clustered with complete linkage; the dendrogram is shown. Afterwards the
    orientations for the dimensions (6, 7, 2) and (5, 6, 2) are drawn on a
    sphere which is either saved to --fig-name or shown.
    '''
    usage = """
    ./helix_orienation_divergences.py

    Analyze how much the helix-helix orientations diverge between two data sets.
    """
    num_args = 0
    parser = OptionParser(usage=usage)

    parser.add_option('-r', '--resolution', dest='resolution', default=10,
                      help="The resolution of the resulting plot", type='int')
    parser.add_option('-a', '--angle', dest='angle', default=0,
                      help="The angle of the camera", type='float')
    parser.add_option('-f', '--fig-name', dest='fig_name', default='',
                      help="The name of the file to save the figure to. If it is not specified, the figure will not be saved",
                      type='str')
    parser.add_option('-i', '--interior_loops', dest='interior_loops',
                      default=False, help='Cluster only the interior loops',
                      action='store_true')
    parser.add_option('-m', '--multi_loops', dest='multi_loops',
                      default=False, help='Cluster only the multi loops',
                      action='store_true')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    real_stats = ftms.ConformationStats('fess/stats/real.stats').angle_stats
    # NOTE(review): the sampled statistics are loaded but never used below;
    # the load is kept so that the file is still read as before.
    sampled_stats = ftms.ConformationStats('fess/stats/temp.stats').angle_stats

    # count how many statistics we have for each statistic type
    stat_counts = c.defaultdict(int)
    for sc in real_stats.keys():
        stat_counts[sc] += len(real_stats[sc])

    # the pooled (u, v) points for each selected dimension
    histograms = dict()
    # iterate over a snapshot of the keys in case a lookup adds new ones
    for b in list(stat_counts.keys()):
        if b[2] != 2.:
            # only look at type 2 angles
            continue

        is_multiloop_dim = (b[0] == 1000 or b[1] == 1000)
        if options.interior_loops and is_multiloop_dim:
            continue
        if options.multi_loops and not is_multiloop_dim:
            continue

        (selected_sizes, count) = get_nearest_dimension_sizes(b, stat_counts, 1)

        if count < 3:
            continue

        fud.pv('b, selected_sizes')

        # get the statistics that correspond to the selected sizes
        combined_real = []
        for ss in selected_sizes:
            ss_r = real_stats[ss]
            combined_real += list(ss_r[['u', 'v']].values)

        histograms[b] = np.array(combined_real)

    dists = []
    named_dists = dict()
    pp_dists = dict()

    # it.combinations yields the pairs in the order of a condensed distance
    # matrix over histograms.keys(), which is what sch.complete expects
    for k1, k2 in it.combinations(list(histograms.keys()), 2):
        per_point_distances = (
            _nearest_distances(histograms[k1], histograms[k2]) +
            _nearest_distances(histograms[k2], histograms[k1]))

        dists += [max(per_point_distances)]
        named_dists[(k1, k2)] = max(per_point_distances)
        pp_dists[(k1, k2)] = per_point_distances

    fud.pv('dists')
    Z = sch.complete(dists)
    fud.pv('Z')
    sch.dendrogram(Z, labels=list(histograms.keys()), leaf_rotation=90)
    plt.subplots_adjust(bottom=0.25)

    plt.show()

    # NOTE: k1, k2, named_dists, pp_dists, kl and real_us are looked up by
    # name in the fud.pv debug calls below, so they must not be renamed.
    k1 = (6, 7, 2)
    k2 = (5, 6, 2)

    rs = get_certain_angle_stats(real_stats, k1)
    ss = get_certain_angle_stats(real_stats, k2)

    fud.pv('named_dists[(k1,k2)]')
    fud.pv('pp_dists[(k1,k2)]')

    real_us = rs[['u', 'v']].values
    sampled_us = ss[['u', 'v']].values

    U_r = real_us[:, 0]
    V_r = real_us[:, 1]

    U_s = sampled_us[:, 0]
    V_s = sampled_us[:, 1]

    total_r = len(U_r)
    total_s = len(U_s)

    hr = np.histogram2d(U_r, V_r)
    hs = np.histogram2d(U_s, V_s)

    # add a pseudo-count to every bin and scale each histogram by the size
    # of its own data set
    pseudo_r = (hr[0] + 1) / total_r
    pseudo_s = (hs[0] + 1) / total_s
    # NOTE(review): despite its name this is p * (p / q) without a
    # logarithm; it is only printed for debugging - confirm what is intended.
    kl = pseudo_r * (pseudo_r / pseudo_s)
    fud.pv('kl')
    fud.pv('sum(map(sum, kl))')

    # project the (u, v) angles onto the unit sphere
    X_r = np.sin(U_r) * np.cos(V_r)
    Y_r = np.sin(U_r) * np.sin(V_r)
    Z_r = np.cos(U_r)

    r = 1.
    X_s = r * np.sin(U_s) * np.cos(V_s)
    Y_s = r * np.sin(U_s) * np.sin(V_s)
    Z_s = r * np.cos(U_s)

    fud.pv('real_us')

    print("%d %d" % (len(real_us), len(sampled_us)))

    fig = plt.figure(figsize=(10, 10))
    ax = Axes3D(fig)

    a = Arrow3D([-1.3, 1.3], [0, 0], [0, 0], mutation_scale=20, lw=5,
                arrowstyle="-|>", color="g")
    ax.add_artist(a)

    ax.plot(X_r, Y_r, Z_r, 'bo', alpha=0.3)
    ax.plot(X_s, Y_s, Z_s, 'ro', alpha=0.3)

    # wireframe of the unit sphere as a visual reference
    u, v = np.mgrid[0:2 * np.pi:20j, 0:np.pi:10j]
    x = np.cos(u) * np.sin(v)
    y = np.sin(u) * np.sin(v)
    z = np.cos(v)
    ax.plot_wireframe(x, y, z, color="y")

    ax._axis3don = False
    ax.set_zlim3d(-1, 1)
    ax.w_zaxis.set_major_locator(LinearLocator(6))
    ax.view_init(0, options.angle)

    if options.fig_name != "":
        plt.savefig(options.fig_name, bbox_inches='tight')
    else:
        plt.show()