def convert_center(image):
    """
    Centers image.

    Runs ImageMagick's ``convert`` to write a centered copy to
    ``<image>.temp`` and then renames that copy over the original path.
    """
    temp_image = image + ".temp"
    command = "convert %s -background none -gravity Center %s.temp" % (image,
                                                                       image)
    OSU.system_command(command)
    os.rename(temp_image, image)
def write_R2D2_output_to_files(reactivities_prefix, R2D2_pairs, R2D2_consensus,
                               R2D2_consensus_ct, react_rhos, crystals_mat,
                               crystals_ctfile, crystals_ct, cryst_seq):
    """
    Write out results of R2D2 iterations.

    All output paths are built from reactivities_prefix:
      _R2D2_pairs.txt / _R2D2_consensus.txt - tab-delimited matrix dumps
      _R2D2_consensus.stats - benchmark statistics against crystals_mat
      _R2D2_consensus_ct_react.txt / _crystal_ct_react.txt - reactivities in ct
      _R2D2_consensus.ct / .ps / .jpg - consensus ct file and its
        CircleCompare drawing against crystals_ctfile
    """

    def dump_matrix(matrix, path):
        # One tab-separated row per line, terminated by a newline
        with open(path, "w") as handle:
            text_rows = ["\t".join(map(str, row)) for row in matrix.tolist()]
            handle.write("\n".join(text_rows) + "\n")

    dump_matrix(R2D2_pairs, "%s_R2D2_pairs.txt" % (reactivities_prefix))
    dump_matrix(R2D2_consensus,
                "%s_R2D2_consensus.txt" % (reactivities_prefix))

    with open("%s_R2D2_consensus.stats" % (reactivities_prefix),
              "w") as handle:
        consensus_stats = SU.calc_benchmark_statistics_matrix(
            R2D2_consensus, crystals_mat)
        handle.write(str(consensus_stats))

    consensus_binary_ct = SU.binary_mat_to_binary_ct(R2D2_consensus)
    write_reactivities_in_ct(
        consensus_binary_ct, react_rhos,
        reactivities_prefix + "_R2D2_consensus_ct_react.txt")

    SU.ct_list_to_file(R2D2_consensus_ct, cryst_seq,
                       "%s_R2D2_consensus.ct" % (reactivities_prefix))
    SU.runRNAstructure_CircleCompare(
        "%s_R2D2_consensus.ct" % (reactivities_prefix), crystals_ctfile,
        "%s_R2D2_consensus.ps" % (reactivities_prefix))
    convert_cmd = "convert %s_R2D2_consensus.ps %s_R2D2_consensus.jpg" % (
        reactivities_prefix, reactivities_prefix)
    OSU.system_command(convert_cmd)

    write_reactivities_in_ct(crystals_ct, react_rhos,
                             reactivities_prefix + "_crystal_ct_react.txt")
def run_Fold(seqfile, reactivities_prefix, react_rhos, num_proc, crystals_mat,
             crystals_ctfile, output_suffix, shape_direct=False,
             shape_slope=1.1, shape_intercept=-0.3):
    """
    RNAstructure-Fold process
    Will handle both SHAPE-directed and not SHAPE-directed

    Output files are all named "<reactivities_prefix>_<output_suffix>" plus
    an extension (.ct, .ps, .jpg, .stats, _ct_react.txt). When shape_direct
    is set, "<reactivities_prefix>.rho" is passed to Fold as the shapefile.

    Returns:
        (fold_shape_ct, fold_shape_react_mat) - the first structure read back
        from the .ct file and its binary matrix form.
    """
    out_prefix = "%s_%s" % (reactivities_prefix, output_suffix)
    ct_file = out_prefix + ".ct"

    # The slope/intercept are forwarded in both modes; only the shapefile
    # argument distinguishes a SHAPE-directed fold.
    fold_kwargs = {
        "p": num_proc,
        "shape_intercept": shape_intercept,
        "shape_slope": shape_slope,
    }
    if shape_direct:
        fold_kwargs["shapefile"] = reactivities_prefix + ".rho"

    # `lock` is a module-level global. Release it in a finally block so a
    # failure inside Fold/CircleCompare cannot leave it held forever.
    if lock is not None:
        lock.acquire()
    try:
        SU.runRNAstructure_fold(seqfile, ct_file, **fold_kwargs)
        SU.runRNAstructure_CircleCompare(ct_file, crystals_ctfile,
                                         out_prefix + ".ps")
    finally:
        if lock is not None:
            lock.release()

    OSU.system_command("convert %s.ps %s.jpg" % (out_prefix, out_prefix))

    # Compute the statistics before opening the output file so an error does
    # not leave an empty .stats file behind.
    fold_shape_ct = SU.get_ct_structs(ct_file)[0]
    fold_shape_react_mat = SU.ct_struct_to_binary_mat(fold_shape_ct)
    stats = SU.calc_benchmark_statistics_matrix(fold_shape_react_mat,
                                                crystals_mat)
    with open(out_prefix + ".stats", "w") as f:
        f.write(str(stats))

    write_reactivities_in_ct(fold_shape_ct, react_rhos,
                             out_prefix + "_ct_react.txt")
    return fold_shape_ct, fold_shape_react_mat
def convert_center_resize(image, res):
    """
    Centers image and resizes.

    First tries ImageMagick ``convert`` with ``-alpha discrete -blur 0x1``;
    if that attempt raises, retries with the plain centering/extent command.
    The result is written to ``<image>.temp`` and renamed over ``image``.

    image - path of the image to modify in place
    res   - value passed to ``-extent`` (e.g. "1200x2800")
    """
    temp_image = image + ".temp"
    preferred_cmd = "convert %s -alpha discrete -blur 0x1 -background none -gravity Center -extent %s %s.temp" % (
        image, res, image)
    fallback_cmd = "convert %s -background none -gravity Center -extent %s %s.temp" % (
        image, res, image)
    try:
        print(preferred_cmd)
        OSU.system_command(preferred_cmd)
        os.rename(temp_image, image)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit now propagate instead of being swallowed.
        print(fallback_cmd)
        OSU.system_command(fallback_cmd)
        os.rename(temp_image, image)
def wait_for_jcoll(jcoll, out_dir, wait=30):
    """
    Makes current process wait until no jobs are left in a job collection jcoll

    Polls ``jlist -jcoll <jcoll>`` every `wait` seconds until its stdout
    contains "listempty". Progress is appended to
    ``<out_dir>jcoll_waiting.txt``.
    """
    jlist_cmd = ['/opt/voyager/nbs/bin/jlist', '-jcoll', jcoll]
    while True:
        poll = subprocess.Popen(jlist_cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = poll.communicate()
        if "listempty" in out:
            break
        OSU.system_command(
            "echo \"waiting\n%s\n%s\n\" >> %sjcoll_waiting.txt" %
            (out, err, out_dir))
        time.sleep(wait)
    # NOTE(review): confirm the timestamp entry is meant to be written once,
    # after the wait completes, rather than once per poll.
    OSU.system_command("echo \"%s\n\" >> %sjcoll_waiting.txt" %
                       (time.localtime(), out_dir))
def generate_DG_output(cotrans, start=-1, end=-1):
    """
    Generate DG state plots and .dump file

    Writes one tab-delimited row per structure per length to
    <cotrans.output_dir>/DG_state_plot.dump and then runs
    make_DG_state_plot.R on it. start/end default (-1) to the smallest and
    largest keys of cotrans.file_data.
    """
    if start == -1:
        start = sorted(cotrans.file_data)[0]
    if end == -1:
        end = sorted(cotrans.file_data)[-1]
    print("generate_DG_output: " + str(start) + " " + str(end))

    dump_path = cotrans.output_dir + "/DG_state_plot.dump"
    with open(dump_path, 'w') as dump:
        dump.write("nt\tDG\tmfe_flag\tbest_flag\tdistance\trc_flag\n")
        for length in sorted(cotrans.file_data):
            energies = cotrans.file_data[length]["free_energies"]
            lowest_energy = min(energies)
            # list of struct_num of min_distance
            best = cotrans.file_data[length]["min_dist_indices"]
            rows = []
            for struct_num, dg in enumerate(energies):
                fields = [
                    str(length),  # nt
                    str(dg),  # DG
                    str(int(lowest_energy == dg)),  # mfe_flag
                    str(int(struct_num in best)),  # best_flag
                    str(cotrans.file_data[length]["distances"]
                        [struct_num]),  # distance
                    str(cotrans.file_data[length]["rc_flag"]),  # rc_flag
                ]
                rows.append("\t".join(fields))
            dump.write("\n".join(rows))
            dump.write("\n")

    plot_cmd = "R < make_DG_state_plot.R --no-save --args %s/DG_state_plot.pdf %s/DG_state_plot.dump %s %s" % (
        cotrans.output_dir, cotrans.output_dir, start, end)
    print(plot_cmd)
    OSU.system_command(plot_cmd)
    return
def output_train_model_stats(all_diffs_stats, min_dist_struct_indices, F_score,
                             crystals, reactivities_structs, output_dir):
    """
    Output summary of the benchmarking

    Pickles the statistics and minimum-distance indices, draws a
    CircleCompare diagram for every minimum-distance structure, and writes
    a text summary to <output_dir>/diffs_best_F.txt.
    """
    # Pickle benchmark statistics and indices of structures with the minimum distance
    pickled_outputs = [
        ("/save_all_diffs_stats.p", all_diffs_stats),
        ("/save_min_dist_struct_indices.p", min_dist_struct_indices),
    ]
    for suffix, payload in pickled_outputs:
        with open(output_dir + suffix, "wb") as handle:
            pickle.dump(payload, handle)

    react_keys = sorted(reactivities_structs.keys())

    # Generate circle compare diagrams of minimum distance structures
    for k in react_keys:
        # corresponding crystal key from the reactivity key
        ck = k.split('-')[0]
        for mdi in min_dist_struct_indices[k]:
            outpre = output_dir + "/" + k + str(mdi)
            ct_path = outpre + ".ct"
            SU.ct_list_to_file(reactivities_structs[k][mdi][0],
                               crystals[ck][1], ct_path)
            SU.runRNAstructure_CircleCompare(ct_path, crystals[ck][3],
                                             outpre + ".ps")
            OSU.system_command("convert %s.ps %s.jpg" % (outpre, outpre))

    # Write out information on highest F score structures
    with open(output_dir + "/diffs_best_F.txt", 'w') as summary:
        summary.write("Max avg F score: " + str(F_score) + "\n")
        for k in react_keys:
            labels = []
            for x in min_dist_struct_indices[k]:
                labels.append(reactivities_structs[k][x][1])
            summary.write("Methods: " + str(k) + "\t" + str(labels) + "\n")
        for k, stat_i in all_diffs_stats.items():
            summary.write("Structure key: %s\n" % (k))
            summary.write(str(stat_i) + "\n")
    return
def generate_movie(regex, outfile, size="1200x2800"):
    """
    Generate a movie with images as described by regex.

    regex   - input image pattern handed to ffmpeg's ``-i``
    outfile - path of the movie to write
    size    - frame size for ffmpeg's ``-s``; pass "" to omit ``-s``

    When a size is given, the mpeg4 command is tried first and, if it
    raises, the libx264 command is run instead.
    """
    if size != "":
        mpeg4_cmd = "ffmpeg -r 1 -i " + regex + " -vcodec mpeg4 -b 800k -r 10 -s " + size + " -pix_fmt yuv420p " + outfile
        x264_cmd = "ffmpeg -framerate 1 -i " + regex + " -c:v libx264 -r 10 -s " + size + " -pix_fmt yuv420p " + outfile
        try:
            print(mpeg4_cmd)
            OSU.system_command(mpeg4_cmd)
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit now propagate instead of being swallowed.
            print(x264_cmd)
            OSU.system_command(x264_cmd)
    else:
        unsized_cmd = "ffmpeg -framerate 1 -i " + regex + " -vcodec mpeg4 -b 800k -r 10 -pix_fmt yuv420p " + outfile
        print(unsized_cmd)
        OSU.system_command(unsized_cmd)
def wait_jcoll_finish_any(jcoll, out_dir, max_jobs, wait=30):
    """
    Waits until any job in the job collection is done before returning the number of available jobs

    Polls count_jcoll_remaining() every `wait` seconds until fewer than
    max_jobs are running, appending progress to
    ``<out_dir>jcoll_waiting.txt``. Returns max_jobs minus the last count.
    """
    while True:
        num_running = count_jcoll_remaining(jcoll, out_dir)
        if num_running < max_jobs:
            OSU.system_command(
                "echo \"jobs available\n%s\n%s\n\" >> %sjcoll_waiting.txt" %
                (num_running, max_jobs, out_dir))
            break
        OSU.system_command(
            "echo \"waiting jobs available\n%s\n%s\n\" >> %sjcoll_waiting.txt"
            % (num_running, max_jobs, out_dir))
        time.sleep(wait)
    # NOTE(review): confirm the timestamp entry is meant to be written once,
    # after a slot frees up, rather than once per poll.
    OSU.system_command("echo \"%s\n\" >> %sjcoll_waiting.txt" %
                       (time.localtime(), out_dir))
    return max_jobs - num_running
def vertical_image_concat(outfile, images):
    """
    Vertical concatenate images to a new image file.

    images is a sequence of image paths; they are stacked top to bottom with
    ImageMagick ``convert -append`` and written to outfile.
    """
    image_args = " ".join(images)
    command = "convert -append " + image_args + " " + outfile
    OSU.system_command(command)
def run(self):
    """
    The main routine of R2D2.
    Parses reactivities output from Spats and controls the calls to PCSU.

    Runs PCSU.run_cotrans_length_helper on every "*_reactivities.txt" file
    found in self.input_dir (in a process pool when self.p > 1), stores the
    per-length results on self.file_data / self.struct_distances, writes the
    rho tables, sorts intermediate files into per-extension directories and
    finally calls self.generate_output().
    """
    OSU.create_directory(self.output_dir)
    # The "/" is prepended on purpose in case the user forgets a trailing
    # '/' on output_dir, so these names may contain an extra "//".
    ct_dir = OSU.create_directory(self.output_dir + "/ct/")
    pickle_dir = OSU.create_directory(self.output_dir + "/pickles/")
    infiles = glob.glob(self.input_dir + "/*_reactivities.txt")

    # Pre-processing all input reactivities files - trimming adapter, recalculating thetas, calculating rhos
    max_best_states = 0  # max number of best states across the lengths
    rhos = {}
    rhos_cut = {}

    # JBL TODO - check for 3 input files
    # Set up and run parallized calculations on each length
    args_pool = list(
        zip(infiles, repeat(self.output_dir), repeat(ct_dir),
            repeat(pickle_dir), repeat(self.adapterseq), repeat(self.endcut),
            repeat(self.pol_fp), repeat(self.e), repeat(self.constrained_c),
            repeat(self.cap_rhos), repeat(self.scale_rho_max),
            repeat(self.scaling_func), repeat(self.weight_paired)))
    print("run args_pool length: " + str(len(args_pool)))

    if self.p > 1:  # start pool if multithread
        # JBL TODO - check multithread with 3 input files
        pool = Pool(processes=self.p)
        results = pool.imap(PCSU.run_cotrans_length_helper, args_pool)
    else:  # no multiprocessing: evaluate lazily, one length at a time
        pool = None
        results = (PCSU.run_cotrans_length_helper(args_slice)
                   for args_slice in args_pool)

    try:
        for (length_key, file_data_length_key, struct_distances_length,
             num_min_states, rho, rho_cut) in results:
            print("done length_key: " + str(length_key))
            if max_best_states < num_min_states:
                max_best_states = num_min_states
            self.file_data[length_key] = file_data_length_key
            self.struct_distances[length_key] = struct_distances_length
            rhos[length_key + self.endcut] = "\t".join(
                [str(r) for r in rho]) + "\n"
            rhos_cut[length_key + self.endcut] = "\t".join(
                [str(r) for r in rho_cut]) + "\n"
    finally:
        # Always shut the worker processes down, even if a length failed.
        if pool is not None:
            pool.close()
            pool.join()

    # Output the rho reactivity matrix
    # NOTE(review): unlike the directories above, no "/" is inserted before
    # these file names, so output_dir needs a trailing '/' here - confirm.
    with open(self.output_dir + "rho_table.txt", 'w') as f:
        print("sorted(rhos): " + str(len(rhos.keys())))
        for key in sorted(rhos):
            f.write(rhos[key])
    with open(self.output_dir + "rho_table_cut.txt", 'w') as f:
        print("sorted(rhos): " + str(len(rhos_cut.keys())))
        for key in sorted(rhos_cut):
            f.write(rhos_cut[key])

    # organizing files into their respective directories
    for file_ext in ["rho", "theta", "seq", "pfs", "con", "efn2"]:
        OSU.create_directory(self.output_dir + file_ext + "_dir/")
        OSU.system_command(
            "mv %s/*%s %s/%s_dir/" %
            (self.output_dir, file_ext, self.output_dir, file_ext))

    self.generate_output()  # generate majority of output
See examples/run_CoTrans_example.sh for an example of how to use this code. Author: Angela M Yu, 2014-2016 Version: 0.0.1 Copyright (C) 2016 Julius B. Lucks and Angela M Yu. All rights reserved. Distributed under the terms of the GNU General Public License, see 'LICENSE'. """ import R2D2 import LucksLabUtils_config import OSU LucksLabUtils_config.config("Quest_R2D2") OSU.system_command("echo $PATH") OSU.system_command("echo $CLASSPATH") opts = OSU.getopts("", [ "in_dir=", "out_dir=", "adapter=", "p=", "e=", "endcut=", "constrained_c=", "scale_rho_max=", "draw_all=", "most_count_tie_break=", "scaling_func=", "weight_paired=", "cap_rhos=", "pol_fp=" ]) print opts # This specifically calls R2D2.R2D2() assuming the user has specified the arguments: # in_dir, out_dir, adapter, e, endcut, constrained_c, scale_rho_max, draw_all, most_count_tie_break, scaling_func, weight_paired, cap_rhos, pol_fp # Only in_dir, out_dir, and adapter are truly required to run R2D2.R2D2(). Default values for the other parameters are set within R2D2.py. cotrans = R2D2.R2D2( opts['--in_dir'],
def generate_MFE_CoTrans_movie(seq, outdir, seq_start=-1, seq_end=-1, rhos_dir="", SHAPE_direct=False):
    """
    Generate co-transcriptional MFE folding movie.
    Options to start and end at specific lengths, seq_start and seq_end respectively.
    Can overlay rho reactivities if given a directory with .rho files corresponding to the sequence.

    seq          - full sequence; prefixes seq[seq_start:seqi] are folded
    outdir       - output directory; "seq/" and "ct/" subdirectories are
                   created inside it
    seq_start    - -1 means start from 0
    seq_end      - -1 means len(seq); any other value is incremented by 1
    rhos_dir     - directory searched for "*.rho" files ("" disables this)
    SHAPE_direct - when True, only lengths with a matching .rho file are
                   folded (using that file); other lengths are skipped

    NOTE(review): image, temp-file and frame-pattern paths are built as
    outdir + name without a separator, so outdir presumably must end in "/"
    - confirm against callers.
    """
    OSU.create_directory(outdir)
    OSU.create_directory(outdir + "/seq/")
    OSU.create_directory(outdir + "/ct/")
    if seq_start == -1:
        seq_start = 0
    if seq_end == -1:
        seq_end = len(seq)
    else:
        seq_end += 1
    # Number of digits used to zero-pad the frame numbers
    zero_padding = int(math.floor(math.log10(seq_end)) + 1)
    varna_num = 0  # count of frames drawn so far
    rhos = {}  # number of rho values in a file -> [rho values, file path]
    if rhos_dir != "":
        # reads through .rho files found in rhos_dir
        for rf in glob.glob(rhos_dir + "/*.rho"):
            # read in each rho reactivity spectra (second column of each line)
            with open(rf, "r") as f:
                rho = [line.split()[1] for line in f.readlines()]
                rhos[len(rho)] = [rho, rf]  # add in rho file here
    for seqi in range(seq_start + 1, seq_end + 1):
        # Build the VARNA color map; positions without a rho value get "-1"
        if seqi in rhos:
            rho_varna = "\"" + ";".join(rhos[seqi][0] + (["-1"] * (seq_end - seqi))) + "\""
        else:
            rho_varna = "\"" + ";".join(["-1"] * (seq_end)) + "\""
        seqf = outdir + "/seq/" + str(seqi) + ".seq"
        ctf = outdir + "/ct/" + str(seqi) + ".ct"
        NAU.make_seq(seq[seq_start:seqi], seqf)
        if SHAPE_direct and seqi in rhos:
            SU.runRNAstructure_fold(seqf, ctf, rhos[seqi][1])
        elif SHAPE_direct:
            # SHAPE-directed mode but no reactivities for this length
            continue
        else:
            SU.runRNAstructure_fold(seqf, ctf)
        SU.run_ct2dot(ctf, 0, "temp.dbn")
        # Append "." to the last line of the dot-bracket file for every
        # position from seqi up to seq_end
        OSU.system_command("sed '$s/$/&%s/' temp.dbn > temp_ext.dbn " % ("." * (seq_end - seqi)))
        varna_num += 1
        run_VARNA("temp_ext.dbn", outdir + str(varna_num).zfill(zero_padding) + "_structure.png", rho_varna)
        convert_center_resize(outdir + str(varna_num).zfill(zero_padding) + "_structure.png", "1440x2000")
    # NOTE(review): the temp files above are written relative to the current
    # directory, while the removals below target outdir - confirm intent.
    OSU.remove_file(outdir + "temp.dbn")
    OSU.remove_file(outdir + "temp_ext.dbn")
    generate_movie(outdir + "%%%dd_structure.png" % (zero_padding), outdir + "/movie.mp4", "")
def run_KineFold(reqfile):
    """
    Calls KineFold with supplied reqfile.

    Runs ``kinefold_long_static <reqfile> -noprint`` through
    OSU.system_command.
    """
    command = "kinefold_long_static %s -noprint" % (reqfile)
    OSU.system_command(command)
for i in range(1, 48) ] else: raise NotImplementedError( "Needs --100_times_dir option or --3_times_dirs, --50_times_dir, and --47_times_dir" ) combined = defaultdict(set) for count, td in enumerate(times_dirs): dg_dump_file = td + "/DG_state_plot.dump" if not OSU.check_file_exists(dg_dump_file): if OSU.check_file_exists(td + "results_except_draw.tgz"): print td + "results_except_draw.tgz: unpacking DG_state_plot.dump" OSU.system_command( "tar -zxvf %sresults_except_draw.tgz -C %s ./DG_state_plot.dump" % (td, td)) else: raise IOError("results_except_draw.tgz not found in " + td) with open(dg_dump_file, "r") as f: print "Reading: " + dg_dump_file f.readline() # throw away header for line in f: vars = line.split() str_key = "%s,%s" % (vars[0], vars[1]) if vars[3] == "1" and vars[-1] == "1": combined[str_key].add(count) elif str_key not in combined: combined[str_key]
def generate_best_struct_images(cotrans_length, length, longest_length,
                                varna_num, zero_padding, draw_dir, ct_dir,
                                draw_all, most_count_tie_break):
    """
    Generates images of the minimum distance structures at a given length

    cotrans_length       - dict for this length; the keys read here are
                           "filename", "rho", "rc_flag", "min_dist_indices",
                           "structs", "sampled_structs_count" and
                           "free_energies"
    length               - current length
    longest_length       - length that structures and rhos are padded to
    varna_num            - frame number used in the output names
    zero_padding         - number of digits for the zero-padded frame number
    draw_dir, ct_dir     - directory prefixes for drawings and ct files
    draw_all             - draw every minimum-distance structure and stack
                           them vertically; otherwise draw a single one
    most_count_tie_break - when not draw_all, pick the structure with the
                           most sampled counts; otherwise the lowest free
                           energy

    Returns draw_dir + zero-padded varna_num + "_structure.png". Nothing is
    drawn when cotrans_length["rc_flag"] is false.
    """
    print("generate_best_struct_images: " + str(length))
    fname = cotrans_length["filename"]
    padded_num = str(varna_num).zfill(zero_padding)
    final_image = draw_dir + padded_num + "_structure.png"
    temp_dbn = draw_dir + str(varna_num) + "_temp.dbn"

    # Pad the color map with "-1" up to longest_length. This is done once:
    # padding inside the loop appended the filler again for every structure
    # drawn, so later images got an over-long color map.
    rho_varna = "\"" + ";".join([str(r) for r in cotrans_length["rho"]]) + "\""
    rho_varna = rho_varna[:-1] + ";".join(
        [""] + ["-1"] * (longest_length - length)) + "\""

    seen_snum = []
    mult_images = []
    if cotrans_length["rc_flag"]:  # plot only structures with a good RC
        seen_snum = list(cotrans_length["min_dist_indices"])
    multiple = len(seen_snum) > 1
    last_sf = len(seen_snum) - 1

    for sf, snum in enumerate(seen_snum):
        draw_outname_pre = "%s/%s_%s_%s" % (draw_dir, fname, snum, padded_num)
        if multiple and draw_all:
            draw_outname_pre += "_mult" + str(sf)
            mult_images.append(draw_outname_pre + "_structure.png")
        elif multiple:
            # draw only one structure: either the one with the most
            # supporting counts from the Boltzmann samples or the one with
            # the lowest free energy
            structs_str = [",".join(s) for s in cotrans_length["structs"]]
            if most_count_tie_break:
                candidates = dict(
                    (k, v)
                    for k, v in cotrans_length["sampled_structs_count"].items()
                    if structs_str.index(k) in
                    cotrans_length["min_dist_indices"])
                # Select by count; max() over the dict alone would compare
                # the structure strings instead of their counts.
                tie_break = max(candidates, key=candidates.get)
                tie_break_i = structs_str.index(tie_break)
            else:
                energies = [
                    cotrans_length["free_energies"][sn]
                    for sn in cotrans_length["min_dist_indices"]
                ]
                lowest_pos = energies.index(min(energies))
                tie_break_i = cotrans_length["min_dist_indices"][lowest_pos]
            if tie_break_i != snum:
                continue

        SU.run_ct2dot(ct_dir + fname + "_unique.ct", snum,
                      draw_outname_pre + ".dbn")
        # determine length of .'s needed to fill in the whole length
        OSU.system_command(
            "sed '$s/$/&%s/' %s.dbn > %s%s_temp.dbn " %
            ("." * (longest_length - length), draw_outname_pre, draw_dir,
             varna_num))
        VIU.run_VARNA(temp_dbn, draw_outname_pre + "_structure.png",
                      rho_varna)

        if sf == last_sf and multiple and draw_all:
            # vertical concat mult images
            v_outname = re.sub(r"_mult\d+", "", mult_images[0])
            VIU.vertical_image_concat(v_outname, mult_images)
            draw_outname_pre = re.findall("(.*)_structure.png$",
                                          v_outname)[0]
        if sf == last_sf or not draw_all:
            print(final_image)
            print("SYM LINK: " + final_image)
            OSU.make_symbolic_link(
                re.sub(r"_mult\d+", "", draw_outname_pre) + "_structure.png",
                final_image)
            VIU.convert_center_resize(final_image, "1200x2800")
        # NOTE(review): confirm the temp .dbn is meant to be removed after
        # each drawn structure rather than once at the end.
        OSU.remove_file(temp_dbn)
    return final_image
if __name__ == "__main__":
    # read in arguments
    opts = OSU.getopts("", ["dbn_dirs=", "output_dir="])
    dbn_dirs = opts["--dbn_dirs"].split(",")
    # each input directory contributes one 1200 pixel wide panel
    width = 1200 * len(dbn_dirs)
    frame_size = "%sx2800" % (width)
    output_dir = OSU.create_directory(opts["--output_dir"])

    # read dbns to pair correct lengths together
    all_dbns = read_all_dbn_dirs(dbn_dirs)

    # create images by horizontally concatenating previously made images from R2D2 output
    zero_padding = int(math.floor(math.log10(len(all_dbns))) + 1)
    # The loop variable is named `length` so the builtin len() is not
    # shadowed at module scope.
    for count, length in enumerate(sorted(all_dbns), 1):
        frame = "%s/%s.png" % (output_dir, str(count).zfill(zero_padding))
        VIU.horizontal_image_concat(frame, all_dbns[length])
        VIU.convert_center_resize(frame, frame_size)

    movie_cmd = "ffmpeg -framerate 1 -i %s -vcodec mpeg4 -r 10 -s %s -pix_fmt yuv420p %s" % (
        output_dir + "/%%%dd.png" % (zero_padding), frame_size,
        output_dir + "/movie.mp4")
    print(movie_cmd)
    OSU.system_command(movie_cmd)
param_string = "_".join([str(s) for s in param]) job_name_param = "_".join([ job_name, param_string ])[:31] # Job name can only be up to 31 characters long # create .sh for parameter set if not exists if not OSU.check_file_exists("%snbs_script_%s.sh" % (sub_proc_sh_dir, param_string)): header = "##NBS-stdout:%s\n##NBS-stderr:%s\n##NBS-queue:batch\n##NBS-name:\"%s\"\n##NBS-jcoll:\"%s\"\n\nrm %s %s\n" % ( sub_proc_dir + job_name_param + ".out", sub_proc_dir + job_name_param + ".err", job_name_param, job_name, sub_proc_dir + job_name_param + ".out", sub_proc_dir + job_name_param + ".err") OSU.system_command( "echo \"%s/usr/bin/time /fs/home/amy35/tools/anaconda/bin/python ../find_parameters.py -r \'%s\' -c \'%s\' -o %s %s -n %s -p 1 --scaling_func %s --cluster_flag False --sub_proc True --arg_slice \'%s\' --job_name %s --load_results \'False\' --generate_structs \'False\' --cap_rhos %s --structs_pickle_dir %s\"> %snbs_script_%s.sh" % (header, opts['-r'], opts['-c'], opts['-o'], sampling_opts_string, opts['-n'], opts['--scaling_func'], param, job_name_param, cap_rhos, structs_pickle_dir, sub_proc_sh_dir, param_string)) # submit .sh to queue if not running, completed, or no job slots available if jobs_available > 0 and not PAU.check_job_on_queue( job_name_param) and not OSU.check_file_exists("".join([ training_res_dir, "save_training_results_", param_string, ".p" ])): print "/opt/voyager/nbs/bin/jsub %snbs_script_%s.sh -name %s -stdout %snbs_script_%s.out -stderr %snbs_script_%s.err" % ( sub_proc_sh_dir, param_string, job_name_param, sub_proc_dir, param_string, sub_proc_dir, param_string) OSU.system_command( "/opt/voyager/nbs/bin/jsub %snbs_script_%s.sh -name %s -stdout %snbs_script_%s.out -stderr %snbs_script_%s.err" % (sub_proc_sh_dir, param_string, job_name_param,
def R2D2_process(input_prefix, R2D2_output_dir, draw_dir, react_rhos,
                 crystals_ck, rnum):
    """
    Slightly reduced version of a R2D2 process for this benchmarking code.
    # taking code from cotranscriptional case (PCSU.run_cotrans_length) which has extraneous parts in this use case
    # few lines in PCSU.run_cotrans_length made it unable to be used for this case. ex. length_key

    Options:
    input_prefix - full path plus input prefix of reactivities
    R2D2_output_dir - R2D2 output directory
    draw_dir - directory for .dbn output
    react_rhos - rho reactivities
    crystals_ck - crystal data; indices 1, 2 and 3 are passed on as the
                  sequence for ct output, the matrix for the benchmark
                  statistics and the ct file for CircleCompare
    rnum - iteration number, names output files accordingly

    Returns:
    (curr_stats, selected_react_mats) - benchmark statistics of the last
    minimum-distance structure and the numpy matrices of all of them.
    """
    e = 50000
    fname = re.findall("([^/]+)$", input_prefix)[0]
    output_prefix = "%s/%s_%s" % (R2D2_output_dir, fname, rnum)
    scaling_fns = {
        "D": SU.invert_scale_rho_vec,
        "U": SU.scale_vec_avg1,
        "K": SU.cap_rho_or_ct_list
    }
    scaling_func = "K"
    scale_rho_max = 1.0
    constrained_c = 3.5
    cap_rhos = True
    weight_paired = 0.8

    sampled_structs_count = defaultdict(int)
    sampled_structs = set()

    # Keyword arguments shared by every sampling run. `lock` is a
    # module-level global and is only forwarded when it is set.
    common_kwargs = {"num_proc": 1, "wn_tag": "_%s" % (rnum)}
    if lock is not None:
        common_kwargs["lock"] = lock

    def sample(**method_kwargs):
        # Run one sampling method and fold its structures into the totals
        method_kwargs.update(common_kwargs)
        structs, structs_labels = SU.RNAstructure_sample(
            input_prefix, e, R2D2_output_dir, **method_kwargs)
        sampled_structs.update(structs_labels)
        OSU.increment_dict_counts(sampled_structs_count, structs)

    # Vanilla Sampling
    sample(label="noshape")

    # Sampling with SHAPE constraints
    sample(shapefile=input_prefix + ".rho", label="shape")

    # Sampling with hard constraints
    XB = SU.get_indices_rho_gt_c(react_rhos, constrained_c,
                                 one_index=True)  # RNAstructure is 1-indexed
    SU.make_constraint_file(output_prefix + ".con", [], XB, [], [], [], [],
                            [], [])
    sample(constraintfile=output_prefix + ".con",
           label="constrained_" + str(constrained_c))

    # Compressing sampled structures further by removing duplicates sampled by multiple methods. Keeping track of this though.
    # Saving more than I need to in this use case... ex. energies
    merged_structs = SU.merge_labels(sampled_structs, to_string=False)
    structs = [t[0].split(",") for t in merged_structs]
    SU.cts_to_file(structs, crystals_ck[1], output_prefix + "_unique.ct")
    SU.runRNAstructure_efn2(output_prefix + "_unique.ct",
                            output_prefix + ".efn2")
    free_energies = SU.get_free_energy_efn2(output_prefix + ".efn2")

    if cap_rhos:
        scaled_rhos = scaling_fns[scaling_func](react_rhos, scale_rho_max)
    else:
        scaled_rhos = scaling_fns[scaling_func](react_rhos)
    with open(input_prefix + ".best_scaled_rho", "w") as f:
        f.write("\n".join([
            "\t".join([str(zi), str(zr)])
            for zi, zr in enumerate(scaled_rhos)
        ]))

    # Compute distances between scaled rhos and paired-vectors from drawn structures
    binary_structs = SU.ct_struct_to_binary_vec(structs)
    distances = [
        SU.calc_bp_distance_vector_weighted(
            s,
            scaled_rhos,
            scaling_func=scaling_func,
            invert_struct="D" != scaling_func,
            paired_weight=weight_paired) for s in binary_structs
    ]
    min_distance = min(distances)
    min_dist_indices = [
        i for i, v in enumerate(distances) if v == min_distance
    ]

    # compare R2D2 against crystal structure
    selected_react_mats = []
    for mdi in min_dist_indices:
        react_mat = SU.ct_struct_to_binary_mat(structs[mdi])
        selected_react_mats.append(numpy.matrix(react_mat))
        curr_prefix = "%s_%s_R2D2" % (output_prefix, mdi)
        curr_stats = SU.calc_benchmark_statistics_matrix(
            react_mat, crystals_ck[2])
        with open(curr_prefix + ".stats", "w") as f:
            f.write(str(curr_stats))

        # make file
        SU.ct_list_to_file(structs[mdi], crystals_ck[1], curr_prefix + ".ct")
        SU.runRNAstructure_CircleCompare(curr_prefix + ".ct", crystals_ck[3],
                                         curr_prefix + ".ps")
        OSU.system_command("convert %s.ps %s.jpg" %
                           (curr_prefix, curr_prefix))

        # saving R2D2 results
        # NOTE(review): confirm one pickle per minimum-distance structure is
        # intended (the file name and react_mat are specific to mdi).
        R2D2_save = {}
        R2D2_save["structs"] = structs
        R2D2_save["distances"] = distances
        R2D2_save["min_dist_indices"] = min_dist_indices
        R2D2_save["min_distance"] = min_distance
        R2D2_save["scaled_rhos"] = scaled_rhos
        R2D2_save["react_mat"] = react_mat
        # Use a context manager so the pickle file is flushed and closed
        with open(curr_prefix + ".p", "wb") as f:
            pickle.dump(R2D2_save, f)

    # output .dbn's like in normal R2D2 process
    # code taken from PCSU.generate_best_struct_images
    # PCSU.generate_best_struct_images contained some extraneous calls for this use case. ex. draw_all = False, running VARNA
    iter_dbn_dir = OSU.create_directory("%s/%s" % (draw_dir, rnum))
    multiple = len(min_dist_indices) > 1
    for sf, snum in enumerate(min_dist_indices):
        draw_outname_pre = "%s/%snt_%s" % (iter_dbn_dir, len(react_rhos),
                                           snum)
        if multiple:
            draw_outname_pre += "_mult" + str(sf)
        SU.run_ct2dot(output_prefix + "_unique.ct", snum,
                      draw_outname_pre + ".dbn")

    # return curr_stats and selected structures
    return curr_stats, selected_react_mats
def horizontal_image_concat(outfile, images):
    """
    Horizontal concatenate images to a new image file.

    images is a sequence of image paths; they are placed side by side with
    ImageMagick ``convert +append`` and written to outfile.
    """
    image_args = " ".join(images)
    command = "convert +append " + image_args + " " + outfile
    OSU.system_command(command)