Beispiel #1
0
def RNAstructure_sample_process(worker_num,
                                in_file_prefix,
                                output_dir,
                                e,
                                seed,
                                wn_tag="",
                                lock=None):
    """
    Process used in RNAstructure_sample. Called from RNAstructure_sample_process_helper.

    Runs runRNAstructure_stochastic on in_file_prefix + ".pfs", writing to a
    per-worker temporary .ct file inside output_dir, reads the structures back
    with get_ct_structs, removes the temporary file and returns the structures.

    worker_num, wn_tag: concatenated to build the per-worker temp file name.
    e, seed: forwarded unchanged to runRNAstructure_stochastic.
    lock: optional object with acquire()/release(); when given it is held only
          while runRNAstructure_stochastic runs.
    Returns a list with one comma-joined string per sampled structure.
    """
    wn = str(worker_num) + wn_tag
    temp_ct = output_dir + wn + "temp.ct"
    print("Worker num: " + wn)
    if lock is not None:
        lock.acquire()
    try:
        runRNAstructure_stochastic(in_file_prefix + ".pfs",
                                   temp_ct,
                                   e=e,
                                   seed=seed,
                                   parallel=False)
    finally:
        # Always release, otherwise a failed sampling run would leave every
        # other worker blocked on the lock.
        if lock is not None:
            lock.release()
    structs = get_ct_structs(temp_ct)
    structs_str = [",".join(s) for s in structs]
    OSU.remove_file(temp_ct)
    return structs_str
def write_R2D2_output_to_files(reactivities_prefix, R2D2_pairs, R2D2_consensus,
                               R2D2_consensus_ct, react_rhos, crystals_mat,
                               crystals_ctfile, crystals_ct, cryst_seq):
    """
    Write out the results of the R2D2 iterations.

    All output files are named by appending a suffix to reactivities_prefix:
    tab-separated dumps of R2D2_pairs and R2D2_consensus, benchmark statistics
    of the consensus against crystals_mat, the consensus .ct file, a
    CircleCompare .ps (converted to .jpg) against crystals_ctfile, and
    reactivities written alongside the consensus and crystal ct structures.
    """
    consensus_ct_file = "%s_R2D2_consensus.ct" % (reactivities_prefix)
    consensus_ps_file = "%s_R2D2_consensus.ps" % (reactivities_prefix)

    # Tab-separated matrix dumps, one row per line
    matrix_dumps = (
        ("%s_R2D2_pairs.txt" % (reactivities_prefix), R2D2_pairs),
        ("%s_R2D2_consensus.txt" % (reactivities_prefix), R2D2_consensus),
    )
    for dump_path, matrix in matrix_dumps:
        with open(dump_path, "w") as out:
            rows = ["\t".join(map(str, row)) for row in matrix.tolist()]
            out.write("\n".join(rows) + "\n")

    # Benchmark statistics of the consensus against the crystal matrix
    with open("%s_R2D2_consensus.stats" % (reactivities_prefix), "w") as out:
        stats = SU.calc_benchmark_statistics_matrix(R2D2_consensus,
                                                    crystals_mat)
        out.write(str(stats))

    consensus_binary_ct = SU.binary_mat_to_binary_ct(R2D2_consensus)
    write_reactivities_in_ct(
        consensus_binary_ct, react_rhos,
        reactivities_prefix + "_R2D2_consensus_ct_react.txt")

    # Consensus structure file and its comparison image
    SU.ct_list_to_file(R2D2_consensus_ct, cryst_seq, consensus_ct_file)
    SU.runRNAstructure_CircleCompare(consensus_ct_file, crystals_ctfile,
                                     consensus_ps_file)
    convert_command = (
        "convert %s_R2D2_consensus.ps %s_R2D2_consensus.jpg" %
        (reactivities_prefix, reactivities_prefix))
    OSU.system_command(convert_command)

    write_reactivities_in_ct(crystals_ct, react_rhos,
                             reactivities_prefix + "_crystal_ct_react.txt")
Beispiel #3
0
def convert_center(image):
    """
    Center an image in place.

    Runs the `convert` command to write a centered copy to image + ".temp",
    then renames that copy over the original file.
    """
    centered_copy = image + ".temp"
    command = ("convert %s -background none -gravity Center %s.temp" %
               (image, image))
    OSU.system_command(command)
    os.rename(centered_copy, image)
Beispiel #4
0
def load_train_model(rho_midpoint, constrain_val, paired_weight, reactivities,
                     crystals, sample_n, react_rhos, structs_pickle_dir,
                     output_dir, out_stat_dir, outname, scaling_func, cap_rhos,
                     shape_slope, shape_intercept):
    """
    Runs the analysis of a (rho_midpoint, constrain_val, paired_weight) triple by loading in the sampled structures from sampling with hard constraints and constrain_val.

    reactivities: mapping whose values carry, at index 3, either a list of
                  already-constrained folds or something else (treated as none).
    structs_pickle_dir: prefix of the "constrained_folds_<constrain_val>.p"
                  pickle; it is concatenated directly, so it needs its own
                  trailing separator.
    sample_n, output_dir, shape_slope, shape_intercept: accepted but not used
                  in this function.
    Returns [[rho_midpoint, constrain_val, paired_weight, F_score]].
    """
    constrained_folds = {}
    # Obsolete since we use 1 reactivity at a time
    for k in reactivities:
        if isinstance(reactivities[k][3], list):
            constrained_folds[k] = reactivities[k][3]
        else:
            constrained_folds[k] = []
    out_param_dir = "/".join([
        out_stat_dir, "_".join(
            [str(constrain_val),
             str(rho_midpoint),
             str(paired_weight)])
    ]) + "/"
    OSU.create_directory(out_param_dir)

    if constrain_val != "no_constrained":
        # load sampled folds with hard constrain c
        pickle_path = "%sconstrained_folds_%s.p" % (structs_pickle_dir,
                                                    str(constrain_val))
        # NOTE: pickle.load executes arbitrary code from the file; only load
        # pickles produced by this pipeline.
        with open(pickle_path, "rb") as pickle_file:
            constrained_structs_dict = pickle.load(pickle_file)
        # Obsolete since we use 1 reactivity at a time
        for k in reactivities:
            constrained_structs = set(constrained_structs_dict[k])
            constrained_structs.update(set(constrained_folds[k]))
            constrained_structs = set(
                SU.merge_labels(list(constrained_structs), to_string=False))
            constrained_folds[k] = [(s.split(","), l)
                                    for s, l in constrained_structs]

    # call training routine
    # NOTE(review): out_param_dir is created above but out_stat_dir is what
    # gets passed on - confirm which directory train_constraint_model expects.
    _, _, F_score = train_constraint_model(
        crystals,
        constrained_folds,
        constrain_val,
        react_rhos,
        rho_midpoint,
        out_stat_dir,
        outname + str(rho_midpoint),
        weight=paired_weight,
        scaling_func=scaling_func,
        cap_rhos=cap_rhos)

    return [[rho_midpoint, constrain_val, paired_weight, F_score]]
def run_Fold(seqfile,
             reactivities_prefix,
             react_rhos,
             num_proc,
             crystals_mat,
             crystals_ctfile,
             output_suffix,
             shape_direct=False,
             shape_slope=1.1,
             shape_intercept=-0.3):
    """
    RNAstructure-Fold process
    Will handle both SHAPE-directed and not SHAPE-directed

    Folds seqfile into "<reactivities_prefix>_<output_suffix>.ct" (using
    reactivities_prefix + ".rho" as the SHAPE file when shape_direct is True),
    draws a CircleCompare against crystals_ctfile, converts it to .jpg, writes
    benchmark statistics against crystals_mat to a .stats file and writes the
    reactivities alongside the folded structure.

    Returns (fold_shape_ct, fold_shape_react_mat): the first structure of the
    folded .ct file and its binary matrix form.
    """
    # NOTE(review): `lock` is not a parameter of this function, so it must
    # resolve to a module-level name; if none exists this raises NameError.
    # Confirm against the enclosing module.
    out_base = "%s_%s" % (reactivities_prefix, output_suffix)
    ct_file = "%s_%s.ct" % (reactivities_prefix, output_suffix)
    if lock is not None:
        lock.acquire()
    try:
        if shape_direct:
            SU.runRNAstructure_fold(seqfile,
                                    ct_file,
                                    shapefile=reactivities_prefix + ".rho",
                                    p=num_proc,
                                    shape_intercept=shape_intercept,
                                    shape_slope=shape_slope)
        else:
            SU.runRNAstructure_fold(seqfile,
                                    ct_file,
                                    p=num_proc,
                                    shape_intercept=shape_intercept,
                                    shape_slope=shape_slope)
        SU.runRNAstructure_CircleCompare(
            ct_file, crystals_ctfile,
            "%s_%s.ps" % (reactivities_prefix, output_suffix))
    finally:
        # Release even when folding fails so other workers are not blocked.
        if lock is not None:
            lock.release()
    OSU.system_command("convert %s_%s.ps %s_%s.jpg" %
                       (reactivities_prefix, output_suffix,
                        reactivities_prefix, output_suffix))
    with open(out_base + ".stats", "w") as f:
        fold_shape_ct = SU.get_ct_structs(ct_file)[0]
        fold_shape_react_mat = SU.ct_struct_to_binary_mat(fold_shape_ct)
        f.write(
            str(
                SU.calc_benchmark_statistics_matrix(fold_shape_react_mat,
                                                    crystals_mat)))
    write_reactivities_in_ct(
        fold_shape_ct, react_rhos,
        "%s_%s_ct_react.txt" % (reactivities_prefix, output_suffix))
    return fold_shape_ct, fold_shape_react_mat
Beispiel #6
0
def convert_center_resize(image, res):
    """
    Centers image and resizes.

    Writes the converted image to image + ".temp" with the `convert` command
    and renames it over the original. The first attempt adds
    "-alpha discrete -blur 0x1"; if that attempt (command or rename) raises,
    the same conversion is retried without those two options.

    image: path of the image file, modified in place.
    res:   value passed to convert's -extent option.
    """
    primary_cmd = (
        "convert %s -alpha discrete -blur 0x1 -background none -gravity Center -extent %s %s.temp"
        % (image, res, image))
    fallback_cmd = (
        "convert %s -background none -gravity Center -extent %s %s.temp" %
        (image, res, image))
    try:
        print(primary_cmd)
        OSU.system_command(primary_cmd)
        os.rename(image + ".temp", image)
    except Exception:
        # Catch Exception rather than everything so Ctrl-C / SystemExit still
        # stop the program instead of silently triggering the fallback.
        print(fallback_cmd)
        OSU.system_command(fallback_cmd)
        os.rename(image + ".temp", image)
Beispiel #7
0
def wait_for_jcoll(jcoll, out_dir, wait=30):
    """
    Block the current process until the job collection jcoll has no jobs left.

    Polls `jlist -jcoll <jcoll>` every `wait` seconds until its output contains
    "listempty". Each unsuccessful poll and the final local time are appended
    to <out_dir>jcoll_waiting.txt.
    """
    jlist_command = ['/opt/voyager/nbs/bin/jlist', '-jcoll', jcoll]
    while True:
        poll = subprocess.Popen(jlist_command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = poll.communicate()
        if "listempty" in out:
            break
        # still busy: record what jlist reported, then sleep before retrying
        OSU.system_command(
            "echo \"waiting\n%s\n%s\n\" >> %sjcoll_waiting.txt" %
            (out, err, out_dir))
        time.sleep(wait)
    OSU.system_command("echo \"%s\n\" >> %sjcoll_waiting.txt" %
                       (time.localtime(), out_dir))
Beispiel #8
0
def generate_DG_output(cotrans, start=-1, end=-1):
    """
    Write the DG state .dump file and run the R script that plots it.

    cotrans: object exposing file_data (keyed by length) and output_dir.
    start, end: range handed to the R script; -1 selects the smallest /
                largest key of cotrans.file_data respectively.

    The dump holds one tab-separated row per structure of every length:
    nt, DG, mfe_flag, best_flag, distance, rc_flag.
    """
    lengths = sorted(cotrans.file_data)
    if start == -1:
        start = lengths[0]
    if end == -1:
        end = lengths[-1]
    print("generate_DG_output: " + str(start) + " " + str(end))

    with open(cotrans.output_dir + "/DG_state_plot.dump", 'w') as dump:
        dump.write("nt\tDG\tmfe_flag\tbest_flag\tdistance\trc_flag\n")
        for length in lengths:
            length_data = cotrans.file_data[length]
            free_energies = length_data["free_energies"]
            lowest = min(free_energies)
            # struct_num values of the minimum-distance structures
            best = length_data["min_dist_indices"]

            rows = []
            for struct_num, dg in enumerate(free_energies):
                fields = [
                    str(length),  # nt
                    str(dg),  # DG
                    str(int(lowest == dg)),  # mfe_flag
                    str(int(struct_num in best)),  # best_flag
                    str(length_data["distances"][struct_num]),  # distance
                    str(length_data["rc_flag"]),  # rc_flag
                ]
                rows.append("\t".join(fields))

            dump.write("\n".join(rows) + "\n")

    plot_command = (
        "R < make_DG_state_plot.R --no-save --args %s/DG_state_plot.pdf %s/DG_state_plot.dump %s %s"
        % (cotrans.output_dir, cotrans.output_dir, start, end))
    print(plot_command)
    OSU.system_command(plot_command)
    return
Beispiel #9
0
def output_train_model_stats(all_diffs_stats, min_dist_struct_indices, F_score,
                             crystals, reactivities_structs, output_dir):
    """
    Write a summary of the benchmarking into output_dir.

    Pickles all_diffs_stats and min_dist_struct_indices, draws a CircleCompare
    image (.ct, .ps, .jpg) for every minimum-distance structure, and writes
    diffs_best_F.txt with the F score, the labels of the minimum-distance
    structures per key, and every entry of all_diffs_stats.
    """
    struct_keys = sorted(reactivities_structs)

    # Pickle benchmark statistics and minimum-distance structure indices
    pickled_outputs = (
        ("/save_all_diffs_stats.p", all_diffs_stats),
        ("/save_min_dist_struct_indices.p", min_dist_struct_indices),
    )
    for pickle_name, payload in pickled_outputs:
        with open(output_dir + pickle_name, "wb") as handle:
            pickle.dump(payload, handle)

    # Circle compare diagrams of the minimum distance structures
    for key in struct_keys:
        # crystal key is the part of the reactivity key before the first '-'
        crystal = crystals[key.split('-')[0]]
        for index in min_dist_struct_indices[key]:
            base = output_dir + "/" + key + str(index)
            ct_path = base + ".ct"
            SU.ct_list_to_file(reactivities_structs[key][index][0],
                               crystal[1], ct_path)
            SU.runRNAstructure_CircleCompare(ct_path, crystal[3],
                                             base + ".ps")
            OSU.system_command("convert %s.ps %s.jpg" % (base, base))

    # Information on the highest F score structures
    with open(output_dir + "/diffs_best_F.txt", 'w') as report:
        report.write("Max avg F score: " + str(F_score) + "\n")
        for key in struct_keys:
            labels = []
            for index in min_dist_struct_indices[key]:
                labels.append(reactivities_structs[key][index][1])
            report.write("Methods: " + str(key) + "\t" + str(labels) + "\n")
        for k, stat_i in all_diffs_stats.items():
            report.write("Structure key: %s\n" % (k))
            report.write(str(stat_i) + "\n")
    return
Beispiel #10
0
    def generate_output(self):
        """
        Majority of Cotranscriptional SHAPE-Seq output is created here. This includes the DG plot, best structure images, and the movie of the best structures.

        Reads self.output_dir, self.file_data, self.p, self.draw_all and
        self.most_count_tie_break. Creates the draw/ and nn/distances/
        directories under self.output_dir and writes movie.mp4 there.
        """
        draw_dir = OSU.create_directory(self.output_dir + "/draw/")
        nn_dir = OSU.create_directory(self.output_dir + "/nn/")
        OSU.create_directory(nn_dir + "distances/")
        sorted_lengths = sorted(self.file_data)
        # number of decimal digits of the largest length; used to zero-pad
        # the frame numbers in image file names
        zero_padding = int(math.floor(math.log10(sorted_lengths[-1])) + 1)

        # Parallelized function calls to generate DG plot, distance matrices for clustering of structures, and creating images of minimum distance structures
        # Only lengths whose "rc_flag" entry is truthy get an image drawn.
        draw_struct_nums = [
            length for length in sorted_lengths
            if self.file_data[length]["rc_flag"]
        ]
        # One argument tuple per drawn length; the finite sequences bound the
        # zip, the repeat() iterators supply the shared arguments.
        draw_args_pool = zip([self.file_data[dsn] for dsn in draw_struct_nums],
                             draw_struct_nums, repeat(sorted_lengths[-1]),
                             range(1,
                                   len(draw_struct_nums) + 1),
                             repeat(zero_padding), repeat(draw_dir),
                             repeat(self.output_dir + "/ct/"),
                             repeat(self.draw_all),
                             repeat(self.most_count_tie_break))
        # (function, args) pairs; element 0 is the DG plot job.
        # NOTE(review): `list + zip(...)` and the zip over repeat() of a
        # function only work when zip returns a list and stops at the
        # shortest input - Python 2 semantics; confirm before porting.
        args_pool = [
            (PCSU.generate_DG_output, (self, 1, sorted_lengths[-1]))
        ] + zip(repeat(PCSU.generate_best_struct_images), draw_args_pool)

        if self.p == 1:
            # serial: run every job, including the DG plot, in this process
            for i in range(len(args_pool)):
                PCSU.calculate_function_helper(args_pool[i])
        else:
            PCSU.generate_DG_output(
                self, 1, sorted_lengths[-1]
            )  # moved this outside of multiprocessing because Quest has issues running it in a pool
            # args_pool[1:] skips the DG plot job that was just run directly
            PCSU.run_output_multiprocessing_pool(
                PCSU.calculate_function_helper, args_pool[1:], self.p)

        # Retry the DG plot once if its PDF was not produced above.
        if not OSU.check_file_exists(
                self.output_dir + "/DG_state_plot.pdf"
        ):  # Weird error on quest that it will ignore this command if sample size is very large
            PCSU.generate_DG_output(self, 1, sorted_lengths[-1])

        # Use ffmpeg on varna_num.png's to create video of the minimum distance folding pathway
        OSU.make_symbolic_link(
            draw_dir + str(len(draw_struct_nums)).zfill(zero_padding) +
            "_structure.png",
            draw_dir + str(len(draw_struct_nums) + 1).zfill(zero_padding) +
            "_structure.png")  # ffmpeg needs a duplicate of the last frame
        # "%%%dd" expands to e.g. "%3d", the zero_padding-wide frame-number
        # placeholder in the image name pattern
        VIU.generate_movie(draw_dir + "%%%dd_structure.png" % (zero_padding),
                           self.output_dir + "/movie.mp4")
        return
def read_all_dbn_dirs(dbn_dirs):
    """
    Read dbns in multiple directories to pair correct lengths together.
    Returns a dictionary of .png file names that should have been made by R2D2, organized by length.

    dbn_dirs: iterable of directory prefixes. Each is concatenated directly
              with "*.dbn", so it needs its own trailing separator.
    The length of a .dbn file is the length of the first whitespace-separated
    token on its last line. An image is listed only if it exists and is not
    already listed for that length.
    """
    all_dbns = defaultdict(list)
    for dbn_dir in dbn_dirs:
        for dbn_f in glob.glob(dbn_dir + "*.dbn"):
            with open(dbn_f, "r") as f:
                lines = f.readlines()
            length = len(lines[-1].split()[0])
            # Escape the dot and anchor at the end so that only the file
            # extension is rewritten; the unanchored '.dbn' also matched path
            # components such as "/dbn_dir" and corrupted the image path.
            image = re.sub(r'(_mult\d+)?\.dbn$', '_structure.png', dbn_f)
            if image not in all_dbns[length] and OSU.check_file_exists(image):
                all_dbns[length].append(image)
    return all_dbns
Beispiel #12
0
def merge_labels(list_sl, to_string=True):
    """
    Merge the labels of (struct, label) tuples that share the same struct.

    list_sl: iterable of tuples; element 0 is the struct, element 1 is either
             a single label string or an iterable of comma-separated label
             strings.
    to_string: if True, each struct is joined with commas into one string
             before being used as the key; otherwise the struct is used as
             given.
    Returns a list of (struct, "label1,label2,...") tuples, one per distinct
    struct, with duplicate labels dropped.
    """
    merged = {}
    for entry in list_sl:
        struct, raw_labels = entry[0], entry[1]
        if isinstance(raw_labels, basestring):
            labels = [raw_labels]
        else:
            split_labels = [b.split(",") for b in raw_labels]
            labels = list(set(OSU.flatten_list(split_labels)))
        key = ",".join(struct) if to_string else struct
        for label in labels:
            seen = merged.setdefault(key, [])
            if label not in seen:
                seen.append(label)
    return [(key, ",".join(merged[key])) for key in merged]
Beispiel #13
0
def generate_movie(regex, outfile, size="1200x2800"):
    """
    Generate a movie with images as described by regex.

    regex:   input image pattern passed to ffmpeg's -i option.
    outfile: path of the movie to write.
    size:    value for ffmpeg's -s option; "" omits the option.

    With a size, the mpeg4 command is tried first and, if running it raises,
    a libx264 command is run instead. Without a size only the mpeg4 command
    is run.
    """
    if size != "":
        mpeg4_cmd = ("ffmpeg -r 1 -i " + regex +
                     " -vcodec mpeg4 -b 800k -r 10 -s " + size +
                     " -pix_fmt yuv420p " + outfile)
        x264_cmd = ("ffmpeg -framerate 1 -i " + regex +
                    " -c:v libx264 -r 10 -s " + size + " -pix_fmt yuv420p " +
                    outfile)
        try:
            print(mpeg4_cmd)
            OSU.system_command(mpeg4_cmd)
        except Exception:
            # Exception, not a bare except, so Ctrl-C / SystemExit propagate
            # instead of starting a second encode.
            print(x264_cmd)
            OSU.system_command(x264_cmd)
    else:
        unsized_cmd = ("ffmpeg -framerate 1 -i " + regex +
                       " -vcodec mpeg4 -b 800k -r 10 -pix_fmt yuv420p " +
                       outfile)
        print(unsized_cmd)
        OSU.system_command(unsized_cmd)
Beispiel #14
0
def wait_jcoll_finish_any(jcoll, out_dir, max_jobs, wait=30):
    """
    Wait until the job collection has room for another job.

    Polls count_jcoll_remaining every `wait` seconds until fewer than max_jobs
    jobs remain, logging each poll and the final local time to
    <out_dir>jcoll_waiting.txt.
    Returns the number of free job slots (max_jobs minus the remaining jobs).
    """
    while True:
        num_running = count_jcoll_remaining(jcoll, out_dir)
        if num_running < max_jobs:
            OSU.system_command(
                "echo \"jobs available\n%s\n%s\n\" >> %sjcoll_waiting.txt" %
                (num_running, max_jobs, out_dir))
            break
        # collection is full: log and poll again after a pause
        OSU.system_command(
            "echo \"waiting jobs available\n%s\n%s\n\" >> %sjcoll_waiting.txt"
            % (num_running, max_jobs, out_dir))
        time.sleep(wait)
    OSU.system_command("echo \"%s\n\" >> %sjcoll_waiting.txt" %
                       (time.localtime(), out_dir))
    return max_jobs - num_running
Copyright (C) 2017  Julius B. Lucks, Angela M Yu.
All rights reserved.
Distributed under the terms of the GNU General Public License, see 'LICENSE'.
"""

import LucksLabUtils_config
import OSU
import SU
import glob
import re

LucksLabUtils_config.config("Quest_R2D2")  # set up environment

# parse command line arguments
# Required: --KineFold_dir, --outdir. Everything else falls back to a default.
opts = OSU.getopts("", [
    "KineFold_dir=", "outdir=", "KineFold_times=", "seq_name=", "time=",
    "pseudoknots=", "entanglements=", "speed=", "sequence="
])
print(opts)
KineFold_dir = opts["--KineFold_dir"]
outdir = opts["--outdir"]
KF_times = int(opts["--KineFold_times"]) if "--KineFold_times" in opts else 0
seq_name = opts["--seq_name"] if "--seq_name" in opts else "test"
# NOTE(review): --time and --speed are kept exactly as parsed from the command
# line when supplied, but default to ints - confirm downstream code accepts both.
time = opts["--time"] if "--time" in opts else 160000
# boolean options are true only for the literal string "True"
pseudoknots = bool(
    opts["--pseudoknots"] == "True") if "--pseudoknots" in opts else False
# The key was misspelled "--entanglementss", which made this always False.
entanglements = bool(
    opts["--entanglements"] == "True") if "--entanglements" in opts else False
speed = opts["--speed"] if "--speed" in opts else 20
sequence = opts["--sequence"] if "--sequence" in opts else ""

# create directories
Copyright (C) 2016, 2017  Julius B. Lucks, Angela M Yu.
All rights reserved.
Distributed under the terms of the GNU General Public License, see 'LICENSE'.
"""

import LucksLabUtils_config
import VIU
import OSU
import SU
import re

LucksLabUtils_config.config("Quest_R2D2")  # set up environment

# parse command line arguments
opts = OSU.getopts("", [
    "seq=", "seq_start=", "seq_end=", "outdir=", "rhos_dir=", "SHAPE_direct=",
    "make_DG_dump="
])
print(opts)

# required options
seq = opts["--seq"]
outdir = opts["--outdir"]

# optional sequence bounds; -1 when not supplied
seq_start = -1
if "--seq_start" in opts:
    seq_start = int(opts["--seq_start"])
seq_end = -1
if "--seq_end" in opts:
    seq_end = int(opts["--seq_end"])

# optional directory of .rho files; empty string when not supplied
rhos_dir = ""
if "--rhos_dir" in opts:
    rhos_dir = opts["--rhos_dir"]

# boolean options are true only when given as the literal string "True"
SHAPE_direct = ("--SHAPE_direct" in opts
                and opts["--SHAPE_direct"] == "True")
make_DG_dump = ("--make_DG_dump" in opts
                and opts["--make_DG_dump"] == "True")

# generate MFE movie
VIU.generate_MFE_CoTrans_movie(seq, outdir, seq_start, seq_end, rhos_dir,
                               SHAPE_direct)
Version: 0.0.1
Author: Angela M Yu, 2014-2017

Copyright (C) 2017  Julius B. Lucks and Angela M Yu.
All rights reserved.
Distributed under the terms of the GNU General Public License, see 'LICENSE'.
"""

import SU
import OSU
import LucksLabUtils_config
import glob

# setup environment variables
LucksLabUtils_config.config("Quest_R2D2")
# -o: output directory, --dbn_dir: directory holding the .dbn files,
# --out_prefix: prefix for output names (not used in the visible portion)
opts = OSU.getopts("o:", ["dbn_dir=", "out_prefix="])
print opts

output_dir = OSU.create_directory(opts['-o'])
dbn_dir = opts['--dbn_dir']
out_prefix = opts['--out_prefix']

dbns = {}
for dbnf in glob.glob(dbn_dir + "/*.dbn"):
    # read the dot-bracket structure from each .dbn file, convert the file to
    # .ct and run efn2 on it
    # NOTE(review): temp files are addressed as output_dir + "temp.*", which
    # assumes OSU.create_directory returns a path ending in a separator -
    # confirm.
    with open(dbnf, "r") as f:
        dbn = f.readlines()[-1].strip()  # last line with dotbracket
        SU.run_dot2ct(dbnf, output_dir + "temp.ct")
        SU.runRNAstructure_efn2(output_dir + "temp.ct",
                                output_dir + "temp.efn2",
                                parallel=False)
Beispiel #18
0
    def run(self):
        """
        The main routine of R2D2.
        Parses reactivities output from Spats and controls the calls to PCSU.

        Steps:
          1. Create the output, ct/ and pickles/ directories.
          2. Run PCSU.run_cotrans_length_helper on every "*_reactivities.txt"
             file in self.input_dir - through a multiprocessing Pool when
             self.p > 1, serially otherwise - and store the results in
             self.file_data and self.struct_distances.
          3. Write rho_table.txt and rho_table_cut.txt, move intermediate
             files into per-extension directories and call
             self.generate_output().
        """
        OSU.create_directory(self.output_dir)
        # The explicit "/" separators below are deliberate: a doubled slash is
        # harmless, a missing one (user forgot the trailing '/') is not.
        ct_dir = OSU.create_directory(self.output_dir + "/ct/")
        pickle_dir = OSU.create_directory(self.output_dir + "/pickles/")
        infiles = glob.glob(self.input_dir + "/*_reactivities.txt")

        # Pre-processing all input reactivities files - trimming adapter, recalculating thetas, calculating rhos
        max_best_states = 0  # max number of best states across the lengths
        rhos = {}
        rhos_cut = {}

        #JBL TODO - check for 3 input files
        # Set up and run parallelized calculations on each length
        args_pool = list(
            zip(infiles, repeat(self.output_dir), repeat(ct_dir),
                repeat(pickle_dir), repeat(self.adapterseq),
                repeat(self.endcut), repeat(self.pol_fp), repeat(self.e),
                repeat(self.constrained_c), repeat(self.cap_rhos),
                repeat(self.scale_rho_max), repeat(self.scaling_func),
                repeat(self.weight_paired)))
        print("run args_pool length: " + str(len(args_pool)))

        pool = None
        if self.p > 1:  # start pool if multithread
            #JBL TODO - check multithread with 3 input files
            pool = Pool(processes=self.p)
            results = pool.imap(PCSU.run_cotrans_length_helper, args_pool)
        else:  # no multiprocessing
            results = (PCSU.run_cotrans_length_helper(args_slice)
                       for args_slice in args_pool)

        try:
            # Same bookkeeping for both modes; only the result source differs.
            for (length_key, file_data_length_key, struct_distances_length,
                 num_min_states, rho, rho_cut) in results:
                print("done length_key: " + str(length_key))
                if max_best_states < num_min_states:
                    max_best_states = num_min_states
                self.file_data[length_key] = file_data_length_key
                self.struct_distances[length_key] = struct_distances_length
                rhos[length_key +
                     self.endcut] = "\t".join([str(r) for r in rho]) + "\n"
                rhos_cut[length_key + self.endcut] = "\t".join(
                    [str(r) for r in rho_cut]) + "\n"
        finally:
            # Shut the worker processes down instead of leaking them. On
            # success every result has already been consumed; on error any
            # outstanding work is abandoned.
            if pool is not None:
                pool.terminate()
                pool.join()

        # Output the rho reactivity matrix
        with open(self.output_dir + "/rho_table.txt", 'w') as f:
            print("sorted(rhos): " + str(len(rhos)))
            for key in sorted(rhos):
                f.write(rhos[key])
        with open(self.output_dir + "/rho_table_cut.txt", 'w') as f:
            print("sorted(rhos): " + str(len(rhos_cut)))
            for key in sorted(rhos_cut):
                f.write(rhos_cut[key])

        # organizing files into their respective directories
        for file_ext in ["rho", "theta", "seq", "pfs", "con", "efn2"]:
            # create the same "<output_dir>/<ext>_dir/" path that mv targets
            OSU.create_directory(self.output_dir + "/" + file_ext + "_dir/")
            OSU.system_command(
                "mv %s/*%s %s/%s_dir/" %
                (self.output_dir, file_ext, self.output_dir, file_ext))

        #import ipdb; ipdb.set_trace() #JBL- entering debugging here - breakpoint 1
        self.generate_output()  # generate majority of output
    all_dbns = defaultdict(list)
    for dbn_dir in dbn_dirs:
        dbns = glob.glob(dbn_dir + "*.dbn")
        for dbn_f in dbns:
            with open(dbn_f, "r") as f:
                lines = f.readlines()
            length = len(lines[-1].split()[0])
            image = re.sub('(_mult\d+)?.dbn', '_structure.png', dbn_f)
            if image not in all_dbns[length] and OSU.check_file_exists(image):
                all_dbns[length].append(image)
    return all_dbns


if __name__ == "__main__":
    # read in arguments
    # --dbn_dirs: comma-separated list of directories, --output_dir: where the
    # concatenated images are written
    opts = OSU.getopts("", ["dbn_dirs=", "output_dir="])
    dbn_dirs = opts["--dbn_dirs"].split(",")
    width = 1200 * len(dbn_dirs)
    output_dir = OSU.create_directory(opts["--output_dir"])

    # read dbns to pair correct lengths together
    all_dbns = read_all_dbn_dirs(dbn_dirs)

    # create images by horizontally concatenating previously made images from R2D2 output
    count = 0
    # Digits needed for the largest frame number. len(str(n)) matches
    # floor(log10(n)) + 1 for n >= 1 and does not raise when no dbns were found.
    zero_padding = len(str(len(all_dbns)))
    # The loop variable must not be called `len`: at module scope that would
    # replace the builtin for the rest of the module.
    for count, length in enumerate(sorted(all_dbns), 1):
        VIU.horizontal_image_concat(
            "%s/%s.png" % (output_dir, str(count).zfill(zero_padding)),
            all_dbns[length])
Beispiel #20
0
All rights reserved.
Distributed under the terms of the GNU General Public License, see 'LICENSE'.
"""
import OSU
import LucksLabUtils_config
import VIU
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from seaborn import color_palette
import numpy as np

LucksLabUtils_config.config("Quest_R2D2")

# parse command line arguments; every option read below is required
# (each is accessed by direct indexing with no default)
opts = OSU.getopts("", [
    "Y_first_sampled=", "MDS_ct_coords=", "MDS_mat_coords=", "PCA_coords=",
    "outfile_pre=", "output_dir=", "first_color_dict="
])
print opts
# input file names, as given on the command line
filename_Y_first_sampled = opts["--Y_first_sampled"]
filename_MDS_ct_coords = opts["--MDS_ct_coords"]
filename_MDS_mat_coords = opts["--MDS_mat_coords"]
filename_PCA_coords = opts["--PCA_coords"]
# prefix for output file names
outfile = opts["--outfile_pre"]
output_dir = OSU.create_directory(opts["--output_dir"])
filename_first_color_dict = opts["--first_color_dict"]

# plotting setup
plt.style.use('seaborn-whitegrid')
# 3x3 inch figure at 300 dpi
fig = plt.figure(figsize=(3, 3), dpi=300)

# load input
Beispiel #21
0
def horizontal_image_concat(outfile, images):
    """
    Concatenate images side by side into a new image file.

    outfile: path of the image to write.
    images:  list of input image paths, passed to `convert +append` in order.
    """
    inputs = " ".join(images)
    OSU.system_command("convert +append %s %s" % (inputs, outfile))
Beispiel #22
0
def vertical_image_concat(outfile, images):
    """
    Concatenate images top to bottom into a new image file.

    outfile: path of the image to write.
    images:  list of input image paths, passed to `convert -append` in order.
    """
    inputs = " ".join(images)
    OSU.system_command("convert -append %s %s" % (inputs, outfile))
Beispiel #23
0
"""

from __future__ import division  # allows division of ints to output decimals
import LucksLabUtils_config
import OSU
import SU
import glob
import re
import numpy
from collections import defaultdict

LucksLabUtils_config.config("Quest_R2D2")  # set up environment

# parse command line arguments
# --time_weight and --last_structure are value-less switches; the rest take values
long_options = [
    "KineFold_dir=", "out_dir=", "time_weight", "simulation_time_ms=",
    "last_structure"
]
opts = OSU.getopts("", long_options)
print(opts)
KineFold_dir = opts["--KineFold_dir"]
outdir = OSU.create_directory(opts["--out_dir"])
time_weight = "--time_weight" in opts
last_structure = "--last_structure" in opts
# -1 when no simulation time was supplied
simulation_time_ms = -1
if "--simulation_time_ms" in opts:
    simulation_time_ms = int(opts["--simulation_time_ms"])

# the two switches are mutually exclusive
assert not (time_weight and last_structure), (
    "Only can specify either time_weight OR last_structure")


# From Paul Gasper's pairs_from_dbn_2.py
def read_dbn(dbn_fn):
import re
from itertools import repeat
from multiprocessing import Pool, Lock
import numpy
from sys import maxsize
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn.preprocessing import scale
from sklearn.metrics import pairwise_distances
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt

# setup environment variables
LucksLabUtils_config.config("Quest_R2D2")
# -o: output directory, -c: glob pattern for crystal files,
# -r: glob pattern for reactivities files, -p: number of processes (optional)
opts = OSU.getopts("o:c:r:p:", ["shape_intercept=", "shape_slope="])
print opts
# print numpy arrays in full instead of summarizing them
numpy.set_printoptions(threshold=maxsize)
plt.style.use('seaborn-whitegrid')
fig = plt.figure(figsize=(8, 8))

reactivities_files = glob.glob(opts['-r'])
crystal_files = glob.glob(opts['-c'])
output_dir = OSU.create_directory(opts['-o'])
R2D2_output_dir = OSU.create_directory(output_dir + "/R2D2_intermediate/")
# defaults when the option is absent: 1 process, intercept -0.3, slope 1.1
num_proc = int(opts['-p']) if '-p' in opts else 1
shape_intercept = float(opts["--shape_intercept"]) \
                    if "--shape_intercept" in opts else -0.3
shape_slope = float(opts["--shape_slope"]) if "--shape_slope" in opts else 1.1

reactivities = PAU.parse_input_panels(reactivities_files, output_dir)
Version: 0.0.1
Author: Angela M Yu, 2014-2016

Copyright (C) 2016  Julius B. Lucks and Angela M Yu.
All rights reserved.
Distributed under the terms of the GNU General Public License, see 'LICENSE'.
"""

import OSU
import LucksLabUtils_config
from collections import defaultdict

# setup environment variables
LucksLabUtils_config.config("Quest_R2D2")
# -o: output directory; --file_prefix is required, the *_times_dir(s)
# options select which set of run directories is used further down
opts = OSU.getopts("o:", [
    "3_times_dirs=", "50_times_dir=", "47_times_dir=", "100_times_dir=",
    "file_prefix="
])
print opts

output_dir = OSU.create_directory(opts['-o'])
# prefix of the numbered run directories built below
file_prefix = opts["--file_prefix"]

if "--100_times_dir" in opts:
    times_dirs = [
        "%s/%s%s/" % (opts["--100_times_dir"], file_prefix, i)
        for i in range(1, 101)
    ]
elif "--3_times_dirs" in opts and "--50_times_dir" in opts and "--47_times_dir" in opts:
    times_dirs = opts["--3_times_dirs"].split(",")
    times_dirs += [
        "%s/%s%s/" % (opts["--50_times_dir"], file_prefix, i)
Beispiel #26
0
def generate_MFE_CoTrans_movie(seq,
                               outdir,
                               seq_start=-1,
                               seq_end=-1,
                               rhos_dir="",
                               SHAPE_direct=False):
    """
    Generate co-transcriptional MFE folding movie.
    Options to start and end at specific lengths, seq_start and seq_end respectively.
    Can overlay rho reactivities if given a directory with .rho files corresponding to the sequence.

    Options:
    seq - full sequence; the prefix seq[seq_start:length] is folded for each length
    outdir - output directory. Frame images are written to
             outdir + "<n>_structure.png" by plain string concatenation, so
             outdir is expected to end with a path separator.
    seq_start - start index into seq; -1 means 0
    seq_end - last length index; -1 means len(seq), any other value is
              incremented by one before use
    rhos_dir - directory searched for "*.rho" files; each file is keyed by its
               number of lines, which is matched against the current length
    SHAPE_direct - if True, fold with the matching .rho file and skip lengths
                   that have no .rho file; if False, fold without reactivities

    Side effects: creates outdir, outdir/seq/ and outdir/ct/, writes one .seq
    and one .ct file per length, renders one PNG per folded length and finally
    calls generate_movie to produce outdir + "/movie.mp4".
    """
    OSU.create_directory(outdir)
    OSU.create_directory(outdir + "/seq/")
    OSU.create_directory(outdir + "/ct/")
    if seq_start == -1:
        seq_start = 0
    if seq_end == -1:
        seq_end = len(seq)
    else:
        seq_end += 1
    # Number of digits needed so that frame numbers sort lexicographically.
    zero_padding = int(math.floor(math.log10(seq_end)) + 1)
    # Scratch dot-bracket files. They are created relative to the current
    # working directory, so they must be removed from that same location
    # (previously the cleanup looked for them under outdir and left them behind).
    temp_dbn = "temp.dbn"
    temp_ext_dbn = "temp_ext.dbn"
    varna_num = 0
    rhos = {}
    if rhos_dir != "":
        # reads through .rho files found in rhos_dir
        for rf in glob.glob(rhos_dir + "/*.rho"):
            # read in each rho reactivity spectrum (second column of each line)
            with open(rf, "r") as f:
                rho = [line.split()[1] for line in f]
            rhos[len(rho)] = [rho, rf]  # keep the values and the source file

    for seqi in range(seq_start + 1, seq_end + 1):
        if seqi in rhos:
            # pad the not-yet-transcribed positions with -1
            rho_varna = "\"" + ";".join(rhos[seqi][0] +
                                        (["-1"] * (seq_end - seqi))) + "\""
        else:
            rho_varna = "\"" + ";".join(["-1"] * (seq_end)) + "\""
        seqf = outdir + "/seq/" + str(seqi) + ".seq"
        ctf = outdir + "/ct/" + str(seqi) + ".ct"
        NAU.make_seq(seq[seq_start:seqi], seqf)
        if SHAPE_direct and seqi in rhos:
            SU.runRNAstructure_fold(seqf, ctf, rhos[seqi][1])
        elif SHAPE_direct:
            # SHAPE-directed mode: no reactivities for this length, no frame
            continue
        else:
            SU.runRNAstructure_fold(seqf, ctf)
        SU.run_ct2dot(ctf, 0, temp_dbn)
        # Append dots to the last line of the .dbn so every frame has the
        # same total length.
        OSU.system_command("sed '$s/$/&%s/' %s > %s " %
                           ("." * (seq_end - seqi), temp_dbn, temp_ext_dbn))
        varna_num += 1
        frame_png = (outdir + str(varna_num).zfill(zero_padding) +
                     "_structure.png")
        run_VARNA(temp_ext_dbn, frame_png, rho_varna)
        convert_center_resize(frame_png, "1440x2000")
    OSU.remove_file(temp_dbn)
    OSU.remove_file(temp_ext_dbn)
    generate_movie(outdir + "%%%dd_structure.png" % (zero_padding),
                   outdir + "/movie.mp4", "")
def R2D2_process(input_prefix, R2D2_output_dir, draw_dir, react_rhos,
                 crystals_ck, rnum, lock=None):
    """
    Slightly reduced version of a R2D2 process for this benchmarking code.
    # taking code from cotranscriptional case (PCSU.run_cotrans_length) which has extraneous parts in this use case
    # few lines in PCSU.run_cotrans_length made it unable to be used for this case. ex. length_key

    Options:
    input_prefix - full path plus input prefix of reactivities
    R2D2_output_dir - R2D2 output directory
    draw_dir - directory for .dbn output
    react_rhos - rho reactivities
    crystals_ck - indexable crystal-structure record; element 1 is passed as
                  the sequence argument when writing .ct files, element 2 is
                  the reference compared against in the benchmark statistics
                  and element 3 is handed to CircleCompare
                  (exact types are defined by the caller - TODO confirm)
    rnum - iteration number, names output files accordingly
    lock - optional lock forwarded to SU.RNAstructure_sample. When omitted, a
           module-level ``lock`` is used if one exists (the name this function
           historically read); otherwise sampling runs without a lock.

    Returns:
    (curr_stats, selected_react_mats) - the benchmark statistics of the last
    minimum-distance structure and a list with one numpy.matrix per
    minimum-distance structure.
    """
    if lock is None:
        # Backward compatibility: earlier versions read a global named
        # ``lock`` and raised NameError when it was not defined.
        lock = globals().get("lock")
    e = 50000
    fname = re.findall("([^/]+)$", input_prefix)[0]
    output_prefix = "%s/%s_%s" % (R2D2_output_dir, fname, rnum)
    scaling_fns = {
        "D": SU.invert_scale_rho_vec,
        "U": SU.scale_vec_avg1,
        "K": SU.cap_rho_or_ct_list
    }
    scaling_func = "K"
    scale_rho_max = 1.0
    constrained_c = 3.5
    cap_rhos = True
    weight_paired = 0.8
    sampled_structs_count = defaultdict(int)
    sampled_structs = set()

    # Keyword arguments shared by all three sampling runs. ``lock`` is only
    # passed when one is available, exactly as the separate branches did.
    sample_kwargs = {"num_proc": 1, "wn_tag": "_%s" % (rnum)}
    if lock is not None:
        sample_kwargs["lock"] = lock

    # Vanilla Sampling
    structs, structs_labels = SU.RNAstructure_sample(input_prefix,
                                                     e,
                                                     R2D2_output_dir,
                                                     label="noshape",
                                                     **sample_kwargs)
    sampled_structs.update(structs_labels)
    OSU.increment_dict_counts(sampled_structs_count, structs)
    # Sampling with SHAPE constraints
    structs, structs_labels = SU.RNAstructure_sample(
        input_prefix,
        e,
        R2D2_output_dir,
        shapefile=input_prefix + ".rho",
        label="shape",
        **sample_kwargs)
    sampled_structs.update(structs_labels)
    OSU.increment_dict_counts(sampled_structs_count, structs)
    # Sampling with hard constraints
    XB = SU.get_indices_rho_gt_c(react_rhos, constrained_c,
                                 one_index=True)  # RNAstructure is 1-indexed
    SU.make_constraint_file(output_prefix + ".con", [], XB, [], [], [], [], [],
                            [])
    structs, structs_labels = SU.RNAstructure_sample(
        input_prefix,
        e,
        R2D2_output_dir,
        constraintfile=output_prefix + ".con",
        label="constrained_" + str(constrained_c),
        **sample_kwargs)
    sampled_structs.update(structs_labels)
    OSU.increment_dict_counts(sampled_structs_count, structs)
    # Compressing sampled structures further by removing duplicates sampled by multiple methods. Keeping track of this though.
    # Saving more than I need to in this use case... ex. energies
    sampled_structs = SU.merge_labels(sampled_structs, to_string=False)
    structs = [t[0].split(",") for t in sampled_structs]
    SU.cts_to_file(structs, crystals_ck[1], output_prefix + "_unique.ct")
    SU.runRNAstructure_efn2(output_prefix + "_unique.ct",
                            output_prefix + ".efn2")
    # Parsed for parity with the full R2D2 process; not used further here.
    free_energies = SU.get_free_energy_efn2(output_prefix + ".efn2")
    if cap_rhos:
        scaled_rhos = scaling_fns[scaling_func](react_rhos, scale_rho_max)
    else:
        scaled_rhos = scaling_fns[scaling_func](react_rhos)
    with open(input_prefix + ".best_scaled_rho", "w") as f:
        f.write("\n".join([
            "\t".join([str(zi), str(zr)]) for zi, zr in enumerate(scaled_rhos)
        ]))
    # Compute distances between scaled rhos and paired-vectors from drawn structures
    binary_structs = SU.ct_struct_to_binary_vec(structs)
    distances = [
        SU.calc_bp_distance_vector_weighted(
            s,
            scaled_rhos,
            scaling_func=scaling_func,
            invert_struct="D" != scaling_func,
            paired_weight=weight_paired) for s in binary_structs
    ]
    min_distance = min(distances)
    # every structure tied for the minimum distance is kept
    min_dist_indices = [
        i for i, v in enumerate(distances) if v == min_distance
    ]
    # compare R2D2 against crystal structure
    selected_react_mats = []
    for mdi in min_dist_indices:
        react_mat = SU.ct_struct_to_binary_mat(structs[mdi])
        selected_react_mats.append(numpy.matrix(react_mat))
        curr_prefix = "%s_%s_R2D2" % (output_prefix, mdi)
        curr_stats = SU.calc_benchmark_statistics_matrix(
            react_mat, crystals_ck[2])
        with open(curr_prefix + ".stats", "w") as f:
            f.write(str(curr_stats))
        #make file
        SU.ct_list_to_file(structs[mdi], crystals_ck[1], curr_prefix + ".ct")
        SU.runRNAstructure_CircleCompare(curr_prefix + ".ct", crystals_ck[3],
                                         curr_prefix + ".ps")
        OSU.system_command("convert %s.ps %s.jpg" % (curr_prefix, curr_prefix))
    # saving R2D2 results
    # NOTE: react_mat, curr_prefix and curr_stats deliberately carry the
    # values of the LAST minimum-distance structure out of the loop above.
    R2D2_save = {
        "structs": structs,
        "distances": distances,
        "min_dist_indices": min_dist_indices,
        "min_distance": min_distance,
        "scaled_rhos": scaled_rhos,
        "react_mat": react_mat,
    }
    # use a context manager so the pickle file is flushed and closed
    with open(curr_prefix + ".p", "wb") as pf:
        pickle.dump(R2D2_save, pf)
    # output .dbn's like in normal R2D2 process
    # code taken from PCSU.generate_best_struct_images
    # PCSU.generate_best_struct_images contained some extraneous calls for this use case. ex. draw_all = False, running VARNA
    iter_dbn_dir = OSU.create_directory("%s/%s" % (draw_dir, rnum))
    multiple_best = len(min_dist_indices) > 1
    for sf, snum in enumerate(min_dist_indices):
        draw_outname_pre = "%s/%snt_%s" % (iter_dbn_dir, len(react_rhos),
                                           snum)
        if multiple_best:
            # disambiguate file names when several structures tie
            draw_outname_pre += "_mult" + str(sf)
        SU.run_ct2dot(output_prefix + "_unique.ct", snum,
                      draw_outname_pre + ".dbn")
    # return curr_stats and selected structures
    return curr_stats, selected_react_mats
See examples/run_CoTrans_example.sh for an example of how to use this code.

Author: Angela M Yu, 2014-2016
Version: 0.0.1

Copyright (C) 2016  Julius B. Lucks and Angela M Yu.
All rights reserved.
Distributed under the terms of the GNU General Public License, see 'LICENSE'.
"""

import R2D2
import LucksLabUtils_config
import OSU

# setup environment variables
LucksLabUtils_config.config("Quest_R2D2")
# Echo the search paths into the job output.
OSU.system_command("echo $PATH")
OSU.system_command("echo $CLASSPATH")

# This script takes long options only (the short-option string is empty).
opts = OSU.getopts("", [
    "in_dir=", "out_dir=", "adapter=", "p=", "e=", "endcut=", "constrained_c=",
    "scale_rho_max=", "draw_all=", "most_count_tie_break=", "scaling_func=",
    "weight_paired=", "cap_rhos=", "pol_fp="
])
print opts

# This specifically calls R2D2.R2D2() assuming the user has specified the arguments:
# in_dir, out_dir, adapter, e, endcut, constrained_c, scale_rho_max, draw_all, most_count_tie_break, scaling_func, weight_paired, cap_rhos, pol_fp
# Only in_dir, out_dir, and adapter are truly required to run R2D2.R2D2(). Default values for the other parameters are set within R2D2.py.

cotrans = R2D2.R2D2(
    opts['--in_dir'],
Beispiel #29
0
import OSU
import LucksLabUtils_config
from collections import defaultdict, Counter
import re
import SU
import VIU
from itertools import combinations, chain
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from seaborn import color_palette

# setup environment variables
LucksLabUtils_config.config("Quest_R2D2")

# This script takes long options only (the short-option string is empty).
opts = OSU.getopts("", [
    "sample_sizes=", "outfile_pre=", "output_dir=", "reactivities_files=",
    "linker_seq=", "pol_fp=", "processors=", "MDS_processors="
])
print opts
# NOTE: infiles and reactivities_files are both built from the same
# comma-separated --reactivities_files value.
infiles = opts["--reactivities_files"].split(",")
outfile = opts["--outfile_pre"]
output_dir = OSU.create_directory(opts["--output_dir"])
reactivities_files = opts["--reactivities_files"].split(",")
linker_seq = opts["--linker_seq"]
# --sample_sizes is a comma-separated list of integers.
sample_sizes = [int(s) for s in opts["--sample_sizes"].split(",")]
# Optional settings: default to 0 for pol_fp and 1 for both processor counts.
pol_fp = int(opts["--pol_fp"]) if "--pol_fp" in opts else 0
p = int(opts["--processors"]) if "--processors" in opts else 1
MDS_p = int(opts["--MDS_processors"]) if "--MDS_processors" in opts else 1

# setup counters, scaling functions, and output file header
# Both maps start out empty and hand out a fresh list for any new key.
unique_struct_nums = defaultdict(list)
unique_struct_dists = defaultdict(list)
Beispiel #30
0
import glob
import SU
import OSU
import NAU
import re
import LucksLabUtils_config
import PAU
from itertools import cycle
from collections import namedtuple
import cPickle as pickle

# setup environment variables specific to the ICSE cluster at Cornell
LucksLabUtils_config.config("ICSE")
opts = OSU.getopts("o:c:r:n:p:", [
    "noshape", "shape", "constrain", "scaling_func=", "cluster_flag=",
    "job_name=", "sub_proc=", "arg_slice=", "load_results=",
    "generate_structs=", "structs_pickle_dir=", "cap_rhos=",
    "shape_intercept=", "shape_slope=", "restart"
])
print opts

# -r and -c are glob patterns; expand them to the matching file lists.
reactivities_files = glob.glob(opts['-r'])
crystal_files = glob.glob(opts['-c'])
# Every option below is indexed directly, with no fallback value.
output_dir = opts['-o']
sample_n = int(opts['-n'])
num_proc = int(opts['-p'])
scaling_func = opts["--scaling_func"]
# Boolean flags are enabled only by the exact string "True"; any other
# value evaluates to False.
cluster_flag = opts["--cluster_flag"] == "True"
job_name = opts["--job_name"]
sub_proc = opts["--sub_proc"] == "True"
load_results = opts["--load_results"] == "True"
generate_structs = opts["--generate_structs"] == "True"