Esempio n. 1
0
    def generate_output(self):
        """
        Create the majority of the Cotranscriptional SHAPE-Seq output: the DG
        plot, the best structure images, and the movie of the best structures.

        Reads self.output_dir, self.file_data (keyed by length; each entry
        carries an "rc_flag" item), self.p (number of processes),
        self.draw_all and self.most_count_tie_break. Creates the draw/ and
        nn/distances/ directories under self.output_dir and asks
        VIU.generate_movie to write movie.mp4 there. Returns None.

        Raises IndexError if self.file_data is empty.
        """
        draw_dir = OSU.create_directory(self.output_dir + "/draw/")
        nn_dir = OSU.create_directory(self.output_dir + "/nn/")
        OSU.create_directory(nn_dir + "distances/")
        sorted_lengths = sorted(self.file_data)
        max_length = sorted_lengths[-1]
        # Number of decimal digits of the longest length; used to zero-pad
        # the frame numbers in the image file names.
        zero_padding = int(math.floor(math.log10(max_length)) + 1)

        # Parallelized function calls to generate DG plot, distance matrices for clustering of structures, and creating images of minimum distance structures
        # Only lengths whose "rc_flag" is truthy get an image.
        draw_struct_nums = [
            length for length in sorted_lengths
            if self.file_data[length]["rc_flag"]
        ]
        draw_args_pool = zip(
            [self.file_data[dsn] for dsn in draw_struct_nums],
            draw_struct_nums,
            repeat(max_length),
            range(1, len(draw_struct_nums) + 1),  # 1-based frame number
            repeat(zero_padding),
            repeat(draw_dir),
            repeat(self.output_dir + "/ct/"),
            repeat(self.draw_all),
            repeat(self.most_count_tie_break))
        dg_task = (PCSU.generate_DG_output, (self, 1, max_length))
        # list() is required: on Python 3 zip() is lazy and cannot be
        # concatenated with a list or handed around as a sequence. On
        # Python 2 this is a no-op copy.
        image_tasks = list(
            zip(repeat(PCSU.generate_best_struct_images), draw_args_pool))

        if self.p == 1:
            # Single process: run the DG task first, then every image task.
            for task in [dg_task] + image_tasks:
                PCSU.calculate_function_helper(task)
        else:
            PCSU.generate_DG_output(
                self, 1, max_length
            )  # moved this outside of multiprocessing because Quest has issues running it in a pool
            PCSU.run_output_multiprocessing_pool(
                PCSU.calculate_function_helper, image_tasks, self.p)

        if not OSU.check_file_exists(
                self.output_dir + "/DG_state_plot.pdf"
        ):  # Weird error on quest that it will ignore this command if sample size is very large
            PCSU.generate_DG_output(self, 1, max_length)

        # Use ffmpeg on varna_num.png's to create video of the minimum distance folding pathway
        last_frame = len(draw_struct_nums)
        OSU.make_symbolic_link(
            draw_dir + str(last_frame).zfill(zero_padding) +
            "_structure.png",
            draw_dir + str(last_frame + 1).zfill(zero_padding) +
            "_structure.png")  # ffmpeg needs a duplicate of the last frame
        # Builds a printf-style frame pattern such as "%3d_structure.png".
        VIU.generate_movie(draw_dir + "%%%dd_structure.png" % (zero_padding),
                           self.output_dir + "/movie.mp4")
        return
def read_all_dbn_dirs(dbn_dirs):
    """
    Read dbns in multiple directories to pair correct lengths together.

    dbn_dirs is an iterable of directory prefixes; each is concatenated
    directly with "*.dbn", so it must already end with a path separator.

    Returns a defaultdict(list) mapping a length to the list of
    "*_structure.png" file names that should have been made by R2D2 for that
    length. The length is the character count of the first
    whitespace-separated token on the last non-blank line of the .dbn file.
    Only images that exist according to OSU.check_file_exists are listed,
    and each image is listed once per length. Files with no non-blank line
    are skipped.
    """
    all_dbns = defaultdict(list)
    for dbn_dir in dbn_dirs:
        for dbn_f in glob.glob(dbn_dir + "*.dbn"):
            with open(dbn_f, "r") as f:
                content_lines = [line for line in f if line.strip()]
            if not content_lines:
                # Empty or whitespace-only file: there is no line to measure.
                continue
            length = len(content_lines[-1].split()[0])
            # Swap only the trailing extension (plus an optional "_mult<N>"
            # suffix). The pattern is raw, escapes the dot and is anchored so
            # a "dbn" elsewhere in the path is left untouched.
            image = re.sub(r'(_mult\d+)?\.dbn$', '_structure.png', dbn_f)
            # Several "_mult<N>" variants map to the same image; keep one.
            if image not in all_dbns[length] and OSU.check_file_exists(image):
                all_dbns[length].append(image)
    return all_dbns
Esempio n. 3
0
# generate MFE movie
VIU.generate_MFE_CoTrans_movie(seq, outdir, seq_start, seq_end, rhos_dir,
                               SHAPE_direct)

# generate DG dump file: a tab-separated table with one "nt<TAB>DG" row per
# length that has a .ct file under <outdir>/ct/
if make_DG_dump:
    ct_dir = outdir + "/ct/"
    efn2_dir = OSU.create_directory(outdir + "/efn2/")
    name_nums = range(1, len(seq) + 1)
    # -1 means "no limit" for seq_start / seq_end.
    if seq_start != -1:
        name_nums = name_nums[seq_start - 1:]
    if seq_end != -1:
        # NOTE(review): this keeps seq_end - seq_start + 2 entries, which is
        # seq_end + 3 entries when seq_start is -1. That looks like it may
        # overshoot seq_end; the extra lengths are skipped below only if
        # their .ct file is missing. Confirm the intended bound.
        name_nums = name_nums[:seq_end - seq_start + 2]

    # write dumpfile header
    # The dump file is named after the last path component of outdir. Using
    # rsplit (not a regex match) also works when outdir contains no "/".
    # NOTE(review): no separator is inserted between outdir and the name, so
    # this presumably expects outdir to end with "/" - confirm.
    run_name = outdir.rstrip("/").rsplit("/", 1)[-1]
    fname_dump = outdir + run_name + "_DG_state_plot.dump"
    with open(fname_dump, "w") as f:
        f.write("nt\tDG\n")

    for n in name_nums:
        ct_file = "%s%s.ct" % (ct_dir, n)
        if not OSU.check_file_exists(ct_file):
            continue
        energy_file = "%s%s.efn2" % (efn2_dir, n)
        SU.runRNAstructure_efn2(ct_file, energy_file)
        energy = SU.get_free_energy_efn2(energy_file)[0]
        # Append row by row so that finished rows are already on disk if a
        # later length fails.
        with open(fname_dump, "a") as f:
            f.write("%s\t%s\n" % (n, energy))
Esempio n. 4
0
elif num_proc == 1 and cluster_flag and not load_results:  # This case is the first executed for the parallel version that utilizes the full cluster.
    # The job execution code is wrapped in a loop to catch any subprocess that doesn't reach the pickling step.
    # The loop also acts as a limiter on the number of jobs that can be submitted to the queue at once.
    max_jobs = 511
    jobs_available = min(max_jobs, len(rm_cv_w))
    params_submitted = []
    while len(rm_cv_w) > 0:
        sub_proc_dir = OSU.create_directory(output_dir + "sub_proc_out/")
        for param in rm_cv_w:  # loop through all parameter sets
            param_string = "_".join([str(s) for s in param])
            job_name_param = "_".join([
                job_name, param_string
            ])[:31]  # Job name can only be up to 31 characters long

            # create .sh for parameter set if not exists
            if not OSU.check_file_exists("%snbs_script_%s.sh" %
                                         (sub_proc_sh_dir, param_string)):
                header = "##NBS-stdout:%s\n##NBS-stderr:%s\n##NBS-queue:batch\n##NBS-name:\"%s\"\n##NBS-jcoll:\"%s\"\n\nrm %s %s\n" % (
                    sub_proc_dir + job_name_param + ".out",
                    sub_proc_dir + job_name_param + ".err", job_name_param,
                    job_name, sub_proc_dir + job_name_param + ".out",
                    sub_proc_dir + job_name_param + ".err")
                OSU.system_command(
                    "echo \"%s/usr/bin/time /fs/home/amy35/tools/anaconda/bin/python ../find_parameters.py -r \'%s\' -c \'%s\' -o %s %s -n %s -p 1 --scaling_func %s --cluster_flag False --sub_proc True --arg_slice \'%s\' --job_name %s --load_results \'False\' --generate_structs \'False\' --cap_rhos %s --structs_pickle_dir %s\"> %snbs_script_%s.sh"
                    % (header, opts['-r'], opts['-c'], opts['-o'],
                       sampling_opts_string, opts['-n'],
                       opts['--scaling_func'], param, job_name_param, cap_rhos,
                       structs_pickle_dir, sub_proc_sh_dir, param_string))

            # submit .sh to queue if not running, completed, or no job slots available
            if jobs_available > 0 and not PAU.check_job_on_queue(
                    job_name_param) and not OSU.check_file_exists("".join([
        for i in range(1, 51)
    ]
    times_dirs += [
        "%s/%s%s/" % (opts["--47_times_dir"], file_prefix, i)
        for i in range(1, 48)
    ]
else:
    raise NotImplementedError(
        "Needs --100_times_dir option or --3_times_dirs, --50_times_dir, and --47_times_dir"
    )

combined = defaultdict(set)

for count, td in enumerate(times_dirs):
    dg_dump_file = td + "/DG_state_plot.dump"
    if not OSU.check_file_exists(dg_dump_file):
        if OSU.check_file_exists(td + "results_except_draw.tgz"):
            print td + "results_except_draw.tgz: unpacking DG_state_plot.dump"
            OSU.system_command(
                "tar -zxvf %sresults_except_draw.tgz -C %s ./DG_state_plot.dump"
                % (td, td))
        else:
            raise IOError("results_except_draw.tgz not found in " + td)

    with open(dg_dump_file, "r") as f:
        print "Reading: " + dg_dump_file
        f.readline()  # throw away header
        for line in f:
            vars = line.split()
            str_key = "%s,%s" % (vars[0], vars[1])
            if vars[3] == "1" and vars[-1] == "1":