Beispiel #1
0
def RNAstructure_sample_process(worker_num,
                                in_file_prefix,
                                output_dir,
                                e,
                                seed,
                                wn_tag="",
                                lock=None):
    """
    Process used in RNAstructure_sample. Called from RNAstructure_sample_process_helper.

    Runs runRNAstructure_stochastic on in_file_prefix + ".pfs", writing to a
    per-worker temporary .ct file (output_dir + worker id + "temp.ct"), reads
    the structures back with get_ct_structs and deletes the temporary file.

    Arguments:
        worker_num     -- worker identifier; combined with wn_tag to name the temp file
        in_file_prefix -- path prefix of the input file (".pfs" is appended)
        output_dir     -- prefix the temp file name is appended to (plain string
                          concatenation, so include a trailing separator)
        e, seed        -- forwarded unchanged to runRNAstructure_stochastic
        wn_tag         -- optional suffix appended to the worker identifier
        lock           -- optional lock object held while the sampling call runs

    Returns a list with one comma-joined string per structure.
    """
    wn = str(worker_num) + wn_tag
    print("Worker num: " + wn)
    temp_ct = output_dir + wn + "temp.ct"

    if lock is not None:
        lock.acquire()
    try:
        runRNAstructure_stochastic(in_file_prefix + ".pfs",
                                   temp_ct,
                                   e=e,
                                   seed=seed,
                                   parallel=False)
    finally:
        # Always hand the lock back, even when sampling raises; otherwise
        # every other worker waiting on the same lock would block forever.
        if lock is not None:
            lock.release()

    structs = get_ct_structs(temp_ct)
    structs_str = [",".join(s) for s in structs]
    OSU.remove_file(temp_ct)
    return structs_str
Beispiel #2
0
def reactivities_to_rho_file(input_dir,
                             adapterseq,
                             output_dir,
                             recalc=0,
                             rm_temp=True,
                             min_len=0,
                             max_len=0):
    """
    Takes a directory of reactivities files and the adapter sequence and
    creates .theta, .rho, and .seq files. Also outputs rho_table.txt
    which is a tab delimited file of rhos. Be aware that there is numerical
    error from using floats that are present in the output. There is an option (rm_temp)
    if you do not want to keep the .theta, .rho, and .seq files.

    Arguments:
        input_dir  -- directory searched for "*_reactivities.txt" files
        adapterseq -- adapter sequence, forwarded to parse_reactivity_rho
        output_dir -- directory that receives the per-file outputs and the table
        recalc     -- forwarded to parse_reactivity_rho
        rm_temp    -- if True, move the table into input_dir, delete the
                      .theta/.rho/.seq/.txt files in output_dir and remove
                      output_dir itself
        min_len    -- if non-zero, only rows with length >= min_len are written
                      (and "_<min_len>min" is added to the table name)
        max_len    -- if non-zero, only rows with length <= max_len are written
                      (and "_<max_len>max" is added to the table name)

    Returns the path of the rho table (inside input_dir when rm_temp is True,
    inside output_dir otherwise).
    """
    infiles = glob.glob(input_dir + "/*_reactivities.txt")
    OSU.create_directory(output_dir)

    rhos = {}
    for f in infiles:
        # File name without directory and without the ".txt" extension.
        fname = re.findall(r"([^/]+)\.txt$", f)
        output_file_prefix = output_dir + "/" + fname[0]
        pos, rho_full, theta, rho_cut, seq, rc_flag, rc_sum, ut_sum, t_sum = parse_reactivity_rho(
            f, adapterseq, output_file_prefix, recalc)
        if sum(float(t) for t in theta) == 0:
            print(
                "Not enough alignments in length {0} to calculate theta/rho. Line left blank."
                .format(len(pos)))
            rhos[len(pos)] = "-1\t" * len(pos) + "\n"
        else:
            rhos[len(pos)] = "\t".join([str(t) for t in rho_cut]) + "\n"

    table_name = "rho"
    if min_len != 0:
        table_name += "_{0}min".format(min_len)
    if max_len != 0:
        table_name += "_{0}max".format(max_len)
    table_name += "_table.txt"
    # Join with a separator so the table lands inside output_dir whether or
    # not the caller supplied a trailing slash.
    outname = os.path.join(output_dir, table_name)

    with open(outname, 'w') as f:
        for key in sorted(rhos):
            # A bound of 0 means "no bound"; otherwise the bound is inclusive.
            if min_len != 0 and key < min_len:
                continue
            if max_len != 0 and key > max_len:
                continue
            f.write(rhos[key])

    if rm_temp:
        final_path = os.path.join(input_dir, table_name)
        # Clear any table left by a previous run before moving the new one in.
        OSU.remove_file(final_path)
        shutil.move(outname, input_dir)
        OSU.remove_files_with_ext(output_dir, ".theta")
        OSU.remove_files_with_ext(output_dir, ".rho")
        OSU.remove_files_with_ext(output_dir, ".seq")
        OSU.remove_files_with_ext(output_dir, ".txt")
        os.rmdir(output_dir)
        return final_path
    return outname
Beispiel #3
0
def generate_MFE_CoTrans_movie(seq,
                               outdir,
                               seq_start=-1,
                               seq_end=-1,
                               rhos_dir="",
                               SHAPE_direct=False):
    """
    Generate co-transcriptional MFE folding movie.
    Options to start and end at specific lengths, seq_start and seq_end respectively.
    Can overlay rho reactivities if given a directory with .rho files corresponding to the sequence.

    Arguments:
        seq          -- full sequence; prefixes seq[seq_start:i] are folded one by one
        outdir       -- output location; frame images, temp files and the movie
                        pattern are built by plain concatenation (outdir + name),
                        so pass it with a trailing separator
        seq_start    -- start offset, -1 means 0
        seq_end      -- last length to fold, -1 means len(seq)
        rhos_dir     -- optional directory of .rho files; each file is matched to
                        a length by its number of lines
        SHAPE_direct -- if True, fold with the .rho file of that length and skip
                        lengths that have no .rho file
    """
    OSU.create_directory(outdir)
    OSU.create_directory(outdir + "/seq/")
    OSU.create_directory(outdir + "/ct/")
    if seq_start == -1:
        seq_start = 0
    if seq_end == -1:
        seq_end = len(seq)
    else:
        seq_end += 1
    zero_padding = int(math.floor(math.log10(seq_end)) + 1)

    # Scratch files live at the same paths the cleanup below removes.
    # Previously they were written to the current working directory and the
    # cleanup targeted outdir, so they were never deleted.
    temp_dbn = outdir + "temp.dbn"
    temp_ext_dbn = outdir + "temp_ext.dbn"

    varna_num = 0
    rhos = {}
    if rhos_dir != "":
        # reads through .rho files found in rhos_dir
        for rf in glob.glob(rhos_dir + "/*.rho"):
            # read in each rho reactivity spectrum (second column of each line)
            with open(rf, "r") as f:
                rho = [line.split()[1] for line in f]
            rhos[len(rho)] = [rho, rf]  # keep the file path for SHAPE_direct

    for seqi in range(seq_start + 1, seq_end + 1):
        # VARNA colour values, padded with -1 up to seq_end entries.
        if seqi in rhos:
            rho_varna = "\"" + ";".join(rhos[seqi][0] +
                                        (["-1"] * (seq_end - seqi))) + "\""
        else:
            rho_varna = "\"" + ";".join(["-1"] * (seq_end)) + "\""
        seqf = outdir + "/seq/" + str(seqi) + ".seq"
        ctf = outdir + "/ct/" + str(seqi) + ".ct"
        NAU.make_seq(seq[seq_start:seqi], seqf)
        if SHAPE_direct and seqi in rhos:
            SU.runRNAstructure_fold(seqf, ctf, rhos[seqi][1])
        elif SHAPE_direct:
            # No reactivities for this length: no frame is produced.
            continue
        else:
            SU.runRNAstructure_fold(seqf, ctf)
        SU.run_ct2dot(ctf, 0, temp_dbn)
        # Append dots to the last line so every frame spans seq_end positions.
        OSU.system_command("sed '$s/$/&%s/' %s > %s " %
                           ("." * (seq_end - seqi), temp_dbn, temp_ext_dbn))
        varna_num += 1
        frame_png = outdir + str(varna_num).zfill(
            zero_padding) + "_structure.png"
        run_VARNA(temp_ext_dbn, frame_png, rho_varna)
        convert_center_resize(frame_png, "1440x2000")
    OSU.remove_file(temp_dbn)
    OSU.remove_file(temp_ext_dbn)
    generate_movie(outdir + "%%%dd_structure.png" % (zero_padding),
                   outdir + "/movie.mp4", "")
Beispiel #4
0
    # Collect the structures for rf as dbn files; with time_weight set the
    # helper is called with two extra arguments and also returns kf_times.
    if time_weight:
        kf_dbns, kf_energy_path, kf_times = SU.get_rnm_structs_dbn(
            rf, outdir, time_weight, simulation_time_ms)
    else:
        kf_dbns, kf_energy_path = SU.get_rnm_structs_dbn(rf, outdir)
    if last_structure:  # ignoring time spent because only considering structure at end of simulation
        # Delete every dbn file but the last and keep only the final entry.
        # NOTE(review): kf_times is not truncated here, so with both
        # last_structure and time_weight set the loop below reads kf_times[0]
        # for the last structure -- confirm this is intended.
        OSU.remove_files(kf_dbns[:-1])
        kf_dbns = kf_dbns[-1:]
        kf_energy_path = kf_energy_path[-1:]
    # Group pairs (and times, when weighting) by sequence length, deleting
    # each dbn file once it has been read.
    for i in range(len(kf_dbns)):
        seq, struct = read_dbn(kf_dbns[i])
        nt_length = len(seq)
        pairs = dbn_to_pairs(struct)
        pair_dict[nt_length].append(pairs)
        seq_dict[nt_length] = seq  # last sequence seen for this length wins
        OSU.remove_file(kf_dbns[i])
        if time_weight:
            time_dict[nt_length].append(kf_times[i])

# Edited from Paul Gasper's pairs_from_dbn_2.py
# For every sequence length, write "<outdir>/<length>nt.pairs" starting with
# the sequence line, and compute the average of the collected pair entries.
for key in seq_dict.keys():
    with open('{0}/{1}nt.pairs'.format(outdir, key), 'w') as out_fh:
        out_fh.write('{0}\n'.format(seq_dict[key]))
        if time_weight:
            # Scale each entry by its time, then divide by the total time.
            # The .T usage presumably means the entries are numpy-style
            # arrays -- TODO confirm against dbn_to_pairs.
            weighted_bp_array = [
                (pdk.T * tdk).T
                for pdk, tdk in zip(pair_dict[key], time_dict[key])
            ]
            avg_bp_array = sum(weighted_bp_array) / sum(time_dict[key])
        else:
            # Unweighted mean over all entries collected for this length.
            avg_bp_array = sum(pair_dict[key]) / len(pair_dict[key])
Beispiel #5
0
                output_dir,
                seqfile,
                shapefile=reactivities[k][2] + ".rho",
                constraintfile="",
                label="shape",
                num_proc=num_proc,
                shape_slope=shape_slope,
                shape_intercept=shape_intercept)
            sampled_structs.update(structs_labels)
            print "after shape: " + str(len(sampled_structs))
            sampling_opts_string.append("--shape")
        sampled_structs = SU.merge_labels(list(sampled_structs),
                                          to_string=False)
        print "number of sampled structs: " + str(len(sampled_structs))
        reactivities[k].append(sampled_structs)
        OSU.remove_file(output_dir + "temp.seq")

# Make sure the statistics directory and the structure pickle directory exist.
out_stat_dir = output_dir + "/stats_out/"
OSU.create_directory(out_stat_dir)
OSU.create_directory(structs_pickle_dir)

# Record type with the three fields "c", "mr" and "w".
CRW_pair = namedtuple("CRW_pair", ["c", "mr", "w"])

# First element of every reactivities entry, under the same keys.
react_rhos = {k: reactivities[k][0] for k in reactivities}
constrain_rho_F = {}

# Reduce search space: 0.7 .. 4.0 for the rho values, 0.1 .. 0.9 for the
# weights, both in steps of 0.1.
rho_midpoints = [0.1 * i for i in range(7, 41)]
weights = [0.1 * i for i in range(1, 10)]
print("Parameter values to test: ")
print("rho_max and rho_c: " + str(rho_midpoints))
print("weights: " + str(weights) + "\n")
Beispiel #6
0
def generate_best_struct_images(cotrans_length, length, longest_length,
                                varna_num, zero_padding, draw_dir, ct_dir,
                                draw_all, most_count_tie_break):
    """
    Generates images of the minimum distance structures at a given length.

    Arguments:
        cotrans_length -- dict for this length; the keys read here are
                          "filename", "rho", "rc_flag", "min_dist_indices",
                          "structs", "sampled_structs_count" and "free_energies"
        length         -- current length; used for padding and logging
        longest_length -- length every frame is padded to (dots in the dbn,
                          -1 entries in the VARNA colour values)
        varna_num      -- frame number used in the image and temp file names
        zero_padding   -- width the frame number is zero-filled to
        draw_dir       -- directory for images and temp files (frame and temp
                          names are built as draw_dir + name)
        ct_dir         -- prefix of the "<filename>_unique.ct" input file
        draw_all       -- if True, draw every minimum distance structure and
                          stack the images vertically; otherwise draw one
        most_count_tie_break -- when only one structure is drawn, pick the one
                          with the highest sampled count if True, otherwise the
                          one with the lowest free energy

    Structures are only drawn when cotrans_length["rc_flag"] is true.
    Returns the path of the frame image for this varna_num (the path is
    returned even when nothing was drawn).
    """
    print("generate_best_struct_images: " + str(length))
    fname = cotrans_length["filename"]
    min_dist_indices = cotrans_length["min_dist_indices"]
    frame_id = str(varna_num).zfill(zero_padding)
    frame_png = draw_dir + frame_id + "_structure.png"
    temp_dbn = draw_dir + str(varna_num) + "_temp.dbn"
    padding = longest_length - length

    # Colour values for VARNA, padded once with -1 up to longest_length.
    # Previously the padding was re-appended on every loop iteration, so it
    # grew with each additional structure drawn.
    rho_varna = "\"" + ";".join([str(r) for r in cotrans_length["rho"]]) + "\""
    rho_varna = rho_varna[:-1] + ";".join([""] + ["-1"] * padding) + "\""

    seen_snum = []
    if cotrans_length["rc_flag"]:  # plot only structures with a good RC
        seen_snum = list(min_dist_indices)
    several = len(seen_snum) > 1

    # The tie-break does not depend on the loop, so resolve it once.
    chosen_snum = None
    if several and not draw_all:
        if most_count_tie_break:
            # Draw only the structure with the most supporting counts from
            # the Boltzmann samples. Previously max() ran over the dict itself
            # and returned the largest key string rather than the largest count.
            structs_str = [",".join(s) for s in cotrans_length["structs"]]
            counts = cotrans_length["sampled_structs_count"]
            chosen_snum = max(
                min_dist_indices,
                key=lambda sn: counts.get(structs_str[sn], 0))
        else:
            # Lowest free energy wins; the first one wins on equal energies.
            free_energies = cotrans_length["free_energies"]
            chosen_snum = min(min_dist_indices,
                              key=lambda sn: free_energies[sn])

    mult_images = []
    for sf, snum in enumerate(seen_snum):
        is_last = sf == len(seen_snum) - 1
        draw_outname_pre = "%s/%s_%s_%s" % (draw_dir, fname, snum, frame_id)
        if several and draw_all:
            draw_outname_pre += "_mult" + str(sf)
            mult_images.append(draw_outname_pre + "_structure.png")
        elif several and snum != chosen_snum:
            continue

        SU.run_ct2dot(ct_dir + fname + "_unique.ct", snum,
                      draw_outname_pre + ".dbn")

        # Append the dots needed to fill in the whole length.
        OSU.system_command(
            "sed '$s/$/&%s/' %s.dbn > %s " %
            ("." * padding, draw_outname_pre, temp_dbn))
        VIU.run_VARNA(temp_dbn, draw_outname_pre + "_structure.png",
                      rho_varna)
        if is_last and several and draw_all:  # vertical concat mult images
            v_outname = re.sub(r"_mult\d+", "", mult_images[0])
            VIU.vertical_image_concat(v_outname, mult_images)
            draw_outname_pre = re.findall(r"(.*)_structure\.png$",
                                          v_outname)[0]
        if is_last or not draw_all:
            print(frame_png)
            print("SYM LINK: " + frame_png)
            OSU.make_symbolic_link(
                re.sub(r"_mult\d+", "", draw_outname_pre) + "_structure.png",
                frame_png)
            VIU.convert_center_resize(frame_png, "1200x2800")
        OSU.remove_file(temp_dbn)
    return frame_png