Python get_exon_regulated_by_sf Beispiele, union_dataset_function.get_exon_regulated_by_sf Python Beispiele

Beispiel #1

0

Datei anzeigen

def control_handler(cnx, exon_type, summary, regulation="down"):
    """
    Return the control dictionaries of ``exon_type``, creating and caching \
    on disk whatever part of them is missing.

    The summary file ``control/control_<summary>.py`` and the pickle \
    ``control/control_full.pkl`` are looked up next to this module.

    :param cnx: database connection handed to the query helpers
    :param exon_type: (str) the type of control exons
    :param summary: value inserted in the control file name and passed to \
    the summary helpers
    :param regulation: (str) the regulation handed to the query helpers
    :return: (tuple) the summary dictionary and the full dictionary
    """
    handler = exon_control_handler
    control_folder = "%s/control" % os.path.dirname(os.path.realpath(__file__))
    control_file = "%s/control_%s.py" % (control_folder, summary)
    control_full = "%s/control_full.pkl" % control_folder

    ctrl_list, tmp = handler.get_control_information(
        exon_type, control_file, control_full)
    exon2remove = union_dataset_function.get_exon_regulated_by_sf(
        cnx, regulation)

    # First stage: nothing cached yet, build the base dictionaries.
    if ctrl_list is None:
        print("Control dictionary was not found !")
        print("Creating control information")
        names, rows = handler.get_control_exon_information(
            cnx, exon_type, exon2remove, regulation)
        rows = handler.remove_redundant_gene_information(rows)
        tmp = handler.create_a_temporary_dictionary(names, rows)
        ctrl_list = handler.get_summary_dictionaries(names, tmp, summary)
        handler.write_control_file(exon_type, control_file, str(ctrl_list))
        handler.write_pickle(control_full, tmp)

    # Second stage: add the relative exon/intron sizes when they are absent.
    if "rel_exon_intron_up" not in ctrl_list:
        print("relative exon_intron size where not found.")
        print("getting relative exon_intron size")
        size_rows = get_control_exon_size_information(
            cnx, exon_type, exon2remove)
        print("Relative size calculation...")
        size_dic = tmp_dic_creator(size_rows)
        tmp = dict(tmp, **size_dic)
        print("summarizing")
        size_summary = get_summary_dictionaries(size_dic, summary)
        ctrl_list = dict(ctrl_list, **size_summary)
        print("writting...")
        write_adapted_dic(control_file, exon_type, str(ctrl_list))
        handler.write_pickle(control_full, tmp)

    return ctrl_list, tmp

Beispiel #2

0

Datei anzeigen

def control_dictionaries_creator():
    """
    Create the control file containing the minimum free energy (mfe) values
    computed for every CCE control exon.

    The result is written to ``control_dictionaries/CCE_mfe.py`` next to this
    module, as two assignments (``mfe_3ss`` and ``mfe_5ss``). Both databases
    are located by rewriting the ``src/minimum_free_energy`` part of this
    module's directory.
    """
    exon_class.set_debug(0)
    dir_path = os.path.dirname(os.path.realpath(__file__))
    fasterdb = dir_path.replace("src/minimum_free_energy",
                                "data/fasterDB_lite.db")
    seddb = dir_path.replace("src/minimum_free_energy", "data/sed.db")
    ctrl_dir = dir_path + "/control_dictionaries/"
    cnx = sqlite3.connect(fasterdb)
    cnx_sed = sqlite3.connect(seddb)
    # try/finally guarantees that both connections are released, even when
    # one of the helpers raises.
    try:
        if not os.path.isdir(ctrl_dir):
            os.mkdir(ctrl_dir)
        exon_type = "CCE"
        exon2remove = union_dataset_function.get_exon_regulated_by_sf(
            cnx_sed, "down")
        ctrl_exon_list = get_control_exon_information(cnx, exon_type,
                                                      exon2remove)
        print("retrieving upstream intron sequence")
        list_exon = [
            exon_class.ExonClass(cnx, exon[0], exon[1], exon[2])
            for exon in ctrl_exon_list
        ]
        print("calculating mfe")
        mfe_list_3ss, mfe_list_5ss = function.mfe_calculator(list_exon)
        # The context manager closes (and flushes) the file on any exit path.
        with open(ctrl_dir + exon_type + "_mfe.py", "w") as cur_file:
            cur_file.write("mfe_3ss=" + str(mfe_list_3ss) + "\n")
            cur_file.write("mfe_5ss=" + str(mfe_list_5ss) + "\n")
    finally:
        cnx_sed.close()
        cnx.close()

Beispiel #3

0

Datei anzeigen

Datei: variance_analysis.py Projekt: LBMC/Lemaire_et_al_2019

def irimia_analysis(cnx, exon_type, output, regulation, size_threshold,
                    target_column, dic_bed):
    """
    Compare the GC content of the big control exons with the GC content of \
    the exons described by ``dic_bed`` (the Irimia exons), and create the \
    corresponding histograms and test result.

    :param cnx: database connection handed to the query helpers
    :param exon_type: (str) the type of control exons to analyse
    :param output: (str) folder where the results will be created
    :param regulation: (str) the regulation
    :param size_threshold: (int) the threshold separating small and big exons
    :param target_column: (str) the feature of interest
    :param dic_bed: collection describing the Irimia exons; it is handed to \
    ``get_size`` and ``get_gc_content`` and its length is reported as the \
    number of small exons
    """
    regulated_exons = udf.get_exon_regulated_by_sf(cnx, regulation)
    control_exons = get_control_exon(cnx, exon_type, regulated_exons,
                                     regulation)
    dic_size = get_list_of_value(cnx, control_exons, target_column)
    # Only the big group is used here: the small group comes from dic_bed.
    _, big_exons = get_two_groups_of_exon(dic_size, size_threshold,
                                          target_column)

    irimia_sizes = get_size(dic_bed)
    make_histogram(irimia_sizes, output, "hist_of_Irimia_exon_size",
                   target_column, log=True)

    nb_irimia = len(dic_bed)
    print(" nb exons having a size below/equal to %s nt (Irimia) : %s" %
          (size_threshold, nb_irimia))
    nb_big = len(big_exons)
    print(" nb exons having a size greater to %s nt : %s" %
          (size_threshold, nb_big))

    small_gc = get_gc_content(dic_bed)
    big_gc = get_list_of_value_iupac_dnt(cnx, big_exons, "iupac_exon", "S")

    small_title = "GC content Irimia micro-exons (<= %s nt)" % size_threshold
    make_histogram(small_gc, output, "gc_content_small_Irimia_exon",
                   small_title)
    big_title = "GC content exons > %s nt" % size_threshold
    make_histogram(big_gc, output, "gc_content_big_%s_exon" % exon_type,
                   big_title)

    write_test_result(big_gc, small_gc, size_threshold, output, exon_type)

Beispiel #4

0

Datei anzeigen

def main():
    """
    Write the list of CCE control exons, one per line, in
    ``data/input/CCE_exons.txt``.

    Exons returned by ``udf.get_exon_regulated_by_sf`` for the "down"
    regulation are handed to ``get_control_exon_information`` as the exons
    to remove.
    """
    base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    cnx = sqlite3.connect(base_dir + "/data/fasterDB_lite.db")
    cnx_sed = sqlite3.connect(base_dir + "/data/sed.db")
    # Release both connections as soon as the queries are done, even if one
    # of them fails.
    try:
        exon2remove = udf.get_exon_regulated_by_sf(cnx_sed, "down")
        my_exons = get_control_exon_information(cnx, "CCE", exon2remove)
    finally:
        cnx_sed.close()
        cnx.close()
    # NOTE(review): unlike the databases, the output path is relative to the
    # current working directory, not to base_dir - confirm this is intended.
    with open("data/input/CCE_exons.txt", "w") as outfile:
        outfile.write("\n".join(my_exons) + "\n")

Beispiel #5

0

Datei anzeigen

Datei: create_GC_AT_bed_exon.py Projekt: LBMC/Lemaire_et_al_2019

def main():
    """
    Write two bed files in ``result/correlation_GC-AT-exons_TAD``: one for \
    the exons returned by ``udf.get_exon_regulated_by_sf`` ("down"), one for \
    those exons plus the CCE control exons.
    """
    root = os.path.realpath(__file__)
    for _ in range(3):
        root = os.path.dirname(root)
    seddb = "%s/data/sed.db" % root
    fasterdb = "%s/data/fasterDB_lite.db" % root
    output = "%s/result/correlation_GC-AT-exons_TAD" % root
    if not os.path.isdir(output):
        os.mkdir(output)

    # First bed: regulated exons only.
    cnx = sqlite3.connect(seddb)
    u1_factors = ["SNRPC", "SNRNP70", "DDX5_DDX17"]
    u1_exons = [[int(value) for value in exon]
                for exon in get_exons_list(cnx, u1_factors, "down")]
    u2_factors = ["U2AF2", "SF1", "SF3A3", "SF3B4"]
    u2_exons = [[int(value) for value in exon]
                for exon in get_exons_list(cnx, u2_factors, "down")]
    print("U1-exons : %s exons" % len(u1_exons))
    print("U2-exons : %s exons" % len(u2_exons))
    regulated = udf.get_exon_regulated_by_sf(cnx, "down")
    print("Getting exon data ...")
    regulated_data = get_exon_info(cnx, seddb, fasterdb, regulated, u1_exons,
                                   u2_exons)
    print("Writing bed")
    write_bed(output, regulated_data, "data_for_regulated_exons")
    cnx.close()

    # Second bed: control exons followed by the regulated exons.
    cnx = sqlite3.connect(seddb)
    exon2remove = udf.get_exon_regulated_by_sf(cnx, "down")
    cnx_fasterdb = sqlite3.connect(fasterdb)
    control_exons = get_control_exon_information(cnx_fasterdb, "CCE",
                                                 exon2remove)
    all_exons = control_exons + exon2remove
    cnx_fasterdb.close()
    print("CCE exons + regulated exons : %s" % len(all_exons))
    print("Getting CCE + regulated exon data ...")
    all_data = get_exon_info(cnx, seddb, fasterdb, all_exons, u1_exons,
                             u2_exons)
    print("Writing bed")
    write_bed(output, all_data, "data_for_regulated_CCE_exons")
    cnx.close()

Beispiel #6

0

Datei anzeigen

Datei: variance_analysis.py Projekt: LBMC/Lemaire_et_al_2019

def my_level_analysis(cnx,
                      exon_type,
                      output,
                      regulation,
                      size_threshold,
                      target_column,
                      level="control"):
    """
    Create the histogram of the exon sizes, split the exons in a small and \
    a big group around ``size_threshold`` and compare the GC content of the \
    two groups (histograms + test result).

    :param cnx: database connection handed to the query helpers
    :param exon_type: (str) the type of control exons to analyse
    :param output: (str) folder where the results will be created
    :param regulation: (str) the regulation
    :param size_threshold: (int) the threshold separating the two groups
    :param target_column: (str) the feature of interest
    :param level: (str) "control" to analyse the control exons; any other \
    value analyses the exons returned by ``udf.get_exon_regulated`` and \
    labels the GC-content results "SF-down"
    """
    # The figure name is fixed before exon_type can be replaced below.
    sizefig = "hist_of_%s_exon_size" % exon_type
    if level != "control":
        exon_list = udf.get_exon_regulated(cnx, regulation)
        exon_type = "SF-down"
    else:
        regulated = udf.get_exon_regulated_by_sf(cnx, regulation)
        exon_list = get_control_exon(cnx, exon_type, regulated, regulation)

    dic_size = get_list_of_value(cnx, exon_list, target_column)
    # NOTE(review): the key is hard-coded to "exon_size" although the values
    # were requested for ``target_column`` - confirm both always agree.
    sizes = np.array(dic_size["exon_size"])
    print(" min : %s" % min(sizes))
    print(" max : %s" % max(sizes))
    below_or_equal = sizes[sizes <= size_threshold]
    print(" nb exons having a size below/equal %s : %s" %
          (size_threshold, len(below_or_equal)))
    make_histogram(sizes, output, sizefig, target_column, log=True)

    groups = get_two_groups_of_exon(dic_size, size_threshold, target_column)
    small_exons, big_exons = groups
    print(" nb exons having a size below/equal to %s nt : %s" %
          (size_threshold, len(small_exons)))
    print(" nb exons having a size greater to %s nt : %s" %
          (size_threshold, len(big_exons)))

    small_gc = get_list_of_value_iupac_dnt(cnx, small_exons, "iupac_exon", "S")
    big_gc = get_list_of_value_iupac_dnt(cnx, big_exons, "iupac_exon", "S")
    small_name = "gc_content_small_%s_exon" % exon_type
    make_histogram(small_gc, output, small_name,
                   "GC content exons <= %s nt" % size_threshold)
    big_name = "gc_content_big_%s_exon" % exon_type
    make_histogram(big_gc, output, big_name,
                   "GC content exons > %s nt" % size_threshold)
    write_test_result(big_gc, small_gc, size_threshold, output, exon_type)

Beispiel #7

0

Datei anzeigen

Datei: GC_AT_analysis_experimental_bp.py Projekt: LBMC/Lemaire_et_al_2019

def main(branch_point_file,
         name_bp_file,
         at_file,
         gc_file,
         fasterdb,
         seddb,
         output,
         exon_type="CCE"):
    """
    Create the GC/AT barplots built from experimental branch points.

    The intermediate table ``intron_experimental_<name_bp_file>_table.txt`` \
    is computed once and stored in ``output``; it is read back on later runs.

    :param branch_point_file: (str) a bed file containing branch point
    :param name_bp_file: (str) the name of the experimental bp file
    :param at_file: (str) a file containing AT exons
    :param gc_file: (str) a file containing GC exons
    :param fasterdb: (str) path to fasterdb database
    :param seddb: (str) path to sed database
    :param output: (str) folder where the figures will be created
    :param exon_type: (str) the type of control exons
    """
    table_name = "intron_experimental_%s_table" % name_bp_file
    result_file = "%s/%s.txt" % (output, table_name)

    if os.path.isfile(result_file):
        print("Recovering %s" % result_file)
        df = pd.read_csv(result_file, sep="\t")
    else:
        at_exon = read_file(at_file)
        gc_exon = read_file(gc_file)
        list_bp = read_file(branch_point_file)
        print(len(list_bp))
        cnx = sqlite3.connect(fasterdb)
        cnx_sed = sqlite3.connect(seddb)
        regulated = udf.get_exon_regulated_by_sf(cnx_sed, "down")
        exon2remove = [[int(value) for value in exon] for exon in regulated]
        ctrl_exons = get_ctrl_exons(cnx, exon_type, exon2remove)
        exon_list = gc_exon + at_exon + ctrl_exons
        # One label per exon, in the same order as exon_list.
        type_exon = ["GC-exons"] * len(gc_exon)
        type_exon = type_exon + ["AT-exons"] * len(at_exon)
        type_exon = type_exon + ["%s-exons" % exon_type] * len(ctrl_exons)
        intron_data = get_intron_coordinates(cnx, exon_list, type_exon)
        df = get_intron_bp_data(intron_data, list_bp)
        print(df.head())
        df.to_csv(result_file, sep="\t", index=False)
        cnx.close()
        cnx_sed.close()

    figure_name = os.path.basename(result_file).replace(".txt", "")
    create_barplot(df, output, figure_name)

Beispiel #8

0

Datei anzeigen

Datei: launcher.py Projekt: LBMC/Lemaire_et_al_2019

def main_1d(list_file, name_file, seddb, exon_type, regulation, output, nt):
    """
    Create the 1.D figures (nucleotide content and minimum flanking intron
    size) with a custom list of exons, plus the control exons.

    The control group is added to the groups given by the caller for the
    duration of the call only: ``list_file`` and ``name_file`` are not
    modified.

    :param list_file: (list of str) list of exons files in the form
        of GC_rich_exon file.
    :param name_file: (list of str) the name of each file of exons
        given in ``list_file``
    :param seddb: (str) path to sed database
    :param exon_type: (str) the control exons
    :param regulation: (str) the regulation wanted, up or down
    :param output: (str) path where the results will be created
    :param nt: (str) the nt we want to use for the figure 1.1D
    """
    # Work on copies: appending to the caller's lists would make the control
    # entry accumulate each time the function is called with the same lists.
    files = list(list_file) + [None]
    names = list(name_file) + ["%s_exons" % exon_type]
    my_level = "exons"
    list_gc_content = []
    list_intron_size = []
    cnx = sqlite3.connect(seddb)
    # try/finally so the connection is released even when a helper raises.
    try:
        exon2remove = union_dataset_function.get_exon_regulated_by_sf(
            cnx, regulation)
        for i, cur_name in enumerate(names):
            if exon_type not in cur_name:
                # Custom group: values are read from the exon file.
                list_gc_content.append(
                    boxplot_gc_content_maker.extract_exon_gc_content_from_file(
                        cnx, files[i], nt))
                list_intron_size.append(
                    boxplot_flanking_intron_size.
                    extract_exon_min_flanking_intron_size_from_file(
                        cnx, files[i]))
            else:
                # Control group: no file, values come from the control exons.
                list_gc_content.append(
                    boxplot_gc_content_maker.get_exon_control_gc_content(
                        cnx, exon_type, exon2remove, nt))
                list_intron_size.append(
                    boxplot_flanking_intron_size.
                    get_exon_control_min_flanking_intron_size(
                        cnx, exon_type, exon2remove))
        gc_figure = "1.1D_%s_content" % nt
        create_figure(list_gc_content, names, output, regulation,
                      gc_figure, my_level)
        dataframe_creator(list_gc_content, names, output, regulation,
                          gc_figure, my_level)
        create_figure(list_intron_size, names, output, regulation,
                      "1.2D_min_intron_size", my_level)
        dataframe_creator(list_intron_size, names, output, regulation,
                          "1.2D_min_intron_size", my_level)
    finally:
        cnx.close()

Beispiel #9

0

Datei anzeigen

Datei: exon_control_handler.py Projekt: SebastienLemaire/Lemaire_et_al_2019

def control_handler(cnx, exon_type, regulation):
    """
    Return the control dictionaries of ``exon_type``; they are built and \
    written to the ``control`` folder next to this module when they cannot \
    be loaded.

    :param cnx: database connection handed to the query helpers
    :param exon_type: (str) the type of control exons
    :param regulation: (str) the regulation handed to the query helpers
    :return: (tuple) the summary dictionary and the full dictionary
    """
    control_folder = "%s/control" % os.path.dirname(os.path.realpath(__file__))
    control_file = "%s/control.py" % control_folder
    control_full = "%s/control_full.pkl" % control_folder

    ctrl_list, tmp = get_control_information(exon_type, control_file,
                                             control_full)
    if ctrl_list is not None:
        # Cached information is available: nothing to build.
        return ctrl_list, tmp

    print("Control dictionary was not found !")
    print("Creating control information")
    exon2remove = union_dataset_function.get_exon_regulated_by_sf(
        cnx, regulation)
    names, rows = get_control_exon_information(cnx, exon_type, exon2remove,
                                               regulation)
    rows = remove_redundant_gene_information(rows)
    tmp = create_a_temporary_dictionary(names, rows)
    ctrl_list = get_summary_dictionaries(names, tmp)
    write_control_file(exon_type, control_file, str(ctrl_list))
    write_pickle(control_full, tmp)
    return ctrl_list, tmp

Beispiel #10

0

Datei anzeigen

Datei: control_dictionnary.py Projekt: SebastienLemaire/Lemaire_et_al_2019

def control_dictionaries_creator(window_size):
    """
    Create the windowed metagene control files of the ACE and CCE control
    exons.

    For each exon type, the file
    ``control_dictionaries/<exon_type>_metagene_windowsed.py`` is written
    next to this module. It contains the ``final_res_5p`` and
    ``final_res_3p`` values returned by
    ``exon_class_metaexon.get_metagene_vectors_windowsed`` and, as comments,
    the number of 5' and 3' sequences analysed.

    :param window_size: (int) the size of the window we want to use to
        create the control windowed metagene dictionaries
    """
    exon_class_metaexon.set_debug(0)
    dir_path = os.path.dirname(os.path.realpath(__file__))
    fasterdb = dir_path.replace("src/metaexon_figure",
                                "data/fasterDB_lite.db")
    seddb = dir_path.replace("src/metaexon_figure", "data/sed.db")
    ctrl_dir = dir_path + "/control_dictionaries/"
    cnx = sqlite3.connect(fasterdb)
    cnx_sed = sqlite3.connect(seddb)
    # try/finally guarantees that both connections are released, even when
    # one of the helpers raises.
    try:
        if not os.path.isdir(ctrl_dir):
            os.mkdir(ctrl_dir)
        exon2remove = union_dataset_function.get_exon_regulated_by_sf(
            cnx_sed, "down")
        for cur_exon_type in ["ACE", "CCE"]:
            ctrl_exon_list = get_control_exon_information(cnx, cur_exon_type,
                                                          exon2remove)
            list_exon = [
                exon_class_metaexon.ExonClass(cnx, exon[0], exon[1], exon[2],
                                              window_size)
                for exon in ctrl_exon_list
            ]
            print("creating metagene windowsed")
            final_res_5p, final_res_3p, p5_analyzed, p3_analyzed = \
                exon_class_metaexon.get_metagene_vectors_windowsed(
                    list_exon, window_size)
            print(final_res_5p)
            out_name = ctrl_dir + cur_exon_type + "_metagene_windowsed.py"
            # The context manager closes (and flushes) the file on any exit
            # path.
            with open(out_name, "w") as cur_file:
                cur_file.write("final_res_5p=" + str(final_res_5p) + "\n")
                cur_file.write("final_res_3p=" + str(final_res_3p) + "\n")
                cur_file.write("# " + str(p5_analyzed) +
                               " sequences 5' analysees\n")
                cur_file.write("# " + str(p3_analyzed) +
                               " sequences 3' analysees\n")
            # Drop the results before the next exon type is processed.
            del final_res_5p, final_res_3p, p5_analyzed, p3_analyzed
    finally:
        cnx_sed.close()
        cnx.close()

Beispiel #11

0

Datei anzeigen

Datei: create_predicted_bp_bed.py Projekt: LBMC/Lemaire_et_al_2019

def main():
    """
    Write ``predicted_branch_points.bed`` in \
    ``result/experimental_branch_point``: one line per position returned by \
    ``function_bp.goob_bp_only`` for the GC, AT and CCE control exons.

    A progress counter (exons done / total, and the number of exons without \
    any position) is written on stdout.
    """
    regulation = "down"
    exon_type = "CCE"
    exon_class_bp.set_debug(0)
    base = os.path.abspath(__file__)
    for _ in range(3):
        base = os.path.dirname(base)
    output = "%s/result/experimental_branch_point" % base
    at_exon_file = "%s/result/AT_rich_exons" % base
    gc_exon_file = "%s/result/GC_rich_exons" % base
    cnx = sqlite3.connect("%s/data/fasterDB_lite.db" % base)
    cnx_sed = sqlite3.connect("%s/data/sed.db" % base)

    at_exon = read_file(at_exon_file)
    gc_exon = read_file(gc_exon_file)
    regulated = udf.get_exon_regulated_by_sf(cnx_sed, regulation)
    exon2remove = [[int(value) for value in exon] for exon in regulated]
    ctrl_exons = get_ctrl_exons(cnx, exon_type, exon2remove)

    exon_list = gc_exon + at_exon + ctrl_exons
    # One label per exon, in the same order as exon_list.
    type_exon = ["GC-exons"] * len(gc_exon)
    type_exon = type_exon + ["AT-exons"] * len(at_exon)
    type_exon = type_exon + ["%s-exons" % exon_type] * len(ctrl_exons)

    tot = len(exon_list)
    count_none = 0
    print("Creating bed of predicted branch points")
    with open("%s/predicted_branch_points.bed" % output, "w") as outf:
        labelled = zip(exon_list, type_exon)
        for count, (coords, name_exon) in enumerate(labelled, start=1):
            cur_exon = exon_class_bp.ExonClass(cnx, str(coords[0]),
                                               coords[0], coords[1])
            _, list_pos = function_bp.goob_bp_only(cur_exon)
            if list_pos is None:
                count_none += 1
            else:
                for line in list_pos:
                    line[3] += "_" + name_exon
                    line[0] = "chr" + str(line[0])
                    fields = [str(field) for field in line]
                    outf.write("\t".join(fields) + "\n")
            sys.stdout.write("%s/%s  (%s)              \r" %
                             (count, tot, count_none))
    cnx.close()
    cnx_sed.close()

Beispiel #12

0

Datei anzeigen

Datei: control_bp_ppt.py Projekt: SebastienLemaire/Lemaire_et_al_2019

def control_dictionaries_creator(exon_type=None, size=None):
    """
    Create the control files containing the values returned by
    ``function_bp.bp_ppt_calculator`` for every control exon.

    One file ``control_dictionaries/<exon_type>_<size>_bp_ppt_score.py`` is
    written next to this module for each (exon type, size) pair.

    :param exon_type: (str or list of str) the control exon type(s) to
        process. ``None`` (default) processes ``["CCE"]``.
    :param size: (int or list of int) the size(s) handed to
        ``function_bp.bp_ppt_calculator``. ``None`` (default) processes
        ``[100, 50, 35, 25]``.
    """
    # Both arguments accept a single value or a list; called without
    # argument the function processes the historical defaults.
    if exon_type is None:
        exon_types = ["CCE"]
    elif isinstance(exon_type, (list, tuple)):
        exon_types = list(exon_type)
    else:
        exon_types = [exon_type]
    if size is None:
        sizes = [100, 50, 35, 25]
    elif isinstance(size, (list, tuple)):
        sizes = list(size)
    else:
        sizes = [size]

    exon_class_bp.set_debug(0)
    dir_path = os.path.dirname(os.path.realpath(__file__))
    fasterdb = dir_path.replace("src/make_control_files_bp_ppt",
                                "data/fasterDB_lite.db")
    seddb = dir_path.replace("src/make_control_files_bp_ppt", "data/sed.db")
    ctrl_dir = dir_path + "/control_dictionaries/"
    cnx = sqlite3.connect(fasterdb)
    cnx_sed = sqlite3.connect(seddb)
    # try/finally guarantees that both connections are released, even when
    # one of the helpers raises.
    try:
        exon2remove = union_dataset_function.get_exon_regulated_by_sf(
            cnx_sed, "down")
        if not os.path.isdir(ctrl_dir):
            os.mkdir(ctrl_dir)
        for cur_exon_type in exon_types:
            ctrl_exon_list = get_control_exon_information(cnx, cur_exon_type,
                                                          exon2remove)
            print("retrieving upstream intron sequence")
            list_exon = [
                exon_class_bp.ExonClass(cnx, exon[0], exon[1], exon[2])
                for exon in ctrl_exon_list
            ]
            for cur_size in sizes:
                print("calculating bp and ppt score")
                bp_score_list, ppt_score_list, nb_bp_list, nb_good_bp_list, \
                    sequence_list, ag_count_list, hbound_list, uaa_list, \
                    una_list = function_bp.bp_ppt_calculator(list_exon,
                                                             cur_size)
                # Variable name written in the file -> value, in file order.
                content = [
                    ("bp_score", bp_score_list),
                    ("ppt_score", ppt_score_list),
                    ("nb_bp", nb_bp_list),
                    ("nb_good_bp", nb_good_bp_list),
                    ("bp_seq", sequence_list),
                    ("ag_count", ag_count_list),
                    ("hbound", hbound_list),
                    ("uaa_count", uaa_list),
                    ("una_count", una_list),
                ]
                out_name = ctrl_dir + cur_exon_type + "_" + str(cur_size) + \
                    "_bp_ppt_score.py"
                # The context manager closes (and flushes) the file on any
                # exit path.
                with open(out_name, "w") as cur_file:
                    for var_name, values in content:
                        cur_file.write(var_name + "=" + str(values) + "\n")
    finally:
        cnx_sed.close()
        cnx.close()

Beispiel #13

0

Datei anzeigen

def main():
    """
    Write, in ``result/variance_analysis/bed_file/``, one bed file for each \
    of three exon groups: the small exons, the small exons down-regulated by \
    a splicing factor and the control exons. The fourth field of every small \
    exon is also written in ``small.txt``.
    """
    exon_type = "CCE"
    regulation = "down"
    threshold = 27
    base = os.path.abspath(__file__)
    for _ in range(3):
        base = os.path.dirname(base)
    seddb = "%s/data/sed.db" % base
    fasterdb = "%s/data/fasterDB_lite.db" % base
    output = "%s/result/variance_analysis/bed_file/" % base
    if not os.path.isdir(output):
        os.mkdir(output)

    cnx = sqlite3.connect(seddb)
    exon2remove = udf.get_exon_regulated_by_sf(cnx, regulation)

    print("Get every FasterDB small exons : (exons with a size greater than 2"
          " nucleotides and lower than %s nucleotides" % threshold)
    small_exons = get_small_exons(cnx, seddb, fasterdb, threshold, exon_type,
                                  exon2remove)
    print("\tSmall exons found : %s" % len(small_exons))

    print("Getting small exons down-regulated by a splicing factor ...")
    fourth_fields = [exon[3] for exon in small_exons]
    with open("%s/small.txt" % output, "w") as ouf:
        ouf.write("\n".join(fourth_fields) + "\n")
    small_down_sf_exon = get_small_sf_down_exons(cnx, seddb, fasterdb,
                                                 threshold)
    print("\tSmall exons downregulated by a splicing factor found : %s" %
          len(small_down_sf_exon))

    print("Getting control exons ...")
    ctrl_exon = get_control_exons(cnx, seddb, fasterdb, threshold,
                                  exon2remove, exon_type)
    print("\tControl exons found : %s" % len(ctrl_exon))
    cnx.close()

    print("Writing results")
    small_name = "small_%s_exons_(3-%snt).bed" % (exon_type, threshold)
    write_bed_file(small_exons, output, small_name)
    down_name = "small_sf-downregulated_exons_(3-%snt).bed" % threshold
    write_bed_file(small_down_sf_exon, output, down_name)
    ctrl_name = "%s_exons_%snt+.bed" % (exon_type, threshold)
    write_bed_file(ctrl_exon, output, ctrl_name)

Beispiel #14

0

Datei anzeigen

Datei: launcher.py Projekt: LBMC/Lemaire_et_al_2019

def _create_level_figures(cnx, exon_type, regulation, path, output):
    """Collect the values of every exon group and create the figures and tables of each level."""
    gc_maker = boxplot_gc_content_maker
    intron_size = boxplot_flanking_intron_size
    exon2remove = union_dataset_function.get_exon_regulated_by_sf(
        cnx, regulation)
    gene2remove = [exon[0] for exon in exon2remove]
    at_file_pure = "%sAT_rich_exons" % path
    gc_file_pure = "%sGC_rich_exons" % path
    gene2remove_at_gc = get_common_genes(at_file_pure, gc_file_pure, output)
    # The "genes" level is still handled below but is currently disabled:
    # add "genes" to this list to produce the gene-level tables.
    levels = ["exons"]
    for my_level in levels:
        name_file = [
            "GC_pure_%s" % my_level,
            "AT_pure_%s" % my_level,
            "%s_%s" % (exon_type, my_level)
        ]
        # None is the placeholder of the control group, which has no file.
        list_file = [gc_file_pure, at_file_pure, None]
        list_gc_content = []
        list_intron_size = []
        list_gene_size = []
        list_exon_size = []
        for cur_name, cur_file in zip(name_file, list_file):
            is_control = exon_type in cur_name
            if "exons" in cur_name:
                if not is_control:
                    list_gc_content.append(
                        gc_maker.extract_exon_gc_content_from_file(
                            cnx, cur_file))
                    list_intron_size.append(
                        intron_size.
                        extract_exon_min_flanking_intron_size_from_file(
                            cnx, cur_file))
                    list_exon_size.append(
                        intron_size.extract_exon_size_from_file(
                            cnx, cur_file))
                else:
                    list_gc_content.append(
                        gc_maker.get_exon_control_gc_content(
                            cnx, exon_type, exon2remove))
                    list_intron_size.append(
                        intron_size.
                        get_exon_control_min_flanking_intron_size(
                            cnx, exon_type, exon2remove))
                    list_exon_size.append(
                        intron_size.get_exon_control_size(
                            cnx, exon_type, exon2remove))
            if "genes" in cur_name:
                print(cur_name)
                if not is_control:
                    list_gc_content.append(
                        gc_maker.extract_gene_gc_content_from_file(
                            cnx, cur_file, gene2remove_at_gc))
                    list_intron_size.append(
                        intron_size.
                        extract_gene_median_intron_size_from_file(
                            cnx, cur_file, gene2remove_at_gc))
                    list_gene_size.append(
                        boxplot_gene_size.extract_gene_size_from_file(
                            cnx, cur_file, gene2remove_at_gc))
                else:
                    list_gc_content.append(
                        gc_maker.get_gene_control_gc_content(
                            cnx, exon_type, gene2remove))
                    list_intron_size.append(
                        intron_size.
                        get_gene_control_median_flanking_intron_size(
                            cnx, exon_type, gene2remove))
                    list_gene_size.append(
                        boxplot_gene_size.get_control_gene_size(
                            cnx, exon_type, gene2remove))
        if my_level == "exons":
            # Each feature gets a figure, then its table.
            exon_results = [
                (list_gc_content, "GC_content"),
                (list_intron_size, "min_intron_size"),
                (list_exon_size, "exon_size"),
            ]
            for values, feature in exon_results:
                create_figure(values, name_file, output, regulation,
                              feature, my_level)
                dataframe_creator(values, name_file, output, regulation,
                                  feature, my_level)
        if my_level == "genes":
            dataframe_creator2(list_gc_content, list_gene_size, name_file,
                               output, regulation, "GC_content", "gene_size",
                               my_level)
            dataframe_creator2(list_intron_size, list_gc_content, name_file,
                               output, regulation, "median_intron_size",
                               "GC_content", my_level)


def main():
    """
    Create the figures and tables comparing the GC-rich exons, the AT-rich
    exons and the CCE control exons (GC content, minimum flanking intron
    size and exon size).

    Input files are read from ``result/`` and the results are written in
    ``result/boxplot_gc_content_and_flanking_intron_size/``; both folders
    and the sed database are located by rewriting the
    ``src/boxplot_GC_content_and_flanking_intron_size`` part of this
    module's directory.
    """
    exon_type = "CCE"
    regulation = "down"
    src_part = "src/boxplot_GC_content_and_flanking_intron_size"
    here = os.path.realpath(os.path.dirname(__file__))
    output = here.replace(
        src_part, "result/boxplot_gc_content_and_flanking_intron_size/")
    if not os.path.isdir(output):
        os.mkdir(output)
    path = here.replace(src_part, "result/")
    seddb = here.replace(src_part, "data/sed.db")
    cnx = sqlite3.connect(seddb)
    # The connection was never closed before; release it on every exit path.
    try:
        _create_level_figures(cnx, exon_type, regulation, path, output)
    finally:
        cnx.close()

Beispiel #15

0

Datei anzeigen

Datei: control_exon_adapter.py Projekt: SebastienLemaire/Lemaire_et_al_2019

def control_handler(cnx, exon_type, size=None, regulation="down"):
    """
    Return the control dictionaries of ``exon_type``, completing them in up \
    to four stages and rewriting the cached files after each completed stage.

    Stages: base information, relative exon/intron sizes, branch point \
    counts for ``size`` (only when ``size`` is given) and mfe values. When \
    the file of the third or fourth stage is missing, its creation is \
    launched but the dictionaries returned by this call are not completed.

    :param cnx: database connection handed to the query helpers
    :param exon_type: (str) the type of control exons
    :param size: size used in the name of the bp/ppt control file; ``None`` \
    skips the branch point stage
    :param regulation: (str) the regulation handed to the query helpers
    :return: (tuple) the summary dictionary and the full dictionary
    """
    handler = exon_control_handler
    control_folder = "%s/control" % os.path.dirname(os.path.realpath(__file__))
    control_file = "%s/control.py" % control_folder
    control_full = "%s/control_full.pkl" % control_folder
    ctrl_list, tmp = handler.get_control_information(
        exon_type, control_file, control_full)
    exon2remove = union_dataset_function.get_exon_regulated_by_sf(
        cnx, regulation)

    # Stage 1: base control information.
    if ctrl_list is None:
        print("Control dictionary was not found !")
        print("Creating control information")
        names, rows = handler.get_control_exon_information(
            cnx, exon_type, exon2remove, regulation)
        rows = handler.remove_redundant_gene_information(rows)
        tmp = handler.create_a_temporary_dictionary(names, rows)
        ctrl_list = handler.get_summary_dictionaries(names, tmp)
        handler.write_control_file(exon_type, control_file, str(ctrl_list))
        handler.write_pickle(control_full, tmp)

    # Stage 2: relative exon/intron sizes.
    if "rel_exon_intron_up" not in ctrl_list:
        print("relative exon_intron size where not found.")
        print("getting relative exon_intron size")
        size_rows = get_control_exon_size_information(cnx, exon_type,
                                                      exon2remove)
        print("Relative size calculation...")
        size_dic = tmp_dic_creator(size_rows)
        tmp = dict(tmp, **size_dic)
        print("summarizing")
        size_summary = get_summary_dictionaries(size_dic)
        ctrl_list = dict(ctrl_list, **size_summary)
        print("writting...")
        write_adapted_dic(control_file, exon_type, str(ctrl_list))
        handler.write_pickle(control_full, tmp)

    # Stage 3: branch point information, only for an explicit size.
    if size is not None:
        bp_key = "nb_good_bp_%s" % size
        if bp_key not in ctrl_list:
            file_name = "%s_%s_bp_ppt_score.py" % (exon_type, size)
            ctrl_file = "%s/%s" % (control_folder, file_name)
            print(ctrl_file)
            if not os.path.isfile(ctrl_file):
                print("Creating control file, this operation will take some "
                      "time")
                control_bp_ppt.control_dictionaries_creator(exon_type, size)
            else:
                print("Loading %s file to add nb_ggod_pb in control dic !" %
                      ctrl_file)
                sys.path.insert(0, control_folder)
                mod = __import__(file_name.replace(".py", ""))
                # (attribute of the module, key in the dictionaries)
                loaded = (("nb_good_bp", bp_key), ("ag_count", "ag_count"),
                          ("hbound", "hbound"))
                for attribute, key in loaded:
                    values = getattr(mod, attribute)
                    ctrl_list[key] = np.median(values)
                    tmp[key] = values
                write_adapted_dic(control_file, exon_type, str(ctrl_list))
                handler.write_pickle(control_full, tmp)

    # Stage 4: minimum free energy values.
    if "mfe_3ss" not in ctrl_list:
        file_name = "%s_mfe.py" % exon_type
        ctrl_file = "%s/%s" % (control_folder, file_name)
        if not os.path.isfile(ctrl_file):
            print("Creating control file, for mfe.")
            control_mfe.control_dictionaries_creator()
        else:
            print(
                "Loading %s file to add mfe_3ss and mfe_5ss in control dic !" %
                ctrl_file)
            sys.path.insert(0, control_folder)
            module = __import__(file_name.replace(".py", ""))
            for key in ("mfe_3ss", "mfe_5ss"):
                values = getattr(module, key)
                ctrl_list[key] = np.median(values)
                tmp[key] = values
            write_adapted_dic(control_file, exon_type, str(ctrl_list))
            handler.write_pickle(control_full, tmp)

    return ctrl_list, tmp