コード例 #1
0
def create_othergc_exon_file(cnx, output):
    """
    Create the exon files for every factor of ``group_factor.other`` and \
    for the GC-exons and AT-exons groups.

    A Venn diagram comparing the pooled ``group_factor.other`` exons with
    the GC and AT exons is produced first. Then, for each individual list,
    two files are written in ``<output>/other_GC_exons/``:

    * ``<name>_all``: every exon of the list;
    * ``<name>``: the exons of the list that are not in the value returned
      by ``get_multi_list_common_exons`` (presumably the exons shared by
      several lists - confirm against that helper).

    :param cnx: (sqlite3 object) allow connection to sed database
    :param output: (str) path where the exon files will be created
    """
    output = output + "/other_GC_exons/"
    # makedirs also creates a missing parent folder and tolerates an
    # already existing directory.
    os.makedirs(output, exist_ok=True)
    gc_exon_all = get_exons_list(cnx, group_factor.gc_rich_down, "down")
    print("GC exons : %s" % len(gc_exon_all))
    at_exon_all = get_exons_list(cnx, group_factor.at_rich_down, "down")
    print("Number of at exons all: %s" % len(at_exon_all))
    other_gc_all = get_exons_list(cnx, group_factor.other, "down")
    venn_diagram_creator([other_gc_all, gc_exon_all, at_exon_all],
                         ["hnRNPC & SRSF3 & SRSF2", "GC-exons", "AT-exons"],
                         output)
    # One exon list per individual factor of the "other" group.
    other_gc_list = [get_exons_list(cnx, [fact], "down")
                     for fact in group_factor.other]
    full_list = other_gc_list + [gc_exon_all, at_exon_all]
    full_names = list(group_factor.other) + ["GC_exons", "AT_exons"]
    common = get_multi_list_common_exons(full_list)
    for exon_list, list_name in zip(full_list, full_names):
        start = len(exon_list)
        print("%s : %s exons" % (list_name, start))
        file_writer(exon_list, list_name + "_all", output)
        filtered = [exon for exon in exon_list if exon not in common]
        stop = len(filtered)
        # An empty list loses nothing; avoid dividing by zero.
        loss = round((start - stop) / start * 100, 1) if start else 0.0
        print("%s : %s exons after filter (loss of %s percent)" %
              (list_name, stop, loss))
        file_writer(filtered, list_name, output)
コード例 #2
0
def create_unregulated_exon_list(cnx, output, exon_type):
    """
    Create the lists of GC/AT rich unregulated exons.

    The exons regulated by only one of the GC-rich / AT-rich factor groups,
    plus the exons returned by ``find_last_exons``, are excluded from the
    control exons. The remaining "GC" and "AT" exons of interest are made
    mutually exclusive and written to the ``GC_unregulated`` and
    ``AT_unregulated`` files.

    Note: ``cnx`` is closed before the function returns.

    :param cnx: (sqlite3 connect object) connection to sed database
    :param output: (str) path where the exon files will be created
    :param exon_type: (str) the type of control exons
    """
    gc_exon_all = get_exons_list(cnx, group_factor.gc_rich_down, "down")
    at_exon_all = get_exons_list(cnx, group_factor.at_rich_down, "down")
    gc_exon = [exon for exon in gc_exon_all if exon not in at_exon_all]
    at_exon = [exon for exon in at_exon_all if exon not in gc_exon_all]
    # NOTE(review): the result of find_first_exons is never used below.
    # The call is kept in case it has side effects; confirm whether first
    # exons were meant to be part of exon2remove.
    first_exons = find_first_exons(cnx, exon_type)
    last_exons = find_last_exons(cnx, exon_type)
    exon2remove = gc_exon + at_exon + last_exons
    print("exon to remove : %s" % len(exon2remove))
    min_intron_size, gc_content = \
        get_control_exon_information(cnx, exon_type, exon2remove)
    gc_candidates = get_exons_of_interest(cnx, exon_type, exon2remove,
                                          min_intron_size, gc_content, "GC")
    at_candidates = get_exons_of_interest(cnx, exon_type, exon2remove,
                                          min_intron_size, gc_content, "AT")
    # Both filters must compare against the unfiltered candidate lists:
    # filtering sequentially would leave the shared exons in the AT list.
    gc_exon = [exon for exon in gc_candidates if exon not in at_candidates]
    at_exon = [exon for exon in at_candidates if exon not in gc_candidates]
    print("gc_exon : %s exons" % len(gc_exon))
    print("at_exon : %s exons" % len(at_exon))
    file_writer(gc_exon, "GC_unregulated", output)
    file_writer(at_exon, "AT_unregulated", output)
    cnx.close()
コード例 #3
0
def main():
    """
    Make the enrichment analysis comparing the frequencies of exons
    regulated by spliceosome factors for an AT and a GC exon list.

    The sed database and the result folder are located by substituting the
    ``src/GC_AT_group_regulated_U1_U2`` part of this file's directory.
    The analysis table is written with ``file_writer`` and the second value
    returned by ``analysis_maker`` is dumped in
    ``super_dict_<nb_iteration>.txt`` inside the result folder.
    """
    nb_iteration = 10
    script_dir = os.path.realpath(os.path.dirname(__file__))
    seddb = script_dir.replace(
        "src/GC_AT_group_regulated_U1_U2", "data/sed.db")
    output = script_dir.replace(
        "src/GC_AT_group_regulated_U1_U2",
        "result/GC_AT_group_regulated_U1_U2/")
    # makedirs also creates the parent "result" folder when it is missing.
    os.makedirs(output, exist_ok=True)
    div_group = {
        "AT_rich": group_factor.at_rich_down,
        "GC_rich": group_factor.gc_rich_down,
        "SNRPC": ["SNRPC"],
        "SNRNP70": ["SNRNP70"],
        "DDX5_17": ["DDX5_DDX17"],
        "SF1": ["SF1"],
        "U2AF1": ["U2AF1"],
        "U2AF2": ['U2AF2'],
        "SF3A3": ["SF3A3"],
        "SF3B4": ["SF3B4"]
    }
    cnx = sqlite3.connect(seddb)
    # try/finally guarantees the connection is released even when one of
    # the steps below raises.
    try:
        dic_exon = {}
        for name_group, factors in div_group.items():
            print("Getting all exon regulated by %s factor" % name_group)
            dic_exon[name_group] = get_exons_list(cnx, factors, "down")
        at_gc_intersection = exon_intersection(dic_exon["AT_rich"],
                                               dic_exon["GC_rich"])
        u1_u2_intersection = exon_intersection(
            get_exons_list(cnx, group_factor.u1_factors, "down"),
            get_exons_list(cnx, group_factor.u2_factors, "down"))
        print("GC-AT group intersection : %s exons" %
              len(at_gc_intersection))
        print("U1-U2 interesection : %s exons" % len(u1_u2_intersection))
        # "Pure" groups: exons of one group that are not in the GC/AT
        # intersection.
        dic_exon["GC_pure"] = exon_difference(dic_exon["GC_rich"],
                                              at_gc_intersection)
        dic_exon["AT_pure"] = exon_difference(dic_exon["AT_rich"],
                                              at_gc_intersection)
        for key, exons in dic_exon.items():
            print("%s : %s" % (key, len(exons)))
        # Keep only the spliceosome factor entries, i.e. drop every key
        # naming an AT or GC group.
        dic_spliceosome = {key: exons for key, exons in dic_exon.items()
                           if "AT" not in key and "GC" not in key}

        analysis_dic, super_dict = analysis_maker(dic_exon["GC_pure"],
                                                  dic_exon["AT_pure"],
                                                  dic_spliceosome,
                                                  u1_u2_intersection,
                                                  nb_iteration)
        file_writer(output, analysis_dic, nb_iteration, "GC", "AT")
        with open("%ssuper_dict_%s.txt" % (output, nb_iteration),
                  "w") as outfile:
            outfile.write("super_dict=%s\n" % str(super_dict))
    finally:
        cnx.close()
コード例 #4
0
def create_ct_ga_rich_exon_list(cnx, output):
    """
    Write the GA-rich and the CT-rich exon files.

    Exons found in both the GA and the CT lists are removed from each list
    before writing; the size of every list is printed.

    :param cnx: (sqlite3 object) allow connection to sed database
    :param output: (str) path where the exon files will be created
    """
    ga_all = get_exons_list(cnx, group_factor.ga_rich_down, "down")
    ct_all = get_exons_list(cnx, group_factor.ct_rich_down, "down")

    # Keep only the exons specific to each group.
    ga_only = list(filter(lambda exon: exon not in ct_all, ga_all))
    ct_only = list(filter(lambda exon: exon not in ga_all, ct_all))

    report = (
        ("ga_exon_all : %s exons", ga_all),
        ("ga_exon : %s exons", ga_only),
        ("ct_exon_all : %s exons", ct_all),
        ("ct_exon : %s exons", ct_only),
    )
    for template, exons in report:
        print(template % len(exons))

    file_writer(ga_only, "GA_rich", output)
    file_writer(ct_only, "CT_rich", output)
コード例 #5
0
def main():
    """
    Write bed data about the GC-AT exons.

    Two outputs are produced with ``write_bed`` in
    ``<base>/result/correlation_GC-AT-exons_TAD``:

    * ``data_for_regulated_exons``: data of the exons returned by
      ``udf.get_exon_regulated_by_sf``;
    * ``data_for_regulated_CCE_exons``: data of those exons plus the
      value returned by ``get_control_exon_information`` for "CCE".
    """
    script_path = os.path.realpath(__file__)
    # The project root is three directory levels above this file.
    base = os.path.dirname(os.path.dirname(os.path.dirname(script_path)))
    seddb = base + "/data/sed.db"
    fasterdb = base + "/data/fasterDB_lite.db"
    output = base + "/result/correlation_GC-AT-exons_TAD"
    if not os.path.isdir(output):
        os.mkdir(output)

    cnx = sqlite3.connect(seddb)
    u1_factors = ["SNRPC", "SNRNP70", "DDX5_DDX17"]
    u2_factors = ["U2AF2", "SF1", "SF3A3", "SF3B4"]
    # Every value of each exon is converted to int.
    u1_exons = [[int(value) for value in exon]
                for exon in get_exons_list(cnx, u1_factors, "down")]
    u2_exons = [[int(value) for value in exon]
                for exon in get_exons_list(cnx, u2_factors, "down")]
    print("U1-exons : %s exons" % len(u1_exons))
    print("U2-exons : %s exons" % len(u2_exons))

    regulated_exons = udf.get_exon_regulated_by_sf(cnx, "down")
    print("Getting exon data ...")
    regulated_data = get_exon_info(cnx, seddb, fasterdb, regulated_exons,
                                   u1_exons, u2_exons)
    print("Writing bed")
    write_bed(output, regulated_data, "data_for_regulated_exons")
    cnx.close()

    # Second pass on a fresh connection: control (CCE) + regulated exons.
    cnx = sqlite3.connect(seddb)
    exon2remove = udf.get_exon_regulated_by_sf(cnx, "down")
    cnx_fasterdb = sqlite3.connect(fasterdb)
    control_exons = get_control_exon_information(cnx_fasterdb, "CCE",
                                                 exon2remove)
    all_exons = control_exons + exon2remove
    cnx_fasterdb.close()
    print("CCE exons + regulated exons : %s" % len(all_exons))
    print("Getting CCE + regulated exon data ...")
    all_data = get_exon_info(cnx, seddb, fasterdb, all_exons, u1_exons,
                             u2_exons)
    print("Writing bed")
    write_bed(output, all_data, "data_for_regulated_CCE_exons")
    cnx.close()
コード例 #6
0
def main_3g(list_file, name_file, seddb, output, reverse):
    """
    Make the enrichment analysis comparing the frequencies of exons
    regulated by spliceosome factors for two exon lists.

    :param list_file: forwarded as is to ``analysis_maker_bis``
    :param name_file: forwarded to ``analysis_maker_bis``; its first two
        items are also given to ``file_writer``
    :param seddb: (str) path to the sed database
    :param output: (str) prefix of the path where results are written
    :param reverse: forwarded as is to ``fig_3g``
    """
    nb_iteration = 10000
    cnx = sqlite3.connect(seddb)
    div_group = {
        "SNRPC": ["SNRPC"],
        "SNRNP70": ["SNRNP70"],
        # "DDX5_17": ["DDX5_DDX17"],
        "SF1": ["SF1"],
        "U2AF1": ["U2AF1"],
        "U2AF2": ['U2AF2'],
        # "SF3A3": ["SF3A3"], "SF3B4": ["SF3B4"]
    }

    # Fetch the down-regulated exons of every factor group.
    dic_exon = {}
    for name_group, factors in div_group.items():
        print("Getting all exon regulated by %s factor" % name_group)
        dic_exon[name_group] = get_exons_list(cnx, factors, "down")
    for key, exons in dic_exon.items():
        print("%s : %s" % (key, len(exons)))

    # Drop every entry whose name mentions an AT or GC group.
    dic_spliceosome = {
        key: exons for key, exons in dic_exon.items()
        if not ("AT" in key or "GC" in key)
    }

    analysis_dic, super_dict = analysis_maker_bis(
        list_file, name_file, dic_spliceosome, nb_iteration)
    first_name, second_name = name_file[0], name_file[1]
    filename = file_writer(output, analysis_dic, nb_iteration, first_name,
                           second_name)
    fig_3g(filename, output, reverse)

    dump_path = "%ssuper_dict_%s.txt" % (output, nb_iteration)
    with open(dump_path, "w") as outfile:
        outfile.write("super_dict=%s\n" % str(super_dict))
    cnx.close()