def get_concidered_exons(cnx, exon_class, regulation):
    """
    Select what the analysis has to consider: splicing factor names or exons.

    :param cnx: (sqlite3 connect object) connection to sed database
    :param exon_class: (string) None, "AT", "GC", "GC-AT" or any other \
    value. Any other value is forwarded to ``get_control_exon``.
    :param regulation: (string) up or down
    :return: (list of string/list 2 int) the result of \
    ``group_factor.get_wanted_sf_name(None)`` if exon_class is None, \
    otherwise a list of exons.
    """
    # No exon class: work with the splicing factor names instead of exons.
    if exon_class is None:
        return group_factor.get_wanted_sf_name(None)

    if exon_class == "AT":
        return difference(cnx, group_factor.at_rich_down,
                          group_factor.gc_rich_down, regulation)

    if exon_class == "GC":
        return difference(cnx, group_factor.gc_rich_down,
                          group_factor.at_rich_down, regulation)

    if exon_class == "GC-AT":
        # Both "difference" results put end to end (AT first, then GC).
        merged = difference(cnx, group_factor.at_rich_down,
                            group_factor.gc_rich_down, regulation)
        merged += difference(cnx, group_factor.gc_rich_down,
                             group_factor.at_rich_down, regulation)
        print("exons GC-AT concidered : %s" % len(merged))
        return merged

    # Anything else is treated as a control exon type.
    return get_control_exon(cnx, exon_class)
def main(union, columns, name, sf_type, contrast, operation):
    """
    Build the heatmaps for the requested columns.

    :param union: (string) "union" selects the union mode (splicing factor \
    names only, results in ``result/new_heatmap_union/``); any other value \
    selects the project mode (results in ``result/new_heatmap/``)
    :param columns: (string) comma separated list of target columns
    :param name: (string) forwarded to the heatmap functions
    :param sf_type: (string or None) forwarded to ``group_factor`` and \
    ``create_matrix``; when it is not None ``redundant_ag_at_and_u1_u2`` is \
    called before the matrix is built
    :param contrast: forwarded to the heatmap functions
    :param operation: forwarded to ``control_handler`` and ``create_matrix``
    """
    target_columns = columns.split(",")
    # Two levels above this file.
    project_root = "/".join(os.path.realpath(__file__).split("/")[:-2])
    cnx = functions.connexion(project_root + "/data/sed.db")
    ctrl_dic, ctrl_full = control_exon_adapter.control_handler(
        cnx, "CCE", operation)
    print("test")

    union_mode = union == "union"
    if union_mode:
        output = project_root + "/result/new_heatmap_union/"
    else:
        output = project_root + "/result/new_heatmap/"
    # If the output directory does not exist, then we create it !
    if not os.path.isdir(output):
        os.mkdir(output)

    if union_mode:
        id_projects = None
        name_projects = group_factor.get_wanted_sf_name(sf_type)
        matrix_mode = "union"
    else:
        id_projects, name_projects = \
            group_factor.get_id_and_name_project_wanted(cnx, sf_type)
        print(id_projects)
        print(name_projects)
        matrix_mode = None

    # Only the "down" regulation is handled.
    regulations = ["down"]
    if sf_type is not None:
        redundant_ag_at_and_u1_u2(cnx, regulations[0])

    # Creating heatmap
    projects_tab, df_stat, project_names, new_targets = create_matrix(
        cnx, id_projects, name_projects, target_columns, ctrl_dic,
        ctrl_full, regulations, operation, matrix_mode, sf_type)

    if len(new_targets) > 1:
        heatmap_creator(np.array(projects_tab), new_targets, project_names,
                        output, contrast, name)
    else:
        simple_heatmap(np.array(projects_tab), new_targets, project_names,
                       output, contrast, name)

    # NOTE: ``name`` is given positionally in union mode and by keyword
    # otherwise; both call forms are kept exactly as they were.
    if union_mode:
        heatmap_gc_sorted(np.array(projects_tab), df_stat, new_targets,
                          project_names, output, contrast, name)
    else:
        heatmap_gc_sorted(np.array(projects_tab), df_stat, new_targets,
                          project_names, output, contrast, name=name)
def get_exon_regulated_by_sf(cnx, regulation):
    """
    Collect the exons ``regulation`` regulated by at least one splicing \
    factor.

    The events of every name returned by \
    ``group_factor.get_wanted_sf_name("all")`` are gathered, duplicated rows \
    are dropped and every value is converted to ``int``.

    :param cnx: (sqlite3 connect object) connection to sed database
    :param regulation: (string) up or down
    :return: (list of list of 2 int) list of exons regulated by a splicing \
    factor
    """
    events = []
    for sf_name in group_factor.get_wanted_sf_name("all"):
        events.extend(get_every_events_4_a_sl(cnx, sf_name, regulation))

    # axis=0 makes np.unique compare whole rows, not single values.
    unique_rows = np.unique(events, axis=0).tolist()
    exon_list = [[int(value) for value in row] for row in unique_rows]

    print("Number of exons regulated by a splicing factor : %s"
          % len(exon_list))
    return exon_list
def main(target_columns, nt_list, output_folder, seddb, exon_type,
         size_bp_up_seq, regulation, name_tab):
    """
    Build the table of values for every splicing factor and optionally \
    write it to disk.

    :param target_columns: (list of string) column names; every "iupac" \
    occurrence in ``target_columns[i]`` is replaced by "<nt_list[i]>_nt"
    :param nt_list: (list of string) one entry per target column
    :param output_folder: (string) folder where the table is written; \
    nothing is written when it is empty or None
    :param seddb: (string or None) path to sed database. When None, \
    ``data/sed.db`` next to the directory holding this file is used
    :param exon_type: (string) forwarded to ``control_handler`` and to \
    ``get_values_for_many_projects_iupac_dnt``
    :param size_bp_up_seq: forwarded to ``control_handler``
    :param regulation: forwarded to \
    ``get_values_for_many_projects_iupac_dnt``
    :param name_tab: (string) file name of the written table
    :return: the object returned by \
    ``get_values_for_many_projects_iupac_dnt`` (it must expose ``to_csv``)
    """
    if seddb is None:
        # Go one directory up explicitly: a textual replace of "src" would
        # rewrite every "src" substring found in the absolute path.
        script_dir = os.path.realpath(os.path.dirname(__file__))
        seddb = os.path.join(os.path.dirname(script_dir), "data", "sed.db")

    cnx = sqlite3.connect(seddb)
    try:
        # Only the second value returned by control_handler is used here.
        _, ctrl_full = control_exon_adapter.control_handler(
            cnx, exon_type, size_bp_up_seq)
        sf_names = group_factor.get_wanted_sf_name(cnx)
        target_columns_new = [
            column.replace("iupac", "%s_nt" % nt_list[i])
            for i, column in enumerate(target_columns)
        ]
        df = get_values_for_many_projects_iupac_dnt(
            cnx, sf_names, target_columns_new, regulation, ctrl_full,
            exon_type)
    finally:
        # The connection is local to this function: always release it.
        cnx.close()

    if output_folder:
        df.to_csv("%s/%s" % (output_folder, name_tab), sep="\t", index=False)
    return df
def main_2bc(target_columns, name, seddb, exon_type, output, sf_type,
             sf_type2remove, regulation="down", contrast=20,
             operation="mean", morder='C', mascending=True):
    """
    Build the matrix with ``create_matrix_bis`` and draw it with \
    ``heatmap_gc_sorted``.

    The module globals ``nt_list``, ``write_order``, ``ascending`` and \
    ``order`` are (re)defined by this function before anything else is done.

    :param target_columns: (list of str) list of columns of interest
    :param name: (str) partial name of the figure
    :param seddb: (str) path to sed database
    :param exon_type: (str) the type of control exons we want
    :param output: (str) the result folder
    :param sf_type: (str) the type of splicing factor of interest
    :param sf_type2remove: (str) the list of splicing factor we want to remove
    :param regulation: (str) "down"
    :param contrast: (int) the scale of contrast we want
    :param operation: (str) the type of graphics we want (mean or median)
    :param morder: value stored in the global ``order``
    :param mascending: value stored in the global ``ascending``
    """
    global nt_list, write_order, ascending, order
    nt_list = ["A", "T", "G", "C"]
    write_order = "N"
    ascending = mascending
    order = morder

    cnx = functions.connexion(seddb)
    ctrl_dic, ctrl_full = control_exon_adapter.control_handler(
        cnx, exon_type, operation)
    name_projects = group_factor.get_wanted_sf_name(sf_type)

    # Creating heatmap
    matrix_parts = create_matrix_bis(
        cnx, name_projects, target_columns, ctrl_dic, ctrl_full,
        regulation, operation, sf_type2remove)
    projects_tab, df_stat, project_names, new_targets = matrix_parts
    heatmap_gc_sorted(np.array(projects_tab), df_stat, new_targets,
                      project_names, output, contrast, name)
def main():
    """
    Launch the creation of figures.

    Without command line argument the figures are created per project in \
    ``result/project_figures_new/``; with the argument "union" they are \
    created per splicing factor name in ``result/project_figures_union_new/``. \
    Any other argument only prints "wrong arg !".

    The module global ``exon_type`` is set to "CCE". Both database \
    connections are closed before returning, even when a step fails.
    """
    global exon_type
    exon_type = "CCE"
    # Two levels above this file.
    base_dir = "/".join(os.path.realpath(__file__).split("/")[:-2])
    seddb = base_dir + "/data/sed.db"
    fasterdb = base_dir + "/data/fasterDB_lite.db"
    regs = ["down"]
    # Other columns that can be given to this script:
    # columns = ["iupac_exon", "exon_size", "upstream_intron_size", "downstream_intron_size", "gene_size",
    #            "median_flanking_intron_size", "force_donor", "force_acceptor", "iupac_upstream_intron_adjacent1",
    #            "nb_intron_gene", "nb_good_bp_%s" % size_bp_up_seq, "hbound", "ag_count", "mfe_3ss", "mfe_5ss",
    #            "iupac_upstream_intron_ppt_area"]
    columns = ["exon_size", "force_donor", "force_acceptor", "iupac_exon",
               "iupac_upstream_intron", "iupac_downstream_intron"]

    cnx = connexion(seddb)
    cnx_fasterdb = connexion(fasterdb)
    try:
        ctrl_dic, ctrl_full = control_exon_adapter.control_handler(
            cnx, exon_type, size_bp_up_seq)

        if len(sys.argv) < 2:
            output = base_dir + "/result/project_figures_new/"
            # If the output directory does not exist, then we create it !
            if not os.path.isdir(output):
                os.mkdir(output)
            id_projects, name_projects = get_interest_project(cnx)
            # No extra argument for the figure functions in project mode.
            union_args = ()
        elif sys.argv[1] == "union":
            output = base_dir + "/result/project_figures_union_new/"
            # If the output directory does not exist, then we create it !
            if not os.path.isdir(output):
                os.mkdir(output)
            id_projects = None
            name_projects = group_factor.get_wanted_sf_name(cnx)
            # "union" is appended as last positional argument.
            union_args = ("union",)
        else:
            print("wrong arg !")
            return

        for regulation in regs:
            print(regulation)
            for target_column in columns:
                if target_column == "nb_good_bp":
                    if not os.path.isdir(output_bp):
                        os.mkdir(output_bp)
                print(" %s" % target_column)

                # One figure per key for "iupac"/"dnt" columns, a single
                # figure otherwise.
                if "iupac" in target_column:
                    motifs = nt_dic
                elif "dnt" in target_column:
                    motifs = dnt_dic
                else:
                    motifs = None

                if motifs is None:
                    create_figure(cnx, cnx_fasterdb, id_projects,
                                  name_projects, target_column, regulation,
                                  output, ctrl_dic, output_bp, ctrl_full,
                                  *union_args)
                else:
                    for motif in motifs.keys():
                        create_figure_iupac_dnt(cnx, id_projects,
                                                name_projects, target_column,
                                                regulation, output, motif,
                                                ctrl_dic, ctrl_full,
                                                *union_args)
    finally:
        # Release both connections whatever happened above.
        cnx.close()
        cnx_fasterdb.close()
def main(level, xaxis, yaxis, name_fig, exon_type, nt_list, exon_class,
         operation, representation):
    """
    Create the correlation figures between ``xaxis`` and ``yaxis``.

    :param level: (string) "project", "exon", or any other value which \
    selects the union mode (one point per splicing factor name)
    :param xaxis: (string) the characteristic used for the x axis
    :param yaxis: (string) the characteristic used for the y axis
    :param name_fig: (string) base name of the figures; a suffix depending \
    on ``level`` (and on ``exon_class`` at exon level) is appended
    :param exon_type: (string) forwarded to ``control_handler`` and to the \
    figure functions
    :param nt_list: (string) comma separated list of nucleotides
    :param exon_class: (string or None) only used when level is "exon": \
    None to work with the splicing factor names, otherwise the class given \
    to ``get_concidered_exons``
    :param operation: forwarded to ``control_handler`` and ``get_axis_value``
    :param representation: forwarded to the value and figure functions
    """
    # Two levels above this file.
    root_dir = "/".join(os.path.realpath(__file__).split("/")[:-2])
    cnx = functions.connexion(root_dir + "/data/sed.db")
    ctrl_dic, full_dic = control_exon_adapter.control_handler(
        cnx, exon_type, operation)
    nt_list = nt_list.split(",")
    regulation = "down"
    output = root_dir + "/result/new_correlation/"
    couple_targets = define_couple_targets(xaxis, yaxis, nt_list)

    if level == "exon":
        if exon_class is None:
            selection = group_factor.get_wanted_sf_name(None)
        else:
            selection = get_concidered_exons(cnx, exon_class, regulation)
            name_fig += "_%s_exons" % exon_class
        name_fig += "_Exon_LVL"
        if not os.path.isdir(output):
            os.mkdir(output)

        gene_columns = ["median_intron_size", "iupac_gene", "gene_size"]
        # When both axes are gene columns the values come from
        # get_gene_values and no size filtering is applied.
        gene_level = xaxis in gene_columns and yaxis in gene_columns

        for couple in couple_targets:
            x_target, y_target = couple[0], couple[1]
            if gene_level:
                value_xaxis, value_yaxis, exon_name = get_gene_values(
                    cnx, selection, x_target, y_target, regulation)
            else:
                value_xaxis, value_yaxis, exon_name = get_exons_values(
                    cnx, selection, x_target, y_target, regulation)
            value_xaxis, value_yaxis, exon_name = remove_none_values(
                value_xaxis, value_yaxis, exon_name)

            if not gene_level:
                value_xaxis, value_yaxis, exon_name = \
                    remove_wrong_size_values(value_xaxis, x_target,
                                             exon_name, value_yaxis,
                                             y_target)
                if len(value_xaxis) != len(value_yaxis) \
                        or len(value_xaxis) != len(exon_name):
                    print("Warning the list of value do'nt have the same length")

            value_xaxis, name_x = get_relative_values(
                value_xaxis, ctrl_dic, x_target, representation)
            value_yaxis, name_y = get_relative_values(
                value_yaxis, ctrl_dic, y_target, representation)
            figure_creator_exon(value_xaxis, value_yaxis, regulation, name_x,
                                name_y, exon_type, exon_name, output,
                                name_fig, representation)
            density_creator_exon(value_xaxis, value_yaxis, regulation,
                                 name_x, name_y, exon_type, output, name_fig,
                                 representation)
        return

    # Project and union levels share the same drawing loop; they only
    # differ by the identifiers and the labels given to it.
    if level == "project":
        name_fig += "_project"
        identifiers, labels = \
            group_factor.get_id_and_name_project_wanted(cnx, None)
    else:
        name_fig += "_union"
        labels = group_factor.get_wanted_sf_name(None)
        identifiers = labels

    # If the output directory does not exist, then we create it !
    if not os.path.isdir(output):
        os.mkdir(output)

    for couple in couple_targets:
        x_target, y_target = couple[0], couple[1]
        values_xaxis = get_axis_value(x_target, cnx, identifiers, ctrl_dic,
                                      regulation, operation, representation)
        values_yaxis = get_axis_value(y_target, cnx, identifiers, ctrl_dic,
                                      regulation, operation, representation)
        if len(values_xaxis) != len(values_yaxis):
            print("Warning the list of value don't have the same length")
        figure_creator(values_xaxis, values_yaxis, labels, regulation,
                       x_target, y_target, exon_type, output, name_fig,
                       representation, ctrl_dic)