def get_concidered_exons(cnx, exon_class, regulation):
    """
    Select the exons (or splicing factor names) to work with.

    :param cnx: (sqlite3 connect object) connection to sed database
    :param exon_class: (string or None) which exons are wanted:
        - None: no exon filtering, the names returned by \
        ``group_factor.get_wanted_sf_name(None)`` are given back
        - "AT": result of ``difference`` between the AT rich and the GC rich groups
        - "GC": result of ``difference`` between the GC rich and the AT rich groups
        - "GC-AT": the "AT" result followed by the "GC" result
        - any other value: forwarded to ``get_control_exon``
    :param regulation: (string) up or down
    :return: (list of string/list 2 int) list of sf_name if exon_class is None or list of exons if \
                     exon_class is not None.
    """
    # No exon class requested: hand back the splicing factor names.
    if exon_class is None:
        return group_factor.get_wanted_sf_name(None)

    if exon_class == "AT":
        return difference(cnx, group_factor.at_rich_down, group_factor.gc_rich_down, regulation)

    if exon_class == "GC":
        return difference(cnx, group_factor.gc_rich_down, group_factor.at_rich_down, regulation)

    # Anything that is not one of the AT/GC classes is treated as a control exon type.
    if exon_class != "GC-AT":
        return get_control_exon(cnx, exon_class)

    # "GC-AT": AT-specific exons first, then the GC-specific ones appended in place.
    merged_exons = difference(cnx, group_factor.at_rich_down, group_factor.gc_rich_down, regulation)
    merged_exons += difference(cnx, group_factor.gc_rich_down, group_factor.at_rich_down, regulation)
    print("exons GC-AT concidered : %s" % len(merged_exons))
    return merged_exons
def main(union, columns, name, sf_type, contrast, operation):
    """
    Create the heatmaps for the "down" regulated exons, at project level or at union level.

    :param union: (string) "union" to work with the names returned by \
    ``group_factor.get_wanted_sf_name``; any other value works with the projects returned by \
    ``group_factor.get_id_and_name_project_wanted``
    :param columns: (string) the target columns, separated by a comma
    :param name: (string) name forwarded to the heatmap functions
    :param sf_type: (string or None) forwarded to ``group_factor`` and ``create_matrix``; when it is \
    not None, ``redundant_ag_at_and_u1_u2`` is called before building the matrix
    :param contrast: value forwarded to the heatmap functions
    :param operation: (string) forwarded to ``control_exon_adapter.control_handler`` and ``create_matrix``
    """
    target_columns = columns.split(",")
    exon_type = "CCE"
    # Project root: two levels above this file.
    base_dir = "/".join(os.path.realpath(__file__).split("/")[:-2])
    cnx = functions.connexion(base_dir + "/data/sed.db")
    try:
        ctrl_dic, ctrl_full = control_exon_adapter.control_handler(cnx, exon_type, operation)
        if union != "union":
            output = base_dir + "/result/new_heatmap/"
            # makedirs also creates a missing parent folder and tolerates an existing one.
            os.makedirs(output, exist_ok=True)
            id_projects, name_projects = group_factor.get_id_and_name_project_wanted(cnx, sf_type)
            print(id_projects)
            print(name_projects)
            union_flag = None
        else:
            output = base_dir + "/result/new_heatmap_union/"
            os.makedirs(output, exist_ok=True)
            id_projects = None
            name_projects = group_factor.get_wanted_sf_name(sf_type)
            union_flag = "union"

        for regulations in [["down"]]:
            # Creating heatmap
            if sf_type is not None:
                redundant_ag_at_and_u1_u2(cnx, regulations[0])
            projects_tab, df_stat, project_names, new_targets = \
                create_matrix(cnx, id_projects, name_projects, target_columns,
                              ctrl_dic, ctrl_full, regulations, operation, union_flag, sf_type)
            # A fresh array is built for each plotting call, so that one call cannot
            # alter the data seen by the next one.
            if len(new_targets) > 1:
                heatmap_creator(np.array(projects_tab), new_targets, project_names, output, contrast, name)
            else:
                simple_heatmap(np.array(projects_tab), new_targets, project_names, output, contrast, name)
            heatmap_gc_sorted(np.array(projects_tab), df_stat,
                              new_targets, project_names, output, contrast, name=name)
    finally:
        # The connection is always released, even if the creation of a figure fails.
        cnx.close()
Exemple #3
0
def get_exon_regulated_by_sf(cnx, regulation):
    """
    Collect, without duplicates, the exons ``regulation`` regulated by the splicing factors \
    returned by ``group_factor.get_wanted_sf_name("all")``.

    :param cnx: (sqlite3 connect object) connection to sed database
    :param regulation: (string) up or down
    :return: (list of list of 2 int) list of exons regulated by a splicing factor
    """
    collected = []
    for factor in group_factor.get_wanted_sf_name("all"):
        collected.extend(get_every_events_4_a_sl(cnx, factor, regulation))
    # Row-wise de-duplication; numpy values are then converted back to plain int.
    unique_rows = np.unique(collected, axis=0).tolist()
    regulated_exons = [[int(value) for value in row] for row in unique_rows]
    print("Number of exons regulated by a splicing factor : %s" %
          len(regulated_exons))
    return regulated_exons
Exemple #4
0
def main(target_columns, nt_list, output_folder, seddb, exon_type,
         size_bp_up_seq, regulation, name_tab):
    """
    Build the table of values for many splicing factors and optionally write it to disk.

    :param target_columns: (list of str) the columns of interest; in each of them "iupac" is \
    replaced by "<nt>_nt" where <nt> is the item of ``nt_list`` at the same position
    :param nt_list: (list of str) one item per target column
    :param output_folder: (str) folder where the table is written; nothing is written if it is empty \
    or None
    :param seddb: (str or None) path to sed database; if None, a default path is derived from the \
    location of this file
    :param exon_type: (str) forwarded to ``control_exon_adapter.control_handler`` and to \
    ``get_values_for_many_projects_iupac_dnt``
    :param size_bp_up_seq: forwarded to ``control_exon_adapter.control_handler``
    :param regulation: forwarded to ``get_values_for_many_projects_iupac_dnt``
    :param name_tab: (str) file name of the tab-separated table written in ``output_folder``
    :return: the object returned by ``get_values_for_many_projects_iupac_dnt`` \
    (it must provide ``to_csv``)
    """
    if seddb is None:
        # NOTE(review): every "src" occurrence of the path is replaced, which presumably
        # expects this file to live directly in a folder named "src" - confirm.
        seddb = os.path.realpath(os.path.dirname(__file__)).replace(
            "src", "data/sed.db")
    cnx = sqlite3.connect(seddb)
    try:
        _, ctrl_full = control_exon_adapter.control_handler(
            cnx, exon_type, size_bp_up_seq)
        sf_names = group_factor.get_wanted_sf_name(cnx)
        # Indexing nt_list (rather than zipping) keeps the IndexError raised when
        # nt_list is shorter than target_columns.
        target_columns_new = [
            column.replace("iupac", "%s_nt" % nt_list[i])
            for i, column in enumerate(target_columns)
        ]
        df = get_values_for_many_projects_iupac_dnt(cnx, sf_names,
                                                    target_columns_new, regulation,
                                                    ctrl_full, exon_type)
        if output_folder:
            df.to_csv("%s/%s" % (output_folder, name_tab), sep="\t", index=False)
    finally:
        # The connection is always released, even when a query fails.
        cnx.close()
    return df
def main_2bc(target_columns, name, seddb, exon_type,
             output, sf_type, sf_type2remove, regulation="down",
             contrast=20, operation="mean", morder='C', mascending=True):
    """
    Create the GC sorted heatmap for the wanted splicing factors.

    This function also sets the module-level globals ``nt_list``, ``write_order``, ``ascending`` \
    and ``order``.

    :param target_columns: (list of str) list of columns of interest
    :param name: (str) partial name of the figure
    :param seddb: (str) path to sed database
    :param exon_type: (str) the type of control exons we want
    :param output: (str) the result folder
    :param sf_type: (str) the type of splicing factor of interest
    :param sf_type2remove: (str) the list of splicing factor we want to remove
    :param regulation: (str) "down"
    :param contrast: (int) the scale of contrast we want
    :param operation: (str) the type of graphics we want (mean or median)
    :param morder: value stored in the global ``order`` (presumably the key used to sort the \
    heatmap - confirm against the code reading ``order``)
    :param mascending: value stored in the global ``ascending`` (presumably the sort direction - \
    confirm against the code reading ``ascending``)
    """
    global nt_list, write_order, ascending, order
    nt_list = ["A", "T", "G", "C"]
    write_order = "N"
    ascending = mascending
    order = morder
    cnx = functions.connexion(seddb)
    try:
        ctrl_dic, ctrl_full = \
            control_exon_adapter.control_handler(cnx, exon_type, operation)
        name_projects = group_factor.get_wanted_sf_name(sf_type)
        # Creating heatmap
        projects_tab, df_stat, project_names, new_targets = \
            create_matrix_bis(cnx, name_projects, target_columns,
                              ctrl_dic, ctrl_full, regulation, operation,
                              sf_type2remove)

        heatmap_gc_sorted(np.array(projects_tab), df_stat, new_targets,
                          project_names, output, contrast, name)
    finally:
        # The connection is always released, even if the creation of the figure fails.
        cnx.close()
Exemple #6
0
def main():
    """
    Launch the creation of figures.

    The first command line argument selects the mode:
        - no argument: figures are created for the projects returned by ``get_interest_project``
        - "union": figures are created for the names returned by ``group_factor.get_wanted_sf_name``
        - anything else: "wrong arg !" is printed and nothing is created

    Both database connections are closed before returning, whatever happens.
    """
    global exon_type
    exon_type = "CCE"
    # Project root: two levels above this file.
    base_dir = "/".join(os.path.realpath(__file__).split("/")[:-2])
    seddb = base_dir + "/data/sed.db"
    fasterdb = base_dir + "/data/fasterDB_lite.db"
    regs = ["down"]
    cnx = connexion(seddb)
    cnx_fasterdb = connexion(fasterdb)
    try:
        # columns = ["iupac_exon", "exon_size", "upstream_intron_size", "downstream_intron_size", "gene_size",
        # "median_flanking_intron_size", "force_donor", "force_acceptor", "iupac_upstream_intron_adjacent1",
        # "nb_intron_gene", "nb_good_bp_%s" % size_bp_up_seq, "hbound", "ag_count", "mfe_3ss", "mfe_5ss",
        # "iupac_upstream_intron_ppt_area"]
        columns = ["exon_size", "force_donor", "force_acceptor", "iupac_exon", "iupac_upstream_intron",
                   "iupac_downstream_intron"]
        ctrl_dic, ctrl_full = control_exon_adapter.control_handler(cnx, exon_type, size_bp_up_seq)

        # union_args holds the extra trailing argument given to the figure functions in union mode.
        if len(sys.argv) < 2:
            output = base_dir + "/result/project_figures_new/"
            union_args = ()
        elif sys.argv[1] == "union":
            output = base_dir + "/result/project_figures_union_new/"
            union_args = ("union",)
        else:
            print("wrong arg !")
            return

        # makedirs also creates a missing parent folder and tolerates an existing one.
        os.makedirs(output, exist_ok=True)
        if union_args:
            id_projects = None
            name_projects = group_factor.get_wanted_sf_name(cnx)
        else:
            id_projects, name_projects = get_interest_project(cnx)

        for regulation in regs:
            print(regulation)
            for target_column in columns:
                if target_column == "nb_good_bp":
                    os.makedirs(output_bp, exist_ok=True)
                print("   %s" % target_column)
                if "iupac" in target_column:
                    for nt in nt_dic.keys():
                        create_figure_iupac_dnt(cnx, id_projects, name_projects, target_column, regulation,
                                                output, nt, ctrl_dic, ctrl_full, *union_args)
                elif "dnt" in target_column:
                    for dnt in dnt_dic.keys():
                        create_figure_iupac_dnt(cnx, id_projects, name_projects, target_column, regulation,
                                                output, dnt, ctrl_dic, ctrl_full, *union_args)
                else:
                    create_figure(cnx, cnx_fasterdb, id_projects, name_projects, target_column, regulation,
                                  output, ctrl_dic, output_bp, ctrl_full, *union_args)
    finally:
        cnx.close()
        cnx_fasterdb.close()
def main(level, xaxis, yaxis, name_fig, exon_type, nt_list, exon_class, operation, representation):
    """
    Create the correlation figures (e.g. gene_size vs iupac) for the "down" regulated exons.

    :param level: (string) "project" to get one value per project, "exon" to get one value per exon; \
    any other value works with the names returned by ``group_factor.get_wanted_sf_name(None)`` \
    (union level)
    :param xaxis: (string) the characteristic displayed on the x axis
    :param yaxis: (string) the characteristic displayed on the y axis
    :param name_fig: (string) base name of the figures; a suffix depending on ``level`` \
    (and on ``exon_class`` at exon level) is appended to it
    :param exon_type: (string) forwarded to ``control_exon_adapter.control_handler`` and to the \
    figure functions
    :param nt_list: (string) the nucleotides of interest, separated by a comma
    :param exon_class: (string or None) only used at exon level: if None the names returned by \
    ``group_factor.get_wanted_sf_name(None)`` are used, otherwise the exons returned by \
    ``get_concidered_exons``
    :param operation: (string) forwarded to ``control_exon_adapter.control_handler`` and \
    ``get_axis_value``
    :param representation: forwarded to the value and figure functions
    """
    # Project root: two levels above this file.
    base_dir = "/".join(os.path.realpath(__file__).split("/")[:-2])
    cnx = functions.connexion(base_dir + "/data/sed.db")
    try:
        ctrl_dic, _ = control_exon_adapter.control_handler(cnx, exon_type, operation)
        nt_list = nt_list.split(",")
        regulation = "down"
        output = base_dir + "/result/new_correlation/"
        couple_targets = define_couple_targets(xaxis, yaxis, nt_list)

        if level == "exon":
            if exon_class is None:
                exon_source = group_factor.get_wanted_sf_name(None)
            else:
                exon_source = get_concidered_exons(cnx, exon_class, regulation)
                name_fig += "_%s_exons" % exon_class
            name_fig += "_Exon_LVL"
            # makedirs also creates a missing parent folder and tolerates an existing one.
            os.makedirs(output, exist_ok=True)
            gene_columns = ["median_intron_size", "iupac_gene", "gene_size"]
            # Gene-level values are only used when both axes are gene characteristics.
            gene_level = xaxis in gene_columns and yaxis in gene_columns
            get_values = get_gene_values if gene_level else get_exons_values
            for couple in couple_targets:
                target_x, target_y = couple[0], couple[1]
                value_xaxis, value_yaxis, exon_name = get_values(cnx, exon_source, target_x, target_y,
                                                                 regulation)
                value_xaxis, value_yaxis, exon_name = remove_none_values(value_xaxis, value_yaxis, exon_name)
                if not gene_level:
                    # The size filtering and the length check only apply to exon characteristics.
                    value_xaxis, value_yaxis, exon_name = remove_wrong_size_values(value_xaxis, target_x,
                                                                                   exon_name, value_yaxis,
                                                                                   target_y)
                    if len(value_xaxis) != len(value_yaxis) or len(value_xaxis) != len(exon_name):
                        print("Warning the list of value do'nt have the same length")
                value_xaxis, name_x = get_relative_values(value_xaxis, ctrl_dic, target_x, representation)
                value_yaxis, name_y = get_relative_values(value_yaxis, ctrl_dic, target_y, representation)
                figure_creator_exon(value_xaxis, value_yaxis, regulation, name_x, name_y, exon_type,
                                    exon_name, output, name_fig, representation)
                density_creator_exon(value_xaxis, value_yaxis, regulation, name_x, name_y, exon_type, output,
                                     name_fig, representation)
        else:
            if level == "project":
                name_fig += "_project"
                id_projects, name_projects = group_factor.get_id_and_name_project_wanted(cnx, None)
            else:
                # Union level: the same list is used to query the values and to label them.
                name_fig += "_union"
                id_projects = name_projects = group_factor.get_wanted_sf_name(None)
            os.makedirs(output, exist_ok=True)
            for couple in couple_targets:
                target_x, target_y = couple[0], couple[1]
                values_xaxis = get_axis_value(target_x, cnx, id_projects, ctrl_dic, regulation, operation,
                                              representation)
                values_yaxis = get_axis_value(target_y, cnx, id_projects, ctrl_dic, regulation, operation,
                                              representation)
                if len(values_xaxis) != len(values_yaxis):
                    print("Warning the list of value don't have the same length")
                figure_creator(values_xaxis, values_yaxis, name_projects, regulation, target_x,
                               target_y, exon_type, output,
                               name_fig, representation, ctrl_dic)
    finally:
        # The connection is always released, even if the creation of a figure fails.
        cnx.close()