def out_validation(out_path_parameter, global_variables):
    """Validate the "out" parameter string and set up the output path.

    The parameter is a comma-separated list of ``key=value`` sub-parameters.
    A sub-parameter whose text starts with ``path=`` (matched
    case-insensitively) supplies the output path, which is handed to
    ``new_directory`` (defined elsewhere in this file).  On any validation
    failure an error message is written to stderr and the process exits with
    status 1.

    Args:
        out_path_parameter: the raw parameter string, e.g. ``"path=results"``.
        global_variables: not used by this function; kept so that the
            signature stays unchanged for callers.
    """
    format_error = "Error: the out parameter is not in a valid format.\n"

    # required inputs
    out_path = None

    # checks each comma-separated sub-parameter
    for sub_param in out_path_parameter.split(","):
        key_and_value = sub_param.split("=")

        # every sub-parameter must consist of exactly two "="-separated parts
        if len(key_and_value) != 2:
            sys.stderr.write(format_error)
            sys.exit(1)

        # tests the path sub-parameter
        if sub_param.upper().startswith("PATH="):
            out_path = key_and_value[1]

            # makes the outpath and checks that it is valid.  "Exception"
            # rather than a bare except, so that Ctrl-C and SystemExit are
            # not reported as an invalid path.
            try:
                new_directory(out_path)
            except Exception:
                sys.stderr.write(
                    "Error: the outpath is not valid. Does it look right to you?\n"
                )
                sys.exit(1)

    # tests if the required inputs have been supplied
    if out_path is None:
        sys.stderr.write(format_error)
        sys.exit(1)

    # a single parenthesised argument prints identically on Python 2 and 3
    print("validated the out parameter")
Ejemplo n.º 2
0
def copy_shiny_files(global_variables):
    """Copy the static Shiny app files into the output folder.

    Reads ``SL_path`` and ``out_path`` from ``global_variables``.  Copies
    ``ui.r`` and ``global.r`` from ``<SL_path>/shiny/app`` to
    ``<out_path>/shiny``, then passes ``<out_path>/shiny/www`` to
    ``new_directory`` and copies the three ``www`` assets into it.
    """
    source_root = global_variables["SL_path"]
    target_root = global_variables["out_path"]

    app_dir = os.path.join(source_root, "shiny", "app")
    shiny_dir = os.path.join(target_root, "shiny")

    # adds the UI and the server-side globals
    for script_name in ("ui.r", "global.r"):
        copyfile(os.path.join(app_dir, script_name),
                 os.path.join(shiny_dir, script_name))

    # adds the www assets
    www_dir = os.path.join(shiny_dir, "www")
    new_directory(www_dir)
    for asset_name in ("sl2.gif", "sl2.png", "style.css"):
        copyfile(os.path.join(app_dir, "www", asset_name),
                 os.path.join(www_dir, asset_name))
def sub_directories(out_path, type, out_path_tag):
    """Set up the folder layout for one workflow type.

    Passes ``out_path`` and ``<out_path>/<type>`` to ``new_directory``.  When
    ``out_path_tag`` is the literal string ``"NONE"`` the ``network_data``
    folder goes directly under the type folder; otherwise it goes under an
    additional ``<out_path_tag>`` folder.
    """
    new_directory(out_path)

    type_dir = os.path.join(out_path, type)
    new_directory(type_dir)

    if out_path_tag != "NONE":
        # tagged run: network_data lives one level deeper
        tag_dir = os.path.join(type_dir, out_path_tag)
        new_directory(tag_dir)
        new_directory(os.path.join(tag_dir, "network_data"))
        return

    new_directory(os.path.join(type_dir, "network_data"))
Ejemplo n.º 4
0
def biotype_folders(global_variables):
    """Set up one folder for all genes and one per biotype.

    ``<out_path>/all_genes`` is always passed to ``new_directory``.  When
    ``global_variables["biotypes_flag"]`` is truthy, a folder named after
    every key of ``global_variables["biotypes_dict"]`` is added as well.
    """
    base_dir = global_variables["out_path"]

    # makes the all genes directory
    new_directory(os.path.join(base_dir, "all_genes"))

    # nothing more to do when biotypes are switched off
    if not global_variables["biotypes_flag"]:
        return

    for biotype_name in global_variables["biotypes_dict"]:
        new_directory(os.path.join(base_dir, biotype_name))
Ejemplo n.º 5
0
def _copy_workflow_rdata(rdata_in_path, rdata_out_path, missing_warning):
    """Copy one workflow.rdata file, printing ``missing_warning`` on failure."""
    try:
        copyfile(rdata_in_path, rdata_out_path)
    except EnvironmentError:
        # Best effort: an unreadable or missing file is only a warning.
        # NOTE(review): assumes copyfile is shutil.copyfile (imported outside
        # this view), whose failures are IOError/OSError/shutil.Error -- all
        # EnvironmentError.  The previous bare "except:" also swallowed
        # KeyboardInterrupt and programming errors.
        print(missing_warning)


def copy_rdata(biotype, global_variables):
    """Copy the workflow Rdata files of one biotype into the Shiny folder.

    For every enabled workflow family (``normexp_flag``,
    ``pde_workflows_flag``, ``mpde_workflows_flag`` in ``global_variables``)
    the file ``<out_path>/<biotype>/<workflow>/plots/workflow.rdata`` is
    copied to ``<out_path>/shiny/rdata/<biotype>/<workflow>/workflow.rdata``.
    A file that cannot be copied only produces a printed warning.

    Args:
        biotype: name of the biotype folder under ``out_path``.
        global_variables: dict holding ``out_path``, the three flags and,
            when the matching flag is set, ``pde_parameters`` (dicts with a
            ``pde_ID`` key) and ``mpde_parameters`` (dicts with an
            ``mpde_ID`` key).
    """
    out_path = global_variables["out_path"]
    shiny_rdata_path = os.path.join(out_path, "shiny", "rdata", biotype)
    new_directory(shiny_rdata_path)

    if global_variables["normexp_flag"]:
        new_directory(os.path.join(shiny_rdata_path, "normexp_workflow"))
        _copy_workflow_rdata(
            os.path.join(out_path, biotype, "normexp_workflow", "plots",
                         "workflow.rdata"),
            os.path.join(shiny_rdata_path, "normexp_workflow",
                         "workflow.rdata"),
            "Warning: the normexp workflow Rdata file is missing. It will be omitted from Shiny."
        )

    if global_variables["pde_workflows_flag"]:
        for pde_parameter_dict in global_variables["pde_parameters"]:
            pde_ID = pde_parameter_dict["pde_ID"]
            # PDE folders are named with underscores in place of spaces
            pde_ID_no_spaces = pde_ID.replace(" ", "_")
            new_directory(
                os.path.join(shiny_rdata_path, "pde_workflows",
                             pde_ID_no_spaces))
            _copy_workflow_rdata(
                os.path.join(out_path, biotype, "pde_workflows",
                             pde_ID_no_spaces, "plots", "workflow.rdata"),
                os.path.join(shiny_rdata_path, "pde_workflows",
                             pde_ID_no_spaces, "workflow.rdata"),
                "Warning: the PDE workflow " + pde_ID + " Rdata file is missing. It will be omitted from Shiny."
            )

    if global_variables["mpde_workflows_flag"]:
        for mpde_dict in global_variables["mpde_parameters"]:
            # NOTE(review): unlike pde_ID, spaces in mpde_ID are not replaced
            # -- confirm this matches how the MPDE folders are named.
            mpde_ID = mpde_dict["mpde_ID"]
            new_directory(
                os.path.join(shiny_rdata_path, "mpde_workflows", mpde_ID))
            _copy_workflow_rdata(
                os.path.join(out_path, biotype, "mpde_workflows", mpde_ID,
                             "plots", "workflow.rdata"),
                os.path.join(shiny_rdata_path, "mpde_workflows", mpde_ID,
                             "workflow.rdata"),
                "Warning: the MPDE workflow " + mpde_ID + " Rdata file is missing. It will be omitted from Shiny."
            )
Ejemplo n.º 6
0
def get_rdata(global_variables):
    """Collect the workflow Rdata files for Shiny.

    Passes ``<out_path>/shiny/rdata`` to ``new_directory`` and calls
    ``copy_rdata`` for ``"all_genes"``.  When ``biotypes_flag`` is truthy
    and ``biotypes_dict`` has more than one key, ``copy_rdata`` is also
    called for every biotype, in sorted key order.
    """
    rdata_dir = os.path.join(global_variables["out_path"], "shiny", "rdata")

    # does the work for "all genes"
    new_directory(rdata_dir)
    copy_rdata("all_genes", global_variables)

    # does the work for the biotypes (only when there is more than one)
    if not global_variables["biotypes_flag"]:
        return
    if len(global_variables["biotypes_dict"].keys()) <= 1:
        return

    for biotype_name in sorted(global_variables["biotypes_dict"].keys()):
        copy_rdata(biotype_name, global_variables)
Ejemplo n.º 7
0
def build_server(global_variables, shiny_info):
    """Write ``<out_path>/shiny/server.r``.

    The generated file consists of a fixed header that opens the Shiny
    ``server`` function, followed by ``shiny_info`` and then the lines of
    the template ``<SL_path>/shiny/app/server_end.r``.

    Args:
        global_variables: dict providing ``SL_path`` and ``out_path``.
        shiny_info: text written between the header and the template lines.
    """
    SL_path = global_variables["SL_path"]
    out_path = global_variables["out_path"]

    # builds the server
    new_directory(os.path.join(out_path, "shiny"))

    # The template is read before the output is opened, so a missing
    # template does not leave an empty server.r behind.
    with open(os.path.join(SL_path, "shiny", "app",
                           "server_end.r")) as server_end_file:
        server_end_lines = server_end_file.readlines()

    # "with" guarantees the output is flushed and closed; the handles were
    # previously left open.
    with open(os.path.join(out_path, "shiny", "server.r"),
              "w") as server_out_file:
        server_out_file.write("###--- SERVER ---####\n")
        server_out_file.write("options(shiny.maxRequestSize=30*1024^2)\n")
        server_out_file.write("server <- function(input, output, session)\n")
        server_out_file.write("{\n\n")
        server_out_file.write(shiny_info)
        server_out_file.writelines(server_end_lines)
Ejemplo n.º 8
0
def write_data(out_path, genes_by_merged_signature):
    """Write the gene IDs and gene symbols of every signature to text files.

    Signatures are numbered from 1 in order of decreasing gene count.  For
    signature ``n`` the files ``gene_IDs/signature_<n>_IDs.txt`` and
    ``gene_symbols/signature_<n>_symbols.txt`` are written under
    ``out_path``, one gene per line.

    Args:
        out_path: folder that receives the two sub-folders.
        genes_by_merged_signature: mapping of signature to an iterable of
            tab-separated gene strings; the first field is written as the
            ID and the second as the symbol.
    """
    ids_dir = os.path.join(out_path, "gene_IDs")
    symbols_dir = os.path.join(out_path, "gene_symbols")

    new_directory(out_path)
    new_directory(ids_dir)
    new_directory(symbols_dir)

    # sort the signatures by number of genes, largest first
    sorted_on_number_of_genes = sorted(
        genes_by_merged_signature,
        key=lambda k: len(genes_by_merged_signature[k]),
        reverse=True)

    # prints the results for each signature
    for signature_count, signature in enumerate(sorted_on_number_of_genes, 1):

        signature_ids = []
        signature_symbols = []
        for gene in genes_by_merged_signature[signature]:
            gene_fields = gene.split("\t")
            signature_ids.append(gene_fields[0] + "\n")
            signature_symbols.append(gene_fields[1] + "\n")

        # "with" closes (and flushes) each file; the handles were previously
        # left to the garbage collector.
        symbols_name = "signature_" + str(signature_count) + "_symbols.txt"
        with open(os.path.join(symbols_dir, symbols_name), "w") as out_file:
            out_file.writelines(signature_symbols)

        ids_name = "signature_" + str(signature_count) + "_IDs.txt"
        with open(os.path.join(ids_dir, ids_name), "w") as out_file:
            out_file.writelines(signature_ids)
Ejemplo n.º 9
0
def core_sub_directories(global_variables, out_path):
    """Set up the standard folder layout under ``out_path``.

    Passes ``out_path``, ``data`` with its five sub-folders, and ``plots``
    to ``new_directory``, in that order.  ``global_variables`` is not used.
    """
    new_directory(out_path)

    data_dir = os.path.join(out_path, "data")
    new_directory(data_dir)

    data_sub_folders = ("gene_IDs", "gene_symbols", "statistical_analysis",
                        "deciles", "quartiles")
    for folder_name in data_sub_folders:
        new_directory(os.path.join(data_dir, folder_name))

    new_directory(os.path.join(out_path, "plots"))
def _write_gene_list(file_path, genes):
    """Write ``genes`` to ``file_path``, newline-separated, then close it."""
    with open(file_path, "w") as gene_list_file:
        gene_list_file.write("\n".join(genes))


def undirectional_overlaps(mde_file_path, out_path, de_IDs,
                           overlap_statistics_list):
    """Write gene overlaps for every ordered pair of DEs and collect stats.

    For each pair of different IDs in ``de_IDs`` the rows of the MDE file
    are split, by the "True"/"False" text in the two significance columns,
    into genes unique to the first DE, unique to the second DE, and
    overlapping.  Six gene lists (IDs and symbols for each group) are
    written to ``<out_path>/undirectional/<de1>/<de2>/``, with spaces in the
    DE IDs replaced by underscores.

    Args:
        mde_file_path: tab-separated file with one header line; column 0 is
            written as the gene ID and column 1 as the gene symbol.
        out_path: folder under which ``undirectional`` is placed.
        de_IDs: the DE identifiers to compare pairwise.
        overlap_statistics_list: list that receives one statistics row per
            unordered pair (A vs B is treated the same as B vs A); it is
            modified in place.

    Returns:
        The same ``overlap_statistics_list`` object.
    """
    # pairs already recorded in overlap_statistics_list
    recorded_pairs = set()

    # infile -- closed as soon as it has been read
    with open(mde_file_path) as mde_handle:
        mde_file = mde_handle.readlines()

    # iterates through the pairwise combinations of des
    for de1_id in de_IDs:
        for de2_id in de_IDs:

            # excludes self comparing
            if de1_id == de2_id:
                continue

            de1_id_parsed = de1_id.replace(" ", "_")
            de2_id_parsed = de2_id.replace(" ", "_")

            # makes the directory
            pair_path = os.path.join(out_path, "undirectional",
                                     de1_id_parsed, de2_id_parsed)
            new_directory(pair_path)

            # only the significance column index of each de is needed here
            _, _, _, de1_sig, _ = get_Mde_columns_from_file(mde_file, de1_id)
            _, _, _, de2_sig, _ = get_Mde_columns_from_file(mde_file, de2_id)

            # stores the genes in each group
            de1_unique_genes_IDs = []
            de1_unique_genes_symbols = []
            de2_unique_genes_IDs = []
            de2_unique_genes_symbols = []
            overlapping_genes_IDs = []
            overlapping_genes_symbols = []

            # gets the genes in each group (the first line is the header)
            for line in mde_file[1:]:
                line_split = line.rstrip().split("\t")
                de1_flag = line_split[de1_sig]
                de2_flag = line_split[de2_sig]

                if de1_flag == "True" and de2_flag == "False":
                    # de1 unique
                    de1_unique_genes_IDs.append(line_split[0])
                    de1_unique_genes_symbols.append(line_split[1])
                elif de1_flag == "False" and de2_flag == "True":
                    # de2 unique
                    de2_unique_genes_IDs.append(line_split[0])
                    de2_unique_genes_symbols.append(line_split[1])
                elif de1_flag == "True" and de2_flag == "True":
                    # overlapping
                    overlapping_genes_IDs.append(line_split[0])
                    overlapping_genes_symbols.append(line_split[1])

            # outputs the gene lists; each file is closed right after it is
            # written (the handles were previously never closed)
            _write_gene_list(
                os.path.join(pair_path,
                             "IDs_" + de1_id_parsed + "_unique_genes.txt"),
                de1_unique_genes_IDs)
            _write_gene_list(
                os.path.join(pair_path,
                             "symbols_" + de1_id_parsed + "_unique_genes.txt"),
                de1_unique_genes_symbols)
            _write_gene_list(
                os.path.join(pair_path,
                             "IDs_" + de2_id_parsed + "_unique_genes.txt"),
                de2_unique_genes_IDs)
            _write_gene_list(
                os.path.join(pair_path,
                             "symbols_" + de2_id_parsed + "_unique_genes.txt"),
                de2_unique_genes_symbols)
            _write_gene_list(
                os.path.join(pair_path, "IDs_overlapping_genes.txt"),
                overlapping_genes_IDs)
            _write_gene_list(
                os.path.join(pair_path, "symbols_overlapping_genes.txt"),
                overlapping_genes_symbols)

            # gets the overlap stats
            background_size = len(mde_file) - 1
            overlap_size = len(overlapping_genes_IDs)
            candidate_size = len(de1_unique_genes_IDs) + overlap_size
            gene_set_size = len(de2_unique_genes_IDs) + overlap_size
            obs_vs_exp, p_Pos, p_Neg = hypergeometric_test(
                background_size, candidate_size, gene_set_size, overlap_size)

            # updates the overlap stats (considers A vs B the same as B vs A)
            sorted_de = "\t".join(sorted([de1_id, de2_id]))
            if sorted_de not in recorded_pairs:
                overlap_statistics_list.append([
                    de1_id, de2_id, background_size, candidate_size,
                    gene_set_size,
                    len(de1_unique_genes_IDs),
                    len(de2_unique_genes_IDs), overlap_size, obs_vs_exp,
                    p_Neg
                ])
                recorded_pairs.add(sorted_de)

    return overlap_statistics_list
Ejemplo n.º 11
0
# (tag, pr_dictionary key) pairs, applied in this order by parse_line
_PARSE_LINE_TAG_KEYS = (
    ("<*comparisons_list*>", "comparisons_r_string"),
    ("<*sample_sheet_column_names_list*>",
     "sample_sheet_column_names_r_string"),
    ("<*sample_groups_by_SS_column_list*>",
     "sample_groups_by_SS_column_r_string"),
    ("<*sample_groupings_by_SS_column_list*>",
     "sample_groupings_by_SS_column_r_string"),
    ("<*samples_list*>", "samples_r_string"),
    ("<*sample_group_list*>", "sample_groups_r_string"),
    ("<*sample_groupings_list*>", "sample_groupings_r_string"),
    ("<*samples_by_sample_group_list*>", "samples_by_sample_group_r_string"),
    ("<*default_sample_colours_list*>", "default_samples_colours_r_string"),
    ("<*default_sample_colours_by_SS_column_list*>",
     "default_sample_colours_by_SS_column_r_string"),
    ("<*default_sample_group_colours_by_SS_column_list*>",
     "default_sample_group_colours_by_SS_column_r_string"),
    ("<*working_directory*>", "workflow_outpath"),
)

# section markers: a line containing any of them is blanked by parse_line
_PARSE_LINE_SECTION_MARKERS = (
    "<*per_hypergeometric_gene_set*>",
    "<*/per_hypergeometric_gene_set*>",
    "<*per_ipa_ureg*>",
    "<*/per_ipa_ureg*>",
    "<*per_de_signature_hyper_gs*>",
    "<*/per_de_signature_hyper_gs*>",
)


def parse_line(line, pr_dictionary):
    """Expand the ``<*...*>`` template tags in one line.

    General tags are replaced by the matching ``pr_dictionary`` string; a
    key is only looked up when its tag occurs in the line.  A line that
    contains a section marker is replaced by the empty string.  For a
    ``<*path*>a/b/file<*/path*>`` tag (only the first one in the line is
    handled) the tagged text is replaced by the path re-joined with
    ``os.path.join``, and the parent folder of that path, relative to
    ``pr_dictionary["workflow_outpath"]``, is passed to ``new_directory``.

    Args:
        line: one line of template text.
        pr_dictionary: replacement strings, keyed as in the tag table.

    Returns:
        The line with its tags expanded.
    """
    # detects general tags and replaces them with the appropriate string
    for tag, key in _PARSE_LINE_TAG_KEYS:
        if tag in line:
            line = line.replace(tag, pr_dictionary[key])

    # section markers only delimit blocks; their lines are dropped
    if any(marker in line for marker in _PARSE_LINE_SECTION_MARKERS):
        line = ""

    # detects a path tag and converts to os friendly version, and makes a new folder.
    if "<*path*>" in line and "<*/path*>" in line:
        path = line.split("<*path*>")[1].split("<*/path*>")[0]
        path_parts = path.split("/")
        line = line.replace("<*path*>" + path + "<*/path*>",
                            os.path.join(*path_parts))
        # Everything but the last component is the folder to create.  Joining
        # the parents straight onto the workflow outpath also works when the
        # path has a single component (no parents); the previous nested
        # os.path.join(*[]) raised TypeError in that case.
        new_directory(
            os.path.join(pr_dictionary["workflow_outpath"], *path_parts[:-1]))

    return line
Ejemplo n.º 12
0
def sub_directories(out_path, type, out_path_tag):
    """Set up the nested folders for one workflow type and tag.

    Passes, in order, ``out_path``, ``<out_path>/<type>``,
    ``<out_path>/<type>/<out_path_tag>`` and
    ``<out_path>/<type>/<out_path_tag>/network_data`` to ``new_directory``.
    """
    current_dir = out_path
    new_directory(current_dir)

    # descend one level at a time, creating each folder on the way
    for folder_name in (type, out_path_tag, "network_data"):
        current_dir = os.path.join(current_dir, folder_name)
        new_directory(current_dir)
def spatial_enrichment(global_variables, in_path, sample_groups, out_path, type):
    """Summarise gene expression / differential expression per chromosome.

    Reads the tab-separated ``in_path`` (one header line) and writes two
    tab-separated files into ``out_path``:

    * ``spatial_enrichment_gene_data.csv`` -- one row per gene: mean
      expression over the selected samples, chromosome and midpoint
      coordinate, plus log2fold / p / significant / pde_valid when
      ``type`` is ``"PDE"``.
    * ``spatial_enrichment_summary.csv`` -- one row per chromosome: eight
      counters followed by three (value, p) statistic pairs.

    Args:
        global_variables: dict providing ``samples_by_sample_groups`` and
            ``normexp_threshold``.
        in_path: input table; column 0 is used as the gene identifier.
        sample_groups: sample groups whose samples are averaged.
        out_path: output folder (passed to ``new_directory``).
        type: ``"NORMEXP"`` or ``"PDE"``; any other value writes no header
            and no PDE columns.
    """
    # stores the results
    gene_data_dictionary = {}
    summary_dictionary = {}

    # makes the out folder:
    new_directory(out_path)

    # reads the infile (closed as soon as it has been read)
    with open(in_path) as in_handle:
        in_file = in_handle.readlines()

    # chooses the headers:
    genes_header = None
    summary_header = None
    if type == "NORMEXP":
        genes_header = ["gene_id", "mean_expression", "chromosome", "midpoint_coordinate"]
        # NOTE(review): the summary rows written below always carry all 8
        # counters and 6 statistics, so for NORMEXP they have more fields
        # than this 5-column header -- confirm what the consumer expects.
        summary_header = ["chromosome", "total_genes", "expressed_genes", "expressed_genes_bias_log2fold", "expressed_genes_bias_p"]
    if type == "PDE":
        genes_header = ["gene_id", "mean_expression", "chromosome", "midpoint_coordinate", "log2fold", "p", "significant", "pde_valid"]
        summary_header = ["chromosome", "total_genes", "expressed_genes", "pde_valid_genes", "positive_fold_genes", "negative_fold_genes", "significant_genes", "upregulated_genes", "downregulated_genes", "expressed_genes_bias_log2fold", "expressed_genes_bias_p", "significant_genes_bias_log2fold", "significant_genes_bias_p", "direction_bias_swing", "direction_bias_p"]

    # gets a dictionary of the samples
    samples_by_sample_groups = global_variables["samples_by_sample_groups"]
    samples_dict = {}
    for sample_group in sample_groups:
        for sample in samples_by_sample_groups[sample_group]:
            samples_dict[sample] = True

    # gets the expression threshold
    expressed_threshold = global_variables["normexp_threshold"]

    # gets the column information for the infile
    sample_columns = get_sample_columns_from_file(samples_dict, in_file)
    coordinate_columns = get_coordinate_columns_from_file(in_file)

    if type == "PDE":
        pde_columns = get_PDE_columns_from_file(in_file)

    # gets the gene and summary information (the first line is the header)
    for line in in_file[1:]:
        line_split = line.rstrip().split("\t")

        # gets the mean expression
        mean_expression = get_mean_expression(line_split, sample_columns)

        # gets the coordinates
        chromosome = line_split[coordinate_columns["CHROMOSOME"]]
        start = int(line_split[coordinate_columns["START"]])
        stop = int(line_split[coordinate_columns["STOP"]])
        # Midpoint of the gene.  This used to be (stop - start) / 2, which is
        # half the gene length rather than a coordinate.  "//" keeps the
        # integer result on both Python 2 and 3.
        mid_point = (start + stop) // 2

        # updates the results with the normexp information
        gene_data = [str(mean_expression), chromosome, str(mid_point)]

        # counters per chromosome: [0] total, [1] expressed, [2] pde valid,
        # [3] positive fold, [4] negative fold, [5] significant,
        # [6] upregulated, [7] downregulated
        if chromosome in summary_dictionary:
            chromosome_summary = summary_dictionary[chromosome]
        else:
            chromosome_summary = [0, 0, 0, 0, 0, 0, 0, 0]

        chromosome_summary[0] += 1

        # tests for an expressed gene
        if mean_expression >= expressed_threshold:
            chromosome_summary[1] += 1

        # updates the results with the PDE information
        if type == "PDE":
            log2fold_text = line_split[pde_columns["LOG2FOLD"]]
            significant_text = line_split[pde_columns["SIG"]]
            pde_valid_text = line_split[pde_columns["PDE_VALID"]]

            gene_data.append(log2fold_text)
            gene_data.append(line_split[pde_columns["P"]])
            gene_data.append(significant_text)
            gene_data.append(pde_valid_text)

            if pde_valid_text == "True":
                log2fold = float(log2fold_text)
                chromosome_summary[2] += 1
                if log2fold > 0:
                    chromosome_summary[3] += 1
                elif log2fold < 0:
                    chromosome_summary[4] += 1
                if significant_text == "True":
                    chromosome_summary[5] += 1
                    if log2fold > 0:
                        chromosome_summary[6] += 1
                    elif log2fold < 0:
                        chromosome_summary[7] += 1

        # updates the results
        gene_data_dictionary[line_split[0]] = gene_data
        summary_dictionary[chromosome] = chromosome_summary

    # totals over all chromosomes
    total_genes = 0
    total_expressed_genes = 0
    total_pde_valid_genes = 0
    total_significant = 0
    total_upregulated_genes = 0
    total_downregulated_genes = 0

    for chromosome in summary_dictionary:
        chromosome_summary = summary_dictionary[chromosome]

        total_genes += chromosome_summary[0]
        total_expressed_genes += chromosome_summary[1]
        total_pde_valid_genes += chromosome_summary[2]
        total_significant += chromosome_summary[5]
        total_upregulated_genes += chromosome_summary[6]
        total_downregulated_genes += chromosome_summary[7]

    if total_significant > 0:
        ratio_upregulated = float(total_upregulated_genes) / float(total_significant)
        ratio_downregulated = float(total_downregulated_genes) / float(total_significant)
    else:
        ratio_upregulated = 0.0
        ratio_downregulated = 0.0

    # stats
    #
    # Each statistic pair is computed in full before it is appended, so a
    # failure always yields exactly two "NA" fields.  Previously the first
    # value could already be appended when the second raised, which shifted
    # every later column of that row.  "Exception" replaces the bare except
    # so that Ctrl-C is not turned into "NA".
    for chromosome in summary_dictionary:
        chromosome_summary = summary_dictionary[chromosome]
        chromosome_total = float(chromosome_summary[0])
        chromosome_expressed = float(chromosome_summary[1])
        chromosome_pde_valid = float(chromosome_summary[2])
        chromosome_significant = float(chromosome_summary[5])
        chromosome_upregulated = float(chromosome_summary[6])
        chromosome_downregulated = float(chromosome_summary[7])

        # expressed genes bias
        try:
            expected_expressed = (chromosome_total / float(total_genes)) * float(total_expressed_genes)
            expressed_genes_log2fold = math.log(chromosome_expressed + 0.001, 2) - math.log(expected_expressed + 0.001, 2)
            odds, expressed_genes_p_value = scipy.stats.fisher_exact(
                [[float(total_genes), chromosome_total],
                 [float(total_expressed_genes), chromosome_expressed]],
                alternative='two-sided')
            expressed_stats = [round(expressed_genes_log2fold, 2), expressed_genes_p_value]
        except Exception:
            expressed_stats = ["NA", "NA"]
        chromosome_summary.extend(expressed_stats)

        # sig genes bias
        try:
            expected_significant = (chromosome_pde_valid / float(total_pde_valid_genes)) * float(total_significant)
            significant_genes_log2fold = math.log(chromosome_significant + 0.001, 2) - math.log(expected_significant + 0.001, 2)
            odds, sig_genes_bias_p_value = scipy.stats.fisher_exact(
                [[float(total_pde_valid_genes), chromosome_pde_valid],
                 [float(total_significant), chromosome_significant]],
                alternative='two-sided')
            significant_stats = [round(significant_genes_log2fold, 2), sig_genes_bias_p_value]
        except Exception:
            significant_stats = ["NA", "NA"]
        chromosome_summary.extend(significant_stats)

        # direction bias
        try:
            expected_upregulated = ratio_upregulated * chromosome_significant
            expected_downregulated = ratio_downregulated * chromosome_significant
            swing_difference = chromosome_upregulated - expected_upregulated
            swing = str(round(swing_difference / chromosome_significant * 100, 2)) + "%"
            odds, direction_bias_p_value = scipy.stats.fisher_exact(
                [[expected_upregulated, chromosome_upregulated],
                 [expected_downregulated, chromosome_downregulated]],
                alternative='two-sided')
            direction_stats = [swing, direction_bias_p_value]
        except Exception:
            direction_stats = ["NA", "NA"]
        chromosome_summary.extend(direction_stats)

        summary_dictionary[chromosome] = chromosome_summary

    # outputs the gene data results ("with" closes and flushes the file,
    # which was previously left open):
    with open(os.path.join(out_path, "spatial_enrichment_gene_data.csv"), "w") as genes_out_file:
        if genes_header is not None:
            genes_out_file.write("\t".join(genes_header) + "\n")
        for gene in gene_data_dictionary:
            genes_out_file.write(gene + "\t" + "\t".join(gene_data_dictionary[gene]) + "\n")

    # outputs the summary:
    with open(os.path.join(out_path, "spatial_enrichment_summary.csv"), "w") as summary_out_file:
        if summary_header is not None:
            summary_out_file.write("\t".join(summary_header) + "\n")
        for chromosome in summary_dictionary:
            summary_out_file.write(chromosome + "\t" + "\t".join(map(str, summary_dictionary[chromosome])) + "\n")