예제 #1
0
def load_and_run(args):

    data_file = args.data_file
    rand_data_file = args.rand_data_file
    save_prefix = args.out_prefix

    assert args.test in {'e', 'l', 'r'}
    fit_method = cp.test2fit_method[args.test]

    lag = args.lag
    best_hyper = pickle.load(open(args.best_hyper_file, 'rb'))

    if args.row_file != None:
        rows = pickle.load(open(args.row_file, 'rb'))
    else:
        rows = None

    assert args.null in {"l", "g"}

    # Load data file    # Load data file
    if args.load_reps:
        genes, geneTS = gtm.load_basic_rep_file_list(data_file)
        genesr, geneTSr = gtm.load_basic_rep_file_list(rand_data_file)

        # dfs, genes, geneTS, df, __, __  = gtm.load_rep_file_list(data_file)
        # dfsr, genesr, geneTSr, dfr, __, __  = gtm.load_rep_file_list(rand_data_file)

    else:
        df = pd.read_csv(data_file, sep="\t")
        genes, geneTS = gtm.get_gene_TS(df)
        dfr = pd.read_csv(rand_data_file, sep="\t")
        genesr, geneTSr = gtm.get_gene_TS(dfr)

    assert (geneTS.shape == geneTSr.shape)
    assert (genes == genesr).all()

    coefs, intercepts, fit_result_df, coefsr, fit_result_dfr = cp.run(
        geneTS,
        geneTSr,
        hyper=best_hyper,
        fit_method=fit_method,
        lag=lag,
        rows=rows,
        save_prefix=save_prefix,
        has_reps=args.load_reps,
        null=args.null,
        only_array=args.only_array)

    print("RESULTS of causal fit")
    print("*************************")
    print("NORMAL: ")
    cp.summarize_fit(coefs, intercepts, fit_result_df)
예제 #2
0
def load_and_run(args):

    lag = args.lag
    save_prefix = args.save_prefix

    assert args.stratify_by in {"e", "n"}

    stratify_by = cp.args2stratify_by[args.stratify_by]

    # Load data file and prepare a file to pass to plotters
    if args.load_reps:
        # load
        dfs, genes, geneTS, df, timekeys, num_per_keys = gtm.load_rep_file_list(
            args.data_file)
        dfsr, genesr, geneTSr, dfr, __, __ = gtm.load_rep_file_list(
            args.rand_data_file)

        # get shared prefix timekeys

        print "Timekeys: ", timekeys
        print "Num per key: ", num_per_keys

    else:
        df = pd.read_csv(args.data_file, sep="\t")
        genes, geneTS = gtm.get_gene_TS(df)
        dfr = pd.read_csv(args.rand_data_file, sep="\t")
        genesr, geneTSr = gtm.get_gene_TS(dfr)

        timekeys = df.columns.values[1:]
        print "Timekeys: ", timekeys

        # Num. replicates per key
        num_per_keys = None

    assert (geneTS.shape == geneTSr.shape)
    assert (genes == genesr).all()

    coefs = pickle.load(open(args.coef_file, 'rB'))
    intercepts = pickle.load(open(args.intercept_file, 'rB'))
    fit_result_df = pd.read_csv(args.fit_result_file, sep="\t")

    coefsr = pickle.load(open(args.coef_rand_file, 'rB'))
    # interceptsr = pickle.load(open(args.intercept_rand_file, 'rB'))
    fit_result_dfr = pd.read_csv(args.fit_result_rand_file, sep="\t")

    if args.best_hyper_file != None:
        best_hyper = pickle.load(open(args.best_hyper_file, 'rB'))
    else:
        best_hyper = None

    print "RESULTS"
    print "*************************"
    print "NORMAL: "
    cp.summarize_fit(coefs,
                     intercepts,
                     fit_result_df,
                     filename="fit_all_summary_normal.txt",
                     hyper=best_hyper,
                     test_name=args.test_name,
                     lag=lag)

    # print "*************************"
    # print "RANDOM:"
    # cp.summarize_fit(coefsr, interceptsr, fit_result_dfr, filename="fit_all_summary_random.txt", hyper=best_hyper,
    #                  test_name=args.test_name, lag=lag)

    # LEFT OFF HERE: SEE IF YOU CAN STILL DO FIT_RESULT_SUMMARY W/O INTERCEPT
    # -Jlu 1/25/17 10:14 AM

    # Align the coefs

    # print "Aligning coefficients"
    acoefs = lc.align_coefs(coefs, lag)
    acoefsr = lc.align_coefs(coefsr, lag)

    print "Removing alphas (gene-on-self effects) "

    acoefs = lc.remove_alphas(acoefs, lag)
    acoefsr = lc.remove_alphas(acoefsr, lag)

    coef_nets = []
    coefr_nets = []

    # Save the gene matrices
    for i in range(acoefs.shape[0]):
        coef_matr_filename = save_prefix + "-" + str(i + 1) + "-matrix.txt"
        coefr_matr_filename = save_prefix + "-" + str(i + 1) + "-r-matrix.txt"

        coef_net_filename = save_prefix + "-" + str(i + 1) + "-network.txt"
        coefr_net_filename = save_prefix + "-" + str(i + 1) + "-r-network.txt"

        coef_matr = gtm.save_gene_matrix(filename=coef_matr_filename,
                                         matrix=acoefs[i],
                                         genes=genes)
        coefr_matr = gtm.save_gene_matrix(filename=coefr_matr_filename,
                                          matrix=acoefsr[i],
                                          genes=genes)

        extra_dict = collections.OrderedDict()
        extra_dict["Test"] = args.test_name
        extra_dict["Lag"] = acoefs.shape[0]
        extra_dict["Coef"] = i + 1

        coef_net = nh.matr_to_net(coef_matr,
                                  extra_dict=extra_dict,
                                  make_type=False)
        coefr_net = nh.matr_to_net(coefr_matr,
                                   extra_dict=extra_dict,
                                   make_type=False)

        coef_net.to_csv(coef_net_filename, sep="\t", index=False)
        coefr_net.to_csv(coefr_net_filename, sep="\t", index=False)

        coef_nets.append(coef_net)
        coefr_nets.append(coefr_net)

        print "Coef ", i + 1
        print "Networks written to "
        print coef_net_filename
        print coefr_net_filename

    # max_net_filename = save_prefix + "-max-network.txt"
    # max_r_net_filename = save_prefix + "-max-r-network.txt"
    union_net_filename = save_prefix + "-union-network.txt"
    union_r_net_filename = save_prefix + "-union-r-network.txt"

    if acoefs.shape[0] > 1:
        m_net = cp.get_max_network(coef_nets,
                                   max_col="AbsWeight",
                                   index_col="Cause-Effect")
        union_net = cp.get_union_network(
            coef_nets + [m_net],
            suffixes=[str(i) for i in range(1, acoefs.shape[0] + 1)] + [""])
        print "Max network edges: ", m_net.shape
        print "Union network edges: ", union_net.shape
    else:
        union_net = coef_nets[0]
    union_net.to_csv(union_net_filename, sep="\t", index=False)

    if acoefsr.shape[0] > 1:
        m_net = cp.get_max_network(coefr_nets,
                                   max_col="AbsWeight",
                                   index_col="Cause-Effect")
        union_r_net = cp.get_union_network(
            coefr_nets + [m_net],
            suffixes=[str(i) for i in range(1, acoefs.shape[0] + 1)] + [""])
    else:
        union_r_net = coefr_nets[0]
    union_r_net.to_csv(union_r_net_filename, sep="\t", index=False)

    # print "Max networks written to "
    # print max_net_filename
    # print max_r_net_filename
    print "Unioned networks written to "
    print union_net_filename
    print union_r_net_filename

    if not os.path.exists("plots"):
        os.makedirs("plots")
    if not os.path.exists("plots" + os.sep + "betas"):
        os.makedirs("plots" + os.sep + "betas")

    # Plot the betas
    for i in range(acoefs.shape[0]):

        if len(np.nonzero(acoefs[i])[0]) > 0 and len(
                np.nonzero(acoefsr[i])[0]) > 0:

            fc.plot_betas(acoefs[i][np.nonzero(acoefs[i])].flatten(),
                          acoefsr[i][np.nonzero(acoefsr[i])].flatten(),
                          filename="plots" + os.sep + "betas" + os.sep +
                          "beta_nonzero_coef-" + str(i + 1),
                          title="Causal coefs, Coef " + str(i + 1),
                          xlabel="Causal Coefficient")
            fc.plot_betas(acoefs[i][np.nonzero(acoefs[i])].flatten(),
                          acoefsr[i][np.nonzero(acoefsr[i])].flatten(),
                          filename="plots" + os.sep + "betas" + os.sep +
                          "beta_nonzero_coef-" + str(i + 1) + "_zoom-in-90",
                          zoom_in_top_percentile=95,
                          zoom_in_bottom_percentile=5,
                          title="Causal coefs, Coef " + str(i + 1),
                          xlabel="Causal Coefficient")

            fc.plot_betas(
                np.absolute(acoefs[i][np.nonzero(acoefs[i])].flatten()),
                np.absolute(acoefsr[i][np.nonzero(acoefsr[i])].flatten()),
                filename="plots" + os.sep + "betas" + os.sep +
                "beta_abs_coef-" + str(i + 1),
                title="Absolute causal coefs, Coef " + str(i + 1),
                xlabel="Absolute Causal Coefficient")
            fc.plot_betas(
                np.absolute(acoefs[i][np.nonzero(acoefs[i])].flatten()),
                np.absolute(acoefsr[i][np.nonzero(acoefsr[i])].flatten()),
                filename="plots" + os.sep + "betas" + os.sep +
                "beta_abs_coef-" + str(i + 1) + "_zoom-in-bottom-95",
                zoom_in_top_percentile=95,
                title="Absolute causal coefs, Coef " + str(i + 1),
                xlabel="Absolute Causal Coefficient")
            fc.plot_betas(
                np.absolute(acoefs[i][np.nonzero(acoefs[i])].flatten()),
                np.absolute(acoefsr[i][np.nonzero(acoefsr[i])].flatten()),
                filename="plots" + os.sep + "betas" + os.sep +
                "beta_abs_coef-" + str(i + 1) + "_zoom-in-top-5",
                zoom_in_bottom_percentile=95,
                title="Absolute causal coefs, Coef " + str(i + 1),
                xlabel="Absolute Causal Coefficient")

        print "Coef ", i + 1
        print "Plots of betas written to: plots" + os.sep + "betas"

    # get FDRS
    fdrs = [0.01, 0.05, 0.1, 0.2]

    acoefs_fdrs = []
    sf_dfs = []

    for fdr in fdrs:

        fdr_dir = "fdr-" + str(fdr) + "-" + stratify_by
        if not os.path.exists(fdr_dir):
            os.makedirs(fdr_dir)

        fdr_prefix = fdr_dir + os.sep + save_prefix

        acoefs_fdr = np.zeros(acoefs.shape)

        fdr_nets = []

        print "*************"
        for i in range(acoefs.shape[0]):
            print "-----"
            print "FDR = ", fdr
            print "Lag ", lag
            print "Coef ", i + 1
            print "Stratify ", stratify_by
            acoefs_fdr[i], threshes = fc.get_abs_thresh(
                acoefs[i], acoefsr[i], fdr, stratify_by=stratify_by)
            # print "Threshes", threshes

            fdr_matr_filename = fdr_prefix + "-" + str(i + 1) + "-fdr-" + str(
                fdr) + "-" + stratify_by + "-matrix.txt"
            fdr_net_filename = fdr_prefix + "-" + str(i + 1) + "-fdr-" + str(
                fdr) + "-" + stratify_by + "-network.txt"

            fdr_matr = gtm.save_gene_matrix(fdr_matr_filename,
                                            matrix=acoefs_fdr[i],
                                            genes=genes)
            pickle.dump(
                threshes,
                open(
                    fdr_prefix + "-" + str(i + 1) + "-fdr-" + str(fdr) + "-" +
                    stratify_by + "-threshes.p", 'wB'))

            extra_dict = collections.OrderedDict()
            extra_dict["Test"] = args.test_name
            extra_dict["Lag"] = acoefs.shape[0]
            extra_dict["Coef"] = i + 1

            fdr_net = nh.matr_to_net(fdr_matr,
                                     extra_dict=extra_dict,
                                     make_type=False)
            fdr_net.to_csv(fdr_net_filename, sep="\t", index=False)
            fdr_nets.append(fdr_net)

            # write summary readme
            sf_df = fc.summarize_fdr(matr=acoefs_fdr[i],
                                     test=args.test_name,
                                     fdr=fdr,
                                     lag=lag,
                                     coef=i + 1,
                                     hyper=best_hyper,
                                     thresh=threshes,
                                     readme_name=fdr_prefix + "-" +
                                     str(i + 1) + "-fdr-" + str(fdr) + "-" +
                                     stratify_by + "-README.txt",
                                     matrixname=fdr_matr_filename,
                                     filename=fdr_net_filename)

            sf_dfs.append(sf_df)

            print "Network edges: ", fdr_net.shape[0]

        if acoefs_fdr.shape[0] > 1:
            m_net = cp.get_max_network(fdr_nets,
                                       max_col="AbsWeight",
                                       index_col="Cause-Effect")
            union_net = cp.get_union_network(
                fdr_nets + [m_net],
                suffixes=[str(i)
                          for i in range(1, acoefs_fdr.shape[0] + 1)] + [""])

        else:
            union_net = fdr_nets[0]

        union_net_filename = fdr_prefix + "-union-fdr-" + str(
            fdr) + "-" + stratify_by + "-network.txt"
        union_net.to_csv(union_net_filename, sep="\t", index=False)

        print "Union network edges", union_net.shape[0]
        print "Written to ", union_net_filename

        acoefs_fdrs.append(acoefs_fdr.copy())

    all_sf_dfs = pd.concat(sf_dfs)

    all_sf_dfs.to_csv("fit_all_summary_fdr-" + stratify_by + ".txt",
                      sep="\t",
                      index=False)
    print "********"
    print "Summaries of all fdrs written to fit_all_summary_fdr-" + stratify_by + ".txt"
    print "Matrices done."

    with open("matrices_done.txt", 'w') as donefile:
        donefile.write("done\n")

    if args.plot_coef_fdr:
        print "*******"
        print "PLOTS"
        for i, fdr in zip(range(len(fdrs)), fdrs):
            acoefs_fdr = acoefs_fdrs[i]

            if not os.path.exists("plots" + os.sep + "fdr-" + str(fdr)):
                os.makedirs("plots" + os.sep + "fdr-" + str(fdr))

            # Only plot the bar if replicates were loaded
            cp.plot_all_coef(acoefs_fdr,
                             df,
                             genes,
                             lag,
                             file_prefix="plots" + os.sep + "fdr-" + str(fdr) +
                             os.sep + save_prefix + "-",
                             plot_bar=args.load_reps,
                             keys=timekeys,
                             num_per_keys=num_per_keys,
                             linewidth=2,
                             capsize=5,
                             capwidth=2,
                             verbose=True)

            # Plot them without error bars just to check
            if args.load_reps:
                cp.plot_all_coef(acoefs_fdr,
                                 df,
                                 genes,
                                 lag,
                                 file_prefix="plots" + os.sep + "fdr-" +
                                 str(fdr) + os.sep + save_prefix + "-nobar-",
                                 plot_bar=False,
                                 keys=timekeys,
                                 num_per_keys=num_per_keys,
                                 linewidth=2,
                                 capsize=5,
                                 capwidth=2)

            print "FDR plots written to: ", "plots" + os.sep + "fdr-" + str(
                fdr)

    # Plot all the coefs
    # NOTE: this will take a long time!
    if args.plot_all:

        raise ValueError(
            "Fix all the below first before trying to do plot all")

        if not os.path.exists("plots" + os.sep + "original"):
            os.makedirs("plots" + os.sep + "original")
        cp.plot_all_coef(acoefs,
                         df,
                         genes,
                         lag,
                         file_prefix="plots" + os.sep + "original" + os.sep +
                         save_prefix + "-",
                         plot_bar=args.load_reps,
                         keys=timekeys,
                         num_per_keys=num_per_keys,
                         linewidth=2,
                         capsize=5,
                         capwidth=2)
        print "Original plots written to: ", "plots" + os.sep + "original"

        if not os.path.exists("plots" + os.sep + "randomized"):
            os.makedirs("plots" + os.sep + "randomized")
        cp.plot_all_coef(acoefsr,
                         dfr,
                         genes,
                         lag,
                         file_prefix="plots" + os.sep + "randomized" + os.sep +
                         save_prefix + "-",
                         plot_bar=args.load_reps,
                         keys=timekeys,
                         num_per_keys=num_per_keys,
                         linewidth=2,
                         capsize=5,
                         capwidth=2)

        print "Randomized plots written to: ", "plots" + os.sep + "randomized"
예제 #3
0
def load_and_run(args):

    lag = args.lag
    save_prefix = args.save_prefix

    assert args.stratify_by in {"e", "n"}

    stratify_by = cp.args2stratify_by[args.stratify_by]

    if args.output_folder == None:
        args.output_folder = "."

    # Load data file and prepare a file to pass to plotters
    if args.load_reps:
        # load

        genes, geneTS = gtm.load_basic_rep_file_list(args.data_file)
        genesr, geneTSr = gtm.load_basic_rep_file_list(args.rand_data_file)

        # dfs, genes, geneTS, df, timekeys, num_per_keys  = gtm.load_rep_file_list(args.data_file)
        # dfsr, genesr, geneTSr, dfr, __, __  = gtm.load_rep_file_list(args.rand_data_file)

        # get shared prefix timekeys

        # print "Timekeys: ", timekeys
        # print "Num per key: ", num_per_keys

    else:
        df = pd.read_csv(args.data_file, sep="\t")
        genes, geneTS = gtm.get_gene_TS(df)
        dfr = pd.read_csv(args.rand_data_file, sep="\t")
        genesr, geneTSr = gtm.get_gene_TS(dfr)

        timekeys = df.columns.values[1:]
        print("Timekeys: ", timekeys)

        # Num. replicates per key
        num_per_keys = None

    assert (geneTS.shape == geneTSr.shape)
    assert (genes == genesr).all()

    coefs = pickle.load(open(args.coef_file, 'rb'))
    intercepts = pickle.load(open(args.intercept_file, 'rb'))
    fit_result_df = pd.read_csv(args.fit_result_file, sep="\t")

    coefsr = pickle.load(open(args.coef_rand_file, 'rb'))
    # interceptsr = pickle.load(open(args.intercept_rand_file, 'rb'))
    fit_result_dfr = pd.read_csv(args.fit_result_rand_file, sep="\t")

    if args.best_hyper_file != None:
        best_hyper = pickle.load(open(args.best_hyper_file, 'rb'))
    else:
        best_hyper = None

    print("RESULTS")

    print("*************************")
    print("RESIDUALS: ")

    print("*************************")
    print("NORMAL: ")
    cp.summarize_fit(coefs,
                     intercepts,
                     fit_result_df,
                     filename=os.path.join(args.output_folder,
                                           "fit_all_summary_normal.txt"),
                     hyper=best_hyper,
                     test_name=args.test_name,
                     lag=lag)

    # Align the coefs

    # print "Aligning coefficients"
    acoefs = lc.align_coefs(coefs, lag)
    acoefsr = lc.align_coefs(coefsr, lag)

    print("Removing alphas (gene-on-self effects) ")

    acoefs = lc.remove_alphas(acoefs, lag)
    acoefsr = lc.remove_alphas(acoefsr, lag)

    coef_nets = []
    coefr_nets = []

    # Save the gene matrices
    for i in range(acoefs.shape[0]):
        coef_matr_filename = os.path.join(
            args.output_folder, save_prefix + "-" + str(i + 1) + "-matrix.txt")
        coefr_matr_filename = os.path.join(
            args.output_folder,
            save_prefix + "-" + str(i + 1) + "-r-matrix.txt")

        coef_net_filename = os.path.join(
            args.output_folder,
            save_prefix + "-" + str(i + 1) + "-network.txt")
        coefr_net_filename = os.path.join(
            args.output_folder,
            save_prefix + "-" + str(i + 1) + "-r-network.txt")

        coef_matr = gtm.save_gene_matrix(filename=coef_matr_filename,
                                         matrix=acoefs[i],
                                         genes=genes)
        coefr_matr = gtm.save_gene_matrix(filename=coefr_matr_filename,
                                          matrix=acoefsr[i],
                                          genes=genes)

        extra_dict = collections.OrderedDict()
        extra_dict["Test"] = args.test_name
        extra_dict["Lag"] = acoefs.shape[0]
        extra_dict["Coef"] = i + 1

        coef_net = nh.matr_to_net(coef_matr,
                                  extra_dict=extra_dict,
                                  make_type=False)
        coefr_net = nh.matr_to_net(coefr_matr,
                                   extra_dict=extra_dict,
                                   make_type=False)

        coef_net.to_csv(coef_net_filename, sep="\t", index=False)
        coefr_net.to_csv(coefr_net_filename, sep="\t", index=False)

        coef_nets.append(coef_net)
        coefr_nets.append(coefr_net)

        print("Coef ", i + 1)
        print("Networks written to ")
        print(coef_net_filename)
        print(coefr_net_filename)

    # max_net_filename = save_prefix + "-max-network.txt"
    # max_r_net_filename = save_prefix + "-max-r-network.txt"
    union_net_filename = os.path.join(args.output_folder,
                                      save_prefix + "-union-network.txt")
    union_r_net_filename = os.path.join(args.output_folder,
                                        save_prefix + "-union-r-network.txt")

    if acoefs.shape[0] > 1:
        m_net = cp.get_max_network(coef_nets,
                                   max_col="AbsWeight",
                                   index_col="Cause-Effect")
        union_net = cp.get_union_network(
            coef_nets + [m_net],
            suffixes=[str(i) for i in range(1, acoefs.shape[0] + 1)] + [""])
        print("Max network edges: ", m_net.shape)
        print("Union network edges: ", union_net.shape)
    else:
        union_net = coef_nets[0]
    union_net.to_csv(union_net_filename, sep="\t", index=False)

    if acoefsr.shape[0] > 1:
        m_net = cp.get_max_network(coefr_nets,
                                   max_col="AbsWeight",
                                   index_col="Cause-Effect")
        union_r_net = cp.get_union_network(
            coefr_nets + [m_net],
            suffixes=[str(i) for i in range(1, acoefs.shape[0] + 1)] + [""])
    else:
        union_r_net = coefr_nets[0]
    union_r_net.to_csv(union_r_net_filename, sep="\t", index=False)

    # print "Max networks written to "
    # print max_net_filename
    # print max_r_net_filename
    print("Unioned networks written to ")
    print(union_net_filename)
    print(union_r_net_filename)

    if not os.path.exists(os.path.join(args.output_folder, "plots")):
        os.makedirs(os.path.join(args.output_folder, "plots"))

    if args.plot_coef:
        if not os.path.exists(
                os.path.join(args.output_folder, "plots", "betas")):
            os.makedirs(os.path.join(args.output_folder, "plots", "betas"))

        # Plot the betas
        for i in range(acoefs.shape[0]):

            if len(np.nonzero(acoefs[i])[0]) > 0 and len(
                    np.nonzero(acoefsr[i])[0]) > 0:

                fc.plot_betas(acoefs[i][np.nonzero(acoefs[i])].flatten(),
                              acoefsr[i][np.nonzero(acoefsr[i])].flatten(),
                              filename=os.path.join(
                                  args.output_folder, "plots", "betas",
                                  "beta_nonzero_coef-" + str(i + 1)),
                              title="Causal coefs, Coef " + str(i + 1),
                              xlabel="Causal Coefficient")
                fc.plot_betas(
                    acoefs[i][np.nonzero(acoefs[i])].flatten(),
                    acoefsr[i][np.nonzero(acoefsr[i])].flatten(),
                    filename=os.path.join(
                        args.output_folder, "plots", "betas",
                        "beta_nonzero_coef-" + str(i + 1) + "_zoom-in-90"),
                    zoom_in_top_percentile=95,
                    zoom_in_bottom_percentile=5,
                    title="Causal coefs, Coef " + str(i + 1),
                    xlabel="Causal Coefficient")

                fc.plot_betas(
                    np.absolute(acoefs[i][np.nonzero(acoefs[i])].flatten()),
                    np.absolute(acoefsr[i][np.nonzero(acoefsr[i])].flatten()),
                    filename=os.path.join(args.output_folder, "plots", "betas",
                                          "beta_abs_coef-" + str(i + 1)),
                    title="Absolute causal coefs, Coef " + str(i + 1),
                    xlabel="Absolute Causal Coefficient")
                fc.plot_betas(
                    np.absolute(acoefs[i][np.nonzero(acoefs[i])].flatten()),
                    np.absolute(acoefsr[i][np.nonzero(acoefsr[i])].flatten()),
                    filename=os.path.join(
                        args.output_folder, "plots", "betas",
                        "beta_abs_coef-" + str(i + 1) + "_zoom-in-bottom-95"),
                    zoom_in_top_percentile=95,
                    title="Absolute causal coefs, Coef " + str(i + 1),
                    xlabel="Absolute Causal Coefficient")
                fc.plot_betas(
                    np.absolute(acoefs[i][np.nonzero(acoefs[i])].flatten()),
                    np.absolute(acoefsr[i][np.nonzero(acoefsr[i])].flatten()),
                    filename=os.path.join(
                        args.output_folder, "plots", "betas",
                        "beta_abs_coef-" + str(i + 1) + "_zoom-in-top-5"),
                    zoom_in_bottom_percentile=95,
                    title="Absolute causal coefs, Coef " + str(i + 1),
                    xlabel="Absolute Causal Coefficient")

            print("Coef ", i + 1)
            print("Plots of betas written to: ",
                  os.path.join(args.output_folder, "plots", "betas"))

    # get FDRS
    fdrs = [0.01, 0.05, 0.1, 0.2]

    acoefs_fdrs = []
    sf_dfs = []

    for fdr in fdrs:

        fdr_dir = os.path.join(args.output_folder,
                               "fdr-" + str(fdr) + "-" + stratify_by)
        if not os.path.exists(fdr_dir):
            os.makedirs(fdr_dir)

        fdr_prefix = fdr_dir + os.sep + save_prefix

        # in case we want there to be an intermediate directory for fdr, like the bootstrap case.
        # if not os.path.exists(os.path.dirname(fdr_prefix)):
        #     os.makedirs(os.path.dirname(fdr_prefix))

        acoefs_fdr = np.zeros(acoefs.shape)

        fdr_nets = []

        print("*************")
        for i in range(acoefs.shape[0]):
            print("-----")
            print("FDR = ", fdr)
            print("Lag ", lag)
            print("Coef ", i + 1)
            print("Stratify ", stratify_by)
            acoefs_fdr[i], threshes = fc.get_abs_thresh(
                acoefs[i], acoefsr[i], fdr, stratify_by=stratify_by)
            # print "Threshes", threshes

            fdr_matr_filename = fdr_prefix + "-" + str(i + 1) + "-fdr-" + str(
                fdr) + "-" + stratify_by + "-matrix.txt"
            fdr_net_filename = fdr_prefix + "-" + str(i + 1) + "-fdr-" + str(
                fdr) + "-" + stratify_by + "-network.txt"

            fdr_matr = gtm.save_gene_matrix(fdr_matr_filename,
                                            matrix=acoefs_fdr[i],
                                            genes=genes)
            pickle.dump(
                threshes,
                open(
                    fdr_prefix + "-" + str(i + 1) + "-fdr-" + str(fdr) + "-" +
                    stratify_by + "-threshes.p", 'wb'))

            extra_dict = collections.OrderedDict()
            extra_dict["Test"] = args.test_name
            extra_dict["Lag"] = acoefs.shape[0]
            extra_dict["Coef"] = i + 1

            fdr_net = nh.matr_to_net(fdr_matr,
                                     extra_dict=extra_dict,
                                     make_type=False)
            fdr_net.to_csv(fdr_net_filename, sep="\t", index=False)
            fdr_nets.append(fdr_net)

            # write summary readme
            sf_df = fc.summarize_fdr(matr=acoefs_fdr[i],
                                     test=args.test_name,
                                     fdr=fdr,
                                     lag=lag,
                                     coef=i + 1,
                                     hyper=best_hyper,
                                     thresh=threshes,
                                     readme_name=fdr_prefix + "-" +
                                     str(i + 1) + "-fdr-" + str(fdr) + "-" +
                                     stratify_by + "-README.txt",
                                     matrixname=fdr_matr_filename,
                                     filename=fdr_net_filename)

            sf_dfs.append(sf_df)

            print("Network edges: ", fdr_net.shape[0])

        if acoefs_fdr.shape[0] > 1:
            m_net = cp.get_max_network(fdr_nets,
                                       max_col="AbsWeight",
                                       index_col="Cause-Effect")
            union_net = cp.get_union_network(
                fdr_nets + [m_net],
                suffixes=[str(i)
                          for i in range(1, acoefs_fdr.shape[0] + 1)] + [""])

        else:
            union_net = fdr_nets[0]

        union_net_filename = fdr_prefix + "-union-fdr-" + str(
            fdr) + "-" + stratify_by + "-network.txt"
        union_net.to_csv(union_net_filename, sep="\t", index=False)

        print("Union network edges", union_net.shape[0])
        print("Written to ", union_net_filename)

        fdr_agg_matr_filename = fdr_prefix + "-union-fdr-" + str(
            fdr) + "-" + stratify_by + "-coefs.p"
        pickle.dump(acoefs_fdr, open(fdr_agg_matr_filename, 'wb'))

        print("Thresholded matrix written as pickle file: ",
              fdr_agg_matr_filename)

        acoefs_fdrs.append(acoefs_fdr.copy())

    all_sf_dfs = pd.concat(sf_dfs)

    # Hack to allow the base to still be fit_all_summary_fdr-stratby.txt
    # While the bootstrap will write to its own file, in its own corresponding folder
    # bullshit. just sent the output folder

    save_file = os.path.join(args.output_folder,
                             "fit_all_summary_fdr-" + stratify_by + ".txt")
    all_sf_dfs.to_csv(save_file, sep="\t", index=False)
    print("********")
    print("Summaries of all fdrs written to ", save_file)
    print("Matrices done.")

    with open(os.path.join(args.output_folder, "matrices_done.txt"),
              'w') as donefile:
        donefile.write("done\n")