コード例 #1
0
ファイル: simple.py プロジェクト: GillianGrayson/mlmg
def save_simple_linreg(config, num_top=100):
    """Fit one linear regression per top gene and save the ranked metrics.

    The top genes are loaded while ``config.scenario`` is
    ``Scenario.approach``; the scenario is then set to
    ``Scenario.validation`` and left that way, so the result path is
    resolved under the validation scenario.

    Args:
        config: Project configuration; its ``scenario`` attribute is
            overwritten by this function.
        num_top: Number of top genes requested from ``load_top_gene_data``.

    Side effects:
        Passes the columns (gene names, p-values, r-values, slopes,
        intercepts), ordered by decreasing ``|r|``, to ``save_features``
        for the file ``metrics.txt``.
    """
    attributes = get_attributes(config)

    config.scenario = Scenario.approach
    gene_names, gene_vals = load_top_gene_data(config, num_top)
    config.scenario = Scenario.validation

    # One fit per gene: the gene values are x, the attributes are y.
    fits = [
        stats.linregress(gene_vals[gene_idx], attributes)
        for gene_idx in range(len(gene_names))
    ]
    # linregress results unpack as (slope, intercept, r, p, std_err).
    slopes = [fit[0] for fit in fits]
    intercepts = [fit[1] for fit in fits]
    r_values = [fit[2] for fit in fits]
    p_values = [fit[3] for fit in fits]

    # Strongest absolute correlation first.
    ranking = np.argsort(np.abs(r_values))[::-1]

    def ranked(column):
        return list(np.array(column)[ranking])

    columns = [
        ranked(gene_names),
        ranked(p_values),
        ranked(r_values),
        ranked(slopes),
        ranked(intercepts),
    ]

    save_features(get_result_path(config, 'metrics.txt'), columns)
コード例 #2
0
ファイル: plane.py プロジェクト: GillianGrayson/mlmg
def save_plane_linreg(config,
                      num_top=100,
                      gd_type_x=GeneDataType.mean,
                      gd_type_y=GeneDataType.mean):
    """Save, per top gene, the r-values of two regressions (x and y data types).

    The top gene names are loaded under ``Scenario.approach``; the values are
    then loaded under ``Scenario.validation`` twice, once with
    ``config.approach_gd = gd_type_x`` and once with ``gd_type_y``.

    Args:
        config: Project configuration; ``scenario`` and ``approach_gd`` are
            overwritten and left at ``Scenario.validation`` / ``gd_type_y``.
        num_top: Number of top genes requested from ``load_top_gene_names``.
        gd_type_x: Gene data type used for the first set of values.
        gd_type_y: Gene data type used for the second set of values.

    Side effects:
        Passes the columns (gene names, r-values for x, r-values for y) to
        ``save_features`` for the file ``plane.txt``.
    """
    attributes = get_attributes(config)

    config.scenario = Scenario.approach
    gene_names = load_top_gene_names(config, num_top)
    config.scenario = Scenario.validation

    config.approach_gd = gd_type_x
    gene_vals_x = load_top_gene_vals(config, gene_names)
    config.approach_gd = gd_type_y
    gene_vals_y = load_top_gene_vals(config, gene_names)

    # Only the correlation coefficients are written out, so the p-values
    # (previously accumulated but never used) are no longer collected.
    r_values_x = []
    r_values_y = []
    for gene_idx in range(len(gene_names)):
        _, _, r_value_x, _, _ = stats.linregress(gene_vals_x[gene_idx],
                                                 attributes)
        r_values_x.append(r_value_x)

        _, _, r_value_y, _, _ = stats.linregress(gene_vals_y[gene_idx],
                                                 attributes)
        r_values_y.append(r_value_y)

    fn = get_result_path(config, 'plane.txt')
    save_features(fn, [gene_names, r_values_x, r_values_y])
コード例 #3
0
ファイル: params.py プロジェクト: GillianGrayson/mlmg
def save_params_enet(config, num_folds=10):
    """Compute elastic-net parameters from the CpG data and save them.

    Args:
        config: Project configuration passed to the loaders and to
            ``get_param_path``.
        num_folds: Forwarded to ``get_enet_params`` as its third argument.

    Side effects:
        Passes the columns (parameter names, parameter values) to
        ``save_features`` for the file ``params.txt``.
    """
    attributes = get_attributes(config)
    # The CpG names are returned by the loader but not needed here.
    _, cpg_vals = load_cpg_data(config)

    # NOTE(review): the call order is (attributes, values) - confirm it
    # matches the signature of get_enet_params imported by this module.
    param_names, param_values = get_enet_params(attributes, cpg_vals,
                                                num_folds)

    target = get_param_path(config, 'params.txt')
    save_features(target, [param_names, param_values])
コード例 #4
0
ファイル: error_from_age.py プロジェクト: GillianGrayson/mlmg
def save_error_from_age(config, num_top=100):
    """Fit a multiple regression on the top genes and report errors per age.

    The top genes are loaded under ``Scenario.approach``; ``config.scenario``
    is then set to ``Scenario.validation`` and left that way. A model is
    built with ``linreg_mult(attributes, vals)`` and, for every integer age
    in ``range(0, 150)`` that occurs in the attributes, the model predictions
    for the samples of that age are compared with the age itself.

    NOTE(review): samples are matched with ``attribute == age`` for integer
    ages 0..149 only, so non-integer or out-of-range attribute values are
    not reported - confirm this is intended.

    Args:
        config: Project configuration; its ``scenario`` attribute is
            overwritten by this function.
        num_top: Number of top genes requested from ``load_top_gene_data``.

    Side effects:
        * ``error_from_age.txt``: columns (ages, mean absolute errors) passed
          to ``save_features``.
        * ``errors.txt``: one line per age, the age followed by the
          predictions formatted as ``0.8e``, written with ``np.savetxt``.
        * Prints the linear regression of (prediction - age) against age.
    """
    attributes = get_attributes(config)
    config.scenario = Scenario.approach
    _names, vals = load_top_gene_data(config, num_top)
    config.scenario = Scenario.validation

    model = linreg_mult(attributes, vals)

    # Samples as rows. Built once here rather than on every loop iteration.
    samples = np.array(vals).T

    ages = []
    maes = []
    str_list = []
    x_all = []
    y_all = []
    for age in range(0, 150):
        indexes = [i for i, x in enumerate(attributes) if x == age]
        if not indexes:
            continue

        ages.append(age)

        X_test = samples[indexes].tolist()
        y_test_pred = model.get_prediction(X_test).predicted_mean

        errors = [pred_age - age for pred_age in y_test_pred]
        maes.append(sum(abs(error) for error in errors) / len(indexes))

        str_list.append(' '.join(
            [str(age)] + [format(pred_age, '0.8e') for pred_age in y_test_pred]))

        x_all.extend([age] * len(errors))
        y_all.extend(errors)

    fn = get_result_path(config, 'error_from_age.txt')
    save_features(fn, [ages, maes])

    fn = get_result_path(config, 'errors.txt')
    np.savetxt(fn, str_list, fmt="%s")

    slope, intercept, r_value, p_value, std_err = stats.linregress(x_all, y_all)
    print('slope: ' + str(slope))
    print('intercept: ' + str(intercept))
    print('r_value: ' + str(r_value))
    print('p_value: ' + str(p_value))
    print('std_err: ' + str(std_err))
コード例 #5
0
ファイル: params.py プロジェクト: GillianGrayson/mlmg
def save_params_enet(config, num_folds=10):
    """Compute elastic-net parameters from the gene data and save them.

    Args:
        config: Project configuration passed to the loaders and to
            ``get_param_path``.
        num_folds: Forwarded to ``get_enet_params`` as its third argument.

    Side effects:
        Passes the columns (parameter names, parameter values) to
        ``save_features`` for the file ``params.txt``.
    """
    attributes = get_attributes(config)
    # The gene names are returned by the loader but not needed here.
    _, gene_vals = load_gene_data(config)

    # NOTE(review): the call order is (values, attributes) - confirm it
    # matches the signature of get_enet_params imported by this module.
    enet_params = get_enet_params(gene_vals, attributes, num_folds)
    param_names, param_values = enet_params

    target = get_param_path(config, 'params.txt')
    save_features(target, [param_names, param_values])
コード例 #6
0
def get_attributes_group(config, attribute=Attribute.age):
    """Return the bin index of every attribute value.

    Bins have width ``config.shift`` and start at the smallest attribute
    value rounded (by ``int`` truncation) to a multiple of ``config.shift``.

    Args:
        config: Project configuration; ``config.shift`` is the bin width.
        attribute: Which attribute to load, forwarded to ``get_attributes``.

    Returns:
        list[int]: one bin index per attribute value, in the same order.

    Raises:
        ValueError: if ``get_attributes`` returns an empty sequence
            (``min`` of an empty sequence).
    """
    attributes = get_attributes(config, attribute)

    shift = config.shift
    # Lower edge of the first bin. The upper edge was computed by the
    # previous implementation but never used, so it is not derived here.
    lower_edge = int(min(attributes) / shift) * shift

    return [int((value - lower_edge) / shift) for value in attributes]
コード例 #7
0
def get_raw_dict(config):
    """Group the CpG values by gene and order them by the CpG map info.

    CpGs listed by ``get_non_inc_cpgs`` and CpGs without an entry in the
    CpG-to-gene dictionary are skipped.

    Args:
        config: Project configuration passed to all loaders.

    Returns:
        dict: gene -> list with one entry per attribute (sample); each entry
        is the list of that sample's values over the gene's CpGs, arranged
        in ``np.argsort`` order of the CpGs' integer map info.
    """
    cpg_to_genes = get_dict_cpg_gene(config)
    cpg_to_map_info = get_dict_cpg_map_info(config)
    num_samples = len(get_attributes(config))
    cpgs, vals = load_cpg_data(config)

    excluded_cpgs = get_non_inc_cpgs(config)

    gene_raw_dict = {}
    map_dict = {}
    for cpg_idx, cpg in enumerate(cpgs):
        cpg_vals = vals[cpg_idx]

        if cpg in excluded_cpgs:
            continue

        genes = cpg_to_genes.get(cpg)
        position = cpg_to_map_info.get(cpg)
        if genes is None:
            continue

        for gene in genes:
            if gene not in gene_raw_dict:
                # First CpG seen for this gene: one empty list per sample.
                gene_raw_dict[gene] = [[] for _ in range(num_samples)]
                map_dict[gene] = []

            per_sample = gene_raw_dict[gene]
            for sample_idx in range(num_samples):
                per_sample[sample_idx].append(cpg_vals[sample_idx])
            map_dict[gene].append(int(position))

    # Reorder every sample's values so the CpGs follow their map info.
    for gene, per_sample in gene_raw_dict.items():
        by_position = np.argsort(map_dict[gene])
        gene_raw_dict[gene] = [
            list(np.array(sample_vals)[by_position])
            for sample_vals in per_sample
        ]

    return gene_raw_dict
コード例 #8
0
def get_attributes_dict(config):
    """Group the sample indices by attribute bin.

    Bins have width ``config.shift`` and start at the smallest attribute
    value rounded (by ``int`` truncation) to a multiple of ``config.shift``.

    Args:
        config: Project configuration; ``config.shift`` is the bin width.

    Returns:
        dict[int, list[int]]: bin index -> indices (positions in the
        attribute sequence) of the samples falling into that bin, in
        ascending order of position.

    Raises:
        ValueError: if ``get_attributes`` returns an empty sequence
            (``min`` of an empty sequence).
    """
    attributes = get_attributes(config)

    shift = config.shift
    # Lower edge of the first bin. The upper edge was computed by the
    # previous implementation but never used, so it is not derived here.
    lower_edge = int(min(attributes) / shift) * shift

    attributes_dict = {}
    for sample_idx, value in enumerate(attributes):
        key = int((value - lower_edge) / shift)
        attributes_dict.setdefault(key, []).append(sample_idx)

    return attributes_dict
コード例 #9
0
ファイル: top.py プロジェクト: GillianGrayson/mlmg
def save_top_spearman(config, num_top=500):
    """Rank CpGs by absolute Spearman correlation and save the top entries.

    Args:
        config: Project configuration. ``config.approach_gd`` is set to
            ``GeneDataType.from_cpg`` and left that way; ``config.dt`` is
            temporarily set to ``DataType.gene`` and then set to
            ``DataType.cpg``.
        num_top: Maximum number of rows kept in each output.

    Side effects:
        * CpG-level ``top.txt``: columns (CpG names, rho), ordered by
          decreasing ``|rho|``.
        * Gene-level ``top.txt`` (path resolved after switching
          ``config.dt`` to ``DataType.gene``): columns (gene names, rho of
          the CpG they came from). Genes are not de-duplicated.
    """
    attributes = get_attributes(config)
    dict_cpg_gene = get_dict_cpg_gene(config)
    cpgs, vals = load_cpg_data(config)

    rhos = []
    for cpg_idx in range(len(cpgs)):
        rho, _pval = stats.spearmanr(attributes, vals[cpg_idx])
        rhos.append(rho)

    # Strongest absolute correlation first.
    order = np.argsort(np.abs(rhos))[::-1]
    cpgs_sorted = list(np.array(cpgs)[order])
    rhos_sorted = list(np.array(rhos)[order])

    genes_sorted = []
    rhos_genes = []
    for cpg, rho in zip(cpgs_sorted, rhos_sorted):
        genes = dict_cpg_gene.get(cpg)
        if genes is None:
            # A CpG without a gene mapping contributes no gene rows
            # (previously this raised TypeError when iterating None).
            continue
        for gene in genes:
            genes_sorted.append(gene)
            rhos_genes.append(rho)

    cpgs_sorted = cpgs_sorted[0:num_top]
    rhos_sorted = rhos_sorted[0:num_top]

    genes_sorted = genes_sorted[0:num_top]
    rhos_genes = rhos_genes[0:num_top]

    fn = get_result_path(config, 'top.txt')
    save_features(fn, [cpgs_sorted, rhos_sorted])

    # The gene-level file has the same name; its location depends on the
    # config fields changed below.
    config.approach_gd = GeneDataType.from_cpg
    config.dt = DataType.gene
    fn = get_result_path(config, 'top.txt')
    save_features(fn, [genes_sorted, rhos_genes])
    config.dt = DataType.cpg
コード例 #10
0
def save_simple_linreg_mult(config, num_bootstrap_runs=500, num_top=100):
    """Save R2-vs-count and validation metrics for the top-gene regression.

    The top genes are loaded under ``Scenario.approach``; ``config.scenario``
    is then set to ``Scenario.validation`` and left that way.

    Args:
        config: Project configuration; ``scenario`` is overwritten and
            ``test_part`` gives the fraction of samples used for testing.
        num_bootstrap_runs: Forwarded to ``validation_metrics``.
        num_top: Number of top genes requested from ``load_top_gene_data``;
            also embedded in the output file names.

    Side effects:
        * ``R2s_<num_top>.txt``: columns (counts, R2s) from ``R2_from_count``.
        * ``metrics_<num_top>.txt``: columns (metric names, metric values)
          from ``validation_metrics``.
        * Prints the summary of ``linreg_mult_with_const``.
    """
    attributes = get_attributes(config)

    config.scenario = Scenario.approach
    gene_names, gene_vals = load_top_gene_data(config, num_top)
    config.scenario = Scenario.validation

    counts, R2s = R2_from_count(gene_vals, attributes)
    r2_path = get_result_path(config, 'R2s_{}.txt'.format(num_top))
    save_features(r2_path, [counts, R2s])

    # Split sizes: the test share is truncated, the rest goes to training.
    num_samples = len(attributes)
    test_size = int(num_samples * config.test_part)
    train_size = num_samples - test_size

    metrics_names, metrics_vals = validation_metrics(
        gene_vals, attributes, test_size, train_size, num_bootstrap_runs)
    metrics_path = get_result_path(config, 'metrics_{}.txt'.format(num_top))
    save_features(metrics_path, [metrics_names, metrics_vals])

    full_model = linreg_mult_with_const(attributes, gene_vals)
    print(full_model.summary())
コード例 #11
0
ファイル: top.py プロジェクト: GillianGrayson/mlmg
def save_top_linreg(config):
    """Rank genes by ``|r|`` of a per-gene regression and cluster the ranking.

    Each gene's values are regressed on the attributes (attributes are x,
    gene values are y). The absolute r-values, in ranked order, are then
    clustered with MeanShift and AffinityPropagation.

    Args:
        config: Project configuration passed to the loaders and to
            ``get_result_path``.

    Side effects:
        Passes the columns (genes, mean-shift clusters, affinity-propagation
        clusters, r-values, p-values, slopes, intercepts), ordered by
        decreasing ``|r|``, to ``save_features`` for the file ``top.txt``.
    """
    attributes = get_attributes(config)
    genes, vals = load_gene_data(config)

    fits = [
        stats.linregress(attributes, vals[gene_idx])
        for gene_idx in range(len(genes))
    ]
    # linregress results unpack as (slope, intercept, r, p, std_err).
    slopes = [fit[0] for fit in fits]
    intercepts = [fit[1] for fit in fits]
    r_values = [fit[2] for fit in fits]
    p_values = [fit[3] for fit in fits]

    # Strongest absolute correlation first.
    ranking = np.argsort(np.abs(r_values))[::-1]

    def ranked(column):
        return list(np.array(column)[ranking])

    p_values_sorted = ranked(p_values)
    r_values_sorted = ranked(r_values)
    slopes_sorted = ranked(slopes)
    intercepts_sorted = ranked(intercepts)
    genes_sorted = ranked(genes)

    # Single-feature matrix (n_genes, 1) of |r| for the clustering calls.
    abs_r_column = np.abs(np.array(r_values_sorted)).reshape(-1, 1)

    mean_shift = MeanShift(bandwidth=estimate_bandwidth(abs_r_column),
                           bin_seeding=True)
    mean_shift.fit(abs_r_column)
    clusters_mean_shift = clustering_order(list(mean_shift.labels_))

    affinity = AffinityPropagation().fit(abs_r_column)
    clusters_affinity_prop = clustering_order(list(affinity.labels_))

    columns = [
        genes_sorted,
        clusters_mean_shift,
        clusters_affinity_prop,
        r_values_sorted,
        p_values_sorted,
        slopes_sorted,
        intercepts_sorted,
    ]
    save_features(get_result_path(config, 'top.txt'), columns)
コード例 #12
0
def save_top_enet(config, num_bootstrap_runs=10, num_top=500):
    """Count how often each CpG is among the top elastic-net coefficients.

    For every shuffle split an ``ElasticNet`` (``alpha`` and ``l1_ratio``
    read from ``load_params_dict``) is fitted on the training part, and the
    ``num_top`` CpGs with the largest absolute coefficients get their
    counter incremented.

    Args:
        config: Project configuration. ``config.test_part`` sets the test
            fraction; ``config.approach_gd`` is set to
            ``GeneDataType.from_cpg`` and left that way; ``config.dt`` is
            temporarily set to ``DataType.gene`` and then set to
            ``DataType.cpg``.
        num_bootstrap_runs: Number of shuffle splits.
        num_top: Number of top coefficients counted per split. If fewer
            features exist, all of them are counted.

    Side effects:
        * Prints the index of every split.
        * CpG-level ``top.txt``: columns (CpGs, counts), highest count first.
        * Gene-level ``top.txt`` (path resolved after switching
          ``config.dt``): columns (genes, count of the first CpG that
          introduced the gene), each gene listed once.
    """
    dict_cpg_gene = get_dict_cpg_gene(config)
    params_dict = load_params_dict(config)
    alpha = params_dict.get('alpha')
    l1_ratio = params_dict.get('l1_ratio')

    attributes = get_attributes(config)
    cpgs_passed, vals_passed = load_cpg_data(config)

    num_samples = len(attributes)
    test_size = int(num_samples * config.test_part)
    train_size = num_samples - test_size
    # test_size/train_size are passed by keyword: current scikit-learn
    # releases no longer accept them positionally.
    rs = ShuffleSplit(n_splits=num_bootstrap_runs,
                      test_size=test_size,
                      train_size=train_size)
    indexes = list(range(num_samples))

    # Converted once (samples as rows) instead of on every split.
    enet_X = np.array(vals_passed, dtype=float).T
    enet_y = np.array(attributes)
    cpg_names = np.array(cpgs_passed)

    cpg_top_dict = {}
    for bootstrap_id, (train_index, _test_index) in enumerate(
            rs.split(indexes)):
        print('bootstrap_id: ' + str(bootstrap_id))

        enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        enet.fit(enet_X[train_index], enet_y[train_index])

        order = np.argsort(np.abs(enet.coef_))[::-1]
        # Slicing (rather than indexing 0..num_top-1) tolerates data sets
        # with fewer than num_top features.
        for cpg in cpg_names[order][0:num_top]:
            cpg_top_dict[cpg] = cpg_top_dict.get(cpg, 0) + 1

    cpgs = list(cpg_top_dict.keys())
    counts = list(cpg_top_dict.values())
    order = np.argsort(counts)[::-1]
    cpgs_sorted = list(np.array(cpgs)[order])
    counts_sorted = list(np.array(counts)[order])

    genes_sorted = []
    counts_genes = []
    seen_genes = set()  # O(1) duplicate check instead of scanning the list
    for cpg, count in zip(cpgs_sorted, counts_sorted):
        genes = dict_cpg_gene.get(cpg)
        if genes is None:
            # A CpG without a gene mapping contributes no gene rows
            # (previously this raised TypeError when iterating None).
            continue
        for gene in genes:
            if gene not in seen_genes:
                seen_genes.add(gene)
                genes_sorted.append(gene)
                counts_genes.append(count)

    fn = get_result_path(config, 'top.txt')
    save_features(fn, [cpgs_sorted, counts_sorted])

    # The gene-level file has the same name; its location depends on the
    # config fields changed below.
    config.approach_gd = GeneDataType.from_cpg
    config.dt = DataType.gene
    fn = get_result_path(config, 'top.txt')
    save_features(fn, [genes_sorted, counts_genes])
    config.dt = DataType.cpg
コード例 #13
0
ファイル: top.py プロジェクト: GillianGrayson/mlmg
def save_top_linreg(config, num_top=500):
    """Rank CpGs by regression p-value and save the top CpGs and genes.

    The attributes are regressed on each CpG's values (CpG values are x,
    attributes are y) and the CpGs are ordered by ascending p-value.

    Args:
        config: Project configuration. ``config.approach_gd`` is set to
            ``GeneDataType.from_cpg`` and left that way; ``config.dt`` is
            temporarily set to ``DataType.gene`` and then set to
            ``DataType.cpg``.
        num_top: Maximum number of rows kept in each output.

    Side effects:
        * CpG-level ``top.txt``: columns (CpGs, p-values, r-values, slopes,
          intercepts).
        * Gene-level ``top.txt`` (path resolved after switching
          ``config.dt``): the same metrics per gene, taken from the
          best-ranked CpG that maps to the gene; each gene listed once.
    """
    attributes = get_attributes(config)
    dict_cpg_gene = get_dict_cpg_gene(config)
    cpgs, vals = load_cpg_data(config)

    slopes = []
    intercepts = []
    rvals = []
    pvals = []
    for cpg_idx in range(len(cpgs)):
        slope, intercept, r_value, p_value, _std_err = stats.linregress(
            vals[cpg_idx], attributes)
        slopes.append(slope)
        intercepts.append(intercept)
        rvals.append(r_value)
        pvals.append(p_value)

    # Smallest p-value first.
    order = np.argsort(pvals)
    cpgs_sorted = list(np.array(cpgs)[order])
    pvals_sorted = list(np.array(pvals)[order])
    slopes_sorted = list(np.array(slopes)[order])
    intercepts_sorted = list(np.array(intercepts)[order])
    rvals_sorted = list(np.array(rvals)[order])

    genes_sorted = []
    pvals_genes = []
    slopes_genes = []
    intercepts_genes = []
    rvals_genes = []
    # Set-based duplicate check: scanning genes_sorted for every gene of
    # every CpG was quadratic in the number of genes.
    seen_genes = set()
    for cpg, pval, slope, intercept, rval in zip(
            cpgs_sorted, pvals_sorted, slopes_sorted, intercepts_sorted,
            rvals_sorted):
        genes = dict_cpg_gene.get(cpg)
        if genes is None:
            # A CpG without a gene mapping contributes no gene rows
            # (previously this raised TypeError when iterating None).
            continue
        for gene in genes:
            if gene in seen_genes:
                continue
            seen_genes.add(gene)
            genes_sorted.append(gene)
            pvals_genes.append(pval)
            slopes_genes.append(slope)
            intercepts_genes.append(intercept)
            rvals_genes.append(rval)

    cpg_columns = [
        column[0:num_top]
        for column in (cpgs_sorted, pvals_sorted, rvals_sorted,
                       slopes_sorted, intercepts_sorted)
    ]
    gene_columns = [
        column[0:num_top]
        for column in (genes_sorted, pvals_genes, rvals_genes, slopes_genes,
                       intercepts_genes)
    ]

    fn = get_result_path(config, 'top.txt')
    save_features(fn, cpg_columns)

    # The gene-level file has the same name; its location depends on the
    # config fields changed below.
    config.approach_gd = GeneDataType.from_cpg
    config.dt = DataType.gene
    fn = get_result_path(config, 'top.txt')
    save_features(fn, gene_columns)
    config.dt = DataType.cpg
コード例 #14
0
ファイル: top.py プロジェクト: GillianGrayson/mlmg
def save_top_enet(config, num_bootstrap_runs=100, num_top=500):
    """Count how often each gene is among the top elastic-net coefficients.

    For every shuffle split an ``ElasticNet`` (``alpha`` and ``l1_ratio``
    read from ``load_params_dict``) is fitted on the training part, and the
    ``num_top`` genes with the largest absolute coefficients get their
    counter incremented. The sorted counts are then clustered with
    MeanShift and AffinityPropagation.

    Args:
        config: Project configuration; ``config.test_part`` sets the test
            fraction.
        num_bootstrap_runs: Number of shuffle splits.
        num_top: Number of top coefficients counted per split. If fewer
            features exist, all of them are counted.

    Side effects:
        * Prints the index of every split.
        * ``top.txt``: columns (genes, mean-shift clusters,
          affinity-propagation clusters, counts), highest count first.
    """
    params_dict = load_params_dict(config)
    alpha = params_dict.get('alpha')
    l1_ratio = params_dict.get('l1_ratio')

    attributes = get_attributes(config)

    genes_passed, vals_passed = load_gene_data(config)

    num_samples = len(attributes)
    test_size = int(num_samples * config.test_part)
    train_size = num_samples - test_size
    # test_size/train_size are passed by keyword: current scikit-learn
    # releases no longer accept them positionally.
    rs = ShuffleSplit(n_splits=num_bootstrap_runs,
                      test_size=test_size,
                      train_size=train_size)
    indexes = list(range(num_samples))

    # Converted once (samples as rows) instead of on every split.
    enet_X = np.array(vals_passed, dtype=float).T
    enet_y = np.array(attributes)
    gene_names = np.array(genes_passed)

    gene_top_dict = {}
    for bootstrap_id, (train_index, _test_index) in enumerate(
            rs.split(indexes)):
        print('bootstrap_id: ' + str(bootstrap_id))

        enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        enet.fit(enet_X[train_index], enet_y[train_index])

        order = np.argsort(np.abs(enet.coef_))[::-1]
        # Slicing (rather than indexing 0..num_top-1) tolerates data sets
        # with fewer than num_top features.
        for gene in gene_names[order][0:num_top]:
            gene_top_dict[gene] = gene_top_dict.get(gene, 0) + 1

    genes = list(gene_top_dict.keys())
    counts = list(gene_top_dict.values())
    order = np.argsort(counts)[::-1]
    genes_sorted = list(np.array(genes)[order])
    counts_sorted = list(np.array(counts)[order])

    # Single-feature matrix (n_genes, 1) of counts for the clustering calls.
    metrics_sorted_np = np.asarray(counts_sorted).reshape(-1, 1)
    bandwidth = estimate_bandwidth(metrics_sorted_np)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(metrics_sorted_np)
    clusters_mean_shift = clustering_order(list(ms.labels_))
    af = AffinityPropagation().fit(metrics_sorted_np)
    clusters_affinity_prop = clustering_order(list(af.labels_))

    fn = get_result_path(config, 'top.txt')
    save_features(fn, [
        genes_sorted, clusters_mean_shift, clusters_affinity_prop,
        counts_sorted
    ])
コード例 #15
0
ファイル: bend.py プロジェクト: GillianGrayson/mlmg
def save_bend_linreg(config, limit, pval):
    """Compare per-gene regression slopes below and above an age limit.

    Two deep copies of ``config`` are restricted with ``age_less`` and
    ``age_more``. For every gene of the "less" data set, its values are
    regressed on the attributes in both subsets; genes whose two p-values
    are both below ``pval`` are kept and ranked by the absolute slope
    difference ("angle"), largest first.

    Args:
        config: Project configuration; only deep copies are modified here.
        limit: Age limit passed to ``age_less`` / ``age_more``; also
            embedded in the output file name.
        pval: Exclusive upper bound for both regression p-values.

    Raises:
        ValueError: if a gene of the "less" data set is absent from the
            "more" data set.

    Side effects:
        ``bend_<limit>.txt``: columns (genes, angles, then slope, intercept,
        r, p, std_err for the "less" fit, then the same five for the
        "more" fit).
    """
    config_less = deepcopy(config)
    age_less(config_less, limit)
    atr_l = get_attributes(config_less)
    g_names_l, g_vals_l = load_gene_data(config_less)

    config_more = deepcopy(config)
    age_more(config_more, limit)
    atr_m = get_attributes(config_more)
    g_names_m, g_vals_m = load_gene_data(config_more)

    # Name -> first position in the "more" data set. Replaces a list.index
    # call per gene, which made the loop quadratic.
    index_in_more = {}
    for position, name in enumerate(g_names_m):
        index_in_more.setdefault(name, position)

    # One tuple per gene that passes the p-value filter, in column order.
    rows = []
    for g_id_l, name in enumerate(g_names_l):
        if name not in index_in_more:
            raise ValueError('%r is not in the "more" gene list' % (name,))
        g_id_m = index_in_more[name]

        fit_l = stats.linregress(atr_l, g_vals_l[g_id_l])
        fit_m = stats.linregress(atr_m, g_vals_m[g_id_m])
        slope_l, intercept_l, r_value_l, p_value_l, std_err_l = fit_l
        slope_m, intercept_m, r_value_m, p_value_m, std_err_m = fit_m

        if max(p_value_l, p_value_m) < pval:
            rows.append((
                name, abs(slope_l - slope_m),
                slope_l, intercept_l, r_value_l, p_value_l, std_err_l,
                slope_m, intercept_m, r_value_m, p_value_m, std_err_m,
            ))

    # Largest slope difference first.
    order = np.argsort([row[1] for row in rows])[::-1]
    num_columns = 12
    columns = [
        list(np.array([row[column] for row in rows])[order])
        for column in range(num_columns)
    ]

    fn = get_result_path(config, 'bend_' + str(limit) + '.txt')
    save_features(fn, columns)
コード例 #16
0
ファイル: bend.py プロジェクト: GillianGrayson/mlmg
def save_bend_linreg(config, limit, pval, num_opt=1000):
    """Compare per-CpG regression slopes below and above an age limit.

    Two deep copies of ``config`` are restricted with ``age_less`` and
    ``age_more``. For every CpG of the "less" data set, its values are
    regressed on the attributes in both subsets; CpGs whose two p-values
    are both below ``pval`` are kept and ranked by the absolute slope
    difference ("angle"), largest first. Only the first ``num_opt`` are
    saved.

    Args:
        config: Project configuration; only deep copies are modified here.
            ``config.print_rate`` controls how often progress is printed.
        limit: Age limit passed to ``age_less`` / ``age_more``; also
            embedded in the output file names.
        pval: Exclusive upper bound for both regression p-values.
        num_opt: Maximum number of CpGs written out.

    Raises:
        ValueError: if a CpG of the "less" data set is absent from the
            "more" data set, or a selected CpG is absent from the raw data.

    Side effects:
        * Prints a progress line every ``config.print_rate`` CpGs.
        * ``bend_<limit>.txt``: columns (CpGs, genes joined with ``;`` or
          ``nan``, angles, then slope, intercept, r, p, std_err for the
          "less" fit, then the same five for the "more" fit).
        * ``bend_data_<limit>.txt``: one line per selected CpG with its name
          and its values from a ``Gender.any`` configuration, formatted as
          ``0.8e``.
    """
    config_less = deepcopy(config)
    age_less(config_less, limit)
    atr_l = get_attributes(config_less)
    cpg_names_l, cpg_vals_l = load_cpg_data(config_less)

    config_more = deepcopy(config)
    age_more(config_more, limit)
    atr_m = get_attributes(config_more)
    cpg_names_m, cpg_vals_m = load_cpg_data(config_more)

    cpg_gene_dict = get_dict_cpg_gene(config)

    # Name -> first position in the "more" data set. Replaces a list.index
    # call per CpG, which made the loop quadratic.
    index_in_more = {}
    for position, name in enumerate(cpg_names_m):
        index_in_more.setdefault(name, position)

    # One tuple per CpG that passes the p-value filter, in column order.
    rows = []
    for num_cpgs, name in enumerate(cpg_names_l, 1):
        if name not in index_in_more:
            raise ValueError('%r is not in the "more" CpG list' % (name,))
        vals_l = cpg_vals_l[num_cpgs - 1]
        vals_m = cpg_vals_m[index_in_more[name]]

        fit_l = stats.linregress(atr_l, vals_l)
        fit_m = stats.linregress(atr_m, vals_m)
        slope_l, intercept_l, r_value_l, p_value_l, std_err_l = fit_l
        slope_m, intercept_m, r_value_m, p_value_m, std_err_m = fit_m

        if max(p_value_l, p_value_m) < pval:
            genes = cpg_gene_dict.get(name)
            # A missing mapping (None), an empty mapping or an empty first
            # gene name all end up as 'nan'. The None case previously
            # raised TypeError on len(None).
            if genes is None or len(genes) == 0 or genes[0] == '':
                gene_label = 'nan'
            else:
                gene_label = ";".join(genes)

            rows.append((
                name, gene_label, abs(slope_l - slope_m),
                slope_l, intercept_l, r_value_l, p_value_l, std_err_l,
                slope_m, intercept_m, r_value_m, p_value_m, std_err_m,
            ))

        if num_cpgs % config.print_rate == 0:
            print('num_cpgs: ' + str(num_cpgs))

    # Largest slope difference first, truncated to num_opt entries.
    order = np.argsort([row[2] for row in rows])[::-1][0:num_opt]
    num_columns = 13
    columns = [
        list(np.array([row[column] for row in rows])[order])
        for column in range(num_columns)
    ]
    cpgs_opt = columns[0]

    fn = get_result_path(config, 'bend_' + str(limit) + '.txt')
    save_features(fn, columns)

    raw_config = Config(db=config.db,
                        dt=config.dt,
                        approach=config.approach,
                        scenario=config.scenario,
                        approach_method=config.approach_method,
                        gender=Gender.any)

    cpg_name_raw, cpg_vals_raw = load_cpg_data(raw_config)
    # Name -> first position in the raw data, built once for all lookups.
    index_in_raw = {}
    for position, name in enumerate(cpg_name_raw):
        index_in_raw.setdefault(name, position)

    cpg_str_list = []
    for cpg in cpgs_opt:
        if cpg not in index_in_raw:
            raise ValueError('%r is not in the raw CpG list' % (cpg,))
        cpg_vals = cpg_vals_raw[index_in_raw[cpg]]
        cpg_str_list.append(' '.join(
            [cpg] + [format(value, '0.8e') for value in cpg_vals]))

    fn = get_result_path(config, 'bend_data_' + str(limit) + '.txt')
    np.savetxt(fn, cpg_str_list, fmt="%s")
コード例 #17
0
ファイル: beta_pdf.py プロジェクト: GillianGrayson/mlmg
cpg_condition = CpGCondition.x

for gender in genders:
    print('\t' + gender.value)
    for geo in geos:
        print('\t\t' + geo.value)

        config = Config(db=db,
                        dt=dt,
                        approach=approach,
                        scenario=scenario,
                        gender=gender,
                        geo=geo,
                        cpg_condition=cpg_condition)

        attributes = get_attributes(config)
        cpgs, vals = load_cpg_data(config)

        num_int = 200
        int_begin = 0
        int_end = 1
        int_shift = (int_end - int_begin) / num_int
        ints = []
        pdf = np.zeros(num_int)
        for int_id in range(0, num_int):
            ints.append(int_begin + int_id * int_shift + 0.5 * int_shift)

        for curr_cpg_vals in vals:
            for beta in curr_cpg_vals:
                int_id = math.floor((beta - int_begin) * num_int /
                                    (int_end - int_begin + 1.0e-8))