コード例 #1
0
def beta_dist(data_sets, kmer_size, n_factor):
    """Load, L1-normalize, optionally NMF-factorize, and shuffle k-mer counts.

    NOTE: the ``return`` sits inside the loop, so only the first entry of
    ``data_sets`` is ever processed. If ``data_sets`` is empty the loop body
    never runs and the function implicitly returns ``None``. This behavior
    is kept as-is for backward compatibility.

    Args:
        data_sets: Iterable of entries; element ``[0]`` of each entry is what
            gets passed to ``load_kmer_cnts_jf.load_kmers`` as the data set.
        kmer_size: K-mer size forwarded to ``load_kmers`` and ``NMF_factor``.
        n_factor: Number of NMF components. When it equals ``0`` the NMF
            step is skipped and the normalized counts are used directly.

    Returns:
        A tuple ``(x, y)``: the normalized (and, for a non-zero ``n_factor``,
        NMF-transformed) data and the integer labels, shuffled together
        with ``random_state=0``.
    """
    # Forwarded to load_kmers; presumably restricts which labels are loaded
    # (verify against load_kmer_cnts_jf).
    allowed_labels = ['0', '1']

    # Loop over all data sets (see NOTE above: returns on the first one).
    for data_set in data_sets:

        data_set = data_set[0]

        # Retrieve data and labels; the accessions and domain labels that
        # load_kmers also returns are not used here.
        kmer_cnts, _accessions, labelz, _domain_labels = (
            load_kmer_cnts_jf.load_kmers(kmer_size, data_set, allowed_labels))
        print("LOADED DATASET " + str(data_set[0]) + ": " +
              str(len(kmer_cnts)) + " SAMPLES")

        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy (1.24+); the builtin gives the same dtype.
        labelz = np.asarray(labelz).astype(int)

        # Normalization is common to both paths (norm='l1' along axis 1).
        data_normalized = normalize(kmer_cnts, axis=1, norm='l1')

        # Conduct NMF and resave to data_normalized, unless disabled with 0.
        if n_factor != 0:
            data_normalized = stats_utils_AEB.NMF_factor(
                data_normalized,
                kmer_size,
                n_components=int(n_factor),
                title=(str(data_set) + str(kmer_size) + "mers" +
                       str(n_factor) + "factors"))

        # Fixed seed so the sample order is reproducible between runs.
        x, y = shuffle(data_normalized, labelz, random_state=0)

        return x, y
コード例 #2
0
            # Conduct NMF and resave to data_normalized
            # Enter the desired numbers of NMF factors below (0 skips NMF)
            for n in [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
                if n == 0:
                    data_normalized = normalize(kmer_cnts, axis=1, norm='l1')
                    data_normalized, labels = shuffle(data_normalized,
                                                      labelz,
                                                      random_state=0)
                    x = data_normalized
                    y = labels

                else:
                    data_normalized = normalize(kmer_cnts, axis=1, norm='l1')
                    data_normalized = stats_utils_AEB.NMF_factor(
                        data_normalized,
                        kmer_size,
                        n_components=int(n),
                        title=(str(data_set) + str(kmer_size) + "mers" +
                               str(n) + "factors"))
                    data_normalized, labels = shuffle(data_normalized,
                                                      labelz,
                                                      random_state=0)
                    x = data_normalized
                    y = labels

                param_grid = param_dict[learn_type]

                scoring = {
                    'Acc': 'accuracy',
                    'AUC': 'roc_auc',
                    'Precision': 'precision',
                    'Recall': 'recall',
コード例 #3
0
# K-mer size forwarded to every NMF_factor call in this script.
kmer_size = 7

# Pickled input saved before NMF; "no_norm" per its file name.
data = pd.read_pickle(
    "/pollard/home/abustion/deep_learning_microbiome/data_AEB/NMF_on_all_data/before_NMF_no_norm.pickle"
)

# Pickled input saved before NMF; "with_norm" per its file name.
data_normalized = pd.read_pickle(
    "/pollard/home/abustion/deep_learning_microbiome/data_AEB/NMF_on_all_data/before_NMF_with_norm.pickle"
)

# Largest number of NMF components to try (the sweep starts at 2).
factors = 30
for n in range(2, factors + 1):
    # Factorize the "no_norm" data with n components, then pickle the
    # result under a file name that carries the component count.
    data_NMF = stats_utils_AEB.NMF_factor(
        data,
        kmer_size,
        n_components=n,
        title="ALL_DATA_no_norm_{}mers{}factors".format(kmer_size, n),
    )
    data_NMF.to_pickle(
        "/pollard/home/abustion/deep_learning_microbiome/data_AEB/NMF_on_all_data/after_NMF_no_norm_{}factors.pickle"
        .format(n))

factors = 30
for n in range(2, factors + 1):
    data_NMF = stats_utils_AEB.NMF_factor(
        data_normalized,
        kmer_size,
        n_components=int(n),
        title=("ALL_DATA_no_norm_" + str(kmer_size) + "mers" + str(n) +
               "factors"))
    data_NMF.to_pickle(