Esempio n. 1
0
def test_GMM_Unsup(genes, gene_len):
    """Check that unsupervised GMM clustering yields the expected labels.

    Character k-mers (lengths 2 to 6) are counted for every entry of
    ``genes``, each count row is divided by the matching ``gene_len``
    entry, and a two-component, full-covariance GMM with a fixed seed is
    fitted on the resulting table.

    Args:
        genes: Sequences to cluster, passed to ``CountVectorizer``
            (presumably a pytest fixture of strings -- TODO confirm).
        gene_len: Per-sequence divisor, indexed by row position
            (presumably the sequence lengths -- TODO confirm).
    """
    k_min = 2
    k_max = 6
    num_class = 2
    cov_type = 'full'
    seed = 0

    count_vect = CountVectorizer(analyzer='char', ngram_range=(k_min, k_max))
    kmers = count_vect.fit_transform(genes).toarray()
    # get_feature_names() was removed in scikit-learn 1.2; use its
    # replacement when available and fall back on older releases.
    if hasattr(count_vect, 'get_feature_names_out'):
        chars = count_vect.get_feature_names_out()
    else:
        chars = count_vect.get_feature_names()

    # Turn raw k-mer counts into per-sequence frequencies.
    kmer_freq = [counts / gene_len[i] for i, counts in enumerate(kmers)]
    kmer_table = pd.DataFrame(kmer_freq, columns=chars)

    # Hyper-parameters only take effect when set before fit(), so they are
    # all passed through the constructor.
    gmm = GMM(n_components=num_class,
              covariance_type=cov_type,
              random_state=seed).fit(kmer_table)
    labels = gmm.predict(kmer_table)

    # Compare as plain lists so a length mismatch is reported as an ordinary
    # assertion failure instead of a numpy broadcasting error.
    assert list(labels) == [0, 0, 1, 0]
Esempio n. 2
0
def get_predictions(path, k_min, k_max, num_class, cov_type, seed):
    """Fit a GMM on the k-mer table built from ``path`` and label its rows.

    Args:
        path: Input location, forwarded to ``get_kmer_table``.
        k_min: Forwarded to ``get_kmer_table`` (presumably the smallest
            k-mer length -- TODO confirm).
        k_max: Forwarded to ``get_kmer_table`` (presumably the largest
            k-mer length -- TODO confirm).
        num_class: Number of mixture components.
        cov_type: Covariance type handed to the mixture model.
        seed: Random state handed to the mixture model.

    Returns:
        The output of ``predict`` on the same table the model was fitted on.
    """
    kmer_table = get_kmer_table(path, k_min, k_max)
    # NOTE(review): the model is fitted with the estimator's default
    # initialisation. Setting ``init_params`` after ``fit`` has no effect on
    # ``predict``, so that assignment was dropped. If random initialisation
    # is wanted, pass ``init_params='random'`` to the constructor -- be aware
    # that doing so changes the returned labels.
    gmm = GMM(n_components=num_class,
              covariance_type=cov_type,
              random_state=seed)
    return gmm.fit(kmer_table).predict(kmer_table)
Esempio n. 3
0
def get_predictions_semi(path, k_min, k_max, num_class, cov_type, seed,
                         labels):
    """Cluster the k-mer table for ``path`` with a GMM seeded by known labels.

    The model is created with ``init_params`` set to ``'random'``. When
    every class ``0 .. num_class - 1`` occurs at least once in ``labels``,
    the component means are initialised to the mean k-mer row of each class;
    otherwise ``means_init`` is left untouched.

    Args:
        path: Input location, forwarded to ``get_kmer_table``.
        k_min: Forwarded to ``get_kmer_table`` (presumably the smallest
            k-mer length -- TODO confirm).
        k_max: Forwarded to ``get_kmer_table`` (presumably the largest
            k-mer length -- TODO confirm).
        num_class: Number of mixture components.
        cov_type: Covariance type handed to the mixture model.
        seed: Random state handed to the mixture model.
        labels: Known class of each row. A pandas Series is aligned with
            the k-mer table by index; any other sequence is matched to the
            rows by position. Values outside ``range(num_class)`` never
            match a class and are therefore ignored.

    Returns:
        The output of ``predict`` on the same table the model was fitted on.

    Raises:
        ValueError: Raised by pandas when ``labels`` is not a Series and
            its length differs from the number of rows in the k-mer table.
    """
    kmer_table = get_kmer_table(path, k_min, k_max)

    # Name the column explicitly so lookups do not depend on the caller
    # passing a Series that already happens to be called "Labels".
    # Plain sequences carry no index, so they are matched by position.
    row_index = None if isinstance(labels, pd.Series) else kmer_table.index
    label_series = pd.Series(labels, index=row_index, name='Labels')
    known = pd.concat([kmer_table, label_series], axis=1)['Labels']

    gmm = GMM(n_components=num_class,
              covariance_type=cov_type,
              random_state=seed)
    gmm.init_params = 'random'

    # One boolean mask per class, computed once and reused for both the
    # presence check and the per-class mean.
    class_masks = [known == i for i in range(num_class)]
    if all(mask.any() for mask in class_masks):
        gmm.means_init = np.array(
            [kmer_table[mask].mean(axis=0) for mask in class_masks])

    gmm.fit(kmer_table)
    return gmm.predict(kmer_table)