Ejemplo n.º 1
0
def tsne(score, c_size):
    '''
    Run t-SNE on ``score`` once per requested component count and save
    every embedding as a CSV file.

    For each ``cs`` in ``c_size`` the embedding is computed with
    ``perplexity=50``, wrapped in a DataFrame, extended with the metadata
    columns read from the module-level ``data`` object and written to
    ``<path>/tsne/tsne_<cs>.csv`` (``path`` is a module-level name too).
    The ``tsne`` directory is created when it does not exist yet.

    param:
        score: similarity score handed to ``TSNE.fit_transform``
        c_size: iterable of TSNE component counts

    return: dict mapping each component count to the DataFrame that was
        written for it
    '''
    import pandas as pd

    # Metadata columns copied from ``data`` into every saved table.
    meta_columns = ('pdf_names', 'year', 'language', 'authors', 'title')
    out_dir = os.path.join(path, 'tsne')

    results = {}
    for cs in c_size:
        embedding = TSNE(n_components=cs, perplexity=50).fit_transform(score)

        # exist_ok=True replaces the former exists()/makedirs() pair, whose
        # two branches carried identical copies of the saving code.
        os.makedirs(out_dir, exist_ok=True)

        print(f'*******Saving TSNE_{cs}*******')
        frame = pd.DataFrame(embedding)
        for column in meta_columns:
            frame[column] = np.array(data[column])
        frame.to_csv(os.path.join(out_dir, f'tsne_{cs}.csv'))
        results[cs] = frame

    return results
def generate_tsne_mapping(X, perplexity, suffix):
    """Embed ``X`` in two dimensions with t-SNE and write the result to CSV.

    The output file is named ``mapping_<suffix><perplexity>.csv`` and is
    written relative to the current working directory, without index and
    without a header row.

    Args:
        X: object exposing ``.values`` (the matrix passed to t-SNE).
        perplexity: perplexity forwarded to ``TSNE``.
        suffix: string inserted into the output file name.
    """
    out_name = "".join(("mapping_", suffix, str(perplexity), ".csv"))
    features = X.values

    # Open question kept from the original author: whether to switch to
    # metric="precomputed" and feed a distance matrix (JS <- KL) instead.
    reducer = TSNE(n_components=2, perplexity=perplexity, verbose=1, random_state=1)
    coords = pd.DataFrame(reducer.fit_transform(features))

    coords.to_csv(out_name, encoding='utf-8', index=False, header=None)
def Caltsne_pv(InputFilePath, OutputFilePath):
    """Compute a 2-component t-SNE embedding of a header-less CSV table.

    The last column of the input file is taken as the label column; all
    preceding columns are L2-normalized and embedded. The embedding, with
    the labels attached as column ``"pv"``, is written to
    ``OutputFilePath`` and returned.

    Args:
        InputFilePath: CSV file read with ``header=None``.
        OutputFilePath: destination of the resulting CSV.

    Returns:
        DataFrame holding the two embedding columns plus ``"pv"``.
    """
    table = pd.read_csv(InputFilePath, header=None, index_col=False)
    n_columns = table.shape[1]

    # Last column = labels, everything before it = features.
    pv_labels = table[table.columns[-1]]
    features = preprocessing.normalize(table.iloc[:, 0:n_columns - 1], norm='l2')

    model = TSNE(n_components=2)
    model.fit_transform(features)

    embedded = pd.DataFrame(model.embedding_)
    embedded["pv"] = pv_labels
    embedded.to_csv(OutputFilePath)
    return embedded
    # NOTE(review): the enclosing loop header is outside this excerpt;
    # presumably these statements run once per perplexity candidate — confirm.
    # Standardize t column-wise (axis=0) using the sample standard deviation (ddof=1).
    scaled_t = (t - t.mean(axis=0)) / t.std(axis=0, ddof=1)

    # Store the sum of the k3n-error evaluated in both directions:
    # (autoscaled_x, scaled_t) and (scaled_t, autoscaled_x).
    k3n_errors.append(
        sample_functions.k3n_error(autoscaled_x, scaled_t, k_in_k3n_error) + sample_functions.k3n_error(
            scaled_t, autoscaled_x, k_in_k3n_error))
# Plot the k3n-error obtained for every perplexity candidate.
plt.rcParams['font.size'] = 18
plt.scatter(candidates_of_perplexity, k3n_errors, c='blue')
plt.xlabel("perplexity")
plt.ylabel("k3n-errors")
plt.show()
# Select the candidate with the smallest k3n-error (the first one on ties).
optimal_perplexity = candidates_of_perplexity[np.where(k3n_errors == np.min(k3n_errors))[0][0]]
print('\nk3n-error による perplexity の最適値 :', optimal_perplexity)
# t-SNE with the selected perplexity
t = TSNE(perplexity=optimal_perplexity, n_components=2, init='pca', random_state=10).fit_transform(autoscaled_x)
t = pd.DataFrame(t, index=x.index, columns=['t_1 (t-SNE)', 't_2 (t-SNE)'])
t.to_csv('tsne_t.csv')
# Scatter plot of t1 vs. t2, samples colored by the first column of dataset
plt.rcParams['font.size'] = 18
plt.scatter(t.iloc[:, 0], t.iloc[:, 1], c=dataset.iloc[:, 0], cmap=plt.get_cmap('jet'))
plt.colorbar()
plt.xlabel('t_1 (t-SNE)')
plt.ylabel('t_2 (t-SNE)')
plt.show()
# Same scatter plot, additionally annotated with the index label of every sample
plt.scatter(t.iloc[:, 0], t.iloc[:, 1], c=dataset.iloc[:, 0], cmap=plt.get_cmap('jet'))
plt.colorbar()
plt.rcParams['font.size'] = 10
# The loop indexes rows of t, so its bound is taken from t itself rather
# than from an unrelated object that may have a different number of rows.
for sample_number in range(t.shape[0]):
    plt.text(t.iloc[sample_number, 0], t.iloc[sample_number, 1], t.index[sample_number],
             horizontalalignment='center', verticalalignment='top')
plt.xlabel('t_1 (t-SNE)')
# -*- coding: utf-8 -*-
"""
@author: hkaneko
"""

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.manifold import TSNE  # scikit-learn の中の t-SNE を実行するためのライブラリのインポート

perplexity = 30  # t-SNE perplexity (typically chosen between 5 and 50)

# Load the samples (first CSV column becomes the index) and autoscale them:
# subtract the column mean and divide by the column standard deviation.
dataset = pd.read_csv('iris_without_species.csv', index_col=0)
autoscaled_dataset = dataset.sub(dataset.mean()).div(dataset.std())

# t-SNE: embed into two components and wrap the result in a DataFrame that
# reuses the sample index and names the columns t_1 / t_2.
t = pd.DataFrame(
    TSNE(n_components=2, perplexity=perplexity, init='pca',
         random_state=0).fit_transform(autoscaled_dataset),
    index=dataset.index,
    columns=['t_1', 't_2'],
)
# Save as CSV. An existing file with the same name is overwritten.
t.to_csv('tsne_t.csv')

# Scatter plot of t_1 against t_2
plt.rcParams['font.size'] = 18
plt.scatter(t['t_1'], t['t_2'], c='blue')
plt.xlabel('t_1')
plt.ylabel('t_2')
plt.show()
         "Off-Target", "Blocked", "Corners", "Offsides", "Free Kicks", \
         "Saves", "Pass Accuracy %", "Passes", "Distance Covered (Kms)", \
         "Fouls Committed", "Yellow Card", "Yellow & Red", "Red"] # , "1st Goal"

# Columns of the statistics table that are fed into the embedding.
names = [
    "Goal Scored",
    "On-Target",
    "Off-Target",
    "Ball Possession %",
    "Fouls Committed",
]

data_file_name = "../data/FIFA_2018_Statistics.csv"
df = pd.read_csv(data_file_name)[names]

# Mean-normalize every column: center on the mean, scale by the value range.
df_norm = df.sub(df.mean()).div(df.max() - df.min())
print(df_norm)

input_data_mat = np.array(df_norm)

# Two-component t-SNE embedding of the normalized table.
df_embedded = TSNE(n_components=2).fit_transform(df_norm)

# Disabled alternative: project with PCA instead of t-SNE.
# pca = PCA(n_components=2)
# df_embedded = pca.fit(input_data_mat).transform(input_data_mat)

print(df_embedded)

# Turn the embedding into a table with an explicit "index" column and
# the two coordinates named x and y.
df_embedded = (
    pd.DataFrame(df_embedded)
    .reset_index()
    .rename(columns={0: "x", 1: "y"})
)
print(df_embedded)

df_embedded.to_csv("../data/tsne-results.csv", index=False)