def tsne(score, c_size, perplexity=50):
    """Run t-SNE on ``score`` for each requested dimensionality and save the results.

    For every value ``cs`` in ``c_size`` the embedding is computed, the
    metadata columns are appended, and the table is written to
    ``<path>/tsne/tsne_<cs>.csv``.  The output directory is created on demand.

    The names ``path`` and ``data`` are not parameters; they are read from
    the enclosing (module) scope.  ``data`` is indexed with the keys
    ``'pdf_names'``, ``'year'``, ``'language'``, ``'authors'`` and ``'title'``.

    param:
        score: similarity score, passed unchanged to ``TSNE.fit_transform``
        c_size: iterable of TSNE component counts (``n_components``)
        perplexity: perplexity forwarded to ``TSNE``; the default of 50 is
            the value that used to be hard-coded
    return:
        None -- the results are only written to disk
    """
    import pandas as pd

    out_dir = os.path.join(path, 'tsne')
    metadata_columns = ('pdf_names', 'year', 'language', 'authors', 'title')

    for cs in c_size:
        ts = TSNE(n_components=cs, perplexity=perplexity).fit_transform(score)

        # exist_ok=True replaces the former "check, then create" sequence: it
        # cannot fail when the directory appears in between, and it removes the
        # need for two identical save branches.
        os.makedirs(out_dir, exist_ok=True)

        print(f'*******Saving TSNE_{cs}*******')
        ts = pd.DataFrame(ts)
        for column in metadata_columns:
            # np.array(...) drops any index on the metadata, so the values are
            # assigned by position rather than by label alignment.
            ts[column] = np.array(data[column])
        ts.to_csv(os.path.join(out_dir, f'tsne_{cs}.csv'))
def generate_tsne_mapping(X, perplexity, suffix):
    """Embed ``X`` in two dimensions with t-SNE and write the coordinates to CSV.

    The output file is named ``mapping_<suffix><perplexity>.csv`` and is
    written to the current working directory without index or header row.

    Args:
        X: object exposing ``.values`` (the underlying array is what gets
            embedded).
        perplexity: perplexity passed to ``TSNE``; also part of the file name.
        suffix: string inserted into the file name before the perplexity.

    Returns:
        None.
    """
    out_name = "".join(["mapping_", suffix, str(perplexity), ".csv"])

    # Open question carried over from the original author: whether to use
    # metric='precomputed' with a distance matrix derived from JS / KL
    # divergence, and whether x or X_train is the right input.
    features = X.values

    model = TSNE(n_components=2, perplexity=perplexity, verbose=1, random_state=1)
    coordinates = pd.DataFrame(model.fit_transform(features))
    coordinates.to_csv(out_name, encoding='utf-8', index=False, header=None)
def Caltsne_pv(InputFilePath, OutputFilePath):
    """Compute a 2-D t-SNE embedding of a header-less CSV and save it.

    The last column of the input file is kept aside and re-attached to the
    result as column ``"pv"``; all other columns are L2-normalised row-wise
    and embedded.

    Args:
        InputFilePath: CSV file read without a header row.
        OutputFilePath: destination CSV for the embedding plus ``"pv"``.

    Returns:
        The DataFrame that was written: the two embedding columns followed
        by ``"pv"``.
    """
    table = pd.read_csv(InputFilePath, header=None, index_col=False)

    # Last column -> labels, everything before it -> features.
    labels = table.iloc[:, -1]
    features = preprocessing.normalize(table.iloc[:, :-1], norm='l2')

    embedding = TSNE(n_components=2).fit_transform(features)

    result = pd.DataFrame(embedding)
    result["pv"] = labels
    result.to_csv(OutputFilePath)
    return result
scaled_t = (t - t.mean(axis=0)) / t.std(axis=0, ddof=1) k3n_errors.append( sample_functions.k3n_error(autoscaled_x, scaled_t, k_in_k3n_error) + sample_functions.k3n_error( scaled_t, autoscaled_x, k_in_k3n_error)) plt.rcParams['font.size'] = 18 plt.scatter(candidates_of_perplexity, k3n_errors, c='blue') plt.xlabel("perplexity") plt.ylabel("k3n-errors") plt.show() optimal_perplexity = candidates_of_perplexity[np.where(k3n_errors == np.min(k3n_errors))[0][0]] print('\nk3n-error による perplexity の最適値 :', optimal_perplexity) # t-SNE t = TSNE(perplexity=optimal_perplexity, n_components=2, init='pca', random_state=10).fit_transform(autoscaled_x) t = pd.DataFrame(t, index=x.index, columns=['t_1 (t-SNE)', 't_2 (t-SNE)']) t.to_csv('tsne_t.csv') # t1 と t2 の散布図 (物性 a の値でサンプルに色付け) plt.rcParams['font.size'] = 18 plt.scatter(t.iloc[:, 0], t.iloc[:, 1], c=dataset.iloc[:, 0], cmap=plt.get_cmap('jet')) plt.colorbar() plt.xlabel('t_1 (t-SNE)') plt.ylabel('t_2 (t-SNE)') plt.show() # t1 と t2 の散布図 (物性 a の値でサンプルに色付け) plt.scatter(t.iloc[:, 0], t.iloc[:, 1], c=dataset.iloc[:, 0], cmap=plt.get_cmap('jet')) plt.colorbar() plt.rcParams['font.size'] = 10 for sample_number in range(score.shape[0]): plt.text(t.iloc[sample_number, 0], t.iloc[sample_number, 1], t.index[sample_number], horizontalalignment='center', verticalalignment='top') plt.xlabel('t_1 (t-SNE)')
# -*- coding: utf-8 -*-
"""
@author: hkaneko
"""

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.manifold import TSNE  # scikit-learn's t-SNE implementation

perplexity = 30  # t-SNE perplexity (as a rule of thumb between 5 and 50)

# Load the samples; the first CSV column supplies the row labels.
dataset = pd.read_csv('iris_without_species.csv', index_col=0)

# Autoscaling: centre every column and divide it by its standard deviation.
autoscaled_dataset = dataset.sub(dataset.mean()).div(dataset.std())

# t-SNE
tsne_model = TSNE(perplexity=perplexity, n_components=2, init='pca', random_state=0)
embedding = tsne_model.fit_transform(autoscaled_dataset)

# Convert to a pandas DataFrame, carrying over the row names and naming the columns.
t = pd.DataFrame(embedding, index=dataset.index, columns=['t_1', 't_2'])

# Save as CSV. Note that an existing file with the same name is overwritten.
t.to_csv('tsne_t.csv')

# Scatter plot of t_1 against t_2
plt.rcParams['font.size'] = 18
plt.scatter(t['t_1'], t['t_2'], c='blue')
plt.xlabel('t_1')
plt.ylabel('t_2')
plt.show()
"Off-Target", "Blocked", "Corners", "Offsides", "Free Kicks", \ "Saves", "Pass Accuracy %", "Passes", "Distance Covered (Kms)", \ "Fouls Committed", "Yellow Card", "Yellow & Red", "Red"] # , "1st Goal" names = ["Goal Scored", "On-Target", "Off-Target", "Ball Possession %", \ "Fouls Committed"] data_file_name = "../data/FIFA_2018_Statistics.csv" df = pd.read_csv(data_file_name) df = df[names] df_norm = (df - df.mean()) / (df.max() - df.min()) print(df_norm) input_data_mat = np.array(df_norm) df_embedded = TSNE(n_components=2).fit_transform(df_norm) # pca = PCA(n_components=2) # df_embedded = pca.fit(input_data_mat).transform(input_data_mat) print(df_embedded) df_embedded = pd.DataFrame(df_embedded) df_embedded.reset_index(inplace=True) df_embedded = df_embedded.rename(columns={0: "x", 1: "y"}) print(df_embedded) df_embedded.to_csv("../data/tsne-results.csv", index=0)