# Trace one batch through the pipeline (encoder -> channel -> decoder) and
# print the first row of the tensor at every stage.
print('Input:\t\t', ip.detach().cpu().numpy()[0])
enc = encoder(ip)
print('Encoding:\t', enc.detach().cpu().numpy()[0])
enc = channel_output(enc)
print('Channel:\t', enc.detach().cpu().numpy()[0])
op = decoder(enc)
# Softmax over dim=1 turns the decoder output into per-row probabilities.
print('Output:\t\t', torch.softmax(op, dim=1).detach().cpu().numpy()[0])

if hp.constellation:  # to visualize encodings, etc.
    # exist_ok=True replaces the manual errno.EEXIST check and additionally
    # fails right here (instead of later in savefig) when 'Constellations'
    # exists but is not a directory.
    os.makedirs('Constellations', exist_ok=True)

    # Encode the hp.M x hp.M identity matrix, i.e. one one-hot row per message.
    ip = torch.eye(hp.M, device=device)
    enc = encoder(ip).cpu().detach().numpy()

    # There are only hp.M samples, while TSNE defaults to perplexity=30.
    # Recent scikit-learn releases raise ValueError when
    # perplexity >= n_samples, so cap it just below the sample count.
    # For hp.M > 30 this is identical to the default.
    enc_emb = TSNE(perplexity=min(30.0, hp.M - 1)).fit_transform(enc).T

    # Centre each of the two embedding axes, then scale by the overall
    # standard deviation of the centred embedding.
    enc_emb -= enc_emb.mean(axis=1).reshape(2, 1)
    enc_emb /= enc_emb.std()

    plt.figure(dpi=250)
    plt.grid()
    plt.scatter(enc_emb[0], enc_emb[1])
    plt.title('Constellation for RBF ({0},{1})'.format(hp.n, hp.k))
    plt.savefig(join('Constellations', 'RBF({0},{1}).png'.format(hp.n, hp.k)))
    plt.show()

# `start` is set outside this view; presumably the script's start timestamp.
print('Total time taken:{0:.2f} seconds'.format(time() - start))
def preprocess(data, data_in='../data/raw', data_out='../data/preprocessed'):
    """
    Build and store the train/test arrays of the dataset named ``data``.

    The raw file is read from below ``data_in``, turned into a feature matrix
    and a label vector, split into training and test rows, and written with
    ``np.save`` as ``trn_x``, ``trn_y``, ``test_x`` and ``test_y`` inside
    ``<data_out>/<data>`` (the directory is created when missing). One
    tab-separated summary line is printed: dataset name, number of rows,
    number of feature columns and ``labels.max() + 1``.

    Supported names: 'breast-cancer', 'breast-cancer-wisconsin',
    'heart-disease', 'hepatitis', 'brain-tumor', 'diabetes' and 'synthetic'.
    'synthetic' reads the breast-cancer-wisconsin file and replaces its
    features by a standardized t-SNE embedding of them.

    Raises ``ValueError(data)`` for any other name.
    """
    # NOTE(review): split, to_labels, to_numerical_features and
    # to_categorical_features live elsewhere in this file; the training
    # indices handed to to_numerical_features are presumably used to fit the
    # scaling on training rows only -- confirm against their definitions.
    if data == 'breast-cancer':
        raw_path = '{}/{}/breast-cancer.data'.format(data_in, data)
        frame = pd.read_csv(raw_path, header=None)
        train_rows, test_rows = split(frame.shape[0])
        x = to_categorical_features(frame.iloc[:, 1:])
        y = to_labels(frame[0])

    elif data == 'breast-cancer-wisconsin':
        raw_path = '{}/{}/breast-cancer-wisconsin.data'.format(data_in, data)
        frame = pd.read_csv(raw_path, header=None)
        train_rows, test_rows = split(frame.shape[0])
        x = to_numerical_features(frame, train_rows, list(range(1, 10)))
        y = to_labels(frame[10])

    elif data == 'heart-disease':
        raw_path = '{}/{}/processed.cleveland.data'.format(data_in, data)
        frame = pd.read_csv(raw_path, header=None)
        train_rows, test_rows = split(frame.shape[0])
        numeric_part = to_numerical_features(frame, train_rows, [0, 3, 4, 7, 9])
        categorical_part = to_categorical_features(frame, [1, 2, 5, 6, 8, 10, 12])
        x = np.concatenate((numeric_part, categorical_part), axis=1)
        # Column 13 is binarized: any value above zero becomes label 1.
        is_positive = frame[13] > 0
        y = to_labels(is_positive.astype(np.int64))

    elif data == 'hepatitis':
        raw_path = '{}/{}/hepatitis.data'.format(data_in, data)
        frame = pd.read_csv(raw_path, header=None)
        train_rows, test_rows = split(frame.shape[0])
        numeric_part = to_numerical_features(
            frame, train_rows, [1, 14, 15, 16, 17, 18])
        categorical_part = to_categorical_features(frame, [2, 4, 5])
        x = np.concatenate((numeric_part, categorical_part), axis=1)
        y = to_labels(frame[19])

    elif data == 'brain-tumor':
        raw_path = '{}/{}/Dataset.csv'.format(data_in, data)
        frame = pd.read_csv(raw_path)
        # Rows whose 'Area' is zero are dropped before splitting.
        nonzero_area = frame['Area'] != 0
        frame = frame[nonzero_area].reset_index()
        train_rows, test_rows = split(frame.shape[0])
        shape_columns = [
            'Area', 'Perimeter', 'Convex Area', 'Solidity',
            'Equivalent Diameter', 'Major Axis', 'Minor Axis'
        ]
        x = to_numerical_features(frame, train_rows, shape_columns)
        y = to_labels(frame['Class'])

    elif data == 'diabetes':
        raw_path = '{}/{}/pima-indians-diabetes.csv'.format(data_in, data)
        frame = pd.read_csv(raw_path, header=None)
        train_rows, test_rows = split(frame.shape[0])
        # Zeros in every column except the first and the last are turned
        # into NaN before the features are built.
        zeros_as_nan = frame.iloc[:, 1:-1].replace(0, np.nan)
        frame.iloc[:, 1:-1] = zeros_as_nan
        x = to_numerical_features(frame.iloc[:, 1:-1], train_rows)
        y = to_labels(frame.iloc[:, -1])

    elif data == 'synthetic':
        raw_path = (
            '{}/breast-cancer-wisconsin/breast-cancer-wisconsin.data'.format(
                data_in))
        frame = pd.read_csv(raw_path, header=None)
        train_rows, test_rows = split(frame.shape[0])
        x = to_numerical_features(frame, train_rows, list(range(1, 10)))
        # Embed with a fixed seed, then standardize every embedding column.
        x = TSNE(random_state=0).fit_transform(x)
        x = (x - x.mean(axis=0)) / x.std(axis=0)
        y = to_labels(frame[10])

    else:
        raise ValueError(data)

    summary = '{}\t{}\t{}\t{}'.format(data, x.shape[0], x.shape[1], y.max() + 1)
    print(summary)

    train_x, train_y = x[train_rows], y[train_rows]
    held_out_x, held_out_y = x[test_rows], y[test_rows]

    os.makedirs('{}/{}'.format(data_out, data), exist_ok=True)
    outputs = (
        ('{}/{}/trn_x'.format(data_out, data), train_x),
        ('{}/{}/trn_y'.format(data_out, data), train_y),
        ('{}/{}/test_x'.format(data_out, data), held_out_x),
        ('{}/{}/test_y'.format(data_out, data), held_out_y),
    )
    for target, array in outputs:
        np.save(target, array)
# Softmax over dim=1 turns the decoder output `op` (computed outside this
# view) into per-row probabilities; only the first row is printed.
print('Output:\t\t', torch.softmax(op, dim=1).detach().cpu().numpy()[0])

if hyper.constellation:  # to visualize encodings, etc.
    # exist_ok=True replaces the manual errno.EEXIST check and additionally
    # fails right here (instead of later in savefig) when 'Constellations'
    # exists but is not a directory.
    os.makedirs('Constellations', exist_ok=True)

    # Encode the hyper.M x hyper.M identity matrix: one one-hot row per message.
    ip = torch.eye(hyper.M, device=device)
    enc = encoder(ip).cpu().detach().numpy()

    # There are only hyper.M samples, while TSNE defaults to perplexity=30.
    # Recent scikit-learn releases raise ValueError when
    # perplexity >= n_samples, so cap it just below the sample count.
    # For hyper.M > 30 this is identical to the default.
    enc_emb = TSNE(perplexity=min(30.0, hyper.M - 1)).fit_transform(enc).T

    # Centre each of the two embedding axes, then scale by the overall
    # standard deviation of the centred embedding.
    enc_emb -= enc_emb.mean(axis=1).reshape(2, 1)
    enc_emb /= enc_emb.std()

    plt.figure(dpi=250)
    plt.grid()
    plt.scatter(enc_emb[0], enc_emb[1])
    plt.title('Constellation of autoencoder ({0},{1})'.format(hyper.n, hyper.k))
    plt.savefig(
        join('Constellations',
             'constellation_({0},{1}).png'.format(hyper.n, hyper.k)))
    plt.show()

if hyper.boundaries:  # to (try to) visualize decision boundaries, etc.
    # Same directory-creation idiom as above.
    os.makedirs('Decision Boundaries', exist_ok=True)
plt.colorbar()
plt.rcParams['font.size'] = 10
# Write each sample's index label at its position in the PCA score plot
# (first two columns of `score`).
for row_position, sample_name in enumerate(score.index):
    plt.text(
        score.iloc[row_position, 0],
        score.iloc[row_position, 1],
        sample_name,
        horizontalalignment='center',
        verticalalignment='top',
    )
plt.xlabel('t_1 (PCA)')
plt.ylabel('t_2 (PCA)')
plt.show()

# t-SNE
# Optimize the perplexity using the k3n-error
# NOTE(review): sample_functions.k3n_error is defined outside this view; it is
# called once in each direction (x -> t and t -> x) and the results are summed.
k3n_errors = []
number_of_candidates = len(candidates_of_perplexity)
for trial_number, perplexity in enumerate(candidates_of_perplexity, start=1):
    print(trial_number, '/', number_of_candidates)
    tsne_model = TSNE(perplexity=perplexity, n_components=2, init='pca',
                      random_state=10)
    t = tsne_model.fit_transform(autoscaled_x)
    # Standardize every t-SNE axis with the sample standard deviation (ddof=1).
    scaled_t = (t - t.mean(axis=0)) / t.std(axis=0, ddof=1)
    forward_error = sample_functions.k3n_error(
        autoscaled_x, scaled_t, k_in_k3n_error)
    backward_error = sample_functions.k3n_error(
        scaled_t, autoscaled_x, k_in_k3n_error)
    k3n_errors.append(forward_error + backward_error)

plt.rcParams['font.size'] = 18
plt.scatter(candidates_of_perplexity, k3n_errors, c='blue')
plt.xlabel("perplexity")
plt.ylabel("k3n-errors")
plt.show()

# Choose the first candidate whose summed k3n-error equals the minimum.
lowest_error = np.min(k3n_errors)
best_positions = np.where(k3n_errors == lowest_error)[0]
optimal_perplexity = candidates_of_perplexity[best_positions[0]]
print('\nk3n-error による perplexity の最適値 :', optimal_perplexity)

# t-SNE
# Final embedding with the selected perplexity, saved with the index of `x`.
final_model = TSNE(perplexity=optimal_perplexity, n_components=2, init='pca',
                   random_state=10)
t = final_model.fit_transform(autoscaled_x)
t = pd.DataFrame(t, index=x.index, columns=['t_1 (t-SNE)', 't_2 (t-SNE)'])
t.to_csv('tsne_t.csv')
z = q['styles'].value.cpu().detach().numpy() else: q = enc(images) z = q['styles'].value.data.detach().numpy() zs.append(z) ys.append(y.numpy()) ys = np.concatenate(ys,0) zs = np.concatenate(zs,0) # run TSNE when number of latent dims exceeds 2 if NUM_STYLE > 2: from sklearn.manifold import TSNE zs2 = TSNE().fit_transform(zs) zs2_mean = zs2.mean(0) zs2_std = zs2.std(0) else: zs2 = zs # display a 2D plot of the digit classes in the latent space fig = plt.figure(figsize=(6,6)) ax = plt.gca() colors = [] for k in range(10): m = (ys == k) p = ax.scatter(zs2[m, 0], zs2[m, 1], label='y=%d' % k, alpha=0.5, s=5) colors.append(p.get_facecolor()) ax.legend() #fig.tight_layout()