def prepareFMNISTData(scale=0, PCA_threshold=-1, Whitening=0, PCA_p=None):
    """Load Fashion-MNIST and return train / validation / test splits.

    The official training set is shuffled with ``RANDOM_SEED`` and split
    80% / 20% into training and validation parts.  Optional preprocessing is
    applied in this order: PCA projection, standardisation, whitening.  All
    statistics (PCA basis, mean, variance, whitening matrices) are estimated
    on the training part only and then applied to validation and test data.

    Args:
        scale: when ``1``, centre every feature and divide by its standard
            deviation (both computed on the training split).
        PCA_threshold: value forwarded to the project ``PCA`` helper
            (presumably the fraction of variance to keep -- confirm against
            that helper).  ``-1`` disables the PCA step.
        Whitening: when ``1``, apply ``whiteningTransform`` using the
            matrices returned by ``PCA(X_train, 1.0)``.
        PCA_p: optional number of components that overrides the ``p``
            chosen by ``PCA``.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    mndata = MNIST('fashion_data')
    imagesTrain, labelsTrain = mndata.load_training()
    imagesTest, labelsTest = mndata.load_testing()

    X_test = np.array(imagesTest)
    y_test = np.array(labelsTest)

    # Convert the training lists once instead of once per split.
    X_all = np.array(imagesTrain)
    y_all = np.array(labelsTrain)

    n = len(imagesTrain)
    np.random.seed(RANDOM_SEED)  # reproducible train/validation split
    indices = np.random.permutation(n)
    split = int(4 * n / 5)
    trainingIndex = indices[:split]
    validationIndex = indices[split:]

    X_train = X_all[trainingIndex]
    y_train = y_all[trainingIndex]
    X_val = X_all[validationIndex]
    y_val = y_all[validationIndex]

    if PCA_threshold != -1:
        Z_train, p, _, U, _ = PCA(X_train, PCA_threshold)
        if PCA_p is not None:
            p = PCA_p
        Z_test, _ = project(X_test, U, p)
        Z_val, _ = project(X_val, U, p)
        X_train = Z_train[:, :p]
        X_val = Z_val[:, :p]
        X_test = Z_test[:, :p]
        print("PCA_Threshold = " + str(PCA_threshold) + ", P = " + str(p))

    if scale == 1:
        mean = np.mean(X_train, axis=0)
        X_train = X_train - mean
        X_test = X_test - mean
        X_val = X_val - mean
        std = np.sqrt(np.var(X_train, axis=0))
        # A constant feature has zero variance; dividing by it would fill the
        # column with NaN/inf.  Leave such (already centred) columns as-is.
        std = np.where(std == 0, 1.0, std)
        X_train = X_train / std
        X_test = X_test / std
        X_val = X_val / std

    if Whitening == 1:
        _, _, _, U, W = PCA(X_train, 1.0)
        X_train = whiteningTransform(X_train, W, U)
        X_test = whiteningTransform(X_test, W, U)
        X_val = whiteningTransform(X_val, W, U)

    return (X_train, y_train, X_val, y_val, X_test, y_test)
def construct_mnist():
    """Reconstruct images of one MNIST digit from their principal components.

    Reads ``dataset/MNIST_test.txt`` (label in column 0, pixels after it),
    takes the first 100 samples of digit 9, shows them, rebuilds them from a
    single principal component via the project ``PCA`` helper, shows the
    reconstruction and prints the signal-to-noise ratio of every picture.
    """
    n_components = 1  # number of principal components kept
    digit = 9         # handwritten digit to analyse
    n_samples = 100   # number of samples used

    print('read from MNIST_test.txt...')
    raw = np.loadtxt('dataset/MNIST_test.txt', delimiter=',')
    # Split labels from features.
    labels, pixels = raw[:, 0], raw[:, 1:]

    # --- single digit ---
    # Positions of every sample showing the chosen digit.
    rows = np.argwhere(labels == digit)
    digit_imgs = pixels[rows][:n_samples]

    # Show the original pictures.
    slice_imgs(digit_imgs, 'original')

    # Principal component analysis followed by feature reconstruction.
    flattened = np.asarray(digit_imgs).reshape((n_samples, 784))
    X_n_k, rebuilt = PCA(flattened, n_components)

    # Show the reconstructed pictures.
    slice_imgs(np.real(rebuilt), 'reconstruct')

    # Signal-to-noise ratio of each picture.
    print('SNR of each picture...')
    ratios = []
    for pos in range(n_samples):
        ratios.append(compute_SNR(digit_imgs[pos], rebuilt[pos]))
    print(ratios)
def draw_2d():
    """Scatter-plot the 2-component PCA projection of ``data_set.x``.

    The projection is indexed as ``[component, sample]``; samples 0-49,
    50-99 and 100-149 are drawn as the three Iris classes.
    """
    projected = PCA(data_set.x, 2)
    plt.figure()

    # (sample columns, marker, colour, marker size, legend label)
    groups = (
        (slice(None, 50), 'x', 'm', 30, 'Iris-setosa'),
        (slice(50, 100), '+', 'c', 50, 'Iris-versicolor'),
        (slice(100, 150), 'o', 'r', 15, 'Iris-virginica'),
    )
    for cols, marker, colour, size, label in groups:
        plt.scatter(projected[0, cols],
                    projected[1, cols],
                    marker=marker,
                    color=colour,
                    s=size,
                    label=label)

    plt.legend()
    plt.title('PCA of IRIS k = 2')
    plt.show()
def main():
    """Cluster the wine data set with k-means after PCA at several thresholds.

    Column ``"0"`` of the CSV is used as the ground-truth label; the other
    columns are scaled by their column maximum.  For every threshold the
    data is reduced with ``PCA.PCA``, clustered by ``Kmeans.test_kmeans`` and
    the predicted labels plus reduced features are written to
    ``./result_ndim{ndim}_K{K}.csv``.
    """
    frame = pd.read_csv('/Users/bytedance/Desktop/AI/data/wine.data.csv')
    label = frame.pop("0").to_numpy()
    frame = frame / frame.max(axis=0)  # normalize
    data = frame.to_numpy()

    # PCA
    K = 3
    for thresh in (0.9, 0.8, 0.7, 0.6, 0.5):
        new_data, _, _ = PCA.PCA(data.T, 2, True, thresh)
        ndim = new_data.shape[1]
        banner = f"======== kmeans, K = {K}, ndim = {ndim}, thresh = {thresh} ========="
        print(banner)

        # Only a 2-D projection can be drawn directly.
        if ndim == 2:
            plt.figure(1)
            plt.scatter(new_data[:, 0], new_data[:, 1], s=50)

        S, RI, predicted_label = Kmeans.test_kmeans(new_data, label, K)

        # Predicted label first, reduced coordinates after it.
        columns = [pd.DataFrame(predicted_label), pd.DataFrame(new_data)]
        result_df = pd.concat(columns, axis=1)
        result_df.to_csv(f"./result_ndim{ndim}_K{K}.csv")
def treat_data(self, data):
    """Split participant names from the features and PCA-transform the latter.

    Args:
        data: a pandas ``DataFrame`` with a ``'participant'`` column; all
            remaining columns are treated as features.

    Returns:
        ``(features, names)`` where ``features`` is whatever ``PCA.PCA``
        returns for the feature matrix and ``names`` is the list of
        participant values in row order.
    """
    name = data['participant'].tolist()
    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
    # same ndarray and exists in every pandas version.
    features = data.drop(['participant'], axis=1).values
    features = PCA.PCA(features)  # PCA it
    return features, name
def test_PCA():
    """Smoke-test the ``PCA`` class on noisy, linearly related 2-D points.

    Prints the components of a 2-component fit, then reduces the data to one
    component, restores it and plots the original points (blue) against the
    restored ones (red).
    """
    samples = np.empty((100, 2))
    samples[:, 0] = np.random.uniform(0., 100., size=100)
    noise = np.random.normal(0, 10., size=100)
    samples[:, 1] = 0.75 * samples[:, 0] + 3. + noise

    full_model = PCA(n_components=2)
    full_model.fit(samples)
    print(full_model.components_)

    # Dimensionality reduction
    reducer = PCA(n_components=1)
    reducer.fit(samples)
    reduced = reducer.transform(samples)
    print(reduced.shape)

    restored = reducer.inverse_transform(reduced)
    print(restored.shape)

    plt.scatter(samples[:, 0], samples[:, 1], color='b')
    plt.scatter(restored[:, 0], restored[:, 1], color='r', alpha=0.5)
    plt.show()
def pca(self, X, dim=25):
    """Reduce the dimensionality of ``X`` with PCA.

    :param X: the images to reduce
    :param dim: dimensionality of the images after reduction (default 25)
    :return: whatever ``PCA(X).reduction(dim=dim)`` produces
    """
    reducer = PCA(X)
    # The requested dimension was previously ignored (hard-coded to 25).
    return reducer.reduction(dim=dim)
def pca_and_call(features=all_features,
                 fn=using_distance_to_original,
                 dim=2,
                 k=-1):
    """Project feature vectors with PCA, then hand them to ``fn``.

    Args:
        features: sequence of ``(label, vector)`` pairs.
        fn: callable receiving the re-paired ``(label, projected_vector)``
            list, plus ``k`` as a second argument when ``k > 0``.
        dim: value forwarded to ``PCA.PCA`` as its second argument.
        k: optional extra argument for ``fn``; ignored unless positive.

    Returns:
        Whatever ``fn`` returns.
    """
    vectors = np.array([entry[1] for entry in features])
    # Note: this warps the variable data
    projected = PCA.PCA(vectors, dim)
    relabelled = [(entry[0], projected[pos])
                  for pos, entry in enumerate(features)]
    extra = (k,) if k > 0 else ()
    return fn(relabelled, *extra)
def trans_3_to_2():
    """Rotate generated 3-D data, reduce it to 2-D and print per-axis variances.

    Prints ``np.cov`` of every column before the rotation, after the rotation
    and for the first value returned by ``PCA(datax, 2)``.
    """

    def per_axis_cov(points):
        # One np.cov value per column of ``points``.
        return [np.cov(points[:, axis]) for axis in range(points.shape[1])]

    # Generate the data.
    original, _classes = generate_data(50, 1)
    # Rotate the data.
    rotated = data_rotate(original)
    # Extract the principal components to obtain two-dimensional data.
    reduced, _second = PCA(rotated, 2)

    print('旋转前', per_axis_cov(original))
    print('旋转后', per_axis_cov(rotated))
    print('主成分', per_axis_cov(reduced))
def task_4_1():
    """Plot the information gain of threshold splits on the top five PCs.

    Loads entry 18 of ``data/new_100_corner.npy``, replaces columns 1.. with
    their principal-component representation (``PCA.PCA(...).to_PC``), keeps
    column 0 plus the first five PCs, and for every PC and every decision
    point in ``[-10, 10)`` (step 0.1) computes
    ``get_entropy(data) - get_entropy_groups(split(...))``.
    One curve per PC is drawn.
    """
    # Load the data set and pick one entry.
    data = np.load("data/new_100_corner.npy")
    data = data[18]
    print(data.shape)

    # Do PCA to get the top 5 PCs (column 0 is left untouched).
    myPCA = PCA.PCA(data[:, 1:])
    data[:, 1:] = myPCA.to_PC(data[:, 1:])
    data = data[:, :6]
    print(np.min(data, axis=0))
    print(np.max(data, axis=0))

    # Build the binary splits: bin_dict[PC][decision point] -> split result.
    # np.arange is the correct spelling; "np.arrange" raises AttributeError.
    decision_points = np.arange(-10.0, 10.0, 0.1)
    PC_range = range(0, 5, 1)
    bin_dict = {
        PC: {dp: split(data, dp, index=PC) for dp in decision_points}
        for PC in PC_range
    }

    # Entropy before any split.
    raw_entropy = get_entropy(data)
    print("Start entropy:", raw_entropy)

    # Information gain for each PC at each decision point.
    entropies = [
        [raw_entropy - get_entropy_groups(bin_dict[PC][dp])
         for dp in decision_points]
        for PC in PC_range
    ]

    # Plot one curve per PC.
    plt.figure()
    plt.xlabel("Decision point")
    plt.ylabel("Information gain")
    for entropy in entropies:
        plt.plot(decision_points, entropy)
    figure = plt.gcf()  # get current figure
    figure.set_size_inches(16, 12)
    plt.show()
def test(Num):
    """Classify 10000 test digits with per-class Gaussians in PCA space.

    The first ``Num`` training samples are reduced to 15 dimensions with the
    project ``PCA`` helper.  For every digit 0-9 a mean and covariance matrix
    are estimated; a test point is assigned to the class with the largest
    ``exp(-0.5 * d^T C^-1 d) / sqrt(det(C))``.

    Side effects: appends every true label to the module-level list
    ``testlabel`` and the true label of every misclassified sample to the
    module-level list ``test_error``; prints the training size and accuracy.

    Args:
        Num: number of training samples to use.
    """
    # Reduce to 15 dimensions.
    D = 15
    trainingData, trainingLabel, testData, testLabel = getData()
    trainingData = trainingData[:Num, :]
    trainingLabel = trainingLabel[:Num]
    trainingData, DimReduVct, PCAmean = PCA(trainingData, D)

    # Per-class statistics.  The inverse covariance and the normalisation
    # term do not depend on the test sample, so they are computed once here
    # instead of once per (sample, class) pair.
    meanOf = []
    invCovarianceOf = []
    normaliserOf = []
    for digit in range(10):
        rows = argwhere(trainingLabel == digit).ravel()
        classSamples = trainingData[rows]
        meanOf.append(mean(classSamples, axis=0))
        covariance = cov(classSamples.T)
        invCovarianceOf.append(inv(covariance))
        normaliserOf.append(sqrt(det(covariance)))

    # Project the test data with the training mean and basis.
    testData = dot(testData - PCAmean, DimReduVct)

    # Recognition (10000 samples).
    hit = 0
    for sample in range(10000):
        testPoint = testData[sample, :]
        possibilityOf = []
        for digit in range(10):
            diff = testPoint - meanOf[digit]
            mahalanobis = dot(dot(diff, invCovarianceOf[digit]), diff.T)
            possibilityOf.append(exp(-0.5 * mahalanobis) / normaliserOf[digit])
        guest = argmax(possibilityOf)
        testlabel.append(testLabel[sample])
        if guest == testLabel[sample]:
            hit += 1
        else:
            test_error.append(testLabel[sample])

    print("\n trainingData:", Num)
    print("correct rate:{:.2f}%".format((hit / 10000.0) * 100))
def draw_3d():
    """Scatter-plot the 3-component PCA projection of ``data_set.x`` in 3-D.

    The projection is indexed as ``[component, sample]``.  Every sample is
    drawn with its own ``scatter`` call; samples 0-49, 50-99 and 100-149 use
    different markers and colours.
    """
    projected = PCA(data_set.x, 3)
    axes = plt.subplot(111, projection='3d')

    # (sample indices, marker, colour)
    groups = (
        (range(50), 'x', 'm'),
        (range(50, 100), '+', 'c'),
        (range(100, 150), 'o', 'r'),
    )
    for members, marker, colour in groups:
        for col in members:
            axes.scatter(projected[0, col],
                         projected[1, col],
                         projected[2, col],
                         marker=marker,
                         c=colour,
                         s=30)

    axes.set_zlabel('Z')
    axes.set_ylabel('Y')
    axes.set_xlabel('X')
    plt.title('PCA of IRIS k = 3')
    plt.show()
def RunTrainLDA(infile, pcaFile, ldaFile):
    """Train PCA and LDA projections from a pickled data set and save them.

    Args:
        infile: path of a pickle file holding two consecutive objects: the
            data set, then the subject ids.
        pcaFile: destination passed to ``np.save`` for the PCA projection.
        ldaFile: destination passed to ``np.save`` for the LDA projection.
    """
    import cPickle

    # NOTE(security): unpickling executes arbitrary code; only use this on
    # trusted files.
    # The context manager closes the file even when unpickling fails.
    with open(infile, "r") as fp:
        dataset = cPickle.load(fp)
        subjID = cPickle.load(fp)

    pca = PCA(dataset)
    pca_proj = pca.compute()
    np.save(pcaFile, pca_proj)

    lda = LDA(dataset, subjID, pca_proj)
    projData = lda.projectData()
    lda_proj = lda.train(projData)
    np.save(ldaFile, lda_proj)
def testRelation():
    """Map texts into LSTM state space to inspect how similarity is judged.

    Restores the latest checkpoint from ``FLAGS.checkpoint_dir``, feeds 1000
    texts from ``content_law_labeled.txt`` through the model one at a time
    and records the returned state of each of the three layers.  The vectors
    collected for layer 1 are reduced to two dimensions with ``PCA.PCA``,
    plotted with ``DrawPlot.drawScatter`` and written to ``result.txt`` as
    tab-separated pairs.

    If no checkpoint is found the function reports it and returns without
    producing any output.
    """
    with tf.Session() as sess:
        tg = DataPreparation.TupleGenerator()
        ohEncoder = one_hot.OneHotEncoder()
        generator = tg.tuple_gen('content_law_labeled.txt')

        input_data = tf.placeholder(tf.int32, shape=[1, None])
        length_data = tf.placeholder(tf.int32, shape=[1])
        ops = BuildModel.model(input_data, None, length=length_data)

        saver = tf.train.Saver(tf.global_variables())
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if not checkpoint:
            # Without restored weights the model variables are uninitialised.
            print('[WARN] No checkpoint found in checkpoint_dir, aborting.')
            return
        saver.restore(sess, checkpoint)

        lstmcells = [{'c': [], 'h': []} for _ in range(3)]

        ALL_NUM = 1000
        for count in range(ALL_NUM):
            content = next(generator)
            enter_data = ohEncoder.one_hot_single(content, True)
            state, predict = sess.run(
                [ops['last_state'], ops['prediction']],
                feed_dict={
                    input_data: [enter_data],
                    length_data: [len(enter_data)]
                })
            for c in range(3):
                lstmcells[c]['c'].append(np.array(state[c][0]))
                # NOTE(review): 'h' stores the same element as 'c'
                # (state[c][0]); this looks like a copy-paste slip for
                # state[c][1] -- confirm against the state layout.
                lstmcells[c]['h'].append(np.array(state[c][0]))
            if count % 10 == 0:
                print('[INFO] Getting %d \tst word\'s vector...' % count)

        vec = np.array(lstmcells[1]['c'])
        lddata, recon = PCA.PCA(np.mat(vec), 2)
        points, labels = tg.generateLabel(lddata)
        DrawPlot.drawScatter(points, labels)

        # The context manager flushes and closes the result file.
        with open('result.txt', 'w', encoding='utf-8') as outfile:
            for i in range(ALL_NUM):
                outfile.write('%f\t%f\n' % (lddata[i, 0], lddata[i, 1]))
def PCA(self, dynamic_features, static_features):
    """Run PCA on every dynamic feature and store the outcome on ``self``.

    For each feature the module-level ``PCA(learn, model_output, feature)``
    is called.  A non-empty result contributes its columns and data; an empty
    result means there was not enough data, so the feature is removed from
    ``dynamic_features`` and ``'cos_<feature>'`` / ``'sin_<feature>'`` are
    appended to ``static_features`` instead.  Both lists are modified in
    place.

    Sets ``self.dynamic_features``, ``self.static_features``,
    ``self.pca_data`` and ``self.pca_features``.
    """
    learn = self.model_learn
    collected_columns = []
    collected_results = []

    # Iterate over a copy because features may be removed from the original.
    for feature in dynamic_features.copy():
        outcome = PCA(learn, self.model_output, feature)
        if len(outcome) != 0:
            collected_columns += outcome.columns.tolist()
            collected_results.append(outcome)
            continue

        # Not enough data for PCA on this feature: use cos and sin instead.
        dynamic_features.remove(feature)
        for prefix in ('cos_', 'sin_'):
            static_features.append(prefix + feature)
        print(' >>> alert : ' + feature +
              ' removed from dynamic features, cos() added in static_features instead')

    self.dynamic_features = dynamic_features
    self.static_features = static_features
    self.pca_data = collected_results
    self.pca_features = collected_columns
def pcaSklearn(training, dimension=700):
    """Fit PCA on ``training``, display sample images and return the projection.

    Shows, in order, the first sample in reduced space, the first original
    sample and its reconstruction from the reduced representation.

    Args:
        training: samples to fit and transform; the first sample is rendered
            as a 28x28 image.
        dimension: number of components to keep.

    Returns:
        The transformed (low-dimensional) data.
    """
    pca = PCA(n_components=dimension)
    pca.fit(training)
    low = pca.transform(training)
    same = pca.inverse_transform(low)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("low[0].shape")
    print(low[0].shape)
    image2DInitial = vectorToImage(training[0], (28, 28))
    print(same[0].shape)
    image2D = vectorToImage(same[0], (28, 28))
    # NOTE(review): low[0] has `dimension` entries, so a (20, 20) image
    # presumably only fits when dimension == 400 -- confirm what
    # vectorToImage does with the default of 700.
    image2DLow = vectorToImage(low[0], (20, 20))

    for picture in (image2DLow, image2DInitial, image2D):
        plt.imshow(picture, cmap=plt.cm.gray)
        plt.show()

    print("done")
    return low
def train(self, labels, labelsObj, ncomp):
    """Fit a PCA decomposition on the processed sample data.

    The data returned by ``PCA(labels, labelsObj, ncomp).processData()`` is a
    2-D list: each row is one sample (all samples probably of a word) whose
    first column is the label identifying the sample (e.g. "A").  ``labels``
    says where the samples came from, such as JamesJoyce's "Sisters".

    Sets ``self.wordlist``, ``self.ncomp``, ``self.labels``, ``self.pca_h``
    (the fitted ``decomposition.PCA``) and ``self.X`` (the transformed
    features).
    """
    processor = PCA(labels, labelsObj, ncomp)
    data, self.wordlist = processor.processData()
    self.ncomp = ncomp
    self.labels = labels

    # Strip the first column: it is the sample label, the rest are features.
    y = [sample[0] for sample in data]
    x = [[sample[col] for col in range(1, len(sample))] for sample in data]

    self.pca_h = decomposition.PCA(ncomp)
    self.pca_h.fit(x)
    self.X = self.pca_h.transform(x)
def model(TEST=True,
          Comparison_with_PCA=True,
          model_name="Autoencoder",
          corrupt_probability=0.5,
          optimizer_selection="Adam",
          learning_rate=0.001,
          training_epochs=100,
          batch_size=128,
          display_step=10,
          batch_norm=True):
    """Train or evaluate a denoising autoencoder on MNIST (TensorFlow 1.x).

    Two architectures with a 2-unit bottleneck are available: a fully
    connected one (``"Autoencoder"``) and a convolutional one
    (``"Convolution_Autoencoder"``).  With ``batch_norm`` enabled the model
    name is prefixed with ``"batch_norm_"``; that prefixed name selects the
    checkpoint directory ``model/<name>`` and the TensorBoard directory
    ``tensorboard/<name>``.

    Args:
        TEST: ``False`` trains (and clears the TensorBoard directory first);
            ``True`` only restores the latest checkpoint and evaluates.
        Comparison_with_PCA: in test mode, plot the 2-D codes next to the
            result of ``PCA.PCA`` and save the figure.
        model_name: ``"Autoencoder"`` or ``"Convolution_Autoencoder"``.
        corrupt_probability: blending factor ``r`` used by the input
            corruption (see ``Denoising``).
        optimizer_selection: ``"Adam"``, ``"RMSP"`` or ``"SGD"``.
        learning_rate: optimizer learning rate.
        training_epochs: number of passes over the training set.
        batch_size: mini-batch size.
        display_step: validate and save a checkpoint every this many epochs.
        batch_norm: apply batch normalisation after every layer; when
            disabled an L2 regulariser is attached to the weights instead.

    Raises:
        ValueError: for an unknown ``model_name`` or ``optimizer_selection``
            (previously these surfaced as TypeError / UnboundLocalError).
    """
    mnist = input_data.read_data_sets("", one_hot=False)

    if batch_norm:
        model_name = "batch_norm_" + model_name

    dense_names = ("Autoencoder", "batch_norm_Autoencoder")
    conv_names = ("Convolution_Autoencoder",
                  "batch_norm_Convolution_Autoencoder")

    if not TEST:
        if os.path.exists("tensorboard/{}".format(model_name)):
            shutil.rmtree("tensorboard/{}".format(model_name))

    # ksize, strides -> [1, 2, 2, 1] = [one image, width, height, one channel]
    # Pooling is done per batch item and per channel, hence the outer 1, 1.
    def pooling(input, type="avg", k=2, padding='VALID'):
        if type == "max":
            return tf.nn.max_pool(input,
                                  ksize=[1, k, k, 1],
                                  strides=[1, k, k, 1],
                                  padding=padding)
        else:
            return tf.nn.avg_pool(input,
                                  ksize=[1, k, k, 1],
                                  strides=[1, k, k, 1],
                                  padding=padding)

    def weight_variable(weight_shape, weight_init):
        # Create variable "w"; without batch norm it carries an L2 regulariser.
        if batch_norm:
            return tf.get_variable("w", weight_shape, initializer=weight_init)
        weight_decay = tf.constant(0.00001, dtype=tf.float32)
        return tf.get_variable(
            "w",
            weight_shape,
            initializer=weight_init,
            regularizer=tf.contrib.layers.l2_regularizer(scale=weight_decay))

    def normalise(pre_activation):
        # Batch-normalise the layer output when enabled.  In test mode
        # (training=False) the stored moving averages are used.
        if batch_norm:
            return tf.layers.batch_normalization(pre_activation,
                                                 training=not TEST)
        return pre_activation

    def layer(input, weight_shape, bias_shape):
        weight_init = tf.random_normal_initializer(stddev=0.01)
        bias_init = tf.random_normal_initializer(stddev=0.01)
        w = weight_variable(weight_shape, weight_init)
        b = tf.get_variable("b", bias_shape, initializer=bias_init)
        return normalise(tf.matmul(input, w) + b)

    # strides -> [1, 2, 2, 1] = [one image, width, height, one channel]
    def conv2d(input,
               weight_shape='',
               bias_shape='',
               strides=[1, 1, 1, 1],
               padding="VALID"):
        weight_init = tf.contrib.layers.xavier_initializer(uniform=False)
        bias_init = tf.constant_initializer(value=0)
        w = weight_variable(weight_shape, weight_init)
        b = tf.get_variable("b", bias_shape, initializer=bias_init)
        conv_out = tf.nn.conv2d(input, w, strides=strides, padding=padding)
        return normalise(tf.nn.bias_add(conv_out, b))

    def conv2d_transpose(input,
                         output_shape='',
                         weight_shape='',
                         bias_shape='',
                         strides=[1, 1, 1, 1],
                         padding="VALID"):
        weight_init = tf.contrib.layers.xavier_initializer(uniform=False)
        bias_init = tf.constant_initializer(value=0)
        w = weight_variable(weight_shape, weight_init)
        b = tf.get_variable("b", bias_shape, initializer=bias_init)
        conv_out = tf.nn.conv2d_transpose(input,
                                          w,
                                          output_shape=output_shape,
                                          strides=strides,
                                          padding=padding)
        return normalise(tf.nn.bias_add(conv_out, b))

    def inference(x):
        # Build encoder and decoder; returns (encoder_output, decoder_output).
        if model_name in dense_names:
            with tf.variable_scope("encoder"):
                net = tf.reshape(x, (-1, 784))
                for scope_name, shape in (("fully1", [784, 256]),
                                          ("fully2", [256, 128]),
                                          ("fully3", [128, 64]),
                                          ("output", [64, 2])):
                    with tf.variable_scope(scope_name):
                        net = tf.nn.relu(layer(net, shape, [shape[1]]))
                encoder_output = net

            with tf.variable_scope("decoder"):
                for scope_name, shape in (("fully1", [2, 64]),
                                          ("fully2", [64, 128]),
                                          ("fully3", [128, 256])):
                    with tf.variable_scope(scope_name):
                        net = tf.nn.relu(layer(net, shape, [shape[1]]))
                with tf.variable_scope("output"):
                    decoder_output = tf.nn.sigmoid(
                        layer(net, [256, 784], [784]))
            return encoder_output, decoder_output

        if model_name in conv_names:
            unit_strides = [1, 1, 1, 1]
            with tf.variable_scope("encoder"):
                # Six 5x5 VALID convolutions: 28 -> 24 -> 20 -> 16 -> 12 -> 8
                # -> 4 pixels per side, 32 channels each.
                feature_maps = []
                net = x
                in_channels = 1
                for idx in range(1, 7):
                    with tf.variable_scope("conv_{}".format(idx)):
                        net = tf.nn.relu(
                            conv2d(net,
                                   weight_shape=[5, 5, in_channels, 32],
                                   bias_shape=[32],
                                   strides=unit_strides,
                                   padding="VALID"))
                    feature_maps.append(net)
                    in_channels = 32
                with tf.variable_scope("output"):
                    encoder_output = tf.nn.relu(
                        conv2d(net,
                               weight_shape=[4, 4, 32, 2],
                               bias_shape=[2],
                               strides=unit_strides,
                               padding="VALID"))
                    # result -> batch_size, 1, 1, 2

            with tf.variable_scope("decoder"):
                # Mirror the encoder: every transposed convolution restores
                # the shape of the matching encoder feature map.
                with tf.variable_scope("trans_conv_1"):
                    net = tf.nn.relu(
                        conv2d_transpose(
                            encoder_output,
                            output_shape=tf.shape(feature_maps[5]),
                            weight_shape=[4, 4, 32, 2],
                            bias_shape=[32],
                            strides=unit_strides,
                            padding="VALID"))
                    # result -> batch_size, 4, 4, 32
                for idx in range(2, 7):
                    with tf.variable_scope("trans_conv_{}".format(idx)):
                        net = tf.nn.relu(
                            conv2d_transpose(
                                net,
                                output_shape=tf.shape(feature_maps[6 - idx]),
                                weight_shape=[5, 5, 32, 32],
                                bias_shape=[32],
                                strides=unit_strides,
                                padding="VALID"))
                with tf.variable_scope("output"):
                    decoder_output = tf.nn.sigmoid(
                        conv2d_transpose(net,
                                         output_shape=tf.shape(x),
                                         weight_shape=[5, 5, 1, 32],
                                         bias_shape=[1],
                                         strides=unit_strides,
                                         padding="VALID"))
                    # result -> batch_size, 28, 28, 1
            return encoder_output, decoder_output

        raise ValueError("unknown model_name: {}".format(model_name))

    def reconstruction_l2(output, x):
        # Mean over the batch of the per-sample L2 reconstruction distance.
        if model_name in conv_names:
            l2 = tf.sqrt(
                tf.reduce_sum(tf.square(tf.subtract(output, x)),
                              axis=[1, 2, 3]))
        elif model_name in dense_names:
            l2 = tf.sqrt(
                tf.reduce_sum(tf.square(
                    tf.subtract(output, tf.reshape(x, (-1, 784)))),
                              axis=1))
        else:
            raise ValueError("unknown model_name: {}".format(model_name))
        return tf.reduce_mean(l2)

    def evaluate(output, x):
        with tf.variable_scope("validation"):
            tf.summary.image('input_image',
                             tf.reshape(x, [-1, 28, 28, 1]),
                             max_outputs=5)
            tf.summary.image('output_image',
                             tf.reshape(output, [-1, 28, 28, 1]),
                             max_outputs=5)
            val_loss = reconstruction_l2(output, x)
            tf.summary.scalar('val_cost', val_loss)
            return val_loss

    def loss(output, x):
        return reconstruction_l2(output, x)

    def training(cost, global_step):
        tf.summary.scalar("train_cost", cost)
        optimizers = {
            "Adam": tf.train.AdamOptimizer,
            "RMSP": tf.train.RMSPropOptimizer,
            "SGD": tf.train.GradientDescentOptimizer,
        }
        if optimizer_selection not in optimizers:
            raise ValueError(
                "unknown optimizer_selection: {}".format(optimizer_selection))
        # Run the UPDATE_OPS (batch-norm moving averages) with every step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = optimizers[optimizer_selection](
                learning_rate=learning_rate)
            train_operation = optimizer.minimize(cost,
                                                 global_step=global_step)
        return train_operation

    def Denoising(x, r=0.1):
        # Every element is multiplied by a random 0/1 mask, then blended with
        # the clean input: r = 0 leaves the input untouched, r = 1 uses the
        # fully masked input.
        corrupt_x = tf.multiply(
            x,
            tf.cast(
                tf.random_uniform(shape=tf.shape(x),
                                  minval=0,
                                  maxval=2,
                                  dtype=tf.int32), tf.float32))
        Denoising_x = tf.add(tf.multiply(corrupt_x, r),
                             tf.multiply(x, 1 - r))
        return Denoising_x

    # Use a dedicated graph instead of the default one, in case several
    # graphs are used later on.
    JG_Graph = tf.Graph()
    with JG_Graph.as_default():  # makes JG_Graph the default graph
        with tf.name_scope("feed_dict"):
            x = tf.placeholder("float", [None, 28, 28, 1])
            d_x = Denoising(x, r=corrupt_probability)
        with tf.variable_scope("shared_variables",
                               reuse=tf.AUTO_REUSE) as scope:
            with tf.name_scope("inference"):
                encoder_output, decoder_output = inference(d_x)
            # or scope.reuse_variables()

            # The saver is declared here, before the optimizer exists, so
            # that the optimizer's own variables are not saved.
            # NOTE(review): global_step is created afterwards as well, so it
            # is presumably not saved/restored either -- confirm.
            with tf.name_scope("saver"):
                saver = tf.train.Saver(var_list=tf.global_variables(),
                                       max_to_keep=3)
            if not TEST:
                with tf.name_scope("loss"):
                    global_step = tf.Variable(0,
                                              name="global_step",
                                              trainable=False)
                    cost = loss(decoder_output, x)
                with tf.name_scope("trainer"):
                    train_operation = training(cost, global_step)
                with tf.name_scope("tensorboard"):
                    # NOTE(review): merged before evaluate() registers its
                    # summaries, so those are not part of this op -- confirm
                    # whether that is intended.
                    summary_operation = tf.summary.merge_all()
            with tf.name_scope("evaluation"):
                # NOTE(review): the evaluation target is the corrupted input
                # d_x while the training loss uses the clean x -- confirm.
                evaluate_operation = evaluate(decoder_output, d_x)

    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(graph=JG_Graph, config=config) as sess:
        print("initializing!!!")
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(os.path.join('model', model_name))
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Restore {} checkpoint!!!".format(
                os.path.basename(ckpt.model_checkpoint_path)))
            saver.restore(sess, ckpt.model_checkpoint_path)
            # shutil.rmtree("model/{}/".format(model_name))

        if not TEST:
            summary_writer = tf.summary.FileWriter(
                os.path.join("tensorboard", model_name), sess.graph)
            total_batch = int(mnist.train.num_examples / batch_size)

            for epoch in tqdm(range(training_epochs)):
                avg_cost = 0.
                for i in range(total_batch):
                    mbatch_x, _ = mnist.train.next_batch(batch_size)
                    feed_dict = {x: mbatch_x.reshape((-1, 28, 28, 1))}
                    _, minibatch_cost = sess.run([train_operation, cost],
                                                 feed_dict=feed_dict)
                    avg_cost += (minibatch_cost / total_batch)

                print("L2 cost : {}".format(avg_cost))
                if epoch % display_step == 0:
                    # Only the first 1000 validation images are used because
                    # of GPU memory limits.
                    val_feed_dict = {
                        x:
                        mnist.validation.images[:1000].reshape(
                            (-1, 28, 28, 1))
                    }
                    val_cost, summary_str = sess.run(
                        [evaluate_operation, summary_operation],
                        feed_dict=val_feed_dict)
                    print("Validation L2 cost : {}".format(val_cost))
                    summary_writer.add_summary(
                        summary_str, global_step=sess.run(global_step))

                    save_model_path = os.path.join('model', model_name)
                    if not os.path.exists(save_model_path):
                        os.makedirs(save_model_path)
                    saver.save(sess,
                               save_model_path + '/',
                               global_step=sess.run(global_step),
                               write_meta_graph=False)

            print("Optimization Finished!")

        # With batch_norm=True the moving averages are used here.
        if Comparison_with_PCA and TEST:
            # PCA vs. autoencoder visualisation.
            test_feed_dict = {x: mnist.test.images.reshape(-1, 28, 28, 1)}
            pca_applied = PCA.PCA(n_components=2,
                                  show_reconstruction_image=False)  # 10000, 2
            encoder_applied, test_cost = sess.run(
                [encoder_output, evaluate_operation],
                feed_dict=test_feed_dict)
            print("Test L2 cost : {}".format(test_cost))
            applied = OrderedDict(PCA=pca_applied,
                                  Autoencoder=encoder_applied.reshape(-1, 2))

            # Draw PCA and autoencoder codes side by side, one colour per
            # digit.
            labels = mnist.test.labels
            fig, ax = plt.subplots(1, 2, figsize=(18, 12))
            # fig.suptitle('vs', size=20, color='r')
            # The loop index is named `col` so that it does not rebind the
            # placeholder `x`.
            for col, (key, value) in enumerate(applied.items()):
                ax[col].grid(False)
                ax[col].set_title(key, size=20, color='k')
                ax[col].set_axis_off()
                for num in range(10):
                    members = [
                        i for i in range(len(labels)) if labels[i] == num
                    ]
                    ax[col].scatter([value[:, 0][i] for i in members],
                                    [value[:, 1][i] for i in members],
                                    s=10,
                                    label=str(num),
                                    marker='o')
                ax[col].legend()

            # plt.tight_layout()
            figure_names = {
                "Autoencoder": "PCA vs Autoencoder.png",
                "batch_norm_Autoencoder": "PCA vs batch_Autoencoder.png",
                "Convolution_Autoencoder": "PCA vs ConvAutoencoder.png",
                "batch_norm_Convolution_Autoencoder":
                "PCA vs batchConvAutoencoder.png",
            }
            if model_name in figure_names:
                plt.savefig(figure_names[model_name], dpi=300)
            plt.show()
data = data.values data_ground_truth = data[:, 1] data_features = data[:, 2:] data_id = hierarchical() # Calculating rand index ARI = adjusted_rand_score(data_ground_truth, data_id) print ('The Rand Index is', ARI) # visualization unique_label = np.unique(data_id) unique_label_gt = np.unique(data_ground_truth) # using PCA to reduce the dimension of the clustered data from k-means and plot dim2 = PCA.PCA(data_features, 2) dim2_agg = pd.DataFrame(data = dim2, index = data_id) # using PCA to reduce the dimension plot the ground truth dim2_ground_truth = pd.DataFrame(data = dim2, index = data_ground_truth) fig = plt.figure() fig.set_figheight(5) fig.set_figwidth(12) a = fig.add_subplot(1, 2, 1) img_agg = PCA.plot_pca_dim2(dim2_agg, unique_label) a.set_title('iyer Clusters from Agglomerative') a = fig.add_subplot(1, 2, 2) img_ground = PCA.plot_pca_dim2(dim2_ground_truth, unique_label_gt) a.set_title('iyer Clusters from Ground Truth')
elif groupNum == 2: x = r0 + 0.0 y = 1.0 * r1 + x xcord2.append(x) ycord2.append(y) fw.write("%f\t%f\t%d\n" % (x, y, groupNum)) fw.close() fig = plt.figure() ax = fig.add_subplot(211) ax.scatter(xcord0, ycord0, marker='^', s=90) ax.scatter(xcord1, ycord1, marker='o', s=50, c='red') ax.scatter(xcord2, ycord2, marker='v', s=50, c='yellow') ax = fig.add_subplot(212) myDat = PCA.loadDataSet('testSet3.txt') lowDDat, reconDat = PCA.PCA(myDat[:, 0:2], 1) label0Mat = lowDDat[nonzero( myDat[:, 2] == 0)[0], :2][0] #get the items with label 0 label1Mat = lowDDat[nonzero( myDat[:, 2] == 1)[0], :2][0] #get the items with label 1 label2Mat = lowDDat[nonzero( myDat[:, 2] == 2)[0], :2][0] #get the items with label 2 #ax.scatter(label0Mat[:,0],label0Mat[:,1], marker='^', s=90) #ax.scatter(label1Mat[:,0],label1Mat[:,1], marker='o', s=50, c='red') #ax.scatter(label2Mat[:,0],label2Mat[:,1], marker='v', s=50, c='yellow') ax.scatter(label0Mat[:, 0].tolist(), zeros(shape(label0Mat)[0]).tolist(), marker='^', s=90) ax.scatter(label1Mat[:, 0].tolist(), zeros(shape(label1Mat)[0]).tolist(),
#print("i "+str(i)+ " idx "+str(idx)) plt_idx = i * num_classes + j + 1 #print("plt index "+str(plt_idx)) plt.subplot(samples_per_class, num_classes, plt_idx) plt.imshow(X_train[idx].astype('uint8')) plt.axis('off') if i == 0: plt.title(classes[j]) plt.suptitle("Original CIFAR-10 data set") plt.show() # Reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) pca = PCA() print( "-----------------Fitting CIFAR-10 train set to PCA model-------------------" ) X_std = pca.fit(X_train) print( "-----------------Done Fitting CIFAR-10 train set to PCA model-------------------" ) X_reduced = pca.transform_data(X_std, None) X_reconstructed = pca.inverse_transform(X_reduced, None) # X_reconstructed = pca.inverse_standarize(X_reconstructed) # Calculte reconstruction error
# # Put the result into a color plot # Z = Z.reshape(xx.shape) # plt.figure() # plt.pcolormesh(xx, yy, Z, cmap=cmap_light) # # # Plot also the training points # plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold) # plt.xlim(xx.min(), xx.max()) # plt.ylim(yy.min(), yy.max()) # plt.title("3-Class classification (k = %i, weights = '%s')" # % (n_neighbors, weights)) # # plt.show() '''classification after applying PCA''' '''classification after applying PCA''' trainInp = np.array(PCA.PCA(floatFeaturesMatrix, 3)) neigh = KNeighborsRegressor(3) neigh.fit(trainInp, classes) valid_pred = neigh.predict(trainInp) valid_pred_1 = DataIOFactory.roundingNumbers(valid_pred) n_real_1 = classes.flatten() n_predict_1 = valid_pred_1.flatten() print('real ', n_real_1.shape) print('predict ', n_predict_1.shape) '''results''' print('PCA') ResultAnalyzer.confusionMatrix(n_real_1, n_predict_1) # neigh = KNeighborsRegressor(3) # neigh.fit(trainInp, trainOut)
x_combined.append((x_test[f])) k += 50 h += 14 x_combined = np.array(x_combined) # plt.imshow(x_combined[120]) # plt.show() # print(x_combined.shape) x_combined_vec = [] for i in range(len(x_combined)): x_temp = cv2.resize(x_combined[i], (20, 17), interpolation=cv2.INTER_AREA) x_combined_vec.append(x_temp.flatten()) x_combined_vec = np.array(x_combined_vec) print(x_combined_vec.shape) print("apply PCA") test = PCA.PCA(d=2) mean, basis, new_x_data = test.pca(x_combined_vec.T) # B=[1.29,1.27,1.46,0.91,0.56,0.99,1.00,0.37,1.24,1.23] print(new_x_data.shape) new_x_data = new_x_data.T print("done") fig, ax = plt.subplots() plt.title("Data in 2-dim after applying PCA on original dataset") ax.scatter(new_x_data[0:64, 0], new_x_data[0:64, 1], c='red', marker='o', label='class 1') ax.scatter(new_x_data[64:128, 0], new_x_data[64:128, 1], c='blue',
for picture in tqdm(pictureList): im = Image.open(path + '/' + picture) im = im.convert("L") width, height = im.size data = im.getdata() data = np.array(data, dtype='double') for elem in data.tolist(): file.write(str(elem) + " ") file.write("\n") file.close() return [width, height] if __name__ == "__main__": [width, height] = PictureToData('./at33') # 获取图片大小 data = loadDataSet("./data/pictures.data", delim=' ') #加载图片数据集 new_data, rate = PCA(data, 10) print(rate) num = 0 file = open("./data/PCApictures.data", 'w') pictureList = GetFiles('./at33') for picture in new_data: for i in picture.tolist()[0]: file.write(str(i) + ' ') file.write('\n') pictureMat = picture.reshape((height, width)) new_im = Image.fromarray(pictureMat.astype(np.uint8)) new_im.save('./newPicture/PCA_' + pictureList[num]) num += 1 file.close()
'''spliting the data into test and train and validation set based on different portions in = input = features out = output = labels''' trainIn, trainOut, validationIn, validationOut, testIn, testOut = DataIOFactory.dataSplitFactory(floatFeaturesMatrix, classes, 0.8, 0.1, 0.1) print('trainIn',trainIn.shape) print('trainout', trainOut.shape) print('feature matrix shape: ', floatFeaturesMatrix.shape) # print(classes) print('class matrix shape: ', classes.shape) '''chi2 feature selection''' sorted_features_score = FeatureSelection_Chi2.Chi2_featureSelection(floatFeaturesMatrix, classes, features_label, 'all') '''PCA''' trainInp = np.array(PCA.PCA(floatFeaturesMatrix, 3)) '''raw data''' # X = floatFeaturesMatrix # y = classes.flatten() # clf = SGDClassifier(loss="hinge", penalty="l2") # clf.fit(X, y) # # # test_predicted = clf.predict(trainIn) # print(test_predicted) # test_real = trainOut.flatten() # print(test_real) X = trainInp
from PIL import Image
from numpy import *
from pylab import *
import PCA

# Script: run PCA over a list of images and show the mean image together
# with the first seven components returned by PCA.PCA.
# NOTE(review): `imlist` (the list of image paths) is not defined in this
# excerpt; it is presumably set up before this code runs.

im = array(Image.open(imlist[0]))  # open one image to get the size
m, n = im.shape[0:2]  # image size
imbr = len(imlist)  # number of images

# Matrix holding one flattened image per row.
immatrix = array([array(Image.open(path)).flatten() for path in imlist], 'f')

# Run PCA.
V, S, immean = PCA.PCA(immatrix)

# Show the mean image and the first seven rows of V.
figure()
gray()
subplot(2, 4, 1)
imshow(immean.reshape(m, n))
for i in range(7):
    subplot(2, 4, i + 2)
    # Was "V[i].reshape9m, n)", which is a syntax error.
    imshow(V[i].reshape(m, n))

show()
# ax.scatter(data[:,0],data[:,1],data[:,2]) # data = Data_processing.data_pruning_for_school_explorer() # vectors = init_codebook_vector(20,data) # square_main(data,vectors) low, median, high, data = Data_processing.data_pruning_for_school_explorer( ) vector, data = square_main(data, init_codebook_vector(4, data)) SOM_topo(data, vector) print("PCA+SOM") '''Q 5.4 first PCA then SOM vs only SOM''' C = PCA.get_C(data) eigenvalue, eigenvector = PCA.get_eigen(C) # eigenvalue = np.array(eigenvalue,dtype=float) # do principle component analysis new_data_set, eigenvector1 = PCA.PCA(eigenvalue, eigenvector, data) new_dimension_data = PCA.get_new_points(new_data_set, eigenvector1) vector1, data1 = square_main(new_dimension_data, init_codebook_vector(4, new_dimension_data)) print(vector1) SOM_topo(data1, vector1) '''Q 5.4 first PCA then SOM vs only SOM''' # '''SOM topological graph''' # points,twoD_vector = SOM_topo(data,vector) # output = [] # for i in range(len(points)): # for j in range(len(points[i])): # output.append(points[i][j]) # output = np.array(output) # # initial_center, cost1 = PCA.k_means_clustering(low, median, high, 3, output) # print(vectors) # # PCA.label_clustering_graph(initial_center,output)
parser.add_argument("--mode", type=int, default=0) input_ = parser.parse_args() #mode = 1 Size = (50, 50) images, label = Readfile(path="./Yale_Face_Database/Training/", Size=Size) test_images, test_label = Readfile(path="./Yale_Face_Database/Testing/", Size=Size) sample_image = test_images[random.sample(range(len(test_label)), 10)] if input_.mode == 0: ## Doing PCA and get the eigenface and W(dimension reduction) PCA_mean, PCA_EigenFace, PCA_W = PCA(images=images, Size=Size, FacePath="./PCA/EigenFace/") Reconstruct(EigenFace=PCA_EigenFace, sample_image=sample_image, Size=Size, Path="./PCA/") ## Doing LDA and get the fisherface and W(dimension reduction) LDA_mean, LDA_EigenFace, LDA_W = LDA(images=images, Size=Size, label=label, FacePath="./LDA/EigenFace/") Reconstruct(EigenFace=LDA_EigenFace, sample_image=sample_image, Size=Size, Path="./LDA/")
TempRectList[:,:4] = np.copy(rectlist) for rect in TempRectList: ax = rect[0] + (rect[2]/2.0) ay = rect[1] + (rect[3]/2.0) rect[4] = math.sqrt((targetpoint[0]-ax)**2 + (targetpoint[1]-ay)**2) if rect[3] > rect[2]*1.5: rect[5] = -1 TempRectList = TempRectList[TempRectList[:,4].argsort()[::-1]] return TempRectList[-1] def callback(image): global image_tmp image_tmp = bridge.imgmsg_to_cv2(image, "bgr8") pca_model = PCA(PCA_MODEL_DIR) rospy.init_node('Plant_Detector') rospy.Subscriber("camera/color/image_raw", Image, callback = callback, queue_size=1) image_pub = rospy.Publisher("camera/color/result",Image,queue_size=10) print 'waiting...' while image_tmp is None: pass print 'start...' while image_tmp is not None: im = np.copy(image_tmp) im_display = np.copy(im)
#-*-coding:utf-8-*-
from PCA import *
import matplotlib.pyplot as plt


def loadDataSet(filename, delim='\t'):
    """Read a delimited text file of numbers into a matrix.

    Args:
        filename: path of the text file, one sample per line.
        delim: field separator (tab by default).

    Returns:
        ``np.mat`` with one row per line and one column per field.
    """
    # The context manager closes the file; it used to stay open.
    with open(filename) as fr:
        stringArr = [line.strip().split(delim) for line in fr]
    dataArr = [list(map(float, line)) for line in stringArr]
    return np.mat(dataArr)


n = 1000  #number of points to create
dataMat = loadDataSet('./data/testSet.txt')
reconMat, rate = PCA(dataMat, 1)

fig = plt.figure()

# Left panel: the raw data.
ax = fig.add_subplot(121)
ax.scatter(np.array(dataMat[:, 0]), np.array(dataMat[:, 1]), marker='^', s=20)
plt.xlabel('hours of direct sunlight')
plt.ylabel('liters of water')
plt.title('Before PCA')

# Right panel: the raw data with the first value returned by PCA (reconMat)
# drawn on top in red.
ax = fig.add_subplot(122)
ax.scatter(np.array(dataMat[:, 0]), np.array(dataMat[:, 1]), marker='^', s=20)
ax.scatter(np.array(reconMat[:, 0]),
           np.array(reconMat[:, 1]),
           marker='s',
           s=20,
           c='red')
plt.xlabel('hours of direct sunlight')