예제 #1
0
def prepareFMNISTData(scale=0, PCA_threshold=-1, Whitening=0, PCA_p=None):
    """Load Fashion-MNIST and build train / validation / test arrays.

    The training set returned by the loader is shuffled with a permutation
    seeded by ``RANDOM_SEED``; the first 4/5 of the permutation become the
    training split and the rest the validation split. Optional preprocessing
    is then applied in this order: PCA projection, standardisation,
    whitening. Every statistic is computed on the training split only and
    re-used for the validation and test splits.

    Args:
        scale: When equal to 1, subtract the training mean from every feature
            and divide by the training standard deviation.
        PCA_threshold: Forwarded as the second argument of ``PCA``; ``-1``
            skips the PCA step.
        Whitening: When equal to 1, run ``PCA(X_train, 1.0)`` and apply
            ``whiteningTransform`` to all three splits.
        PCA_p: Optional override for the component count ``p`` returned by
            ``PCA``.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    mndata = MNIST('fashion_data')
    imagesTrain, labelsTrain = mndata.load_training()
    imagesTest, labelsTest = mndata.load_testing()

    X_test = np.array(imagesTest)
    y_test = np.array(labelsTest)

    # Convert the training data once; both splits index into these arrays.
    X_all = np.array(imagesTrain)
    y_all = np.array(labelsTrain)

    n = len(imagesTrain)
    np.random.seed(RANDOM_SEED)
    indices = np.random.permutation(n)

    split_at = int(4 * n / 5)
    trainingIndex = indices[:split_at]
    validationIndex = indices[split_at:]

    X_train = X_all[trainingIndex]
    y_train = y_all[trainingIndex]

    X_val = X_all[validationIndex]
    y_val = y_all[validationIndex]

    if PCA_threshold != -1:
        # The basis U is fitted on the training split and shared by all splits.
        [Z_train, p, Xr, U, W] = PCA(X_train, PCA_threshold)
        if PCA_p is not None:
            p = PCA_p
        [Z_test, Xr] = project(X_test, U, p)
        [Z_val, Xr] = project(X_val, U, p)
        X_train = Z_train[:, :p]
        X_val = Z_val[:, :p]
        X_test = Z_test[:, :p]
        print("PCA_Threshold = " + str(PCA_threshold) + ", P = " + str(p))

    if scale == 1:
        mean = np.mean(X_train, axis=0)
        X_train = X_train - mean
        X_test = X_test - mean
        X_val = X_val - mean

        std = np.sqrt(np.var(X_train, axis=0))
        # A constant feature has zero spread; dividing by 1 keeps it at its
        # centred value instead of producing NaN (0/0) or inf (x/0).
        std = np.where(std == 0, 1.0, std)
        X_train = X_train / std
        X_test = X_test / std
        X_val = X_val / std

    if Whitening == 1:
        [Z, p, X3, U, W] = PCA(X_train, 1.0)
        X_train = whiteningTransform(X_train, W, U)
        X_test = whiteningTransform(X_test, W, U)
        X_val = whiteningTransform(X_val, W, U)

    return (X_train, y_train, X_val, y_val, X_test, y_test)
예제 #2
0
def construct_mnist():
    """Reconstruct digit images from a single principal component.

    Reads ``dataset/MNIST_test.txt`` (comma separated; column 0 is used as
    the label, the remaining columns as features), keeps the first 100
    samples labelled 9, displays them, rebuilds them through ``PCA`` with one
    component, displays the reconstruction and prints one ``compute_SNR``
    value per image.
    """
    n_components = 1  # principal components kept
    digit = 9  # handwritten digit to analyse
    sample_count = 100  # number of samples used

    print('read from MNIST_test.txt...')
    table = np.loadtxt('dataset/MNIST_test.txt', delimiter=',')
    # Split labels from features.
    labels, pixels = table[:, 0], table[:, 1:]

    # argwhere returns an (m, 1) index array, so the fancy-indexed result
    # carries a singleton middle axis until it is reshaped below.
    rows = np.argwhere(labels == digit)
    originals = pixels[rows][:sample_count]
    slice_imgs(originals, 'original')

    # PCA followed by reconstruction from the reduced representation.
    flattened = np.asarray(originals).reshape((sample_count, 784))
    _, rebuilt = PCA(flattened, n_components)
    slice_imgs(np.real(rebuilt), 'reconstruct')

    # Signal-to-noise ratio of every image against its reconstruction.
    print('SNR of each picture...')
    snr_values = []
    for idx in range(sample_count):
        snr_values.append(compute_SNR(originals[idx], rebuilt[idx]))
    print(snr_values)
예제 #3
0
def draw_2d():
    """Scatter-plot the 2-component PCA projection of ``data_set.x``.

    Columns 0-49, 50-99 and 100-149 of the projection are drawn as three
    separately styled series; row 0 supplies the x coordinate and row 1 the
    y coordinate.
    """
    x2 = PCA(data_set.x, 2)

    # (column slice, marker, colour, marker size, legend label)
    series = (
        (slice(None, 50), 'x', 'm', 30, 'Iris-setosa'),
        (slice(50, 100), '+', 'c', 50, 'Iris-versicolor'),
        (slice(100, 150), 'o', 'r', 15, 'Iris-virginica'),
    )

    plt.figure()
    for columns, marker, colour, size, name in series:
        plt.scatter(x2[0, columns],
                    x2[1, columns],
                    marker=marker,
                    color=colour,
                    s=size,
                    label=name)
    plt.legend()
    plt.title('PCA of IRIS k = 2')
    plt.show()
예제 #4
0
def main():
    """Run PCA at several thresholds and cluster each projection with k-means.

    Loads the wine CSV, takes column ``"0"`` as the label, divides every
    remaining column by its maximum, and for each threshold projects the data
    with ``PCA.PCA``, clusters it with ``Kmeans.test_kmeans`` (K = 3) and
    writes the predicted labels next to the projected data to a CSV file.
    Two-dimensional projections are additionally scattered on figure 1.
    """
    frame = pd.read_csv('/Users/bytedance/Desktop/AI/data/wine.data.csv')
    label = frame.pop("0").to_numpy()

    # Normalize each column by its maximum.
    samples = (frame / frame.max(axis=0)).to_numpy()

    K = 3
    for thresh in (0.9, 0.8, 0.7, 0.6, 0.5):
        # PCA.PCA is handed the transposed matrix.
        new_data, _, _ = PCA.PCA(samples.T, 2, True, thresh)
        ndim = new_data.shape[1]

        banner = f"======== kmeans, K = {K}, ndim = {ndim}, thresh = {thresh} ========="
        print(banner)

        if ndim == 2:
            plt.figure(1)
            plt.scatter(new_data[:, 0], new_data[:, 1], s=50)

        _, _, predicted_label = Kmeans.test_kmeans(new_data, label, K)

        # Predicted label first, projected coordinates after it.
        result_df = pd.concat(
            [pd.DataFrame(predicted_label), pd.DataFrame(new_data)], axis=1)
        result_df.to_csv(f"./result_ndim{ndim}_K{K}.csv")
예제 #5
0
    def treat_data(self, data):
        """Split off the participant names and PCA-transform the rest.

        Args:
            data: DataFrame with a ``'participant'`` column; every other
                column is treated as a feature.

        Returns:
            Tuple ``(transformed, names)`` where ``transformed`` is the result
            of ``PCA.PCA`` on the feature matrix and ``names`` is the list of
            values from the ``'participant'`` column.
        """
        name = data['participant'].tolist()
        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
        # same ndarray and exists on old and new pandas alike.
        features = data.drop(['participant'], axis=1).values
        data = PCA.PCA(features)  # PCA it

        return data, name
예제 #6
0
def test_PCA():
    """Demonstrate PCA on a noisy 2-D line: components, reduction, restore.

    Generates 100 points whose second coordinate is a linear function of the
    first plus Gaussian noise, prints the two fitted components, then reduces
    to one component, maps back with ``inverse_transform`` and plots the
    original points (blue) against the restored ones (red).
    """
    first = np.random.uniform(0., 100., size=100)
    second = 0.75 * first + 3. + np.random.normal(0, 10., size=100)
    points = np.column_stack((first, second))

    full_model = PCA(n_components=2)
    full_model.fit(points)
    print(full_model.components_)

    # Dimensionality reduction to a single component.
    reduced_model = PCA(n_components=1)
    reduced_model.fit(points)
    reduced = reduced_model.transform(points)
    print(reduced.shape)
    restored = reduced_model.inverse_transform(reduced)
    print(restored.shape)

    plt.scatter(points[:, 0], points[:, 1], color='b')
    plt.scatter(restored[:, 0], restored[:, 1], color='r', alpha=0.5)
    plt.show()
예제 #7
0
    def pca(self, X, dim=25):
        """Reduce ``X`` with PCA.

        :param X: images to reduce
        :param dim: dimensionality of the images after reduction
        :return: whatever ``PCA(X).reduction`` returns for ``dim``
        """
        reducer = PCA(X)
        # Forward the caller's dim; it was previously hard-coded to 25, which
        # silently ignored the argument.
        output = reducer.reduction(dim=dim)

        return output
예제 #8
0
def pca_and_call(features=all_features,
                 fn=using_distance_to_original,
                 dim=2,
                 k=-1):
    """Replace every feature vector by its PCA projection, then call ``fn``.

    Args:
        features: Sequence of pairs; element 0 is kept as-is and element 1 is
            the vector handed to ``PCA.PCA``.
        fn: Callable invoked with the re-paired features (and ``k`` when it
            is positive).
        dim: Second argument forwarded to ``PCA.PCA``.
        k: Extra positional argument for ``fn``; omitted unless ``k > 0``.

    Returns:
        The return value of ``fn``.
    """
    vectors = np.array([entry[1] for entry in features])
    # Note (from the original author): PCA.PCA warps the array passed to it.
    projected = PCA.PCA(vectors, dim)
    features = [(entry[0], projected[position])
                for position, entry in enumerate(features)]
    extra_args = (k,) if k > 0 else ()
    return fn(features, *extra_args)
예제 #9
0
def trans_3_to_2():
    """Generate data, rotate it, run 2-component PCA and print variances.

    For the generated data, the rotated data and the first value returned by
    ``PCA``, prints ``np.cov`` of every column.
    """

    def column_covariances(matrix):
        # np.cov of each column taken on its own.
        return [np.cov(matrix[:, col]) for col in range(matrix.shape[1])]

    # Generate the data, then rotate it.
    data, data_c = generate_data(50, 1)
    datax = data_rotate(data)

    # Extract the principal components to obtain two-dimensional data.
    X, Y = PCA(datax, 2)

    print('旋转前', column_covariances(data))
    print('旋转后', column_covariances(datax))
    print('主成分', column_covariances(X))
def task_4_1():
    """Plot information gain against decision point for the first 5 PCs.

    Loads entry 18 of ``data/new_100_corner.npy``, replaces columns 1.. with
    their principal-component coordinates (column 0 is left untouched), keeps
    the first six columns, and for every PC index 0-4 and every decision
    point in [-10, 10) with step 0.1 computes
    ``raw_entropy - get_entropy_groups(split(...))``. One curve per PC is
    plotted.
    """
    # Load one sample set.
    data = np.load("data/new_100_corner.npy")
    data = data[18]
    print(data.shape)

    # Run PCA and keep column 0 plus the top 5 PCs.
    myPCA = PCA.PCA(data[:, 1:])
    data[:, 1:] = myPCA.to_PC(data[:, 1:])
    data = data[:, :6]
    print(np.min(data, axis=0))
    print(np.max(data, axis=0))

    # Split the data at every decision point for every PC.
    # np.arange is the NumPy range function ("np.arrange" does not exist and
    # raised AttributeError).
    decision_points = np.arange(-10.0, 10.0, 0.1)
    PC_range = range(0, 5, 1)
    bin_dict = {}
    for PC in PC_range:
        bin_dict[PC] = {dp: split(data, dp, index=PC) for dp in decision_points}

    # Entropy before any split.
    raw_entropy = get_entropy(data)
    print("Start entropy:", raw_entropy)

    # Information gain for each PC at each decision point.
    entropies = []
    for PC in PC_range:
        entropies.append([
            raw_entropy - get_entropy_groups(bin_dict[PC][dp])
            for dp in decision_points
        ])

    # Plot one curve per PC.
    plt.figure()
    plt.xlabel("Decision point")
    plt.ylabel("Information gain")

    for entropy in entropies:
        plt.plot(decision_points, entropy)

    figure = plt.gcf()  # get current figure
    figure.set_size_inches(16, 12)

    plt.show()
예제 #11
0
def test(Num):
    """Train a per-class Gaussian classifier on PCA features and score it.

    Uses the first ``Num`` training samples, projects them to 15 dimensions
    with ``PCA``, estimates one mean and one covariance matrix per class
    label 0-9, then classifies the first 10000 rows of the test data by the
    largest value of exp(-0.5 * d * inv(cov) * d.T) / sqrt(det(cov)), where
    d is the test point minus the class mean. Prints the number of training
    samples used and the hit rate.

    Side effects: appends every true label to the free name ``testlabel`` and
    every misclassified true label to ``test_error`` (both defined outside
    this function).

    NOTE(review): argwhere, mean, cov, dot, inv, det, exp, sqrt and argmax are
    presumably NumPy functions imported elsewhere in the file - confirm.
    """

    # Reduce to 15 dimensions.
    D = 15
    trainingData, trainingLabel, testData, testLabel = getData()
    trainingData = trainingData[:Num, :]
    trainingLabel = trainingLabel[:Num]
    trainingData, DimReduVct, PCAmean = PCA(trainingData, D)

    # Sample indices grouped by class label.
    indexOf = [None for i in range(10)]
    for i in range(10):
        indexOf[i] = argwhere(trainingLabel == i)
    # Per-class mean (computed while the index arrays still have their
    # original argwhere shape; they are flattened in place further down).
    meanOf = [None for i in range(10)]
    for i in range(10):
        meanOf[i] = mean(trainingData[indexOf[i], :], axis=0)
    # Per-class covariance matrix.
    covarianceOf = [None for i in range(10)]
    for i in range(10):
        temp = indexOf[i]
        # In-place reshape: this also changes indexOf[i], since temp is the
        # same array object.
        temp.shape = -1
        covarianceOf[i] = cov(trainingData[temp].T)

    # Apply the training-set centring and projection to the test data.
    testData = (testData - PCAmean)
    testData = dot(testData, DimReduVct)
    # Classify 10000 test samples.
    hit = 0
    for sample in range(10000):
        testPoint = testData[sample, :]
        possibilityOf = [0 for i in range(10)]
        for i in range(10):
            possibilityOf[i]=exp(-0.5*dot(dot((testPoint-meanOf[i]),\
        inv(covarianceOf[i])),(testPoint-meanOf[i]).T))/sqrt(det(covarianceOf[i]))

        # Predicted class = index of the largest score.
        guest = argmax(possibilityOf)
        testlabel.append(testLabel[sample])
        if guest == testLabel[sample]:
            hit += 1
        else:
            test_error.append(testLabel[sample])  # record the true label of each miss
    # print("\n test complete!")
    # print(hit,"hit")
    print("\n trainingData:", Num)
    print("correct rate:{:.2f}%".format((hit / 10000.0) * 100))
예제 #12
0
def draw_3d():
    """Scatter-plot the 3-component PCA projection of ``data_set.x`` in 3-D.

    Columns 0-49, 50-99 and 100-149 of the projection are drawn point by
    point with three different marker/colour pairs; rows 0, 1 and 2 supply
    the x, y and z coordinates.
    """
    x3 = PCA(data_set.x, 3)
    ax = plt.subplot(111, projection='3d')

    # (column indices, marker, colour) for each of the three groups.
    groups = (
        (range(50), 'x', 'm'),
        (range(50, 100), '+', 'c'),
        (range(100, 150), 'o', 'r'),
    )
    for columns, marker, colour in groups:
        for col in columns:
            ax.scatter(x3[0, col],
                       x3[1, col],
                       x3[2, col],
                       marker=marker,
                       c=colour,
                       s=30)

    ax.set_zlabel('Z')
    ax.set_ylabel('Y')
    ax.set_xlabel('X')
    plt.title('PCA of IRIS k = 3')
    plt.show()
예제 #13
0
def RunTrainLDA(infile, pcaFile, ldaFile):
    """Compute a PCA projection and an LDA projection and save both.

    Args:
        infile: Path of a pickle file holding two consecutive objects: the
            dataset and the subject ids.
        pcaFile: Destination passed to ``np.save`` for the PCA projection.
        ldaFile: Destination passed to ``np.save`` for the LDA projection.
    """

    import cPickle

    # NOTE(security): unpickling can execute arbitrary code; only use this on
    # files from a trusted source.
    # The with-block closes the file even when loading raises.
    with open(infile, "r") as fp:
        dataset = cPickle.load(fp)
        subjID = cPickle.load(fp)

    pca = PCA(dataset)
    pca_proj = pca.compute()

    np.save(pcaFile, pca_proj)

    lda = LDA(dataset, subjID, pca_proj)
    projData = lda.projectData()
    lda_proj = lda.train(projData)

    np.save(ldaFile, lda_proj)
예제 #14
0
파일: Inference.py 프로젝트: chugare/RNN_WS
def testRelation():
    """Project recurrent states of 1000 samples to 2-D and plot / save them.

    Builds the model, restores the latest checkpoint if one exists, feeds
    1000 samples from ``content_law_labeled.txt`` one at a time and collects
    the returned state of each of the three layers. The collected states of
    layer index 1 are reduced to two dimensions with ``PCA.PCA``, drawn with
    ``DrawPlot.drawScatter`` and written to ``result.txt`` as tab-separated
    coordinate pairs.
    """
    # Embedding space: the basis used to judge text similarity.
    with tf.Session() as sess:
        tg = DataPreparation.TupleGenerator()
        ohEncoder = one_hot.OneHotEncoder()
        generator = tg.tuple_gen('content_law_labeled.txt')
        input_data = tf.placeholder(tf.int32, shape=[1, None])
        length_data = tf.placeholder(tf.int32, shape=[1])
        ops = BuildModel.model(input_data, None, length=length_data)
        saver = tf.train.Saver(tf.global_variables())
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

        # One pair of accumulators per layer.
        lstmcells = [{'c': [], 'h': []} for _ in range(3)]
        if checkpoint:
            saver.restore(sess, checkpoint)
        ALL_NUM = 1000
        for count in range(ALL_NUM):

            content = next(generator)
            enter_data = ohEncoder.one_hot_single(content, True)
            state, predict = sess.run([ops['last_state'], ops['prediction']],
                                      feed_dict={
                                          input_data: [enter_data],
                                          length_data: [len(enter_data)]
                                      })
            for c in range(3):
                lstmcells[c]['c'].append(np.array(state[c][0]))
                # NOTE(review): 'h' receives the same state[c][0] as 'c';
                # confirm whether state[c][1] was intended.
                lstmcells[c]['h'].append(np.array(state[c][0]))
            if count % 10 == 0:
                print('[INFO] Getting %d \tst word\'s vector...' % count)
        vec = np.array(lstmcells[1]['c'])
        lddata, recon = PCA.PCA(np.mat(vec), 2)
        points, labels = tg.generateLabel(lddata)
        DrawPlot.drawScatter(points, labels)
        # The with-block flushes and closes result.txt; the handle used to be
        # left open.
        with open('result.txt', 'w', encoding='utf-8') as outfile:
            for i in range(ALL_NUM):
                outfile.write('%f\t%f\n' % (lddata[i, 0], lddata[i, 1]))
예제 #15
0
 def PCA(self, dynamic_features, static_features):
     """Run PCA on every dynamic feature and record the outcome on ``self``.

     The bare name ``PCA`` called inside resolves to the enclosing module
     scope, not to this method. A feature whose PCA result is empty is
     removed from ``dynamic_features`` (the caller's list is modified) and
     replaced by ``'cos_' + feature`` and ``'sin_' + feature`` entries in
     ``static_features``.

     Sets ``self.dynamic_features``, ``self.static_features``,
     ``self.pca_data`` (list of PCA results) and ``self.pca_features``
     (their column names, concatenated).
     """
     learn = self.model_learn
     collected_columns = []
     collected_results = []
     # Iterate over a snapshot: the list itself may shrink inside the loop.
     for feature in dynamic_features.copy():
         pca_result = PCA(learn, self.model_output, feature)
         if len(pca_result) != 0:
             collected_columns.extend(pca_result.columns.tolist())
             collected_results.append(pca_result)
             continue
         # Not enough data for PCA on this feature: fall back to its cos and
         # sin as static features.
         dynamic_features.remove(feature)
         static_features.append('cos_' + feature)
         static_features.append('sin_' + feature)
         print(
             ' >>> alert : ' + feature +
             ' removed from dynamic features, cos() added in static_features instead'
         )
     self.dynamic_features = dynamic_features
     self.static_features = static_features
     self.pca_data = collected_results
     self.pca_features = collected_columns
예제 #16
0
def pcaSklearn(training, dimension=700):
    """Fit PCA on ``training``, show sample 0 at three stages, return the projection.

    Displays, in order, the low-dimensional vector of the first sample, the
    original first sample and its reconstruction from the low-dimensional
    vector.

    Args:
        training: Data the PCA is fitted on and applied to.
        dimension: Number of components to keep.

    Returns:
        The transformed (low-dimensional) data.
    """
    pca = PCA(n_components=dimension)
    pca.fit(training)
    low = pca.transform(training)
    same = pca.inverse_transform(low)

    # Single-argument print(...) behaves identically on Python 2 and 3; the
    # former print statements were a SyntaxError on Python 3.
    print("low[0].shape")
    print(low[0].shape)

    image2DInitial = vectorToImage(training[0], (28, 28))
    print(same[0].shape)
    image2D = vectorToImage(same[0], (28, 28))

    # NOTE(review): the (20, 20) target is fixed while `dimension` is
    # configurable - confirm vectorToImage copes with other lengths.
    image2DLow = vectorToImage(low[0], (20, 20))
    plt.imshow(image2DLow, cmap=plt.cm.gray)
    plt.show()

    plt.imshow(image2DInitial, cmap=plt.cm.gray)
    plt.show()
    plt.imshow(image2D, cmap=plt.cm.gray)
    plt.show()
    print("done")
    return low
예제 #17
0
    def train(self, labels, labelsObj, ncomp):
        """Fit a PCA with ``ncomp`` components on the processed samples.

        ``PCA(labels, labelsObj, ncomp).processData()`` supplies a 2-D list
        (one row per sample, probably all samples of a word) and a word list.
        Column 0 of each row is the label identifying the sample (e.g. "A");
        the remaining columns are the values the PCA is fitted on.
        ``labels`` names where the samples came from, such as a particular
        text.

        Sets ``self.wordlist``, ``self.ncomp``, ``self.labels``,
        ``self.pca_h`` (the fitted model) and ``self.X`` (the transformed
        samples).
        """
        extractor = PCA(labels, labelsObj, ncomp)
        data, self.wordlist = extractor.processData()
        self.ncomp = ncomp
        self.labels = labels

        # Split every row into its leading label and the remaining values.
        records = [data[row] for row in range(len(data))]
        y = [record[0] for record in records]
        x = [[record[col] for col in range(1, len(record))]
             for record in records]

        self.pca_h = decomposition.PCA(ncomp)
        self.pca_h.fit(x)
        self.X = self.pca_h.transform(x)
def model(TEST=True,
          Comparison_with_PCA=True,
          model_name="Autoencoder",
          corrupt_probability=0.5,
          optimizer_selection="Adam",
          learning_rate=0.001,
          training_epochs=100,
          batch_size=128,
          display_step=10,
          batch_norm=True):
    mnist = input_data.read_data_sets("", one_hot=False)

    if batch_norm == True:
        model_name = "batch_norm_" + model_name

    if TEST == False:
        if os.path.exists("tensorboard/{}".format(model_name)):
            shutil.rmtree("tensorboard/{}".format(model_name))

    # ksize, strides? -> [1, 2, 2, 1] = [one image, width, height, one channel]
    # pooling을 할때, 각 batch 에 대해 한 채널에 대해서 하니까, 1, 1,로 설정해준것.
    def pooling(input, type="avg", k=2, padding='VALID'):
        """Apply k x k pooling with stride k: max pooling when ``type`` is
        ``"max"``, average pooling for any other value."""
        pool_op = tf.nn.max_pool if type == "max" else tf.nn.avg_pool
        window = [1, k, k, 1]
        return pool_op(input, ksize=window, strides=[1, k, k, 1], padding=padding)

    def layer(input, weight_shape, bias_shape):
        """Fully connected layer: ``input @ w + b``.

        With ``batch_norm`` enabled the result is passed through batch
        normalisation (training mode when ``TEST`` is false); otherwise the
        weight variable gets an L2 regulariser with scale 1e-5.
        """
        weight_init = tf.random_normal_initializer(stddev=0.01)
        bias_init = tf.random_normal_initializer(stddev=0.01)

        variable_options = {"initializer": weight_init}
        if not batch_norm:
            weight_decay = tf.constant(0.00001, dtype=tf.float32)
            variable_options["regularizer"] = tf.contrib.layers.l2_regularizer(
                scale=weight_decay)
        w = tf.get_variable("w", weight_shape, **variable_options)
        b = tf.get_variable("b", bias_shape, initializer=bias_init)

        affine = tf.matmul(input, w) + b
        if not batch_norm:
            return affine
        return tf.layers.batch_normalization(affine, training=not TEST)

    # stride? -> [1, 2, 2, 1] = [one image, width, height, one channel]
    def conv2d(input,
               weight_shape='',
               bias_shape='',
               strides=[1, 1, 1, 1],
               padding="VALID"):
        """2-D convolution followed by a bias add.

        With ``batch_norm`` enabled the result is passed through batch
        normalisation (training mode when ``TEST`` is false); otherwise the
        kernel variable gets an L2 regulariser with scale 1e-5.
        """
        weight_init = tf.contrib.layers.xavier_initializer(uniform=False)
        bias_init = tf.constant_initializer(value=0)

        variable_options = {"initializer": weight_init}
        if not batch_norm:
            weight_decay = tf.constant(0.00001, dtype=tf.float32)
            variable_options["regularizer"] = tf.contrib.layers.l2_regularizer(
                scale=weight_decay)
        w = tf.get_variable("w", weight_shape, **variable_options)
        b = tf.get_variable("b", bias_shape, initializer=bias_init)

        conv_out = tf.nn.conv2d(input, w, strides=strides, padding=padding)
        biased = tf.nn.bias_add(conv_out, b)
        if not batch_norm:
            return biased
        return tf.layers.batch_normalization(biased, training=not TEST)

    def conv2d_transpose(input,
                         output_shape='',
                         weight_shape='',
                         bias_shape='',
                         strides=[1, 1, 1, 1],
                         padding="VALID"):
        """Transposed 2-D convolution to ``output_shape`` followed by a bias add.

        With ``batch_norm`` enabled the result is passed through batch
        normalisation (training mode when ``TEST`` is false); otherwise the
        kernel variable gets an L2 regulariser with scale 1e-5.
        """
        weight_init = tf.contrib.layers.xavier_initializer(uniform=False)
        bias_init = tf.constant_initializer(value=0)

        variable_options = {"initializer": weight_init}
        if not batch_norm:
            weight_decay = tf.constant(0.00001, dtype=tf.float32)
            variable_options["regularizer"] = tf.contrib.layers.l2_regularizer(
                scale=weight_decay)
        w = tf.get_variable("w", weight_shape, **variable_options)
        b = tf.get_variable("b", bias_shape, initializer=bias_init)

        conv_out = tf.nn.conv2d_transpose(input,
                                          w,
                                          output_shape=output_shape,
                                          strides=strides,
                                          padding=padding)
        biased = tf.nn.bias_add(conv_out, b)
        if not batch_norm:
            return biased
        return tf.layers.batch_normalization(biased, training=not TEST)

    def inference(x):
        """Build the encoder/decoder graph selected by ``model_name``.

        * ``"Autoencoder"`` / ``"batch_norm_Autoencoder"``: fully connected
          784-256-128-64-2 encoder and mirrored 2-64-128-256-784 decoder.
        * ``"Convolution_Autoencoder"`` /
          ``"batch_norm_Convolution_Autoencoder"``: six 5x5 VALID
          convolutions with 32 filters, a 4x4 convolution down to 2 channels,
          and transposed convolutions that retrace the encoder shapes.

        All hidden layers use ReLU; the final decoder layer uses a sigmoid.
        Variable scope names are the same as in the hand-unrolled version, so
        existing checkpoints keep matching.

        Returns:
            Tuple ``(encoder_output, decoder_output)``.

        Raises:
            ValueError: if ``model_name`` is none of the names above (the
                function used to fall through and return ``None``, which
                only failed later when the caller unpacked the result).
        """
        conv_strides = [1, 1, 1, 1]

        if model_name in ("Autoencoder", "batch_norm_Autoencoder"):
            with tf.variable_scope("encoder"):
                with tf.variable_scope("fully1"):
                    hidden = tf.nn.relu(
                        layer(tf.reshape(x, (-1, 784)), [784, 256], [256]))
                for scope_name, n_in, n_out in (("fully2", 256, 128),
                                                ("fully3", 128, 64),
                                                ("output", 64, 2)):
                    with tf.variable_scope(scope_name):
                        hidden = tf.nn.relu(
                            layer(hidden, [n_in, n_out], [n_out]))
                encoder_output = hidden

            with tf.variable_scope("decoder"):
                hidden = encoder_output
                for scope_name, n_in, n_out in (("fully1", 2, 64),
                                                ("fully2", 64, 128),
                                                ("fully3", 128, 256)):
                    with tf.variable_scope(scope_name):
                        hidden = tf.nn.relu(
                            layer(hidden, [n_in, n_out], [n_out]))
                with tf.variable_scope("output"):
                    decoder_output = tf.nn.sigmoid(
                        layer(hidden, [256, 784], [784]))
            return encoder_output, decoder_output

        if model_name in ('Convolution_Autoencoder',
                          "batch_norm_Convolution_Autoencoder"):
            with tf.variable_scope("encoder"):
                # Outputs of conv_1 .. conv_6; the decoder re-uses their
                # shapes as transposed-convolution targets. Each 5x5 VALID
                # convolution shrinks height and width by 4 (for a 28x28
                # input: 24, 20, 16, 12, 8, 4).
                feature_maps = []
                current = x
                for idx in range(1, 7):
                    in_channels = 1 if idx == 1 else 32
                    with tf.variable_scope("conv_{}".format(idx)):
                        current = tf.nn.relu(
                            conv2d(current,
                                   weight_shape=[5, 5, in_channels, 32],
                                   bias_shape=[32],
                                   strides=conv_strides,
                                   padding="VALID"))
                    feature_maps.append(current)
                with tf.variable_scope("output"):
                    encoder_output = tf.nn.relu(
                        conv2d(current,
                               weight_shape=[4, 4, 32, 2],
                               bias_shape=[2],
                               strides=conv_strides,
                               padding="VALID"))
                    # result -> batch_size, 1, 1, 2

            with tf.variable_scope("decoder"):
                current = encoder_output
                # trans_conv_1 undoes the 4x4 "output" convolution (target:
                # conv_6); trans_conv_2 .. trans_conv_6 target conv_5 .. conv_1.
                for idx, target in enumerate(reversed(feature_maps), start=1):
                    kernel_shape = [4, 4, 32, 2] if idx == 1 else [5, 5, 32, 32]
                    with tf.variable_scope("trans_conv_{}".format(idx)):
                        current = tf.nn.relu(
                            conv2d_transpose(current,
                                             output_shape=tf.shape(target),
                                             weight_shape=kernel_shape,
                                             bias_shape=[32],
                                             strides=conv_strides,
                                             padding="VALID"))
                with tf.variable_scope("output"):
                    decoder_output = tf.nn.sigmoid(
                        conv2d_transpose(current,
                                         output_shape=tf.shape(x),
                                         weight_shape=[5, 5, 1, 32],
                                         bias_shape=[1],
                                         strides=conv_strides,
                                         padding="VALID"))
                    # result -> same shape as x
            return encoder_output, decoder_output

        raise ValueError("unsupported model_name: {!r}".format(model_name))

    def evaluate(output, x):
        """Return the mean per-sample L2 distance between ``output`` and ``x``.

        Also registers image summaries of up to 5 inputs and outputs
        (reshaped to 28x28x1) and a ``val_cost`` scalar summary. For the
        fully connected models ``x`` is flattened to 784 values per sample
        before the comparison.
        """
        with tf.variable_scope("validation"):
            for tag, images in (('input_image', x), ('output_image', output)):
                tf.summary.image(tag,
                                 tf.reshape(images, [-1, 28, 28, 1]),
                                 max_outputs=5)

            if model_name in ('Convolution_Autoencoder',
                              "batch_norm_Convolution_Autoencoder"):
                squared_error = tf.square(tf.subtract(output, x))
                l2 = tf.sqrt(tf.reduce_sum(squared_error, axis=[1, 2, 3]))
            elif model_name in ("Autoencoder", "batch_norm_Autoencoder"):
                flat_x = tf.reshape(x, (-1, 784))
                squared_error = tf.square(tf.subtract(output, flat_x))
                l2 = tf.sqrt(tf.reduce_sum(squared_error, axis=1))

            val_loss = tf.reduce_mean(l2)
            tf.summary.scalar('val_cost', val_loss)
            return val_loss

    def loss(output, x):
        """Return the mean per-sample L2 distance between ``output`` and ``x``.

        The convolutional models compare 4-D tensors directly (summing over
        axes 1-3); the fully connected models compare against ``x`` flattened
        to 784 values per sample.
        """
        if model_name in ('Convolution_Autoencoder',
                          "batch_norm_Convolution_Autoencoder"):
            squared_error = tf.square(tf.subtract(output, x))
            l2 = tf.sqrt(tf.reduce_sum(squared_error, axis=[1, 2, 3]))
        elif model_name in ("Autoencoder", "batch_norm_Autoencoder"):
            flat_x = tf.reshape(x, (-1, 784))
            squared_error = tf.square(tf.subtract(output, flat_x))
            l2 = tf.sqrt(tf.reduce_sum(squared_error, axis=1))
        return tf.reduce_mean(l2)

    def training(cost, global_step):
        """Create the op that minimises ``cost`` and advances ``global_step``.

        The optimizer is chosen by ``optimizer_selection`` (``"Adam"``,
        ``"RMSP"`` or ``"SGD"``) and uses ``learning_rate``. The minimise op
        is created under a control dependency on the ``UPDATE_OPS``
        collection so those ops run with every step. A ``train_cost`` scalar
        summary is registered as well.

        Raises:
            ValueError: if ``optimizer_selection`` is not a supported name
                (this used to surface as an UnboundLocalError).
        """
        optimizer_classes = {
            "Adam": tf.train.AdamOptimizer,
            "RMSP": tf.train.RMSPropOptimizer,
            "SGD": tf.train.GradientDescentOptimizer,
        }
        if optimizer_selection not in optimizer_classes:
            raise ValueError(
                "unsupported optimizer_selection: {!r} (expected one of {})".
                format(optimizer_selection, sorted(optimizer_classes)))

        tf.summary.scalar("train_cost", cost)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = optimizer_classes[optimizer_selection](
                learning_rate=learning_rate)
            train_operation = optimizer.minimize(cost, global_step=global_step)
        return train_operation

    def Denoising(x, r=0.1):
        """Blend ``x`` with a randomly masked copy of itself.

        The result is ``masked * r + x * (1 - r)``: with ``r = 0`` the input
        is returned uncorrupted, with ``r = 1`` it is entirely the masked
        copy.
        """
        # Random mask of integers drawn from [0, 2), i.e. 0 or 1, shaped like x.
        mask = tf.random_uniform(shape=tf.shape(x),
                                 minval=0,
                                 maxval=2,
                                 dtype=tf.int32)
        masked = tf.multiply(x, tf.cast(mask, tf.float32))
        masked_part = tf.multiply(masked, r)
        clean_part = tf.multiply(x, 1 - r)
        return tf.add(masked_part, clean_part)

    # print(tf.get_default_graph()) #기본그래프이다.
    JG_Graph = tf.Graph()  # 내 그래프로 설정한다.- 혹시라도 나중에 여러 그래프를 사용할 경우를 대비
    with JG_Graph.as_default():  # as_default()는 JG_Graph를 기본그래프로 설정한다.
        with tf.name_scope("feed_dict"):
            x = tf.placeholder("float", [None, 28, 28, 1])
            d_x = Denoising(x, r=corrupt_probability)
        with tf.variable_scope("shared_variables",
                               reuse=tf.AUTO_REUSE) as scope:
            with tf.name_scope("inference"):
                encoder_output, decoder_output = inference(d_x)
            # or scope.reuse_variables()

        # Adam optimizer의 매개변수들을 저장하고 싶지 않다면 여기에 선언해야한다.
        with tf.name_scope("saver"):
            saver = tf.train.Saver(var_list=tf.global_variables(),
                                   max_to_keep=3)
        if not TEST:
            with tf.name_scope("loss"):
                global_step = tf.Variable(0,
                                          name="global_step",
                                          trainable=False)
                cost = loss(decoder_output, x)
            with tf.name_scope("trainer"):
                train_operation = training(cost, global_step)
            with tf.name_scope("tensorboard"):
                summary_operation = tf.summary.merge_all()

        with tf.name_scope("evaluation"):
            evaluate_operation = evaluate(decoder_output, d_x)

    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(graph=JG_Graph, config=config) as sess:
        print("initializing!!!")
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(os.path.join('model', model_name))
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Restore {} checkpoint!!!".format(
                os.path.basename(ckpt.model_checkpoint_path)))
            saver.restore(sess, ckpt.model_checkpoint_path)
            # shutil.rmtree("model/{}/".format(model_name))

        if not TEST:

            summary_writer = tf.summary.FileWriter(
                os.path.join("tensorboard", model_name), sess.graph)

            for epoch in tqdm(range(training_epochs)):
                avg_cost = 0.
                total_batch = int(mnist.train.num_examples / batch_size)
                for i in range(total_batch):
                    mbatch_x, mbatch_y = mnist.train.next_batch(batch_size)
                    feed_dict = {x: mbatch_x.reshape((-1, 28, 28, 1))}
                    _, minibatch_cost = sess.run([train_operation, cost],
                                                 feed_dict=feed_dict)
                    avg_cost += (minibatch_cost / total_batch)

                print("L2 cost : {}".format(avg_cost))
                if epoch % display_step == 0:
                    val_feed_dict = {
                        x: mnist.validation.images[:1000].reshape(
                            (-1, 28, 28, 1))
                    }  # GPU 메모리 인해 mnist.test.images[:1000], 여기서 1000이다.
                    val_cost, summary_str = sess.run(
                        [evaluate_operation, summary_operation],
                        feed_dict=val_feed_dict)
                    print("Validation L2 cost : {}".format(val_cost))
                    summary_writer.add_summary(
                        summary_str, global_step=sess.run(global_step))

                    save_model_path = os.path.join('model', model_name)
                    if not os.path.exists(save_model_path):
                        os.makedirs(save_model_path)
                    saver.save(sess,
                               save_model_path + '/',
                               global_step=sess.run(global_step),
                               write_meta_graph=False)

            print("Optimization Finished!")

        # batch_norm=True 일 때, 이동평균 사용
        if Comparison_with_PCA and TEST:
            # PCA , Autoencoder Visualization
            test_feed_dict = {
                x: mnist.test.images.reshape(-1, 28, 28, 1)
            }  # GPU 메모리 인해 mnist.test.images[:1000], 여기서 1000이다.
            pca_applied = PCA.PCA(n_components=2,
                                  show_reconstruction_image=False)  # 10000,2
            encoder_applied, test_cost = sess.run(
                [encoder_output, evaluate_operation], feed_dict=test_feed_dict)
            print("Test L2 cost : {}".format(test_cost))
            applied = OrderedDict(PCA=pca_applied,
                                  Autoencoder=encoder_applied.reshape(-1, 2))

            # PCA , Autoencoder 그리기
            fig, ax = plt.subplots(1, 2, figsize=(18, 12))
            # fig.suptitle('vs', size=20, color='r')
            for x, (key, value) in enumerate(applied.items()):
                ax[x].grid(False)
                ax[x].set_title(key, size=20, color='k')
                ax[x].set_axis_off()
                for num in range(10):
                    ax[x].scatter(
                        [value[:, 0][i] for i in range(len(mnist.test.labels)) if mnist.test.labels[i] == num], \
                        [value[:, 1][j] for j in range(len(mnist.test.labels)) if mnist.test.labels[j] == num], \
                        s=10, label=str(num), marker='o')
                ax[x].legend()

            # plt.tight_layout()
            if model_name == "Autoencoder":
                plt.savefig("PCA vs Autoencoder.png", dpi=300)
            elif model_name == "batch_norm_Autoencoder":
                plt.savefig("PCA vs batch_Autoencoder.png", dpi=300)
            elif model_name == "Convolution_Autoencoder":
                plt.savefig("PCA vs ConvAutoencoder.png", dpi=300)
            elif model_name == "batch_norm_Convolution_Autoencoder":
                plt.savefig("PCA vs batchConvAutoencoder.png", dpi=300)
            plt.show()
예제 #19
0
# Evaluate the cluster assignment returned by hierarchical() against the
# ground-truth labels and plot both labelings side by side in a 2-D PCA space.
data = data.values
# Column 1 is used as the ground-truth label, columns 2 onward as the features.
data_ground_truth = data[:, 1]
data_features = data[:, 2:]

# hierarchical() is defined elsewhere; presumably it returns one cluster id
# per row of the data set -- TODO confirm.
data_id = hierarchical()

# Score the clustering against the ground truth with adjusted_rand_score
# (the printed label says "Rand Index", the value comes from adjusted_rand_score).
ARI = adjusted_rand_score(data_ground_truth, data_id)
print ('The Rand Index is', ARI)


# Visualization: collect the distinct labels of each labeling.
unique_label = np.unique(data_id)
unique_label_gt = np.unique(data_ground_truth)

# Reduce the features to 2 dimensions with PCA and index the projected points
# by the cluster ids produced by hierarchical().
dim2 = PCA.PCA(data_features, 2)
dim2_agg = pd.DataFrame(data = dim2, index = data_id)

# Same 2-D projection, indexed by the ground-truth labels instead.
dim2_ground_truth = pd.DataFrame(data = dim2, index = data_ground_truth)

# One figure with two side-by-side subplots: clustering result on the left,
# ground truth on the right. PCA.plot_pca_dim2 is called right after each
# add_subplot -- presumably it draws on the current axes; verify in PCA module.
fig = plt.figure()
fig.set_figheight(5)
fig.set_figwidth(12)
a = fig.add_subplot(1, 2, 1)
img_agg = PCA.plot_pca_dim2(dim2_agg, unique_label)
a.set_title('iyer Clusters from Agglomerative')
a = fig.add_subplot(1, 2, 2)
img_ground = PCA.plot_pca_dim2(dim2_ground_truth, unique_label_gt)
a.set_title('iyer Clusters from Ground Truth')
예제 #20
0
    elif groupNum == 2:
        x = r0 + 0.0
        y = 1.0 * r1 + x
        xcord2.append(x)
        ycord2.append(y)
    fw.write("%f\t%f\t%d\n" % (x, y, groupNum))

# Finish writing the generated points, then draw them (top subplot) and their
# 1-D PCA projection (bottom subplot).
fw.close()
fig = plt.figure()
# Top subplot: the three groups of generated points, one marker style each.
ax = fig.add_subplot(211)
ax.scatter(xcord0, ycord0, marker='^', s=90)
ax.scatter(xcord1, ycord1, marker='o', s=50, c='red')
ax.scatter(xcord2, ycord2, marker='v', s=50, c='yellow')
# Bottom subplot: the same data after PCA down to a single component.
ax = fig.add_subplot(212)
# presumably 'testSet3.txt' is the file written through fw above -- confirm.
myDat = PCA.loadDataSet('testSet3.txt')
# Only the first two columns are features; column 2 is the group label.
lowDDat, reconDat = PCA.PCA(myDat[:, 0:2], 1)
# NOTE(review): the trailing [0] presumably unwraps an extra leading dimension
# produced by indexing with the nonzero() result -- confirm against the numpy
# version in use.
label0Mat = lowDDat[nonzero(
    myDat[:, 2] == 0)[0], :2][0]  # projected rows whose label is 0
label1Mat = lowDDat[nonzero(
    myDat[:, 2] == 1)[0], :2][0]  # projected rows whose label is 1
label2Mat = lowDDat[nonzero(
    myDat[:, 2] == 2)[0], :2][0]  # projected rows whose label is 2
# Earlier 2-D variant of the plot, kept for reference:
#ax.scatter(label0Mat[:,0],label0Mat[:,1], marker='^', s=90)
#ax.scatter(label1Mat[:,0],label1Mat[:,1], marker='o', s=50,  c='red')
#ax.scatter(label2Mat[:,0],label2Mat[:,1], marker='v', s=50,  c='yellow')
# Plot the single retained component on the x axis with y fixed at zero.
ax.scatter(label0Mat[:, 0].tolist(),
           zeros(shape(label0Mat)[0]).tolist(),
           marker='^',
           s=90)
ax.scatter(label1Mat[:, 0].tolist(),
           zeros(shape(label1Mat)[0]).tolist(),
예제 #21
0
        #print("i "+str(i)+ " idx "+str(idx))
        plt_idx = i * num_classes + j + 1
        #print("plt index "+str(plt_idx))
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(classes[j])
    plt.suptitle("Original CIFAR-10 data set")
    plt.show()

# Flatten every image into one row so each data set becomes a 2-D matrix
# with one sample per row.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))

# PCA here is a project-defined class (not shown in this snippet).
pca = PCA()
print(
    "-----------------Fitting CIFAR-10 train set to PCA model-------------------"
)
# fit() returns a value that is fed to transform_data below; judging by the
# name X_std it is presumably the standardized training data -- TODO confirm.
X_std = pca.fit(X_train)
print(
    "-----------------Done Fitting CIFAR-10 train set to PCA model-------------------"
)

# Project into the PCA space. The meaning of the second argument (None) is
# defined by the PCA class -- verify there.
X_reduced = pca.transform_data(X_std, None)

# Map the reduced data back towards the original feature space.
X_reconstructed = pca.inverse_transform(X_reduced, None)

# Undo the standardization (assumed from the method name -- TODO confirm).
X_reconstructed = pca.inverse_standarize(X_reconstructed)
# Calculate reconstruction error
#     # Put the result into a color plot
#     Z = Z.reshape(xx.shape)
#     plt.figure()
#     plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
#
#     # Plot also the training points
#     plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
#     plt.xlim(xx.min(), xx.max())
#     plt.ylim(yy.min(), yy.max())
#     plt.title("3-Class classification (k = %i, weights = '%s')"
#               % (n_neighbors, weights))
#
# plt.show()
# Fit a 3-neighbour KNN regressor on the PCA-reduced features and report a
# confusion matrix of the rounded predictions.
'''classification after applying PCA'''
'''classification after applying PCA'''
# PCA.PCA is called with the full feature matrix and the value 3
# (presumably the number of components to keep -- verify in the PCA module).
trainInp = np.array(PCA.PCA(floatFeaturesMatrix, 3))
neigh = KNeighborsRegressor(3)
neigh.fit(trainInp, classes)
# NOTE(review): predictions are made on the same data the model was fitted
# on, so despite the name these are not validation predictions.
valid_pred = neigh.predict(trainInp)
# The regressor output is passed through the project's rounding helper
# before it is compared with the class labels.
valid_pred_1 = DataIOFactory.roundingNumbers(valid_pred)

# Flatten both label arrays before passing them to the confusion matrix.
n_real_1 = classes.flatten()
n_predict_1 = valid_pred_1.flatten()
print('real ', n_real_1.shape)
print('predict ', n_predict_1.shape)
'''results'''
print('PCA')
ResultAnalyzer.confusionMatrix(n_real_1, n_predict_1)

# neigh = KNeighborsRegressor(3)
# neigh.fit(trainInp, trainOut)
예제 #23
0
        x_combined.append((x_test[f]))
    k += 50
    h += 14
# Turn the collected images into fixed-length vectors and reduce them to
# 2 dimensions with the project's PCA class.
x_combined = np.array(x_combined)
# plt.imshow(x_combined[120])
# plt.show()
# print(x_combined.shape)
x_combined_vec = []
for i in range(len(x_combined)):
    # Shrink each image to the size (20, 17) -- OpenCV reads this tuple as
    # (width, height) -- then flatten it into a single vector.
    x_temp = cv2.resize(x_combined[i], (20, 17), interpolation=cv2.INTER_AREA)
    x_combined_vec.append(x_temp.flatten())

x_combined_vec = np.array(x_combined_vec)
print(x_combined_vec.shape)
print("apply PCA")
test = PCA.PCA(d=2)
# The matrix is transposed before the call and the result transposed back
# below, so pca() presumably expects one sample per column -- TODO confirm.
mean, basis, new_x_data = test.pca(x_combined_vec.T)
# B=[1.29,1.27,1.46,0.91,0.56,0.99,1.00,0.37,1.24,1.23]
print(new_x_data.shape)
# Back to one sample per row for the row-range slicing used when plotting.
new_x_data = new_x_data.T
print("done")
fig, ax = plt.subplots()
plt.title("Data in 2-dim after applying PCA on original dataset")
ax.scatter(new_x_data[0:64, 0],
           new_x_data[0:64, 1],
           c='red',
           marker='o',
           label='class 1')
ax.scatter(new_x_data[64:128, 0],
           new_x_data[64:128, 1],
           c='blue',
예제 #24
0
    for picture in tqdm(pictureList):
        im = Image.open(path + '/' + picture)
        im = im.convert("L")
        width, height = im.size
        data = im.getdata()
        data = np.array(data, dtype='double')
        for elem in data.tolist():
            file.write(str(elem) + " ")
        file.write("\n")
    file.close()
    return [width, height]


if __name__ == "__main__":
    # Pipeline: dump every picture under ./at33 to a text data set, run PCA
    # on it, then write the PCA output both as text rows and as images.

    # PictureToData returns the size of the pictures it processed.
    [width, height] = PictureToData('./at33')
    # Load the picture data set (one space-delimited picture per line).
    data = loadDataSet("./data/pictures.data", delim=' ')
    # PCA is called with the value 10 and returns the transformed data plus a
    # rate that is printed as-is (presumably the retained-variance ratio --
    # verify in the PCA implementation).
    new_data, rate = PCA(data, 10)
    print(rate)
    pictureList = GetFiles('./at33')
    # The context manager guarantees the output file is flushed and closed
    # even if reshaping or saving one of the pictures raises.
    with open("./data/PCApictures.data", 'w') as out_file:
        for num, picture in enumerate(new_data):
            # Each row is written as space-terminated values followed by a
            # newline, exactly as the per-element writes produced before.
            # tolist()[0] is used because each row is 2-D (1 x n).
            out_file.write(
                ''.join(str(value) + ' ' for value in picture.tolist()[0]))
            out_file.write('\n')
            # Rebuild the image: rows = height, columns = width.
            pictureMat = picture.reshape((height, width))
            new_im = Image.fromarray(pictureMat.astype(np.uint8))
            # Output name reuses the source file name at the same position.
            new_im.save('./newPicture/PCA_' + pictureList[num])
# Split the data set, score the features with chi2 and build a PCA-reduced
# feature matrix for the classifier code that follows.
'''spliting the data into test and train and validation set based on different portions
    in  = input = features   out = output = labels'''
# The three trailing numbers (0.8, 0.1, 0.1) are passed to dataSplitFactory;
# presumably the train / validation / test fractions -- verify in DataIOFactory.
trainIn, trainOut, validationIn, validationOut, testIn, testOut = DataIOFactory.dataSplitFactory(floatFeaturesMatrix, classes, 0.8, 0.1, 0.1)
print('trainIn',trainIn.shape)
print('trainout', trainOut.shape)
print('feature matrix shape: ', floatFeaturesMatrix.shape)
# print(classes)
print('class matrix shape: ', classes.shape)


'''chi2 feature selection'''
# Scores are computed on the full feature matrix; 'all' is forwarded to the
# project's Chi2 helper (presumably "keep every feature" -- TODO confirm).
sorted_features_score = FeatureSelection_Chi2.Chi2_featureSelection(floatFeaturesMatrix, classes, features_label, 'all')

'''PCA'''
# NOTE(review): PCA is applied to the full feature matrix, not to the
# trainIn split produced above.
trainInp = np.array(PCA.PCA(floatFeaturesMatrix, 3))

'''raw data'''
# Disabled experiment: SGD classifier trained on the raw (non-PCA) features.
# X = floatFeaturesMatrix
# y = classes.flatten()
# clf = SGDClassifier(loss="hinge", penalty="l2")
# clf.fit(X, y)
#
#
# test_predicted = clf.predict(trainIn)
# print(test_predicted)
# test_real = trainOut.flatten()
# print(test_real)


# The PCA-reduced matrix becomes the model input X.
X = trainInp
예제 #26
0
from PIL import Image
from numpy import *
from pylab import *
import PCA

# Compute PCA over a list of images (imlist, defined elsewhere) and display
# the mean image followed by the first seven rows of V.

im = array(Image.open(imlist[0]))  # open one image to get the size
m, n = im.shape[0:2]  # image height and width
imbr = len(imlist)  # number of images

# One flattened image per row, stored as 32-bit floats ('f').
# The comprehension variable has its own name so it never clobbers `im`.
immatrix = array(
    [array(Image.open(img_path)).flatten() for img_path in imlist], 'f')

# Run PCA; the call returns three values unpacked as V, S and the mean image.
V, S, immean = PCA.PCA(immatrix)

# Show the mean image and the first seven rows of V in a 2 x 4 grid.
figure()
gray()
subplot(2, 4, 1)
imshow(immean.reshape(m, n))
for i in range(7):
    subplot(2, 4, i + 2)
    # Fixed typo: the original read `V[i].reshape9m, n`, which raises
    # AttributeError instead of reshaping the row back into an image.
    imshow(V[i].reshape(m, n))

show()
예제 #27
0
    # ax.scatter(data[:,0],data[:,1],data[:,2])
    # data = Data_processing.data_pruning_for_school_explorer()
    # vectors = init_codebook_vector(20,data)
    # square_main(data,vectors)

    low, median, high, data = Data_processing.data_pruning_for_school_explorer(
    )
    vector, data = square_main(data, init_codebook_vector(4, data))
    SOM_topo(data, vector)
    print("PCA+SOM")
    '''Q 5.4 first PCA then SOM vs only SOM'''
    C = PCA.get_C(data)
    eigenvalue, eigenvector = PCA.get_eigen(C)
    # eigenvalue = np.array(eigenvalue,dtype=float)
    # do principle component analysis
    new_data_set, eigenvector1 = PCA.PCA(eigenvalue, eigenvector, data)
    new_dimension_data = PCA.get_new_points(new_data_set, eigenvector1)
    vector1, data1 = square_main(new_dimension_data,
                                 init_codebook_vector(4, new_dimension_data))
    print(vector1)
    SOM_topo(data1, vector1)
    '''Q 5.4 first PCA then SOM vs only SOM'''
    # '''SOM topological graph'''
    # points,twoD_vector = SOM_topo(data,vector)
    # output = []
    # for i in range(len(points)):
    #     for j in range(len(points[i])):
    #         output.append(points[i][j])
    # output = np.array(output)
    # # initial_center, cost1 = PCA.k_means_clustering(low, median, high, 3, output)    # print(vectors)
    # # PCA.label_clustering_graph(initial_center,output)
예제 #28
0
파일: hw7.py 프로젝트: Dada870423/ML
# Command-line switch: --mode (int, default 0) selects which part of the
# script runs; only the mode == 0 branch is visible here.
parser.add_argument("--mode", type=int, default=0)
input_ = parser.parse_args()

#mode = 1
# Size passed to Readfile, PCA, LDA and Reconstruct for every image.
Size = (50, 50)

# Load the training and testing parts of the Yale Face Database.
images, label = Readfile(path="./Yale_Face_Database/Training/", Size=Size)
test_images, test_label = Readfile(path="./Yale_Face_Database/Testing/",
                                   Size=Size)

# Pick 10 distinct test images at random (random.sample draws without
# replacement) to use for the reconstruction demo below.
sample_image = test_images[random.sample(range(len(test_label)), 10)]

if input_.mode == 0:
    # Run PCA to obtain the mean, the eigenfaces and the projection W
    # (dimension reduction), then reconstruct the sampled test images.
    PCA_mean, PCA_EigenFace, PCA_W = PCA(images=images,
                                         Size=Size,
                                         FacePath="./PCA/EigenFace/")
    Reconstruct(EigenFace=PCA_EigenFace,
                sample_image=sample_image,
                Size=Size,
                Path="./PCA/")

    # Run LDA to obtain the fisherfaces and the projection W (dimension
    # reduction); unlike PCA it also receives the class labels.
    LDA_mean, LDA_EigenFace, LDA_W = LDA(images=images,
                                         Size=Size,
                                         label=label,
                                         FacePath="./LDA/EigenFace/")
    Reconstruct(EigenFace=LDA_EigenFace,
                sample_image=sample_image,
                Size=Size,
                Path="./LDA/")
예제 #29
0
    TempRectList[:,:4] = np.copy(rectlist)
    for rect in TempRectList:
        ax = rect[0] + (rect[2]/2.0)
        ay = rect[1] + (rect[3]/2.0)
        rect[4] = math.sqrt((targetpoint[0]-ax)**2 + (targetpoint[1]-ay)**2)
        if rect[3] > rect[2]*1.5:
            rect[5] = -1
    TempRectList = TempRectList[TempRectList[:,4].argsort()[::-1]]
    return TempRectList[-1]

def callback(image):
    """Store the latest camera frame in the module-level ``image_tmp``.

    The incoming message is converted with ``bridge.imgmsg_to_cv2`` using
    the "bgr8" encoding, and the result replaces the previous frame.
    """
    global image_tmp
    converted_frame = bridge.imgmsg_to_cv2(image, "bgr8")
    image_tmp = converted_frame


# Node setup: load the PCA model, register the node, subscribe to the camera
# stream and create the publisher for the result image.
pca_model = PCA(PCA_MODEL_DIR)

rospy.init_node('Plant_Detector')

# queue_size=1 on the subscriber so only the newest frame is kept.
rospy.Subscriber("camera/color/image_raw", Image, callback=callback, queue_size=1)
image_pub = rospy.Publisher("camera/color/result", Image, queue_size=10)

# Block until the subscriber callback has stored the first frame.
# Sleeping between checks avoids pinning a CPU core in a busy loop, and the
# is_shutdown() test lets the wait end when the node is shut down before any
# image arrives.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('waiting...')
while image_tmp is None and not rospy.is_shutdown():
    rospy.sleep(0.01)
print('start...')


while image_tmp is not  None:
    im = np.copy(image_tmp)
    im_display = np.copy(im)
예제 #30
0
#-*-coding:utf-8-*-
from PCA import *
import matplotlib.pyplot as plt


def loadDataSet(filename, delim='\t'):
    """Load a delimited text file of numbers into a NumPy matrix.

    Each non-blank line becomes one matrix row: the line is stripped, split
    on ``delim`` and every field is converted with ``float``.

    Args:
        filename: Path of the text file to read.
        delim: Field separator used on each line (tab by default).

    Returns:
        ``numpy.matrix`` with one row per non-blank line of the file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    # The context manager closes the file even when a field fails to parse.
    with open(filename) as fr:
        rows = [
            [float(field) for field in line.strip().split(delim)]
            for line in fr
            # Skip blank lines (e.g. a trailing newline at end of file),
            # which would otherwise fail on float('').
            if line.strip()
        ]
    # np.asmatrix is what the np.mat alias pointed to; the alias itself was
    # removed in NumPy 2.0, so call the real function directly.
    return np.asmatrix(rows)


# Plot the data set before PCA (left) and, on the right, the original points
# overlaid with the matrix returned by PCA.
n = 1000  # number of points to create (not referenced in the lines below)

dataMat = loadDataSet('./data/testSet.txt')
# PCA is called with the value 1 and returns the matrix plotted in red below
# plus a rate value (presumably the retained-variance ratio -- TODO confirm).
reconMat, rate = PCA(dataMat, 1)
fig = plt.figure()
# Left subplot: the original two columns of the data.
ax = fig.add_subplot(121)
# np.array(...) converts the matrix column slices before handing them to scatter.
ax.scatter(np.array(dataMat[:, 0]), np.array(dataMat[:, 1]), marker='^', s=20)
plt.xlabel('hours of direct sunlight')
plt.ylabel('liters of water')
plt.title('Before PCA')

# Right subplot: original points (triangles) with the PCA output on top
# (red squares).
ax = fig.add_subplot(122)
ax.scatter(np.array(dataMat[:, 0]), np.array(dataMat[:, 1]), marker='^', s=20)
ax.scatter(np.array(reconMat[:, 0]),
           np.array(reconMat[:, 1]),
           marker='s',
           s=20,
           c='red')
plt.xlabel('hours of direct sunlight')