Example #1
0
def train_kmeans(model_index, input, num_of_classes):
    """Cluster ``input`` with TensorFlow's KMeans and return the assignments.

    Args:
        model_index: Accepted for interface compatibility; not used here.
        input: Samples to cluster. The placeholder is created with exactly
            the shape reported by ``np.shape(input)``.
        num_of_classes: Number of clusters to fit.

    Returns:
        The per-sample cluster indices fetched on the last of the 500
        training steps.
    """
    samples = tf.placeholder(tf.float32, shape=np.shape(input), name="pdfs")
    clusterer = KMeans(inputs=samples,
                       num_clusters=num_of_classes,
                       initial_clusters="kmeans_plus_plus")
    graph_ops = clusterer.training_graph()

    # training_graph() yields six or seven entries; the longer form carries
    # an extra cluster-centers variable right before the init op.
    if len(graph_ops) > 6:
        (_all_scores, assignments, point_scores, _initialized,
         _centers_var, init_op, train_op) = graph_ops
    else:
        (_all_scores, assignments, point_scores, _initialized,
         init_op, train_op) = graph_ops

    # The assignments come back wrapped in a tuple; keep the first entry.
    assignments = assignments[0]
    mean_distance = tf.reduce_mean(point_scores)
    variables_init = tf.global_variables_initializer()

    feed = {samples: input}
    with tf.Session() as sess:
        sess.run(variables_init)
        sess.run(init_op, feed_dict=feed)

        for step in range(500):
            _, distance, labels = sess.run(
                [train_op, mean_distance, assignments], feed_dict=feed)
            if step % 10 == 0:
                print("step %i, avg distance: %f" % (step, distance))
        return labels
Example #2
0
def groupNumber(req_data):
    """Return the k-means cluster index assigned to one request sample.

    The number of clusters is derived from the pickled data list via
    ``ut.elbow``; the model variables are restored from
    ``./trained_data.ckpt``.

    Args:
        req_data: A single feature vector. The placeholder expects 14
            features per row.

    Returns:
        The smallest cluster index assigned to the fed rows (with a single
        row this is simply that row's cluster), or ``None`` if no
        assignment was produced.
    """
    # NOTE(security): presumably `pk` is pickle -- unpickling executes
    # arbitrary code, so 'data_list.tdat' must come from a trusted source.
    with open('data_list.tdat', 'rb') as f:
        data_list = pk.load(f)
    k = ut.elbow(data_list)
    num_features = 14

    X = tf.placeholder(tf.float32, shape=[None, num_features])

    kmeans = KMeans(inputs=X, num_clusters=k,
                    distance_metric='squared_euclidean', use_mini_batch=True)

    # Index the training graph by position instead of unpacking a fixed
    # number of names: it may hold six or seven entries, with the assignments
    # always second and the training op always last.
    training_graph = kmeans.training_graph()
    cluster_idx = training_graph[1][0]  # assignments are wrapped in a tuple
    train_op = training_graph[-1]

    save_file = './trained_data.ckpt'
    saver = tf.train.Saver()

    input_data = [req_data]

    # The context manager closes the session; no explicit close is needed.
    with tf.Session() as sess:
        saver.restore(sess, save_file)
        # NOTE(review): the training op is still run here, as in the original
        # code, so the in-session centers are updated by this request.
        _, idx = sess.run([train_op, cluster_idx], feed_dict={X: input_data})

    if idx.size == 0:
        return None
    # Equivalent to scanning 0..k-1 for the first index present in idx.
    return int(idx.min())
Example #3
0
def kmeans(num_clusters, data):
    """Cluster the concatenated samples and build the k-means histogram.

    Args:
        num_clusters: Number of clusters to fit.
        data: Sequence of 2-D arrays; they are concatenated along axis 0
            before clustering.

    Returns:
        Whatever ``histograming(data, idx, num_clusters)`` returns, where
        ``idx`` holds the cluster index of every concatenated row after the
        final training step.
    """
    num_steps = 100
    data_array = np.concatenate(data, axis=0)
    row, col = np.shape(data_array)
    # Input features
    X = tf.placeholder(tf.float32, shape=[row, col])

    # Define the KMeans model (named `model` so it does not shadow this
    # function's own name).
    model = KMeans(inputs=X,
                   num_clusters=num_clusters,
                   distance_metric='squared_euclidean',
                   initial_clusters='kmeans_plus_plus',
                   use_mini_batch=True)

    # The training graph may hold six or seven entries; the assignments are
    # always second and the init/training ops are always the last two, so
    # index by position instead of unpacking a fixed number of names.
    training_graph = model.training_graph()
    cluster_idx = training_graph[1][0]  # assignments are wrapped in a tuple
    init_op, training_op = training_graph[-2], training_graph[-1]

    feed = {X: data_array}
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer(), feed_dict=feed)
        sess.run(init_op, feed_dict=feed)

        for _ in range(num_steps):
            _, idx = sess.run([training_op, cluster_idx], feed_dict=feed)

    # Create the k-means histogram
    return histograming(data, idx, num_clusters)
Example #4
0
def train(x_train, y_train):
    """Fit mini-batch cosine K-means and build an accuracy op for testing.

    Each of the 25 centroids is labelled with the most frequent class among
    the training samples assigned to it.

    Args:
        x_train: Training samples, 784 features per row.
        y_train: Labels for the training samples; each row is added into a
            length-10 count vector, so presumably one-hot -- TODO confirm.

    Returns:
        Tuple ``(sess, accuracy_op, X, Y)``: the still-open session, the
        accuracy op, and the feature/label placeholders needed to run it.
    """
    # Hyper-parameters
    num_steps = 50        # total training steps
    batch_size = 1024     # samples per batch (not used below)
    k = 25                # number of clusters
    num_classes = 10      # the ten digits
    num_features = 784    # 28x28 pixels per image

    # Placeholders for the samples and their labels
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    Y = tf.placeholder(tf.float32, shape=[None, num_classes])

    # K-means model and its training graph
    kmeans = KMeans(inputs=X,
                    num_clusters=k,
                    distance_metric='cosine',
                    use_mini_batch=True)
    graph_ops = kmeans.training_graph()

    # Six or seven entries depending on the TensorFlow version; the longer
    # form has an extra cluster-centers variable before the init op.
    if len(graph_ops) > 6:
        (_all_scores, cluster_idx, scores, _initialized,
         _centers_var, init_op, train_op) = graph_ops
    else:
        (_all_scores, cluster_idx, scores, _initialized,
         init_op, train_op) = graph_ops

    cluster_idx = cluster_idx[0]  # assignments are wrapped in a tuple
    avg_distance = tf.reduce_mean(scores)

    # Variable initializer, then the session that the caller receives
    init_vars = tf.global_variables_initializer()
    sess = tf.Session()

    train_feed = {X: x_train}
    sess.run(init_vars, feed_dict=train_feed)
    sess.run(init_op, feed_dict=train_feed)

    # Training loop
    for step in range(1, num_steps + 1):
        _, distance, idx = sess.run([train_op, avg_distance, cluster_idx],
                                    feed_dict=train_feed)
        if step == 1 or step % 10 == 0:
            print("Step %i, Avg Distance: %f" % (step, distance))

    # Per-centroid label counts: every training sample votes with its label
    # for the centroid it ended up closest to (given by `idx`).
    counts = np.zeros(shape=(k, num_classes))
    for sample_pos, centroid in enumerate(idx):
        counts[centroid] += y_train[sample_pos]

    # The most frequent label becomes the centroid's label
    labels_map = tf.convert_to_tensor([np.argmax(row) for row in counts])

    # Lookup: centroid id -> label, then compare with the true class
    cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
    true_label = tf.cast(tf.argmax(Y, 1), tf.int32)
    correct_prediction = tf.equal(cluster_label, true_label)
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return sess, accuracy_op, X, Y
def tf_kmeans(full_data_x):
    """Cluster ``full_data_x`` with mini-batch cosine K-means.

    Args:
        full_data_x: Samples to cluster, 128 features per row.

    Returns:
        The cluster index of every sample as fetched on the final training
        step.
    """
    num_steps = 50      # Total steps to train
    k = 3000            # The number of clusters
    num_features = 128  # Length of each input feature vector

    # Input samples
    X = tf.placeholder(tf.float32, shape=[None, num_features])

    # K-Means parameters
    kmeans = KMeans(inputs=X, num_clusters=k, distance_metric='cosine',
                    use_mini_batch=True)

    # Build the KMeans graph
    training_graph = kmeans.training_graph()

    if len(training_graph) > 6:  # Tensorflow 1.4+
        (all_scores, cluster_idx, scores, cluster_centers_initialized,
         cluster_centers_var, init_op, train_op) = training_graph
    else:
        (all_scores, cluster_idx, scores, cluster_centers_initialized,
         init_op, train_op) = training_graph

    cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
    avg_distance = tf.reduce_mean(scores)

    # Initialize the variables (i.e. assign their default value)
    init_vars = tf.global_variables_initializer()

    feed = {X: full_data_x}
    # Only the assignments leave this function, so the session is closed on
    # exit instead of being leaked.
    with tf.Session() as sess:
        # Run the initializers
        sess.run(init_vars, feed_dict=feed)
        sess.run(init_op, feed_dict=feed)

        # Training
        for i in range(1, num_steps + 1):
            _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                                 feed_dict=feed)
            if i % 10 == 0 or i == 1:
                print("Step %i, Avg Distance: %f" % (i, d))

    return idx


        
Example #6
0
def main():
    """Train mini-batch cosine K-means on MNIST and print the test accuracy.

    Each centroid is labelled with the most frequent training label among
    the samples assigned to it; test samples are then classified by the
    label of their centroid.
    """
    mnist = input_data.read_data_sets('/tmp/data/', one_hot=True)
    full_data_x = mnist.train.images
    train_labels = mnist.train.labels
    num_steps = 50
    k = 25
    num_classes = 10
    num_features = 784

    X = tf.placeholder(tf.float32, shape=[None, num_features])
    Y = tf.placeholder(tf.float32, shape=[None, num_classes])

    kmeans = KMeans(inputs=X, num_clusters=k,
                    distance_metric='cosine', use_mini_batch=True)
    training_graph = kmeans.training_graph()

    # Six or seven entries depending on the TensorFlow version.
    if len(training_graph) > 6:
        (all_scores, cluster_idx, scores, cluster_centers_initialized,
         cluster_centers_var, init_op, train_op) = training_graph
    else:
        (all_scores, cluster_idx, scores, cluster_centers_initialized,
         init_op, train_op) = training_graph

    cluster_idx = cluster_idx[0]  # assignments are wrapped in a tuple
    avg_distance = tf.reduce_mean(scores)

    init_vars = tf.global_variables_initializer()

    train_feed = {X: full_data_x}
    # Nothing is returned, so the session is closed when main() finishes.
    with tf.Session() as sess:
        sess.run(init_vars, feed_dict=train_feed)
        sess.run(init_op, feed_dict=train_feed)

        for i in range(1, num_steps + 1):
            _, d, idx = sess.run(
                [train_op, avg_distance, cluster_idx], feed_dict=train_feed)

            if i % 10 == 0 or i == 1:
                print("Step %i, Avg Distance: %f" % (i, d))

        # Count, per centroid, the labels of the samples assigned to it.
        counts = np.zeros(shape=(k, num_classes))
        for sample_pos, centroid in enumerate(idx):
            counts[centroid] += train_labels[sample_pos]

        # The most frequent label becomes the centroid's label.
        labels_map = [np.argmax(c) for c in counts]
        labels_map = tf.convert_to_tensor(labels_map)

        cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
        correct_prediction = tf.equal(
            cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        test_x, test_y = mnist.test.images, mnist.test.labels
        print("Test Accucaty", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
Example #7
0
def build_kmeans_model(inputs, params):
    """Build the KMeans training graph for ``inputs["samples"]``.

    Args:
        inputs: Mapping that provides the samples under the ``"samples"`` key.
        params: Object whose ``k`` attribute gives the number of clusters.

    Returns:
        The value returned by ``KMeans.training_graph()`` for a full-batch,
        cosine-distance model initialised with ``kmeans_plus_plus``.
    """
    model_options = dict(
        inputs=inputs["samples"],
        num_clusters=params.k,
        distance_metric='cosine',
        use_mini_batch=False,
        mini_batch_steps_per_iteration=1,
        initial_clusters='kmeans_plus_plus',
    )
    # Construct the model and hand back its training graph directly.
    return KMeans(**model_options).training_graph()
    def init_kmeans(self,
                    num_clusters=25,
                    initial_clusters='kmeans_plus_plus',
                    distance_metric='cosine',
                    use_mini_batch=True,
                    mini_batch_steps_per_iteration=1,
                    random_seed=0,
                    kmeans_plus_plus_num_retries=5,
                    kmc2_chain_length=200):
        """(Re)initialise the KMeans base object and open a fresh session.

        When ``self.outputs`` is set, the cluster centers stored in the
        checkpoint at ``self.model_path`` replace ``initial_clusters``. All
        arguments are forwarded positionally to ``KMeans.__init__``.

        Side effects: sets ``self.fetch_kmeans`` to
        ``[train_op, avg_distance, cluster_index, scores]``, closes any
        previous ``self.kmeans_session``, and stores a new session in which
        the variable initializer and the KMeans init op have been run on
        ``self.inputs``.
        """
        # Load saved cluster centers. `is not None` is used because `!= None`
        # is evaluated elementwise when the attribute holds an array.
        if self.outputs is not None:
            saver = tf.train.import_meta_graph(self.model_path + '.meta')
            # The context manager closes the session even if restoring fails.
            with tf.Session() as sess:
                saver.restore(sess, self.model_path)
                cluster_centers = sess.run(
                    tf.get_default_graph().get_tensor_by_name(
                        CLUSTERS_VAR_NAME + ':0'))
            initial_clusters = tf.convert_to_tensor(cluster_centers)

        KMeans.__init__(self, self.kmeans_x, num_clusters, initial_clusters,
                        distance_metric, use_mini_batch,
                        mini_batch_steps_per_iteration, random_seed,
                        kmeans_plus_plus_num_retries, kmc2_chain_length)

        # The training graph holds six or seven entries; the assignments and
        # scores are always second and third, and the init/training ops are
        # always the last two.
        training_graph = self.training_graph()
        cluster_index = training_graph[1][0]  # wrapped in a tuple
        scores = training_graph[2]
        init_op, train_op = training_graph[-2], training_graph[-1]

        avg_distance = tf.reduce_mean(scores)
        init_vars = tf.global_variables_initializer()
        self.fetch_kmeans = [train_op, avg_distance, cluster_index, scores]

        # Release the previous session before replacing it.
        if self.kmeans_session is not None:
            self.kmeans_session.close()
        self.kmeans_session = tf.Session()

        feed = {self.kmeans_x: self.inputs}
        self.kmeans_session.run(init_vars, feed_dict=feed)
        self.kmeans_session.run(init_op, feed_dict=feed)
Example #9
0
 def __init__(self,
              inputs,
              num_clusters,
              mini_batch_steps_per_iteration=100):
     """Build a mini-batch KMeans training graph and expose its pieces.

     Args:
         inputs: Input passed straight to ``KMeans``.
         num_clusters: Number of clusters; converted to a tensor.
         mini_batch_steps_per_iteration: Forwarded to ``KMeans``.
     """
     self.num_clusters = tf.convert_to_tensor(num_clusters)
     self.kmeans = KMeans(
         inputs,
         self.num_clusters,
         use_mini_batch=True,
         mini_batch_steps_per_iteration=mini_batch_steps_per_iteration)
     out = self.kmeans.training_graph()
     # The centers variable is looked up by its graph name.
     self.cluster_centers = tf.get_default_graph().get_tensor_by_name(
         'clusters:0')
     # The first three entries are tuples; keep their first element.
     self.all_scores = out[0][0]
     self.cluster_index = out[1][0]
     self.scores = out[2][0]
     self.cluster_centers_initialized = out[3]
     # The training graph may hold six or seven entries (an extra
     # cluster-centers variable can precede the init op), so the last two
     # ops are addressed from the end rather than as out[4] / out[5].
     self.init_op = out[-2]
     self.train_op = out[-1]
Example #10
0
    def _build_nn(self):
        """Create the KMeans model on ``self.x`` and publish its graph ops.

        Sets ``self.kmeans``, ``self.cluster_idx``, ``self.avg_distance``,
        ``self.init_op`` and ``self.train_op``.
        """
        self.kmeans = KMeans(
            inputs=self.x,
            num_clusters=self.k,
            distance_metric='cosine',
            use_mini_batch=True,
        )

        # Build the K-Means computation graph.
        graph_ops = self.kmeans.training_graph()

        # TensorFlow 1.4 and later return a seventh entry (a cluster-centers
        # variable) ahead of the init op.
        if len(graph_ops) > 6:
            (_all_scores, assignments, point_scores, _initialized,
             _centers_var, init_op, train_op) = graph_ops
        else:
            (_all_scores, assignments, point_scores, _initialized,
             init_op, train_op) = graph_ops

        # Cluster id of every sample, in [0, k).
        self.cluster_idx = assignments[0]
        self.avg_distance = tf.reduce_mean(point_scores)

        self.init_op = init_op
        self.train_op = train_op
Example #11
0
    def build_network(self, x_image, num_class):
        """Flatten ``x_image`` and wrap it in a mini-batch cosine KMeans.

        Args:
            x_image: Tensor whose static shape has at least four dimensions;
                dimensions 1-3 are collapsed into one feature axis.
            num_class: Not used by this method.

        Returns:
            The ``KMeans`` object built on the flattened input with
            ``self.k`` clusters.
        """
        dims = x_image.get_shape().as_list()
        features_per_sample = dims[1] * dims[2] * dims[3]
        flattened = tf.reshape(x_image, [-1, features_per_sample])

        # K-Means parameters
        return KMeans(inputs=flattened,
                      num_clusters=self.k,
                      distance_metric='cosine',
                      use_mini_batch=True)
Example #12
0
class Clustering:
    """Mini-batch KMeans wrapper with image <-> cluster-label conversions.

    NOTE(review): the method names suggest the images are in a Lab colour
    space with the clusters living in the two chroma channels -- confirm
    against the caller.
    """

    def __init__(self,
                 inputs,
                 num_clusters,
                 mini_batch_steps_per_iteration=100):
        """Build the KMeans training graph and expose its pieces.

        Args:
            inputs: Input passed straight to ``KMeans``.
            num_clusters: Number of clusters; converted to a tensor.
            mini_batch_steps_per_iteration: Forwarded to ``KMeans``.
        """
        self.num_clusters = tf.convert_to_tensor(num_clusters)
        self.kmeans = KMeans(
            inputs,
            self.num_clusters,
            use_mini_batch=True,
            mini_batch_steps_per_iteration=mini_batch_steps_per_iteration)
        out = self.kmeans.training_graph()
        # The centers variable is looked up by its graph name.
        self.cluster_centers = tf.get_default_graph().get_tensor_by_name(
            'clusters:0')
        # The first three entries are tuples; keep their first element.
        self.all_scores = out[0][0]
        self.cluster_index = out[1][0]
        self.scores = out[2][0]
        self.cluster_centers_initialized = out[3]
        # The training graph may hold six or seven entries (an extra
        # cluster-centers variable can precede the init op), so the last two
        # ops are addressed from the end rather than as out[4] / out[5].
        self.init_op = out[-2]
        self.train_op = out[-1]

    def lab_to_labels(self, images, name='lab_to_labels'):
        """Return, per pixel, the index of the nearest cluster center.

        Channels 1 and 2 of the 4-D ``images`` tensor are compared with
        columns 0 and 1 of the cluster centers using squared Euclidean
        distance; the arg-min is taken over the cluster axis.
        """
        center_a = tf.reshape(self.cluster_centers[:, 0], [1, 1, 1, -1])
        center_b = tf.reshape(self.cluster_centers[:, 1], [1, 1, 1, -1])
        # Broadcast every pixel against every center.
        diff_a = tf.expand_dims(images[:, :, :, 1], 3) - center_a
        diff_b = tf.expand_dims(images[:, :, :, 2], 3) - center_b
        squared_dist = tf.square(diff_a) + tf.square(diff_b)
        return tf.argmin(squared_dist, 3, name=name)

    def labels_to_lab(self, labels, name='labels_to_lab'):
        """Turn cluster labels back into a 3-channel image.

        Floating-point ``labels`` are treated as per-cluster weights and the
        result is the weighted sum of the centers; any other dtype is used
        as indices into the centers. The first output channel is the
        constant 75.
        """
        if labels.dtype in [tf.float16, tf.float32, tf.float64]:
            weights = tf.cast(tf.expand_dims(labels, -1), tf.float32)
            centers = tf.reshape(self.cluster_centers, [1, 1, 1, -1, 2])
            ab = tf.reduce_sum(weights * centers, 3)
        else:
            ab = tf.gather(self.cluster_centers, labels)
        # Constant first channel with the same leading shape as `ab`.
        lightness = tf.ones(tf.shape(ab)[:-1], tf.float32) * 75
        return tf.concat([tf.expand_dims(lightness, -1), ab], 3, name=name)
Example #13
0
class Model:
    """K-means model holding its placeholders and training-graph ops."""

    def __init__(self, args):
        """Read the hyper-parameters from ``args`` and build the graph.

        Args:
            args: Mapping with optional ``'k'``, ``'num_classes'`` and
                ``'num_features'`` entries (defaults 10, 10 and 784) and a
                required ``'gpu'`` entry; a non-negative value selects
                ``/gpu:0``, anything else ``/cpu:0``.
        """
        defaults = (('k', 10), ('num_classes', 10), ('num_features', 784))
        for key, fallback in defaults:
            setattr(self, key, args.get(key, fallback))

        self._add_op()

        if args['gpu'] >= 0:
            self.device = '/gpu:0'
        else:
            self.device = '/cpu:0'
        with tf.device(self.device):
            self._build_nn()

    def _add_op(self):
        """Create the feature (``self.x``) and label (``self.y``) placeholders."""
        self.x = tf.placeholder(tf.float32, shape=[None, self.num_features])
        self.y = tf.placeholder(tf.float32, shape=[None, self.num_classes])

    def _build_nn(self):
        """Create the KMeans model on ``self.x`` and publish its graph ops."""
        self.kmeans = KMeans(
            inputs=self.x,
            num_clusters=self.k,
            distance_metric='cosine',
            use_mini_batch=True,
        )

        # Build the K-Means computation graph.
        graph_ops = self.kmeans.training_graph()

        # TensorFlow 1.4 and later return a seventh entry (a cluster-centers
        # variable) ahead of the init op.
        if len(graph_ops) > 6:
            (_all_scores, assignments, point_scores, _initialized,
             _centers_var, init_op, train_op) = graph_ops
        else:
            (_all_scores, assignments, point_scores, _initialized,
             init_op, train_op) = graph_ops

        # Cluster id of every sample, in [0, k).
        self.cluster_idx = assignments[0]
        self.avg_distance = tf.reduce_mean(point_scores)

        self.init_op = init_op
        self.train_op = train_op
# Script fragment: load MNIST and build a mini-batch cosine K-means graph.
mnist = input_data.read_data_sets("/tmp/data", one_hot=True)
full_data_x = mnist.train.images

# Parameters
steps = 50  # total training steps
batch_size = 1024  # samples per batch
k = 25  # number of clusters
num_classes = 10  # the ten digits
num_features = 784  # each image is 28x28 pixels

# Input images
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels
Y = tf.placeholder(tf.float32, shape=[None, num_classes])
kMeans = KMeans(inputs=X,
                num_clusters=k,
                distance_metric='cosine',
                use_mini_batch=True)

# Build the KMeans graph. The result may hold six or seven entries (an extra
# cluster-centers variable can precede the init op), so the first four are
# taken from the front and the init/training ops from the end.
training_graph = kMeans.training_graph()
all_scores, cluster_idx, scores, cluster_centers_initialized = training_graph[:4]
init_op, train_op = training_graph[-2:]
cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
Example #15
0
def run():
    """Train mini-batch cosine K-means on MNIST and print the test accuracy.

    Each centroid is labelled with the most frequent training label among
    the samples assigned to it; test samples are then classified by the
    label of their centroid.
    """
    # Hide all GPUs from TensorFlow. (The original note justified this with
    # "random forests do not benefit from them", apparently carried over
    # from another example.)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    # Use the MNIST data set
    mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
    full_data_x = mnist.train.images
    train_labels = mnist.train.labels

    # Hyper-parameters
    # num_steps     number of training steps
    # k             number of clusters
    # num_classes   number of classes
    # num_features  number of features
    num_steps = 50
    k = 25
    num_classes = 10
    num_features = 784

    # Input data
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    # Labels
    Y = tf.placeholder(tf.float32, shape=[None, num_classes])

    # Use the KMeans implementation shipped with TensorFlow. Its parameters,
    # as described by the original author:
    #   inputs                          input tensor or list of tensors
    #   num_clusters                    integer tensor, number of clusters
    #   initial_clusters                clusters used during initialisation
    #   distance_metric                 "squared_euclidean" or "cosine"
    #   use_mini_batch                  use the mini-batch algorithm if true,
    #                                   otherwise assume full batches
    #   mini_batch_steps_per_iteration  steps after which the updated centers
    #                                   are synced back to the master copy
    #   random_seed                     seed for the PRNG used to initialise
    #   kmeans_plus_plus_num_retries    additional points drawn per sampled
    #                                   point during kmeans++ initialisation
    #   kmc2_chain_length               candidate points used by k-MC2 to
    #                                   generate one new cluster center
    kmeans = KMeans(inputs=X,
                    num_clusters=k,
                    distance_metric='cosine',
                    use_mini_batch=True)

    # Build the KMeans graph. The result may hold six or seven entries (an
    # extra cluster-centers variable can precede the init op), so index by
    # position instead of unpacking a fixed number of names.
    training_graph = kmeans.training_graph()
    cluster_idx = training_graph[1][0]  # assignments are wrapped in a tuple
    scores = training_graph[2]
    init_op, train_op = training_graph[-2], training_graph[-1]
    avg_distance = tf.reduce_mean(scores)

    # Initializer for all variables
    init_vars = tf.global_variables_initializer()

    train_feed = {X: full_data_x}
    # Nothing is returned, so the session is closed when run() finishes.
    with tf.Session() as sess:
        # Initialise the variables
        sess.run(init_vars, feed_dict=train_feed)
        sess.run(init_op, feed_dict=train_feed)

        # Training
        for i in range(1, num_steps + 1):
            _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                                 feed_dict=train_feed)
            if i % 10 == 0 or i == 1:
                print("步数 %i, 平均距离: %f" % (i, d))

        # Assign a label to every centroid: count the training labels of
        # the samples assigned to each centroid.
        counts = np.zeros(shape=(k, num_classes))
        for sample_pos, centroid in enumerate(idx):
            counts[centroid] += train_labels[sample_pos]
        # The most frequent label becomes the centroid's label
        labels_map = [np.argmax(c) for c in counts]
        labels_map = tf.convert_to_tensor(labels_map)

        # Evaluate the model
        cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
        # Compute the accuracy
        correct_prediction = tf.equal(cluster_label,
                                      tf.cast(tf.argmax(Y, 1), tf.int32))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Test data
        test_x, test_y = mnist.test.images, mnist.test.labels
        print("在测试集上的准确率:", sess.run(accuracy_op, feed_dict={
            X: test_x,
            Y: test_y
        }))
Example #16
0
# Script fragment: hyper-parameters, placeholders and the KMeans training
# graph for 784-feature inputs.

# Hyper-parameters
epochs = 50
batch_size = 64  # choose according to the memory available on your system
num_clusters = num_classes = 10
num_features = 784

# Placeholders for the features and the labels
with tf.name_scope("Input_Features"):
    x = tf.placeholder(dtype=tf.float32,
                       shape=[None, num_features],
                       name="Input")
    y_ = tf.placeholder(dtype=tf.float32,
                        shape=[None, num_classes],
                        name="Output")

# KMeans model
with tf.name_scope("KMeans_Architecture"):
    Kmeans = KMeans(
        inputs=x,
        num_clusters=num_clusters,
        distance_metric='cosine',
        use_mini_batch=True,
    )

# Build the training graph; it holds six or seven entries, the longer form
# carrying an extra cluster-centers variable ahead of the init op.
training_graph = Kmeans.training_graph()

if len(training_graph) > 6:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_center_var, init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     init_op, train_op) = training_graph

# The assignments are wrapped in a tuple; keep the first entry.
cluster_idx = cluster_idx[0]
avg_distance = tf.reduce_mean(scores)
Example #17
0
# Script fragment: build a mini-batch cosine K-means graph on the MNIST
# training images and run the variable initializer.
full_data_x = mnist.train.images

# Parameters
num_steps = 50 # Total steps to train
batch_size = 1024 # The number of samples per batch
k = 25 # The number of clusters
num_classes = 10 # The 10 digits
num_features = 784 # Each image is 28x28 pixels

# Input images
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels (for assigning a label to a centroid and testing)
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# K-Means Parameters
kmeans = KMeans(inputs=X, num_clusters=k, distance_metric='cosine',
                use_mini_batch=True)

# Build KMeans graph. The result may hold six or seven entries (an extra
# cluster-centers variable can precede the init op), so the first four are
# taken from the front and the init/training ops from the end.
training_graph = kmeans.training_graph()
all_scores, cluster_idx, scores, cluster_centers_initialized = training_graph[:4]
init_op, train_op = training_graph[-2:]
cluster_idx = cluster_idx[0] # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars, feed_dict={X: full_data_x})
Example #18
0

# Script fragment: fit mini-batch cosine K-means on the training split
# returned by load_data(). The fragment is cut off inside the training loop.
data_ = load_data()
X_train, Y_train, X_test,Y_test = data_.return_data() # presumably one-hot labels -- TODO confirm

batch_size = 32
epoch = 30 # number of training iterations; increase for longer training
n_class =10
n_feature = X_train.shape[1]

# Placeholders for the input samples and their labels
X_data = tf.placeholder(shape=[None,n_feature],dtype=tf.float32)
Y_data = tf.placeholder(shape =[None,n_class],dtype=tf.float32)

# KMeans model with one cluster per class
kmeans_ = KMeans(inputs=X_data,num_clusters=n_class,use_mini_batch=True,
       distance_metric='cosine')

# NOTE(review): this unpacks exactly six values; other snippets in this file
# guard against a seven-entry training graph -- confirm the TensorFlow
# version in use.
(all_scores, cluster_idx, scores, cluster_centers_initialized,
 init_op,train_op)=kmeans_.training_graph()

avg_distance = tf.reduce_mean(scores) # mean of the per-sample scores

init_vars = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init_vars,feed_dict={X_data:X_train})
sess.run(init_op,feed_dict={X_data:X_train}) # run the KMeans init op
# Training loop: one full pass over X_train per iteration
for  i in range(epoch):
    _,avg_d = sess.run([train_op,avg_distance],feed_dict={X_data:X_train})
    # NOTE(review): the body of this `if` is missing from the visible source.
    if i % 5 ==0:
Example #19
0
File: tf2.py Project: swq90/stock
#         x = self.fc3(x)
#         return x
#
#
# model = Network()
# # Create the internal tensors via build(); 4 is an arbitrary batch size and 8 is the input feature length
# model.build(input_shape=(4, 8))
# model.summary()  # 打印网络信息
# # 创建优化器,指定学习率
# optimizer = tf.keras.optimizers.RMSprop(0.001)
#
# # 网络训练部分。通过 Epoch 和 Step 的双层循环训练网络,共训练 200 个 epoch:
# for epoch in range(200):  # 200 个 Epoch
#     for step, (x, y) in enumerate(train_db):  # 遍历一次训练集
#         # 梯度记录器
#         with tf.GradientTape() as tape:
#             out = model(x)  # 通过网络获得输出
#             loss = tf.reduce_mean(keras.losses.MSE(y, out))  # 计算 MSE
#             mae_loss = tf.reduce_mean(keras.losses.MAE(y, out))  # 计算 MAE
#
#         if step % 10 == 0:
#             print(epoch, step, float(loss))
#
#         # 计算梯度 并更新
#         grads = tape.gradient(loss, model.trainable_variables)
#         optimizer.apply_gradients(zip(grads, model.trainable_variables))
#
# NOTE(review): tf.constant is called without a value argument and `vector`
# is not used below -- presumably an unfinished line; confirm intent.
vector=tf.constant()
# NOTE(review): the n_clusters / random_state keywords and the fit / labels_
# usage differ from the KMeans(inputs=..., num_clusters=...) calls elsewhere
# in this file -- presumably a different KMeans class; verify the import.
kmeans = KMeans(n_clusters=2, random_state=0)
# NOTE(review): `dataset` is not defined in the visible part of the file.
kmeans.fit(dataset)
print(kmeans.labels_)
def train():
    """Build the multi-readout classifier graph, train it and log accuracy.

    Graph construction:
      * A fully connected network on a ``[None, 784]`` input with dropout on
        the input and on every hidden layer: two ReLU hidden layers
        (``FLAGS.hidden1``, ``FLAGS.hidden2``) plus an optional third one when
        ``FLAGS.hidden3 != -1``.
      * Four linear readout heads (``tr1`` .. ``tr4``) on top of the last
        hidden layer, each with its own softmax cross-entropy loss and its own
        momentum optimizer with an exponentially decayed learning rate.
      * Four ``KMeans`` models (500 clusters each) built on the raw input
        ``x``, one per readout head.

    Execution:
      * Only the K-Means model selected by ``FLAGS.training_readout_layer``
        is initialised and trained (``nrStepsForClustering`` steps).
      * On every ``FLAGS.test_frequency``-th step, single random test samples
        are routed to the readout head whose K-Means centers are closest
        (minimum squared Euclidean distance); the mean accuracy is printed and
        appended as a row ``[step, accuracy]`` to ``FLAGS.plot_file``.
      * On all other steps one optimizer step of the selected readout head is
        run on a training batch of 100 samples.
      * The model is restored from / saved to a checkpoint when
        ``FLAGS.load_model`` / ``FLAGS.save_model`` are set.

    Side effects: creates ``./checkpoints`` if missing, opens a
    ``tf.InteractiveSession`` (left open, as in the original code), and sets
    the module globals ``condition_1`` .. ``condition_4`` and ``global_step``.

    Relies on module-level names defined elsewhere in the file: ``FLAGS``,
    ``dataSetTrain``, ``dataSetTest``, ``wdict``, ``IMAGE_PIXELS`` and
    ``NUM_CLASSES``.
    """
    args = FLAGS
    training_readout_layer = args.training_readout_layer
    testing_readout_layer = args.testing_readout_layer  # read but not used below
    LOG_FREQUENCY = args.test_frequency

    # "Cluster centers initialised" flags of the four K-Means models; they are
    # overwritten with the evaluated graph tensors after clustering.
    global condition_1, condition_2, condition_3, condition_4
    condition_1 = False
    condition_2 = False
    condition_3 = False
    condition_4 = False

    if not os.path.exists("./checkpoints"):
        os.mkdir("./checkpoints")

    def feed_dict(train, i):
        """Feed for step ``i``: a training batch of FLAGS.batch_size samples
        with dropout, or the whole test set without dropout."""
        if train:
            xs, ys = dataSetTrain.next_batch(FLAGS.batch_size)
            k_h = FLAGS.dropout_hidden
            k_i = FLAGS.dropout_input
        else:
            xs, ys = dataSetTest.images, dataSetTest.labels
            k_h = 1.0
            k_i = 1.0
        return {
            x: xs,
            y_: ys,
            global_step: i,
            keep_prob_input: k_i,
            keep_prob_hidden: k_h
        }

    def feed_dict_test(train, i, batch_size):
        """Like ``feed_dict`` but with an explicit batch size for both the
        training and the test branch."""
        if train:
            xs, ys = dataSetTrain.next_batch(batch_size)
            k_h = FLAGS.dropout_hidden
            k_i = FLAGS.dropout_input
        else:
            xs, ys = dataSetTest.next_batch(batch_size)
            k_h = 1.0
            k_i = 1.0
        return {
            x: xs,
            y_: ys,
            global_step: i,
            keep_prob_input: k_i,
            keep_prob_hidden: k_h
        }

    def weight_variable(shape, stddev, name="W"):
        """Weight variable initialised from a truncated normal."""
        initial = tf.truncated_normal(shape, stddev=stddev)
        return tf.Variable(initial, name=name)

    def bias_variable(shape, name="b"):
        """Bias variable initialised to zeros."""
        initial = tf.zeros(shape)
        return tf.Variable(initial, name=name)

    def fc_layer(input, channels_in, channels_out, stddev, name='fc'):
        """Fully connected ReLU layer with histogram summaries."""
        with tf.name_scope(name):
            with tf.name_scope('weights'):
                W = weight_variable([channels_in, channels_out], stddev)
            with tf.name_scope('biases'):
                b = bias_variable([channels_out])
            act = tf.nn.relu(tf.matmul(input, W) + b)
            tf.summary.histogram("weights", W)
            tf.summary.histogram("biases", b)
            tf.summary.histogram("activation", act)
            return act

    def softmax_linear(input, channels_in, channels_out, stddev, name='read'):
        """Linear readout layer returning logits (no activation); its weight
        matrix is also registered in the module-level ``wdict``."""
        with tf.name_scope(name):
            with tf.name_scope('weights'):
                W = weight_variable([channels_in, channels_out],
                                    stddev,
                                    name="WMATRIX")
                wdict[W] = W
            with tf.name_scope('biases'):
                b = bias_variable([channels_out], name="bias")
            act = tf.matmul(input, W) + b
            tf.summary.histogram("weights", W)
            tf.summary.histogram("biases", b)
            tf.summary.histogram("activation", act)
            return act

    # Start an interactive session (GPU memory is allocated on demand)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = False
    sess = tf.InteractiveSession(config=config)

    # Placeholders for the input samples and the label rows
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
        y_ = tf.placeholder(tf.float32, shape=[None, 10], name='labels')

    # The current step is fed as a scalar and drives the learning-rate decay.
    global global_step
    global_step = tf.placeholder(tf.float32, shape=[], name="step")

    # Apply dropout to the input layer
    keep_prob_input = tf.placeholder(tf.float32)
    tf.summary.scalar('dropout_input', keep_prob_input)

    x_drop = tf.nn.dropout(x, keep_prob_input)

    # First hidden layer
    h_fc1 = fc_layer(x_drop, IMAGE_PIXELS, FLAGS.hidden1,
                     1.0 / math.sqrt(float(IMAGE_PIXELS)), 'h_fc1')

    # Apply dropout to the first hidden layer
    keep_prob_hidden = tf.placeholder(tf.float32)
    tf.summary.scalar('dropout_hidden', keep_prob_hidden)

    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob_hidden)

    # Second hidden layer
    h_fc2 = fc_layer(h_fc1_drop, FLAGS.hidden1, FLAGS.hidden2,
                     1.0 / math.sqrt(float(FLAGS.hidden1)), 'h_fc2')

    # Apply dropout to the second hidden layer
    h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob_hidden)

    # Four linear readout heads on top of the last hidden layer
    if FLAGS.hidden3 == -1:
        logits_tr1 = softmax_linear(h_fc2_drop, FLAGS.hidden2, NUM_CLASSES,
                                    1.0 / math.sqrt(float(FLAGS.hidden2)),
                                    'softmax_linear_tr1')
        logits_tr2 = softmax_linear(h_fc2_drop, FLAGS.hidden2, NUM_CLASSES,
                                    1.0 / math.sqrt(float(FLAGS.hidden2)),
                                    'softmax_linear_tr2')
        logits_tr3 = softmax_linear(h_fc2_drop, FLAGS.hidden2, NUM_CLASSES,
                                    1.0 / math.sqrt(float(FLAGS.hidden2)),
                                    'softmax_linear_tr3')
        logits_tr4 = softmax_linear(h_fc2_drop, FLAGS.hidden2, NUM_CLASSES,
                                    1.0 / math.sqrt(float(FLAGS.hidden2)),
                                    'softmax_linear_tr4')
    else:
        # NOTE(review): every other layer derives its stddev from its input
        # width; this one uses the output width (hidden3) - confirm intent.
        h_fc3 = fc_layer(h_fc2_drop, FLAGS.hidden2, FLAGS.hidden3,
                         1.0 / math.sqrt(float(FLAGS.hidden3)), 'h_fc3')

        # Apply dropout to the third hidden layer
        h_fc3_drop = tf.nn.dropout(h_fc3, keep_prob_hidden)

        logits_tr1 = softmax_linear(h_fc3_drop, FLAGS.hidden3, NUM_CLASSES,
                                    1.0 / math.sqrt(float(FLAGS.hidden3)),
                                    'softmax_linear_tr1')
        logits_tr2 = softmax_linear(h_fc3_drop, FLAGS.hidden3, NUM_CLASSES,
                                    1.0 / math.sqrt(float(FLAGS.hidden3)),
                                    'softmax_linear_tr2')
        logits_tr3 = softmax_linear(h_fc3_drop, FLAGS.hidden3, NUM_CLASSES,
                                    1.0 / math.sqrt(float(FLAGS.hidden3)),
                                    'softmax_linear_tr3')
        logits_tr4 = softmax_linear(h_fc3_drop, FLAGS.hidden3, NUM_CLASSES,
                                    1.0 / math.sqrt(float(FLAGS.hidden3)),
                                    'softmax_linear_tr4')

    # Sum of all four heads' logits, used for the combined accuracy metric.
    logitsAll = logits_tr1 + logits_tr2 + logits_tr3 + logits_tr4

    # Softmax cross-entropy loss of readout head 1
    with tf.name_scope('cross_entropy_tr1'):
        diff_tr1 = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                           logits=logits_tr1)
        with tf.name_scope('total_tr1'):
            cross_entropy_tr1 = tf.reduce_mean(diff_tr1)

    # Softmax cross-entropy loss of readout head 2
    with tf.name_scope('cross_entropy_tr2'):
        diff_tr2 = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                           logits=logits_tr2)
        with tf.name_scope('total_tr2'):
            cross_entropy_tr2 = tf.reduce_mean(diff_tr2)

    # Softmax cross-entropy loss of readout head 3
    with tf.name_scope('cross_entropy_tr3'):
        diff_tr3 = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                           logits=logits_tr3)
        with tf.name_scope('total_tr3'):
            cross_entropy_tr3 = tf.reduce_mean(diff_tr3)

    # Softmax cross-entropy loss of readout head 4
    with tf.name_scope('cross_entropy_tr4'):
        diff_tr4 = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                           logits=logits_tr4)
        with tf.name_scope('total_tr4'):
            cross_entropy_tr4 = tf.reduce_mean(diff_tr4)

    # One momentum optimizer per head, each minimising its own cross entropy
    # with an exponentially decayed (staircase) learning rate.
    with tf.name_scope('train_tr1'):
        lr = tf.train.exponential_decay(FLAGS.learning_rate,
                                        global_step,
                                        FLAGS.decayStep,
                                        FLAGS.decayFactor,
                                        staircase=True)

        train_step_tr1 = tf.train.MomentumOptimizer(
            learning_rate=lr, momentum=0.99).minimize(cross_entropy_tr1)

    with tf.name_scope('train_tr2'):
        lr = tf.train.exponential_decay(FLAGS.learning_rate,
                                        global_step,
                                        FLAGS.decayStep,
                                        FLAGS.decayFactor,
                                        staircase=True)

        train_step_tr2 = tf.train.MomentumOptimizer(
            learning_rate=lr, momentum=0.99).minimize(cross_entropy_tr2)

    with tf.name_scope('train_tr3'):
        lr = tf.train.exponential_decay(FLAGS.learning_rate,
                                        global_step,
                                        FLAGS.decayStep,
                                        FLAGS.decayFactor,
                                        staircase=True)

        train_step_tr3 = tf.train.MomentumOptimizer(
            learning_rate=lr, momentum=0.99).minimize(cross_entropy_tr3)

    with tf.name_scope('train_tr4'):
        lr = tf.train.exponential_decay(FLAGS.learning_rate,
                                        global_step,
                                        FLAGS.decayStep,
                                        FLAGS.decayFactor,
                                        staircase=True)

        train_step_tr4 = tf.train.MomentumOptimizer(
            learning_rate=lr, momentum=0.99).minimize(cross_entropy_tr4)

    # Correct-prediction mask and accuracy of readout head 1
    with tf.name_scope('accuracy_tr1'):
        with tf.name_scope('correct_prediction_tr1'):
            correct_prediction_tr1 = tf.equal(tf.argmax(logits_tr1, 1),
                                              tf.argmax(y_, 1))
        with tf.name_scope('accuracy_tr1'):
            accuracy_tr1 = tf.reduce_mean(
                tf.cast(correct_prediction_tr1, tf.float32))
    tf.summary.scalar('accuracy_tr1', accuracy_tr1)

    # Correct-prediction mask and accuracy of readout head 2
    with tf.name_scope('accuracy_tr2'):
        with tf.name_scope('correct_prediction_tr2'):
            correct_prediction_tr2 = tf.equal(tf.argmax(logits_tr2, 1),
                                              tf.argmax(y_, 1))
        with tf.name_scope('accuracy_tr2'):
            accuracy_tr2 = tf.reduce_mean(
                tf.cast(correct_prediction_tr2, tf.float32))
    tf.summary.scalar('accuracy_tr2', accuracy_tr2)

    # Correct-prediction mask and accuracy of readout head 3
    with tf.name_scope('accuracy_tr3'):
        with tf.name_scope('correct_prediction_tr3'):
            correct_prediction_tr3 = tf.equal(tf.argmax(logits_tr3, 1),
                                              tf.argmax(y_, 1))
        with tf.name_scope('accuracy_tr3'):
            accuracy_tr3 = tf.reduce_mean(
                tf.cast(correct_prediction_tr3, tf.float32))
    tf.summary.scalar('accuracy_tr3', accuracy_tr3)

    # Correct-prediction mask and accuracy of readout head 4
    with tf.name_scope('accuracy_tr4'):
        with tf.name_scope('correct_prediction_tr4'):
            correct_prediction_tr4 = tf.equal(tf.argmax(logits_tr4, 1),
                                              tf.argmax(y_, 1))
        with tf.name_scope('accuracy_tr4'):
            accuracy_tr4 = tf.reduce_mean(
                tf.cast(correct_prediction_tr4, tf.float32))
    tf.summary.scalar('accuracy_tr4', accuracy_tr4)

    # Correct-prediction mask and accuracy of the summed logits
    with tf.name_scope('accuracy_trAll'):
        with tf.name_scope('correct_prediction_trAll'):
            correct_prediction_trAll = tf.equal(tf.argmax(logitsAll, 1),
                                                tf.argmax(y_, 1))
        with tf.name_scope('accuracy_trAll'):
            accuracy_trAll = tf.reduce_mean(
                tf.cast(correct_prediction_trAll, tf.float32))
    tf.summary.scalar('accuracy_trAll', accuracy_trAll)

    # One K-Means model per readout head, all clustering the raw input x.
    # The creation order matters: the "initialized*" tensors looked up by name
    # further down are numbered in this order.
    kmeans_tr1 = KMeans(
        inputs=x,
        num_clusters=500,  #distance_metric=SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=False)
    kmeans_tr2 = KMeans(
        inputs=x,
        num_clusters=500,  #distance_metric=SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=False)
    kmeans_tr3 = KMeans(
        inputs=x,
        num_clusters=500,  #distance_metric=SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=False)
    kmeans_tr4 = KMeans(
        inputs=x,
        num_clusters=500,  #distance_metric=SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=False)

    # Build the K-Means graphs (seven-element form of training_graph())
    (all_scores_tr1, cluster_idx_tr1, scores_tr1,
     cluster_centers_initialized_tr1, cluster_centers_var_tr1, init_op_tr1,
     train_op_tr1) = kmeans_tr1.training_graph()

    (all_scores_tr2, cluster_idx_tr2, scores_tr2,
     cluster_centers_initialized_tr2, cluster_centers_var_tr2, init_op_tr2,
     train_op_tr2) = kmeans_tr2.training_graph()

    (all_scores_tr3, cluster_idx_tr3, scores_tr3,
     cluster_centers_initialized_tr3, cluster_centers_var_tr3, init_op_tr3,
     train_op_tr3) = kmeans_tr3.training_graph()

    (all_scores_tr4, cluster_idx_tr4, scores_tr4,
     cluster_centers_initialized_tr4, cluster_centers_var_tr4, init_op_tr4,
     train_op_tr4) = kmeans_tr4.training_graph()

    # Fed with condition_1..4: whether the respective K-Means is initialised.
    init_graph_1 = tf.placeholder(tf.bool, name="init_graph_1")
    init_graph_2 = tf.placeholder(tf.bool, name="init_graph_2")
    init_graph_3 = tf.placeholder(tf.bool, name="init_graph_3")
    init_graph_4 = tf.placeholder(tf.bool, name="init_graph_4")

    # Routing score per head: the smallest squared Euclidean distance between
    # x and any cluster center. A K-Means that is not initialised yields
    # max(x) * 1000 instead, so it is not selected by the argmin below.
    with tf.name_scope('test_score_tr1'):
        minimum_dist_tr1 = tf.cond(
            init_graph_1, lambda: tf.reduce_min(
                tf.reduce_sum(tf.square(tf.subtract(cluster_centers_var_tr1, x)
                                        ),
                              axis=1)), lambda: tf.reduce_max(x) * 1000)
    with tf.name_scope('test_score_tr2'):
        minimum_dist_tr2 = tf.cond(
            init_graph_2, lambda: tf.reduce_min(
                tf.reduce_sum(tf.square(tf.subtract(cluster_centers_var_tr2, x)
                                        ),
                              axis=1)), lambda: tf.reduce_max(x) * 1000)
    with tf.name_scope('test_score_tr3'):
        minimum_dist_tr3 = tf.cond(
            init_graph_3, lambda: tf.reduce_min(
                tf.reduce_sum(tf.square(tf.subtract(cluster_centers_var_tr3, x)
                                        ),
                              axis=1)), lambda: tf.reduce_max(x) * 1000)
    with tf.name_scope('test_score_tr4'):
        minimum_dist_tr4 = tf.cond(
            init_graph_4, lambda: tf.reduce_min(
                tf.reduce_sum(tf.square(tf.subtract(cluster_centers_var_tr4, x)
                                        ),
                              axis=1)), lambda: tf.reduce_max(x) * 1000)

    # Merge all summaries registered above into a single op
    merged = tf.summary.merge_all()

    saver = tf.train.Saver(var_list=None)

    # Initialize all global variables or load the model from a checkpoint
    if args.load_model:
        print('\nLoading Model: ', args.load_model)
        ckpt = tf.train.get_checkpoint_state(
            checkpoint_dir=args.checkpoints_dir,
            latest_filename=args.load_model)

        saver.restore(sess=sess,
                      save_path=args.checkpoints_dir + args.load_model +
                      '.ckpt')
    else:
        tf.global_variables_initializer().run()

    # The accuracy log is opened through a context manager so the handle is
    # flushed and closed even if training raises.
    # NOTE(review): mode "wb" is what the csv module expects on Python 2; on
    # Python 3 it needs text mode with newline="" - confirm the target version.
    with open(FLAGS.plot_file, "wb") as plot_file, tf.name_scope("training"):
        writer = csv.writer(plot_file)
        print('\n\nTraining on given Dataset...')
        print('____________________________________________________________')
        print(time.strftime('%X %x %Z'))

        # Train the K-Means model that belongs to the selected readout head.
        # Values are compared with == (not `is`): identity of int objects is an
        # implementation detail and must not decide which branch runs.
        nrStepsForClustering = 1
        if training_readout_layer == 1:
            sess.run(init_op_tr1, feed_dict={x: dataSetTrain.images})
            for i in range(0, nrStepsForClustering):
                _ = sess.run(train_op_tr1, feed_dict={x: dataSetTrain.images})
        elif training_readout_layer == 2:
            sess.run(init_op_tr2, feed_dict={x: dataSetTrain.images})
            for i in range(0, nrStepsForClustering):
                _ = sess.run(train_op_tr2, feed_dict={x: dataSetTrain.images})
        elif training_readout_layer == 3:
            sess.run(init_op_tr3, feed_dict={x: dataSetTrain.images})
            for i in range(0, nrStepsForClustering):
                _ = sess.run(train_op_tr3, feed_dict={x: dataSetTrain.images})
        elif training_readout_layer == 4:
            sess.run(init_op_tr4, feed_dict={x: dataSetTrain.images})
            for i in range(0, nrStepsForClustering):
                # NOTE(review): unlike layers 1-3 this feeds one training
                # batch rather than all training images - confirm intent.
                _ = sess.run(train_op_tr4, feed_dict(True, i))
        print("Trained clustering!")

        # Read back which K-Means models have initialised cluster centers.
        condition_1 = tf.get_default_graph().get_tensor_by_name(
            "initialized:0").eval()
        condition_2 = tf.get_default_graph().get_tensor_by_name(
            "initialized_1:0").eval()
        condition_3 = tf.get_default_graph().get_tensor_by_name(
            "initialized_2:0").eval()
        condition_4 = tf.get_default_graph().get_tensor_by_name(
            "initialized_3:0").eval()
        print(condition_1, "\n\n", condition_2, "\n\n", condition_3, "\n\n",
              condition_4)

        # Training of the neural network
        for i in range(FLAGS.start_at_step,
                       FLAGS.max_steps + FLAGS.start_at_step):
            if i % LOG_FREQUENCY == 0:
                # Evaluation step (every LOG_FREQUENCY steps): route random
                # single test samples through the head picked by K-Means.
                cumm_acc = 0
                total_steps = dataSetTest.images.shape[0] / 50
                # dsClass[k] counts how often head k+1 was selected.
                dsClass = np.array([0., 0., 0., 0.])
                for xx in range(0, int(total_steps)):
                    idx = random.randint(1, dataSetTest.images.shape[0]) - 1
                    xs = dataSetTest.images[np.newaxis, idx]
                    ys = dataSetTest.labels[np.newaxis, idx]
                    k_h = 1.0
                    k_i = 1.0
                    score_tr1, score_tr2, score_tr3, score_tr4 = sess.run(
                        [
                            minimum_dist_tr1, minimum_dist_tr2,
                            minimum_dist_tr3, minimum_dist_tr4
                        ],
                        feed_dict={
                            x: xs,
                            init_graph_1: condition_1,
                            init_graph_2: condition_2,
                            init_graph_3: condition_3,
                            init_graph_4: condition_4
                        })
                    # 1-based index of the head with the smallest distance
                    readout_layer = (np.argmin(
                        [score_tr1, score_tr2, score_tr3, score_tr4]) + 1)
                    dsClass[readout_layer - 1] += 1

                    if readout_layer == 1:
                        l, s, acc = sess.run(
                            [logits_tr1, merged, accuracy_tr1],
                            feed_dict={
                                x: xs,
                                y_: ys,
                                keep_prob_input: k_i,
                                keep_prob_hidden: k_h,
                                global_step: i
                            })
                    elif readout_layer == 2:
                        l, s, acc = sess.run(
                            [logits_tr2, merged, accuracy_tr2],
                            feed_dict={
                                x: xs,
                                y_: ys,
                                keep_prob_input: k_i,
                                keep_prob_hidden: k_h,
                                global_step: i
                            })
                    elif readout_layer == 3:
                        l, s, acc = sess.run(
                            [logits_tr3, merged, accuracy_tr3],
                            feed_dict={
                                x: xs,
                                y_: ys,
                                keep_prob_input: k_i,
                                keep_prob_hidden: k_h,
                                global_step: i
                            })
                    elif readout_layer == 4:
                        l, s, acc = sess.run(
                            [logits_tr4, merged, accuracy_tr4],
                            feed_dict={
                                x: xs,
                                y_: ys,
                                keep_prob_input: k_i,
                                keep_prob_hidden: k_h,
                                global_step: i
                            })
                    else:
                        print("PROBLEM:::!")
                    cumm_acc = cumm_acc + acc
                average_accu = cumm_acc / float(total_steps)
                print(
                    'test set 1 accuracy at step: %s \t \t %s' %
                    (i, average_accu), "D1D2 acc=", dsClass / dsClass.sum())
                writer.writerow([i, average_accu])
            else:
                # Training step: one optimizer update of the selected head
                # on a batch of 100 training samples.
                if training_readout_layer == 1:
                    s, _ = sess.run([merged, train_step_tr1],
                                    feed_dict_test(True, i, 100))
                elif training_readout_layer == 2:
                    s, _ = sess.run([merged, train_step_tr2],
                                    feed_dict_test(True, i, 100))
                elif training_readout_layer == 3:
                    s, _ = sess.run([merged, train_step_tr3],
                                    feed_dict_test(True, i, 100))
                elif training_readout_layer == 4:
                    s, _ = sess.run([merged, train_step_tr4],
                                    feed_dict_test(True, i, 100))

        if args.save_model:
            saver.save(sess=sess,
                       save_path=args.checkpoints_dir + args.save_model +
                       '.ckpt')
Example #21
0
# Prepare labels, split the data and start building a K-Means graph.
# Relies on `data_x`, `data_y` and `one_hot_encoding` defined earlier in the file.

# parameters
class_num = 3
feature_num = 4
train_steps = 50

# one-hot encoding: one row per sample, one column per class
data_y = np.reshape(data_y, (data_y.shape[0], 1))
data_y_onehot = np.ndarray((data_y.shape[0], class_num), dtype=np.int32)
for i in range(data_y_onehot.shape[0]):
    data_y_onehot[i, :] = one_hot_encoding(data_y[i, :], class_num)

# split data
# sklearn.cross_validation was removed in scikit-learn 0.20; prefer its
# replacement and fall back only on installations that predate it.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# NOTE(review): the split uses the raw `data_y`, not `data_y_onehot`, although
# `y_input` below has `class_num` columns - confirm which labels are intended.
train_x, test_x, train_y, test_y = train_test_split(data_x,
                                                    data_y,
                                                    test_size=0.3)

# Graph
graph = tf.Graph()
with graph.as_default():
    x_input = tf.placeholder(tf.float32, shape=[None, feature_num])
    y_input = tf.placeholder(tf.int32, shape=[None, class_num])

    # Model: one cluster per class
    model = KMeans(inputs=x_input, num_clusters=class_num)
Example #22
0
# Parameters
num_steps = 2  # Total steps to train
batch_size = 1024  # The number of samples per batch
k = 25  # The number of clusters
num_classes = 10  # The 10 digits
num_features = 784  # Each image is 28x28 pixels

# Input images
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels (for assigning a label to a centroid and testing)
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# K-Means Parameters
kmeans = KMeans(inputs=X,
                num_clusters=k,
                distance_metric="cosine",
                use_mini_batch=True)

# Build KMeans graph
training_graph = kmeans.training_graph()

# training_graph() returns seven elements on TensorFlow 1.4+ and six on older
# releases. Both layouts are unpacked so the names below are always defined.
if len(training_graph) > 6:  # Tensorflow 1.4+
    (
        all_scores,
        cluster_idx,
        scores,
        cluster_centers_initialized,
        cluster_centers_var,
        init_op,
        train_op,
    ) = training_graph
else:
    (
        all_scores,
        cluster_idx,
        scores,
        cluster_centers_initialized,
        init_op,
        train_op,
    ) = training_graph
# Load MNIST with one-hot labels. A raw string keeps the backslash literal
# instead of relying on "\d" being an unrecognised escape sequence.
mnist = input_data.read_data_sets(r"E:\data", one_hot=True)
full_data = mnist.train.images

# Model parameters
num_steps = 300
batch_size = 1024
k = 10
num_classes = 10
num_feature = 784

# Input and output placeholders
x = tf.placeholder(tf.float32, shape=[None, num_feature], name='input_x')
y = tf.placeholder(tf.float32, shape=[None, num_classes], name='output_y')

# Define the model with KMeans
kmeans = KMeans(inputs=x, num_clusters=k, distance_metric='cosine', use_mini_batch=True)

# Build the K-Means graph. training_graph() returns seven elements on
# TensorFlow 1.4+ (adds cluster_centers_var) and six on older releases.
training_graph = kmeans.training_graph()
if len(training_graph) > 6:
    (all_source, cluster_idx, scores, cluster_center_initialized,
     cluster_centers_var, ini_op, train_op) = training_graph
else:
    (all_source, cluster_idx, scores, cluster_center_initialized,
     ini_op, train_op) = training_graph

# cluster_idx is returned as a tuple; keep its first element
cluster_idx = cluster_idx[0]
avg_distance = tf.reduce_mean(scores)

# `init_op` initialises the global variables; `ini_op` is the K-Means init op.
init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    sess.run(ini_op, feed_dict={x: full_data})
Example #24
0
# Training images of the MNIST data set, used as the clustering input.
full_data_x = mnist.train.images

# ---- Hyper-parameters ----
num_steps = 50       # total number of training steps
batch_size = 1024    # samples per batch
k = 25               # number of clusters
num_classes = 10     # the ten digits
num_features = 784   # every image is 28x28 pixels

# ---- Graph inputs ----
# X: images; Y: labels (for assigning a label to a centroid and for testing).
X = tf.placeholder(tf.float32, shape=[None, num_features])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# ---- Model: mini-batch K-Means with cosine distance ----
kmeans = KMeans(inputs=X,
                num_clusters=k,
                distance_metric='cosine',
                use_mini_batch=True)

# ---- K-Means training graph ----
training_graph = kmeans.training_graph()

# Releases before TensorFlow 1.4 return six elements; 1.4+ returns a seventh
# one, cluster_centers_var.
if len(training_graph) <= 6:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_centers_var, init_op, train_op) = training_graph

# cluster_idx arrives as a tuple; only its first element is needed.
cluster_idx = cluster_idx[0]
avg_distance = tf.reduce_mean(scores)

# Next: initialize the variables (i.e. assign their default value)
Example #25
0
# Load MNIST with one-hot labels and cluster the training images with K-Means.
mnist = input_data.read_data_sets('./tmp/data/', one_hot=True)

full_data_x = mnist.train.images
print(full_data_x.shape)  # (55000, 784)

num_steps = 200
batch_size = 1000
k = 50
num_classes = 10  # number of output labels
num_features = 784  # number of input features

x = tf.placeholder(tf.float32, shape=[None, num_features])
y = tf.placeholder(tf.float32, shape=[None, num_classes])

kmodel = KMeans(inputs=x,
                num_clusters=k,
                distance_metric='cosine',
                use_mini_batch=True)
training_graph = kmodel.training_graph()

# training_graph() returns seven elements on TensorFlow 1.4+ (adds
# cluster_centers_var) and six on older releases. Unpacking a fixed six
# raises ValueError on 1.4+, so both layouts are handled.
if len(training_graph) > 6:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_centers_var, init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
     train_op) = training_graph
print('cluster_idx : ', cluster_idx)
print('cluster_idx[0] : ', cluster_idx[0])
# cluster_idx is a tuple; keep its first element
cluster_idx = cluster_idx[0]
print('cluster_idx : ', cluster_idx)
avg_distance = tf.reduce_mean(scores)

# NOTE(review): the session is not closed in the visible part of this snippet.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(init_op, feed_dict={x: full_data_x})
Example #26
0
# ---- Hyper-parameters ----
numb_steps = 50                 # training steps
batch_size = 1024               # samples per batch
k = 25                          # clusters
number_classes = 10
numb_features = data.shape[1]   # feature count taken from the data matrix

# ---- Model inputs: images and their labels ----
input_images = tf.placeholder(tf.float32, shape=[None, numb_features])
input_labels = tf.placeholder(tf.float32, shape=[None, number_classes])

# ---- Mini-batch K-Means instance using cosine distance ----
kmeans = KMeans(inputs=input_images, num_clusters=k,
                distance_metric='cosine', use_mini_batch=True)

# ---- K-Means training graph (seven-element form) ----
training_graph = kmeans.training_graph()
(
    all_scores,
    cluster_idx,
    scores,
    cluster_centers_initialized,
    cluster_centers_var,
    init_op,
    training_op,
) = training_graph

# Keep only the first element of cluster_idx.
cluster_idx = cluster_idx[0]
avg_distance = tf.reduce_mean(scores)

# Op that initializes the variables (i.e. assigns their default values).
init_vars = tf.global_variables_initializer()
Example #27
0
def run_k_means(num_steps=num_steps,
                k=None,
                num_classes=None,
                num_features=num_features,
                keep_session=True):
    """Train K-Means on the processed data, label the centroids and save the model.

    The data comes from ``get_processed_data()``. Mini-batch K-Means is trained
    on the full training matrix, every centroid receives the most frequent
    label among the training samples assigned to it (ties are broken at
    random), the accuracy on ``orig_new_X_num`` / ``orig_new_y`` (used as the
    test set) is printed, and both the TensorFlow checkpoint and the
    ``(labels_map_np, k)`` pair are written under ``data/cache/kmeans``.

    Args:
        num_steps: Number of training steps to run.
        k: Number of clusters. Defaults to the number of students reported by
            ``get_processed_data()``.
        num_classes: Number of label classes. Defaults to the number of
            students reported by ``get_processed_data()``.
        num_features: Width of the input feature matrix.
        keep_session: When false, the session is closed before returning.

    Returns:
        A tuple ``(k, num_classes, labels_map_np, sess)`` where
        ``labels_map_np`` is a list mapping each centroid index to a label and
        ``sess`` is the TensorFlow session (already closed when
        ``keep_session`` is false).
    """
    (new_X_num, num_map, new_y_num, max_t_s_num, num_student, orig_new_X_num,
     orig_new_y) = get_processed_data()

    if k is None:
        k = num_student
        print('Choosing {} clusters for {} students, {} samples'.format(
            k, num_student, len(new_y_num)))

    if num_classes is None:
        num_classes = num_student

    full_data_x = np.asarray(new_X_num)

    # Input
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    # Labels (for assigning a label to a centroid and testing)
    y = tf.placeholder(tf.float32, shape=[None, num_classes])

    # K-Means Parameters
    kmeans = KMeans(inputs=X,
                    num_clusters=k,
                    distance_metric='squared_euclidean',
                    use_mini_batch=True)

    # Build KMeans graph. The returned tuple has six or seven entries
    # depending on the TensorFlow release, so dispatch on its length.
    training_graph = kmeans.training_graph()
    if len(training_graph) > 6:  # Tensorflow 1.4+
        (all_scores, cluster_idx, scores, cluster_centers_initialized,
         cluster_centers_var, init_op, train_op) = training_graph
    else:
        (all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
         train_op) = training_graph

    cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
    avg_distance = tf.reduce_mean(scores)

    saver = tf.train.Saver()

    # Start TensorFlow session
    sess = tf.Session()

    # Run the initializer
    train_feed = {X: full_data_x}
    sess.run(tf.global_variables_initializer(), feed_dict=train_feed)
    sess.run(init_op, feed_dict=train_feed)

    # One-hot depth must equal num_classes: it has to match both the `y`
    # placeholder and the width of `counts` below. (With the default
    # num_classes this is the same value as num_student.)
    one_hot_y = sess.run(tf.one_hot(new_y_num, num_classes))
    test_one_hot_y = sess.run(tf.one_hot(orig_new_y, num_classes))

    # Training
    idx = None
    for i in range(1, num_steps + 1):
        _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                             feed_dict=train_feed)
        if i % 500 == 0 or i == 1:
            print("Step %i, Avg Distance: %f" % (i, d))
    if idx is None:
        # No training step ran (num_steps < 1); still fetch the assignments
        # so the labelling below has something to work with.
        idx = sess.run(cluster_idx, feed_dict=train_feed)

    # Assign a label to each centroid
    # Count total number of labels per centroid, using the label of each training
    # sample to their closest centroid (given by 'idx'). np.add.at accumulates
    # correctly when the same centroid index appears many times.
    counts = np.zeros(shape=(k, num_classes))
    np.add.at(counts, idx, one_hot_y[:len(idx)])
    # Assign the most frequent label to the centroid, picking uniformly at
    # random among tied labels. This stays a Python list (not an ndarray)
    # before being converted to a tensor.
    labels_map_np = [
        np.random.choice(np.argwhere(c == np.amax(c)).flatten())
        for c in counts
    ]

    labels_map = tf.convert_to_tensor(labels_map_np)

    # Evaluation ops
    # Lookup: centroid_id -> label
    cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
    # Compute accuracy
    correct_prediction = tf.equal(cluster_label,
                                  tf.cast(tf.argmax(y, 1), tf.int32))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Test Model
    test_x, test_y = orig_new_X_num, test_one_hot_y
    print("Test Accuracy:",
          sess.run(accuracy_op, feed_dict={
              X: test_x,
              y: test_y
          }))

    save_path = saver.save(
        sess, path.join('data', 'cache', 'kmeans', 'tf_model', 'model.ckpt'))
    print("Model saved in path: %s" % save_path)

    save((labels_map_np, k),
         'labels_map_np,k',
         folder=path.join('data', 'cache', 'kmeans'))

    if not keep_session:
        sess.close()

    return k, num_classes, labels_map_np, sess
Example #28
0
# PCA dimensionality reduction
pca = PCA(n_components=n)
reduced_train_images = pca.fit_transform(train_images)
print(len(reduced_train_images), len(reduced_train_images[0]),
      pca.n_components_)

# TensorFlow inputs and outputs
tf_in = tf.placeholder(tf.float32, shape=[None, pca.n_components_])  # input
tf_out = tf.placeholder(tf.float32, shape=[None, num_labels])  # output

# KMeans: note that this is the TensorFlow implementation, so creating it
# adds its ops to the TensorFlow graph automatically.
# Cosine distance (cosine similarity) is used as the distance metric.
tf_kmeans = KMeans(
    inputs=tf_in,
    num_clusters=k,
    # Cosine distance works better for un-normalized data; it can also be
    # switched off for experimentation.
    distance_metric='cosine'
)
# training_graph() yields either six or seven values depending on the
# installed TensorFlow release (the six-value layout was written against
# 1.13); the seven-value form also carries a cluster-centers variable.
tf_training_graph = tf_kmeans.training_graph()
if len(tf_training_graph) > 6:
    (tf_incluster_dis, tf_cluster_id, tf_dis, tf_kmeans_initialized,
     tf_cluster_centers_var, tf_init_op, tf_train_op) = tf_training_graph
else:
    (tf_incluster_dis, tf_cluster_id, tf_dis, tf_kmeans_initialized,
     tf_init_op, tf_train_op) = tf_training_graph
    tf_cluster_centers_var = None
# You may consider removing this line, but the results get very strange.
tf_cluster_id = tf_cluster_id[0]
tf_avg_dis = tf.reduce_mean(tf_dis)
tf_init_vars = tf.global_variables_initializer()

# Get ready to compute
sess = tf.Session()
# Initialize the network
sess.run(tf_init_vars, feed_dict={tf_in: reduced_train_images})
# init_op is one of the values returned by training_graph()
sess.run(tf_init_op, feed_dict={tf_in: reduced_train_images})
Example #29
0
def keeplearning(new_data):
    """Merge new samples into the stored data set and retrain K-Means from scratch.

    The list stored in ``data_list.tdat`` is loaded, ``new_data`` is prepended
    to it (when non-empty) and the merged list is written back. Existing
    checkpoint files are removed, the number of clusters is chosen with
    ``ut.elbow``, the model is trained on the merged data and the result is
    saved to ``./trained_data.ckpt``.

    Args:
        new_data: Samples to add; concatenated with the stored list via ``+``,
            so it is presumably a list of 14-feature rows (verify against
            callers).

    Returns:
        None.
    """
    # Load the data_list file (also needed to estimate K).
    # NOTE(review): `pk` is presumably the pickle module; unpickling runs
    # arbitrary code if the file is attacker-controlled, so data_list.tdat
    # must only ever come from a trusted source.
    with open('data_list.tdat', 'rb') as stored:
        ofile_list = pk.load(stored)

    # Merge the newly added new_data with the existing stored data and save.
    if len(new_data) != 0:
        data_list = new_data + ofile_list
    else:
        data_list = ofile_list

    with open('data_list.tdat', 'wb') as out:
        pk.dump(data_list, out)
    # Disabled code, kept for reference:
    # data_set = [
    #     [1,787,24.2,17.7,8.0,5.4,11.7,8.4,1.8,4.0,5.6,2.8,1.3,5.9,3.1],
    #     [2,737,23.6,14.9,11.6,6.5,7.7,6.3,11.3,3.1,3.5,3.1,3.5,2.7,1.9],
    #     [3,754,23.2,11.7,10.9,8.4,10.0,9.1,6.4,4.8,3.3,3.9,4.4,2.4,1.4],
    #     [4,621,24.6,10.8,11.8,11.7,8.2,8.2,4.0,6.2,4.2,5.1,2.5,1.2,1.4],
    #     [5,693,22.8,8.8,12.8,21.4,7.9,5.9,3.1,5.9,3.3,4.6,2.0,0.3,0.8]
    # ]

    # data_list = []
    # for i in range(0,len(data_set)):
    #     for j in range(2, len(data_set[i])):
    #         data_set[i][j] = data_set[i][j]/10

    # for k in range(0,len(data_set)):
    #     for i in range(0, data_set[k][1]):
    #         data = []
    #         data.append(data_set[k][0])
    #         for j in range(2, len(data_set[k])):
    #             ans_good = int(data_set[k][1]*data_set[k][j]/100)
    #             ans_bad = data_set[k][1] - ans_good
    #             # Use numpy's hypergeometric distribution to sample the
    #             # national statistics office data without replacement
    #             target = np.random.hypergeometric(ngood = ans_good, nbad = ans_bad, nsample = 13, size = None)
    #             data.append(target)
    #         data_list.append(data)

    # Write the list to the file as-is
    # f = open('data_list.tdat', 'wb')
    # pickle.dump(data_list, f)
    # f.close()

    # Before training, delete any existing checkpoint files.
    for stale in ('./trained_data.ckpt.data-00000-of-00001',
                  './trained_data.ckpt.index',
                  './trained_data.ckpt.meta',
                  './checkpoint'):
        if os.path.isfile(stale):
            os.remove(stale)

    # Train on the new data
    Data_X = data_list
    k = ut.elbow(data_list)

    # Number of columns in the training data
    num_features = 14

    X = tf.placeholder(tf.float32, shape=[None, num_features])

    kmeans = KMeans(inputs=X,
                    num_clusters=k,
                    distance_metric='squared_euclidean',
                    use_mini_batch=True)

    # training_graph() yields either six or seven values depending on the
    # installed TensorFlow release; accept both layouts.
    training_graph = kmeans.training_graph()
    if len(training_graph) > 6:
        (all_scores, cluster_idx, scores, cluster_centers_initialized,
         cluster_centers_var, init_op, train_op) = training_graph
    else:
        (all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
         train_op) = training_graph
    cluster_idx = cluster_idx[0]
    avg_distance = tf.reduce_mean(scores)

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    feed = {X: Data_X}

    # The context manager closes the session even if training or saving fails.
    with tf.Session() as sess:
        sess.run(init)
        sess.run(init_op, feed_dict=feed)

        # Feed the data: range(1, 100) gives 99 training steps. The average
        # distance and assignments are evaluated with each update but are not
        # used here.
        for _ in range(1, 100):
            sess.run([train_op, avg_distance, cluster_idx], feed_dict=feed)

        # Save the trained model
        saver.save(sess, './trained_data.ckpt')
        print("Trained data save completed.")
Example #30
0
def kmeans_unsupervised_init(sim_op, templates_var, weights_var):
    """Build ops that initialize a similarity layer's templates with k-means.

    Patches are extracted from the similarity op's input, clustered with
    mini-batch k-means, and the resulting cluster centers are assigned to the
    templates variable; the weights variable is set to ones of the same shape.

    Two ops are returned: the first initializes the learning, the second
    should be run iteratively with all the data.

    Parameters
    ----------
    sim_op : tf.Operation | tf.Tensor
        the similarity operation (or the tensor produced by it)
    templates_var : tf.Variable
        the templates variable of this similarity layer
    weights_var : tf.Variable
        the weights variable of this similarity layer

    Returns
    -------
    A tuple (init_op, update_op); init_op must be executed by a session before
    update_op is used, and update_op performs the learning step.

    Raises
    ------
    ValueError
        if the given op is not of type 'Similarity'
    """
    # Accept the output tensor as a stand-in for the op that produced it.
    if isinstance(sim_op, tf.Tensor):
        sim_op = sim_op.op
    if sim_op.type != 'Similarity':
        raise ValueError(
            'kmeans_unsupervised_init needs a similarity op, got %s instead' %
            sim_op.type)
    assert isinstance(sim_op, tf.Operation)

    scope_name = sim_op.name + '_kmeans_init'
    with tf.name_scope(scope_name):
        layer_input = sim_op.inputs[0]
        templates_tensor = sim_op.inputs[1]
        _weights_tensor = sim_op.inputs[2]  # read but not used further
        ninstances = templates_tensor.get_shape().as_list()[0]

        # Expand the op's 2-element stride / block attributes into the
        # 4-element form expected by extract_image_patches.
        stride_attr = sim_op.get_attr('strides')
        block_attr = sim_op.get_attr('blocks')
        patch_strides = [1, stride_attr[0], stride_attr[1], 1]
        patch_sizes = [1, block_attr[0], block_attr[1], 1]

        # Move axis 1 of the input to the last position before extracting.
        transposed_input = tf.transpose(layer_input, (0, 2, 3, 1))
        patches = tf.extract_image_patches(transposed_input,
                                           ksizes=patch_sizes,
                                           strides=patch_strides,
                                           rates=[1, 1, 1, 1],
                                           padding='VALID')
        _, _, _, ppatch = patches.get_shape().as_list()
        # Flatten to one row per patch for clustering.
        flat_patches = tf.reshape(patches, [-1, ppatch])

        kmeans = KMeans(flat_patches,
                        ninstances,
                        use_mini_batch=True,
                        initial_clusters='kmeans_plus_plus')
        _, _, _, _, init_op, training_op = kmeans.training_graph()

        # Locate the cluster-centers variable by its fully qualified name.
        wanted_name = scope_name + '/' + 'clusters:0'
        matching_vars = [
            var for var in tf.global_variables() if var.name == wanted_name
        ]
        clusters = matching_vars[0].op.outputs[0]

        # Reshape the flat centers to the template layout.
        template_dims = templates_tensor.get_shape().as_list()[1:]
        channels, block_rows, block_cols = template_dims
        centers_4d = tf.reshape(
            clusters, (ninstances, block_rows, block_cols, channels))
        centers_as_templates = tf.transpose(centers_4d, [0, 3, 1, 2])

        # Both assignments only run after a k-means training step.
        with tf.control_dependencies([training_op]):
            set_templates = tf.assign(templates_var, centers_as_templates)
            set_weights = tf.assign(weights_var,
                                    tf.ones_like(centers_as_templates))
        update_op = tf.group(set_templates,
                             set_weights,
                             name='kmeans_init_assign')
        return init_op, update_op
Example #31
0
def main():
    """Cluster MNIST with mini-batch K-Means and report test accuracy.

    Reads MNIST from the ``data`` directory next to this script, trains
    K-Means on the training images, maps every centroid to the most frequent
    digit among the training samples assigned to it, and prints the accuracy
    of that mapping on the test images.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(script_dir, 'data')
    mnist = input_data.read_data_sets(data_path, one_hot=True)
    train_images = mnist.train.images

    num_steps = 200  # total training steps
    batch_size = 1024  # samples per batch (not referenced below)
    k = 25  # number of clusters
    num_classes = 10  # the 10 digits
    num_features = 784  # each image is 28*28 pixels

    # Placeholders: input images, and labels (for assigning a label to a
    # centroid and for testing).
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    Y = tf.placeholder(tf.float32, shape=[None, num_classes])

    kmeans = KMeans(inputs=X,
                    num_clusters=k,
                    distance_metric='cosine',
                    use_mini_batch=True)

    # The graph tuple has either six or seven entries; only the entries used
    # below are bound to names.
    graph_parts = kmeans.training_graph()
    if len(graph_parts) <= 6:
        _, cluster_idx, scores, _, init_op, train_op = graph_parts
    else:
        _, cluster_idx, scores, _, _, init_op, train_op = graph_parts

    assignments = cluster_idx[0]
    avg_distance = tf.reduce_mean(scores)

    train_feed = {X: train_images}
    with tf.Session() as sess:
        # Run the variable initializer, then the K-Means init op.
        init = tf.global_variables_initializer()
        sess.run(init, feed_dict=train_feed)
        sess.run(init_op, feed_dict=train_feed)

        for step in range(num_steps):
            _, d, idx = sess.run([train_op, avg_distance, assignments],
                                 feed_dict=train_feed)
            if not step % 10:
                print("step {0} avg distance: {1}".format(step, d))

        # Count labels per centroid: every training sample contributes its
        # one-hot label to the centroid it was assigned to.
        counts = np.zeros(shape=(k, num_classes))
        for sample_no, centroid in enumerate(idx):
            counts[centroid] += mnist.train.labels[sample_no]
        # Most frequent label per centroid, as a tensor for the lookup below.
        majority_labels = [np.argmax(row) for row in counts]
        labels_map = tf.convert_to_tensor(majority_labels)

        # Evaluation ops: centroid id -> label, then compare with the truth.
        cluster_label = tf.nn.embedding_lookup(labels_map, assignments)
        true_label = tf.cast(tf.argmax(Y, 1), tf.int32)
        correct_prediction = tf.equal(cluster_label, true_label)
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        test_feed = {X: mnist.test.images, Y: mnist.test.labels}
        print('test accuracy : ', sess.run(accuracy_op, feed_dict=test_feed))
# Parameters
num_steps = 50  # Total steps to train
batch_size = 1024  # The number of samples per batch
k = 25  # The number of clusters
num_classes = 10  # The 10 digits
num_features = 784  # Each image is 28x28 pixels

# Input images
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels (for assigning a label to a centroid and testing)
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# K-Means Parameters
kmeans = KMeans(inputs=X,
                num_clusters=k,
                distance_metric='cosine',
                use_mini_batch=True)

# Build KMeans graph. training_graph() yields either six or seven values
# depending on the installed TensorFlow release; only the seven-value form
# includes the cluster-centers variable, so accept both layouts.
training_graph = kmeans.training_graph()
if len(training_graph) > 6:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_centers_vars, init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
     train_op) = training_graph
    cluster_centers_vars = None
cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
Example #33
0
# clustering
import tensorflow as tf
from tensorflow.contrib.factorization import KMeans

k = 3
# num_feature = 1
# arr = [[1],[2],[2],[4],[2]]

num_feature = 2
arr = [[1, 2], [2, 3], [3, 4], [5, 5], [5, 10], [15, 5]]

x = tf.placeholder(tf.float32, shape=[None, num_feature])
kmodel = KMeans(inputs=x,
                num_clusters=k,
                distance_metric='squared_euclidean',
                use_mini_batch=True)
#print(kmodel)
# training_graph() yields either six or seven values depending on the
# installed TensorFlow release; the seven-value form also carries a
# cluster-centers variable. Accept both instead of failing on unpack.
training_graph = kmodel.training_graph()
if len(training_graph) > 6:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_centers_var, init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
     train_op) = training_graph
    cluster_centers_var = None
# cluster_idx is returned wrapped in a sequence; keep only its first element.
cluster_idx = cluster_idx[0]
avg_distance = tf.reduce_mean(scores)  # mean of the per-sample distances

sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Print what each op evaluates to on the toy data, one op at a time.
print(sess.run(init_op, feed_dict={x: arr}))
print(sess.run(train_op, feed_dict={x: arr}))
print(sess.run(all_scores, feed_dict={x: arr}))
print(sess.run(cluster_idx, feed_dict={x: arr}))
print(sess.run(scores, feed_dict={x: arr}))
print(sess.run(cluster_centers_initialized, feed_dict={x: arr}))

# Training
for i in range(1, 100):