예제 #1
0
def run_k_means(num_steps=num_steps,
                k=None,
                num_classes=None,
                num_features=num_features,
                keep_session=True):
    """Train a mini-batch K-Means model and assign a label to every centroid.

    The data set returned by ``get_processed_data()`` is clustered with a
    TensorFlow ``KMeans`` graph that is trained on the full training set for
    ``num_steps`` steps. Each centroid then receives the most frequent
    training label among the samples assigned to it (ties are broken at
    random), and the accuracy on the ``orig_new_X_num`` / ``orig_new_y``
    samples is printed. The session checkpoint and the
    ``(labels_map_np, k)`` pair are written below ``data/cache/kmeans``.

    Args:
        num_steps: Number of training steps; must be at least 1.
        k: Number of clusters. Defaults to the student count reported by
            ``get_processed_data()``.
        num_classes: Width of the one-hot label vectors. Defaults to the
            student count.
        num_features: Number of columns of every input sample.
        keep_session: When false, the session is closed before returning.

    Returns:
        Tuple ``(k, num_classes, labels_map_np, sess)``. ``labels_map_np`` is
        a list that maps each centroid index to its label; ``sess`` is the
        TensorFlow session (already closed when ``keep_session`` is false).

    Raises:
        ValueError: If ``num_steps`` is smaller than 1.
    """
    if num_steps < 1:
        # The labelling step needs the cluster assignments produced by the
        # last training step, so at least one step has to run.
        raise ValueError(
            'num_steps must be at least 1, got {}'.format(num_steps))

    (new_X_num, _num_map, new_y_num, _max_t_s_num, num_student,
     orig_new_X_num, orig_new_y) = get_processed_data()

    if k is None:
        k = num_student
        print('Choosing {} clusters for {} students, {} samples'.format(
            k, num_student, len(new_y_num)))

    if num_classes is None:
        num_classes = num_student

    full_data_x = np.asarray(new_X_num)

    # Input
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    # Labels (for assigning a label to a centroid and testing)
    y = tf.placeholder(tf.float32, shape=[None, num_classes])

    # K-Means Parameters
    kmeans = KMeans(inputs=X,
                    num_clusters=k,
                    distance_metric='squared_euclidean',
                    use_mini_batch=True)

    # Build KMeans graph; the returned tuple has one extra element
    # (cluster_centers_var) on newer TensorFlow releases.
    training_graph = kmeans.training_graph()
    if len(training_graph) > 6:  # Tensorflow 1.4+
        (all_scores, cluster_idx, scores, cluster_centers_initialized,
         cluster_centers_var, init_op, train_op) = training_graph
    else:
        (all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
         train_op) = training_graph

    cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
    avg_distance = tf.reduce_mean(scores)

    saver = tf.train.Saver()

    # Start TensorFlow session
    sess = tf.Session()
    try:
        # Run the initializer
        sess.run(tf.global_variables_initializer(),
                 feed_dict={X: full_data_x})
        sess.run(init_op, feed_dict={X: full_data_x})

        # The one-hot depth has to equal num_classes: both the `y`
        # placeholder and the per-centroid `counts` rows are that wide.
        one_hot_y = sess.run(tf.one_hot(new_y_num, num_classes))
        test_one_hot_y = sess.run(tf.one_hot(orig_new_y, num_classes))

        # Training
        for step in range(1, num_steps + 1):
            _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                                 feed_dict={X: full_data_x})
            if step % 500 == 0 or step == 1:
                print("Step %i, Avg Distance: %f" % (step, d))

        # Assign a label to each centroid.
        # Count the labels per centroid, using the label of each training
        # sample and its closest centroid (given by 'idx' of the last step).
        counts = np.zeros(shape=(k, num_classes))
        for centroid, label_vector in zip(idx, one_hot_y):
            counts[centroid] += label_vector

        # Pick the most frequent label of every centroid. np.argmax would
        # always return the first of several tied labels, so one of the tied
        # labels is drawn at random instead.
        labels_map_np = [
            np.random.choice(np.argwhere(c == np.amax(c)).flatten())
            for c in counts
        ]

        labels_map = tf.convert_to_tensor(labels_map_np)

        # Evaluation ops
        # Lookup: centroid_id -> label
        cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
        # Compute accuracy
        correct_prediction = tf.equal(cluster_label,
                                      tf.cast(tf.argmax(y, 1), tf.int32))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Test Model
        print("Test Accuracy:",
              sess.run(accuracy_op, feed_dict={
                  X: orig_new_X_num,
                  y: test_one_hot_y
              }))

        save_path = saver.save(
            sess,
            path.join('data', 'cache', 'kmeans', 'tf_model', 'model.ckpt'))
        print("Model saved in path: %s" % save_path)

        save((labels_map_np, k),
             'labels_map_np,k',
             folder=path.join('data', 'cache', 'kmeans'))
    except Exception:
        # Do not leak the session when anything fails after it was opened.
        sess.close()
        raise

    if not keep_session:
        sess.close()

    return k, num_classes, labels_map_np, sess
예제 #2
0
import pickle
import time

import numpy as np
from tensorflow.python.keras.models import load_model

from nn.nn_predict import train_nn, get_file_name, get_file_path, NNPredictor
from preprocess.processor_num_map import get_processed_data
from test_data import test_manager
from test_data.test_manager import results_accuracy

import matplotlib.pyplot as plt

# Load the preprocessed data set. get_processed_data() returns a 7-tuple; the
# statements below use new_X_num, new_y_num, num_student and num_map.
(new_X_num, num_map, new_y_num, max_t_s_num, num_student, orig_new_X_num,
 orig_new_y) = get_processed_data()

# Reuse a previously saved model together with its pickled num_map. Any
# OSError raised while locating or reading either file falls through to the
# training branch below.
try:
    fp = get_file_path()
    recog_model = load_model(fp)
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this assumes the '.num_map.pkl' file is a locally written cache --
    # confirm it never comes from an untrusted source.
    with open('{}.num_map.pkl'.format(fp), 'rb') as file:
        num_map = pickle.load(file)
except OSError:
    # train_nn returns a (model, num_map) pair; presumably it trains a fresh
    # model -- the meaning of the trailing True flag is defined in
    # nn.nn_predict (TODO confirm).
    recog_model, num_map = train_nn(new_X_num, new_y_num, num_student, num_map,
                                    True)

# test_result = test_manager.test(
#     predict_fn=lambda arr_face:
#     predict(arr_face, recog_model, 0.52, True),
#     show_image=True
# )
# accuracy = results_accuracy(test_result)
예제 #3
0
from os import path

from knn_kmeans.knn_kmeans_predict import knn_kmeans_generate, KNNKmeansPredictor
from preprocess.processor_num_map import get_processed_data, get_file_name
from test_data import test_manager
from test_data.test_manager import results_accuracy

import numpy as np

from util.file import load_or_create
import matplotlib.pyplot as plt

# Forwarded as the first argument of knn_kmeans_generate; presumably selects
# whether a stored K-Means model is restored -- TODO confirm.
restore = False

# Load the preprocessed data set. get_processed_data() returns a 7-tuple; only
# num_map is used by the statements below.
(new_X_num, num_map, new_y_num,
 max_t_s_num,
 num_student,
 test_new_X_num, test_new_y) = get_processed_data()

# Second argument of knn_kmeans_generate; it is also part of the cache file
# name so that results for different values do not overwrite each other.
n = 2
print('Using n of {}'.format(n))

extra = '{}'.format(get_file_name())

file_name = 'knn_{}_{}'.format(extra, n)

# presumably load_or_create returns the object cached under file_name in
# `folder` and only calls create_fn when no cache exists -- TODO confirm.
# `path` is os.path; it was used here without being imported.
knn_trained = load_or_create(file_name,
                             create_fn=lambda: knn_kmeans_generate(restore, n, num_map),
                             folder=path.join('data', 'model', 'knn'))

# test_data.test(
#     predict_fn=
#     lambda face_encodings: predict(face_encodings, knn_trained,
예제 #4
0
import pickle
import time

import numpy as np
from tensorflow.python.keras.models import load_model

from nn.nn_predict import train_nn, get_file_name, get_file_path, NNPredictor
from preprocess.processor_num_map import get_processed_data
from test_data import test_manager
from test_data.test_manager import results_accuracy

import matplotlib.pyplot as plt

# Load the preprocessed data set. get_processed_data() returns a 7-tuple; the
# statements below use new_X_num, new_y_num, num_student and num_map.
(new_X_num, num_map, new_y_num,
 max_t_s_num,
 num_student,
 orig_new_X_num, orig_new_y) = get_processed_data()

# Reuse a previously saved model together with its pickled num_map. Any
# OSError raised while locating or reading either file falls through to the
# training branch below.
try:
    fp = get_file_path()
    recog_model = load_model(fp)
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this assumes the '.num_map.pkl' file is a locally written cache --
    # confirm it never comes from an untrusted source.
    with open('{}.num_map.pkl'.format(fp), 'rb') as file:
        num_map = pickle.load(file)
except OSError:
    # train_nn returns a (model, num_map) pair; presumably it trains a fresh
    # model -- the meaning of the trailing True flag is defined in
    # nn.nn_predict (TODO confirm).
    recog_model, num_map = train_nn(new_X_num, new_y_num, num_student, num_map, True)

# test_result = test_manager.test(
#     predict_fn=lambda arr_face:
#     predict(arr_face, recog_model, 0.52, True),
#     show_image=True
# )
# accuracy = results_accuracy(test_result)