Example #1
def maybe_update_models():
    global y1, variables, saver_regression, y2, saver_cnn, x, is_training, sess, num_categories

    # close the old TensorFlow session if one exists
    if 'sess' in globals():
        sess.close()

    if utils.CATEGORIES_IN_USE is None:
        utils.initialize_categories_in_use()
    else:
        utils.update_categories_in_use()
    num_categories = len(utils.CATEGORIES_IN_USE)

    # Model variables
    x = tf.placeholder("float", [None, IMAGE_SIZE * IMAGE_SIZE])  # image input placeholder
    is_training = tf.placeholder("bool")  # enables dropout during the training phase

    # Regression model
    y1, variables = regression_model.regression(x, nCategories=num_categories)  # prediction results and variables
    saver_regression = tf.train.Saver(variables)

    # CNN model
    y2, variables = cnn_model.convolutional(x, nCategories=num_categories, is_training=is_training)  # prediction results and variables
    saver_cnn = tf.train.Saver(variables)

    # Tensorflow session; initialize only after the models are built, otherwise
    # global_variables_initializer() runs before any variables exist
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
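
Note: regression_model.regression is not shown in these examples. A minimal sketch of what Examples #1 and #2 appear to assume (a single softmax layer that returns the prediction op together with its variables; the shapes and zero initializers here are assumptions, not the original implementation):

import tensorflow as tf

def regression(x, nCategories):
    # Single softmax layer: y = softmax(x @ W + b)
    input_dim = x.get_shape().as_list()[1]
    W = tf.Variable(tf.zeros([input_dim, nCategories]), name="W")
    b = tf.Variable(tf.zeros([nCategories]), name="b")
    y = tf.nn.softmax(tf.matmul(x, W) + b)
    return y, [W, b]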
Example #2
def train():
    print("\nSOFTMAX REGRESSION TRAINING STARTED.")

    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'config.ini'))

    MODEL_PATH = os.path.join(os.path.dirname(__file__), config['DIRECTORIES']['MODELS'],
                              config['DEFAULT']['IMAGE_SIZE'], config['REGRESSION']['MODEL_FILENAME'])
    IMAGE_SIZE = int(config['DEFAULT']['IMAGE_SIZE'])
    BATCH_SIZE = int(config['DEFAULT']['TRAIN_BATCH_SIZE'])

    # get training/validation/testing data
    try:
        curr_number_of_categories, train_total_data, train_size, validation_data, validation_labels, test_data, test_labels = prepare_training_data.prepare_data(
            "regression", True)
    except Exception as inst:
        raise Exception(inst.args[0]) from inst  # re-raise with the original exception as context

    # regression model
    x = tf.placeholder(tf.float32, [None, IMAGE_SIZE * IMAGE_SIZE], name="image")  # regression input placeholder
    y_ = tf.placeholder(tf.float32, [None, curr_number_of_categories], name="labels")  # regression ground truth labels
    y, variables = regression_model.regression(x, nCategories=curr_number_of_categories)  # regression output and variables

    # training variables
    with tf.name_scope("Loss"):
        cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))  # clip to avoid log(0) producing NaN
    with tf.name_scope("GradientDescent"):
        train_step = tf.train.GradientDescentOptimizer(float(config['REGRESSION']['LEARNING_RATE'])).minimize(
            cross_entropy)
    with tf.name_scope("Acc"):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), 0)

    # merge training data and validation data
    validation_total_data = numpy.concatenate((validation_data, validation_labels), axis=1)
    new_train_total_data = numpy.concatenate((train_total_data, validation_total_data))
    train_size = new_train_total_data.shape[0]

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(variables)

    # training cycle (number of batches and epochs)
    total_batch = int(train_size / BATCH_SIZE)
    epochs = int(config['REGRESSION']['EPOCHS'])

    # restore stored regression model if it exists and has the correct number of categories
    max_acc = maybe_restore_model(MODEL_PATH, saver, sess, accuracy, validation_data, x, validation_labels, y_)

    # loop for epoch
    for epoch in range(epochs):

        # random shuffling
        numpy.random.shuffle(new_train_total_data)  # shuffle the merged train+validation data that is sliced below
        train_data_ = new_train_total_data[:, :-curr_number_of_categories]
        train_labels_ = new_train_total_data[:, -curr_number_of_categories:]

        # loop over all batches
        for i in range(total_batch):
            # compute the offset of the current minibatch in the data.
            offset = (i * BATCH_SIZE) % train_size
            batch_xs = train_data_[offset:(offset + BATCH_SIZE), :]
            batch_ys = train_labels_[offset:(offset + BATCH_SIZE), :]

            _, train_accuracy = sess.run([train_step, accuracy], feed_dict={x: batch_xs, y_: batch_ys})

            # update progress
            progress = float((epoch * total_batch + i + 1) / (epochs * total_batch))
            utils.update_progress(progress)

            validation_accuracy = compute_accuracy(sess, accuracy, train_accuracy, i, total_batch, epoch,
                                                   validation_data, x,
                                                   validation_labels, y_,
                                                   int(config['LOGS']['TRAIN_ACCURACY_DISPLAY_STEP']),
                                                   int(config['LOGS']['VALIDATION_STEP']))

            # save the current model if the maximum accuracy is updated
            if validation_accuracy > max_acc:
                max_acc = validation_accuracy
                save_path = saver.save(sess, MODEL_PATH, write_meta_graph=False, write_state=False)
                print("Model updated and saved in file: %s" % save_path)

            # break inner loop if stop training is required
            if utils.train_should_stop():
                break

        # break outer loop if stop training is required
        if utils.train_should_stop():
            break

    # Code with test set:
    # restore variables from disk
    # saver.restore(sess, MODEL_PATH)

    # calculate accuracy for all test images
    # test_accuracy = sess.run(accuracy, feed_dict={x: test_data, y_: test_labels})
    # print("test accuracy for the stored model: %g" % test_accuracy)

    sess.close()

    print("SOFTMAX REGRESSION TRAINING END.")
Example #3
from kafka import KafkaConsumer
from json import loads
import csv
from regression_model import model as RegressionModel  # aliased so the instance below does not shadow the class

consumer = KafkaConsumer('numtest',
                         bootstrap_servers=['localhost:9092'],
                         auto_offset_reset='earliest',
                         enable_auto_commit=True,
                         group_id='my-group',
                         value_deserializer=lambda x: loads(x.decode('utf-8')))

regression = RegressionModel()

for message in consumer:
    message = message.value
    row = [message['year'], message['salary']]
    print(row)
    if message['train']:
        regression.update()
    regression.predict(float(message['year']))
    with open('model_data/data.csv', 'a') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(row)  # the with-statement closes the file; no explicit close needed
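
Note: to exercise Example #3 end to end, a matching producer for the numtest topic might look like this (a sketch; the year/salary/train message layout is taken from the consumer above, and the sample values are invented):

from json import dumps
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda v: dumps(v).encode('utf-8'))

# send one record in the shape the consumer expects
producer.send('numtest', value={'year': 5.0, 'salary': 60000.0, 'train': True})
producer.flush()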