def maybe_update_models():
    global y1, variables, saver_regression, y2, saver_cnn, x, is_training, sess, num_categories

    # close the old TensorFlow session if one exists
    if 'sess' in globals():
        sess.close()

    if utils.CATEGORIES_IN_USE is None:
        utils.initialize_categories_in_use()
    else:
        utils.update_categories_in_use()
    num_categories = len(utils.CATEGORIES_IN_USE)

    # model input placeholders
    x = tf.placeholder("float", [None, IMAGE_SIZE * IMAGE_SIZE])  # flattened image input
    is_training = tf.placeholder("bool")  # activates dropout during the training phase

    # regression model: prediction op and its variables
    y1, variables = regression_model.regression(x, nCategories=num_categories)
    saver_regression = tf.train.Saver(variables)

    # CNN model: prediction op and its variables
    y2, variables = cnn_model.convolutional(x, nCategories=num_categories, is_training=is_training)
    saver_cnn = tf.train.Saver(variables)

    # TensorFlow session; run the initializer only after the graph is built,
    # otherwise the freshly created model variables are left uninitialized
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
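Once maybe_update_models() has rebuilt the graph, the savers can restore stored weights and the prediction ops can be run directly. A minimal usage sketch, assuming a flattened image array of length IMAGE_SIZE * IMAGE_SIZE; cnn_model_path is a hypothetical checkpoint path, not a name from this project:

maybe_update_models()
saver_cnn.restore(sess, cnn_model_path)  # cnn_model_path is a hypothetical checkpoint path

# feed is_training=False so dropout is disabled at inference time
probabilities = sess.run(y2, feed_dict={x: [image], is_training: False})
predicted_category = int(probabilities.argmax())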
def train():
    print("\nSOFTMAX REGRESSION TRAINING STARTED.")

    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'config.ini'))
    MODEL_PATH = os.path.join(os.path.dirname(__file__),
                              config['DIRECTORIES']['MODELS'],
                              config['DEFAULT']['IMAGE_SIZE'],
                              config['REGRESSION']['MODEL_FILENAME'])
    IMAGE_SIZE = int(config['DEFAULT']['IMAGE_SIZE'])
    BATCH_SIZE = int(config['DEFAULT']['TRAIN_BATCH_SIZE'])

    # get training/validation/testing data
    try:
        curr_number_of_categories, train_total_data, train_size, validation_data, validation_labels, \
            test_data, test_labels = prepare_training_data.prepare_data("regression", True)
    except Exception as inst:
        raise Exception(inst.args[0])

    # regression model
    x = tf.placeholder(tf.float32, [None, IMAGE_SIZE * IMAGE_SIZE], name="image")  # input placeholder
    y_ = tf.placeholder(tf.float32, [None, curr_number_of_categories], name="labels")  # ground-truth labels
    y, variables = regression_model.regression(x, nCategories=curr_number_of_categories)  # output and variables

    # training ops
    with tf.name_scope("Loss"):
        cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
    with tf.name_scope("GradientDescent"):
        train_step = tf.train.GradientDescentOptimizer(
            float(config['REGRESSION']['LEARNING_RATE'])).minimize(cross_entropy)
    with tf.name_scope("Acc"):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), 0)

    # merge training data and validation data
    validation_total_data = numpy.concatenate((validation_data, validation_labels), axis=1)
    new_train_total_data = numpy.concatenate((train_total_data, validation_total_data))
    train_size = new_train_total_data.shape[0]

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(variables)

    # training cycle (number of batches and epochs)
    total_batch = int(train_size / BATCH_SIZE)
    epochs = int(config['REGRESSION']['EPOCHS'])

    # restore a stored regression model if it exists and has the correct number of categories
    max_acc = maybe_restore_model(MODEL_PATH, saver, sess, accuracy,
                                  validation_data, x, validation_labels, y_)

    # loop over epochs
    for epoch in range(epochs):
        # shuffle the merged array that is actually sliced below
        # (shuffling train_total_data here would leave the batches in a fixed order)
        numpy.random.shuffle(new_train_total_data)
        train_data_ = new_train_total_data[:, :-curr_number_of_categories]
        train_labels_ = new_train_total_data[:, -curr_number_of_categories:]

        # loop over all batches
        for i in range(total_batch):
            # compute the offset of the current minibatch in the data
            offset = (i * BATCH_SIZE) % train_size
            batch_xs = train_data_[offset:(offset + BATCH_SIZE), :]
            batch_ys = train_labels_[offset:(offset + BATCH_SIZE), :]

            _, train_accuracy = sess.run([train_step, accuracy],
                                         feed_dict={x: batch_xs, y_: batch_ys})

            # update progress
            progress = float((epoch * total_batch + i + 1) / (epochs * total_batch))
            utils.update_progress(progress)

            validation_accuracy = compute_accuracy(sess, accuracy, train_accuracy, i, total_batch, epoch,
                                                   validation_data, x, validation_labels, y_,
                                                   int(config['LOGS']['TRAIN_ACCURACY_DISPLAY_STEP']),
                                                   int(config['LOGS']['VALIDATION_STEP']))

            # save the current model whenever the maximum validation accuracy improves
            if validation_accuracy > max_acc:
                max_acc = validation_accuracy
                save_path = saver.save(sess, MODEL_PATH, write_meta_graph=False, write_state=False)
                print("Model updated and saved in file: %s" % save_path)

            # break the inner loop if stopping was requested
            if utils.train_should_stop():
                break

        # break the outer loop if stopping was requested
        if utils.train_should_stop():
            break

    # Code with test set:
    # restore variables from disk
    # saver.restore(sess, MODEL_PATH)
    # calculate accuracy for all test images
    # test_accuracy = sess.run(accuracy, feed_dict={x: test_data, y_: test_labels})
    # print("test accuracy for the stored model: %g" % test_accuracy)

    sess.close()
    print("SOFTMAX REGRESSION TRAINING END.")
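The helper maybe_restore_model is called above but not shown here. A minimal sketch of what it might look like, under the assumption that a checkpoint is only reusable when its variable shapes match the current number of categories; the body is hypothetical, not the project's actual implementation:

def maybe_restore_model(model_path, saver, sess, accuracy, validation_data, x, validation_labels, y_):
    # restore a previous checkpoint if present and compatible, and return the
    # validation accuracy it achieves as the initial best accuracy
    if os.path.isfile(model_path + ".index"):
        try:
            saver.restore(sess, model_path)
            return sess.run(accuracy, feed_dict={x: validation_data, y_: validation_labels})
        except tf.errors.InvalidArgumentError:
            # shape mismatch, e.g. the number of categories changed; start from scratch
            sess.run(tf.global_variables_initializer())
    return 0.0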
from kafka import KafkaConsumer
from json import loads
import csv

from regression_model import model as RegressionModel

consumer = KafkaConsumer('numtest',
                         bootstrap_servers=['localhost:9092'],
                         auto_offset_reset='earliest',
                         enable_auto_commit=True,
                         group_id='my-group',
                         value_deserializer=lambda x: loads(x.decode('utf-8')))

# instantiate under a distinct name so the imported class is not shadowed
regression = RegressionModel()

for message in consumer:
    message = message.value
    row = [message['year'], message['salary']]
    print(row)

    # refit the model when the message is flagged as training data
    if message['train']:
        regression.update()
    regression.predict(float(message['year']))

    # append the new record; the with-statement closes the file, so no explicit close() is needed
    with open('model_data/data.csv', 'a') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(row)
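For completeness, the messages this consumer reads can be produced with kafka-python's KafkaProducer. A minimal sketch, assuming the same 'numtest' topic and the JSON fields ('year', 'salary', 'train') consumed above; the sample values are illustrative only:

from json import dumps
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda v: dumps(v).encode('utf-8'))

# one record per message; 'train' flags whether the model should be refit
producer.send('numtest', value={'year': 2020, 'salary': 85000, 'train': False})
producer.flush()  # ensure the message is delivered before the script exits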