def validate_model(sess, val_names, val_ops, plot_ckpt, batch_size=200):
    """
    Validates the model stored in a session.

    Args:
        sess: The session where the model is loaded.
        val_names: The validation sample names used to evaluate the model.
        val_ops: The validation operations (input placeholders and prediction op).
        plot_ckpt: Directory where loss plots are written.
        batch_size: Number of validation samples per batch.

    Returns:
        The overall validation error for the model.
    """
    print("Validating model...")

    val_num = len(val_names)
    print("test_num: ", val_num)
    MaxTestIters = int(val_num / batch_size)
    print("MaxTestIters: ", MaxTestIters)

    val_err = []
    iter_start = None

    eye_left, eye_right, face, face_mask, pred = val_ops
    y = tf.placeholder(tf.float32, [None, 2], name='pos')
    # Mean Euclidean distance between predicted and ground-truth gaze positions.
    err = tf.reduce_mean(
        tf.sqrt(tf.reduce_sum(tf.squared_difference(pred, y), axis=1)))

    for iterTest in range(MaxTestIters):
        test_start = iterTest * batch_size
        test_end = (iterTest + 1) * batch_size

        # The original passed a fixed chunk size of 1000 here; use batch_size so the
        # loaded chunk matches the [test_start, test_end) slice.
        batch_val_data = load_batch_from_data(val_names, dataset_path, batch_size,
                                              img_ch, img_cols, img_rows,
                                              train_start=test_start,
                                              train_end=test_end)
        batch_val_data = prepare_data(batch_val_data)

        val_batch_err = sess.run(err, feed_dict={eye_left: batch_val_data[0],
                                                 eye_right: batch_val_data[1],
                                                 face: batch_val_data[2],
                                                 face_mask: batch_val_data[3],
                                                 y: batch_val_data[4]})
        val_err.append(val_batch_err)

        if iterTest % 10 == 0:
            print('IterTest %s, val error: %.5f' % (iterTest, np.mean(val_err)))
            # plot_loss(np.array(train_loss), np.array(train_err), np.array(Val_err),
            #           start=0, per=1,
            #           save_file=plot_ckpt + "/testing_loss_" + str(n_epoch) + "_" + str(iterTest) + ".png")
            if iter_start:
                print('10 iters runtime: %.1fs' % (timeit.default_timer() - iter_start))
            else:
                iter_start = timeit.default_timer()

    return np.mean(val_err)
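# Hedged usage sketch (an assumption, not part of the original code): how
# validate_model could be driven from a saved checkpoint whose graph stored its
# input/prediction ops in the "validation_nodes" collection, as the training code
# below does. The meta/checkpoint paths are placeholders.
def run_validation(meta_path="my_model/model.meta", ckpt_path="my_model/model"):
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(meta_path)    # rebuild the saved graph
        saver.restore(sess, ckpt_path)                   # load the trained weights
        val_ops = tf.get_collection("validation_nodes")  # eye_left, eye_right, face, face_mask, pred
        val_names = load_data_names(val_path)
        return validate_model(sess, val_names, val_ops, plot_ckpt="plots")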
def train(self, args, ckpt, plot_ckpt, lr=1e-3, batch_size=128, max_epoch=1000,
          min_delta=1e-4, patience=10, print_per_epoch=10):

    ifCheck = False

    # --------------------------
    train_data_eye_left, val_data_eye_left = self.organize_extra_eye_data(args, "left")
    train_data_eye_right, val_data_eye_right = self.organize_extra_eye_data(args, "right")
    # -----------------------------
    print("------ finish processing extra data --------")

    train_names = load_data_names(train_path)
    val_names = load_data_names(val_path)

    train_num = len(train_names)
    val_num = len(val_names)
    print("train_num: ", train_num)
    print("test_num: ", val_num)

    MaxIters = train_num / batch_size
    n_batches = MaxIters

    val_chunk_size = 1000
    MaxTestIters = val_num / val_chunk_size
    val_n_batches = val_chunk_size / batch_size

    print("MaxIters: ", MaxIters)
    print("MaxTestIters: ", MaxTestIters)
    print('Train on %s samples, validate on %s samples' % (train_num, val_num))

    # Define losses and optimizers: gaze position plus left/right eye angle heads.
    pred_xy, pred_ang_left, pred_ang_right = self.pred
    self.cost1 = tf.losses.mean_squared_error(self.y, pred_xy)
    self.cost2 = tf.losses.mean_squared_error(self.y, pred_ang_left) ** 2
    self.cost3 = tf.losses.mean_squared_error(self.y, pred_ang_right) ** 2

    self.optimizer1 = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.cost1)
    self.optimizer2 = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.cost2)
    self.optimizer3 = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.cost3)

    # Evaluate model: Euclidean error for gaze position, angular error for each eye.
    self.err1 = tf.reduce_mean(
        tf.sqrt(tf.reduce_sum(tf.squared_difference(pred_xy, self.y), axis=1)))
    self.err2 = compute_angle_error(self.y, pred_ang_left)
    self.err3 = compute_angle_error(self.y, pred_ang_right)

    train_loss_history = []
    train_err_history = []
    val_loss_history = []
    val_err_history = []

    train_loss_history_eye_left = []
    train_err_history_eye_left = []
    val_loss_history_eye_left = []
    val_err_history_eye_left = []

    train_loss_history_eye_right = []
    train_err_history_eye_right = []
    val_loss_history_eye_right = []
    val_err_history_eye_right = []

    best_loss = np.Inf

    # Create the collection.
    tf.get_collection("validation_nodes")
    # Add stuff to the collection.
    tf.add_to_collection("validation_nodes", self.eye_left)
    tf.add_to_collection("validation_nodes", self.eye_right)
    tf.add_to_collection("validation_nodes", self.face)
    tf.add_to_collection("validation_nodes", self.face_mask)
    tf.add_to_collection("validation_nodes", pred_xy)
    tf.add_to_collection("validation_nodes", pred_ang_left)
    tf.add_to_collection("validation_nodes", pred_ang_right)

    # variables_to_restore = [var for var in tf.global_variables()]
    # saver = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=0)

    # Initializing the variables
    init = tf.global_variables_initializer()
    # TODO:
    # tf.reset_default_graph()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        # TODO:
        writer = tf.summary.FileWriter("logs", sess.graph)

        # saver.restore(sess, "./my_model/pretrained/model_4_1800_train_error_3.5047762_val_error_5.765135765075684")
        # saver.restore(sess, "./my_model/2018-09-18-11-01/model_1_300_train_error_history_2.8944669_val_error_history_3.092479933391918")
        # print " pass the restoring !!!!"

        mtcnn_h = mtcnn_handle()
        random.shuffle(val_names)

        # Keep training until the max number of epochs is reached.
        for n_epoch in range(1, max_epoch + 1):
            print("vvvvvvvvvvvvvvvvvvv")
            print("n_epoch: ", n_epoch)
            epoch_start = timeit.default_timer()
            iter_start = None

            random.shuffle(train_names)
            iterTest = 0
            i_left = 0
            i_right = 0

            for iter in range(int(MaxIters)):
                start = timeit.default_timer()
                # print("--------------------------------")
                # print("iter: ", iter)

                train_start = iter * batch_size
                train_end = (iter + 1) * batch_size

                batch_train_data = load_batch_from_data(
                    mtcnn_h, train_names, dataset_path, batch_size,
                    img_ch, img_cols, img_rows,
                    train_start=train_start, train_end=train_end)
                batch_train_data = prepare_data(batch_train_data)
                print('Loading and preparing training data: %.1fs' % (timeit.default_timer() - start))

                start = timeit.default_timer()

                # Run optimization op (backprop) for the gaze-position head.
                sess.run(self.optimizer1,
                         feed_dict={self.eye_left: batch_train_data[0],
                                    self.eye_right: batch_train_data[1],
                                    self.face: batch_train_data[2],
                                    self.face_mask: batch_train_data[3],
                                    self.y: batch_train_data[4]})
                train_batch_loss, train_batch_err = sess.run(
                    [self.cost1, self.err1],
                    feed_dict={self.eye_left: batch_train_data[0],
                               self.eye_right: batch_train_data[1],
                               self.face: batch_train_data[2],
                               self.face_mask: batch_train_data[3],
                               self.y: batch_train_data[4]})

                # Train the per-eye angle heads on the extra eye data (5 sub-steps per batch).
                for _ in range(5):
                    batch_train_data_eye_left, i_left = next_batch_universal(
                        train_data_eye_left, batch_size, i_left)
                    sess.run(self.optimizer2,
                             feed_dict={self.eye_left: batch_train_data_eye_left[0],
                                        self.y: batch_train_data_eye_left[1]})
                    train_batch_loss_eye_left, train_batch_err_eye_left = sess.run(
                        [self.cost2, self.err2],
                        feed_dict={self.eye_left: batch_train_data_eye_left[0],
                                   self.y: batch_train_data_eye_left[1]})

                    batch_train_data_eye_right, i_right = next_batch_universal(
                        train_data_eye_right, batch_size, i_right)
                    sess.run(self.optimizer3,
                             feed_dict={self.eye_right: batch_train_data_eye_right[0],
                                        self.y: batch_train_data_eye_right[1]})
                    train_batch_loss_eye_right, train_batch_err_eye_right = sess.run(
                        [self.cost3, self.err3],
                        feed_dict={self.eye_right: batch_train_data_eye_right[0],
                                   self.y: batch_train_data_eye_right[1]})

                train_loss_history.append(train_batch_loss)
                train_err_history.append(train_batch_err)
                train_loss_history_eye_left.append(train_batch_loss_eye_left)
                train_err_history_eye_left.append(train_batch_err_eye_left)
                train_loss_history_eye_right.append(train_batch_loss_eye_right)
                train_err_history_eye_right.append(train_batch_err_eye_right)

                print('Training on batch: %.1fs' % (timeit.default_timer() - start))

                if iter % 30 == 0:
                    ifCheck = True

                if ifCheck:
                    start = timeit.default_timer()

                    if iterTest + 1 >= MaxTestIters:
                        iterTest = 0
                    # test_start = iterTest * val_chunk_size
                    # test_end = (iterTest + 1) * val_chunk_size
                    test_start = 0
                    test_end = val_chunk_size

                    val_data = load_batch_from_data(
                        mtcnn_h, val_names, dataset_path, val_chunk_size,
                        img_ch, img_cols, img_rows,
                        train_start=test_start, train_end=test_end)
                    val_data = prepare_data(val_data)
                    print('Loading and preparing val data: %.1fs' % (timeit.default_timer() - start))

                    start = timeit.default_timer()

                    val_loss = 0.
                    val_err = 0.
                    val_loss_eye_left = 0.
                    val_err_eye_left = 0.
                    val_loss_eye_right = 0.
                    val_err_eye_right = 0.

                    i_val_left = 0
                    i_val_right = 0
                    for batch_val_data in next_batch(val_data, batch_size):
                        batch_val_data_eye_left, i_val_left = next_batch_universal(
                            val_data_eye_left, batch_size, i_val_left)
                        batch_val_data_eye_right, i_val_right = next_batch_universal(
                            val_data_eye_right, batch_size, i_val_right)

                        val_batch_loss, val_batch_err = sess.run(
                            [self.cost1, self.err1],
                            feed_dict={self.eye_left: batch_val_data[0],
                                       self.eye_right: batch_val_data[1],
                                       self.face: batch_val_data[2],
                                       self.face_mask: batch_val_data[3],
                                       self.y: batch_val_data[4]})
                        val_batch_loss_eye_left, val_batch_err_eye_left = sess.run(
                            [self.cost2, self.err2],
                            feed_dict={self.eye_left: batch_val_data_eye_left[0],
                                       self.y: batch_val_data_eye_left[1]})
                        val_batch_loss_eye_right, val_batch_err_eye_right = sess.run(
                            [self.cost3, self.err3],
                            feed_dict={self.eye_right: batch_val_data_eye_right[0],
                                       self.y: batch_val_data_eye_right[1]})

                        val_loss += val_batch_loss / val_n_batches
                        val_err += val_batch_err / val_n_batches
                        val_loss_eye_left += val_batch_loss_eye_left / val_n_batches
                        val_err_eye_left += val_batch_err_eye_left / val_n_batches
                        val_loss_eye_right += val_batch_loss_eye_right / val_n_batches
                        val_err_eye_right += val_batch_err_eye_right / val_n_batches

                    print("val_loss: ", val_loss, "val_err: ", val_err)
                    print("val_loss_left: ", val_loss_eye_left, "val_err_left: ", val_err_eye_left)
                    print("val_loss_right: ", val_loss_eye_right, "val_err_right: ", val_err_eye_right)

                    iterTest += 1
                    print('Testing on chunk: %.1fs' % (timeit.default_timer() - start))

                    start = timeit.default_timer()

                    if iter_start:
                        print('batch iters runtime: %.1fs' % (timeit.default_timer() - iter_start))
                    else:
                        iter_start = timeit.default_timer()

                    print("now: ", now)
                    print("learning rate: ", lr)
                    print('Epoch %s/%s Iter %s, train loss: %.5f, train error: %.5f, val loss: %.5f, val error: %.5f'
                          % (n_epoch, max_epoch, iter,
                             np.mean(train_loss_history), np.mean(train_err_history),
                             np.mean(val_loss_history), np.mean(val_err_history)))
                    print('Epoch %s/%s Iter %s, train loss_eye_left: %.5f, train error_eye_left: %.5f, val loss_eye_left: %.5f, val error_eye_left: %.5f'
                          % (n_epoch, max_epoch, iter,
                             np.mean(train_loss_history_eye_left), np.mean(train_err_history_eye_left),
                             np.mean(val_loss_history_eye_left), np.mean(val_err_history_eye_left)))
                    print('Epoch %s/%s Iter %s, train loss_eye_right: %.5f, train error_eye_right: %.5f, val loss_eye_right: %.5f, val error_eye_right: %.5f'
                          % (n_epoch, max_epoch, iter,
                             np.mean(train_loss_history_eye_right), np.mean(train_err_history_eye_right),
                             np.mean(val_loss_history_eye_right), np.mean(val_err_history_eye_right)))

                    val_loss_history.append(val_loss)
                    val_err_history.append(val_err)
                    val_loss_history_eye_left.append(val_loss_eye_left)
                    val_err_history_eye_left.append(val_err_eye_left)
                    val_loss_history_eye_right.append(val_loss_eye_right)
                    val_err_history_eye_right.append(val_err_eye_right)

                    plot_loss(np.array(train_loss_history), np.array(train_err_history),
                              np.array(val_loss_history), np.array(val_err_history),
                              start=0, per=1,
                              save_file=plot_ckpt + "/cumul_loss_" + str(n_epoch) + "_" + str(iter) + ".png")
                    plot_loss(np.array(train_loss_history_eye_left), np.array(train_err_history_eye_left),
                              np.array(val_loss_history_eye_left), np.array(val_err_history_eye_left),
                              start=0, per=1,
                              save_file=plot_ckpt + "/cumul_loss_" + str(n_epoch) + "_" + str(iter) + "_eye_left.png")
                    plot_loss(np.array(train_loss_history_eye_right), np.array(train_err_history_eye_right),
                              np.array(val_loss_history_eye_right), np.array(val_err_history_eye_right),
                              start=0, per=1,
                              save_file=plot_ckpt + "/cumul_loss_" + str(n_epoch) + "_" + str(iter) + "_eye_right.png")

                    save_path = (ckpt + "model_" + str(n_epoch) + "_" + str(iter)
                                 + "_train_error_history_%s" % (np.mean(train_err_history))
                                 + "_val_error_history_%s" % (np.mean(val_err_history)))
                    save_path = saver.save(sess, save_path)
                    print("args.learning_rate: ", args.learning_rate)
                    print("Model saved in file: %s" % save_path)

                    ifCheck = False
                    print('Saving models and plotting loss: %.1fs' % (timeit.default_timer() - start))

            print('epoch runtime: %.1fs' % (timeit.default_timer() - epoch_start))

        return train_loss_history, train_err_history, val_loss_history, val_err_history
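# Hedged sketch (an assumption, not original code): next_batch_universal is called
# above to cycle through the extra eye data but is not defined in this section.
# A minimal version of what such a helper could look like, assuming `data` is a
# tuple/list of equally sized NumPy arrays (images, labels) and that wrapping
# around to the start of the arrays is acceptable; the repo's real helper may differ.
def next_batch_universal(data, batch_size, start):
    """Return the next batch from each array in `data`, starting at `start` and
    wrapping around at the end, together with the updated cursor position."""
    n = len(data[0])
    end = start + batch_size
    if end <= n:
        batch = [d[start:end] for d in data]
    else:
        # Wrap around: take the tail of each array followed by its head.
        wrap = end - n
        batch = [np.concatenate([d[start:], d[:wrap]], axis=0) for d in data]
        end = wrap
    return batch, end % n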
def validate_model(sess, val_names, val_ops, plot_ckpt, batch_size=200, output_only=False):
    """
    Validates the model stored in a session.

    Args:
        sess: The session where the model is loaded.
        val_names: The validation sample names used to evaluate the model.
        val_ops: The validation operations (input placeholders and prediction ops).
        plot_ckpt: Directory where loss plots are written.
        batch_size: Number of validation samples per batch.
        output_only: If True, only collect predictions instead of computing the error.

    Returns:
        The overall validation error for the model.
    """
    print("Validating model...")

    val_num = len(val_names)
    print("test_num: ", val_num)
    MaxTestIters = int(val_num / batch_size)
    print("MaxTestIters: ", MaxTestIters)

    val_err = []
    pred_xy_list = []
    y_list = []
    iter_start = None

    print("len(val_ops): ", len(val_ops))
    # eye_left, eye_right, face, face_mask, pred = val_ops
    eye_left, eye_right, face, face_mask, pred_xy, pred_ang_left, pred_ang_right = val_ops

    y = tf.placeholder(tf.float32, [None, 2], name='pos')
    # err = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.squared_difference(pred, y), axis=1)))
    err = tf.reduce_mean(
        tf.sqrt(tf.reduce_sum(tf.squared_difference(pred_xy, y), axis=1)))

    for iterTest in range(MaxTestIters):
        test_start = iterTest * batch_size
        test_end = (iterTest + 1) * batch_size

        # The original passed a fixed chunk size of 1000 here; use batch_size so the
        # loaded chunk matches the [test_start, test_end) slice.
        batch_val_data = load_batch_from_data(mtcnn_h, val_names, dataset_path, batch_size,
                                              img_ch, img_cols, img_rows,
                                              train_start=test_start, train_end=test_end)
        batch_val_data = prepare_data(batch_val_data)

        if not output_only:
            val_batch_err = sess.run(err, feed_dict={eye_left: batch_val_data[0],
                                                     eye_right: batch_val_data[1],
                                                     face: batch_val_data[2],
                                                     face_mask: batch_val_data[3],
                                                     y: batch_val_data[4]})
            val_err.append(val_batch_err)
        else:
            # Collect raw predictions; keep the graph tensors pred_xy / y unbound
            # (the original rebound them to NumPy arrays, which breaks later iterations).
            batch_pred_xy = sess.run(pred_xy, feed_dict={eye_left: batch_val_data[0],
                                                         eye_right: batch_val_data[1],
                                                         face: batch_val_data[2],
                                                         face_mask: batch_val_data[3],
                                                         y: batch_val_data[4]})
            pred_xy_list.append(batch_pred_xy)
            y_list.append(batch_val_data[4])

        if iterTest % 10 == 0:
            print('IterTest %s, val error: %.5f' % (iterTest, np.mean(val_err)))
            # plot_loss(np.array(train_loss), np.array(train_err), np.array(Val_err),
            #           start=0, per=1,
            #           save_file=plot_ckpt + "/testing_loss_" + str(n_epoch) + "_" + str(iterTest) + ".png")
            if iter_start:
                print('10 iters runtime: %.1fs' % (timeit.default_timer() - iter_start))
            else:
                iter_start = timeit.default_timer()

    # Accumulate predictions and ground truth across runs in a pickle file.
    # (The original only dumped when the file did not exist and referenced an
    # undefined variable; merge first, then always save.)
    file_name = "pred_and_y_" + date + ".p"
    if os.path.isfile(file_name):
        [pre_pred_xy_list, pre_y_list] = pickle.load(open(file_name, "rb"))
        pre_pred_xy_list.extend(pred_xy_list)
        pre_y_list.extend(y_list)
        pred_xy_list = pre_pred_xy_list
        y_list = pre_y_list
    pickle.dump([pred_xy_list, y_list], open(file_name, "wb"))
    print("saving file: ", file_name)

    return np.mean(val_err)
def train(self, ckpt, plot_ckpt, lr=1e-3, batch_size=128, max_epoch=1000,
          min_delta=1e-4, patience=10, print_per_epoch=10):

    ifCheck = False

    # limit = 1000
    train_names = load_data_names(train_path)  # [:1000]  # [:limit]
    val_names = load_data_names(val_path)  # [:1000]  # [:limit]

    train_num = len(train_names)
    val_num = len(val_names)
    print("train_num: ", train_num)
    print("test_num: ", val_num)

    MaxIters = train_num / batch_size
    n_batches = MaxIters

    val_chunk_size = 1000
    MaxTestIters = val_num / val_chunk_size
    val_n_batches = val_chunk_size / batch_size

    print("MaxIters: ", MaxIters)
    print("MaxTestIters: ", MaxTestIters)
    print('Train on %s samples, validate on %s samples' % (train_num, val_num))

    # Define loss and optimizer
    self.cost = tf.losses.mean_squared_error(self.y, self.pred)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.cost)

    # Evaluate model: mean Euclidean distance between prediction and ground truth.
    self.err = tf.reduce_mean(
        tf.sqrt(tf.reduce_sum(tf.squared_difference(self.pred, self.y), axis=1)))

    train_loss_history = []
    train_err_history = []
    val_loss_history = []
    val_err_history = []

    # n_incr_error = 0  # nb. of consecutive increases in error
    best_loss = np.Inf
    # n_batches = train_data[0].shape[0] / batch_size + (train_data[0].shape[0] % batch_size != 0)

    # # Create the collection
    # tf.get_collection("validation_nodes")
    # # Add stuff to the collection.
    # tf.add_to_collection("validation_nodes", self.eye_left)
    # tf.add_to_collection("validation_nodes", self.eye_right)
    # tf.add_to_collection("validation_nodes", self.face)
    # tf.add_to_collection("validation_nodes", self.face_mask)
    # tf.add_to_collection("validation_nodes", self.pred)

    saver = tf.train.Saver(max_to_keep=0)

    # Initializing the variables
    init = tf.global_variables_initializer()
    # TODO:
    # tf.reset_default_graph()

    # Launch the graph
    with tf.Session() as sess:
        # sess.run(init)  # skipped: variables are restored from a checkpoint below
        writer = tf.summary.FileWriter("logs", sess.graph)

        # saver = tf.train.import_meta_graph('my_model/2018-08-17-23-17/model_1_140_train_error_14.236069_val_error_7.756780624389648.meta')
        # saver.restore(sess, "./my_model/2018-08-22-00-33/model_8_840_train_error_3.5212839_val_error_2.7497661113739014")
        saver.restore(sess, "./my_model/2018-08-29-00-04/model_4_1200_train_error_3.5212839_val_error_2.7497661113739014")

        # Keep training until the max number of epochs is reached.
        for n_epoch in range(1, max_epoch + 1):
            print("vvvvvvvvvvvvvvvvvvv")
            print("n_epoch: ", n_epoch)
            epoch_start = timeit.default_timer()
            iter_start = None
            # n_incr_error += 1

            train_loss = []
            train_err = []
            Val_loss = []
            Val_err = []

            # train_names = shuffle_data(train_names)
            random.shuffle(train_names)
            iterTest = 0

            for iter in range(int(MaxIters)):
                start = timeit.default_timer()
                # print("--------------------------------")
                # print("iter: ", iter)

                train_start = iter * batch_size
                train_end = (iter + 1) * batch_size

                batch_train_data = load_batch_from_data(
                    train_names, dataset_path, batch_size, img_ch, img_cols, img_rows,
                    train_start=train_start, train_end=train_end)
                batch_train_data = prepare_data(batch_train_data)
                print('Loading and preparing training data: %.1fs' % (timeit.default_timer() - start))

                start = timeit.default_timer()

                # Run optimization op (backprop)
                sess.run(self.optimizer,
                         feed_dict={self.eye_left: batch_train_data[0],
                                    self.eye_right: batch_train_data[1],
                                    self.face: batch_train_data[2],
                                    self.face_mask: batch_train_data[3],
                                    self.y: batch_train_data[4]})
                train_batch_loss, train_batch_err = sess.run(
                    [self.cost, self.err],
                    feed_dict={self.eye_left: batch_train_data[0],
                               self.eye_right: batch_train_data[1],
                               self.face: batch_train_data[2],
                               self.face_mask: batch_train_data[3],
                               self.y: batch_train_data[4]})
                print("train_batch_loss: ", train_batch_loss, "train_batch_err: ", train_batch_err)

                train_loss.append(train_batch_loss)
                train_err.append(train_batch_err)
                print('Training on batch: %.1fs' % (timeit.default_timer() - start))

                # Validate less frequently once training has progressed.
                if iter > 1000:
                    if iter % 60 == 0:
                        ifCheck = True
                else:
                    if iter % 30 == 0:
                        ifCheck = True

                if ifCheck:
                    start = timeit.default_timer()

                    if iterTest + 1 >= MaxTestIters:
                        iterTest = 0

                    test_start = iterTest * val_chunk_size
                    test_end = (iterTest + 1) * val_chunk_size

                    val_data = load_batch_from_data(
                        val_names, dataset_path, val_chunk_size, img_ch, img_cols, img_rows,
                        train_start=test_start, train_end=test_end)
                    val_n_batches = val_data[0].shape[0] / batch_size + (val_data[0].shape[0] % batch_size != 0)
                    val_data = prepare_data(val_data)
                    print('Loading and preparing val data: %.1fs' % (timeit.default_timer() - start))

                    start = timeit.default_timer()

                    val_loss = 0.
                    val_err = 0.
                    for batch_val_data in next_batch(val_data, batch_size):
                        val_batch_loss, val_batch_err = sess.run(
                            [self.cost, self.err],
                            feed_dict={self.eye_left: batch_val_data[0],
                                       self.eye_right: batch_val_data[1],
                                       self.face: batch_val_data[2],
                                       self.face_mask: batch_val_data[3],
                                       self.y: batch_val_data[4]})
                        val_loss += val_batch_loss / val_n_batches
                        val_err += val_batch_err / val_n_batches

                    Val_loss.append(val_loss)
                    Val_err.append(val_err)
                    print("val_loss: ", val_loss, "val_err: ", val_err)

                    iterTest += 1
                    print('Testing on chunk: %.1fs' % (timeit.default_timer() - start))

                    start = timeit.default_timer()

                    if iter_start:
                        print('batch iters runtime: %.1fs' % (timeit.default_timer() - iter_start))
                    else:
                        iter_start = timeit.default_timer()

                    print('Epoch %s/%s Iter %s, train loss: %.5f, train error: %.5f, val loss: %.5f, val error: %.5f'
                          % (n_epoch, max_epoch, iter,
                             np.mean(train_loss), np.mean(train_err),
                             np.mean(Val_loss), np.mean(Val_err)))

                    train_loss_history.append(np.mean(train_loss))
                    train_err_history.append(np.mean(train_err))
                    val_loss_history.append(np.mean(Val_loss))
                    val_err_history.append(np.mean(Val_err))

                    plot_loss(np.array(train_loss_history), np.array(train_err_history),
                              np.array(val_err_history), start=0, per=1,
                              save_file=plot_ckpt + "/cumul_loss_" + str(n_epoch) + "_" + str(iter) + ".png")

                    # if val_loss - min_delta < best_loss:
                    # if val_err - min_delta < best_loss:
                    #     best_loss = val_err
                    save_path = (ckpt + "model_" + str(n_epoch) + "_" + str(iter)
                                 + "_train_error_%s" % (np.mean(train_err))
                                 + "_val_error_%s" % (np.mean(val_err)))
                    # , global_step=n_epoch
                    save_path = saver.save(sess, save_path)
                    print("Model saved in file: %s" % save_path)
                    # n_incr_error = 0

                    ifCheck = False
                    print('Saving models and plotting loss: %.1fs' % (timeit.default_timer() - start))

            print('epoch runtime: %.1fs' % (timeit.default_timer() - epoch_start))

            # train_loss_history.append(np.mean(train_loss))
            # train_err_history.append(np.mean(train_err))
            # val_loss_history.append(np.mean(Val_loss))
            # val_err_history.append(np.mean(Val_err))
            # plot_loss(np.array(train_loss_history), np.array(train_err_history), np.array(val_err_history),
            #           start=0, per=1, save_file=plot_ckpt + "/cumul_loss_" + str(n_epoch) + ".png")

            # if n_epoch % print_per_epoch == 0:
            print('Epoch %s/%s Iter %s, train loss: %.5f, train error: %.5f, val loss: %.5f, val error: %.5f'
                  % (n_epoch, max_epoch, iter,
                     np.mean(train_loss), np.mean(train_err),
                     np.mean(Val_loss), np.mean(Val_err)))

            # if n_incr_error >= patience:
            #     print('Early stopping occurred. Optimization Finished!')
            #     return train_loss_history, train_err_history, val_loss_history, val_err_history

        return train_loss_history, train_err_history, val_loss_history, val_err_history
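# Hedged sketch (an assumption, not original code): next_batch is used by the
# validation loops above but is not defined in this section. A minimal version of
# what it could look like, assuming `data` is a tuple of equally sized arrays
# (eye_left, eye_right, face, face_mask, y) and that a final partial batch is
# yielded (which matches the val_n_batches count used above); the repo's real
# helper may differ.
def next_batch(data, batch_size):
    """Yield successive batches, slicing every array in `data` in parallel;
    the last batch may be smaller than batch_size."""
    n = len(data[0])
    for start in range(0, n, batch_size):
        yield [d[start:start + batch_size] for d in data]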
def train(self, lr=1e-3, batch_size=128, max_epoch=1000, min_delta=1e-4,
          patience=10, print_per_epoch=10, out_model='my_model'):
    # train_data, val_data,

    # limit = 1000
    train_names = load_data_names(train_path)[3000:4000]  # [:limit]
    val_names = load_data_names(val_path)[2000:3000]
    # val_names = load_data_names(val_path)  # [:limit]

    train_num = len(train_names)
    val_num = len(val_names)
    print("train_num: ", train_num)
    print("test_num: ", val_num)

    MaxIters = train_num / batch_size
    n_batches = MaxIters
    MaxTestIters = val_num / batch_size
    # val_n_batches = MaxTestIters
    print("MaxIters: ", MaxIters)
    print("MaxTestIters: ", MaxTestIters)
    print('Train on %s samples, validate on %s samples' % (train_num, val_num))

    # Define loss and optimizer
    self.cost = tf.losses.mean_squared_error(self.y, self.pred)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.cost)

    # Evaluate model
    self.err = tf.reduce_mean(
        tf.sqrt(tf.reduce_sum(tf.squared_difference(self.pred, self.y), axis=1)))

    train_loss_history = []
    train_err_history = []
    val_loss_history = []
    val_err_history = []

    # n_incr_error = 0  # nb. of consecutive increases in error
    # best_loss = np.Inf
    # print("len(train_data): ", len(train_data))
    # print("train_data[0].shape: ", train_data[0].shape)
    # print("train_data[0].shape[0]: ", train_data[0].shape[0])
    # print("train_data[0].shape[0] / batch_size: ", train_data[0].shape[0] / batch_size)
    # n_batches = train_data[0].shape[0] / batch_size + (train_data[0].shape[0] % batch_size != 0)
    # print("n_batches: ", n_batches)

    # Create the collection
    tf.get_collection("validation_nodes")
    # Add stuff to the collection.
    tf.add_to_collection("validation_nodes", self.eye_left)
    tf.add_to_collection("validation_nodes", self.eye_right)
    tf.add_to_collection("validation_nodes", self.face)
    tf.add_to_collection("validation_nodes", self.face_mask)
    tf.add_to_collection("validation_nodes", self.pred)

    saver = tf.train.Saver(max_to_keep=1)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        writer = tf.summary.FileWriter("logs", sess.graph)

        # Keep training until the max number of epochs is reached.
        for n_epoch in range(1, max_epoch + 1):
            print("vvvvvvvvvvvvvvvvvvv")
            print("n_epoch: ", n_epoch)
            # n_incr_error += 1
            # train_loss = 0.
            # train_err = 0.
            train_loss = []
            train_err = []

            # train_data = shuffle_data(train_data)
            # for batch_train_data in next_batch(train_data, batch_size):
            iterTest = 0
            for iter in range(int(MaxIters)):
                # print("--------------------------------")
                # print("iter: ", iter)
                train_start = iter * batch_size
                train_end = (iter + 1) * batch_size

                batch_train_data = load_batch_from_data(
                    train_names, dataset_path, batch_size, img_ch, img_cols, img_rows,
                    train_start=train_start, train_end=train_end)
                batch_train_data = prepare_data(batch_train_data)

                # Run optimization op (backprop)
                sess.run(self.optimizer,
                         feed_dict={self.eye_left: batch_train_data[0],
                                    self.eye_right: batch_train_data[1],
                                    self.face: batch_train_data[2],
                                    self.face_mask: batch_train_data[3],
                                    self.y: batch_train_data[4]})
                train_batch_loss, train_batch_err = sess.run(
                    [self.cost, self.err],
                    feed_dict={self.eye_left: batch_train_data[0],
                               self.eye_right: batch_train_data[1],
                               self.face: batch_train_data[2],
                               self.face_mask: batch_train_data[3],
                               self.y: batch_train_data[4]})
                # train_loss += train_batch_loss / n_batches
                # train_err += train_batch_err / n_batches
                # print("train batch loss: ", train_batch_loss, "train_batch_err: ", train_batch_err)
                train_loss.append(train_batch_loss)
                train_err.append(train_batch_err)

            # Validate on the whole validation slice once per epoch.
            test_start = 0
            test_end = val_num
            val_data = load_batch_from_data(val_names, dataset_path, 1000,
                                            img_ch, img_cols, img_rows,
                                            train_start=test_start, train_end=test_end)
            val_n_batches = val_data[0].shape[0] / batch_size + (val_data[0].shape[0] % batch_size != 0)
            val_data = prepare_data(val_data)

            val_loss = 0.
            val_err = 0.
            for batch_val_data in next_batch(val_data, batch_size):
                val_batch_loss, val_batch_err = sess.run(
                    [self.cost, self.err],
                    feed_dict={self.eye_left: batch_val_data[0],
                               self.eye_right: batch_val_data[1],
                               self.face: batch_val_data[2],
                               self.face_mask: batch_val_data[3],
                               self.y: batch_val_data[4]})
                val_loss += val_batch_loss / val_n_batches
                val_err += val_batch_err / val_n_batches

            train_loss_history.append(np.mean(train_loss))
            train_err_history.append(np.mean(train_err))
            val_loss_history.append(val_loss)
            val_err_history.append(val_err)

            # if val_loss - min_delta < best_loss:
            #     print("out_model: ", out_model.split())
            #     # ckpt = out_model.split()[0]
            #     # ckpt = ckpt + "/" + date + "/" + str(cycle) + "/"
            #     # print("ckpt: ", ckpt)
            #     ckpt = os.path.abspath(out_model)
            #     print("ckpt: ", ckpt)
            #     if not os.path.exists(ckpt):
            #         os.makedirs(ckpt)
            #     # ckpt += "/model"
            #     best_loss = val_loss
            #     print("os.path.abspath(out_model): ", os.path.abspath(out_model))
            #     # , global_step=n_epoch
            #     save_path = saver.save(sess, ckpt)
            #     print("Model saved in file: %s" % save_path)
            #     n_incr_error = 0

            # if n_epoch % print_per_epoch == 0:
            print('Epoch %s/%s, train loss: %.5f, train error: %.5f, val loss: %.5f, val error: %.5f'
                  % (n_epoch, max_epoch, np.mean(train_loss), np.mean(train_err), val_loss, val_err))

            # if n_incr_error >= patience:
            #     print('Early stopping occurred. Optimization Finished!')
            #     return train_loss_history, train_err_history, val_loss_history, val_err_history

        return train_loss_history, train_err_history, val_loss_history, val_err_history
# batch = next(generator_train_data(train_names, dataset_path, batch_size, img_ch, img_cols, img_rows))
# print(len(batch[0]))
# print(np.asarray(batch[0][0]).shape)
# print(batch[1].shape)
# print(len(batch))
# print(batch.shape)

chunk_size = 100
train_num = len(train_names)
MaxIters = train_num / chunk_size

for iter in range(int(MaxIters)):
    print(" ------------- iter --------------: ", iter)
    train_start = iter * chunk_size
    train_end = (iter + 1) * chunk_size
    batch = load_batch_from_data(train_names, dataset_path, chunk_size,
                                 img_ch, img_cols, img_rows,
                                 train_start=train_start, train_end=train_end)
    print(len(batch[0]))
    print(np.asarray(batch[0][0]).shape)
    print(batch[1].shape)
def train(args):
    # train_data, val_data = load_data(args.input)
    # train_data = prepare_data(train_data)
    # val_data = prepare_data(val_data)
    # print(len(train_data))
    # print(train_data[-5].shape)
    # print(train_data[-4].shape)
    # print(train_data[-3].shape)
    # print(train_data[-2].shape)
    # print(train_data[-1].shape)

    # Windows-style dataset layout; use escaped backslashes instead of the
    # original '\ '.strip() concatenation trick.
    dataset_path = "..\\Eye-Tracking-for-Everyone-master\\Eye-Tracking-for-Everyone-master\\GazeCapture"
    train_path = dataset_path + "\\train"
    val_path = dataset_path + "\\validation"
    test_path = dataset_path + "\\test"

    # train parameters
    # n_epoch = args.max_epoch
    batch_size = 16
    # patience = args.patience

    # image parameters
    img_cols = 64
    img_rows = 64
    img_ch = 3

    # train data
    # limit = 2000
    train_names = load_data_names(train_path)  # [:limit]

    # validation data
    # val_limit = 100
    val_names = load_data_names(val_path)  # [:val_limit]

    # test data
    test_names = load_data_names(test_path)  # [:limit]

    et = EyeTracker()

    Train_loss_history = []
    Train_err_history = []
    Val_loss_history = []
    Val_err_history = []

    chunk_size = args.batch_size  # * 10
    # chunk_size = args.batch_size
    print("chunk_size: ", chunk_size)

    train_num = len(train_names)
    test_num = len(val_names)
    print("train_num: ", train_num)
    print("test_num: ", test_num)

    MaxIters = train_num / chunk_size
    MaxTestIters = test_num / chunk_size
    print("MaxIters: ", MaxIters)
    print("MaxTestIters: ", MaxTestIters)

    iterTest = 0

    # Initializing the variables
    # Launch the graph
    with tf.Session() as sess:
        # sess.run(init)
        et.initialize()

        for e in range(args.max_epoch):
            print(" ------------- overall epoch --------------: ", e)

            for iter in range(int(MaxIters)):
                start = timeit.default_timer()
                print(" ------------- iter --------------: ", iter)

                # train_start = iter * chunk_size
                # train_end = (iter + 1) * chunk_size
                train_start = 0
                train_end = 4000
                # train_data = load_batch_from_data(train_names, dataset_path, chunk_size, img_ch, img_cols, img_rows,
                #                                   train_start=train_start, train_end=train_end)
                train_data = load_batch_from_data(train_names, dataset_path, 4000,
                                                  img_ch, img_cols, img_rows,
                                                  train_start=train_start, train_end=train_end)

                # test_start = iterTest * chunk_size
                # test_end = (iterTest + 1) * chunk_size
                test_start = 0
                test_end = 500
                # val_data = load_batch_from_data(val_names, dataset_path, chunk_size, img_ch, img_cols, img_rows,
                #                                 train_start=test_start, train_end=test_end)
                val_data = load_batch_from_data(val_names, dataset_path, 500,
                                                img_ch, img_cols, img_rows,
                                                train_start=test_start, train_end=test_end)

                # print("----------before----------")
                # print(len(train_data))
                # print(train_data[-5].shape)
                # print(train_data[-4].shape)
                # print(train_data[-3].shape)
                # print(train_data[-2].shape)
                # print(train_data[-1].shape)

                train_data = prepare_data(train_data)
                val_data = prepare_data(val_data)

                # print("----------after----------")
                print(len(train_data))
                print(train_data[-5].shape)
                print(train_data[-4].shape)
                print(train_data[-3].shape)
                print(train_data[-2].shape)
                print(train_data[-1].shape)

                train_loss_history, train_err_history, val_loss_history, val_err_history = et.train(
                    sess, train_data, val_data,
                    lr=args.learning_rate,
                    batch_size=args.batch_size,
                    max_epoch=100,
                    min_delta=1e-4,
                    patience=args.patience,
                    print_per_epoch=args.print_per_epoch,
                    out_model=args.save_model,
                    cycle=iter,
                    overall_epoch=e)

                Train_loss_history.extend(train_loss_history)
                Train_err_history.extend(train_err_history)
                Val_loss_history.extend(val_loss_history)
                Val_err_history.extend(val_err_history)

                plot_loss(np.array(Train_loss_history), np.array(Train_err_history),
                          np.array(Val_err_history), start=0, per=1,
                          save_file="test1/loss_" + str(e) + "_" + str(iter) + ".png")

                iterTest += 1
                iterTest %= MaxTestIters
                if iterTest > MaxTestIters - 2:
                    iterTest = 0

                print('runtime: %.1fs' % (timeit.default_timer() - start))

    # tf.summary.histogram("train_loss_history", train_loss_history)
    # tf.summary.histogram("train_err_history", train_err_history)
    # tf.summary.histogram("val_loss_history", val_loss_history)
    # tf.summary.histogram("val_err_history", val_err_history)

    # if args.plot_loss:
    plot_loss(np.array(train_loss_history), np.array(train_err_history),
              np.array(val_err_history), start=0, per=1,
              save_file=args.plot_loss)
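# Hedged sketch (an assumption, not original code): train(args) reads an
# argparse-style namespace. The flags below only mirror the attributes accessed
# above (learning_rate, batch_size, max_epoch, patience, print_per_epoch,
# save_model, plot_loss); the defaults are placeholders, not the repo's values.
def build_parser():
    import argparse
    parser = argparse.ArgumentParser(description="Train the gaze-tracking model")
    parser.add_argument("--learning_rate", type=float, default=1e-3)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--max_epoch", type=int, default=1000)
    parser.add_argument("--patience", type=int, default=10)
    parser.add_argument("--print_per_epoch", type=int, default=10)
    parser.add_argument("--save_model", default="my_model")
    parser.add_argument("--plot_loss", default="test1/loss.png")
    return parser

# Example usage: train(build_parser().parse_args())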
def train(self, ckpt, plot_ckpt, lr=1e-3, batch_size=128, max_epoch=1000,
          min_delta=1e-4, patience=10, print_per_epoch=10):

    ifCheck = False

    # limit = 1000
    train_names = load_data_names(train_path)[:500]  # [:limit]
    val_names = load_data_names(val_path)[:500]  # [:limit]

    train_num = len(train_names)
    val_num = len(val_names)
    print("train_num: ", train_num)
    print("test_num: ", val_num)

    MaxIters = train_num / batch_size
    n_batches = MaxIters

    val_chunk_size = 100
    MaxTestIters = val_num / val_chunk_size
    val_n_batches = val_chunk_size / batch_size

    print("MaxIters: ", MaxIters)
    print("MaxTestIters: ", MaxTestIters)
    print('Train on %s samples, validate on %s samples' % (train_num, val_num))

    # Define loss and optimizer
    self.cost = tf.losses.mean_squared_error(self.y, self.pred)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.cost)

    # Evaluate model
    self.err = tf.reduce_mean(
        tf.sqrt(tf.reduce_sum(tf.squared_difference(self.pred, self.y), axis=1)))

    train_loss_history = []
    train_err_history = []
    val_loss_history = []
    val_err_history = []

    # Create the collection
    tf.get_collection("validation_nodes")
    # Add stuff to the collection.
    tf.add_to_collection("validation_nodes", self.eye_left)
    tf.add_to_collection("validation_nodes", self.eye_right)
    tf.add_to_collection("validation_nodes", self.face)
    tf.add_to_collection("validation_nodes", self.face_mask)
    tf.add_to_collection("validation_nodes", self.pred)

    saver = tf.train.Saver(max_to_keep=0)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        writer = tf.summary.FileWriter("logs", sess.graph)
        # saver.restore(sess, "./my_model/2018-09-15-21-45/model_3_180_train_error_1.8261857_val_error_2.2103159427642822")

        random.shuffle(train_names)
        random.shuffle(val_names)

        # Keep training until the max number of epochs is reached.
        for n_epoch in range(1, max_epoch + 1):
            print("vvvvvvvvvvvvvvvvvvv")
            print("n_epoch: ", n_epoch)
            epoch_start = timeit.default_timer()
            iter_start = None
            iterTest = 0

            for iter in range(int(MaxIters)):
                start = timeit.default_timer()
                # print("--------------------------------")
                # print("iter: ", iter)

                train_start = iter * batch_size
                train_end = (iter + 1) * batch_size

                batch_train_data = load_batch_from_data(
                    mtcnn_h, train_names, dataset_path, batch_size,
                    img_ch, img_cols, img_rows,
                    train_start=train_start, train_end=train_end)
                batch_train_data = prepare_data(batch_train_data)
                print('Loading and preparing training data: %.1fs' % (timeit.default_timer() - start))

                start = timeit.default_timer()

                # Run optimization op (backprop)
                sess.run(self.optimizer,
                         feed_dict={self.eye_left: batch_train_data[0],
                                    self.eye_right: batch_train_data[1],
                                    self.face: batch_train_data[2],
                                    self.face_mask: batch_train_data[3],
                                    self.y: batch_train_data[4]})
                train_batch_loss, train_batch_err = sess.run(
                    [self.cost, self.err],
                    feed_dict={self.eye_left: batch_train_data[0],
                               self.eye_right: batch_train_data[1],
                               self.face: batch_train_data[2],
                               self.face_mask: batch_train_data[3],
                               self.y: batch_train_data[4]})
                print("train_batch_loss: ", train_batch_loss, "train_batch_err: ", train_batch_err)

                # train_loss.append(train_batch_loss)
                # train_err.append(train_batch_err)
                train_loss_history.append(train_batch_loss)
                train_err_history.append(train_batch_err)
                print('Training on batch: %.1fs' % (timeit.default_timer() - start))

                if iter % 30 == 0:
                    ifCheck = True

                if ifCheck:
                    start = timeit.default_timer()

                    if iterTest + 1 >= MaxTestIters:
                        iterTest = 0
                    # test_start = iterTest * val_chunk_size
                    # test_end = (iterTest + 1) * val_chunk_size
                    test_start = 0
                    test_end = val_chunk_size

                    val_data = load_batch_from_data(
                        mtcnn_h, val_names, dataset_path, val_chunk_size,
                        img_ch, img_cols, img_rows,
                        train_start=test_start, train_end=test_end)
                    val_n_batches = val_data[0].shape[0] / batch_size + (val_data[0].shape[0] % batch_size != 0)
                    val_data = prepare_data(val_data)
                    print('Loading and preparing val data: %.1fs' % (timeit.default_timer() - start))

                    start = timeit.default_timer()

                    val_loss = 0.
                    val_err = 0.
                    for batch_val_data in next_batch(val_data, batch_size):
                        val_batch_loss, val_batch_err = sess.run(
                            [self.cost, self.err],
                            feed_dict={self.eye_left: batch_val_data[0],
                                       self.eye_right: batch_val_data[1],
                                       self.face: batch_val_data[2],
                                       self.face_mask: batch_val_data[3],
                                       self.y: batch_val_data[4]})
                        val_loss += val_batch_loss / val_n_batches
                        val_err += val_batch_err / val_n_batches

                    print("val_loss: ", val_loss, "val_err: ", val_err)
                    iterTest += 1
                    print('Testing on chunk: %.1fs' % (timeit.default_timer() - start))

                    start = timeit.default_timer()

                    if iter_start:
                        print('batch iters runtime: %.1fs' % (timeit.default_timer() - iter_start))
                    else:
                        iter_start = timeit.default_timer()

                    print("now: ", now)
                    print("learning rate: ", lr)
                    print('Epoch %s/%s Iter %s, train loss: %.5f, train error: %.5f, val loss: %.5f, val error: %.5f'
                          % (n_epoch, max_epoch, iter,
                             np.mean(train_loss_history), np.mean(train_err_history),
                             np.mean(val_loss_history), np.mean(val_err_history)))

                    val_loss_history.append(val_loss)
                    val_err_history.append(val_err)

                    plot_loss(np.array(train_loss_history), np.array(train_err_history),
                              np.array(val_loss_history), np.array(val_err_history),
                              start=0, per=1,
                              save_file=plot_ckpt + "/cumul_loss_" + str(n_epoch) + "_" + str(iter) + ".png")

                    save_path = (ckpt + "model_" + str(n_epoch) + "_" + str(iter)
                                 + "_train_error_history_%s" % (np.mean(train_err_history))
                                 + "_val_error_history_%s" % (np.mean(val_err_history)))
                    save_path = saver.save(sess, save_path)
                    print("Model saved in file: %s" % save_path)

                    ifCheck = False
                    print('Saving models and plotting loss: %.1fs' % (timeit.default_timer() - start))

            print('epoch runtime: %.1fs' % (timeit.default_timer() - epoch_start))

        return train_loss_history, train_err_history, val_loss_history, val_err_history
def train(args):
    dataset_path = "../Eye-Tracking-for-Everyone-master/Eye-Tracking-for-Everyone-master/GazeCapture"
    train_path = dataset_path + '/train'
    val_path = dataset_path + '/validation'

    img_cols = 64
    img_rows = 64
    img_ch = 3

    train_names = load_data_names(train_path)
    val_names = load_data_names(val_path)

    train_start = 3000
    train_end = 4000
    chunk_size = train_end - train_start
    train_data = load_batch_from_data(train_names, dataset_path, chunk_size,
                                      img_ch, img_cols, img_rows,
                                      train_start=train_start, train_end=train_end)

    test_start = 2000
    test_end = 3000
    # test_start = 0
    # test_end = 1000
    chunk_size = test_end - test_start
    val_data = load_batch_from_data(val_names, dataset_path, chunk_size,
                                    img_ch, img_cols, img_rows,
                                    train_start=test_start, train_end=test_end)

    # train_data, val_data = load_data(args.input)
    # print(len(train_data))
    # print(train_data[-5].shape)
    # print(train_data[-4].shape)
    # print(train_data[-3].shape)
    # print(train_data[-2].shape)
    # print(train_data[-1].shape)
    # print(train_data[-1][0])
    # print(train_data[-2][10])

    train_data = prepare_data(train_data)
    val_data = prepare_data(val_data)

    start = timeit.default_timer()
    et = EyeTracker()
    train_loss_history, train_err_history, val_loss_history, val_err_history = et.train(
        train_data, val_data,
        lr=args.learning_rate,
        batch_size=args.batch_size,
        max_epoch=args.max_epoch,
        min_delta=1e-4,
        patience=args.patience,
        print_per_epoch=args.print_per_epoch,
        out_model=args.save_model)
    print('runtime: %.1fs' % (timeit.default_timer() - start))

    plot_loss(np.array(train_loss_history), np.array(train_err_history),
              np.array(val_err_history), start=0, per=1,
              save_file="test2/loss.png")