def __init__(self, num_frames, model_path):
    """Build the inference graph and restore a trained checkpoint.

    Args:
        num_frames: number of stacked frames fed to the network; becomes the
            channel dimension of the (1, 240, 320, num_frames) input placeholder.
        model_path: checkpoint path prefix passed to ``tf.train.Saver.restore``.
    """
    super(InferenceModel, self).__init__()
    self.num_frames = num_frames
    self.model_path = model_path
    with tf.Graph().as_default():
        # FIX: use the constructor argument instead of cfg.num_frames —
        # previously the `num_frames` parameter was stored but silently ignored.
        self.inputs_pl = tf.placeholder(
            tf.float32, shape=(1, 240, 320, self.num_frames))
        self.is_training_pl = tf.placeholder(tf.bool, shape=())
        pred = build_graph(self.inputs_pl, self.is_training_pl,
                           weight_decay=0.0, bn_decay=None)
        # Softmax over logits so self.pred holds class probabilities.
        self.pred = tf.nn.softmax(pred)
        self.sess = tf.Session()
        saver = tf.train.Saver()
        saver.restore(self.sess, model_path)
        print("\nLoaded model... ", model_path)
def train(training_data_path='D:/datasets/UTKinectAction3D_npy_5/training.npy',
          validation_data_path='D:/datasets/UTKinectAction3D_npy_5/validation.npy'):
    """Build the training graph, restore a checkpoint if available, and train.

    Args:
        training_data_path: .npy file with the training examples. Defaults
            preserve the previously hard-coded dataset locations.
        validation_data_path: .npy file with the validation examples.

    Side effects: writes TensorBoard summaries to LOGDIR/train and LOGDIR/test
    and saves checkpoints to LOGDIR every ``cfg.save_model_freq`` epochs.
    """
    log_string('***** Config *****')
    log_string('***** Building Point {}...'.format(MODEL_NAME))
    log_string('** num_frames: {}'.format(cfg.num_frames))
    log_string('** num_classes: {}'.format(cfg.num_classes))
    log_string('** batch_size: {}'.format(cfg.batch_size))
    log_string('** epoch: {}'.format(cfg.epoch))
    log_string('** init_learning_rate: {}'.format(cfg.init_learning_rate))
    log_string('** decay_step: {}'.format(cfg.decay_step))
    log_string('** decay_rate: {}'.format(cfg.decay_rate))
    log_string('** weight_decay: {}'.format(cfg.weight_decay))

    with tf.Graph().as_default():
        # Placeholders and schedule variables.
        inputs, labels = placeholder_inputs(cfg.batch_size, cfg.num_frames)
        is_training_pl = tf.placeholder(tf.bool, shape=())
        keep_prob_pl = tf.placeholder(tf.float32)
        global_step = tf.Variable(0, dtype=tf.int64)
        bn_decay = get_bn_decay(global_step)
        tf.summary.scalar('bn_decay', bn_decay)

        # Model, loss and accuracy summaries.
        pred = build_graph(inputs, is_training_pl,
                           weight_decay=cfg.weight_decay,
                           keep_prob=keep_prob_pl, bn_decay=bn_decay)
        loss = get_loss(pred, labels)
        tf.summary.scalar('total_loss', loss)
        correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels))
        correct = tf.reduce_sum(tf.cast(correct, tf.float32))
        accuracy = correct / float(cfg.batch_size)
        tf.summary.scalar('accuracy', accuracy)

        # Training operator; UPDATE_OPS must run with each step so batch-norm
        # statistics are refreshed alongside the gradient update.
        learning_rate = get_learning_rate(global_step)
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        sess = tf.Session(config=config)

        # Restore a previous checkpoint when it exists; otherwise initialize
        # a fresh model (best-effort restore is intentional here).
        load_model_path = LOGDIR + '/model_epoch_{}'.format(
            cfg.load_model_epoch)
        try:
            saver = tf.train.Saver()
            saver.restore(sess, load_model_path)
            print("\nPrevious model restored... ", load_model_path)
        except Exception as e:
            print("\nCannot find the requested model... {}".format(e))
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            print("\nCreating new model...", load_model_path)

        if cfg.debug:
            sess = tf_debug.LocalCLIDebugWrapperSession(sess)

        t_vars = tf.trainable_variables()
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(LOGDIR + '/train')
        train_writer.add_graph(tf.get_default_graph())
        test_writer = tf.summary.FileWriter(LOGDIR + '/test')
        test_writer.add_graph(tf.get_default_graph())

        # Report parameter counts for sanity checking the architecture.
        num_params = np.sum([np.prod(v.get_shape().as_list())
                             for v in t_vars])
        log_string(
            '************ The Number of Trainable Parameters: {} ************'.
            format(num_params))
        num_g_params = np.sum(
            [np.prod(v.get_shape().as_list()) for v in tf.global_variables()])
        log_string(
            '************ The Number of Global Parameters: {} ************'.
            format(num_g_params))

        ops = {
            'inputs_pl': inputs,
            'labels_pl': labels,
            'is_training_pl': is_training_pl,
            'keep_prob_pl': keep_prob_pl,
            'pred': pred,
            'loss': loss,
            'train_op': train_op,
            'merged': merged,
            'step': global_step
        }

        # Dataset paths are now parameters (previously hard-coded).
        training_dataset = np.load(training_data_path)
        validation_dataset = np.load(validation_data_path)
        train_data_gen = DataGenerator(training_dataset,
                                       batch_size=cfg.batch_size,
                                       augment=False)
        validation_data_gen = DataGenerator(validation_dataset,
                                            batch_size=cfg.batch_size,
                                            augment=False)

        for epoch in range(1, cfg.epoch + 1):
            log_string('\n******** Training:---Epoch_{}/{} *********'.format(
                epoch, cfg.epoch))
            log_string('Training ...')
            train_one_epoch(sess, train_data_gen, ops, train_writer)
            log_string('Validating ...')
            val_one_epoch(sess, validation_data_gen, ops, test_writer)
            if epoch % cfg.save_model_freq == 0:
                saver.save(sess, LOGDIR + '/model_epoch_{}'.format(epoch))
                log_string('Model saved at epoch {}'.format(epoch))
def test_model(dataset_path='path/to/dataset'):
    """Restore a trained checkpoint and run a single validation epoch.

    Args:
        dataset_path: .npy file with the validation examples (previously a
            hard-coded placeholder string; now a parameter with the same
            default for backward compatibility).

    Raises:
        Exception: re-raised from ``Saver.restore`` when the checkpoint at
            ``LOGDIR/model_epoch_{cfg.load_model_epoch}`` cannot be loaded —
            evaluation without a model is meaningless.
    """
    with tf.Graph().as_default():
        validation_dataset = np.load(dataset_path)
        validation_data_gen = DataGenerator(validation_dataset,
                                            batch_size=cfg.batch_size)
        inputs, labels = placeholder_inputs(cfg.batch_size, cfg.num_frames)
        is_training_pl = tf.placeholder(tf.bool, shape=())
        keep_prob_pl = tf.placeholder(tf.float32)
        global_step = tf.Variable(0, dtype=tf.int64)
        bn_decay = get_bn_decay(global_step)
        tf.summary.scalar('bn_decay', bn_decay)

        # Model and loss, mirroring the graph built in train().
        pred = build_graph(inputs, is_training_pl,
                           weight_decay=cfg.weight_decay,
                           keep_prob=keep_prob_pl, bn_decay=bn_decay)
        loss = get_loss(pred, labels)
        tf.summary.scalar('total_loss', loss)
        correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels))
        correct = tf.reduce_sum(tf.cast(correct, tf.float32))
        accuracy = correct / float(cfg.batch_size)
        tf.summary.scalar('accuracy', accuracy)

        # NOTE(review): the optimizer/train_op is built exactly as in train()
        # even though this function only evaluates — presumably so the graph's
        # variable set matches the training graph; confirm before removing.
        learning_rate = get_learning_rate(global_step)
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        sess = tf.Session(config=config)

        # Restore the requested checkpoint; failure is fatal for evaluation,
        # so report the cause and re-raise (previously re-raised silently).
        load_model_path = LOGDIR + '/model_epoch_{}'.format(
            cfg.load_model_epoch)
        try:
            saver = tf.train.Saver()
            saver.restore(sess, load_model_path)
            print("\nLoaded previous model... ", load_model_path)
        except Exception as e:
            print("\nCould not restore model {}: {}".format(
                load_model_path, e))
            raise

        if cfg.debug:
            sess = tf_debug.LocalCLIDebugWrapperSession(sess)

        t_vars = tf.trainable_variables()
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(LOGDIR + '/train')
        train_writer.add_graph(tf.get_default_graph())
        test_writer = tf.summary.FileWriter(LOGDIR + '/test')
        test_writer.add_graph(tf.get_default_graph())

        # Report parameter counts for sanity checking the restored model.
        num_params = np.sum([np.prod(v.get_shape().as_list())
                             for v in t_vars])
        print(
            '************ The Number of Trainable Parameters: {} ************'.
            format(num_params))
        num_g_params = np.sum(
            [np.prod(v.get_shape().as_list()) for v in tf.global_variables()])
        print('************ The Number of Global Parameters: {} ************'.
              format(num_g_params))

        ops = {
            'inputs_pl': inputs,
            'labels_pl': labels,
            'keep_prob_pl': keep_prob_pl,
            'is_training_pl': is_training_pl,
            'pred': pred,
            'loss': loss,
            'train_op': train_op,
            'merged': merged,
            'step': global_step
        }

        print('Validating ...')
        val_one_epoch(sess, validation_data_gen, ops, test_writer,
                      logging=False)