def val_one_step(model, X, Y, total_iou):
    '''
    Compute validation loss and accuracy.

    Parameters
    ----------
    model : psenet + fpn
    X : images
    Y : labels

    Returns
    -------
    loss, accuracy
    '''
    X = tf.cast(X, tf.float32)
    Y = tf.cast(Y, tf.float32)
    y_pred = model(X)
    loss = build_loss(y_true=Y, y_pred=y_pred)
    loss = tf.reduce_mean(loss)
    accuracy, total_iou = mean_iou(y_true=Y, y_pred=y_pred, total_iou=total_iou)
    return loss, accuracy
def train_one_step(model, X, Y, optimizer):
    '''
    Update weights using automatic gradients and Adam.

    Parameters
    ----------
    model : psenet + fpn
    X : input images
    Y : labels
    optimizer : training optimizer

    Returns
    -------
    loss
    '''
    with tf.GradientTape() as tape:
        X = tf.cast(X, tf.float32)
        Y = tf.cast(Y, tf.float32)
        y_pred = model(X, training=True)
        loss = build_loss(y_true=Y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
    return loss
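# A minimal driver sketch for the two step functions above (not part of the
# original file). It assumes `model`, `optimizer`, and tf.data datasets
# `train_ds` / `val_ds` yielding (X, Y) batches already exist. Note that
# val_one_step does not return the updated total_iou, so the accumulator is
# simply reset per batch here.
def run_training(model, optimizer, train_ds, val_ds, epochs=10):
    for epoch in range(epochs):
        # one pass over the training data, updating the weights each step
        for X, Y in train_ds:
            train_loss = train_one_step(model, X, Y, optimizer)

        # one pass over the validation data
        total_iou = tf.constant(0.0)
        for X, Y in val_ds:
            val_loss, val_acc = val_one_step(model, X, Y, total_iou)

        print('epoch %d: train loss %.4f, val loss %.4f, val acc %.4f'
              % (epoch, float(train_loss), float(val_loss), float(val_acc)))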
def train_model(continue_flag, restore_scale2, train_scale2, freeze_scale1):
    # directories to save the checkpoints of each scale
    if not gfile.Exists(SCALE1_DIR):
        gfile.MakeDirs(SCALE1_DIR)
    if not gfile.Exists(SCALE2_DIR):
        gfile.MakeDirs(SCALE2_DIR)

    with tf.Graph().as_default():
        # get batch
        global_step = tf.Variable(0, name='global_step', trainable=False)
        mode = tf.placeholder(dtype=tf.bool, name='mode', shape=())
        with tf.device('/cpu:0'):
            batch_generator = BatchGenerator(batch_size=BATCH_SIZE)
            # train_images, train_depths, train_pixels_mask = batch_generator.csv_inputs(TRAIN_FILE)
            train_images, train_depths, train_pixels_mask = tf.cond(
                mode,
                lambda: batch_generator.csv_inputs(TRAIN_FILE),
                lambda: batch_generator.csv_inputs(TEST_FILE))
        '''
        # placeholders
        training_images = tf.placeholder(tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 3], name="training_images")
        depths = tf.placeholder(tf.float32, shape=[None, TARGET_HEIGHT, TARGET_WIDTH, 1], name="depths")
        pixels_mask = tf.placeholder(tf.float32, shape=[None, TARGET_HEIGHT, TARGET_WIDTH, 1], name="pixels_mask")
        '''

        # build model
        scale1 = model.build_scale1(train_images, freeze_weights=freeze_scale1)
        if train_scale2:
            scale2 = model.build_scale2(batch_data=train_images, scale1_op=scale1)
            loss = model.build_loss(scale2_op=scale2, depths=train_depths, pixels_mask=train_pixels_mask)
        else:
            loss = model.build_loss(scale2_op=scale1, depths=train_depths, pixels_mask=train_pixels_mask)

        # learning rate
        num_batches_per_epoch = float(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN) / BATCH_SIZE
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
        lr = tf.train.exponential_decay(
            INITIAL_LEARNING_RATE,
            global_step,
            100000,
            LEARNING_RATE_DECAY_FACTOR,
            staircase=True)

        # optimizer
        optimizer = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)

        # TODO: define model saver

        # Training session
        # sess_config = tf.ConfigProto(log_device_placement=True)
        # sess_config.gpu_options.allocator_type = 'BFC'
        # sess_config.gpu_options.per_process_gpu_memory_fraction = 0.80
        '''
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        '''

        # loss summary
        tf.summary.scalar("loss", loss)
        # merge all summaries into a single "operation" which we can execute in a session
        summary_op = tf.summary.merge_all()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # create log writer object
            writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

            # Saver
            # dictionaries for each scale to define separate collections
            scale1_params = {}
            scale2_params = {}
            # add variables to their corresponding dictionary
            for variable in tf.all_variables():
                variable_name = variable.name
                if variable_name.find('s1') >= 0:
                    scale1_params[variable_name] = variable
                if train_scale2:
                    if variable_name.find('s2') >= 0:
                        scale2_params[variable_name] = variable

            # define savers
            saver_scale1 = tf.train.Saver(scale1_params, max_to_keep=4)
            if train_scale2:
                saver_scale2 = tf.train.Saver(scale2_params)

            # restore params if we need to continue the previous training
            if continue_flag:
                scale1_ckpt = tf.train.get_checkpoint_state(SCALE1_DIR)
                if scale1_ckpt and scale1_ckpt.model_checkpoint_path:
                    print("Scale1 params Loading.")
                    saver_scale1.restore(sess, scale1_ckpt.model_checkpoint_path)
                    print("Scale1 params Restored.")
                else:
                    print("No Params available")

                if restore_scale2:
                    scale2_ckpt = tf.train.get_checkpoint_state(SCALE2_DIR)
                    if scale2_ckpt and scale2_ckpt.model_checkpoint_path:
                        print("Scale2 params Loading.")
                        saver_scale2.restore(sess, scale2_ckpt.model_checkpoint_path)
                        print("Scale2 params Restored.")
                    else:
                        print("No Scale2 Params available")

            # initialize the queue threads to start to shovel data
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            # p = tf.Print(data_file, [data_file], message=)
            for epoch in range(EPOCHS):
                for i in range(45):
                    # sess.run(p, feed_dict={data_file: 'Test'})
                    if train_scale2:
                        _, loss_value, predictions_s1, predictions_s2, batch_images, summary = sess.run(
                            [optimizer, loss, scale1, scale2, train_images, summary_op],
                            feed_dict={mode: True})
                    else:
                        _, loss_value, predictions_s1, batch_images, summary = sess.run(
                            [optimizer, loss, scale1, train_images, summary_op],
                            feed_dict={mode: True})

                    validation_loss, _ = sess.run([loss, train_images], feed_dict={mode: False})
                    writer.add_summary(summary, epoch * 1000 + i)

                    if i % 10 == 0:
                        # log.info('step' + loss_value)
                        print("%s: %d[epoch]: %d[iteration]: train loss %f : validation %f"
                              % (datetime.now(), epoch, i, loss_value, validation_loss))
                        # print("%s: %d[epoch]: %d[iteration]: train loss %f" % (datetime.now(), epoch, i, loss_value))

                    if i == 41:
                        # save predictions
                        if not freeze_scale1:
                            output_predict(predictions_s1, batch_images,
                                           "data/predictions/predict_scale1_%05d_%05d" % (epoch, i))
                        if train_scale2:
                            output_predict(predictions_s2, batch_images,
                                           "data/predictions/predict_scale2_%05d_%05d" % (epoch, i))

                # save checkpoints at the end of each epoch
                if not freeze_scale1:
                    scale1_checkpoint_path = SCALE1_DIR + '/model'
                    saver_scale1.save(sess, scale1_checkpoint_path)
                if train_scale2:
                    scale2_checkpoint_path = SCALE2_DIR + '/model'
                    saver_scale2.save(sess, scale2_checkpoint_path)

            # stop our queue threads and properly close the session
            coord.request_stop()
            coord.join(threads)
            sess.close()
print(data_embeddings.dtype)
print(data_labels.dtype)
print(data_mask.dtype)

num_time = data_labels.shape[1]
num_labels = data_labels.shape[2]
feature_size = data_embeddings.shape[-1]
seed = 88

# Creating bi-lstm based computational graph and attaching
# loss and optimizer to the graph
outputs, inputs, labels, drop_rate = model.lstm_model(
    input_shape=(None, num_time, feature_size),
    label_shape=(None, num_time, num_labels),
    num_layers=NUM_LAYERS,
    cell_size=CELL_SIZE)
loss, mask = model.build_loss(labels, outputs, loss_name=ARGS.loss)
patient_pred = model.compute_patient_prediction(labels, outputs, mask)

train_loss = tf.summary.scalar('train_loss', loss)
validation_loss = tf.summary.scalar('val_loss', loss)
train_op, gradient_norm = model.optimizer(loss, lr=ARGS.lr)
grad_norm = tf.summary.scalar('grad_norm', gradient_norm)
train_summary = tf.summary.merge([train_loss, grad_norm])
validation_summary = tf.summary.merge([validation_loss])

saver = tf.train.Saver(tf.global_variables(), max_to_keep=1, save_relative_paths=True)
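# A rough training-session sketch for the graph assembled above (not part of the
# original file). It assumes data_embeddings / data_labels fit in memory and are
# fed whole rather than batched; ARGS.epochs, ARGS.dropout, and ARGS.logdir are
# hypothetical names standing in for the real run configuration.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(ARGS.logdir, graph=sess.graph)

    for step in range(ARGS.epochs):
        # run one optimizer step on the training data and log the merged summary
        _, loss_value, summary = sess.run(
            [train_op, loss, train_summary],
            feed_dict={inputs: data_embeddings,
                       labels: data_labels,
                       drop_rate: ARGS.dropout})
        writer.add_summary(summary, step)

    # keep only the latest checkpoint, matching max_to_keep=1 above
    saver.save(sess, ARGS.logdir + '/model.ckpt')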
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Training GlamPoints detector')
    parser.add_argument('--path_ymlfile', type=str,
                        default='configs/glampoints_training.yml',
                        help='Path to yaml file.')
    opt = parser.parse_args()

    with open(opt.path_ymlfile, 'r') as ymlfile:
        cfg = yaml.load(ymlfile)

    _device = settings.initialize_cuda_and_logging(cfg)

    train_loader, val_loader = make_data_loader(cfg)

    model = build_model(cfg)
    model.to(_device)

    optimizer = build_optimizer(cfg, model)
    loss_func = build_loss(cfg)

    logger, tb_logger = build_logger(cfg)

    do_train(cfg, model, train_loader, val_loader, optimizer, loss_func,
             logger, tb_logger, _device)
            X_batch.append(
                model.preprocess(cv2.flip(im, 1), config.INPUT_IMAGE_CROP))
            Y_batch.append(-Y[next_batch.offset])
        else:
            X_batch.append(model.preprocess(im, config.INPUT_IMAGE_CROP))
            Y_batch.append(Y[next_batch.offset])
        next_batch.offset += 1
    return X_batch, Y_batch


X, keep_prob, pred = model.build_net()
Y = tf.placeholder(tf.float32, [None], "human_data")
loss = model.build_loss(Y, pred, config.REGULARIZER_COEF, tf.trainable_variables())
global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name="global_step")
optimizer = tf.train.AdamOptimizer(1e-6, name="optimizer").minimize(
    loss, global_step=global_step)

samples, labels = load_training_data()

tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

exit_request = [False]
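# A hypothetical training-loop sketch for the graph above (not from the original
# file). It assumes next_batch(samples, labels, BATCH_SIZE) returns the X_batch,
# Y_batch lists built earlier, that exit_request[0] is flipped elsewhere (e.g. by
# a signal handler) to stop training cleanly, and that LOG_DIR and BATCH_SIZE are
# placeholder names.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(LOG_DIR, graph=sess.graph)
    saver = tf.train.Saver()

    while not exit_request[0]:
        X_batch, Y_batch = next_batch(samples, labels, BATCH_SIZE)
        _, loss_value, summary, step = sess.run(
            [optimizer, loss, summary_op, global_step],
            feed_dict={X: X_batch, Y: Y_batch, keep_prob: 0.5})
        writer.add_summary(summary, step)

        if step % 100 == 0:
            print("step %d: loss %f" % (step, loss_value))
            saver.save(sess, LOG_DIR + "/model.ckpt", global_step=global_step)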
    word = False
    max_time_step = data_handle.char_fixed_length
else:
    char = False
    word = True
    max_time_step = data_handle.word_fixed_length

print('data type: ', options.DataType)
print('max time step: ', max_time_step)

hidden_unit = [int(i) for i in options.HiddenUnit.split(' ')]
print('hidden unit: ', hidden_unit)
print('Epoch: ', options.Epoch)
print('display while {0} step'.format(options.DisplayWhileNStep))
print('save while {0} step'.format(options.SaveWhileNStep))
print('val while {0} epoch'.format(options.ValWhileNEpoch))

model.build(embeding_len=embeding_len,
            batch_size=options.BatchSize,
            hidden_unit=hidden_unit,
            max_time_step=max_time_step)
model.build_loss()
model.train(epoch=options.Epoch,
            save_path=options.SavePath,
            save_while_n_step=options.SaveWhileNStep,
            val_while_n_epoch=options.ValWhileNEpoch,
            data=data_handle,
            char=char,
            word=word,
            display_shilw_n_step=options.DisplayWhileNStep,
            basic_lr=options.LearningRate,
            device=options.UseCpu)
pass