def evaluate(tf_file, logs_test_dir, restore_dir, loss_dir, weight_write_dir):
    """Evaluate a restored checkpoint on the validation records.

    Each step prints the labels, the estimates and the loss, appends the loss
    to ``loss_dir``, dumps the min-max normalised weight maps returned by
    ``model.inference`` as images and writes the merged summaries.

    Args:
        tf_file: Value fed to the ``filenames`` placeholder of the input
            iterator (presumably a list of TFRecord paths -- confirm against
            ``data.read_and_decode``).
        logs_test_dir: Directory used for the summary writer and for the
            checkpoints saved during the run.
        restore_dir: Checkpoint path handed to ``saver.restore``.
        loss_dir: Path of the text file the per-step loss is appended to.
        weight_write_dir: Format string; ``weight_write_dir.format(step)`` is
            the path each weight image is written to.

    Relies on the module-level ``BATCH_SIZE``, ``MAX_STEP`` and ``list_loss``.
    """
    filenames = tf.placeholder(tf.string, shape=[None])
    validation_filenames = tf_file
    iterator = data.read_and_decode(filenames, BATCH_SIZE, False)

    sess = tf.Session()
    sess.run(iterator.initializer, feed_dict={filenames: validation_filenames})
    test_img, test_label = iterator.get_next()
    test_logits, test_weight = model.inference(test_img)
    test_loss = model.losses(test_logits, test_label)
    summary_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(logs_test_dir, sess.graph)
    saver = tf.train.Saver()
    saver.restore(sess, restore_dir)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in range(MAX_STEP):
            if coord.should_stop():
                break
            # Fetch everything in ONE run: every sess.run that depends on
            # iterator.get_next() pulls a fresh batch, so separate runs would
            # report weights/summaries for a different batch than the loss
            # and silently skip two batches per step.
            (test_logits_val, test_loss_val, test_label_val,
             tra_weight, summary_str) = sess.run(
                [test_logits, test_loss, test_label, test_weight, summary_op])
            print('label:', test_label_val)
            print('estimate:', test_logits_val)
            print('Step %d, test loss = %.2f' % (step + 1, test_loss_val))
            list_loss.append(test_loss_val)
            with open(loss_dir, 'a') as f:
                f.write('%.6f' % (test_loss_val))
                f.write("\n")

            print(tra_weight.shape)
            for j in range(tra_weight.shape[0]):
                tra_weight_draw = tra_weight[j, :, :, :]
                low = tra_weight_draw.min()
                span = tra_weight_draw.max() - low
                if span > 0:
                    tra_weight_draw = (tra_weight_draw - low) / span
                else:
                    # A constant map would divide by zero; render it black.
                    tra_weight_draw = np.zeros_like(tra_weight_draw)
                tra_weight_draw = tra_weight_draw * 255.0
                tra_weight_draw = cv.resize(tra_weight_draw, (600, 400))
                # NOTE(review): the path depends on `step` only, so within one
                # step every j overwrites the previous image.
                cv.imwrite(weight_write_dir.format(step),
                           tra_weight_draw.astype(np.uint8))

            train_writer.add_summary(summary_str, step)
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_test_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
        print('mean', sess.run(tf.reduce_sum(list_loss) / MAX_STEP))
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        sess.close()
    cv.waitKey(0)
def train():
    """Train the model on the two module-level TFRecord files.

    Builds the input queue, the network, the loss and the training op,
    initialises the variables, calls ``load_parameters`` with "Alexnet.npy",
    then runs ``MAX_STEP`` training steps. Every 50 steps the loss is printed
    and a summary is written; every 2000 steps and on the last step a
    checkpoint is saved under ``logs_train_dir``.

    Relies on the module-level ``tf_file1``, ``tf_file2``, ``BATCH_SIZE``,
    ``CAPACITY``, ``MAX_STEP``, ``learning_rate`` and ``logs_train_dir``.
    """
    # NOTE(review): assumed that only the input pipeline is pinned to the CPU
    # -- confirm the intended extent of this device scope.
    with tf.device('/cpu:0'):
        img, label = data.read_and_decode([tf_file1, tf_file2])
        img_batch, label_batch = data.get_batch(img, label, BATCH_SIZE, CAPACITY)
    train_logits, train_weight = model.inference(img_batch)
    train_loss = model.losses(train_logits, label_batch)
    train_op = model.trainning(train_loss, learning_rate)
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()

    # A single session, owned by the `with` block. Previously an extra
    # tf.Session() was created just to reach `sess.graph` and never closed.
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        sess.run(tf.global_variables_initializer())
        load_parameters("Alexnet.npy", sess, 'Alexnet')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for step in range(MAX_STEP):
                if coord.should_stop():
                    break
                _, tra_loss = sess.run([train_op, train_loss])
                if step % 50 == 0:
                    # The same loss value is printed for both fields.
                    print('Step %d, train loss = %.2f, l2 loss = %.2f'
                          % (step, tra_loss, tra_loss))
                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
                if step % 2000 == 0 or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
            train_writer.close()
def train():
    """Build the training graph for "train.tfrecords" and preview one batch.

    As written, the function fetches a single batch, prints its shapes, shows
    the first image in an OpenCV window and then terminates the process with
    ``exit(0)``; the training loop below that point is never reached.

    Relies on the module-level ``BATCH_SIZE``, ``CAPACITY``, ``MAX_STEP``,
    ``learning_rate`` and ``logs_train_dir``.
    """
    img, label = data.read_and_decode("train.tfrecords")
    img_batch, label_batch = data.get_batch(img, label, BATCH_SIZE, CAPACITY)
    train_logits = model.inference(img_batch)
    train_loss = model.losses(train_logits, label_batch)
    train_op = model.trainning(train_loss, learning_rate)
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()

    # A single session, owned by the `with` block. Previously an extra
    # tf.Session() was created just to reach `sess.graph` and never closed.
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Debug preview of the first batch.
        im, la = sess.run([img_batch, label_batch])
        print(im.shape, la.shape)
        cv2.imshow("asdas", im[0, :, :, :])
        cv2.waitKey(0)
        # NOTE(review): this exit makes everything below unreachable. Kept
        # because removing it changes what callers observe; if training is
        # re-enabled, variables must also be initialised first.
        exit(0)

        try:
            img, label = sess.run([img_batch, label_batch])
            for step in range(MAX_STEP):
                if coord.should_stop():
                    break
                _, tra_loss = sess.run([train_op, train_loss])
                if step % 50 == 0:
                    print('Step %d, train loss = %.2f' % (step, tra_loss))
                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
                if step % 2000 == 0 or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
            train_writer.close()
def train(self, epoch):
    """Train for ``steps * epoch`` iterations and save a checkpoint.

    Registers image/scalar summaries, reads batches from
    'Drone-CNN.tfrecords' through a queue, runs ``self.train_step`` on each
    batch (feeding ``self.lr_value``, ``self.m_value`` and a keep
    probability of 0.5) and writes one summary per iteration. The final
    model is saved to ``trained_model/<model_name>/Drone_CNN.ckpt``.

    Args:
        epoch: Multiplier applied to ``data.get_steps(self.batch_size)`` to
            obtain the number of iterations.

    Raises:
        OSError: if ``tb/<model_name>`` or ``trained_model/<model_name>``
            already exists (``os.makedirs`` is called without ``exist_ok``).
    """
    os.makedirs('tb/%s' % self.model_name)
    os.makedirs('trained_model/%s' % self.model_name)

    tf.summary.image('input x_image', self.x_image, 4)
    tf.summary.image('y_prediction', self.y_conv, 4)
    tf.summary.image('y_GT', self.y_, 4)
    tf.summary.image('y_pred_softmax', self.y_soft, 4)
    tf.summary.scalar('cross_entropy', self.cross_entropy)
    tf.summary.scalar('learning rate', self.lr)

    sess = tf.Session()
    saver = tf.train.Saver()
    merged = tf.summary.merge_all()
    # NOTE(review): the writer uses an absolute path while the directories
    # created above are relative -- confirm they are meant to coincide.
    train_writer = tf.summary.FileWriter(
        '/home/lsmjn/Drone-CNN/tb/%s' % self.model_name, sess.graph)

    conn, cur = data.get_db_connection()
    coord = tf.train.Coordinator()
    threads = []
    try:
        steps = data.get_steps(self.batch_size)
        filename_queue = tf.train.string_input_producer(['Drone-CNN.tfrecords'])
        image, annotation = data.read_and_decode(filename_queue, self.batch_size)
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        print('Training...')
        for k in tqdm(range(0, steps * epoch)):
            x_batch, y_batch = sess.run([image, annotation])
            summary, _ = sess.run(
                [merged, self.train_step],
                feed_dict={self.x_image: x_batch,
                           self.y_: y_batch,
                           self.lr: self.lr_value,
                           self.m: self.m_value,
                           self.keep_prob: 0.5})
            # Summaries are indexed from 1.
            train_writer.add_summary(summary, k + 1)

        save_path = saver.save(
            sess, "trained_model/%s/Drone_CNN.ckpt" % self.model_name)
        print('Model saved in file: %s' % save_path)
    finally:
        # Previously the queue-runner threads were never stopped and the
        # session never closed; release everything even when a step fails.
        coord.request_stop()
        coord.join(threads)
        cur.close()
        conn.close()
        train_writer.close()
        sess.close()
    # `f_log` is not defined in this method; it is expected to come from the
    # enclosing module.
    f_log.close()
def train(tf_file, logs_train_dir):
    """Train the model from the given records and write logs/checkpoints.

    Args:
        tf_file: Value fed to the ``filenames`` placeholder of the input
            iterator (presumably a list of TFRecord paths -- confirm against
            ``data.read_and_decode``).
        logs_train_dir: Directory for the summary writer and checkpoints.

    Runs ``MAX_STEP`` steps. Every 50 steps the loss is printed and a summary
    is written; every 2000 steps and on the last step a checkpoint is saved.
    Relies on the module-level ``BATCH_SIZE``, ``MAX_STEP`` and
    ``learning_rate``.
    """
    filenames = tf.placeholder(tf.string, shape=[None])
    training_filenames = tf_file
    with tf.device('/cpu:0'):
        iterator = data.read_and_decode(filenames, BATCH_SIZE, True)

    sess = tf.Session()
    sess.run(iterator.initializer, feed_dict={filenames: training_filenames})
    tra_img, tra_label = iterator.get_next()
    train_logits, train_weight = model.inference(tra_img)
    train_loss = model.losses(train_logits, tra_label)
    train_op = model.trainning(train_loss, learning_rate)
    summary_op = tf.summary.merge_all()

    # Initialise first, then load. Running the initializer after
    # load_parameters would reset the variables it had just filled
    # (assuming load_parameters assigns values through the session --
    # confirm against its definition).
    sess.run(tf.global_variables_initializer())
    load_parameters("Alexnet.npy", sess, 'Alexnet')

    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in range(MAX_STEP):
            if coord.should_stop():
                break
            if step % 50 == 0:
                # Fetch the summary in the training run itself: a separate
                # sess.run would pull (and waste) another batch from the
                # iterator and describe a batch that was never trained on.
                _, tra_loss, summary_str = sess.run(
                    [train_op, train_loss, summary_op])
                # The same loss value is printed for both fields.
                print('Step %d, train loss = %.2f, l2 loss = %.2f'
                      % (step, tra_loss, tra_loss))
                train_writer.add_summary(summary_str, step)
            else:
                _, tra_loss = sess.run([train_op, train_loss])
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        sess.close()
def train(tf_file, checkpoint, visualize):
    """Train the ResNet50 + FPN model with one loss per pyramid level.

    The training loss and the reported estimate are the means over the four
    pyramid outputs p2..p5. Training resumes from ``checkpoint`` when
    ``load_checkpoint`` succeeds; otherwise variables are initialised and the
    'resnet_model' variables are restored from the ImageNet checkpoint.

    Args:
        tf_file: Value fed to the ``filenames`` placeholder of the input
            iterator.
        checkpoint: Directory used to resume from, to write summaries to and
            to save checkpoints in.
        visualize: Only referenced by the disabled image dump below.

    Relies on the module-level ``BATCH_SIZE``, ``END_EPOCH``, ``train_num``,
    ``is_weight``, ``it_cnt`` and ``update_cnt``.
    """
    restore_dir = checkpoint
    learning_rate = tf.Variable(0.0001, trainable=False)
    # Divides the learning rate by 10, never going below 1e-6.
    decay_lr = tf.assign(learning_rate, tf.maximum(learning_rate / 10.0, 1e-6))

    filenames = tf.placeholder(tf.string, shape=[None])
    training_filenames = tf_file
    with tf.device('/cpu:0'):
        iterator = data.read_and_decode(filenames, BATCH_SIZE, True)

    sess = tf.Session()
    sess.run(iterator.initializer, feed_dict={filenames: training_filenames})
    tra_imgs, tra_labels = iterator.get_next()

    logits, tra_c5 = resnet.ResNet50(tra_imgs, True)
    p2, p3, p4, p5 = resnet.FPN(logits, True)
    tra_est_labels2, tra_fea_rgbs2 = resnet.cal_loss(p2, True, 'p2', is_weight)
    tra_est_labels3, tra_fea_rgbs3 = resnet.cal_loss(p3, True, 'p3', is_weight)
    tra_est_labels4, tra_fea_rgbs4 = resnet.cal_loss(p4, True, 'p4', is_weight)
    tra_est_labels5, tra_fea_rgbs5 = resnet.cal_loss(p5, True, 'p5', is_weight)
    tra_losss2 = resnet.losses(tra_est_labels2, tra_labels)
    tra_losss3 = resnet.losses(tra_est_labels3, tra_labels)
    tra_losss4 = resnet.losses(tra_est_labels4, tra_labels)
    tra_losss5 = resnet.losses(tra_est_labels5, tra_labels)
    tra_losss = (tra_losss2 + tra_losss3 + tra_losss4 + tra_losss5) / 4.0
    tra_est_labels = (tra_est_labels2 + tra_est_labels3
                      + tra_est_labels4 + tra_est_labels5) / 4.0
    # Only the p2 feature map is visualised.
    tra_fea_rgbs = tra_fea_rgbs2
    # Single-output alternative, currently disabled:
    # tra_est_labels, tra_fea_rgbs= resnet.cal_loss(logits, True, 'logits')
    # tra_losss = resnet.losses(tra_est_labels, tra_labels)

    # This saver covers only the 'resnet_model' variables; it is created
    # before the training op is built and is used for the pretrained restore.
    saver = tf.train.Saver(
        tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'resnet_model'))
    train_op = resnet.trainning(tra_losss, learning_rate)
    visualization = utils.get_visualization(
        tra_imgs, tra_fea_rgbs, tra_est_labels, tra_labels, BATCH_SIZE)
    summary_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(checkpoint, sess.graph)

    try:
        try:
            load_checkpoint(restore_dir, sess)
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit. Any ordinary failure to resume still falls
            # back to the pretrained backbone.
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, './resnet_imagenet_v2/model.ckpt-225207')
            print("restore succed!")
        # From here on, save every variable, not just the backbone.
        saver = tf.train.Saver()

        for epoch in range(sess.run(it_cnt), END_EPOCH):
            sess.run(update_cnt)
            batches_per_epoch = train_num // BATCH_SIZE
            start_time = time.time()
            for _ in range(batches_per_epoch):
                _, lr_value, tra_loss, vis_img = sess.run(
                    [train_op, learning_rate, tra_losss, visualization])
            duration_time = time.time() - start_time
            if epoch % 10 == 0:
                print('Step %d, train loss = %.2f' % (epoch, tra_loss))
                print("{} images per secs, lr = {}".format(
                    batches_per_epoch * BATCH_SIZE / duration_time, lr_value))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, epoch)
            # Disabled image dump:
            # if epoch % 200 == 0:
            #     for k, merged in enumerate(vis_img):
            #         if k % 8==0:
            #             cv2.imwrite(visualize.format(epoch + 1, k + 1, tra_loss).strip('\r'),
            #                         merged * 255)
            if (epoch + 1) % 2000 == 0 or (epoch + 1) == END_EPOCH:
                print("start decay lr")
                sess.run(decay_lr)
                checkpoint_path = os.path.join(checkpoint, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=epoch)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Release the writer and the session on every exit path, not only
        # after a clean run.
        train_writer.close()
        sess.close()
def evaluate(MAX_STEP, tf_file, logs_test_dir, restore_dir, loss_dir, rgb_write_dir):
    """Evaluate the restored ResNet50 + FPN model for ``MAX_STEP`` steps.

    Per step, the averaged estimate and the losses of the averaged and the
    four per-level (p2..p5) estimates are computed in a single run, printed,
    and appended as one line to ``loss_dir``; the merged summary is written
    to ``logs_test_dir``. After the loop, ``metric`` is called on the
    module-level ``list_loss``.

    Args:
        MAX_STEP: Number of evaluation steps.
        tf_file: Value fed to the ``filenames`` placeholder of the iterator.
        logs_test_dir: Directory for summaries and the checkpoints saved here.
        restore_dir: Checkpoint path handed to ``saver.restore``.
        loss_dir: Path of the text log that receives one line per step.
        rgb_write_dir: Not used by the current body.
    """
    filenames = tf.placeholder(tf.string, shape=[None])
    validation_filenames = tf_file
    iterator = data.read_and_decode(filenames, BATCH_SIZE, False)

    sess = tf.Session()
    sess.run(iterator.initializer, feed_dict={filenames: validation_filenames})
    images, labels = iterator.get_next()

    backbone_out, c5 = resnet.ResNet50(images, False)
    lvl2, lvl3, lvl4, lvl5 = resnet.FPN(backbone_out, False)
    est2, rgb2 = resnet.cal_loss(lvl2, False, 'p2', is_weight)
    est3, rgb3 = resnet.cal_loss(lvl3, False, 'p3', is_weight)
    est4, rgb4 = resnet.cal_loss(lvl4, False, 'p4', is_weight)
    est5, rgb5 = resnet.cal_loss(lvl5, False, 'p5', is_weight)
    est_mean = (est2 + est3 + est4 + est5)/4.0

    loss_op2 = resnet.losses(est2, labels)
    loss_op3 = resnet.losses(est3, labels)
    loss_op4 = resnet.losses(est4, labels)
    loss_op5 = resnet.losses(est5, labels)
    vis_op2 = utils.get_visualization(images, rgb2, est2, labels, BATCH_SIZE)
    vis_op3 = utils.get_visualization(images, rgb3, est3, labels, BATCH_SIZE)
    vis_op4 = utils.get_visualization(images, rgb4, est4, labels, BATCH_SIZE)
    vis_op5 = utils.get_visualization(images, rgb5, est5, labels, BATCH_SIZE)
    loss_op_mean = resnet.losses(est_mean, labels)

    summary_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(logs_test_dir, sess.graph)
    saver = tf.train.Saver()
    saver.restore(sess, restore_dir)

    # Everything needed for one step, fetched together.
    fetches = [labels, est_mean, loss_op_mean,
               loss_op2, loss_op3, loss_op4, loss_op5,
               vis_op2, vis_op3, vis_op4, vis_op5,
               summary_op]
    try:
        for step in range(MAX_STEP):
            (label_val, est_val, loss_mean, loss2, loss3, loss4, loss5,
             vis_img2, vis_img3, vis_img4, vis_img5, summary_str) = sess.run(fetches)

            print('label:', label_val)
            print('estimate:', est_val)
            print('Step %d, test loss = %.2f' % (step + 1, loss_mean),
                  'loss2 = %.2f loss3 = %.2f loss4 = %.2f loss5 = %.2f'
                  % (loss2, loss3, loss4, loss5))
            list_loss.append(loss_mean)

            # One log line: five losses, then the label and the estimate.
            record = ''.join(
                '%.2f ' % value
                for value in (loss_mean, loss2, loss3, loss4, loss5))
            record += str(label_val) + str(est_val) + "\n"
            with open(loss_dir, 'a') as f:
                f.write(record)

            # The per-level visualisations are fetched, but writing them to
            # image files is currently disabled.

            train_writer.add_summary(summary_str, step)
            is_last_step = (step + 1) == MAX_STEP
            if step % 2000 == 0 or is_last_step:
                checkpoint_path = os.path.join(logs_test_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    sess.close()
    metric(list_loss)
# Script fragment: derives the total step count, opens a session and builds
# the shuffled training batches from the TFRecord readers.
# NOTE(review): the nesting under tf.device was reconstructed from a
# whitespace-collapsed source -- confirm which statements belong inside it.
n_step = n_step_epoch*epoches
print_freq =1
print("Start.")
with tf.device("/gpu:3"):
    # Soft placement lets ops without a kernel for the requested device run
    # elsewhere.
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    #1.prepare data in cpu
    # (TFRecord generation below is disabled; the record files are
    # presumably expected to exist already -- verify.)
    #print("Prepare Data ...")
    #train_indexs = load_index(is_train=True)
    #test_indexs = load_index(is_train=False)
    #data_to_tfrecord(train_indexs, "train_tfrecord", is_train=True)
    #data_to_tfrecord(test_indexs, "test_tfrecord", is_train=False)
    #print("Data Success.")
    # y_train is indexed by attribute name below.
    x_train, y_train = read_and_decode("train_tfrecord", is_train=True)
    x_test = read_and_decode("test_tfrecord", is_train=False)
    # Shuffle the image together with its seven attribute labels so they stay
    # aligned; outputs come back in the same order as the input list.
    x_train_batch, hair_batch, hat_batch, \
    gender_batch, top_batch, down_batch, \
    shoes_batch, bag_batch = tf.train.shuffle_batch([x_train, y_train['hair'], y_train['hat'], y_train['gender'], y_train['top'], y_train['down'], y_train['shoes'], y_train['bag']], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=12)
    # Regroup the batched labels under their attribute names.
    y_train_batch = {"hat":hat_batch, "hair":hair_batch, "gender":gender_batch, "top":top_batch, "down":down_batch, "shoes":shoes_batch, "bag":bag_batch}