def train(dataset_train, dataset_val, dataset_test, ckptfile='', caffemodel=''):
    print('Training start...')
    batch_size = FLAGS.batch_size
    path = modelpath("")
    if not os.path.exists(path):
        os.makedirs(path)

    with tf.Graph().as_default():
        startstep = 0  # if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)

        # placeholders for graph input
        anchor_search = tf.placeholder('float32', shape=(None, 227, 227, 3))
        anchor_street = tf.placeholder('float32', shape=(None, 227, 227, 3))
        anchor_aerial = tf.placeholder('float32', shape=(None, 227, 227, 3))
        positive = tf.placeholder('float32', shape=(None, 227, 227, 3))
        negative = tf.placeholder('float32', shape=(None, 227, 227, 3))
        keep_prob_ = tf.placeholder('float32')

        # graph outputs
        feature_anchor = model.inference_crossview(
            [anchor_search, anchor_street, anchor_aerial],
            keep_prob_, FLAGS.feature, False)
        feature_positive = model.inference(positive, keep_prob_, FLAGS.feature)
        feature_negative = model.inference(negative, keep_prob_, FLAGS.feature)

        feature_size = tf.size(feature_anchor) / batch_size
        feature_list = model.feature_normalize(
            [feature_anchor, feature_positive, feature_negative])
        loss, d_pos, d_neg, loss_origin = model.triplet_loss(
            feature_list[0], feature_list[1], feature_list[2])

        # summaries
        summary_op = tf.merge_all_summaries()
        training_loss = tf.placeholder('float32', shape=(), name='training_loss')
        training_summary = tf.scalar_summary('training_loss', training_loss)

        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr).minimize(loss)  # batch size 512
        # optimizer = tf.train.AdamOptimizer(learning_rate=0.0000001).minimize(loss)

        # validation
        validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
        validation_summary = tf.scalar_summary('validation_loss', validation_loss)

        # test: pair loss only needs (anchor, positive)
        feature_pair_list = model.feature_normalize(
            [feature_anchor, feature_positive])
        pair_loss = model.eval_loss(feature_pair_list[0], feature_pair_list[1])
        testing_loss = tf.placeholder('float32', shape=(), name='testing_loss')
        testing_summary = tf.scalar_summary('testing_loss', testing_loss)

        init_op = tf.initialize_all_variables()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            saver = tf.train.Saver(max_to_keep=50)
            if ckptfile:
                # load checkpoint file
                saver.restore(sess, ckptfile)
                # Alternative kept for reference: restore only the shared
                # (non cross-view) variables, then copy fc6 into cv_fc6.
                # sess.run(init_op)
                # all_vars = tf.all_variables()
                # cv_vars = [k for k in all_vars if k.name.startswith("cv_")]
                # share_vars = [k for k in all_vars if not k.name.startswith("cv_")]
                # saver_share = tf.train.Saver(share_vars)
                # saver_share.restore(sess, ckptfile)
                # with tf.variable_scope('fc6', reuse=True):
                #     w = tf.get_variable('weights')
                #     b = tf.get_variable('biases')
                # with tf.variable_scope('cv_fc6', reuse=True):
                #     for subkey, data in zip(('weights', 'biases'), (w, b)):
                #         print('loading cv_fc6 {}'.format(subkey))
                #         var = tf.get_variable(subkey)
                #         sess.run(var.assign(data))
                print('restore variables done')
            elif caffemodel:
                # load caffemodel generated with caffe-tensorflow
                sess.run(init_op)
                model.load_alexnet(sess, caffemodel)
                print('loaded pretrained caffemodel: {}'.format(caffemodel))
            else:
                # from scratch
                sess.run(init_op)
                print('init_op done')

            summary_writer = tf.train.SummaryWriter(
                "logs/{}/{}/{}".format(FLAGS.train_dir, FLAGS.feature, parameter_name),
                graph=sess.graph)

            epoch = 1
            # plain Python ints; these shadow the (unused) global_step variable above
            global_step = step = print_iter_sum = 0
            min_loss = min_test_loss = float('inf')
            loss_sum = []
            while True:
                batch_x, batch_y, batch_z, isnextepoch, start, end = \
                    dataset_train.sample_path2img(batch_size, True)
                step += len(batch_y)
                global_step += len(batch_y)
                print_iter_sum += len(batch_y)
                feed_dict = {anchor_search: batch_x['search'],
                             anchor_street: batch_x['streetview_clean'],
                             anchor_aerial: batch_x['aerial_clean'],
                             positive: batch_y,
                             negative: batch_z,
                             keep_prob_: 0.5}  # dropout keep probability

                _, loss_value, pos_value, neg_value, origin_value, anchor_value = sess.run(
                    [optimizer, loss, d_pos, d_neg, loss_origin, feature_list[0]],
                    feed_dict=feed_dict)
                loss_value = np.mean(loss_value)
                loss_sum.append(loss_value)

                if print_iter_sum / print_iter >= 1:
                    print('epo{}, {}/{}, loss: {}'.format(
                        epoch, step, len(dataset_train.data), np.mean(loss_sum)))
                    print_iter_sum -= print_iter
                    loss_sum = []

                train_summ = sess.run(training_summary,
                                      feed_dict={training_loss: loss_value})
                summary_writer.add_summary(train_summ, global_step)
                summary_writer.flush()

                action = 0
                if FLAGS.remove and loss_value == 0:
                    action = dataset_train.remove(start, end)
                    if action == 1:
                        finish_training(saver, sess, epoch)
                        break

                if isnextepoch or action == -1:
                    val_loss_sum = []
                    isnextepoch = False  # reset for the validation loop

                    step = 0
                    print_iter_sum = 0

                    # validation
                    while not isnextepoch:
                        val_x, val_y, val_z, isnextepoch, start, end = \
                            dataset_val.sample_path2img(batch_size, True)
                        val_feed_dict = {anchor_search: val_x['search'],
                                         anchor_street: val_x['streetview_clean'],
                                         anchor_aerial: val_x['aerial_clean'],
                                         positive: val_y,
                                         negative: val_z,
                                         keep_prob_: 1.}
                        val_loss = sess.run([loss], feed_dict=val_feed_dict)
                        val_loss_sum.append(np.mean(val_loss))
                    dataset_val.reset_sample()

                    val_loss_sum = np.mean(val_loss_sum)
                    print("Validation loss: {}".format(val_loss_sum))
                    summary_val_loss_sum = sess.run(
                        validation_summary,
                        feed_dict={validation_loss: val_loss_sum})
                    summary_writer.add_summary(summary_val_loss_sum, global_step)

                    # testing
                    num = 50
                    test_feed_dict = {
                        anchor_search: dataset_test[0]['search'][:num],
                        anchor_street: dataset_test[0]['streetview_clean'][:num],
                        anchor_aerial: dataset_test[0]['aerial_clean'][:num],
                        positive: dataset_test[1][:num],
                        # pair_loss ignores the negative branch; fed only to
                        # satisfy the placeholder
                        negative: dataset_test[0]['search'][:num],
                        keep_prob_: 1.}
                    test_loss = sess.run([pair_loss], feed_dict=test_feed_dict)
                    test_loss = np.mean(test_loss)
                    print("Testing loss: {}".format(test_loss))
                    summary_test_loss = sess.run(
                        testing_summary, feed_dict={testing_loss: test_loss})
                    summary_writer.add_summary(summary_test_loss, global_step)

                    # flush the merged summaries
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, global_step)
                    summary_writer.flush()

                    # save by testing loss
                    if min_test_loss > test_loss:
                        min_test_loss = test_loss
                        best_test_path = modelpath("test_{}_{}".format(epoch, test_loss))
                        saver.save(sess, best_test_path)
                        print(best_test_path)
                    # save by validation loss
                    elif min_loss > val_loss_sum:
                        min_loss = val_loss_sum
                        best_path = modelpath("val_{}_{}".format(epoch, val_loss_sum))
                        saver.save(sess, best_path)
                        print(best_path)
                    # save by SAVE_INTERVAL
                    elif epoch % SAVE_INTERVAL == 0:
                        path = modelpath(epoch)
                        saver.save(sess, path)
                        print(path)

                    dataset_train.reset_sample()
                    print(epoch)
                    epoch += 1
                    if epoch >= max_epo:
                        finish_training(saver, sess, epoch)
                        break
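
# model.triplet_loss above returns (loss, d_pos, d_neg, loss_origin); its exact
# margin and distance are defined elsewhere in the repo and are not shown here.
# The NumPy sketch below only illustrates the standard hinge formulation
# max(d(a, p) - d(a, n) + margin, 0) on already-normalized features; the names
# and the margin value are illustrative assumptions, not the repo's definitions.
import numpy as np

def triplet_loss_sketch(anchor, positive, negative, margin=0.2):
    """Hinge-style triplet loss over rows of (batch, dim) feature matrices."""
    d_pos = np.sum((anchor - positive) ** 2, axis=1)  # squared L2 dist anchor-positive
    d_neg = np.sum((anchor - negative) ** 2, axis=1)  # squared L2 dist anchor-negative
    loss_origin = np.maximum(d_pos - d_neg + margin, 0.0)  # per-example hinge
    return loss_origin.mean(), d_pos, d_neg, loss_origin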
def train(dataset_train, dataset_val, ckptfile='', caffemodel=''):
    print('train() called')
    is_finetune = bool(ckptfile)
    batch_size = FLAGS.batch_size

    data_size = dataset_train.size()
    print('training size: {}'.format(data_size))

    with tf.Graph().as_default():
        startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)

        image_, y_ = model.input()
        keep_prob_ = tf.placeholder('float32', name='keep_prob')
        phase_train_ = tf.placeholder(tf.bool, name='phase_train')

        logits = model.inference(image_, keep_prob_, phase_train_)
        prediction = model.classify(logits)
        loss, print_op = model.loss(logits, y_)
        train_op = model.train(loss, global_step, data_size)

        # build the summary operation from the TF collection of summaries
        summary_op = tf.summary.merge_all()

        # manual summaries; must be created after merge_all so they are not
        # pulled into summary_op
        validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
        validation_summary = tf.summary.scalar('validation_loss', validation_loss)
        validation_acc = tf.placeholder('float32', shape=(), name='validation_accuracy')
        validation_acc_summary = tf.summary.scalar('validation_accuracy', validation_acc)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

        init_op = tf.initialize_all_variables()
        # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        # sess = tf.Session(config=tf.ConfigProto(
        #     log_device_placement=FLAGS.log_device_placement,
        #     gpu_options=gpu_options))
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        if is_finetune:
            saver.restore(sess, ckptfile)
            print('restore variables done')
        elif caffemodel:
            sess.run(init_op)
            model.load_alexnet(sess, caffemodel)
            print('loaded pretrained caffemodel: {}'.format(caffemodel))
        else:
            # from scratch
            sess.run(init_op)
            print('init_op done')

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=sess.graph)

        step = startstep
        for epoch in range(100):
            print('epoch: {}'.format(epoch))
            dataset_train.shuffle()

            for batch_x, batch_y in dataset_train.batches(batch_size):
                if step >= FLAGS.max_steps:
                    break
                step += 1

                start_time = time.time()
                feed_dict = {image_: batch_x,
                             y_: batch_y,
                             keep_prob_: 0.5,
                             phase_train_: True}

                _, loss_value, logits_value, _ = sess.run(
                    [train_op, loss, logits, print_op], feed_dict=feed_dict)
                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % 10 == 0 or step < 30:
                    sec_per_batch = float(duration)
                    print('%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)'
                          % (datetime.now(), step, loss_value,
                             FLAGS.batch_size / duration, sec_per_batch))

                # validation
                if step % 100 == 0:
                    val_losses = []
                    val_logits = []
                    predictions = np.array([])
                    val_y = []

                    for val_step, (val_batch_x, val_batch_y) in \
                            enumerate(dataset_val.sample_batches(batch_size,
                                                                 g_.VAL_SAMPLE_SIZE)):
                        val_feed_dict = {image_: val_batch_x,
                                         y_: val_batch_y,
                                         keep_prob_: 1.0,
                                         phase_train_: False}
                        val_loss, pred, val_logit, _ = sess.run(
                            [loss, prediction, logits, print_op],
                            feed_dict=val_feed_dict)
                        val_losses.append(val_loss)
                        val_logits.extend(val_logit.tolist())
                        predictions = np.hstack((predictions, pred))
                        val_y.extend(val_batch_y)

                    val_logits = np.array(val_logits)
                    val_loss = np.mean(val_losses)
                    acc = metrics.accuracy_score(val_y[:predictions.size],
                                                 np.array(predictions))
                    print('%s: step %d, validation loss=%.4f, acc=%f'
                          % (datetime.now(), step, val_loss, acc * 100.))

                    # validation summaries
                    val_loss_summ = sess.run(validation_summary,
                                             feed_dict={validation_loss: val_loss})
                    val_acc_summ = sess.run(validation_acc_summary,
                                            feed_dict={validation_acc: acc})
                    summary_writer.add_summary(val_loss_summ, step)
                    summary_writer.add_summary(val_acc_summ, step)
                    summary_writer.flush()

                if step % 100 == 0:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                if (step % 200 == 0 or (step + 1) == FLAGS.max_steps) \
                        and step > startstep:
                    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

            if step >= FLAGS.max_steps:
                break
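
# The finetune path above recovers the starting step from the checkpoint file
# name: saver.save(..., global_step=step) appends '-<step>' to the path (e.g.
# 'model.ckpt-1200'), so splitting on '-' yields the step. A minimal sketch of
# that round trip; the path below is a made-up example:
def parse_start_step(ckptfile):
    """Recover the global step encoded in a 'model.ckpt-<step>' path."""
    return int(ckptfile.split('-')[-1])

assert parse_start_step('logs/model.ckpt-1200') == 1200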
def train(dataset_train, dataset_val, finetune='', caffemodel=''):
    """Main train function: trains the model and saves the checkpoint with the
    minimum validation loss."""
    batch_size = FLAGS.batch_size

    with tf.Graph().as_default():
        # graph of AlexNet
        images = tf.placeholder('float32', shape=(None, 227, 227, 3))
        labels = tf.placeholder('int64', shape=(None))
        keep_prob_ = tf.placeholder('float32')  # dropout keep probability

        logits = model.inference(images, keep_prob_, CLASS_NUMBER)
        loss = model.loss(logits, labels)
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr).minimize(loss)

        # initialize all variables in the graph
        init_op = tf.initialize_all_variables()

        step = 0
        epo = 1
        # keep the 5 latest checkpoints
        saver = tf.train.Saver(max_to_keep=5)
        min_loss = float('inf')

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        # session for the training process
        with tf.Session(config=config) as sess:
            sess.run(init_op)

            # load the AlexNet model pretrained on ImageNet
            if caffemodel:
                model.load_alexnet(sess, caffemodel)
                print('loaded pretrained caffemodel: {}'.format(caffemodel))
            # or load a model we trained ourselves
            elif finetune:
                saver.restore(sess, finetune)
                print('loaded finetune model: {}'.format(finetune))

            # training epochs
            while epo <= 100:
                # get a batch of data from the dataset
                batch_x, batch_y, isnextepoch = dataset_train.sample(batch_size)
                feed_dict = {images: batch_x, labels: batch_y, keep_prob_: 0.5}

                # run the graph and backpropagate
                _, loss_value = sess.run([optimizer, loss], feed_dict=feed_dict)
                step += len(batch_y)

                if step / batch_size % 10 == 0:
                    print('epo{}: {}/{}, loss = {}'.format(
                        epo, step, len(dataset_train), loss_value))

                # end of epoch
                if isnextepoch:
                    val_loss_sum = []
                    isnextepoch = False  # reset for the validation loop

                    # validation process
                    while not isnextepoch:
                        val_x, val_y, isnextepoch = dataset_val.sample(batch_size)
                        feed_dict = {images: val_x, labels: val_y, keep_prob_: 1.}
                        logit, val_loss = sess.run([logits, loss],
                                                   feed_dict=feed_dict)
                        val_loss_sum.append(val_loss)

                    val_loss = np.mean(val_loss_sum)
                    print('validation loss: {}'.format(val_loss))

                    # if validation improves, save the model
                    if min_loss > val_loss:
                        print('Save model...')
                        saver.save(sess, osp.join(model_dir, 'model_best'))
                        saver.save(sess, osp.join(model_dir,
                                                  'val_{}_{}'.format(epo, val_loss)))
                        min_loss = val_loss

                    epo += 1
                    step = 0
                    # reshuffle the dataset for the next epoch
                    dataset_train.reset_sample()
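
# model.loss(logits, labels) above is defined elsewhere in the repo; for
# classification with integer labels it is conventionally sparse softmax
# cross-entropy. The NumPy sketch below shows that convention only, as an
# illustrative assumption; the repo's model.loss may differ in details.
import numpy as np

def softmax_xent_sketch(logits, labels):
    """Mean sparse softmax cross-entropy over a batch of (batch, classes) logits."""
    shifted = logits - logits.max(axis=1, keepdims=True)  # for numerical stability
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()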
def train(dataset_train, dataset_val, ckptfile='', caffemodel=''):
    print('Training start...')
    is_finetune = bool(ckptfile)
    batch_size = FLAGS.batch_size
    path = modelpath("")
    if not os.path.exists(path):
        os.makedirs(path)

    with tf.Graph().as_default():
        startstep = 0  # if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)

        # placeholders for graph input
        anchor = tf.placeholder('float32', shape=(None, 227, 227, 3))
        positive = tf.placeholder('float32', shape=(None, 227, 227, 3))
        negative = tf.placeholder('float32', shape=(None, 227, 227, 3))
        keep_prob_ = tf.placeholder('float32')

        # graph outputs
        feature_anchor = model.inference(anchor, keep_prob_, FLAGS.feature, False)
        feature_positive = model.inference(positive, keep_prob_, FLAGS.feature)
        feature_negative = model.inference(negative, keep_prob_, FLAGS.feature)

        feature_size = tf.size(feature_anchor) / batch_size
        feature_list = model.feature_normalize(
            [feature_anchor, feature_positive, feature_negative])
        loss, d_pos, d_neg, loss_origin = model.triplet_loss(
            feature_list[0], feature_list[1], feature_list[2])

        # summaries
        summary_op = tf.merge_all_summaries()
        training_loss = tf.placeholder('float32', shape=(), name='training_loss')
        training_summary = tf.scalar_summary('training_loss', training_loss)

        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr).minimize(loss)  # batch size 512

        # validation
        validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
        validation_summary = tf.scalar_summary('validation_loss', validation_loss)

        init_op = tf.initialize_all_variables()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            saver = tf.train.Saver(max_to_keep=max_epo)
            if ckptfile:
                # load checkpoint file
                saver.restore(sess, ckptfile)
                print('restore variables done')
            elif caffemodel:
                # load caffemodel generated with caffe-tensorflow
                sess.run(init_op)
                model.load_alexnet(sess, caffemodel)
                print('loaded pretrained caffemodel: {}'.format(caffemodel))
            else:
                # from scratch
                sess.run(init_op)
                print('init_op done')

            summary_writer = tf.train.SummaryWriter(
                "logs/{}/{}/{}".format(FLAGS.train_dir, FLAGS.feature, parameter_name),
                graph=sess.graph)

            epoch = 1
            # plain Python ints; these shadow the (unused) global_step variable above
            global_step = step = print_iter_sum = 0
            min_loss = float('inf')
            loss_sum = []
            while True:
                batch_x, batch_y, batch_z, isnextepoch, start, end = \
                    dataset_train.sample_path2img(batch_size)
                step += len(batch_x)
                global_step += len(batch_x)
                print_iter_sum += len(batch_x)
                feed_dict = {anchor: batch_x,
                             positive: batch_y,
                             negative: batch_z,
                             keep_prob_: FLAGS.dropout}  # dropout keep probability

                _, loss_value, pos_value, neg_value, origin_value, anchor_value = sess.run(
                    [optimizer, loss, d_pos, d_neg, loss_origin, feature_list[0]],
                    feed_dict=feed_dict)
                loss_value = np.mean(loss_value)
                loss_sum.append(loss_value)

                if print_iter_sum / print_iter >= 1:
                    print('epo{}, {}/{}, loss: {}'.format(
                        epoch, step, len(dataset_train.data), np.mean(loss_sum)))
                    print_iter_sum -= print_iter
                    loss_sum = []

                train_summ = sess.run(training_summary,
                                      feed_dict={training_loss: loss_value})
                summary_writer.add_summary(train_summ, global_step)
                summary_writer.flush()

                action = 0
                if FLAGS.remove and loss_value == 0:
                    action = dataset_train.remove(start, end)
                    if action == 1:
                        finish_training(saver, sess, epoch)
                        break

                if isnextepoch or action == -1:
                    val_loss_sum = []
                    isnextepoch = False  # reset for the validation loop

                    step = 0
                    print_iter_sum = 0

                    # validation
                    while not isnextepoch:
                        val_x, val_y, val_z, isnextepoch, start, end = \
                            dataset_val.sample_path2img(batch_size)
                        val_feed_dict = {anchor: val_x,
                                         positive: val_y,
                                         negative: val_z,
                                         keep_prob_: 1.}
                        val_loss = sess.run([loss], feed_dict=val_feed_dict)
                        val_loss_sum.append(np.mean(val_loss))
                    dataset_val.reset_sample()

                    val_loss_sum = np.mean(val_loss_sum)
                    print("Validation loss: {}".format(val_loss_sum))
                    summary_val_loss_sum = sess.run(
                        validation_summary,
                        feed_dict={validation_loss: val_loss_sum})
                    summary_writer.add_summary(summary_val_loss_sum, global_step)

                    # flush the merged summaries
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, global_step)
                    summary_writer.flush()

                    # save by validation loss
                    if min_loss > val_loss_sum:
                        min_loss = val_loss_sum
                        best_path = modelpath("val_{}_{}".format(epoch, val_loss_sum))
                        saver.save(sess, best_path)
                        print(best_path)
                    # save by SAVE_INTERVAL
                    elif epoch % SAVE_INTERVAL == 0:
                        path = modelpath(epoch)
                        saver.save(sess, path)
                        print(path)

                    dataset_train.reset_sample()
                    print(epoch)
                    epoch += 1
                    if epoch >= max_epo:
                        finish_training(saver, sess, epoch)
                        break
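
# model.feature_normalize above is assumed to L2-normalize each feature row so
# that the triplet distances are comparable across the anchor, positive, and
# negative branches. The NumPy sketch below shows that assumption only; the
# repo's actual implementation is defined elsewhere and may differ.
import numpy as np

def feature_normalize_sketch(feature_list, eps=1e-12):
    """L2-normalize each (batch, dim) feature matrix row-wise."""
    return [f / (np.linalg.norm(f, axis=1, keepdims=True) + eps)
            for f in feature_list]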