def main():
    # Use fewer test-time inner updates while meta-training; evaluate with
    # more inner updates when testing.
    if FLAGS.train:
        test_num_updates = 5
    else:
        test_num_updates = 10

    orig_meta_batch_size = FLAGS.meta_batch_size
    if not FLAGS.train:
        # Always use a meta batch size of 1 when testing.
        FLAGS.meta_batch_size = 1

    data_generator = SinusoidDataGenerator(FLAGS.update_batch_size * 2, FLAGS.meta_batch_size)
    dim_output = data_generator.dim_output
    dim_input = data_generator.dim_input
    input_tensors = None

    model = MAML(
        stop_grad=FLAGS.stop_grad,
        meta_lr=FLAGS.meta_lr,
        num_updates=FLAGS.num_updates,
        update_lr=FLAGS.update_lr,
        dim_input=dim_input,
        dim_output=dim_output,
        test_num_updates=test_num_updates,
        meta_batch_size=FLAGS.meta_batch_size,
        metatrain_iterations=FLAGS.metatrain_iterations,
        norm=FLAGS.norm,
    )
    model.build(input_tensors=input_tensors, prefix="metatrain")

    if not FLAGS.train:
        # Restore the original meta batch size when loading the model.
        FLAGS.meta_batch_size = orig_meta_batch_size

    trainer = Trainer(
        model,
        data_generator,
        Path(FLAGS.logdir),
        FLAGS.pretrain_iterations,
        FLAGS.metatrain_iterations,
        FLAGS.meta_batch_size,
        FLAGS.update_batch_size,
        FLAGS.num_updates,
        FLAGS.update_lr,
        stop_grad=FLAGS.stop_grad,
        baseline=FLAGS.baseline,
        is_training=True,
    )
    trainer.train()
    trainer.test()
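# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: main() above assumes a
# module-level FLAGS object. One plausible definition uses TF1's tf.app.flags;
# the flag names match those referenced above, but every default value and
# help string here is a placeholder, not the repo's actual configuration.
# ---------------------------------------------------------------------------
import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_bool('train', True, 'True to meta-train, False to test.')
flags.DEFINE_integer('metatrain_iterations', 70000, 'number of meta-training iterations')
flags.DEFINE_integer('pretrain_iterations', 0, 'number of pre-training iterations')
flags.DEFINE_integer('meta_batch_size', 25, 'number of tasks sampled per meta-update')
flags.DEFINE_float('meta_lr', 0.001, 'outer-loop (meta) learning rate')
flags.DEFINE_integer('update_batch_size', 10, 'examples used for each inner update')
flags.DEFINE_float('update_lr', 0.001, 'inner-loop step size')
flags.DEFINE_integer('num_updates', 1, 'inner gradient steps during meta-training')
flags.DEFINE_bool('stop_grad', False, 'if True, omit second derivatives (first-order MAML)')
flags.DEFINE_string('norm', 'None', 'batch_norm, layer_norm, or None')
flags.DEFINE_string('logdir', '/tmp/maml', 'directory for checkpoints and summaries')
flags.DEFINE_string('baseline', None, 'baseline to use, or None for MAML')
FLAGS = flags.FLAGS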
def main():
    training = not args.test

    # kshot + kquery images per category, nway categories, meta_batchsz tasks.
    kshot = 1
    kquery = 15
    nway = 5
    meta_batchsz = 4
    K = 5  # number of inner-gradient updates

    db = DataGenerator(nway, kshot, kquery, meta_batchsz, 200000)

    if training:  # only construct the training pipeline if needed
        # image_tensor: [4, 80, 84*84*3]
        # label_tensor: [4, 80, 5]
        image_tensor, label_tensor = db.make_data_tensor(training=True)

        # NOTICE: the 80 images of one task are now ordered like
        # [label2, label1, label3, label0, label4], then repeated 15 more
        # times, i.e. 16 x 5 = 80 images per task.
        # support_x: [4, 1*5,  84*84*3]
        # query_x:   [4, 15*5, 84*84*3]
        # support_y: [4, 1*5,  5]
        # query_y:   [4, 15*5, 5]
        support_x = tf.slice(image_tensor, [0, 0, 0], [-1, nway * kshot, -1], name='support_x')
        query_x = tf.slice(image_tensor, [0, nway * kshot, 0], [-1, -1, -1], name='query_x')
        support_y = tf.slice(label_tensor, [0, 0, 0], [-1, nway * kshot, -1], name='support_y')
        query_y = tf.slice(label_tensor, [0, nway * kshot, 0], [-1, -1, -1], name='query_y')

    # Construct test tensors.
    image_tensor, label_tensor = db.make_data_tensor(training=False)
    support_x_test = tf.slice(image_tensor, [0, 0, 0], [-1, nway * kshot, -1], name='support_x_test')
    query_x_test = tf.slice(image_tensor, [0, nway * kshot, 0], [-1, -1, -1], name='query_x_test')
    support_y_test = tf.slice(label_tensor, [0, 0, 0], [-1, nway * kshot, -1], name='support_y_test')
    query_y_test = tf.slice(label_tensor, [0, nway * kshot, 0], [-1, -1, -1], name='query_y_test')

    # 1. Construct the MAML model, then build the metatrain_ and metaval_ graphs.
    model = MAML(84, 3, 5)
    if training:
        model.build(support_x, support_y, query_x, query_y, K, meta_batchsz, mode='train')
        model.build(support_x_test, support_y_test, query_x_test, query_y_test, K, meta_batchsz, mode='eval')
    else:
        model.build(support_x_test, support_y_test, query_x_test, query_y_test, K + 5, meta_batchsz, mode='test')
    model.summ_op = tf.summary.merge_all()

    all_vars = filter(lambda x: 'meta_optim' not in x.name, tf.trainable_variables())
    for p in all_vars:
        print(p)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config)

    # Save tf.global_variables() so the moving_mean and moving_variance of
    # batch norm are included; tf.trainable_variables() does NOT include them.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)

    # Initialize under the interactive session.
    tf.global_variables_initializer().run()
    tf.train.start_queue_runners()

    if os.path.exists(os.path.join('ckpt', 'checkpoint')):
        # Always load the checkpoint, for both train and test.
        model_file = tf.train.latest_checkpoint('ckpt')
        print("Restoring model weights from", model_file)
        saver.restore(sess, model_file)

    if training:
        train(model, saver, sess)
    else:
        test(model, sess)
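# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: the tf.slice calls in
# main() split each task's nway*(kshot+kquery) = 80 examples into the first
# nway*kshot = 5 support examples and the remaining nway*kquery = 75 query
# examples. The same arithmetic on dummy NumPy data:
# ---------------------------------------------------------------------------
import numpy as np

meta_batchsz, nway, kshot, kquery = 4, 5, 1, 15
images = np.zeros([meta_batchsz, nway * (kshot + kquery), 84 * 84 * 3])

support = images[:, :nway * kshot, :]  # first 5 examples of every task
query = images[:, nway * kshot:, :]    # remaining 75 examples of every task
assert support.shape == (4, 5, 84 * 84 * 3)
assert query.shape == (4, 75, 84 * 84 * 3)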
def main():
    training = not args.test

    main_dir = './data/'
    dataset_name = 'flickr'
    kshot = 5
    meta_batchsz = 4
    k = 5

    # Number of meta-training batches, per dataset.
    if dataset_name == 'flickr':
        batch_num = 50000
    elif dataset_name == 'wiki':
        batch_num = 10000
    elif dataset_name == 'email':
        batch_num = 5000
    else:
        batch_num = 10000

    db = DataGenerator(main_dir, dataset_name, kshot, meta_batchsz, 50000)

    if training:
        node_tensor, label_tensor, data_tensor = db.make_data_tensor(training=True)
        support_n = tf.slice(node_tensor, [0, 0, 0], [-1, kshot, -1], name='support_n')
        query_n = tf.slice(node_tensor, [0, kshot, 0], [-1, -1, -1], name='query_n')
        support_x = tf.slice(data_tensor, [0, 0, 0], [-1, kshot, -1], name='support_x')
        query_x = tf.slice(data_tensor, [0, kshot, 0], [-1, -1, -1], name='query_x')
        support_y = tf.slice(label_tensor, [0, 0, 0], [-1, kshot, -1], name='support_y')
        query_y = tf.slice(label_tensor, [0, kshot, 0], [-1, -1, -1], name='query_y')

    # Construct test tensors.
    node_tensor, label_tensor, data_tensor = db.make_data_tensor(training=False)
    support_n_test = tf.slice(node_tensor, [0, 0, 0], [-1, kshot, -1], name='support_n_test')
    query_n_test = tf.slice(node_tensor, [0, kshot, 0], [-1, -1, -1], name='query_n_test')
    support_x_test = tf.slice(data_tensor, [0, 0, 0], [-1, kshot, -1], name='support_x_test')
    query_x_test = tf.slice(data_tensor, [0, kshot, 0], [-1, -1, -1], name='query_x_test')
    support_y_test = tf.slice(label_tensor, [0, 0, 0], [-1, kshot, -1], name='support_y_test')
    query_y_test = tf.slice(label_tensor, [0, kshot, 0], [-1, -1, -1], name='query_y_test')

    model = MAML(128)
    if training:
        # The train graph can only be built when the train tensors exist.
        model.build(support_n, support_x, support_y, query_n, query_x, query_y,
                    k, meta_batchsz, mode='train')
    model.build(support_n_test, support_x_test, support_y_test,
                query_n_test, query_x_test, query_y_test,
                k, meta_batchsz, mode='test')
    model.summ_op = tf.summary.merge_all()

    all_vars = filter(lambda x: 'meta_optim' not in x.name, tf.trainable_variables())
    for p in all_vars:
        print(p)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config)

    # Save tf.global_variables() so batch-norm statistics are included.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)

    tf.global_variables_initializer().run()
    tf.train.start_queue_runners()

    if os.path.exists(os.path.join('ckpt', 'checkpoint')):
        # Always load the checkpoint if one exists.
        model_file = tf.train.latest_checkpoint('ckpt')
        print("Restoring model weights from", model_file)
        saver.restore(sess, model_file)

    if training:
        train(model, sess, batch_num)
    test(model, sess, dataset_name)
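# ---------------------------------------------------------------------------
# Illustrative sketch, not the repo's implementation: the core computation
# that a MAML model.build() typically wires up per task. All names below
# (forward, loss_fn, fast_weights) are hypothetical. For weights theta, one
# or more inner gradient steps on the support set produce adapted weights
# theta'; the meta-objective is the query loss evaluated at theta', and its
# gradient with respect to the original theta is the meta-gradient.
# ---------------------------------------------------------------------------
import tensorflow as tf

def maml_task_loss(weights, support_x, support_y, query_x, query_y,
                   forward, loss_fn, update_lr, num_updates):
    # Inner loop: adapt a task-specific copy of the weights on the support set.
    fast_weights = dict(weights)
    for _ in range(num_updates):
        support_loss = loss_fn(forward(support_x, fast_weights), support_y)
        grads = tf.gradients(support_loss, list(fast_weights.values()))
        fast_weights = {name: w - update_lr * g
                        for (name, w), g in zip(fast_weights.items(), grads)}
    # Outer objective: query loss under the adapted weights. Minimizing it
    # w.r.t. the ORIGINAL weights (e.g. with a 'meta_optim' optimizer, as the
    # variable filter above suggests) is the meta-update.
    return loss_fn(forward(query_x, fast_weights), query_y)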