import os

import numpy as np
import tensorflow as tf
from neon.backends import gen_backend

# train_loader, validation_loader and OneHot are assumed to be project-local
# (aeon-based) data-loading helpers imported from the surrounding codebase.

# Hyperparameters (WikiArt, 23 artist classes)
lr_init = 0.0002
batch_size = 100
zdim = 100
n_classes = 23
dropout = 0.2
im_size = [64, 64]
dname, gname = 'd_', 'g_'
tf.set_random_seed(1234)

# DataLoader
be = gen_backend(backend='cpu', batch_size=batch_size, datatype=np.float32)
root_files = './dataset/wikiart'
manifestfile = os.path.join(root_files, 'artist-train-index.csv')
testmanifest = os.path.join(root_files, 'artist-val-index.csv')
train = train_loader(manifestfile, root_files, be, h=im_size[0], w=im_size[1])
test = validation_loader(testmanifest, root_files, be,
                         h=im_size[0], w=im_size[1], ncls=n_classes)
OneHot = OneHot(be, n_classes)

# Graph input
is_train = tf.placeholder(tf.bool)
keep_prob = tf.placeholder(tf.float32)
x_n = tf.placeholder(tf.float32, [batch_size, 3, im_size[0], im_size[1]])
y = tf.placeholder(tf.float32, [batch_size, n_classes])
lr_tf = tf.placeholder(tf.float32)
z = tf.random_uniform([batch_size, zdim], -1, 1)
# Hyperparameters (CUB-200, 200 bird classes)
batch_size = 100
zdim = 100
n_classes = 200
dropout = 0.2
im_size = [64, 64]
dname, gname = 'd_', 'g_'
tf.set_random_seed(1234)

# DataLoader
be = gen_backend(backend='cpu', batch_size=batch_size, datatype=np.float32)
root_files = './dataset/cub200'
manifestfile = os.path.join(root_files, 'train-index.csv')
testmanifest = os.path.join(root_files, 'val-index.csv')
train = train_loader(manifestfile, root_files, be,
                     h=im_size[0], w=im_size[1], scale=[0.875, 0.875])
test = validation_loader(testmanifest, root_files, be,
                         h=im_size[0], w=im_size[1], scale=[0.875, 0.875],
                         ncls=n_classes)
OneHot = OneHot(be, n_classes)

# Graph input
is_train = tf.placeholder(tf.bool)
keep_prob = tf.placeholder(tf.float32)
x_n = tf.placeholder(tf.float32, [batch_size, 3, im_size[0], im_size[1]])
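# --- Sketch (not from the original) ---------------------------------------
# The 'd_' / 'g_' prefixes above suggest the usual DCGAN-style split of
# trainable variables between a discriminator and a generator optimizer.
# A minimal sketch of that pattern, assuming d_loss / g_loss are built
# elsewhere from x_n, y and z (they are not defined in this section):
t_vars = tf.trainable_variables()
d_vars = [v for v in t_vars if v.name.startswith(dname)]
g_vars = [v for v in t_vars if v.name.startswith(gname)]
# d_optim = tf.train.AdamOptimizer(lr_tf, beta1=0.5).minimize(d_loss, var_list=d_vars)
# g_optim = tf.train.AdamOptimizer(lr_tf, beta1=0.5).minimize(g_loss, var_list=g_vars)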
import datetime
import os

import paddle.fluid as fluid
from visualdl import LogWriter

# get_token2id_dict, train_loader, val_test_loader and eval are assumed to be
# project-local helpers defined alongside this training script.


def train(model, args):
    # 1. Create VisualDL logger
    logwriter = LogWriter(os.path.join(args.logdir, "visualdl_log"), sync_cycle=10)
    with logwriter.mode("Train") as writer:
        train_loss_scalar = writer.scalar("loss")
        train_acc_scalar = writer.scalar("acc")
        histogram1 = writer.histogram("Relation-BiLinear-W", 100)
        histogram2 = writer.histogram("Relation-BiLinear-b", 10)
        histogram3 = writer.histogram("Relation-FC-W", 100)
    with logwriter.mode("Val") as writer:
        val_acc_scalar = writer.scalar("acc")

    # 2. Setup program
    train_prog = fluid.default_main_program()
    train_startup = fluid.default_startup_program()
    train_reader = model.train_reader
    val_reader = model.val_reader
    test_reader = model.test_reader
    loss = model.loss
    mean_acc = model.mean_acc
    # Clone for val / test
    val_prog = train_prog.clone(for_test=True)
    test_prog = train_prog.clone(for_test=True)
    optimizer = fluid.optimizer.Adam(learning_rate=args.lr)
    optimizer.minimize(loss)

    # 3. Setup executor
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(train_startup)

    # 4. Get Relation Module params for VisualDL
    # print(fluid.io.get_program_parameter(train_startup))
    relation_BL_w = train_startup.global_block().var("Relation-BiLinear.w_0")
    relation_BL_b = train_startup.global_block().var("Relation-BiLinear.b_0")
    relation_FC_w = train_startup.global_block().var("Relation-FC.w_0")

    # 5. Compile
    print("Compiling...")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name)
    compiled_val_prog = fluid.CompiledProgram(val_prog).with_data_parallel(
        share_vars_from=compiled_train_prog)
    compiled_test_prog = fluid.CompiledProgram(test_prog).with_data_parallel(
        share_vars_from=compiled_train_prog)

    # 6. Setup data source
    token2idx_dict, unk_idx, pad_idx = get_token2id_dict(args.emb_path)
    print("Setup dataloader...")
    places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()
    train_reader.set_sample_generator(
        train_loader(args.train_data_path, args.N, args.K, args.Q,
                     token2idx_dict, unk_idx, pad_idx, args.max_length),
        batch_size=args.batch_size, places=places)
    val_reader.set_sample_generator(
        val_test_loader(args.val_data_path, args.N, args.K, args.Q,
                        token2idx_dict, unk_idx, pad_idx, args.max_length,
                        data_type="val"),
        batch_size=1, places=places)
    test_reader.set_sample_generator(
        val_test_loader(args.test_data_path, args.N, args.K, args.Q,
                        token2idx_dict, unk_idx, pad_idx, args.max_length,
                        data_type="test"),
        batch_size=1, places=places)
    # 7. Train loop
    # Record the best model
    best_val_acc = 0
    # Record the train loss/acc by sliding window
    loss_record, acc_record = [], []
    loss_window = acc_window = 0  # Sum of sliding window
    window = 50  # The size of sliding window
    for epi, train_data in zip(range(1, args.train_episodes + 1), train_reader()):
        # 7.1 Run
        (train_cur_loss, train_cur_acc, relation_BL_w_value,
         relation_BL_b_value, relation_FC_w_value) = exe.run(
            program=compiled_train_prog,
            feed=train_data,
            fetch_list=[
                loss.name, mean_acc.name, relation_BL_w.name,
                relation_BL_b.name, relation_FC_w.name
            ])
        # print(train_cur_loss[0], train_cur_acc[0])
        loss_record.append(train_cur_loss[0])
        acc_record.append(train_cur_acc[0])
        # + right - left
        loss_window += train_cur_loss[0]
        acc_window += train_cur_acc[0]
        if epi - window - 1 >= 0:
            # Ensure that the left side is in the sliding window
            loss_window -= loss_record[epi - window - 1]
            acc_window -= acc_record[epi - window - 1]
        if epi % window == 0:
            print(
                "{} [Train episode: {:5d}/{:5d}] ==> Loss: {:2.6f} Mean acc: {:2.4f}"
                .format(
                    str(datetime.datetime.now())[:-7], epi, args.train_episodes,
                    loss_window / window, 100 * acc_window / window))
            # 7.2 Add metrics/params to VisualDL
            train_loss_scalar.add_record(epi, loss_window / window)
            train_acc_scalar.add_record(epi, acc_window / window)
            histogram1.add_record(epi, relation_BL_w_value.flatten())
            histogram2.add_record(epi, relation_BL_b_value.flatten())
            histogram3.add_record(epi, relation_FC_w_value.flatten())

        # 7.3 Validation
        if args.val_data_path and epi % args.val_steps == 0:
            # 7.3.1 Run val once
            val_acc_mean = eval(exe, compiled_val_prog, val_reader,
                                [mean_acc.name], run_type="Val")
            print("{} [Val result: {:5d}/{:5d}] ==> Mean acc: {:2.4f}".format(
                str(datetime.datetime.now())[:-7], epi, args.train_episodes,
                100 * val_acc_mean))
            # Add val acc to VisualDL
            val_acc_scalar.add_record(epi, val_acc_mean)
            # 7.3.2 Save best model
            if val_acc_mean > best_val_acc:
                best_val_acc = val_acc_mean
                fluid.io.save_inference_model(
                    os.path.join(args.logdir, "infer_model"),
                    ["totalQ", "support", "support_len", "query", "query_len"],
                    [model.prediction], exe,
                    main_program=train_prog,
                    params_filename="__params__")
                print(
                    "{} [Save model of val mean acc: {:2.4f}] ==> {}".format(
                        str(datetime.datetime.now())[:-7], 100 * best_val_acc,
                        os.path.join(args.logdir, "infer_model")))

    # 8. Test
    if args.test_data_path:
        test_acc_mean = eval(exe, compiled_test_prog, test_reader,
                             [mean_acc.name], run_type="Test")
        print("{} [Test result] ==> Mean acc: {:2.4f}".format(
            str(datetime.datetime.now())[:-7], 100 * test_acc_mean))
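# --- Sketch (not from the original) ---------------------------------------
# train() above calls an eval() helper for validation and test that is not
# shown in this section. A minimal sketch of what such a helper might look
# like, assuming the reader yields feed data exactly like train_reader() and
# that fetch_list holds a single mean-accuracy tensor:
def eval(exe, compiled_prog, reader, fetch_list, run_type="Val"):
    accs = []
    for data in reader():
        acc, = exe.run(program=compiled_prog, feed=data, fetch_list=fetch_list)
        accs.append(float(acc[0]))
    # Mean accuracy over all val/test episodes
    return sum(accs) / max(len(accs), 1)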