def train(max_iter, snapshot, dataset, setname, mu, lr, bs, tfmodel_folder, conv5, model_name, stop_iter, pre_emb=False):
    """Train a referring-expression segmentation model (TensorFlow 1.x).

    Args:
        max_iter: maximum number of training iterations.
        snapshot: interval (in iterations) between checkpoint saves.
        dataset: dataset name; 'referit' selects the ReferIt vocab/embeddings,
            anything else selects the Gref vocab/embeddings.
        setname: data split name; batches are read from
            './<dataset>/<setname>_batch/'.
        mu: per-channel image mean subtracted after RGB->BGR flip.
        lr: initial learning rate passed to the model builder.
        bs: batch size.
        tfmodel_folder: directory where '.tfmodel' checkpoints are written
            (created if missing).
        conv5: flag forwarded to get_segmentation_model.
        model_name: architecture key for get_segmentation_model.
        stop_iter: iteration count at which training stops early.
        pre_emb: if True, build the model with pretrained word embeddings.
    """
    iters_per_log = 100
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    snapshot_file = os.path.join(tfmodel_folder, dataset + '_iter_%d.tfmodel')
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)

    # Exponential moving averages for logging only.
    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112
    emb_name = 'referit' if dataset == 'referit' else 'Gref'

    if pre_emb:
        print("Use pretrained Embeddings.")
        model = get_segmentation_model(model_name, mode='train',
                                       vocab_size=vocab_size, start_lr=lr,
                                       batch_size=bs, conv5=conv5,
                                       emb_name=emb_name)
    else:
        model = get_segmentation_model(model_name, mode='train',
                                       vocab_size=vocab_size, start_lr=lr,
                                       batch_size=bs, conv5=conv5)

    weights = './data/weights/deeplab_resnet_init.ckpt'
    print("Loading pretrained weights from {}".format(weights))
    # Restore only the DeepLab-ResNet backbone variables (res*/bn*/conv1);
    # everything else is freshly initialized below.
    load_var = {var.op.name: var for var in tf.global_variables()
                if var.name.startswith('res')
                or var.name.startswith('bn')
                or var.name.startswith('conv1')}

    snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=4)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, weights)

    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    # NOTE(review): word ids are buffered as float32 here — presumably cast
    # by the model.words placeholder; confirm against the model definition.
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)
    valid_idx_batch = np.zeros((bs, 1), dtype=np.int32)

    reader = data_reader.DataReader(data_folder, data_prefix)

    # for time calculate
    last_time = time.time()
    time_avg = MovingAverage()
    for n_iter in range(max_iter):
        # Assemble one batch, one example at a time.
        for n_batch in range(bs):
            batch = reader.read_batch(
                is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32), axis=2)
            im = im[:, :, ::-1]  # RGB -> BGR to match the DeepLab init weights
            im -= mu

            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask

            # Index of the first non-padding token.  Reset the slot to 0
            # first so an all-zero (empty) sentence does not silently reuse
            # a stale index left over from a previous iteration.  (BUG FIX)
            valid_idx_batch[n_batch, :] = 0
            for idx in range(text.shape[0]):
                if text[idx] != 0:
                    valid_idx_batch[n_batch, :] = idx
                    break

        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [model.train_step, model.cls_loss, model.learning_rate,
             model.pred, model.target],
            feed_dict={
                model.words: text_batch,
                model.im: image_batch,
                model.target_fine: mask_batch,
                model.valid_idx: valid_idx_batch
            })
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg

        # timing
        cur_time = time.time()
        elapsed = cur_time - last_time
        last_time = cur_time

        if n_iter % iters_per_log == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
                  % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
            time_avg.add(elapsed)
            print('iter = %d, cur time = %.5f, avg time = %.5f, model_name: %s'
                  % (n_iter, elapsed, time_avg.get_avg(), model_name))

        # Save snapshot
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            ckpt_path = snapshot_file % (n_iter + 1)  # format once, use twice
            snapshot_saver.save(sess, ckpt_path)
            print('snapshot saved to ' + ckpt_path)

        if (n_iter + 1) >= stop_iter:
            print('stop training at iter ' + str(stop_iter))
            break

    print('Optimization done.')
def train(method, environment, resume, episodes, lr, lr_episodes, min_lr, eval_only, replay_width, batch_size, gamma, update_rate, save_interval):
    """Run the episode-based RL training loop for the chosen agent.

    Args:
        method: agent key looked up in agent_factory.
        environment: environment key looked up in environments_to_names.
        resume: checkpoint path to resume from, or None to start fresh.
        episodes: number of game episodes to train for.
        lr: learning rate passed to the agent.
        lr_episodes: scheduler step size passed to the agent.
        min_lr: unused here — TODO confirm whether the agent/scheduler
            should receive it.
        eval_only: unused here — TODO confirm intended behavior.
        replay_width: capacity of the replay memory.
        batch_size: minibatch size sampled from replay memory per step.
        gamma: discount factor forwarded to agent.train.
        update_rate: target-network update rate forwarded to the agent.
        save_interval: step interval at which history rows are recorded.
    """
    # NOTE(review): the module appears to define two functions named `train`;
    # if both live in the same file this one shadows the earlier definition.
    history = History(method + '_' + environment,
                      ['steps', 'avg_reward', 'loss'],
                      resume is not None)
    history.flush()

    memory = ReplayMemory(replay_width)
    game = Game(name=environments_to_names[environment], memory=memory, render=False)
    # Only the state shape is needed here; the initial state itself is unused.
    _, state_shape = game.get_state(True)
    n_actions = game.env.action_space.n

    agent_cls = agent_factory[method]
    agent = agent_cls(state_shape, n_actions, environment, episodes, update_rate,
                      step_size=lr_episodes, lr=lr, save_interval=save_interval)

    # resume from a ckpt
    if resume is not None:
        agent.load(resume)

    avg_reward = MovingAverage(100)
    avg_loss = MovingAverage(100)
    log.info(f'Training with {episodes}, starting ...')

    # main training loop
    for i in range(episodes):
        game.reset()  # side effect only; current state is read via game.state
        done = False
        loss = None  # stays None until the first optimization step fires
        while not done:
            state = game.state
            action = agent.select_action(state)
            # The returned transition is already stored in `memory` by Game.
            _, done = game.step(int(action.to('cpu').numpy()))

            # Only optimize once the replay buffer can fill a minibatch.
            if len(memory) > batch_size:
                batched = memory.sample(batch_size)
                loss = agent.train(batched, batch_size, gamma, i)
                avg_loss.add(loss)

        reward = game.rewards
        agent.save()
        agent.scheduler.step()
        avg_reward.add(reward)

        # moving averages
        text = [
            f'steps: {agent.step_cnt}',
            f'game epochs: {i}/{episodes}',
            f'train loss: {float(avg_loss):.5}',
            f'avg reward: {float(avg_reward):.5}',
            f'reward: {float(reward):.5}',
            f'epsilon: {agent.epsilon:.3}',
        ]
        log.info(', '.join(text), update=True)

        if agent.step_cnt % save_interval == 0:
            history.record({
                'steps': agent.step_cnt,
                'avg_reward': float(avg_reward),
                'loss': float(avg_loss),
            })

    game.env.close()