def train_gru(self, gru_net, gru_net_path, gru_plot_dir, train_data, batch_size, train_epochs, cuda,
              bn_episodes, bottleneck_data_path, generate_max_steps, gru_prob_data_path, gru_dir):
    """Train the GRU network and generate the data sets used by later stages.

    Steps, in order:
      1. Train ``gru_net`` with Adam (lr=1e-3) via ``gru_nn.train`` (``trunc_k=50``).
      2. Put the trained network in eval mode and call
         ``tl.generate_bottleneck_data`` with it.
      3. Call ``tl.generate_trajectories`` (500 batches of ``batch_size``) with a
         CPU copy of the network and ``gru_prob_data_path``.
      4. Call ``tl.write_net_readme`` with the elapsed wall-clock time.

    Args:
        gru_net: Network to train; must expose ``train()``, ``eval()``,
            ``parameters()`` and ``cpu()``.
        gru_net_path: Forwarded to ``gru_nn.train``.
        gru_plot_dir: Forwarded to ``gru_nn.train``.
        train_data: Forwarded to ``gru_nn.train``.
        batch_size: Forwarded to ``gru_nn.train`` and ``tl.generate_trajectories``.
        train_epochs: Forwarded to ``gru_nn.train``.
        cuda: Forwarded to ``gru_nn.train`` and ``tl.generate_bottleneck_data``.
        bn_episodes: Forwarded to ``tl.generate_bottleneck_data``.
        bottleneck_data_path: Forwarded to ``tl.generate_bottleneck_data``.
        generate_max_steps: Passed as ``max_steps`` to
            ``tl.generate_bottleneck_data``.
        gru_prob_data_path: Forwarded to ``tl.generate_trajectories``.
        gru_dir: Forwarded to ``tl.write_net_readme``.

    Returns:
        The network returned by ``gru_nn.train``, in eval mode. It is no longer
        moved to the CPU as a side effect of this call.
    """
    logging.info('Training GRU!')
    start_time = time.time()

    gru_net.train()
    optimizer = optim.Adam(gru_net.parameters(), lr=1e-3)
    gru_net = gru_nn.train(gru_net, self.env, optimizer, gru_net_path, gru_plot_dir, train_data,
                           batch_size, train_epochs, cuda, trunc_k=50)

    logging.info('Generating Data-Set for Later Bottle Neck Training')
    gru_net.eval()
    tl.generate_bottleneck_data(gru_net, self.env, bn_episodes, bottleneck_data_path,
                                cuda=cuda, max_steps=generate_max_steps)

    # ``Module.cpu()`` moves the module in place. Calling it directly on
    # ``gru_net`` would hand a CPU network back to the caller even when
    # ``cuda`` is set. Use a throw-away copy instead, as ``bgru_train`` does.
    cpu_net = copy.deepcopy(gru_net).cpu()
    tl.generate_trajectories(self.env, 500, batch_size, gru_prob_data_path, cpu_net)

    tl.write_net_readme(gru_net, gru_dir, info={'time_taken': time.time() - start_time})
    return gru_net
def generate_train_data(self, no_batches, batch_size, trajectories_data_path, generate_train_data, gru_dir):
    """Configure logging for this stage and generate trajectory training data.

    Args:
        no_batches: Forwarded to ``tl.generate_trajectories`` (second argument).
        batch_size: Forwarded to ``tl.generate_trajectories`` (third argument).
        trajectories_data_path: Forwarded to ``tl.generate_trajectories``.
        generate_train_data: Not read by this method; kept so the signature
            stays compatible with existing callers.
        gru_dir: Passed to ``tl.set_log`` together with the tag
            ``'generate_train_data'``.

    Returns:
        Whatever ``tl.generate_trajectories`` returns for ``self.env``.
    """
    log_tag = 'generate_train_data'
    tl.set_log(gru_dir, log_tag)
    return tl.generate_trajectories(
        self.env,
        no_batches,
        batch_size,
        trajectories_data_path,
    )
def bgru_train(self, bgru_net, gru_net, cuda, gru_scratch, trajectories_data_path, bgru_net_path,
               bgru_plot_dir, batch_size, train_epochs, gru_prob_data_path, bgru_dir):
    """Train the binary GRU network, either from scratch or by fine-tuning.

    First, ``self.env.spec.reward_threshold`` is overwritten with the result of
    ``gru_nn.test(gru_net, self.env, 10, ...)``. Then one of two branches runs:

    * ``gru_scratch`` truthy: Adam with lr=1e-3, trajectories generated with
      ``trajectories_data_path``, training through ``gru_nn.train`` using
      ``batch_size``.
    * otherwise: Adam with lr=1e-4, trajectories generated from a CPU deep
      copy of ``bgru_net.gru_net`` with ``gru_prob_data_path``, training
      through ``bgru_nn.train`` (``test_episodes=1``, ``trunc_k=100``).

    Finally ``tl.write_net_readme`` is called with the elapsed time rounded to
    four decimals.

    Args:
        bgru_net: Network to train; in the fine-tune branch it must expose a
            ``gru_net`` attribute.
        gru_net: Network evaluated by ``gru_nn.test`` to obtain the reward
            threshold.
        cuda: Forwarded to ``gru_nn.test`` and to the training call.
        gru_scratch: Selects the from-scratch branch when truthy.
        trajectories_data_path: Used only in the from-scratch branch.
        bgru_net_path: Forwarded to the training call.
        bgru_plot_dir: Forwarded to the training call.
        batch_size: Used only in the from-scratch branch (see note below).
        train_epochs: Forwarded to the training call.
        gru_prob_data_path: Used only in the fine-tune branch.
        bgru_dir: Forwarded to ``tl.write_net_readme``.

    Returns:
        The network returned by the training call. Previously this method
        returned ``None`` and dropped that object; callers that ignore the
        return value are unaffected.
    """
    # NOTE(review): render=True is hard-coded here — confirm this is wanted
    # for non-interactive runs.
    self.env.spec.reward_threshold = gru_nn.test(gru_net, self.env, 10, log=True, cuda=cuda, render=True)

    logging.info('Training Binary GRUNet!')
    bgru_net.train()
    _start_time = time.time()

    if gru_scratch:
        optimizer = optim.Adam(bgru_net.parameters(), lr=1e-3)
        # NOTE(review): trajectories are generated with literal sizes (3, 5)
        # while training uses ``batch_size`` — confirm the two are meant to differ.
        train_data = tl.generate_trajectories(self.env, 3, 5, trajectories_data_path)
        bgru_net = gru_nn.train(bgru_net, self.env, optimizer, bgru_net_path, bgru_plot_dir,
                                train_data, batch_size, train_epochs, cuda)
    else:
        optimizer = optim.Adam(bgru_net.parameters(), lr=1e-4)
        # Deep copy before ``.cpu()`` so the wrapped GRU inside ``bgru_net``
        # is not moved off its current device.
        train_data = tl.generate_trajectories(
            self.env, 3, 5, gru_prob_data_path, copy.deepcopy(bgru_net.gru_net).cpu())
        # NOTE(review): this branch passes a literal batch size of 5 and
        # ignores ``batch_size`` — kept as in the original; confirm intent.
        bgru_net = bgru_nn.train(bgru_net, self.env, optimizer, bgru_net_path, bgru_plot_dir,
                                 train_data, 5, train_epochs, cuda, test_episodes=1, trunc_k=100)

    tl.write_net_readme(
        bgru_net, bgru_dir, info={'time_taken': round(time.time() - _start_time, 4)})
    return bgru_net
gru_net.load_state_dict(torch.load(gru_net_path)) gru_net.noise = False if args.cuda: gru_net = gru_net.cuda() gru_net.eval() tl.generate_bottleneck_data(gru_net, env, args.bn_episodes, bottleneck_data_path, cuda=args.cuda, eps=(0, 0.3), max_steps=args.generate_max_steps) tl.generate_trajectories(env, 3, 5, gru_prob_data_path, gru_net, cuda=args.cuda, render=True) # *********************************************************************************** # HX-QBN * # *********************************************************************************** if args.bhx_train or args.bhx_test: tl.set_log(bhx_dir, 'train' if args.bhx_train else 'test') gru_net = GRUNet(len(obs), args.gru_size, int(env.action_space.n)) gru_net.eval() bhx_net = HxQBNet(args.gru_size, args.bhx_size) if args.cuda: gru_net = gru_net.cuda() bhx_net = bhx_net.cuda()