def train_gru(self, gru_net, gru_net_path, gru_plot_dir, train_data,
              batch_size, train_epochs, cuda, bn_episodes,
              bottleneck_data_path, generate_max_steps, gru_prob_data_path,
              gru_dir):
    """Train the GRU network, then produce data for later bottleneck training.

    Trains ``gru_net`` on ``self.env`` via ``gru_nn.train``, then (in eval
    mode) generates bottleneck data and 500 trajectories, and finally writes
    a README with the elapsed wall-clock time.

    Returns the trained GRU network (moved to CPU by the trajectory step).
    """
    logging.info('Training GRU!')
    t_begin = time.time()

    # Optimize all network parameters with Adam; truncated BPTT window of 50.
    gru_net.train()
    adam = optim.Adam(gru_net.parameters(), lr=1e-3)
    gru_net = gru_nn.train(gru_net, self.env, adam, gru_net_path,
                           gru_plot_dir, train_data, batch_size,
                           train_epochs, cuda, trunc_k=50)

    logging.info('Generating Data-Set for Later Bottle Neck Training')
    gru_net.eval()
    tl.generate_bottleneck_data(gru_net, self.env, bn_episodes,
                                bottleneck_data_path, cuda=cuda,
                                max_steps=generate_max_steps)
    # NOTE(review): .cpu() moves the net off the GPU for trajectory
    # generation; the returned net is therefore on CPU.
    tl.generate_trajectories(self.env, 500, batch_size, gru_prob_data_path,
                             gru_net.cpu())
    tl.write_net_readme(gru_net, gru_dir,
                        info={'time_taken': time.time() - t_begin})
    return gru_net
def bgru_train(self, bgru_net, gru_net, cuda, gru_scratch,
               trajectories_data_path, bgru_net_path, bgru_plot_dir,
               batch_size, train_epochs, gru_prob_data_path, bgru_dir):
    """Train the binary GRU network.

    First evaluates ``gru_net`` over 10 episodes and stores the score as the
    environment's reward threshold. Then trains ``bgru_net``:

    - ``gru_scratch`` True: train from scratch with ``gru_nn.train`` on
      trajectories generated without a guiding network (lr=1e-3).
    - otherwise: fine-tune with ``bgru_nn.train`` on trajectories generated
      by a CPU copy of the embedded ``bgru_net.gru_net`` (lr=1e-4,
      hard-coded batch size 5, truncation window 100).

    Finally writes a README with the elapsed time and returns the trained
    network (previously the trained net was only rebound locally and lost
    to the caller).
    """
    # Use the pre-trained GRU's test score as the bar the binary net must meet.
    self.env.spec.reward_threshold = gru_nn.test(gru_net, self.env, 10,
                                                 log=True, cuda=cuda,
                                                 render=True)
    logging.info('Training Binary GRUNet!')
    bgru_net.train()
    start_time = time.time()

    if gru_scratch:
        optimizer = optim.Adam(bgru_net.parameters(), lr=1e-3)
        train_data = tl.generate_trajectories(self.env, 3, 5,
                                              trajectories_data_path)
        bgru_net = gru_nn.train(bgru_net, self.env, optimizer, bgru_net_path,
                                bgru_plot_dir, train_data, batch_size,
                                train_epochs, cuda)
    else:
        # Lower lr for fine-tuning; deepcopy so trajectory generation on CPU
        # does not disturb the net being trained.
        optimizer = optim.Adam(bgru_net.parameters(), lr=1e-4)
        train_data = tl.generate_trajectories(
            self.env, 3, 5, gru_prob_data_path,
            copy.deepcopy(bgru_net.gru_net).cpu())
        bgru_net = bgru_nn.train(bgru_net, self.env, optimizer, bgru_net_path,
                                 bgru_plot_dir, train_data, 5, train_epochs,
                                 cuda, test_episodes=1, trunc_k=100)

    tl.write_net_readme(
        bgru_net, bgru_dir,
        info={'time_taken': round(time.time() - start_time, 4)})
    # Consistency fix: return the trained net, mirroring train_gru.
    return bgru_net
def bhx_train(self, bhx_net, hx_train_data, hx_test_data, bhx_net_path,
              bhx_plot_dir, batch_size, train_epochs, cuda, target_net,
              bhx_dir):
    """Train the hidden-state (HX) quantized bottleneck network.

    Trains ``bhx_net`` with ``qbn.train`` on the given train/test hidden-state
    data (Adam, lr=1e-4, gradient clipping at 5, noise range [-0.02, 0.02]),
    writes a README with the elapsed time, and returns the trained network
    (previously the trained net was only rebound locally and lost to the
    caller).
    """
    bhx_start_time = time.time()
    logging.info('Training HX SandGlassNet!')
    optimizer = optim.Adam(bhx_net.parameters(), lr=1e-4, weight_decay=0)
    bhx_net.train()
    bhx_net = qbn.train(bhx_net, (hx_train_data, hx_test_data), optimizer,
                        bhx_net_path, bhx_plot_dir, batch_size, train_epochs,
                        cuda, grad_clip=5, target_net=target_net,
                        env=self.env, low=-0.02, high=0.02)
    bhx_end_time = time.time()
    tl.write_net_readme(
        bhx_net, bhx_dir,
        info={'time_taken': round(bhx_end_time - bhx_start_time, 4)})
    # Consistency fix: return the trained net, mirroring train_gru.
    return bhx_net
args.generate_train_data, gru_dir)  # NOTE(review): closes a call whose opening line precedes this chunk

# ***********************************************************************************
#                                 GRU Network                                       *
# ***********************************************************************************
if args.gru_train or args.gru_test:
    tl.set_log(gru_dir, 'train' if args.gru_train else 'test')
    gru_net = GRUNet(len(obs), args.gru_size, int(env.action_space.n))
    if args.cuda:
        gru_net = gru_net.cuda()
    if args.gru_train:
        # Training is intentionally skipped: a pre-trained model is assumed
        # to already exist at gru_net_path; only the README is written.
        logging.info(['No Training Performed!!'])
        logging.warning(
            'We assume that we already have a pre-trained model @ {}'.
            format(gru_net_path))
        tl.write_net_readme(gru_net, gru_dir, info={})
    if args.gru_test:
        test_performance = fsm_object.test_gru(gru_net, gru_net_path,
                                               args.cuda)

# ***********************************************************************************
#                     Generating BottleNeck training data                           *
# ***********************************************************************************
if args.generate_bn_data:
    tl.set_log(data_dir, 'generate_bn_data')
    logging.info('Generating Data-Set for Later Bottle Neck Training')
    # Load the pre-trained GRU weights, disable its noise, and switch to
    # eval mode before generating bottleneck data.
    gru_net = GRUNet(len(obs), args.gru_size, int(env.action_space.n))
    gru_net.load_state_dict(torch.load(gru_net_path))
    gru_net.noise = False
    if args.cuda:
        gru_net = gru_net.cuda()
    gru_net.eval()