Exemple #1
0
    def train_gru(self, gru_net, gru_net_path, gru_plot_dir, train_data,
                  batch_size, train_epochs, cuda, bn_episodes,
                  bottleneck_data_path, generate_max_steps, gru_prob_data_path,
                  gru_dir):
        """Train the GRU network and generate the data sets derived from it.

        The steps run in this order:

        1. ``gru_nn.train`` is run on ``gru_net`` against ``self.env`` with an
           Adam optimizer (``lr=1e-3``) and ``trunc_k=50``.
        2. ``tl.generate_bottleneck_data`` is called with the trained network.
        3. ``tl.generate_trajectories`` is called for 500 batches with the
           result of ``.cpu()`` on the trained network.
        4. ``tl.write_net_readme`` records the elapsed wall-clock time under
           the ``'time_taken'`` key.

        Args:
            gru_net: Network to train; must expose ``train()``, ``eval()``,
                ``parameters()`` and ``cpu()``.
            gru_net_path: Forwarded to ``gru_nn.train``.
            gru_plot_dir: Forwarded to ``gru_nn.train``.
            train_data: Forwarded to ``gru_nn.train``.
            batch_size: Forwarded to ``gru_nn.train`` and to
                ``tl.generate_trajectories``.
            train_epochs: Forwarded to ``gru_nn.train``.
            cuda: Forwarded to ``gru_nn.train`` and
                ``tl.generate_bottleneck_data``.
            bn_episodes: Forwarded to ``tl.generate_bottleneck_data``.
            bottleneck_data_path: Forwarded to ``tl.generate_bottleneck_data``.
            generate_max_steps: Passed as ``max_steps`` to
                ``tl.generate_bottleneck_data``.
            gru_prob_data_path: Forwarded to ``tl.generate_trajectories``.
            gru_dir: Forwarded to ``tl.write_net_readme``.

        Returns:
            The object returned by ``gru_nn.train``.
        """
        logging.info('Training GRU!')
        began_at = time.time()

        # Phase 1: fit the network.
        gru_net.train()
        adam = optim.Adam(gru_net.parameters(), lr=1e-3)
        trained_net = gru_nn.train(gru_net, self.env, adam, gru_net_path,
                                   gru_plot_dir, train_data, batch_size,
                                   train_epochs, cuda, trunc_k=50)

        # Phase 2: use the trained network to build the bottleneck data set.
        logging.info('Generating Data-Set for Later Bottle Neck Training')
        trained_net.eval()
        tl.generate_bottleneck_data(trained_net, self.env, bn_episodes,
                                    bottleneck_data_path, cuda=cuda,
                                    max_steps=generate_max_steps)

        # Phase 3: trajectories are generated with the CPU version of the net.
        # NOTE(review): if this is a torch ``nn.Module``, ``.cpu()`` moves the
        # module in place, so the returned network would also be on the CPU --
        # confirm callers expect that.
        cpu_net = trained_net.cpu()
        tl.generate_trajectories(self.env, 500, batch_size, gru_prob_data_path,
                                 cpu_net)

        # Phase 4: record how long the whole procedure took.
        elapsed = time.time() - began_at
        tl.write_net_readme(trained_net, gru_dir, info={'time_taken': elapsed})

        return trained_net
Exemple #2
0
 def generate_train_data(self, no_batches, batch_size,
                         trajectories_data_path, generate_train_data,
                         gru_dir):
     """Set up logging, then generate and return trajectory training data.

     Args:
         no_batches: Forwarded to ``tl.generate_trajectories``.
         batch_size: Forwarded to ``tl.generate_trajectories``.
         trajectories_data_path: Forwarded to ``tl.generate_trajectories``.
         generate_train_data: Not read by this method; kept so the
             signature stays unchanged for existing callers.
         gru_dir: Passed to ``tl.set_log`` together with the tag
             ``'generate_train_data'``.

     Returns:
         Whatever ``tl.generate_trajectories`` returns for ``self.env``.
     """
     tl.set_log(gru_dir, 'generate_train_data')
     return tl.generate_trajectories(self.env, no_batches, batch_size,
                                     trajectories_data_path)
Exemple #3
0
 def bgru_train(self, bgru_net, gru_net, cuda, gru_scratch,
                trajectories_data_path, bgru_net_path, bgru_plot_dir,
                batch_size, train_epochs, gru_prob_data_path, bgru_dir):
     """Train the binary GRU network and return the trained network.

     Before training, ``gru_nn.test`` is run on ``gru_net`` for 10 episodes
     and its result is stored in ``self.env.spec.reward_threshold``.

     Two training paths exist:

     * ``gru_scratch`` truthy: trajectories are generated without a network
       argument and ``gru_nn.train`` is used with Adam at ``lr=1e-3`` and
       the caller's ``batch_size``.
     * otherwise: trajectories are generated with a CPU deep copy of
       ``bgru_net.gru_net`` and ``bgru_nn.train`` is used with Adam at
       ``lr=1e-4``, a fixed batch size of 5, ``test_episodes=1`` and
       ``trunc_k=100``.

     Finally ``tl.write_net_readme`` records the training time, rounded to
     four decimals, under the ``'time_taken'`` key.

     Args:
         bgru_net: Network to train; must expose ``train()`` and
             ``parameters()``, and a ``gru_net`` attribute on the
             non-scratch path.
         gru_net: Network evaluated by ``gru_nn.test`` to obtain the reward
             threshold.
         cuda: Forwarded to ``gru_nn.test`` and to the train function.
         gru_scratch: Selects the training path described above.
         trajectories_data_path: Trajectory path used on the scratch path.
         bgru_net_path: Forwarded to the train function.
         bgru_plot_dir: Forwarded to the train function.
         batch_size: Used on the scratch path only.
         train_epochs: Forwarded to the train function.
         gru_prob_data_path: Trajectory path used on the non-scratch path.
         bgru_dir: Forwarded to ``tl.write_net_readme``.

     Returns:
         The object returned by the train function. Previously nothing was
         returned, so the caller could not reach the trained object; this
         now matches ``train_gru``, which returns its trained network.
     """
     self.env.spec.reward_threshold = gru_nn.test(gru_net,
                                                  self.env,
                                                  10,
                                                  log=True,
                                                  cuda=cuda,
                                                  render=True)
     logging.info('Training Binary GRUNet!')
     bgru_net.train()
     _start_time = time.time()
     if gru_scratch:
         optimizer = optim.Adam(bgru_net.parameters(), lr=1e-3)
         train_data = tl.generate_trajectories(self.env, 3, 5,
                                               trajectories_data_path)
         bgru_net = gru_nn.train(bgru_net, self.env, optimizer,
                                 bgru_net_path, bgru_plot_dir, train_data,
                                 batch_size, train_epochs, cuda)
     else:
         optimizer = optim.Adam(bgru_net.parameters(), lr=1e-4)
         # A deep copy is taken so that `.cpu()` is applied to the copy and
         # not to the `gru_net` held by the network being trained.
         train_data = tl.generate_trajectories(
             self.env, 3, 5, gru_prob_data_path,
             copy.deepcopy(bgru_net.gru_net).cpu())
         bgru_net = bgru_nn.train(bgru_net,
                                  self.env,
                                  optimizer,
                                  bgru_net_path,
                                  bgru_plot_dir,
                                  train_data,
                                  5,
                                  train_epochs,
                                  cuda,
                                  test_episodes=1,
                                  trunc_k=100)
     tl.write_net_readme(
         bgru_net,
         bgru_dir,
         info={'time_taken': round(time.time() - _start_time, 4)})
     # Hand the trained network back so the caller can keep using it.
     return bgru_net
Exemple #4
0
            gru_net.load_state_dict(torch.load(gru_net_path))
            gru_net.noise = False
            if args.cuda:
                gru_net = gru_net.cuda()
            gru_net.eval()
            tl.generate_bottleneck_data(gru_net,
                                        env,
                                        args.bn_episodes,
                                        bottleneck_data_path,
                                        cuda=args.cuda,
                                        eps=(0, 0.3),
                                        max_steps=args.generate_max_steps)
            tl.generate_trajectories(env,
                                     3,
                                     5,
                                     gru_prob_data_path,
                                     gru_net,
                                     cuda=args.cuda,
                                     render=True)

        # ***********************************************************************************
        # HX-QBN                                                                            *
        # ***********************************************************************************
        if args.bhx_train or args.bhx_test:
            tl.set_log(bhx_dir, 'train' if args.bhx_train else 'test')
            gru_net = GRUNet(len(obs), args.gru_size, int(env.action_space.n))
            gru_net.eval()
            bhx_net = HxQBNet(args.gru_size, args.bhx_size)
            if args.cuda:
                gru_net = gru_net.cuda()
                bhx_net = bhx_net.cuda()