Esempio n. 1
0
    def train_gru(self, gru_net, gru_net_path, gru_plot_dir, train_data,
                  batch_size, train_epochs, cuda, bn_episodes,
                  bottleneck_data_path, generate_max_steps, gru_prob_data_path,
                  gru_dir):
        """Train the GRU network and generate the follow-up data sets.

        Steps, in order:
          1. Put ``gru_net`` in training mode and train it through
             ``gru_nn.train`` with an Adam optimizer (lr=1e-3) and
             ``trunc_k=50``.
          2. Switch the trained network to eval mode and call
             ``tl.generate_bottleneck_data`` for ``bn_episodes`` episodes.
          3. Call ``tl.generate_trajectories`` with the network moved to the
             CPU.
          4. Write the network readme, recording the elapsed wall-clock time
             under ``'time_taken'``.

        Returns:
            The network object returned by ``gru_nn.train``.
        """
        logging.info('Training GRU!')
        began_at = time.time()

        # --- training ------------------------------------------------------
        gru_net.train()
        adam = optim.Adam(gru_net.parameters(), lr=1e-3)
        gru_net = gru_nn.train(gru_net, self.env, adam, gru_net_path,
                               gru_plot_dir, train_data, batch_size,
                               train_epochs, cuda, trunc_k=50)

        # --- data generation with the trained network ----------------------
        logging.info('Generating Data-Set for Later Bottle Neck Training')
        gru_net.eval()
        tl.generate_bottleneck_data(gru_net, self.env, bn_episodes,
                                    bottleneck_data_path, cuda=cuda,
                                    max_steps=generate_max_steps)

        # NOTE(review): if gru_net is a torch nn.Module, .cpu() moves it in
        # place, so the network written to the readme and returned below
        # would live on the CPU even when cuda is set -- confirm callers
        # expect this.
        tl.generate_trajectories(self.env, 500, batch_size,
                                 gru_prob_data_path, gru_net.cpu())

        readme_info = {'time_taken': time.time() - began_at}
        tl.write_net_readme(gru_net, gru_dir, info=readme_info)
        return gru_net
Esempio n. 2
0
 def bgru_train(self, bgru_net, gru_net, cuda, gru_scratch,
                trajectories_data_path, bgru_net_path, bgru_plot_dir,
                batch_size, train_epochs, gru_prob_data_path, bgru_dir):
     """Train the binary GRU network and write its readme.

     Before training, ``gru_net`` is evaluated with ``gru_nn.test`` (10
     episodes, logging and rendering enabled) and the result is stored in
     ``self.env.spec.reward_threshold``.

     Two training paths exist:

     * ``gru_scratch`` truthy: trajectories are generated with
       ``tl.generate_trajectories(self.env, 3, 5, trajectories_data_path)``
       and the network is trained through ``gru_nn.train`` with Adam
       (lr=1e-3) and the caller's ``batch_size``.
     * otherwise: trajectories are generated from a CPU deep copy of
       ``bgru_net.gru_net`` and the network is trained through
       ``bgru_nn.train`` with Adam (lr=1e-4), a fixed batch size of 5,
       ``test_episodes=1`` and ``trunc_k=100``. ``batch_size`` is not used
       on this path.

     Returns:
         The trained network as returned by the training routine. Earlier
         versions returned ``None``; the value is now returned so callers
         can use it the same way as the result of ``train_gru``.
     """
     self.env.spec.reward_threshold = gru_nn.test(gru_net,
                                                  self.env,
                                                  10,
                                                  log=True,
                                                  cuda=cuda,
                                                  render=True)
     logging.info('Training Binary GRUNet!')
     bgru_net.train()
     _start_time = time.time()
     if gru_scratch:
         optimizer = optim.Adam(bgru_net.parameters(), lr=1e-3)
         train_data = tl.generate_trajectories(self.env, 3, 5,
                                               trajectories_data_path)
         bgru_net = gru_nn.train(bgru_net, self.env, optimizer,
                                 bgru_net_path, bgru_plot_dir, train_data,
                                 batch_size, train_epochs, cuda)
     else:
         optimizer = optim.Adam(bgru_net.parameters(), lr=1e-4)
         # Deep copy before .cpu() so the network being trained is not
         # itself moved off its current device.
         train_data = tl.generate_trajectories(
             self.env, 3, 5, gru_prob_data_path,
             copy.deepcopy(bgru_net.gru_net).cpu())
         bgru_net = bgru_nn.train(bgru_net,
                                  self.env,
                                  optimizer,
                                  bgru_net_path,
                                  bgru_plot_dir,
                                  train_data,
                                  5,
                                  train_epochs,
                                  cuda,
                                  test_episodes=1,
                                  trunc_k=100)
     tl.write_net_readme(
         bgru_net,
         bgru_dir,
         info={'time_taken': round(time.time() - _start_time, 4)})
     # Hand the trained network back instead of silently dropping it.
     return bgru_net
Esempio n. 3
0
 def bhx_train(self, bhx_net, hx_train_data, hx_test_data, bhx_net_path,
               bhx_plot_dir, batch_size, train_epochs, cuda, target_net,
               bhx_dir):
     """Train the hidden-state (HX) bottleneck network and write its readme.

     The network is put in training mode and trained through ``qbn.train``
     on the ``(hx_train_data, hx_test_data)`` pair, using Adam (lr=1e-4,
     no weight decay), ``grad_clip=5`` and ``low=-0.02`` / ``high=0.02``.
     ``target_net`` and ``self.env`` are forwarded to the training routine.
     The elapsed wall-clock time, rounded to 4 decimals, is recorded in the
     readme under ``'time_taken'``.

     Returns:
         The trained network as returned by ``qbn.train``. Earlier versions
         returned ``None``; the value is now returned so callers can use it
         the same way as the result of ``train_gru``.
     """
     bhx_start_time = time.time()
     logging.info('Training HX SandGlassNet!')
     optimizer = optim.Adam(bhx_net.parameters(), lr=1e-4, weight_decay=0)
     bhx_net.train()
     bhx_net = qbn.train(bhx_net, (hx_train_data, hx_test_data),
                         optimizer,
                         bhx_net_path,
                         bhx_plot_dir,
                         batch_size,
                         train_epochs,
                         cuda,
                         grad_clip=5,
                         target_net=target_net,
                         env=self.env,
                         low=-0.02,
                         high=0.02)
     bhx_end_time = time.time()
     tl.write_net_readme(
         bhx_net,
         bhx_dir,
         info={'time_taken': round(bhx_end_time - bhx_start_time, 4)})
     # Hand the trained network back instead of silently dropping it.
     return bhx_net
Esempio n. 4
0
                args.generate_train_data, gru_dir)
        # ***********************************************************************************
        # GRU Network                                                                       *
        # ***********************************************************************************
        if args.gru_train or args.gru_test:
            tl.set_log(gru_dir, 'train' if args.gru_train else 'test')
            gru_net = GRUNet(len(obs), args.gru_size, int(env.action_space.n))

            if args.cuda:
                gru_net = gru_net.cuda()
            if args.gru_train:
                logging.info(['No Training Performed!!'])
                logging.warning(
                    'We assume that we already have a pre-trained model @ {}'.
                    format(gru_net_path))
                tl.write_net_readme(gru_net, gru_dir, info={})
            if args.gru_test:
                test_performance = fsm_object.test_gru(gru_net, gru_net_path,
                                                       args.cuda)
        # ***********************************************************************************
        # Generating BottleNeck training data                                               *
        # ***********************************************************************************
        if args.generate_bn_data:
            tl.set_log(data_dir, 'generate_bn_data')
            logging.info('Generating Data-Set for Later Bottle Neck Training')
            gru_net = GRUNet(len(obs), args.gru_size, int(env.action_space.n))
            gru_net.load_state_dict(torch.load(gru_net_path))
            gru_net.noise = False
            if args.cuda:
                gru_net = gru_net.cuda()
            gru_net.eval()