Example no. 1
0
 def pretrain(self,
              memory_pretrained_fn,
              batch_size=512,
              iterations=int(1e5)):
     """Pretrain the Q-network from a pickled replay memory.

     Args:
         memory_pretrained_fn: path to a pickle file holding the
             transition list to load into the replay memory.
         batch_size: mini-batch size for each `train_q` step.
         iterations: number of training iterations to run.

     Side effects: replaces `self._memory`, periodically hard-updates
     the target network `self._Qt`, and checkpoints weights and loss
     history to disk under `self.save_name`.
     """
     # NOTE(review): unpickling executes arbitrary code — only load
     # trusted files here.
     with open(memory_pretrained_fn, 'rb') as f:
         memory_pretrained = pickle.load(f)
     self._memory = ReplayMemory(len(memory_pretrained))
     self._memory.memory = memory_pretrained
     self.train_q_batch_size = batch_size
     start_time = time.time()
     for i in range(iterations):
         if i % 500 == 0:
             if i > 1:
                 # Report per-parameter drift between target and online
                 # networks before syncing (non-numeric entries raise
                 # TypeError and are just printed).
                 for key in self._Qt.state_dict():
                     try:
                         print(
                             sum(self._Qt.state_dict()[key] -
                                 self._Q.state_dict()[key]))
                     except TypeError as e:
                         print(e)
             # Hard update of the target network.
             self._Qt = copy.deepcopy(self._Q)
         if i % 10000 == 0:
             # Periodic checkpoint of weights and loss history.
             torch.save(self._Q.state_dict(), f'{self.save_name}_{i}.pth')
             with open(f'{self.save_name}_loss_{i}.pkl', 'wb') as f:
                 pickle.dump(self.loss, f)
         print(f'Training iteration {i}...')
         self.train_q(squeeze=True)
     end_time = time.time()
     # BUG FIX: was `start_time - end_time`, which reported a negative
     # duration.
     print(f'Training completed. Took {end_time - start_time} seconds')
     torch.save(self._Q.state_dict(), f'{self.save_name}_{iterations}.pth')
     with open(f'{self.save_name}_loss_{iterations}.pkl', 'wb') as f:
         pickle.dump(self.loss, f)
Example no. 2
0
    def __init__(self, save_name='./data/', load_name=None):
        """Set up DQN hyper-parameters, networks, optimizer and buffers.

        Args:
            save_name: path prefix used when writing weights/metrics.
            load_name: path prefix to load weights from; falls back to
                `save_name` when not given.
        """
        super(BaseRLAgent, self).__init__()

        # Training schedule / hyper-parameters.
        self.training = False
        self.max_frames = 10000000
        self._epsilon = Epsilon(start=0.9, end=0.1, update_increment=0.0001)
        self.gamma = 0.99
        self.train_q_per_step = 4
        self.train_q_batch_size = 256
        self.steps_before_training = 5000
        self.target_q_update_frequency = 10000

        # Checkpoint path prefixes.
        self.save_name = save_name
        self.load_name = save_name if load_name is None else load_name

        # Networks, optimizer, loss criterion and replay buffer.
        self._Q = None
        self._Qt = None
        self._optimizer = None
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self._criterion = nn.MSELoss()
        self._memory = ReplayMemory(100000)

        # Rolling metric buffers and their exported histories.
        self._loss = deque(maxlen=int(1e5))
        self._max_q = deque(maxlen=int(1e5))
        self.loss = []
        self.max_q = []
        self.reward = []

        # Per-episode state.
        self._action = None
        self._screen = None
        self._screen_size = 32
        self.n_episodes = 0
        self.features = None
Example no. 3
0
 def __init__(self, mapname):
     """Initialize the scripted battle agent for the given map."""
     super(BattleAgentScripted, self).__init__()
     self.mapname = mapname
     self.screen_size = 32
     self.obs = None
     # Replay buffer sized to hold every frame of the run.
     self.max_frames = int(1e5)
     self._memory = ReplayMemory(self.max_frames)
     # Screen feature layers this agent consumes.
     self.features = [_PLAYER_RELATIVE, _UNIT_TYPE, _UNIT_HIT_POINTS]
Example no. 4
0
 def __init__(self, mapname):
     """Initialize the scripted beacon agent for the given map."""
     super(BattleAgentScriptedBeacon, self).__init__()
     self.mapname = mapname
     self.screen_size = 32
     self.obs = None
     # Replay buffer sized to hold every frame of the run.
     self.max_frames = int(1e5)
     self._memory = ReplayMemory(self.max_frames)
     # Number of screen feature layers this agent consumes.
     self.features = 5