def test_set_weights(self):
    '''set_weights round-trip: modified weights should actually be stored.

    The original loop ``for w in weights: w = w * 2`` only rebound the
    loop variable and never changed the list, so the test silently set
    back the unmodified weights.
    '''
    model = lambda: create_test_model(10, 10)
    model = FFNet(model)
    weights = model.get_weights()
    # Build a genuinely modified weight list (the intent of the old loop).
    doubled = [w * 2 for w in weights]
    model.set_weights(doubled)
    # Round-trip check — assumes get_weights reflects the last set_weights
    # call exactly; TODO confirm against FFNet's contract.
    for got, expected in zip(model.get_weights(), doubled):
        np.testing.assert_allclose(got, expected)
def __init__(self, model, memory_size=1024, Batch_size=32, Gamma=0.99, Epsilon=None, K=1, name='Agent'):
    '''Create an Agent from a model description.

    Parameters
    ----------
    model : model-builder passed straight to ``FFNet``.
    memory_size : maximum number of transitions kept in replay memory.
    Batch_size : minibatch size used when training.
    Gamma : discount factor for future rewards.
    Epsilon : either a fixed exploration rate (int/float) or an iterator
        yielding a decaying schedule. ``None`` (the default) builds a fresh
        ``rangefloat(1.0, 0.1, 1e6)`` schedule per instance.
    K : action-repeat interval.
    name : agent name (currently unused; kept for interface compatibility).
    '''
    if Epsilon is None:
        # Build the schedule here rather than in the signature: a generator
        # default would be created once at ``def`` time and shared — and
        # exhausted — across every Agent instance.
        Epsilon = rangefloat(1.0, 0.1, 1e6)
    self.Memory = deque(maxlen=memory_size)
    self.Batch_size = Batch_size
    self.Gamma = Gamma
    # Fixed rate vs. schedule: isinstance also accepts int/float subclasses,
    # unlike the original ``type(...) is`` checks.
    if isinstance(Epsilon, (int, float)):
        self.Epsilon = Epsilon
        self.Epsilon_gen = None
    else:
        self.Epsilon_gen = Epsilon
        self.Epsilon = next(self.Epsilon_gen)
    self.K = K
    self.current_state = None
    self.current_action = None
    self.model = FFNet(model)
    self.terminal = False
def net_async(pipe, args, kwargs):
    '''Worker loop that owns a private FFNet and serves requests over a pipe.

    Parameters
    ----------
    pipe : pair of queue-like endpoints — ``pipe[0]`` delivers request
        messages (each with ``.header``, an ``FFNetFunctions`` value, and
        ``.data``); replies are pushed onto ``pipe[1]``.
    args, kwargs : forwarded verbatim to the ``FFNet`` constructor.

    Runs until an ``FFNetFunctions.Quit`` message arrives.
    '''
    # Thread-local storage keeps the network private to this worker thread.
    mydata = threading.local()
    mydata.net = FFNet(*args, **kwargs)
    while True:
        message = pipe[0].get()
        data = message.data
        if message.header == FFNetFunctions.predict_on_batch:
            pipe[1].put(mydata.net.predict_on_batch(data))
        elif message.header == FFNetFunctions.train_on_batch:
            # Fire-and-forget training: the loss is deliberately not sent
            # back (the original bound it to an unused local ``loss``).
            mydata.net.train_on_batch(data[0], data[1])
        elif message.header == FFNetFunctions.get_weights:
            pipe[1].put(mydata.net.get_weights())
        elif message.header == FFNetFunctions.set_weights:
            mydata.net.set_weights(data)
        elif message.header == FFNetFunctions.Quit:
            break
def test_shape_binary(self):
    '''A 10-input / 1-output net maps one sample to an output of shape (1, 1).'''
    net = FFNet(lambda: create_test_model(10, 1))
    batch = np.ones((1, 10))
    output = net.predict_on_batch(batch)
    self.assertEqual(output.shape, (1, 1))
def test_get_weights(self):
    '''get_weights returns a usable, non-empty collection of weights.

    The original version made the call but asserted nothing, so it could
    only ever fail by raising.
    '''
    model = lambda: create_test_model(10, 10)
    model = FFNet(model)
    weights = model.get_weights()
    self.assertIsNotNone(weights)
    # Sibling tests iterate the result, so it must be iterable and non-empty.
    self.assertGreater(len(list(weights)), 0)
def test_shape_multilabel_multiple(self):
    '''A 10-input / 10-output net maps a 10-sample batch to shape (10, 10).'''
    net = FFNet(lambda: create_test_model(10, 10))
    output = net.predict_on_batch(np.ones((10, 10)))
    self.assertEqual(output.shape, (10, 10))
class Agent(object):
    '''An agent class used in testing reinforcement learning algorithms.

    This class is made with the purpose that it would allow multiple agents
    to be trained concurrently in a single game, so the majority of their
    work should be hidden behind this class.
    '''

    def __init__(self, model, memory_size=1024, Batch_size=32, Gamma=0.99, Epsilon=rangefloat(1.0, 0.1, 1e6), K=1, name='Agent'):
        '''Create Agent from model description file.

        Epsilon may be a fixed rate (int/float) or an iterator yielding a
        decaying schedule. NOTE(review): the generator default is evaluated
        once at ``def`` time and therefore shared across instances — confirm
        whether more than one Agent is ever built with the default.
        '''
        # Replay memory: bounded deque of (state, action, reward, next_state, terminal).
        self.Memory = deque(maxlen=memory_size)
        self.Batch_size = Batch_size
        self.Gamma = Gamma
        # Fixed exploration rate vs. schedule iterator.
        if type(Epsilon) is float or type(Epsilon) is int:
            self.Epsilon = Epsilon
            self.Epsilon_gen = None
        else:
            self.Epsilon_gen = Epsilon
            self.Epsilon = next(self.Epsilon_gen)
        self.K = K  # action-repeat interval used by chooseAction
        self.current_state = None
        self.current_action = None
        self.model = FFNet(model)
        self.terminal = False

    def initialize(self, current_state, current_action):
        # Reset episode state; must be called before chooseAction/feedback.
        self.current_state = current_state
        self.current_action = current_action
        self.terminal = False

    def chooseAction(self, time_step):
        '''Choose an action based on the current state.

        Epsilon-greedy: with probability Epsilon pick a random one-hot
        action, otherwise the argmax of the model's prediction. A new
        action is only chosen every K steps; in between the previous
        action is repeated.
        '''
        action = np.zeros(self.current_action.shape)
        if time_step % self.K == 0:
            if random.random() <= self.Epsilon:
                # Random one-hot: one uniformly chosen index per axis.
                index = [random.randint(0, i - 1) for i in action.shape]
                action[index] = 1
            else:
                x = self.model.predict_on_batch(self.current_state)
                index = np.argmax(x)
                action[index] = 1
            # NOTE(review): placement reconstructed from collapsed source —
            # keeping this inside the K-step guard preserves action repeat.
            self.current_action = action.astype(np.uint8)
        return self.current_action

    def chooseOptimal(self):
        # Greedy action (no exploration); does not update current_action.
        action = np.zeros(self.current_action.shape)
        x = self.model.predict_on_batch(self.current_state)
        index = np.argmax(x)
        action[index] = 1
        return action

    def feedback(self, frame, reward, terminal):
        '''Receive feedback from Game.

        Slides the frame stack: the new frame is prepended and the oldest
        dropped along the last axis (assumes 4-D NCHW/NHWC-style state with
        frames stacked on axis 3 — TODO confirm against the Game caller),
        then the transition is stored in replay memory.
        '''
        new_state = np.append(frame, self.current_state[..., 0:-1], axis=3)
        self.Memory.append((self.current_state, self.current_action, reward, new_state, terminal))
        self.current_state = new_state
        self.terminal = terminal

    def isTerminal(self):
        return self.terminal

    def save(self, name):
        # TODO: persistence is currently disabled; re-enable when FFNet
        # exposes a save method.
        #self.model.save(name)
        pass

    def train(self):
        '''Train the Agent on one minibatch sampled from replay memory.

        Standard Q-learning target: for the action actually taken,
        y = reward + Gamma * max_a Q(next_state, a), with the bootstrap
        term zeroed on terminal transitions via ``np.invert`` on the
        boolean terminal array. Raises ValueError (from random.sample)
        if Memory holds fewer than Batch_size transitions.
        '''
        # Advance the exploration schedule once per training step.
        if self.Epsilon_gen is not None:
            self.Epsilon = next(self.Epsilon_gen)
        batch = random.sample(self.Memory, self.Batch_size)
        # Unpack the transition tuples column-wise.
        pseq_batch = np.concatenate([b[0] for b in batch], axis=0)   # states
        action_batch = np.stack([b[1] for b in batch])               # one-hot actions
        reward_batch = np.array([b[2] for b in batch])
        seq_batch = np.concatenate([b[3] for b in batch], axis=0)    # next states
        term_batch = np.array([b[4] for b in batch])                 # bool terminals
        out = self.model.predict_on_batch(seq_batch)
        y_batch = self.model.predict_on_batch(pseq_batch)
        # Overwrite only the taken-action entries with the Q-learning target;
        # non-taken actions keep the model's own prediction (zero gradient).
        y_batch[action_batch == 1] = reward_batch + self.Gamma * np.max(
            out, axis=1) * np.invert(term_batch)
        return self.model.train_on_batch(pseq_batch, y_batch)

    def get_epsilon(self):
        return self.Epsilon