def test_no_5_actions_0_1_what_is_terrible_idea(self):
    agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.model = agent._build_model(layers=[20, 50, 30, 18])
    env = Env(agents)
    env.u = env_settings.u_all_2
    max_time = 90
    Globals().time = 0
    for t in range(max_time):
        actions = [0]
        # if t == 60 or t >= 63:
        #     actions = [1]
        # if t == 61 or t == 62:
        #     actions = [orange]
        env.step(actions)
    for agent in agents:
        agent.reshape_rewards()
    # exportData = ExportData(learningMethod='DQN', learningEpochs=0,
    #                         nets=env.global_memories, netName='net4',
    #                         densityName='test_learn_no_1')
    # exportData.saveToJson()
    while True:
        agents[0].train_full(epochs=15000, learning_rate=0.001)
        x = [4, 4, 0]
        predictions = agents[0].model.predict(np.array([x]))
        self.assertTrue(predictions[0][0] > predictions[0][1])
def test_no_1_111_then_222_what_is_brilliant_idea(self):
    agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.model = agent._build_model(layers=[20, 50, 30, 18])
    env = Env(agents)
    env.u = env_data.u_all_2
    max_time = 90
    Globals().time = 0
    for t in range(max_time):
        # actions: [1, 1, 1] for t < 60, then [2, 2, 2] from t == 60 on
        actions = [1, 1, 1]
        if t == 60 or t >= 62:
            actions = [2, 2, 2]
        if t == 61:
            actions = [0, 0, 0]
        env.step(actions)
    for agent in agents:
        agent.reshape_rewards()
    exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                            nets=env.global_memories, netName='net4',
                            densityName='test_learn_no_1')
    exportData.saveToJson()
    agents[0].train_full(epochs=7000, learning_rate=0.001)
    x = [2, 2, 2] + [0.6, 0.6, 28.88] + [0.126, 0.126, 0.18] + [1]
def train(learntAgents=True, max_time_learn=20):
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    start_time = timer()
    x_batch = batches[0]['x_batch']
    y_batch = batches[0]['y_batch']
    model = models[0]
    val_loss = 5000
    escape_flag = False
    while timer() - start_time < max_time_learn and not escape_flag:
        res = model.fit(x_batch, y_batch, batch_size=100, epochs=1, verbose=0,
                        validation_split=0.2)
        if res.history['val_loss'][-1] > val_loss:
            # validation loss went up - stop training
            escape_flag = True
            loss = res.history['val_loss'][-1]
            print(f'network result: {loss} (val loss)')
            val_loss = 5000
        else:
            val_loss = res.history['val_loss'][-1]
        # track how the prediction for the monitored state evolves
        x = [4, 20]
        pred = model.predict(np.array([x]))
        Globals().pred_plot_memory.append(pred)
    model.save('static_files/model-agent' + str(0) + '.h5')
    plt.plot([pred[0][0] for pred in Globals().pred_plot_memory], color='red', label='0')
    plt.plot([pred[0][1] for pred in Globals().pred_plot_memory], color='green', label='1')
    plt.legend()
    plt.title('Predicted rewards for the actions taken\nin the monitored state [4, 20]')
    plt.savefig('images_generated/state_predictions.png')
    plt.close()
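# A minimal usage sketch for this `train` variant (illustrative, not part of the
# original module): it assumes the batch files consumed by get_batches() were
# already written by one of the generate_*_epochs helpers defined in this repo.
def example_train_run():
    generate_random_epochs(learntAgents=False, save_front_json=False)
    train(learntAgents=False, max_time_learn=30)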
def test_no_2_pass_action_0_long_time_then_1(self):
    # TESTING: phase changes
    max_time = 90
    agents = get_SmartAgents()
    for agent in agents:
        agent.yellow_phase_duration = 2
    Globals().time = 0
    env = Env(agents)
    Globals().u_value = 2
    env.u = Globals().get_u(max_time)
    env.yellow_phase_duration = 2
    for t in range(max_time):
        actions = [0, 0, 0]
        if t == 60 or t > 62:
            actions = [1, 1, 1]
        if t == 61 or t == 62:
            actions = [yellow, yellow, yellow]
        time = Globals().time  # time = t
        env.step(actions)
        time = Globals().time  # time = t + 1
        if t in range(3, 60):
            self.assertEqual([agent.actual_phase for agent in agents], [0, 0, 0])
        if t == 60 or t == 61:
            self.assertEqual([agent.actual_phase for agent in agents], [yellow, yellow, yellow])
        if t >= 62:
            self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
    exportData = ExportData(learningMethod='Nothing', learningEpochs=0,
                            nets=env.global_memories, netName='net14',
                            densityName='test_no_2')
    exportData.saveToJson()
def test_no_4_pass_action_0_1_0(self):
    # TESTING: actual_phase - is it also consistent in the memories
    max_time = 90
    agents = get_SmartAgents()
    for agent in agents:
        agent.yellow_phase_duration = 2
    Globals().time = 0
    env = Env(agents)
    env.u = env_settings.u_all_2
    env.yellow_phase_duration = 2
    for t in range(max_time):
        # actions: [0] for t < 60, [1] from t == 60, back to [0] from t == 70
        actions = [0]
        if t == 60 or 63 <= t < 70:
            actions = [1]
        if t == 61 or t == 62:
            actions = [yellow]
        if t == 70 or t >= 73:
            actions = [0]
        if t == 71 or t == 72:
            actions = [yellow]
        env.step(actions)
        # print(f't:{t}, {agents[0].actual_phase}')
        if t in range(3, 60):
            # after the step at time t this is the phase for the state at time t
            self.assertEqual([agent.actual_phase for agent in agents], [0])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [0])
        if t == 60 or t == 61:
            self.assertEqual([agent.actual_phase for agent in agents], [yellow])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [yellow])
        if 62 <= t <= 69:
            self.assertEqual([agent.actual_phase for agent in agents], [1])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [1])
        if t == 70 or t == 71:
            self.assertEqual([agent.actual_phase for agent in agents], [yellow])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [yellow])
        if t >= 72:
            self.assertEqual([agent.actual_phase for agent in agents], [0])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [0])
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net14',
                            densityName='test_no_4')
    exportData.saveToJson()
def run_random(epochs, agents=None) -> List[SmartAgent]:
    Globals().max_epsilon = 0
    if agents is None:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        env: Env = epoch(agents)
    return agents
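# Usage sketch (hypothetical, not in the original module): run ten epochs of
# purely random actions on freshly built agents. `run_random` expects an
# iterable of epoch indices, so a range works directly.
def example_run_random():
    random_agents = run_random(epochs=range(10))
    return random_agents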
def generate_random_epochs(learntAgents=False, save_front_json=False, epochs=range(1)):
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.memories = []
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=Globals().get_u(Globals().vp.max_time_learn),
                         time=Globals().vp.max_time_learn)
        for agent in env.agents:
            agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                                    nets=env.global_memories, netName='politechnika',
                                    densityName='random_now' + str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
    save_batches(agents)
    return agents
def test_no_6_starting_phase_actual_phase(self):
    agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.model = agent._build_model(layers=[20, 50, 30, 18])
    env = Env(agents)
    env.u = env_data.u_all_2
    max_time = 90
    Globals().time = 0
    for t in range(max_time):
        # actions: [1, 1, 1] for t < 60, [2, 2, 2] from t == 60, [3, 3, 3] from t == 70
        actions = [1, 1, 1]
        if t == 60 or t >= 62:
            actions = [2, 2, 2]
        if t == 61 or t == 71:
            actions = [0, 0, 0]
        if t == 70 or t >= 72:
            actions = [3, 3, 3]
        env.step(actions)
    for agent in agents:
        agent.reshape_rewards()
    print(agents[0].memories[0])
    self.assertEqual(agents[0].memories[60].state.starting_actual_phase, 1)
    self.assertEqual(agents[0].memories[61].state.starting_actual_phase, 0)
    self.assertEqual(agents[0].memories[62].state.starting_actual_phase, 0)
    self.assertEqual(agents[0].memories[63].state.starting_actual_phase, 2)
    self.assertEqual(agents[0].memories[70].state.starting_actual_phase, 2)
    self.assertEqual(agents[0].memories[71].state.starting_actual_phase, 0)
    self.assertEqual(agents[0].memories[72].state.starting_actual_phase, 0)
    self.assertEqual(agents[0].memories[73].state.starting_actual_phase, 3)
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net4',
                            densityName='test_learn_no_1')
    exportData.saveToJson()
def test_no_5_reshaping_pass_action_0_1_0(self):
    # TESTING: rewards
    max_time = 90
    agents = get_SmartAgents()
    Globals().time = 0
    Globals().gamma = 0
    env = Env(agents)
    env.u = env_settings.u_all_9
    for t in range(max_time):
        # actions: [0] for t < 60, [1] from t == 60, back to [0] from t == 70
        actions = [0]
        if t == 1 or t == 2:
            actions = [orange]
        if t == 60 or 63 <= t < 70:
            actions = [1]
        if t == 61 or t == 62:
            actions = [orange]
        if t == 70 or t >= 73:
            actions = [0]
        if t == 71 or t == 72:
            actions = [orange]
        env.step(actions)
    env.agents[0].save_batch()
    env.agents[0].reshape_rewards()
    # self.assertAlmostEqual(env.agents[0].memories[60].reward, 2, 0)
    # self.assertAlmostEqual(env.agents[0].memories[61].reward, 4, 0)
    # self.assertAlmostEqual(env.agents[0].memories[62].reward, 6, 0)
    # env.update_memory_rewards()
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net11',
                            densityName='test_no_6')
    exportData.saveToJson()
def test_no_2_pass_action_1_1_1_long_time_then_2_2_2(self):
    # TESTING: phase changes
    max_time = 90
    agents = get_SmartAgents()
    Globals().time = 0
    env = Env(agents)
    env.u = env_settings.u_all_2
    for t in range(max_time):
        # actions: [1, 1, 1] for t < 60, then [2, 2, 2] from t == 60 on
        actions = [1, 1, 1]
        if t == 60 or t >= 62:
            actions = [2, 2, 2]
        if t == 61:
            actions = [0, 0, 0]
        # time = Globals().time  # time = t
        env.step(actions)
        # time = Globals().time  # time = t + 1
        if t in range(60):
            self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
        if t == 60 or t == 61:
            self.assertEqual([agent.actual_phase for agent in agents], [0, 0, 0])
        if t >= 63:
            self.assertEqual([agent.actual_phase for agent in agents], [2, 2, 2])
def test_no_1_pass_action_1_1_1_all_time(self):
    # u - 2 vehicles flow in everywhere at every moment
    # we apply actions [1, 1, 1] the whole time
    # all vehicles pass within a time step
    # orange_phase_duration = 0, same as phase_duration
    # TESTING: rewards
    max_time = 90
    agents = get_SmartAgents()
    for agent in agents:
        agent.orange_phase_duration = 0
    Globals().time = 0
    env = Env(agents)
    env.u = env_data.u_all_2
    for t in range(max_time):
        actions = [1, 1, 1]
        env.step(actions)
        # print(f't:{t} {env.global_rewards[t]}')
        if 0 <= t <= 2:
            # nothing is flowing through yet at all
            self.assertEqual(env.global_rewards[t], [0, 0, 0])
        if 3 <= t <= 11:
            # only agent 0 has a flow of 2
            self.assertEqual(env.global_rewards[t][0], 2)
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net4',
                            densityName='test_no_1')
    exportData.saveToJson()
def test_no_5_reshaping_pass_action_1_1_1_long_time_then_2_2_2_long_time_then_3_3_3(self):
    # TESTING: rewards
    max_time = 90
    agents = get_SmartAgents()
    Globals().time = 0
    Globals().gamma = 0
    env = Env(agents)
    env.u = env_settings.u_all_2
    for t in range(max_time):
        # actions: [1, 1, 1] for t < 60, [2, 2, 2] from t == 60, [3, 3, 3] from t == 70
        actions = [1, 1, 1]
        if t == 60 or 63 <= t < 70:
            actions = [2, 2, 2]
        if t == 61 or t == 62:
            actions = [0, 0, 0]
        if t == 70 or t >= 73:
            actions = [3, 3, 3]
        if t == 71 or t == 72:
            actions = [0, 0, 0]
        env.step(actions)
    env.agents[0].save_batch()
    env.agents[0].reshape_rewards()
    self.assertAlmostEqual(env.agents[0].memories[60].reward, 23.3, 0)
    self.assertAlmostEqual(env.agents[0].memories[61].reward, 37, 0)
    self.assertAlmostEqual(env.agents[0].memories[62].reward, 39.1, 0)
    env.update_global_memory_rewards()
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net4',
                            densityName='test_no_5')
    exportData.saveToJson()
def epoch_random(env):
    Globals().epsilon = 0
    agents: List[SmartAgent] = get_SmartAgents()
    # max_time is assumed to be a module-level constant here
    for t in range(max_time):
        actions: List[ActionInt] = [random.choice(agent.local_action_space) for agent in agents]
        env.step(actions)
    return agents
def test_no_4_pass_action_1_1_1_long_time_then_2_2_2_long_time_then_3_3_3(self):
    # TESTING: actual_phase - is it also consistent in the memories
    max_time = 90
    agents = get_SmartAgents()
    Globals().time = 0
    env = Env(agents)
    env.u = env_settings.u_all_2
    for t in range(max_time):
        # actions: [1, 1, 1] for t < 60, [2, 2, 2] from t == 60, [3, 3, 3] from t == 70
        actions = [1, 1, 1]
        if t == 60 or 63 <= t < 70:
            actions = [2, 2, 2]
        if t == 61 or t == 62:
            actions = [0, 0, 0]
        if t == 70 or t >= 73:
            actions = [3, 3, 3]
        if t == 71 or t == 72:
            actions = [0, 0, 0]
        env.step(actions)
        if t in range(60):
            # after the step at time t this is the phase for the state at time t
            self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [1, 1, 1])
        if t == 60 or t == 61:
            self.assertEqual([agent.actual_phase for agent in agents], [0, 0, 0])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [0, 0, 0])
        if 62 <= t <= 69:
            self.assertEqual([agent.actual_phase for agent in agents], [2, 2, 2])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [2, 2, 2])
        if t == 70 or t == 71:
            self.assertEqual([agent.actual_phase for agent in agents], [0, 0, 0])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [0, 0, 0])
        if t >= 72:
            self.assertEqual([agent.actual_phase for agent in agents], [3, 3, 3])
            self.assertEqual([agent.memories[t].state.actual_phase for agent in agents], [3, 3, 3])
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net4',
                            densityName='test_no_4')
    exportData.saveToJson()
def test_no_5_some_batch_data(self):
    agents = get_SmartAgents()
    actual_phase = 0
    orange_phase_duration = 2
    phase_duration = orange_phase_duration + 1
    den = [0, 0, 10, 8]
    den_pred = [2, 2, 2, 78]
    state_to_predict = LearningState(actual_phase=actual_phase,
                                     starting_actual_phase=actual_phase,
                                     phase_duration=phase_duration,
                                     global_densities=den_pred + [0, 0],
                                     densities=den,
                                     orange_phase_duration=orange_phase_duration)
    for i in range(150):
        action_0 = [0]  # keeping the phase - the worse choice
        env = single_simulate(agents, actual_phase, phase_duration, den,
                              orange_phase_duration=orange_phase_duration,
                              actions=action_0, u=env_settings.u_all_2)
        for x in range(40):
            simulate_from_env(env, action_0)
        action_1 = [1]  # changing the phase - the better choice
        # print('base state', env.x[env.t])
        # print(env.global_memories[-1])
        simulate_from_env(env, action_1)
        # print('state', env.x[env.t])
        simulate_from_env(env, [orange])
        # print('state', env.x[env.t])
        simulate_from_env(env, [orange])
        # print('state', env.x[env.t])
        for x in range(20):
            simulate_from_env(env, action_1)
            # print('state', env.x[env.t])
        memories = env.global_memories
        memories += env.global_memories
        save_batches(agents)
        batches = get_batches()
        train(agents=agents)
        env.global_memories = []
        for agent in env.agents:
            agent.memories = []
        prediction = agents[0].model.predict(state_to_predict.to_learn_array())
        # converges to 10, 20, whatever
        print('prediction :)', prediction)
    exportData = ExportData(learningMethod='DQN', learningEpochs=0, nets=memories,
                            netName='net14', densityName='test_fixed_no_4')
    exportData.saveToJson()
    a = 23  # leftover debug value
def generate_random_epochs(learntAgents=False, save_front_json=False, epochs=range(200),
                           plotting=False):
    # learntAgents = True
    # save_json = True
    # plotting = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=env_settings.u_all_4)
        for agent in env.agents:
            agent.reshape_rewards()
        env.update_memory_rewards()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                                    nets=env.global_memories, netName='net4',
                                    densityName='random_' + str(e))
            exportData.saveToJson()
        x_batch, y_batch = agents[0].memory_to_minibatch_with_oranges()
        if plotting:
            cars_outs.append(env.cars_out)
            print('rew', env.count_summed_rewards()[0])
            print('cars_out', env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
    for i in range(len(agents)):
        # print('i', i)
        filename = 'static_files/x_batch_agent_' + str(i) + '.txt'
        x_batch, y_batch = agents[i].full_batch()
        np.savetxt(filename, x_batch, delimiter=',')
        filename = 'static_files/y_batch_agent_' + str(i) + '.txt'
        np.savetxt(filename, y_batch, delimiter=',')
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the network - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Mean reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
def train(learntAgents=True, max_time_learn=20, agents=None):
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 5000
        val_loss_best = 5000
        escape_flag = False
        escape_val = 0
        a = 0
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch, y_batch,
                            batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_done,
                            epochs=Globals().epochs_done + Globals().epochs_learn,
                            verbose=0, validation_split=0.2,
                            callbacks=[Globals().tensorboard,
                                       agents[i].weights_history_callback])
            Globals().epochs_done += Globals().epochs_learn
            if res.history['val_loss'][-1] < val_loss_best:
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] > val_loss:
                escape_val += 1
                # print('escape_val', escape_val)
                # print('val loss', res.history['val_loss'][-1])
                if escape_val > 2:
                    escape_flag = True
                    # print('would stop here!')
                    # print('network result', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            if i == 0:
                x = [0, 0, 10, 15, 1, 0, 0, 0]
                pred = model.predict(np.array([x]))
                try:
                    diff = abs(pred[0][0] - Globals().pred_plot_memory[-1][0][0]) + \
                           abs(pred[0][1] - Globals().pred_plot_memory[-1][0][1])
                    if a == 0:
                        # print('diff', diff)
                        a += 1
                except IndexError:
                    # pred_plot_memory is still empty on the first pass
                    a = 23
                Globals().pred_plot_memory.append(pred)
        # print('best loss', val_loss_best)
        # print('end', model.get_weights())
        Globals().last_weights = model.get_weights()
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
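# Side note (a sketch, not part of the original module): the manual bookkeeping
# above - val_loss_best / weights_best / escape_val - mirrors what Keras ships
# as a callback. `fit_with_early_stopping` is a hypothetical helper showing the
# built-in equivalent; restore_best_weights=True rolls the model back to the
# weights with the best validation loss, just like model.set_weights(weights_best).
from keras.callbacks import EarlyStopping

def fit_with_early_stopping(model, x_batch, y_batch, max_epochs=10000):
    # Stop after 3 epochs without val_loss improvement and restore best weights.
    early_stop = EarlyStopping(monitor='val_loss', patience=3,
                               restore_best_weights=True)
    return model.fit(x_batch, y_batch, batch_size=100, epochs=max_epochs,
                     validation_split=0.2, verbose=0, callbacks=[early_stop])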
def generate_my_epochs(learntAgents=False, save_front_json=False, epochs=range(1),
                       plotting=False, reshaping=False, actions=None, clear_memory=True,
                       actual_number=''):
    save_front_json = True  # note: overrides the argument
    reshaping = True  # note: overrides the argument
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    # print(agents[0].orange_phase_duration)
    for e in epochs:
        Globals().epsilon = 1
        env: Env = my_epoch(agents, u=Globals().get_u(Globals().vp().max_time_learn),
                            time=Globals().vp().max_time_learn)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        if save_front_json:
            save_front_json = False  # export only the first epoch
            exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                                    nets=env.global_memories, netName='net16',
                                    densityName='my_epochs' + str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
    save_batches(agents, actual_number)
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the network - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Mean reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
    # if any(x for x in [mem.reward for mem in agents[0].memories] if x > 10.1):
    #     print("weeeeeeeeeeee")
    return agents
def test_no_1_(self):
    epochs = range(1)
    xy_20_all = []
    for e in epochs:
        agents: List[SmartAgent] = get_SmartAgents()
        Globals().epsilon = 1
        env: Env = epoch(agents)
        env.u = env_settings.u_all_2
        for agent in env.agents:
            agent.reshape_rewards()
        env.update_memory_rewards()
        env.remember_memory()
def test_no_3_pass_action_0_0_0_long_time_then_1_1_1_long_time_then_2_2_2(self):
    # TESTING: phase changes, phase_duration
    max_time = 90
    agents = get_SmartAgents()
    Globals().time = 0
    env = Env(agents)
    env.u = env_settings.u_all_2
    orange = 'orange'
    for t in range(max_time):
        # actions: [0, 0, 0] for t < 60, [1, 1, 1] from t == 60, [2, 2, 2] from t == 70
        actions = [0, 0, 0]
        if t == 60 or 63 <= t < 70:
            actions = [1, 1, 1]
        if t == 61 or t == 62:
            actions = [orange, orange, orange]
        if t == 70 or t >= 73:
            actions = [2, 2, 2]
        if t == 71 or t == 72:
            actions = [orange, orange, orange]
        env.step(actions)
        if t == 60 or t == 61:
            self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
        if 63 <= t <= 69:
            self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
        if t == 70 or t == 71:
            self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
        if t >= 73:
            self.assertEqual([agent.actual_phase for agent in agents], [2, 2, 2])
        # phase_duration checks
        if t == 60:
            self.assertEqual([agent.phase_duration for agent in agents], [0, 0, 0])
        if t == 61:
            self.assertEqual([agent.phase_duration for agent in agents], [1, 1, 1])
        if t == 62:
            # the phase has already lasted 2; we have just switched, but we do not
            # need to reset phase_duration yet
            self.assertEqual([agent.phase_duration for agent in agents], [2, 2, 2])
        if t == 63:
            self.assertEqual([agent.phase_duration for agent in agents], [3, 3, 3])
        if t == 70:
            self.assertEqual([agent.phase_duration for agent in agents], [0, 0, 0])
        if t == 71:
            self.assertEqual([agent.phase_duration for agent in agents], [1, 1, 1])
        if t == 72:
            self.assertEqual([agent.phase_duration for agent in agents], [2, 2, 2])
        if t == 73:
            self.assertEqual([agent.phase_duration for agent in agents], [3, 3, 3])
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net4',
                            densityName='test_no_3')
    exportData.saveToJson()
def train(learntAgents=True, max_time_learn=20):
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        print('get learnt!')
        agents = get_LearnSmartAgents()
        a = 7  # leftover debug value
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        val_loss = 5000
        escape_flag = False
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch, y_batch, batch_size=100, epochs=1, verbose=0,
                            validation_split=0.2)
            if res.history['val_loss'][-1] > val_loss:
                escape_flag = True
                print('network result', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            if i == 0:
                x = [4, 40, 0]
                pred = model.predict(np.array([x]))
                Globals().pred_plot_memory.append(pred)
        model.save('static_files/model-agent' + str(i) + '.h5')
        if i == 0:
            plt.plot([pred[0][0] for pred in Globals().pred_plot_memory], color='red', label='0')
            plt.plot([pred[0][1] for pred in Globals().pred_plot_memory], color='green', label='1')
            plt.plot([pred[0][2] for pred in Globals().pred_plot_memory], color='blue', label='2')
            plt.legend()
            plt.title('Predicted rewards for the actions taken in the monitored state')
            plt.savefig('foo' + str(Globals().run_no) + '.png')
            plt.close()
def test_learn_no_1_pass_action_1_1_1_long_time_then_2_2_2(self):
    # u - 2 vehicles flow in everywhere at every moment
    # we apply actions [1, 1, 1] the whole time
    # at time 60 we switch to actions [2, 2, 2]
    # all vehicles pass within a time step
    # orange_phase_duration = 0, same as phase_duration
    # TESTING: whether the SmartAgents learn from the correct rewards and states
    max_time = 90
    agents = get_SmartAgents()
    for agent in agents:
        agent.orange_phase_duration = 0
    Globals().time = 0
    env = Env(agents)
    env.u = env_data.u_all_2
    for t in range(max_time):
        actions = [1, 1, 1] if t < 60 else [2, 2, 2]
        env.step(actions)
        if t < 60:
            self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
        if t >= 60:
            self.assertEqual([agent.actual_phase for agent in agents], [2, 2, 2])
    self.assertEqual(agents[0].memories[60].action, 2)
    self.assertAlmostEqual(agents[0].memories[60].reward, 20.0, 0)
    # Moment 60
    real_state = agents[0].memories[60].state.to_learn_nd_array_densities_group()
    expected_state = np.array([[1, 4, 1]])  # densities 2.0, 28.88, 0.18 mapped to groups
    np.testing.assert_almost_equal(real_state, expected_state, decimal=0)
    # Moment 61
    real_state = agents[0].memories[61].state.to_learn_nd_array_densities_group()
    expected_state = np.array([[1, 3, 1]])  # densities 2.6, 10.8, 0.3 mapped to groups
    np.testing.assert_almost_equal(real_state, expected_state, decimal=0)
    Globals().batch_size = 90
    for agent in env.agents:
        agent.train()
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net4',
                            densityName='test_learn_no_2')
    exportData.saveToJson()
def test_no_4_batch_create(self):
    agents = get_SmartAgents()
    j = 1
    predictions = []
    while True:
        j += 1
        for agent in agents:
            agent.orange_phase_duration = 2
            agent.actual_phase = 0
            agent.starting_actual_phase = 0
            agent.memories = []
        action_1 = [1]
        action_0 = [0]
        env = Env(agents)
        for i in range(100):
            simulate_from_env(env, action_0)
        for i in range(50):
            simulate_from_env(env, action_1)
        exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                                nets=env.global_memories, netName='net14',
                                densityName='test_batching_4')
        exportData.saveToJson()
        save_batches(agents)
        # train(agents=agents)
        pred = agents[0].model.predict(np.array([[3, 3, 3, 297, 1, 0]]))
        predictions.append(pred)
        batches = agents[0].full_batch_no_orange()
        agents[0].model.fit(np.array(batches[0]), np.array(batches[1]), epochs=1000,
                            validation_split=0)
        a = 435  # leftover debug value
        # THAT IS OK:
        # model = keras.models.Sequential()
        # model.add(keras.layers.Dense(7, activation='relu', input_dim=5))
        # model.add(keras.layers.Dense(12, activation='relu'))
        # model.add(keras.layers.Dense(10, activation='relu'))
        # model.add(keras.layers.Dense(2))
        # model.compile(loss='mse', optimizer=Adam())
        # y = batches[1]
        # model.fit(np.array(batches[0]), np.array(batches[1]), epochs=1, validation_split=0)
        # predictions = model.predict(np.array(batches[0]))
        # diffs = [(predictions[i] - y[i]) ** 2 for i in range(len(predictions))]
        # mse = sum(sum(diffs)) / len(predictions) / len(predictions[0])
        # print('mse', mse)
        name = 'pred.png'
        plt.plot([pred[0][0] for pred in predictions], label='0')
        plt.plot([pred[0][1] for pred in predictions], label='1')
        plt.legend()
        plt.savefig(name)
        plt.close()
def train(learntAgents=True, max_time_learn=60, agents=None, shuffle=True, batches=None,
          actual_number=''):
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    if batches is None:
        batches = get_batches(agents, actual_number)
    models = [agent.model for agent in agents]
    for i in range(len(agents)):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 10 ** 10
        val_loss_best = 10 ** 10
        escape_flag = False
        escape_val = 0
        start_flag = True
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch, y_batch,
                            batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_learn_done,
                            epochs=Globals().epochs_learn_done + Globals().vp().epochs_learn,
                            validation_split=0.2, verbose=0)
            Globals().epochs_learn_done += Globals().vp().epochs_learn
            if start_flag:
                start_flag = False
            if res.history['val_loss'][-1] < val_loss_best:
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] >= val_loss:
                escape_val += 1
                if escape_val > 10:
                    escape_flag = True
                val_loss = 10 ** 10
            else:
                val_loss = res.history['val_loss'][-1]
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
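# Usage sketch (a hypothetical driver, assuming batches were saved earlier via
# save_batches(agents, actual_number)): retrain the saved agents for up to 60
# seconds each and overwrite the static_files/model-agent*.h5 snapshots.
def example_retrain_saved_agents():
    agents = get_LearnSmartAgents()
    train(learntAgents=True, max_time_learn=60, agents=agents)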
def test_no_0_pass_action_0_all_time(self):
    max_time = 90
    agents = get_SmartAgents()
    Globals().time = 0
    env = Env(agents)
    env.u = env_settings.u_all_2
    for t in range(max_time):
        if t >= 3:
            self.assertEqual([agent.actual_phase for agent in agents], [0])
        actions = [0]
        env.step(actions)
    exportData = ExportData(learningMethod='None', learningEpochs=0,
                            nets=env.global_memories, netName='net11',
                            densityName='test_no_0')
    exportData.saveToJson()
def generate_random_epochs(learntAgents=False, save_front_json=False, epochs=range(1),
                           plotting=False, u=Globals().u, clear_memory=True):
    # note: the default for u is evaluated once, at function definition time
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=u)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                                    nets=env.global_memories, netName='net14',
                                    densityName='random_updated' + str(e))
            exportData.saveToJson()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                                    nets=env.global_memories, netName='net14',
                                    densityName='random_' + str(e))
            exportData.saveToJson()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
    save_batches(agents)
    return agents
def test_no_1_000_then_111(self):
    # We test an action that actually happened, so the prediction should be very good
    agents: List[SmartAgent] = get_SmartAgents()
    env = Env(agents)
    env.u = env_settings.u_all_2
    max_time = 15  # note: with max_time = 15 the t >= 60 branches below never fire
    Globals().time = 0
    for t in range(max_time):
        actions = [0, 0, 0]
        if t == 60 or t >= 63:
            actions = [1, 1, 1]
        if t == 61 or t == 62 or t == 1 or t == 2:
            actions = [orange, orange, orange]
        env.step(actions)
        if t == 10:
            dry_run(env, env.agents, [1, 2, 1])
    for agent in agents:
        agent.reshape_rewards()
def test_no_1_pass_action_1_1_1_all_time(self):
    # TESTING: the actual phase should be [1, 1, 1] the whole time
    max_time = 90
    agents = get_SmartAgents()
    for agent in agents:
        agent.orange_phase_duration = 2
    Globals().time = 0
    env = Env(agents)
    env.u = env_settings.u_all_2
    for t in range(max_time):
        if t >= 3:
            self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
        actions = [1, 1, 1]
        env.step(actions)
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net4',
                            densityName='test_no_1')
    exportData.saveToJson()
def test_no_5_reshaping_pass_action_0_0_0_long_time_then_1_1_1_long_time_then_2_2_2(self):
    # TESTING: starting_actual_phase
    max_time = 90
    agents = get_SmartAgents()
    Globals().time = 0
    Globals().gamma = 0
    env = Env(agents)
    env.u = env_settings.u_all_2
    for t in range(max_time):
        actions = [0, 0, 0]
        if t == 60 or 63 <= t < 70:
            actions = [1, 1, 1]
        if t == 61 or t == 62:
            actions = [orange, orange, orange]
        if t == 70 or t >= 73:
            actions = [2, 2, 2]
        if t == 71 or t == 72:
            actions = [orange, orange, orange]
        env.step(actions)
        print(f't:{t}, actual_phase:{agents[0].actual_phase}')
        if 3 <= t < 60:
            self.assertEqual([agent.starting_actual_phase for agent in agents], [0, 0, 0])
            self.assertEqual([agent.actual_phase for agent in agents], [0, 0, 0])
        if t == 60:
            self.assertEqual([agent.starting_actual_phase for agent in agents], [0, 0, 0])
            self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
        if t == 61:
            self.assertEqual([agent.starting_actual_phase for agent in agents],
                             [orange, orange, orange])
            self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
        if t == 62:
            self.assertEqual([agent.starting_actual_phase for agent in agents],
                             [orange, orange, orange])
            self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
        if 63 <= t <= 69:
            self.assertEqual([agent.starting_actual_phase for agent in agents], [1, 1, 1])
            self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
        if t == 70:
            self.assertEqual([agent.starting_actual_phase for agent in agents], [1, 1, 1])
            self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
        if t == 71:
            self.assertEqual([agent.starting_actual_phase for agent in agents],
                             [orange, orange, orange])
            self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
        if t == 72:
            self.assertEqual([agent.starting_actual_phase for agent in agents],
                             [orange, orange, orange])
            self.assertEqual([agent.actual_phase for agent in agents], [2, 2, 2])
def test_no_6_starting_phase_actual_phase(self):
    # Testing starting_actual_phase - it is needed for the LearningState
    agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.yellow_phase_duration = 2
    env = Env(agents)
    env.yellow_phase_duration = 2
    max_time = 90
    Globals().time = 0
    for t in range(max_time):
        # actions: [0] * 3 for t < 60, [1] * 3 from t == 60, back to [0] * 3 from t == 70
        actions = [0] * 3
        if t == 1 or t == 2:
            actions = [yellow] * 3
        if t == 60 or t >= 63:
            actions = [1] * 3
        if t == 61 or t == 62 or t == 71 or t == 72:
            actions = [yellow] * 3
        if t == 70 or t >= 73:
            actions = [0] * 3
        env.step(actions)
    for agent in agents:
        agent.reshape_rewards()
    self.assertEqual(agents[0].memories[60].state.starting_actual_phase, 0)
    self.assertEqual(agents[0].memories[61].state.starting_actual_phase, yellow)
    self.assertEqual(agents[0].memories[62].state.starting_actual_phase, yellow)
    self.assertEqual(agents[0].memories[63].state.starting_actual_phase, 1)
    self.assertEqual(agents[0].memories[70].state.starting_actual_phase, 1)
    self.assertEqual(agents[0].memories[71].state.starting_actual_phase, yellow)
    self.assertEqual(agents[0].memories[72].state.starting_actual_phase, yellow)
    self.assertEqual(agents[0].memories[73].state.starting_actual_phase, 0)
    exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0,
                            nets=env.global_memories, netName='net14',
                            densityName='test_no_6')
    exportData.saveToJson()