def test_no_5_some_batch_data(self):
    agents = get_SmartAgents()
    actual_phase = 0
    orange_phase_duration = 2
    phase_duration = orange_phase_duration + 1
    den = [0, 0, 10, 8]
    den_pred = [2, 2, 2, 78]
    state_to_predict = LearningState(actual_phase=actual_phase,
                                     starting_actual_phase=actual_phase,
                                     phase_duration=phase_duration,
                                     global_densities=den_pred + [0, 0],
                                     densities=den,
                                     orange_phase_duration=orange_phase_duration)
    for i in range(150):
        action_0 = [0]  # keep the current phase - the worse choice
        env = single_simulate(agents, actual_phase, phase_duration, den,
                              orange_phase_duration=orange_phase_duration,
                              actions=action_0, u=env_settings.u_all_2)
        for x in range(40):
            simulate_from_env(env, action_0)
        action_1 = [1]  # switch the phase - the better choice
        simulate_from_env(env, action_1)
        simulate_from_env(env, [orange])
        simulate_from_env(env, [orange])
        for x in range(20):
            simulate_from_env(env, action_1)
        memories = env.global_memories
        save_batches(agents)
        batches = get_batches()
        train(agents=agents)
        env.global_memories = []
        for agent in env.agents:
            agent.memories = []
        prediction = agents[0].model.predict(state_to_predict.to_learn_array())
        print('prediction', prediction)  # converges to roughly 10/20
    exportData = ExportData(learningMethod='DQN', learningEpochs=0, nets=memories,
                            netName='net14', densityName='test_fixed_no_4')
    exportData.saveToJson()
pred_history = []
y_batch_history = []
Globals().u_value = 5
runs = range(len(Globals().val_params))
for run in runs:
    Globals().pred_plot_memory = []
    Globals().run_no = run
    results = []
    timeToLearn = 500000
    startTime = timer()
    generate_my_epochs(learntAgents=False,
                       epochs=range(Globals().vp().first_epochs_range))  # uses new agents; the 'is' comparison is here
    train(learntAgents=False, max_time_learn=Globals().vp().max_time_learn)
    # run_learnt_greedy()
    lurns = 0
    eps_decay = 0
    actual_number = 0
    while timer() - startTime < timeToLearn:
        actual_number += 1
        eps_decay += 0.07
        Globals().epsilon = 1 - eps_decay
        if Globals().epsilon < 0.2:
            Globals().epsilon = 0.2
        # print('epsilon', Globals().epsilon)
        # print('time', timer() - startTime)
        # print('U', Globals().u_value)
        generate_my_epochs(learntAgents=True, epochs=range(2))  # call truncated in the original listing
from timeit import default_timer as timer

from nn_trainer import train
from random_epochs_generator import generate_random_epochs
from services.globals import Globals
from services.runnerService import run_learnt_greedy

Globals().pred_plot_memory = []
results = []
timeToLearn = 60
startTime = timer()
generate_random_epochs(learntAgents=False, epochs=range(50))
train(learntAgents=False)
run_learnt_greedy()
while timer() - startTime < timeToLearn:
    print('time', timer() - startTime)
    generate_random_epochs(learntAgents=True, epochs=range(50))
    train(max_time_learn=30)
    result = run_learnt_greedy()
    results.append(result)
def test_no_5_some_batch_data(self):
    agents = get_SmartAgents()
    actual_phase = 0
    orange_phase_duration = 2
    phase_duration = orange_phase_duration + 1
    predictions_hist = []
    pred_x = np.array([[2, 2, 2, 78, 0]])
    a = 0
    for i in range(5000):
        a += 1
        action_0 = [0]  # keep the current phase - the worse choice
        env = Env(agents)
        for x in range(40):
            simulate_from_env(env, action_0)
        # switching the phase is the better choice; alternate it every other run
        if a % 2 == 0:
            action = [1]
        else:
            action = [0]
        simulate_from_env(env, action)    # t = 41
        simulate_from_env(env, [orange])  # t = 42
        simulate_from_env(env, [orange])  # t = 43
        for x in range(20):
            simulate_from_env(env, action)
        memories = env.global_memories
        save_batches(agents)
        batches = get_batches()
        print('y batch', batches[0]['y_batch'][40])
        train(agents=agents)
        env.global_memories = []
        for agent in env.agents:
            agent.memories = []
        prediction = agents[0].model.predict(pred_x)
        predictions_hist.append(prediction)
        print('prediction', prediction)  # converges to roughly 10/20
    exportData = ExportData(learningMethod='DQN', learningEpochs=0, nets=memories,
                            netName='net14', densityName='test_fixed_no_4')
    # exportData.saveToJson()
    plt.plot([x[0][0] for x in predictions_hist], color='red', label='0')
    plt.plot([x[0][1] for x in predictions_hist], color='green', label='1')
    plt.legend()
    plt.title('Predicted Q-values for actions 0 and 1')
    plt.savefig('pred.png')
    plt.close()
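# ---------------------------------------------------------------------------
# Illustration, not project code: a minimal sketch of how a single DQN target
# such as batches[0]['y_batch'][40] is typically computed from a stored
# transition. The function name, gamma value, and model interface below are
# assumptions made for the sketch, not the project's actual trainer.
# ---------------------------------------------------------------------------
import numpy as np


def dqn_target(reward, next_state, model, gamma=0.9, terminal=False):
    """Return the regression target y = r + gamma * max_a' Q(s', a')."""
    if terminal:
        return reward  # no bootstrapping past the end of an episode
    # Bootstrap on the best predicted Q-value of the successor state.
    next_q = model.predict(next_state[np.newaxis, :])[0]
    return reward + gamma * np.max(next_q)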
from timeit import default_timer as timer

import matplotlib.pyplot as plt

from nn_trainer import train
from random_epochs_generator import generate_random_epochs
from runner_learnt import run_learnt_greedy
from services.globals import Globals

runs = [0, 1, 2]
for run in runs:
    Globals().pred_plot_memory = []
    Globals().run_no = run
    results = []
    timeToLearn = 600
    startTime = timer()
    generate_random_epochs(learntAgents=False, epochs=range(50))  # uses new agents; the 'is' comparison is here
    train()
    run_learnt_greedy()
    while timer() - startTime < timeToLearn:
        print('time', timer() - startTime)
        generate_random_epochs(learntAgents=True, epochs=range(50))
        train(max_time_learn=40)
        result = run_learnt_greedy()
        results.append(result)
    plt.plot([res[0] for res in results])
    plt.title('Mean of all rewards - actions following the learnt strategy')
    plt.savefig('rewards_mean' + str(run) + '.png')
    plt.close()
    plt.plot([res[1] for res in results])
    plt.title('Sum of rewards - actions following the learnt strategy')
    plt.savefig('rewards_sum' + str(run) + '.png')  # filename assumed by analogy with the mean plot
    plt.close()
from timeit import default_timer as timer

from nn_trainer import train
from services.drawerService import draw_rewards_mean, draw_rewards, draw_cars_out
from services.globals import Globals
from services.runnerService import generate_random_epochs, run_learnt_greedy

results = []
timeToLearn = 500000
startTime = timer()
agents = generate_random_epochs(learntAgents=False,
                                epochs=range(Globals().vp.first_epochs_range))  # uses new agents; the 'is' comparison is here
train(learntAgents=False, max_time_learn=Globals().vp.max_time_learn)
result = run_learnt_greedy()
eps_decay = 0
max_iterations_without_progress = 15
iterations_without_progress = 0
while timer() - startTime < timeToLearn:
    eps_decay += 0.04
    Globals().epsilon = 1 - eps_decay
    if Globals().epsilon < 0.2:
        Globals().epsilon = 0.2
    print(f'Time: {round(timer() - startTime, 0)} Epsilon: {round(Globals().epsilon, 2)} '
          f'Mean number of incoming vehicles: {round(Globals().u_value, 2)}')
    generate_random_epochs(learntAgents=True, epochs=range(Globals().vp.epochs_range))
    train(max_time_learn=Globals().vp.max_time_learn)
    result = run_learnt_greedy()
    maximum_possible_cars_out = result[4]
    if result[2] > maximum_possible_cars_out * 0.93:  # cars_out
        Globals().u_value = Globals().u_value * 1.2  # body assumed from the analogous loop below: raise the inflow by 20%
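# ---------------------------------------------------------------------------
# For reference: the exploration schedule above is a plain linear decay with
# a floor. A standalone sketch of the same arithmetic (the function name is
# ours, not the project's):
# ---------------------------------------------------------------------------
def epsilon_schedule(iteration, step=0.04, floor=0.2):
    """Linear epsilon decay: starts at 1.0, reaches `floor` after (1 - floor) / step iterations."""
    return max(1.0 - step * iteration, floor)


# With step=0.04 the rate hits the 0.2 floor after 20 iterations:
assert epsilon_schedule(20) == 0.2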
# train(learntAgents=False, max_time_learn=Globals().vp().max_time_learn)
# result = run_learnt_greedy()
lurns = 0
eps_decay = 0
while timer() - startTime < timeToLearn:
    eps_decay += 0.04
    Globals().epsilon = 1 - eps_decay
    if Globals().epsilon < 0.2:
        Globals().epsilon = 0.2
    print('epsilon', Globals().epsilon)
    print('time', timer() - startTime)
    print('U', Globals().u_value)
    generate_random_epochs(learntAgents=True, epochs=range(Globals().vp().epochs_range))
    draw_batches('static_files/x_batch' + str(Globals().greedy_run_no))
    train(max_time_learn=Globals().vp().max_time_learn)
    result = run_learnt_greedy()
    draw_batches_from_agent(result[3],
                            file_name='static_files/x_batch_greedy' + str(Globals().greedy_run_no))
    maximum_possible_cars_out = Globals().u_value * Globals().vp().max_time_greedy * 8
    print('max possible', maximum_possible_cars_out)
    if result[2] > maximum_possible_cars_out * 0.93:  # cars_out
        print('u before', Globals().u_value)
        Globals().u_value = Globals().u_value * 1.2
        # print('u after', Globals().u_value)
    results.append(result)
    lurns += 1
    name = ('teraz' + str(Globals().greedy_run_no) + 'time' + str(timer() - startTime)
            + ' ' + str(Globals().vp()))
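# ---------------------------------------------------------------------------
# Worked example of the inflow curriculum above. u_value = 5 matches the
# initial value set earlier in this section; max_time_greedy = 100 is a
# hypothetical number used only for illustration.
# ---------------------------------------------------------------------------
u_value = 5
max_time_greedy = 100  # hypothetical
maximum_possible_cars_out = u_value * max_time_greedy * 8  # 4000 cars across 8 inbound lanes
threshold = 0.93 * maximum_possible_cars_out               # 3720.0
cars_out = 3800  # hypothetical greedy-run result[2]
if cars_out > threshold:
    u_value *= 1.2  # 5 -> 6.0: harder traffic for the next training round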