Example no. 1
 def test_no_5_some_batch_data(self):
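     """Simulate episodes in which changing the phase beats keeping it, train
     on the collected batches, and print the Q-prediction for a fixed state
     each iteration to watch it converge."""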
     agents = get_SmartAgents()
     actual_phase = 0
     orange_phase_duration = 2
     phase_duration = orange_phase_duration + 1
     den = [0, 0, 10, 8]
     den_pred = [2, 2, 2, 78]
     state_to_predict = LearningState(
         actual_phase=actual_phase,
         starting_actual_phase=actual_phase,
         phase_duration=phase_duration,
         global_densities=den_pred + [0, 0],
         densities=den,
         orange_phase_duration=orange_phase_duration)
     for i in range(150):
         # keeping the phase - the worse choice
         action_0 = [0]
         env = single_simulate(agents,
                               actual_phase,
                               phase_duration,
                               den,
                               orange_phase_duration=orange_phase_duration,
                               actions=action_0,
                               u=env_settings.u_all_2)
         for x in range(40):
             simulate_from_env(env, action_0)
         action_1 = [1]
         # changing the phase - the better choice
         # print('base state', env.x[env.t])
         # print(env.global_memories[-1])
         simulate_from_env(env, action_1)
         # print('state', env.x[env.t])
         # print('state after', env.x[env.t])
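         # 'orange' is assumed to be the orange-light action constant defined elsewhere in the test module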
         simulate_from_env(env, [orange])
         # print('state', env.x[env.t])
         simulate_from_env(env, [orange])
         # print('state', env.x[env.t])
         for x in range(20):
             simulate_from_env(env, action_1)
             # print('state', env.x[env.t])
         memories = env.global_memories
         save_batches(agents)
         batches = get_batches()
         train(agents=agents)
         env.global_memories = []
         for agent in env.agents:
             agent.memories = []
         prediction = agents[0].model.predict(
             state_to_predict.to_learn_array())
         # this converges to 10, 20, whatever
         print('prediction :)', prediction)
         exportData = ExportData(learningMethod='DQN',
                                 learningEpochs=0,
                                 nets=memories,
                                 netName='net14',
                                 densityName='test_fixed_no_4')
         exportData.saveToJson()
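
The test only prints the prediction instead of asserting on it. A minimal convergence check, assuming the model output has shape (1, n_actions) with index 1 being the "change phase" action (the indexing the plotting code in Example no. 4 also uses), might look like:

     # hypothetical assertion: after training, changing the phase (action 1)
     # should be valued higher than keeping it (action 0)
     q_values = agents[0].model.predict(state_to_predict.to_learn_array())
     self.assertGreater(q_values[0][1], q_values[0][0])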
Example no. 2

pred_history = []
y_batch_history = []
Globals().u_value = 5
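# u_value is the mean number of incoming vehicles (the quantity printed in Example no. 6)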
runs = range(len(Globals().val_params))
for run in runs:
    Globals().pred_plot_memory = []
    Globals().run_no = run
    results = []
    timeToLearn = 500000
    startTime = timer()
    generate_my_epochs(learntAgents=False,
                       epochs=range(Globals().vp().first_epochs_range)
                       )  # takes new agents, and there is an 'is' here
    train(learntAgents=False, max_time_learn=Globals().vp().max_time_learn)
    # run_learnt_greedy()
    lurns = 0
    eps_decay = 0
    actual_number = 0
    while timer() - startTime < timeToLearn:
        actual_number += 1
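        # epsilon-greedy schedule: linear decay from 1.0 by 0.07 per pass, floored at 0.2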
        eps_decay += 0.07
        Globals().epsilon = 1 - eps_decay
        if Globals().epsilon < 0.2:
            Globals().epsilon = 0.2
        # print('epsilon', Globals().epsilon)
        # print('time', timer() - startTime)
        # print('U', Globals().u_value)
        generate_my_epochs(learntAgents=True,
                           epochs=range(2))
Example no. 3
from timeit import default_timer as timer
from nn_trainer import train
from random_epochs_generator import generate_random_epochs
from services.globals import Globals
from services.runnerService import run_learnt_greedy

Globals().pred_plot_memory = []
results = []
timeToLearn = 60
startTime = timer()
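# bootstrap: 50 random epochs with fresh agents, one training pass, then a first greedy evaluation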
generate_random_epochs(learntAgents=False, epochs=range(50))
train(learntAgents=False)
run_learnt_greedy()
while timer() - startTime < timeToLearn:
    print('time', timer() - startTime)
    generate_random_epochs(learntAgents=True, epochs=range(50))
    train(max_time_learn=30)
    result = run_learnt_greedy()
    results.append(result)
Example no. 4
 def test_no_5_some_batch_data(self):
     agents = get_SmartAgents()
     actual_phase = 0
     orange_phase_duration = 2
     phase_duration = orange_phase_duration + 1
     predictions_hist = []
     # den = [0, 0, 0, 0]
     # den_pred = [2, 2, 2, 78]
     pred_x = np.array([[2, 2, 2, 78, 0]])
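     # the 5-element input appears to be den_pred plus one extra feature (cf. the commented-out LearningState above)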
     # state_to_predict = LearningState(actual_phase=actual_phase, starting_actual_phase=actual_phase,
     #                                  phase_duration=phase_duration, global_densities=den_pred + [0, 0],
     #                                  densities=den,
     #                                  orange_phase_duration=orange_phase_duration)
     a = 0
     for i in range(5000):
         a += 1
         # keeping the phase - the worse choice
         action_0 = [0]
         # env = single_simulate(agents, actual_phase, phase_duration, den,
         #                       orange_phase_duration=orange_phase_duration,
         #                       actions=action_0, u=env_settings.u_all_2)
         env = Env(agents)
         for x in range(40):
             simulate_from_env(env, action_0)
         action_1 = [1]  # 41
         # changing the phase - the better choice
         # print('base state', env.x[env.t])
         # print(env.global_memories[-1])
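         # alternate between keeping (odd iterations) and changing (even iterations) the phase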
         if a % 2 == 0:
             action = [1]
         else:
             action = [0]
         simulate_from_env(env, action)  # 41
         # print('state', env.x[env.t])
         # print('state after', env.x[env.t])
         simulate_from_env(env, [orange])  # 42
         # print('state', env.x[env.t])
         simulate_from_env(env, [orange])  # 43
         # print('state', env.x[env.t])
         for x in range(20):
             simulate_from_env(env, action)
             # print('state', env.x[env.t])
         memories = env.global_memories
         # print(f'rewards t=40:{env.agents[0].memories[40].reward}')
         # print(f'rewards t=41:{env.agents[0].memories[41].reward}')
         # print(f'rewards t=42:{env.agents[0].memories[42].reward}')
         # memories += env.global_memories
         save_batches(agents)
         batches = get_batches()
         print('y batch', batches[0]['y_batch'][40])
         train(agents=agents)
         env.global_memories = []
         for agent in env.agents:
             agent.memories = []
         prediction = agents[0].model.predict(pred_x)
         predictions_hist.append(prediction)
         # this converges to 10, 20, whatever
         print('prediction :)', prediction)
         exportData = ExportData(learningMethod='DQN',
                                 learningEpochs=0,
                                 nets=memories,
                                 netName='net14',
                                 densityName='test_fixed_no_4')
         # exportData.saveToJson()
         # plt.plot([pred[0][0] for pred in Globals().pred_plot_memory], color='red', label='0')
         # plt.plot([pred[0][1] for pred in Globals().pred_plot_memory], color='green', label='1')
         plt.plot([x[0][0] for x in predictions_hist],
                  color='red',
                  label='0')
         plt.plot([x[0][1] for x in predictions_hist],
                  color='green',
                  label='1')
         plt.legend()
         plt.title('Whatever')
         plt.savefig('pred.png')
         plt.close()
Example no. 5
from timeit import default_timer as timer

from runner_learnt import run_learnt_greedy
import matplotlib.pyplot as plt

# assumed to live in the same modules as in Example no. 3
from nn_trainer import train
from random_epochs_generator import generate_random_epochs
from services.globals import Globals

runs = [0, 1, 2]
for run in runs:
    Globals().pred_plot_memory = []
    Globals().run_no = run
    results = []
    timeToLearn = 600
    startTime = timer()
    generate_random_epochs(
        learntAgents=False,
        epochs=range(50))  # takes new agents, and there is an 'is' here
    train()
    run_learnt_greedy()
    while timer() - startTime < timeToLearn:
        print('time', timer() - startTime)
        generate_random_epochs(learntAgents=True, epochs=range(50))
        train(max_time_learn=40)
        result = run_learnt_greedy()
        results.append(result)
    # print(times)
    plt.plot([res[0] for res in results])
    plt.title('Mean of all rewards - actions following the learnt strategy')
    plt.savefig('rewards_mean' + str(run) + '.png')
    plt.close()
    # plt.show()  # rewards_mean
    plt.plot([res[1] for res in results])
    plt.title('Sum of rewards - actions following the learnt strategy')
Example no. 6
from timeit import default_timer as timer
from nn_trainer import train
from services.drawerService import draw_rewards_mean, draw_rewards, draw_cars_out
from services.globals import Globals
from services.runnerService import generate_random_epochs, run_learnt_greedy

results = []
timeToLearn = 500000
startTime = timer()
agents = generate_random_epochs(learntAgents=False,
                                epochs=range(Globals().vp.first_epochs_range)
                                )  # takes new agents, and there is an 'is' here
train(learntAgents=False, max_time_learn=Globals().vp.max_time_learn)
result = run_learnt_greedy()
eps_decay = 0
max_iterations_without_progress = 15
iterations_without_progress = 0
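# progress-based stopping counters; the code that updates them falls outside this excerpt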
while timer() - startTime < timeToLearn:
    eps_decay += 0.04
    Globals().epsilon = 1 - eps_decay
    if Globals().epsilon < 0.2:
        Globals().epsilon = 0.2
    print(
        f'Time:{round(timer() - startTime, 0)} Epsilon:{round(Globals().epsilon, 2)} '
        f'Mean number of incoming vehicles:{round(Globals().u_value, 2)}')
    generate_random_epochs(learntAgents=True,
                           epochs=range(Globals().vp.epochs_range))
    train(max_time_learn=Globals().vp.max_time_learn)
    result = run_learnt_greedy()
    maximum_possible_cars_out = result[4]
    if result[2] > maximum_possible_cars_out * 0.93:  # cars_out
        # presumably mirrors Example no. 7: raise the inflow once the agents nearly saturate it
        Globals().u_value = Globals().u_value * 1.2
Example no. 7
 #  train(learntAgents=False, max_time_learn=Globals().vp().max_time_learn)
 #  result=run_learnt_greedy()
 lurns = 0
 eps_decay = 0
 while timer() - startTime < timeToLearn:
     eps_decay += 0.04
     Globals().epsilon = 1 - eps_decay
     if Globals().epsilon < 0.2:
         Globals().epsilon = 0.2
     print('epsilon', Globals().epsilon)
     print('time', timer() - startTime)
     print('U', Globals().u_value)
     generate_random_epochs(learntAgents=True,
                            epochs=range(Globals().vp().epochs_range))
     draw_batches('static_files/x_batch' + str(Globals().greedy_run_no))
     train(max_time_learn=Globals().vp().max_time_learn)
     result = run_learnt_greedy()
     draw_batches_from_agent(result[3],
                             file_name='static_files/x_batch_greedy' +
                             str(Globals().greedy_run_no))
     maximum_possible_cars_out = Globals().u_value * Globals().vp().max_time_greedy * 8
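     # throughput ceiling: inflow rate * greedy-run length * 8 (presumably the number of incoming lanes)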
     print('max possible', maximum_possible_cars_out)
     if result[2] > maximum_possible_cars_out * 0.93:  # cars_out
         print('u before', Globals().u_value)
         Globals().u_value = Globals().u_value * 1.2
         # print('u after', Globals().u_value)
     results.append(result)
     lurns += 1
     name = 'now' + str(Globals().greedy_run_no) + "time" + str(
         timer() - startTime) + " " + str(Globals().vp())