Exemple #1
0
 def test_no_3_reshape_future_many_times_are_rewards_ok(self):
     # Generates 50 random epochs with untrained agents while
     # Globals().reshape_future is set to 4, then collects the reward stored
     # in every memory of the first agent.
     #
     # PRECONDITION (translated from the original note) -- the following
     # must be set in the get_action method, then the test is expected to
     # pass:
     #     if state.actual_phase == 'orange':
     #         return 'orange'
     #     return 0
     #
     # NOTE(review): the test makes no assertions; it only fails if one of
     # the calls below raises. `rewards` is kept so it can be inspected in
     # a debugger.
     Globals().reshape_future = 4
     # Uses new (not learnt) agents; the original note adds "and 'is' is
     # here" -- meaning unclear, TODO confirm with the author.
     agents = generate_random_epochs(learntAgents=False,
                                     epochs=range(50),
                                     u=env_settings.u_all_2)
     rewards = [mem.reward for mem in agents[0].memories]
Exemple #2
0
from timeit import default_timer as timer
from nn_trainer import train
from random_epochs_generator import generate_random_epochs
from services.globals import Globals
from services.runnerService import run_learnt_greedy

# Clear the prediction-plot buffer stored on the Globals object.
Globals().pred_plot_memory = []

# Time budget for the refinement loop below, compared against timer() deltas.
timeToLearn = 60
results = []

# Bootstrap round: epochs generated with learntAgents=False, one training
# pass, and one greedy evaluation whose return value is discarded.
startTime = timer()
generate_random_epochs(
    learntAgents=False,
    epochs=range(50),
)
train(learntAgents=False)
run_learnt_greedy()

# Refinement rounds: repeat generate -> train -> evaluate until the time
# budget is used up, keeping every greedy evaluation result.
while timeToLearn > timer() - startTime:
    print('czas', timer() - startTime)
    generate_random_epochs(
        learntAgents=True,
        epochs=range(50),
    )
    train(max_time_learn=30)
    result = run_learnt_greedy()
    results.append(result)
Exemple #3
0
from timeit import default_timer as timer
from nn_trainer import train
from random_epochs_generator import generate_random_epochs
from runner_learnt import run_learnt_greedy
from services.drawerService import draw_rewards_mean, draw_rewards, draw_cars_out
from services.globals import Globals

# Training driver: bootstrap with untrained agents, then alternate
# "generate epochs -> train -> greedy evaluation" until the time budget is
# exhausted, decaying epsilon and raising the inflow (u_value) whenever the
# greedy run lets almost all possible cars out.

# NOTE(review): `results`, `agents`, `max_iterations_without_progress` and
# `iterations_without_progress` are never read in the visible part of this
# script; they are kept in case later (not visible) code relies on them.
results = []
timeToLearn = 500000  # time budget, compared against timer() deltas
startTime = timer()

# Bootstrap round with new (not learnt) agents.
agents = generate_random_epochs(
    learntAgents=False,
    epochs=range(Globals().vp.first_epochs_range))
train(learntAgents=False, max_time_learn=Globals().vp.max_time_learn)
result = run_learnt_greedy()

eps_decay = 0
max_iterations_without_progress = 10
iterations_without_progress = 0
while timer() - startTime < timeToLearn:
    # Epsilon falls by 0.04 per iteration and is floored at 0.2.
    eps_decay += 0.04
    Globals().epsilon = max(0.2, 1 - eps_decay)
    # The original message printed a doubled colon ("pojazdów::") because
    # both adjacent f-string pieces carried one; only one is emitted now.
    print(
        f'Czas:{round(timer() - startTime, 0)} Epsilon:{round(Globals().epsilon, 2)} '
        f'Średnia liczba wjeżdżających pojazdów:{round(Globals().u_value, 2)}')
    generate_random_epochs(learntAgents=True,
                           epochs=range(Globals().vp.epochs_range))
    train(max_time_learn=Globals().vp.max_time_learn)
    result = run_learnt_greedy()
    maximum_possible_cars_out = result[4]
    # result[2] is cars_out (per the original comment); once it exceeds 93%
    # of the maximum, increase the inflow by 20%.
    if result[2] > maximum_possible_cars_out * 0.93:
        Globals().u_value = Globals().u_value * 1.2
Exemple #4
0
from nn_trainer import train
from random_epochs_generator import generate_random_epochs
from runner_learnt import run_learnt_greedy
import matplotlib.pyplot as plt

from services.globals import Globals

# `timer()` is used below but was not imported in this script's import group,
# which raised NameError on the first loop iteration.
from timeit import default_timer as timer

# Runs three independent learning sessions. Each session bootstraps with
# untrained agents, then repeats "generate epochs -> train -> greedy
# evaluation" until the time budget is used up, and finally saves a plot of
# the first element of every evaluation result.
runs = [0, 1, 2]
for run in runs:
    # Per-run state stored on the Globals object.
    Globals().pred_plot_memory = []
    Globals().run_no = run
    results = []
    timeToLearn = 600  # time budget per run, compared against timer() deltas
    startTime = timer()

    # Bootstrap round with new (not learnt) agents.
    generate_random_epochs(learntAgents=False, epochs=range(50))
    train()
    run_learnt_greedy()

    while timer() - startTime < timeToLearn:
        print('czas', timer() - startTime)
        generate_random_epochs(learntAgents=True, epochs=range(50))
        train(max_time_learn=40)
        result = run_learnt_greedy()
        results.append(result)

    # One figure per run; res[0] is plotted under the "mean of all rewards"
    # title below.
    plt.plot([res[0] for res in results])
    plt.title('Średnia wszystkich nagród - akcje wedle wyuczonej strategii')
    plt.savefig(f'rewards_mean{run}.png')
    plt.close()
Exemple #5
0
 # Fragment of a training driver; the enclosing definition and the setup of
 # `startTime`, `timeToLearn` and `results` are outside this view.
 #
 # Disabled bootstrap round, kept for reference:
 # agents=generate_random_epochs(learntAgents=False,
 #                        epochs=range(Globals().vp().first_epochs_range))  # takes new agents and "'is' is here" (original note, meaning unclear)
 # # draw_colored_batches(agents)
 #  train(learntAgents=False, max_time_learn=Globals().vp().max_time_learn)
 #  result=run_learnt_greedy()
 # Counts completed loop iterations.
 lurns = 0
 eps_decay = 0
 # Repeat "generate epochs -> train -> greedy evaluation" until the elapsed
 # time since startTime reaches timeToLearn.
 while timer() - startTime < timeToLearn:
     # Epsilon falls by 0.04 per iteration and is floored at 0.2.
     eps_decay += 0.04
     Globals().epsilon = 1 - eps_decay
     if Globals().epsilon < 0.2:
         Globals().epsilon = 0.2
     print('epsilon', Globals().epsilon)
     print('czas', timer() - startTime)
     print('U', Globals().u_value)
     generate_random_epochs(learntAgents=True,
                            epochs=range(Globals().vp().epochs_range))
     # File name is suffixed with the current greedy_run_no.
     draw_batches('static_files/x_batch' + str(Globals().greedy_run_no))
     train(max_time_learn=Globals().vp().max_time_learn)
     result = run_learnt_greedy()
     # NOTE(review): result[3] is presumably the agent(s) from the greedy
     # run -- confirm against run_learnt_greedy.
     draw_batches_from_agent(result[3],
                             file_name='static_files/x_batch_greedy' +
                             str(Globals().greedy_run_no))
     # Upper bound used for the threshold below: u_value * max_time_greedy * 8.
     # NOTE(review): the meaning of the factor 8 is not visible here
     # (possibly the number of inflow lanes) -- TODO confirm.
     maximum_possible_cars_out = Globals().u_value * Globals().vp(
     ).max_time_greedy * 8
     print('max possible', maximum_possible_cars_out)
     # When the greedy run exceeds 93% of that bound, raise u_value by 20%.
     if result[2] > maximum_possible_cars_out * 0.93:  # cars_out
         print('u przed', Globals().u_value)
         Globals().u_value = Globals().u_value * 1.2
         # print('u po',Globals().u_value)
     results.append(result)
     lurns += 1