Example #1
def run_learnt_greedy(saveJson=True):
    Globals().cars_out_memory = []
    model_file_names = [
        'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
        'static_files/model-agent2.h5', 'static_files/model-agent3.h5'
    ]
    agents = get_LearnSmartAgents(model_file_names)
    env = Env(agents)
    u = epoch_greedy(env)
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='env4',
                                densityName='learnt_' +
                                str(Globals().greedy_run_no))
        exportData.saveToJson()
    maximum_possible_cars_out = Globals().u_value * Globals().vp.max_time_greedy * 8
    cars_out_percentage = round(100 * cars_out / maximum_possible_cars_out, 2)
    print(
        f'greedy run {Globals().greedy_run_no} - rewards_mean:{round(rewards_mean, 2)} '
        f'rewards_sum:{round(rewards_sum, 0)}. {round(sum(sum(u)), 0)} vehicles entered the network. '
        f'{round(cars_out, 0)} left. Percentage of vehicles that left the network:{cars_out_percentage}')
    Globals().greedy_run_no += 1
    return rewards_mean, rewards_sum, cars_out, agents, sum(sum(u)), cars_out_percentage
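# --- Illustrative sketch (not from the source): interleaving training with a
# greedy evaluation run to track progress, as the loop fragment in Example #22
# does. Assumes the train() and run_learnt_greedy() variants shown in these
# examples; run_learnt_greedy() here is the six-value Example #1 variant.
percentages = []
for _ in range(10):
    train(max_time_learn=20)  # refit agents on the saved batches
    rewards_mean, rewards_sum, cars_out, agents, cars_in, pct = run_learnt_greedy()
    percentages.append(pct)  # share of vehicles that left the network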
Example #2
def draw_predictions(no):
    agents = get_LearnSmartAgents()
    to_predict = []
    for den0 in range(60):
        for den1 in range(60):
            to_predict.append([den0 * 2, den1 * 2, 0])
    predictions = agents[0].model.predict(
        np.array(to_predict))  # only for states with phase 0
    dots_action_0 = []
    dots_action_1 = []
    dots_action_orange = []
    for i in range(len(predictions)):
        pred = predictions[i]
        to_predict_state = to_predict[i]
        best_action_predicted = np.argmax(pred[:-1])
        if best_action_predicted == 0:
            dots_action_0.append(to_predict_state)
        if best_action_predicted == 1:
            dots_action_1.append(to_predict_state)
        if best_action_predicted == 2:
            dots_action_orange.append(to_predict_state)
    # x axis: den0, y axis: den1
    plt.plot([den[0] for den in dots_action_0],
             [den[1] for den in dots_action_0], 'go')
    plt.plot([den[0] for den in dots_action_1],
             [den[1] for den in dots_action_1], 'ro')
    # plt.plot([den[0] for den in dots_action_orange], [den[1] for den in dots_action_orange], 'bo')
    plt.savefig('predictions' + str(no) + '.png')
    plt.close()
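# --- Self-contained sketch of the decision-boundary plot above, with a dummy
# predictor standing in for agents[0].model (assumption: the model maps
# [den0, den1, phase] to one value per action). Runnable on its own.
import numpy as np
import matplotlib.pyplot as plt

def dummy_predict(states):
    # prefer action 0 when den0 exceeds den1, action 1 otherwise
    return np.stack([states[:, 0], states[:, 1]], axis=1)

grid = np.array([[d0 * 2, d1 * 2, 0] for d0 in range(60) for d1 in range(60)])
best = np.argmax(dummy_predict(grid), axis=1)
for action, style in [(0, 'go'), (1, 'ro')]:
    points = grid[best == action]
    plt.plot(points[:, 0], points[:, 1], style)
plt.savefig('predictions_sketch.png')
plt.close()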
Example #3
def train(learntAgents=True, max_time_learn=20):
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    start_time = timer()
    x_batch = batches[0]['x_batch']
    y_batch = batches[0]['y_batch']
    model = models[0]
    val_loss = 5000
    escape_flag = False
    while timer() - start_time < max_time_learn and not escape_flag:
        res = model.fit(x_batch, y_batch, batch_size=100, epochs=1, verbose=0, validation_split=0.2)
        if res.history['val_loss'][-1] > val_loss:
            escape_flag = True
            loss = res.history['val_loss'][-1]
            print(f'network result: {loss} (val_loss)')
            val_loss = 5000
        else:
            val_loss = res.history['val_loss'][-1]
        x = [4, 20]
        pred = model.predict(np.array([x]))
        Globals().pred_plot_memory.append(pred)
    model.save('static_files/model-agent' + str(0) + '.h5')
    plt.plot([pred[0][0] for pred in Globals().pred_plot_memory], color='red', label='0')
    plt.plot([pred[0][1] for pred in Globals().pred_plot_memory], color='green', label='1')
    plt.legend()
    plt.title('Rewards predicted for the actions taken \n in the monitored state [4, 20]')
    plt.savefig('images_generated/state_predictions.png')
    plt.close()
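# --- Alternative sketch (an equivalence I am assuming, not the author's code):
# the hand-rolled "stop as soon as val_loss worsens" loop above is close to
# Keras's built-in EarlyStopping, except that the while-loop also enforces a
# wall-clock budget which the callback does not.
from tensorflow.keras.callbacks import EarlyStopping

def fit_with_early_stopping(model, x_batch, y_batch):
    early_stop = EarlyStopping(monitor='val_loss', patience=0,
                               restore_best_weights=True)
    return model.fit(x_batch, y_batch, batch_size=100, epochs=1000,
                     validation_split=0.2, verbose=0, callbacks=[early_stop])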
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1)):
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.memories = []
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents,
                         u=Globals().get_u(Globals().vp.max_time_learn),
                         time=Globals().vp.max_time_learn)
        for agent in env.agents:
            agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='politechnika',
                                    densityName='random_now' +
                                    str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
    save_batches(agents)
    return agents
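# --- Presumed shape of SmartAgent.get_action (a sketch; the real method is not
# shown in these examples). Setting Globals().epsilon = 1 before each epoch
# above makes every action random, which is why these batches are labelled
# "random actions" in the plots.
import random
import numpy as np

def get_action(model, state, epsilon, n_actions=2):
    if random.random() < epsilon:  # epsilon = 1 -> always explore
        return random.randrange(n_actions)
    q_values = model.predict(np.array([state]))[0]
    return int(np.argmax(q_values))  # exploit the best predicted action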
Example #5
def run_learnt_greedy(saveJson=False):
    Globals().cars_out_memory = []
    Globals().cars_in_memory = []
    saveJson = True
    model_file_names = [
        'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
        'static_files/model-agent2.h5', 'static_files/model-agent3.h5'
    ]
    agents = get_LearnSmartAgents(model_file_names)
    # print('weights!',agents[0].model.weights[0])
    env = Env(agents)
    epoch_greedy(env)
    # env.update_memory_rewards()  # TODO: can this be uncommented?
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    print('cars_out', cars_out)
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='polibuda',
                                densityName='learnt_' +
                                str(Globals().greedy_run_no))
        exportData.saveToJson()
        # print('exported')
    maximum_possible_cars_out = Globals().u_value * Globals().vp().max_time_greedy * 8
    print('random actions memory', Globals().actions_memory)
    print('max greedy', max([max(x) for x in env.x]))
    print(
        f'greedy run {Globals().greedy_run_no} - rewards_mean:{rewards_mean} rewards_sum:{rewards_sum} cars_out:{cars_out} percentage:{float(cars_out)/maximum_possible_cars_out}'
    )
    Globals().greedy_run_no += 1
    return rewards_mean, rewards_sum, cars_out, agents
Example #6
def run_learnt_greedy(saveJson=True):
    model_file_names = [
        'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
        'static_files/model-agent2.h5'
    ]
    agents = get_LearnSmartAgents(model_file_names)
    env = Env(agents)
    epoch_greedy(env)
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='env3',
                                densityName='learnt_' +
                                str(Globals().greedy_run_no))
        exportData.saveToJson()
    maximum_possible_cars_out = Globals().u_value * Globals().vp().max_time_greedy * 3
    print(
        f'greedy run {Globals().greedy_run_no} - rewards_mean:{round(rewards_mean, 2)} rewards_sum:{round(rewards_sum, 0)} cars_out:{round(cars_out, 0)} percentage of vehicles that left the network:{cars_out / maximum_possible_cars_out}'
    )
    Globals().greedy_run_no += 1

    return rewards_mean, rewards_sum, cars_out, agents
Example #7
def train(learntAgents=True, max_time_learn=20):
    l_rate = 0.0001
    layers = [15, 25, 20, 15]
    optimizer = 'relu'
    regularizers_ = [0.2, 0.2, 0.2]
    print('train learntAgents', learntAgents)
    agents = get_LearnSmartAgents()

    # create_model(layers, optimizer, l_rate)
    # for i in range(3)
    models = [agent.model for agent in agents]
    batches = get_batches()
    # for i in range(len(models)):
    for i in range(3):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        x2 = []
        y2 = []
        val_loss = 5000
        escape_flag = False
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=100,
                            epochs=1,
                            verbose=0,
                            validation_split=0.2)
            if res.history['val_loss'][-1] > val_loss:
                escape_flag = True
                print('network result', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            # res = model.fit(np.array(x2), np.array(y2), batch_size=20, epochs=1, verbose=0)
            if i == 0:
                # x = [7, 10, 10] + [10, 10, 20] + [6, 5, 4] + [2]
                x = [4, 4, 62] + [10, 10, 49] + [0, 10, 10] + [0]
                pred = model.predict(np.array([x]))
                Globals().pred_plot_memory.append(pred)
            # model.evaluate(np.array(x2), np.array(y2))
        model.save('static_files/model-agent' + str(i) + '.h5')
        if i == 0:
            plt.plot([pred[0][0] for pred in Globals().pred_plot_memory],
                     color='red',
                     label='0')
            plt.plot([pred[0][1] for pred in Globals().pred_plot_memory],
                     color='green',
                     label='1')
            plt.plot([pred[0][2] for pred in Globals().pred_plot_memory],
                     color='blue',
                     label='2')
            plt.legend()
            plt.title(
                'Rewards predicted for the actions taken in the monitored state'
            )
            plt.savefig('foo' + str(Globals().run_no) + '.png')
            plt.close()
Example #8
def draw_predictions(no):
    agents = get_LearnSmartAgents()
    to_predict = []
    for den0 in range(25):
        for den1 in range(25):
            for den2 in range(25):
                for den3 in range(25):
                    to_predict.append([den0, den1, den2, den3,
                                       0])  # phase 0 only, for now
    predictions = agents[0].model.predict(np.array(to_predict))
    dots_action_0 = []
    dots_action_1 = []
    dots_action_orange = []
    for i in range(len(predictions)):
        pred = predictions[i]
        to_predict_state = to_predict[i]
        best_action_predicted = np.argmax(pred)
        if best_action_predicted == 0:
            dots_action_0.append(to_predict_state)
        if best_action_predicted == 1:
            dots_action_1.append(to_predict_state)
        if best_action_predicted == 2:
            dots_action_orange.append(to_predict_state)
    # x axis: den from the top, y axis: den from the bottom
    fig, ax = plt.subplots()
    for den1 in range(25):
        for den3 in range(25):
            actions_0_better = len([
                den for den in dots_action_0
                if den[1] == den1 and den[3] == den3
            ])
            actions_1_better = len([
                den for den in dots_action_1
                if den[1] == den1 and den[3] == den3
            ])
            total = actions_0_better + actions_1_better
            if total == 0:
                ax.plot(den1, den3, 'o', color=(0, 0, 0))
                continue
            # print('action_0_better', actions_0_better)
            # print('action_1_better', actions_1_better)
            red = actions_0_better / total
            green = actions_1_better / total
            # print('red', actions_0_better)
            # print('green', actions_1_better)
            # print('r', red)
            # print('g', green)
            # if den1==den3:
            #     print(green)
            ax.plot(den1, den3, 'o', color=(red, green, 0))
    # plt.plot([den[1] for den in dots_action_0], [den[3] for den in dots_action_0], 'ro')
    # plt.plot([den[1] for den in dots_action_1], [den[3] for den in dots_action_1], 'go')
    # plt.plot([den[0] for den in dots_action_orange], [den[1] for den in dots_action_orange], 'bo')
    # print("draw pred!")
    # fig.savefig('plotcircles.png')
    name = 'predictions' + str(no) + '.png'
    fig.savefig(name)
    plt.close(fig)
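# --- Self-contained illustration (dummy counts, hypothetical data) of the
# colour mixing used above: each (den1, den3) cell is coloured by the share of
# marginalised states in which action 0 (red) or action 1 (green) wins.
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
fig, ax = plt.subplots()
for d1 in range(25):
    for d3 in range(25):
        wins_0 = int(rng.integers(0, 10))  # stand-in for actions_0_better
        wins_1 = int(rng.integers(0, 10))  # stand-in for actions_1_better
        total = wins_0 + wins_1
        if total == 0:
            ax.plot(d1, d3, 'o', color=(0, 0, 0))  # no data: black dot
            continue
        ax.plot(d1, d3, 'o', color=(wins_0 / total, wins_1 / total, 0))
fig.savefig('colour_mix_sketch.png')
plt.close(fig)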
Example #9
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(200),
                           plotting=False):
    # learntAgents = True
    # save_json = True
    # plotting=True
    cars_outs = []
    rewards = []
    rewards_mean = []

    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=env_settings.u_all_4)
        for agent in env.agents:
            agent.reshape_rewards()
        env.update_memory_rewards()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net4',
                                    densityName='random_' + str(e))
            exportData.saveToJson()
        x_batch, y_batch = agents[0].memory_to_minibatch_with_oranges()
        if plotting:
            cars_outs.append(env.cars_out)
            print('rew', env.count_summed_rewards()[0])
            print('cars_out', env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])

    for i in range(len(agents)):
        # print('i',i)
        filename = 'static_files/x_batch_agent_' + str(i) + '.txt'
        x_batch, y_batch = agents[i].full_batch()
        np.savetxt(filename, x_batch, delimiter=',')
        filename = 'static_files/y_batch_agent_' + str(i) + '.txt'
        np.savetxt(filename, y_batch, delimiter=',')
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the network - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Mean reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
Example #10
def train(learntAgents=True, max_time_learn=20, agents=None):
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 5000
        val_loss_best = 5000
        escape_flag = False
        escape_val = 0
        a = 0
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch, y_batch, batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_done,
                            epochs=Globals().epochs_done+Globals().epochs_learn,
                            verbose=0, validation_split=0.2, callbacks=[Globals().tensorboard,agents[i].weights_history_callback])
            Globals().epochs_done+=Globals().epochs_learn
            if res.history['val_loss'][-1] < val_loss_best:
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] > val_loss:
                escape_val += 1
                # print('escape_val', escape_val)
                # print('val loss', res.history['val_loss'][-1])
                if escape_val > 2:
                    escape_flag = True
                #     print('would stop here!')
                # print('network result', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            if i == 0:
                x = [0, 0, 10, 15, 1, 0, 0, 0]
                pred = model.predict(np.array([x]))
                try:
                    diff = abs(pred[0][0] - Globals().pred_plot_memory[-1][0][0]) + abs(
                        pred[0][1] - Globals().pred_plot_memory[-1][0][1])
                    if a == 0:
                        # print('diff', diff)
                        a += 1
                except IndexError:  # first pass: pred_plot_memory is still empty
                    a = 23
                Globals().pred_plot_memory.append(pred)
        # print('best loss', val_loss_best)
        # print('end', model.get_weights())
        Globals().last_weights = model.get_weights()
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
Example #11
def draw_batches(file_name='batches.png'):
    agents = get_LearnSmartAgents()
    batches = get_batches(agents)
    x_batch = batches[0]['x_batch']
    y_batch = batches[0]['y_batch']
    fig, ax = plt.subplots()
    for i in range(len(x_batch[0])):
        dens_i = [x[i] for x in x_batch]
        x_coordinate_for_i = [i] * len(dens_i)
        ax.plot(x_coordinate_for_i, dens_i, 'o', color=(0, 0, 0))
    fig.savefig(file_name)
def generate_my_epochs(learntAgents=False, save_front_json=False, epochs=range(1), plotting=False, reshaping=False,
                       actions=None, clear_memory=True, actual_number=''):
    save_front_json = True
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    # print(agents[0].orange_phase_duration)
    for e in epochs:
        Globals().epsilon = 1
        env: Env = my_epoch(agents, u=Globals().get_u(Globals().vp().max_time_learn),
                            time=Globals().vp().max_time_learn)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        if save_front_json:
            save_front_json = False
            exportData = ExportData(learningMethod='DQN', learningEpochs=0, nets=env.global_memories,
                                    netName='net16',
                                    densityName='my_epochs' + str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
    save_batches(agents, actual_number)
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the network - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Mean reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
    # if any(x for x in [mem.reward for mem in agents[0].memories] if x > 10.1):
    #     print("weeeeeeeeeeee")
    return agents
Example #13
def train(learntAgents=True, max_time_learn=20):
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        print('get learnt!')
        agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        val_loss = 5000
        escape_flag = False
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=100,
                            epochs=1,
                            verbose=0,
                            validation_split=0.2)
            if res.history['val_loss'][-1] > val_loss:
                escape_flag = True
                print('network result', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            if i == 0:
                x = [4, 40, 0]
                pred = model.predict(np.array([x]))
                Globals().pred_plot_memory.append(pred)
        model.save('static_files/model-agent' + str(i) + '.h5')
        if i == 0:
            plt.plot([pred[0][0] for pred in Globals().pred_plot_memory],
                     color='red',
                     label='0')
            plt.plot([pred[0][1] for pred in Globals().pred_plot_memory],
                     color='green',
                     label='1')
            plt.plot([pred[0][2] for pred in Globals().pred_plot_memory],
                     color='blue',
                     label='2')
            plt.legend()
            plt.title(
                'Rewards predicted for the actions taken in the monitored state'
            )
            plt.savefig('foo' + str(Globals().run_no) + '.png')
            plt.close()
Example #14
def train(learntAgents=True,
          max_time_learn=60,
          agents=None,
          shuffle=True,
          batches=None,
          actual_number=''):
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    if batches is None:
        batches = get_batches(agents, actual_number)
    models = [agent.model for agent in agents]
    for i in range(len(agents)):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 10**10
        val_loss_best = 10**10
        escape_flag = False
        escape_val = 0
        start_flag = True
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_learn_done,
                            epochs=Globals().epochs_learn_done +
                            Globals().vp().epochs_learn,
                            validation_split=0.2,
                            verbose=0)
            Globals().epochs_learn_done += Globals().vp().epochs_learn
            if start_flag:
                start_flag = False
            if res.history['val_loss'][-1] < val_loss_best:
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] >= val_loss:
                escape_val += 1
                if escape_val > 10:
                    escape_flag = True
                val_loss = 10**10
            else:
                val_loss = res.history['val_loss'][-1]
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
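# --- Alternative sketch (an assumption, not the author's code): the manual
# best-weights bookkeeping above (weights_best / set_weights) can also be
# expressed with a built-in callback that writes the best-val_loss model
# straight to disk.
from tensorflow.keras.callbacks import ModelCheckpoint

def fit_with_checkpoint(model, x_batch, y_batch, path):
    checkpoint = ModelCheckpoint(path, monitor='val_loss', save_best_only=True)
    return model.fit(x_batch, y_batch, epochs=100, validation_split=0.2,
                     verbose=0, callbacks=[checkpoint])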
Example #15
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1),
                           plotting=False,
                           u=Globals().u,
                           clear_memory=True):
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=u)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_updated' + str(e))
            exportData.saveToJson()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_' + str(e))

            exportData.saveToJson()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
    save_batches(agents)
    return agents
Example #16
def draw_predictions(no):
    agents = get_LearnSmartAgents()
    to_predict = []
    for den0 in range(25):
        for den1 in range(25):
            for den2 in range(25):
                for den3 in range(25):
                    to_predict.append([den0, den1, den2, den3])
    predictions = agents[0].model.predict(np.array(to_predict))
    dots_action_0 = []
    dots_action_1 = []
    dots_action_orange = []
    for i in range(len(predictions)):
        pred = predictions[i]
        to_predict_state = to_predict[i]
        best_action_predicted = np.argmax(pred)
        if best_action_predicted == 0:
            dots_action_0.append(to_predict_state)
        if best_action_predicted == 1:
            dots_action_1.append(to_predict_state)
        if best_action_predicted == 2:
            dots_action_orange.append(to_predict_state)
    # x axis: den from the top, y axis: den from the bottom
    fig, ax = plt.subplots()
    for den1 in range(25):
        for den3 in range(25):
            actions_0_better = len([
                den for den in dots_action_0
                if den[1] == den1 and den[3] == den3
            ])
            actions_1_better = len([
                den for den in dots_action_1
                if den[1] == den1 and den[3] == den3
            ])
            total = actions_0_better + actions_1_better
            if total == 0:
                ax.plot(den1, den3, 'o', color=(0, 0, 0))  # no data: black dot
                continue
            red = actions_0_better / total
            green = actions_1_better / total
            ax.plot(den1, den3, 'o', color=(red, green, 0))
    # plt.plot([den[1] for den in dots_action_0], [den[3] for den in dots_action_0], 'ro')
    # plt.plot([den[1] for den in dots_action_1], [den[3] for den in dots_action_1], 'go')
    # plt.plot([den[0] for den in dots_action_orange], [den[1] for den in dots_action_orange], 'bo')
    # print("draw pred!")
    # fig.savefig('plotcircles.png')
    fig.savefig('predictions' + str(no) + '.png')
Example #17
def generate_random_epochs(learntAgents=False, save_front_json=False, epochs=range(1)):
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.memories = []
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=Globals().get_u(Globals().vp().max_time_learn), time=Globals().vp().max_time_learn)
        for agent in env.agents:
            agent.reshape_rewards()
        env.remember_memory()
        Globals().actual_epoch_index += 1
    save_batches(agents)
    return agents
Example #18
def run_learnt_greedy(saveJson=True):
    model_file_names = ['static_files/model-agent0.h5']
    agents = get_LearnSmartAgents(model_file_names)
    env = Env(agents)
    epoch_greedy(env)
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='env1',
                                densityName='learnt_' +
                                str(Globals().greedy_run_no))
        exportData.saveToJson()
    Globals().greedy_run_no += 1
    print(
        f'greedy run - rewards_mean:{rewards_mean} rewards_sum:{rewards_sum} cars_out:{cars_out}'
    )
    return rewards_mean, rewards_sum, cars_out
Example #19
def train(learntAgents=True, max_time_learn=60, agents=None):
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 10 ** 10
        val_loss_best = 10 ** 10
        escape_flag = False
        escape_val = 0
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch, y_batch, batch_size=Globals().vp.batch_size,
                            initial_epoch=Globals().epochs_done,
                            epochs=Globals().epochs_done + Globals().epochs_learn,
                            validation_split=0.2,
                            verbose=0)  # callbacks=[Globals().tensorboard,agents[i].weights_history_callback]
            Globals().epochs_done += Globals().epochs_learn
            if res.history['val_loss'][-1] < val_loss_best:
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] > val_loss:
                escape_val += 1
                if escape_val > 2:
                    escape_flag = True
                val_loss = 10 ** 10
            else:
                val_loss = res.history['val_loss'][-1]
            if i == 0:
                x = [0, 0, 10, 15, 1, 0]  # for phase 0 - though switching to light 1 would be better
                pred = model.predict(np.array([x]))
                Globals().pred_plot_memory.append(pred)
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
Example #20
def run_learnt_greedy(saveJson=False):
    # saveJson = True
    model_file_names = ['static_files/model-agent0.h5']
    agents = get_LearnSmartAgents(model_file_names)
    # print('weights!',agents[0].model.weights[0])
    env = Env(agents)
    epoch_greedy(env)
    # env.update_memory_rewards()  # TODO: can this be uncommented?
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='net11',
                                densityName='learnt_' +
                                str(Globals().greedy_run_no))
        exportData.saveToJson()
    Globals().greedy_run_no += 1
    # print(f'greedy run - rewards_mean:{rewards_mean} rewards_sum:{rewards_sum} cars_out:{cars_out}')
    return rewards_mean, rewards_sum, cars_out
Example #21
def draw_weights():
    agents = get_LearnSmartAgents()
    for agent in agents:
        agent.plot_weights()
Example #22
                           actual_number=actual_number)
        train(max_time_learn=Globals().vp().max_time_learn,
              actual_number=actual_number)
        # result = run_learnt_greedy()
        maximum_possible_cars_out = Globals().u_value * Globals().vp().max_time_greedy * 3
        # print('max possible', maximum_possible_cars_out)
        pred_array = np.array([[90, 0, 0, 0, 0, 1]])
        agent_0 = get_LearnSmartAgents()[0]
        pred_history.append(agent_0.model.predict(pred_array)[0])
        print(f'{actual_number} pred {pred_history[-1]}')
        batches = get_batches(get_LearnSmartAgents(), actual_number)
        x_batch = batches[0]['x_batch']
        y_batch = batches[0]['y_batch']
        y_batch_history.append(y_batch[19])
        # print(f'x_batch {x_batch} y_batch {y_batch}')
        draw_y_history(y_batch_history)
        indexes = [i for i in range(len(x_batch)) if x_batch[i][-1] == 0]

        # if result[2] >  maximum_possible_cars_out * 0.93:  # cars_out
        # print('u before', Globals().u_value)
        # Globals().u_value = Globals().u_value * 1.2
        # print('u after', Globals().u_value)
        # results.append(result)
Example #23
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1),
                           plotting=False,
                           reshaping=False):
    # save_front_json = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        # print('epoch!!!!!',e)
        Globals().epsilon = 1
        env: Env = epoch(agents, u=env_settings.get_u_under_x_random(8))
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        action_0_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [0]
        ]
        action_1_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [1]
        ]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards) > np.mean(action_1_rewards):
        #     print('wrong')
        # else:
        #     print('ok')
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_updated' + str(e))
            exportData.saveToJson()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_' + str(e))

            exportData.saveToJson()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])

        action_0_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [0]
        ]
        action_1_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [1]
        ]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards) > np.mean(action_1_rewards):
        #     print('wrong')
        # else:
        #     print('ok')

    for i in range(len(agents)):
        filename = 'static_files/x_batch_agent_' + str(i) + '.txt'
        x_batch, y_batch = agents[i].full_batch()
        np.savetxt(filename, x_batch, delimiter=',')
        filename = 'static_files/y_batch_agent_' + str(i) + '.txt'
        np.savetxt(filename, y_batch, delimiter=',')
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the network - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Mean reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
Example #24
from services.parser import get_G

ActionInt = int


def epoch():
    Globals().time = 0
    env = Env(agents)
    for t in range(max_time):  # max_time: module-level constant, not shown in this fragment
        actions: List[ActionInt] = [
            agent.get_action(agent.local_state) for agent in agents
        ]
        env.step(actions)
    Globals().epochs_done += 1
    return env
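# --- Plausible implementation of the nested_sum helper used below (an
# assumption: the real helper is imported elsewhere and not shown in this
# fragment).
def nested_sum(values):
    # recursively sum arbitrarily nested lists/tuples of numbers
    total = 0
    for v in values:
        total += nested_sum(v) if isinstance(v, (list, tuple)) else v
    return total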


Globals().max_epsilon = 0
agents: List[SmartAgent] = get_LearnSmartAgents()
env: Env = epoch()
rewards = nested_sum(env.global_rewards)
print('rewards', rewards)
print('carsout', env.cars_out)

exportData = ExportData(learningMethod='Monte Carlo TODO',
                        learningEpochs=0,
                        nets=env.global_memories,
                        netName='net4',
                        densityName='last_epoch')
exportData.saveToJson()
def generate_random_epochs(learntAgents=False, save_front_json=False, epochs=range(1), plotting=False, reshaping=False,
                           actions=None, clear_memory=True):
    # save_front_json = True
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    # print(agents[0].orange_phase_duration)
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=Globals().get_u(Globals().vp().max_time_learn), time=Globals().vp().max_time_learn)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards) > np.mean(action_1_rewards):
        #     print('wrong')
        # else:
        #     print('ok')
        # [x[:][1] for x in self.A[0]]
        if save_front_json:
            exportData = ExportData(learningMethod='DQN', learningEpochs=0, nets=env.global_memories,
                                    netName='politechnika',
                                    densityName='random_now' + str(Globals().greedy_run_no))
            exportData.saveToJson()
            print('doneeeeeee', e)
        env.remember_memory()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1

        # action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        # action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards) > np.mean(action_1_rewards):
        #     print('wrong')
        # else:
        #     print('ok')
        # print('cars_out random', env.cars_out)
        # for agent in agents:
        #     highest_x=max([max(mem.state.to_learn_array(agent)[0]) for mem in agent.memories])
        #     print('highest_x',highest_x)
    save_batches(agents)
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the network - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Mean reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
    # if any(x for x in [mem.reward for mem in agents[0].memories] if x > 10.1):
    #     print("weeeeeeeeeeee")
    return agents
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1),
                           plotting=False,
                           reshaping=False,
                           u=env_settings.u,
                           actions=None):
    # save_front_json = True
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        # print('epoch!!!!!',e)
        Globals().epsilon = 1
        env: Env = epoch(agents, u=u)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        action_0_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [0]
        ]
        action_1_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [1]
        ]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards) > np.mean(action_1_rewards):
        #     print('wrong')
        # else:
        #     print('ok')
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net15',
                                    densityName='random_updated' + str(e))
            exportData.saveToJson()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net15',
                                    densityName='random_' + str(e))

            exportData.saveToJson()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1

        # action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        # action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards) > np.mean(action_1_rewards):
        #     print('wrong')
        # else:
        #     print('ok')
    save_batches(agents)
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the network - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Mean reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
    if any(x for x in [mem.reward for mem in agents[0].memories] if x > 10):
        print("weeeeeeeeeeee")
    return agents
Example #27
def train(learntAgents=True, max_time_learn=60, agents=None, shuffle=True, batches=None, actual_number=''):
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    if batches is None:
        batches = get_batches(agents, actual_number)
    models = [agent.model for agent in agents]
    for i in range(len(agents)):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 10 ** 10
        val_loss_best = 10 ** 10
        escape_flag = False
        escape_val = 0
        inne = 0     # fits where the weights changed
        te_same = 0  # fits where the weights stayed the same
        start_flag = True
        while timer() - start_time < max_time_learn and not escape_flag:
            # print('at the start res.history[val_loss] is', model.history['val_loss'][-1])
            # if shuffle:
            #     validation_indexes = random.sample(range(len(x_batch)),int(len(x_batch)/10))
            #     validation_x,validation_y = [[x_batch[index_x] for index_x in validation_indexes]],[[y_batch[index_y] for index_y in validation_indexes]]
            #     res = model.fit(x_batch, y_batch, batch_size=Globals().vp().batch_size,
            #                     initial_epoch=Globals().epochs_done,
            #                     epochs=Globals().epochs_done + Globals().epochs_learn,
            #                     validation_data=(validation_x,validation_y),
            #                     verbose=0)  # callbacks=[Globals().tensorboard,agents[i].weights_history_callback]
            wagi_przed_uczeniem = model.get_weights()  # weights before this fit step
            res = model.fit(x_batch, y_batch, batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_learn_done,
                            epochs=Globals().epochs_learn_done + Globals().vp().epochs_learn,
                            validation_split=0.2,
                            verbose=0)  # callbacks=[Globals().tensorboard,agents[i].weights_history_callback]
            same = True
            porownanie = wagi_przed_uczeniem[0] == model.get_weights()[0]  # compare first weight matrix
            for porownanie_warstwa in porownanie:
                if any([s == False for s in porownanie_warstwa]):
                    same = False
            if same:
                te_same += 1
                print(f'weights the same {te_same} times, different {inne}')
            else:
                inne += 1
            # print('weights unchanged by training', same)
            Globals().epochs_learn_done += Globals().vp().epochs_learn
            if start_flag:
                # print('res.history at start_flag:', res.history['val_loss'])
                start_flag = False
            if res.history['val_loss'][-1] < val_loss_best:
                res_hist = res.history['val_loss'][-1]
                # print(f'res.history {res_hist} better than {val_loss_best}')
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] >= val_loss:
                escape_val += 1
                # print('escape_val', escape_val)
                # print('val loss', res.history['val_loss'][-1])
                if escape_val > 10:
                    escape_flag = True
                #     print('would stop here!')
                # print('network result', res.history['val_loss'][-1])
                val_loss = 10 ** 10
            else:
                val_loss = res.history['val_loss'][-1]
            if not (timer() - start_time < max_time_learn and not escape_flag):
                print('loss', res.history['val_loss'][-1])
        Globals().last_weights = model.get_weights()
        model.set_weights(weights_best)
        # print('weights after', model.get_weights())
        # print('comparison', model.get_weights()[0])
        # print('weights identical', wagi_przed[0] == model.get_weights()[0])
        model.save('static_files/model-agent' + str(i) + '.h5')
Example #28
def count_rewards(env):
    memsum = 0
    i = 0
    for agent in env.agents:
        for mem in agent.memories:
            i += 1
            memsum += mem.reward
    return memsum, memsum / i


model_file_names = [
    'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
    'static_files/model-agent2.h5'
]
agents = get_LearnSmartAgents(model_file_names)
env = Env(agents)
epoch_greedy(env)
env.update_memory_rewards()
rewards_sum, rewards_mean = count_rewards(env)
cars_out = env.cars_out
exportData = ExportData(learningMethod='DQN',
                        learningEpochs=0,
                        nets=env.global_memories,
                        netName='net4',
                        densityName='learnt1')
exportData.saveToJson()
print(
    f'rewards_mean:{rewards_mean} rewards_sum:{rewards_sum} cars_out:{cars_out}'
)