Example #1
                    direction = -1
                else:
                    direction = 1
                energy_ai = abs(action - direction_boundary) * temperature_step
            else:
                q_values = model.predict(current_state)
                action = np.argmax(q_values[0])
                if (action - direction_boundary < 0):
                    direction = -1
                else:
                    direction = 1
                energy_ai = abs(action - direction_boundary) * temperature_step
            next_state, reward, game_over = env.update_env(
                direction, energy_ai, int(timestep / (30 * 24 * 60)))
            total_reward += reward
            dqn.remember([current_state, action, reward, next_state],
                         game_over)
            inputs, targets = dqn.get_batch(model, batch_size=batch_size)
            loss += model.train_on_batch(inputs, targets)
            timestep += 1
            current_state = next_state

        print("\n")
        print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
        print("Total Energy spent with an AI: {:.0f}".format(
            env.total_energy_ai))
        print("Total Energy spent with no AI: {:.0f}".format(
            env.total_energy_noai))

        model.save("model.h5")
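
The top of Example #1 is cut off inside the exploration half of an epsilon-greedy action selection; only its last lines (the direction test and the energy computation) survive above. A hedged reconstruction of the missing branch, mirroring the inference branch that is visible and assuming epsilon and number_actions are defined earlier in the script:

# PLAYING THE NEXT ACTION BY EXPLORATION (sketch of the truncated lines;
# epsilon and number_actions are assumed to be defined earlier)
if np.random.rand() <= epsilon:
    action = np.random.randint(0, number_actions)
    if (action - direction_boundary < 0):
        direction = -1
    else:
        direction = 1
    energy_ai = abs(action - direction_boundary) * temperature_step
# PLAYING THE NEXT ACTION BY INFERENCE -> the else: branch shown in the snippet
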
Example #2
        energy_ai = abs(action - direction_boundary) * temperature_step
    # PLAYING THE NEXT ACTION BY INFERENCE
    else:
        q_values = model.predict(current_state)
        action = np.argmax(q_values[0])
        if (action - direction_boundary < 0):
            direction = -1
        else:
            direction = 1
        energy_ai = abs(action - direction_boundary) * temperature_step
    # UPDATING THE ENVIRONMENT AND REACHING THE NEXT STATE
    next_state, reward, game_over = env.update_env(
        direction, energy_ai, int(timestep / (30 * 24 * 60)))
    total_reward += reward
    # STORING THIS NEW TRANSITION INTO THE MEMORY
    dqn.remember([current_state, action, reward, next_state],
                 game_over)
    # GATHERING IN TWO SEPARATE BATCHES THE INPUTS AND THE TARGETS
    inputs, targets = dqn.get_batch(model, batch_size=batch_size)
    # COMPUTING THE LOSS OVER THE TWO WHOLE BATCHES OF INPUTS AND TARGETS
    loss += model.train_on_batch(inputs, targets)
    timestep += 1
    current_state = next_state
# PRINTING THE TRAINING RESULTS FOR EACH EPOCH
print("\n")
print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
print("Total Energy spent with an AI: {:.0f}".format(
    env.total_energy_ai))
print("Total Energy spent with no AI: {:.0f}".format(
    env.total_energy_noai))
# SAVING THE MODEL
model.save("model.h5")
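
All three fragments call dqn.remember(...) and dqn.get_batch(...) on a replay-memory object that is not defined in these snippets. A minimal sketch of what such a helper typically looks like, assuming 1-by-N NumPy state arrays (matching the model.predict(current_state) / q_values[0] usage above); the class name and the max_memory and discount defaults are illustrative, not taken from the original code:

import numpy as np

class ReplayMemory(object):
    """Illustrative experience-replay buffer with DQN target construction."""

    def __init__(self, max_memory=100, discount=0.9):
        self.memory = list()          # each entry: [transition, game_over]
        self.max_memory = max_memory  # oldest transitions are dropped first
        self.discount = discount      # gamma in the Bellman target

    def remember(self, transition, game_over):
        # transition = [current_state, action, reward, next_state]
        self.memory.append([transition, game_over])
        if len(self.memory) > self.max_memory:
            del self.memory[0]

    def get_batch(self, model, batch_size=10):
        len_memory = len(self.memory)
        num_inputs = self.memory[0][0][0].shape[1]   # state dimension
        num_outputs = model.output_shape[-1]         # number of actions
        batch = min(len_memory, batch_size)
        inputs = np.zeros((batch, num_inputs))
        targets = np.zeros((batch, num_outputs))
        for i, idx in enumerate(np.random.randint(0, len_memory, size=batch)):
            current_state, action, reward, next_state = self.memory[idx][0]
            game_over = self.memory[idx][1]
            inputs[i] = current_state[0]             # (1, N) row -> (N,)
            # Start from the model's own predictions so that only the
            # played action's target carries an error signal.
            targets[i] = model.predict(current_state)[0]
            q_next = np.max(model.predict(next_state)[0])
            if game_over:
                targets[i, action] = reward
            else:
                targets[i, action] = reward + self.discount * q_next
        return inputs, targets

With a helper of this shape, get_batch returns exactly the (inputs, targets) pair that model.train_on_batch(inputs, targets) consumes in the loops above.
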
Example #3
                    direction = 1
                energy_ai = abs(action - direction_boundary) * temperature_step

            # UPDATING THE ENVIRONMENT AND REACHING THE NEXT STATE
            month = new_month + int(timestep / (30 * 24 * 60))
            if month >= 12:
                month -= 12

            next_state, reward, game_over = env.update_env(
                direction, energy_ai, month)

            total_reward += reward

            # STORING THIS NEW TRANSITION INTO THE MEMORY
            transition = [current_state, action, reward, next_state]
            dqn.remember(transition, game_over)

            # GATHERING IN TWO SEPARATE BATCHES THE INPUTS AND THE TARGETS
            inputs, targets = dqn.get_batch(model, batch_size=batch_size)

            # COMPUTING THE LOSS OVER THE TWO WHOLE BATCHES OF INPUTS AND TARGETS
            loss += model.train_on_batch(inputs, targets)
            timestep += 1
            current_state = next_state

        # PRINTING THE TRAINING RESULTS FOR EACH EPOCH
        print("\n")
        print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
        print("Total energy spent with an AI: {:.0f}".format(
            env.total_energy_ai))
        print("Total energy spent with no AI: {:.0f}".format(