                direction = -1
            else:
                direction = 1
            energy_ai = abs(action - direction_boundary) * temperature_step
        # PLAYING THE NEXT ACTION BY INFERENCE
        else:
            q_values = model.predict(current_state)
            action = np.argmax(q_values[0])
            if (action - direction_boundary < 0):
                direction = -1
            else:
                direction = 1
            energy_ai = abs(action - direction_boundary) * temperature_step
        # UPDATING THE ENVIRONMENT AND REACHING THE NEXT STATE
        next_state, reward, game_over = env.update_env(
            direction, energy_ai, int(timestep / (30 * 24 * 60)))
        total_reward += reward
        # STORING THIS NEW TRANSITION INTO THE MEMORY
        dqn.remember([current_state, action, reward, next_state], game_over)
        # GATHERING IN TWO SEPARATE BATCHES THE INPUTS AND THE TARGETS
        inputs, targets = dqn.get_batch(model, batch_size=batch_size)
        # COMPUTING THE LOSS OVER THE TWO WHOLE BATCHES OF INPUTS AND TARGETS
        loss += model.train_on_batch(inputs, targets)
        timestep += 1
        current_state = next_state
    # PRINTING THE TRAINING RESULTS FOR EACH EPOCH
    print("\n")
    print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
    print("Total Energy spent with an AI: {:.0f}".format(env.total_energy_ai))
    print("Total Energy spent with no AI: {:.0f}".format(env.total_energy_noai))
    # SAVING THE MODEL
    model.save("model.h5")
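# The loop above relies on dqn.get_batch() to build the training batch
# from the replay memory. Its definition lives elsewhere, so the
# following is only a rough, hypothetical sketch of what such a method
# typically computes for a DQN: the inputs are the stored states, and
# each target row is the model's current Q-values with the played
# action's entry replaced by the Bellman target
# reward + gamma * max_a' Q(next_state, a'), or just reward when the
# transition ended the episode. The names `memory` and `discount` and
# the random sampling strategy are assumptions, not the book's code.
import numpy as np

def get_batch_sketch(model, memory, discount=0.9, batch_size=128):
    # Sample a random mini-batch of stored transitions; each entry is
    # assumed to have the shape ([state, action, reward, next_state],
    # game_over) used by dqn.remember() above.
    indices = np.random.randint(0, len(memory), size=min(batch_size, len(memory)))
    batch = [memory[i] for i in indices]
    num_inputs = batch[0][0][0].shape[1]
    num_actions = model.output_shape[-1]
    inputs = np.zeros((len(batch), num_inputs))
    targets = np.zeros((len(batch), num_actions))
    for i, ([state, action, reward, next_state], game_over) in enumerate(batch):
        inputs[i] = state
        # Start from the model's current predictions so that only the
        # played action's Q-value receives a learning signal.
        targets[i] = model.predict(state)[0]
        if game_over:
            targets[i, action] = reward
        else:
            targets[i, action] = reward + discount * np.max(model.predict(next_state)[0])
    return inputs, targets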
                direction = 1
            energy_ai = abs(action - direction_boundary) * temperature_step
        # UPDATING THE ENVIRONMENT AND REACHING THE NEXT STATE
        month = new_month + int(timestep / (30 * 24 * 60))
        if month >= 12:
            month -= 12
        next_state, reward, game_over = env.update_env(
            direction, energy_ai, month)
        total_reward += reward
        # STORING THIS NEW TRANSITION INTO THE MEMORY
        transition = [current_state, action, reward, next_state]
        dqn.remember(transition, game_over)
        # GATHERING IN TWO SEPARATE BATCHES THE INPUTS AND THE TARGETS
        inputs, targets = dqn.get_batch(model, batch_size=batch_size)
        # COMPUTING THE LOSS OVER THE TWO WHOLE BATCHES OF INPUTS AND TARGETS
        loss += model.train_on_batch(inputs, targets)
        timestep += 1
        current_state = next_state
    # PRINTING THE TRAINING RESULTS FOR EACH EPOCH
    print("\n")
    print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
    print("Total energy spent with an AI: {:.0f}".format(env.total_energy_ai))
    print("Total energy spent with no AI: {:.0f}".format(env.total_energy_noai))
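# A quick sanity check of the month arithmetic above (a standalone
# sketch with assumed example values, not part of the training script):
# one timestep is one minute, so a 30-day month lasts
# 30 * 24 * 60 = 43,200 timesteps, and int(timestep / 43200) is the
# number of whole months elapsed since the starting month new_month.
# The wrap-around keeps the index in the 0-11 range the environment
# expects.
new_month = 10                                      # November (months are 0-indexed)
timestep = 100000                                   # ~69 days into the epoch
month = new_month + int(timestep / (30 * 24 * 60))  # 10 + 2 = 12
if month >= 12:
    month -= 12
print(month)                                        # 0, i.e. January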