Example #1
import numpy as np

def pretrain():
    """Fill the replay memory with transitions collected by taking random actions."""
    sim = Simulation(nodes)
    state = sim.get_state()
    for i in range(pretrain_length):
        a = np.random.randint(0, len(actions))               # Pick a random action index
        new_state, reward, done = sim.step(actions[a])

        if done:
            # The episode finished: store a terminal transition and restart
            new_state = np.zeros(state.shape)
            memory.add((state, a, reward, new_state, done))  # Add experience to memory
            sim = Simulation(nodes)                          # Start a new episode
            state = sim.get_state()                          # Get its initial state
        else:
            memory.add((state, a, reward, new_state, done))  # Add experience to memory
            state = new_state                                # Continue from the new state
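
The excerpt calls memory.add(...) on a replay buffer that is not defined in the snippet. A minimal buffer with a matching add interface (plus a sample method for drawing training minibatches later) could look like the sketch below; the class name, the capacity, and the sample method are assumptions for illustration, not part of the original example:

from collections import deque
import random

class Memory:
    """Hypothetical fixed-size replay buffer for (state, action, reward, new_state, done) tuples."""
    def __init__(self, max_size=10000):
        self.buffer = deque(maxlen=max_size)                  # Oldest experiences are dropped automatically

    def add(self, experience):
        self.buffer.append(experience)                        # Store one transition tuple

    def sample(self, batch_size):
        return random.sample(list(self.buffer), batch_size)   # Uniform random minibatch

memory = Memory(max_size=10000)                               # Buffer instance used by pretrain() above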
Example #2
import math
import numpy as np
from keras.layers import Dense

model.add(Dense(5, activation='sigmoid'))             # Hidden layer
model.add(Dense(len(actions), activation='linear'))   # One linear output (Q-value) per action
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
# Q-learning

num_episodes = 1000
y = 0.95           # Discount factor (gamma)
eps = 0.5          # Initial exploration rate
r_avg_list = []    # Average reward per episode
for i in range(num_episodes):
    if i % 100 == 0:
        print("Episode {} of {}".format(i + 1, num_episodes))
    done = False
    r_sum = 0                    # Cumulative reward for this episode
    sim = Simulation(nodes)      # New environment instance for this episode
    state = sim.get_state()      # Initial state

    iteration = 0                # Step counter, used to decay epsilon below

    while not done:

        eps = 1/math.sqrt(iteration + 1)                # Gradually decrease exploration rate

        if np.random.random() < eps:
            a = np.random.randint(0, len(actions))      # Explore: pick a random action
        else:
            a = np.argmax(model.predict(state))         # Exploit: pick the action with the highest predicted Q-value

        action = actions[a]
        #print(action)
        new_state, reward, done = sim.step(action)               # Use selected action to update the environment
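
The excerpt ends before the Q-value update. In a DQN-style loop of this shape, each step usually builds a target equal to the observed reward plus the discounted best predicted Q-value of the next state, fits the network toward that target for the chosen action only, and then advances the bookkeeping variables. The continuation below is a hedged sketch of that pattern using the names defined above (y as the discount factor, r_avg_list for per-episode averages) and assuming state and new_state carry the leading batch dimension that model.predict expects; it is not the original author's code:

        # Sketch of a typical continuation (illustrative, not from the original excerpt)
        target = reward
        if not done:
            target += y * np.max(model.predict(new_state))    # Bellman target: r + gamma * max_a' Q(s', a')

        target_vec = model.predict(state)                     # Current Q-value estimates for every action
        target_vec[0][a] = target                             # Only the taken action's Q-value moves toward the target
        model.fit(state, target_vec, epochs=1, verbose=0)     # One gradient step on the MSE loss

        state = new_state                                     # Continue from the new state
        r_sum += reward                                       # Accumulate episode reward
        iteration += 1                                        # Drives the epsilon decay at the top of the loop

    r_avg_list.append(r_sum / iteration)                      # Record the average reward for this episode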