def pretrain():
    """Seed the replay memory with transitions from a random policy.

    Runs ``pretrain_length`` steps in the environment, picking actions
    uniformly at random, and stores each ``(state, action, reward,
    next_state, done)`` tuple in the replay ``memory``. When an episode
    terminates, the stored next-state is all zeros (so a learner sees no
    bootstrap value past the terminal step) and a fresh episode begins.

    Relies on module-level names: ``Simulation``, ``nodes``,
    ``pretrain_length``, ``actions``, ``memory``, ``np``.
    Returns None; its only effect is populating ``memory``.
    """
    sim = Simulation(nodes)
    state = sim.get_state()
    for _ in range(pretrain_length):
        # Explore with a uniformly random action index.
        a = np.random.randint(0, len(actions))
        new_state, reward, done = sim.step(actions[a])
        if done:
            # Zero terminal state, matching the shape of a real state.
            new_state = np.zeros(state.shape)
        # Single add replaces the identical call duplicated in both
        # branches of the original if/else.
        memory.add((state, a, reward, new_state, done))
        if done:
            # Episode finished: start a new one from its initial state.
            sim = Simulation(nodes)
            state = sim.get_state()
        else:
            # Continue the episode from the successor state.
            state = new_state
model.add(Dense(5, activation='sigmoid')) model.add(Dense(len(actions), activation='linear')) model.compile(loss='mse', optimizer='adam', metrics=['mae']) # Q-learning num_episodes = 1000 y = 0.95 eps = 0.5 # Exploration rate r_avg_list = [] for i in range(num_episodes): if i % 100 == 0: print("Episode {} of {}".format(i + 1, num_episodes)) done = False r_sum = 0 sim = Simulation(nodes) state = sim.get_state() # Initial state iteration = 0 while not done: eps = 1/math.sqrt(iteration + 1) # Gradually decrease exploration rate if np.random.random() < eps: a = np.random.randint(0, len(actions)) # Explore by picking a random action else: a = np.argmax(model.predict(state)) # Use network to predict which action to take action = actions[a] #print(action) new_state, reward, done = sim.step(action) # Use selected action to update the environment