Example #1
 
 # Query the main network for the greedy action and its Q-values
 current_f = area.getDeltaF()
 a,Q_values = session.run([mainNet.predict,mainNet.Qout],
                          feed_dict={mainNet.inputs:np.array(current_f).reshape(1,1)})
 a = a[0]
 
 # Epsilon-greedy exploration: take a random action with probability epsilon
 if np.random.rand() < epsilon:
     a = np.random.randint(0,2)
 
 # Take the action, modify environment and get the reward
 generator = rl.setDiscretePower(a,generator)
 area.calculateDeltaF([generator,load])
 new_f = area.getDeltaF()
 r = rl.getSimpleReward(new_f)
 cumm_r += r
 
 # Store the experience and print some data
 buffer.add(np.reshape(np.array([current_f,new_f,a,r]),[1,4]))
 print("Delta f: ",round(current_f,2)," Action: ",a, " Reward: ",r)
 
 # Update the model every 4 steps with a minibatch of 32
 if (j % 4 == 0) and (len(buffer.buffer) > 32):
     miniBatch = buffer.sample(size = 32)
     
     # Evaluate Q-values: main net on the current states, both nets on the next states
     Q_values = session.run(mainNet.Qout, feed_dict={mainNet.inputs: np.reshape(miniBatch[:, 0], [32, 1])})
     Q_mainNet = session.run(mainNet.Qout, feed_dict={mainNet.inputs: np.reshape(miniBatch[:, 1], [32, 1])})
     Q_targetNet = session.run(targetNet.Qout, feed_dict={targetNet.inputs: np.reshape(miniBatch[:, 1], [32, 1])})
     # Double DQN: the main network selects the best next action
     a_max_mainNet = np.argmax(Q_mainNet, axis=1)
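
The snippet cuts off mid-update. The lines below are a hedged sketch of how the Double-DQN target and training step could continue from this point; `gamma`, the `mainNet.nextQ` and `mainNet.actions` placeholders, and the `mainNet.updateModel` op are assumptions, since the network class is not shown here (terminal-state masking is also omitted for brevity).

     # --- Hedged sketch: possible continuation, not in the original snippet ---
     gamma = 0.99  # assumed discount factor
     # Double DQN target: the target net evaluates the action chosen by the main net
     double_q = Q_targetNet[np.arange(32), a_max_mainNet]
     target_q = miniBatch[:, 3] + gamma * double_q  # column 3 holds the reward
     # One gradient step on the main network; placeholder/op names are assumptions
     session.run(mainNet.updateModel,
                 feed_dict={mainNet.inputs: np.reshape(miniBatch[:, 0], [32, 1]),
                            mainNet.nextQ: target_q,
                            mainNet.actions: miniBatch[:, 2]})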
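
The `buffer` object used above is not defined in this excerpt. Below is a self-contained sketch of a minimal experience-replay buffer exposing the same `add`/`sample` interface; the capacity and eviction policy are assumptions.

import numpy as np

class ExperienceBuffer:
    """Minimal replay buffer for [current_f, new_f, a, r] rows (assumed layout)."""

    def __init__(self, buffer_size=5000):  # capacity is an assumption
        self.buffer = []
        self.buffer_size = buffer_size

    def add(self, experience):
        # Evict the oldest rows when the buffer would overflow
        overflow = len(self.buffer) + len(experience) - self.buffer_size
        if overflow > 0:
            self.buffer[0:overflow] = []
        self.buffer.extend(experience)

    def sample(self, size):
        # Uniformly sample `size` stored transitions as a (size, 4) array
        idx = np.random.choice(len(self.buffer), size, replace=False)
        return np.reshape(np.array([self.buffer[i] for i in idx]), [size, 4])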
Example #2
            # Sample an action from the policy network and carry the recurrent state forward
            current_f = area.getDeltaF()
            a, new_state = session.run(
                [net.pi_sample, net.rnn_state],
                feed_dict={
                    net.inputs: np.array(current_f).reshape(1, 1),
                    net.batch_size: 1,
                    net.trainLength: 1,
                    net.state_in: state
                })
            # Perturb the sampled action with epsilon-scaled Gaussian noise for exploration
            a = a[0, 0] + epsilon * np.random.normal(0.0, 1)
            state = new_state

            # Take the action, modify environment and get the reward
            generator = rl.setContinuousPower(a, generator)
            area.calculateDeltaF([generator, load])
            r = rl.getSimpleReward(area.getDeltaF())

            # Store the rewards and states
            cumm_r += r
            r_episode.append(r)
            f_episode.append(current_f)
            a_episode.append(a)

            # Print some data to observe the evolution of the system
            print("Delta f: ", round(current_f, 2), " Action: ", a,
                  " Reward: ", r)

            # Update epsilon
            epsilon = rl.getNewEpsilon(epsilon)

            # End episode if delta f is too large
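
The excerpt stops at the comment above. A plausible completion of the check is sketched below; the deviation threshold is chosen here purely as an assumption, since the original value is not shown.

            if abs(area.getDeltaF()) > 2.0:  # assumed threshold, not from the original
                break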