Code example #1
File: train_dqn.py  Project: wenwenla/DeepRL
def test_dqn():
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    agent.load(args.save_dir)
    for _ in range(10):
        agent.test_one_episode(True)
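The snippet above only drives the agent; as a rough guide, a greedy test rollout like test_one_episode usually looks like the sketch below. The q_values() helper and the gym step loop are assumptions for illustration, not the actual wenwenla/DeepRL implementation.

import numpy as np

def test_one_episode(agent, env, render=False):
    # Hypothetical greedy rollout; the real DQNAgent.test_one_episode may differ.
    state = env.reset()
    total_reward, done = 0.0, False
    while not done:
        if render:
            env.render()
        # Act greedily: pick the action with the highest predicted Q-value.
        action = int(np.argmax(agent.q_values(state)))  # q_values() is assumed here
        state, reward, done, _ = env.step(action)
        total_reward += reward
    return total_reward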
Code example #2
class DQNScheduler:
    def __init__(self, simulator):
        self.agent = DQNAgent(25, 6)
        self.agent.load("./save/car-100-dqn.h5")
        self.simulator = simulator
        self.agent.epsilon = 0

    def schedule(self):
        action = self.agent.act(np.reshape(self.simulator.get_state(),
                                           [1, 25]))
        return action
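Setting agent.epsilon = 0 disables exploration, so every call to act() returns the greedy action and scheduling becomes deterministic. A minimal sketch of the epsilon-greedy act() that a Keras-based DQNAgent typically implements (an assumption, not the scheduler's actual code):

import numpy as np

def act(self, state):
    # With epsilon == 0 this branch is never taken, so the policy is deterministic.
    if np.random.rand() < self.epsilon:
        return np.random.randint(self.action_size)    # explore
    q_values = self.model.predict(state, verbose=0)   # shape (1, action_size)
    return int(np.argmax(q_values[0]))                # exploit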
Code example #3
def load_model(MODEL_TYPE):
    curr_model = None
    if MODEL_TYPE == "SVM":
        print("LOADING SVM...")
        curr_model = load("svm.joblib")
    elif MODEL_TYPE == "LR":
        print("LOADING LR...")
        lr = LogReg(74)  #(env.matches.shape[1])
        lr.load_weights("weights/weights-improvement-100-0.31.hdf5")
        curr_model = lr
    elif MODEL_TYPE == "DT":
        print("LOADING DT...")
        curr_model = load("dt.joblib")
    elif MODEL_TYPE == "GB":
        print("LOADING GB...")
        curr_model = load("gb.joblib")
    elif MODEL_TYPE == "RF":
        print("LOADING RF...")
        curr_model = load("rfc.joblib")
    elif MODEL_TYPE == "NB":
        print("LOADING NB...")
        curr_model = load("nb.joblib")
    elif MODEL_TYPE == "AB":
        print("LOADING AB...")
        curr_model = load("ab.joblib")
    elif MODEL_TYPE == "DQN":
        print("LOADING DQN...")
        BetNet = DQNAgent(75)
        BetNet.load("weights/betnet-weights-dqn.h5")
        curr_model = BetNet
    else:
        print("LOADING NN...")
        BetNet = Network(74)  #(env.matches.shape[1])
        BetNet.load_weights(
            'weights/Adadelta/test9_400_Best/weights-improvement-400-0.48.hdf5'
        )  #PCA("weights/Adadelta/test13_100iter_reglast2/weights-improvement-100-0.52.hdf5")  # Most recent weights
        curr_model = BetNet
    return curr_model
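The joblib load() calls above assume each scikit-learn model was serialised earlier with the matching dump(). A small sketch of that save side, reusing one of the file names from the listing (the RandomForestClassifier and its hyperparameters are illustrative):

from joblib import dump, load
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100)
# clf.fit(X_train, y_train)            # trained elsewhere
dump(clf, "rfc.joblib")                # produces the file read by load("rfc.joblib")
restored = load("rfc.joblib")          # round-trips the fitted estimator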
Code example #4
File: visualization.py  Project: tristan-ka/IBOAT_RL
    def simulateGustsControl(self):
        '''
        Simulate the response of the controller to gusts.

        :return: A plot of the simulation.
        '''
        self.sim_time = 100
        agent = DQNAgent(self.mdp.size, self.action_size)
        agent.load(self.src)
        WH = self.wh.generateWind()
        hdg0 = 0 * TORAD * np.ones(self.wh.samples)

        state = self.mdp.initializeMDP(hdg0, WH)

        i = np.ones(0)
        v = np.ones(0)
        wind_heading = np.ones(0)

        for time in range(self.sim_time):
            WH = self.wh.generateWind()
            if time == 20:
                WH = self.wh.generateGust(10 * TORAD)
            action = agent.actDeterministically(state)
            next_state, reward = self.mdp.transition(action, WH)
            state = next_state
            i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
            v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
            wind_heading = np.concatenate([wind_heading, WH[0:10]])

        time_vec = np.linspace(0, self.sim_time, int((self.sim_time) / self.mdp.dt))

        f, axarr = plt.subplots(2, sharex=True)
        axarr[0].plot(time_vec, i / TORAD)
        axarr[1].plot(time_vec, v)
        axarr[0].set_ylabel("angle of attack")
        axarr[1].set_ylabel("v")

        plt.show()
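agent.actDeterministically(state) appears throughout these IBOAT_RL visualisations; a plausible sketch of what it does, assuming the agent exposes the same evaluate() used in the animation examples further below, is simply the greedy argmax over the two Q-values (bear-off and luff):

import numpy as np

def actDeterministically(self, state):
    # Assumed implementation: no exploration, just the greedy action.
    q_values = self.evaluate(state)    # [Q(s, bear-off), Q(s, luff)]
    return int(np.argmax(q_values))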
Code example #5
File: visualization.py  Project: tristan-ka/IBOAT_RL
    def simulateDQNControl(self, hdg0):
        '''
        Plots the control law of the network over a simulation.

        :param hdg0: Initial heading of the boat for the simulation.
        :return: A plot of the angle of attack and velocity during the control.
        '''
        agent = DQNAgent(self.mdp.size, self.action_size)
        agent.load(self.src)
        WH = self.wh.generateWind()
        hdg0 = hdg0 * TORAD * np.ones(self.wh.samples)

        state = self.mdp.initializeMDP(hdg0, WH)

        i = np.ones(0)
        v = np.ones(0)
        wind_heading = np.ones(0)

        for time in range(self.sim_time):
            WH = self.wh.generateWind()
            action = agent.actDeterministically(state)
            next_state, reward = self.mdp.transition(action, WH)
            state = next_state
            i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
            v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
            wind_heading = np.concatenate([wind_heading, WH[0:10]])

        time_vec = np.linspace(0, self.sim_time, int((self.sim_time) / self.mdp.dt))

        f, axarr = plt.subplots(2, sharex=True)
        axarr[0].plot(time_vec, i / TORAD)
        axarr[1].plot(time_vec, v)
        axarr[0].set_ylabel("i [°]")
        axarr[1].set_ylabel("v [m/s]")
        axarr[0].set_xlabel("t [s]")
        axarr[1].set_xlabel("t [s]")

        plt.show()
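TORAD is used in all of these methods to turn headings given in degrees into radians and back. Assuming it is the usual pi/180 factor (not verified against the project), the broadcast over the wind samples looks like this:

import math
import numpy as np

TORAD = math.pi / 180.0            # assumed degrees-to-radians factor
hdg0 = 2 * TORAD * np.ones(10)     # a 2 degree heading, repeated for each wind sample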
Code example #6
File: AutoBetter.py  Project: zsimone10/FIFABets
elif MODEL_TYPE == "GB":
    print("LOADING GB...")
    curr_model = load("gb.joblib")
elif MODEL_TYPE == "RF":
    print("LOADING RF...")
    curr_model = load("rfc.joblib")
elif MODEL_TYPE == "NB":
    print("LOADING NB...")
    curr_model = load("nb.joblib")
elif MODEL_TYPE == "AB":
    print("LOADING AB...")
    curr_model = load("ab.joblib")
elif MODEL_TYPE == "DQN":
    print("LOADING DQN...")
    BetNet = DQNAgent(75)
    BetNet.load("weights/betnet-weights-dqn.h5")
    curr_model = BetNet
else:
    print("LOADING NN...")
    BetNet = Network(env.matches.shape[1])
    BetNet.load_weights(
        "weights/Adadelta/test13_100iter_reglast2/weights-improvement-100-0.52.hdf5"
    )  # Most recent weights
    curr_model = BetNet

###############################################################################


#GETS THE PREDICTION VEC GIVEN MODEL
def generatePrediction(mt, curr_model, to_process):
    prediction = None
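generatePrediction() is cut off in this listing. A hedged sketch of how such a dispatcher could finish, assuming the Keras-style models expose predict() and the scikit-learn models expose predict_proba() (the branch names mirror MODEL_TYPE above; this is not code from the original file):

import numpy as np

def generatePrediction(mt, curr_model, to_process):
    # Hypothetical completion: route to the right prediction call per model type.
    if mt in ("NN", "LR", "DQN"):
        prediction = curr_model.predict(np.asarray(to_process))        # Keras-style models
    else:
        prediction = curr_model.predict_proba(np.asarray(to_process))  # scikit-learn models
    return prediction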
Code example #7
File: dqn_example.py  Project: AndreErvilha/soccerGym
action_size = 9

actions = [[[0, 0], [-100, -100]], [[0, 0], [-100, 0]], [[0, 0], [-100, 100]],
           [[0, 0], [0, -100]], [[0, 0], [0, 0]], [[0, 0], [0, 100]],
           [[0, 0], [100, -100]], [[0, 0], [100, 0]], [[0, 0], [100, 100]]]

# actions = [
#     [[0,0],[0,0]],
#     [[0,0],[0,100]],
#     [[0,0],[100,0]],
#     [[0,0],[100,100]]
# ]

agent = DQNAgent(state_size, action_size)
#load
agent.load("./save/example_dqn.h5")
done = False
batch_size = 32

for e in range(EPISODES):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    # print(e)
    last_reward = 0
    for time in range(1000):
        # delay.sleep(1/50)
        #render
        env.render()
        # action = agent.act(state)
        action = agent.act_2(state)
        commands = actions[action]
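The snippet ends just after the action index is mapped to a command pair; a likely continuation, assuming the usual gym-style step() loop (not taken from the original file), would be:

        # Hypothetical continuation of the inner loop shown above.
        next_state, reward, done, _ = env.step(commands)
        next_state = np.reshape(next_state, [1, state_size])
        state = next_state
        if done:
            break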
Code example #8
import gym
import numpy as np

from keras.optimizers import Adam
from dqn import DQNAgent
from keras.models import model_from_json
from keras.models import load_model

EPISODES = 100

if __name__ == "__main__":
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    # agent.epsilon = 0.01

    # agent.model = model_from_json(open('cartpole.json').read())
    agent.load('cp2.h5')
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    done = False

    for t in range(10000):
        env.render()
        # action = agent.act(state)

        act_values = agent.model.predict(state)
        action = np.argmax(act_values[0])  # returns action

        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        state = next_state
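Here the agent's underlying Keras model is queried directly with agent.model.predict and argmax. A minimal sketch of the kind of Q-network such a DQNAgent typically wraps (assuming a keon-style CartPole DQN; the imported dqn module may differ):

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

def build_q_network(state_size, action_size, learning_rate=0.001):
    # One output unit per action, trained with an MSE loss on TD targets.
    model = Sequential()
    model.add(Dense(24, input_dim=state_size, activation="relu"))
    model.add(Dense(24, activation="relu"))
    model.add(Dense(action_size, activation="linear"))
    model.compile(loss="mse", optimizer=Adam(lr=learning_rate))
    return model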
Code example #9
state_size = 3
action_size = 9

maxForce = 300

actions = [[[0, 0], [-maxForce, -maxForce]], [[0, 0], [2 * -maxForce, 0]],
           [[0, 0], [-maxForce, maxForce]], [[0, 0], [0, 2 * -maxForce]],
           [[0, 0], [0, 0]], [[0, 0], [0, 2 * maxForce]],
           [[0, 0], [maxForce, -maxForce]], [[0, 0], [2 * maxForce, 0]],
           [[0, 0], [maxForce, maxForce]]]

agent = DQNAgent(state_size, action_size)

#load
agent.load("./save/execution1.h5")
env = MyEnvironment(ut=3 / 50)
batch_size = 100
print("done;episode;episodes;score;epsilon")

for e in range(EPISODES):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    # print(e)
    last_reward = 0
    for time in range(MOVES):
        if (not TRAINING):
            # delay.sleep(1/50)
            env.render()
        else:
            env.render()
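The header printed before the loop ("done;episode;episodes;score;epsilon") suggests a per-episode CSV log; a plausible line to emit at the end of each episode (an assumption, not code from the original script) would be:

    # Hypothetical per-episode log row matching the CSV header printed before the loop.
    print("{};{};{};{};{}".format(done, e, EPISODES, last_reward, agent.epsilon))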
Code example #10
File: visualization.py  Project: tristan-ka/IBOAT_RL
    def generateAnimation(self, hdg0):
        """
        Generate an animation showing the two Q-values during an interesting control simulation including gusts.

        :param hdg0: Initial heading of the boat for the simulation
        """
        agent = DQNAgent(self.mdp.size, self.action_size)
        agent.load(self.src)
        WH = self.wh.generateWind()
        hdg0 = hdg0 * TORAD * np.ones(self.wh.samples)

        state = self.mdp.initializeMDP(hdg0, WH)

        i = np.ones(0)
        v = np.ones(0)
        NN_Q0 = np.zeros(self.sim_time)
        NN_Q1 = np.zeros(self.sim_time)
        wind_heading = np.ones(0)

        for timesim in range(self.sim_time):
            WH = self.wh.generateWind()
            if timesim == 50:
                WH = self.wh.generateGust(10 * TORAD)
            action = agent.actDeterministically(state)
            next_state, reward = self.mdp.transition(action, WH)
            state = next_state
            i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
            v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])

            # Evaluate the two Q-values with the agent loaded above.
            NN_Q0[timesim] = agent.evaluate(self.mdp.s)[0]
            NN_Q1[timesim] = agent.evaluate(self.mdp.s)[1]

            wind_heading = np.concatenate([wind_heading, WH[0:10]])

        time_vec = np.linspace(0, self.sim_time, int((self.sim_time) / self.mdp.dt))

        # Visualization tools start here
        f = plt.figure(figsize=(15, 5))
        ax0 = f.add_subplot(2, 2, 1)
        ax1 = f.add_subplot(2, 2, 3)
        ax2 = f.add_subplot(2, 2, (2, 4))

        ax0.set_title('Simulation')
        ax0.set_ylabel('i [°]')
        ax0.set_xlabel('t [s]')
        ax0.grid(linestyle='-', linewidth=1)

        ax1.set_ylabel('v [m/s]')
        ax1.set_xlabel('t [s]')
        ax1.grid(linestyle='-', linewidth=1)

        ax2.set_xticks([0, 1])
        ax2.grid(linestyle='-', linewidth=1)
        ax2.set_xticklabels(['Bear-off', 'Luff'])
        ax2.set_ylim([14, 20])
        ax2.set_xlim([0, 1])
        ax2.set_title('Q(s,a) of actions')

        l0, = ax0.plot(time_vec, i)
        l1, = ax1.plot(time_vec, v)
        bar0 = ax2.bar([0, 1], [NN_Q0[0], NN_Q1[0]], color=['b', 'r'])

        def animate(k):
            l0.set_data(time_vec[:k], i[:k])
            time.sleep(.0025)
            l1.set_data(time_vec[:k], v[:k])
            if k % 10 == 0:
                kk = k // 10
                bar0[0].set_height(NN_Q0[kk])
                bar0[1].set_height(NN_Q1[kk])

            return l0, l1, bar0

        ani = animation.FuncAnimation(f, animate, frames=1000, interval=1, blit=False)
        plt.show()

        return ani
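The method returns the FuncAnimation object, so a caller can persist it; assuming ffmpeg is installed, something like this works (viz stands for an instance of the enclosing visualisation class):

ani = viz.generateAnimation(hdg0=2)
ani.save("dqn_q_values.mp4", writer="ffmpeg", fps=30)   # requires an ffmpeg install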
Code example #11
File: visualization.py  Project: tristan-ka/IBOAT_RL
    def generateDeltaAnimation(self, hdg0):
        """
        Generate an animation showing the differences between the two Q-values during an interesting control simulation including gusts.

        :param hdg0: Initial heading of the boat for the simulation
        """
        agent = DQNAgent(self.mdp.size, self.action_size)
        agent.load(self.src)
        WH = self.wh.generateWind()
        hdg0 = hdg0 * TORAD * np.ones(self.wh.samples)

        state = self.mdp.initializeMDP(hdg0, WH)

        i = np.ones(0)
        v = np.ones(0)
        NN_Q0 = np.zeros(self.sim_time)
        NN_Q1 = np.zeros(self.sim_time)
        wind_heading = np.ones(0)

        for timesim in range(self.sim_time):
            WH = self.wh.generateWind()
            if timesim == 50:
                WH = self.wh.generateGust(10 * TORAD)
            action = agent.actDeterministically(state)
            next_state, reward = self.mdp.transition(action, WH)
            state = next_state
            i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
            v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])

            # Evaluate the two Q-values with the agent loaded above.
            NN_Q0[timesim] = agent.evaluate(self.mdp.s)[0]
            NN_Q1[timesim] = agent.evaluate(self.mdp.s)[1]

            wind_heading = np.concatenate([wind_heading, WH[0:10]])

        time_vec = np.linspace(0, self.sim_time, int((self.sim_time) / self.mdp.dt))

        # Visualization tools start here
        f = plt.figure(figsize=(15, 5))
        ax0 = f.add_subplot(2, 2, 1)
        ax1 = f.add_subplot(2, 2, 3)
        ax2 = f.add_subplot(2, 2, (2, 4))

        ax0.set_title('Simulation')
        ax0.set_ylabel('i [°]')
        ax0.grid(linestyle='-', linewidth=1)

        ax1.set_ylabel('v [m/s]')
        ax1.set_xlabel('t [s]')
        ax1.grid(linestyle='-', linewidth=1)

        ax3 = ax2.twiny()
        tresh = np.max(NN_Q0 - NN_Q1)
        ax3.plot(np.linspace(-tresh, tresh, 100), 0.5 * np.ones(100))  # Create a dummy plot
        ax3.cla()

        ax2.set_xticks([-1, 1])
        ax2.grid(linestyle='-', linewidth=1)
        ax2.set_xticklabels(['Bear off', 'Luff'])
        ax2.set_ylim([-.5, .5])
        ax2.set_xlim([-1, 1])
        ax2.set_title('Q(s,bear-off) - Q(s,luff)', y=-0.1)
        ax2.get_yaxis().set_visible(False)
        ax3.get_yaxis().set_visible(False)

        l0, = ax0.plot(time_vec, i / TORAD)
        l1, = ax1.plot(time_vec, v)
        bar0 = ax2.barh(0, NN_Q1[0] - NN_Q0[0])
        ax2.plot([0, 0], [-0.5, 0.5], color='gray')

        def animate(k):
            l0.set_data(time_vec[:k], i[:k] / TORAD)
            time.sleep(.0025)
            l1.set_data(time_vec[:k], v[:k])
            if k % 10 == 0:
                kk = k // 10
                bar0[0].set_width(NN_Q1[kk] - NN_Q0[kk])

            return l0, l1, bar0

        ani = animation.FuncAnimation(f, animate, frames=1000, interval=1, blit=False)
        plt.show()

        return ani