def test():
    pygame.init()
    agent = DQNAgent(output_dim=3)
    agent.model.load_weights('weights.hdf5')
    # while counter_games < 150:
    # Initialize classes
    game = Game(440, 440)
    player1 = game.player
    field0 = game.field

    # Perform first move
    record = 0
    initialize_game(player1, game, field0, agent)
    if display_option:
        display(player1, field0, game, record)

    while not game.crash:
        # get old state
        state_old = agent.get_state(game, player1, field0)

        # predict action based on the old state
        prediction = agent.model.predict(state_old)
        final_move = np.argmax(prediction[0])
        print("move {} with prediction : {}".format(final_move, prediction))

        # perform new move and get new state
        player1.do_move(final_move, field0, game)

        record = get_record(game.score, record)
        if display_option:
            pygame.time.wait(speed)
            display(player1, field0, game, record)
Example #2
def train(display_on, speed, params):
    pygame.init()
    pygame.font.init()

    agent = DQNAgent(params)

    counter_games = 0
    high_score = 0
    score_plot = []
    counter_plot = []

    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)

        if display_on:
            game.update_display()

        while not game.crash:
            if handle_game_event(game):
                return

            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 1 - (counter_games *
                                 params['epsilon_decay_linear'])

            state = game.get_state()
            move = agent.get_move(state)
            game.do_move(move)

            new_state = game.get_state()
            reward = get_reward(game)

            # train short memory based on the new action and state
            agent.train_short_memory(state, move, reward, new_state,
                                     game.crash)

            agent.remember(state, move, reward, new_state, game.crash)

            if display_on:
                game.update_display()
                pygame.time.wait(speed)

        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        high_score = game.high_score

        score_plot.append(game.score)
        counter_plot.append(counter_games)

        agent.replay_memory(params['batch_size'])

    agent.model.save_weights(params['weights_path'])
    pygame.quit()
    plot_seaborn(counter_plot, score_plot)
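
Note: the training loop above delegates action selection to agent.get_move(state) after setting agent.epsilon, but the DQNAgent implementation is not part of this snippet. The following is only a minimal sketch of what an epsilon-greedy get_move could look like for that interface, assuming a Keras-style self.model and a 3-action output (both assumptions, not taken from the example).

import numpy as np
from random import randint, uniform

def get_move(self, state):
    # Sketch only: explore with probability self.epsilon, otherwise act greedily
    # on the model's predicted Q-values for the (assumed) 3 discrete actions.
    if uniform(0, 1) < self.epsilon:
        return randint(0, 2)  # random action index
    q_values = self.model.predict(state.reshape((1, -1)), verbose=0)
    return int(np.argmax(q_values[0]))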
Example #3
def script():
    agents = [BetterRandomAgent(), BetterPathAgent(), DQNAgent()]
    field_sizes = [8, 20]
    n_games = 1000
    for size in field_sizes:
        for agent in agents:
            run_n_logs(agent, n_games, size)
Example #4
    def __init__(self, env_name='harvest', num_agents=1):
        self.env_name = env_name
        if env_name == 'harvest':
            print('Initializing Harvest environment')
            self.env = HarvestEnv(ascii_map=HARVEST_MAP_CPR,
                                  num_agents=num_agents,
                                  render=True)
        elif env_name == 'cleanup':
            print('Initializing Cleanup environment')
            self.env = CleanupEnv(num_agents=num_agents, render=True)
        else:
            print('Error! Not a valid environment type')
            return

        self.num_agents = num_agents

        self.agent_policies = []
        self.agents = list(self.env.agents.values())
        # print(agents[0].action_space)
        self.action_dim = self.agents[0].action_space.n
        for _ in range(num_agents):
            # TODO right now only using 1 frame, update later to look back x (e.g. 4) frames. Later RNN/LSTM
            neural_net = ConvFC(
                conv_in_channels=3,  # harvest-specific input is 15x15x3 (HARVEST_VIEW_SIZE = 7)
                conv_out_channels=3,
                input_size=15,
                hidden_size=64,
                output_size=self.action_dim)
            self.agent_policies.append(
                DQNAgent(0, self.action_dim - 1, neural_net))

        self.env.reset()
Example #5
    def __init__(self):

        self.amountOfSimulations = 0
        self.maxSpeed = 4
        self.score = 0
        self.previousScore = 0
        self.highscore = 0
        self.highscoreTime = 0
        #self.delayCounter = 0
        #self.delay = randint(5,10)
        self.keepRunning = True
        self.agent = DQNAgent()

        self.mysystem = chrono.ChSystemNSC()
        self.ground = theBattleground.theBattleground(self.mysystem)
        self.createRobot(self.mysystem)
        self.createApplication()
        self.run()
Example #6
def train(epoch=10):
    pygame.init()
    agent = DQNAgent(output_dim=5)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < epoch:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player1
        player2 = game.player2
        field0 = game.field


        # Perform first move
        initialize_game(player1, player2, game, field0, agent)
        if display_option:
            display(player1, player2, field0, game, record)

        game_epoch = 0
        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 50 - game_epoch

            train_each_epoch(agent, game, field0, player1, [player2], game_epoch)
            train_each_epoch(agent, game, field0, player2, [player1], game_epoch)

            record = get_record(game.player1.score, game.player2.score, record)
            if display_option:
                display(player1, player2, field0, game, record)
                pygame.time.wait(speed)
            
            game_epoch += 1
            game.crash = not (game.player1.display or game.player2.display)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.player1.score, game.player2.score)
        score_plot.append(game.player1.score)
        counter_plot.append(counter_games)
        print(counter_plot)
    agent.model.save_weights('weights_multi.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #7
def test():
    global dqn_save, checkFirst, dqn_agent

    if checkFirst:
        dqn_agent = DQNAgent(num_state, num_action, num_hidden_node)
        checkFirst = False
    else:
        dqn_agent = dqn_save

    dqn_save = dqn_agent
    return dqn_agent
Example #8
    def __init__(self,
                 env_name,
                 state_size,
                 frame_size,
                 agent="DQN",
                 render=True,
                 train=True,
                 load_path="checkpoint.pth"):
        self.env_name = env_name
        self.frame_size = frame_size
        self.render = render
        self.train = train
        self.env = gym.make(self.env_name)
        if agent == "DQN":
            self.agent = DQNAgent(state_size, frame_size,
                                  self.env.action_space.n, load_path)
        else:
            raise Exception("Agent not Found")
        self.initialized = False
        self.epoch = 0
Example #9
def play(display_on, speed, params):
    pygame.init()
    pygame.font.init()

    agent = DQNAgent(params)
    agent.epsilon = 0

    counter_games = 0
    high_score = 0
    score_plot = []
    counter_plot = []

    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)

        if display_on:
            game.update_display()

        while not game.crash:
            if handle_game_event(game):
                return

            state = game.get_state()
            move = agent.get_move(state)

            game.do_move(move)

            if display_on:
                game.update_display()
                pygame.time.wait(speed)

        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        high_score = game.high_score

        score_plot.append(game.score)
        counter_plot.append(counter_games)

    pygame.quit()
    plot_seaborn(counter_plot, score_plot)
Example #10
    def __init__(self):
        self.rows = 0
        self.cols = 0
        self.path_for_my_units = None
        self.units_points = np.zeros(shape=(9,), dtype=int)
        self.epsilon = 0
        self.agent = DQNAgent()
        self.picker = Picker()
        self.state_old = np.zeros(121)
        self.new_state = []
        self.picked = np.zeros(9, dtype=int)
        self.prediction = [[]]
Example #11
class QLearningBehaviour(Behaviour):
    def __init__(self,visualRange=3):
        self.agent = DQNAgent(visualRange)
        self.age = 0
        self.current_move = None
        self.current_input = None
    
    #def on_init(self,visualRange):

    def decide(self, input):
        self.current_input = input

        final_move = None

        epsilon = 150 - self.age
        if epsilon < 15:
            epsilon = 15
        
        if randint(0, 100) > epsilon:
            final_move = randint(0,5)
        else:
            #get old state
            #state_old = agent.get_state(game, player1, food1)
            state_old = np.asarray(input)

            prediction = self.agent.model.predict(state_old.reshape((1, -1)))
            #final_move = to_categorical(np.argmax(prediction[0]), num_classes=5)
            final_move = np.argmax(prediction[0])

        self.current_move = final_move
        self.age += 1
        print(final_move)
        return final_move

        #perform new move and get new state
        #self.do_move(final_move, self.x,self.y,agent)
        #state_new = agent.get_state(game, player1, food1)

    def feedback(self, reward, state):
        # set the reward for the new state
        #reward = agent.set_reward(input, move,reward)
        state_new = np.asarray(state)
        # train short memory based on the new action and state
        state_old = self.current_input
        final_move = to_categorical(self.current_move, num_classes=6)
        self.agent.train_short_memory(state_old, final_move, reward, state_new)

        # store the new data into a long term memory
        self.agent.remember(state_old, final_move, reward, state_new)
        #record = get_record(game.score, record)
        #if display_option:
        #    display(player1, food1, game, record)
        #    pygame.time.wait(speed)

        self.agent.replay_new(self.agent.memory) #???
Example #12
def train():
    env = gym.make('CartPole-v0')
    agent = DQNAgent(env=env)
    num_episodes = 200
    for i_episode in range(num_episodes):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_reward += reward
            update_array = [state, action, reward, next_state, done]
            agent.update(update_array)
            state = next_state
            if done:
                print("Episode ", i_episode, ": ", total_reward, "  epsilon: ", agent.epsilon)
                break
    agent.save('myClassModel')
    env.close()
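
Note: Example #12 hands each transition to agent.update([state, action, reward, next_state, done]), but that method is not shown. Below is only a hedged sketch of a typical implementation that stores the transition and then fits on a random minibatch with the one-step Q-learning target; the names self.memory, self.model, self.gamma and self.batch_size are assumptions.

import random
import numpy as np

def update(self, transition):
    # transition = [state, action, reward, next_state, done]
    self.memory.append(transition)                    # store in the replay buffer
    if len(self.memory) < self.batch_size:
        return                                        # not enough samples to learn yet
    batch = random.sample(self.memory, self.batch_size)
    states = np.array([t[0] for t in batch])
    next_states = np.array([t[3] for t in batch])
    q = self.model.predict(states, verbose=0)
    q_next = self.model.predict(next_states, verbose=0)
    for i, (_, action, reward, _, done) in enumerate(batch):
        # one-step target: r + gamma * max_a' Q(s', a'), truncated at terminal states
        q[i][action] = reward if done else reward + self.gamma * np.max(q_next[i])
    self.model.fit(states, q, epochs=1, verbose=0)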
Example #13
def play(display_on, speed, params):
    pygame.init()
    pygame.font.init()

    agent = DQNAgent(params)

    counter_games = 0
    high_score = 0
    #score_plot = []
    #counter_plot = []

    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)

        if display_on:
            game.update_display()

        while not game.crash:
            if handle_game_event(game):
                return

            state = game.get_state()
            prediction = agent.model.predict(state.reshape((1,11)))
            move = to_categorical(np.argmax(prediction[0]), num_classes=3)

            game.do_move(move)

            if display_on:
                game.update_display()
                pygame.time.wait(speed)

        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')

        high_score = game.high_score

    pygame.quit()
Example #14
def train(epoch=10):
    pygame.init()
    agent = DQNAgent(output_dim=3)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < epoch:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        field0 = game.field

        # Perform first move
        initialize_game(player1, game, field0, agent)
        if display_option:
            display(player1, field0, game, record)

        game_epoch = 0
        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 50 - game_epoch

            #get old state
            state_old = agent.get_state(game, player1, field0)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 100) < agent.epsilon:
                final_move = randint(0, 2)
                # print("random with prob {}".format(agent.epsilon))
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old)
                final_move = np.argmax(prediction[0])
                print("prediction : {}".format(prediction))

            # print("move: {} to position ({}, {})".format(final_move, player1.x, player1.y))

            #perform new move and get new state
            player1.do_move(final_move, field0, game)

            if game_epoch >= 19:
                # get new state
                state_new = agent.get_state(game, player1, field0)

                # set the reward for the new state
                reward = agent.set_reward(player1, game.crash, final_move)

                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)

                # store the new data into a long term memory
                if_remember = False
                if game.crash:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move == 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move != 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                print(
                    "actual move {} to ({}, {}) gets reward {} - remember {}".
                    format(final_move, player1.x, player1.y, reward,
                           if_remember))

                # explore other move
                if final_move == 0:  # no
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1,
                                  max(0, player1.x - 1), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2,
                                  min(player1.x + 1, 21), player1.y)
                elif final_move == 1:  # left
                    # 0 no
                    explore_moves(game, field0, agent, player1, state_old, 0,
                                  min(player1.x + 1, 21), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2,
                                  min(player1.x + 2, 21), player1.y)
                elif final_move == 2:  # right
                    # 0 no
                    explore_moves(game, field0, agent, player1, state_old, 0,
                                  max(0, player1.x - 1), player1.y)
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1,
                                  max(0, player1.x - 2), player1.y)

            record = get_record(game.score, record)
            if display_option:
                display(player1, field0, game, record)
                pygame.time.wait(speed)

            game_epoch += 1

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)

        if game.score >= record:
            agent.model.save_weights(modelFile + '/weights.hdf5')
    agent.model.save_weights(modelFile + '/weightsFinal.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #15
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.   
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(),
                                 weight_decay=0,
                                 lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # predict action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(
                        state_old.reshape(
                            (1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(
                        prediction.detach().cpu().numpy()[0])]

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
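
Note: Example #15 treats the agent itself as a torch.nn.Module (agent(state_tensor) returns Q-values for the three moves) and calls agent.train_short_memory(...) after every step, but that method is not included above. The following is only a sketch of a single-transition TD update consistent with that usage, assuming the agent exposes a gamma attribute and the module-level DEVICE used elsewhere in the example (both assumptions).

import numpy as np
import torch
import torch.nn.functional as F

def train_short_memory(self, state, action, reward, next_state, done):
    # Sketch of a one-step TD update on a single transition.
    self.train()
    state_t = torch.tensor(state.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
    next_t = torch.tensor(next_state.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
    target = reward
    if not done:
        # bootstrap from the best action value in the next state
        target = reward + self.gamma * torch.max(self(next_t)).item()
    output = self(state_t)
    target_f = output.clone().detach()
    target_f[0][np.argmax(action)] = target  # action arrives one-hot (np.eye(3)[...]) in this example
    self.optimizer.zero_grad()
    loss = F.mse_loss(output, target_f)
    loss.backward()
    self.optimizer.step()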
Example #16
import numpy as np
import gym

from DQN import DQNAgent
from utils import plot_learning

env = gym.make('LunarLander-v2')
lr = 0.001
n_games = 500
agent = DQNAgent(gamma=0.99,
                 epsilon=1.0,
                 lr=lr,
                 input_dims=env.observation_space.shape,
                 n_actions=env.action_space.n,
                 mem_size=1000000,
                 batch_size=64,
                 epsilon_end=0.01)
scores = []
eps_history = []

for i in range(n_games):
    done = False
    score = 0
    observation = env.reset()
    while not done:
        # env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, _ = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        observation = observation_
Example #17
        con_3 = Convolution1D(32, 3)(con_2)
        con_4 = Convolution1D(32, 3)(con_3)
        flt_1 = Flatten()(con_4)
        den_1 = Dense(32, activation='sigmoid')(flt_1)
        den_2 = Dense(32, activation='sigmoid')(den_1)
        den_3 = Dense(16, activation='sigmoid')(den_2)
        den_4 = Dense(16, activation='sigmoid')(den_3)
        predictions = Dense(8, activation="linear")(den_4)

        model = Model(inputs=inputs, outputs=predictions)
        model.compile(loss='mean_squared_error', optimizer='sgd')

    input_test = list()
    for i in range(0, 10):
        input_test.append([i for i in range(0, 85)])

    print(model.predict(np.array(input_test).reshape(1, 85, -1)))

    trainer = Trainer(model)

    pool = ThreadPoolExecutor(max_workers=255)

    for i in range(0, 255):
        pool.submit(
            DQNAgent(20, reward, model=trainer).run_bot_join)

    DQNAgent(20, reward, model=trainer).run_bot_join()
    pool.shutdown(wait=True)
    print("Shut down!")
    model.save('trained_model.h5')
Example #18
def run(display_option, speed, params):
    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 1) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)
Example #19
def run():
    agent = DQNAgent(size)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < games:
        # Initialize classes
        game = Game(size, size)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = (games * 0.4) - counter_games

            #get old state
            state_old = agent.get_state(game, player1, food1)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, games) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, agent.size)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)

            #perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            #set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        print('Game', counter_games, ' Score:', game.score, 'Last 10 Avg:', np.mean(score_plot[-10:]))

    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #20
# #################################################

# Start timer
start = timeit.default_timer()
if DDQN is True:

    # Name of weights + Initialize DDQN class + run training + evaluate
    name_of_weights_DDQN = 'weights_DDQN.h5'
    name_of_plot = 'train_plot_DDQN'
    pre_agent = DDQNAgent()
    aft_agent = run(pre_agent, name_of_weights_DDQN, name_of_plot)
    evaluate_network(aft_agent, name_of_weights_DDQN)
else:
    # Name of weights + Initialize DQN target class + run training + evaluate
    if target:
        name_of_weights_DQN = 'weights_DQN_target.h5'
        name_of_plot = 'train_plot_DQN_target'
    else:
        name_of_weights_DQN = 'weights_DQN_not_target_2.h5'
        name_of_plot = 'train_plot_DQN_not_target_2'

    pre_agent = DQNAgent(target)
    aft_agent = run(pre_agent, name_of_weights_DQN, name_of_plot)
    evaluate_network(aft_agent, name_of_weights_DQN)

stop = timeit.default_timer()
print('Total run time: %i min' % ((stop - start)/60))



Example #21
        diff = target_q_values - q_values
        self.memory.update_priorities(indices, diff.detach().squeeze().abs().cpu().numpy().tolist())

        return loss


if __name__ == "__main__":
    log_dir = "./dqn-p/1"
    if not osp.exists(log_dir):
        os.makedirs(log_dir)
    env_id = "Pong-v0"
    env = gym.make(env_id)
    # env = gym.wrappers.Monitor(env, osp.join(log_dir,"record"))
    env = WrapPytorch(env)

    agent = DQNAgent(env, log_dir=log_dir)
    # agent.load(log_dir)
    episode_rewards = []

    ep = 0
    obs = env.reset()
    episode_reward = 0
    for frame in range(Config.MAX_FRAMES):
        # print("frame", frame)
        # env.render()
        epsilon = Config.epsilon_by_frame(frame)
        action = agent.get_action(obs, epsilon)
        prev_obs = obs
        obs, reward, done, _ = env.step(action)
        episode_reward += reward
        agent.update(prev_obs, action, reward, obs, frame)
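
Note: the loss computation at the top of Example #21 feeds the absolute per-sample TD errors back into self.memory.update_priorities(indices, ...), which is the defining step of prioritized experience replay. The memory class itself is not shown; below is only a hedged sketch of a minimal proportional prioritized buffer with that method (importance-sampling weights are omitted for brevity, and all names are assumptions).

import numpy as np

class PrioritizedReplayMemory:
    # Minimal proportional prioritized replay (a sketch; the memory used above may differ).
    def __init__(self, capacity, alpha=0.6, eps=1e-5):
        self.capacity = capacity
        self.alpha = alpha          # how strongly prioritization is applied (0 = uniform)
        self.eps = eps              # small constant so no transition has zero priority
        self.buffer = []
        self.priorities = np.zeros(capacity, dtype=np.float32)
        self.pos = 0

    def push(self, transition):
        max_p = self.priorities.max() if self.buffer else 1.0
        if len(self.buffer) < self.capacity:
            self.buffer.append(transition)
        else:
            self.buffer[self.pos] = transition
        self.priorities[self.pos] = max_p           # new samples get the current max priority
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size):
        p = self.priorities[:len(self.buffer)] ** self.alpha
        p /= p.sum()
        indices = np.random.choice(len(self.buffer), batch_size, p=p)
        return [self.buffer[i] for i in indices], indices

    def update_priorities(self, indices, td_errors):
        # Called with the absolute TD errors, as in the snippet above.
        for i, err in zip(indices, td_errors):
            self.priorities[i] = abs(err) + self.eps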
Example #22
    attacks["spsa"] = {
        "epsilon": 0.05,
        "delta": 0.005,
        "num_steps": 5,
        "spsa_iters": 5,
        "spsa_samples": 2,
        "y_target": 0,
        "is_targeted": True
    }  #, "early_stop_loss_threshold": -1.}

    tf.reset_default_graph()
    sess = tf.Session()
    env = gym.make('PongNoFrameskip-v4')

    statsFolder = "../experiments/pong/"
    dqn = DQNAgent(env, sess, "../ckpts/dqn/pong_final/dqn_final.ckpt")
    attackModel = AttackModel(dqn)

    stats = testAttacks(dqn, attackModel, attacks, gamesNum, attackProbs,
                        actionProbThr, attDetThr, statsFolder)

    #adv training test
    tf.reset_default_graph()
    sess = tf.Session()
    env = gym.make('PongNoFrameskip-v4')

    statsFolder = "../experiments/pong_adv_training_0.015/"
    dqn = DQNAgent(env, sess,
                   "../ckpts/dqn/pong_adv_training/0.015/dqn_final.ckpt")
    attackModel = AttackModel(dqn)
Example #23
def run_game():
    env = Tetris()
    episodes = 2000
    max_steps = None
    discount = 0.98
    replay_mem_size = 20000
    minibatch_size = 512
    epsilon = 1
    epsilon_min = 0
    epsilon_stop_episode = 1500
    learning_rate = 5e-3
    epochs = 1
    show_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    hidden_dims = [64, 64]
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(), discount=discount, \
                   replay_mem_size=replay_mem_size, \
                   minibatch_size=minibatch_size, epsilon=epsilon, \
                   # epsilon_decay=epsilon_decay, \
                   epsilon_min=epsilon_min, \
                   epsilon_stop_episode=epsilon_stop_episode, \
                   learning_rate=learning_rate, hidden_dims=hidden_dims, \
                   activations=activations, \
                   replay_start_size=replay_start_size)

    log_dir = f'log/tetris-{datetime.now().strftime("%Y%m%d-%H%M%S")}-nn={str(hidden_dims)}-mem={replay_mem_size}-bs={minibatch_size}-discount={discount}'
    log = ModifiedTensorBoard(log_dir=log_dir)

    scores = []
    for episode in tqdm(range(episodes)):
        current_state = env.reset_game()
        done = False
        step = 0
        log.step = episode

        if show_every and episode % show_every == 0:
            show = True
        else:
            show = False

        # Run the game until either game over or we've hit max number of steps
        while not done and (not max_steps or step < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            # action is (x,i), state is [lines_cleared, holes, total_bumpiness, sum_height]
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            # reward is the score, done is gameover status
            reward, done = env.play_game(best_action[0],
                                         best_action[1],
                                         show=show)
            if show:
                env.show()
            agent.update_replay_memory(current_state, best_action,
                                       next_states[best_action], reward, done)

            # move to next timestep
            current_state = next_states[best_action]
            step += 1
        if show:
            # After game is completed, collect the final score
            print("Episode %d  score: %d  epsilon: %.2f" %
                  (episode, env.get_game_score(), agent.epsilon))
        scores.append(env.get_game_score())

        agent.train(epochs=epochs)

        if log_every and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])

            log.update_stats(avg_score=avg_score,
                             min_score=min_score,
                             max_score=max_score)

        if env.get_game_score() >= MIN_SCORE:
            if not os.path.exists('models/'):
                os.makedirs('models/')
            agent.model.save(
                f'models/eps_{str(episode)}nn_{str(hidden_dims)}__bs{minibatch_size}__score_{env.get_game_score()}__{int(time.time())}.h5'
            )
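
Note: Example #23 is state-value based rather than action-value based: env.get_next_states() enumerates the state resulting from every candidate placement, and agent.best_state(...) picks the one the network scores highest. That method is not shown; a minimal sketch, assuming a Keras-style self.model that maps a state vector to a single scalar value and epsilon-greedy exploration (all assumptions), might look like this:

import random
import numpy as np

def best_state(self, states):
    # With probability epsilon return a random candidate state; otherwise score
    # every candidate with the value network and return the highest-scoring one.
    states = list(states)
    if random.random() < self.epsilon:
        return random.choice(states)
    values = self.model.predict(np.array(states), verbose=0).flatten()
    return states[int(np.argmax(values))]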
Example #24
def test():
    env = gym.make('CartPole-v0')
    my_test_agent = DQNAgent(env, model='myClassModel')
    avg_reward, max_reward = my_test_agent.test_agent()
    print("average reward: ", avg_reward, " maximum reward: ", max_reward)
Example #25
from flask import Flask, jsonify, request
from DQN import DQNAgent
import os

app = Flask(__name__)

num_state = 6
num_action = 2

### If you want to change the number of nodes in the hidden layers, you must change it both here and in build_model() in DQN.py.
num_hidden_node = [120, 120]

dqn_agent = DQNAgent(num_state, num_action, num_hidden_node)


@app.route('/model', methods=['GET'])
def get_model():
    return jsonify(dqn_agent.get_model())


@app.route('/update', methods=['POST'])
def update():

    dqn_agent.run(request.json)
    print("finish run")
    return jsonify(dqn_agent.get_model())


if __name__ == '__main__':
    app.run(debug=False)
Example #26
    trainingEpisodes = 2
    testingEpisodes = 2
    frames = 1000

    name = "DQN"

    run = wandb.init(project="test",
                     config={
                         "trainingEpisodes": trainingEpisodes,
                         "testingEpisodes": testingEpisodes,
                         "frames": frames,
                         "epsilon": epsilon,
                         "deepLayers": deepLayers,
                         "layerSize": layerSize,
                         "layerSizeMult": layerSizeMult,
                         "learningRate": learningRate,
                         "gamma": gamma,
                         "epsilonDecay": epsilonDecay,
                         "epsilonMin": epsilonMin,
                         "batchSize": batchSize,
                         "memory": memory,
                         "name": name,
                         "replay_step_size": replayStepSize,
                     },
                     name=name,
                     allow_val_change=True)
    config = wandb.config

    agent = DQNAgent(env, config)
    trainDQN(agent)
Example #27
target_net = NeurosmashAgent()
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

# Init empty replay memories
memory = ReplayMemory(max_size=1024)
victory_memory = ReplayMemory(max_size=1024)
#Init lists
R = np.zeros(n_episodes)
reward = 0

losses = []
epses = []

#Init DQN agent
agent = DQNAgent(target_net, policy_net, memory)

if torch.cuda.is_available():
    print("Running on GPU")
    agent.target_net.cuda()
    agent.policy_net.cuda()
    torch.cuda.empty_cache()
#Catch KeyboardInterrupts and save model
#i=-1
# try:
#     #Reinforcement Loop
#     #for i in tqdm.trange(n_episodes):
#     while True:
#        i += 1
for i in range(n_episodes):
    info, reward, state = env.reset(
Example #28
import random
import time
from DQN import DQNAgent
import numpy as np

pygame.init()
pygame.font.init()
pygame.display.set_caption('Snake Game')
window_width = 440
window_height = 480
clock = pygame.time.Clock()
max_score = 0
num_games = 0
game_speed = 10
epsilon = 1
agent = DQNAgent()


class Game:
    def __init__(self, window_width, window_height):
        self.window_width = window_width
        self.window_height = window_height
        self.screen = pygame.display.set_mode((window_width, window_height))
        self.background_image = pygame.image.load("images/background.png")
        self.score = 0
        self.increase_length = False
        self.game_over = False


class Player:
    def __init__(self, game, x, y, direction=4):
Example #29
def run():
    pygame.init()
    
    
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    
    while counter_games < 60:
        game = Game(width, height)
        pipeU = game.upper_pipe
        pipeD = game.lower_pipe
        player = game.player
        between_pipes = False
        while not game.crash:
            
            agent.epsilon = 20 - counter_games
            
            state_old = agent.get_state(game, player, pipeU, pipeD)
            
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 1), num_classes=2)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1,8)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=2)
              
            player.Move(game,final_move)
            move_pipes(pipeU,pipeD,160)
            check_collision(player,pipeU,pipeD,game)
            
            check_score(pipeU,game)
            between_pipes = check_pipes(pipeU, pipeD, player)
            
            reward = agent.set_reward(pipeU, game.crash, between_pipes)
            
            state_new = agent.get_state(game, player, pipeU, pipeD)
            
            
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            
             # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            
         
            
            display(game,player,pipeU,pipeD,record)
            if game.score == 50:
                game.crash = True
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
    pygame.quit()
Example #30
# Exploration settings
epsilon = 1  # not a constant, going to be decayed
# epsilon = 0.00
EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.001

# For more repetitive results
random.seed(1)
np.random.seed(1)
tf.random.set_seed(1)

MIN_REWARD = -200  # For model save
MEMORY_FRACTION = 0.20

agent = DQNAgent()
env = BlobEnv()

# Create models folder
if not os.path.isdir('models'):
    os.makedirs('models')

# Iterate over episodes
for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):

    # Update tensorboard step every episode
    agent.tensorboard.step = episode

    # Restarting episode - reset episode reward and step number
    episode_reward = 0
    step = 1