Example no. 1
0
import copy  # needed for the deepcopy calls below; DQNAgent is assumed to be defined in the surrounding project


def swap_networks(network1, network2):
    # Build temp1 as a copy of network2's agent: model, replay memory,
    # current state, exploration rate and episode counters.
    temp1 = DQNAgent(network2.currentState, network2.player)
    temp1.memory = copy.deepcopy(network2.memory)
    temp1.currentState = copy.deepcopy(network2.currentState)
    temp1.model = network2.model
    temp1.epsilon = network2.epsilon
    temp1.current_training_episodes = network2.current_training_episodes
    temp1.max_training_episodes = network2.max_training_episodes
    temp1.max_agent_live_episodes = network2.max_agent_live_episodes
    temp1.player = network2.player

    # Build temp2 as a copy of network1's agent.
    temp2 = DQNAgent(network1.currentState, network1.player)
    temp2.memory = copy.deepcopy(network1.memory)
    temp2.currentState = copy.deepcopy(network1.currentState)
    temp2.model = network1.model
    temp2.epsilon = network1.epsilon
    temp2.current_training_episodes = network1.current_training_episodes
    temp2.max_training_episodes = network1.max_training_episodes
    temp2.max_agent_live_episodes = network1.max_agent_live_episodes
    temp2.player = network1.player

    # debug output: log both agents' current exploration rates
    print(network1.epsilon)
    print(network2.epsilon)
    return temp1, temp2
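A minimal usage sketch for swap_networks, assuming two DQNAgent instances used in self-play (agent_one and agent_two are illustrative names, not taken from the original code). The function builds fresh copies rather than mutating its arguments, so the caller rebinds its own references:

# hypothetical call site: swap the two self-play agents between training rounds
agent_one, agent_two = swap_networks(agent_one, agent_two)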
Example no. 2
0
from random import randint  # Automaton, Board, DQNAgent, SLEEP, NB_STATES and display_option come from the surrounding project


def run():
    max_level = 5
    #pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 150:
        # Initialize classes
        #game = Game(440, 440)
        aut = Automaton()
        board = Board()
        #player1 = game.player
        #food1 = game.food

        # Perform first move
        #initialize_game(player1, game, food1, agent)
        if display_option:
            #display(player1, food1, game, record)
            aut.display(max_level)
        while not board.finished:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            #get old state
            state_old = agent.get_state(board)

            #perform random actions based on agent.epsilon, or choose the action
            #if randint(0, 200) < agent.epsilon:
            #final_move = to_categorical(randint(SLEEP,NB_STATES), num_classes=8)
            final_move = randint(SLEEP, NB_STATES)
            aut2 = Automaton(aut)
            board2 = Board(board)
            aut2.set_rule_pos(board.nrw, final_move)
            board2.play(aut2.rules, max_level)
            """else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1,11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)"""

            #perform new move and get new state
            """exhaustive_search(aut,board,max_level)"""
            #player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(board2)

            #set the reward for the new state
            reward = agent.set_reward(board2, board2.finished)

            #train short memory based on the new action and state
            #agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new,
                           board2.finished)
            #record = get_record(game.score, record)
            if display_option:
                aut2.display(max_level)
                #display(player1, food1, game, record)
                #pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games)  # , '      Score:', game.score
        #score_plot.append(game.score)
        counter_plot.append(counter_games)
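The triple-quoted block inside run() preserves the exploitation branch from the snake-game code this was adapted from (visible in the commented-out player1/food1/game lines). A hedged sketch of the full epsilon-greedy choice for this Automaton/Board setting, using a hypothetical helper choose_rule; it assumes agent.model.predict accepts the flattened state from agent.get_state, SLEEP and NB_STATES are the constants already used in run(), and the SLEEP offset arithmetic is an assumption:

import numpy as np
from random import randint


def choose_rule(agent, state_old):
    # hypothetical helper, not part of the original code
    # explore with probability tied to agent.epsilon, mirroring the
    # commented-out `if randint(0, 200) < agent.epsilon` line above
    if randint(0, 200) < agent.epsilon:
        return randint(SLEEP, NB_STATES)
    # exploit: pick the rule with the highest predicted Q-value
    q_values = agent.model.predict(state_old.reshape((1, -1)))
    return SLEEP + int(np.argmax(q_values[0]))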
Example no. 3
0
def run_agent(games=1, train=True, model=False, save_model=True):

    num_cities = len(initialize_simulation().cities)
    weights_path = 'throwaway1/'
    state_dimensions = int(num_cities * 8 + 2)
    action_dimensions = int(num_cities * 2 + 1)
    agent = DQNAgent(state_dimensions=state_dimensions,
                     action_dimensions=action_dimensions,
                     num_parameters=75)

    if not train:
        agent.epsilon = 0
    final_stats = []

    if model:
        agent.load(model)

    game_counter = 0
    simulation_results = []
    done = 0
    all_moves = []
    while game_counter < games:
        day = 0
        region = initialize_simulation()

        moves_made = []
        while True:

            # get human readable state
            print("\nDay: ", day)
            state = region.get_state()
            region.print_state(state)

            # transform state for input into DQN
            state = agent.transform_state(state)

            # get action from DQN. Dependent on epsilon
            action = agent.get_action(state, region.water_stations,
                                      region.field_hospitals)

            # if (region.water_stations > 0):
            # 	action = [1,1]
            # else:
            # 	action = [-1, 3]

            print("wanted action: ", action)
            if not train and action != [-1, 3]:
                moves_made.append([day, action])

            # perform the action and update the state
            region.take_action(str(action[0]), str(action[1]))
            done = region.update()

            # get the new state we are in
            next_state = region.get_state()
            next_state = agent.transform_state(next_state)

            # get the reward for the state we are now in. Combination
            # of deaths and infections at new state
            reward = region.get_reward()

            if train:
                agent.train_individual(state,
                                       action,
                                       reward,
                                       next_state,
                                       done,
                                       future_reward=True)
                print("Training!")

            # add to memory
            agent.memorize(state, action, reward, next_state, done)

            if done == 1:
                break
            day += 1

            # data = input("proceed to next day? (y/n)\n")
            # if data == 'n':
            # 	break

        game_counter += 1
        if train:
            agent.train_batch(400, future_reward=True)
            print("training episode")
        if save_model and train and game_counter % 20 == 0:
            agent.save_model(weights_path + 'post_game' + str(game_counter))
        final_stats.append(region.get_final_stats())
        all_moves.append(moves_made)
    return final_stats, all_moves
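A brief usage sketch for run_agent: one training run followed by an evaluation run that reloads saved weights. The weight file name is illustrative (it follows the weights_path + 'post_game' pattern used above), and agent.load is assumed to accept whatever agent.save_model writes:

# hypothetical driver code
train_stats, train_moves = run_agent(games=100, train=True, save_model=True)
eval_stats, eval_moves = run_agent(games=5, train=False,
                                   model='throwaway1/post_game100')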
Example no. 4
0
def main():
    game_count = 6000
    agent = DQNAgent(enviroment.state_size, enviroment.action_size)
    if os.path.exists(enviroment.model_name):
        print('load existing model : ', enviroment.model_name)
        agent.load(enviroment.model_name)
    agent.epsilon = read_epsilon()

    # init game environment
    game = pika_game.Game()
    game.start()

    print('Pika experiment start')

    episode = 1
    reward_record = read_reward()

    for i in range(1, game_count + 1):
        print('start new game : ', i, ' !!!')
        game.play()
        time.sleep(1.5)

        # print('step start')
        training_flag = True
        episode_reward = 0
        game_reward = 0
        game.state.update()

        while True:
            # handle wine crash
            if game.state.crash:
                print('wine crash!!!')
                break

            if training_flag:
                episode_reward = episode_reward + 1
                pre_state = game.state.input.reshape(1, -1)
                action = agent.act(pre_state)
                # print('action : ', action)

                thread = threading.Thread(target=game.act, args=(action, ))
                thread.daemon = True
                thread.start()

            game.state.update()

            if training_flag:
                reward = game.state.reward
                # print('reward : ', reward)
                agent.remember(pre_state, action, reward,
                               game.state.input.reshape(1, -1),
                               game.state.is_score_change)

            if game.state.is_score_change:
                print(str(game.state.left_score), ' vs ',
                      str(game.state.right_score))
                episode = episode + 1
                training_flag = False
                episode_reward = episode_reward + game.state.reward
                game_reward = game_reward + episode_reward
                agent.update_target_model()
                # print("episode: {}, score: {}"
                #       .format(episode, episode_reward))
            elif game.state.is_episode_start:
                reward_record.append(str(game_reward))
                print("game: {}, score: {}".format(i, game_reward),
                      str(game.state.left_score), ' vs ',
                      str(game.state.right_score), agent.epsilon)
                episode_reward = 0
                training_flag = False
                if i % 2 == 0:
                    agent.save(enviroment.model_name)
                    write_reward(reward_record)
                    write_epsilon(agent.epsilon)
                game.reset()
                break
            elif game.state.check_step_start():
                training_flag = True
                episode_reward = 0
                # print('step start')

            if training_flag:
                if len(agent.memory) > enviroment.batch_size:
                    agent.replay(enviroment.batch_size)

        if game.state.crash:
            break

    if not game.state.crash:
        plot_reward(reward_record)

    return game.state.crash
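main() relies on small persistence helpers that are not shown here (read_epsilon, write_epsilon, read_reward, write_reward). A minimal sketch of how such file-backed helpers could look, assuming plain text files; the file names and the default exploration rate are illustrative, not taken from the project:

import os

EPSILON_FILE = 'epsilon.txt'   # hypothetical file names
REWARD_FILE = 'reward.txt'


def read_epsilon(default=1.0):
    # fall back to a default exploration rate if no previous run exists
    if not os.path.exists(EPSILON_FILE):
        return default
    with open(EPSILON_FILE) as f:
        return float(f.read().strip())


def write_epsilon(epsilon):
    with open(EPSILON_FILE, 'w') as f:
        f.write(str(epsilon))


def read_reward():
    # one reward record per line, kept as strings as in the snippet above
    if not os.path.exists(REWARD_FILE):
        return []
    with open(REWARD_FILE) as f:
        return f.read().splitlines()


def write_reward(records):
    with open(REWARD_FILE, 'w') as f:
        f.write('\n'.join(records))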
Example no. 5
0
import random
import gym
import numpy as np

from keras.models import load_model
import keras.backend as K

episodes = 100
env_name = 'CartPole-v0'
RENDER = False

# initialize gym environment and the agent
env = gym.make(env_name)
agent = DQNAgent(np.prod(env.observation_space.shape), env.action_space.n)
agent.model = load_model("./models/{}.h5".format(env_name))
print(agent.model.summary())
agent.epsilon = 0  # remove randomness in the learning agent
rewards = []

# Iterate the game
for ep in range(episodes):

    cur_reward = 0
    state = env.reset()
    state = np.reshape(state, [1, 4])
    done = False
    time = 0

    while not done:
        time += 1
        if RENDER:
            env.render()
        waiting_time = 0
        _waiting_times = {}
        total_waiting_time = 0
        sum_wait = 0

        traci.start([sumoBinary, "-c", "cross3ltl.sumocfg", '--start'])
        traci.trafficlight.setPhase("0", 0)
        traci.trafficlight.setPhaseDuration("0", 200)
        print("--------------------")
        print("episode - " + str(e + 1))

        while traci.simulation.getMinExpectedNumber() > 0 and steps < 7000:

            traci.simulationStep()
            state = sumoInt.getState()
            agent.epsilon = epsilon
            action = agent.act(state)
            #print(random.randrange(2))
            light = state[2]

            #incoming_roads = ["1si", "2si", "3si", "4si"]
            #car_list = traci.vehicle.getIDList()

            #for car_id in car_list:
            #    wait_time = traci.vehicle.getAccumulatedWaitingTime(car_id)
            #    road_id = traci.vehicle.getRoadID(car_id)  # get the road id where the car is located
            #    if road_id in incoming_roads:  # consider only the waiting times of cars in incoming roads
            #        _waiting_times[car_id] = wait_time
            #        #print(wait_time)
            #    else:
            #        if car_id in _waiting_times: # a car that was tracked has cleared the intersection
Example no. 7
0
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plot  # the fragment uses `plot` as the pyplot alias

# agent, env and num_episodes are assumed to be set up in the surrounding script
rewards = []
rewards_avg = []

fig = plot.figure(figsize=(9, 3))
ax = fig.add_subplot(111)
ax.set_xlabel("Episodes")
ax.set_ylabel("Rewards")
fig.show()
for episode in range(num_episodes):
    state = env.reset()
    state = np.array(state) / 50

    terminated = False
    reward_episode = 0
    agent.epsilon = agent.get_exploration_rate(episode)
    time = 0
    while not terminated:
        env.render()
        action = agent.act(state)
        next_state, reward, terminated, info = agent.env.step(action)
        next_state = tf.convert_to_tensor(np.array(next_state) / 50)
        agent.store(state, action, reward, next_state, terminated)
        state = next_state
        reward_episode += reward
        time += 1
        if time >= 2000:
            break

    if len(agent.experience) > agent.batch_size:
        agent.train()
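agent.get_exploration_rate(episode) is not shown in this fragment. A minimal sketch of one common choice, exponential decay from a maximum toward a minimum epsilon; the constants and the standalone-function form are assumptions, not the original implementation:

import numpy as np


def get_exploration_rate(episode, eps_max=1.0, eps_min=0.05, decay_rate=0.01):
    # decay epsilon exponentially with the episode index,
    # never dropping below the minimum exploration rate
    return eps_min + (eps_max - eps_min) * np.exp(-decay_rate * episode)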
Example no. 8
0
    scaler = get_scaler(env)

    # store the final value of the portfolio (end of episode)
    portfolio_value = []

    if args.mode == 'test':
        # then load the previous scaler
        with open(f'{models_folder}/scaler.pkl', 'rb') as f:
            scaler = pickle.load(f)

        # remake the env with test data
        env = MultiStockEnv(test_data, initial_investment)

        # make sure epsilon is not 1!
        # no need to run multiple episodes if epsilon 0, it is deterministic
        agent.epsilon = 0.01

        # load trained weights
        agent.load(f'{models_folder}/linear.npz')

    # play the game num_episodes times
    for e in range(num_episodes):
        t0 = datetime.now()
        val = play_one_episode(agent, env, scaler, args.mode)
        dt = datetime.now() - t0
        print("episode: {}, episode end value: {:.2f}, duration: {} ".format(
            (e + 1) / num_episodes, val, dt))
        portfolio_value.append(val)  # append episode end portfolio value

    # save the weights when we are done
    if args.mode == 'train':