def swap_networks(network1, network2):
    """Swap two DQN agents: build a fresh agent from network2's model, memory,
    and hyperparameters, another from network1's, and return them swapped."""
    # Copy of network2.
    temp1 = DQNAgent(network2.currentState, network2.player)
    temp1.model = network2.model
    temp1.memory = copy.deepcopy(network2.memory)
    temp1.currentState = copy.deepcopy(network2.currentState)
    temp1.epsilon = network2.epsilon
    temp1.current_training_episodes = network2.current_training_episodes
    temp1.max_training_episodes = network2.max_training_episodes
    temp1.max_agent_live_episodes = network2.max_agent_live_episodes
    temp1.player = network2.player

    # Copy of network1. network1.model must still hold its original weights at
    # this point, so it is not overwritten earlier in the function.
    temp2 = DQNAgent(network1.currentState, network1.player)
    temp2.model = network1.model
    temp2.memory = copy.deepcopy(network1.memory)
    temp2.currentState = copy.deepcopy(network1.currentState)
    temp2.epsilon = network1.epsilon
    temp2.current_training_episodes = network1.current_training_episodes
    temp2.max_training_episodes = network1.max_training_episodes
    temp2.max_agent_live_episodes = network1.max_agent_live_episodes
    temp2.player = network1.player

    print(network1.epsilon)
    print(network2.epsilon)
    return temp1, temp2
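# A minimal usage sketch for swap_networks, assuming it is called between
# rounds of self-play so each agent continues from the other's model and
# replay memory. `initial_state` and `play_round` are hypothetical names, not
# taken from the source.
agent_one = DQNAgent(initial_state, 1)
agent_two = DQNAgent(initial_state, 2)
for round_idx in range(10):
    play_round(agent_one, agent_two)  # hypothetical: one round of self-play training
    agent_one, agent_two = swap_networks(agent_one, agent_two)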
def run():
    max_level = 5
    # pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 150:
        # Initialize classes
        # game = Game(440, 440)
        aut = Automaton()
        board = Board()
        # player1 = game.player
        # food1 = game.food

        # Perform first move
        # initialize_game(player1, game, food1, agent)
        if display_option:
            # display(player1, food1, game, record)
            aut.display(max_level)

        while not board.finished:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            # get old state
            state_old = agent.get_state(board)

            # perform random actions based on agent.epsilon, or choose the action
            # if randint(0, 200) < agent.epsilon:
            #     final_move = to_categorical(randint(SLEEP, NB_STATES), num_classes=8)
            final_move = randint(SLEEP, NB_STATES)
            aut2 = Automaton(aut)
            board2 = Board(board)
            aut2.set_rule_pos(board.nrw, final_move)
            board2.play(aut2.rules, max_level)
            # else:
            #     # predict action based on the old state
            #     prediction = agent.model.predict(state_old.reshape((1, 11)))
            #     final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)

            # perform new move and get new state
            # exhaustive_search(aut, board, max_level)
            # player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(board2)

            # set reward for the new state
            reward = agent.set_reward(board2, board2.finished)

            # train short memory based on the new action and state
            # agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)

            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, board2.finished)
            # record = get_record(game.score, record)

            if display_option:
                aut2.display(max_level)
                # display(player1, food1, game, record)
                # pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games)  # , ' Score:', game.score
        # score_plot.append(game.score)
        counter_plot.append(counter_games)
def run_agent(games=1, train=True, model=False, save_model=True):
    num_cities = len(initialize_simulation().cities)
    weights_path = 'throwaway1/'
    state_dimensions = int(num_cities * 8 + 2)
    action_dimensions = int(num_cities * 2 + 1)
    agent = DQNAgent(state_dimensions=state_dimensions,
                     action_dimensions=action_dimensions,
                     num_parameters=75)
    if not train:
        agent.epsilon = 0
    final_stats = []
    if model:
        agent.load(model)
    game_counter = 0
    simulation_results = []
    done = 0
    all_moves = []
    while game_counter < games:
        day = 0
        region = initialize_simulation()
        moves_made = []
        while True:
            # get human-readable state
            print("\nDay: ", day)
            state = region.get_state()
            region.print_state(state)

            # transform state for input into the DQN
            state = agent.transform_state(state)

            # get action from the DQN (dependent on epsilon)
            action = agent.get_action(state, region.water_stations, region.field_hospitals)
            # if region.water_stations > 0:
            #     action = [1, 1]
            # else:
            #     action = [-1, 3]
            print("wanted action: ", action)
            if not train and action != [-1, 3]:
                moves_made.append([day, action])

            # perform the action and update the state
            region.take_action(str(action[0]), str(action[1]))
            done = region.update()

            # get the new state we are in
            next_state = region.get_state()
            next_state = agent.transform_state(next_state)

            # get the reward for the state we are now in: a combination of
            # deaths and infections at the new state
            reward = region.get_reward()
            if train:
                agent.train_individual(state, action, reward, next_state, done,
                                       future_reward=True)
                print("Training!")

            # add to memory
            agent.memorize(state, action, reward, next_state, done)
            if done == 1:
                break
            day += 1
            # data = input("proceed to next day? (y/n)\n")
            # if data == 'n':
            #     break
        game_counter += 1
        if train:
            agent.train_batch(400, future_reward=True)
            print("training episode")
        if save_model and train and game_counter % 20 == 0:
            agent.save_model(weights_path + 'post_game' + str(game_counter))
        final_stats.append(region.get_final_stats())
        all_moves.append(moves_made)
    return final_stats, all_moves
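# A minimal sketch of a state transform consistent with
# state_dimensions = num_cities * 8 + 2, assuming the readable state carries
# eight numeric values per city plus two region-level values. The key names
# "city_stats" and "region_stats" are hypothetical; the source does not show
# the actual state layout.
import numpy as np

def transform_state_sketch(readable_state, num_cities):
    per_city = np.asarray(readable_state["city_stats"], dtype=np.float32)        # hypothetical, shape (num_cities, 8)
    region_level = np.asarray(readable_state["region_stats"], dtype=np.float32)  # hypothetical, shape (2,)
    vec = np.concatenate([per_city.reshape(-1), region_level])
    assert vec.shape[0] == num_cities * 8 + 2
    return vec.reshape(1, -1)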
def main():
    game_count = 6000
    agent = DQNAgent(enviroment.state_size, enviroment.action_size)
    if os.path.exists(enviroment.model_name):
        print('load existing model : ', enviroment.model_name)
        agent.load(enviroment.model_name)
        agent.epsilon = read_epsilon()

    # init game environment
    game = pika_game.Game()
    game.start()
    print('Pika experiment start')

    episode = 1
    reward_record = read_reward()
    for i in range(1, game_count + 1):
        print('start new game : ', i, ' !!!')
        game.play()
        time.sleep(1.5)
        # print('step start')
        training_flag = True
        episode_reward = 0
        game_reward = 0
        game.state.update()
        while True:
            # handle wine crash
            if game.state.crash:
                print('wine crash!!!')
                break
            if training_flag:
                episode_reward = episode_reward + 1
                pre_state = game.state.input.reshape(1, -1)
                action = agent.act(pre_state)
                # print('action : ', action)
                thread = threading.Thread(target=game.act, args=(action, ))
                thread.daemon = True
                thread.start()
            game.state.update()
            if training_flag:
                reward = game.state.reward
                # print('reward : ', reward)
                agent.remember(pre_state, action, reward,
                               game.state.input.reshape(1, -1),
                               game.state.is_score_change)
            if game.state.is_score_change:
                print(str(game.state.left_score), ' vs ', str(game.state.right_score))
                episode = episode + 1
                training_flag = False
                episode_reward = episode_reward + game.state.reward
                game_reward = game_reward + episode_reward
                agent.update_target_model()
                # print("episode: {}, score: {}".format(episode, episode_reward))
            elif game.state.is_episode_start:
                reward_record.append(str(game_reward))
                print("game: {}, score: {}".format(i, game_reward),
                      str(game.state.left_score), ' vs ', str(game.state.right_score),
                      agent.epsilon)
                episode_reward = 0
                training_flag = False
                if i % 2 == 0:
                    agent.save(enviroment.model_name)
                    write_reward(reward_record)
                    write_epsilon(agent.epsilon)
                game.reset()
                break
            elif game.state.check_step_start():
                training_flag = True
                episode_reward = 0
                # print('step start')
            if training_flag:
                if len(agent.memory) > enviroment.batch_size:
                    agent.replay(enviroment.batch_size)
        if game.state.crash:
            break
    if not game.state.crash:
        plot_reward(reward_record)
    return game.state.crash
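# A minimal sketch of the epsilon persistence helpers referenced above
# (read_epsilon / write_epsilon). The file name 'epsilon.txt' and the default
# value of 1.0 are assumptions, not taken from the source.
import os

EPSILON_FILE = 'epsilon.txt'  # hypothetical path

def read_epsilon():
    if not os.path.exists(EPSILON_FILE):
        return 1.0  # assumed default: start fully exploratory
    with open(EPSILON_FILE) as f:
        return float(f.read().strip())

def write_epsilon(value):
    with open(EPSILON_FILE, 'w') as f:
        f.write(str(value))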
import random

import gym
import numpy as np
from keras.models import load_model
import keras.backend as K

# DQNAgent is assumed to be defined or imported elsewhere in this module.

episodes = 100
env_name = 'CartPole-v0'
RENDER = False

# initialize gym environment and the agent
env = gym.make(env_name)
agent = DQNAgent(np.prod(env.observation_space.shape), env.action_space.n)
agent.model = load_model("./models/{}.h5".format(env_name))
print(agent.model.summary())
agent.epsilon = 0  # remove randomness in the learning agent

rewards = []
# Iterate the game
for ep in range(episodes):
    cur_reward = 0
    state = env.reset()
    state = np.reshape(state, [1, 4])
    done = False
    time = 0
    while not done:
        time += 1
        if RENDER:
            env.render()
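        # A minimal sketch of how the rest of this evaluation loop would
        # typically proceed: act greedily from the loaded Keras model, step
        # the environment, and accumulate reward. The source excerpt is
        # truncated here, so this continuation is an assumption.
        action = int(np.argmax(agent.model.predict(state)[0]))
        state, reward, done, _ = env.step(action)
        state = np.reshape(state, [1, 4])
        cur_reward += reward
    rewards.append(cur_reward)
    print("episode: {}/{}, reward: {}".format(ep + 1, episodes, cur_reward))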
waiting_time = 0
_waiting_times = {}
total_waiting_time = 0
sum_wait = 0

traci.start([sumoBinary, "-c", "cross3ltl.sumocfg", '--start'])
traci.trafficlight.setPhase("0", 0)
traci.trafficlight.setPhaseDuration("0", 200)

print("--------------------")
print("episode - " + str(e + 1))
while traci.simulation.getMinExpectedNumber() > 0 and steps < 7000:
    traci.simulationStep()
    state = sumoInt.getState()
    agent.epsilon = epsilon
    action = agent.act(state)
    # print(random.randrange(2))
    light = state[2]

    # incoming_roads = ["1si", "2si", "3si", "4si"]
    # car_list = traci.vehicle.getIDList()
    # for car_id in car_list:
    #     wait_time = traci.vehicle.getAccumulatedWaitingTime(car_id)
    #     # get the road id where the car is located
    #     road_id = traci.vehicle.getRoadID(car_id)
    #     # consider only the waiting times of cars in incoming roads
    #     if road_id in incoming_roads:
    #         _waiting_times[car_id] = wait_time
    #         # print(wait_time)
    #     else:
    #         # a car that was tracked has cleared the intersection
    #         if car_id in _waiting_times:
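# A minimal sketch of how the commented-out block above is typically
# completed: keep accumulated waiting times only for cars still on incoming
# roads, drop cars that have cleared the intersection, and return the total.
# The original excerpt is truncated before this point, so the completion is
# an assumption.
def update_waiting_times(_waiting_times, incoming_roads):
    for car_id in traci.vehicle.getIDList():
        wait_time = traci.vehicle.getAccumulatedWaitingTime(car_id)
        road_id = traci.vehicle.getRoadID(car_id)
        if road_id in incoming_roads:
            _waiting_times[car_id] = wait_time
        elif car_id in _waiting_times:
            del _waiting_times[car_id]  # car has cleared the intersection
    return sum(_waiting_times.values())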
rewards = []
rewards_avg = []

fig = plot.figure(figsize=(9, 3))
ax = fig.add_subplot(111)
ax.set_xlabel("Episodes")
ax.set_ylabel("Rewards")
fig.show()

for episode in range(num_episodes):
    state = env.reset()
    state = np.array(state) / 50
    terminated = False
    reward_episode = 0
    agent.epsilon = agent.get_exploration_rate(episode)
    time = 0
    while not terminated:
        env.render()
        action = agent.act(state)
        next_state, reward, terminated, info = agent.env.step(action)
        next_state = tf.convert_to_tensor(np.array(next_state) / 50)
        agent.store(state, action, reward, next_state, terminated)
        state = next_state
        reward_episode += reward
        time += 1
        if time >= 2000:
            break
        if len(agent.experience) > agent.batch_size:
            agent.train()
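# A minimal sketch of an epsilon schedule matching the
# agent.get_exploration_rate(episode) call above. The exponential-decay form
# and the min/max/decay constants are assumptions, not taken from the source.
import math

def exploration_rate_sketch(episode, eps_min=0.01, eps_max=1.0, decay=0.005):
    # Decay epsilon from eps_max toward eps_min as training progresses.
    return eps_min + (eps_max - eps_min) * math.exp(-decay * episode)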
scaler = get_scaler(env)

# store the final value of the portfolio (end of episode)
portfolio_value = []

if args.mode == 'test':
    # then load the previous scaler
    with open(f'{models_folder}/scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)

    # remake the env with test data
    env = MultiStockEnv(test_data, initial_investment)

    # make sure epsilon is not 1!
    # no need to run multiple episodes if epsilon is 0, it is deterministic
    agent.epsilon = 0.01

    # load trained weights
    agent.load(f'{models_folder}/linear.npz')

# play the game num_episodes times
for e in range(num_episodes):
    t0 = datetime.now()
    val = play_one_episode(agent, env, scaler, args.mode)
    dt = datetime.now() - t0
    print("episode: {}/{}, episode end value: {:.2f}, duration: {}".format(
        e + 1, num_episodes, val, dt))
    portfolio_value.append(val)  # append episode end portfolio value

# save the weights when we are done
if args.mode == 'train':
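# A minimal sketch of the get_scaler helper used above, assuming it fits a
# scikit-learn StandardScaler on states collected while playing the
# environment with random actions. The exact implementation is not shown in
# the source; env.action_space is assumed to be an indexable collection of
# discrete actions.
import numpy as np
from sklearn.preprocessing import StandardScaler

def get_scaler_sketch(env, num_steps=1000):
    states = []
    state = env.reset()
    for _ in range(num_steps):
        action = np.random.choice(env.action_space)
        state, reward, done, info = env.step(action)
        states.append(state)
        if done:
            state = env.reset()
    scaler = StandardScaler()
    scaler.fit(states)
    return scaler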