def train():
    env = gym.make('CartPole-v0')
    agent = DQNAgent(env=env)
    num_episodes = 200
    for i_episode in range(num_episodes):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_reward += reward
            update_array = [state, action, reward, next_state, done]
            agent.update(update_array)
            state = next_state
            if done:
                print("Episode ", i_episode, ": ", total_reward, " epsilon: ", agent.epsilon)
                break
    agent.save('myClassModel')
    env.close()
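# DQNAgent is defined elsewhere; the loop above only relies on the interface
# sketched below. This is an illustrative sketch, not the original class:
# the epsilon schedule and the placeholder Q-values are assumptions.
import random
import numpy as np

class DQNAgentSketch:
    def __init__(self, env, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995):
        self.env = env
        self.epsilon = epsilon  # exploration rate read by the training loop
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

    def get_action(self, state):
        # epsilon-greedy action selection
        if random.random() < self.epsilon:
            return self.env.action_space.sample()
        return int(np.argmax(self._q_values(state)))

    def update(self, transition):
        # transition = [state, action, reward, next_state, done];
        # a real agent would store it and take a gradient step here
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def save(self, path):
        pass  # a real agent would persist its network weights

    def _q_values(self, state):
        # placeholder for the Q-network forward pass
        return np.zeros(self.env.action_space.n)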
agent = DQNAgent(env, log_dir=log_dir)
# agent.load(log_dir)
episode_rewards = []
ep = 0
obs = env.reset()
episode_reward = 0
for frame in range(Config.MAX_FRAMES):
    # print("frame", frame)
    # env.render()
    epsilon = Config.epsilon_by_frame(frame)
    action = agent.get_action(obs, epsilon)
    prev_obs = obs
    obs, reward, done, _ = env.step(action)
    episode_reward += reward
    agent.update(prev_obs, action, reward, obs, frame)
    if done:
        episode_rewards.append(episode_reward)
        agent.writer.add_scalar("data/reward", episode_reward, ep)
        print("episode", ep, "reward:", episode_reward)
        ep += 1
        obs = env.reset()
        episode_reward = 0
        if ep % 50 == 0:
            agent.save(log_dir)
agent.save(log_dir)
env.close()
agent.writer.close()
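# Config.epsilon_by_frame is not shown in this snippet. A common choice, and a
# plausible sketch of what it computes, is an exponentially annealed schedule;
# the start/final/decay constants below are assumptions.
import math

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000  # number of frames over which epsilon decays

def epsilon_by_frame(frame):
    # anneal from epsilon_start toward epsilon_final as frame grows
    return epsilon_final + (epsilon_start - epsilon_final) * math.exp(-frame / epsilon_decay)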
states = np.reshape(states, [1, state_size])
action = QN.act_test(states)
# print('SNR:', sim_env.SNR[-1])
# print('action:', sim_env.action[action])
next_state, rewards, overall_err = sim_env.Assign_Cores(action)
error = np.append(error, overall_err)
next_state = np.reshape(next_state, [1, state_size])
states = next_state
print(e)
print(sim_env.error / testing)
error_avg = np.append(error_avg, np.power(10, -sim_env.error / testing))

# Save error and losses to CSV files; save the network weights
parameters = '_DQN_S{}_rho{}_SNR{}_PS{}_lr{}_df{}_sl{}_nhl{}_ef{}'.\
    format(sim_env.S, sim_env.p, sim_env.SNR_avg[0], sim_env.pi, QN.learning_rate,
           QN.gamma, QN.size_layers, QN.number_hidden_layers, QN.epsilon_decay)
print(parameters)
np.savetxt(sim_env.channel_type + '/Hyperparameters/Avg_Error' + parameters + '_6.csv',
           np.transpose(error_avg), header='Error[sum(-log10(e))]', fmt='0%30.28f')
np.savetxt(sim_env.channel_type + '/Hyperparameters/Abs_Error' + parameters + '_6.csv',
           np.transpose(error), header='Error', fmt='0%13.11f')
np.savetxt(sim_env.channel_type + '/Hyperparameters/Avg_Loss' + parameters + '_6.csv',
           np.transpose(loss_overall), header='Error[sum(-log10(e))]', fmt='0%30.28f')
np.savetxt(sim_env.channel_type + '/Hyperparameters/Abs_Loss' + parameters + '_6.csv',
           np.transpose(QN.loss), header='Error', fmt='0%13.11f')
QN.save(sim_env.channel_type + '/Hyperparameters/' + parameters)
index += 1
agent.epsilon = 0.12
done = False
episodes = 0
app = TetrisApp()

###############################################################
#################### Tetris-specific section ##################
key_actions = {
    'ESCAPE': app.quit,
    'LEFT': lambda: app.move(-1),
    'RIGHT': lambda: app.move(+1),
    'DOWN': lambda: app.drop(True),
    'UP': app.rotate_stone,
    'p': app.toggle_pause,
    'RETURN': app.start_game,
    # Wrap in a lambda so the model is saved when SPACE is pressed,
    # not once when the dict is built
    'SPACE': lambda: agent.save("mreza-dqn.h5")
}
app.gameover = False
app.paused = False
dont_burn_my_cpu = pygame.time.Clock()
num = 0
mon = {'top': 0, 'left': 0, 'width': 200, 'height': 200}
sct = mss.mss()
write_to_file_step = 0.95
broj_ociscenih_linija_file = 0  # cleared-lines counter written to file
broj_partija_file = 0           # games-played counter written to file
while 1:
    app.screen.fill((0, 0, 0))
    if app.gameover:
                allow_val_change=True)
# Access the model's hyperparameters as attributes: config.parameter
config = wandb.config
model = DQNAgent(env, config, epsilon, training_episodes, testing_episodes, frames)
hyper_param_counter += 1
model.train()
print("Run {} of {}.".format(hyper_param_counter, total_runs))
model_dir = "saved_models"
# Join with "/" so the file lands inside model_dir instead of being
# concatenated onto the directory name
model_save_name = model_dir + "/LR_{}_LS_{}_BS_{}_MS_{}_Timestamp_{}".format(
    learning_rate, layer_size, batch_size, memory_size, int(time.time())) + "sb.h5"
model.save(model_save_name)

# ---------------------------------------------------------------------------------------
# Model previously finished in 242 episodes
learning_rate = 0.001
layer_size = 256
batch_size = 64
memory_size = 50_000
# name = "WithConfig_Timestamp_{}".format(int(time.time()))
name = "LR_{}_LS_{}_BS_{}_MS_{}_Timestamp_{}".format(
    learning_rate, layer_size, batch_size, memory_size, int(time.time()))

# For Weights and Biases parameter sweeps
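# A minimal sketch of how such a sweep can be declared with the wandb API;
# the parameter grid mirrors the names used above (learning_rate, layer_size,
# batch_size, memory_size), but the values and project name are assumptions.
import wandb

sweep_config = {
    "method": "grid",
    "parameters": {
        "learning_rate": {"values": [0.001, 0.0005]},
        "layer_size": {"values": [128, 256]},
        "batch_size": {"values": [32, 64]},
        "memory_size": {"values": [50_000]},
    },
}

# sweep_id = wandb.sweep(sweep_config, project="dqn-sweeps")
# wandb.agent(sweep_id, function=train)  # `train` builds and trains one model per run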
n_episodes = 500
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
batch_size = 32  # assumed value; not defined in the original snippet
done = False
for i in range(n_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    for t in range(5000):  # renamed from `time` to avoid shadowing the time module
        if i % 50 == 0:
            env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10  # penalize episode-ending states
        next_state = np.reshape(next_state, [1, state_size])
        if np.random.rand() < 0.3:  # subsample transitions into replay memory
            agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}".format(i, n_episodes, t))
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    if i % 100 == 0:
        agent.save("weights_cartpole_{}.hdf5".format(i))
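# agent.remember / agent.replay are not shown here. Below is a minimal sketch
# of the experience-replay pattern they imply (ring buffer plus minibatch
# Q-learning targets); the buffer size, gamma, and Keras model are assumptions.
import random
from collections import deque
import numpy as np

memory = deque(maxlen=2000)  # replay buffer (assumed capacity)
gamma = 0.95                 # discount factor (assumed)

def remember(state, action, reward, next_state, done):
    memory.append((state, action, reward, next_state, done))

def replay(model, batch_size):
    # sample a minibatch and fit the Q-network toward bootstrapped targets
    minibatch = random.sample(memory, batch_size)
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            target = reward + gamma * np.amax(model.predict(next_state, verbose=0)[0])
        target_f = model.predict(state, verbose=0)
        target_f[0][action] = target
        model.fit(state, target_f, epochs=1, verbose=0)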
            break

# def q_learning():
if __name__ == '__main__':
    EPISODES = 50
    pin_servo = 9
    batch_size = 32
    game_name = 'CartPole-v1'
    board = Arduino('9600', port='/dev/cu.usbmodem14311')
    # demo_(board, pin_servo)

    # initialise game
    env, state_size, action_size = env_init(game_name)
    # initialise agent
    agent = DQNAgent(state_size, action_size)
    # load model
    agent.load("../models/cartpole-dqn.h5")
    # train
    # agent.train(agent, env, EPISODES, state_size, batch_size)
    # save model/agent state
    agent.save("cartpole-dqn.h5")
    # move servo!!
    demo(agent, env, EPISODES, state_size, batch_size)
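# env_init is not defined in this excerpt; a plausible sketch of the helper
# the main block assumes, matching the return order at its call site above:
import gym

def env_init(game_name):
    env = gym.make(game_name)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    return env, state_size, action_size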
np.savetxt(sim_env.channel_type + '/{}-Abs_Error'.format(ID) + parameters + '.csv',
           np.transpose(error), header='Error', fmt='0%13.11f')
np.savetxt(sim_env.channel_type + '/{}-Avg_Loss'.format(ID) + parameters + '.csv',
           np.transpose(loss_overall), header='Error[sum(-log10(e))]', fmt='0%30.28f')
np.savetxt(sim_env.channel_type + '/{}-Abs_Loss'.format(ID) + parameters + '.csv',
           np.transpose(QN.loss), header='Error', fmt='0%13.11f')
QN.save(sim_env.channel_type + '/{}'.format(ID) + parameters)
index += 1

### Test using a random number of available servers ###
# sim_env.reset()
# for u in range(testing_comps):
#     action = QN.random_act()
#     next_state, rewards, overall_err = sim_env.Assign_Cores(action)
# avg_error_random = sim_env.error / testing_comps
# print(avg_error_random)

### Test with the optimal current decision ###
# optimal_current = Sim_Optimal_Offloading_V2.Optimal_Offloading(sim_env.S, 1, sim_env.SNR_avg[0])
# sim_env.reset()
# for u in range(testing_comps):
#     error_opt, action = optimal_current.return_action(sim_env.SNR[-2])