Example #1
def train():
    env = gym.make('CartPole-v0')
    agent = DQNAgent(env=env)
    num_episodes = 200
    for i_episode in range(num_episodes):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_reward += reward
            update_array = [state, action, reward, next_state, done]
            agent.update(update_array)
            state = next_state
            if done:
                print("Episode ", i_episode, ": ", total_reward, "  epsilon: ", agent.epsilon)
                break
    agent.save('myClassModel')
    env.close()
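The DQNAgent class itself is not included in the excerpt above. A minimal sketch of the interface Example #1 relies on is given below; the method and attribute names (get_action, update, save, epsilon) come from the calls in the example, while everything else is an assumption.

import random

import numpy as np


class DQNAgentSketch:
    """Hypothetical skeleton matching the calls made in Example #1."""

    def __init__(self, env, epsilon=1.0):
        self.env = env
        self.epsilon = epsilon  # exploration rate printed by the training loop

    def get_action(self, state):
        # Epsilon-greedy: explore with probability epsilon, otherwise act greedily.
        if random.random() < self.epsilon:
            return self.env.action_space.sample()
        return int(np.argmax(self._q_values(state)))

    def _q_values(self, state):
        raise NotImplementedError  # forward pass of the Q-network goes here

    def update(self, transition):
        # transition is the [state, action, reward, next_state, done] list built above;
        # a real agent would store it in a replay buffer and train the Q-network.
        raise NotImplementedError

    def save(self, path):
        raise NotImplementedError  # e.g. persist the network weights under path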
Example #2
    agent = DQNAgent(env, log_dir=log_dir)
    # agent.load(log_dir)
    episode_rewards = []

    ep = 0
    obs = env.reset()
    episode_reward = 0
    for frame in range(Config.MAX_FRAMES):
        # print("frame", frame)
        # env.render()
        epsilon = Config.epsilon_by_frame(frame)
        action = agent.get_action(obs, epsilon)
        prev_obs = obs
        obs, reward, done, _ = env.step(action)
        episode_reward += reward
        agent.update(prev_obs, action, reward, obs, frame)
        if done:
            episode_rewards.append(episode_reward)
            agent.writer.add_scalar("data/reward", episode_reward, ep)
            print("episode", ep, "reward:", episode_reward)
            ep += 1
            obs = env.reset()
            episode_reward = 0
            # Checkpoint every 50 completed episodes.
            if ep % 50 == 0:
                agent.save(log_dir)

    agent.save(log_dir)
    env.close()
    agent.writer.close()
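Config.epsilon_by_frame and Config.MAX_FRAMES are not defined in this excerpt. A common choice for such a schedule, and purely an assumption here, is an exponential decay from a starting epsilon towards a small final value:

import math


class Config:
    MAX_FRAMES = 100_000     # assumed total number of training frames
    epsilon_start = 1.0
    epsilon_final = 0.01
    epsilon_decay = 30_000   # decay time constant, in frames

    @staticmethod
    def epsilon_by_frame(frame):
        # Exponential interpolation from epsilon_start down to epsilon_final.
        return Config.epsilon_final + (Config.epsilon_start - Config.epsilon_final) * \
            math.exp(-frame / Config.epsilon_decay)

With these values, epsilon starts at 1.0 and approaches 0.01 as the frame count grows.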
Example #3
                states = np.reshape(states, [1, state_size])
                action = QN.act_test(states)
                # print('SNR:', sim_env.SNR[-1])
                # print('action:', sim_env.action[action])
                next_state, rewards, overall_err = sim_env.Assign_Cores(action)
                error = np.append(error, overall_err)
                next_state = np.reshape(next_state, [1, state_size])
                states = next_state
            print(e)
            print(sim_env.error/testing)
            error_avg = np.append(error_avg, np.power(10, -sim_env.error/testing))

        # Save Error and Losses in CSV file, Save weights of networks ####
        parameters = '_DQN_S{}_rho{}_SNR{}_PS{}_lr{}_df{}_sl{}_nhl{}_ef{}'.\
            format(sim_env.S, sim_env.p, sim_env.SNR_avg[0], sim_env.pi, QN.learning_rate,
                    QN.gamma, QN.size_layers, QN.number_hidden_layers, QN.epsilon_decay)
        print(parameters)
        np.savetxt(sim_env.channel_type + '/Hyperparameters/Avg_Error' + parameters + '_6.csv', np.transpose(error_avg),
                        header='Error[sum(-log10(e))]', fmt='0%30.28f')
        np.savetxt(sim_env.channel_type + '/Hyperparameters/Abs_Error' + parameters + '_6.csv', np.transpose(error), header='Error',
                        fmt='0%13.11f')
        np.savetxt(sim_env.channel_type + '/Hyperparameters/Avg_Loss' + parameters + '_6.csv', np.transpose(loss_overall),
                         header='Error[sum(-log10(e))]', fmt='0%30.28f')
        np.savetxt(sim_env.channel_type + '/Hyperparameters/Abs_Loss' + parameters + '_6.csv', np.transpose(QN.loss), header='Error',
                        fmt='0%13.11f')
        QN.save(sim_env.channel_type + '/Hyperparameters/' + parameters)
        index += 1
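The traces saved above can be read back with numpy alone, since np.loadtxt skips the '#'-prefixed header that np.savetxt wrote; the path below simply reuses the names from the snippet:

import numpy as np

# Hypothetical round trip: reload the saved average-error trace for inspection.
avg_error_trace = np.loadtxt(sim_env.channel_type + '/Hyperparameters/Avg_Error' + parameters + '_6.csv')
print(avg_error_trace.shape)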



Example #4
    agent.epsilon = 0.12
    done = False
    episodes = 0

    app = TetrisApp()
    ###############################################################
    #################### Tetris-specific section ########################
    key_actions = {
        'ESCAPE': app.quit,
        'LEFT': lambda: app.move(-1),
        'RIGHT': lambda: app.move(+1),
        'DOWN': lambda: app.drop(True),
        'UP': app.rotate_stone,
        'p': app.toggle_pause,
        'RETURN': app.start_game,
        'SPACE': lambda: agent.save("mreza-dqn.h5")  # wrap in a lambda so save() runs on the key press, not when the dict is built
    }

    app.gameover = False
    app.paused = False

    dont_burn_my_cpu = pygame.time.Clock()
    num = 0
    mon = {'top': 0, 'left': 0, 'width': 200, 'height': 200}
    sct = mss.mss()
    write_to_file_step = 0.95
    broj_ociscenih_linija_file = 0   # number of cleared lines written to file
    broj_partija_file = 0            # number of games written to file
    while True:
        app.screen.fill((0, 0, 0))
        if app.gameover:
Example #5
                         allow_val_change=True)

        # Utilize the hyperparameters of the model like this: config.parameter
        config = wandb.config

        model = DQNAgent(env, config, epsilon, training_episodes,
                         testing_episodes, frames)

        hyper_param_counter += 1
        model.train()
        print("Run {} of {}.".format(hyper_param_counter, total_runs))
        model_dir = "saved_models"
        model_save_name = model_dir + "/LR_{}_LS_{}_BS_{}_MS_{}_Timestamp_{}".format(
            learning_rate, layer_size, batch_size, memory_size,
            int(time.time())) + "sb.h5"
        model.save(model_save_name)

        #---------------------------------------------------------------------------------------

        # Model previously finished in 242 Episodes
        learning_rate = 0.001
        layer_size = 256
        batch_size = 64
        memory_size = 50_000

        # name = "WithConfig_Timestamp_{}".format(int(time.time()))
        name = "LR_{}_LS_{}_BS_{}_MS_{}_Timestamp_{}".format(
            learning_rate, layer_size, batch_size, memory_size,
            int(time.time()))

        # For Weights and Biases parameter Sweeps
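The closing comment refers to Weights & Biases parameter sweeps. A minimal sweep setup over the same hyperparameters could look like the following; the method, metric, value ranges, project name, and the train_run entry point are all illustrative assumptions, not taken from the excerpt:

import wandb

sweep_config = {
    "method": "grid",
    "metric": {"name": "reward", "goal": "maximize"},
    "parameters": {
        "learning_rate": {"values": [0.001, 0.0005]},
        "layer_size": {"values": [128, 256]},
        "batch_size": {"values": [32, 64]},
        "memory_size": {"values": [50_000]},
    },
}

sweep_id = wandb.sweep(sweep_config, project="dqn-sweeps")
# train_run is a hypothetical function that calls wandb.init(), builds the DQNAgent
# from wandb.config, and runs model.train(), mirroring the code above.
wandb.agent(sweep_id, function=train_run, count=8)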
Example #6
import gym
import numpy as np

env = gym.make('CartPole-v1')   # assumed environment; env is used below but not created in this excerpt
batch_size = 32                 # assumed value; used by agent.replay() but not defined in this excerpt
n_episodes = 500

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

agent = DQNAgent(state_size, action_size)
done = False

for i in range(n_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    for time in range(5000):
        if i % 50 == 0:
            env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        if np.random.rand() < 0.3:
            agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}".format(i, n_episodes, time))
            break
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)

    if i % 100 == 0:
        agent.save("weights_cartpole_{}.hdf5".format(i))
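The remember and replay methods used in Example #6 are not shown. A typical Keras-style implementation, sketched here as an assumption (self.model stands in for the agent's Q-network), stores transitions in a bounded deque and trains on random minibatches:

import random
from collections import deque

import numpy as np


class ReplaySketch:
    """Hypothetical replay-buffer methods matching agent.remember / agent.replay above."""

    def __init__(self, gamma=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995):
        self.memory = deque(maxlen=2000)
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap with the discounted best Q-value of the next state.
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        # Decay exploration after each replay step.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay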
Example #7
                break


# def q_learning():

if __name__ == '__main__':
    EPISODES = 50
    pin_servo = 9
    batch_size = 32
    game_name = 'CartPole-v1'

    board = Arduino('9600', port='/dev/cu.usbmodem14311')
    # demo_(board, pin_servo)

    # initialise game
    env, state_size, action_size = env_init(game_name)

    # initialise agent
    agent = DQNAgent(state_size, action_size)

    # load model
    agent.load("../models/cartpole-dqn.h5")

    # train
    # agent.train(agent, env, EPISODES, state_size, batch_size)

    # save model/agent state
    agent.save("cartpole-dqn.h5")

    # move servo!!
    demo(agent, env, EPISODES, state_size, batch_size)
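Examples #4 through #7 save and load .h5 or .hdf5 files, which suggests Keras HDF5 weight files; save and load are then commonly thin wrappers like the ones below (an assumption, since the agent class is not shown in these excerpts):

    # Inside the agent class; self.model is assumed to be the Keras Q-network.
    def save(self, name):
        self.model.save_weights(name)   # write the weights to an HDF5 file

    def load(self, name):
        self.model.load_weights(name)   # restore weights into the same architecture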
Example #8
    np.savetxt(sim_env.channel_type + '/{}-Abs_Error'.format(ID) + parameters +
               '.csv',
               np.transpose(error),
               header='Error',
               fmt='0%13.11f')
    np.savetxt(sim_env.channel_type + '/{}-Avg_Loss'.format(ID) + parameters +
               '.csv',
               np.transpose(loss_overall),
               header='Error[sum(-log10(e))]',
               fmt='0%30.28f')
    np.savetxt(sim_env.channel_type + '/{}-Abs_Loss'.format(ID) + parameters +
               '.csv',
               np.transpose(QN.loss),
               header='Error',
               fmt='0%13.11f')
    QN.save(sim_env.channel_type + '/{}'.format(ID) + parameters)
    index += 1

### Test Using random number of available Servers ###
# sim_env.reset()
# for u in range(testing_comps):
#     action = QN.random_act()
#     next_state, rewards, overall_err = sim_env.Assign_Cores(action)
# avg_error_random = sim_env.error / testing_comps
# print(avg_error_random)

### Test with optimal current decision ###
# optimal_current = Sim_Optimal_Offloading_V2.Optimal_Offloading(sim_env.S, 1, sim_env.SNR_avg[0])
# sim_env.reset()
# for u in range(testing_comps):
#     error_opt, action = optimal_current.return_action(sim_env.SNR[-2])