Example #1
    observation = env.reset()

    for t in range(1000):
        #env.render()
        # Forward pass: feed the current observation through the policy network to pick an action
        action = agent.choose_action(observation)

        observation_, reward, done, info = env.step(action)

        # store the reward for this step
        agent.store_rewards(reward)

        score += reward
        agent.learn(observation, reward, observation_, done)
        observation = observation_
        if done:
            score_history.append(score)

            print("==========================================")
            print("Episode: ", i_episode)
            print("Reward: ", score)

            break
    if i_episode % 100 == 0:
        agent.save_model()
        print("Model Saved")

plotLearning(score_history, filename="cartpole.png", window=10)
env.close()
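Every example on this page calls a plotLearning utility that is not shown in the excerpts. Below is a minimal sketch of what the scores-plus-window variant used above could look like, assuming a matplotlib running-average plot; the project's real helper may differ, and several examples further down use a four-argument variant that also takes an x-axis and an epsilon history.

import numpy as np
import matplotlib.pyplot as plt

# Illustrative stand-in for the plotLearning helper used throughout this
# page; matches the plotLearning(scores, filename=..., window=...) calls.
def plotLearning(scores, filename, window=5):
    N = len(scores)
    running_avg = np.empty(N)
    for t in range(N):
        # trailing average over the last `window` episodes
        running_avg[t] = np.mean(scores[max(0, t - window):(t + 1)])
    plt.figure()
    plt.plot(running_avg)
    plt.title('Running average of previous %d scores' % window)
    plt.savefig(filename)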
Example #2
              layer1_size=400,
              layer2_size=300,
              n_actions=2)

#agent.load_models()
np.random.seed(0)

score_history = []
for i in range(1000):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
        #env.render()
    score_history.append(score)

    if i % 25 == 0:
        agent.save_models()

    print('episode ', i, 'score %.2f' % score,
          'trailing 100 games avg %.3f' % np.mean(score_history[-100:]))

filename = 'LunarLander-alpha000025-beta00025-400-300.png'
plotLearning(score_history, filename, window=100)
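The agent.remember(obs, act, reward, new_state, int(done)) call above implies an experience replay buffer behind the Agent class. The class itself is not shown in the excerpt; the following circular-buffer sketch is purely illustrative of that pattern, not the project's implementation.

import numpy as np

class ReplayBuffer:
    # Illustrative transition store: fixed-size arrays, overwritten
    # in circular order once full.
    def __init__(self, max_size, input_dims, n_actions):
        self.mem_size = max_size
        self.mem_cntr = 0
        self.state_memory = np.zeros((max_size, *input_dims))
        self.new_state_memory = np.zeros((max_size, *input_dims))
        self.action_memory = np.zeros((max_size, n_actions))
        self.reward_memory = np.zeros(max_size)
        self.terminal_memory = np.zeros(max_size)

    def store_transition(self, state, action, reward, state_, done):
        idx = self.mem_cntr % self.mem_size        # overwrite oldest first
        self.state_memory[idx] = state
        self.new_state_memory[idx] = state_
        self.action_memory[idx] = action
        self.reward_memory[idx] = reward
        self.terminal_memory[idx] = 1 - done       # mask terminal next-states
        self.mem_cntr += 1

    def sample(self, batch_size):
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size)
        return (self.state_memory[batch], self.action_memory[batch],
                self.reward_memory[batch], self.new_state_memory[batch],
                self.terminal_memory[batch])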
Example #3
                  input_dims=[8],
                  lr=0.001)
    scores, eps_history = [], []
    n_games = 500

    for i in range(n_games):
        score = 0
        done = False
        observation = env.reset()

        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward, observation_,
                                   done)
            agent.learn()
            observation = observation_
            #env.render()

        scores.append(score)
        eps_history.append(agent.epsilon)
        avg_score = np.mean(scores[-100:])

        print('episode ', i, 'score %.2f' % score,
              'average score %.2f' % avg_score, 'epsilon %.2f' % agent.epsilon)

    x = [i + 1 for i in range(n_games)]
    filename = 'lunar_lander_dqn_2.png'
    plotLearning(x, scores, eps_history, filename)
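The eps_history bookkeeping above implies epsilon-greedy exploration inside agent.choose_action. The agent's code is not shown; here is a standalone sketch of the rule, with q_values standing in for the output of the (unshown) Q-network:

import numpy as np

def epsilon_greedy(q_values, epsilon):
    # exploit with probability 1 - epsilon, explore otherwise
    if np.random.random() > epsilon:
        return int(np.argmax(q_values))       # greedy action
    return np.random.randint(len(q_values))   # uniform random action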
Example #4
              tau=0.001,
              env=env,
              batch_size=64,
              layer1_size=400,
              layer2_size=300,
              n_actions=1)

np.random.seed(0)
score_history = []
num_episodes = 2500

for i in range(num_episodes):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        action = agent.choose_action(obs)
        obs_, reward, done, info = env.step(action)
        agent.remember(obs, action, reward, obs_, done)
        agent.learn()
        score += reward
        obs = obs_
        env.render()
    score_history.append(score)
    print('episode ', i, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))

filename = 'pendulum.png'
x = [i for i in range(num_episodes)]
plotLearning(x, score_history, score_history, filename)  # score_history passed twice: it also fills the epsilon slot of the 4-arg helper
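The tau=0.001 argument in the constructor points at soft (Polyak) target-network updates, as used in DDPG. A minimal sketch of that update with parameters held as plain dicts of arrays; the project's actual Agent is not shown, so this is an illustration only.

import numpy as np

def soft_update(online_params, target_params, tau=0.001):
    # target <- tau * online + (1 - tau) * target, applied per parameter
    for name, w in online_params.items():
        target_params[name] = tau * w + (1.0 - tau) * target_params[name]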
Example #5
            if done:
                stepIdx = 0
                if currIt + 1 < iterationNum:
                    env.reset()
                break
        throughput_history.append(throughput_sum)
        rtt_history.append(rtt_sum)
        score_history.append(score)
        print('episode: ', i, 'score: %.2f' % score)

except KeyboardInterrupt:
    print("Ctrl-C -> Exit")
finally:
    env.close()
    filename = 'TCP_A2C_20_step.png'
    plotLearning(score_history, filename=filename, window=10)

    with open('reward_20_step.csv', mode='w') as reward_file:
        csv_writer = csv.writer(reward_file,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(score_history)

    throughput_plot_filename = 'throughput_plot_20_step.png'
    plotLearning(throughput_history,
                 filename=throughput_plot_filename,
                 window=10)

    rtt_plot_filename = 'rtt_plot_20_step.png'
    plotLearning(rtt_history, filename=rtt_plot_filename, window=10)
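Since the scores are written out as a single CSV row, reading them back for later analysis is straightforward; a small companion sketch (the filename comes from the code above, everything else is illustrative):

import csv

# read the single-row reward CSV written above back into floats
with open('reward_20_step.csv', mode='r') as reward_file:
    row = next(csv.reader(reward_file))
    score_history = [float(s) for s in row]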
Example #6
def main(alpha, beta, gamma):

    sim_param = SimParam()
    no_of_slices = sim_param.no_of_slices
    no_of_users_per_slice = sim_param.no_of_users_per_slice
    no_of_rb = len(sim_param.RB_pool)
    no_of_timeslots = int(sim_param.T_C / sim_param.T_S)

    # state space :
    #n_states = no_of_slices * no_of_users_per_slice * no_of_rb * no_of_timeslots
    n_states = no_of_slices

    # action space : #_slices ^ #_rb
    n_actions = no_of_slices ** no_of_rb

    agent = Agent(alpha=alpha, beta=beta, input_dims=[n_states], gamma=gamma,
                  n_actions=n_actions, layer1_size=32, layer2_size=32)

    env = gym.make('ransim-v1')

    # run baseline algorithm
    baseline_score = 0
    done = False
    observation = env.reset()
    while not done:
        action = 'baseline'
        observation_, reward, done, info = env.step(action)
        if done:
            env.plot()
        observation = observation_
        baseline_score += reward
    print('baseline score: %.3f' % baseline_score)

    score_history = []
    score = 0
    num_episodes = 250
    t0 = time.time()
    for i in range(num_episodes):
        t_tmp = time.time()

        done = False
        score = 0

        # insert parameters
        class Parameters:
            pass
        parameters = Parameters()
        parameters.SEED_IAT = 0
        parameters.SEED_SHADOWING = 0
        if i % 50 == 0:
            NO_logging = 0
        else:
            NO_logging = 1

        observation = env.reset(parameters, NO_logging)


        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)

            if done and i % 50 == 0:
                env.plot()

            agent.learn(observation, reward, observation_, done)

            observation = observation_
            score += reward  # use reward.sum() here if the env returns a vector reward

        score_history.append(score)
        elapsed_time = time.time() - t_tmp
        print('episode: ', i, 'score: %.3f  time: %d' % (score, elapsed_time))

    print(time.time() - t0)  # total training time in seconds
    filename = 'results/result_alpha_%.4f_beta_%.4f_gamma_%.2f.png' % (alpha, beta, gamma)
    #filename = 'cartpole-discrete-actor-critic-alpha0001-beta0005-32x32fc-1500games.png'
    plotLearning(score_history, filename=filename, window=10)
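main() takes the two learning rates and the discount factor as arguments, which makes hyperparameter sweeps straightforward. A hypothetical entry point follows; the values are placeholders suggested by the commented-out filename above (alpha0001-beta0005), not confirmed settings:

if __name__ == '__main__':
    # placeholder hyperparameters, not the author's confirmed values
    main(alpha=0.0001, beta=0.0005, gamma=0.99)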
Example #7
        epsHistory.append(brain.EPSILON)
        done = False
        observation = env.reset()
        frames = [np.sum(observation[15:200, 30:125], axis=2)]
        score = 0
        lastAction = 0
        while not done:
            if len(frames) == 3:
                action = brain.chooseAction(frames)
                frames = []
            else:
                action = lastAction
            observation_, reward, done, info = env.step(action)
            score += reward
            frames.append(np.sum(observation_[15:200, 30:125], axis=2))
            if done and info['ale.lives'] == 0:
                reward = -100
            brain.storeTransition(
                np.mean(observation[15:200, 30:125], axis=2), action, reward,
                np.mean(observation_[15:200, 30:125], axis=2))
            observation = observation_
            brain.learn(batch_size)
            lastAction = action
            #env.render()
        scores.append(score)
        print('score:', score)
    x = [i + 1 for i in range(numGames)]
    fileName = str(numGames) + 'Games' + 'Gamma' + str(brain.GAMMA) + \
               'Alpha' + str(brain.ALPHA) + 'Memory' + str(brain.memSize)+ '.png'
    plotLearning(x, scores, epsHistory, fileName)
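The repeated slicing expression above is the frame preprocessing: crop the Atari screen to the play area and collapse the RGB channels into one plane. Factored out as a helper it reads more clearly; note that the excerpt uses np.sum when buffering frames but np.mean when storing transitions, and the excerpt does not say whether that mismatch is intentional.

import numpy as np

def preprocess(frame, reduce=np.sum):
    # crop rows 15:200 and columns 30:125, then collapse the channel axis;
    # pass reduce=np.mean to match the storeTransition calls above
    return reduce(frame[15:200, 30:125], axis=2)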