Example 1
def play(episodes, is_render, is_testing, checkpoint_interval,
         weights_filename_prefix, csv_filename_prefix, batch_size):
    # init statistics. NOTE: simple tag specific!
    statistics_header = ["episode"]
    statistics_header.append("steps")
    statistics_header.extend(["reward_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["loss_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["collisions_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_theta_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_mu_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_sigma_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_dt_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_x0_{}".format(i) for i in range(env.n)])
    print("Collecting statistics {}:".format(" ".join(statistics_header)))
    statistics = general_utilities.Time_Series_Statistics_Store(
        statistics_header)

    for episode in range(episodes):
        states = env.reset()
        episode_losses = np.zeros(env.n)
        episode_rewards = np.zeros(env.n)
        collision_count = np.zeros(env.n)
        steps = 0
        jsonFile = []
        coords = []
        fullyBreak = False

        while steps < 300:
            steps += 1

            # render
            if is_render:
                env.render()
                time.sleep(0.05)

            if args.dump_file:
                # Not part of the original code...
                frame = env.dump_file()
                coords.append(frame)
                print("Working...\n")

            # act
            actions = []
            for i in range(env.n):
                action = np.clip(
                    actors[i].choose_action(states[i]) + actors_noise[i](), -2,
                    2)
                actions.append(action)

            # step
            states_next, rewards, done, info = env.step(actions)

            # learn
            if not is_testing:
                size = memories[0].pointer
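                # Draw replay indices once per step: every index while the buffer
                # is smaller than batch_size, otherwise a random batch of batch_size.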
                batch = random.sample(
                    range(size), size) if size < batch_size else random.sample(
                        range(size), batch_size)

                for i in range(env.n):
                    if done[i]:
                        rewards[i] -= 500

                    memories[i].remember(states, actions, rewards[i],
                                         states_next, done[i])

                    if memories[i].pointer > batch_size * 10:
                        s, a, r, sn, _ = memories[i].sample(batch, env.n)
                        r = np.reshape(r, (batch_size, 1))
                        loss = critics[i].learn(s, a, r, sn)
                        actors[i].learn(actors, s)
                        episode_losses[i] += loss
                    else:
                        episode_losses[i] = -1

            states = states_next
            episode_rewards += rewards
            collision_count += np.array(
                simple_tag_utilities.count_agent_collisions(env))

            # reset states if done
            if any(done):
                episode_rewards = episode_rewards / steps
                episode_losses = episode_losses / steps

                statistic = [episode]
                statistic.append(steps)
                statistic.extend([episode_rewards[i] for i in range(env.n)])
                statistic.extend([episode_losses[i] for i in range(env.n)])
                statistic.extend(collision_count.tolist())
                statistic.extend([actors_noise[i].theta for i in range(env.n)])
                statistic.extend([actors_noise[i].mu for i in range(env.n)])
                statistic.extend([actors_noise[i].sigma for i in range(env.n)])
                statistic.extend([actors_noise[i].dt for i in range(env.n)])
                statistic.extend([actors_noise[i].x0 for i in range(env.n)])
                statistics.add_statistics(statistic)

                # Not part of the original code...

                if args.dump_file:
                    print("Creating...\n")
                    with open("results/coords.txt", "w+") as f:
                        # integer division: avoid writing a float like "6.0"
                        f.write(str(len(coords[0]) // 2))
                        f.write("\n")
                        for fr in coords:
                            print("Writing...\n")
                            f.write(" ".join(str(i) for i in fr))
                            f.write("\n")
                    fullyBreak = True
                    break

                coords = []

                if episode % 25 == 0:
                    print(statistics.summarize_last())
                break

        if episode % checkpoint_interval == 0:
            statistics.dump("{}_{}.csv".format(csv_filename_prefix, episode))
            if not os.path.exists(weights_filename_prefix):
                os.makedirs(weights_filename_prefix)
            save_path = saver.save(session,
                                   os.path.join(weights_filename_prefix,
                                                "models"),
                                   global_step=episode)
            print("saving model to {}".format(save_path))
            if episode >= checkpoint_interval:
                os.remove("{}_{}.csv".format(csv_filename_prefix,
                                             episode - checkpoint_interval))
        if fullyBreak:
            break

    return statistics
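The `actors_noise[i]()` calls above add temporally correlated exploration noise to each actor's action, and the statistics rows read `theta`, `mu`, `sigma`, `dt` and `x0` back from the same objects. A minimal Ornstein-Uhlenbeck sketch with that interface (the class name and the default hyperparameters are assumptions, not taken from the example):

import numpy as np

class OrnsteinUhlenbeckNoise:
    # Callable noise process; the attributes match the fields logged above.
    def __init__(self, mu, sigma=0.3, theta=0.15, dt=1e-2, x0=None):
        self.mu = np.asarray(mu, dtype=np.float64)
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.x0 = x0
        self.x_prev = self.x0 if self.x0 is not None else np.zeros_like(self.mu)

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        x = (self.x_prev
             + self.theta * (self.mu - self.x_prev) * self.dt
             + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape))
        self.x_prev = x
        return x

# Usage matching the example: one noise process per agent, e.g.
# actors_noise = [OrnsteinUhlenbeckNoise(mu=np.zeros(action_dim)) for _ in range(env.n)]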
Example 2
def play(episodes, is_render, is_testing, checkpoint_interval,
         weights_filename_prefix, csv_filename_prefix, batch_size):
    # init statistics. NOTE: simple tag specific!
    statistics_header = ["episode"]
    statistics_header.append("steps")
    statistics_header.extend(["reward_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["loss_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["eps_greedy_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["collisions_{}".format(i) for i in range(env.n)])
    print("Collecting statistics {}:".format(" ".join(statistics_header)))
    statistics = general_utilities.Time_Series_Statistics_Store(
        statistics_header)

    for episode in range(episodes):
        states = env.reset()
        episode_losses = np.zeros(env.n)
        episode_rewards = np.zeros(env.n)
        collision_count = np.zeros(env.n)
        steps = 0

        while True:
            steps += 1

            # render
            if is_render:
                env.render()
                time.sleep(0.1)

            # act
            actions = []
            actions_onehot = []
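            # Each DQN picks a discrete action index; it is re-encoded below as a
            # one-hot control vector (adversaries get a slightly lower speed of 0.9).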
            for i in range(env.n):
                action = dqns[i].choose_action(states[i])
                speed = 0.9 if env.agents[i].adversary else 1

                onehot_action = np.zeros(n_actions[i])
                onehot_action[action] = speed
                actions_onehot.append(onehot_action)
                actions.append(action)

            # step
            states_next, rewards, done, info = env.step(actions_onehot)

            # learn
            if not is_testing:
                size = memories[0].pointer
                batch = random.sample(
                    range(size), size) if size < batch_size else random.sample(
                        range(size), batch_size)

                for i in range(env.n):
                    if done[i]:
                        rewards[i] -= 50

                    memories[i].remember(states[i], actions[i], rewards[i],
                                         states_next[i], done[i])

                    if memories[i].pointer > batch_size * 10:
                        history = dqns[i].learn(*memories[i].sample(batch))
                        episode_losses[i] += history.history["loss"][0]
                    else:
                        episode_losses[i] = -1

            states = states_next
            episode_rewards += rewards
            collision_count += np.array(
                simple_tag_utilities.count_agent_collisions(env))

            # reset states if done
            if any(done):
                episode_rewards = episode_rewards / steps
                episode_losses = episode_losses / steps

                statistic = [episode]
                statistic.append(steps)
                statistic.extend([episode_rewards[i] for i in range(env.n)])
                statistic.extend([episode_losses[i] for i in range(env.n)])
                statistic.extend([dqns[i].eps_greedy for i in range(env.n)])
                statistic.extend(collision_count.tolist())
                statistics.add_statistics(statistic)
                if episode % 25 == 0:
                    print(statistics.summarize_last())
                break

        if episode % checkpoint_interval == 0:
            statistics.dump("{}_{}.csv".format(csv_filename_prefix, episode))
            general_utilities.save_dqn_weights(
                dqns, "{}_{}_".format(weights_filename_prefix, episode))
            if episode >= checkpoint_interval:
                os.remove("{}_{}.csv".format(csv_filename_prefix,
                                             episode - checkpoint_interval))

    return statistics
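Example 2 swaps the continuous DDPG actors for per-agent DQNs; `dqns[i].choose_action(states[i])` returns a discrete action index and `eps_greedy` is logged per agent, which suggests epsilon-greedy exploration. A sketch of that selection rule under those assumptions (the function name and the Keras-style `predict` call are not taken from the example):

import numpy as np

def choose_action_eps_greedy(q_network, state, eps_greedy, n_actions):
    # Explore with probability eps_greedy, otherwise act greedily on the Q-values.
    if np.random.rand() < eps_greedy:
        return np.random.randint(n_actions)
    q_values = q_network.predict(state[np.newaxis, :], verbose=0)
    return int(np.argmax(q_values[0]))

In the loop above the returned index is then expanded into the one-hot control vector the environment expects.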
Example 3
def play(episodes, is_render, is_testing, checkpoint_interval,
         weights_filename_prefix, csv_filename_prefix, batch_size):
    # init statistics. NOTE: simple tag specific!
    statistics_header = ["episode"]
    statistics_header.append("steps")
    statistics_header.extend(["reward_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["loss_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["collisions_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_theta_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_mu_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_sigma_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_dt_{}".format(i) for i in range(env.n)])
    statistics_header.extend(["ou_x0_{}".format(i) for i in range(env.n)])
    print("Collecting statistics {}:".format(" ".join(statistics_header)))
    statistics = general_utilities.Time_Series_Statistics_Store(
        statistics_header)

    for episode in range(episodes):
        states = env.reset()
        episode_losses = np.zeros(env.n)
        episode_rewards = np.zeros(env.n)
        collision_count = np.zeros(env.n)
        steps = 0

        while True:
            steps += 1

            # render
            if is_render:
                env.render()

            # act
            actions = []
            for i in range(env.n):
                action = np.clip(
                    actors[i].choose_action(states[i]) + actors_noise[i](), -2,
                    2)
                actions.append(action)

            # step
            states_next, rewards, done, info = env.step(actions)

            # learn
            if not is_testing:
                size = memories[0].pointer
                batch = random.sample(
                    range(size), size) if size < batch_size else random.sample(
                        range(size), batch_size)

                for i in range(env.n):
                    if done[i]:
                        rewards[i] -= 50

                    memories[i].remember(states[i], actions[i], rewards[i],
                                         states_next[i], done[i])

                    if memories[i].pointer > batch_size * 10:
                        s, a, r, sn, _ = memories[i].sample(batch)
                        r = np.reshape(r, (batch_size, 1))
                        loss = critics[i].learn(s, a, r, sn)
                        actors[i].learn(s)
                        episode_losses[i] += loss
                    else:
                        episode_losses[i] = -1

            states = states_next
            episode_rewards += rewards
            collision_count += np.array(
                simple_tag_utilities.count_agent_collisions(env))

            # reset states if done
            if any(done):
                episode_rewards = episode_rewards / steps
                episode_losses = episode_losses / steps

                statistic = [episode]
                statistic.append(steps)
                statistic.extend([episode_rewards[i] for i in range(env.n)])
                statistic.extend([episode_losses[i] for i in range(env.n)])
                statistic.extend(collision_count.tolist())
                statistic.extend([actors_noise[i].theta for i in range(env.n)])
                statistic.extend([actors_noise[i].mu for i in range(env.n)])
                statistic.extend([actors_noise[i].sigma for i in range(env.n)])
                statistic.extend([actors_noise[i].dt for i in range(env.n)])
                statistic.extend([actors_noise[i].x0 for i in range(env.n)])
                statistics.add_statistics(statistic)
                if episode % 25 == 0:
                    print(statistics.summarize_last())
                break

        if episode % checkpoint_interval == 0:
            statistics.dump("{}_{}.csv".format(csv_filename_prefix, episode))
            if not os.path.exists(weights_filename_prefix):
                os.makedirs(weights_filename_prefix)
            save_path = saver.save(session,
                                   os.path.join(weights_filename_prefix,
                                                "models"),
                                   global_step=episode)
            print("saving model to {}".format(save_path))
            if episode >= checkpoint_interval:
                os.remove("{}_{}.csv".format(csv_filename_prefix,
                                             episode - checkpoint_interval))

    return statistics
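Both DDPG examples rely on the same replay-memory interface: `pointer` counts stored transitions, `remember(...)` appends one, and `sample(batch)` gathers the transitions at the indices the caller drew with `random.sample`. A minimal per-agent buffer with that interface (an unbounded list-backed sketch; the field names are assumptions, and the MADDPG variant in Example 1 additionally stores joint states and actions and passes `env.n` to `sample`):

import numpy as np

class Memory:
    # Grows without bound; `pointer` is the number of stored transitions, which is
    # what the callers above use as the range for random.sample.
    def __init__(self):
        self.data = []
        self.pointer = 0

    def remember(self, state, action, reward, state_next, done):
        self.data.append((np.asarray(state), np.asarray(action),
                          reward, np.asarray(state_next), done))
        self.pointer += 1

    def sample(self, batch):
        # `batch` is the list of indices drawn by the caller.
        states, actions, rewards, states_next, dones = zip(
            *(self.data[i] for i in batch))
        return (np.stack(states), np.stack(actions), np.asarray(rewards),
                np.stack(states_next), np.asarray(dones))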
Example 4
def play(checkpoint_interval, weights_filename_prefix, csv_filename_prefix, batch_size, stats_df):
    """Doc-string here"""
    for episode in range(args.episodes):
        states = env.reset()
        episode_losses = np.zeros(env.n)
        episode_rewards = np.zeros(env.n)
        collision_count = np.zeros(env.n)
        steps = 0

        while True:
            steps += 1

            # render
            if args.render:
                env.render()

            # act
            actions = []
            for i in range(env.n):
                action = np.clip(
                    actors[i].choose_action(states[i]) + actors_noise[i](), -2, 2)
                actions.append(action)

            # step
            states_next, rewards, done, info = env.step(actions)

            # learn
            if not args.testing:
                size = memories[0].pointer
                batch = random.sample(range(size), size) if size < batch_size else random.sample(
                    range(size), batch_size)

                for i in range(env.n):
                    if done[i]:
                        rewards[i] -= 500

                    memories[i].remember(states, actions, rewards[i],
                                         states_next, done[i])

                    if memories[i].pointer > batch_size * 10:
                        s, a, r, sn, _ = memories[i].sample(batch, env.n)
                        r = np.reshape(r, (batch_size, 1))
                        loss = critics[i].learn(s, a, r, sn)
                        actors[i].learn(actors, s)
                        episode_losses[i] += loss
                    else:
                        episode_losses[i] = -1

            states = states_next
            episode_rewards += rewards
            collision_count += np.array(
                simple_tag_utilities.count_agent_collisions(env))

            # reset states if done
            if any(done):
                episode_rewards = episode_rewards / steps
                episode_losses = episode_losses / steps

                write_stats_row(env, stats_df, episode, steps, episode_rewards, episode_losses, collision_count)

                if episode % 25 == 0:
                    print(stats_df.iloc[episode])
                break

        if episode % checkpoint_interval == 0:
            stats_file = f"{csv_filename_prefix}_{episode}.h5"
            store = pd.HDFStore(stats_file)
            store['stats_df'] = stats_df
            print(f"stats_df saved to {stats_file}")

            if not os.path.exists(weights_filename_prefix):
                os.makedirs(weights_filename_prefix)
            save_path = saver.save(session, os.path.join(
                weights_filename_prefix, "models"), global_step=episode)

    stats_file = f"{csv_filename_prefix}_{args.episodes}.h5"
    store = pd.HDFStore(stats_file)
    store['stats_df'] = stats_df
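Example 4 delegates row construction to a `write_stats_row` helper that is not shown above. A hypothetical version that fills one row of a pre-built `stats_df` (the column names mirror the statistics headers of Examples 1-3 and are an assumption here, as is the helper's body):

import pandas as pd

def write_stats_row(env, stats_df, episode, steps, episode_rewards,
                    episode_losses, collision_count):
    # Hypothetical helper: one row per episode, keyed by the episode index.
    # Assumes stats_df was created up front with exactly these columns.
    row = {"episode": episode, "steps": steps}
    for i in range(env.n):
        row["reward_{}".format(i)] = episode_rewards[i]
        row["loss_{}".format(i)] = episode_losses[i]
        row["collisions_{}".format(i)] = collision_count[i]
    stats_df.loc[episode] = pd.Series(row)

Under that assumption, `stats_df.iloc[episode]` in the loop above prints back the row that was just written.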