Example #2
def test_simple_use():
    q = Queue()

    items = list(range(10))

    for item in items:
        q.put(item)

    for item in items:
        assert item == q.get()
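The snippets on this page are shown without their imports; they all assume roughly the following header (a sketch — the exact module path depends on the Ray version, and older releases shipped the queue under ray.experimental):

import time

import pytest
import ray
from ray.util.queue import Queue, Empty, Full
from ray.exceptions import RayTimeoutError  # renamed GetTimeoutError in newer Ray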
Example #3
class EvaluationLogger:
    def __init__(self, get_log_data, log_size=100000):
        # Cap the log at log_size entries (Queue is unbounded by default).
        self.log = Queue(maxsize=log_size)
        self.get_log_data = get_log_data

    def log_data(self, env):
        # Extract one record from the environment and enqueue it.
        self.log.put(self.get_log_data(env))

    def get_data(self):
        # Drain the queue; the log is empty afterwards.
        return [self.log.get() for _ in range(self.log.size())]
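A hypothetical round trip through EvaluationLogger (the environment class and its field are assumptions):

class DummyEnv:                      # stand-in environment (assumption)
    episode_reward = 1.0

logger = EvaluationLogger(get_log_data=lambda env: env.episode_reward)
for _ in range(5):
    logger.log_data(DummyEnv())
rewards = logger.get_data()          # drains the log -> [1.0, 1.0, 1.0, 1.0, 1.0]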
Example #4
def test_simple_usage(ray_start_regular):

    q = Queue()

    items = list(range(10))

    for item in items:
        q.put(item)

    for item in items:
        assert item == q.get()
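test_simple_usage takes a ray_start_regular pytest fixture that lives in Ray's test conftest, not on this page; a minimal stand-in looks like:

import pytest
import ray

@pytest.fixture
def ray_start_regular():
    # Start a local Ray instance for the test and tear it down afterwards.
    ray.init()
    yield
    ray.shutdown()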
Example #5
def test_async():
    q = Queue()

    items = set(range(10))
    producers = [  # noqa
        put_async.remote(q, item, True, None, 0.5) for item in items
    ]
    consumers = [get_async.remote(q, True, None, 0) for _ in items]

    result = set(ray.get(consumers))

    assert items == result
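test_async relies on the put_async and get_async remote helpers defined inside Example #20 below; they are reproduced here so this snippet reads standalone:

import time
import ray

@ray.remote
def put_async(queue, item, block, timeout, sleep):
    # Sleep first so the consumer side gets scheduled, then put.
    time.sleep(sleep)
    queue.put(item, block, timeout)

@ray.remote
def get_async(queue, block, timeout, sleep):
    time.sleep(sleep)
    return queue.get(block, timeout)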
Example #6
def test_async_put(ray_start_regular):
    q = Queue(1)
    q.put(1)
    future = async_put.remote(q, 2)

    with pytest.raises(Full):
        q.put_nowait(3)

    with pytest.raises(RayTimeoutError):
        ray.get(future, timeout=0.1)  # task not canceled on timeout.

    assert q.get() == 1
    assert q.get() == 2
Example #7
def test_qsize():
    q = Queue()

    items = list(range(10))
    size = 0

    assert q.qsize() == size

    for item in items:
        q.put(item)
        size += 1
        assert q.qsize() == size

    for item in items:
        assert q.get() == item
        size -= 1
        assert q.qsize() == size
Example #8
def test_async_get(ray_start_regular):
    q = Queue()
    future = async_get.remote(q)

    with pytest.raises(Empty):
        q.get_nowait()

    with pytest.raises(RayTimeoutError):
        ray.get(future, timeout=0.1)  # task not canceled on timeout.

    q.put(1)
    assert ray.get(future) == 1
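Examples #6 and #8 also use async_put and async_get, which are not defined on this page; given how they are called, they are presumably thin blocking remote wrappers like this sketch:

import ray

@ray.remote
def async_put(queue, item):
    queue.put(item)       # blocks until there is room

@ray.remote
def async_get(queue):
    return queue.get()    # blocks until an item arrives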
Example #9
class ReplayMemory:
    def __init__(self,
                 memory_size=20000,
                 # default_random_generator is defined elsewhere in the
                 # source project (e.g. numpy.random).
                 random_generator=default_random_generator,
                 memory_ratio=1.0):
        self.memory = Queue(maxsize=memory_size)
        self.random_generator = random_generator
        self.memory_ratio = memory_ratio

    def add(self, data, block=False):
        # Evict the oldest entry when the buffer is full.
        if self.memory.full():
            self.memory.get(True)
        # Subsample incoming data according to memory_ratio.
        if self.random_generator.rand() < self.memory_ratio:
            self.memory.put(data, block)

    def sample(self, n):
        assert n <= self.memory.size(), "Not enough replay memory"
        # Drain the queue so the entries can be indexed...
        data = []
        while self.memory.size() > 0:
            data.append(self.memory.get())
        # ...then put everything back so sampling is non-destructive.
        for item in data:
            self.memory.put(item)
        # n random indices in [0, len(data)) (numpy-style randint).
        sample_idx = self.random_generator.randint(len(data), size=n)
        samples = [data[i] for i in sample_idx]
        return torch.stack(samples, dim=0)
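A hypothetical use of ReplayMemory, assuming numpy's global generator as the random source and torch tensors as the stored transitions:

import numpy as np
import ray
import torch

ray.init(ignore_reinit_error=True)
memory = ReplayMemory(memory_size=100, random_generator=np.random)
for _ in range(10):
    memory.add(torch.randn(4))   # store one transition
batch = memory.sample(4)         # tensor of shape (4, 4)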
Example #10
    def time_put(self):
        queue = Queue(1000)
        for i in range(1000):
            queue.put(i)
Example #11
def test_put(ray_start_regular):

    q = Queue(1)

    item = 0
    q.put(item, block=False)
    assert q.get() == item

    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item

    with pytest.raises(ValueError):
        q.put(0, timeout=-1)

    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)

    with pytest.raises(Full):
        q.put(1, timeout=0.2)
Example #12
def test_get(ray_start_regular):

    q = Queue()

    item = 0
    q.put(item)
    assert q.get(block=False) == item

    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item

    with pytest.raises(ValueError):
        q.get(timeout=-1)

    with pytest.raises(Empty):
        q.get_nowait()

    with pytest.raises(Empty):
        q.get(timeout=0.2)
Example #13
def test_put():
    q = Queue(1)

    item = 0
    q.put(item, block=False)
    assert q.get() == item

    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item

    with pytest.raises(ValueError):
        q.put(0, timeout=-1)

    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)

    with pytest.raises(Full):
        q.put(1, timeout=0.2)

    q.get()
    q.put(1)

    get_id = get_async.remote(q, False, None, 0.2)
    q.put(2)

    assert ray.get(get_id) == 1
Example #14
def test_get():
    q = Queue()

    item = 0
    q.put(item)
    assert q.get(block=False) == item

    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item

    with pytest.raises(ValueError):
        q.get(timeout=-1)

    with pytest.raises(Empty):
        q.get_nowait()

    with pytest.raises(Empty):
        q.get(timeout=0.2)

    item = 0
    put_async.remote(q, item, True, None, 0.2)
    assert q.get() == item
Example #15
def test_put():
    start_ray()
    q = Queue(1)

    item = 0
    q.put(item, block=False)
    assert q.get() == item

    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item

    with pytest.raises(ValueError):
        q.put(0, timeout=-1)

    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)

    with pytest.raises(Full):
        q.put(1, timeout=0.2)

    q.get()
    q.put(1)

    get_id = get_async.remote(q, False, None, 0.2)
    q.put(2)

    assert ray.get(get_id) == 1
Example #16
def test_get():
    start_ray()
    q = Queue()

    item = 0
    q.put(item)
    assert q.get(block=False) == item

    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item

    with pytest.raises(ValueError):
        q.get(timeout=-1)

    with pytest.raises(Empty):
        q.get_nowait()

    with pytest.raises(Empty):
        q.get(timeout=0.2)

    item = 0
    put_async.remote(q, item, True, None, 0.2)
    assert q.get() == item
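Examples #15 and #16 call a start_ray() helper that is not shown on this page; a minimal stand-in (an assumption) would be:

import ray

def start_ray():
    # Initialize Ray once per process; skip if already running.
    if not ray.is_initialized():
        ray.init()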
Example #17
    def time_get(self):
        queue = Queue()
        for i in range(1000):
            queue.put(i)
        for _ in range(1000):
            queue.get()
Example #18
    def time_qsize(self):
        queue = Queue()
        for _ in range(1000):
            queue.qsize()
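The time_put, time_get and time_qsize fragments (Examples #10, #17 and #18) follow the airspeed-velocity convention of time_-prefixed methods on a benchmark class; a plausible harness around them (the class name and setup hook are assumptions):

import ray
from ray.util.queue import Queue  # older Ray: ray.experimental.queue

class QueueBenchmarkSuite:
    def setup(self):
        # asv calls setup() before each timed method.
        if not ray.is_initialized():
            ray.init()

    def time_put(self):
        queue = Queue(1000)
        for i in range(1000):
            queue.put(i)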
Example #19
    def train(self, writer: Logger):
        ray.init()
        os.makedirs(self.config.results_path, exist_ok=True)

        # Initialize workers
        training_worker = trainer.Trainer.options(
            num_gpus=1 if "cuda" in self.config.training_device else 0).remote(
                copy.deepcopy(self.muzero_weights), self.config)
        shared_storage_worker = shared_storage.SharedStorage.remote(
            copy.deepcopy(self.muzero_weights),
            self.game_name,
            self.config,
        )
        replay_buffer_worker = replay_buffer.ReplayBuffer.remote(
            self.config, shared_storage_worker)
        self_play_workers = [
            self_play.SelfPlay.remote(
                copy.deepcopy(self.muzero_weights),
                self.Game(self.config.seed + seed),
                self.config,
            ) for seed in range(self.config.num_actors)
        ]
        test_worker = self_play.SelfPlay.remote(
            copy.deepcopy(self.muzero_weights),
            self.Game(self.config.seed + self.config.num_actors),
            self.config,
        )
        queue = None
        if self.config.policy_update_rate > 0:
            if self.config.reanalyze_mode == "fast":
                reanalyze_worker = fast_reanalyze.ReanalyzeWorker.remote(
                    copy.deepcopy(self.muzero_weights), shared_storage_worker,
                    replay_buffer_worker, self.config)
                reanalyze_worker.update_policies.remote()
            else:
                queue = Queue()
                for i in range(self.config.num_reanalyze_cpus):
                    reanalyze_worker = reanalyze.ReanalyzeQueueWorker.remote(
                        copy.deepcopy(self.muzero_weights),
                        shared_storage_worker, replay_buffer_worker,
                        self.config, queue)
                    reanalyze_worker.fill_batch_queue.remote()
        # Launch workers
        [
            self_play_worker.continuous_self_play.remote(
                shared_storage_worker, replay_buffer_worker)
            for self_play_worker in self_play_workers
        ]
        test_worker.continuous_self_play.remote(shared_storage_worker, None,
                                                True)
        training_worker.continuous_update_weights.remote(
            replay_buffer_worker, shared_storage_worker, queue)

        # Save hyperparameters to TensorBoard
        hp_table = [
            "| {} | {} |".format(key, value)
            for key, value in self.config.__dict__.items()
        ]
        writer.add_text(
            "Hyperparameters",
            "| Parameter | Value |\n|-------|-------|\n" + "\n".join(hp_table),
        )
        # Monitor the workers in real time
        counter = 0
        infos = ray.get(shared_storage_worker.get_infos.remote())
        try:
            while infos["training_step"] < self.config.training_steps:
                # Get and save real time performance
                infos = ray.get(shared_storage_worker.get_infos.remote())
                writer.add_scalar(
                    "1.Total reward/1.Total reward",
                    infos["total_reward"],
                    counter,
                )
                writer.add_scalar(
                    "1.Total reward/2.Episode length",
                    infos["episode_length"],
                    counter,
                )
                writer.add_scalar(
                    "1.Total reward/3.Player 0 MuZero reward",
                    infos["player_0_reward"],
                    counter,
                )

                writer.add_scalar(
                    "1.Total reward/4.Player 1 Random reward",
                    infos["player_1_reward"],
                    counter,
                )
                writer.add_scalar(
                    "1.Total reward/5.Average reward",
                    infos["average_reward"],
                    counter,
                )
                writer.add_scalar(
                    "2.Workers/1.Self played games",
                    ray.get(replay_buffer_worker.get_self_play_count.remote()),
                    counter,
                )
                writer.add_scalar("2.Workers/2.Training steps",
                                  infos["training_step"], counter)
                writer.add_scalar(
                    "2.Workers/3.Self played games per training step ratio",
                    ray.get(replay_buffer_worker.get_self_play_count.remote())
                    / max(1, infos["training_step"]),
                    counter,
                )
                writer.add_scalar("2.Workers/4.Learning rate", infos["lr"],
                                  counter)
                writer.add_scalar(
                    "2.Workers/5.Self played test games",
                    infos["test_games"],
                    counter,
                )
                writer.add_scalar(
                    "2.Workers/6.Samples count per training step ratio",
                    infos["samples_count"] / max(1, infos["training_step"]),
                    counter,
                )
                writer.add_scalar(
                    "2.Workers/7.Samples count",
                    infos["samples_count"],
                    counter,
                )
                writer.add_scalar(
                    "2.Workers/8.Reanalyzed count",
                    infos["reanalyzed_count"],
                    counter,
                )
                writer.add_scalar(
                    "2.Workers/9.Reanalyzed count per samples count",
                    infos["reanalyzed_count"] / max(1, infos["samples_count"]),
                    counter,
                )
                writer.add_scalar(
                    "2.Workers/10.ReMCTS count",
                    infos["remcts_count"],
                    counter,
                )
                writer.add_scalar(
                    "2.Workers/11.ReMCTS count per samples count",
                    infos["remcts_count"] / max(1, infos["samples_count"]),
                    counter,
                )
                writer.add_scalar("3.Loss/1.Total weighted loss",
                                  infos["total_loss"], counter)
                writer.add_scalar("3.Loss/Value loss", infos["value_loss"],
                                  counter)
                writer.add_scalar("3.Loss/Reward loss", infos["reward_loss"],
                                  counter)
                writer.add_scalar("3.Loss/Policy loss", infos["policy_loss"],
                                  counter)
                print(
                    "Last test reward: {0:.2f}. Training step: {1}/{2}. Played games: {3}. Loss: {4:.2f}"
                    .format(
                        infos["total_reward"],
                        infos["training_step"],
                        self.config.training_steps,
                        ray.get(
                            replay_buffer_worker.get_self_play_count.remote()),
                        infos["total_loss"],
                    ),
                    end="\r",
                )
                counter += 1
                time.sleep(0.5)
        except KeyboardInterrupt:
            # Uncomment the raise below to abort on Ctrl-C; leaving it
            # commented stops monitoring but still saves the weights.
            # raise
            pass
        self.muzero_weights = ray.get(
            shared_storage_worker.get_target_network_weights.remote())
        # End running actors
        ray.shutdown()
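train expects a writer exposing add_text and add_scalar, which matches TensorBoard's SummaryWriter; a hypothetical invocation (the MuZero driver class and game name are assumptions):

from torch.utils.tensorboard import SummaryWriter

muzero = MuZero("cartpole")      # hypothetical driver exposing train()
writer = SummaryWriter(log_dir=muzero.config.results_path)
muzero.train(writer)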
Example #20
def test_queue(ray_start_regular):
    @ray.remote
    def get_async(queue, block, timeout, sleep):
        time.sleep(sleep)
        return queue.get(block, timeout)

    @ray.remote
    def put_async(queue, item, block, timeout, sleep):
        time.sleep(sleep)
        queue.put(item, block, timeout)

    # Test simple usage.

    q = Queue()

    items = list(range(10))

    for item in items:
        q.put(item)

    for item in items:
        assert item == q.get()

    # Test asynchronous usage.

    q = Queue()

    items = set(range(10))
    producers = [  # noqa
        put_async.remote(q, item, True, None, 0.5) for item in items
    ]
    consumers = [get_async.remote(q, True, None, 0) for _ in items]

    result = set(ray.get(consumers))

    assert items == result

    # Test put.

    q = Queue(1)

    item = 0
    q.put(item, block=False)
    assert q.get() == item

    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item

    with pytest.raises(ValueError):
        q.put(0, timeout=-1)

    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)

    with pytest.raises(Full):
        q.put(1, timeout=0.2)

    q.get()
    q.put(1)

    get_id = get_async.remote(q, False, None, 0.2)
    q.put(2)

    assert ray.get(get_id) == 1

    # Test get.

    q = Queue()

    item = 0
    q.put(item)
    assert q.get(block=False) == item

    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item

    with pytest.raises(ValueError):
        q.get(timeout=-1)

    with pytest.raises(Empty):
        q.get_nowait()

    with pytest.raises(Empty):
        q.get(timeout=0.2)

    item = 0
    put_async.remote(q, item, True, None, 0.2)
    assert q.get() == item

    # Test qsize.

    q = Queue()

    items = list(range(10))
    size = 0

    assert q.qsize() == size

    for item in items:
        q.put(item)
        size += 1
        assert q.qsize() == size

    for item in items:
        assert q.get() == item
        size -= 1
        assert q.qsize() == size
Example #22
def evaluation(
    evaluation_config_path="./configs/evaluation/fast_reanalyze_evaluation.toml"
):
    t1 = time.time()
    ray.init()
    config = load_toml(evaluation_config_path)
    api = wandb.Api()
    if len(config.run_ids) > 0:
        runs = [
            api.run(path=f"{config.entity}/{config.project_name}/{run_id}")
            for run_id in config.run_ids
        ]
    else:
        runs = api.runs(path=f"{config.entity}/{config.project_name}",
                        filters=config.filters)
    results = SharedResults.remote(num_episodes=config.num_episodes)

    job_queue = Queue()
    # Fill the queue with models to evaluate
    for run in runs:
        files = run.files()
        print(files)
        env_config_file = find_env_config(files.objects, r"(?:^|\s)\w*(?=\.py)")
        try:
            weights_file_result = run.files("model.weights")
            if env_config_file is None:
                continue
            env_config_name = os.path.splitext(env_config_file.name)[0]
            # if os.path.exists(os.path.join(ModelEvaluator.CONFIGS_DIR_PATH, env_config_file.name)) is False:
            env_config_file.download(True,
                                     root=ModelEvaluator.CONFIGS_DIR_PATH)
            weight_file_path = os.path.join(ModelEvaluator.WEIGHTS_DIR_PATH,
                                            env_config_name,
                                            f"{run.id}.weights")
            if not os.path.exists(weight_file_path):
                pathlib.Path(os.path.dirname(weight_file_path)).mkdir(
                    parents=True, exist_ok=True)
                weights_file = weights_file_result[0].download(
                    replace=True, root=ModelEvaluator.WEIGHTS_DIR_PATH)
                # Keep weight_file_path pointing at the moved file.
                shutil.move(weights_file.name, weight_file_path)
                del weights_file

            for seed in range(config.num_episodes):
                job_queue.put(
                    (env_config_name, weight_file_path, env_config_file, seed))
        except Exception as err:
            print(f"{run.name} failure: {err}")

    # Start the model evaluator worker
    evaluators = []
    for _ in range(config.num_workers):
        model_evaluator = ModelEvaluator.remote(job_queue, results,
                                                config.num_episodes)
        evaluators.append(model_evaluator.evaluate.remote())
    # Wait for all the workers to be done
    ray.get(evaluators)
    # Save the results
    ids_string = '_'.join(config.run_ids[-10:])
    filter_string = '_'.join(
        [f"{key}-{value}" for key, value in config.filters.items()])
    with open(
            f'evaluation_results/test_results_{ids_string}_{filter_string}.json',
            'w') as outfile:
        json.dump(ray.get(results.get_result.remote()), outfile)
    print(f"Time taken : {time.time() - t1}")
Example #23
def test_queue(ray_start):
    @ray.remote
    def get_async(queue, block, timeout, sleep):
        time.sleep(sleep)
        return queue.get(block, timeout)

    @ray.remote
    def put_async(queue, item, block, timeout, sleep):
        time.sleep(sleep)
        queue.put(item, block, timeout)

    # Test simple usage.

    q = Queue()

    items = list(range(10))

    for item in items:
        q.put(item)

    for item in items:
        assert item == q.get()

    # Test asynchronous usage.

    q = Queue()

    items = set(range(10))
    producers = [  # noqa
        put_async.remote(q, item, True, None, 0.5) for item in items
    ]
    consumers = [get_async.remote(q, True, None, 0) for _ in items]

    result = set(ray.get(consumers))

    assert items == result

    # Test put.

    q = Queue(1)

    item = 0
    q.put(item, block=False)
    assert q.get() == item

    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item

    with pytest.raises(ValueError):
        q.put(0, timeout=-1)

    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)

    with pytest.raises(Full):
        q.put(1, timeout=0.2)

    q.get()
    q.put(1)

    get_id = get_async.remote(q, False, None, 0.2)
    q.put(2)

    assert ray.get(get_id) == 1

    # Test get.

    q = Queue()

    item = 0
    q.put(item)
    assert q.get(block=False) == item

    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item

    with pytest.raises(ValueError):
        q.get(timeout=-1)

    with pytest.raises(Empty):
        q.get_nowait()

    with pytest.raises(Empty):
        q.get(timeout=0.2)

    item = 0
    put_async.remote(q, item, True, None, 0.2)
    assert q.get() == item

    # Test qsize.

    q = Queue()

    items = list(range(10))
    size = 0

    assert q.qsize() == size

    for item in items:
        q.put(item)
        size += 1
        assert q.qsize() == size

    for item in items:
        assert q.get() == item
        size -= 1
        assert q.qsize() == size