def get_batch(
    flags,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    buffers: Buffers,
    initial_agent_state_buffers,
    timings,
    lock=threading.Lock(),
):
    with lock:
        timings.time("lock")
        indices = [full_queue.get() for _ in range(flags.batch_size)]
        timings.time("dequeue")
    batch = {
        key: torch.stack([buffers[key][m] for m in indices], dim=1) for key in buffers
    }
    # NOTE: AttentionNet is batch first.
    initial_agent_state = tuple(
        torch.cat(ts, dim=0)
        for ts in zip(*[initial_agent_state_buffers[m] for m in indices])
    )
    timings.time("batch")
    for m in indices:
        free_queue.put(m)
    timings.time("enqueue")
    batch = {k: t.to(device=flags.device, non_blocking=True) for k, t in batch.items()}
    initial_agent_state = tuple(
        t.to(device=flags.device, non_blocking=True) for t in initial_agent_state
    )
    timings.time("device")
    return batch, initial_agent_state
def get_batch(
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    buffers: Buffers,
    flags,
    timings,
    lock=threading.Lock(),
) -> typing.Dict[str, torch.Tensor]:
    with lock:
        timings.time('lock')
        indices = [full_queue.get() for _ in range(flags.batch_size)]
        timings.time('dequeue')
    batch = {
        key: torch.stack([buffers[key][m] for m in indices], dim=1) for key in buffers
    }
    timings.time('batch')
    for m in indices:
        free_queue.put(m)
    timings.time('enqueue')
    batch = {k: t.to(device=flags.device, non_blocking=True) for k, t in batch.items()}
    timings.time('device')
    return batch
def get_batch(
    flags,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    buffers: Buffers,
    timings,
    lock=threading.Lock(),
):
    with lock:
        timings.time("lock")
        indices = [full_queue.get() for _ in range(flags.batch_size)]
        timings.time("dequeue")
    batch = {
        key: torch.stack([buffers[key][m] for m in indices], dim=1) for key in buffers
    }
    timings.time("batch")
    for m in indices:
        free_queue.put(m)
    timings.time("enqueue")
    batch = {k: t.to(device=flags.device, non_blocking=True) for k, t in batch.items()}
    timings.time("device")
    return batch
def get_batch(
    flags,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    buffers: Buffers,
    initial_agent_state_buffers,
    timings,
    lock=threading.Lock(),
):
    with lock:
        timings.time("lock")
        indices = [full_queue.get() for _ in range(flags.batch_size)]
        timings.time("dequeue")
    batch = {
        key: torch.stack([buffers[key][m] for m in indices], dim=1) for key in buffers
    }
    # Stack the (h, c) pairs across actors; torch.stack takes dim, not axis.
    initial_agent_state = [
        torch.stack([initial_agent_state_buffers[m][i][0] for m in indices], dim=0)
        for i in range(2)
    ]
    # print("initial_agent_state[0].shape: ", initial_agent_state[0].shape)
    timings.time("batch")
    for m in indices:
        free_queue.put(m)
    timings.time("enqueue")
    batch = {k: t.to(device=flags.device, non_blocking=True) for k, t in batch.items()}
    initial_agent_state = [
        t.to(device=flags.device, non_blocking=True) for t in initial_agent_state
    ]
    timings.time("device")
    return batch, initial_agent_state
def get_batch(
    flags,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    buffers,
    initial_agent_state_buffers,
    lock=threading.Lock(),
):
    with lock:
        indices = [full_queue.get() for _ in range(flags.batch_size)]
    batch = {
        key: torch.stack([buffers[key][m] for m in indices], dim=1) for key in buffers
    }
    initial_agent_state = (
        torch.cat(ts, dim=1)
        for ts in zip(*[initial_agent_state_buffers[m] for m in indices])
    )
    for m in indices:
        free_queue.put(m)
    batch = {k: t.to(device=flags.device, non_blocking=True) for k, t in batch.items()}
    initial_agent_state = tuple(
        t.to(device=flags.device, non_blocking=True) for t in initial_agent_state
    )
    return batch, initial_agent_state
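# ---------------------------------------------------------------------------
# Context: all of the get_batch() variants above are driven the same way from
# a learner thread. The sketch below is illustrative only; get_batch and learn
# stand for the variants in this file, and the monobeast-style flags
# (total_steps, batch_size, ...) are assumed rather than taken from any one
# snippet.
# ---------------------------------------------------------------------------
def batch_and_learn(flags, model, learner_model, optimizer, free_queue,
                    full_queue, buffers, initial_agent_state_buffers, timings):
    steps = 0
    while steps < flags.total_steps:
        batch, initial_agent_state = get_batch(
            flags, free_queue, full_queue, buffers,
            initial_agent_state_buffers, timings)
        # learn() is assumed to run the V-trace update on the batch and to
        # return the number of environment steps it consumed (T * B).
        steps += learn(flags, model, learner_model, batch,
                       initial_agent_state, optimizer)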
def act(
    flags,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    initial_agent_state_buffers,
    level_name,
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = prof.Timings()  # Keep track of how fast things are.

        seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        # changed next line: create_env now takes the level name and seed.
        gym_env = create_env(flags, level_name, seed)
        env = environment.Environment(gym_env)
        env_output = env.initial()
        agent_state = model.initial_state(batch_size=1)
        agent_output, unused_state = model(env_output, agent_state)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            for i, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][i][...] = tensor

            # Do new rollout.
            for t in range(flags.unroll_length):
                timings.reset()

                with torch.no_grad():
                    agent_output, agent_state = model(env_output, agent_state)
                timings.time("model")

                env_output = env.step(agent_output["action"])
                timings.time("step")

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]
                timings.time("write")
            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        print()
        raise e
def act(
    flags,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers,
    initial_agent_state_buffers,
):
    try:
        logging.info("Actor %i started.", actor_index)

        gym_env = create_env(
            flags.env,
            savedir=flags.rundir,
            archivefile="nethack.%i.%%(pid)i.%%(time)s.zip" % actor_index,
        )
        env = ResettingEnvironment(gym_env)
        env_output = env.initial()
        agent_state = model.initial_state(batch_size=1)
        agent_output, unused_state = model(env_output, agent_state)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            for i, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][i][...] = tensor

            # Do new rollout.
            for t in range(flags.unroll_length):
                with torch.no_grad():
                    agent_output, agent_state = model(env_output, agent_state)
                env_output = env.step(agent_output["action"])

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]
            full_queue.put(index)
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        print()
        raise
def act(
    i: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    flags,
):
    try:
        logging.info('Actor %i started.', i)
        timings = prof.Timings()  # Keep track of how fast things are.

        gym_env = Net.create_env(flags)
        seed = i ^ int.from_bytes(os.urandom(4), byteorder='little')
        gym_env.seed(seed)
        env = environment.Environment(gym_env)
        env_output = env.initial()
        agent_output = model(env_output)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]

            # Do new rollout.
            for t in range(flags.unroll_length):
                timings.reset()

                with torch.no_grad():
                    agent_output = model(env_output)
                timings.time('model')

                env_output = env.step(agent_output['action'])
                timings.time('step')

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]
                timings.time('write')
            full_queue.put(index)

        if i == 0:
            logging.info('Actor %i: %s', i, timings.summary())
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error('Exception in worker process %i', i)
        traceback.print_exc()
        print()
        raise e
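# ---------------------------------------------------------------------------
# Context: every act() variant above expects the same queue handshake. A
# minimal sketch of the main-process wiring, following the monobeast pattern;
# flags, model, buffers and initial_agent_state_buffers are assumed to exist
# as in the snippets above, and this function is illustrative, not part of
# any one of them.
# ---------------------------------------------------------------------------
import torch.multiprocessing as mp

def start_actors(flags, model, buffers, initial_agent_state_buffers):
    ctx = mp.get_context("fork")
    free_queue = ctx.SimpleQueue()
    full_queue = ctx.SimpleQueue()

    # Hand every rollout-buffer index to the actors before they start.
    for m in range(flags.num_buffers):
        free_queue.put(m)

    actor_processes = []
    for actor_index in range(flags.num_actors):
        actor = ctx.Process(
            target=act,
            args=(flags, actor_index, free_queue, full_queue, model, buffers,
                  initial_agent_state_buffers),
        )
        actor.start()
        actor_processes.append(actor)
    return free_queue, full_queue, actor_processes

# Shutdown works by sentinel: putting one None per actor onto free_queue makes
# free_queue.get() return None inside act(), which breaks the actor loop; the
# main process then joins the actor processes.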
def main():
    episode = 0
    path = "/home/michael/dev/fyp/AIGym/MP-Conv-Pong/"
    mp.set_start_method('spawn')
    worker_count = 3  # mp.cpu_count()
    learning_rate = 1e-3

    model = Model(2)
    criterion = nn.CrossEntropyLoss(reduction='none')
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if episode > 0:
        model.load_state_dict(torch.load(path + "Models/" + str(episode)))
        optimizer.load_state_dict(torch.load(path + "Optimizers/" + str(episode)))

    model.cuda()
    model.share_memory()

    envs = [gym.make("Pong-v0") for i in range(worker_count)]
    epoch_size = 1
    batch_save = 128
    best_score = None
    running_reward = None
    reward_queue = SimpleQueue()

    # Start workers.
    workers = [
        Worker(envs[i], epoch_size, model, criterion, optimizer, reward_queue,
               str(i + 1))
        for i in range(worker_count)
    ]
    for w in workers:
        w.start()

    # Gather rewards.
    while True:
        reward = reward_queue.get()
        if isinstance(reward, Exception):
            print(reward)
        else:
            episode += 1
            if episode % batch_save == 0:
                torch.save(model.state_dict(), path + "Models/" + str(episode))
                torch.save(optimizer.state_dict(),
                           path + "Optimizers/" + str(episode))
            if best_score is None:
                best_score = reward
            elif reward > best_score:
                best_score = reward
            running_reward = (reward if running_reward is None
                              else running_reward * 0.95 + reward * 0.05)
            print("episode {:4.0f} complete - average reward = {:3.0f}, "
                  "last score was = {:3.0f}, best score is = {:3.0f}".format(
                      episode, running_reward, reward, best_score))
def _generate_parallel(self, iteration, network, device, num_workers):
    q, r = divmod(self.remaining_games, num_workers)
    num_active_workers = Value('i', num_workers)
    resign_threshold = Value('d', self.resign_mgr.threshold())
    evaluator_mgr = BulkEvaluatorManager([network], device, num_workers)
    output_queue = SimpleQueue()

    # start the workers
    workers = []
    for worker_id in range(num_workers):
        num_games = q + 1 if worker_id < r else q
        evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
        worker = Process(
            target=self._worker_job,
            args=(worker_id, num_games, num_active_workers, resign_threshold,
                  evaluator, output_queue),
        )
        workers.append(worker)
        worker.start()

    # start evaluator server
    server = evaluator_mgr.get_server(num_active_workers)
    server.start()

    # collect the examples generated by workers
    while num_active_workers.value > 0 or not output_queue.empty():
        examples, resign_value_history, result = output_queue.get()
        self.example_pool += examples
        self.game_length.append(len(examples))

        # add the history into the resignation manager to update the threshold
        if resign_value_history is not None:
            self.resign_mgr.add(resign_value_history, result)
            resign_threshold.value = self.resign_mgr.threshold()

        self.remaining_games -= 1

        # periodically save the progress
        if (self.conf.GAMES_PER_ITERATION - self.remaining_games) \
                % self.conf.EXAMPLE_POOL_SAVE_FREQUENCY == 0:
            self.save(iteration)
            log.info(
                f'[iter={iteration}] ExamplePool: checkpoint saved, '
                f'{self.remaining_games} games remaining'
            )

    for worker in workers:
        worker.join()
    server.join()
def get_batch(
    flags,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    buffers: Buffers,
    initial_agent_state_buffers,
    timings,
    lock=threading.Lock(),
):
    # need to make sure that we wait until batch_size trajectories/rollouts
    # have been put into the queue
    with lock:
        timings.time("lock")
        # get the indices of actors "offering" trajectories/rollouts to be
        # processed by the learner
        indices = [full_queue.get() for _ in range(flags.batch_size)]
        timings.time("dequeue")
    # create the batch as a dictionary for all the data in the buffers (see the
    # act() function for the list of keys), where each entry is a tensor of
    # these values stacked across actors along the first dimension, which I
    # believe should be the "batch dimension" (see _format_frame())
    batch = {
        key: torch.stack([buffers[key][m] for m in indices], dim=1) for key in buffers
    }
    # similar for the initial agent states, where I think the tuples are
    # concatenated to become torch tensors
    initial_agent_state = (
        torch.cat(ts, dim=1)
        for ts in zip(*[initial_agent_state_buffers[m] for m in indices])
    )
    timings.time("batch")
    # once the data has been "transferred" into batch and initial_agent_state,
    # signal to the actors that the data has been processed
    for m in indices:
        free_queue.put(m)
    timings.time("enqueue")
    # move the data to the right device (e.g. GPU)
    batch = {k: t.to(device=flags.device, non_blocking=True) for k, t in batch.items()}
    initial_agent_state = tuple(
        t.to(device=flags.device, non_blocking=True) for t in initial_agent_state
    )
    timings.time("device")
    return batch, initial_agent_state
def get_batch(
    flags,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    buffers: Buffers,
    initial_agent_state_buffers,
    timings,
    lock=threading.Lock(),
):
    with lock:
        timings.time("lock")
        indices = [full_queue.get() for _ in range(flags.batch_size)]
        # TODO: Check whether emptying full_queue and then re-adding to it takes
        # very long; it seems like the only way to ensure a batch of
        # similar-length elements. One problem with doing this is that if we get
        # a really short trajectory, we may never end up using it.
        # DON'T CHANGE THIS FOR NOW.
        timings.time("dequeue")
    batch = {
        key: torch.stack([buffers[key][m] for m in indices], dim=1) for key in buffers
    }
    initial_agent_state = (
        torch.cat(ts, dim=1)
        for ts in zip(*[initial_agent_state_buffers[m] for m in indices])
    )
    timings.time("batch")
    for m in indices:
        free_queue.put(m)
    timings.time("enqueue")
    batch = {k: t.to(device=flags.device, non_blocking=True) for k, t in batch.items()}
    initial_agent_state = tuple(
        t.to(device=flags.device, non_blocking=True) for t in initial_agent_state
    )
    timings.time("device")
    return batch, initial_agent_state
def act(
    i: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    initial_agent_state_buffers,
    flags,
):
    try:
        log.info('Actor %i started.', i)
        timings = prof.Timings()

        gym_env = create_env(flags)
        seed = i ^ int.from_bytes(os.urandom(4), byteorder='little')
        gym_env.seed(seed)
        if flags.num_input_frames > 1:
            gym_env = FrameStack(gym_env, flags.num_input_frames)
        env = Environment(gym_env, fix_seed=flags.fix_seed, env_seed=flags.env_seed)

        env_output = env.initial()
        agent_state = model.initial_state(batch_size=1)
        agent_output, unused_state = model(env_output, agent_state)
        while True:
            index = free_queue.get()
            if index is None:
                break

            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            # Use a distinct loop variable here: reusing `i` would shadow the
            # actor index and break the `if i == 0` logging below.
            for j, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][j][...] = tensor

            for t in range(flags.unroll_length):
                timings.reset()

                with torch.no_grad():
                    agent_output, agent_state = model(env_output, agent_state)
                timings.time('model')

                env_output = env.step(agent_output['action'])
                timings.time('step')

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]
                timings.time('write')
            full_queue.put(index)

        if i == 0:
            log.info('Actor %i: %s', i, timings.summary())
    except KeyboardInterrupt:
        pass
    except Exception as e:
        logging.error('Exception in worker process %i', i)
        traceback.print_exc()
        print()
        raise e
def act(
    flags,
    gym_env,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    buffers: Buffers,
    actor_buffers: Buffers,
    actor_model_queues: List[mp.SimpleQueue],
    actor_env_queues: List[mp.SimpleQueue],
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = prof.Timings()  # Keep track of how fast things are.

        # seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        # gym_env.seed(seed)
        if flags.agent in ["CNN"]:
            env = environment.Environment(gym_env, "image")
        elif flags.agent in ["NLM", "KBMLP", "GCN"]:
            if flags.state in ["relative", "integer", "block"]:
                env = environment.Environment(gym_env, "VKB")
            elif flags.state == "absolute":
                env = environment.Environment(gym_env, "absVKB")
        env_output = env.initial()
        for key in env_output:
            actor_buffers[key][actor_index][0] = env_output[key]
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in actor_buffers:
                buffers[key][index][0] = actor_buffers[key][actor_index][0]

            # Do new rollout.
            for t in range(flags.unroll_length):
                timings.reset()

                actor_model_queues[actor_index].put(actor_index)
                env_info = actor_env_queues[actor_index].get()
                if env_info == "exit":
                    return
                timings.time("model")

                env_output = env.step(actor_buffers["action"][actor_index][0])
                timings.time("step")

                for key in actor_buffers:
                    buffers[key][index][t + 1] = actor_buffers[key][actor_index][0]
                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in env_output:
                    actor_buffers[key][actor_index][0] = env_output[key]
                timings.time("write")
            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        print()
        raise e
            epoch_reward += r
            history["observation"].append(x.cpu())
            history["reward"].append(torch.FloatTensor(1).fill_(r))

            self.reward_queue.put(epoch_reward)
            dataset = compileHistory(history)
            train(self.model, self.criterion, self.optimizer, dataset)
        except Exception as identifier:
            self.reward_queue.put(identifier)
            self.reward_queue.put(traceback.format_exc())


if __name__ == '__main__':
    mp.set_start_method('spawn')
    learning_rate = 1e-3

    model = Model(2)
    model.cuda()
    model.share_memory()
    criterion = nn.CrossEntropyLoss(reduction='none')
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    env = gym.make("Pong-v0")
    epoch_size = 1
    queue = SimpleQueue()

    worker = Worker(env, epoch_size, model, criterion, optimizer, queue, "test")
    worker.start()

    while True:
        print(queue.get())
def train(config):
    task_queue = SimpleQueue()
    result_queue = SimpleQueue()
    stop = mp.Value('i', False)
    stats = SharedStats(config.state_dim)
    normalizers = [StaticNormalizer(config.state_dim)
                   for _ in range(config.num_workers)]
    for normalizer in normalizers:
        normalizer.offline_stats.load(stats)
    workers = [Worker(id, normalizers[id], task_queue, result_queue, stop, config)
               for id in range(config.num_workers)]
    for w in workers:
        w.start()

    opt = cma.CMAOptions()
    opt['tolfun'] = -config.target
    opt['popsize'] = config.pop_size
    opt['verb_disp'] = 0
    opt['verb_log'] = 0
    opt['maxiter'] = sys.maxsize
    es = cma.CMAEvolutionStrategy(config.initial_weight, config.sigma, opt)

    total_steps = 0
    initial_time = time.time()
    training_rewards = []
    training_steps = []
    training_timestamps = []
    test_mean, test_ste = test(config, config.initial_weight, stats)
    logger.info('total steps %d, %f(%f)' % (total_steps, test_mean, test_ste))
    training_rewards.append(test_mean)
    training_steps.append(0)
    training_timestamps.append(0)
    while True:
        solutions = es.ask()
        for id, solution in enumerate(solutions):
            task_queue.put((id, solution))
        while not task_queue.empty():
            continue
        result = []
        while len(result) < len(solutions):
            if result_queue.empty():
                continue
            result.append(result_queue.get())
        result = sorted(result, key=lambda x: x[0])
        total_steps += np.sum([r[2] for r in result])
        cost = [r[1] for r in result]
        best_solution = solutions[np.argmin(cost)]
        elapsed_time = time.time() - initial_time
        test_mean, test_ste = test(config, best_solution, stats)
        logger.info('total steps %d, test %f(%f), best %f, elapsed time %f' %
                    (total_steps, test_mean, test_ste, -np.min(cost), elapsed_time))
        training_rewards.append(test_mean)
        training_steps.append(total_steps)
        training_timestamps.append(elapsed_time)
        # with open('data/%s-best_solution_%s.bin' % (TAG, config.task), 'wb') as f:
        #     pickle.dump(solutions[np.argmin(result)], f)
        if config.max_steps and total_steps > config.max_steps:
            stop.value = True
            break
        cost = fitness_shift(cost)
        es.tell(solutions, cost)
        # es.disp()
        for normalizer in normalizers:
            stats.merge(normalizer.online_stats)
            normalizer.online_stats.zero()
        for normalizer in normalizers:
            normalizer.offline_stats.load(stats)

    stop.value = True
    for w in workers:
        w.join()
    return [training_rewards, training_steps, training_timestamps]
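# ---------------------------------------------------------------------------
# Context: the evolutionary train() loops (above and further below) assume
# workers that pop candidate solutions from task_queue and push
# (id, cost, steps) tuples to result_queue. The Worker below is an
# illustrative sketch matching the CMA-ES variant's protocol, not the
# repository's actual class; evaluate() is an assumed rollout helper.
# ---------------------------------------------------------------------------
import torch.multiprocessing as mp

class Worker(mp.Process):
    def __init__(self, id, normalizer, task_queue, result_queue, stop, config):
        super().__init__()
        self.id = id
        self.normalizer = normalizer
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.stop = stop
        self.config = config

    def run(self):
        # Poll for tasks until the trainer raises the shared stop flag.
        while not self.stop.value:
            if self.task_queue.empty():
                continue
            id, solution = self.task_queue.get()
            # evaluate() is assumed to return (episode_return, env_steps).
            reward, steps = evaluate(solution, self.normalizer, self.config)
            # CMA-ES minimizes, so report the negated return as cost.
            self.result_queue.put((id, -float(reward), steps))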
def act(
    flags,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    initial_agent_state_buffers,
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = prof.Timings()  # Keep track of how fast things are.

        gym_env = create_env(flags)
        seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        gym_env.seed(seed)
        env = environment.Environment(gym_env)
        env_output = env.initial()
        agent_state = model.initial_state(batch_size=1)
        mems, mem_padding = None, None
        agent_output, unused_state, mems, mem_padding, _ = model(
            env_output, agent_state, mems, mem_padding)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # No need to explicitly set 'done' to False here: we now step out of
            # the done state as soon as we arrive at 'done' (see below).
            # env_output['done'] = torch.tensor([0], dtype=torch.uint8)

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            for i, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][i][...] = tensor

            # Do one new rollout, until flags.unroll_length or the episode ends.
            t = 0
            while t < flags.unroll_length and not env_output['done'].item():
                timings.reset()

                with torch.no_grad():
                    agent_output, agent_state, mems, mem_padding, _ = model(
                        env_output, agent_state, mems, mem_padding)
                timings.time("model")

                # TODO: Shakti add action repeat?
                env_output = env.step(agent_output["action"])
                timings.time("step")

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]
                timings.time("write")
                t += 1

            # The in-loop `if done: mems = None` was removed since it could never
            # trigger there; the memory reset is handled here instead.
            if env_output['done'].item():
                mems = None
                # Take an arbitrary step to reset the environment.
                env_output = env.step(torch.tensor([2]))

            if t != flags.unroll_length:
                # TODO: I checked and this seems good, but Shakti can you check as well?
                buffers['done'][index][t + 1:] = torch.tensor(
                    [True]).repeat(flags.unroll_length - t)

            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        # print()
        raise e
def act(
    args,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    initial_agent_state_buffers,
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = Timings()  # Keep track of how fast things are.

        gym_env = create_env(args)
        seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        gym_env.seed(seed)
        env = Environment(gym_env)

        def make_env(args):
            def thunk():
                env = create_env(args)
                return env
            return thunk

        envs = DummyVecEnv([make_env(args) for i in range(1)])

        env_output = env.initial()
        envs.reset()
        agent_state = model.initial_state(batch_size=1)
        agent_output, unused_state = model(env_output, agent_state)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            for i, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][i][...] = tensor

            # Do new rollout.
            for t in range(args.unroll_length):
                timings.reset()

                with torch.no_grad():
                    agent_output, agent_state = model(env_output, agent_state)
                # timings.time("model")

                env_output = env.step(agent_output["action"])
                # envs.step((torch.randint(0, envs.action_space.n, (envs.num_envs,))).numpy())
                assert agent_output["action"] == env_output["last_action"]
                timings.time("step")

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]
                timings.time("write")
            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        print()
        raise e
def act(
    flags,
    game_params,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = prof.Timings()  # Keep track of how fast things are.

        seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        sc_env = init_game(game_params['env'], flags.map_name, random_seed=seed)
        obs_processer = IMPALA_ObsProcesser(action_table=model.action_table,
                                            **game_params['obs_processer'])
        env = environment.Environment(sc_env, obs_processer, seed)

        # initial rollout starts here
        env_output = env.initial()
        with torch.no_grad():
            agent_output = model.actor_step(env_output)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                if key not in ['sc_env_action']:  # no need to save this key on buffers
                    buffers[key][index][0, ...] = agent_output[key]

            # Do new rollout.
            for t in range(flags.unroll_length):
                timings.reset()

                env_output = env.step(agent_output["sc_env_action"])
                timings.time("step")

                with torch.no_grad():
                    agent_output = model.actor_step(env_output)
                timings.time("model")

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    if key not in ['sc_env_action']:  # no need to save this key on buffers
                        buffers[key][index][t + 1, ...] = agent_output[key]

                # env_output will be like
                #   s_{0}, ..., s_{T}
                #   act_mask_{0}, ..., act_mask_{T}
                #   discount_{0}, ..., discount_{T}
                #   r_{-1}, ..., r_{T-1}
                # agent_output will be like
                #   a_0, ..., a_T with a_t ~ pi(.|s_t)
                #   log_pi(a_0|s_0), ..., log_pi(a_T|s_T)
                # so the learner can use (s_i, act_mask_i) to predict log_pi_i
                timings.time("write")
            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        print()
        raise e
def train(config):
    task_queue = SimpleQueue()
    result_queue = SimpleQueue()
    stop = mp.Value('i', False)
    stats = SharedStats(config.state_dim)
    param = torch.FloatTensor(torch.from_numpy(config.initial_weight))
    param.share_memory_()
    n_params = len(param.numpy().flatten())
    if config.args.noise_type == 'lss':
        noise_sizes = [
            config.state_dim * config.hidden_size,
            config.hidden_size * config.hidden_size,
            config.hidden_size * config.action_dim,
        ]
    else:
        noise_sizes = None
    noise_generator = NoiseGenerator(n_params, config.pop_size,
                                     config.args.noise, noise_sizes=noise_sizes)
    normalizers = [
        StaticNormalizer(config.state_dim) for _ in range(config.num_workers)
    ]
    for normalizer in normalizers:
        normalizer.offline_stats.load(stats)
    workers = [
        Worker(id, param, normalizers[id], task_queue, result_queue, stop,
               noise_generator, config) for id in range(config.num_workers)
    ]
    for w in workers:
        w.start()

    training_rewards = []
    training_steps = []
    training_timestamps = []
    initial_time = time.time()
    total_steps = 0
    iteration = 0
    while not stop.value:
        test_mean, test_ste = test(config, param.numpy(), stats)
        elapsed_time = time.time() - initial_time
        training_rewards.append(test_mean)
        training_steps.append(total_steps)
        training_timestamps.append(elapsed_time)
        logger.info('Test: total steps %d, %f(%f), elapsed time %d' %
                    (total_steps, test_mean, test_ste, elapsed_time))

        for i in range(config.pop_size):
            task_queue.put(i)
        rewards = []
        epsilons = []
        steps = []
        while len(rewards) < config.pop_size:
            if result_queue.empty():
                continue
            epsilon, fitness, step = result_queue.get()
            epsilons.append(epsilon)
            rewards.append(fitness)
            steps.append(step)
        total_steps += np.sum(steps)
        r_mean = np.mean(rewards)
        r_std = np.std(rewards)
        # rewards = (rewards - r_mean) / r_std
        logger.info('Train: iteration %d, %f(%f)' %
                    (iteration, r_mean, r_std / np.sqrt(config.pop_size)))
        iteration += 1
        # if r_mean > config.target:
        if config.max_steps and total_steps > config.max_steps:
            stop.value = True
            break

        for normalizer in normalizers:
            stats.merge(normalizer.online_stats)
            normalizer.online_stats.zero()
        for normalizer in normalizers:
            normalizer.offline_stats.load(stats)

        if config.args.reward_type == 'rank':
            rewards = fitness_shift(rewards)
        gradient = np.asarray(epsilons) * np.asarray(rewards).reshape((-1, 1))
        gradient = np.mean(gradient, 0) / config.sigma
        gradient -= config.weight_decay * gradient
        if config.args.opt == 'adam':
            gradient = config.opt.update(gradient)
        gradient = torch.FloatTensor(gradient)
        param.add_(config.learning_rate * gradient)

    for w in workers:
        w.join()
    return [training_rewards, training_steps, training_timestamps]
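# ---------------------------------------------------------------------------
# Context: the parameter update above is the plain evolution-strategies
# gradient estimator: average epsilon_i * r_i over the population, scaled by
# 1/sigma. A tiny self-contained numeric check with made-up values:
# ---------------------------------------------------------------------------
import numpy as np

sigma = 0.1
epsilons = np.asarray([[0.2, -0.1],
                       [-0.3, 0.4]])      # one perturbation per row
rewards = np.asarray([1.0, -1.0])         # (possibly rank-shaped) returns

gradient = np.mean(epsilons * rewards.reshape((-1, 1)), axis=0) / sigma
# gradient == array([ 2.5, -2.5]): the step points toward the perturbation
# that scored better, which is what param.add_(learning_rate * gradient) uses.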
def act(
    flags,
    env: str,
    task: int,
    full_action_space: bool,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    initial_agent_state_buffers,
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = prof.Timings()  # Keep track of how fast things are.

        # create the environment from command line parameters
        # => could also create a special one which operates on a list of games
        #    (which we need)
        gym_env = create_env(
            env,
            frame_height=flags.frame_height,
            frame_width=flags.frame_width,
            gray_scale=(flags.aaa_input_format == "gray_stack"),
            full_action_space=full_action_space,
            task=task,
        )

        # generate a seed for the environment (NO HUMAN STARTS HERE!); could just
        # use this for all games wrapped by the environment for our application
        seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        gym_env.seed(seed)

        # wrap the environment; this is actually probably the point where we
        # could use multiple games, because the other environment is still one
        # from Gym
        env = environment.Environment(gym_env)

        # get the initial frame, reward, done, return, step, last_action
        env_output = env.initial()

        # perform the first step
        agent_state = model.initial_state(batch_size=1)
        agent_output, unused_state = model(env_output, agent_state)
        while True:
            # get a buffer index from the queue of free buffers (?)
            index = free_queue.get()
            # termination signal (?) for breaking out of this loop
            if index is None:
                break

            # Write old rollout end.
            # the keys here are (frame, reward, done, episode_return,
            # episode_step, last_action)
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            # here the keys are (policy_logits, baseline, action)
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            # I think the agent_state is just the RNN/LSTM state (which will be
            # the "initial" state for the next step); not sure why it's needed
            # though, because it really just seems to be the initial state before
            # starting to act; however, it might be randomly initialised, which
            # is why we might want it...
            for i, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][i][...] = tensor

            # Do new rollout.
            for t in range(flags.unroll_length):
                timings.reset()

                # forward pass without keeping track of gradients to get the
                # agent action
                with torch.no_grad():
                    agent_output, agent_state = model(env_output, agent_state)
                timings.time("model")

                # agent acting in the environment
                env_output = env.step(agent_output["action"])
                timings.time("step")

                # write the respective outputs of the current step (see above
                # for the list of keys)
                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]
                timings.time("write")

            # after finishing a trajectory, put the index in the "full queue",
            # presumably so that the data can be processed/sent to the learner
            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        print()
        raise e
def act(
    flags,
    game_params,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    initial_agent_state_buffers,
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = prof.Timings()  # Keep track of how fast things are.

        seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        sc_env = init_game(game_params['env'], flags.map_name, random_seed=seed)
        obs_processer = IMPALA_ObsProcesser_v2(env=sc_env,
                                               action_table=model.action_table,
                                               **game_params['obs_processer'])
        env = environment.Environment_v2(sc_env, obs_processer, seed)

        # initial rollout starts here
        env_output = env.initial()
        new_res = model.spatial_processing_block.new_res
        agent_state = model.spatial_processing_block.conv_lstm._init_hidden(
            batch_size=1, image_size=(new_res, new_res))
        with torch.no_grad():
            agent_output, new_agent_state = model.actor_step(env_output,
                                                             *agent_state[0])
        # _init_hidden yields [(h, c)], whereas actor_step takes only (h, c)
        agent_state = agent_state[0]
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                if key not in ['sc_env_action']:  # no need to save this key on buffers
                    buffers[key][index][0, ...] = agent_output[key]
            # keep the lstm state in sync with the environment / input to the
            # agent; that's why agent_state = new_agent_state gets executed
            # afterwards
            initial_agent_state_buffers[index][0][...] = agent_state[0]
            initial_agent_state_buffers[index][1][...] = agent_state[1]

            # Do new rollout.
            for t in range(flags.unroll_length):
                timings.reset()

                env_output = env.step(agent_output["sc_env_action"])
                timings.time("step")

                # update state
                agent_state = new_agent_state
                with torch.no_grad():
                    agent_output, new_agent_state = model.actor_step(env_output,
                                                                     *agent_state)
                timings.time("model")

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    if key not in ['sc_env_action']:  # no need to save this key on buffers
                        buffers[key][index][t + 1, ...] = agent_output[key]

                # env_output will be like
                #   s_{0}, ..., s_{T}
                #   act_mask_{0}, ..., act_mask_{T}
                #   discount_{0}, ..., discount_{T}
                #   r_{-1}, ..., r_{T-1}
                # agent_output will be like
                #   a_0, ..., a_T with a_t ~ pi(.|s_t)
                #   log_pi(a_0|s_0), ..., log_pi(a_T|s_T)
                # so the learner can use (s_i, act_mask_i) to predict log_pi_i
                timings.time("write")
            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())
    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        print()
        raise e
def act(
    i: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    episode_state_count_dict: dict,
    train_state_count_dict: dict,
    initial_agent_state_buffers,
    flags,
):
    try:
        log.info('Actor %i started.', i)
        timings = prof.Timings()

        gym_env = create_env(flags)
        seed = i ^ int.from_bytes(os.urandom(4), byteorder='little')
        gym_env.seed(seed)
        if flags.num_input_frames > 1:
            gym_env = FrameStack(gym_env, flags.num_input_frames)
        env = Environment(gym_env, fix_seed=flags.fix_seed, env_seed=flags.env_seed)

        env_output = env.initial()
        agent_state = model.initial_state(batch_size=1)
        agent_output, unused_state = model(env_output, agent_state)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            # Use a distinct loop variable: reusing `i` would shadow the actor
            # index used for logging below.
            for j, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][j][...] = tensor

            # Update the episodic state counts.
            episode_state_key = tuple(env_output['frame'].view(-1).tolist())
            if episode_state_key in episode_state_count_dict:
                episode_state_count_dict[episode_state_key] += 1
            else:
                episode_state_count_dict.update({episode_state_key: 1})
            buffers['episode_state_count'][index][0, ...] = \
                torch.tensor(1 / np.sqrt(episode_state_count_dict.get(episode_state_key)))

            # Reset the episodic state counts when the episode is over.
            if env_output['done'][0][0]:
                episode_state_count_dict = dict()

            # Update the training state counts if you're doing count-based exploration.
            if flags.model == 'count':
                train_state_key = tuple(env_output['frame'].view(-1).tolist())
                if train_state_key in train_state_count_dict:
                    train_state_count_dict[train_state_key] += 1
                else:
                    train_state_count_dict.update({train_state_key: 1})
                buffers['train_state_count'][index][0, ...] = \
                    torch.tensor(1 / np.sqrt(train_state_count_dict.get(train_state_key)))

            # Do new rollout.
            for t in range(flags.unroll_length):
                timings.reset()

                with torch.no_grad():
                    agent_output, agent_state = model(env_output, agent_state)
                timings.time('model')

                env_output = env.step(agent_output['action'])
                timings.time('step')

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]

                # Update the episodic state counts.
                episode_state_key = tuple(env_output['frame'].view(-1).tolist())
                if episode_state_key in episode_state_count_dict:
                    episode_state_count_dict[episode_state_key] += 1
                else:
                    episode_state_count_dict.update({episode_state_key: 1})
                buffers['episode_state_count'][index][t + 1, ...] = \
                    torch.tensor(1 / np.sqrt(episode_state_count_dict.get(episode_state_key)))

                # Reset the episodic state counts when the episode is over.
                if env_output['done'][0][0]:
                    episode_state_count_dict = dict()

                # Update the training state counts if you're doing count-based exploration.
                if flags.model == 'count':
                    train_state_key = tuple(env_output['frame'].view(-1).tolist())
                    if train_state_key in train_state_count_dict:
                        train_state_count_dict[train_state_key] += 1
                    else:
                        train_state_count_dict.update({train_state_key: 1})
                    buffers['train_state_count'][index][t + 1, ...] = \
                        torch.tensor(1 / np.sqrt(train_state_count_dict.get(train_state_key)))

                timings.time('write')
            full_queue.put(index)

        if i == 0:
            log.info('Actor %i: %s', i, timings.summary())
    except KeyboardInterrupt:
        pass
    except Exception as e:
        logging.error('Exception in worker process %i', i)
        traceback.print_exc()
        print()
        raise e
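# ---------------------------------------------------------------------------
# Context: all act()/get_batch() variants above assume the same shared-memory
# layout: Buffers maps each key to num_buffers tensors of shape
# (unroll_length + 1, ...). The sketch below follows the upstream monobeast
# create_buffers() pattern; the exact spec keys vary per variant, and some
# variants add keys such as 'episode_state_count'.
# ---------------------------------------------------------------------------
import typing
import torch

Buffers = typing.Dict[str, typing.List[torch.Tensor]]

def create_buffers(flags, obs_shape, num_actions) -> Buffers:
    T = flags.unroll_length
    specs = dict(
        frame=dict(size=(T + 1, *obs_shape), dtype=torch.uint8),
        reward=dict(size=(T + 1,), dtype=torch.float32),
        done=dict(size=(T + 1,), dtype=torch.bool),
        episode_return=dict(size=(T + 1,), dtype=torch.float32),
        episode_step=dict(size=(T + 1,), dtype=torch.int32),
        policy_logits=dict(size=(T + 1, num_actions), dtype=torch.float32),
        baseline=dict(size=(T + 1,), dtype=torch.float32),
        last_action=dict(size=(T + 1,), dtype=torch.int64),
        action=dict(size=(T + 1,), dtype=torch.int64),
    )
    buffers: Buffers = {key: [] for key in specs}
    for _ in range(flags.num_buffers):
        for key in buffers:
            # share_memory_() lets actor processes write rollouts in place.
            buffers[key].append(torch.empty(**specs[key]).share_memory_())
    return buffers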