Example #1
    def _compare_parallel(self, network, opponent_network, device, num_workers):
        q, r = divmod(self.conf.GAMES_PER_COMPARISON, num_workers)
        num_active_workers = Value('i', num_workers)
        evaluator_mgr = BulkEvaluatorManager(
            [network, opponent_network], device, num_workers)
        score = Value('i', 0)

        workers = []
        s = 0
        for worker_id in range(num_workers):
            num_games = q + 1 if worker_id < r else q
            evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
            opponent_evaluator = evaluator_mgr.get_evaluator(worker_id, 1)
            color = BLACK if s % 2 == 0 else WHITE
            s += num_games
            worker = Process(
                target=self._worker_job,
                args=(num_games, num_active_workers,
                      evaluator, opponent_evaluator, color, score),
            )
            workers.append(worker)
            worker.start()

        # start evaluator server
        server = evaluator_mgr.get_server(num_active_workers)
        server.start()

        for worker in workers:
            worker.join()
        server.join()

        return score.value / self.conf.GAMES_PER_COMPARISON
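
The `_worker_job` target referenced above is not part of this excerpt. A minimal, self-contained sketch of the shared-`Value` pattern it relies on (plain `multiprocessing`, dummy game results; everything other than the `score` and `num_active_workers` names is an assumption, not the project's code):

import random
from multiprocessing import Process, Value


def worker_job(num_games, num_active_workers, score):
    # hypothetical worker: random results stand in for playing games
    try:
        for _ in range(num_games):
            if random.random() < 0.5:
                with score.get_lock():          # atomic update of the shared int
                    score.value += 1
    finally:
        with num_active_workers.get_lock():
            num_active_workers.value -= 1       # lets the parent/server know this worker is done


if __name__ == '__main__':
    score = Value('i', 0)
    num_active_workers = Value('i', 4)
    workers = [Process(target=worker_job, args=(25, num_active_workers, score))
               for _ in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(score.value / 100)                    # fraction of the 100 games "won"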
Example #2
    def __init__(self, opt, world):
        super().__init__(opt)
        self.inner_world = world
        self.numthreads = opt['numthreads']

        self.sync = {  # synchronization primitives
            # semaphores for counting queued examples
            'queued_sem': Semaphore(0),  # counts num exs to be processed
            'threads_sem': Semaphore(0),  # counts threads
            'reset_sem': Semaphore(0),  # allows threads to reset

            # flags for communicating with threads
            'reset_flag': Value('b', False),  # threads should reset
            'term_flag': Value('b', False),  # threads should terminate

            # counters
            'epoch_done_ctr': Value('i', 0),  # number of done threads
            'total_parleys': Value('l', 0),  # number of parleys in threads
        }

        self.threads = []
        for i in range(self.numthreads):
            self.threads.append(
                HogwildProcess(i, opt, world.share(), self.sync))
            time.sleep(0.05)  # delay can help prevent deadlock in thread launches
        for t in self.threads:
            t.start()

        for _ in self.threads:
            # wait for threads to launch
            # this makes sure that no threads get examples before all are set up
            # otherwise they might reset one another after processing some exs
            self.sync['threads_sem'].acquire()
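
The handshake in the last loop only works if each `HogwildProcess` releases `threads_sem` exactly once after finishing its own setup. A minimal sketch of that convention with a hypothetical worker (the real `HogwildProcess` is not shown in this excerpt):

from multiprocessing import Process, Semaphore


def worker(threads_sem):
    # ... per-worker setup would happen here ...
    threads_sem.release()          # "I am ready"
    # ... the worker's main loop would start here ...


if __name__ == '__main__':
    threads_sem = Semaphore(0)
    procs = [Process(target=worker, args=(threads_sem,)) for _ in range(4)]
    for p in procs:
        p.start()
    for _ in procs:
        threads_sem.acquire()      # block until all four workers report ready
    print('all workers launched')
    for p in procs:
        p.join()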
Example #3
    def __init__(self, opt, world):
        super().__init__(opt)
        self.inner_world = world
        self.numthreads = opt['numthreads']

        self.sync = {  # synchronization primitives
            # semaphores for counting queued examples
            'queued_sem': Semaphore(0),  # counts num exs to be processed
            'threads_sem': Semaphore(0),  # counts threads
            'reset_sem': Semaphore(0),  # allows threads to reset

            # flags for communicating with threads
            'reset_flag': Value('b', False),  # threads should reset
            'term_flag': Value('b', False),  # threads should terminate

            # counters
            'epoch_done_ctr': Value('i', 0),  # number of done threads
            'total_parleys': Value('l', 0),  # number of parleys in threads
        }

        # don't let threads create more threads!
        self.threads = []
        for i in range(self.numthreads):
            self.threads.append(HogwildProcess(i, opt, world, self.sync))
        for t in self.threads:
            t.start()

        for _ in self.threads:
            self.sync['threads_sem'].acquire()
Example #4
    def __init__(self,
                 config,
                 share_batches=True,
                 manager=None,
                 new_process=True):
        if new_process and manager is None:
            manager = Manager()
        self.knows = Semaphore(0)  # > 0 if we know if any are coming
        # == 0 if DatasetReader is processing a command
        self.working = Semaphore(1 if new_process else 100)
        self.finished_reading = Lock()  # locked if we're still reading from file
        # number of molecules that have been sent to the pipe:
        self.in_pipe = Value('i', 0)

        # Tracking what's already been sent through the pipe:
        self._example_number = Value('i', 0)

        # The final kill switch:
        self._close = Value('i', 0)

        self.command_queue = manager.Queue(10)
        self.molecule_pipeline = None
        # alternative: manager.Queue(config.data.batch_queue_cap)
        self.batch_queue = Queue(config.data.batch_queue_cap)
        self.share_batches = share_batches

        self.dataset_reader = DatasetReader("dataset_reader",
                                            self,
                                            config,
                                            new_process=new_process)
        if new_process:
            self.dataset_reader.start()
Example #5
    def start(self) -> None:
        shards = glob.glob(self.file_path)
        # Ensure a consistent order before shuffling for testing.
        shards.sort()
        num_shards = len(shards)

        # If we want multiple epochs per read, put shards in the queue multiple times.
        self.input_queue = Queue(num_shards * self.epochs_per_read +
                                 self.num_workers)
        for _ in range(self.epochs_per_read):
            np.random.shuffle(shards)
            for shard in shards:
                self.input_queue.put(shard)

        # Then put a None per worker to signify no more files.
        for _ in range(self.num_workers):
            self.input_queue.put(None)

        assert not self.processes, "Process list non-empty! You must call QIterable.join() before restarting."
        self.num_active_workers = Value('i', self.num_workers)
        self.num_inflight_items = Value('i', 0)
        for worker_id in range(self.num_workers):
            process = Process(target=_worker,
                              args=(self.reader, self.input_queue,
                                    self.output_queue, self.num_active_workers,
                                    self.num_inflight_items, worker_id))
            logger.info(f"starting worker {worker_id}")
            process.start()
            self.processes.append(process)
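
The `_worker` target is not included in this excerpt. A rough, self-contained sketch of the sentinel pattern it would follow, reading shards until the per-worker `None` arrives and then decrementing `num_active_workers` (the worker body is an assumption):

from multiprocessing import Process, Queue, Value


def worker(input_queue, output_queue, num_active_workers, worker_id):
    # assumed body: consume shard paths until the per-worker None sentinel
    while True:
        shard = input_queue.get()
        if shard is None:                       # sentinel: no more shards
            break
        output_queue.put((worker_id, shard))    # stand-in for reading the shard
    with num_active_workers.get_lock():
        num_active_workers.value -= 1           # this worker is no longer active


if __name__ == '__main__':
    input_queue, output_queue = Queue(), Queue()
    num_active_workers = Value('i', 2)
    shards = ['shard_a.txt', 'shard_b.txt', 'shard_c.txt']
    for shard in shards:
        input_queue.put(shard)
    for _ in range(2):                          # one None per worker
        input_queue.put(None)
    procs = [Process(target=worker,
                     args=(input_queue, output_queue, num_active_workers, i))
             for i in range(2)]
    for p in procs:
        p.start()
    for _ in shards:                            # drain before joining
        print(output_queue.get())
    for p in procs:
        p.join()
    print('active workers left:', num_active_workers.value)   # 0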
Example #6
    def __init__(self, name, env_kwargs, model_kwargs, **kwargs):
        super().__init__(env_kwargs=env_kwargs, model_kwargs=model_kwargs)
        self.name = name
        self.num_processes = 16

        self._report_queue = Queue(maxsize=16)
        self._shared_global_t = Value('i', 0)
        self._shared_is_stopped = Value('i', False)
Example #7
    def _generate_parallel(self, iteration, network, device, num_workers):
        q, r = divmod(self.remaining_games, num_workers)
        num_active_workers = Value('i', num_workers)
        resign_threshold = Value('d', self.resign_mgr.threshold())
        evaluator_mgr = BulkEvaluatorManager([network], device, num_workers)
        output_queue = SimpleQueue()

        # start the workers
        workers = []
        for worker_id in range(num_workers):
            num_games = q + 1 if worker_id < r else q
            evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
            worker = Process(
                target=self._worker_job,
                args=(worker_id, num_games, num_active_workers,
                      resign_threshold, evaluator, output_queue),
            )
            workers.append(worker)
            worker.start()

        # start evaluator server
        server = evaluator_mgr.get_server(num_active_workers)
        server.start()

        # collect the examples generated by workers
        while num_active_workers.value > 0 or not output_queue.empty():
            examples, resign_value_history, result = output_queue.get()
            self.example_pool += examples
            self.game_length.append(len(examples))

            # add the history into resignation manager to update the threshold
            if resign_value_history is not None:
                self.resign_mgr.add(resign_value_history, result)
                resign_threshold.value = self.resign_mgr.threshold()

            self.remaining_games -= 1

            # periodically save the progress
            if (self.conf.GAMES_PER_ITERATION - self.remaining_games) \
                    % self.conf.EXAMPLE_POOL_SAVE_FREQUENCY == 0:
                self.save(iteration)
                log.info(
                    f'[iter={iteration}] ExamplePool: checkpoint saved, '
                    f'{self.remaining_games} games remaining'
                )

        for worker in workers:
            worker.join()
        server.join()
Example #8
    def __init__(self, world_class, opt, agents):
        super().__init__(opt)
        self.inner_world = world_class(opt, agents)

        self.queued_items = Semaphore(0)  # counts num exs to be processed
        self.epochDone = Condition()  # notifies when exs are finished
        self.terminate = Value('b', False)  # tells threads when to shut down
        self.cnt = Value('i', 0)  # number of exs that remain to be processed

        self.threads = []
        for i in range(opt['numthreads']):
            self.threads.append(
                HogwildProcess(i, world_class, opt, agents, self.queued_items,
                               self.epochDone, self.terminate, self.cnt))
        for t in self.threads:
            t.start()
Example #9
    def __init__(self, config: ParamDict, environment: Environment,
                 policy: Policy, filter_op: Filter):
        threads, gpu = config.require("threads", "gpu")
        super(Agent_sync, self).__init__(config, environment, policy,
                                         filter_op)

        # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update
        self._sync_signal = Value('i', 0)

        # sampler sub-process list
        self._sampler_proc = []

        # used to synchronize commands
        self._cmd_pipe = None
        self._param_pipe = None
        self._cmd_lock = Lock()

        cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
        param_pipe_child, param_pipe_parent = Pipe(duplex=False)
        self._cmd_pipe = cmd_pipe_parent
        self._param_pipe = param_pipe_parent
        for i_thread in range(threads):
            child_name = f"sampler_{i_thread}"
            worker_cfg = ParamDict({
                "seed": self.seed + 1024 + i_thread,
                "gpu": gpu
            })
            child = Process(target=Agent_sync._sampler_worker,
                            name=child_name,
                            args=(worker_cfg, cmd_pipe_child, param_pipe_child,
                                  self._cmd_lock, self._sync_signal,
                                  deepcopy(policy), deepcopy(environment),
                                  deepcopy(filter_op)))
            self._sampler_proc.append(child)
            child.start()
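
How the sampler workers consume `_sync_signal` is outside this excerpt. A hypothetical sketch of the convention described in the comment (-1: terminate, 0: normal running, >0: wait for a parameter update):

import time
from multiprocessing import Process, Value


def sampler_worker(sync_signal):
    while True:
        s = sync_signal.value
        if s < 0:                  # -1: terminate
            break
        if s > 0:                  # >0: restart / wait for a parameter update
            time.sleep(0.01)
            continue
        time.sleep(0.001)          # 0: normal running; sampling would happen here


if __name__ == '__main__':
    sync_signal = Value('i', 0)
    p = Process(target=sampler_worker, args=(sync_signal,))
    p.start()
    time.sleep(0.1)
    sync_signal.value = -1         # ask the worker to shut down
    p.join()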
Example #10
 def create(cls):
     if not hasattr(cls, 'length_to_eps'):
         # Maps episode length to list of episodes
         cls.length_to_eps = {}
     if not hasattr(cls, 'ep_indices'):
         # Set of episode indices already in the cache
         cls.ep_indices = set()
     if not hasattr(cls, 'batches'):
         # List of batches if popping batches
         cls.batches = []
     if not hasattr(cls, 'load_complete'):
         # If all episodes have been loaded into memory
         cls.load_complete = Value(ctypes.c_bool, False)
     if not hasattr(cls, 'batches_lock'):
         # Lock to access batches
         cls.batches_lock = Lock()
     if not hasattr(cls, 'cache_lock'):
         # Lock to access length_to_eps
         cls.cache_lock = Lock()
     if not hasattr(cls, 'fill_cache_lock'):
         # Lock for condition variables
         cls.fill_cache_lock = RLock()
     if not hasattr(cls, 'add_to_cache_cv'):
         # Condition notifying Loader to add to cache
         cls.add_to_cache_cv = Condition(lock=cls.fill_cache_lock)
     if not hasattr(cls, 'cache_filled_cv'):
         # Condition notifying teacher that cache has episodes
         cls.cache_filled_cv = Condition(lock=cls.fill_cache_lock)
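
A minimal sketch of how a loader and a consumer might use the primitives set up in `create()`: the loader fills the cache and notifies `cache_filled_cv`, then flips `load_complete`; the consumer waits on the condition. The thread bodies are assumptions, only the primitives mirror the snippet:

import ctypes
from multiprocessing import Value
from threading import Condition, RLock, Thread

length_to_eps = {}                                 # episode length -> episodes
load_complete = Value(ctypes.c_bool, False)
fill_cache_lock = RLock()
cache_filled_cv = Condition(lock=fill_cache_lock)


def loader(episodes):
    for ep in episodes:
        with fill_cache_lock:
            length_to_eps.setdefault(len(ep), []).append(ep)
            cache_filled_cv.notify()               # cache now has episodes
    load_complete.value = True


def consumer():
    with fill_cache_lock:
        while not length_to_eps and not load_complete.value:
            cache_filled_cv.wait(timeout=0.1)
    print('episodes cached:', sum(len(v) for v in length_to_eps.values()))


if __name__ == '__main__':
    t = Thread(target=loader, args=([['a'], ['b', 'c']],))
    c = Thread(target=consumer)
    c.start()
    t.start()
    t.join()
    c.join()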
Example #11
    def __init__(self,
                 chk_dir,
                 chk,
                 keep_epoch_chk=True,
                 overwrite=True,
                 mode=CFMode.AUTO,
                 chk_prefix='model_v_'):

        self.logger = logging.getLogger(__name__)
        self.chk_dir = chk_dir
        self.chk = chk
        self.keep_epoch_chk = keep_epoch_chk
        self.overwrite = overwrite
        self.chk_prefix = chk_prefix
        self.mode = mode
        self.chk_epoch_subdir = 'epoch'
        self.mp_manager = Manager()
        self.snapshot_copy = None

        self.cpu_side = False
        # Active snapshot, if true, don't snapshot again
        self.active_snapshot = Value('i', 0)
        self.lock = Lock()
        self.in_progress_snapshot = Value('i', 0)

        # Handle to the process performing checkpoint
        # Can be only one at any instant. A new checkpoint
        # cannot start unless the previous one completes
        self.chk_process = None

        # if `overwrite` is disabled, epoch checkpoints are always kept
        if self.overwrite is False and self.keep_epoch_chk is False:
            self.keep_epoch_chk = True

        # Global ID of checkpoints being written
        # Used to format the checkpoint path
        # Instantiate from chk when restoring
        self.chk_global_id = -1

        # Sorted List of available checkpoints (fnames)
        self.available_chk_iters = self.mp_manager.list()
        self.available_chk_epochs = self.mp_manager.list()
        self.initalize_chk_dir()

        self.logger.info("Available checkpoints : ")
        for item in self.available_chk_iters:
            self.logger.info(item)
Example #12
    def __init__(self, config: ParamDict, environment: Environment,
                 policy: Policy, filter_op: Filter):
        threads, gpu = config.require("threads", "gpu")
        threads_gpu = config["gpu threads"] if "gpu threads" in config else 2
        super(Agent_async, self).__init__(config, environment, policy,
                                          filter_op)

        # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update
        self._sync_signal = Value('i', 0)

        # environment sub-process list
        self._environment_proc = []
        # policy sub-process list
        self._policy_proc = []

        # used to synchronize policy parameters
        self._param_pipe = None
        self._policy_lock = Lock()
        # used to synchronize roll-out commands
        self._control_pipe = None
        self._environment_lock = Lock()

        step_pipe = []
        cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
        param_pipe_child, param_pipe_parent = Pipe(duplex=False)
        self._control_pipe = cmd_pipe_parent
        self._param_pipe = param_pipe_parent
        for i_envs in range(threads):
            child_name = f"environment_{i_envs}"
            step_pipe_pi, step_pipe_env = Pipe(duplex=True)
            step_lock = Lock()
            worker_cfg = ParamDict({
                "seed": self.seed + 1024 + i_envs,
                "gpu": gpu
            })
            child = Process(target=Agent_async._environment_worker,
                            name=child_name,
                            args=(worker_cfg, cmd_pipe_child, step_pipe_env,
                                  self._environment_lock, step_lock,
                                  self._sync_signal, deepcopy(environment),
                                  deepcopy(filter_op)))
            self._environment_proc.append(child)
            step_pipe.append((step_pipe_pi, step_lock))
            child.start()

        for i_policies in range(threads_gpu):
            child_name = f"policy_{i_policies}"
            worker_cfg = ParamDict({
                "seed": self.seed + 2048 + i_policies,
                "gpu": gpu
            })
            child = Process(target=Agent_async._policy_worker,
                            name=child_name,
                            args=(worker_cfg, param_pipe_child, step_pipe,
                                  self._policy_lock, self._sync_signal,
                                  deepcopy(policy)))
            self._policy_proc.append(child)
            child.start()
        sleep(5)
Example #13
    def __init__(self, args) -> None:
        """
        Constructor
        :param args: Cmd-line arguments
        """
        self.args = args

        # global counter
        self.T = Value('i', 0)
        self.global_reward = Value('d', -np.inf)

        # worker handling
        self.worker_pool = []

        # validity check for input parameter
        if args.optimizer not in ['rmsprop', 'adam']:
            raise ValueError(
                'The given optimizer "%s" is currently not supported. '
                'Choose either "rmsprop" or "adam".' % args.optimizer)
Example #14
 def __init__(self, inputs, mode):
     self.mode = mode
     self.inputs = inputs
     self.queue = Queue()
     self.control = Value('i', 1)
     if self.mode == 0:
         self.process = Process(target=self.worker,
                                args=(self.inputs, self.queue,
                                      self.control))
         self.process.start()
Example #15
    def __init__(self, *args):
        """
        Statistics process saves the statistics obtained from workers.
        In particular, the shared models are saved every Config.MODEL_SAVE_FREQUENCY episodes.
        Moreover, some statistics are logged every Config.LOG_STATS_FREQUENCY episodes.
        """
        super(StatProcess, self).__init__()
        self.episode_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
        self.ae_loss_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
        self.episode_count = Value('i', 0)
        self.model_save = Value('i', 0)
        self.exit_flag = Value('i', 0)

        #:obj:`dict`: Dictionary of DPS models for RL.
        self.agents = {}
        for model, env_id in zip(args, Config.ENV_IDS):
            self.agents[env_id] = model
        #float: Time at start for logging.
        self._start_time = time.time()
Example #16
    def init_data(self):
        self.is_working = False
        self.semaphore = True
        # whether the user has dragged the slider (default: False)
        self.is_change_bar = Value(c_bool, False)

        self.frame_index = Value('i', 0)
        self.share_lock = Lock()  #shared lock for frame_index
        self.share_lock2 = Lock()  # shared lock for frame_index

        self.mutex = threading.Lock()

        self.timer = QTimer(self)  # used for the updating of progress bar
        self.temp_timer = QTimer(self)  # used for detecting whether the frame_total is given
        self.frame_total = Value('i', -1)
        self.playable = Value(c_bool, True)
        self.is_working = Value(c_bool, False)
        manager = Manager()
        self.play_src = manager.Value(c_char_p, '0')  # records the path of the video being played
        self.mode = None  # 'online' or 'offline'
Example #17
 def add_agents(self, nb):
     old_length = len(self.agents)
     for index in range(old_length, old_length + nb):
         self.agents.append(
             Agent(id_=index,
                   prediction_queue=self.prediction_queue,
                   training_queue=self.training_queue,
                   states=self.train_set,
                   exit_flag=Value(c_bool, False),
                   statistics_queue=self.statistics_queue,
                   episode_counter=self.nb_episodes,
                   observation_shape=(self.channels, self.height,
                                      self.width),
                   action_space=self.n_outputs,
                   device=self.agent_device,
                   step_max=self.sequence_length))
Example #18
    def __init__(self, n_workers, actor, args):
        self._now_episode = Value('i', 0)

        self.queue = Queue()
        self.collect_event = Event()

        self.worker = []
        for i in range(n_workers):
            self.worker.append(
                Worker(self.queue, self.collect_event, actor, args, i))
        self.process = [
            Process(target=self.worker[i].run, args=(self._now_episode, ))
            for i in range(n_workers)
        ]

        for p in self.process:
            p.start()
        print(f'Start {n_workers} workers.')
Example #19
    def __init__(self, experience_q, prediction_q, observation_q, env_id,
                 episode_log_q, agent_id):
        """
        Workers are the agents interacting with the environment.
        Workers run a copy of the environment with their own specifications. 
        Each worker relies on Predictor processes to make decisions.
        Gathered experiences are submitted to a Queue on which the shared models are trained.

        Args:
            experience_q (mp.Queue): Shared memory queue containing experiences across workers of the same type.
            prediction_q (mp.Queue): Shared memory queue containing predictions of this worker.
            observation_q (mp.Queue): Shared memory queue containing observation across workers of the same type.
            env_id (str): The id of the environment instance this worker is interacting with.
            episode_log_q (mp.Queue): Shared memory queue containing the experience of past episodes.
            agent_id (int): The id of the worker process.
        """
        super(WorkerProcess, self).__init__()
        self.experience_q = experience_q
        self.prediction_q = prediction_q
        self.observation_q = observation_q
        self.env_id = env_id
        self.episode_log_q = episode_log_q
        self.id = agent_id

        #:class:`memory.ShortTermMemory`: Short term memory where the history is saved and experiences are memorized.
        self.memory = ShortTermMemory(Config.GLOW)
        #int: Signal for process exit.
        self.exit_flag = Value('i', 0)
        #torch.Tensor of float: Array of actions in one-hot encoding.
        self.actions = torch.Tensor(np.eye(Config.NUM_ACTIONS))
        #:class:`gym.Env`: The environment the agent interacts with.
        self.env = gym.make(Config.ENV_NAME, **Config.ENV_PARAMS[self.env_id])
        #bool: Boolean value that signals that an episode is finished.
        self.done = False
        #int: Current size of batches.
        self.batch_size = 0
        #torch.Tensor: Tensor of observation batch.
        self.o_batch = torch.Tensor([0.])
        #torch.Tensor: Tensor of action batch.
        self.a_batch = torch.Tensor([0.])
        #torch.Tensor: Tensor of target batch.
        self.t_batch = torch.Tensor([0.])
Example #20
    def __init__(self, autoencoder, optimizer_ae, agent, optimizer_ps, env_id,
                 select_data, experience_q, training_count, ae_loss_log_q,
                 trainer_id):
        """
        Trainers gather experiences and train their respective models.

        Args:
            autoencoder (:class:`base_networks.DenseAutoencoder`): The Server.autoencoder model.
            optimizer_ae (:class:`optim.Adam`): The Server.optimizer_ae for encoder.
            agent (:class:`base_networks.DeepPS`): The deep PS model for RL.
            optimizer_ps (:class:`optim.Adam`): The Server.optimizer_ps for deep PS.
            env_id (str): The id of the environment/agent instance this trainer is using.
            select_data (tuple): The data used for training in 'selection' mode.
            experience_q (:class:`mp.Queue`): Shared memory queue containing experiences for training.
            training_count (:class:`mp.Value`): Shared memory value which counts the number of trainings.
            ae_loss_log_q (:class:`mp.Queue`): Shared memory queue containing loss of decoder.
            trainer_id (int): The id of the trainer process.
        """
        super(TrainerProcess, self).__init__()
        self.autoencoder = autoencoder
        self.optimizer_ae = optimizer_ae
        self.agent = agent
        self.optimizer_ps = optimizer_ps
        self.env_id = env_id
        self.experience_q = experience_q
        self.training_count = training_count
        self.ae_loss_log_q = ae_loss_log_q
        self.id = trainer_id

        if Config.TRAIN_MODE == 'selection':
            o_batch, a_batch, t_batch = select_data
            #torch.Tensor: The observation training data set.
            self.o_batch = o_batch.to(Config.DEVICE)
            #torch.Tensor: The action training data set.
            self.a_batch = a_batch.to(Config.DEVICE)
            #torch.Tensor: The target training data set.
            self.t_batch = t_batch.to(Config.DEVICE)

        #int: Signal for process exit.
        self.exit_flag = Value('i', 0)
Example #21
    def __init__(self,
                 scheduler: scheduler_type,
                 mode: str = 'nearest',
                 align_corners: bool = None,
                 preserve_range: bool = False,
                 keys: Sequence = ('data', ),
                 grad: bool = False,
                 **kwargs):
        """
        Args:
            scheduler: scheduler which determines the current size.
                The scheduler is called with the current iteration of the
                transform
            mode: one of ``nearest``, ``linear``, ``bilinear``, ``bicubic``,
                    ``trilinear``, ``area`` (for more information see
                    :func:`torch.nn.functional.interpolate`)
            align_corners: input and output tensors are aligned by the center
                points of their corner pixels, preserving the values at the
                corner pixels.
            preserve_range: output tensor has same range as input tensor
            keys: keys which should be augmented
            grad: enable gradient computation inside transformation
            **kwargs: keyword arguments passed to augment_fn

        Warnings:
            When this transformation is used in combination with
            multiprocessing, the step counter is not perfectly synchronized
            between multiple processes.
            As a result the step count may jump between values
            in a range of the number of processes used.
        """
        super().__init__(size=0,
                         mode=mode,
                         align_corners=align_corners,
                         preserve_range=preserve_range,
                         keys=keys,
                         grad=grad,
                         **kwargs)
        self.scheduler = scheduler
        self._step = Value('i', 0)
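
A small, self-contained sketch of the shared step counter this transform keeps: the increment is atomic thanks to `get_lock()`, but, as the warning above notes, the order in which worker processes observe a given count is not deterministic (dummy workers, plain `multiprocessing`):

from multiprocessing import Process, Value


def apply_transform(step):
    with step.get_lock():
        step.value += 1            # one call of the transform
        return step.value


def worker(step, n_calls):
    for _ in range(n_calls):
        apply_transform(step)


if __name__ == '__main__':
    step = Value('i', 0)
    procs = [Process(target=worker, args=(step, 100)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(step.value)              # 400 in total, but per-call values interleave across workers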
Example #22
    def __init__(self, agent, observation_q, prediction_qs, env_id,
                 predictor_id):
        """
        Predictors gather observations from agents and make predictions.

        Args:
            agent (:class:`base_networks.DeepPS`): The deep PS model for RL.
            observation_q (:class:`mp.Queue`): Shared memory queue with observations of agents of the same type.
            prediction_qs (:obj:`list` of :class:`mp.Queue`): Shared memory queues containing predictions.
            env_id (str): The identifier for the environment type.
            predictor_id (int): The id of the predictor process.
        """
        super(PredictorProcess, self).__init__()
        self.agent = agent
        self.observation_q = observation_q
        self.prediction_qs = prediction_qs
        self.env_id = env_id
        self.id = predictor_id

        #int: Signal for process exit.
        self.exit_flag = Value('i', 0)
        #torch.Tensor of float: Array of actions in one-hot encoding.
        self.actions = torch.Tensor(np.eye(Config.NUM_ACTIONS)).to(
            Config.DEVICE)
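
The `run()` loops of these worker/predictor/trainer processes are not shown. A hypothetical sketch of how such a process typically consumes its `exit_flag` `Value` (class name and queue handling are assumptions):

import queue
from multiprocessing import Process, Queue, Value


class PredictorSketch(Process):
    def __init__(self, observation_q):
        super().__init__()
        self.observation_q = observation_q
        self.exit_flag = Value('i', 0)

    def run(self):
        # keep serving the queue until the parent flips exit_flag to 1
        while self.exit_flag.value == 0:
            try:
                obs = self.observation_q.get(timeout=0.1)
            except queue.Empty:
                continue
            _ = obs                # the model would run on obs here


if __name__ == '__main__':
    observation_q = Queue()
    p = PredictorSketch(observation_q)
    p.start()
    p.exit_flag.value = 1          # ask the process to stop
    p.join()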
Example #23
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# torch.multiprocessing.set_start_method('spawn', force=True)
# torch.multiprocessing.set_sharing_strategy('file_system')

PPO_Transition = namedtuple('PPO_Transition', ('obs', 'action', 'reward', 'done', 'value', 'logproba'))

CPPO_Transition = namedtuple('CPPO_Transition',
                             ('obs', 'action', 'reward', 'cost', 'sum_cost', 'done', 'value', 'c_value', 'logproba'))

TD_Transition = namedtuple('TD_Transition', ('obs', 'action', 'obs_next', 'reward', 'done'))

Safe_TD_Transition = namedtuple('Safe_TD_Transition',
                                ('obs', 'action', 'obs_next', 'reward', 'cost', 'cost_next', 'done'))

# 0: normal, 1: blocked; after the main process finishes one round of sampling, the sub-processes are blocked
Sub_Proc_Blocking = Value('i', 0)


def make_env(config_env, seed, env_index):
    env = gym.make(config_env['id'])
    env.start(str(env_index), path=config_env['path'], gui=config_env['gui'], max_step=config_env['max_step'],
              reward_model=config_env['reward_model'], is_human_model=config_env['human_model'])
    env.seed(seed)
    return env


def is_on_policy(args_algo):
    on_policys = ['ppo2', 'trpo', 'cppo', 'cppo2']
    return args_algo in on_policys
Example #24
    def __init__(self,
                 cache_dir,
                 dataset_dir,
                 dataset_list,
                 cuda,
                 batch_size=500,
                 num_workers=3,
                 renew_frequency=5,
                 rejection_radius_position=0,
                 numpatches=900,
                 numneg=3,
                 pos_thr=50.0,
                 reject=True,
                 mode='train',
                 rejection_radius=3000,
                 dist_type='3D',
                 patch_radius=None,
                 use_depth=False,
                 use_normals=False,
                 use_silhouettes=False,
                 color_jitter=False,
                 greyscale=False,
                 maxres=4096,
                 scale_jitter=False,
                 photo_jitter=False,
                 uniform_negatives=False,
                 needles=0,
                 render_only=False,
                 maxitems=200,
                 cache_once=False):
        super(MultimodalPatchesCache, self).__init__()
        self.cache_dir = cache_dir
        self.dataset_dir = dataset_dir
        #self.images_path = images_path
        self.dataset_list = dataset_list
        self.cuda = cuda
        self.batch_size = batch_size

        self.num_workers = num_workers
        self.renew_frequency = renew_frequency
        self.rejection_radius_position = rejection_radius_position
        self.numpatches = numpatches
        self.numneg = numneg
        self.pos_thr = pos_thr
        self.reject = reject
        self.mode = mode
        self.rejection_radius = rejection_radius
        self.dist_type = dist_type
        self.patch_radius = patch_radius
        self.use_depth = use_depth
        self.use_normals = use_normals
        self.use_silhouettes = use_silhouettes
        self.color_jitter = color_jitter
        self.greyscale = greyscale
        self.maxres = maxres
        self.scale_jitter = scale_jitter
        self.photo_jitter = photo_jitter
        self.uniform_negatives = uniform_negatives
        self.needles = needles
        self.render_only = render_only

        self.cache_done_lock = Lock()
        self.all_done = Value('B', 0)  # 0 is False
        self.cache_done = Value('B', 0)  # 0 is False

        self.wait_for_cache_builder = Event()
        # prepare for wait until initial cache is built
        self.wait_for_cache_builder.clear()
        self.cache_builder_resume = Event()

        self.maxitems = maxitems
        self.cache_once = cache_once

        if self.mode == 'eval':
            self.maxitems = -1
        self.cache_builder = Process(target=self.buildCache,
                                     args=[self.maxitems])
        self.current_cache_build = Value('B', 0)  # build into cache index 0
        self.current_cache_use = Value('B', 1)  # read from cache index 1

        self.cache_names = ["cache1", "cache2"]  # constant

        rebuild_cache = True
        if self.mode == 'eval':
            validation_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(validation_dir):
                # we don't need to rebuild validation cache
                # TODO: check if cache is VALID
                rebuild_cache = False
        elif cache_once:
            build_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(build_dataset_dir):
                # we don't need to rebuild training cache if we are training
                # on limited subset of the training set
                rebuild_cache = False

        if rebuild_cache:
            # clear the caches if they already exist
            build_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(build_dataset_dir):
                shutil.rmtree(build_dataset_dir)
            use_dataset_dir = os.path.join(
                self.cache_dir, self.cache_names[self.current_cache_use.value])
            if os.path.isdir(use_dataset_dir):
                shutil.rmtree(use_dataset_dir)

            os.makedirs(build_dataset_dir)

            self.cache_builder_resume.set()
            self.cache_builder.start()

            # wait until initial cache is built
            # print("before wait to build")
            # print("wait for cache builder state",
            #       self.wait_for_cache_builder.is_set())
            self.wait_for_cache_builder.wait()
            # print("after wait to build")

        # we have been resumed
        if self.mode != 'eval' and (not self.cache_once):
            # for training, we can set up the cache builder to build
            # the second cache
            self.restart()
        else:
            # else for validation we don't need second cache
            # we just need to switch the built cache to the use cache in order
            # to use it
            tmp = self.current_cache_build.value
            self.current_cache_build.value = self.current_cache_use.value
            self.current_cache_use.value = tmp
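
A stripped-down sketch of the `Event` handshake used above: the builder process signals the parent once the initial cache exists, and the parent blocks until then (the builder body is an assumption):

from multiprocessing import Event, Process


def build_cache(initial_cache_built):
    # ... the first cache would be written to disk here ...
    initial_cache_built.set()      # unblock the parent
    # ... building of the second cache could continue here ...


if __name__ == '__main__':
    initial_cache_built = Event()
    builder = Process(target=build_cache, args=(initial_cache_built,))
    builder.start()
    initial_cache_built.wait()     # parent resumes once the first cache exists
    print('initial cache ready')
    builder.join()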
Example #25
            config = json.load(server_file)

        if ('addr' not in config) or ('port' not in config):
            print("IP address (addr) and port number required in config")

        address = config['addr']
        port = int(config['port'])

        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server.bind((address, port))
        server.listen()

        param_queue = Queue()
        param_queue.put(net.state_dict())

        shutdown_val = Value('b', 0)

        receiver_proc = Process(target=HandleWorkers,
                                args=(server, replay_memory, mem_lock,
                                      param_queue, shutdown_val))
        receiver_proc.start()

    while True:
        try:
            Train(net, replay_memory, mem_lock, args.output_file)
            if param_queue is not None:
                param_queue.put(net.state_dict())
            torch.save(net.state_dict(), args.output_file)
        except KeyboardInterrupt:
            if server is not None:
                assert (shutdown_val is not None and receiver_proc is not None)
Example #26
# Data Worker
def work(loader, queue, control):
    while True:
        if control.value == 0:
            break
        if queue.qsize() < 5:
            batch = opcaffe.Batch()
            loader.load(batch)  # use the loader argument, not the global myClass
            data = torch.tensor(batch.data)
            label = torch.tensor(batch.label)
            queue.put([data, label])
        time.sleep(0.1)


queue = Queue()
control = Value('i', 1)
process = Process(target=work, args=(myClass, queue, control))
process.start()

# Iterate
while True:
    iterations += 1

    # Get Data from Queue
    data, label = queue.get()

    # LR
    if iterations in lr_half_sets:
        print("Half LR")
        half_lr(optimizer)
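
The excerpt never shows how this loop ends. A self-contained sketch of a clean shutdown for the same control-`Value` pattern, with a dummy producer standing in for the `opcaffe` loader:

import time
from multiprocessing import Process, Queue, Value


def work(data_queue, control):
    while control.value:           # 1 = keep producing, 0 = stop
        if data_queue.empty():     # stands in for the qsize() check above
            data_queue.put('batch')
        time.sleep(0.01)


if __name__ == '__main__':
    data_queue, control = Queue(), Value('i', 1)
    p = Process(target=work, args=(data_queue, control))
    p.start()
    print(data_queue.get())        # consume one dummy batch
    control.value = 0              # ask the worker to stop
    p.join()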
Example #27
def train_ai2thor(model, args, rank=0, b=None):

    seed = args.seed + 10000 * rank
    torch.manual_seed(seed)
    np.random.seed(seed)

    # torch.cuda.set_device(rank)
    # device = torch.device(f'cuda:{rank}')
    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    # if torch.cuda.is_available():
    #     os.environ['DISPLAY'] = f':{rank}'

    model = model.to(device)
    model.share_memory()

    # Experience buffer
    storage = PPOBuffer(model.obs_shape,
                        args.steps,
                        args.num_workers,
                        args.state_size,
                        args.gamma,
                        device=device)
    storage.share_memory()

    #torch.multiprocessing.set_start_method('spawn')
    # start multiple processes
    ready_to_works = [Event() for _ in range(args.num_workers)]
    exit_flag = Value('i', 0)
    queue = SimpleQueue()

    processes = []
    # task_config_file = "config_files/multiMugTaskTrain.json"
    task_config_file = "config_files/multiMugTaskTrain.json"
    # start workers
    for worker_id in range(args.num_workers):
        print('START>>>>>>>>>>>>>>>>')
        p = Process(target=worker,
                    args=(worker_id, model, storage, ready_to_works[worker_id],
                          queue, exit_flag, args.use_priors, task_config_file))
        p.start()
        processes.append(p)

    # start trainer
    train_params = {
        "epochs": args.epochs,
        "steps": args.steps,
        "world_size": args.world_size,
        "num_workers": args.num_workers
    }
    ppo_params = {
        "clip_param": args.clip_param,
        "train_iters": args.train_iters,
        "mini_batch_size": args.mini_batch_size,
        "value_loss_coef": args.value_loss_coef,
        "entropy_coef": args.entropy_coef,
        "rnn_steps": args.rnn_steps,
        "lr": args.lr,
        "max_kl": args.max_kl
    }

    distributed = False
    if args.world_size > 1:
        distributed = True
        # Initialize Process Group, distributed backend type
        dist_backend = 'nccl'
        # Url used to setup distributed training
        dist_url = "tcp://127.0.0.1:23456"
        print("Initialize Process Group... pid:", os.getpid())
        dist.init_process_group(backend=dist_backend,
                                init_method=dist_url,
                                rank=rank,
                                world_size=args.world_size)
        # Make model DistributedDataParallel
        model = DistributedDataParallel(model,
                                        device_ids=[rank],
                                        output_device=rank)
    else:
        print('Distribution is not allowed')

    learner(model, storage, train_params, ppo_params, ready_to_works, queue,
            exit_flag, rank, distributed, b)

    for p in processes:
        print("process ", p.pid, " joined")
        p.join()
Example #28
    def __init__(self, config_file):

        super(Manager, self).__init__()
        # Setting it as daemon child
        self.daemon = True

        # Read config file
        self.config = configparser.ConfigParser()
        # Fixing lower-case keys in config files
        self.config.optionxform = lambda option: option
        self.config.read(config_file)

        # Initializing the device
        if self.config["settings"]["device"] == "cuda":
            assert torch.cuda.is_available()
        self.device = self.config["settings"]["device"]
        self.agent_device = self.config["settings"]["device"]

        # Test and training sets
        self.train_set, self.test_set = [], []
        for key, value in self.config["levels"].items():
            if value == "train":
                self.train_set.append(key)
            elif value == "test":
                self.test_set.append(key)

        # Dimensions of the view
        self.channels = int(self.config["environnement"]["stacks"])
        self.height = int(self.config["environnement"]["height"])
        self.width = int(self.config["environnement"]["width"])

        # Creating the environment generation function
        self.n_outputs = len(KartMultiDiscretizer.discretized_actions)

        # Impala constants
        self.sequence_length = int(self.config["impala"]["sequence_length"])
        self.rho = float(self.config["impala"]["rho"])
        self.cis = float(self.config["impala"]["cis"])
        self.discount_factor = float(self.config["impala"]["discount_factor"])
        self.entropy_coef = float(self.config["impala"]["entropy_coef"])
        self.value_coef = float(self.config["impala"]["value_coef"])

        # Building the model and share it (cf torch.multiprocessing best practices)
        self.model = torch.jit.script(
            ActorCriticLSTM(c=self.channels,
                            h=self.height,
                            w=self.width,
                            n_outputs=self.n_outputs,
                            sequence_length=self.sequence_length).float()).to(
                                self.device)

        # To have a multi-machine-case, just place on different devices and sync the models once a while
        self.impala = torch.jit.script(
            Impala(sequence_length=self.sequence_length,
                   entropy_coef=self.entropy_coef,
                   value_coef=self.value_coef,
                   discount_factor=self.discount_factor,
                   model=self.model,
                   rho=self.rho,
                   cis=self.cis,
                   device=self.device))

        # Sharing memory between processes
        self.model.share_memory()
        self.impala.share_memory()

        # Building the optimizer
        self.optimizer = optim.RMSprop(
            self.model.parameters(),
            lr=float(self.config["optimizer"]["lr"]),
            alpha=float(self.config["optimizer"]["alpha"]),
            eps=float(self.config["optimizer"]["eps"]),
            momentum=float(self.config["optimizer"]["momentum"]),
            weight_decay=float(self.config["optimizer"]["weight_decay"]),
            centered=self.config["optimizer"]["centered"] == "True")

        # Checkpoints directory
        self.checkpoint_path = self.config["settings"]["checkpoint_path"]

        # Building the torch.multiprocessing-queues
        self.training_queue = Queue(
            maxsize=int(self.config["settings"]["training_queue"]))
        self.prediction_queue = Queue(
            maxsize=int(self.config["settings"]["prediction_queue"]))
        self.statistics_queue = Queue()

        # Building the torch.multiprocessing-values
        self.learning_step = Value('i', 0)
        self.nb_episodes = Value('i', 0)
        self.max_nb_steps = int(self.config["settings"]["max_nb_episodes"])

        # Statistics thread
        self.tensorboard = self.config["settings"]["tensorboard"]
        self.statistics = Statistics(writer_dir=self.tensorboard,
                                     statistics_queue=self.statistics_queue,
                                     nb_episodes=self.nb_episodes)

        # Agents, predictions and learners
        self.training_batch_size = int(
            self.config["settings"]["training_batch_size"])
        self.trainers = []
        self.prediction_batch_size = int(
            self.config["settings"]["prediction_batch_size"])
        self.predictors = []
        self.agents = []

        # Adding the threads and agents
        self.add_trainers(int(self.config["settings"]["trainers"]))
        self.add_agents(int(self.config["settings"]["agents"]))
        self.add_predictors(int(self.config["settings"]["predictors"]))
Example #29

'''
    Maps episode length to dictionary with following keys:
        current_idx: which episode in the list are we at (if simply indexing
            into list)
        ep_list: list of episodes of the length of the key
        bucket_complete: if there are no more episodes left to consider in
            the bucket
'''
# Maps episode length to list of episodes
length_to_eps = {}
# List of batches if popping batches
batches = []
# If all episodes have been loaded into memory
load_complete = Value(ctypes.c_bool, False)
# Lock to access batches
batches_lock = Lock()
# Lock to access length_to_eps
cache_lock = Lock()
# Lock for condition variables
fill_cache_lock = RLock()
# Condition notifying Loader to add to cache
add_to_cache_cv = Condition(lock=fill_cache_lock)
# Condition notifying teacher that cache has episodes
cache_filled_cv = Condition(lock=fill_cache_lock)


def batch_cache(function):
    max_cache_size = 10000  # Max unseen eps
    min_cache_size = 1000  # Min unseen eps
Example #30
from threading import Thread, Condition, RLock


'''
    Maps episode length to dictionary with following keys:
        current_idx: which episode in the list are we at (if simply indexing
            into list)
        ep_list: list of episodes of the length of the key
        bucket_complete: if there are no more episodes left to consider in
            the bucket
'''
length_to_eps = {}                                # Maps episode length to list
                                                  # of episodes
batches = []                                      # List of batches if popping
                                                  # batches
load_complete = Value(ctypes.c_bool, False)       # If all episodes have been
                                                  # loaded into memory
batches_lock = Lock()                             # Lock to access batches
cache_lock = Lock()                               # Lock to access length_to_eps
fill_cache_lock = RLock()                         # Lock for condition variables
add_to_cache_cv = Condition(lock=fill_cache_lock) # Condition notifying Loader
                                                  # to add to cache
cache_filled_cv = Condition(lock=fill_cache_lock) # Condition notifying teacher
                                                  # that cache has episodes


def batch_cache(function):
    max_cache_size = 10000                   # Max unseen eps
    min_cache_size = 1000                    # Min unseen eps

    def get_cache_size():