def __init__(self, *args, **kwargs):
    self._shared_size = mp.Value(ctypes.c_long, 0)
    ObsDictRelabelingBuffer.__init__(self, *args, **kwargs)
    self._mp_array_info = {}
    self._shared_obs_info = {}
    self._shared_next_obs_info = {}

    for obs_key, obs_arr in self._obs.items():
        ctype = ctypes.c_double
        if obs_arr.dtype == np.uint8:
            ctype = ctypes.c_uint8

        self._shared_obs_info[obs_key] = (
            mp.Array(ctype, obs_arr.size),
            obs_arr.dtype,
            obs_arr.shape,
        )
        self._shared_next_obs_info[obs_key] = (
            mp.Array(ctype, obs_arr.size),
            obs_arr.dtype,
            obs_arr.shape,
        )

        self._obs[obs_key] = to_np(*self._shared_obs_info[obs_key])
        self._next_obs[obs_key] = to_np(*self._shared_next_obs_info[obs_key])

    self._register_mp_array("_actions")
    self._register_mp_array("_terminals")
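The buffer above converts each shared `mp.Array` back into a numpy array through a `to_np` helper that is not shown in this snippet. A minimal sketch, assuming the helper simply wraps the shared buffer in a zero-copy numpy view (and that the numpy dtype matches the chosen ctype), could look like this:

import numpy as np

def to_np(shared_arr, np_dtype, shape):
    # Wrap the underlying ctypes buffer without copying, so reads and writes through the
    # returned array go straight to the shared memory backing the mp.Array.
    return np.frombuffer(shared_arr.get_obj(), dtype=np_dtype).reshape(shape)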
def __init__(self, make_env, array_dim, batchsize, max_q_size, num_threads, collate_fn, epoch=0):
    num_threads = max(num_threads, 1)
    self.readyQs = [mp.Queue(maxsize=max_q_size) for i in range(num_threads)]
    self.array_dim = array_dim
    self.num_threads = num_threads
    self.num_videos_per_thread = batchsize // num_threads
    self.max_q_size = max_q_size
    self.batchsize = batchsize
    self.make_env = make_env
    self.batch = np.zeros(
        (self.num_threads, self.num_videos_per_thread, *array_dim),
        dtype=np.float32)
    array_dim2 = (self.max_q_size, self.num_videos_per_thread, *array_dim)
    self.m_arrays = (mp.Array('f', int(np.prod(array_dim2)), lock=mp.Lock())
                     for _ in range(num_threads))
    self.arrays = [(m, np.frombuffer(m.get_obj(), dtype='f').reshape(array_dim2))
                   for m in self.m_arrays]
    self.dataset = make_env(proc_id=0, num_procs=0, num_envs=0)
    self.max_iter = self.dataset.max_iter
    self.collate_fn = collate_fn
    self.epoch = epoch
def test_main_process_unclean_exit(self):
    '''There might be ConnectionResetError or leaked semaphore warning
    (due to dirty process exit), but they are all safe to ignore'''
    worker_pids = mp.Array('i', [0] * 4)

    manager_exit_event = mp.Event()
    p = mp.Process(target=TestDataLoader._manager_process,
                   args=(self.dataset, worker_pids, manager_exit_event))
    p.start()

    manager_exit_event.wait()
    exit_status = [False] * len(worker_pids)
    start_time = time.time()
    pname = 'python'
    while True:
        for i in range(len(worker_pids)):
            pid = worker_pids[i]
            if not exit_status[i]:
                if not TestDataLoader._is_process_alive(pid, pname):
                    exit_status[i] = True
        if all(exit_status):
            break
        else:
            time.sleep(1)
    self.assertFalse(time.time() - start_time > MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT,
                     'subprocess not terminated')
def remove_rejected(self):
    # remove the indices belonging to samples that were rejected from the dataset
    # this changes the length of the dataset
    rejected = np.array(self.rejected[:])
    self.index_mapping = np.argwhere(1 - rejected)[:, 0]
    self.rejected = mp.Array('b', len(self))
    # just in case of num_workers == 0
    self.available_indices = None
def main():
    params = Params()
    mp.set_start_method('spawn')
    lock = mp.Lock()
    actions = mp.Array('i', [-1] * params.n_process, lock=lock)
    count = mp.Value('i', 0)
    best_acc = mp.Value('d', 0.0)
    state_Queue = mp.JoinableQueue()
    action_done = mp.SimpleQueue()
    reward_Queue = mp.JoinableQueue()

    # shared_model = A3C_LSTM_GA()
    # shared_model = shared_model.share_memory()
    #
    # shared_optimizer = SharedAdam(shared_model.parameters(), lr=params.lr, amsgrad=params.amsgrad, weight_decay=params.weight_decay)
    # shared_optimizer.share_memory()

    # run_sim(0, params, shared_model, None, count, lock)
    # test(params, shared_model, count, lock, best_acc)

    processes = []
    train_process = 0
    test_process = 0

    p = mp.Process(target=learning,
                   args=(params, state_Queue, action_done, actions, reward_Queue,))
    p.start()
    processes.append(p)
    # test_process += 1

    for rank in range(params.n_process):
        p = mp.Process(target=run_sim,
                       args=(train_process, params, state_Queue, action_done,
                             actions, reward_Queue, lock,))
        train_process += 1
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
def __init__(self, array_dim, max_q_size, num_videos_per_thread, dtype):
    self.array_dim = array_dim
    self.num_videos_per_thread = num_videos_per_thread
    self.max_q_size = max_q_size
    array_dim2 = (self.max_q_size, self.num_videos_per_thread, *array_dim)
    mp_dtype = "f" if dtype == np.float32 else "b"
    self.m = mp.Array(mp_dtype, int(np.prod(array_dim2)), lock=mp.Lock())
    self.n = np.frombuffer(self.m.get_obj(), dtype=dtype).reshape(array_dim2)
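A hedged usage sketch for the wrapper above (the class name `SharedArray` is assumed here; the snippet does not show it): under a 'fork' start method the numpy view `n` is backed by the shared `mp.Array`, so writes made in a child process are visible to the parent.

import numpy as np
import torch.multiprocessing as mp

def _fill(shared, value):
    shared.n[:] = value  # lands in the shared mp.Array buffer

if __name__ == "__main__":
    shared = SharedArray(array_dim=(3, 4), max_q_size=2, num_videos_per_thread=1, dtype=np.float32)
    p = mp.Process(target=_fill, args=(shared, 1.0))
    p.start()
    p.join()
    print(shared.n.max())  # 1.0 -- the parent sees the child's writes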
def __init__(self, *datasets, sync=False, transforms=None,
             rejection_dataset_indices, rejection_criterion,
             random_jump_after_reject=True):
    """
    Parameters
    ----------
    datasets : list or tuple
        Datasets to zip.
    sync : bool
        Whether to synchronize zipped datasets if a synchronization primitive is available.
    transforms : callable
        Transforms to apply on the fetched batch.
    rejection_dataset_indices : int or list or tuple
        Indices (or index) corresponding to the datasets which are used to determine
        whether a batch should be rejected.
    rejection_criterion : callable
        Criterion for rejection of batch. Must be a callable that accepts one or more
        arrays / tensors and returns True if the corresponding batch should be rejected,
        False otherwise. Should accept as many inputs as the number of elements in
        `rejection_dataset_indices` if the latter is a list, and 1 otherwise. Note that
        the order of the inputs to the `rejection_criterion` is the same as the order of
        the indices in `rejection_dataset_indices`.
    random_jump_after_reject : bool
        Whether to try a random index or the rejected index incremented by one after rejection.
    """
    super(ZipReject, self).__init__(*datasets, sync=sync, transforms=transforms)
    for rejection_dataset_index in pyu.to_iterable(rejection_dataset_indices):
        assert_(rejection_dataset_index < len(datasets),
                "Index of the dataset to be used for rejection (= {}) is larger "
                "than the number of datasets (= {}) minus one."
                .format(rejection_dataset_index, len(datasets)),
                IndexError)
    self.rejection_dataset_indices = pyu.to_iterable(rejection_dataset_indices)
    assert_(callable(rejection_criterion),
            "Rejection criterion is not callable as it should be.",
            TypeError)
    # return true if fetched should be rejected
    self.rejection_criterion = rejection_criterion
    # Array shared over processes to keep track of which indices have been rejected
    self.rejected = mp.Array('b', len(self))
    self.available_indices = None
    # optional index mapping to exclude rejected indices, reducing dataset size
    # (see remove_rejected())
    self.index_mapping = None
    self.random_jump_after_reject = random_jump_after_reject
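A hedged, self-contained sketch of the bookkeeping behind `self.rejected` above: a byte-typed `mp.Array` acts as a per-index rejection flag that worker processes can set and the parent can read back, for example to build the index mapping that `remove_rejected` derives. The helper names below are illustrative, not from the original code.

import numpy as np
import torch.multiprocessing as mp

def flag_rejected(rejected, indices):
    for idx in indices:
        rejected[idx] = 1  # visible to every process sharing the array

if __name__ == "__main__":
    dataset_len = 10
    rejected = mp.Array('b', dataset_len)  # zero-initialised flags, one per index
    p = mp.Process(target=flag_rejected, args=(rejected, [2, 5, 7]))
    p.start()
    p.join()
    # Same reduction as remove_rejected(): keep only the indices that were not rejected.
    index_mapping = np.argwhere(1 - np.array(rejected[:]))[:, 0]
    print(index_mapping)  # [0 1 3 4 6 8 9]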
def __init__(self, array_dim, num_threads):
    self.batch = np.zeros(
        (self.num_threads, self.num_videos_per_thread, *array_dim),
        dtype=np.float32)
    array_dim2 = (self.max_q_size, self.num_videos_per_thread, *array_dim)
    self.m_arrays = (mp.Array('f', int(np.prod(array_dim2)), lock=mp.Lock())
                     for _ in range(num_threads))
    self.arrays = [(m, np.frombuffer(m.get_obj(), dtype='f').reshape(array_dim2))
                   for m in self.m_arrays]
def _register_mp_array(self, arr_instance_var_name):
    """
    Use this function to register an array to be shared. This will wipe arr.
    """
    assert hasattr(self, arr_instance_var_name), arr_instance_var_name
    arr = getattr(self, arr_instance_var_name)

    ctype = ctypes.c_double
    if arr.dtype == np.uint8:
        ctype = ctypes.c_uint8

    self._mp_array_info[arr_instance_var_name] = (
        mp.Array(ctype, arr.size),
        arr.dtype,
        arr.shape,
    )
    setattr(self, arr_instance_var_name,
            to_np(*self._mp_array_info[arr_instance_var_name]))
def test_main_process_unclean_exit(self):
    r'''There might be ConnectionResetError or leaked semaphore warning
    (due to dirty process exit), but they are all safe to ignore'''
    # `raise_error` controls if the main process is KILL-ed by OS or just
    # simply raises an error. Both cases are interesting because
    #   1. In case of it is KILL-ed by OS, the workers need to automatically
    #      discover that their parent is dead and exit gracefully.
    #   2. In case of it raises an error itself, the parent process needs to
    #      take care of exiting the worker and then exits itself gracefully.
    for raise_error in (True, False):
        worker_pids = mp.Array('i', [0] * 4)

        main_exit_event = mp.Event()
        p = mp.Process(target=TestDataLoader._main_process,
                       args=(self.dataset, worker_pids, main_exit_event, raise_error))
        p.start()
        worker_pids[-1] = p.pid

        main_exit_event.wait()
        exit_status = [False] * len(worker_pids)
        start_time = time.time()
        pname = 'python'
        while True:
            for i in range(len(worker_pids)):
                pid = worker_pids[i]
                if not exit_status[i]:
                    if not TestDataLoader._is_process_alive(pid, pname):
                        exit_status[i] = True
            if all(exit_status):
                break
            else:
                if time.time() - start_time > MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT:
                    self.fail('subprocess not terminated')
                time.sleep(1)
        p.join(MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT - (time.time() - start_time))
        self.assertFalse(p.is_alive(), 'main process not terminated')
# value_target = np.array(value_target)
# value_target = (value_target - value_target.min()) / (value_target.max() - value_target.min())
loss = l_net.loss_func(vwrap(np.vstack(bs)),
                       vwrap(np.vstack(ba)),
                       vwrap(np.vstack(value_target)))
opt.zero_grad()
loss.backward()
for lp, gp in zip(l_net.parameters(), g_net.parameters()):
    gp._grad = lp.grad
opt.step()
l_net.load_state_dict(g_net.state_dict())


if __name__ == "__main__":
    g_net = Net(N_S, N_A)
    g_net.share_memory()
    opt = SharedAdam(g_net.parameters(), lr=ADAM_LR)
    g_ep = mp.Value('i', 0)
    g_rewards = mp.Array(ctypes.c_double, MAX_EP * 2)

    workers = [Worker(g_net, opt, g_ep, g_rewards, i) for i in range(mp.cpu_count())]
    [w.start() for w in workers]
    [w.join() for w in workers]

    save_rewards = np.frombuffer(g_rewards.get_obj())
    plt.plot(save_rewards[:g_ep.value])
    plt.show()
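A brief, self-contained note on the read-back above: `np.frombuffer` over a `c_double` `mp.Array` yields a float64 view onto the same shared buffer, so it reflects everything the workers wrote. The toy sizes below are illustrative, not from the original script.

import ctypes
import numpy as np
import torch.multiprocessing as mp

g_rewards = mp.Array(ctypes.c_double, 6)
view = np.frombuffer(g_rewards.get_obj())  # dtype defaults to float64, matching c_double
g_rewards[3] = 2.5
print(view)  # [0.  0.  0.  2.5 0.  0. ]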
def mp_agent(args):
    assert True, 'annotate the code in agent preceive'
    if args.seed != -1:
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)
    if args.gpu and False:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device('cpu')
    # TensorboardX is incompatible with asynchronous event writing:
    # https://github.com/lanpa/tensorboardX/issues/123
    # writer = SummaryWriter()
    writer = None

    agent_population = []
    for i_population in range(args.n_population):
        agent_i = init_agent(args, device, writer, i_population)
        agent_population.append(agent_i)
    best_history_agent = init_agent(args, device, writer, args.n_population)
    best_history = -100000

    fitness_values = [0. for i in range(args.n_process)]
    fitness_values = torch_mp.Array('f', fitness_values)
    done_signals = torch_mp.Array('i', [0 for i in range(args.n_process)])  # as a compromise
    share_signals = torch_mp.Value('i', 0)

    collected_exp = []
    evo = Evolution(args)
    for i_process in range(args.n_process):  # change to population:
        q = torch_mp.Queue()  # change according to local_interval
        collected_exp.append(q)

    for i_generation in range(args.n_generation):
        processes = []
        done_signals[:] = [0 for i in range(args.n_process)]
        share_signals.value = 0
        for i_process in range(args.n_process):
            p = torch_mp.Process(
                target=train_agent,
                args=(args, agent_population[i_process], i_process, i_process,
                      i_generation, device, fitness_values,
                      collected_exp[i_process], done_signals, share_signals))
            p.start()
            processes.append(p)
        # for p in processes:
        #     p.join()
        while not all(done_signals[:]):  # continues to check
            time.sleep(1)

        for i_process in range(args.n_process):
            q = collected_exp[i_process]
            q_counter = 0
            while q.qsize() > 0:
                q_counter += 1
                state, action, mask, next_state, reward = q.get()
                for agent_i in agent_population:
                    agent_i.preceive(state, action, mask, next_state, reward)
                    if q_counter % agent_i.eval_env.spec.timestep_limit == 0:
                        agent_i.reset_storage()
        share_signals.value = 1
        time.sleep(2)  # wait for the subprocess to close

        all_fitness = fitness_values[:]
        best_pop_fitness = max(all_fitness)
        best_index = all_fitness.index(max(all_fitness))
        if best_history < best_pop_fitness:
            best_history = best_pop_fitness
            best_history_agent.copy_model_mp(agent_population[best_index])
        print('eval_performance:', best_history_agent.eval_performance())
        for i_fitness in range(len(all_fitness)):  # manual syncs
            # change to your own environment
            if all_fitness[i_fitness] < best_history + best_history * 0.1:
                agent_population[i_fitness].copy_model_mp(best_history_agent)
        # evo.next_g(agent_population, all_fitness)
        evo.torch_next_g(agent_population, all_fitness)
    if writer:
        writer.close()
if __name__ == '__main__':
    os.system('cls')
    vis.close()
    num_processes = 1
    shared_queue = mp.Queue()
    shared_state = dict()

    shared_state["p"] = Actor(s_dim, a_dim, dev).share_memory()
    shared_state["q"] = QCritic(s_dim, a_dim, dev).share_memory()
    shared_state["v"] = VCritic(s_dim, a_dim, dev).share_memory()
    shared_state["update"] = mp.Array('i', [0 for i in range(num_processes)])
    # shared_state["wait"] = mp.Array('i', [0 for i in range(num_processes)])
    shared_state["vis"] = mp.Value('i', 0)
    shared_state["wait"] = mp.Value('i', 0)
    shared_state["wait"].value = start_frame * 10

    act = actor_worker(0, num_frames, shared_state, shared_queue, 0.1, False)
    act.run()
    act.run()
    act.run()
    lea = learner_worker(1, num_frames, shared_state, shared_queue, False)
    lea.push_buffer()
    lea.push_buffer()
    lea.push_buffer()
    # for i in range(100):
torch.save({'model': net.state_dict(), 'best_idx': best_idx,
            'opt': optimizer.state_dict()}, file_name)
print("Net evaluation started")
net.eval()
if os.name == 'nt' and args.cuda:
    cd = torch.device("cpu")
    net.to(cd)
    best_net.to(cd)
    cpuf = True
else:
    cpuf = False
mp.set_start_method("spawn", force=True)
lock = mp.Lock()
processes = []
mar = mp.Array('i', 3)
mar[0] = 1
for i in range(NUM_PROC):
    p = mp.Process(target=eval, args=(mar, lock, net, best_net, device, cpuf), daemon=True)
    p.start()
    processes.append(p)
while 1:
    lock.acquire()
    if mar[0] > 0 and (mar[1] >= EVALUATION_ROUNDS * BEST_NET_WIN_RATIO
                       or mar[2] > EVALUATION_ROUNDS * (1 - BEST_NET_WIN_RATIO)):
        mar[0] = 0
    lock.release()
    running = any(p.is_alive() for p in processes)
    if not running:
        break
    time.sleep(0.5)
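A hedged, self-contained sketch of the stop-flag/counter pattern used above: evaluation workers bump result counters under the lock, the parent clears slot 0 once enough rounds have been recorded, and workers exit when they see the flag drop. The worker body and the threshold below are illustrative, not the original `eval` target.

import time
import torch.multiprocessing as mp

def eval_worker(mar, lock):
    while True:
        with lock:
            if mar[0] == 0:   # parent cleared the "keep running" flag
                return
            mar[1] += 1       # e.g. one more win for the candidate net
        time.sleep(0.01)

if __name__ == "__main__":
    lock = mp.Lock()
    mar = mp.Array('i', 3)
    mar[0] = 1  # running flag
    procs = [mp.Process(target=eval_worker, args=(mar, lock), daemon=True) for _ in range(2)]
    [p.start() for p in procs]
    while any(p.is_alive() for p in procs):
        with lock:
            if mar[1] >= 20:  # enough evaluation rounds collected
                mar[0] = 0
        time.sleep(0.05)
    print(mar[1])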
def test_proper_exit(self):
    (r'''There might be ConnectionResetError or leaked semaphore warning '''
     r'''(due to dirty process exit), but they are all safe to ignore''')

    # TODO: test the case where the pin_memory_thread triggers an
    #       error/fatal signal. I haven't found out how to properly do that.

    # Array to store the worker pids.
    worker_pids = mp.Array('i', [-1 for _ in range(10)])

    def wait_pids(pids, timeout):
        r"""Wait for all process specified in pids to exit in given timeout."""
        exit_status = [False for _ in pids]
        start_time = time.time()
        pname = 'python'
        while True:
            for i in range(len(pids)):
                pid = pids[i]
                if not exit_status[i]:
                    if not TestDataLoader._is_process_alive(pid, pname):
                        exit_status[i] = True
            if all(exit_status):
                break
            else:
                if time.time() - start_time > timeout:
                    break
                time.sleep(0.5)
        return exit_status

    for use_workers, pin_memory, hold_iter_reference in itertools.product([True, False], repeat=3):
        # `hold_iter_reference` specifies whether we hold a reference to the
        # iterator. This is interesting because Python3 error traces holds a
        # reference to the frames, which hold references to all the local
        # variables including the iterator, and then the iterator dtor may
        # not be called before process end. It is important to see that the
        # processes still exit in both cases.

        if pin_memory and (not TEST_CUDA or NO_MULTIPROCESSING_SPAWN):
            # Can't use CUDA without spawn
            continue

        # `exit_method` controls the way the loader process ends.
        #   - `*_kill` means that `*` is killed by OS.
        #   - `*_error` means that `*` raises an error.
        #   - `None` means that no error happens.
        # In all cases, all processes should end properly.
        if use_workers:
            exit_methods = [None, 'main_error', 'main_kill', 'worker_kill', 'worker_error']
        else:
            exit_methods = [None, 'main_error', 'main_kill']

        for exit_method in exit_methods:
            # clear pids array first
            for i in range(len(worker_pids)):
                worker_pids[i] = -1

            # Event that the loader process uses to signal testing process
            # that various things are setup, including that the worker pids
            # are specified in `worker_pids` array.
            setup_event = mp.Event()

            p = ErrorTrackingProcess(
                target=_test_proper_exit,
                args=(use_workers, pin_memory, exit_method,
                      hold_iter_reference, worker_pids, setup_event))
            p.start()

            # Wait for loader process to set everything up, i.e., filling
            # worker pids in `worker_pids`.
            setup_event.wait(timeout=JOIN_TIMEOUT)
            self.assertTrue(setup_event.is_set(), 'loader process setup timed out')

            pids = [pid for pid in worker_pids if pid > 0]

            try:
                exit_status = wait_pids(pids, timeout=(MP_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT))
                if not all(exit_status):
                    # pids are ints, so convert before joining them into the message
                    self.fail('subprocess (pid(s) {}) not terminated'.format(
                        ', '.join(str(pid) for pid, exited in zip(pids, exit_status) if not exited)))
                p.join(JOIN_TIMEOUT + MP_STATUS_CHECK_INTERVAL)
                self.assertFalse(p.is_alive(), 'loader process not terminated')
                if exit_method is None:
                    self.assertEqual(p.exitcode, 0)
                else:
                    self.assertNotEqual(p.exitcode, 0)
            finally:
                p.terminate()
                    type=int,
                    default=3,
                    help='number of channels of output data')
parser.add_argument('--cuda',
                    type=bool,
                    default=True,
                    help='use GPU computation')
parser.add_argument('--n_cpu',
                    type=int,
                    default=16,
                    help='number of cpu threads to use during batch generation')
opt = parser.parse_args()

population = 32
current_fitness_base_A2B = mp.Array('f', range(population))
current_fitness_A2B = np.asarray(current_fitness_base_A2B.get_obj(), dtype=np.float32)
current_fitness_base_B2A = mp.Array('f', range(population))
current_fitness_B2A = np.asarray(current_fitness_base_B2A.get_obj(), dtype=np.float32)

# Losses
criterion_GAN = torch.nn.MSELoss()
criterion_cycle = torch.nn.L1Loss()
criterion_identity = torch.nn.L1Loss()


def caculate_fitness_for_first_time(mask_input, gpu_id, fitness_id, A2B_or_B2A):
    if '-' not in command:
        target_ids = [int(i.strip()) for i in command.split(",")]
    else:
        target_ids = list(range(int(command.split('-')[0]),
                                int(command.split('-')[1]) + 1))

    chosen_objects = [all_visible_objects[target_id] for target_id in target_ids]
    check_phase = lambda c: 'train' if os.path.isfile(
        os.path.join(args.folder, "net_{}.pth".format(c))) else 'test'
    chosen_phases = [check_phase(c) for c in chosen_objects]

    results = mp.Array('f', len(chosen_objects))
    processes = []
    for rank, obj in enumerate(chosen_objects):
        p = mp.Process(target=test,
                       args=(training_scene, obj, rank, shared_model,
                             results, config, arguments))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    print("Testing accuracies:",
          list(zip(chosen_objects, chosen_phases, results[:])))
else:
    arguments['test'] = 1
                             best_idx=best_idx, url=URL,
                             username=username, device=device)
    game_nodes = len(mcts_store)
    dt = time.time() - t
    speed_steps = game_steps / dt
    speed_nodes = game_nodes / dt
    step_idx += 1
    print("Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d"
          % (step_idx, game_steps, game_nodes, speed_steps, speed_nodes, best_idx))
else:
    processes = []
    mar = mp.Array('i', 2)
    mar[0] = 1
    for i in range(num_proc):
        mcts_store = mcts.MCTS()
        p = mp.Process(target=play,
                       args=(mar, lock, mcts_store, net, best_idx, username, device, step_idx),
                       daemon=True)
        p.start()
        processes.append(p)
    while 1:
        lock.acquire()
        if mar[0] > 0 and mar[1] >= PLAY_EPISODE * num_proc:
            mar[0] = 0
        lock.release()
        running = any(p.is_alive() for p in processes)
        if not running:
                          env.observation_space.shape[0], env.action_space)
shared_model.share_memory()

if args.no_shared:
    optimizer = None
else:
    optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
    optimizer.share_memory()

processes = []

counter = mp.Value('i', 0)
num_done = mp.Value('i', 0)
num_episode = mp.Value('i', 0)
reward_sum = mp.Value('i', 0)
arr = mp.Array('i', [])
lock = mp.Lock()
writer = SummaryWriter("logs/fig" + str(args.gae_lambda1) + "_" + str(args.gae_lambda2),
                       max_queue=1)

p = mp.Process(target=test,
               args=(args.num_processes, args, shared_model, counter,
                     num_done, num_episode, reward_sum, lock))
p.start()
processes.append(p)


def write(a, b, c):
    i = 0
    while counter.value < 120000000:
        print(a.value, b.value, c.value, counter.value / 10000)
        writer.add_scalar("test/reward", a.value, counter.value / 10000)
        writer.add_scalar("train/rate", b.value * 1.0 / c.value, counter.value / 10000)
        i = i + 1
        time.sleep(10)


for rank in range(0, args.num_processes):
def train(self, train_steps, eval_every_sec, eval_episodes, goal=None):
    """
    Train agent for given number of steps.

    :param train_steps: number of steps to train agent
    :param eval_every_sec: evaluate agent every `eval_every_sec` seconds
    :param eval_episodes: number of episodes to evaluate agent for
    :param goal: goal which can terminate training if it is reached
    :return: result
    """
    # Set one thread per core
    os.environ['OMP_NUM_THREADS'] = '1'

    # Flag indicating that training is finished
    stop_flag = mp.Event()

    # Number of steps for each worker
    workers_train_steps = int(train_steps / self.num_processes)

    # Workers' current steps
    workers_steps = mp.Array('i', self.num_processes)

    # Queue where the final result is put
    result_queue = mp.Queue()

    processes = []
    start = timer()

    # Create and start evaluation process
    eval_process = EvalProcess(
        env_fn_serialized=serialize(self.env_fn),
        agent=self.agent,
        seed=self.seed + self.num_processes if self.seed is not None else None,
        train_steps=train_steps,
        eval_every_sec=eval_every_sec,
        eval_episodes=eval_episodes,
        goal_serialized=serialize(goal),
        stop_flag=stop_flag,
        workers_steps=workers_steps,
        result_queue=result_queue)
    eval_process.start()
    processes.append(eval_process)

    # Create and start worker processes
    for worker in self.agent.create_workers(self.num_processes):
        worker_process = WorkerProcess(
            env_fn_serialized=serialize(self.env_fn),
            worker=worker,
            seed=self.seed + worker.worker_id if self.seed is not None else None,
            train_steps=workers_train_steps,
            workers_steps=workers_steps,
            stop_flag=stop_flag)
        worker_process.start()
        processes.append(worker_process)

    # Wait until all processes finish execution
    [process.join() for process in processes]

    # Get result from queue
    result = result_queue.get()
    result.train_time = timer() - start

    return result
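A hedged, self-contained sketch of the progress-reporting pattern implied by `workers_steps` above: each worker owns one slot of the shared 'i' array and periodically writes its current step count, while another process (here the parent) reads the slots to track global progress. The function name and step counts below are illustrative.

import time
import torch.multiprocessing as mp

def worker_loop(worker_id, workers_steps, train_steps):
    for step in range(1, train_steps + 1):
        workers_steps[worker_id] = step  # single writer per slot, so no extra locking needed here
        time.sleep(0.001)

if __name__ == "__main__":
    num_processes = 4
    workers_steps = mp.Array('i', num_processes)
    procs = [mp.Process(target=worker_loop, args=(i, workers_steps, 100))
             for i in range(num_processes)]
    [p.start() for p in procs]
    [p.join() for p in procs]
    print(sum(workers_steps[:]))  # 400 steps in total across the workers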