def compute_elo(elo_params, params, generations, elos):
    nw = elo_params.n_workers if elo_params.n_workers else mp.cpu_count() - 1
    gpw = elo_params.games_per_workers if elo_params.games_per_workers else max(1, elo_params.n_games // nw)
    games_idxs = np.array_split(np.arange(elo_params.n_games), elo_params.n_games // gpw)
    nw = min(nw, len(games_idxs))

    lock = mp.Lock()
    params = copy.deepcopy(params)
    params[0].self_play.merge(elo_params.self_play_override)
    params[1].self_play.merge(elo_params.self_play_override)
    devices = params[0].self_play.pytorch_devices
    nn_classes = list(p.nn.model_class for p in params)

    with mp.Pool(nw, initializer=_worker_init,
                 initargs=(elo_params.hdf_file, devices, lock, generations, nn_classes, params)) as pool:
        try:
            tasks = [pool.apply_async(_worker_run, (idxs,), error_callback=_err_cb)
                     for idxs in games_idxs]
            [t.wait() for t in tasks]
        except Exception as e:
            logger.exception("An error occurred")
            raise e
        pool.map(_worker_teardown, range(nw))

    # compute new elo score
    with pd.HDFStore(elo_params.hdf_file, mode="a") as store:
        games = store.get("/fresh")
        del store["/fresh"]
        store.put("elo{}vs{}".format(*generations), games, format="table", append=False)

    winners = games[games.z == 1].sort_index(level=["move_idx"]).groupby(level=["game_idx"]).head(1)
    n0 = sum(winners.index.get_level_values("generation") == generations[0])
    n1 = len(winners) - n0
    elo0, elo1 = elo_rating2(elos[0], elos[1], n0, n1, K=30)
    print(f"{params[0].nn.model_class.__name__} generation {generations[0]}: wins={n0}, elo={elos[0]} -> {elo0}")
    print(f"{params[1].nn.model_class.__name__} generation {generations[1]}: wins={n1}, elo={elos[1]} -> {elo1}")
    return elo0, elo1, n1 / len(winners)
def run_n_episodes(self):
    """Runs the game to completion n times, then summarises results and saves the model (if asked to)."""
    start = time.time()
    results_queue = Queue()
    gradient_updates_queue = Queue()
    episode_number = multiprocessing.Value('i', 0)
    self.optimizer_lock = multiprocessing.Lock()
    episodes_per_process = int(self.config.num_episodes_to_run / self.worker_processes) + 1
    processes = []
    self.actor_critic.share_memory()
    self.actor_critic_optimizer.share_memory()

    optimizer_worker = multiprocessing.Process(target=self.update_shared_model,
                                               args=(gradient_updates_queue,))
    optimizer_worker.start()

    for process_num in range(self.worker_processes):
        worker = Actor_Critic_Worker(
            process_num, copy.deepcopy(self.environment), self.actor_critic,
            episode_number, self.optimizer_lock, self.actor_critic_optimizer,
            self.config, episodes_per_process,
            self.hyperparameters["epsilon_decay_rate_denominator"],
            self.action_size, self.action_types, results_queue,
            copy.deepcopy(self.actor_critic), gradient_updates_queue)
        worker.start()
        processes.append(worker)
    self.print_results(episode_number, results_queue)
    for worker in processes:
        worker.join()
    # optimizer_worker.kill()
    optimizer_worker.terminate()

    if self.config.save_model:
        self.locally_save_policy()
    time_taken = time.time() - start
    return self.game_full_episode_scores, self.rolling_results, time_taken
def __init__(self, args):
    super(SharedMemory, self).__init__(args)
    # params for this memory

    # setup
    self.pos = mp.Value('l', 0)
    self.full = mp.Value('b', False)
    if self.tensortype == torch.FloatTensor:
        self.state0s = torch.zeros((self.memory_size,) + tuple(self.state_shape), dtype=torch.float32)
        self.state1s = torch.zeros((self.memory_size,) + tuple(self.state_shape), dtype=torch.float32)
    elif self.tensortype == torch.ByteTensor:
        self.state0s = torch.zeros((self.memory_size,) + tuple(self.state_shape), dtype=torch.uint8)
        self.state1s = torch.zeros((self.memory_size,) + tuple(self.state_shape), dtype=torch.uint8)
    self.actions = torch.zeros(self.memory_size, self.action_shape)
    self.rewards = torch.zeros(self.memory_size, self.reward_shape)
    self.gamma1s = torch.zeros(self.memory_size, self.gamma_shape)
    self.terminal1s = torch.zeros(self.memory_size, self.terminal_shape)
    self.state0s.share_memory_()
    self.actions.share_memory_()
    self.rewards.share_memory_()
    self.gamma1s.share_memory_()
    self.state1s.share_memory_()
    self.terminal1s.share_memory_()
    self.memory_lock = mp.Lock()
def stream(self, modules, num_sketches, num_epochs, num_workers=-1, max_id=None):
    """Starts a stream of sketches.

    modules: ModulesDataset object
        the dataset of functions to iterate upon
    num_sketches: int
        the number of sketches to compute per epoch; each sketch
        corresponds to one particular function.
    num_epochs: int
        the number of epochs
    num_workers: int
        the number of workers to use. A negative value leads to
        picking half of the local cores.
    max_id: int or None
        the maximum index for modules.
    """
    # first stop if it was started before
    self.stop()

    # get the number of workers
    if num_workers < 0 or num_workers is None:
        # if not defined, take at least 1 and at most half of the cores
        num_workers = 1e7  # should be enough as a max number =)
    num_workers = max(1, min(num_workers, int((mp.cpu_count() - 1) / 2)))
    print('SketchStream using ', num_workers, 'workers')

    # now create a queue with a maxsize corresponding to a few times
    # the number of workers
    self.queue = mp.Queue(maxsize=2 * num_workers)
    manager = mp.Manager()

    # prepare some data for the synchronization of the workers
    self.shared_data = manager.dict()
    self.shared_data['num_epochs'] = num_epochs
    if max_id is None:
        self.shared_data['max_id'] = (len(modules)
                                      if not isinstance(modules, ModulesDataset)
                                      else torch.iinfo(torch.int16).max)
    else:
        self.shared_data['max_id'] = max_id
    self.shared_data['pause'] = False
    self.shared_data['current_pick_epoch'] = 0
    self.shared_data['current_put_epoch'] = 0
    self.shared_data['current_sketch'] = 0
    self.shared_data['done_in_current_epoch'] = 0
    self.shared_data['num_sketches'] = (num_sketches if num_sketches > 0 else -1)
    self.shared_data['sketch_list'] = (
        None if num_sketches == -1
        else torch.randint(low=0,
                           high=self.shared_data['max_id'],
                           size=(self.shared_data['num_sketches'],)).int())
    self.lock = mp.Lock()

    # prepare the workers
    processes = [mp.Process(target=sketch_worker,
                            kwargs={'sketcher': self, 'modules': modules})
                 for n in range(num_workers)]
    # atexit.register(partial(exit_handler, stream=self, processes=processes))

    # go
    for p in processes:
        p.start()
    return self.queue
def __init__(self): self._value = mp.Value("i", 0) self._lock = mp.Lock()
def main(method):
    params = {
        'obs_size': (160, 100),  # screen size of cv2 window
        'dt': 0.025,  # time interval between two frames
        'ego_vehicle_filter': 'vehicle.lincoln*',  # filter for defining ego vehicle
        'port': 2000,  # connection port
        'task_mode': 'Straight',  # mode of the task [random, roundabout (only for Town03)]
        'code_mode': 'train',
        'max_time_episode': 100,  # maximum timesteps per episode
        'desired_speed': 15,  # desired speed (m/s)
        'max_ego_spawn_times': 100,  # maximum times to spawn ego vehicle
    }
    args = built_parser(method=method)
    env = gym.make(args.env_name, params=params)
    state_dim = env.state_space.shape
    action_dim = env.action_space.shape[0]
    args.state_dim = state_dim
    args.action_dim = action_dim
    action_high = env.action_space.high
    action_low = env.action_space.low
    args.action_high = action_high.tolist()
    args.action_low = action_low.tolist()
    args.seed = np.random.randint(0, 30)
    args.init_time = time.time()
    num_cpu = mp.cpu_count()
    print(state_dim, action_dim, action_high, num_cpu)

    if args.alpha == 'auto' and args.target_entropy == 'auto':
        delta_a = np.array(args.action_high, dtype=np.float32) - np.array(args.action_low, dtype=np.float32)
        args.target_entropy = -1 * args.action_dim  # + sum(np.log(delta_a/2))

    Q_net1 = QNet(args)
    Q_net1.train()
    Q_net1.share_memory()
    Q_net1_target = QNet(args)
    Q_net1_target.train()
    Q_net1_target.share_memory()
    Q_net2 = QNet(args)
    Q_net2.train()
    Q_net2.share_memory()
    Q_net2_target = QNet(args)
    Q_net2_target.train()
    Q_net2_target.share_memory()
    actor1 = PolicyNet(args)
    print("Network inited")
    if args.code_model == "eval":
        actor1.load_state_dict(
            torch.load('./' + args.env_name + '/method_' + str(args.method) +
                       '/model/policy_' + str(args.max_train) + '.pkl'))
    actor1.train()
    actor1.share_memory()
    actor1_target = PolicyNet(args)
    actor1_target.train()
    actor1_target.share_memory()
    actor2 = PolicyNet(args)
    actor2.train()
    actor2.share_memory()
    actor2_target = PolicyNet(args)
    actor2_target.train()
    actor2_target.share_memory()
    print("Network set")

    Q_net1_target.load_state_dict(Q_net1.state_dict())
    Q_net2_target.load_state_dict(Q_net2.state_dict())
    actor1_target.load_state_dict(actor1.state_dict())
    actor2_target.load_state_dict(actor2.state_dict())
    print("Network loaded!")

    Q_net1_optimizer = my_optim.SharedAdam(Q_net1.parameters(), lr=args.critic_lr)
    Q_net1_optimizer.share_memory()
    Q_net2_optimizer = my_optim.SharedAdam(Q_net2.parameters(), lr=args.critic_lr)
    Q_net2_optimizer.share_memory()
    actor1_optimizer = my_optim.SharedAdam(actor1.parameters(), lr=args.actor_lr)
    actor1_optimizer.share_memory()
    actor2_optimizer = my_optim.SharedAdam(actor2.parameters(), lr=args.actor_lr)
    actor2_optimizer.share_memory()
    log_alpha = torch.zeros(1, dtype=torch.float32, requires_grad=True)
    log_alpha.share_memory_()
    alpha_optimizer = my_optim.SharedAdam([log_alpha], lr=args.alpha_lr)
    alpha_optimizer.share_memory()
    print("Optimizer done")

    share_net = [Q_net1, Q_net1_target, Q_net2, Q_net2_target,
                 actor1, actor1_target, actor2, actor2_target, log_alpha]
    share_optimizer = [Q_net1_optimizer, Q_net2_optimizer,
                       actor1_optimizer, actor2_optimizer, alpha_optimizer]

    experience_in_queue = []
    experience_out_queue = []
    for i in range(args.num_buffers):
        experience_in_queue.append(Queue(maxsize=10))
        experience_out_queue.append(Queue(maxsize=10))
    shared_queue = [experience_in_queue, experience_out_queue]
    step_counter = mp.Value('i', 0)
    stop_sign = mp.Value('i', 0)
    iteration_counter = mp.Value('i', 0)
    shared_value = [step_counter, stop_sign, iteration_counter]
    lock = mp.Lock()
    procs = []
    if args.code_model == "train":
        for i in range(args.num_learners):
            if i % 2 == 0:
                device = torch.device("cuda:1")
            else:
                device = torch.device("cuda:0")
            # device = torch.device("cpu")
            procs.append(
                Process(target=leaner_agent,
                        args=(args, shared_queue, shared_value, share_net,
                              share_optimizer, device, lock, i)))
        for i in range(args.num_actors):
            procs.append(
                Process(target=actor_agent,
                        args=(args, shared_queue, shared_value,
                              [actor1, Q_net1], lock, i)))
        for i in range(args.num_buffers):
            procs.append(Process(target=buffer, args=(args, shared_queue, shared_value, i)))
        procs.append(Process(target=evaluate_agent, args=(args, shared_value, share_net)))
    elif args.code_model == "simu":
        procs.append(Process(target=simu_agent, args=(args, shared_value)))

    for p in procs:
        p.start()
    for p in procs:
        p.join()
def __init__(self):
    self.val = mp.Value('i', 0)
    self.lock = mp.Lock()
def __init__(self, val=True): self.val = mp.Value("b", False) self.lock = mp.Lock()
def __init__(self, config):
    self.config = config
    self.config.steps_lock = mp.Lock()
    self.config.network_lock = mp.Lock()
    self.config.total_steps = mp.Value('i', 0)
    self.config.stop_signal = mp.Value('i', False)
def __init__(self, config):
    super(TrainManager, self).__init__()
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    self.config = config
    self.training_config = self.config['training']
    if self.training_config['transfer']:
        self.target_model = load_model(self.training_config['model_path'])
    else:
        self.target_model = create_model(self.config['model'])
        alt_model = None
        if self.training_config.get('init_actor', False):
            alt_model = load_model(self.training_config['model_path'])
            self.target_model.init_actor(alt_model)
        if self.training_config.get('init_critic', False):
            if alt_model is None:
                alt_model = load_model(self.training_config['model_path'])
            self.target_model.init_critic(alt_model)
    self._prime_model(self.target_model, device)

    self.models = []
    self.proxy_models = []
    for _ in range(self.training_config['num_threads_training']):
        model = copy.deepcopy(self.target_model)
        proxy_model = copy.deepcopy(self.target_model)
        self._prime_model(model, device)
        self._prime_model(proxy_model, device)
        self.models.append(model)
        self.proxy_models.append(proxy_model)

    self.processes = []
    self.episode_queues = [
        torch_mp.Queue(maxsize=128)
        for _ in range(self.training_config['num_threads_sampling'])
    ]
    self.sample_queues = [
        torch_mp.Queue(maxsize=self.training_config['sampling_queue_max_len'])
        for _ in range(self.training_config['num_threads_training'])
    ]
    self.action_conns = [
        torch_mp.Pipe(duplex=False)
        for _ in range(self.training_config['num_threads_exploring_virtual'])
    ]
    self.observation_conns = [
        torch_mp.Pipe(duplex=False)
        for _ in range(self.training_config['num_threads_exploring_virtual'])
    ]
    self.observation_queue = torch_mp.Queue()
    self.action_queue = torch_mp.Queue()
    self.start_barrier = torch_mp.Barrier(self.training_config['num_threads_training'])
    self.finish_barrier = torch_mp.Barrier(self.training_config['num_threads_training'])
    self.update_lock = torch_mp.Lock()
    self.best_reward = Value('f', 0.0)
    self.global_episode = Value('i', 0)
    self.global_update_step = Value('i', 0)
def batch_training(fileprefix='', tasks=()):
    if fileprefix:
        filename = '{}-main.out'.format(fileprefix)
        filepath = pathlib.Path(filename).resolve()
        if not filepath.parent.exists():
            filepath.parent.mkdir(parents=True)
        stdout_target = filepath.open('wt')
    else:
        stdout_target = sys.__stdout__

    with contextlib.redirect_stdout(stdout_target):
        print('System-wide logical CPUs:', psutil.cpu_count())
        print('System-wide physical CPUs:', psutil.cpu_count(logical=False))
        oversubscribe = 2
        ngpus = torch.cuda.device_count()
        nworkers = ngpus * oversubscribe
        curproc = psutil.Process()
        createtime = curproc.create_time()
        print('Main process {} on CPU {} with {} threads'.format(
            curproc.pid, curproc.cpu_num(), curproc.num_threads()))
        print('Presently available CPUs:', len(curproc.cpu_affinity()))
        print('Presently available GPUs:', ngpus)
        print('Worker processes:', nworkers)

        # load input tasks into queue
        task_queue = mp.SimpleQueue()
        for i, task in enumerate(tasks):
            print('Task', i + 1, task)
            task_queue.put(task)

        # worker locks
        locks = []
        active_processes = []
        for i in range(nworkers):
            locks.append(mp.Lock())
            active_processes.append(None)

        # results queue
        result_queue = mp.SimpleQueue()

        itask = 0
        while not task_queue.empty():
            for ilock, lock in enumerate(locks):
                if lock.acquire(timeout=1):
                    # acquired the lock, so the worker slot should be free
                    assert active_processes[ilock] is None
                    if task_queue.empty():
                        lock.release()
                        continue
                    train_kwargs = task_queue.get()
                    igpu = ilock % ngpus
                    args = (itask, ilock, igpu, fileprefix, train_kwargs, result_queue)
                    p = mp.Process(target=gpu_worker, args=args)
                    print('  Launching task {}/{} on worker {} on GPU {}'.format(
                        itask, len(tasks), ilock, igpu))
                    itask += 1
                    p.start()
                    active_processes[ilock] = p
                else:
                    # slot is locked, so a process should be running there
                    existing_process = active_processes[ilock]
                    assert existing_process is not None
                    if existing_process.exitcode is not None:
                        # process is complete; close and release
                        print('  Process {} finished'.format(existing_process.pid))
                        active_processes[ilock] = None
                        lock.release()
        print('Finished task loop')

        still_running = True
        while still_running:
            still_running = False
            for i, process in enumerate(active_processes):
                if process is None:
                    continue
                if process.exitcode is None:
                    still_running = True
                    break
                else:
                    print('  Process {} finished'.format(process.pid))
                    active_processes[i] = None
            time.sleep(1)

        results = []
        while not result_queue.empty():
            results.append(result_queue.get())
        print('Tasks:', len(tasks), 'results:', len(results))

        def sort_func(element):
            return element[0]

        results = sorted(results, key=sort_func)
        for i, result in enumerate(results):
            print('Task {:3d} worker/GPU {:2d}/{:1d} dt {:5.1f}s max/med acc {:5.1f}%/{:5.1f}% kw: {}'
                  .format(*result[0:4], result[4].max(), np.median(result[4]), result[6]))
        delta_seconds = time.time() - createtime
        print('Main execution: {:.1f} s'.format(delta_seconds))
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np

'''
Let's see how the plain (non-MAML) version adapts
'''
task_queue = mp.JoinableQueue()
train_episodes_queue = mp.Queue()
valid_episodes_queue = mp.Queue()
policy_lock = mp.Lock()

env_name = "2DNavigation-v0"
env_kwargs = {
    "low": -0.5,
    "high": 0.5,
    "task": {"goal": np.array([1, 1])}
}
env = gym.make(env_name, **env_kwargs)
print(env)

policy = get_policy_for_env(env, hidden_sizes=(64, 64), nonlinearity='tanh')
policy.share_memory()
baseline = LinearFeatureBaseline(get_input_size(env))

seed = None
def __init__(self, x, y):
    self.ctrl = ReadWriteControl(self)
    self.ctrl_flick = mp.Lock()
    self.which_buffer = mp.Value("l", 0)
    self.buffers = [x, y]
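The `which_buffer` Value guarded by `ctrl_flick` suggests a double-buffering scheme. A sketch of the flip and read operations under that interpretation; the `flip` and `current` method names are assumptions, not part of the snippet:

def flip(self):
    # swap the active buffer; the lock keeps the flip atomic for readers
    with self.ctrl_flick:
        self.which_buffer.value = 1 - self.which_buffer.value

def current(self):
    with self.ctrl_flick:
        return self.buffers[self.which_buffer.value]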
def main(method):
    args = built_parser(method=method)
    env = gym.make(args.env_name)
    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]
    args.state_dim = state_dim
    args.action_dim = action_dim
    action_high = env.action_space.high
    action_low = env.action_space.low
    args.action_high = action_high.tolist()
    args.action_low = action_low.tolist()
    args.seed = np.random.randint(0, 30)
    args.init_time = time.time()

    if args.alpha == 'auto' and args.target_entropy == 'auto':
        delta_a = np.array(args.action_high, dtype=np.float32) - np.array(args.action_low, dtype=np.float32)
        args.target_entropy = -1 * args.action_dim  # + sum(np.log(delta_a/2))

    Q_net1 = QNet(args)
    Q_net1.train()
    Q_net1.share_memory()
    Q_net1_target = QNet(args)
    Q_net1_target.train()
    Q_net1_target.share_memory()
    Q_net2 = QNet(args)
    Q_net2.train()
    Q_net2.share_memory()
    Q_net2_target = QNet(args)
    Q_net2_target.train()
    Q_net2_target.share_memory()
    actor1 = PolicyNet(args)
    actor1.train()
    actor1.share_memory()
    actor1_target = PolicyNet(args)
    actor1_target.train()
    actor1_target.share_memory()
    actor2 = PolicyNet(args)
    actor2.train()
    actor2.share_memory()
    actor2_target = PolicyNet(args)
    actor2_target.train()
    actor2_target.share_memory()

    Q_net1_target.load_state_dict(Q_net1.state_dict())
    Q_net2_target.load_state_dict(Q_net2.state_dict())
    actor1_target.load_state_dict(actor1.state_dict())
    actor2_target.load_state_dict(actor2.state_dict())

    Q_net1_optimizer = my_optim.SharedAdam(Q_net1.parameters(), lr=args.critic_lr)
    Q_net1_optimizer.share_memory()
    Q_net2_optimizer = my_optim.SharedAdam(Q_net2.parameters(), lr=args.critic_lr)
    Q_net2_optimizer.share_memory()
    actor1_optimizer = my_optim.SharedAdam(actor1.parameters(), lr=args.actor_lr)
    actor1_optimizer.share_memory()
    actor2_optimizer = my_optim.SharedAdam(actor2.parameters(), lr=args.actor_lr)
    actor2_optimizer.share_memory()
    log_alpha = torch.zeros(1, dtype=torch.float32, requires_grad=True)
    log_alpha.share_memory_()
    alpha_optimizer = my_optim.SharedAdam([log_alpha], lr=args.alpha_lr)
    alpha_optimizer.share_memory()

    share_net = [Q_net1, Q_net1_target, Q_net2, Q_net2_target,
                 actor1, actor1_target, actor2, actor2_target, log_alpha]
    share_optimizer = [Q_net1_optimizer, Q_net2_optimizer,
                       actor1_optimizer, actor2_optimizer, alpha_optimizer]

    experience_in_queue = []
    experience_out_queue = []
    for i in range(args.num_buffers):
        experience_in_queue.append(Queue(maxsize=10))
        experience_out_queue.append(Queue(maxsize=10))
    shared_queue = [experience_in_queue, experience_out_queue]
    step_counter = mp.Value('i', 0)
    stop_sign = mp.Value('i', 0)
    iteration_counter = mp.Value('i', 0)
    shared_value = [step_counter, stop_sign, iteration_counter]
    lock = mp.Lock()
    procs = []

    if args.code_model == "train":
        for i in range(args.num_actors):
            procs.append(
                Process(target=actor_agent,
                        args=(args, shared_queue, shared_value,
                              [actor1, Q_net1], lock, i)))
        for i in range(args.num_buffers):
            procs.append(Process(target=buffer, args=(args, shared_queue, shared_value, i)))
        procs.append(Process(target=evaluate_agent, args=(args, shared_value, share_net)))
        for i in range(args.num_learners):
            # device = torch.device("cuda")
            device = torch.device("cpu")
            procs.append(
                Process(target=leaner_agent,
                        args=(args, shared_queue, shared_value, share_net,
                              share_optimizer, device, lock, i)))
    elif args.code_model == "simu":
        procs.append(Process(target=simu_agent, args=(args, shared_value)))

    for p in procs:
        p.start()
    for p in procs:
        p.join()
def __init__(self, num_inputs):
    self.lock = mp.Lock()
    self.n = torch.zeros(num_inputs).share_memory_()
    self.mean = torch.zeros(num_inputs).share_memory_()
    self.s = torch.zeros(num_inputs).share_memory_()
    self.var = torch.zeros(num_inputs).share_memory_()
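The `n`/`mean`/`s`/`var` fields match the usual online (Welford-style) running-statistics layout. A sketch of the corresponding update method, assuming a `push(x)` name that does not appear in the snippet:

def push(self, x):
    # Welford's online update; the lock keeps (n, mean, s, var)
    # mutually consistent when several workers push at once
    with self.lock:
        self.n += 1
        old_mean = self.mean.clone()
        self.mean += (x - old_mean) / self.n
        self.s += (x - old_mean) * (x - self.mean)
        self.var.copy_(self.s / torch.clamp(self.n - 1, min=1))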
def __init__(self, models):
    self.lock = mp.Lock()
    self.grads = {}
    for name, p in models.named_parameters():
        self.grads[name + '_grad'] = torch.zeros(p.size()).share_memory_()
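A plausible companion method for this shared-gradient table, copying a worker's gradients into the shared tensors under the lock; the `add_grads` name is an assumption:

def add_grads(self, model):
    # accumulate one worker's gradients into the shared buffers;
    # the lock prevents interleaved writes from other processes
    with self.lock:
        for name, p in model.named_parameters():
            if p.grad is not None:
                self.grads[name + '_grad'] += p.grad.detach().cpu()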
                                  outputs=act_dim,
                                  n_hidden_layers=3,
                                  n_hidden_units=128)
shared_model = networks.DiscreteActorCriticSplit(actor=act_net,
                                                 critic=value_net,
                                                 add_softmax=True)
shared_average_model = copy.deepcopy(shared_model)
shared_average_model.no_grads()  # Set requires_grad to False for all parameters
shared_model.share_memory()
shared_average_model.share_memory()
shared_opt = optimizers.SharedAdam(shared_model.parameters(), lr=args.lr)

# Create shared variables
shared_counter = utils.Counter()
shared_model_lock = mp.Lock() if not args.no_lock else None
summary_queue = mp.Queue()

processes = []
workers = []
for i in range(args.num_workers):
    w = worker.Worker(worker_id=i,
                      env_name=args.env_name,
                      n_steps=args.worker_steps,
                      max_steps=args.t_max,
                      shared_model=shared_model,
                      shared_avg_model=shared_average_model,
                      shared_optimizer=shared_opt,
                      shared_counter=shared_counter,
                      df=args.discount,
                      c=args.c,
                    help='Value for gamma')
parser.add_argument('--beta',
                    default=0.0001,
                    type=float,
                    help='Value for beta')
args = parser.parse_args()
args.training = True

dataset = mp.Manager().list(Dataset(file=args.data))

model = MCTSnet()
model.load(args.load_model)
model.share_memory()

model_lock = mp.Lock()
dataset_lock = mp.Lock()

processes = [
    mp.Process(target=collector, args=(idx, model, dataset, args, dataset_lock))
    for idx in range(args.n_collectors)
]
processes.extend([
    mp.Process(target=optimiser, args=(idx, model, dataset, args, model_lock))
    for idx in range(args.n_collectors, args.n_collectors + args.n_optimisers)
])
processes.append(mp.Process(target=checkpoint, args=(model, dataset, args)))
def main():
    os.environ['PYTHONWARNINGS'] = 'ignore:semaphore_tracker:UserWarning'
    mp.set_start_method('spawn', True)
    shutil.rmtree('runs', ignore_errors=True)
    if not os.path.exists('logs'):
        os.makedirs('logs')
    if not os.path.exists('trained'):
        os.makedirs('trained')

    parser = argparse.ArgumentParser()
    parser.add_argument('--environment',
                        default='RLBench',
                        help='Environment to use for training [default = RLBench]')
    parser.add_argument('--save_model',
                        default='./model.model',
                        help='Path to save the model [default = "./model.model"]')
    parser.add_argument('--load_model',
                        default='',
                        help="Path to load the model [default = '']")
    parser.add_argument('--n_workers',
                        default=1,
                        type=int,
                        help='Number of workers [default = 1]')
    parser.add_argument('--target_update_frequency',
                        default=100,
                        type=int,
                        help='Frequency for syncing target network [default = 100]')
    parser.add_argument('--checkpoint_frequency',
                        default=30,
                        type=int,
                        help='Frequency for creating checkpoints [default = 30]')
    parser.add_argument('--lr',
                        default=1e-6,
                        type=float,
                        help='Learning rate for the training [default = 1e-6]')
    parser.add_argument('--batch_size',
                        default=64,
                        type=int,
                        help='Batch size for the training [default = 64]')
    parser.add_argument('--gamma',
                        default=0.99,
                        type=float,
                        help='Discount factor for the training [default = 0.99]')
    parser.add_argument('--eps',
                        default=0.997,
                        type=float,
                        help='Greedy constant for the training [default = 0.997]')
    parser.add_argument('--min_eps',
                        default=0.1,
                        type=float,
                        help='Minimum value for greedy constant [default = 0.1]')
    parser.add_argument('--buffer_size',
                        default=200000,
                        type=int,
                        help='Buffer size [default = 200000]')
    parser.add_argument('--episode_length',
                        default=900,
                        type=int,
                        help='Episode length [default = 900]')
    parser.add_argument('--headless',
                        default=False,
                        type=bool,
                        help='Run simulation headless [default = False]')
    parser.add_argument('--advance_iteration',
                        default=0,
                        type=int,
                        help='By how many iterations the eps decay is extended [default = 0]')
    parser.add_argument('--warmup',
                        default=100,
                        type=int,
                        help='How many full exploration iterations [default = 100]')
    args = parser.parse_args()

    SIMULATOR, NETWORK = environments[args.environment]
    model_shared = NETWORK()
    model_shared.load(args.load_model)
    model_shared.share_memory()
    lock = mp.Lock()

    # Queues
    queues = [mp.Queue() for idx in range(args.n_workers)]

    # Workers
    workers_explore = [
        mp.Process(target=explore,
                   args=(idx, SIMULATOR, model_shared, queues[idx], args, lock))
        for idx in range(args.n_workers)
    ]
    workers_explore.append(
        mp.Process(target=optimise,
                   args=(args.n_workers, model_shared, queues, args, lock)))
    workers_explore.append(
        mp.Process(target=checkpoint, args=(model_shared, args)))

    [p.start() for p in workers_explore]
    print("Successfully started workers!")
    try:
        [p.join() for p in workers_explore]
    except Exception as e:
        print(e)
    except KeyboardInterrupt:
        print('<< EXITING >>')
    finally:
        [p.kill() for p in workers_explore]
        [q.close() for q in queues]
        os.system('clear')
        if input('Save model? (y/n): ') in ['y', 'Y', 'yes']:
            print('<< SAVING MODEL >>')
            model_shared.save(args.save_model)
critic_global = Critic(n_inputs, args.hiddensize)
critic_global.share_memory()

if args.input_path is not None:
    policy_path = os.path.join(args.input_path, 'policy.pt')
    critic_path = os.path.join(args.input_path, 'critic.pt')
    policy_global.load_state_dict(torch.load(policy_path))
    critic_global.load_state_dict(torch.load(critic_path))

episode_count = mp.Value('i', 0)
steps_global = mp.Value('i', 0)
atr = mp.Value('d', 0)
steps_lock = mp.Lock()

processes = []
p = mp.Process(target=validate,
               args=(policy_global, critic_global, steps_global, episode_count, args))
p.start()
processes.append(p)
if args.render:
    p = mp.Process(target=render, args=(policy_global, args))
    p.start()
for i in range(args.num_envs):
    p = mp.Process(target=train,
def __init__(self):
    self.lock = mp.Lock()
    self.weights = None
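A sketch of thread-safe accessors for this weights holder, with assumed `get`/`set` names. Note that a plain attribute like `self.weights` is only visible across processes if the holder itself lives behind a `multiprocessing.Manager` or the stored tensors are in shared memory; with `fork`, each child otherwise sees its own copy.

def set(self, state_dict):
    with self.lock:
        # keep a detached CPU copy so consumers can load it safely
        self.weights = {k: v.detach().cpu() for k, v in state_dict.items()}

def get(self):
    with self.lock:
        return self.weights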
def __init__(self):
    self.episodes = mp.Value('i', 0)
    self.frames = mp.Value('i', 0)
    self.lock = mp.Lock()
def __init__(self, val=True): self.val = mp.Value("i", 0) self.lock = mp.Lock()
def train(args, model):
    print("training...")
    model.train()

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    if args.load is not None and os.path.isfile(args.load + '_optimizer.pth'):
        source_optimizer = torch.load(args.load + '_optimizer.pth')
        optimizer.load_state_dict(source_optimizer.state_dict())
        del source_optimizer
    optimizer.zero_grad()

    state = NormalizedState(screen=None, depth=None, labels=None, variables=None)
    state.screen = torch.Tensor(args.batch_size, *args.screen_size)
    state.variables = torch.Tensor(args.batch_size, args.variable_num)
    action = torch.LongTensor(args.batch_size, 1)
    reward = torch.Tensor(args.batch_size, 1)
    terminal = torch.Tensor(args.batch_size)
    episode_return = torch.zeros(args.batch_size)

    state.screen.share_memory_()
    state.variables.share_memory_()
    action.share_memory_()
    reward.share_memory_()
    terminal.share_memory_()
    episode_return.share_memory_()

    counter = torch.zeros(1)
    counter.share_memory_()

    def instance(args, state, main_lock, main_event, event, id):
        game = DoomInstance(args.vizdoom_config, args.wad_path, args.skiprate,
                            id, actions=args.action_set, join=True,
                            visible=False, color=id)
        first_pass = True
        while True:
            event.clear()
            if first_pass:
                first_pass = False
                normalized_state = game.get_state_normalized()
                state.screen[id, :] = torch.from_numpy(normalized_state.screen)
                state.variables[id, :] = torch.from_numpy(normalized_state.variables)
            else:
                normalized_state, step_reward, finished = game.step_normalized(action[id, 0])
                # normalized_state = game.get_state_normalized()
                state.screen[id, :] = torch.from_numpy(normalized_state.screen)
                state.variables[id, :] = torch.from_numpy(normalized_state.variables)
                reward[id, 0] = step_reward
                if finished:
                    episode_return[id] = float(game.get_episode_return())
                    # cut rewards from future actions
                    terminal[id] = 0
                else:
                    terminal[id] = 1
            # increase counter and wait for the main process
            with main_lock:
                counter[0] += 1
                if counter[0] >= args.batch_size:
                    main_event.set()
            event.wait()

    main_event = mp.Event()
    main_lock = mp.Lock()
    procs = []
    events = []
    # mp.set_start_method('spawn')
    for i in range(args.batch_size):
        event = mp.Event()
        p = mp.Process(target=instance, args=(args, state, main_lock, main_event, event, i))
        p.start()
        procs.append(p)
        events.append(event)

    main_event.wait()
    main_event.clear()
    counter[0] = 0

    # start training
    for episode in range(args.episode_num):
        batch_time = time.time()
        for step in range(args.episode_size):
            # get action
            action.copy_(model.get_action(state))
            # step
            for event in events:
                event.set()
            main_event.wait()
            main_event.clear()
            counter[0] = 0
            # get step info
            model.set_reward(reward)
            model.set_terminal(terminal)

        # update model
        model.backward()
        optimizer.step()
        optimizer.zero_grad()

        if episode % 1 == 0:
            print("{}: mean_return = {:f}, batch_time = {:.3f}".format(
                episode, episode_return.mean(), time.time() - batch_time))
        if episode % 500 == 0:
            torch.save(model, args.model + '_model_server_cp.pth')
            torch.save(optimizer, args.model + '_optimizer_server_cp.pth')

    # terminate games
    torch.save(model, args.model + '_model.pth')
    torch.save(optimizer, args.model + '_optimizer.pth')
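Distilled from the `train` function above: each worker increments a shared counter under `main_lock`, the last one sets `main_event` to wake the main process, and the main process fans work back out through per-worker events. A self-contained sketch of just that barrier pattern, for illustration only:

import torch
import torch.multiprocessing as mp

def barrier_worker(counter, main_lock, main_event, my_event, n_workers):
    while True:
        my_event.clear()
        # ... do one step of work here ...
        with main_lock:
            counter[0] += 1
            if counter[0] >= n_workers:
                main_event.set()  # last worker releases the main process
        my_event.wait()           # block until the main process signals

if __name__ == '__main__':
    n_workers = 4
    counter = torch.zeros(1).share_memory_()
    main_lock = mp.Lock()
    main_event = mp.Event()
    events = [mp.Event() for _ in range(n_workers)]
    procs = [mp.Process(target=barrier_worker,
                        args=(counter, main_lock, main_event, e, n_workers))
             for e in events]
    for p in procs:
        p.start()
    main_event.wait()   # all workers have finished one step
    main_event.clear()
    counter[0] = 0
    for e in events:    # release the workers for the next step
        e.set()
    for p in procs:     # one round is enough for the demo
        p.terminate()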
def __init__(self, grad_norm, optimizer, scheduler):
    self.optimizer: torch.optim.Optimizer = optimizer
    self.scheduler = scheduler
    self.grad_norm = grad_norm
    self.global_step = torch.tensor(0)
    self.lock = mp.Lock()
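One way such a wrapper is commonly driven; the `step` method below is an assumed completion, not part of the snippet. It clips gradients, steps the optimizer and scheduler, and bumps the shared step counter, all serialised by the lock:

def step(self, parameters):
    with self.lock:
        # serialise updates so concurrent workers cannot interleave them
        torch.nn.utils.clip_grad_norm_(parameters, self.grad_norm)
        self.optimizer.step()
        self.optimizer.zero_grad()
        if self.scheduler is not None:
            self.scheduler.step()
        self.global_step += 1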
                        actor=actor,
                        argp=parser_args)
    worker.run()


if __name__ == '__main__':
    device = CUDA if torch.cuda.is_available() else CPU
    model_root_dir = str(pathlib.Path(__file__).resolve().parents[1]) + "/models/"
    if not os.path.isdir(model_root_dir):
        os.mkdir(model_root_dir)
    model_dir = model_root_dir + datetime.datetime.now().strftime("%H_%M__%d_%m")
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)

    lock = mp.Lock()
    args = parser.parse_args()
    logger = SummaryWriter()
    # logger.add_hparams(get_hparam_dict(args), {'mean reward': 0})
    actor = Actor(num_actions=NUM_ACTIONS,
                  num_obs=NUM_OBSERVATIONS,
                  log_std_init=np.log(args.init_std))
    # logger.add_graph(actor, torch.zeros(3))
    actor.share_memory()
    critic = Critic(num_actions=NUM_ACTIONS, num_obs=NUM_OBSERVATIONS)
    critic.share_memory()
    shared_replay_buffer = SharedReplayBuffer(capacity=args.replay_buffer_size,
def main():
    params = Params()
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log', level=logging.INFO)
    mp.set_start_method('spawn')

    test_files = [
        'model3115920.ckpt', 'model3070538.ckpt', 'model3067604.ckpt',
        'model3043059.ckpt', 'model2994943.ckpt', 'model2983232.ckpt',
        'model2912569.ckpt', 'model2849037.ckpt', 'model2741430.ckpt',
        'model2696001.ckpt', 'model2685407.ckpt', 'model2659828.ckpt',
        'model2626517.ckpt', 'model2621966.ckpt', 'model2583286.ckpt',
        'model2583025.ckpt', 'model2548002.ckpt', 'model2545110.ckpt',
        'model2484209.ckpt', 'model2461454.ckpt', 'model2449942.ckpt',
        'model2444853.ckpt', 'model2424837.ckpt', 'model2414733.ckpt',
        'model2383330.ckpt'
    ]

    for ckpt in test_files:
        init_msg = " ".join([
            "\n\n++++++++++++++++++++ Initial Task info +++++++++++++++++++++\n",
            "weight file name = {:s}\n".format(ckpt)
        ])
        print(init_msg)
        logging.info(init_msg)

        seen_succ = mp.Value('i', 0)
        seen_length = mp.Value('i', 0)
        unseen_succ = mp.Value('i', 0)
        unseen_length = mp.Value('i', 0)
        lock = mp.Lock()
        # with lock:  # initialize, is it right?
        #     seen_succ = 0
        #     seen_length = 0
        #     unseen_succ = 0
        #     unseen_length = 0

        load_model = params.weight_dir + ckpt
        # load_model = torch.load(params.weight_dir + ckpt)
        # test(params, shared_model, count, lock, best_acc)

        processes = []
        test_process = 0
        for rank in range(params.n_process):
            p = mp.Process(target=run_test,
                           args=(test_process, params, load_model, lock,
                                 seen_succ, seen_length, unseen_succ, unseen_length))
            test_process += 1
            p.start()
            processes.append(p)
        for p in processes:
            p.join()

        msg = " ".join([
            "++++++++++++++++++++ Total Task Stats +++++++++++++++++++++\n",
            "Seen Avg Length = {:.3f}\n".format(seen_length.value / (20 * params.n_test)),
            "Seen Total Success rate {:3.2f}%".format(100 * seen_succ.value / (20 * params.n_test)),
            "UnSeen Avg Length = {:.3f}\n".format(unseen_length.value / (50 * params.n_test)),
            "UnSeen Total Success rate {:3.2f}%\n\n".format(100 * unseen_succ.value / (50 * params.n_test)),
        ])
        print(msg)
        logging.info(msg)
    print("Done")
num_processes = args.nproc
criterion = nn.CrossEntropyLoss()
sta_lidx = work_partition[wid]
end_lidx = work_partition[wid + 1]
sub_net = VGG('VGG19', sta_lidx=sta_lidx, end_lidx=end_lidx)
sub_net.to(device)

train_proc_list = []
sync_proc_list = []
fp_send_proc_list = []
fp_recv_proc_list = []
bp_send_proc_list = []
bp_recv_proc_list = []

# fp_to_send, fp_recved, bp_send, bp_recv, grad_aggregated should be counters with atomic increment
sub_net.share_memory()
grad_dict = gen_shared_grad(sub_net)
sync_lock = mp.Lock()
sync_counter = torch.zeros(1, dtype=torch.int32)
sync_counter = sync_counter.share_memory_()
global_step = torch.zeros(1, dtype=torch.int32)
global_step = global_step.share_memory_()

for rank in range(num_processes):
    # fp_head_tensor, fp_tail_tensor, bp_head_tensor, bp_tail_tensor = gen_fp_bp_tensor_list(bs, wid, wn)
    fp_head_list, fp_tail_list, bp_head_list, bp_tail_list = gen_fp_bp_tensor_list(
        iter_thresh, wid, wn, input_shp, output_shp)
    # print(fp_tail_tensor.size())
    # print("########")
    shared_cnters = gen_shared_counter()
    # rank, bs, wid, wn, fp_tail_list, shared_cnters
    fp_send_p = mp.Process(target=fp_send_proc,
def batch_training():
    print('System-wide logical CPUs:', psutil.cpu_count())
    print('System-wide physical CPUs:', psutil.cpu_count(logical=False))
    oversubscribe = 2
    ngpus = torch.cuda.device_count()
    nworkers = ngpus * oversubscribe
    curproc = psutil.Process()
    createtime = curproc.create_time()
    print('Main process {} on CPU {} with {} threads'.format(
        curproc.pid, curproc.cpu_num(), curproc.num_threads()))
    print('Presently available CPUs:', len(curproc.cpu_affinity()))
    print('Presently available GPUs:', ngpus)
    print('Worker processes:', nworkers)

    # tasks and queue
    tasks = []
    # data = 1
    # nrepeat = 10
    # epochs = 12
    # optim_names = ['Adam', 'Adagrad', 'Adamax', 'SGD', 'ASGD']
    # learning_rates = [1e-2, 1e-3, 1e-4, 1e-5]
    data = 0
    nrepeat = 3
    epochs = 4
    optim_names = ['Adam', 'Adagrad', 'SGD']
    learning_rates = [1e-3, 1e-5]
    for optim_name in optim_names:
        for lr in learning_rates:
            task_kw = {
                'data': data,
                'epochs': epochs,
                'optim_name': optim_name,
                'optim_kwargs': {'lr': lr},
            }
            tasks.extend([task_kw] * nrepeat)
    task_queue = mp.SimpleQueue()
    for i, task in enumerate(tasks):
        print('Task', i + 1, task)
        task_queue.put(task)

    # worker locks
    locks = []
    active_processes = []
    for i in range(nworkers):
        locks.append(mp.Lock())
        active_processes.append(None)

    # results queue
    result_queue = mp.SimpleQueue()

    itask = 0
    while not task_queue.empty():
        for ilock, lock in enumerate(locks):
            if lock.acquire(timeout=1):
                # acquired the lock, so the worker slot should be free
                assert active_processes[ilock] is None
                if task_queue.empty():
                    lock.release()
                    continue
                train_kwargs = task_queue.get()
                igpu = ilock % ngpus
                args = (itask, ilock, igpu, train_kwargs, result_queue)
                p = mp.Process(target=gpu_worker, args=args)
                print('*** Launching task {}/{} on worker {} on GPU {}'.format(
                    itask, len(tasks), ilock, igpu))
                itask += 1
                p.start()
                active_processes[ilock] = p
            else:
                # slot is locked, so a process should be running there
                existing_process = active_processes[ilock]
                assert existing_process is not None
                if existing_process.exitcode is not None:
                    # process is complete; close and release
                    print('*** Process {} finished'.format(existing_process.pid))
                    active_processes[ilock] = None
                    lock.release()
    print('Finished task loop')

    still_running = True
    while still_running:
        still_running = False
        for i, process in enumerate(active_processes):
            if process is None:
                continue
            if process.exitcode is None:
                still_running = True
                break
            else:
                print('*** Process {} finished'.format(process.pid))
                active_processes[i] = None
        time.sleep(1)

    results = []
    while not result_queue.empty():
        results.append(result_queue.get())
    print('Tasks:', len(tasks), 'results:', len(results))

    def sort_func(element):
        return element[0]

    results = sorted(results, key=sort_func)
    for i, result in enumerate(results):
        print('Task {:4d} worker {:2d} GPU {:2d} dt {:5.1f} s acc {:5.2f}% kw: {}'.format(*result))
    delta_seconds = time.time() - createtime
    print('Main execution: {:.1f} s'.format(delta_seconds))
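For completeness, a minimal sketch of a `gpu_worker` compatible with the launcher above. Everything inside is an assumption except the argument order and the result-tuple layout implied by the final print format; `train(...)` stands in for whatever the real per-task training entry point is:

import time
import torch

def gpu_worker(itask, iworker, igpu, train_kwargs, result_queue):
    start = time.time()
    torch.cuda.set_device(igpu)   # pin this process to its assigned GPU
    acc = train(**train_kwargs)   # hypothetical entry point returning accuracy in %
    dt = time.time() - start
    # (itask, iworker, igpu, dt, acc, kwargs) matches the report format string
    result_queue.put((itask, iworker, igpu, dt, acc, train_kwargs))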
def __init__(self): self._value = mp.Value("b", False) self._lock = mp.Lock()