def train(self):
    training_config = self.config['training']
    trainer_seed = training_config['global_seed'] + MAGIC_NUMBER * self.p_id
    set_seeds(trainer_seed)

    trainer_logdir = '{}/train_thread_{}'.format(training_config["log_dir"], self.p_id)
    make_dir_if_required(trainer_logdir)
    self.logger = Logger(trainer_logdir)

    self.start_time = time.time()
    update_step = 0
    received_examples = 1  # start at 1 to avoid division by zero in the metrics below
    buffer_size = 0

    self.criterion = torch.nn.MSELoss()
    self.actor_decay = TrainingDecay(training_config['actor_train_decay'])
    self.critic_decay = TrainingDecay(training_config['critic_train_decay'])

    while True:
        if update_step > 0:
            # Block until the sampling worker provides the next training batch.
            train_data, received_examples, buffer_size = self.sample_queue.get()
            step_metrics, step_info = self.update(train_data)
            for key, value in step_metrics.items():
                self.logger.scalar_summary(key, value, update_step)
        else:
            time.sleep(training_config['train_delay'])

        update_step += 1

        self.logger.scalar_summary("buffer size", buffer_size, self.global_episode.value)
        self.logger.scalar_summary(
            "updates per example",
            update_step * training_config['batch_size'] / received_examples,
            self.global_episode.value)
        self.logger.scalar_summary(
            "updates per example global",
            self.global_update_step.value * training_config['batch_size'] / received_examples,
            self.global_episode.value)
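# Hedged sketch (illustrative only): set_seeds, used throughout these workers, is
# assumed to seed Python's random, NumPy and PyTorch so every process is reproducible.
# The actual helper in this codebase may differ (e.g. it may skip CUDA seeding).
import random

import numpy as np
import torch

def set_seeds_sketch(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)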
def evaluate_single_thread(p_id, model, config, seeds_per_thread, output: Queue):
    rewards = []
    modified_rewards = []
    steps_counts = []
    infos = []

    for seed_plus in range(p_id * seeds_per_thread, (p_id + 1) * seeds_per_thread):
        explorer_seed = 721 + seed_plus * 29
        set_seeds(explorer_seed)

        internal_env_args = {
            'env_type': 'virtual',
            'env_init_args': {
                'host_tcp': config['training']['client']['host_tcp'],
                'port_tcp': config['training']['client']['port_tcp_start'] + p_id
            },
            'env_config': config['environment']['core']
        }
        internal_env_args['env_config']['seed'] = explorer_seed

        env = create_env(config, internal_env_args, transfer=config['training']['transfer'])
        observation = env.reset()

        done = False
        steps = 0
        reward_sum = 0.0
        reward_modified_sum = 0.0

        while not done:
            observation_transformed, _ = observation
            observation, (reward, reward_modified), done, _ = env.step(model.act(observation_transformed))
            reward_sum += reward
            reward_modified_sum += reward_modified
            steps += config["environment"]["wrapper"]["repeat_actions"]

        target_velocities = [[float(v) for v in tv]
                             for tv in np.unique([obs["target_vel"] for obs in env.observations], axis=0)]
        velocity_similarity_measure = [
            np.linalg.norm(np.array(obs["target_vel"])[[0, 2]]
                           - np.array(obs["body_vel"]["pelvis"])[[0, 2]])
            for obs in env.observations]
        velocity_confidence_intervals = [mean_confidence_interval(velocity_similarity_measure, 0.95),
                                         mean_confidence_interval(velocity_similarity_measure, 0.99)]

        rewards.append(reward_sum)
        modified_rewards.append(reward_modified_sum)
        steps_counts.append(steps)
        print(explorer_seed, ':', reward_sum, ':', steps)
        infos.append({"target": target_velocities,
                      "target_similarity_confidence_intervals": velocity_confidence_intervals,
                      "seed": explorer_seed})

    output.put((rewards, modified_rewards, steps_counts, infos))
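# Hedged usage sketch (not part of the original module): evaluate_single_thread is
# written to be launched in parallel worker processes, each putting its results on the
# shared multiprocessing queue passed as `output`. The names run_parallel_evaluation,
# n_threads and seeds_per_thread below are placeholders introduced for illustration.
from multiprocessing import Process, Queue

def run_parallel_evaluation(model, config, n_threads=4, seeds_per_thread=2):
    output = Queue()
    workers = [Process(target=evaluate_single_thread,
                       args=(p_id, model, config, seeds_per_thread, output))
               for p_id in range(n_threads)]
    for worker in workers:
        worker.start()
    # Each worker puts exactly one (rewards, modified_rewards, steps_counts, infos) tuple.
    results = [output.get() for _ in workers]
    for worker in workers:
        worker.join()
    return results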
def client_sampling_worker(config, p_id, global_update_step, sample_queues, episode_queue, filter_idx=None):
    set_seeds(p_id * MAGIC_SEED + (p_id if filter_idx is None else filter_idx))

    training_config = config['training']
    buffer = create_buffer(training_config)
    received_examples = 1  # start at 1 to avoid division by zero in downstream metrics
    counter = 0

    while True:
        # Drain the episode queue into the replay buffer, but cap the work per iteration.
        taken_replays = 0
        while True:
            if taken_replays > 128:
                print("Episode queue is too big!")
                episode_queue.clear()
                break
            try:
                replays = episode_queue.get_nowait()
                for (observation, action, reward, next_observation, done) in replays:
                    buffer.add(observation, action, reward, next_observation, done)
                received_examples += len(replays)
                taken_replays += 1
            except py_queue.Empty:
                break

        if len(buffer) < training_config['batch_size']:
            time.sleep(1)
            continue

        train_data_list = []
        for _ in range(len(sample_queues)):
            train_data = buffer.sample(batch_size=training_config['batch_size'])
            train_data_list.append(train_data)

        if counter % 10000 == 0:
            for sample_queue in sample_queues:
                print('sampling queue size: ', sample_queue.qsize())
            print()

        counter += 1
        buffer_size = len(buffer)
        for sample_queue, train_data in zip(sample_queues, train_data_list):
            sample_queue.put((train_data, received_examples, buffer_size))
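# A minimal sketch of the buffer interface that client_sampling_worker relies on:
# create_buffer is assumed to return an object exposing add / sample / __len__. This is
# an illustrative uniform replay buffer, not the buffer actually used in this codebase.
import random
from collections import deque

class UniformReplayBuffer:
    def __init__(self, capacity=1_000_000):
        self.storage = deque(maxlen=capacity)

    def add(self, observation, action, reward, next_observation, done):
        self.storage.append((observation, action, reward, next_observation, done))

    def sample(self, batch_size):
        batch = random.sample(self.storage, batch_size)
        # Transpose the list of transitions into per-field lists.
        return tuple(map(list, zip(*batch)))

    def __len__(self):
        return len(self.storage)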
def __init__(self, exploration_type, config, p_id, model, internal_env_args, episodes_queues,
             best_reward, global_episode, global_update_step):
    self.exploration_type = exploration_type
    self.config = config
    self.p_id = p_id

    if exploration_type in ('exploiting', 'exploiting_virtual'):
        # Exploiting threads keep a local copy of the model and periodically pull
        # weights from the shared training model.
        self.model = create_model(config['model'])
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model.train()
        self.model.to(device)
        self.training_model = model
    else:
        self.model = model

    self.explorer_seed = config['training']['global_seed'] + MAGIC_NUMBER * self.p_id
    self.explore_after = config['training'].get('explore_after', 0)
    self.steps_per_action = config['environment']['wrapper']['repeat_actions']
    set_seeds(self.explorer_seed)

    internal_env_args['env_config']['seed'] = self.explorer_seed
    self.environment = create_env(self.config, internal_env_args,
                                  transfer=config['training']['transfer'])

    self.episodes_queues = episodes_queues
    self.best_reward = best_reward
    self.saving_best_reward = -np.inf
    self.saving_reward_tolerance = None
    self.global_episode = global_episode
    self.global_update_step = global_update_step
    self.start_time = None
    self.logger = None
def submit(config, directories, repeats=1):
    explorer_seed = config['training']['global_seed']
    set_seeds(explorer_seed)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    avg_models = []
    for dirs in directories:
        models = []
        for model_directory in dirs:
            model = create_model(config['model'])
            model.load(model_directory)
            model.train()
            model.to(device)
            models.append(model)
        avg_models.append(AverageModel(models, config, repeats))
    model = ChooseRandomModel(avg_models)

    internal_env_args = {
        'env_type': 'submit',
        'env_init_args': {
            'test_speed': False
        },
        'env_config': {
            "model": "3D",
            "prosthetic": True,
            "difficulty": 1,
            'seed': explorer_seed
        }
    }
    env = create_env(config, internal_env_args, config['training']['transfer'])
    observation = env.get_observation()

    episodes = 0
    counter = 0
    reward_sum = 0.0

    while True:
        action = model.act(observation)
        (observation, _), (reward, _), done, _ = env.step(action)
        counter += 1
        reward_sum += reward
        print(counter, reward, reward_sum)

        if done:
            print()
            counter = 0
            reward_sum = 0.0
            save_info("submit_logs/second/log_{}.json".format(episodes), env.get_episode_info())
            episodes += 1
            (observation, _) = env.reset(False)
            if observation is None:
                break
def explore(self):
    training_config = self.config['training']
    self.saving_reward_tolerance = training_config['saving_reward_tolerance']

    dir_pattern = '{}' + '/{}_thread_{}'.format(self.exploration_type, self.p_id)
    logdir = dir_pattern.format(training_config['log_dir'])
    replays_dir = dir_pattern.format(training_config['replays_dir'])
    make_dir_if_required(logdir)
    if training_config['saving_replays']:
        make_dir_if_required(replays_dir)
    self.logger = Logger(logdir)

    episode_counter = 0
    step_counter = 0
    self.start_time = time.time()

    action_random_process = create_action_random_process(self.config)
    epsilon_cycle_len = random.randint(training_config['epsilon_cycle_len'] // 2,
                                       training_config['epsilon_cycle_len'] * 2)
    epsilon_decay_fn = create_decay_fn(
        "cycle",
        initial_value=training_config['initial_epsilon'],
        final_value=training_config['final_epsilon'],
        cycle_len=epsilon_cycle_len,
        num_cycles=training_config['max_episodes'] // epsilon_cycle_len)

    while True:
        try:
            exploiting = self.exploration_type in ('exploiting', 'exploiting_virtual')
            if exploiting:
                # Pull the latest weights from the shared training model.
                hard_update_ddpg(self.model, self.training_model)

            if episode_counter > 0 and episode_counter % 128 == 0:
                self.environment.collect_garbage()

            epsilon = min(training_config['initial_epsilon'],
                          max(training_config['final_epsilon'],
                              epsilon_decay_fn(episode_counter)))
            if exploiting:
                epsilon = 0.0

            self.explorer_seed = (training_config['global_seed']
                                  + MAGIC_NUMBER * self.p_id
                                  + episode_counter % 5)
            set_seeds(self.explorer_seed)

            episode_metrics = {
                "reward": 0.0,
                "reward_modified": 0.0,
                "step": 0,
                "epsilon": epsilon
            }

            action_random_process.reset_states()
            replay, timings = self._explore_episode(action_random_process, epsilon,
                                                    episode_metrics, training_config)

            for episode_queue in self.episodes_queues:
                episode_queue.put(replay)

            self.global_episode.value += 1
            episode_counter += 1

            episode_metrics["step"] *= self.config['environment']['wrapper']['repeat_actions']
            step_counter += episode_metrics["step"]

            reward_scale = self.config['environment']['wrapper']['reward_scale']
            episode_metrics['reward'] /= reward_scale
            episode_metrics['reward_modified'] /= reward_scale

            saving_best_cond = episode_metrics['reward'] > (self.saving_best_reward
                                                            + self.saving_reward_tolerance)
            if saving_best_cond:
                self.saving_best_reward = episode_metrics['reward']

            if (episode_counter % training_config['save_every_episode'] == 0
                    or saving_best_cond) and exploiting:
                save_dir = dir_pattern.format(training_config['save_dir'])
                self.model.save(self.config, save_dir, episode_counter, episode_metrics["reward"])

            self.log(episode_metrics, timings, episode_counter, step_counter, saving_best_cond)
        except ValueError as e:
            print('timed out process {} {} with {}'.format(self.exploration_type, self.p_id, e))
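# A hedged sketch of what hard_update_ddpg is assumed to do in explore() above: copy
# the trainer's current weights into the explorer's local model so exploiting threads
# always act with the freshest policy. Assuming both models expose actor and critic
# nn.Modules; the real helper in this codebase may differ.
def hard_update_ddpg_sketch(destination_model, source_model):
    destination_model.actor.load_state_dict(source_model.actor.state_dict())
    destination_model.critic.load_state_dict(source_model.critic.state_dict())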
def train(args):
    epochs = 350
    batch_size = 288
    util.set_seeds(args.rank)
    model = nn.EfficientNet().cuda()
    lr = batch_size * torch.cuda.device_count() * 0.256 / 4096
    optimizer = nn.RMSprop(util.add_weight_decay(model), lr, 0.9, 1e-3, momentum=0.9)
    ema = nn.EMA(model)

    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank])
    else:
        model = torch.nn.DataParallel(model)

    criterion = nn.CrossEntropyLoss().cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'),
                                   transforms.Compose([util.RandomResize(),
                                                       transforms.ColorJitter(0.4, 0.4, 0.4),
                                                       transforms.RandomHorizontalFlip(),
                                                       util.RandomAugment(),
                                                       transforms.ToTensor(), normalize]))
    if args.distributed:
        sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    else:
        sampler = None

    loader = data.DataLoader(dataset, batch_size, sampler=sampler,
                             num_workers=8, pin_memory=True)

    scheduler = nn.StepLR(optimizer)
    amp_scale = torch.cuda.amp.GradScaler()

    with open(f'weights/{scheduler.__str__()}.csv', 'w') as f:
        if args.local_rank == 0:
            writer = csv.DictWriter(f, fieldnames=['epoch', 'acc@1', 'acc@5'])
            writer.writeheader()
        best_acc1 = 0
        for epoch in range(0, epochs):
            if args.distributed:
                sampler.set_epoch(epoch)
            if args.local_rank == 0:
                print(('\n' + '%10s' * 2) % ('epoch', 'loss'))
                bar = tqdm.tqdm(loader, total=len(loader))
            else:
                bar = loader
            model.train()
            for images, target in bar:
                loss = batch(images, target, model, criterion)
                optimizer.zero_grad()
                amp_scale.scale(loss).backward()
                amp_scale.step(optimizer)
                amp_scale.update()
                ema.update(model)
                torch.cuda.synchronize()
                if args.local_rank == 0:
                    bar.set_description(('%10s' + '%10.4g') % ('%g/%g' % (epoch + 1, epochs), loss))

            scheduler.step(epoch + 1)

            if args.local_rank == 0:
                acc1, acc5 = test(ema.model.eval())
                writer.writerow({'acc@1': str(f'{acc1:.3f}'),
                                 'acc@5': str(f'{acc5:.3f}'),
                                 'epoch': str(epoch + 1).zfill(3)})
                util.save_checkpoint({'state_dict': ema.model.state_dict()}, acc1 > best_acc1)
                best_acc1 = max(acc1, best_acc1)

    if args.distributed:
        torch.distributed.destroy_process_group()
    torch.cuda.empty_cache()
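# A hedged sketch of the EMA helper assumed above (nn.EMA): it keeps an exponential
# moving average of the model weights, and that averaged copy is what gets evaluated
# and checkpointed. The real implementation may handle decay warm-up differently.
import copy

import torch

class EMASketch:
    def __init__(self, model, decay=0.9999):
        self.model = copy.deepcopy(model).eval()
        self.decay = decay
        for p in self.model.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self, model):
        # Unwrap DataParallel / DistributedDataParallel if necessary.
        source = model.module if hasattr(model, 'module') else model
        for ema_param, param in zip(self.model.state_dict().values(),
                                    source.state_dict().values()):
            if ema_param.dtype.is_floating_point:
                ema_param.mul_(self.decay).add_(param.detach(), alpha=1 - self.decay)
            else:
                ema_param.copy_(param)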
def evaluate(config, directory, directories, seed_plus):
    explorer_seed = config['training']['global_seed'] + seed_plus * 29
    set_seeds(explorer_seed)

    directories.append(directory)
    models = []
    for model_directory in directories:
        models.append(load_model(model_directory))
    model = ChooseRandomModel(models)

    internal_env_args = {
        'env_type': 'normal',
        'env_init_args': {
            'env_type': 'normal',
            'env_init_args': {
                'visualize': False,
                'integrator_accuracy': 5e-4
            },
            'visualizers_configs': [
                {'save_file': './videos/side_{}'.format(seed_plus),
                 'camera_rotation': [-0.3, 0., 0.]},
                {'save_file': './videos/front_{}'.format(seed_plus),
                 'camera_rotation': [-0.3, -math.pi / 2, 0.]},
                {'save_file': './videos/half_{}'.format(seed_plus),
                 'camera_rotation': [-0.3, -math.pi / 4, 0.]}
            ]
        },
        'env_config': {
            "model": "3D",
            "prosthetic": True,
            "difficulty": 1,
            "max_steps": 1000,
            'seed': explorer_seed
        }
    }
    env = create_env(config, internal_env_args, transfer=config['training']['transfer'])

    reward_sum = 0.0
    reward_modified_sum = 0.0
    observation = env.reset()

    done = False
    j = 0
    while not done:
        observation_transformed, _ = observation
        action = model.act(observation_transformed)
        observation, (reward, reward_modified), done, _ = env.step(action)
        reward_sum += reward
        reward_modified_sum += reward_modified
        print('{} {:.2f} {:.2f} {:.2f}'.format(j, reward, reward_modified, reward_sum))
        j += config["environment"]["wrapper"]["repeat_actions"]

        if done:
            print(np.unique(np.array(list(map(lambda obs: obs["target_vel"], env.observations))), axis=0))

    return reward_sum