def upsample(self, img):
    # Resize the mask to the (H, W) of the original image tensor.
    upsampled = F.interpolate(
        img,
        size=(int(self.original_img_tensor.size(2)),
              int(self.original_img_tensor.size(3))),
        mode='bilinear',
        align_corners=False)
    # The two original branches differed only in the trailing .cuda() call.
    return upsampled.cuda() if cuda_available() else upsampled
def build(self):
    # mask initialization: one low-resolution mask channel per image
    b, c, w, h = self.original_img_tensor.shape
    mask_tensor = torch.rand(
        (b, 1, int(w / self.factor), int(h / self.factor)))
    if cuda_available():
        mask_tensor = mask_tensor.cuda()
    mask_tensor = Variable(mask_tensor, requires_grad=True)

    output = self.model(self.original_img_tensor)
    # target class for explanations: the model's top prediction
    class_index = np.argmax(output.data.cpu().numpy())
    optimizer = torch.optim.Adam([mask_tensor], self.lr)

    for i in range(self.iter + 1):
        # upsample the low-resolution mask to the image's spatial size
        upsampled_mask = self.upsample(mask_tensor)

        # jittering: small Gaussian noise keeps the mask from overfitting
        # to a single input
        jitter = torch.randn((b, c, w, h)) * 0.03
        if cuda_available():
            # was an unconditional .cuda(), which crashed on CPU-only runs
            jitter = jitter.cuda()
        jitter_org_img_tensor = self.original_img_tensor + jitter

        # blend: mask selects the (jittered) original image,
        # (1 - mask) selects the perturbed image
        mask_img = torch.mul(upsampled_mask, jitter_org_img_tensor) + \
            torch.mul((1 - upsampled_mask), self.perturbed_img_tensor)
        mask_output = torch.nn.Softmax(dim=1)(self.model(mask_img))
        mask_prob = mask_output[0, class_index]

        # minimize the target-class probability while pushing mask values
        # toward 1 (preserve most of the image) and keeping the mask
        # spatially smooth (TV term)
        loss = self.l1_coeff * torch.mean(1 - torch.abs(mask_tensor)) + \
            TV(mask_tensor, self.tv_coeff, self.tv_beta) + mask_prob
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # keep the mask values in [0, 1]
        mask_tensor.data.clamp_(0, 1)
        if i % 20 == 0:
            print(f'[{i}/{self.iter}] Loss: {loss} '
                  f'Prob for the target class: {mask_prob}')

    save_img(self.upsample(mask_tensor), self.original_img,
             self.perturbed_img, self.img_path, self.model_path)
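# --- Supplementary sketch (not part of the original source) ---
# TV(mask_tensor, self.tv_coeff, self.tv_beta) is called in build() but not
# defined in this section. A minimal total-variation penalty consistent with
# that call signature could look like the following; the repo's actual
# implementation may differ.
def TV(mask, tv_coeff, tv_beta):
    # Differences between neighboring pixels of the (B, 1, H, W) mask,
    # raised to tv_beta: penalizing them encourages spatially smooth masks.
    dh = torch.abs(mask[:, :, 1:, :] - mask[:, :, :-1, :])
    dw = torch.abs(mask[:, :, :, 1:] - mask[:, :, :, :-1])
    return tv_coeff * (dh.pow(tv_beta).sum() + dw.pow(tv_beta).sum())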
def __call__(self):
    print('\nGradCAM start ... ')
    self.img = load_image(self.img_path)
    # numpy to tensor, plus normalization
    self.input = preprocess_image(self.img)
    output = self.model(self.input)
    if self.class_index is None:
        # default to the class with the highest predicted probability
        self.class_index = np.argmax(output.cpu().data.numpy())

    # scalar score for the target class: sum(one_hot * output)
    one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
    one_hot[0][self.class_index] = 1
    one_hot = Variable(torch.from_numpy(one_hot), requires_grad=True)
    if cuda_available():
        one_hot = torch.sum(one_hot.cuda() * output)
    else:
        one_hot = torch.sum(one_hot * output)

    self.model.zero_grad()
    one_hot.backward(retain_graph=True)
    gradients = self.gradients['value']
    activations = self.activations['value']

    # channel weights: global-average-pool the gradients over H and W
    weights = torch.mean(torch.mean(gradients, dim=2), dim=2)
    weights = weights.reshape(weights.shape[1], 1, 1)
    activationMap = torch.squeeze(activations[0])

    # Grad-CAM: ReLU of the weighted sum of the activation maps
    gradcam = F.relu((weights * activationMap).sum(0))
    gradcam = cv2.resize(gradcam.data.cpu().numpy(), (224, 224))
    save(gradcam, self.img, self.img_path, self.model_path)
    print('GradCAM end !!!\n')
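# --- Supplementary sketch (not part of the original source) ---
# __call__ above reads self.gradients['value'] and self.activations['value'],
# which are assumed to be filled by hooks registered on the layer being
# explained. A minimal sketch of such a setup; the helper name and the
# target_layer argument are assumptions, not this repo's exact code.
def register_hooks(gradcam, target_layer):
    gradcam.activations, gradcam.gradients = {}, {}

    def forward_hook(module, inputs, output):
        # feature maps from the forward pass
        gradcam.activations['value'] = output

    def backward_hook(module, grad_input, grad_output):
        # gradient of the class score w.r.t. the feature maps
        gradcam.gradients['value'] = grad_output[0]

    target_layer.register_forward_hook(forward_hook)
    # on PyTorch < 1.8, use register_backward_hook instead
    target_layer.register_full_backward_hook(backward_hook)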
def simple_train(args, sync_learner):
    # limit BLAS threads when training on GPU; use many threads on CPU
    if args.cuda:
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
    else:
        os.environ['OMP_NUM_THREADS'] = '16'
        os.environ['MKL_NUM_THREADS'] = '16'
    import numpy as np

    logger = initializer.get_logger(WID.Pop(), args=args)
    error_logger = initializer.get_error_logger(logger.log_path)

    # back up the current codebase
    project_name = os.path.basename(os.path.abspath('.'))
    dst = os.path.join(logger.log_data_path, f'{project_name}.zip')
    utils.archive_dir('.', dst)

    # initialization
    start_time = time.time()
    args.policy_optimizer.seed(args.seed)
    device = 'cuda' if args.cuda and utils.cuda_available() else 'cpu'
    logger.info('use device: {}'.format(device))
    logger.table('args', args.__dict__, width=350)

    # choose init/selection/mutation methods based on the pop method
    args = set_pop_configurations(args)

    # initialize the environment and model
    env = envs.make_environment(args, False, False, init_game=False)
    env.close()

    # launch actor + learner processes
    args.n_actors = min(mp.cpu_count() // args.n_learners, args.n_actors)
    processes = list()

    # initialize mailboxes
    mailbox = Mailbox(args.queue_type, args.pop_log_share_method)
    inputs = mailbox.get_queue(WID.Pop())
    mailbox.get_queue(WID.Logger())
    mailbox.get_queue(WID.Actor())
    mailbox.get_queue(WID.ReplayBufferHub())
    for rank in range(args.n_learners):
        mailbox.get_queue(WID.Control(rank))
        mailbox.get_queue(WID.Learner(rank))
        mailbox.get_queue(WID.ReplayBuffer(rank))

    # actors
    for rank in range(args.n_actors):
        # TODO: need ip * 100 + rank
        p = mp.Process(target=actor_task,
                       args=(rank, args, 'cpu',
                             mailbox.get_queue(WID.Actor()), mailbox),
                       daemon=True)
        processes.append(p)

    # test actor
    if args.test_actor:
        logger.info('start test actor')
        p = mp.Process(target=actor_task,
                       args=(-1, args, 'cpu',
                             mailbox.get_queue(WID.Actor()), mailbox),
                       daemon=True)
        processes.append(p)

    # remote actors
    if args.start_relay_server:
        p = mp.Process(
            target=run_device,
            args=(args.repeater_frontend_port, args.repeater_backend_port),
            daemon=True,
        )
        processes.append(p)
        p = mp.Process(
            target=run_repeater,
            args=(args.repeater_frontend_port, args.repeater_backend_port,
                  mailbox.get_queue(WID.Actor()), mailbox),
            daemon=True,
        )
        processes.append(p)

    # learners: generators in sync mode, separate processes otherwise
    if sync_learner:
        learners = list()
        for rank in range(args.n_learners):
            learner_gen = learner_task_gen(
                rank, args, device,
                (mailbox.get_queue(WID.Control(rank)),
                 mailbox.get_queue(WID.Learner(rank))),
                mailbox)
            learners.append(learner_gen)
    else:
        for rank in range(args.n_learners):
            p = mp.Process(target=learner_task,
                           args=(rank, args, device,
                                 (mailbox.get_queue(WID.Control(rank)),
                                  mailbox.get_queue(WID.Learner(rank))),
                                 mailbox),
                           daemon=True)
            processes.append(p)

    # replay buffer
    p = mp.Process(target=run_replay_buffer_hub,
                   args=(mailbox.get_queue(WID.ReplayBufferHub()), mailbox),
                   daemon=True)
    processes.append(p)

    for p in processes:
        p.start()

    # TODO: init population
    pop = []
    generation = 0
    generation_start_time = time.time()
    generation_n_games = 0
    generation_frames = 0
    n_games = 0
    n_steps = 0
    total_frames = 0
    recent_scores = deque(maxlen=args.pbt_n_evals * args.n_learners)
    frames_per_epoch = args.frames_per_epoch * args.n_learners
    frames = 0
    heartbeat_recorder = dict()
    heartbeat_recorder_update = 0

    # initialize models
    assert args.n_learners > 0
    for rank in range(args.n_learners):
        model = args.policy_optimizer.make_model(device, args, env)
        sol = pbt.Solution(params=model.to_params(), args=args)
        sol = sol.random_init(args.pop_init_method)
        args = sol.apply(args)
        sol.generation = generation
        mailbox.put(dict(sender=WID.Pop(),
                         to=WID.Control(rank),
                         msg=Msg.NEW_SOLUTION,
                         solution=sol))

    # start training
    iteration = 0
    while True:
        if sync_learner:
            for learner_gen in learners:
                next(learner_gen)

        if frames > frames_per_epoch:
            logger.table('args', args.__dict__, width=350)
            logger.info(f'method: {args.pop_method}, Tag: {args.pop_tag}')
            logger.text('pop', generation, f'Generation: {generation}')

            # request the model trained so far from every learner
            logger.info('Request solution from all learners')
            for rank in range(args.n_learners):
                mailbox.put(dict(sender=WID.Pop(),
                                 to=WID.Control(rank),
                                 msg=Msg.REQ_SOLUTION))
            if sync_learner:
                resp_list = list()
                for learner_gen in learners:
                    next(learner_gen)
                    if inputs.qsize() > 0:
                        resp = inputs.get()
                        if resp['msg'] == Msg.SAVE_SOLUTION:
                            resp_list.append(resp)
                    if len(resp_list) >= args.n_learners:
                        break

            logger.info('Gather current solutions')

            def process_resp(resp):
                if resp['msg'] == Msg.SAVE_SOLUTION:
                    sol = resp['solution']
                    sol.learner = resp['sender']
                    sol.generation = generation
                    sol.depth += 1
                    # the original compared sol.loss == np.NaN, which is
                    # always False; use np.isnan instead
                    if np.isnan(sol.loss):
                        sol.mean_score = -1e10
                    sol.propagate_op_result(pop)
                    pop.append(sol)

            if sync_learner:
                for resp in resp_list:
                    process_resp(resp)
            else:
                for _ in range(args.n_learners):
                    resp = inputs.get()
                    process_resp(resp)

            pop = sorted(pop, key=lambda sol: sol.mean_score, reverse=True)

            # Selection: keep the best-performing models
            learner_to_stop = list()
            new_solutions = list()
            if args.pop_selection_method == PopSelection.NONE:
                pass
            elif args.pop_selection_method == PopSelection.TRUNCATED_SELECTION:
                for i, sol in enumerate(pop):
                    if i + 1 > args.pop_survival_ratio * args.population_size:
                        if sol.generation >= generation:
                            learner_to_stop.append((sol.learner, i))
                logger.info('Stop learning: {}'.format(learner_to_stop))
                for learner, _ in learner_to_stop:
                    max_idx = min(
                        len(pop) - 1,
                        int(args.pop_selection_ratio * args.population_size) - 1)
                    sol = pop[random.randint(0, max_idx)]
                    new_solutions.append(sol)
            elif args.pop_selection_method == PopSelection.UCB_SELECTION:

                def ucb_func(score, total_try, n_try, ucb_c, max_score,
                             min_score):
                    Q = (score - min_score + 1e-10) / \
                        (max_score - min_score + 1e-10)
                    U = np.sqrt((total_try + 1e-10) / (n_try + 1e-10))
                    return Q + ucb_c * U

                # update UCB values
                max_score = max([sol.mean_score for sol in pop])
                min_score = min([sol.mean_score for sol in pop])
                total_try = sum([sol.n_try for sol in pop])
                for sol in pop:
                    sol.ucb = ucb_func(sol.mean_score, total_try, sol.n_try,
                                       args.pop_ucb_c, max_score, min_score)
                # sort by UCB
                pop = sorted(pop, key=lambda sol: sol.ucb, reverse=True)
                for i, sol in enumerate(pop):
                    if i + 1 > args.pop_survival_ratio * args.population_size:
                        if sol.generation >= generation:
                            learner_to_stop.append((sol.learner, i))
                # progress.write('Stop learning: {}'.format(learner_to_stop))
                logger.info('Stop learning: {}'.format(learner_to_stop))
                for learner, _ in learner_to_stop:
                    max_idx = min(
                        len(pop) - 1,
                        int(args.pop_selection_ratio * args.population_size) - 1)
                    sol = pop[random.randint(0, max_idx)]
                    sol.n_try += 1
                    new_solutions.append(sol)

            # Mutation
            logger.info('Select next solution')
            if args.pop_mutation_method == PopMutation.NONE:
                pass
            elif args.pop_mutation_method == PopMutation.RANDOM:
                new_solutions = [
                    sol.random_mutation() for sol in new_solutions
                ]
            elif args.pop_mutation_method == PopMutation.BACKUP:
                new_solutions = [sol.prob_mutation() for sol in new_solutions]
            elif args.pop_mutation_method == PopMutation.PREDICTION:
                if len(pop) <= args.population_size:
                    new_solutions = [
                        sol.random_mutation() for sol in new_solutions
                    ]
                else:
                    # fit a regressor that predicts score from hyperparameters
                    xs, ys = zip(*[sol.vec for sol in pop])
                    xs, ys = np.array(xs), np.array(ys)
                    x_scaler = StandardScaler()
                    x_scaler.fit(xs)
                    # n_hidden_nodes = len(2 * xs[0])
                    # hidden_layer = MLPRandomLayer(
                    #     n_hidden=n_hidden_nodes, activation_func='tanh')
                    # net = GenELMRegressor(hidden_layer=hidden_layer)
                    xs_ = x_scaler.transform(xs)
                    ys_ = (ys - min(ys)) / (max(ys) - min(ys) + ESP)
                    # net.fit(xs_, ys_)
                    reg = LinearRegression().fit(xs_, ys_)
                    for sol in new_solutions:
                        sol.pred_w = max(ys) - min(ys)
                        sol.pred_bias = min(ys)
                    # pred = net.predict(xs_)
                    pred = reg.predict(xs_)
                    pred_error = ((ys_ - pred)**2).mean()
                    logger.debug('pred error: {:5.4f}'.format(pred_error))
                    logger.line('loss/prediction error', generation,
                                pred_error)
                    new_solutions = [
                        sol.predict_mutation(x_scaler, reg)
                        for sol in new_solutions
                    ]

            # check keyboard events -> tweak parameters manually here
            if sync_learner:
                if keyboard.event('esc'):
                    print('==== Enter debug mode ====')
                    embed()

            # send to the learners
            for (learner, _), sol in zip(learner_to_stop, new_solutions):
                mailbox.put(dict(sender=WID.Pop(),
                                 to=learner,
                                 msg=Msg.NEW_SOLUTION,
                                 solution=sol))

            # drop the worst-performing models
            pop = pop[:args.population_size]

            # current population
            for i, sol in enumerate(pop):
                logger.info('{} -> {}'.format(i, sol))

            # reset the remaining-frame counter
            frames = 0

            # report results
            score = np.mean(recent_scores)
            logger.info('Gen: {: >4}, Score: {: >10.3f}'.format(
                generation, score))
            for name in pop[0].hyperparams:
                value = np.mean([getattr(s, name) for s in pop])
                logger.line('hyperparams/' + name, generation, value)
            # logger.line('sys/fd_counts', generation,
            #             utils.count_open_fds(), use_visdom=True)
            logger.line('sys/memory_usage', generation,
                        utils.get_memory_usage(), visdom_env='sys')
            logger.line('sys/memory_delta', generation,
                        utils.get_memory_usage_delta(), visdom_env='sys')
            avg_game_time = (time.time() - start_time) / n_games
            logger.line('sys/average_game_time', generation, avg_game_time,
                        visdom_env='sys')
            generation_interval = time.time() - generation_start_time
            logger.line('sys/game_time', generation,
                        generation_interval / generation_n_games,
                        visdom_env='sys')
            logger.line('sys/fps', generation,
                        generation_frames / generation_interval,
                        average=3, visdom_env='main')

            generation += 1
            generation_start_time = time.time()
            generation_n_games = 0
            generation_frames = 0

        # logging
        logger_inputs = mailbox.get_mailbox(WID.Logger())
        while logger_inputs.qsize() > 0 and frames <= frames_per_epoch:
            resp = logger_inputs.get()
            if resp['msg'] == Msg.SAVE_LOG:
                total_frames += resp['frames']
                generation_frames += resp['frames']
                if args.pop_log_share_method == PopLogShare.OneToOne:
                    frames += resp['frames']
                elif args.pop_log_share_method == PopLogShare.OneToAll:
                    frames += resp['frames'] // args.n_learners
                else:
                    raise NotImplementedError
                learner_name = resp['sender']
                scores = resp['scores']
                recent_scores += scores
                n_games += len(resp['scores'])
                generation_n_games += len(resp['scores'])
                current_time = int(time.time() - start_time)
                n_queued_episodes = np.mean([
                    mb.qsize() for name, mb in mailbox.mailbox.items()
                    if name.startswith(WID.Learner())
                ])
                msg = [
                    '{}'.format(datetime.timedelta(seconds=current_time)),
                    '{}'.format(learner_name),
                    'G: {: >4,d}'.format(generation),
                    '{: >9,d} < {: >3.1f}'.format(total_frames,
                                                  n_queued_episodes),
                    'Remain: {: >6,d}, FPS: {: >4.1f}'.format(
                        frames_per_epoch - frames,
                        total_frames / current_time),
                    'Score: {: >8.1f}, #eval: {: >3d}'.format(
                        np.mean(recent_scores), len(scores)),
                    'Loss: {: >6.4f}'.format(resp['total_loss'])
                ]
                msg = ', '.join(msg)
                logger.info(msg)
                score_dict = dict(mean_score=np.mean(recent_scores))
                logger.line('score', generation, score_dict,
                            visdom_env='main', width=600)
                logger.line('losses', generation, resp['loss'],
                            visdom_env='main', width=600)
                logger.line('values', generation, resp['values'],
                            visdom_env='main')
                logger.bar('gradients', resp['gradients'],
                           visdom_env='main', sort=False)
                logger.bar('buffer_size', resp['buffer_size'],
                           visdom_env='main', sort=False)
            elif resp['msg'] == Msg.HEARTBEAT:
                # monitor whether actors and learners are still alive
                heartbeat = resp['heartbeat']
                heartbeats_recv_time = time.time()
                heartbeat_recorder[heartbeat] = heartbeats_recv_time
                heartbeat_recorder_update = \
                    (heartbeat_recorder_update + 1) % HEARTBEAT_BUFFER_SIZE
                if heartbeat_recorder_update == 0:
                    heart_beat_eltime = dict()
                    for hb in heartbeat_recorder:
                        heart_beat_eltime[hb] = \
                            heartbeats_recv_time - heartbeat_recorder[hb]
                    logger.bar('heartbeats', heart_beat_eltime,
                               visdom_env='main', sort=True)
            elif resp['msg'] == Msg.ERROR_INFO:
                error_logger.error(resp['error_info'] + '\n')

        iteration = (iteration + 1) % 5
        if iteration == 0:
            logger.progressbar(
                'Generations',
                ['Gen.', 'Frames'],
                [generation, frames],
                [args.max_epochs, frames_per_epoch],
                visdom_env='main',
            )
            logger.line('sys/q_size', generation, mailbox.qsize_dict(),
                        visdom_env='main')

        # end of PBT training
        if generation >= args.max_epochs or n_steps >= args.max_steps:
            logger.info(
                '==== DONE TRAIN: Gen. {}, Step: {}, Elapsed Time: {} ===='.
                format(generation, n_steps,
                       datetime.timedelta(seconds=time.time() - start_time)))
            utils.log_tools.save_log_to_csv(args.log_path, args.session_id)
            # # force-kill all child processes
            # pid = os.getpid()
            # utils.kill_children_processes(pid)
            break
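# --- Supplementary sketch (not part of the original source) ---
# The UCB score in simple_train's UCB_SELECTION branch is a min-max
# normalized exploitation term Q plus an exploration bonus U that shrinks
# with the number of tries. A standalone check with made-up numbers:
import numpy as np

def ucb_func(score, total_try, n_try, ucb_c, max_score, min_score):
    # same formula as in simple_train
    Q = (score - min_score + 1e-10) / (max_score - min_score + 1e-10)
    U = np.sqrt((total_try + 1e-10) / (n_try + 1e-10))
    return Q + ucb_c * U

# A rarely tried solution (n_try=1) outranks a frequently tried one
# (n_try=9) despite a lower raw score:
print(ucb_func(5.0, total_try=10, n_try=1, ucb_c=0.5,
               max_score=10.0, min_score=0.0))  # ~2.08
print(ucb_func(8.0, total_try=10, n_try=9, ucb_c=0.5,
               max_score=10.0, min_score=0.0))  # ~1.33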
def train(args):
    # limit BLAS threads when training on GPU; use many threads on CPU
    if args.cuda:
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
    else:
        os.environ['OMP_NUM_THREADS'] = '16'
        os.environ['MKL_NUM_THREADS'] = '16'
    import numpy as np

    logger = initializer.get_logger(WID.Pop(), args=args)
    error_logger = initializer.get_error_logger(logger.log_path)

    # back up the current codebase
    project_name = path.basename(path.abspath('.'))
    dst = path.join(logger.log_data_path, f'{project_name}.zip')
    utils.archive_dir('.', dst)

    # initialization
    start_time = time.time()
    args.policy_optimizer.seed(args.seed)
    device = 'cuda' if args.cuda and utils.cuda_available() else 'cpu'
    logger.info('use device: {}'.format(device))
    logger.table('args', args.__dict__, width=350)

    # choose init/selection/mutation methods based on the pop method
    args = set_pop_configurations(args)

    # initialize the environment and model
    env = envs.make_environment(args, False, False, init_game=False)
    env.close()

    # launch actor + learner processes
    args.n_actors = min(mp.cpu_count(), args.n_actors)
    processes = list()

    # initialize mailboxes
    mailbox = Mailbox(args.queue_type, args.pop_log_share_method)
    # inputs = mailbox.get_queue(WID.Pop())
    mailbox.get_queue(WID.Logger())
    mailbox.get_queue(WID.Actor())
    # mailbox.get_queue(WID.ReplayBufferHub())
    for rank in range(args.n_learners):
        mailbox.get_queue(WID.Learner(rank))
        # mailbox.get_queue(WID.Control(rank))
        # mailbox.get_queue(WID.ReplayBuffer(rank))

    # actors
    for rank in range(args.n_actors):
        # TODO: need ip * 100 + rank
        p = mp.Process(target=actor_task,
                       args=(rank, args, 'cpu',
                             mailbox.get_queue(WID.Actor()), mailbox),
                       daemon=True)
        processes.append(p)

    # test actor
    if args.test_actor:
        logger.info('start test actor')
        p = mp.Process(target=actor_task,
                       args=(-1, args, 'cpu',
                             mailbox.get_queue(WID.Actor()), mailbox),
                       daemon=True)
        processes.append(p)

    # remote actors
    if args.start_relay_server:
        p = mp.Process(
            target=run_device,
            args=(args.repeater_frontend_port, args.repeater_backend_port),
            daemon=True,
        )
        processes.append(p)
        p = mp.Process(
            target=run_repeater,
            args=(args.repeater_frontend_port, args.repeater_backend_port,
                  mailbox.get_queue(WID.Actor()), mailbox),
            daemon=True,
        )
        processes.append(p)

    for p in processes:
        p.start()

    # learners run in-process here (unlike simple_train's subprocesses)
    learners = list()
    for rank in range(args.n_learners):
        learner = Learner(rank, args, device,
                          mailbox.get_queue(WID.Learner(rank)), mailbox)
        learners.append(learner)

    # replay buffer
    # p = mp.Process(
    #     target=run_replay_buffer_hub,
    #     args=(mailbox.get_queue(WID.ReplayBufferHub()), mailbox),
    #     daemon=True)
    # processes.append(p)

    # TODO: init population
    pop = []                             # population
    generation = 0                       # current generation
    n_games = 0                          # total games played
    n_steps = 0                          # total steps
    generation_start_time = time.time()  # start time of the current generation
    generation_n_games = 0               # games played in this generation
    generation_n_steps = 0               # steps in this generation
    # recent scores, tracked separately per learner
    recent_scores = [
        deque(maxlen=args.pbt_n_evals) for _ in range(args.n_learners)
    ]
    # current steps; may differ from generation_n_steps depending on
    # args.log_share_method
    current_steps = 0
    steps_per_generation = args.steps_per_generation  # steps per generation
    heartbeat_recorder = dict()  # heartbeat bookkeeping

    # initialize models
    assert args.n_learners > 0
    for rank in range(args.n_learners):
        model = args.policy_optimizer.make_model(device, args, env)
        sol = pbt.Solution(params=model.to_params(), args=args)
        sol = sol.init(args.pop_init_method)
        args = sol.apply(args)
        sol.generation = generation
        learners[rank].set_solution(sol)

    # start training
    while True:
        # run the learners (local search)
        for learner in learners:
            learner.step()

        # run evolutionary computation (global search)
        if all([learner.done() for learner in learners]):
            for learner in learners:
                learner.reset()
            # if generation_n_steps > steps_per_generation:
            logger.table('args', args.__dict__, width=350)
            logger.info(f'method: {args.pop_method}, seed: {args.seed}, '
                        f'Tag: {args.pop_tag}')

            # ----------------
            # Evaluation: collect solutions from the current learners
            # ----------------
            children = list()
            for learner in learners:
                sol = learner.get_solution()
                if len(sol.scores) >= args.pbt_n_evals:
                    sol.rank = learner.rank
                    sol.learner = WID.Learner(learner.rank)
                    # generation at which the solution was created
                    sol.generation = generation
                    sol.depth += 1
                    sol.propagate_op_result(pop)
                    children.append(sol)
                    logger.info('child: {}'.format(sol))

            if len(pop) >= args.population_size:
                # --------------
                # Selection: keep the best-performing models
                # --------------
                low_perf_learners = list()
                new_solutions = list()
                if args.pop_selection_method == PopSelection.NONE:
                    pass
                elif args.pop_selection_method == \
                        PopSelection.TRUNCATED_SELECTION:
                    # sort population and children by mean score
                    pop = sorted(pop, key=lambda sol: sol.score, reverse=True)
                    children = sorted(children,
                                      key=lambda sol: sol.score,
                                      reverse=True)
                    for child in children[1:]:
                        # sample the most recent scores
                        child_scores = np.array(
                            child.scores)[-args.pbt_n_evals // 2:]
                        best_scores = np.array(
                            children[0].scores)[-args.pbt_n_evals // 2:]
                        # significance test
                        stat = stats.mannwhitneyu(child_scores,
                                                  best_scores,
                                                  alternative='less')
                        if stat.pvalue < 0.01:
                            # record the learner whose solution will be
                            # replaced
                            low_perf_learners.append(
                                (child.rank, np.median(child_scores),
                                 stat.pvalue))
                    for _ in low_perf_learners:
                        sol = random.choice(pop)
                        sol.n_try += 1
                        new_solutions.append(copy.deepcopy(sol))
                elif args.pop_selection_method == PopSelection.UCB_SELECTION:
                    # TODO: needs updating for the pop/children split

                    def ucb_func(score, total_try, n_try, ucb_c, max_score,
                                 min_score):
                        Q = (score - min_score + 1e-10) / \
                            (max_score - min_score + 1e-10)
                        U = np.sqrt((total_try + 1e-10) / (n_try + 1e-10))
                        return Q + ucb_c * U

                    # update UCB values
                    max_score = max(
                        [sol.mean_score for sol in pop + children])
                    min_score = min(
                        [sol.mean_score for sol in pop + children])
                    total_try = sum([sol.n_try for sol in pop + children])
                    for sol in pop:
                        sol.ucb = ucb_func(sol.mean_score, total_try,
                                           sol.n_try, args.pop_ucb_c,
                                           max_score, min_score)
                    # sort by UCB
                    pop = sorted(pop, key=lambda sol: sol.ucb, reverse=True)

                # TODO: problem area!! (in TRUNCATED_SELECTION,
                # low_perf_learners holds (rank, score, pvalue) tuples, but
                # the common part below appends bare ranks, while the zip
                # further down unpacks tuples)
                # selection: common part
                if args.pop_selection_method != PopSelection.NONE:
                    # drop low-performing learners and create new solutions:
                    # among the solutions created this generation, find the
                    # ones that rank low within the population
                    # split_value = 1
                    for i, sol in enumerate(pop):
                        if i >= args.pop_survival_ratio * args.population_size:
                            if sol.generation == generation:
                                low_perf_learners.append(sol.rank)
                    logger.info('Stop learning: {}'.format(low_perf_learners))
                    # create one new solution per learner to replace
                    for _ in range(len(low_perf_learners)):
                        n_parents = int(args.pop_selection_ratio *
                                        args.population_size)
                        sol = pop[random.randint(0, n_parents - 1)]
                        sol.n_try += 1
                        new_solutions.append(copy.deepcopy(sol))

                # --------------
                # Mutation
                # --------------
                logger.info('Select next solution')
                if args.pop_mutation_method == PopMutation.NONE:
                    pass
                elif args.pop_mutation_method == PopMutation.RANDOM:
                    new_solutions = [
                        sol.random_mutation() for sol in new_solutions
                    ]
                elif args.pop_mutation_method == PopMutation.BACKUP:
                    new_solutions = [
                        sol.prob_mutation() for sol in new_solutions
                    ]
                elif args.pop_mutation_method == PopMutation.PREDICTION:
                    if len(pop) <= args.population_size:
                        new_solutions = [
                            sol.random_mutation() for sol in new_solutions
                        ]
                    else:
                        # fit a regressor that predicts score from
                        # hyperparameters
                        xs, ys = zip(*[sol.vec for sol in pop])
                        xs, ys = np.array(xs), np.array(ys)
                        x_scaler = StandardScaler()
                        x_scaler.fit(xs)
                        # n_hidden_nodes = len(2 * xs[0])
                        # hidden_layer = MLPRandomLayer(
                        #     n_hidden=n_hidden_nodes, activation_func='tanh')
                        # net = GenELMRegressor(hidden_layer=hidden_layer)
                        xs_ = x_scaler.transform(xs)
                        ys_ = (ys - min(ys)) / (max(ys) - min(ys) + ESP)
                        # net.fit(xs_, ys_)
                        reg = LinearRegression().fit(xs_, ys_)
                        for sol in new_solutions:
                            sol.pred_w = max(ys) - min(ys)
                            sol.pred_bias = min(ys)
                        # pred = net.predict(xs_)
                        pred = reg.predict(xs_)
                        pred_error = ((ys_ - pred)**2).mean()
                        logger.debug('pred error: {:5.4f}'.format(pred_error))
                        logger.line('loss/prediction error', generation,
                                    pred_error)
                        new_solutions = [
                            sol.predict_mutation(x_scaler, reg)
                            for sol in new_solutions
                        ]

                # check keyboard events -> tweak parameters manually here
                if keyboard.event('esc'):
                    print('==== Enter debug mode ====')
                    embed()

                # send to the learners
                pvalues = list()
                for (rank, prev_score, pvalue), sol in zip(
                        low_perf_learners, new_solutions):
                    learners[rank].set_solution(sol)
                    logger.info(
                        'replace sol: {}, {: >8.1f}, {: >1.3f} <- {}'.format(
                            WID.Learner(rank), prev_score, pvalue, sol))
                    pvalues.append(pvalue)
                if pvalues:
                    logger.line('p-value', generation, np.mean(pvalues),
                                visdom_env='main')

                # print children
                # children = sorted(children, key=lambda sol: sol.rank,
                #                   reverse=False)
                # for i, child in enumerate(children):
                #     logger.info('child: {}'.format(child))

            # print the current population
            pop = pop + children
            # sort by mean performance
            pop = sorted(pop, key=lambda sol: sol.score, reverse=True)
            for i, sol in enumerate(pop):
                logger.info('{} -> {}'.format(i, sol))
            # drop the worst-performing models
            pop = pop[:args.population_size]

            try:
                # report the best individual in the population
                # if len(pop) > 0:
                #     for name in pop[0].hyperparams:
                #         value = np.mean([getattr(s, name) for s in pop])
                #         logger.line('hyperparams-p/' + name, generation,
                #                     value)

                # mean hyperparameters of children (averaged over pop)
                for child in children:
                    for name in child.hyperparams:
                        value = np.mean([getattr(s, name) for s in pop])
                        logger.line('hyperparams/' + name, generation, value)
                # logger.line('sys/fd_counts', generation,
                #             utils.count_open_fds(), use_visdom=True)
                logger.line('sys/memory_usage', generation,
                            utils.get_memory_usage(), visdom_env='sys')
                logger.line('sys/memory_delta', generation,
                            utils.get_memory_usage_delta(), visdom_env='sys')
                avg_game_time = (time.time() - start_time) / n_games
                logger.line('sys/average_game_time', generation,
                            avg_game_time, visdom_env='sys')
                generation_interval = time.time() - generation_start_time
                logger.line('sys/game_time', generation,
                            generation_interval / generation_n_games,
                            visdom_env='sys')
                logger.line('sys/fps', generation,
                            generation_n_steps / generation_interval,
                            average=3, visdom_env='main')
            except Exception as exc:
                logger.error('Logging error {}'.format(exc))
                embed()

            # reset per-generation counters
            current_steps = 0
            generation += 1
            generation_start_time = time.time()
            generation_n_games = 0
            generation_n_steps = 0

        # logging
        logger_inputs = mailbox.get_mailbox(WID.Logger())
        # while logger_inputs.qsize() > 0 and \
        #         current_steps <= steps_per_generation:
        while logger_inputs.qsize() > 0:
            resp = logger_inputs.get()
            if resp['msg'] == Msg.SAVE_LOG:
                n_steps += resp['frames']
                generation_n_steps += resp['frames']
                # if args.pop_log_share_method == PopLogShare.OneToOne:
                #     current_steps += resp['frames']
                # elif args.pop_log_share_method == PopLogShare.OneToAll:
                #     current_steps += resp['frames'] // args.n_learners
                # else:
                #     raise NotImplementedError
                learner_rank = resp['rank']
                learner_name = resp['sender']
                scores = resp['scores']
                recent_scores[learner_rank] += scores
                n_games += len(resp['scores'])
                generation_n_games += len(resp['scores'])
                current_time = int(time.time() - start_time)
                n_queued_episodes = np.mean([
                    mb.qsize() for name, mb in mailbox.mailbox.items()
                    if name.startswith(WID.Learner())
                ])
                msg = [
                    '{}'.format(args.session_id),
                    '{}'.format(datetime.timedelta(seconds=current_time)),
                    '{}'.format(learner_name),
                    'G: {: >4,d}'.format(generation),
                    '{: >9,d} < {: >3.1f}'.format(n_steps,
                                                  n_queued_episodes),
                    'FPS: {: >4.1f}'.format(n_steps / current_time),
                    'Score: {: >8.1f}, #eval: {: >3d}'.format(
                        np.mean(recent_scores[learner_rank]), len(scores)),
                    'Loss: {: >6.4f}'.format(resp['total_loss'])
                ]
                msg = ', '.join(msg)
                logger.info(msg)
                try:
                    # mean score: take the mean of each learner's most recent
                    # scores (args.pbt_n_evals of them) and use the best of
                    # those means as the evaluation for the current
                    # generation; learners without enough recent scores are
                    # excluded
                    valid_scores = np.array([
                        s for s in recent_scores
                        if len(s) == args.pbt_n_evals
                    ])
                    if len(valid_scores) > 0:
                        max_score = valid_scores.mean(axis=1).max(axis=0)
                        score_dict = dict(max_score=max_score)
                        # per-learner scores (loop variable renamed to avoid
                        # shadowing `scores` above)
                        for rank, learner_scores in enumerate(recent_scores):
                            if len(learner_scores) == args.pbt_n_evals:
                                score_dict[f'learner_{rank}'] = np.mean(
                                    learner_scores)
                        logger.line('score', generation, score_dict,
                                    visdom_env='main', width=600)
                    logger.line('losses', generation, resp['loss'],
                                visdom_env='main', width=600)
                    logger.line('values', generation, resp['values'],
                                visdom_env='main')
                    logger.bar('gradients', resp['gradients'],
                               visdom_env='main', sort=False)
                    logger.bar('buffer_size', resp['buffer_size'],
                               visdom_env='main', sort=False)
                except Exception as exc:
                    logger.error('Logging error {}'.format(exc))
                    embed()
            elif resp['msg'] == Msg.HEARTBEAT:
                # monitor whether actors and learners are still alive
                heartbeat = resp['heartbeat']
                heartbeats_recv_time = time.time()
                heartbeat_recorder[heartbeat] = heartbeats_recv_time
                heart_beat_eltime = dict()
                for hb in heartbeat_recorder:
                    heart_beat_eltime[hb] = \
                        heartbeats_recv_time - heartbeat_recorder[hb]
                logger.bar('heartbeats', heart_beat_eltime,
                           visdom_env='main', sort=True)
            elif resp['msg'] == Msg.ERROR_INFO:
                error_logger.error(resp['error_info'] + '\n')

        # logger.progressbar(
        #     'Generations',
        #     ['Gen.', 'Frames'],
        #     [generation, current_steps],
        #     [args.max_generations, steps_per_generation],
        #     visdom_env='main')
        logger.progressbar(
            'Generations',
            ['Gen.'] + [WID.Learner(l.rank) for l in learners],
            [generation] + [l.policy_optimizer.cur_steps for l in learners],
            [args.max_generations] +
            [l.policy_optimizer.steps_per_generation for l in learners],
            visdom_env='main',
        )
        logger.line('sys/q_size', generation, mailbox.qsize_dict(),
                    visdom_env='main')

        # end of PBT training
        if generation >= args.max_generations:
            logger.info(
                '==== DONE TRAIN: Gen. {}, Step: {}, Elapsed Time: {} ===='.
                format(generation, n_steps,
                       datetime.timedelta(seconds=time.time() - start_time)))
            utils.log_tools.save_log_to_csv(args.log_path, args.session_id)
            # # force-kill all child processes
            # pid = os.getpid()
            # utils.kill_children_processes(pid)
            # [p.kill() for p in processes]
            # [learner.policy_optimizer.clear() for learner in learners]
            break
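# --- Supplementary sketch (not part of the original source) ---
# train()'s TRUNCATED_SELECTION branch uses a one-sided Mann-Whitney U test
# to decide whether a child's recent scores are significantly worse than the
# best child's before replacing its solution. A standalone illustration with
# synthetic scores:
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
child_scores = rng.normal(loc=10.0, scale=2.0, size=25)  # weaker learner
best_scores = rng.normal(loc=14.0, scale=2.0, size=25)   # current best

# alternative='less': is the child's score distribution stochastically
# smaller than the best child's? Mirrors the p < 0.01 replacement rule
# in train().
stat = stats.mannwhitneyu(child_scores, best_scores, alternative='less')
if stat.pvalue < 0.01:
    print(f'significantly worse (p={stat.pvalue:.4f}); replace this solution')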