def multi_solve_environment(self):
    """Run the architecture search loop with parallel evaluation workers.

    For each architecture epoch: sample ``self.episodes`` action sequences
    from the controller, evaluate each in its own subprocess (spread across
    three CUDA devices), fold the resulting accuracies into an exponential
    moving-average baseline, keep the 20 best architectures seen so far,
    and finally run ``self.ppo_epochs`` PPO updates on the controller via
    ``self.adam``.

    Side effects: spawns/joins subprocesses, logs progress via ``logging``,
    prints the top-5 genotypes, and mutates ``self.baseline`` and the
    controller parameters. Returns ``None``.
    """
    workers_top20 = []
    for arch_epoch in range(self.arch_epochs):
        results_queue = Queue()
        processes = []
        for episode in range(self.episodes):
            actions_p, actions_log_p, actions_index = self.controller.sample()
            # Detach to plain Python lists so they can be pickled into the
            # subprocess without dragging CUDA tensors across the fork.
            actions_p = actions_p.cpu().numpy().tolist()
            actions_log_p = actions_log_p.cpu().numpy().tolist()
            actions_index = actions_index.cpu().numpy().tolist()

            # Spread episodes roughly evenly over three GPUs.
            # NOTE(review): devices are cuda:0 / cuda:1 / cuda:3 — cuda:2 is
            # skipped; confirm this is intentional (e.g. a busy/faulty card).
            if episode < self.episodes // 3:
                device = 'cuda:0'
            elif episode < 2 * self.episodes // 3:
                device = 'cuda:1'
            else:
                device = 'cuda:3'
            worker = Worker(actions_p, actions_log_p, actions_index,
                            self.args, device)

            process = Process(target=consume, args=(worker, results_queue))
            process.start()
            processes.append(process)
        for process in processes:
            process.join()

        # Drain exactly one result per episode; convert the action data back
        # to tensors on the trainer's device for the PPO loss computation.
        workers = []
        for _ in range(self.episodes):
            worker = results_queue.get()
            worker.actions_p = torch.Tensor(worker.actions_p).to(self.device)
            worker.actions_index = torch.LongTensor(
                worker.actions_index).to(self.device)
            workers.append(worker)

        # Exponential-moving-average baseline over worker accuracies
        # (initialized from the first accuracy ever observed).
        for worker in workers:
            if self.baseline is None:
                self.baseline = worker.acc
            else:
                self.baseline = (self.baseline * self.baseline_weight
                                 + worker.acc * (1 - self.baseline_weight))

        # sort worker retain top20
        workers_total = workers_top20 + workers
        workers_total.sort(key=lambda w: w.acc, reverse=True)
        workers_top20 = workers_total[:20]
        top1_acc = workers_top20[0].acc
        top5_avg_acc = np.mean([w.acc for w in workers_top20[:5]])
        top20_avg_acc = np.mean([w.acc for w in workers_top20])
        logging.info(
            'arch_epoch {:0>3d} top1_acc {:.4f} top5_avg_acc {:.4f} top20_avg_acc {:.4f} baseline {:.4f} '
            .format(arch_epoch, top1_acc, top5_avg_acc, top20_avg_acc,
                    self.baseline))
        for i in range(5):
            print(workers_top20[i].genotype)

        # PPO update: average the surrogate loss over this epoch's workers
        # and take one optimizer step per ppo_epoch.
        for ppo_epoch in range(self.ppo_epochs):
            loss = 0
            for worker in workers:
                actions_p, actions_log_p = self.controller.get_p(
                    worker.actions_index)
                loss += self.cal_loss(actions_p, actions_log_p, worker,
                                      self.baseline)
            loss /= len(workers)
            logging.info('ppo_epoch {:0>3d} loss {:.4f} '.format(
                ppo_epoch, loss))
            self.adam.zero_grad()
            loss.backward()
            self.adam.step()