예제 #1
0
    def multi_solve_environment(self):
        """Run the architecture search loop with one child process per episode.

        For each of ``self.arch_epochs`` rounds this method:

        1. Samples ``self.episodes`` action sequences from ``self.controller``
           and starts a ``Process`` running ``consume(worker, results_queue)``
           for each of them, spread over three CUDA device strings.
        2. Collects ``self.episodes`` worker objects from the queue and moves
           their ``actions_p`` / ``actions_index`` onto ``self.device``.
        3. Updates ``self.baseline`` as an exponential moving average of
           ``worker.acc`` weighted by ``self.baseline_weight``.
        4. Keeps the 20 workers with the highest ``acc`` seen so far, logs
           top-1 / top-5 / top-20 statistics and prints up to five genotypes.
        5. Performs ``self.ppo_epochs`` optimisation steps with ``self.adam``
           on the mean of ``self.cal_loss`` over this round's workers.

        Returns:
            None. The controller parameters and ``self.baseline`` are updated
            in place.
        """
        workers_top20 = []

        for arch_epoch in range(self.arch_epochs):
            results_queue = Queue()
            processes = []

            for episode in range(self.episodes):
                actions_p, actions_log_p, actions_index = \
                    self.controller.sample()
                # Hand plain Python lists (not tensors) to the child process.
                actions_p = actions_p.cpu().numpy().tolist()
                actions_log_p = actions_log_p.cpu().numpy().tolist()
                actions_index = actions_index.cpu().numpy().tolist()

                # Split the episodes into three contiguous thirds, one per
                # device string.
                if episode < self.episodes // 3:
                    device = 'cuda:0'
                elif episode < 2 * self.episodes // 3:
                    device = 'cuda:1'
                else:
                    # NOTE(review): 'cuda:3' (not 'cuda:2') is kept from the
                    # original code - confirm it matches the intended GPU.
                    device = 'cuda:3'

                worker = Worker(actions_p, actions_log_p, actions_index,
                                self.args, device)

                process = Process(target=consume, args=(worker, results_queue))
                process.start()
                processes.append(process)

            # Drain the queue BEFORE joining: a child that has put data on a
            # multiprocessing.Queue does not terminate until that data has
            # been flushed to the pipe, so joining first can deadlock.
            workers = []
            for _ in range(self.episodes):
                # Presumably `consume` puts the evaluated worker (with `acc`
                # and `genotype` set) on the queue - it is defined elsewhere.
                worker = results_queue.get()
                worker.actions_p = torch.Tensor(worker.actions_p).to(
                    self.device)
                worker.actions_index = torch.LongTensor(
                    worker.actions_index).to(self.device)
                workers.append(worker)

            for process in processes:
                process.join()

            # Exponential moving average of the accuracies; the very first
            # worker seen seeds the baseline.
            for worker in workers:
                if self.baseline is None:
                    self.baseline = worker.acc
                else:
                    self.baseline = self.baseline * self.baseline_weight + worker.acc * (
                        1 - self.baseline_weight)

            # Merge with the best workers from earlier rounds and keep the
            # 20 with the highest accuracy.
            workers_total = workers_top20 + workers
            workers_total.sort(key=lambda worker: worker.acc, reverse=True)
            workers_top20 = workers_total[:20]
            top1_acc = workers_top20[0].acc
            top5_avg_acc = np.mean(
                [worker.acc for worker in workers_top20[:5]])
            top20_avg_acc = np.mean([worker.acc for worker in workers_top20])
            logging.info(
                'arch_epoch {:0>3d} top1_acc {:.4f} top5_avg_acc {:.4f} top20_avg_acc {:.4f} baseline {:.4f} '
                .format(arch_epoch, top1_acc, top5_avg_acc, top20_avg_acc,
                        self.baseline))
            # Slice instead of indexing 0..4 so fewer than five workers does
            # not raise IndexError.
            for top_worker in workers_top20[:5]:
                print(top_worker.genotype)

            for ppo_epoch in range(self.ppo_epochs):
                loss = 0

                for worker in workers:
                    # Re-evaluate the controller on the sampled action
                    # indices to get the current probabilities.
                    actions_p, actions_log_p = self.controller.get_p(
                        worker.actions_index)

                    loss += self.cal_loss(actions_p, actions_log_p, worker,
                                          self.baseline)

                # Average over this round's workers before the update step.
                loss /= len(workers)
                logging.info('ppo_epoch {:0>3d} loss {:.4f} '.format(
                    ppo_epoch, loss))

                self.adam.zero_grad()
                loss.backward()
                self.adam.step()