Example #1
File: task.py  Project: PHDdonghui/PGMORL
 def __init__(self, sample, scalarization):
     # keep independent copies so each task owns its own policy sample
     # and its own scalarization (weight) function
     self.sample = Sample.copy_from(sample)
     self.scalarization = deepcopy(scalarization)
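
A minimal usage sketch (not part of the original file): assuming Sample, Task, and WeightedSumScalarization are importable from the PGMORL codebase, a Task simply pairs one policy sample with one weight vector, just as run() in Example #2 does when composing task_batch. Here elite_sample is a hypothetical Sample taken from the population.

import numpy as np

# hypothetical pairing of an existing policy sample with a 2-objective weight vector
scalarization = WeightedSumScalarization(num_objs=2, weights=np.array([0.5, 0.5]))
task = Task(elite_sample, scalarization)
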
Example #2
import os
import time
import pickle
from copy import deepcopy
from multiprocessing import Process, Queue, Event  # the original project may use torch.multiprocessing instead

import numpy as np
import torch

# NOTE: project-local components used below (Sample, Task, EP, OptGraph,
# Population2d, Population3d, WeightedSumScalarization, initialize_warm_up_batch,
# MOPG_worker, generate_weights_batch_dfs, print_info) come from the PGMORL
# codebase and are not shown in this excerpt.


def run(args):

    # --------------------> Preparation <-------------------- #
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_default_dtype(torch.float64)
    torch.set_num_threads(1)
    device = torch.device("cpu")

    # build a scalarization template
    scalarization_template = WeightedSumScalarization(
        num_objs=args.obj_num, weights=np.ones(args.obj_num) / args.obj_num)

    total_num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes

    start_time = time.time()

    # initialize ep and population and opt_graph
    ep = EP()
    if args.obj_num == 2:
        population = Population2d(args)
    elif args.obj_num > 2:
        population = Population3d(args)
    else:
        raise NotImplementedError
    opt_graph = OptGraph()

    # Construct tasks for warm up
    elite_batch, scalarization_batch = initialize_warm_up_batch(args, device)
    rl_num_updates = args.warmup_iter
    for sample, scalarization in zip(elite_batch, scalarization_batch):
        sample.optgraph_id = opt_graph.insert(deepcopy(scalarization.weights),
                                              deepcopy(sample.objs), -1)

    episode = 0
    iteration = 0
    while iteration < total_num_updates:
        if episode == 0:
            print_info(
                '\n------------------------------- Warm-up Stage -------------------------------'
            )
        else:
            print_info(
                '\n-------------------- Evolutionary Stage: Generation {:3} --------------------'
                .format(episode))

        episode += 1

        offspring_batch = np.array([])

        # --------------------> RL Optimization <-------------------- #
        # compose task for each elite
        task_batch = []
        for elite, scalarization in zip(elite_batch, scalarization_batch):
            # each task is a (policy, weight) pair
            task_batch.append(Task(elite, scalarization))

        # run MOPG for each task in parallel
        processes = []
        results_queue = Queue()
        done_event = Event()

        for task_id, task in enumerate(task_batch):
            p = Process(target=MOPG_worker,
                        args=(args, task_id, task, device, iteration, rl_num_updates,
                              start_time, results_queue, done_event))
            p.start()
            processes.append(p)

        # collect MOPG results for offsprings and insert objs into objs buffer
        all_offspring_batch = [[] for _ in range(len(processes))]
        cnt_done_workers = 0
        while cnt_done_workers < len(processes):
            rl_results = results_queue.get()
            task_id, offsprings = rl_results['task_id'], rl_results['offspring_batch']
            for sample in offsprings:
                all_offspring_batch[task_id].append(Sample.copy_from(sample))
            if rl_results['done']:
                cnt_done_workers += 1

        # put all intermediate policies into all_sample_batch for the EP update
        all_sample_batch = []
        # store the last policy for each optimization weight for RA
        last_offspring_batch = [None] * len(processes)
        # only the policies with iteration % update_iter == 0 are inserted into offspring_batch for the population update;
        # after the warm-up stage, this is equivalent to last_offspring_batch
        offspring_batch = []
        for task_id in range(len(processes)):
            offsprings = all_offspring_batch[task_id]
            prev_node_id = task_batch[task_id].sample.optgraph_id
            opt_weights = deepcopy(
                task_batch[task_id].scalarization.weights).detach().numpy()
            for i, sample in enumerate(offsprings):
                all_sample_batch.append(sample)
                if (i + 1) % args.update_iter == 0:
                    prev_node_id = opt_graph.insert(opt_weights,
                                                    deepcopy(sample.objs),
                                                    prev_node_id)
                    sample.optgraph_id = prev_node_id
                    offspring_batch.append(sample)
            last_offspring_batch[task_id] = offsprings[-1]

        done_event.set()

        # -----------------------> Update EP <----------------------- #
        # update EP and population
        ep.update(all_sample_batch)
        population.update(offspring_batch)

        # -------------------> Task Selection <--------------------- #
        if args.selection_method == 'moead':
            elite_batch, scalarization_batch = [], []
            weights_batch = []
            generate_weights_batch_dfs(0, args.obj_num, args.min_weight,
                                       args.max_weight, args.delta_weight, [],
                                       weights_batch)
            for weights in weights_batch:
                scalarization = deepcopy(scalarization_template)
                scalarization.update_weights(weights)
                scalarization_batch.append(scalarization)
                best_sample, best_value = None, -np.inf
                for sample in population.sample_batch:
                    value = scalarization.evaluate(torch.Tensor(sample.objs))
                    if value > best_value:
                        best_sample, best_value = sample, value
                elite_batch.append(best_sample)
        elif args.selection_method == 'prediction-guided':
            elite_batch, scalarization_batch, predicted_offspring_objs = population.prediction_guided_selection(
                args, iteration, ep, opt_graph, scalarization_template)
        elif args.selection_method == 'random':
            elite_batch, scalarization_batch = population.random_selection(
                args, scalarization_template)
        elif args.selection_method == 'ra':
            elite_batch = last_offspring_batch
            scalarization_batch = []
            weights_batch = []
            generate_weights_batch_dfs(0, args.obj_num, args.min_weight,
                                       args.max_weight, args.delta_weight, [],
                                       weights_batch)
            for weights in weights_batch:
                scalarization = deepcopy(scalarization_template)
                scalarization.update_weights(weights)
                scalarization_batch.append(scalarization)
        elif args.selection_method == 'pfa':
            if args.obj_num > 2:
                raise NotImplementedError
            elite_batch = last_offspring_batch
            scalarization_batch = []
            delta_ratio = (iteration + rl_num_updates + args.update_iter -
                           args.warmup_iter) / (total_num_updates -
                                                args.warmup_iter)
            delta_ratio = np.clip(delta_ratio, 0.0, 1.0)
            for i in np.arange(args.min_weight,
                               args.max_weight + 0.5 * args.delta_weight,
                               args.delta_weight):
                w = np.clip(i + delta_ratio * args.delta_weight,
                            args.min_weight, args.max_weight)
                weights = np.array([abs(w), abs(1.0 - w)])
                scalarization = deepcopy(scalarization_template)
                scalarization.update_weights(weights)
                scalarization_batch.append(scalarization)
        else:
            raise NotImplementedError

        print_info('Selected Tasks:')
        for i in range(len(elite_batch)):
            print_info('objs = {}, weight = {}'.format(
                elite_batch[i].objs, scalarization_batch[i].weights))

        iteration = min(iteration + rl_num_updates, total_num_updates)

        rl_num_updates = args.update_iter

        # ----------------------> Save Results <---------------------- #
        # save ep
        ep_dir = os.path.join(args.save_dir, str(iteration), 'ep')
        os.makedirs(ep_dir, exist_ok=True)
        with open(os.path.join(ep_dir, 'objs.txt'), 'w') as fp:
            for obj in ep.obj_batch:
                fp.write(('{:5f}' + (args.obj_num - 1) * ',{:5f}' +
                          '\n').format(*obj))

        # save population
        population_dir = os.path.join(args.save_dir, str(iteration),
                                      'population')
        os.makedirs(population_dir, exist_ok=True)
        with open(os.path.join(population_dir, 'objs.txt'), 'w') as fp:
            for sample in population.sample_batch:
                fp.write(('{:5f}' + (args.obj_num - 1) * ',{:5f}' +
                          '\n').format(*(sample.objs)))
        # save optgraph and node id for each sample in population
        with open(os.path.join(population_dir, 'optgraph.txt'), 'w') as fp:
            fp.write('{}\n'.format(len(opt_graph.objs)))
            for i in range(len(opt_graph.objs)):
                fp.write(('{:5f}' + (args.obj_num - 1) * ',{:5f}' + ';{:5f}' +
                          (args.obj_num - 1) * ',{:5f}' + ';{}\n').format(
                              *(opt_graph.weights[i]), *(opt_graph.objs[i]),
                              opt_graph.prev[i]))
            fp.write('{}\n'.format(len(population.sample_batch)))
            for sample in population.sample_batch:
                fp.write('{}\n'.format(sample.optgraph_id))

        # save elites
        elite_dir = os.path.join(args.save_dir, str(iteration), 'elites')
        os.makedirs(elite_dir, exist_ok=True)
        with open(os.path.join(elite_dir, 'elites.txt'), 'w') as fp:
            for elite in elite_batch:
                fp.write(('{:5f}' + (args.obj_num - 1) * ',{:5f}' +
                          '\n').format(*(elite.objs)))
        with open(os.path.join(elite_dir, 'weights.txt'), 'w') as fp:
            for scalarization in scalarization_batch:
                fp.write(('{:5f}' + (args.obj_num - 1) * ',{:5f}' +
                          '\n').format(*(scalarization.weights)))
        if args.selection_method == 'prediction-guided':
            with open(os.path.join(elite_dir, 'predictions.txt'), 'w') as fp:
                for objs in predicted_offspring_objs:
                    fp.write(('{:5f}' + (args.obj_num - 1) * ',{:5f}' +
                              '\n').format(*(objs)))
        with open(os.path.join(elite_dir, 'offsprings.txt'), 'w') as fp:
            for i in range(len(all_offspring_batch)):
                for j in range(len(all_offspring_batch[i])):
                    fp.write(('{:5f}' + (args.obj_num - 1) * ',{:5f}' +
                              '\n').format(*(all_offspring_batch[i][j].objs)))

    # ----------------------> Save Final Model <---------------------- #

    os.makedirs(os.path.join(args.save_dir, 'final'), exist_ok=True)

    # save ep policies & env_params
    for i, sample in enumerate(ep.sample_batch):
        torch.save(
            sample.actor_critic.state_dict(),
            os.path.join(args.save_dir, 'final', 'EP_policy_{}.pt'.format(i)))
        with open(
                os.path.join(args.save_dir, 'final',
                             'EP_env_params_{}.pkl'.format(i)), 'wb') as fp:
            pickle.dump(sample.env_params, fp)

    # save all ep objectives
    with open(os.path.join(args.save_dir, 'final', 'objs.txt'), 'w') as fp:
        for i, obj in enumerate(ep.obj_batch):
            fp.write(('{:5f}' + (args.obj_num - 1) * ',{:5f}' +
                      '\n').format(*(obj)))

    # save all ep env_params
    if args.obj_rms:
        with open(os.path.join(args.save_dir, 'final', 'env_params.txt'),
                  'w') as fp:
            for sample in ep.sample_batch:
                fp.write('obj_rms: mean: {} var: {}\n'.format(
                    sample.env_params['obj_rms'].mean,
                    sample.env_params['obj_rms'].var))