Example 1
def controller_test_proc(controller, vae, mdrnn):
    step_log('4-3. controller_test_proc START!!')
    # define current best and load parameters
    if not os.path.exists(ctrl_dir):
        os.mkdir(ctrl_dir)
    ctrl_file = os.path.join(ctrl_dir, 'best.tar')

    print("Attempting to load previous best...")
    if os.path.exists(ctrl_file):
        # state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        state = torch.load(ctrl_file)
        controller.load_state_dict(state['state_dict'])

    print("Controller Test Rollout START!!")
    with torch.no_grad():
        r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                 rollout_time_limit)
        r_gen.rollout(flatten_parameters(controller.parameters()), render=True)
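All of these examples move weights between the PyTorch Controller and the flat parameter vectors that CMA-ES optimizes through two small helpers, flatten_parameters and load_parameters. A minimal sketch of what they typically look like in this codebase (an assumption inferred from the calls above, not the verbatim upstream source):

import torch

def flatten_parameters(params):
    # concatenate every parameter tensor into one flat numpy vector for CMA-ES
    return torch.cat([p.detach().view(-1) for p in params], dim=0).cpu().numpy()

def unflatten_parameters(params, example, device):
    # slice a flat vector back into tensors shaped like the example parameters
    params = torch.Tensor(params).to(device)
    unflattened, idx = [], 0
    for e_p in example:
        unflattened.append(params[idx:idx + e_p.numel()].view(e_p.size()))
        idx += e_p.numel()
    return unflattened

def load_parameters(params, controller):
    # copy a flat CMA-ES candidate back into the controller's weights
    proto = next(controller.parameters())
    params = unflatten_parameters(params, controller.parameters(), proto.device)
    for p, p_0 in zip(controller.parameters(), params):
        p.data.copy_(p_0)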
Example 2
def train_explorer(logdir,
                   epochs=10,
                   n_samples=4,
                   pop_size=4,
                   display=True,
                   max_workers=10):
    results = {}
    results['best'] = []
    # multiprocessing variables
    num_workers = min(max_workers, n_samples * pop_size)
    time_limit = 1000

    # create tmp dir if it doesn't exist, and clean it if it does
    tmp_dir = join(logdir, 'tmp_exp')
    if not exists(tmp_dir):
        mkdir(tmp_dir)
    else:
        for fname in listdir(tmp_dir):
            unlink(join(tmp_dir, fname))

    # create explore dir if it doesn't exist
    explore_dir = join(logdir, 'explore')
    if not exists(explore_dir):
        mkdir(explore_dir)

    ################################################################################
    #                           Thread routines                                    #
    ################################################################################
    def slave_routine(p_queue, r_queue, e_queue, p_index):
        """ Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the result
        queue and e_queue the end queue. They pull parameters from p_queue, execute
        the corresponding rollout, then place the result in r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result).  The same
        parameter can appear multiple times in p_queue, displaying the same id
        each time.

        As soon as e_queue is non-empty, the thread terminates.

        When multiple gpus are involved, the assigned gpu is determined by the
        process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        gpu = p_index % torch.cuda.device_count()
        device = torch.device(
            'cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        sys.stdout = open(join(tmp_dir, str(getpid()) + '.out'), 'a')
        sys.stderr = open(join(tmp_dir, str(getpid()) + '.err'), 'a')

        with torch.no_grad():
            r_gen = RolloutGenerator(logdir, device, time_limit)

            while e_queue.empty():
                if p_queue.empty():
                    sleep(.1)
                else:
                    s_id, params = p_queue.get()
                    r_queue.put((s_id, r_gen.rollout(params)))

    ################################################################################
    #                Define queues and start workers                               #
    ################################################################################
    p_queue = Queue()
    r_queue = Queue()
    e_queue = Queue()

    for p_index in range(num_workers):
        Process(target=slave_routine,
                args=(p_queue, r_queue, e_queue, p_index)).start()

    ################################################################################
    #                           Evaluation                                         #
    ################################################################################
    def evaluate(solutions, results, rollouts=100):
        """ Give current controller evaluation.

        Evaluation is minus the cumulative reward averaged over rollout runs.

        :args solutions: CMA set of solutions
        :args results: corresponding results
        :args rollouts: number of rollouts

        :returns: minus averaged cumulative reward
        """
        index_min = np.argmin(results)
        best_guess = solutions[index_min]
        restimates = []

        for s_id in range(rollouts):
            p_queue.put((s_id, best_guess))

        print("Evaluating...")
        for _ in tqdm(range(rollouts)):
            while r_queue.empty():
                sleep(.1)
            restimates.append(r_queue.get()[1])

        return best_guess, np.mean(restimates), np.std(restimates)

    ################################################################################
    #                           Launch CMA                                         #
    ################################################################################
    controller = Controller(LSIZE, RSIZE, ASIZE)  # dummy instance

    # define current best and load parameters
    cur_best = None
    ctrl_file = join(explore_dir, 'best.tar')
    print("Attempting to load previous best...")
    if exists(ctrl_file):
        state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])
        print("Previous best was {}...".format(-cur_best))

    parameters = controller.parameters()
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                                  {'popsize': pop_size})

    epoch = 0
    log_step = 3
    while not es.stop():
        if cur_best is not None and -cur_best > target_return:
            print("Already better than target, breaking...")
            break

        r_list = [0] * pop_size  # result list
        solutions = es.ask()

        # push parameters to queue
        for s_id, s in enumerate(solutions):
            for _ in range(n_samples):
                p_queue.put((s_id, s))

        # retrieve results
        if display:
            pbar = tqdm(total=pop_size * n_samples)
        for _ in range(pop_size * n_samples):
            while r_queue.empty():
                sleep(.1)
            r_s_id, r = r_queue.get()
            r_list[r_s_id] += r / n_samples
            if display:
                pbar.update(1)
        if display:
            pbar.close()

        es.tell(solutions, r_list)
        es.disp()

        # evaluation and saving
        if epoch % log_step == log_step - 1:
            best_params, best, std_best = evaluate(solutions, r_list)

            # log the best
            results['best'].append(best)

            print("Current evaluation: {}".format(best))
            if not cur_best or cur_best > best:
                cur_best = best
                print("Saving new best with value {}+-{}...".format(
                    -cur_best, std_best))
                load_parameters(best_params, controller)
                torch.save(
                    {
                        'epoch': epoch,
                        'reward': -cur_best,
                        'state_dict': controller.state_dict()
                    }, join(explore_dir, 'best.tar'))

            if -best > target_return:
                print(
                    "Terminating controller training with value {}...".format(
                        best))
                break

        epoch += 1

    es.result_pretty()
    e_queue.put('EOP')

    return results
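The Controller built above as a "dummy instance" exists only so CMA-ES knows the size of the parameter vector; in the World Models setup it is a single linear layer from the concatenated VAE latent and MDRNN hidden state to the action. A minimal sketch, assuming LSIZE, RSIZE and ASIZE are the latent, recurrent and action dimensions:

import torch
import torch.nn as nn

class Controller(nn.Module):
    # linear policy: concatenated (latent, recurrent) state -> action
    def __init__(self, latents, recurrents, actions):
        super().__init__()
        self.fc = nn.Linear(latents + recurrents, actions)

    def forward(self, *inputs):
        # inputs are the VAE latent and the MDRNN hidden state
        cat_in = torch.cat(inputs, dim=1)
        return self.fc(cat_in)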
Example 3
controller = Controller(LSIZE, RSIZE, ASIZE)  # dummy instance

# define current best and load parameters
cur_best = 0
ctrl_file = join(ctrl_dir, 'best.tar')
print("Attempting to load previous best...")
if exists(ctrl_file):
    state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
    cur_best = -state['reward']
    controller.load_state_dict(state['state_dict'])
    print("Previous best was {}...".format(-cur_best))
else:
    print('init controller')

parameters = controller.parameters()
es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                              {'popsize': pop_size})

epoch = 0
log_step = 3
print('start CMA-ES')
count_g = 0
while not es.stop():
    if cur_best is not None and -cur_best > args.target_return:
        print("Already better than target, breaking...")
        break
    print('training_es', count_g)
    r_list = [0] * pop_size  # result list
    solutions = es.ask()

    # push parameters to queue
Example 4
#                           Launch CMA                                         #
################################################################################
controller = Controller(LSIZE, RSIZE, ASIZE)  # dummy instance

# define current best and load parameters
cur_best = None
ctrl_file = join(ctrl_dir, 'best.tar')
print("Attempting to load previous best...")
if exists(ctrl_file):
    state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
    cur_best = -state['reward']
    controller.load_state_dict(state['state_dict'])
    print("Previous best was {}...".format(-cur_best))

parameters = controller.parameters()
es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                              {'popsize': pop_size})

epoch = 0
log_step = 3
while not es.stop():
    if cur_best is not None and -cur_best > args.target_return:
        print("Already better than target, breaking...")
        break

    r_list = [0] * pop_size  # result list
    solutions = es.ask()

    # push parameters to queue
    for s_id, s in enumerate(solutions):
        for _ in range(n_samples):
Example 5
def run(args):
    p_queue = Queue()
    r_queue = Queue()
    e_queue = Queue()

    latent = 32
    mixture = 256
    size = latent + mixture
    controller = Controller(size, 3)

    for i in range(args.max_workers):
        Process(target=slave_routine,
                args=(p_queue, r_queue, e_queue, i, args.logdir)).start()

    cur_best = None
    savefile = args.logdir/'best.tar'
    if savefile.exists():
        print(f'Loading from {savefile}')
        state = torch.load(savefile.as_posix(), map_location={'cuda:0': 'cpu'})
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])


    parameters = controller.parameters()
    sigma = 0.1
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), sigma,
                                  {'popsize': args.pop_size})

    epoch = 0
    while not es.stop():
        if cur_best is not None and -cur_best > args.target_return:
            print('Already better than target, breaking...')
            break

        r_list = [0] * args.pop_size  # result list
        solutions = es.ask()

        # push parameters to queue
        for s_id, s in enumerate(solutions):
            for _ in range(args.n_samples):
                p_queue.put((s_id, s))

        # Retrieve results
        if args.display:
            pbar = tqdm(total=args.pop_size * args.n_samples)
        for _ in range(args.pop_size * args.n_samples):
            while r_queue.empty():
                sleep(.1)
            r_s_id, r = r_queue.get()
            r_list[r_s_id] += r / args.n_samples
            if args.display:
                pbar.update(1)
        if args.display:
            pbar.close()

        es.tell(solutions, r_list)
        es.disp()

        # CMA-ES seeks to minimize, so we want to multiply the reward we
        # get in a rollout by -1.

        best_params, best, std_best = evaluate(solutions, r_list, p_queue,
                                               r_queue)
        if (not cur_best) or (cur_best > best):
            cur_best = best
            print(f'Saving new best with value {-cur_best}+-{std_best}')
            load_parameters(best_params, controller)
            torch.save({'epoch': epoch,
                        'reward': -cur_best,
                        'state_dict': controller.state_dict()},
                       savefile)
            # also save a raw copy of the weights whenever a new best is found
            torch.save(controller.state_dict(), f'{controller_pt}')
        if -best > args.target_return:
            print(f'Terminating controller training with value {best}...')
            break
        epoch += 1

    es.result_pretty()
    e_queue.put('EOP')
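The comment about multiplying the reward by -1 is the contract that makes everything above work: cma minimizes its objective, so r_gen.rollout has to return minus the episode's cumulative reward, and the saved 'reward' field is negated back on load. A hypothetical sketch of that rollout core (get_action_and_transition, time_limit and RSIZE are assumptions taken from the surrounding codebase):

def rollout(self, params, render=False):
    # load the flat CMA-ES candidate into the controller, if one is given
    if params is not None:
        load_parameters(params, self.controller)

    obs = self.env.reset()
    hidden = [torch.zeros(1, RSIZE).to(self.device) for _ in range(2)]
    cumulative = 0
    for _ in range(self.time_limit):
        action, hidden = self.get_action_and_transition(obs, hidden)
        obs, reward, done, _ = self.env.step(action)
        cumulative += reward
        if done:
            break
    # negate so that CMA-ES, a minimizer, ends up maximizing reward
    return -cumulative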
Example 6
def controller_train_proc(ctrl_dir,
                          controller,
                          vae,
                          mdrnn,
                          target_return=950,
                          skip_train=False,
                          display=True):
    step_log('4-2. controller_train_proc START!!')
    # define current best and load parameters
    cur_best = None
    if not os.path.exists(ctrl_dir):
        os.mkdir(ctrl_dir)
    ctrl_file = os.path.join(ctrl_dir, 'best.tar')

    p_queue = Queue()
    r_queue = Queue()
    #e_queue = Queue()   # pipaek : not necessary if not multiprocessing

    print("Attempting to load previous best...")
    if os.path.exists(ctrl_file):
        #state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        state = torch.load(ctrl_file)
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])
        print("Previous best was {}...".format(-cur_best))

    if skip_train:
        return  # pipaek: for when we want to skip improving the model through training

    def evaluate(solutions, results, rollouts=100):
        # pipaek: rollouts can be cut from 100 to 10 for speed; originally 100
        """ Give current controller evaluation.

        Evaluation is minus the cumulative reward averaged over rollout runs.

        :args solutions: CMA set of solutions
        :args results: corresponding results
        :args rollouts: number of rollouts

        :returns: minus averaged cumulative reward
        """
        index_min = np.argmin(results)
        best_guess = solutions[index_min]
        restimates = []

        for s_id in range(rollouts):
            print('p_queue.put(), s_id=%d' % s_id)
            p_queue.put((s_id, best_guess))
            print('>>>rollout_routine!!')
            rollout_routine()  # pipaek: here too, run the rollout right after p_queue.put

        print(">>>Evaluating...")
        for _ in tqdm(range(rollouts)):
            # while r_queue.empty():
            #     sleep(.1)   # pipaek: not needed, since this is not multi-process
            if not r_queue.empty():  # pipaek: 20180718 check so we don't get stuck in r_queue.get()!!
                # print('r_queue.get()')
                # restimates.append(r_queue.get()[1])
                r_s_id, r = r_queue.get()
                print('in evaluate r_queue.get() r_s_id=%d, r_queue remain=%d' %
                      (r_s_id, r_queue.qsize()))
                restimates.append(r)
            else:
                print('r_queue.empty() -> break!!')
                break

        return best_guess, np.mean(restimates), np.std(restimates)

    def rollout_routine():
        """ Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the result
        queue and e_queue the end queue. They pull parameters from p_queue, execute
        the corresponding rollout, then place the result in r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result).  The same
        parameter can appear multiple times in p_queue, displaying the same id
        each time.

        As soon as e_queue is non empty, the thread terminate.

        When multiple gpus are involved, the assigned gpu is determined by the
        process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        #gpu = p_index % torch.cuda.device_count()
        #device = torch.device('cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        #if not os.path.exists(tmp_dir):
        #    os.mkdir(tmp_dir)

        #sys.stdout = open(os.path.join(tmp_dir, 'rollout.out'), 'a')
        #sys.stderr = open(os.path.join(tmp_dir, 'rollout.err'), 'a')

        with torch.no_grad():
            r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                     rollout_time_limit)

            while not p_queue.empty():
                print('in rollout_routine, p_queue.get()')
                s_id, params = p_queue.get()
                print('r_queue.put() sid=%d' % s_id)
                r_queue.put((s_id, r_gen.rollout(params)))
                print('r_gen.rollout OK, r_queue.put()')
                #r_queue.qsize()

    parameters = controller.parameters()
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                                  {'popsize': C_POP_SIZE})
    print("CMAEvolutionStrategy start OK!!")

    epoch = 0
    log_step = 3
    while not es.stop():
        print("--------------------------------------")
        print("CURRENT EPOCH = %d" % epoch)
        if cur_best is not None and -cur_best > target_return:
            print("Already better than target, breaking...")
            break

        r_list = [0] * C_POP_SIZE  # result list
        solutions = es.ask()
        print("CMAEvolutionStrategy-ask")

        # push parameters to queue
        for s_id, s in enumerate(solutions):  # pipaek: this loop runs C_POP_SIZE times
            # for _ in range(C_POP_SIZE * C_N_SAMPLES):
            for _ in range(C_N_SAMPLES):
                print('in controller_train_proc p_queue.put() s_id : %d' % s_id)
                p_queue.put((s_id, s))
                # print("p_queue.put %d" % s_id)
                # pipaek: get right after p_queue.put, run the rollout, then
                # push the result into r_queue
                rollout_routine()
                print("rollout_routine OK, r_queue size=%d" % r_queue.qsize())

        # retrieve results
        if display:
            pbar = tqdm(total=C_POP_SIZE * C_N_SAMPLES)
        # for idx in range(C_POP_SIZE * C_N_SAMPLES):
        # pipaek: 20180718 changed the for loop into a while loop so we never
        # hang forever waiting on r_queue.get()
        while not r_queue.empty():
            # while r_queue.empty():
            #     sleep(.1)
            try:
                r_s_id, r = r_queue.get()
                print('in controller_train_proc r_queue.get() r_s_id=%d, r_queue remain=%d'
                      % (r_s_id, r_queue.qsize()))
                r_list[r_s_id] += r / C_N_SAMPLES
                if display:
                    pbar.update(1)
            except IndexError as err:
                print('IndexError during r_queue.get()')
                print('cur r_list size:%d, index:%d' % (len(r_list), r_s_id))
        if display:
            pbar.close()

        es.tell(solutions, r_list)  # pipaek: update CMA-ES with the r_list results for this solution set
        es.disp()

        # evaluation and saving
        if epoch % log_step == log_step - 1:
            print(">>>> TRYING EVALUATION, CURRENT EPOCH = %d" % epoch)
            best_params, best, std_best = evaluate(solutions, r_list, rollouts=100)
            # pipaek: evaluation could use just 10 rollouts instead of the
            # original 100 to save time
            print("Current evaluation: {}".format(best))
            if not cur_best or cur_best > best:
                cur_best = best
                print("Saving new best with value {}+-{}...".format(
                    -cur_best, std_best))
                load_parameters(best_params, controller)
                torch.save(
                    {
                        'epoch': epoch,
                        'reward': -cur_best,
                        'state_dict': controller.state_dict()
                    }, os.path.join(ctrl_dir, 'best.tar'))
            if -best > target_return:
                print(
                    "Terminating controller training with value {}...".format(
                        best))
                break

        epoch += 1

    print("es.stop!!")
    es.result_pretty()
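Stripped of rollouts and queues, every example above drives the same cma ask/tell loop. A self-contained toy version, minimizing a quadratic instead of a negated reward, shows the API contract on its own:

import cma
import numpy as np

es = cma.CMAEvolutionStrategy(np.zeros(10), 0.1, {'popsize': 8})
while not es.stop():
    solutions = es.ask()                                 # sample candidate vectors
    costs = [float(np.sum(s ** 2)) for s in solutions]   # CMA-ES minimizes this
    es.tell(solutions, costs)                            # update the search distribution
    es.disp()
es.result_pretty()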
Example 7
controller = Controller(LSIZE, RSIZE, ASIZE)  # dummy instance

# define current best and load parameters
cur_best = None
ctrl_file = join(prev_ctrl_dir, "best.tar")
print("Attempting to load previous best...")
if exists(ctrl_file):
    state = torch.load(ctrl_file, map_location={"cuda:0": "cpu"})
    # cur_best = -state["reward"]
    print("Loading Controller from {}".format(ctrl_file))
    controller.load_state_dict(state["state_dict"])
    # print("Previous best was {}...".format(-cur_best))

parameters = controller.parameters()
es = cma.CMAEvolutionStrategy(
    flatten_parameters(parameters), 0.1, {"popsize": pop_size}
)

epoch = 0
log_step = 1
while not es.stop():
    if epoch == 5:
        print("Reached the epoch cap, breaking...")
        break

    r_list = [0] * pop_size  # result list
    solutions = es.ask()

    # push parameters to queue
    for s_id, s in enumerate(solutions):
        for _ in range(n_samples):
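Several excerpts cut off right at this queue-filling loop, and they leave the worker protocol implicit: the master pushes (s_id, params) onto p_queue, workers push (s_id, result) onto r_queue, and any value on e_queue (the 'EOP' sentinel in Examples 2 and 5) tells workers to exit. A runnable toy sketch of that protocol with a stand-in worker (the trivial sum "result" and the join calls are additions for illustration):

from multiprocessing import Process, Queue
from time import sleep

def slave_routine(p_queue, r_queue, e_queue, p_index):
    # stand-in worker: replaces the rollout with a trivial sum
    while e_queue.empty():
        if p_queue.empty():
            sleep(.1)
        else:
            s_id, params = p_queue.get()
            r_queue.put((s_id, sum(params)))

if __name__ == '__main__':
    p_queue, r_queue, e_queue = Queue(), Queue(), Queue()
    workers = [Process(target=slave_routine,
                       args=(p_queue, r_queue, e_queue, i)) for i in range(2)]
    for w in workers:
        w.start()
    p_queue.put((0, [1.0, 2.0]))
    print(r_queue.get())   # -> (0, 3.0)
    e_queue.put('EOP')     # any non-empty e_queue terminates the workers
    for w in workers:
        w.join()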