Esempio n. 1
0
def main():
    """Run CMA-ES on LunarLander and plot the per-generation reward.

    The optimizer is given ``log/lander`` as its log directory; the training
    results are then read back from ``log/lander/train_results.npy`` and the
    plot is written to ``log/lander/results.png``.
    """
    # Alternative experiment kept for reference: linear regression.
    # d = 100
    # lin_reg = LinearRegression(d=d)
    # opt = CMAES(d, m=np.random.rand(d), sig=0.1, fitness=lin_reg, log_dir='log/linreg')
    # opt.fit(200)
    # results = np.load('log/linreg/train_results.npy')
    # plt.xlabel('generation')
    # plt.ylabel('loss')
    # plt.title('CMA-ES on Linear Regression')
    # plt.plot(np.arange(len(results)), results)
    # plt.savefig('log/linreg/results.png')
    # # plt.show()

    # LunarLander experiment.
    fitness = LunarLander(continuous=True)
    dim = fitness.mu.dim
    optimizer = CMAES(
        dim,
        m=np.random.rand(dim),
        lam=100,
        sig=0.5,
        fitness=fitness,
        log_dir='log/lander',
        save_models=True,
    )
    optimizer.fit(100)

    # CMA-ES minimized the negative reward, so multiplying by -1 switches
    # the logged values back to rewards.
    rewards = np.load('log/lander/train_results.npy') * -1
    generations = np.arange(len(rewards))

    plt.xlabel('generation')
    plt.ylabel('reward')
    plt.title('CMA-ES on LunarLander')
    plt.plot(generations, rewards)
    plt.savefig('log/lander/results.png')
Esempio n. 2
0
def evaluate(mode: str,
             dimensions: int = 10,
             iterations: int = 100,
             lambda_arg: int = 100,
             frequency: int = None,
             objectives: list = None):
    """Run CMAES repeatedly on each objective and aggregate the ECDF results.

    :param mode: forwarded to ``CMAES`` as its third argument.
    :param dimensions: forwarded to ``CMAES`` as its second argument.
    :param iterations: number of independent runs per objective.
    :param lambda_arg: forwarded to ``CMAES`` as its fifth argument
        (reported as "population" in the banner).
    :param frequency: forwarded to ``CMAES`` as its fourth argument.
    :param objectives: objective names to run; defaults to
        ``['quadratic', 'felli', 'bent']`` when ``None``.
    :return: whatever ``_get_ecdf_data`` produces from the list of
        per-run ``algo.ecdf(_TARGETS)`` values.
    """
    if objectives is None:
        objectives = ['quadratic', 'felli', 'bent']

    def run_once(objective):
        # One full optimisation run, reduced to its ECDF against _TARGETS.
        algo = CMAES(objective, dimensions, mode, frequency, lambda_arg)
        algo.generation_loop()
        return algo.ecdf(_TARGETS)

    print("Starting evaluation...")
    print(
        f"mode: {mode}; dimensions: {dimensions}; frequency: {frequency}; iterations: {iterations}; "
        f"population: {lambda_arg}")

    ecdf_list = []
    for objective in objectives:
        print("    Currently running:", objective)
        ecdf_list.extend(run_once(objective) for _ in range(iterations))

    return _get_ecdf_data(ecdf_list)
Esempio n. 3
0
def learn_representation(args, Q_data, TR_targets=None, fname="deafult.dat"):
    """Fit a representation to ``Q_data``, save it, and display the result.

    The fit is delegated to ``mp_cma_run``; its two results are written to
    ``policies/q_<fname>`` and ``policies/f_<fname>``. Afterwards the first
    layer of the initial, target and fitted grids is shown, together with
    the grid re-evaluated from the fitted parameters.

    :param args: object with ``rbf`` and ``nrbf`` attributes; also passed on
        unchanged to ``mp_cma_run``.
    :param Q_data: array of target values; sliced here in chunks of
        ``125 * 101`` elements (assumes a flat array - TODO confirm).
    :param TR_targets: optional 2-D array whose column 2 is compared with
        the layer index; when given, ``tr_cost`` is printed for the target
        and the fitted grid.
    :param fname: file-name suffix for the two output files.
    """
    import os  # local import: only needed to prepare the output directory

    size = (125, 101, 3)
    dsize = (10, 10, 3)
    offset = size[0] * size[1]
    doffset = dsize[0] * dsize[1]
    width = 0.4
    # 'nrbf' is selected only when args.nrbf is set and args.rbf is not;
    # every other combination falls back to 'rbf'.
    kind = 'nrbf' if (not args.rbf and args.nrbf) else 'rbf'

    Q_init = np.zeros(Q_data.size)
    start = time.time()
    Q_hat, F_hat = mp_cma_run(args, Q_data, Q_init, TR_targets, size, dsize,
                              width, kind)
    print("Required time: {}".format(time.time() - start))

    # Saving. Create the target directories first so that the result of the
    # optimisation is not lost to a FileNotFoundError.
    q_path = "policies/q_{}".format(fname)
    f_path = "policies/f_{}".format(fname)
    for path in (q_path, f_path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
    Q_hat.tofile(q_path)
    F_hat.tofile(f_path)

    print("Saving complete")

    # Single-layer shapes used for plotting and for the plotting CMAES.
    mp_size = (size[0], size[1], 1)
    mp_dsize = (dsize[0], dsize[1], 1)
    with CMAES(mp_size, mp_dsize, width, kind, name='plotting') as cmaes:
        # Only the first layer is displayed.
        for i in range(1):
            layer = slice(offset * i, offset * (i + 1))
            dlayer = slice(doffset * i, doffset * (i + 1))

            q_init = Q_init[layer]
            show_grid_representation(q_init, (0, 1), mp_size)
            q_data = Q_data[layer]
            show_grid_representation(q_data, (0, 1), mp_size)
            q_hat = Q_hat[layer]
            show_grid_representation(q_hat, (0, 1), mp_size)
            q_hat_ff = cmaes.evaluate(F_hat[dlayer])
            show_grid_representation(q_hat_ff, (0, 1), mp_size)

            # The stored grid must equal the one rebuilt from the parameters.
            assert (q_hat == q_hat_ff).all()

            if TR_targets is not None:
                tr_idxs = np.nonzero(TR_targets[:, 2] == i)[0]
                tr_target = np.copy(TR_targets[tr_idxs, :])
                c_tr = cmaes.tr_cost(q_data, tr_target, size[0])
                print("Current tr cost {}".format(c_tr))
                c_tr = cmaes.tr_cost(q_hat, tr_target, size[0])
                print("Next tr cost {}".format(c_tr))

            waitforbuttonpress()
Esempio n. 4
0
def mp_run(q_datas, q_inits, tr_targets, size, dsize, width, kind, n):
    """Optimise the ``n``-th problem inside its own CMAES context.

    Picks entry ``n`` of ``q_datas``, ``q_inits`` and ``tr_targets``, runs
    ``cmaes.optimize`` starting from ``cmaes.initial(q_init)`` and prints the
    first three components of the initial and final cost.

    :return: tuple ``(q_hat, f_hat)`` - a copy of the grid evaluated from the
        optimised parameters, and the optimised parameters themselves.
    """
    q_data, q_init, tr_target = q_datas[n], q_inits[n], tr_targets[n]
    worker = multiprocessing.current_process().name

    with CMAES(size, dsize, width, kind, name=worker) as cmaes:
        f_hat, cost0, cost1 = cmaes.optimize(q_data, cmaes.initial(q_init),
                                             tr_target)
        # Copied before the context closes; presumably evaluate() returns a
        # buffer owned by the CMAES object - TODO confirm.
        q_hat = np.copy(cmaes.evaluate(f_hat))
        initial_cost = np.array(cost0)
        final_cost = np.array(cost1)

    print("\tInitial {} cost: {}, {}, {}".format(
        worker, initial_cost[0], initial_cost[1], initial_cost[2]))
    print("\tFinal {} cost: {}, {}, {}".format(
        worker, final_cost[0], final_cost[1], final_cost[2]))
    return q_hat, f_hat
Esempio n. 5
0
def cma_test():
    """Recover a known parameter vector with CMAES and display the result.

    A grid is generated from ``f_true``, the optimiser is started from an
    all-zero parameter vector, and the initial, target and fitted grids are
    shown together with the layer-0 trajectory targets. The trajectory cost
    is printed before and after the optimisation.
    """
    size = (125, 101, 1)
    dsize = (3, 2, 1)
    width = 0.4
    kind = 'rbf'
    layer_shape = (size[0], size[1], 1)

    f_true = np.array([0, 500, 0, 0, 0, -500], dtype='float64')
    #f_true = np.zeros((dsize[0]*dsize[1],), dtype='float64')

    with CMAES(size, dsize, width, kind, name='cma_test') as cmaes:
        # evaluate() results are copied before the next evaluate() call.
        q_data = np.copy(cmaes.evaluate(f_true))

        # prepare_targets is fed three tiled copies of the single layer.
        Q_data = np.tile(q_data, 3)
        TR_targets = prepare_targets(
            Q_data, "pendulum_sarsa_grid_rand_play-test-0.csv", 0.97)
        tr_idxs = np.nonzero(TR_targets[:, 2] == 0)[0]
        #tr_idxs = [tr_idxs[0]]
        tr_target = TR_targets[tr_idxs, :]

        f_init = np.zeros(f_true.shape)
        q_init = np.copy(cmaes.evaluate(f_init))

        c_tr = cmaes.tr_cost(q_init, tr_target, size[0])
        print("Initial tr cost {}".format(c_tr))

        # mp_run unpacks optimize() as (f_hat, cost0, cost1). Accept that
        # tuple form as well as a bare parameter vector, so the calls below
        # always receive the parameters and never the whole tuple.
        result = cmaes.optimize(q_data, f_init, tr_target)
        f_hat = result[0] if isinstance(result, tuple) else result
        q_hat = cmaes.evaluate(f_hat)

        print(np.linalg.norm(q_hat - q_data) + 1 * np.linalg.norm(f_hat))
        print(cmaes.objective(f_hat, q_data))

        show_grid_representation(q_init, (0, 1), layer_shape)
        show_grid_representation(q_data, (0, 1), layer_shape)
        show_grid_representation(q_hat, (0, 1), layer_shape)
        plt.scatter(tr_target[:, 0], tr_target[:, 1], c='k', s=40, marker='+')
        c_tr = cmaes.tr_cost(q_hat, tr_target, size[0])
        print("Final tr cost {}".format(c_tr))

        waitforbuttonpress()
Esempio n. 6
0
def test_compare_qf(fname):
    """Compare a saved grid with the grid rebuilt from saved parameters.

    Loads ``policies/q_<fname>`` and ``policies/f_<fname>``, re-evaluates
    the grid from the parameters, shows both layer by layer against the test
    trajectories, and finally shows the policy computed from the saved grid
    with the trajectory points overlaid.

    :param fname: file-name suffix shared by the two ``policies/`` files.
    """
    size = (125, 101, 3)
    dsize = (10, 10, 3)
    layer_len = size[0] * size[1]
    single_layer = (size[0], size[1], 1)

    with CMAES(size, dsize, width=0.4, kind='rbf') as cmaes:
        q_saved = load_grid_representation("policies/q_{}".format(fname))
        f_saved = np.fromfile("policies/f_{}".format(fname))

        q_rebuilt = cmaes.evaluate(f_saved)

        trajectories = load_trajectories(
            csv_read(
                ["trajectories/pendulum_sarsa_grid_rand_play-test-0.csv"]))

        for grid in (q_saved, q_rebuilt):
            see_by_layers(grid, trajectories, layer_len)

        policy_grid = calc_grid_policy(q_saved, (0, 1), size)
        show_grid_representation(policy_grid, (0, 1), single_layer)
        plt.scatter(trajectories[:, 0],
                    trajectories[:, 1],
                    c='w',
                    s=40,
                    marker='+')
        plt.waitforbuttonpress()
Esempio n. 7
0
def rbf_test():
    """Show the grid produced by a hand-made parameter vector.

    The parameter vector is all zeros except for the first two entries
    (-500 and 500). Only the first layer of the evaluated grid is displayed.
    """
    size = (125, 101, 3)
    dsize = (10, 10, 3)
    n_params = np.prod(dsize)
    layer_len = size[0] * size[1]
    single_layer = (size[0], size[1], 1)

    with CMAES(size, dsize, width=0.4, kind='rbf') as cmaes:

        # Random alternative, kept for reference:
        #params = 500*np.random.uniform(-1, 1, size=(1, n_params))
        params = np.zeros(n_params)
        params[0] = -500
        params[1] = 500

        q_grid = cmaes.evaluate(params)
        for layer in range(1):
            first = layer_len * layer
            show_grid_representation(q_grid[first:first + layer_len], (0, 1),
                                     single_layer)

        # Optional dump of the generated data:
        #q_grid.tofile("q_rbf_test.dat")
        #params.tofile("f_rbf_test.dat")

        waitforbuttonpress()
Esempio n. 8
0
 def __init__(self, train_featurizer=False):
     """Create the wrapped ``CMAES`` instance and keep it in ``self.cmaes``.

     :param train_featurizer: forwarded unchanged as the only positional
         argument of ``CMAES``; its meaning is defined there (not visible
         from this block).
     """
     self.cmaes = CMAES(train_featurizer)
Esempio n. 9
0
def main(argv):
    """Parse the command line, build environment, policy and algorithm, run.

    Recognised options (an option that takes a value consumes the next
    argument; a trailing option without its value is accepted silently):

        -f FILE   .ini configuration file (mandatory)
        -s SEED   integer seed (default 1)
        -n N      integer number of replications (default 1)
        -a ALGO   parsed, but the value is not used by this function
        -t FILE   test mode 1 on the policy stored in FILE
        -T FILE   test mode 2 on the policy stored in FILE
        -d DIR    directory handed to the algorithm (default './')
        -tf       accepted; only sets a local flag

    Unrecognised arguments produce a warning and are skipped.

    The function reads several module-level settings (``environment``,
    ``algoname``, ``maxsteps``, ``nrobots``, ``saveeach``, ``heterogeneous``
    and the evolutionary parameters); presumably ``parseConfigFile`` fills
    them in - verify against its definition.

    :param argv: full argument vector, program name included.
    :raises SystemExit: with code -1 when called without arguments, without
        a configuration file, or with an unknown algorithm name.
    """
    global maxsteps
    global environment
    global filedir
    global saveeach
    global nrobots
    global algoname

    argc = len(argv)

    # if called without parameters display help information
    if argc == 1:
        helper()
        sys.exit(-1)

    # Default parameters:
    filename = None  # configuration file
    cseed = 1  # seed
    nreplications = 1  # nreplications
    filedir = './'  # directory
    testfile = None  # file containing the policy to be tested
    test = 0  # whether we want to test a policy (1=show behavior, 2=show neurons)
    displayneurons = 0  # whether we want to display the activation state of the neurons
    useTf = False  # whether we want to use tensorflow to implement the policy

    i = 1
    while i < argc:
        if argv[i] == "-f":
            i += 1
            if i < argc:
                filename = argv[i]
                i += 1
        elif argv[i] == "-s":
            i += 1
            if i < argc:
                cseed = int(argv[i])
                i += 1
        elif argv[i] == "-n":
            i += 1
            if i < argc:
                nreplications = int(argv[i])
                i += 1
        elif argv[i] == "-a":
            i += 1
            if i < argc:
                # NOTE(review): stored but never read below; the algorithm
                # actually used is the module-level `algoname`.
                algorithm = argv[i]
                i += 1
        elif argv[i] == "-t":
            i += 1
            test = 1
            if i < argc:
                testfile = argv[i]
                i += 1
        elif argv[i] == "-T":
            i += 1
            test = 2
            displayneurons = 1
            if i < argc:
                testfile = argv[i]
                i += 1
        elif argv[i] == "-d":
            i += 1
            if i < argc:
                filedir = argv[i]
                i += 1
        elif argv[i] == "-tf":
            i += 1
            useTf = True
        else:
            # We simply ignore the argument
            print("\033[1mWARNING: unrecognized argument %s \033[0m" % argv[i])
            i += 1

    # load the .ini file
    if filename is None:
        # The message has no conversion specifier, so it must not be combined
        # with the % operator (that raised TypeError instead of exiting).
        print("\033[1mERROR: You need to specify an .ini file\033[0m")
        sys.exit(-1)
    parseConfigFile(filename)

    # if a directory is not specified, we use the current directory
    if filedir is None:
        filedir = scriptdirname

    # check whether the user specified a valid algorithm
    availableAlgos = ('CMAES', 'Salimans', 'xNES', 'sNES', 'SSS', 'pepg',
                      'coevo2', 'coevo2r')
    if algoname not in availableAlgos:
        print("\033[1mAlgorithm %s is unknown\033[0m" % algoname)
        print("Please use one of the following algorithms:")
        for a in availableAlgos:
            print("%s" % a)
        sys.exit(-1)

    print("Environment %s nreplications %d maxmsteps %dm " %
          (environment, nreplications, maxsteps / 1000000))
    env = None
    policy = None

    # Evorobot Environments (we expect observation and action made of numpy array of float32)
    if "Er" in environment:
        ErProblem = __import__(environment)
        env = ErProblem.PyErProblem()
        # Create a new doublepole object
        #action_space = spaces.Box(-1., 1., shape=(env.noutputs,), dtype='float32')
        #observation_space = spaces.Box(-np.inf, np.inf, shape=(env.ninputs,), dtype='float32')
        ob = np.arange(env.ninputs * nrobots, dtype=np.float32)
        ac = np.arange(env.noutputs * nrobots, dtype=np.float32)
        done = np.arange(1, dtype=np.int32)
        # Hand the buffers to the environment.
        env.copyObs(ob)
        env.copyAct(ac)
        env.copyDone(done)
        from policy import ErPolicy
        policy = ErPolicy(env, env.ninputs, env.noutputs, env.low, env.high,
                          ob, ac, done, filename, cseed, nrobots,
                          heterogeneous, test)

    # Bullet environment (we expect observation and action made of numpy array of float32)
    if "Bullet" in environment:
        import gym
        from gym import spaces
        import pybullet
        import pybullet_envs
        # import balance_bot
        env = gym.make(environment)
        # Define the objects required (they depend on the environment)
        ob = np.arange(env.observation_space.shape[0], dtype=np.float32)
        ac = np.arange(env.action_space.shape[0], dtype=np.float32)
        # Define the policy
        from policy import BulletPolicy
        policy = BulletPolicy(env, env.observation_space.shape[0],
                              env.action_space.shape[0],
                              env.action_space.low[0],
                              env.action_space.high[0], ob, ac, filename,
                              cseed, nrobots, heterogeneous, test)

    # Gym environment (we expect observation and action made of numpy array of float64 or discrete actions)
    if "Bullet" not in environment and "Er" not in environment:
        import gym
        from gym import spaces
        env = gym.make(environment)
        # Define the objects required (they depend on the environment)
        ob = np.arange(env.observation_space.shape[0], dtype=np.float32)
        if isinstance(env.action_space, gym.spaces.box.Box):
            # Continuous actions
            ac = np.arange(env.action_space.shape[0], dtype=np.float32)
            from policy import GymPolicy
            policy = GymPolicy(env, env.observation_space.shape[0],
                               env.action_space.shape[0],
                               env.action_space.low[0],
                               env.action_space.high[0], ob, ac, filename,
                               cseed, nrobots, heterogeneous, test)
        else:
            # Discrete actions
            ac = np.arange(env.action_space.n, dtype=np.float32)
            from policy import GymPolicyDiscr
            policy = GymPolicyDiscr(env, env.observation_space.shape[0],
                                    env.action_space.n, 0.0, 0.0, ob, ac,
                                    filename, cseed, nrobots, heterogeneous,
                                    test)

    policy.environment = environment
    policy.saveeach = saveeach

    # Create the algorithm class (algoname was validated above, so exactly
    # one branch is taken)
    if algoname == 'CMAES':
        from cmaes import CMAES
        algo = CMAES(env, policy, cseed, filedir)
    elif algoname == 'Salimans':
        from salimans import Salimans
        algo = Salimans(env, policy, cseed, filedir)
    elif algoname == 'xNES':
        from xnes import xNES
        algo = xNES(env, policy, cseed, filedir)
    elif algoname == 'sNES':
        from snes import sNES
        algo = sNES(env, policy, cseed, filedir)
    elif algoname == 'SSS':
        from sss import SSS
        algo = SSS(env, policy, cseed, filedir)
    elif algoname == 'coevo2':
        from coevo2 import coevo2
        algo = coevo2(env, policy, cseed, filedir)
    elif algoname == 'coevo2r':
        from coevo2r import coevo2
        algo = coevo2(env, policy, cseed, filedir)
    elif algoname == 'pepg':
        from pepg import pepg
        algo = pepg(env, policy, cseed, filedir)
    # Set evolutionary variables
    algo.setEvoVars(sampleSize, stepsize, noiseStdDev, sameenvcond, wdecay,
                    evalCenter, saveeachg, fromgeneration, crossoverrate)

    if test > 0:
        # test a policy
        print("Run Test: Environment %s testfile %s" % (environment, testfile))
        algo.test(testfile)
    else:
        # run evolution
        if cseed != 0:
            print("Run Evolve: Environment %s Seed %d Nreplications %d" %
                  (environment, cseed, nreplications))
            for r in range(nreplications):
                algo.run(maxsteps)
                # Each replication uses the next seed.
                algo.seed += 1
                policy.seed += 1
                algo.reset()
                policy.reset()
        else:
            print("\033[1mPlease indicate the seed to run evolution\033[0m")
Esempio n. 10
0
from cmaes import CMAES
from features import simple_featurizer, dellacherie_featurizer, bcts_featurizer
import numpy as np
from envs.tetris import TetrisEnv
from tqdm import tqdm

N = 20  # number of games to play


def _play_game(env, player, line_cap=100000):
    """Play one game from a fresh reset and return the lines cleared.

    The game stops when the environment reports ``done`` or when the number
    of cleared lines reaches ``line_cap``.
    """
    env.reset()
    done = False
    while not (done or env.state.cleared >= line_cap):
        _, _reward, done, _info = env.step(player.act(env.state))
    return env.state.cleared


# Agent built with bcts_featurizer, with its state loaded from CMAES.csv.
player = CMAES(bcts_featurizer)
player.load('CMAES.csv')

env = TetrisEnv()
lines = []
for _ in range(N):
    lines.append(_play_game(env, player))
    print(lines[-1], 'moving avg', np.average(lines))

print('{} games played'.format(N))
print('Avg lines cleared', float(sum(lines)) / N)
print('Max lines cleared', float(max(lines)))
print('Lines cleared per game', lines)
def bayesOpt(inputs: np.ndarray,
             outputs: np.ndarray,
             func: Callable,
             theta_d: np.ndarray,
             num_iters: int,
             maximize: bool = True,
             chol: bool = False):
    """
        Function which performs bayesian optimization using Gaussian Processes and
        the CMA-ES optimization algorithm.

        In every iteration a GP with an RBF kernel is fitted to all points seen
        so far, CMA-ES searches the resulting confidence bound (mean plus
        ``kappa`` standard deviations when maximizing, mean minus when
        minimizing) starting from the best point seen so far, and the point it
        returns is evaluated with ``func`` and appended to the data.

    :param inputs: a NumPy array containing the previously explored values of the objective function.
                   A 1-D array is treated as a column of one-dimensional points.
    :param outputs: the values of the objective function evaluated at each input value.
    :param func: the objective function.
    :param theta_d: hyperparameter for the Gaussian Process (divides the point differences
                    inside the kernel).
    :param num_iters: number of GP iteration to run.
    :param maximize: indicate whether the objective function should be maximized (True) or minimized (False).
    :param chol: indicate whether to use Cholesky decomposition for GP evaluation (True) or to use
                 matrix inversion (False).
    :return: tuple ``(inputs, outputs)`` holding the initial data followed by one
             appended point and value per iteration.
    """
    if inputs.ndim < 2:
        inputs = np.expand_dims(inputs, -1)

    theta_0 = 0.1  # kernel amplitude
    kappa = 0.1  # weight of the standard deviation in the confidence bound
    jitter = 1e-3  # added to the diagonal of the Gram matrix

    select_best, best_value = ((np.argmax, np.max) if maximize else
                               (np.argmin, np.min))

    def scaled_sq_dist(diff):
        # Squared distance over the last axis, scaled per dimension by theta_d.
        return np.sum(np.power(diff, 2.) / np.power(theta_d, 2.), axis=-1)

    # GP iterations
    for _ in range(num_iters):
        # Everything that depends only on the observed data is computed once
        # per iteration instead of once per acquisition evaluation.
        # RBF kernel for covariance matrix, entry (i, j) compares point i to j.
        K = theta_0 * np.exp(
            -0.5 * scaled_sq_dist(inputs[:, None, :] - inputs[None, :, :]))
        K_reg = K + jitter * np.eye(inputs.shape[0])

        if chol:
            L = np.linalg.cholesky(K_reg)
            alpha = np.linalg.solve(L.T, np.linalg.solve(L, outputs))
        else:
            KI_inv = np.linalg.inv(K_reg)
            alpha = np.dot(KI_inv, outputs)

        # this function computes the confidence bound of the GP at x_t
        def acquisition(x_t: np.ndarray):
            K_t = theta_0 * np.exp(-0.5 * scaled_sq_dist(inputs - x_t))
            # k(x_t, x_t): the distance term is zero, so this is theta_0
            # times (1 + a small random perturbation).
            K_tt = theta_0 * (np.exp(-0.5 * scaled_sq_dist(x_t - x_t)) +
                              np.random.normal(0, 1e-5))

            mean = np.dot(K_t, alpha)
            if chol:
                v = np.linalg.solve(L, K_t)
                var = K_tt - v.dot(v)
            else:
                var = K_tt - np.dot(K_t, np.dot(KI_inv, K_t))

            assert var > 0, "Var < 0, please reduce K_tt noise"

            spread = kappa * np.sqrt(var)
            return mean + spread if maximize else mean - spread

        # run CMA-ES on the confidence bound, starting from the best point
        # seen so far; the last argument is `not maximize` (presumably a
        # "minimize" switch - confirm against the CMAES definition)
        xmean = inputs[select_best(outputs)]
        bestX = CMAES(xmean, 0.1, 100, acquisition, None, not maximize)

        inputs = np.vstack((inputs, bestX))
        outputs = np.hstack((outputs, func(bestX)))

        bestx_t = inputs[select_best(outputs)]
        print("Current best x_t: {}, best val: {}".format(
            bestx_t, best_value(outputs)))
    return inputs, outputs
Esempio n. 12
0
            self._f = None
            self._penalty = None
            self._violation = []

    def f(solution):
        """Return the dot product of ``solution._x_repaired`` with itself.

        For a 1-D ``_x_repaired`` this is its squared Euclidean norm.
        """
        return np.dot(solution._x_repaired, solution._x_repaired)

    lbound = np.ones(N) * (-5.0)
    ubound = np.ones(N) * 5.0
    A = np.array([[-1.0] + [0.0] * (N - 1)])
    b = np.array([-1.0])

    # --------------------------------------------------------------------- #
    if CASE == 1:
        print("Case 1: (1 linear ineq + bound)")
        cma = CMAES(xmean0=(lbound + ubound) / 2.,
                    sigma0=(ubound - lbound) / 4.)

        ch = ARCHLinear(fobjective=f,
                        matA=A,
                        vecb=b,
                        weight=cma.w,
                        bound=(lbound, ubound))
        checker = Checker(cma)
        logger = Logger(cma, variable_list=['xmean', 'D', 'sigma'])

        issatisfied = False
        fbestsofar = np.inf
        while not issatisfied:
            xx = cma.sample_candidate()
            sol_list = [Solution(x) for x in xx]
            xcov = cma.transform(cma.transform(np.eye(N)).T)