Example #1
def simple_es():
    df = pd.read_csv('btc_etc.csv').rename(columns={
        'Close': 'close',
        'Date time': 'datetime',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Volume': 'volume'
    })

    @timeit
    def get_reward(weights, df):
        ds = DataSeries(df)
        bt = NNBT(ds, balance=1000.0, weights=weights)
        bt.run()
        return bt.get_profit() - 200.0

    model = get_model()

    es = EvolutionStrategy(model.get_weights(), get_reward, population_size=10, sigma=0.1, learning_rate=0.001, get_reward_func_args=[df])
    es.run(1000, print_step=1)
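The EvolutionStrategy used here (and in most of the examples on this page) follows an evostra-style interface: perturb the current weights with Gaussian noise, score every perturbed candidate with get_reward, and move the weights along the reward-weighted average of the noise. A minimal sketch of a single update, assuming a flat NumPy parameter vector rather than the nested Keras weight list used above:

import numpy as np

def es_step(weights, get_reward, population_size=10, sigma=0.1, learning_rate=0.001):
    # Sample one Gaussian perturbation per population member.
    noise = np.random.randn(population_size, weights.size)
    # Evaluate every perturbed candidate with the user-supplied reward function.
    rewards = np.array([get_reward(weights + sigma * eps) for eps in noise])
    # Standardize rewards so the step size is insensitive to the reward scale.
    if rewards.std() > 0:
        rewards = (rewards - rewards.mean()) / rewards.std()
    # Move the weights along the reward-weighted average of the noise.
    gradient = noise.T @ rewards / (population_size * sigma)
    return weights + learning_rate * gradient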
Example #2
def main():
    # Make the environment
    env = gym.make(ENV_NAME)

    # Declare the model and the ES object
    model = Model()
    es = EvolutionStrategy(env,
                           model,
                           population_size=POPULATION_SIZE,
                           alpha=ALPHA,
                           sigma=SIGMA,
                           gamma=GAMMA)

    # For each generation:
    for generation in range(NUM_GENERATIONS):
        # Show how well the model is doing in its current state
        es.run_act(True, True)
        # Train for one generation
        es.train(1)
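The constants referenced in main() are defined elsewhere in the original module; a minimal sketch with placeholder values (only the names are taken from the snippet, the values are illustrative):

ENV_NAME = 'CartPole-v0'   # assumption: any Gym environment id works here
POPULATION_SIZE = 50
SIGMA = 0.1                # standard deviation of the parameter noise
ALPHA = 0.001              # learning rate
GAMMA = 0.99               # reward discount factor
NUM_GENERATIONS = 100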
Example #3
from es import EvolutionStrategy
import numpy as np
from game import Game, play
from win import Window, GAME_SPEED
import gi
gi.require_version('Gtk', '3.0')  # select the GTK 3 bindings before importing from gi.repository
from gi.repository import Gtk, GLib, Gdk
from os import path
import os
import time

es = EvolutionStrategy(fn=play,
                       noisep=50,
                       sigma=0.1,
                       alpha=0.001,
                       layer_sizes=[[4, 500], [500, 1]],
                       input_size=4)
load = path.join(path.dirname(__file__), 'load.npy')

# if load.npy exists, load the parameters from it
if path.exists(load):
    es.layers = np.load(load)


def step(game, update):
    win = Window(game)
    GLib.timeout_add(GAME_SPEED, lambda: timeout_kill(win, game))
    GLib.timeout_add(GAME_SPEED, update)
    GLib.timeout_add(GAME_SPEED, win.update)
    win.show_all()
    Gtk.main()
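Symmetric to the np.load() above, the evolved parameters could be written back to load.npy so the next run resumes from them; a small sketch, assuming es.layers can be serialized as a NumPy object array:

def save_layers():
    # Persist the evolved parameters so a later run picks them up via the check above.
    # On recent NumPy versions, np.load() then also needs allow_pickle=True for object arrays.
    np.save(load, np.array(es.layers, dtype=object))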
Example #4
def main(args):
    print("IT'S DANGEROUS TO GO ALONE! TAKE THIS.")

    np.random.seed(0)
    pt.manual_seed(0)

    env = BipedalWalker()
    env.seed(0)

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    print(f"Initializing agent (device={device})...")
    rnn = WorldModel(obs_dim, act_dim)
    ctrl = Controller(obs_dim + rnn.hid_dim, act_dim)

    # Adjust population size based on the number of available CPUs.
    num_workers = mp.cpu_count() if args.nproc is None else args.nproc
    num_workers = min(num_workers, mp.cpu_count())
    agents_per_worker = args.popsize // num_workers
    popsize = num_workers * agents_per_worker

    print(f"Initializing population with {popsize} workers...")
    pop = Population(num_workers, agents_per_worker)
    global_mu = np.zeros_like(ctrl.genotype)

    loss_logger = ValueLogger('ha_rnn_loss', bufsize=20)
    best_logger = ValueLogger('ha_ctrl_best', bufsize=100)

    # Train the RNN with random policies.
    print(f"Training M model with a random policy...")
    optimizer = optim.Adam(rnn.parameters(), lr=args.lr)
    train_rnn(rnn,
              optimizer,
              pop,
              random_policy=True,
              num_rollouts=args.num_rollouts,
              logger=loss_logger)
    loss_logger.plot('M model training loss', 'step', 'loss')

    # Upload the trained RNN.
    success = pop.upload_rnn(rnn.cpu())
    assert success

    # Iteratively update controller and RNN.
    for i in range(args.niter):
        # Evolve controllers with the trained RNN.
        print(f"Iter. {i}: Evolving C model...")
        es = EvolutionStrategy(global_mu, args.sigma0, popsize)
        evolve_ctrl(ctrl, es, pop, num_gen=args.num_gen, logger=best_logger)
        best_logger.plot('C model evolution', 'gen', 'fitness')

        # Update the global best individual and upload them.
        global_mu = np.copy(ctrl.genotype)
        success = pop.upload_ctrl(global_mu, noisy=True)
        assert success

        # Train the RNN with the current best controller.
        print(f"Iter. {i}: Training M model...")
        train_rnn(rnn,
                  optimizer,
                  pop,
                  random_policy=False,
                  num_rollouts=args.num_rollouts,
                  logger=loss_logger)
        loss_logger.plot('M model training loss', 'step', 'loss')

        # Upload the trained RNN.
        success = pop.upload_rnn(rnn.cpu())
        assert success

        # Test run!
        rollout(env, rnn, ctrl, render=True)

    success = pop.close()
    assert success
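The args namespace consumed by main() is built elsewhere in the original script; a minimal argparse sketch covering the flags the function reads (default values are illustrative only):

import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='World-model (M) training and controller (C) evolution')
    parser.add_argument('--nproc', type=int, default=None, help='worker processes (default: all CPUs)')
    parser.add_argument('--popsize', type=int, default=64, help='total population size')
    parser.add_argument('--lr', type=float, default=1e-3, help='learning rate for the M model optimizer')
    parser.add_argument('--num_rollouts', type=int, default=16, help='rollouts per M-model training pass')
    parser.add_argument('--niter', type=int, default=10, help='outer C/M alternation iterations')
    parser.add_argument('--sigma0', type=float, default=0.1, help='initial ES step size')
    parser.add_argument('--num_gen', type=int, default=100, help='ES generations per iteration')
    return parser.parse_args()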
Example #5
def run(start_run, tot_runs, num_iterations, print_steps, output_results,
        num_workers):
    runs = {}

    hyperparam_search = False
    if (start_run > 0 and tot_runs > 1): hyperparam_search = True

    for i in range(start_run, tot_runs):

        chosen_before = False
        if hyperparam_search:
            npop = np.random.randint(1, 151)  # random_integers is deprecated; randint's upper bound is exclusive
            sample = np.random.rand(np.maximum(0, npop))
            sample_std = np.std(sample)
            sigma = np.round(np.sqrt(np.random.chisquare(sample_std, 1)), 2)[0]
            learning_rate_selection = [
                0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5
            ]
            alpha = np.random.choice(learning_rate_selection)

            for key in runs.keys():
                if runs[key] == [npop, sigma, alpha]:
                    chosen_before = True
                    print(
                        'skipping run, as hyperparams {} have been chosen before'
                        .format(runs[key]))

        else:  #default - best hyperparams
            npop = 50
            sigma = 0.1
            alpha = 0.001

        # will only run if hyperparams are not chosen before
        if not chosen_before:
            runs[i] = [npop, sigma, alpha]

            print('hyperparams chosen -> npop:{}  sigma:{} alpha:{}'.format(
                npop, sigma, alpha))

            es = EvolutionStrategy(model.get_weights(),
                                   get_reward,
                                   population_size=npop,
                                   sigma=sigma,
                                   learning_rate=alpha)

            if num_workers == 1:
                # single-threaded version
                metrics = es.run(num_iterations, print_steps)
            else:
                # distributed version (assumed to return the same metrics structure as run())
                metrics = es.run_dist(num_iterations, print_steps, num_workers)

            if output_results:
                RUN_SUMMARY_LOC = '../run_summaries/'
                print('saving results to loc:', RUN_SUMMARY_LOC)
                results = pd.DataFrame(np.array(metrics).reshape(
                    int((num_iterations // print_steps)), 6),
                                       columns=list([
                                           'run_name', 'iteration',
                                           'timestamp', 'accuracy_test',
                                           'accuracy_val', 'accuracy_train'
                                       ]))

                filename = os.path.join(RUN_SUMMARY_LOC,
                                        results['run_name'][0] + '.csv')
                results.to_csv(filename, sep=',')

    print("Total Time usage: " +
          str(timedelta(seconds=int(round(time.time() - start_time)))))
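A possible invocation with the single-threaded defaults (the argument values are illustrative; start_time is read inside run() for the final timing report):

if __name__ == '__main__':
    start_time = time.time()
    run(start_run=0, tot_runs=1, num_iterations=100, print_steps=10,
        output_results=True, num_workers=1)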
Example #6
if new_training:
    training = Training.create(training_name, pop_size, sigma, learning_rate)
else:
    training = Training.load(training_name)

model = training.model.copy()


def get_reward(weights):
    model.set_weights(weights)

    agent = SMBAgent("Level1-1")
    fitness1, _ = agent.play(model, render)
    agent.change_env("Level1-2")
    fitness2, _ = agent.play(model, render)

    fitness = fitness1 + fitness2

    return fitness


es = EvolutionStrategy(training.model.get_weights(), get_reward,
                       training.population_size, training.sigma,
                       training.learning_rate)

while True:
    (main_weights, main_reward), (population_weights,
                                  population_rewards) = es.run_generation()

    training.save(main_weights, main_reward, population_weights,
                  population_rewards)
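The configuration names used above come from earlier in the original script; a minimal sketch with placeholder values (names taken from the snippet, values illustrative):

new_training = True          # create a fresh Training instead of resuming one
training_name = 'smb-es'     # illustrative run name
pop_size = 50
sigma = 0.1
learning_rate = 0.001
render = False               # whether SMBAgent.play() should render the emulator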
Example #7
    # (tail of get_model(): the layer definitions above are omitted in this excerpt)
    model.compile(optimizer='Adam', loss='mse')
    return model


def get_reward(weights):
    model = get_model()
    model.set_weights(weights)
    total_steps = 0
    for i_episode in range(20):
        observation = env.reset()
        for t in range(100):
            # env.render()
            action = np.argmax(model.predict(np.expand_dims(observation, 0)))
            observation, reward, done, info = env.step(action)
            total_steps += 1
            if done:
                break
    reward = total_steps / 20.0
    return reward - 100.0


model = get_model()

es = EvolutionStrategy(model.get_weights(),
                       get_reward,
                       population_size=50,
                       sigma=0.1,
                       learning_rate=0.001)
es.run(1000, print_step=1)
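The excerpt above starts inside get_model() and relies on an env created elsewhere; a minimal sketch of that setup, assuming a discrete-action Gym environment and a small Keras network (the environment id and layer sizes are illustrative, not taken from the original source):

import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

env = gym.make('CartPole-v1')  # assumption: any discrete-action environment works here

def get_model():
    # Small feed-forward policy; layer sizes are placeholders.
    model = Sequential([
        Dense(16, activation='relu', input_shape=env.observation_space.shape),
        Dense(env.action_space.n, activation='linear'),
    ])
    model.compile(optimizer='Adam', loss='mse')
    return model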