Example #1
0
def train(env, mu, std, alpha):
    """Evolve the search-distribution mean `mu` with an evolution-strategies update.

    Samples weight vectors around `mu`, evaluates them in `env`, and ascends
    the gradient of the expected (standardized) fitness with respect to `mu`.

    Args:
        env: environment passed through to `simulate` — semantics defined elsewhere.
        mu: torch tensor with `requires_grad=True`; updated in place.
        std: standard deviation of the sampling distribution.
        alpha: learning rate for the gradient-ascent step.
    """
    p = Normal(mu, std)
    num_train_runs = 5
    for t in range(num_train_runs):
        # `pop_size` is a free variable — presumably a module-level global; TODO confirm.
        sample_weights = p.sample(pop_size)
        fitnesses = simulate(env, sample_weights)
        # Standardize fitnesses so the step size is invariant to reward scale/offset.
        scaled_fitnesses = (fitnesses - fitnesses.mean()) / fitnesses.std()

        mean = expectation(scaled_fitnesses, sample_weights, p=p)
        mean.backward()

        with torch.no_grad():
            # BUG FIX: ascend the estimated gradient. The original did
            # `mu += alpha * mu`, which ignores the gradient that
            # `mean.backward()` just computed (every sibling example in this
            # file uses `mu.grad` here).
            mu += alpha * mu.grad
            mu.grad.zero_()
Example #2
0
import torch
import numpy as np

from evograd import expectation
from evograd.distributions import Normal


def fun(x):
    """Toy multimodal objective: a fast sinusoid modulated by a slow sine envelope."""
    envelope = torch.sin(0.2 * x)
    carrier = torch.sin(20 * x)
    return 5 * envelope * carrier


mu = torch.tensor([1.0], requires_grad=True)  # search-distribution mean (the parameter being optimized)
npop = 500  # population size
std = 0.5  # noise standard deviation
alpha = 0.03  # learning rate
p = Normal(mu, std)

# Evolution-strategies loop: ascend the gradient (w.r.t. mu) of the expected
# squared z-score of fun(sample), i.e. push mu toward regions where the
# population's behaviors have high variance.
for t in range(2000):
    sample = p.sample(npop)
    behaviors = fun(sample)
    # Standardize behaviors so the objective is invariant to scale/offset.
    zscores = (behaviors - behaviors.mean()) / behaviors.std()
    variance = expectation(zscores**2, sample, p=p)
    variance.backward()

    with torch.no_grad():
        mu += alpha * mu.grad  # gradient-ascent step on the expected objective
        mu.grad.zero_()

    # NOTE(review): this prints mu itself, not the variance estimate — the
    # "estimated variance" label looks misleading; confirm intent.
    print("step: {}, estimated variance: {:0.5}".format(t, float(mu)))
Example #3
0
    y = np.zeros(npop)

    for i in range(0, npop):
        y[i] = -(1 / calc_cd_over_cl(x_np[i]) - 50)**2

    return torch.from_numpy(y)


# Evolution-strategies search over six airfoil parameters: fitness is the
# negated squared deviation of the lift/drag ratio from a target of 50
# (computed in `fun` via `calc_cd_over_cl`, defined elsewhere).
mu = torch.tensor([0.1, 0.1, 1.0, 1.0, 1.0, 1.0], requires_grad=True)  # initial parameter mean
p = Normal(mu, std)

for t in range(max_iter):
    print('Current iteration ' + str(t) + '/' + str(max_iter))
    sample = p.sample(npop)
    fitnesses = fun(sample)
    # Standardize fitnesses so the step size is invariant to fitness scale/offset.
    fitnesses = (fitnesses - fitnesses.mean()) / fitnesses.std()
    mean = expectation(fitnesses, sample, p=p)
    mean.backward()

    with torch.no_grad():
        mu += alpha * mu.grad  # gradient ascent on expected fitness
        mu.grad.zero_()

    print('Current fitness: ' + str(1 / calc_cd_over_cl(mu.detach().numpy())))

print('')
print(mu)
print(1 / calc_cd_over_cl(mu.detach().numpy()))
# BUG FIX: removed trailing `print(bad)` — `bad` is defined nowhere and
# raised a NameError after the results were printed. Also removed the dead
# commented-out print inside the loop.
Example #4
0
                break
    return total_reward / num_run


def simulate(batch_weights):
    """Evaluate every weight vector in the batch; return their rewards as a tensor."""
    return torch.tensor(
        [simulate_single(weights.numpy()) for weights in batch_weights]
    )


mu = torch.randn(4, requires_grad=True)  # population mean
npop = 50  # population size
std = 0.5  # noise standard deviation
alpha = 0.03  # learning rate
p = Normal(mu, std)
env = gym.make("CartPole-v0")

# Evolution-strategies loop: maximize expected CartPole reward by ascending
# the gradient of the expected standardized fitness with respect to mu.
for t in range(2000):
    sample = p.sample(npop)
    fitnesses = simulate(sample)
    # Standardize so the update is invariant to reward scale/offset.
    scaled_fitnesses = (fitnesses - fitnesses.mean()) / fitnesses.std()
    mean = expectation(scaled_fitnesses, sample, p=p)
    mean.backward()

    with torch.no_grad():
        mu += alpha * mu.grad  # gradient-ascent step
        mu.grad.zero_()

    print("step: {}, mean fitness: {:0.5}".format(t, float(fitnesses.mean())))
Example #5
0
from evograd.distributions import Normal


def fun(x):
    """Behavior characterization: an amplitude-modulated sinusoid."""
    slow = torch.sin(x * 0.2)
    fast = torch.sin(x * 20)
    return slow * fast * 5


# Novelty/entropy-maximizing evolution strategies: push mu toward regions
# where the kernel-density estimate of the behavior distribution has high
# entropy.
mu = torch.tensor(1.0, requires_grad=True)  # search-distribution mean
npop = 500  # population size
std = 0.5  # noise standard deviation
k_sigma = 1.0  # kernel standard deviation
alpha = 0.10  # learning rate
p = Normal(mu, std)

for t in range(2000):
    sample = p.sample(npop)
    novelties = fun(sample).unsqueeze(1)  # column vector of behavior descriptors
    # Standardize behaviors so kernel bandwidth is scale-independent.
    novelties = (novelties - novelties.mean()) / novelties.std()
    # Pairwise squared Euclidean distances between all behaviors.
    dists = scipy.spatial.distance.squareform(
        scipy.spatial.distance.pdist(novelties, "sqeuclidean"))
    # BUG FIX: `scipy.exp` was a deprecated alias of `numpy.exp` and has been
    # removed from modern SciPy (AttributeError at runtime). Compute the
    # squared-exponential kernel with torch instead — same values.
    kernel = torch.exp(torch.tensor(-dists / k_sigma**2, dtype=torch.float32))
    p_x = expectation(kernel, sample, p=p)  # density estimate at each sample
    entropy = expectation(-torch.log(p_x), sample, p=p)
    entropy.backward()

    with torch.no_grad():
        mu += alpha * mu.grad  # ascend the entropy gradient
        mu.grad.zero_()

    # NOTE(review): prints mu, not the entropy estimate — the label
    # "estimated entropy" looks misleading; confirm intent.
    print('step: {}, estimated entropy: {:0.5}'.format(t, float(mu)))