Example #1
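This snippet assumes its imports and a populated `settings` dictionary are defined earlier. A plausible import block is sketched below; the `ChainerBackend` and `DQNPolicy` paths appear in Example #4, while the simulator and learner module paths are assumptions based on chimp's package layout:

import pickle
import numpy as np
import pylab as p

from chimp.simulators.mdp.mountain_car import MountainCar    # assumed path
from chimp.simulators.mdp.mdp_simulator import MDPSimulator  # assumed path
from chimp.learners.dqn_learner import DQNLearner            # assumed path
from chimp.learners.chainer_backend import ChainerBackend
from chimp.utils.policies import DQNPolicy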
def car_sim(nsteps, simulator, policy, verbose=False):  # signature inferred from the call below
    """Roll out `policy` in `simulator` for up to nsteps steps."""
    rtot = 0.0
    xpos = np.zeros(nsteps)
    vel = np.zeros(nsteps)
    # run the simulation
    input_state = np.zeros((1, 2), dtype=np.float32)
    for i in range(nsteps):
        state = simulator.get_screenshot()
        input_state[0] = state
        a = policy.action((input_state, None))
        simulator.act(a)
        r = simulator.reward()
        rtot += r
        xpos[i], vel[i] = state
        if simulator.episode_over():
            break
    return rtot, xpos, vel


mdp = MountainCar()
simulator = MDPSimulator(mdp)

with open("../chimp/pre_trained_nets/mountain_car.net", "rb") as f:
    net = pickle.load(f)
backend = ChainerBackend(settings)
backend.set_net(net)
learner = DQNLearner(settings, backend)

policy = DQNPolicy(learner)

r, xtrace, vtrace = car_sim(300, simulator, policy, verbose=True)

p.plot(xtrace)         # position trace
p.plot(10.0 * vtrace)  # velocity trace, scaled by 10 for visibility
p.show()
Example #2
def make_batch(n_samples, o_dims, n_actions):  # signature inferred from the call below
    """Generate a synthetic batch: action 1 shifts the observation up by 0.25,
    any other action shifts it down; the reward is the sum of the observation."""
    obs = np.zeros((n_samples,) + o_dims, dtype=np.float32)
    obsp = np.zeros((n_samples,) + o_dims, dtype=np.float32)
    a = np.zeros(n_samples, dtype=np.int32)
    r = np.zeros(n_samples, dtype=np.float32)
    term = np.zeros(n_samples, dtype=bool)  # np.bool is removed in modern NumPy
    for i in range(n_samples):
        obs[i] = np.random.uniform(0.0, 1.0, o_dims)
        a[i] = np.random.randint(n_actions)
        obsp[i] = (obs[i] + 0.25) if a[i] == 1 else (obs[i] - 0.25)
        obsp[i] = np.clip(obsp[i], 0.0, 1.0)
        r[i] = np.sum(obs[i])
    return obs, a, r, obsp, term


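TestNet is never defined in this snippet. A minimal Chainer (v1-style) Q-network that matches the (ohist, ahist) calling convention used below might look like this; the layer sizes and action count are assumptions:

import chainer
import chainer.functions as F
import chainer.links as L

class TestNet(chainer.Chain):
    def __init__(self):
        super(TestNet, self).__init__(
            l1=L.Linear(2, 32),    # assumes o_dims == (2,)
            lout=L.Linear(32, 2),  # assumes n_actions == 2
        )

    def __call__(self, ohist, ahist):
        h = F.relu(self.l1(ohist))
        return self.lout(h)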
net = TestNet()
backend = ChainerBackend(settings)  # renamed from custom_learner: it is the backend, not the learner
backend.set_net(net)

learner = DQNLearner(settings, backend)

policy = DQNPolicy(learner)

obst, a, r, obsp, term = make_batch(10, o_dims, n_actions)

for i in range(10):
    ohist = (obst[i], None)
    act = policy.action(ohist)  # a fresh name, so the batch actions in `a` are not clobbered
    print("Test:", i, obst[i], act, learner.forward((obst[i], None)))

print "TRAINING"
Example #3
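This example defines the Q-network for the mountain car task, but the top of the CarNet class is cut off in the snippet. A plausible reconstruction is sketched below; every layer size is an assumption (MountainCar exposes a 2-D state, and three actions is a common discretization). Only the batch-normalization initialization and __call__ that follow are from the original:

import chainer
import chainer.functions as F
import chainer.links as L

class CarNet(chainer.Chain):
    def __init__(self):
        super(CarNet, self).__init__(
            l1=L.Linear(2, 20),  # all layer sizes assumed
            l2=L.Linear(20, 20),
            bn1=L.BatchNormalization(20),
            l3=L.Linear(20, 20),
            l4=L.Linear(20, 20),
            bn2=L.BatchNormalization(20),
            lout=L.Linear(20, 3),
        )
        self.train = True  # __call__ passes test=not self.train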
        # initialize avg_var to prevent divide by zero
        self.bn1.avg_var.fill(0.1)
        self.bn2.avg_var.fill(0.1)

    def __call__(self, ohist, ahist):
        h = F.relu(self.l1(ohist))
        h = F.relu(self.l2(h))
        h = self.bn1(h, test=not self.train)
        h = F.relu(self.l3(h))
        h = F.relu(self.l4(h))
        h = self.bn2(h, test=not self.train)
        output = self.lout(h)
        return output


net = CarNet()

# Initialize Learner with a Chainer backend
backend = ChainerBackend(settings)
backend.set_net(net)
learner = DQNLearner(settings, backend)

# Initialize memory
memory = ReplayMemoryHDF5(settings)

# Initialize Agent Framework
agent = DQNAgent(learner, memory, simulator, settings)

# Start training
agent.train(verbose=True)
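Once training finishes, the learner can be wrapped in a DQNPolicy and rolled out exactly as in Example #1, reusing the car_sim helper defined there:

policy = DQNPolicy(learner)
r, xtrace, vtrace = car_sim(300, simulator, policy)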
Example #4
import chainer
import chainer.functions as F
import chainer.links as L

from chimp.learners.chainer_backend import ChainerBackend

# Agent Framework
from chimp.agents import DQNAgent

# Policy class for evaluation
from chimp.utils.policies import DQNPolicy
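The snippet is truncated above, so three imports used below (DQNLearner, MountainCar, ReplayMemoryHDF5) never appear. Plausible forms, with module paths assumed from chimp's package layout:

from chimp.learners.dqn_learner import DQNLearner          # assumed path
from chimp.simulators.mdp.mountain_car import MountainCar  # assumed path
from chimp.memories import ReplayMemoryHDF5                # assumed path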

# initialize our mountain car simulator
simulator = MountainCar()

# initialize the network (CarNet, defined in Example #3)
net = CarNet()

# Initialize the learner with a Chainer backend and our net
backend = ChainerBackend(settings) # initialize with the settings dictionary
backend.set_net(net) # set the net for our Chainer backend
learner = DQNLearner(settings, backend) # create the learner

# Initialize replay memory
memory = ReplayMemoryHDF5(settings)

# Initialize the DQNAgent
agent = DQNAgent(learner, memory, simulator, settings) # pass in the learner, memory, and simulator, plus settings

# Start training
agent.train(verbose=True)
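The settings dictionary shared by the backend, memory, and agent is never shown in these snippets. The fragment below is purely hypothetical, sketching the kind of keys such a configuration typically carries; the names are illustrative, not chimp's actual schema:

# hypothetical configuration sketch, not chimp's real settings keys
settings = {
    'batch_size': 32,       # minibatch size for DQN updates
    'memory_size': 100000,  # replay memory capacity
    'learn_freq': 1,        # environment steps per learning step
    'gpu': False,           # train on CPU
}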
