class DPendulum:
    """Index-based (discretized) interface on top of a ``Pendulum`` model.

    The state kept in ``self.x`` is a two-element sequence of indices, and
    the public methods exchange a single state index obtained with ``x2i``.
    Conversions rely on the module-level helpers ``x2i``, ``i2x``, ``c2d``,
    ``d2c``, ``d2cq`` and ``d2cu`` and on the constants ``NQ``, ``NV``,
    ``NU`` and ``DT``, all defined outside this class.
    """

    def __init__(self):
        """Create the underlying one-joint ``Pendulum`` and set its time steps."""
        model = Pendulum(1)
        model.DT = DT
        model.NDT = 5
        self.pendulum = model

    @property
    def nqv(self):
        """Return the pair ``[NQ, NV]``."""
        return [NQ, NV]

    @property
    def nx(self):
        """Return the number of discrete states, ``NQ * NV``."""
        return NQ * NV

    @property
    def nu(self):
        """Return ``NU``."""
        return NU

    @property
    def goal(self):
        """Return the state index of the discretized continuous state [0, 0]."""
        discrete_origin = c2d([0., 0.])
        return x2i(discrete_origin)

    def reset(self, x=None):
        """Set the current state and return its index.

        When ``x`` is None, the two components are drawn uniformly with
        ``np.random.randint`` in ``[0, NQ)`` and ``[0, NV)``. Otherwise ``x``
        is converted with ``i2x``.
        """
        if x is not None:
            state = i2x(x)
        else:
            q_index = np.random.randint(0, NQ)
            v_index = np.random.randint(0, NV)
            state = [q_index, v_index]
        assert len(state) == 2
        self.x = state
        return x2i(self.x)

    def step(self, iu):
        """Apply the control index ``iu`` and return ``(state index, reward)``.

        The reward is 1 when the new state index equals ``self.goal`` and 0
        otherwise.
        """
        self.x = self.dynamics(self.x, iu)
        if x2i(self.x) == self.goal:
            reward = 1
        else:
            reward = 0
        return x2i(self.x), reward

    def render(self):
        """Display the current first state component, then sleep for ``DT``."""
        angle = d2cq(self.x[0])
        configuration = np.matrix([angle])
        self.pendulum.display(configuration)
        time.sleep(self.pendulum.DT)

    def dynamics(self, ix, iu):
        """Return the discretized successor of state ``ix`` under control ``iu``.

        ``ix`` is converted with ``d2c`` into a column matrix and ``iu`` with
        ``d2cu``; both are handed to ``self.pendulum.dynamics``. The first
        value it returns is stored in ``self.xc``.
        """
        state = np.matrix(d2c(ix)).T
        control = d2cu(iu)

        self.xc, _ = self.pendulum.dynamics(state, control)
        # NOTE(review): the result is read back from `state`, not from
        # `self.xc` -- presumably Pendulum.dynamics updates its first
        # argument in place; confirm against the Pendulum implementation.
        state_row = state.T.tolist()[0]
        return c2d(state_row)
Example #2
0
class DPendulum:
    """Pendulum environment addressed through discrete indices.

    ``self.x`` holds a two-element index pair; ``reset`` and ``step`` report
    the state as the single index produced by ``x2i``. The helpers ``x2i``,
    ``i2x``, ``c2d``, ``d2c``, ``d2cq``, ``d2cu`` and the constants ``NQ``,
    ``NV``, ``NU``, ``DT`` come from the enclosing module.
    """

    def __init__(self):
        """Build a ``Pendulum(1)`` and configure its ``DT`` and ``NDT``."""
        self.pendulum = Pendulum(1)
        self.pendulum.DT = DT
        self.pendulum.NDT = 5

    @property
    def nqv(self):
        """List ``[NQ, NV]``."""
        return [NQ, NV]

    @property
    def nx(self):
        """Product ``NQ * NV``."""
        return NQ * NV

    @property
    def nu(self):
        """Value of ``NU``."""
        return NU

    @property
    def goal(self):
        """State index obtained from the continuous state [0, 0]."""
        return x2i(c2d([0., 0.]))

    def reset(self, x=None):
        """Store a new current state and return its index.

        With ``x=None`` both components are sampled by ``np.random.randint``
        (ranges ``[0, NQ)`` and ``[0, NV)``); any other ``x`` goes through
        ``i2x`` first.
        """
        x = (
            [np.random.randint(0, NQ), np.random.randint(0, NV)]
            if x is None
            else i2x(x)
        )
        assert len(x) == 2
        self.x = x
        return x2i(self.x)

    def step(self, iu):
        """Advance by one control ``iu``; return ``(state index, reward)``.

        Reward is 1 if the resulting state index equals ``self.goal``,
        otherwise 0.
        """
        self.x = self.dynamics(self.x, iu)
        reached_goal = x2i(self.x) == self.goal
        reward = 1 if reached_goal else 0
        return x2i(self.x), reward

    def render(self):
        """Show the current first state component and pause for ``DT``."""
        viewer_input = np.matrix([d2cq(self.x[0])])
        self.pendulum.display(viewer_input)
        time.sleep(self.pendulum.DT)

    def dynamics(self, ix, iu):
        """Compute the discretized next state from indices ``ix`` and ``iu``.

        The state is converted with ``d2c`` (as a column matrix) and the
        control with ``d2cu`` before calling ``self.pendulum.dynamics``,
        whose first return value is kept in ``self.xc``.
        """
        x_column = np.matrix(d2c(ix)).T
        u_value = d2cu(iu)

        self.xc, _ = self.pendulum.dynamics(x_column, u_value)
        # NOTE(review): the returned state comes from `x_column`, not from
        # `self.xc`; this presumably depends on Pendulum.dynamics modifying
        # its input in place -- to be confirmed.
        return c2d(x_column.T.tolist()[0])
Example #3
0
#         self.x          = x
#         self.u          = u
#         self.reward     = r
#         self.done       = d
#         self.x2         = x2

# One stored transition. `value` is the last field and defaults to None
# (installed through __new__.__defaults__ on the next line).
ReplayItem = namedtuple('ReplayItem', 'x u reward done x2 value')
ReplayItem.__new__.__defaults__ = (None, )

# Replay memory filled from the trajectories in `data`.
replayDeque = deque()

### Data
# Turn every (x, u, t) sample of every trajectory into a ReplayItem.
for d in data:
    T = d.cost
    # Memory size before this trajectory; used below to detect a trajectory
    # that contributed no sample.
    _n_before = len(replayDeque)
    for x, u, t in zip(d.X, d.U, d.T):
        # Successor state from the environment model, flattened to 1-D.
        x2 = np.asarray(env.dynamics(np.matrix(x).T, np.matrix(u).T)[0].flat)
        # NOTE(review): `o` and `o2` are stored as `.flat` iterators while
        # `x2` above is converted with np.asarray -- confirm that consumers
        # of the replay memory accept flat iterators.
        o = env.obs(np.matrix(x).T).flat
        o2 = env.obs(np.matrix(x2).T).flat
        replayDeque.append(
            ReplayItem(x=o,
                       u=u.copy(),
                       reward=env.DT,
                       done=False,
                       x2=o2,
                       value=T - t))
        #if t>T*.9: break  # avoid trajectory ends
    #if len(replayDeque)>BATCH_SIZE: break
    # Mark the final sample of this trajectory as terminal. The guard keeps
    # an empty trajectory from raising IndexError on an empty deque.
    if len(replayDeque) > _n_before:
        replayDeque[-1] = replayDeque[-1]._replace(done=True)

# Parenthesised single-argument form: prints the same text with the Python 2
# print statement and is also valid Python 3.
print('Done loading the motion lib')