class Agent:
    """Controller that turns a status vector into a two-component action.

    The status is projected through fixed random input weights onto an
    echo-state reservoir (``ESN``). The reservoir activity is read out
    linearly through ``out_weights`` and squashed into the final action.
    Only ``out_weights`` is replaced by :meth:`setParams`; the input
    weights stay as drawn in ``__init__``.
    """

    def __init__(self, num_params, num_inputs, num_actions):
        """Build the reservoir and draw random input/readout weights.

        num_params   int:  total number of readout parameters; the
                           reservoir gets ``num_params // num_actions`` units
        num_inputs   int:  length of the status vector given to ``step``
        num_actions  int:  number of readout columns (``step`` reads the
                           first two readout values and scales them with a
                           two-element gain)
        """
        self.num_units = num_params//num_actions
        self.num_inputs = num_inputs
        self.num_actions = num_actions

        self.echo = ESN(
            N       = self.num_units,
            dt      = 1.0,
            tau     = 5.0,
            alpha   = 0.1,
            beta    = 0.9,
            epsilon = 1.0e-10)

        self.input_weights = np.random.randn(self.num_inputs, self.num_units)
        self.out_weights = np.random.randn(self.num_units, self.num_actions)

    def save(self, filename):
        """Serialize the whole agent to ``filename`` as YAML."""
        with open(filename, "w") as f:
            yaml.dump(self, f)

    @staticmethod
    def load(filename):
        """Rebuild an agent previously written by ``save``.

        SECURITY: ``yaml.UnsafeLoader`` can construct arbitrary Python
        objects and therefore execute code embedded in the file. Only
        load files that come from a trusted source.
        """
        with open(filename, "r") as f:
            return yaml.load(f, Loader=yaml.UnsafeLoader)

    def setParams(self, params):
        """Replace the readout weights.

        params  array-like holding ``num_units * num_actions`` values
                (ndarray, list or tuple); it is copied, so later changes
                to ``params`` do not affect the agent.

        Raises ValueError (from ``reshape``) when the number of values
        does not match ``num_units * num_actions``.
        """
        # asarray lets plain sequences through; ndarrays are passed as-is.
        weights = np.asarray(params).reshape(self.num_units, self.num_actions)
        self.out_weights = weights.copy()

    def reset(self):
        """Reset the internal state of the reservoir."""
        self.echo.reset()

    def step(self, status):
        """Advance the reservoir by one step and return the action.

        status  array-like with ``num_inputs`` values.

        Returns a two-element array
        ``[0.1 * tanh(a0), 0.5 * sigmoid(a1)]`` where ``a0`` and ``a1``
        are the first two readout values.
        """
        # Project the status onto the reservoir units (row vector @ weights).
        units_status = np.matmul(np.asarray(status).reshape(1, -1), self.input_weights)
        units = self.echo.step(units_status.ravel())
        action = np.matmul(units, self.out_weights)
        # `sigmoid` is defined elsewhere in the file.
        action = np.hstack([ np.tanh(action[0]), sigmoid(action[1])])
        # Per-component output gains.
        action *= [0.1, 0.5]
        return action
Beispiel #2
0
        #train
        for epoch in range(epochs):
            for ind in range(N_max - 1):
                inp = input_train[ind].reshape(-1, 1)
                desired_target = target_train[ind + 1].reshape(-1, 1)

                if ind >= esn.washout:
                    out = esn.online_train(inp, desired_target)
                    mse = esn.mse(out, desired_target)
                    print(ind)
                    print("target", desired_target.T)
                    print("output", out.T)
                    print("mse", mse)
                    print("\n")

        esn.reset()

    print("hid", esn.W)
    print("in", esn.W_in)
    print("out", esn.W_out)

    #esn.store_net("forza.pickle")
    out_name = os.path.join(output_dir, filenames[index])
    esn.save_genome("%s_2.params" % out_name)

# Load a single data file through `read_file`. The file index is
# hard-coded to 1; the commented-out loop header below suggests this was
# meant to run over every entry of `filenames` -- TODO confirm.
#for i in range(len(filenames)):
input_test, target_test, N_max_t = read_file(
    os.path.join(data_dir, filenames[1]), steer_out)

#test
# NOTE(review): `m` is not used within this excerpt; presumably it bounds
# the test run -- confirm against the code that follows.
m = 1800
class ESN_discrete(object):
    """
    Learn trajectories to goals.
    Changing the spatial goal also
    changes the shape of the trajectory.

    An echo-state reservoir (``ESN``) receives, at the first timestep
    only, an input pattern built from the start point and the goal. A
    linear readout fitted by ridge regression maps the (envelope-weighted)
    reservoir activity to the trajectory expressed relative to the goal.
    """

    def __init__(self, y0=0.0, goal=1.0, timesteps=1000, lmbd=1e-4, input_amplitude=50, **kargs):
        """
        y0               float:  starting point
        goal             float:  final point
        timesteps        int:    number of steps of the desired trajectory
        lmbd             float:  ridge regularization parameter
        input_amplitude  float:  gain applied to the input pattern before
                                 it is fed to the reservoir
        kargs                    forwarded unchanged to the ESN constructor
        """
        self.goal = goal
        self.y0 = y0
        self.timesteps = timesteps
        self.LMBD = lmbd
        self.input_amplitude = input_amplitude

        # Init the esn
        self.res = ESN(stime=self.timesteps, **kargs)
        # Init the weights to the ESN with sparse random values.
        # The input to the ESN is a 4-elements vector:
        # [ y0, 1-y0, goal, 1-goal ]
        self.input2res_w = np.random.randn(self.res.N, 4)
        # Keep each weight with probability 0.1, zero the rest.
        self.input2res_w *= np.random.rand(self.res.N, 4) < 0.1

        self.readout_w = np.zeros(self.res.N)

    def _record_activity(self, y0, goal):
        """Reset the reservoir, drive it for `timesteps` steps and return its weighted activity."""
        # input pattern
        p_init = np.dot(self.input2res_w, [y0, 1 - y0, goal, 1 - goal])

        self.res.reset()
        for t in range(self.timesteps):
            # The pattern is presented at t == 0 only; afterwards the
            # reservoir receives a zero input.
            p = p_init * (t == 0)
            self.res.step(self.input_amplitude * p)
            self.res.store(t)
        return self.activations()

    def imitate_path(self, y_des):
        """
        Learn through ridge regression the weights
        to the readout unit to reproduce the 'y_des'
        trajectories.

        y_des  iterable of 1-D paths; each path's first and last samples
               are taken as its start point and goal.

        The solution is added in place to `readout_w`.
        """
        N = self.res.N

        # Seeding with empty arrays keeps an empty `y_des` well-defined.
        X_parts = [np.empty((N, 0))]
        Y_parts = [np.empty(0)]

        for path in y_des:
            # learning start-end points
            y0 = path[0]
            goal = path[-1]

            # desired output, expressed relative to the goal
            Y_parts.append(self.interpolate(path) - goal)
            # network activity time-series for this start/goal pair
            X_parts.append(self._record_activity(y0, goal))

        X = np.hstack(X_parts)
        Y = np.hstack(Y_parts)

        # Ridge regression: solve (X X^T + lambda I) w = X Y directly
        # instead of forming the explicit inverse.
        gram = np.dot(X, X.T) + self.LMBD * np.eye(N)
        self.readout_w += np.linalg.solve(gram, np.dot(X, Y))

    def activations(self):
        """Return the stored reservoir outputs scaled by a time envelope.

        The envelope is ``exp(-s)`` with ``s`` going linearly from 1 to 0
        over `timesteps`, i.e. it rises from exp(-1) to 1.
        """
        return self.res.data[self.res.out_lab] * np.outer(
            np.ones(self.res.N), np.exp(-np.linspace(1, 0, self.timesteps))
        )

    def rollout(self, y0=0.0, goal=1.0):
        """Generate a trajectory from `y0` towards `goal` with the current readout.

        Returns the readout of the recorded reservoir activity, shifted
        by `goal` (the readout is trained on goal-relative targets).
        """
        x = self._record_activity(y0, goal)

        return np.dot(self.readout_w, x) + goal

    def interpolate(self, path):
        """Linearly resample `path` to `timesteps` samples.

        The samples of `path` are spread evenly over [0, timesteps] and
        the interpolant is evaluated at t = 0, 1, ..., timesteps - 1.
        """
        import scipy.interpolate

        x = np.linspace(0, self.timesteps, len(path))
        y_gen = scipy.interpolate.interp1d(x, path)

        # One vectorized evaluation instead of a per-sample Python loop.
        return np.asarray(y_gen(np.arange(self.timesteps)), dtype=float)