Example #1
    def init_LWPR(self):
        self.lwpr = []
        for i in range(self.joints_num):
            # one scalar-input, scalar-output LWPR model per joint
            lwpr = LWPR(1, 1)
            lwpr.init_D = 10000 * np.eye(1)   # initial distance metric (larger D = narrower receptive fields)
            lwpr.update_D = True              # adapt the distance metric online
            lwpr.init_alpha = 10 * np.eye(1)  # learning rate for the distance-metric updates
            lwpr.meta = False                 # no meta-learning of the learning rates
            lwpr.penalty = 0.000000001        # smoothness penalty on the distance metric
            # lwpr.w_gen = 0.2

            # lwpr.init_D = 200 * np.eye(1)
            # lwpr.update_D = True
            # lwpr.init_alpha = 0.1 * np.eye(1)
            # lwpr.meta = False
            # lwpr.penalty = 0.005
            # lwpr.w_gen = 0.2
            # lwpr.w_prune = 0.8

            # double   w_gen=0.2;
            # double   w_prune=0.8;
            # bool     update_D=true;
            # double   init_alpha=0.1;
            # double   penalty=0.005;
            # VectorXd init_D=VectorXd::Constant(input_dim,200);
            self.lwpr.append(lwpr)
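Not part of the original snippet: a minimal usage sketch for the per-joint models above, assuming each model maps one scalar input to one scalar output and using the update()/predict() calls of the lwpr Python bindings; step_LWPR, joint_inputs and joint_targets are hypothetical names.

    def step_LWPR(self, joint_inputs, joint_targets):
        # Hypothetical helper (sketch): present one (input, target) pair to
        # each joint model and collect the current predictions.
        predictions = []
        for i in range(self.joints_num):
            x = np.array([joint_inputs[i]])
            y = np.array([joint_targets[i]])
            self.lwpr[i].update(x, y)                    # online update
            predictions.append(self.lwpr[i].predict(x))  # query without updating
        return predictions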
Example #2
    def all(self):
        """
        The main method that ties together all of the functionality
        """
        data = self.get_data()

        # Cut into train and test set, based on percentage
        random.shuffle(data)
        train_set = data[
            int(len(data) * (self.percentage_of_trest_data / 100)):]
        test_set = data[:int(
            len(data) * (self.percentage_of_trest_data / 100))]  # first n

        print "Length full set: %2i" % data.shape[0]
        print "Length train set: %2i" % train_set.shape[0]
        print "Length test set: %2i" % test_set.shape[0]

        row, column = train_set.shape
        dim = column - 1
        Ntrain = row
        Xtr, Ytr = train_set[:, :dim], train_set[:, -1:]

        # initialize the LWPR model
        model = LWPR(dim, 1)
        model.init_D = 20 * eye(dim)
        model.update_D = True
        model.init_alpha = 40 * eye(dim)
        model.meta = False

        # train the model
        model = self.train_model(model, Ntrain, Xtr, Ytr)

        # show data
        # self.show_plot(Xtr, Ytr, test_set, model, dim)

        # establish udp connection etc
        self.networking(model)
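The train_model and networking helpers are not shown in this snippet. A sketch of what train_model might look like, assuming the same epoch-wise shuffled update loop as in Example #4 below and numpy's random module in scope (the method body and the epoch count are assumptions, not the original implementation):

    def train_model(self, model, Ntrain, Xtr, Ytr, epochs=20):
        """Hypothetical sketch of the missing helper: present the training
        pairs in random order for a fixed number of epochs; update() trains
        the model incrementally."""
        for _ in range(epochs):
            ind = random.permutation(Ntrain)
            for i in range(Ntrain):
                model.update(Xtr[ind[i]], Ytr[ind[i]])
        return model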
    def __init__(self,
                 history_length,
                 prediction_horizon,
                 difference_learning,
                 averaging,
                 streaming,
                 settings=None):
        super().__init__(history_length,
                         prediction_horizon,
                         difference_learning,
                         averaging=averaging,
                         streaming=streaming)
        self.model_ = LWPR(self._get_input_dim(), self.observation_dimension)

        # Default values.
        init_D = 25
        init_alpha = 175
        self.time_threshold = np.inf
        if settings:
            init_D = settings['init_D']
            init_alpha = settings['init_alpha']
            self.time_threshold = settings.get('time_threshold', np.inf)
        self.model_.init_D = init_D * np.eye(self._get_input_dim())
        self.model_.init_alpha = init_alpha * np.eye(self._get_input_dim())
Example #4
from numpy import *
from matplotlib import pyplot as plt
from lwpr import LWPR


def testfunc(x):
    return 10 * sin(7.8 * log(1 + x)) / (1 + 0.1 * x**2)


Ntr = 500
Xtr = 10 * random.random((Ntr, 1))
Ytr = 5 + testfunc(Xtr) + 0.1 * random.normal(0, 1, (Ntr, 1)) * Xtr

# initialize the LWPR model
model = LWPR(1, 1)
model.init_D = 20 * eye(1)
model.update_D = True
model.init_alpha = 40 * eye(1)
model.meta = False

print(Xtr)
print(Ytr)
# train the model
for k in range(20):
    ind = random.permutation(Ntr)
    mse = 0

    for i in range(Ntr):
        yp = model.update(Xtr[ind[i]], Ytr[ind[i]])
        mse = mse + (Ytr[ind[i], :] - yp)**2
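The original demo is truncated at this point. A possible continuation (a sketch, not part of the original code) that reports a normalised error at the end of each epoch and then visualises the fit with the binding's predict() method:

    # sketch: normalised mean squared error for this epoch
    nMSE = (mse / (Ntr * var(Ytr))).item()
    print("epoch %2i: nMSE = %.5f" % (k, nMSE))

# sketch: query the trained model on a regular grid and plot the fit
Xtest = linspace(0, 10, 500).reshape(-1, 1)
Ytest = array([model.predict(x) for x in Xtest])

plt.plot(Xtr, Ytr, 'r.', label='training data')
plt.plot(Xtest, Ytest, 'b-', label='LWPR prediction')
plt.legend()
plt.show()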
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='AntBulletEnv-v0')
    parser.add_argument("--no_data", type=int, default=10000)
    args = parser.parse_args()

    state_action, state, reward, next_state = gather_data(
        args.no_data, args.environment)
    assert len(state_action) == len(next_state)
    assert len(state_action) == len(reward)
    '''
    data = pickle.load(open('data.p'))
    state2, action2, reward2, next_state2 = data[0]
    state_action2 = np.concatenate([state2, action2], axis=-1)
    state_action = np.concatenate([state_action, state_action2], axis=0)
    reward = np.concatenate([reward, reward2], axis=0)
    next_state = np.concatenate([next_state, next_state2], axis=0)
    '''

    no_data = len(state_action)

    model_state = LWPR(state_action.shape[-1], next_state.shape[-1])
    model_state.init_D = 5. * np.eye(state_action.shape[-1])
    model_state.update_D = True
    model_state.init_alpha = 1. * np.eye(state_action.shape[-1])
    model_state.meta = True
    action_shape = state_action.shape[-1] - next_state.shape[-1]
    model_state.norm_in = np.array(([10.] * state.shape[-1]) +
                                   [2.] * action_shape)

    model_reward = LWPR(state_action.shape[-1], reward.shape[-1])
    model_reward.init_D = 1. * np.eye(state_action.shape[-1])
    model_reward.update_D = True
    model_reward.init_alpha = 20. * np.eye(state_action.shape[-1])
    model_reward.meta = True

    #for k in range(20):
    for k in range(1):
        ind = np.random.permutation(no_data)
        for i in range(no_data):
            print(k, i)
            model_state.update(state_action[ind[i]], next_state[ind[i]])
            #model_state.update(state_action[ind[i]], next_state[ind[i]] - state[ind[i]])
            model_reward.update(state_action[ind[i]], reward[ind[i]])

    uid = str(uuid.uuid4())
    for k in range(10):
        state_action_test, state_test, reward_test, next_state_test = gather_data_epoch(
            1, args.environment)
        '''
        if k % 2 == 0:
            state_action_test, state_test, reward_test, next_state_test = gather_data_epoch(1, args.environment)
        else:
            idx = np.random.randint(1, len(data))
            state_test, action_test, reward_test, next_state_test = data[idx]
            state_action_test = np.concatenate([state_test, action_test], axis=-1)
        '''
        Y = []
        confs = []
        Y_r = []
        confs_r = []
        for i in range(len(state_action_test)):
            y, conf = model_state.predict_conf(state_action_test[i])
            #Y.append(y + state_test[i])
            Y.append(y)
            confs.append(conf)
            y_r, conf_r = model_reward.predict_conf(state_action_test[i])
            Y_r.append(y_r)
            confs_r.append(conf_r)
        Y = np.stack(Y, axis=0)
        confs = np.stack(confs, axis=0)
        Y_r = np.stack(Y_r, axis=0)
        confs_r = np.stack(confs_r, axis=0)

        for i in range(next_state.shape[-1]):
            plt.figure()
            print('Here is the length of the trajectory:',
                  len(next_state_test))
            assert len(next_state_test[:, i:i + 1]) == len(Y[:, i:i + 1])
            #plt.plot(np.arange(len(next_state_test[:, i:i+1])), next_state_test[:, i:i+1] - state_test[:, i:i+1])
            plt.plot(np.arange(len(next_state_test[:, i:i + 1])),
                     next_state_test[:, i:i + 1])
            plt.errorbar(np.arange(len(Y[:, i:i + 1])),
                         Y[:, i:i + 1],
                         yerr=confs[:, i:i + 1],
                         color='r',
                         ecolor='y')
            plt.grid()
            #plt.savefig(args.environment+'_'+'k:'+str(k)+'_'+'dim:'+str(i)+'_'+uid+'.pdf')

        plt.figure()
        plt.plot(np.arange(len(reward_test)), reward_test)
        plt.errorbar(np.arange(len(Y_r)),
                     Y_r,
                     yerr=confs_r,
                     color='r',
                     ecolor='g')
        plt.grid()
        plt.show()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='AntBulletEnv-v0')
    parser.add_argument("--no_data_start", type=int, default=10000)
    parser.add_argument("--train_policy_batch_size", type=int, default=30)
    parser.add_argument("--cma_maxiter", type=int, default=1000)
    parser.add_argument("--unroll_steps", type=int, default=200)
    args = parser.parse_args()

    print(args)

    env = gym.make(args.environment)

    input_dim = env.observation_space.shape[0]+env.action_space.shape[0]
    output_dim = env.observation_space.shape[0] + 1
    model = LWPR(input_dim, output_dim)
    model.init_D = 1. * np.eye(input_dim)
    model.update_D = True
    model.init_alpha = 20. * np.eye(input_dim)
    model.meta = True

    agent = AGENT(env.observation_space.shape[0],
                  env.action_space.shape[0],
                  action_space_low=env.action_space.low,
                  action_space_high=env.action_space.high,
                  unroll_steps=args.unroll_steps)

    init_states = np.stack([env.reset() for _ in range(args.train_policy_batch_size)], axis=0)

    # Train the dynamics model on the initial data.
    data_buffer = gather_data3(env, args.no_data_start)
    states, actions, rewards, next_states, _ = zip(*data_buffer)
    states = np.stack(states, axis=0)
    actions = np.stack(actions, axis=0)
    rewards = np.array(rewards)[..., np.newaxis]
    next_states = np.stack(next_states, axis=0)

    state_actions = np.concatenate([states, actions], axis=-1)
    state_diff = next_states - states
    targets = np.concatenate([state_diff, rewards], axis=-1)

    assert len(state_actions) == len(targets)
    ind = np.random.permutation(len(state_actions))
    for i in range(len(state_actions)):
        model.update(state_actions[ind[i]], targets[ind[i]])

    for epoch in range(1000):
        agent._fit(model, init_states, args.cma_maxiter)

        total_rewards = 0.
        state = env.reset()
        while True:
            action = agent._forward(agent.thetas, state[np.newaxis, ...])[0]
            next_state, reward, done, _ = env.step(action)
            state_action = np.concatenate([state, action])
            state_diff = next_state - state
            target = np.append(state_diff, reward)
            model.update(state_action, target)

            total_rewards += float(reward)

            state = next_state.copy()

            if done:
                print('epoch:', epoch, 'total_rewards:', total_rewards)
                break
Example #7
    context1 = [2.0, context[0]]
    context2 = [3.6, context[1]]

    circle1 = plt.Circle((context1[0], context1[1]), 0.1, color='b', fill=False)
    circle2 = plt.Circle((context2[0], context2[1]), 0.1, color='b', fill=False)
    ax = plt.gca()
    ax.add_artist(circle1)
    ax.add_artist(circle2)
plt.grid()
plt.savefig('/home/fmeccanici/Documents/thesis/thesis_workspace/src/promp_demo_2d/figures/lwpr/lwpr_demos.png')
plt.clf()



# initialize lwpr model
model = LWPR(n_in, n_out)
model.init_D = 10 * eye(n_in)
model.init_alpha = 0.1 * eye(n_in)
# model.kernel = 'BiSquare'

for i in range(10):
    for demonstration in demonstrations:
        output = np.asarray(demonstration[0])
        context = np.asarray(demonstration[1])

        # print("added output: " + str(output))
        # print("added context: " + str(context))
        
        model.update(context, output)

# generalize
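A possible generalisation step to finish the example (a sketch, not in the original): query the trained model with predict_conf() for a query context; here the context of the first demonstration is reused as a placeholder, since the context dimensionality n_in is not shown in the snippet.

# sketch: predict the output for a query context together with confidence bounds
query_context = np.asarray(demonstrations[0][1])  # placeholder; replace with an unseen context
predicted_output, confidence = model.predict_conf(query_context)
print("predicted output:", predicted_output)
print("confidence bounds:", confidence)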