Example #1
    model.setRewardBounds(reward_bounds)
    model.setActionBounds(action_bounds)

    # Visit the stored transitions in a random order.
    arr = list(range(len(states_)))
    random.shuffle(arr)
    given_actions = []
    given_states = []
    for i in range(len(states_)):
        a = actions[arr[i]]
        action_ = np.array([a])
        given_actions.append(action_)
        state_ = np.array([states_[arr[i]]])
        next_state_ = np.array([next_states_[arr[i]]])
        given_states.append(state_)
        # print "Action: " + str([actions[i]])
        experience.insert(state_, action_, next_state_, np.array([1]))
        # print ("Added tuple: ", i)

    errors = []
    for i in range(settings['rounds']):
        # print ("Actions: ", _actions)
        # print ("States: ", _states)
        # (error, lossActor) = model.train(_states, _actions, _result_states, _rewards)
        for j in range(1):
            _states, _actions, _result_states, _rewards, falls_, advantage, exp_actions__ = experience.get_batch(
                batch_size)
            error = model.trainCritic(_states, _actions, _result_states,
                                      _rewards)
        # Several actor-side updates per round.
        for j in range(5):
            _states, _actions, _result_states, _rewards, falls_, advantage, exp_actions__ = experience.get_batch(
                batch_size)
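The snippet breaks off right after the actor-side batch fetch. A hypothetical continuation of that inner loop, assuming the model pairs trainCritic with a trainActor method taking the same batch plus the advantage estimates (the method name and argument list are assumptions, not shown in the original):

            # Hypothetical actor update mirroring the critic update above.
            error = model.trainActor(_states, _actions, _result_states,
                                     _rewards, advantage, exp_actions__)
        errors.append(error)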
Example #2
    experience.setRewardBounds(reward_bounds)
    experience.setActionBounds(action_bounds)
    experience.setSettings(settings)
    arr = list(range(experience_length))
    random.shuffle(arr)
    # Keep a random subset of the experience for training.
    num_samples_to_keep = 300
    given_actions = []
    given_states = []
    for i in range(num_samples_to_keep):
        a = actions[arr[i]]
        action_ = np.array([a])
        given_actions.append(action_)
        state_ = np.array([states[arr[i]]])
        given_states.append(state_)
        # print "Action: " + str([actions[i]])
        experience.insert(state_, state_, action_, np.array([0]))

    errors = []
    for i in range(1000):
        _states, _actions, _result_states, _rewards, falls_, _G_ts, advantage = experience.get_batch(
            batch_size)
        # Train on the sampled (states, actions, result_states, rewards) batch.
        error = model.train(_states, _actions, _result_states, _rewards)
        errors.append(error)

    # Evaluate on an evenly spaced grid of states.
    states = np.linspace(-5.0, 5.0, experience_length)
    actionsNoNoise = np.array(list(map(f, states)))
Example #3
    model = NeuralNetwork(len(state_bounds[0]), len(action_bounds[0]),
                          state_bounds, action_bounds, settings)

    experience = ExperienceMemory(len(state_bounds[0]),
                                  len(action_bounds[0]),
                                  experience_length,
                                  continuous_actions=True)
    for i in range(experience_length):
        action_ = np.array([actions[i]])
        state_ = np.array([states[i]])
        # print "Action: " + str([actions[i]])
        experience.insert(norm_state(state_, state_bounds),
                          norm_action(action_, action_bounds),
                          norm_state(state_, state_bounds),
                          norm_reward(np.array([0]), reward_bounds))

    errors = []
    for i in range(5000):
        _states, _actions, _result_states, _rewards = experience.get_batch(
            batch_size)
        # Supervised fit: learn the mapping from states to actions.
        error = model.train(_states, _actions)
        errors.append(error)

    states = np.linspace(-1.0, 6.0, experience_length)
    actionsNoNoise = np.array(list(map(f, states)))

    predicted_actions = np.array(list(map(model.predict, states)))
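A possible follow-up that quantifies the fit on the evaluation grid (a sketch; it assumes predicted_actions and actionsNoNoise flatten to aligned 1-D arrays):

    # Mean absolute deviation between the learned mapping and the target f.
    eval_error = np.mean(np.abs(predicted_actions.flatten() - actionsNoNoise.flatten()))
    print("Mean |predicted - target|: " + str(eval_error))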