Example 1
def main(args):
    """
    Environment used in this code is Pendulum-v0 from OpenAI gym.

        States: cos(theta), sin(theta), theta_dt
        Actions: Force application between -2 to 2
        Reward: -(Θ^2 + 0.1*Θ_dt^2 + 0.001*action^2)

    Objective:  Pendulum is vertical, with 0 movement.
    Initialization: Starts at a random angle, and at a random velocity.
    End: After all the steps are exhausted
    """

    with tf.Session() as sess:

        # Create the gym environment
        env = MimoCstr(nsim=args['max_episode_len'])
        # env = gym.make(args['env'])

        # Set all the random seeds for the random packages
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))
        tflearn.init_graph(seed=int(args['random_seed']))

        # Define all the state and action dimensions, and the bound of the action
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high

        # Ensure the action bound is symmetric
        assert np.all(env.action_space.high == -env.action_space.low)

        # To restore a previously saved model, build the saver after the
        # networks below are constructed and call:
        # saver.restore(sess, args['ckpt_dir'])

        # Initialize the actor and critic
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        # Initialize Ornstein Uhlenbeck Noise
        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        # Train the Actor-Critic Model
        replay_buffer, action_list = train(sess, env, args, actor, critic,
                                           actor_noise)

        # Create the saver (all network variables now exist) and save the model
        saver = tf.train.Saver()
        saver.save(sess, args['ckpt_dir'])

        return actor, critic, env, replay_buffer, action_list
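A minimal sketch of how main() might be invoked. The dictionary keys below are inferred from how args is indexed inside main(); the values and the checkpoint path are illustrative placeholders, and train() may expect additional keys not shown in this snippet.

if __name__ == '__main__':
    args = {
        'env': 'Pendulum-v0',            # only used by the commented-out gym.make call
        'max_episode_len': 200,          # horizon passed to MimoCstr(nsim=...)
        'random_seed': 1234,
        'actor_lr': 1e-4,
        'critic_lr': 1e-3,
        'tau': 1e-3,
        'gamma': 0.99,
        'minibatch_size': 64,
        'ckpt_dir': './checkpoints/ddpg.ckpt',  # hypothetical checkpoint path
    }
    actor, critic, env, replay_buffer, action_list = main(args)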
Example 2
def simulation():

    # Plant Model
    model_plant = MimoCstr(nsim=50)

    # Build Controller Model
    model_control = MimoCstr(nsim=model_plant.Nsim,
                             nx=model_plant.Nx * 2,
                             xs=np.array([0.878, 324.5, 0.659, 0, 0, 0]),
                             x0=np.array([1, 310, 0.659, 0, 0, 0]),
                             control=True)

    # MPC object initialization
    control = ModelPredictiveControl(model_control.Nsim,
                                     10,
                                     model_control.Nx,
                                     model_control.Nu,
                                     0.1,
                                     0.1,
                                     0.1,
                                     model_control.xs,
                                     model_control.us,
                                     dist=True)

    # MPC Construction
    mpc_control = control.get_mpc_controller(model_control.cstr_ode,
                                             control.eval_time,
                                             model_control.x0,
                                             random_guess=False)
    """
    Simulation portion
    """

    for t in range(model_plant.Nsim):

        # Solve the MPC optimization problem, obtain current input and predicted state
        model_control.u[t, :], model_control.x[t + 1, :] = control.solve_mpc(
            model_plant.x, model_plant.xsp, mpc_control, t, control.p)

        # Calculate the next states for the plant
        model_plant.x[t + 1, :] = model_plant.next_state(
            model_plant.x[t, :], model_control.u[t, :])

        # Update the P parameters for offset-free control
        control.p = model_plant.x[t + 1, :] - model_control.x[t + 1, 0:3]

    print(model_plant.cost_function())

    return model_plant, model_control, control
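A sketch of how the closed-loop simulation above could be run and inspected. The attribute names follow their use in simulation(); treating state index 1 as the reactor temperature with a target of 324.5 follows the xs vector passed to the controller model, and the plotting itself is only an illustration.

import matplotlib.pyplot as plt

plant, controller_model, mpc = simulation()

plt.plot(plant.x[:, 1], label='reactor temperature')               # second state, as in xs
plt.axhline(324.5, linestyle='--', label='temperature target (xs[1])')
plt.xlabel('time step')
plt.legend()
plt.show()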
Example 3
def simulation():

    # MPC Evaluation Period
    eval_period = 5

    # Model initialization
    model = MimoCstr(nsim=50, k0=8.2e10)

    # MPC initialization
    mpc_init = ModelPredictiveControl(10, model.Nx, model.Nu, 0.1, 0.1, 0.1,
                                      model.xs, model.us)

    # MPC Construction
    mpc_control = mpc_init.get_mpc_controller(model.cstr_ode,
                                              eval_period,
                                              model.x0,
                                              random=False,
                                              verbosity=0)

    # Output Disturbance
    output_disturb = np.zeros(model.Nx)
    x_corrected = np.zeros([model.Nsim + 1, model.Nx])
    """
    Simulation portion
    """

    for t in range(model.Nsim):
        """
        Disturbance
        """

        # if t == 10:
        #     model.disturbance()
        """
        MPC evaluation
        """

        if t % eval_period == 0 and t != 0:
            # Solve the MPC optimization problem
            mpc_init.solve_mpc(model.x, model.u, model.xsp, mpc_control, t)
            if t != 0:
                output_disturb = model.xs - model.x[t, :]
        elif t < 5:
            model.u[t, :] = [300, 0.1]
        else:
            model.u[t, :] = model.u[t - 1, :]

        # Calculate the next states
        model.x[t + 1, :] = model.next_state(model.x[t, :], model.u[t, :])
        x_corrected[t + 1, :] = model.x[t + 1, :] + output_disturb

    print(model.cost_function(x_corrected))

    return model, mpc_init, x_corrected
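As above, a sketch of running this variant; here the interesting comparison is between the raw plant trajectory and the offset-corrected one returned by simulation(). State index 1 is again assumed to be the reactor temperature.

import matplotlib.pyplot as plt

model, mpc, x_corrected = simulation()

plt.plot(model.x[:, 1], label='raw plant temperature')
plt.plot(x_corrected[:, 1], '--', label='offset-corrected temperature')
plt.xlabel('time step')
plt.legend()
plt.show()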
Example 4
import tensorflow as tf
import gym
import numpy as np
import os
import matplotlib.pyplot as plt
from CSTR_model import MimoCstr

os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

model = MimoCstr(nsim=50)
action_list = [-1, 1]


def reward_calc(temp, temp_sp):
    rewards = 0

    if temp_sp * 0.999 < temp < temp_sp * 1.001:
        rewards = rewards + 15 - abs(temp - temp_sp) * 20
    else:
        rewards = rewards - np.power(temp - temp_sp, 2)

    return rewards
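
# Quick sanity check of the shaping above: inside the +/-0.1% band around the
# setpoint the reward is a bonus that shrinks with the distance from the
# setpoint, outside the band it is a quadratic penalty. The calls below are
# only illustrative.
print(reward_calc(324.5, 324.5))   # 15.0   (exactly on the setpoint)
print(reward_calc(324.6, 324.5))   # ~13.0  (inside the band: 15 - 0.1*20)
print(reward_calc(320.0, 324.5))   # -20.25 (outside the band: -(4.5)**2)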


num_inputs = 3
num_hidden = 4
num_output = 1  # probability of going left; P(right) = 1 - P(left)

initializer = tf.contrib.layers.variance_scaling_initializer()

X = tf.placeholder(tf.float32, shape=[None, num_inputs])
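The snippet above is cut off after the input placeholder. The lines below are a sketch, not the original code, of how such a policy network is typically completed in TF1: a hidden layer feeding a sigmoid output that gives the probability of selecting action_list[0], with the complementary probability for action_list[1].

# Sketch only: the layer sizes reuse num_hidden/num_output from above; the
# choice of activation, the sampling step and everything else are assumptions.
hidden = tf.layers.dense(X, num_hidden, activation=tf.nn.elu,
                         kernel_initializer=initializer)
logits = tf.layers.dense(hidden, num_output, kernel_initializer=initializer)
prob_left = tf.nn.sigmoid(logits)                       # P(action = action_list[0])
probs = tf.concat([prob_left, 1 - prob_left], axis=1)   # [P(left), P(right)]
action_idx = tf.multinomial(tf.log(probs), num_samples=1)  # sampled index into action_list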
Example 5
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


def reward_calc(temp, temp_sp):
    rewards = 0

    if temp_sp * 0.997 < temp < temp_sp * 1.003:
        rewards = rewards + 15 - abs(temp - temp_sp) * 20
    else:
        rewards = rewards - np.power(temp - temp_sp, 2)

    return rewards


model = MimoCstr(delta=1, nsim=500)

rl = ReinforceLearning(discount_factor=0.97,
                       states_start=300,
                       states_stop=340,
                       states_interval=0.5,
                       actions_start=-15,
                       actions_stop=15,
                       actions_interval=2.5,
                       learning_rate=0.1,
                       epsilon=0.1,
                       doe=0,
                       eval_period=5)

states = np.zeros([75])
states[0:15] = np.arange(290, 310, 20 / 15)
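
# The listing is cut off here. One plausible way the remaining bins could be
# filled, consistent with states_stop=340 and states_interval=0.5 passed to
# ReinforceLearning above (this is an assumption, not the original code):
states[15:75] = np.arange(310, 340, 0.5)   # 60 finer 0.5 K bins from 310 to 339.5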