Code example #1
def main(env, ctrl_type, ctrl_args, overrides, logdir, args):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    logger.info('\n' + pprint.pformat(cfg))

    # Build the MPC policy from the controller config (this is where the optimizer popsize is applied)
    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)

    cfg.exp_cfg.misc = copy.copy(cfg)
    exp = MBExperiment(cfg.exp_cfg)

    if not os.path.exists(exp.logdir):
        os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
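
These main() functions are normally driven from the command line. The sketch below shows one way to wrap the main() above in an argparse entry point; the flag names (-env, -ca, -o, -logdir) are modeled on the handful-of-trials command-line scripts but should be treated as assumptions here, not the project's exact interface.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-env", type=str, required=True,
                        help="Environment name (e.g. cartpole, reacher, pusher, halfcheetah)")
    parser.add_argument("-ca", "--ctrl_arg", action="append", nargs=2, default=[],
                        help="Controller argument given as a (key, value) pair")
    parser.add_argument("-o", "--override", action="append", nargs=2, default=[],
                        help="Config override given as a (dotted key path, value) pair")
    parser.add_argument("-logdir", type=str, default="log",
                        help="Directory to which results are written")
    args = parser.parse_args()
    # Hand the parsed (key, value) pair lists straight to main() above
    main(args.env, "MPC", args.ctrl_arg, args.override, args.logdir, args)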
Code example #2
File: mbexp.py  Project: xiaoanshi/handful-of-trials
def main(env, ctrl_type, ctrl_args, overrides, logdir):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    os.makedirs(exp.logdir)
    config_dict = cfg.toDict()
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(config_dict))
    with open(os.path.join(exp.logdir, "variant.json"), "w") as f:
        json.dump(config_dict, f, indent=2, sort_keys=True, cls=MyEncoder)
    save_git_info(exp.logdir)

    exp.run_experiment()
Code example #3
File: render.py  Project: xiaoanshi/handful-of-trials
def main(
    env,
    ctrl_type,
    ctrl_args,
    overrides,
    model_dir,
    logdir,
    init_iter,
    last_iter,
    nrecord,
    rawdir,
):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir])
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"])
    overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"])
    overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"])
    overrides.append(["exp_cfg.exp_cfg.init_iter", str(init_iter)])
    overrides.append(["exp_cfg.exp_cfg.ntrain_iters", str(last_iter)])
    overrides.append(["exp_cfg.log_cfg.nrecord", str(nrecord)])
    overrides.append(["exp_cfg.log_cfg.rawdir", str(rawdir)])

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    if os.path.exists(exp.logdir):
        overwrite = user_prompt("{} already exists. Overwrite?".format(
            exp.logdir))
        if not overwrite:
            return
    else:
        os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
    print("Saved to")
    print(exp.logdir)
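
Code example #3 builds its configuration by appending [dotted key path, value] pairs to overrides before calling create_config (examples #4 and #5 below follow the same pattern). As a rough illustration of how such a pair can land on a nested DotMap config, consider the sketch below; apply_override is a hypothetical helper written for this page and is not how create_config is actually implemented.

from dotmap import DotMap

def apply_override(cfg, key_path, value):
    """Walk the dotted key path and set the leaf value on a nested DotMap (illustrative only)."""
    node = cfg
    keys = key_path.split(".")
    for key in keys[:-1]:
        node = getattr(node, key)   # DotMap auto-creates missing intermediate nodes
    setattr(node, keys[-1], value)

cfg = DotMap()
apply_override(cfg, "exp_cfg.log_cfg.nrecord", "1")
print(cfg.exp_cfg.log_cfg.nrecord)  # -> "1" (values are passed around as strings here)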
Code example #4
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir])
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"])
    overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"])
    overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"])
    overrides.append(["exp_cfg.exp_cfg.ntrain_iters", "1"])
    overrides.append(["exp_cfg.log_cfg.nrecord", "1"])

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
Code example #5
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir])
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"])
    overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"])
    overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"])
    overrides.append(["exp_cfg.exp_cfg.ntrain_iters", "1"])
    overrides.append(["exp_cfg.log_cfg.nrecord", "0"])
    overrides.append(["exp_cfg.exp_cfg.nrollouts_per_iter", "200"])
    overrides.append(["exp_cfg.log_cfg.neval", "200"])

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.val_cfg.model_init_cfg.load_model = True
    cfg.val_cfg.model_init_cfg.model_dir = model_dir

    cfg.exp_cfg.exp_cfg.use_teacher = False
    if cfg.exp_cfg.exp_cfg.use_teacher:
        cfg.exp_cfg.exp_cfg.teacher = cfg.exp_cfg.sim_cfg.env.teacher()
    cfg.exp_cfg.exp_cfg.use_value = False
    if cfg.exp_cfg.exp_cfg.use_value:
        cfg.exp_cfg.exp_cfg.value = DeepValueFunction(cfg.val_cfg)

    cfg.exp_cfg.exp_cfg.ninit_rollouts = 0

    cfg.ctrl_cfg.value_func = cfg.exp_cfg.exp_cfg.value
    cfg.ctrl_cfg.use_value = cfg.exp_cfg.exp_cfg.use_value
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
Code example #6
class Cluster:
    """
    Computes optimal process parameters, at each layer, given feedback obtained
    from the machine sensors.

    Arguments:
        shared_cfg (dotmap):
            - **env.n_parts** (*int*): Total number of parts built under feedback control.
            - **env.horizon** (*int*): Markov Decision Process horizon (here number of layers).
            - **env.nS** (*int*): Dimension of the state vector.
            - **comms** (*dotmap*): Parameters for communication with other classes.
        pretrained_cfg (dotmap):
            - **n_parts** (*int*): Number of parts built under this control scheme.
            - **ctrl_cfg** (*dotmap*): Configuration parameters passed to the MPC class.
        learned_cfg (dotmap):
            - **n_parts** (*int*): Number of parts built under this control scheme.
            - **ctrl_cfg** (*dotmap*): Configuration parameters passed to the MPC class.
    """
    def __init__(self, shared_cfg, pretrained_cfg, learned_cfg):
        self.s_cfg = shared_cfg
        self.c_pre_cfg = pretrained_cfg
        self.c_ler_cfg = learned_cfg

        self.policyPret = MPC(pretrained_cfg.ctrl_cfg)
        self.policyLear = MPC(learned_cfg.ctrl_cfg)

        self.t = 0
        self.H = shared_cfg.env.horizon
        self.train_freq = learned_cfg.train_freq

        self.n_parts = shared_cfg.env.n_parts
        self.n_parts_pretrained = pretrained_cfg.n_parts
        self.n_parts_learned = learned_cfg.n_parts
        assert self.n_parts_pretrained + self.n_parts_learned == self.n_parts

        self.pret_state_traj = np.zeros(
            (self.n_parts_pretrained, self.H, shared_cfg.env.nS))
        self.pret_action_traj = np.zeros((self.n_parts_pretrained, self.H, 2))
        self.lear_state_traj = np.zeros(
            (self.n_parts_learned, self.H, shared_cfg.env.nS))
        self.lear_action_traj = np.zeros((self.n_parts_learned, self.H, 2))
        self.pred_cost_pret = np.zeros((self.H, self.n_parts_pretrained))
        self.pred_cost_lear = np.zeros((self.H, self.n_parts_learned))

        self.save_dirs = [shared_cfg.save_dir1, shared_cfg.save_dir2]

        self.clearComms()

    # --------------------------------------------------------------------------
    # COMMS FUNCTIONS
    # --------------------------------------------------------------------------
    def clearComms(self):
        """Remove any leftover RDY folders from a previous run."""
        cfg = self.s_cfg.comms
        dir_action = cfg.dir + cfg.action.rdy_name
        dir_state = cfg.dir + cfg.state.rdy_name
        if os.path.isdir(dir_action): os.rmdir(dir_action)
        if os.path.isdir(dir_state): os.rmdir(dir_state)

    def getStates(self):
        """Load state vectors uploaded to the server by the `Machine` class.

        This function waits for the `comms.dir/comms.state.rdy_name` folder to be
        created by the `Machine` class, before reading the file where the states
        are located, `comms.dir/comms.state.f_name`.

        Returns:
            np.array: State vector with shape (`n_parts`, `nS`)
        """
        print('Waiting for states...')
        dir = self.s_cfg.comms.dir
        cfg = self.s_cfg.comms.state
        rdy = dir + cfg.rdy_name

        # Wait until RDY signal is provided
        while (not os.path.isdir(rdy)):
            pass
        os.rmdir(rdy)  # Delete RDY

        # Read data to array
        states = np.load(dir + cfg.f_name)
        print('States received')
        return states

    def sendAction(self, actions):
        """Saves the computed actions.

        Signals the `Machine` class that actions are ready to be downloaded by
        locally creating the `comms.dir/comms.action.rdy_name` folder.

        Arguments:
            actions (np.array): Action vector with shape (`n_parts`, `nU`)
        """
        dir = self.s_cfg.comms.dir
        cfg = self.s_cfg.comms.action

        # Write actions into npy file
        np.save(dir + cfg.f_name, actions)
        os.mkdir(dir + cfg.rdy_name)  # RDY signal
        print('Actions saved')

    # --------------------------------------------------------------------------
    # SAMPLE ACTIONS
    # --------------------------------------------------------------------------
    def computeAction(self, states):
        """Computes the control actions given the observed system states.

        Arguments:
            states (np.array): Observed states, shape (`n_parts`, `nS`)

        Returns:
            np.array: Computed actions, with shape (`n_parts`, `nU`)
        """
        self.pret_state_traj[:, self.t, :] = states[:self.n_parts_pretrained, :]
        self.lear_state_traj[:, self.t, :] = states[self.n_parts_pretrained:, :]

        # At least one learned part, and not on the very first step: retrain the model
        if self.n_parts_learned > 0 and self.t != 0 and self.t % self.train_freq == 0:
            print("Training model...")
            obs_in = self.lear_state_traj[:, self.t - self.train_freq:self.t, :].reshape(
                -1, self.lear_state_traj.shape[-1])
            obs_out = self.lear_state_traj[:, self.t - self.train_freq + 1:self.t + 1, :].reshape(
                -1, self.lear_state_traj.shape[-1])
            acs = self.lear_action_traj[:, self.t - self.train_freq:self.t, :].reshape(
                -1, self.lear_action_traj.shape[-1])
            self.policyLear.train(obs_in, obs_out, acs)

        # COMPUTE ACTION
        action = np.zeros((self.s_cfg.env.n_parts, 2))
        lastTempId = None
        for part in range(self.s_cfg.env.n_parts):
            print("Sampling actions %d/%d" % (part, self.s_cfg.env.n_parts))
            if part < self.n_parts_pretrained:  # Pretrained policy
                action[part, :], self.pred_cost_pret[self.t, part] = \
                    self.policyPret.act(states[part, :], self.t, get_pred_cost=True)
            else:  # Learned policy
                # Switch the target cost when this part falls into a new target group
                if self.c_ler_cfg.ctrl_cfg.change_target:
                    for i in range(len(self.c_ler_cfg.ctrl_cfg.n_parts_targets)):
                        if (part - self.n_parts_pretrained) < self.c_ler_cfg.ctrl_cfg.n_parts_targets[i]:
                            if not i == lastTempId:
                                lastTempId = i
                                self.policyLear.changeTargetCost(
                                    self.c_ler_cfg.ctrl_cfg.targets[i])
                                break

                if self.t < self.train_freq:  # Do not predict cost
                    action[part, :] = self.policyLear.act(
                        states[part, :], self.t, get_pred_cost=False)
                    self.pred_cost_lear[self.t, part - self.n_parts_pretrained] = 0
                else:
                    action[part, :], self.pred_cost_lear[self.t, part - self.n_parts_pretrained] = \
                        self.policyLear.act(states[part, :], self.t, get_pred_cost=True)

                # Force inputs derived from the q/v and q/sqrt(v) bounds
                force = self.c_ler_cfg.ctrl_cfg.force
                if (force.on                                   # setting enabled
                        and part >= force.start_part - 1       # suitable part
                        and self.t >= force.init_buffer - 1):  # past the initial buffer time

                    part_rel = part - force.start_part + 1  # part index w.r.t. the first forced part
                    t_rel = self.t - force.init_buffer + 1  # t w.r.t. the first event t
                    n_after_event = t_rel % force.delta     # t w.r.t. the last event t

                    print("t %d, t_rel %d, n_after_event %d, part %d, part_rel %d"
                          % (self.t, t_rel, n_after_event, part, part_rel))

                    # part_repeat selects whether this part probes the upper or the lower bound
                    part_repeat = int(part_rel / (force.n_parts * len(force.n_repeats)))
                    part_repeat_2 = part_rel % (force.n_parts * len(force.n_repeats))
                    repeat_i = int(part_repeat_2 / force.n_parts)  # determines how many repeats

                    print("part_repeat %d, part_repeat2 %d, repeat_i %d, n_repeats %d"
                          % (part_repeat, part_repeat_2, repeat_i, force.n_repeats[repeat_i]))

                    if n_after_event < force.n_repeats[repeat_i]:  # Update frequency
                        lev = int(t_rel / force.delta)  # Update number
                        v = force.fixed_speed
                        if part_repeat == 0:  # Upper bound
                            upper = force.upper_init + force.upper_delta * lev
                            q = upper * np.sqrt(v)
                            print("For part %d, power forced %d (upper limit %d)"
                                  % (part, q, upper))
                        else:  # Lower bound
                            lower = force.lower_init + force.lower_delta * lev
                            q = lower * v
                            print("For part %d, power forced %d (lower limit %d)"
                                  % (part, q, lower))
                        action[part, :] = [v, q]

        self.pret_action_traj[:, self.t, :] = action[:self.n_parts_pretrained, :]
        self.lear_action_traj[:, self.t, :] = action[self.n_parts_pretrained:, :]

        self.t += 1

        return action

    def initAction(self):
        """ Returns the initial action vector.

        This function is required because an initial layer must be built before
        any feedback is available.

        Returns:
            np.array: Initial action vector with shape (`n_parts`, `nU`)
        """
        return np.ones(
            (self.s_cfg.env.n_parts, 2)) * self.s_cfg.env.init_params

    def log(self):
        """ Logs the state and action trajectories, as well as the predicted cost,
        which may be of interest to tune some algorithmic parameters.
        """
        for i in range(len(self.save_dirs)):
            np.save(self.save_dirs[i] + "pret_state_traj.npy",
                    self.pret_state_traj)
            np.save(self.save_dirs[i] + "pret_action_traj.npy",
                    self.pret_action_traj)
            np.save(self.save_dirs[i] + "pret_pred_cost.npy",
                    self.pred_cost_pret)
            np.save(self.save_dirs[i] + "lear_state_traj.npy",
                    self.lear_state_traj)
            np.save(self.save_dirs[i] + "lear_action_traj.npy",
                    self.lear_action_traj)
            np.save(self.save_dirs[i] + "lear_pred_cost.npy",
                    self.pred_cost_lear)

    def loop(self):
        """ While within the time horizon, read the states provided by the `Machine`
        class, and compute and save the corresponding actions.

        Allows the class functionality to be conveniently used as follows::

            cluster = Cluster(s_cfg, cp_cfg, cl_cfg)
            cluster.loop()
        """
        self.sendAction(self.initAction())
        while self.t < self.H:
            states = self.getStates()
            actions = self.computeAction(states)
            self.sendAction(actions)
            self.log()
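
The getStates and sendAction docstrings above describe a simple file-system handshake: one side writes an .npy file and then creates an empty RDY folder, which the other side polls for, deletes, and only then reads the file. As a rough sketch of the other half of that protocol, a Machine-side counterpart could look like the following; both function names are hypothetical and only assume the same comms config layout (dir, f_name, rdy_name).

import os
import time

import numpy as np

def machine_send_states(comms, states):
    """Hypothetical Machine side: save the measured states, then raise the RDY flag read by Cluster.getStates()."""
    np.save(comms.dir + comms.state.f_name, states)
    os.mkdir(comms.dir + comms.state.rdy_name)  # RDY signal

def machine_receive_actions(comms, poll_interval=0.5):
    """Hypothetical Machine side: wait for the RDY flag raised by Cluster.sendAction(), then read the actions."""
    rdy = comms.dir + comms.action.rdy_name
    while not os.path.isdir(rdy):
        time.sleep(poll_interval)
    os.rmdir(rdy)  # Consume the RDY signal
    return np.load(comms.dir + comms.action.f_name)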
Code example #7
class Cluster:
    def __init__(self, shared_cfg, pretrained_cfg, learned_cfg):
        self.s_cfg = shared_cfg
        self.c_pre_cfg = pretrained_cfg
        self.c_ler_cfg = learned_cfg

        self.policyPret = MPC(pretrained_cfg.ctrl_cfg)
        self.policyLear = MPC(learned_cfg.ctrl_cfg)

        self.t = 0
        self.H = shared_cfg.env.horizon
        self.train_freq = learned_cfg.train_freq

        self.n_parts = shared_cfg.env.n_parts
        self.n_parts_pretrained = pretrained_cfg.n_parts
        self.n_parts_learned = learned_cfg.n_parts
        assert self.n_parts_pretrained+self.n_parts_learned == self.n_parts

        self.pret_state_traj = np.zeros((self.n_parts_pretrained, self.H, shared_cfg.env.nS))
        self.pret_action_traj = np.zeros((self.n_parts_pretrained, self.H, 2))
        self.lear_state_traj = np.zeros((self.n_parts_learned, self.H, shared_cfg.env.nS))
        self.lear_action_traj = np.zeros((self.n_parts_learned, self.H, 2))
        self.pred_cost_pret = np.zeros((self.H, self.n_parts_pretrained))
        self.pred_cost_lear = np.zeros((self.H, self.n_parts_learned))

        self.save_dirs = [shared_cfg.save_dir1, shared_cfg.save_dir2]

    # --------------------------------------------------------------------------
    # SAMPLE ACTIONS
    # --------------------------------------------------------------------------
    def computeAction(self, states):
        """Return control action given the current machine state"""
        self.pret_state_traj[:, self.t, :] = states[:self.n_parts_pretrained, :]
        self.lear_state_traj[:, self.t, :] = states[self.n_parts_pretrained:, :]

        # At least one learned part, and not on the very first step: retrain the model
        if self.n_parts_learned > 0 and self.t != 0 and self.t % self.train_freq == 0:
            print("Training model...")
            obs_in = self.lear_state_traj[:, self.t-self.train_freq:self.t, :].reshape(-1, self.lear_state_traj.shape[-1])
            obs_out = self.lear_state_traj[:, self.t-self.train_freq+1:self.t+1, :].reshape(-1, self.lear_state_traj.shape[-1])
            acs = self.lear_action_traj[:, self.t-self.train_freq:self.t, :].reshape(-1, self.lear_action_traj.shape[-1])
            self.policyLear.train(obs_in, obs_out, acs)

        action = np.zeros((self.s_cfg.env.n_parts, 2))
        for part in range(self.s_cfg.env.n_parts):
            print("Sampling actions %d/%d" % (part, self.s_cfg.env.n_parts))
            if part < self.n_parts_pretrained: # Pretrained policy
                action[part, :], self.pred_cost_pret[self.t,part] = self.policyPret.act(states[part, :], self.t, get_pred_cost=True)
            else: # Learned policy
                if self.t < self.train_freq: # Do not predict cost
                    action[part, :] = self.policyLear.act(states[part, :], self.t, get_pred_cost=False)
                    self.pred_cost_lear[self.t,part-self.n_parts_pretrained] = 0
                else:
                    action[part, :], self.pred_cost_lear[self.t,part-self.n_parts_pretrained] = \
                        self.policyLear.act(states[part, :], self.t, get_pred_cost=True)

        self.pret_action_traj[:, self.t, :] = action[:self.n_parts_pretrained, :]
        self.lear_action_traj[:, self.t, :] = action[self.n_parts_pretrained:, :]

        self.t+=1

        return action

    def initAction(self):
        # Init with 1.125, 110
        print("Initial action is 1.125, 110")
        return np.ones((self.s_cfg.env.n_parts, 2)) * [1.125, 110]

    def loop(self):
        self.sendAction(self.initAction())
        while self.t < self.H:
            states = self.getStates()
            actions = self.computeAction(states)
            self.sendAction(actions)
            self.log()

    def log(self):
        np.save("tttpret_state_traj.npy", self.pret_state_traj)
        np.save("tttpret_action_traj.npy", self.pret_action_traj)
        np.save("tttpret_pred_cost.npy", self.pred_cost_pret)
        np.save("tttlear_state_traj.npy", self.lear_state_traj)
        np.save("tttlear_action_traj.npy", self.lear_action_traj)
        np.save("tttlear_pred_cost.npy", self.pred_cost_lear)