def main(env, ctrl_type, ctrl_args, overrides, logdir, args):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    logger.info('\n' + pprint.pformat(cfg))

    # add the part of popsize
    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)

    cfg.exp_cfg.misc = copy.copy(cfg)
    exp = MBExperiment(cfg.exp_cfg)

    if not os.path.exists(exp.logdir):
        os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
def main(env, ctrl_type, ctrl_args, overrides, logdir):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    os.makedirs(exp.logdir)
    config_dict = cfg.toDict()
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(config_dict))
    with open(os.path.join(exp.logdir, "variant.json"), "w") as f:
        json.dump(config_dict, f, indent=2, sort_keys=True, cls=MyEncoder)
    save_git_info(exp.logdir)

    exp.run_experiment()
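# ---------------------------------------------------------------------------
# NOTE: minimal sketch of a command-line entry point for the launchers above.
# `main` expects `ctrl_args` as a list of (key, value) pairs and `overrides`
# as a list of [dotted.config.path, value] pairs, which `create_config` merges
# into the nested DotMap configuration. The flag names and the hard-coded
# "MPC" controller type below are illustrative assumptions, not this
# repository's actual CLI.
# ---------------------------------------------------------------------------
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-env", type=str, required=True,
                        help="Environment name understood by create_config")
    parser.add_argument("-ca", "--ctrl_arg", action="append", nargs=2,
                        default=[], help="Controller argument as: KEY VALUE")
    parser.add_argument("-o", "--override", action="append", nargs=2,
                        default=[], help="Config override as: dotted.path VALUE")
    parser.add_argument("-logdir", type=str, default="log")
    args = parser.parse_args()

    # e.g. python mbexp.py -env cartpole -o exp_cfg.exp_cfg.ntrain_iters 50
    main(args.env, "MPC", args.ctrl_arg, args.override, args.logdir)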
def main(
    env,
    ctrl_type,
    ctrl_args,
    overrides,
    model_dir,
    logdir,
    init_iter,
    last_iter,
    nrecord,
    rawdir,
):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir])
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"])
    overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"])
    overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"])
    overrides.append(["exp_cfg.exp_cfg.init_iter", str(init_iter)])
    overrides.append(["exp_cfg.exp_cfg.ntrain_iters", str(last_iter)])
    overrides.append(["exp_cfg.log_cfg.nrecord", str(nrecord)])
    overrides.append(["exp_cfg.log_cfg.rawdir", str(rawdir)])

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    if os.path.exists(exp.logdir):
        overwrite = user_prompt("{} already exists. Overwrite?".format(
            exp.logdir))
        if not overwrite:
            return
    else:
        os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()

    print("Saved to")
    print(exp.logdir)
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir])
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"])
    overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"])
    overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"])
    overrides.append(["exp_cfg.exp_cfg.ntrain_iters", "1"])
    overrides.append(["exp_cfg.log_cfg.nrecord", "1"])

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    # Evaluate a pretrained model: no initial rollouts, a single training
    # iteration, no recording, and 200 evaluation rollouts.
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir])
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"])
    overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"])
    overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"])
    overrides.append(["exp_cfg.exp_cfg.ntrain_iters", "1"])
    overrides.append(["exp_cfg.log_cfg.nrecord", "0"])
    overrides.append(["exp_cfg.exp_cfg.nrollouts_per_iter", "200"])
    overrides.append(["exp_cfg.log_cfg.neval", "200"])

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)

    # Load the value function from the same pretrained model directory.
    cfg.val_cfg.model_init_cfg.load_model = True
    cfg.val_cfg.model_init_cfg.model_dir = model_dir

    # Teacher and value-function bootstrapping are disabled for this run.
    cfg.exp_cfg.exp_cfg.use_teacher = False
    if cfg.exp_cfg.exp_cfg.use_teacher:
        cfg.exp_cfg.exp_cfg.teacher = cfg.exp_cfg.sim_cfg.env.teacher()

    cfg.exp_cfg.exp_cfg.use_value = False
    if cfg.exp_cfg.exp_cfg.use_value:
        cfg.exp_cfg.exp_cfg.value = DeepValueFunction(cfg.val_cfg)
        cfg.exp_cfg.exp_cfg.ninit_rollouts = 0
    cfg.ctrl_cfg.value_func = cfg.exp_cfg.exp_cfg.value
    cfg.ctrl_cfg.use_value = cfg.exp_cfg.exp_cfg.use_value

    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
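# ---------------------------------------------------------------------------
# NOTE: the evaluation launchers above configure everything through string
# overrides such as ["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"].
# create_config is not shown in this file; the sketch below only illustrates,
# as an assumption about its behaviour, how a dotted-path override can be
# applied to a DotMap. Any value parsing (e.g. "True" -> bool) is omitted.
# ---------------------------------------------------------------------------
from dotmap import DotMap


def apply_override(cfg, dotted_path, value):
    """Illustrative helper: walk `cfg` along `dotted_path` and set the leaf."""
    keys = dotted_path.split(".")
    node = cfg
    for key in keys[:-1]:
        node = getattr(node, key)  # DotMap auto-creates intermediate nodes
    setattr(node, keys[-1], value)
    return cfg


# Example mirroring one of the overrides appended above:
#   cfg = apply_override(DotMap(), "exp_cfg.exp_cfg.ninit_rollouts", "0")
#   cfg.exp_cfg.exp_cfg.ninit_rollouts  -> "0"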
class Cluster:
    """ Computes optimal process parameters, at each layer, given feedback
    obtained from the machine sensors.

    Arguments:
        shared_cfg (dotmap):
            - **env.n_parts** (*int*): Total number of parts built under feedback control.
            - **env.horizon** (*int*): Markov Decision Process horizon (here, the number of layers).
            - **env.nS** (*int*): Dimension of the state vector.
            - **comms** (*dotmap*): Parameters for communication with other classes.
        pretrained_cfg (dotmap):
            - **n_parts** (*int*): Number of parts built under this control scheme.
            - **ctrl_cfg** (*dotmap*): Configuration parameters passed to the MPC class.
        learned_cfg (dotmap):
            - **n_parts** (*int*): Number of parts built under this control scheme.
            - **ctrl_cfg** (*dotmap*): Configuration parameters passed to the MPC class.
    """
    def __init__(self, shared_cfg, pretrained_cfg, learned_cfg):
        self.s_cfg = shared_cfg
        self.c_pre_cfg = pretrained_cfg
        self.c_ler_cfg = learned_cfg

        self.policyPret = MPC(pretrained_cfg.ctrl_cfg)
        self.policyLear = MPC(learned_cfg.ctrl_cfg)

        self.t = 0
        self.H = shared_cfg.env.horizon
        self.train_freq = learned_cfg.train_freq

        self.n_parts = shared_cfg.env.n_parts
        self.n_parts_pretrained = pretrained_cfg.n_parts
        self.n_parts_learned = learned_cfg.n_parts
        assert self.n_parts_pretrained + self.n_parts_learned == self.n_parts

        self.pret_state_traj = np.zeros(
            (self.n_parts_pretrained, self.H, shared_cfg.env.nS))
        self.pret_action_traj = np.zeros((self.n_parts_pretrained, self.H, 2))
        self.lear_state_traj = np.zeros(
            (self.n_parts_learned, self.H, shared_cfg.env.nS))
        self.lear_action_traj = np.zeros((self.n_parts_learned, self.H, 2))

        self.pred_cost_pret = np.zeros((self.H, self.n_parts_pretrained))
        self.pred_cost_lear = np.zeros((self.H, self.n_parts_learned))

        self.save_dirs = [shared_cfg.save_dir1, shared_cfg.save_dir2]

        self.clearComms()

    # --------------------------------------------------------------------------
    # COMMS FUNCTIONS
    # --------------------------------------------------------------------------
    def clearComms(self):
        """Remove stale RDY folders left over from a previous run."""
        cfg = self.s_cfg.comms
        dir_action = cfg.dir + cfg.action.rdy_name
        dir_state = cfg.dir + cfg.state.rdy_name
        if os.path.isdir(dir_action):
            os.rmdir(dir_action)
        if os.path.isdir(dir_state):
            os.rmdir(dir_state)

    def getStates(self):
        """Load state vectors uploaded to the server by the `Machine` class.

        This function waits for the `comms.dir/comms.state.rdy_name` folder to
        be created by the `Machine` class before reading the file where the
        states are located, `comms.dir/comms.state.f_name`.

        Returns:
            np.array: State vector with shape (`n_parts`, `nS`)
        """
        print('Waiting for states...')
        dir = self.s_cfg.comms.dir
        cfg = self.s_cfg.comms.state
        rdy = dir + cfg.rdy_name

        # Wait until RDY signal is provided
        while not os.path.isdir(rdy):
            pass
        os.rmdir(rdy)  # Delete RDY

        # Read data to array
        states = np.load(dir + cfg.f_name)
        print('States received')
        return states

    def sendAction(self, actions):
        """Saves the computed actions.

        Signals the `Machine` class that actions are ready to be downloaded by
        locally creating the `comms.dir/comms.action.rdy_name` folder.

        Arguments:
            actions (np.array): Action vector with shape (`n_parts`, `nU`)
        """
        dir = self.s_cfg.comms.dir
        cfg = self.s_cfg.comms.action

        # Write actions into npy file
        np.save(dir + cfg.f_name, actions)
        os.mkdir(dir + cfg.rdy_name)  # RDY signal
        print('Actions saved')

    # --------------------------------------------------------------------------
    # SAMPLE ACTIONS
    # --------------------------------------------------------------------------
    def computeAction(self, states):
        """Computes the control actions given the observed system states.

        Arguments:
            states (np.array): Observed states, shape (`n_parts`, `nS`)

        Returns:
            np.array: Computed actions, with shape (`n_parts`, `nU`)
        """
        self.pret_state_traj[:, self.t, :] = states[:self.n_parts_pretrained, :]
        self.lear_state_traj[:, self.t, :] = states[self.n_parts_pretrained:, :]

        # Train only if at least one part uses the learned policy, and never
        # at the first step
        if self.n_parts_learned > 0 and self.t != 0 and (
                self.t) % self.train_freq == 0:
            print("Training model...")
            obs_in = self.lear_state_traj[:, self.t -
                                          self.train_freq:self.t, :].reshape(
                                              -1, self.lear_state_traj.shape[-1])
            obs_out = self.lear_state_traj[:, self.t - self.train_freq +
                                           1:self.t + 1, :].reshape(
                                               -1, self.lear_state_traj.shape[-1])
            acs = self.lear_action_traj[:, self.t -
                                        self.train_freq:self.t, :].reshape(
                                            -1, self.lear_action_traj.shape[-1])
            self.policyLear.train(obs_in, obs_out, acs)

        # COMPUTE ACTION
        action = np.zeros((self.s_cfg.env.n_parts, 2))
        lastTempId = None
        for part in range(self.s_cfg.env.n_parts):
            print("Sampling actions %d/%d" % (part, self.s_cfg.env.n_parts))
            if part < self.n_parts_pretrained:  # Pretrained policy
                action[part, :], self.pred_cost_pret[
                    self.t, part] = self.policyPret.act(states[part, :],
                                                        self.t,
                                                        get_pred_cost=True)
            else:  # Learned policy
                # Change target
                if self.c_ler_cfg.ctrl_cfg.change_target:
                    for i in range(len(
                            self.c_ler_cfg.ctrl_cfg.n_parts_targets)):
                        if (part - self.n_parts_pretrained
                            ) < self.c_ler_cfg.ctrl_cfg.n_parts_targets[i]:
                            if not i == lastTempId:
                                lastTempId = i
                                self.policyLear.changeTargetCost(
                                    self.c_ler_cfg.ctrl_cfg.targets[i])
                            break

                if self.t < self.train_freq:  # Do not predict cost
                    action[part, :] = self.policyLear.act(states[part, :],
                                                          self.t,
                                                          get_pred_cost=False)
                    self.pred_cost_lear[self.t,
                                        part - self.n_parts_pretrained] = 0
                else:
                    action[part, :], self.pred_cost_lear[self.t, part - self.n_parts_pretrained] = \
                        self.policyLear.act(states[part, :], self.t, get_pred_cost=True)

                # Force inputs from q/v, q/sqrt(v)
                if (self.c_ler_cfg.ctrl_cfg.force.on and  # setting enabled
                        (part >= self.c_ler_cfg.ctrl_cfg.force.start_part - 1) and  # suitable part
                        self.t >= self.c_ler_cfg.ctrl_cfg.force.init_buffer - 1):  # past initial buffer time
                    part_rel = part - self.c_ler_cfg.ctrl_cfg.force.start_part + 1  # part w.r.t. first forced part
                    t_rel = self.t - self.c_ler_cfg.ctrl_cfg.force.init_buffer + 1  # t w.r.t. first event t
                    n_after_event = t_rel % self.c_ler_cfg.ctrl_cfg.force.delta  # t w.r.t. last event t
                    print(
                        "t %d, t_rel %d, n_after_event %d, part %d, part_rel %d"
                        % (self.t, t_rel, n_after_event, part, part_rel))
                    part_repeat = int(
                        part_rel /
                        (self.c_ler_cfg.ctrl_cfg.force.n_parts *
                         len(self.c_ler_cfg.ctrl_cfg.force.n_repeats))
                    )  # determines whether it is the upper or lower bound
                    part_repeat_2 = part_rel % (
                        self.c_ler_cfg.ctrl_cfg.force.n_parts *
                        len(self.c_ler_cfg.ctrl_cfg.force.n_repeats))
                    repeat_i = int(part_repeat_2 /
                                   self.c_ler_cfg.ctrl_cfg.force.n_parts
                                   )  # determines how many repeats
                    print(
                        "part_repeat %d, part_repeat2 %d, repeat_i %d, n_repeats %d"
                        % (part_repeat, part_repeat_2, repeat_i,
                           self.c_ler_cfg.ctrl_cfg.force.n_repeats[repeat_i]))
                    if n_after_event < self.c_ler_cfg.ctrl_cfg.force.n_repeats[
                            repeat_i]:  # Update frequency
                        lev = int(t_rel / self.c_ler_cfg.ctrl_cfg.force.delta
                                  )  # Update number
                        v = self.c_ler_cfg.ctrl_cfg.force.fixed_speed
                        if part_repeat == 0:  # Upper bound
                            upper = self.c_ler_cfg.ctrl_cfg.force.upper_init + \
                                self.c_ler_cfg.ctrl_cfg.force.upper_delta * lev
                            q = upper * np.sqrt(v)
                            print(
                                "For part %d, power forced %d (upper limit %d)"
                                % (part, q, upper))
                        else:  # Lower bound
                            lower = self.c_ler_cfg.ctrl_cfg.force.lower_init + \
                                self.c_ler_cfg.ctrl_cfg.force.lower_delta * lev
                            q = lower * v
                            print(
                                "For part %d, power forced %d (lower limit %d)"
                                % (part, q, lower))
                        action[part, :] = [v, q]

        self.pret_action_traj[:, self.t, :] = action[:self.n_parts_pretrained, :]
        self.lear_action_traj[:, self.t, :] = action[self.n_parts_pretrained:, :]

        self.t += 1
        return action

    def initAction(self):
        """ Returns the initial action vector.

        This function is required because an initial layer must be built
        before any feedback is available.

        Returns:
            np.array: Initial action vector with shape (`n_parts`, `nU`)
        """
        return np.ones(
            (self.s_cfg.env.n_parts, 2)) * self.s_cfg.env.init_params

    def log(self):
        """ Logs the state and action trajectories, as well as the predicted
        cost, which may be of interest when tuning some algorithmic parameters.
        """
        for i in range(len(self.save_dirs)):
            np.save(self.save_dirs[i] + "pret_state_traj.npy",
                    self.pret_state_traj)
            np.save(self.save_dirs[i] + "pret_action_traj.npy",
                    self.pret_action_traj)
            np.save(self.save_dirs[i] + "pret_pred_cost.npy",
                    self.pred_cost_pret)
            np.save(self.save_dirs[i] + "lear_state_traj.npy",
                    self.lear_state_traj)
            np.save(self.save_dirs[i] + "lear_action_traj.npy",
                    self.lear_action_traj)
            np.save(self.save_dirs[i] + "lear_pred_cost.npy",
                    self.pred_cost_lear)

    def loop(self):
        """ While within the time horizon, read the states provided by the
        `Machine` class, and compute and save the corresponding actions.

        Allows the class functionality to be conveniently used as follows::

            cluster = Cluster(s_cfg, cp_cfg, cl_cfg)
            cluster.loop()
        """
        self.sendAction(self.initAction())
        while self.t < self.H:
            states = self.getStates()
            actions = self.computeAction(states)
            self.sendAction(actions)
            self.log()
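# ---------------------------------------------------------------------------
# NOTE: the comms protocol documented in Cluster.getStates/sendAction is a
# folder-based handshake over a shared directory: the writer saves an .npy
# file and then creates an empty RDY folder; the reader polls for that folder,
# deletes it, and loads the file. The sketch below shows the Machine-side half
# of that handshake under the same comms dotmap layout. The class and method
# names are assumptions for illustration; only the handshake itself mirrors
# the Cluster code above.
# ---------------------------------------------------------------------------
import os
import time

import numpy as np


class MachineCommsSketch:
    """Hypothetical Machine-side counterpart of Cluster's folder handshake."""

    def __init__(self, comms_cfg):
        # Same dotmap layout as shared_cfg.comms:
        # dir, state.{f_name, rdy_name}, action.{f_name, rdy_name}
        self.cfg = comms_cfg

    def sendStates(self, states):
        # Mirror of Cluster.getStates: write the .npy first, then raise RDY.
        np.save(self.cfg.dir + self.cfg.state.f_name, states)
        os.mkdir(self.cfg.dir + self.cfg.state.rdy_name)

    def getActions(self):
        # Mirror of Cluster.sendAction: poll for RDY, consume it, read file.
        rdy = self.cfg.dir + self.cfg.action.rdy_name
        while not os.path.isdir(rdy):
            time.sleep(0.1)  # sleep instead of the busy-wait used above
        os.rmdir(rdy)
        return np.load(self.cfg.dir + self.cfg.action.f_name)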
class Cluster:
    def __init__(self, shared_cfg, pretrained_cfg, learned_cfg):
        self.s_cfg = shared_cfg
        self.c_pre_cfg = pretrained_cfg
        self.c_ler_cfg = learned_cfg

        self.policyPret = MPC(pretrained_cfg.ctrl_cfg)
        self.policyLear = MPC(learned_cfg.ctrl_cfg)

        self.t = 0
        self.H = shared_cfg.env.horizon
        self.train_freq = learned_cfg.train_freq

        self.n_parts = shared_cfg.env.n_parts
        self.n_parts_pretrained = pretrained_cfg.n_parts
        self.n_parts_learned = learned_cfg.n_parts
        assert self.n_parts_pretrained + self.n_parts_learned == self.n_parts

        self.pret_state_traj = np.zeros((self.n_parts_pretrained, self.H, shared_cfg.env.nS))
        self.pret_action_traj = np.zeros((self.n_parts_pretrained, self.H, 2))
        self.lear_state_traj = np.zeros((self.n_parts_learned, self.H, shared_cfg.env.nS))
        self.lear_action_traj = np.zeros((self.n_parts_learned, self.H, 2))

        self.pred_cost_pret = np.zeros((self.H, self.n_parts_pretrained))
        self.pred_cost_lear = np.zeros((self.H, self.n_parts_learned))

        self.save_dirs = [shared_cfg.save_dir1, shared_cfg.save_dir2]

    # --------------------------------------------------------------------------
    # SAMPLE ACTIONS
    # --------------------------------------------------------------------------
    def computeAction(self, states):
        """Return control action given the current machine state"""
        self.pret_state_traj[:, self.t, :] = states[:self.n_parts_pretrained, :]
        self.lear_state_traj[:, self.t, :] = states[self.n_parts_pretrained:, :]

        # At least one part learned, not trained first step
        if self.n_parts_learned > 0 and self.t != 0 and (self.t) % self.train_freq == 0:
            print("Training model...")
            obs_in = self.lear_state_traj[:, self.t - self.train_freq:self.t, :].reshape(
                -1, self.lear_state_traj.shape[-1])
            obs_out = self.lear_state_traj[:, self.t - self.train_freq + 1:self.t + 1, :].reshape(
                -1, self.lear_state_traj.shape[-1])
            acs = self.lear_action_traj[:, self.t - self.train_freq:self.t, :].reshape(
                -1, self.lear_action_traj.shape[-1])
            self.policyLear.train(obs_in, obs_out, acs)

        action = np.zeros((self.s_cfg.env.n_parts, 2))
        for part in range(self.s_cfg.env.n_parts):
            print("Sampling actions %d/%d" % (part, self.s_cfg.env.n_parts))
            if part < self.n_parts_pretrained:  # Pretrained policy
                action[part, :], self.pred_cost_pret[self.t, part] = self.policyPret.act(
                    states[part, :], self.t, get_pred_cost=True)
            else:  # Learned policy
                if self.t < self.train_freq:  # Do not predict cost
                    action[part, :] = self.policyLear.act(
                        states[part, :], self.t, get_pred_cost=False)
                    self.pred_cost_lear[self.t, part - self.n_parts_pretrained] = 0
                else:
                    action[part, :], self.pred_cost_lear[self.t, part - self.n_parts_pretrained] = \
                        self.policyLear.act(states[part, :], self.t, get_pred_cost=True)

        self.pret_action_traj[:, self.t, :] = action[:self.n_parts_pretrained, :]
        self.lear_action_traj[:, self.t, :] = action[self.n_parts_pretrained:, :]

        self.t += 1
        return action

    def initAction(self):
        # Init with 1.125, 110
        print("Initial action is 1.125, 110")
        return np.ones((self.s_cfg.env.n_parts, 2)) * [1.125, 110]

    def loop(self):
        self.sendAction(self.initAction())
        while self.t < self.H:
            states = self.getStates()
            actions = self.computeAction(states)
            self.sendAction(actions)
            self.log()

    def log(self):
        np.save("tttpret_state_traj.npy", self.pret_state_traj)
        np.save("tttpret_action_traj.npy", self.pret_action_traj)
        np.save("tttpret_pred_cost.npy", self.pred_cost_pret)
        np.save("tttlear_state_traj.npy", self.lear_state_traj)
        np.save("tttlear_action_traj.npy", self.lear_action_traj)
        np.save("tttlear_pred_cost.npy", self.pred_cost_lear)