def run_loop(self, n_unrolls):
    """Runs the actor loop: step the shell (policy) and the environment,
    accumulate transitions, and ship completed trajectories to the learner.

    Args:
      n_unrolls: number of loop iterations to run, or None to run forever.

    NOTE(review): indentation was reconstructed from a collapsed source
    line -- verify the nesting against the original file.
    """
    ts = self._env.reset()
    # Start a fresh trajectory seeded with the reset timestep and the
    # shell's current recurrent state.
    self._traj.reset()
    self._traj.start(next_state=self._shell.next_state, **dict(ts._asdict()))
    i = 0
    system_logs = {}
    while True:
        if n_unrolls is not None:
            if i == n_unrolls:
                return
        with U.Timer() as shell_step_timer:
            step_output = self._shell.step(step_type=ts.step_type,
                                           reward=ts.reward,
                                           observation=ts.observation)
        with U.Timer() as env_step_timer:
            ts = self._env.step(step_output.action)
        self._traj.add(step_output=step_output, **dict(ts._asdict()))
        # +1: the trajectory also holds the bootstrap timestep beyond
        # traj_length transitions.
        if len(self._traj) == self._traj_length + 1:
            with U.Timer() as send_experience_timer:
                exps = self._traj.debatch_and_stack()
                self._traj.reset()
                self._send_experiences(exps)
                # Begin the next trajectory from the current timestep so no
                # transition is dropped between trajectories.
                self._traj.start(next_state=self._shell.next_state,
                                 **dict(ts._asdict()))
            system_logs['put_experience_async_sec'] = send_experience_timer.to_seconds()
        for logger in self._system_loggers:
            logger.write(
                dict(shell_step_time_sec=shell_step_timer.to_seconds(),
                     env_step_time_sec=env_step_timer.to_seconds(),
                     **system_logs))
        i += 1
def _scip_solve(self, solver):
    """solves a mip/lp using scip

    Args:
      solver: a pyscipopt ``Model`` already loaded with the problem, or
        None to create a fresh one.

    Returns:
      Tuple ``(ass, obj, mip_stats)``: variable-name -> value assignment,
      float objective value, and a ConfigDict of solver statistics.

    Raises:
      AssertionError: if the final status is anything other than 'optimal'.
        NOTE(review): ``assert`` is stripped under ``python -O``; consider
        whether a real exception is wanted here.

    NOTE(review): indentation was reconstructed from a collapsed source
    line -- in particular, confirm which of the parameter-setting calls
    below belong inside the ``disable_maxcuts`` branch.
    """
    if solver is None:
        solver = Model()
    solver.hideOutput()
    if self.config.disable_maxcuts:
        # Disable cuts / propagation / presolve rounds so solver-side
        # heuristics do not shrink the search tree.
        for param in [
            'separating/maxcuts', 'separating/maxcutsroot',
            'propagating/maxrounds', 'propagating/maxroundsroot',
            'presolving/maxroundsroot'
        ]:
            solver.setIntParam(param, 0)
        solver.setBoolParam('conflict/enable', False)
        solver.setPresolve(SCIP_PARAMSETTING.OFF)
    solver.setBoolParam('randomization/permutevars', True)
    # seed is set to 0 permanently.
    solver.setIntParam('randomization/permutationseed', 0)
    solver.setIntParam('randomization/randomseedshift', 0)
    with U.Timer() as timer:
        solver.optimize()
    assert solver.getStatus() == 'optimal', solver.getStatus()
    obj = float(solver.getObjVal())
    ass = {var.name: solver.getVal(var) for var in solver.getVars()}
    mip_stats = ConfigDict(mip_work=solver.getNNodes(),
                           n_cuts=solver.getNCuts(),
                           n_cuts_applied=solver.getNCutsApplied(),
                           n_lps=solver.getNLPs(),
                           solving_time=solver.getSolvingTime(),
                           pre_solving_time=solver.getPresolvingTime(),
                           time_elapsed=timer.to_seconds())
    return ass, obj, mip_stats
def testUpdate(self):
    """Benchmarks repeated ``learner.update`` calls on one fixed batch and
    prints the mean step time for the first and second halves of the run."""
    global B, T
    if not FLAGS.testUpdate:
        return
    self._setup()
    env = _get_env()
    shell = _create_shell(env)
    learner = _create_learner(env, shell)
    trajectory = self._sample_trajectory(env, shell)
    batch = learner.batch_and_preprocess_trajs(trajectory)
    print('***************')
    print('Starting....')
    print('***************')
    N = 100
    # Warm-up iterations: exclude one-time compilation/caching costs from
    # the timed runs below.
    for _ in range(10):
        learner.update(batch)
    step_times = []
    for _ in trange(N):
        with U.Timer() as timer:
            learner.update(batch)
        step_times.append(timer.to_seconds())
    print(
        f'\nPer update step time taken (First half): {np.mean(step_times[:N // 2])}'
    )
    print(
        f'Per update step time taken (Second half): {np.mean(step_times[N // 2:])}'
    )
    print('Test complete!')
def main(argv):
    """Times 500 environment steps driven by random masked actions."""
    env = make_env()
    ts = env.reset()
    with U.Timer() as timer:
        for _ in trange(500):
            mask = ts.observation['mask']
            # Sample k distinct action indices, weighted by the normalized mask.
            probs = mask / sum(mask)
            action = np.random.choice(len(mask), env.k, replace=False, p=probs)
            ts = env.step(action)
    print('Total time taken: ', timer.to_seconds())
def main(_):
    """Benchmarks SCIP model copying, then repeatedly solves the instance
    with a random subset of variables fixed to a known feasible solution."""
    milp = get_sample('milp-cauction-300-filtered', 'train', 102)
    mip = SCIPMIPInstance.fromMIPInstance(milp.mip)
    copy_times = []
    for _ in range(10):
        with U.Timer() as timer:
            model = mip.get_scip_model()
        copy_times.append(timer.to_seconds())
    # Only the last 5 copies are averaged.
    print(f'Avg time to copy the model: {np.mean(copy_times[5:])}')
    for _ in tqdm(range(20)):
        # Fix 500 randomly chosen variables to their feasible-solution values.
        shuffled_names = np.random.permutation(list(milp.feasible_solution.keys()))
        fixed_ass = {
            name: milp.feasible_solution[name]
            for name in shuffled_names[:500]
        }
        ass, obj = fix_and_solve(model, fixed_ass)
        print(obj)
        # Drop the transformed problem so the same model can be solved again.
        model.freeTransform()
def run_branch_and_bound_scip(m, heuristic):
    """Runs SCIP branch-and-bound on ``m`` with ``heuristic`` plugged in,
    and returns a ConfigDict of solve statistics.

    ``m`` must be presolved before passing.
    """
    m.includeHeur(
        heuristic,
        "PyEvalHeur",
        "custom heuristic implemented in python to evaluate RL agent",
        "Y",
        timingmask=SCIP_HEURTIMING.BEFORENODE)
    with U.Timer() as solve_timer:
        m.optimize()
    # Collect solver statistics.
    return ConfigDict(mip_work=m.getNNodes(),
                      n_cuts=m.getNCuts(),
                      n_cuts_applied=m.getNCutsApplied(),
                      n_lps=m.getNLPs(),
                      pre_solving_time=m.getPresolvingTime(),
                      solving_time=m.getSolvingTime(),
                      time_elapsed=solve_timer.to_seconds())
def main(self):
    """Learner training loop: fetch a batch, run one update step, publish
    variables / checkpoint on schedule, and emit system-profiling logs.

    NOTE(review): indentation was reconstructed from a collapsed source
    line -- verify the nesting against the original file.
    """
    for _ in range(self.config.n_train_steps):
        system_logs = dict()
        # fetch the next training batch
        with U.Timer() as batch_timer:
            batch = self._exp_fetcher.get()
        with U.Timer() as step_timer:
            # run update step on the sampled batch
            feed_dict = {
                ph: val
                for ph, val in zip(nest.flatten(self._traj_phs),
                                   nest.flatten(batch))
            }
            profile_kwargs = {}
            # On the designated step, capture a full TF execution trace.
            if self.global_step == self._profile_step:
                profile_kwargs = dict(options=tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE),
                                      run_metadata=tf.RunMetadata())
            log_vals = self._agent.update(self.sess, feed_dict, profile_kwargs)
            if profile_kwargs:
                self._save_profile(**profile_kwargs)
        with U.Timer() as log_timer:
            for logger in self._loggers:
                logger.write(log_vals)
        # after first sess.run finishes send the metagraph.
        if self.global_step == 1:
            self._send_metagraph()
        # publish the variables if required.
        if self._publish_tracker.track_increment():
            with U.Timer() as publish_timer:
                self._publish_variables()
            system_logs['publish_time_sec'] = publish_timer.to_seconds()
        # Checkpoint if required
        if self.global_step % self._checkpoint_every == 0:
            with U.Timer() as ckpt_timer:
                self._create_ckpt()
            system_logs['ckpt_time_sec'] = ckpt_timer.to_seconds()
        with U.Timer() as system_log_timer:
            # log system profile
            for logger in self._system_loggers:
                logger.write(
                    dict(global_step=self.global_step,
                         sps=self._batch_size * self._traj_length /
                         float(step_timer.to_seconds()),
                         per_step_time_sec=step_timer.to_seconds(),
                         batch_fetch_time_sec=batch_timer.to_seconds(),
                         **system_logs))
        # NOTE(review): this value is assigned after the only write that
        # consumes system_logs, and system_logs is recreated at the top of
        # the next iteration -- so 'log_time_sec' is never actually logged.
        # Confirm whether it should be carried into the next iteration's
        # system log instead.
        system_logs['log_time_sec'] = log_timer.to_seconds(
        ) + system_log_timer.to_seconds()
    self._publish_queue.put(None)  # exit the thread once training ends.