Example #1
  def run_loop(self, n_unrolls):
    ts = self._env.reset()
    self._traj.reset()
    self._traj.start(next_state=self._shell.next_state, **dict(ts._asdict()))
    i = 0
    system_logs = {}
    while True:
      if n_unrolls is not None and i == n_unrolls:
        return
      # Time the policy (shell) step and the environment step separately.
      with U.Timer() as shell_step_timer:
        step_output = self._shell.step(step_type=ts.step_type,
                                       reward=ts.reward,
                                       observation=ts.observation)
      with U.Timer() as env_step_timer:
        ts = self._env.step(step_output.action)
      self._traj.add(step_output=step_output, **dict(ts._asdict()))
      # Trajectory full: send the collected experience and restart from
      # the current timestep.
      if len(self._traj) == self._traj_length + 1:
        with U.Timer() as send_experience_timer:
          exps = self._traj.debatch_and_stack()
          self._traj.reset()
          self._send_experiences(exps)
          self._traj.start(next_state=self._shell.next_state, **dict(ts._asdict()))
        system_logs['put_experience_async_sec'] = send_experience_timer.to_seconds()

      for logger in self._system_loggers:
        logger.write(
            dict(shell_step_time_sec=shell_step_timer.to_seconds(),
                 env_step_time_sec=env_step_timer.to_seconds(),
                 **system_logs))
      i += 1
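All of these examples use `U.Timer` as a context manager and read the elapsed time back with `to_seconds()`. The class itself is not shown on this page; a minimal sketch consistent with that usage (the name and internals here are assumptions, not the actual implementation) could be:

import time

class Timer:
  """Measures wall-clock time spent inside a `with` block."""

  def __enter__(self):
    self._start = time.time()
    return self

  def __exit__(self, exc_type, exc_value, traceback):
    self._end = time.time()

  def to_seconds(self):
    # Elapsed time of the block, in seconds.
    return self._end - self._start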
Example #2
  def _scip_solve(self, solver):
    """solves a mip/lp using scip"""
    if solver is None:
      solver = Model()
    solver.hideOutput()
    if self.config.disable_maxcuts:
      # Turn off cut separation, propagation, conflict analysis and presolving.
      for param in [
          'separating/maxcuts', 'separating/maxcutsroot', 'propagating/maxrounds',
          'propagating/maxroundsroot', 'presolving/maxroundsroot'
      ]:
        solver.setIntParam(param, 0)

      solver.setBoolParam('conflict/enable', False)
      solver.setPresolve(SCIP_PARAMSETTING.OFF)

    solver.setBoolParam('randomization/permutevars', True)
    # Fixed seed of 0 so the variable permutation is deterministic.
    solver.setIntParam('randomization/permutationseed', 0)
    solver.setIntParam('randomization/randomseedshift', 0)

    with U.Timer() as timer:
      solver.optimize()
    assert solver.getStatus() == 'optimal', solver.getStatus()
    obj = float(solver.getObjVal())
    ass = {var.name: solver.getVal(var) for var in solver.getVars()}
    mip_stats = ConfigDict(mip_work=solver.getNNodes(),
                           n_cuts=solver.getNCuts(),
                           n_cuts_applied=solver.getNCutsApplied(),
                           n_lps=solver.getNLPs(),
                           solving_time=solver.getSolvingTime(),
                           pre_solving_time=solver.getPresolvingTime(),
                           time_elapsed=timer.to_seconds())
    return ass, obj, mip_stats
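For context, `_scip_solve` accepts any pyscipopt `Model`. The standalone toy MIP below (invented for illustration) shows the same solve-then-read-back pattern the method relies on:

from pyscipopt import Model

solver = Model()
solver.hideOutput()
x = solver.addVar('x', vtype='I', lb=0, ub=10)
y = solver.addVar('y', vtype='I', lb=0, ub=10)
solver.addCons(3 * x + 2 * y <= 12)
solver.setObjective(x + 2 * y, sense='maximize')

solver.optimize()
assert solver.getStatus() == 'optimal'
print(solver.getObjVal())                                    # objective value
print({v.name: solver.getVal(v) for v in solver.getVars()})  # assignment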
Example #3
    def testUpdate(self):
        global B, T
        if not FLAGS.testUpdate:
            return
        self._setup()
        env = _get_env()
        shell = _create_shell(env)
        learner = _create_learner(env, shell)
        exps = self._sample_trajectory(env, shell)
        batch = learner.batch_and_preprocess_trajs(exps)
        print('***************')
        print('Starting....')
        print('***************')

        update_times = []
        N = 100
        # Warm up so one-time costs (graph building, allocations) do not
        # skew the timings below.
        for _ in range(10):
            learner.update(batch)
        for i in trange(N):
            with U.Timer() as timer:
                learner.update(batch)
            update_times.append(timer.to_seconds())

        print(
            f'\nPer update step time taken (First half): {np.mean(update_times[:N // 2])}'
        )
        print(
            f'Per update step time taken (Second half): {np.mean(update_times[N // 2:])}'
        )
        print('Test complete!')
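The warm-up-then-measure pattern above is generic; a hypothetical helper capturing it (the name `benchmark` is an assumption, with `U.Timer` and numpy used as elsewhere on this page) might read:

def benchmark(fn, warmup=10, iters=100):
    # Run fn a few times first so one-time costs (graph construction,
    # memory allocation) do not contaminate the measurements.
    for _ in range(warmup):
        fn()
    times = []
    for _ in range(iters):
        with U.Timer() as timer:
            fn()
        times.append(timer.to_seconds())
    return np.mean(times)

# e.g. mean_step_time = benchmark(lambda: learner.update(batch))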
Example #4
def main(argv):
  env = make_env()
  ts = env.reset()

  with U.Timer() as timer:
    for i in trange(500):
      # Sample k distinct action indices, weighted by the validity mask.
      mask = ts.observation['mask']
      act = np.random.choice(len(mask), env.k, replace=False, p=mask / sum(mask))
      ts = env.step(act)
  print('Total time taken: ', timer.to_seconds())
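The `np.random.choice` call draws `env.k` distinct action indices with probability proportional to the mask, so masked-out actions are never chosen. A standalone demonstration:

import numpy as np

mask = np.array([1., 0., 1., 1., 0.])  # 1 = action allowed, 0 = forbidden
act = np.random.choice(len(mask), 2, replace=False, p=mask / mask.sum())
print(act)  # two distinct indices; 1 and 4 can never appear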
Example #5
def main(_):
  milp = get_sample('milp-cauction-300-filtered', 'train', 102)
  mip = SCIPMIPInstance.fromMIPInstance(milp.mip)
  times = []
  for _ in range(10):
    with U.Timer() as timer:
      model = mip.get_scip_model()
    times.append(timer.to_seconds())

  # The first five runs are treated as warm-up and excluded from the average.
  print(f'Avg time to copy the model: {np.mean(times[5:])}')

  for _ in tqdm(range(20)):
    # Fix a random subset of 500 variables to their known feasible values.
    fixed_ass = {
        k: milp.feasible_solution[k]
        for k in np.random.permutation(list(milp.feasible_solution.keys()))[:500]
    }
    ass, obj = fix_and_solve(model, fixed_ass)
    print(obj)
    # Free the transformed problem so the model can be modified and
    # re-solved in the next iteration.
    model.freeTransform()
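`fix_and_solve` is not shown on this page. A sketch consistent with the loop above, which calls `freeTransform()` so the model can be modified again before the next solve, could look like this (the body is an assumption):

def fix_and_solve(model, fixed_ass):
  # Pin each chosen variable to its value by collapsing its bounds,
  # then re-optimize and read back the full assignment and objective.
  for var in model.getVars():
    if var.name in fixed_ass:
      model.chgVarLb(var, fixed_ass[var.name])
      model.chgVarUb(var, fixed_ass[var.name])
  model.optimize()
  ass = {v.name: model.getVal(v) for v in model.getVars()}
  return ass, model.getObjVal()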
Example #6
def run_branch_and_bound_scip(m, heuristic):
    # m must be presolved before being passed in.
    m.includeHeur(
        heuristic,
        "PyEvalHeur",
        "custom heuristic implemented in python to evaluate RL agent",
        "Y",
        timingmask=SCIP_HEURTIMING.BEFORENODE)

    with U.Timer() as timer:
        m.optimize()

    # collect stats
    results = ConfigDict(mip_work=m.getNNodes(),
                         n_cuts=m.getNCuts(),
                         n_cuts_applied=m.getNCutsApplied(),
                         n_lps=m.getNLPs(),
                         pre_solving_time=m.getPresolvingTime(),
                         solving_time=m.getSolvingTime(),
                         time_elapsed=timer.to_seconds())
    # m.freeProb()
    return results
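`includeHeur` expects a pyscipopt `Heur` subclass. A minimal no-op heuristic (invented here to show the expected interface; a real agent would construct and submit a solution in `heurexec`) would be:

from pyscipopt import Model, Heur, SCIP_RESULT

class NoOpHeur(Heur):
    def heurexec(self, heurtiming, nodeinfeasible):
        # Report that the heuristic chose not to run at this node.
        return {'result': SCIP_RESULT.DIDNOTRUN}

m = Model()  # in practice, build and presolve the instance first
results = run_branch_and_bound_scip(m, NoOpHeur())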
Example #7
    def main(self):
        for _ in range(self.config.n_train_steps):
            system_logs = dict()

            # fetch the next training batch
            with U.Timer() as batch_timer:
                batch = self._exp_fetcher.get()

            with U.Timer() as step_timer:
                # run update step on the sampled batch
                feed_dict = {
                    ph: val
                    for ph, val in zip(nest.flatten(self._traj_phs),
                                       nest.flatten(batch))
                }
                profile_kwargs = {}
                if self.global_step == self._profile_step:
                    profile_kwargs = dict(
                        options=tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE),
                        run_metadata=tf.RunMetadata())

                log_vals = self._agent.update(self.sess, feed_dict,
                                              profile_kwargs)

                if profile_kwargs:
                    self._save_profile(**profile_kwargs)

            with U.Timer() as log_timer:
                for logger in self._loggers:
                    logger.write(log_vals)

            # After the first sess.run finishes, send the metagraph.
            if self.global_step == 1:
                self._send_metagraph()

            # publish the variables if required.
            if self._publish_tracker.track_increment():
                with U.Timer() as publish_timer:
                    self._publish_variables()
                system_logs['publish_time_sec'] = publish_timer.to_seconds()

            # Checkpoint if required
            if self.global_step % self._checkpoint_every == 0:
                with U.Timer() as ckpt_timer:
                    self._create_ckpt()
                system_logs['ckpt_time_sec'] = ckpt_timer.to_seconds()

            with U.Timer() as system_log_timer:
                # log system profile
                for logger in self._system_loggers:
                    logger.write(
                        dict(global_step=self.global_step,
                             sps=self._batch_size * self._traj_length /
                             float(step_timer.to_seconds()),
                             per_step_time_sec=step_timer.to_seconds(),
                             batch_fetch_time_sec=batch_timer.to_seconds(),
                             **system_logs))
            system_logs['log_time_sec'] = (log_timer.to_seconds() +
                                           system_log_timer.to_seconds())

        self._publish_queue.put(None)  # exit the thread once training ends.
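The `profile_kwargs` branch above uses TF1-style tracing: for one designated step, `sess.run` receives `options`/`run_metadata` and records a full trace. In isolation the pattern looks like this (`sess`, `train_op`, and `feed_dict` are assumed to exist; under TF2 the same symbols live in `tf.compat.v1`):

import tensorflow as tf
from tensorflow.python.client import timeline

run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
sess.run(train_op, feed_dict=feed_dict,
         options=run_options, run_metadata=run_metadata)

# Write a Chrome trace that can be inspected at chrome://tracing.
tl = timeline.Timeline(run_metadata.step_stats)
with open('timeline.json', 'w') as f:
    f.write(tl.generate_chrome_trace_format())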