Example #1
0
    def setup(self, argv):
        """After reading the component name this function will be called.

        Parses ``argv`` with the module-level ``parser``, mirrors the parsed
        flags onto the instance, wraps the env/sess/agent (and, when present,
        eval) configs in ``ConfigDict`` and hands all four to
        ``check_config_compatibility``.

        Args:
            argv: Argument list forwarded to ``parser.parse_args``.
        """
        parsed = parser.parse_args(args=argv)
        self.args = parsed

        # Flags stored on the instance under the same name they have on the
        # parsed namespace.
        for flag in ('experiment_id', 'work_id', 'experiment_name',
                     'batch_size', 'traj_length', 'seed', 'results_folder'):
            setattr(self, flag, getattr(parsed, flag))
        # The one flag that is renamed on its way onto the instance.
        self.hyper_params = parsed.hyper_configs

        def as_config(raw):
            return ConfigDict(to_nested_dicts(raw))

        self.env_config = as_config(parsed.env_config)
        self.sess_config = as_config(parsed.sess_config)
        self.agent_config = as_config(parsed.agent_config)

        # eval_config is optional: use an empty config when the parsed
        # namespace has no such attribute.
        if not hasattr(parsed, 'eval_config'):
            self.eval_config = ConfigDict()
        else:
            self.eval_config = as_config(parsed.eval_config)

        check_config_compatibility(self.env_config, self.sess_config,
                                   self.agent_config, self.eval_config)
Example #2
0
def main(argv):
    """Create a tmux-backed experiment, place its programs and launch it.

    The placement depends on ``args.mode``:

    * ``localhost`` - both programs are placed using a local node.
    * ``ssh`` / ``slurm`` - exactly one remote node must be selected by the
      cluster config and filter regex; it is set up with ``RES_FILES`` and
      used together with a local node.

    After launching, the function blocks until a ``KeyboardInterrupt``
    arrives, at which point the experiment is deleted from the cluster.

    Args:
        argv: Full argument vector; ``argv[0]`` is skipped when parsing.

    Raises:
        Exception: If the mode is unknown, or if an ssh/slurm run does not
            resolve to exactly one node.
    """
    args = parser.parse_args(argv[1:])
    assert args.command == 'mode'

    cluster = Cluster.new('tmux')
    exp = cluster.new_experiment(EXP_NAME)
    exp.set_preamble_cmds(PREAMBLE_CMDS)
    serv, cli = create_program(exp)

    mode = args.mode
    if mode == 'localhost':
        local_node = localhost_setup()
        localhost_placement(serv, cli, local_node)
    elif mode in ('ssh', 'slurm'):
        # Both remote modes share the same node discovery and setup steps;
        # only the error text and the placement routine differ.
        loader = NodeLoader(
            ConfigDict(argon.to_nested_dicts(args.cluster_config)),
            args.filter_regex)
        candidates = loader.nodes
        if len(candidates) != 1:
            if mode == 'ssh':
                raise Exception(
                    'For this test condition, please specify just a single ssh node.'
                )
            raise Exception(
                'For this test condition, please specify just a single slurm node.'
            )
        remote_node = candidates[0]
        remote_node.setup(res_files=RES_FILES)
        local_node = localhost_setup()
        if mode == 'ssh':
            ssh_placement(serv, cli, local_node, remote_node)
        else:
            slurm_placement(serv, cli, local_node, remote_node)
    else:
        raise Exception('Unknown mode %s' % mode)

    try:
        cluster.launch(exp)
        # Keep the process alive until the user interrupts it.
        while True:
            time.sleep(100000)
    except KeyboardInterrupt:
        cluster.delete(experiment_name=EXP_NAME)
Example #3
0
def make_env(k_val):
    """Build one environment instance configured with ``k = k_val``.

    Starts from the ``env_config`` flag of the module-level ``args``, applies
    a fixed set of overrides, imports the environment class named by the
    config and instantiates it with id 0 and the seed from ``args``.

    Args:
        k_val: Value stored as ``env_config.k``.

    Returns:
        The constructed environment object.
    """
    cfg = ConfigDict(to_nested_dicts(args.env_config))

    overrides = (
        ('lp_features', False),
        ('k', k_val),
        ('n_local_moves', args.n_local_moves),
        ('primal_gap_reward', True),
        ('delta_reward', False),
        ('disable_maxcuts', args.disable_maxcuts),
    )
    for field, value in overrides:
        setattr(cfg, field, value)

    # The config must describe exactly one graph.
    assert cfg.n_graphs == 1

    env_cls = U.import_obj(cfg.class_name, cfg.class_path)
    return env_cls(id=0, seed=args.seed, **cfg)
Example #4
0
def main(argv):
    """Parse the command-line flags and run a single evaluation.

    Publishes the parsed flags as the module-level ``args``, restricts the
    visible GPUs through ``CUDA_VISIBLE_DEVICES``, resolves the shell, env and
    agent classes from their configs, creates the results directory and runs
    an ``Evaluator``.

    Args:
        argv: Full argument vector; ``argv[0]`` is skipped when parsing.
    """
    global args
    args = parser.parse_args(argv[1:])
    if args.gpu_ids:
        # CUDA_VISIBLE_DEVICES is a comma-separated list of device ids; any
        # other separator does not yield a valid list when several ids are
        # given.
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, args.gpu_ids))
    else:
        # No ids requested: hide every GPU from this process.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''

    sess_config = ConfigDict(to_nested_dicts(args.sess_config))
    env_config = ConfigDict(to_nested_dicts(args.env_config))
    agent_config = ConfigDict(to_nested_dicts(args.agent_config))

    shell_class = U.import_obj(sess_config.shell.class_name,
                               sess_config.shell.class_path)
    env_class = U.import_obj(env_config.class_name, env_config.class_path)

    agent_class = U.import_obj(agent_config.class_name,
                               agent_config.class_path)

    # The results directory depends on the evaluation mode. Precedence:
    # standalone (optionally with a heuristic), then without_agent, then a
    # plain heuristic run, and finally the default "scip" location.
    if args.standalone:
        if args.heuristic:
            results_dir = Path(
                f'/data/nms/tfp/evaluation/standalone/{args.heuristic}/{args.name}/{env_config.graph_start_idx}/'
            )
        else:
            results_dir = Path(
                f'/data/nms/tfp/evaluation/standalone/agent/{args.name}/{env_config.graph_start_idx}/'
            )
    elif args.without_agent:
        results_dir = Path(
            f'/data/nms/tfp/evaluation/without_agent/{args.name}/{env_config.graph_start_idx}/'
        )
    elif args.heuristic:
        results_dir = Path(
            f'/data/nms/tfp/evaluation/{args.heuristic}/{args.name}/{env_config.graph_start_idx}/'
        )
    else:
        results_dir = Path(
            f'/data/nms/tfp/evaluation/scip/{args.name}/{env_config.graph_start_idx}'
        )
    results_dir.mkdir(parents=True, exist_ok=True)

    # The remaining session config entries are forwarded as extra keyword
    # arguments; a shell is only requested when no heuristic was given.
    evaluator = Evaluator(shell_class=shell_class,
                          shell_config=sess_config.shell,
                          agent_class=agent_class,
                          agent_config=agent_config,
                          env_class=env_class,
                          env_config=env_config,
                          seed=args.seed,
                          dataset=env_config.dataset,
                          dataset_type=env_config.dataset_type,
                          graph_start_idx=env_config.graph_start_idx,
                          gap=args.gap,
                          max_nodes=args.max_nodes,
                          batch_size=args.batch_size,
                          n_local_moves=args.n_local_moves,
                          results_dir=results_dir,
                          use_parallel_envs=args.use_parallel_envs,
                          use_threaded_envs=args.use_threaded_envs,
                          heur_frequency=args.heur_frequency,
                          create_shell=(args.heuristic is None),
                          **sess_config)
    evaluator.run(standalone=args.standalone,
                  without_agent=args.without_agent,
                  heuristic=args.heuristic)
    print('Done!')
Example #5
0
def train(argv):
  """Build one experiment per hyper-parameter combination and launch them.

  Nothing happens unless the action selected by ``TurrealParser`` is
  ``create``. Otherwise an experiment is created for every point of the
  sweep (only the first one when ``args.disable_sweep`` is set), a
  ``LiaisonPlacer`` is constructed over all of them, and the experiments are
  launched together with their per-experiment flags and hyper-parameters.

  Args:
    argv: Full argument vector; ``argv[0]`` is skipped when parsing.
  """
  tp = TurrealParser()
  tp.add_external_parser(parser)
  func, external_parser_args = tp.main(argv[1:])
  action = func.__name__.split('action_')[-1]
  if action != 'create':
    return
  args = external_parser_args[0]
  validate_args(args)

  cluster = tp.get_cluster()

  # Specify experiment specific flags here.
  exp_flags = []
  hyper_configs = []
  exps = []
  sweep = hyper.product(
      hyper.discrete('env_config.k', [10, 25]),
      hyper.discrete('agent_config.lr_init', [1e-4, 2e-4]),
      # hyper.discrete('agent_config.lr_init', [1e-4, 2e-4, 3e-4]),
      # hyper.discrete('env_config.graph_start_idx', list(range(8))),
      hyper.discrete('agent_config.ent_dec_init', [2e-2]),
  )
  for work_id, params in enumerate(sweep):
    work_unit = cluster.new_experiment('%s-%d' % (tp.experiment_name, work_id),
                                       env_name='liaison')
    is_first_unit = (work_id == 0)
    # start tensorboard only for the first work unit.
    # NOTE: only the constraints returned for the last work unit are used
    # after the loop.
    coloc_constraints = build_program(
        work_unit,
        args.n_actors,
        ConfigDict(argon.to_nested_dicts(args.resource_req_config)),
        bundle_actors=args.bundle_actors,
        irs_placement=IRS_NODE,
        visualizer_placement=VISUALIZER_NODE,
        with_visualizers=is_first_unit and (not args.without_visualizers),
        with_evaluators=(not args.without_evaluators),
        without_valid_and_test_evaluators=args.without_valid_and_test_evaluators,
        with_irs_proxy=args.use_irs_proxy,
        irs_proxy_placement=IRS_PROXY_NODE)

    unit_flags = [
        '--work_id',
        str(work_id),
        '--hyper_configs',
        str(shlex.quote(json.dumps(params))),
        *hyper.to_commandline(params),
    ]
    exps.append(work_unit)
    exp_flags.append(unit_flags)
    hyper_configs.append(params)
    if args.disable_sweep:
      break

  # Per experiment: processes inside process groups first, then the
  # processes attached directly to the experiment.
  exp_procs = []
  for unit in exps:
    procs = []
    for group in unit.list_process_groups():
      procs.extend(group.list_processes())
    procs.extend(unit.list_processes())
    exp_procs.append(procs)

  print('-----------exp stats-------------')
  print('Number of work units: %d' % len(exps))
  print('Number of processes total: %d' % sum(len(procs) for procs in exp_procs))

  placer = LiaisonPlacer(
      tp.exp_id,
      exps,
      ConfigDict(argon.to_nested_dicts(args.cluster_config)),
      args.filter_nodes_regex,
      args.whitelist_nodes,
      args.spy_measurement_interval,
      pl_constraints=[spec.split(':') for spec in args.pl_constraints],
      coloc_constraints=[
          spec.split(';') for spec in coloc_constraints + args.coloc_constraints
      ],
      gpu_overload_obj_coeff=args.gpu_overload_obj_coeff,
      gpu_load_balancing_obj_coeff=args.gpu_load_balancing_obj_coeff,
      gpu_wu_consolidation_obj_coeff=args.gpu_wu_consolidation_obj_coeff,
      cpu_overload_obj_coeff=args.cpu_overload_obj_coeff,
      cpu_load_balancing_obj_coeff=args.cpu_load_balancing_obj_coeff,
      cpu_wu_consolidation_obj_coeff=args.cpu_wu_consolidation_obj_coeff,
      slurm_colocate_wunit=args.slurm_colocate_wunit,
      slurm_per_gpu_allocation=args.slurm_per_gpu_allocation,
  )

  tp.launch(exps, exp_flags, hyper_configs)