def get_config():
  """Agent config using the GCN-attention RINS model."""
  config = get_base_config()

  # required fields.
  config.class_path = "liaison.agents.gcn"
  config.class_name = "Agent"

  # Model architecture.
  model = ConfigDict()
  model.class_path = "liaison.agents.models.gcn_attn_rins"
  model.n_prop_layers = 4
  model.node_hidden_layer_sizes = [32]
  model.edge_hidden_layer_sizes = [32]
  model.key_dim = 32
  model.value_dim = 32
  model.num_heads = 4
  model.node_embed_dim = 32
  model.edge_embed_dim = 32
  config.model = model

  config.query_key_product_hidden_layer_sizes = [16]

  # V-trace importance-weight clipping.
  config.clip_rho_threshold = 1.0
  config.clip_pg_rho_threshold = 1.0

  loss = ConfigDict()
  loss.vf_loss_coeff = 1.0
  config.loss = loss
  return config
def _get_agent(self, action_spec):
  """Build an MLPAgent with a standard test configuration.

  The agent is created under a reusable 'mlp' variable scope so repeated
  construction in one graph shares weights.
  """
  cfg = ConfigDict()
  cfg.model = ConfigDict()
  cfg.model.class_path = "liaison.agents.models.mlp"
  cfg.model.hidden_layer_sizes = [64, 64]
  # Learning-rate decay schedule.
  cfg.lr_init = 1e-3
  cfg.lr_min = 1e-4
  cfg.lr_start_dec_step = 1000
  cfg.lr_dec_steps = 1000
  cfg.lr_dec_val = .1
  cfg.lr_dec_approach = 'linear'
  # Entropy-coefficient decay schedule.
  cfg.ent_dec_init = 1
  cfg.ent_dec_min = 0
  cfg.ent_dec_steps = 1000
  cfg.ent_start_dec_step = 1000
  cfg.ent_dec_val = .1
  cfg.ent_dec_approach = 'linear'
  cfg.grad_clip = 1.0
  cfg.discount_factor = 0.99
  # V-trace importance-weight clipping.
  cfg.clip_rho_threshold = 1.0
  cfg.clip_pg_rho_threshold = 1.0
  cfg.loss = ConfigDict()
  cfg.loss.vf_loss_coeff = 1.0
  with tf.variable_scope('mlp', reuse=tf.AUTO_REUSE):
    return MLPAgent(action_spec=action_spec, name='test', seed=42, **cfg)
def get_config():
  """Base optimization/schedule hyperparameters."""
  cfg = ConfigDict()
  cfg.model = ConfigDict()

  # Learning-rate decay schedule.
  cfg.lr_init = 1e-4
  cfg.lr_min = 1e-7
  cfg.lr_start_dec_step = 1000
  cfg.lr_dec_steps = 1000
  cfg.lr_dec_val = .1
  cfg.lr_dec_approach = 'linear'

  # Entropy-coefficient decay schedule.
  cfg.ent_dec_init = 1e-2
  cfg.ent_dec_min = 0.0
  cfg.ent_dec_steps = 1000
  cfg.ent_start_dec_step = 1000
  # dec_val not used for linear scheme
  cfg.ent_dec_val = .1
  cfg.ent_dec_approach = 'linear'

  # specify <= 0 here to disable grad clip
  cfg.grad_clip = 1.0
  cfg.discount_factor = 1.0

  cfg.optimizer = ConfigDict()
  # Options: Adam or RMSProp.
  cfg.optimizer.name = 'Adam'
  # hyperparams for RMSProp
  cfg.optimizer.decay = .9
  cfg.optimizer.momentum = 0.0
  cfg.optimizer.epsilon = 1e-7
  return cfg
def sample(i):
  """Solve one randomized sub-MIP of training instance i and report stats.

  Fixes all but K randomly-chosen integer variables to a known feasible
  assignment, solves the resulting sub-MIP with SCIP (seeded by i), and
  returns solving statistics.

  Args:
    i: integer sample index; doubles as the RNG / SCIP seed.

  Returns:
    ConfigDict with solving_time, determinstic_time (sic) and nnodes.
  """
  seed = i
  np.random.seed(seed)
  milp = get_sample(DATASET, 'train', i % LENGTH_MAP[DATASET]['train'])
  mip = SCIPMIPInstance.fromMIPInstance(milp.mip)

  feasible_ass = milp.feasible_solution
  all_integer_vars = []
  for vname, var in mip.varname2var.items():
    if var.vtype() in ['INTEGER', 'BINARY']:
      # BUG FIX: lstrip('t_') strips ANY leading 't'/'_' characters
      # (e.g. 'tt_x' -> 'x', 'total' -> 'otal'), not the 't_' prefix SCIP
      # adds to transformed variables. Remove the exact prefix instead.
      if vname.startswith('t_'):
        vname = vname[len('t_'):]
      all_integer_vars.append(vname)

  # Unfix K variables; fix the remaining ones to the feasible assignment.
  K = min(len(all_integer_vars), np.random.randint(20, 50))
  chosen = np.random.choice(len(all_integer_vars),
                            len(all_integer_vars) - K,
                            replace=False)
  fixed_ass = {
      all_integer_vars[j]: feasible_ass[all_integer_vars[j]] for j in chosen
  }
  model = mip.fix(fixed_ass)

  # Make SCIP's internal randomization deterministic given the seed.
  model.setBoolParam('randomization/permutevars', True)
  model.setIntParam('randomization/permutationseed', seed)
  model.setIntParam('randomization/randomseedshift', seed)
  model.optimize()

  solving_stats = ConfigDict(model.getSolvingStats())
  results = ConfigDict(
      solving_time=solving_stats.solvingtime,
      # NOTE: misspelled key 'determinstic_time' kept for downstream
      # compatibility.
      determinstic_time=solving_stats.deterministictime,
      nnodes=model.getNNodes(),
  )
  return results
def _get_agent_instance(self):
  """Build an MLPAgent over a bounded integer action spec for tests."""
  action_spec = BoundedArraySpec((10, 20),
                                 np.int32,
                                 0,
                                 N_NODES - 1,
                                 name='test_spec')
  cfg = ConfigDict()
  cfg.model = self._get_model_config()
  # Learning-rate decay schedule.
  cfg.lr_init = 1e-3
  cfg.lr_min = 1e-4
  cfg.lr_start_dec_step = 1000
  cfg.lr_dec_steps = 1000
  cfg.lr_dec_val = .1
  cfg.lr_dec_approach = 'linear'
  # Entropy-coefficient decay schedule.
  cfg.ent_dec_init = 1
  cfg.ent_dec_min = 0
  cfg.ent_dec_steps = 1000
  cfg.ent_start_dec_step = 1000
  cfg.ent_dec_val = .1
  cfg.ent_dec_approach = 'linear'
  cfg.grad_clip = 1.0
  cfg.discount_factor = 0.99
  # V-trace importance-weight clipping.
  cfg.clip_rho_threshold = 1.0
  cfg.clip_pg_rho_threshold = 1.0
  cfg.loss = ConfigDict()
  cfg.loss.vf_loss_coeff = 1.0
  with tf.variable_scope('gcn_rins', reuse=tf.AUTO_REUSE):
    return MLPAgent(action_spec=action_spec, name='test', seed=42, **cfg)
def setup(self, argv):
  """After reading the component name this function will be called."""
  self.args = args = parser.parse_args(args=argv)
  # Plain scalar settings copied straight off the parsed namespace.
  for field in ('experiment_id', 'work_id', 'experiment_name', 'batch_size',
                'traj_length', 'seed', 'results_folder'):
    setattr(self, field, getattr(args, field))
  self.hyper_params = args.hyper_configs
  # Nested configs arrive as flat key/value lists; re-nest them.
  self.env_config = ConfigDict(to_nested_dicts(args.env_config))
  self.sess_config = ConfigDict(to_nested_dicts(args.sess_config))
  self.agent_config = ConfigDict(to_nested_dicts(args.agent_config))
  # eval_config is optional on the command line.
  if hasattr(args, 'eval_config'):
    self.eval_config = ConfigDict(to_nested_dicts(args.eval_config))
  else:
    self.eval_config = ConfigDict()
  check_config_compatibility(self.env_config, self.sess_config,
                             self.agent_config, self.eval_config)
def _get_session_config(self):
  """Session config used by the replay tests."""
  cfg = ConfigDict()
  cfg.folder = '/tmp/replay_test'
  cfg.seed = SEED

  replay = ConfigDict()
  replay.memory_size = MAX_REPLAY_SIZE
  replay.evict_interval = None
  replay.tensorboard_display = True
  replay.sampling_start_size = 0
  cfg.replay = replay

  lp = ConfigDict()
  lp.level = 'info'
  lp.overwrite = True
  lp.show_level = True
  lp.time_format = 'hms'
  # enable_local_logger: print log to local stdout AND send to remote.
  lp.enable_local_logger = True
  lp.local_logger_level = lp.level
  lp.local_logger_time_format = lp.time_format
  cfg.loggerplex = lp

  tp = ConfigDict()
  tp.max_processes = 2
  tp.agent_bin_size = 4
  cfg.tensorplex = tp

  learner = ConfigDict()
  learner.max_prefetch_queue = 100
  learner.prefetch_processes = 1
  learner.prefetch_threads_per_process = 1
  cfg.learner = learner
  return cfg
def get_config():
  """Agent config: autoregressive transformer over a bipartite GCN."""
  config = get_base_config()

  # required fields.
  config.class_path = "liaison.agents.gcn_multi_actions"
  config.class_name = "Agent"

  model = ConfigDict()
  model.class_path = 'liaison.agents.models.transformer_auto_regressive'
  model.num_blocks = 4
  model.d_ff = 32
  model.num_heads = 4
  model.d_model = 64
  model.dropout_rate = 0.
  model.use_mlp_value_func = False

  # The following code duplicated in gcn_rins.py as well.
  # Propagate any changes made as needed.
  mk = ConfigDict()
  mk.class_path = "liaison.agents.models.bipartite_gcn_rins"
  mk.n_prop_layers = 4
  mk.edge_embed_dim = 32
  mk.node_embed_dim = 32
  mk.global_embed_dim = 32
  mk.policy_torso_hidden_layer_sizes = [16, 16]
  mk.value_torso_hidden_layer_sizes = [16, 16]
  mk.policy_summarize_hidden_layer_sizes = [16]
  mk.value_summarize_hidden_layer_sizes = [16]
  mk.supervised_prediction_torso_hidden_layer_sizes = [16, 16]
  mk.sum_aggregation = False
  mk.use_layer_norm = True
  model.model_kwargs = mk
  config.model = model

  # V-trace importance-weight clipping.
  config.clip_rho_threshold = 1.0
  config.clip_pg_rho_threshold = 1.0

  loss = ConfigDict()
  loss.vf_loss_coeff = 1.0
  al = ConfigDict()
  al.init_val = 0.
  al.min_val = 0.
  al.start_decay_step = int(1e10)
  al.decay_steps = 5000
  # dec_val not used for linear scheme
  al.dec_val = .1
  al.dec_approach = 'linear'
  loss.al_coeff = al
  config.loss = loss

  # applicable for agent 'liaison.agents.gcn_large_batch'
  config.apply_grads_every = 1
  config.log_features_every = -1  # disable
  config.freeze_graphnet_weights_step = int(1e9)
  return config
def get_config():
  """Minimal nested config skeleton (agent / shell / session)."""
  cfg = ConfigDict()

  cfg.agent_config = ConfigDict()
  cfg.agent_config.network = ConfigDict()

  cfg.shell_config = ConfigDict()
  cfg.shell_config.use_gpu = False

  cfg.session_config = ConfigDict()
  cfg.session_config.sync_period = 100
  return cfg
def get_config():
  """Per-component resource requests (cpu, mem, gpu compute/mem)."""

  def _res(cpu, mem, gpu_compute, gpu_mem):
    # One resource bundle; field order matches the original layout.
    r = ConfigDict()
    r.cpu = cpu
    r.mem = mem
    r.gpu_compute = gpu_compute
    r.gpu_mem = gpu_mem
    return r

  config = ConfigDict()
  config.learner = _res(1, 0, [10], [14])
  # directive only applies to slurm allocations
  config.learner.slurm_exclusive_gpu = True
  config.bundled_actor = _res(1, 0, [.5], [10])
  config.actor = _res(1, 0, [], [])
  config.evaluator = _res(1, 0, [.1], [0])
  config.replay = _res(0, 0, [], [])  # mem 0; (was #50 * 1e3 # 50 GB)
  config.ps = ConfigDict(**config.replay)
  config.irs = _res(1, 0, [], [])
  config.visualizers = ConfigDict(**config.irs)
  config.visualizers.cpu = 1
  config.visualizers.mem = 0
  config.tensorplex = ConfigDict(**config.irs)
  config.tensorplex.cpu = 1
  config.tensorplex.mem = 0
  return config
def debatch_and_stack(self):
  """Chop finished timesteps into fixed-length stacked trajectories.

  Feeds each worker's finished timesteps into its chopping trajectory;
  whenever a trajectory reaches traj_len + 1 entries it is stacked into
  an experience and the trajectory restarts from the current timestep.

  Returns:
    exps: list of stacked experiences, each validated against self.spec.
  """
  traj_len = self._traj_len
  exps = []
  for i, finished_ts in enumerate(self._finished_timesteps):
    chopping_traj = self._chopping_trajs[i]
    for ts in finished_ts:
      if len(chopping_traj) == 0:
        # this branch is taken only after reset is called on trajectory.
        chopping_traj.start(
            next_state=ts['step_output']['next_state'],
            # remove step_output from ts
            **ConfigDict(**{
                k: v for k, v in ts.items() if k != 'step_output'
            }))
        # A fresh trajectory's first timestep carries no action/logits.
        assert ts['step_output']['action'] is None
        assert ts['step_output']['logits'] is None
        continue
      chopping_traj.add(**ConfigDict(**ts))
      assert ts['step_output']['action'] is not None
      assert len(chopping_traj) <= traj_len + 1
      if len(chopping_traj) == traj_len + 1:
        # TODO: Add dummy batch dimension and use debatch_and_stack
        # for uniformity.
        exps.append(chopping_traj.stack())
        chopping_traj.reset()
        # Restart the next trajectory from the timestep that completed
        # the previous one, so consecutive chunks overlap by one step.
        chopping_traj.start(
            next_state=ts['step_output']['next_state'],
            # remove step_output from ts
            **ConfigDict(**{
                k: v for k, v in ts.items() if k != 'step_output'
            }))
    self._finished_timesteps[i] = []

  def f(path, spec, v):
    # Length check: step_output fields (except next_state) have traj_len
    # entries; every other field has traj_len + 1.
    if path[0] == 'step_output' and path[1] != 'next_state':
      assert len(v) == traj_len
      return
    assert len(v) == traj_len + 1

  assert all([
      nest.map_structure_with_tuple_paths_up_to(self.spec, f, self.spec, exp)
      for exp in exps
  ])
  return exps
def get_config():
  """Agent config for the plain MLP model."""
  cfg = get_base_config()

  # required fields.
  cfg.class_path = "liaison.agents.mlp"
  cfg.class_name = "Agent"

  model = ConfigDict()
  model.class_path = "liaison.agents.models.mlp"
  model.hidden_layer_sizes = [32, 32]
  cfg.model = model

  loss = ConfigDict()
  loss.vf_loss_coeff = 1.0
  cfg.loss = loss
  return cfg
def __init__(self,
             id,
             seed,
             graph_seed=-1,
             graph_start_idx=0,
             n_graphs=1,
             dataset='milp-facilities-3',
             dataset_type='train',
             max_nodes=-1,
             max_edges=-1,
             **env_config):
  """If graph_seed < 0, then use the environment seed.

  max_nodes, max_edges -> Use for padding
  """
  self.config = ConfigDict(env_config)
  self.id = id
  self.seed = seed
  self._max_nodes = max_nodes
  self._max_edges = max_edges
  self.set_seed(seed)
  # Fall back to the env seed when no dedicated graph seed is supplied.
  self._setup_graph_random_state(seed if graph_seed < 0 else graph_seed)
  self._dataset = dataset
  self._dataset_type = dataset_type
  self._n_graphs = n_graphs
  self._graph_start_idx = graph_start_idx
  # Dataset-specific normalization constants override any config values.
  self.config.update(NORMALIZATION_CONSTANTS[dataset])
def build_update_ops(self, obs, targets):
  """
  This function will only be called once to create a TF graph which will
  be run repeatedly during training at the learner.

  All the arguments are tf placeholders (or nested structures of
  placeholders).

  Args:
    obs: [B, ...]
    targets: [B, N], N is the max node size.
  """
  self._validate_observations(obs)
  obs = ConfigDict(**obs)
  with tf.variable_scope(self._name):
    # flatten graph_features
    obs.graph_features = flatten_graphs(
        gn.graphs.GraphsTuple(**obs.graph_features))
    with tf.variable_scope('target_logits'):
      preds, logits_logged_vals = self._model.get_logits_and_next_state(obs)
    with tf.variable_scope('loss'):
      # Masked mean-squared error: only nodes selected by node_mask
      # contribute, normalized by the number of unmasked nodes.
      loss = tf.reduce_sum(obs.node_mask * ((preds - targets)**2))
      loss /= tf.reduce_sum(obs.node_mask)
    with tf.variable_scope('optimize'):
      opt_vals = self._optimize(loss)
    with tf.variable_scope('logged_vals'):
      # Collected once here; presumably read later by a logging hook.
      self._logged_values = {
          'loss/supervised_loss': tf.reduce_sum(loss),
          **opt_vals,
          **logits_logged_vals,
          **self._extract_logged_values(obs),
      }
def __init__(self, name, action_spec, seed, model=None, **kwargs):
  """Seed the agent, record its spec, and load the model.

  Extra keyword arguments are kept on self.config; `model` kwargs are
  forwarded to _load_model.
  """
  self.set_seed(seed)
  self._name = name
  self._action_spec = action_spec
  self.config = ConfigDict(**kwargs)
  model_kwargs = model or {}
  self._load_model(name, action_spec=action_spec, **model_kwargs)
def __init__(self, id, seed, discount=1.0, graph_seed=-1, **env_config):
  """if graph_seed < 0, then use the environment seed"""
  self.config = ConfigDict(env_config)
  self.id = id
  self.seed = seed
  self.discount = discount
  self.set_seed(seed)
  # generate graph with 32 nodes.
  nx_graph, self._path = generate_networkx_graph(
      seed if graph_seed < 0 else graph_seed, [32, 33])
  nx_graph = nx_graph.to_directed()
  # max number of steps in an episode.
  self._max_steps = 3 * len(nx_graph)
  self._nx_graph = nx_graph
  self._src_node, self._target_node = self._path[0], self._path[-1]
  # Total edge weight along the precomputed shortest path.
  self._shortest_path_length = sum(
      nx_graph[u][v][DISTANCE_WEIGHT_NAME] for u, v in pairwise(self._path))
  self._reset_graph_features = self._networkx_to_graph_features(
      nx_graph, self._src_node, self._target_node)
  self._graph_features = copy.deepcopy(self._reset_graph_features)
  self._curr_node = self._src_node
  self._reset_next_step = True
def get_env_config():
  """get rins env config."""
  cfg = ConfigDict()

  # required fields.
  cfg.class_path = "liaison.env.rins"  # should be rel to the parent directory.
  cfg.class_name = "Env"

  # makes observations suitable for the MLP model.
  cfg.make_obs_for_mlp = True
  # adds all the constraints to MLP state space.
  # adds #variables * #constraints dimensions to the state space.
  cfg.mlp_embed_constraints = False
  cfg.make_obs_for_self_attention = False

  # if graph_seed < 0, then use the environment seed
  cfg.graph_seed = 42

  cfg.dataset = 'milp-facilities-10'
  cfg.dataset_type = 'train'
  cfg.graph_start_idx = args.graph_start_idx
  cfg.n_graphs = 1

  cfg.k = args.k
  cfg.steps_per_episode = 2000
  return cfg
def _init_ds(self):
  """Initialize per-episode data structures and counters."""
  # Initialize data structures
  milp = self.milp
  self._ep_return = 0
  self._n_steps = 0
  self._n_local_moves = 0
  self._reset_next_step = False
  self._mip_works = []
  # mip stats for the current step
  self._mip_stats = ConfigDict(mip_work=0,
                               n_cuts=0,
                               n_cuts_applied=0,
                               n_lps=0,
                               solving_time=0.,
                               pre_solving_time=0.,
                               time_elapsed=0.)
  # Reverse index: variable name -> position in self._var_names.
  self._varnames2varidx = {
      var_name: i for i, var_name in enumerate(self._var_names)
  }
  # optimal solution can be used for supervised auxiliary tasks.
  self._optimal_soln = np.float32(
      [milp.optimal_solution[v] for v in self._var_names])
  self._optimal_lp_soln = np.float32(
      [milp.optimal_lp_sol[v] for v in self._var_names])
  # Global (graph-level) observation features.
  globals_ = np.zeros(Env.N_GLOBAL_FIELDS, dtype=np.float32)
  # Step counter normalized by sqrt of the step budget (k * max moves).
  globals_[Env.GLOBAL_STEP_NUMBER] = self._n_steps / np.sqrt(
      self.k * self.max_local_moves)
  globals_[Env.GLOBAL_UNFIX_LEFT] = self.k
  globals_[Env.GLOBAL_N_LOCAL_MOVES] = self._n_local_moves
  self._globals = globals_
  self._n_steps_in_this_local_move = 0
  self._set_stop_switch_mask()
def _scip_solve(self, solver):
  """solves a mip/lp using scip

  Args:
    solver: a pyscipopt Model with the problem already loaded, or None
      to create a fresh (silent) Model.

  Returns:
    (ass, obj, mip_stats): variable-name -> value assignment, objective
    value, and a ConfigDict of solve statistics.
  """
  if solver is None:
    solver = Model()
    solver.hideOutput()
  # Optionally cripple cuts / propagation / presolve so solve effort is
  # comparable across calls.
  if self.config.disable_maxcuts:
    for param in [
        'separating/maxcuts', 'separating/maxcutsroot',
        'propagating/maxrounds', 'propagating/maxroundsroot',
        'presolving/maxroundsroot'
    ]:
      solver.setIntParam(param, 0)
    solver.setBoolParam('conflict/enable', False)
    solver.setPresolve(SCIP_PARAMSETTING.OFF)
  solver.setBoolParam('randomization/permutevars', True)
  # seed is set to 0 permanently.
  solver.setIntParam('randomization/permutationseed', 0)
  solver.setIntParam('randomization/randomseedshift', 0)
  with U.Timer() as timer:
    solver.optimize()
  # Caller relies on an optimal solve; anything else is a hard error.
  assert solver.getStatus() == 'optimal', solver.getStatus()
  obj = float(solver.getObjVal())
  ass = {var.name: solver.getVal(var) for var in solver.getVars()}
  mip_stats = ConfigDict(mip_work=solver.getNNodes(),
                         n_cuts=solver.getNCuts(),
                         n_cuts_applied=solver.getNCutsApplied(),
                         n_lps=solver.getNLPs(),
                         solving_time=solver.getSolvingTime(),
                         pre_solving_time=solver.getPresolvingTime(),
                         time_elapsed=timer.to_seconds())
  return ass, obj, mip_stats
def _pad_graph_features(self, features: dict):
  """Pad node arrays to self._max_nodes and edge-indexed arrays
  (edges/senders/receivers) to self._max_edges; returns a plain dict."""
  padded = ConfigDict(**features)
  padded.update(nodes=pad_first_dim(padded.nodes, self._max_nodes),
                edges=pad_first_dim(padded.edges, self._max_edges),
                senders=pad_first_dim(padded.senders, self._max_edges),
                receivers=pad_first_dim(padded.receivers, self._max_edges))
  return dict(**padded)
def __init__(self,
             seed,
             evict_interval,
             compress_before_send,
             load_balanced=True,
             index=0,
             **kwargs):
  """Replay server: collects incoming experience and serves samples.

  Args:
    seed: RNG seed. NOTE(review): not used in this constructor's visible
      body — presumably consumed elsewhere; confirm.
    evict_interval: seconds between eviction passes (None/0 semantics
      handled by the eviction thread).
    compress_before_send: whether messages are (de)compressed on the wire.
    load_balanced: if True, connect to the balancer's backend ports;
      otherwise bind directly to the frontend ports.
    index: replay shard index.
  """
  self.config = ConfigDict(kwargs)
  self.index = index
  # Port selection depends on whether a load balancer sits in front.
  if load_balanced:
    collector_port = os.environ['SYMPH_COLLECTOR_BACKEND_PORT']
    sampler_port = os.environ['SYMPH_SAMPLER_BACKEND_PORT']
  else:
    collector_port = os.environ['SYMPH_COLLECTOR_FRONTEND_PORT']
    sampler_port = os.environ['SYMPH_SAMPLER_FRONTEND_PORT']
  self._collector_server = ExperienceCollectorServer(
      host='localhost' if load_balanced else '*',
      port=collector_port,
      exp_handler=self._insert_wrapper,
      load_balanced=load_balanced,
      compress_before_send=compress_before_send)
  self._sampler_server = ZmqServer(
      host='localhost' if load_balanced else '*',
      port=sampler_port,
      bind=not load_balanced,
      serializer=get_serializer(compress_before_send),
      deserializer=get_deserializer(compress_before_send))
  # Threads are created lazily by the start/serve entry points.
  self._sampler_server_thread = None
  self._evict_interval = evict_interval
  self._evict_thread = None
  self._setup_logging()
def __init__(self,
             id,
             seed,
             graph_start_idx=0,
             n_graphs=1,
             dataset='',
             dataset_type='train',
             k=5,
             n_local_moves=10,
             max_nodes=-1,
             max_edges=-1,
             sample_every_n_resets=1,
             **env_config):
  """k -> Max number of variables to unfix at a time.
  Informally, this is a bound on the local search neighbourhood size.
  max_nodes, max_edges -> Use for padding
  """
  self.config = ConfigDict(env_config)
  self.id = id
  self.k = self._original_k = k
  # With a k-schedule enabled, the largest scheduled k bounds the
  # action space; otherwise the fixed k does.
  if self.config.k_schedule.enable:
    self.max_k = max(self.config.k_schedule.values)
  else:
    self.max_k = k
  self.max_local_moves = n_local_moves
  self.seed = seed
  # Negative padding sizes mean: read them from the dataset's
  # normalization constants.
  if max_nodes < 0:
    max_nodes = NORMALIZATION_CONSTANTS[dataset]['max_nodes']
  if max_edges < 0:
    max_edges = NORMALIZATION_CONSTANTS[dataset]['max_edges']
  self._max_nodes = max_nodes
  self._max_edges = max_edges
  self.set_seed(seed)
  self._dataset = dataset
  self._dataset_type = dataset_type
  self._max_graphs = n_graphs
  self._graph_start_idx = graph_start_idx
  self._sample_every_n_resets = sample_every_n_resets
  if dataset:
    self.config.update(NORMALIZATION_CONSTANTS[dataset])
  # call reset so that obs_spec can work without calling reset
  self._ep_return = None
  # Previous-episode diagnostics start as NaN until the first episode ends.
  self._prev_ep_return = np.nan
  self._prev_avg_quality = np.nan
  self._prev_best_quality = np.nan
  self._prev_final_quality = np.nan
  self._prev_mean_work = np.nan
  self._prev_k = np.nan
  self._reset_next_step = True
  # In distributed runs the PS host env var is set; fetch the global
  # training step from there (rate-limited).
  if 'SYMPH_PS_SERVING_HOST' in os.environ:
    self._global_step_fetcher = GlobalStepFetcher(min_request_spacing=4)
  else:
    self._global_step_fetcher = None
  # map from sample to length of the mip
  self._sample_lengths = None
  self._n_resets = 0
  self._vars_unfixed_so_far = []
  self.reset()
def get_config():
  """Agent config pairing the MLP agent with the transformer RINS model."""
  cfg = get_base_config()

  # required fields.
  cfg.class_path = "liaison.agents.mlp"
  cfg.class_name = "Agent"

  model = ConfigDict()
  model.class_path = "liaison.agents.models.transformer_rins"
  cfg.model = model

  # V-trace importance-weight clipping.
  cfg.clip_rho_threshold = 1.0
  cfg.clip_pg_rho_threshold = 1.0

  loss = ConfigDict()
  loss.vf_loss_coeff = 1.0
  cfg.loss = loss
  return cfg
def get_config():
  """Agent config for the GCN RINS model."""
  config = get_base_config()

  # required fields.
  config.class_path = "liaison.agents.gcn"
  config.class_name = "Agent"

  # The following code duplicated in gcn_ar.py as well.
  # Propagate any changes made
  model = ConfigDict()
  model.class_path = "liaison.agents.models.gcn_rins"
  model.n_prop_layers = 4
  model.edge_embed_dim = 16
  model.node_embed_dim = 16
  model.global_embed_dim = 16
  model.node_hidden_layer_sizes = [16]
  model.edge_hidden_layer_sizes = [16]
  model.policy_torso_hidden_layer_sizes = [16, 16]
  model.value_torso_hidden_layer_sizes = [16, 16]
  model.policy_summarize_hidden_layer_sizes = [16]
  model.value_summarize_hidden_layer_sizes = [16]
  model.supervised_prediction_torso_hidden_layer_sizes = [16, 16]
  model.sum_aggregation = False
  model.use_layer_norm = True
  config.model = model

  # V-trace importance-weight clipping.
  config.clip_rho_threshold = 1.0
  config.clip_pg_rho_threshold = 1.0

  loss = ConfigDict()
  loss.vf_loss_coeff = 1.0
  al = ConfigDict()
  al.init_val = 0.
  al.min_val = 0.
  al.start_decay_step = int(1e10)
  al.decay_steps = 5000
  # dec_val not used for linear scheme
  al.dec_val = .1
  al.dec_approach = 'linear'
  loss.al_coeff = al
  config.loss = loss

  # applicable for agent 'liaison.agents.gcn_large_batch'
  config.apply_grads_every = 1
  config.choose_stop_switch = False
  return config
def _get_model_config(self):
  """Return the model ConfigDict selected by the --model flag.

  Returns:
    ConfigDict with at least class_path set.

  Raises:
    ValueError: if FLAGS.model is not 'mlp' or 'gcn'.
  """
  config = ConfigDict()
  if FLAGS.model == 'mlp':
    config.class_path = "liaison.agents.models.mlp"
    config.hidden_layer_sizes = [32, 32]
  elif FLAGS.model == 'gcn':
    config.class_path = "liaison.agents.models.gcn"
  else:
    # ValueError is more precise than a bare Exception; callers catching
    # Exception still work since ValueError is a subclass.
    raise ValueError('Unknown model %s' % FLAGS.model)
  return config
def get_config():
  """Agent config for the plain GCN model (8 propagation layers)."""
  cfg = get_base_config()

  # required fields.
  cfg.class_path = "liaison.agents.gcn"
  cfg.class_name = "Agent"

  model = ConfigDict()
  model.class_path = "liaison.agents.models.gcn"
  model.n_prop_layers = 8
  model.node_embed_dim = 32
  cfg.model = model

  # V-trace importance-weight clipping.
  cfg.clip_rho_threshold = 1.0
  cfg.clip_pg_rho_threshold = 1.0

  loss = ConfigDict()
  loss.vf_loss_coeff = 1.0
  cfg.loss = loss
  return cfg
def __init__(self,
             name,
             action_spec,
             seed,
             model=None,
             choose_stop_switch=False,
             **kwargs):
  """Seed the agent, record its spec and stop-switch flag, load the model.

  `choose_stop_switch` is both stored on the agent and forwarded to the
  model loader.
  """
  self.set_seed(seed)
  self._name = name
  self._action_spec = action_spec
  self.choose_stop_switch = choose_stop_switch
  self.config = ConfigDict(**kwargs)
  self._load_model(name,
                   action_spec=action_spec,
                   choose_stop_switch=choose_stop_switch,
                   **(model or {}))
def __init__(self, name, action_spec, seed, model=None, **kwargs):
  """Seed the agent, load the model, and create step counters."""
  self.set_seed(seed)
  self._name = name
  self._action_spec = action_spec
  self.config = ConfigDict(**kwargs)
  self._load_model(name, action_spec=action_spec, **(model or {}))
  # Shared global step plus a worker-local (LOCAL_VARIABLES) step counter.
  self._global_step = tf.train.get_or_create_global_step()
  self._total_steps = tf.Variable(0,
                                  trainable=False,
                                  collections=[tf.GraphKeys.LOCAL_VARIABLES],
                                  name='total_steps')
def batch_and_preprocess_trajs(self, l): traj = Trajectory.batch(l, self._traj_spec) # feed and overwrite the trajectory traj['step_output'], traj['step_output']['next_state'], traj['step_type'], traj[ 'reward'], traj['observation'], traj['discount'] = self._agent.update_preprocess( step_outputs=ConfigDict(traj['step_output']), prev_states=traj['step_output']['next_state'], step_types=traj['step_type'], rewards=traj['reward'], observations=traj['observation'], discounts=traj['discount']) return traj
def __init__(self, serving_host, serving_port, checkpoint_folder,
             profile_folder, kvstream_folder, **kwargs):
  """Record serving endpoint and storage folders; server starts later."""
  Thread.__init__(self)
  self.config = ConfigDict(**kwargs)
  self.serving_host = serving_host
  self.serving_port = serving_port
  self.checkpoint_folder = checkpoint_folder
  self.profile_folder = profile_folder
  self.kvstream_folder = kvstream_folder
  # Attributes
  self._server = None