def _test_dqn_workflow(self, use_gpu=False, use_all_avail_gpus=False):
    """Run DQN workflow to ensure no crashes, algorithm correctness
    not tested here."""
    with tempfile.TemporaryDirectory() as tmpdirname:
        lockfile = os.path.join(tmpdirname, "multiprocess_lock")
        Path(lockfile).touch()
        params = {
            "training_data_path": os.path.join(
                curr_dir, "test_data/discrete_action/cartpole_training.json.bz2"
            ),
            "eval_data_path": os.path.join(
                curr_dir, "test_data/discrete_action/cartpole_eval.json.bz2"
            ),
            "state_norm_data_path": os.path.join(
                curr_dir, "test_data/discrete_action/cartpole_norm.json"
            ),
            "model_output_path": tmpdirname,
            "use_gpu": use_gpu,
            "use_all_avail_gpus": use_all_avail_gpus,
            "init_method": "file://" + lockfile,
            "num_nodes": 1,
            "node_index": 0,
            "actions": ["0", "1"],
            "epochs": 1,
            "rl": {},
            "rainbow": {"double_q_learning": False, "dueling_architecture": False},
            "training": {"minibatch_size": 128},
        }
        dqn_workflow.main(params)
        predictor_files = glob.glob(tmpdirname + "/model_*.torchscript")
        assert len(predictor_files) == 1, "Expected exactly one predictor file"
        predictor = DiscreteDqnTorchPredictor(torch.jit.load(predictor_files[0]))
        test_float_state_features = [{"0": 1.0, "1": 1.0, "2": 1.0, "3": 1.0}]
        q_values = predictor.predict(test_float_state_features)
        assert len(q_values[0].keys()) == 2
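# A minimal sketch of how the private helper above could be invoked by public
# test cases; the method names and the CUDA guard are assumptions made for
# illustration, not part of the original snippet.
def test_dqn_workflow(self):
    # CPU-only end-to-end smoke test of the DQN workflow.
    self._test_dqn_workflow()

def test_dqn_workflow_gpu(self):
    # Hypothetical GPU variant; skip when CUDA is unavailable.
    if not torch.cuda.is_available():
        self.skipTest("CUDA is not available")
    self._test_dqn_workflow(use_gpu=True)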
def test_predictor_torch_export(self):
    """Verify that q-values before model export equal q-values after
    model export. Meant to catch issues with export logic."""
    environment = Gridworld()
    samples = Samples(
        mdp_ids=["0"],
        sequence_numbers=[0],
        sequence_number_ordinals=[1],
        states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
        actions=["D"],
        action_probabilities=[0.5],
        rewards=[0],
        possible_actions=[["R", "D"]],
        next_states=[{5: 1.0}],
        next_actions=["U"],
        terminals=[False],
        possible_next_actions=[["R", "U", "D"]],
    )
    tdps = environment.preprocess_samples(samples, 1)
    assert len(tdps) == 1, "Invalid number of data pages"

    trainer, exporter = self.get_modular_sarsa_trainer_exporter(
        environment, {}, False
    )
    input = rlt.PreprocessedState.from_tensor(tdps[0].states)
    pre_export_q_values = trainer.q_network(input).q_values.detach().numpy()

    preprocessor = Preprocessor(environment.normalization, False)
    cpu_q_network = trainer.q_network.cpu_model()
    cpu_q_network.eval()
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(cpu_q_network, preprocessor)
    serving_module = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names=environment.ACTIONS
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        buf = export_module_to_buffer(serving_module)
        tmp_path = os.path.join(tmpdirname, "model")
        with open(tmp_path, "wb") as f:
            f.write(buf.getvalue())
        predictor = DiscreteDqnTorchPredictor(torch.jit.load(tmp_path))
        post_export_q_values = predictor.predict([samples.states[0]])

    for i, action in enumerate(environment.ACTIONS):
        self.assertAlmostEqual(
            float(pre_export_q_values[0][i]),
            float(post_export_q_values[0][action]),
            places=4,
        )
def main(model_path, temperature):
    model_path = glob.glob(model_path)[0]
    predictor = DiscreteDqnTorchPredictor(torch.jit.load(model_path))
    predictor.softmax_temperature = temperature
    env = OpenAIGymEnvironment(gymenv=ENV)
    avg_rewards, avg_discounted_rewards = env.run_ep_n_times(
        AVG_OVER_NUM_EPS, predictor, test=True
    )
    logger.info(
        "Achieved an average reward score of {} over {} evaluations.".format(
            avg_rewards, AVG_OVER_NUM_EPS
        )
    )
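# A minimal command-line entry point sketch for the evaluation routine above;
# the flag names (--model_path, --softmax_temperature) and their defaults are
# assumptions for illustration, not part of the original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Evaluate an exported TorchScript DQN model on the Gym environment."
    )
    parser.add_argument(
        "--model_path", required=True, help="Glob matching the exported model file"
    )
    parser.add_argument(
        "--softmax_temperature",
        type=float,
        default=1.0,
        help="Softmax temperature applied by the predictor when sampling actions",
    )
    args = parser.parse_args()
    main(args.model_path, args.softmax_temperature)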
def get_predictor(self, trainer, environment):
    state_preprocessor = Preprocessor(environment.normalization, False)
    q_network = trainer.q_network
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        q_network.cpu_model().eval(), state_preprocessor
    )
    serving_module = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor=dqn_with_preprocessor,
        action_names=environment.ACTIONS,
    )
    predictor = DiscreteDqnTorchPredictor(serving_module)
    return predictor
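# A short usage sketch: build a predictor from a trained trainer and query it
# for q-values. The helper name and the state-feature keys below are
# illustrative assumptions, not taken from the original test fixtures.
def _example_predict(self, trainer, environment):
    predictor = self.get_predictor(trainer, environment)
    # predict() takes a list of {feature_id: value} dicts and returns one
    # {action_name: q_value} dict per input state.
    q_values = predictor.predict([{0: 1.0, 5: 1.0}])
    assert set(q_values[0].keys()) == set(environment.ACTIONS)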