def policy(self, float_state_features, int_state_features, actions):
    """Pair each float state with its action and delegate to ``RLPredictor.policy``.

    Every entry of ``float_state_features`` is merged with the entry of
    ``actions`` at the same position into a new dict; on a key collision
    the value from the action mapping wins.

    Args:
        float_state_features: Indexable, sized collection of mappings.
        int_state_features: Optional extra argument. When it is ``None``
            it is not forwarded at all; otherwise it is passed through
            unchanged as the third positional argument.
        actions: Indexable collection of mappings, looked up by the same
            positions as ``float_state_features``.

    Returns:
        Whatever ``RLPredictor.policy`` returns for the merged examples.

    Raises:
        IndexError: If ``actions`` is a sequence with fewer entries than
            ``float_state_features``.
    """
    # Positional lookup (rather than zip) is deliberate: a too-short
    # ``actions`` still fails loudly and surplus actions are ignored.
    merged_examples = [
        {**float_state_features[pos], **actions[pos]}
        for pos in range(len(float_state_features))
    ]
    # Forward the int features only when the caller actually supplied them.
    trailing_args = () if int_state_features is None else (int_state_features,)
    return RLPredictor.policy(self, merged_examples, *trailing_args)
def policy(self, states, actions):
    """Pair each state with its action and delegate to ``RLPredictor.policy``.

    Every entry of ``states`` is merged with the entry of ``actions`` at
    the same position into a new dict; on a key collision the value from
    the action mapping wins.

    Args:
        states: Indexable, sized collection of mappings.
        actions: Indexable collection of mappings, looked up by the same
            positions as ``states``.

    Returns:
        Whatever ``RLPredictor.policy`` returns for the merged examples.

    Raises:
        IndexError: If ``actions`` is a sequence with fewer entries than
            ``states``.
    """
    # Positional lookup (rather than zip) is deliberate: a too-short
    # ``actions`` still fails loudly and surplus actions are ignored.
    merged_examples = [
        {**states[pos], **actions[pos]} for pos in range(len(states))
    ]
    return RLPredictor.policy(self, merged_examples)