def __init__(self, env_name, fluents, n_functions, logdir=None, verbose=True):
    """Initialize.

    :param env_name: a gym environment name.
    :param fluents: the list of propositional atoms that will be predicted.
    :param n_functions: number of predictions to valuate in parallel.
    :param logdir: if provided, the automaton just parsed is saved here.
    :param verbose: log the parsing step, because it may take a long time!
    """
    # Load the constraints declared for this environment
    if verbose:
        print("> Parsing", env_name, "constraints")
    env_data = selector.read_back(env_name)
    json_constraints = env_data["constraints"]

    # Conjoin all constraints and parse them as a single LDLf formula
    constraint = " & ".join(json_constraints)
    formula = LDLfParser()(constraint)  # type: LDLfFormula

    # Every atom of the formula must be one of the evaluated fluents
    atoms = formula.find_labels()
    if not all(a in fluents for a in atoms):
        raise ValueError(f"One of the atoms {atoms} is not in fluents")

    # Build a deterministic symbolic automaton (slow operation)
    automaton = formula.to_automaton()  # type: SymbolicAutomaton
    automaton = automaton.determinize()
    if verbose:
        print("> Parsed")

    # Optionally dump a rendering of the automaton for inspection
    if logdir is not None:
        automaton.to_graphviz().render(
            "constraint.gv", directory=logdir, view=False, cleanup=False)

    # Keep the inputs and the parsing products
    self.env_name = env_name
    self.fluents = fluents
    self._str = constraint
    self._formula = formula
    self._automaton = automaton
    self._n_functions = n_functions
    self._n_fluents = len(self.fluents)

    # Batched execution of the automaton over the fluents
    self._tf_automata = TfSymbolicAutomaton(self._automaton, self.fluents)

    # Buffers: one automaton state per parallel function
    self._current_states = tf.Variable(
        tf.zeros([self._n_functions], dtype=tf.int32),
        trainable=False, name="current_states")
    # Per-function counters, one column per final state
    self._final_counts = tf.Variable(
        tf.zeros(
            [self._n_functions, len(self._tf_automata.final_states)],
            dtype=tf.int32),
        trainable=False, name="final_counts_buffer")
    self._timestep = tf.Variable(0, trainable=False, name="timestep")

    # Ready for a new run
    self._reset()
def __init__(self, env_name, fluents, reward, logdir, load=None, verbose=True):
    """Initialize.

    :param env_name: a gym atari environment name.
    :param fluents: the list of propositional atoms that are known at
        each step.
    :param reward: (float) this reward is returned when the execution
        reaches a final state (at the first instant an execution
        satisfies the restraining specification).
    :param logdir: the automaton just parsed is saved here.
    :param load: if provided, the automaton is not computed but loaded
        from this file. A path to a rb.pickle file.
    :param verbose: verbose flag (automaton conversion may take a while).
    """
    # Read the restraining specification for this environment
    data = selector.read_back(env_name)
    json_rb = data["restraining_bolt"] + data["constraints"]

    # Conjoin and parse as one LDLf formula
    restraining_spec = " & ".join(json_rb)
    formula = LDLfParser()(restraining_spec)  # type: LDLfFormula

    # Every atom of the formula must be an evaluated fluent
    atoms = formula.find_labels()
    if not all(a in fluents for a in atoms):
        raise ValueError(f"One of the atoms {atoms} is not in fluents")

    if load is None:
        # Convert to a complete deterministic automaton (slow operation)
        if verbose:
            print("> Parsing", env_name, "restraining specification")
        automaton = formula.to_automaton()  # type: SymbolicAutomaton
        automaton = automaton.determinize().complete()
        if verbose:
            print("> Parsed")

        # Persist both a rendering and a pickle of the automaton
        automaton.to_graphviz().render(
            "rb.gv", directory=logdir, view=False, cleanup=False)
        with open(os.path.join(logdir, "rb.pickle"), "wb") as f:
            pickle.dump(automaton, f)
    else:
        # Reuse a previously computed automaton.
        # NOTE(review): pickle.load is only safe on trusted files.
        with open(load, "rb") as f:
            automaton = pickle.load(f)
        if verbose:
            print(">", load, "loaded")

        # Render the loaded automaton too (debugging aid)
        automaton.to_graphviz().render(
            "loaded_rb.gv", directory=logdir, view=False, cleanup=False)

    # Step-by-step execution of the automaton
    simulator = AutomatonSimulator(automaton)

    # Keep the inputs and the parsing products
    self.env_name = env_name
    self.fluents = fluents
    self._str = restraining_spec
    self._formula = formula
    self._automaton = automaton
    self._simulator = simulator
    self._reward = reward
    self._last_state = None