Example #1
def run_pipeline(examples_sstable: Optional[Text],
                 scrub_parameters: Optional[Text],
                 prover_tasks: List[proof_assistant_pb2.ProverTask],
                 prover_options: deephol_pb2.ProverOptions, path_output: str):
    """Create and run simple prover pipeline."""
    prover.cache_embeddings(prover_options)
    prover_pipeline = make_pipeline(prover_tasks, prover_options, path_output)
    pipeline = prover_pipeline
    if examples_sstable:
        theorem_db = io_util.load_theorem_database_from_file(
            str(prover_options.path_theorem_database))

        def examples_pipeline(root):
            """Examples pipeline."""
            scrub_str_enum_map = {
                'NOTHING': options_pb2.ConvertorOptions.NOTHING,
                'TESTING': options_pb2.ConvertorOptions.TESTING,
                'VALIDATION_AND_TESTING':
                    options_pb2.ConvertorOptions.VALIDATION_AND_TESTING,
            }
            training_examples_pipeline(
                proof_logs=prover_pipeline(root),
                tactics_filename=prover_options.path_tactics,
                theorem_db=theorem_db,
                examples_sstables=[examples_sstable],
                scrub_parameters=scrub_str_enum_map[scrub_parameters])

        pipeline = examples_pipeline
    runner.Runner().run(pipeline).wait_until_finish()
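
For orientation, here is a minimal, hypothetical invocation of run_pipeline, assuming the same modules used in the snippet above (deephol_pb2, proof_assistant_pb2) are already imported. The paths, the single empty ProverTask, and the choice to skip the training-examples branch are placeholders for illustration, not values taken from the example.

# Hypothetical invocation sketch; every value below is a placeholder.
prover_options = deephol_pb2.ProverOptions()
prover_options.path_theorem_database = '/tmp/theorem_database.textpb'  # placeholder path
prover_options.path_tactics = '/tmp/tactics.textpb'                    # placeholder path
tasks = [proof_assistant_pb2.ProverTask()]  # normally built from the theorem database

run_pipeline(
    examples_sstable=None,          # None: only the prover pipeline branch runs
    scrub_parameters=None,
    prover_tasks=tasks,
    prover_options=prover_options,
    path_output='/tmp/prover_out')  # placeholder output directory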
Example #2
    def perform_round(self, initial_examples):
        """Perform a single round of the loop and advance the loop counter."""
        current_round = self.loop_meta.status.current_round
        if current_round == 0:
            self.setup_examples(initial_examples)
        if not self.prover_tasks:
            logging.info('No tasks for proving...')
            return
        logging.info('******** ROUND %d', current_round)
        logging.info('Setting up latest checkpoints for ROUND %d',
                     current_round)
        self.setup_model_checkpoint_and_embeddings()
        logging.info('Creating prover tasks for ROUND %d', current_round)
        tasks = self.create_prover_tasks()
        logging.info('Number of tasks: %d', len(tasks))
        logging.info(
            'Running prover and example generation pipeline '
            'for ROUND %d', current_round)

        def pipeline(root):
            proof_logs = self.prover_pipeline(tasks, root)
            self.reporting_pipeline(proof_logs)
            self.training_examples_pipeline(proof_logs)
            self.historical_examples_pipeline(root)

        runner.Runner().run(pipeline).wait_until_finish()
        self.aggregate_reporting()
        self.loop_meta.prepare_next_round()
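
The shape of perform_round (several stages chained off one intermediate collection inside a single pipeline function, which is then run once and waited on) is the standard dataflow composition pattern. Below is a self-contained toy sketch of that pattern using Apache Beam; the stages are invented for illustration, and runner.Runner in the example above may wrap a different backend.

import apache_beam as beam

def toy_pipeline(root):
    # One source feeding several downstream stages, mirroring how the reporting
    # and example-generation stages hang off the prover's proof logs.
    logs = root | 'CreateLogs' >> beam.Create(['log_a', 'log_b'])
    logs | 'Report' >> beam.Map(print)
    logs | 'MakeExamples' >> beam.Map(str.upper)

p = beam.Pipeline()
toy_pipeline(p)
p.run().wait_until_finish()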
Example #3
    def setup_examples(self, initial_examples):
        logging.info('Creating initial examples pipeline')
        pipeline = self.create_initial_examples_sstables(initial_examples)
        if pipeline:
            logging.info('Generating initial examples sstables...')
            runner.Runner().run(pipeline).wait_until_finish()
        else:
            logging.info('Examples are already present')
Example #4
    def run_pipeline(self, proof_logs: Text):
        """Build and run the reporting pipeline over the given proof logs."""
        def pipeline(root):
            proof_logs_collection = make_proof_logs_collection(
                root, proof_logs)
            self.setup_pipeline(proof_logs_collection)

        runner.Runner().run(pipeline).wait_until_finish()
        self.write_final_stats()
        tf.logging.info('Finished reporting.')
Example #5
    def setup_model_checkpoint_and_embeddings(self):
        """Copy embeddings over and precompute theorem database embeddings.

    This function makes sure that we have at least one model checkpoint
    file present. Also it copies over the latest new embeddings when they become
    available and precomputes the embedding store for them.
    """
        logging.info('Setting up model checkpoint and embeddings %s %s',
                     str(self.config.copy_model_checkpoints),
                     str(self.checkpoint_monitor.has_checkpoint()))
        # Copying checkpoints can be disabled by setting copy_model_checkpoints
        # to false; however, if we do not have any checkpoint yet, we still try
        # to copy a new one over.
        while self.config.copy_model_checkpoints or not (
                self.checkpoint_monitor.has_checkpoint()):
            # Whether we have a pre-existing checkpoint.
            has_checkpoint = self.checkpoint_monitor.has_checkpoint()
            logging.info('has checkpoint: %s', has_checkpoint)
            # new_checkpoint is None if the training directory does not
            # have a more recent checkpoint than the one stored in the loop
            # directory. Otherwise it refers to the current newest checkpoint.
            new_checkpoint = self.checkpoint_monitor.new_checkpoint()
            logging.info('new checkpoint: %s', new_checkpoint)
            if new_checkpoint is not None:
                # We have a more recent checkpoint than in our local directory.
                logging.info('New checkpoint: "%s"', new_checkpoint)
                self.checkpoint_monitor.copy_latest_checkpoint()
                chkpt = os.path.join(self.loop_meta.checkpoints_path(),
                                     new_checkpoint)
                logging.info('Copied checkpoint: "%s"', chkpt)
                # We try to compute embeddings until we succeed.
                while not gfile.Exists(chkpt + '.npy'):
                    runner.Runner().run(self.embedding_store_pipeline(
                        chkpt)).wait_until_finish()
                    if not gfile.Exists(chkpt + '.npy'):
                        logging.error(
                            'Could not generate embeddings for the latest '
                            'checkpoint %s.', chkpt)
                    else:
                        self.checkpoint_monitor.update_latest_checkpoint(
                            new_checkpoint)
                        break
            # If we had a pre-existing checkpoint or we managed to copy over
            # a new one, then we succeeded; only keep looping if we still have
            # no checkpoint at all.
            if has_checkpoint or self.checkpoint_monitor.has_checkpoint():
                break
            else:
                # We don't have a checkpoint and never had one. Wait for one
                # to appear in the training directory.
                logging.info(
                    'Waiting for the first model checkpoint to appear.')
                time.sleep(10)
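
The heart of the method above is a retry-until-the-artifact-exists loop around the embedding-store pipeline. A stripped-down, generic sketch of that pattern follows; ensure_embeddings, the generate_embeddings callable, and the use of os.path plus a sleep between retries are stand-ins chosen for illustration, not deephol APIs.

import os
import time

def ensure_embeddings(checkpoint_path, generate_embeddings, poll_seconds=10):
    """Retry embedding generation until '<checkpoint_path>.npy' exists."""
    target = checkpoint_path + '.npy'
    while not os.path.exists(target):
        generate_embeddings(checkpoint_path)  # may fail without producing the file
        if not os.path.exists(target):
            print('No embeddings for %s yet; retrying.' % checkpoint_path)
            time.sleep(poll_seconds)
    return target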