def test_single_genpvar(self):
     """Checks that a repeated GEN%PVAR id is renumbered to 0 everywhere.

     Both the string-level helper `normalize_genpvars` and the theorem-level
     `normalize` are expected to rewrite `GEN%PVAR%123` to `GEN%PVAR%0`.
     """
     raw_thm = proof_assistant_pb2.Theorem()
     raw_thm.conclusion = '(l (v type GEN%PVAR%123) (v type GEN%PVAR%123))'
     want_thm = proof_assistant_pb2.Theorem()
     want_thm.conclusion = '(l (v type GEN%PVAR%0) (v type GEN%PVAR%0))'

     # String-level normalization of the conclusion alone.
     renumbered = normalization_lib.normalize_genpvars(raw_thm.conclusion)
     self.assertEqual(renumbered, want_thm.conclusion)

     # Theorem-level normalization must agree with it.
     self.assertEqual(normalization_lib.normalize(raw_thm), want_thm)
 def compute_embeddings_for_thms_from_db(
         self,
         theorem_database: proof_assistant_pb2.TheoremDatabase) -> None:
     """Embeds all theorems of the database and stores the result on self.

     Every theorem in `theorem_database.theorems` is normalized and only its
     conclusion is handed to the predictor. The batch of embeddings is saved
     as `self.thm_embeddings`.

     Args:
       theorem_database: Database whose theorems should be embedded.
     """
     conclusions = []
     for thm in theorem_database.theorems:
         conclusions.append(normalization_lib.normalize(thm).conclusion)
     self.thm_embeddings = self.predictor.batch_thm_embedding(conclusions)
 def test_normalize_ignoring_hypotheses(self):
     """Checks normalize() with consider_hypotheses=False.

     The expected theorem carries no hypotheses, so the normalized result is
     required to have an empty hypotheses field and a renumbered conclusion.
     """
     raw_thm = proof_assistant_pb2.Theorem(
         conclusion='(does contain types ?34598734958 and ?1234)')
     raw_thm.hypotheses.append('(and type ?122143)')
     want_thm = proof_assistant_pb2.Theorem(
         conclusion='(does contain types ?0 and ?1)')

     got_thm = normalization_lib.normalize(
         raw_thm, consider_hypotheses=False)

     self.assertEqual(got_thm.conclusion, want_thm.conclusion)
     self.assertEqual(got_thm.hypotheses, want_thm.hypotheses)
Ejemplo n.º 4
0
 def compute_network_based_closest(self, goal, thm_number):
   """Ranks preceding theorems by cosine distance to the goal's embedding.

   The goal is normalized and embedded as if it were a theorem, compared
   against the stored embeddings of the theorems preceding `thm_number`, and
   the MAX_CLOSEST nearest candidates are perturbed with Gaussian noise
   before being re-sorted.

   Args:
     goal: Goal to compare against; passed to normalization_lib.normalize.
     thm_number: Index handed to the embedding store to select the preceding
       theorems.

   Returns:
     A list of (noisy distance, theorem name) tuples sorted ascending, at most
     `self.options.max_theorem_parameters` long.
   """
   # TODO(kbk): Add unit tests for this section (similar_parameters).
   normalized_goal = normalization_lib.normalize(goal).conclusion
   goal_vec = self.predictor.thm_embedding(normalized_goal)
   premise_vecs = self.embedding_store.get_embeddings_for_preceding_thms(
       thm_number)

   # Every cosine distance lies in [0, 2].
   distance_matrix = scipy.spatial.distance.cdist(
       premise_vecs, goal_vec.reshape(1, -1), 'cosine')
   distances = distance_matrix.reshape(-1).tolist()

   candidates = sorted(zip(distances, self.thm_names))[:MAX_CLOSEST]
   report = '\n'.join(
       ['%s: %.6f' % (name, dist) for dist, name in candidates])
   tf.logging.info('Cosine closest in premise embedding space:\n%s', report)

   # Jitter the top candidates and rank them again.
   jitter = np.random.normal(scale=0.2, size=MAX_CLOSEST)
   noisy = []
   for (dist, name), eps in zip(candidates, jitter):
     noisy.append((dist + eps, name))
   noisy.sort()
   return noisy[:self.options.max_theorem_parameters]
Ejemplo n.º 5
0
  def _get_theorem_scores(self, proof_state_enc, thm_number: int,
                          tactic_id: int):
    """Scores every theorem that precedes `thm_number` against a proof state.

    When an embedding store is available the whole computation is delegated
    to it. Otherwise the preceding theorems are normalized, embedded in one
    batch and scored pairwise against the proof state encoding.

    Args:
       proof_state_enc: A numpy vector of the proof state encoding.
       thm_number: Index of the theorem in the theorem database.
       tactic_id: For tactic dependent prediction, provide tactic id.

    Returns:
       The theorem scores for all preceding theorems, in the order they appear
       in the theorem database, as produced by the embedding store or the
       predictor. An empty list is returned when there are no embeddings to
       score.
    """
    store = self.embedding_store
    if store:
      return store.get_thm_scores_for_preceding_thms(proof_state_enc,
                                                     thm_number, tactic_id)

    preceding = self.theorem_database.theorems[:thm_number]

    if not preceding:
      thms_emb = np.empty([0])
    else:
      # TODO(smloos): update predictions API to use proof_assistant_pb2.Theorem
      conclusions = []
      for thm in preceding:
        conclusions.append(normalization_lib.normalize(thm).conclusion)
      thms_emb = self.predictor.batch_thm_embedding(conclusions)
    tf.logging.debug(thms_emb)

    # Default to "no scores"; only query the predictor if something was
    # embedded.
    thm_scores = []
    if len(thms_emb):  # pylint: disable=g-explicit-length-test
      thm_scores = self.predictor.batch_thm_scores(proof_state_enc, thms_emb,
                                                   tactic_id)
    tf.logging.debug(thm_scores)
    return thm_scores
Ejemplo n.º 6
0
  def step(self, node: proof_search_tree.ProofSearchNode,
           premises: proof_assistant_pb2.PremiseSet) -> List[Suggestion]:
    """Generates a list of possible ApplyTactic argument strings from a goal.

    Args:
      node: state of the proof search, starting at current goal.
      premises: Specification of the selection of premises that can be used for
        tactic parameters. Currently we are supporting only a single
        DatabaseSection.

    Returns:
      List of string arguments for HolLight.ApplyTactic function, along with
      scores (Suggestion).

    Raises:
      AssertionError: If `premises` has reference sets, does not have exactly
        one section, lacks `before_premise`, or if the premise fingerprint
        cannot be resolved to a matching theorem in the database.
    """
    assert not premises.reference_sets, ('Premise reference sets are not '
                                         'supported.')
    assert len(premises.sections) == 1, ('Premise set must have exactly one '
                                         'section.')
    # TODO(szegedy): If the premise is not specified, we want the whole
    # database to be used. Not sure if -1 or len(database.theorems) would do
    # that or not. Assertion will certainly fail before that.
    # Also we don't have checks on this use case.
    assert premises.sections[0].HasField('before_premise'), ('Premise is '
                                                             'required.')
    fp = premises.sections[0].before_premise
    thm_number = self.thm_index_by_fingerprint.get(fp)
    assert thm_number is not None
    assert theorem_fingerprint.Fingerprint(
        self.theorem_database.theorems[thm_number]) == fp
    thm_names = self.thm_names[:thm_number]
    tf.logging.debug(thm_names)
    # TODO(smloos): update predictor api to accept theorems directly
    proof_state = predictions.ProofState(
        goal=str(normalization_lib.normalize(node.goal).conclusion))
    proof_state_emb = self.predictor.proof_state_embedding(proof_state)
    proof_state_enc = self.predictor.proof_state_encoding(proof_state_emb)
    tf.logging.debug(proof_state_enc)
    tactic_scores = self._compute_tactic_scores(proof_state_enc)

    # Embedding of the empty string, used to score "pass no theorem
    # arguments".
    empty_emb = self.predictor.thm_embedding('')
    empty_emb_batch = np.reshape(empty_emb, [1, empty_emb.shape[0]])

    enumerated_tactics = enumerate(self.tactics)
    if self.options.asm_meson_only:
      enumerated_tactics = [
          v for v in enumerated_tactics if str(v[1].name) == 'ASM_MESON_TAC'
      ]
      assert enumerated_tactics, (
          'action generator option asm_meson_only requires ASM_MESON_TAC.')

    ranked_closest = self.compute_closest(node.goal, thm_number)
    if ranked_closest:
      tf.logging.info(
          'Cosine closest picked:\n%s', '\n'.join(
              ['%s: %.6f' % (name, score) for score, name in ranked_closest]))

    ret = []
    thm_scores = None
    # TODO(smloos): This computes parameters for all tactics. It should cut off
    # based on the prover BFS options.
    for tactic_id, tactic in enumerated_tactics:
      # Scores are computed once, unless the model conditions parameters on
      # the tactic, in which case they are recomputed for every tactic.
      if (thm_scores is None or self.model_architecture ==
          deephol_pb2.ProverOptions.PARAMETERS_CONDITIONED_ON_TAC):
        thm_scores = self._get_theorem_scores(proof_state_enc, thm_number,
                                              tactic_id)
        tf.logging.debug(thm_scores)
        no_params_score = self.predictor.batch_thm_scores(
            proof_state_enc, empty_emb_batch, tactic_id)[0]
        tf.logging.info('Theorem score for empty theorem: %0.2f',
                        no_params_score)

      thm_ranked = sorted(
          zip(thm_scores, self.thm_names),
          reverse=True)[:self.options.max_theorem_parameters]
      # With no preceding theorems there is nothing to compare against (and
      # thm_ranked[-1] would raise IndexError), so fall back to suggesting no
      # arguments. Otherwise suggest it when the weakest kept theorem scores
      # below the empty-theorem score.
      pass_no_arguments = (not thm_ranked or
                           thm_ranked[-1][0] < no_params_score)
      thm_ranked = self.add_similar(thm_ranked, ranked_closest)

      tf.logging.info('thm_ranked: %s', str(thm_ranked))
      tactic_str = str(tactic.name)
      try:
        tactic_params = _compute_parameter_string(
            list(tactic.parameter_types), pass_no_arguments, thm_ranked)
        for params_str in tactic_params:
          ret.append(
              Suggestion(
                  string=tactic_str + params_str,
                  score=tactic_scores[tactic_id]))
      except ValueError as e:
        # A tactic whose parameters cannot be built is skipped, not fatal.
        tf.logging.warning('Failed to compute parameters for tactic %s: %s',
                           tactic.name, str(e))
    return ret
 def test_normalize_multiple_types_flipped(self):
     """Checks renumbering of two distinct type variables.

     The expectation maps the first-occurring id (the numerically larger one)
     to ?0 and the second to ?1.
     """
     raw_thm = proof_assistant_pb2.Theorem()
     raw_thm.conclusion = '(does contain types ?34598734958 and ?1234)'
     want_thm = proof_assistant_pb2.Theorem()
     want_thm.conclusion = '(does contain types ?0 and ?1)'
     got_thm = normalization_lib.normalize(raw_thm)
     self.assertEqual(got_thm, want_thm)
 def test_idempotency(self, expr):
     """Checks that normalizing an already normalized theorem is a no-op.

     Args:
       expr: Conclusion string used to build the theorem under test.
     """
     # Keep an independent copy of the first normalization result.
     once = proof_assistant_pb2.Theorem()
     once.CopyFrom(
         normalization_lib.normalize(
             proof_assistant_pb2.Theorem(conclusion=expr)))
     twice = normalization_lib.normalize(once)
     self.assertEqual(twice, once)
 def test_normalize_multiple_occurrences(self):
     """Checks that repeated occurrences of one id all map to ?0."""
     raw_thm = proof_assistant_pb2.Theorem()
     raw_thm.conclusion = '(does contain types ?1234 and ?1234)'
     want_thm = proof_assistant_pb2.Theorem()
     want_thm.conclusion = '(does contain types ?0 and ?0)'
     got_thm = normalization_lib.normalize(raw_thm)
     self.assertEqual(got_thm, want_thm)
 def test_normalize_single_type(self):
     """Checks that a single type variable id is renumbered to ?0."""
     raw_thm = proof_assistant_pb2.Theorem()
     raw_thm.conclusion = '(does contain type ?1234)'
     want_thm = proof_assistant_pb2.Theorem()
     want_thm.conclusion = '(does contain type ?0)'
     got_thm = normalization_lib.normalize(raw_thm)
     self.assertEqual(got_thm, want_thm)
 def test_gen_exists_exactly_one_quantifier(self):
     """Checks that the '?!' token is kept while '?123' becomes '?0'."""
     raw_thm = proof_assistant_pb2.Theorem()
     raw_thm.conclusion = '(does contain ?123 but ignores ?! quant)'
     want_thm = proof_assistant_pb2.Theorem()
     want_thm.conclusion = '(does contain ?0 but ignores ?! quant)'
     got_thm = normalization_lib.normalize(raw_thm)
     self.assertEqual(got_thm, want_thm)
 def test_normalize_trivial(self):
     """Checks that a conclusion without type variables normalizes to itself."""
     plain_thm = proof_assistant_pb2.Theorem()
     plain_thm.conclusion = '(does not contain types)'
     got_thm = normalization_lib.normalize(plain_thm)
     self.assertEqual(got_thm, plain_thm)
Ejemplo n.º 13
0
def _process_thms(thms):
  """Returns the normalized conclusion of every theorem in `thms`, in order."""
  conclusions = []
  for thm in thms:
    conclusions.append(normalization_lib.normalize(thm).conclusion)
  return conclusions