Example #1
0
def most_relevant_hyp(inter: ScrapedTactic) -> Tuple[str, float]:
    """Return the focused hypothesis most relevant to the focused goal.

    Every hypothesis in ``inter.context.focused_hyps`` is paired with the
    score ``term_relevance`` gives its type (as extracted by
    ``serapi_instance.get_hyp_type``) against ``inter.context.focused_goal``.
    The highest-scoring ``(hypothesis, score)`` pair is returned; on ties the
    earliest hypothesis wins.  With no hypotheses the result is ``("", 0)``.
    """
    goal, hyp_list = inter.context.focused_goal, inter.context.focused_hyps
    if not hyp_list:
        return "", 0

    scored_hyps = []
    for hyp_term in hyp_list:
        hyp_type = serapi_instance.get_hyp_type(hyp_term)
        scored_hyps.append((hyp_term, term_relevance(goal, hyp_type)))

    # max() keeps the first maximal element, so order of hyp_list breaks ties.
    return max(scored_hyps, key=lambda scored: scored[1])
Example #2
0
 def _predictDistribution(self, in_data : TacticContext) -> \
     Tuple[torch.FloatTensor, str]:
     """Run the model on the goal and its most relevant hypothesis.

     The hypothesis whose type scores highest under ``term_relevance``
     against ``in_data.goal`` is selected (``":"`` with relevance 0 when
     there are no hypotheses).  Its encoded type, the relevance score and
     the encoded goal are passed to ``self._run_model``.

     Returns the model output wrapped in a ``FloatTensor`` together with
     the first variable name of the selected hypothesis.
     """
     # Fallback used when the context has no hypotheses at all.
     relevant_hyp, relevance = ":", 0
     if len(in_data.hypotheses) > 0:
         scored_hyps = [
             (hyp, term_relevance(in_data.goal,
                                  serapi_instance.get_hyp_type(hyp)))
             for hyp in in_data.hypotheses
         ]
         relevant_hyp, relevance = max(scored_hyps,
                                       key=lambda scored: scored[1])

     relevant_hyp_type = serapi_instance.get_hyp_type(relevant_hyp)
     encoded_hyp = self._encode_term(relevant_hyp_type)
     encoded_goal = self._encode_term(in_data.goal)
     stem_distribution = self._run_model(encoded_hyp, [relevance],
                                         encoded_goal)
     distribution_tensor = FloatTensor(stem_distribution)
     return distribution_tensor, \
         serapi_instance.get_first_var_in_hyp(relevant_hyp)
Example #3
0
def get_closest_hyp(hyps : List[str], goal : str, max_length : int) -> str:
    """Return the hypothesis whose type scores highest against the goal.

    Both the goal and each hypothesis type (from
    ``serapi_instance.get_hyp_type``) are truncated with ``limitNumTokens``
    to ``max_length`` before being compared by ``score_hyp_type``.  On ties
    the earliest hypothesis in ``hyps`` is returned.

    Args:
        hyps: Hypothesis strings to choose from.
        goal: The goal term to compare against.
        max_length: Token limit passed to ``limitNumTokens`` and
            ``score_hyp_type``.

    Returns:
        The best-scoring hypothesis, or ``":"`` when ``hyps`` is empty.
    """
    if not hyps:
        return ":"

    # The truncated goal is the same for every hypothesis; compute it once
    # instead of re-tokenizing it inside the scoring key for each candidate.
    truncated_goal = limitNumTokens(goal, max_length)

    def closeness(hyp: str):
        truncated_hyp_type = limitNumTokens(
            serapi_instance.get_hyp_type(hyp), max_length)
        return score_hyp_type(truncated_goal, truncated_hyp_type, max_length)

    return max(hyps, key=closeness)
Example #4
0
def get_closest_hyps(hyps : List[str], goal : str, num_hyps : int, max_length : int)\
                        -> List[Tuple[str, float]]:
    """Rank hypotheses by how closely their type matches the goal.

    Each hypothesis is paired with the score ``score_hyp_type`` assigns to
    its truncated type against the truncated goal, and the pairs are
    returned best-score-first.

    Args:
        hyps: Hypothesis strings to rank.
        goal: The goal term to compare against.
        num_hyps: Number of placeholder entries returned when ``hyps`` is
            empty.
        max_length: Token limit passed to ``limitNumTokens`` and
            ``score_hyp_type``.

    Returns:
        ``(hypothesis, score)`` pairs in descending score order, or
        ``num_hyps`` copies of ``Prediction(":", 0)`` when ``hyps`` is empty.
    """
    if len(hyps) == 0:
        return [Prediction(":", 0)] * num_hyps

    # The truncated goal does not depend on the hypothesis; compute it once.
    truncated_goal = limitNumTokens(goal, max_length)
    scored_hyps = [
        (hyp, score_hyp_type(truncated_goal,
                             limitNumTokens(serapi_instance.get_hyp_type(hyp),
                                            max_length),
                             max_length))
        for hyp in hyps
    ]
    # Order by the score (element 1).  Sorting on element 0 would rank the
    # hypotheses reverse-alphabetically by their text and ignore the scores.
    scored_hyps.sort(key=lambda hyp_and_score: hyp_and_score[1], reverse=True)
    # NOTE(review): this branch returns every hypothesis, not just the top
    # num_hyps, unlike the empty branch -- confirm whether callers slice.
    return scored_hyps
Example #5
0
 def __call__(self, context: TacticContext) -> List[float]:
     """Compute a single feature: the best hypothesis/goal similarity.

     Each hypothesis type is cut to its first 100 characters and scored as
     ``SequenceMatcher`` ratio against the goal multiplied by the length of
     the (cut) type.  The highest score divided by 100 is returned in a
     one-element list; ``[0.]`` is returned when there are no hypotheses.
     """
     if not context.hypotheses:
         return [0.]

     truncated_types = []
     for hyp in context.hypotheses:
         truncated_types.append(serapi_instance.get_hyp_type(hyp)[:100])

     def length_weighted_similarity(hyp_type):
         matcher = SequenceMatcher(None, context.goal, hyp_type)
         return matcher.ratio() * len(hyp_type)

     best_hyp_score = max(map(length_weighted_similarity, truncated_types))
     return [best_hyp_score / 100]
Example #6
0
    def _predictDistribution(self,
                             in_data: TacticContext) -> torch.FloatTensor:
        """Predict a relevance value for every hypothesis in the context.

        Each hypothesis type and the goal are encoded with
        ``self._encode_term``.  The goal encoding is repeated once per
        hypothesis, concatenated to the hypothesis encodings along
        dimension 1, and fed to ``self._model``.  Column 1 of the model
        output is returned.
        """
        hypotheses = in_data.hypotheses
        hyp_terms = list(map(serapi_instance.get_hyp_type, hypotheses))
        encoded_hyps = FloatTensor(list(map(self._encode_term, hyp_terms)))

        # One copy of the goal encoding per hypothesis row.
        goal_row = FloatTensor(self._encode_term(in_data.goal)).view(1, -1)
        encoded_goals = goal_row.expand(len(hypotheses), -1)

        model_input = torch.cat((encoded_hyps, encoded_goals), dim=1)
        relevance_predictions = self._model(model_input)
        return relevance_predictions[:, 1]
Example #7
0
 def predictKTactics(self, in_data : TacticContext, k : int) -> List[Prediction]:
     """Suggest ``apply`` tactics for the hypotheses most similar to the goal.

     Hypotheses are ranked by the ``SequenceMatcher`` ratio between their
     type and the goal, best first.  At most ``k`` predictions of the form
     ``"apply <first var>."`` are returned, with certainty ``0.5 ** rank``.
     When there are no hypotheses, a single ``eauto`` prediction with
     certainty 0 is returned.
     """
     hypotheses = in_data.hypotheses
     if not hypotheses:
         return [Prediction("eauto", 0)]

     def goal_similarity(hyp):
         matcher = SequenceMatcher(None, serapi_instance.get_hyp_type(hyp),
                                   in_data.goal)
         return matcher.ratio()

     k = min(k, len(hypotheses))
     ranked_hyps = sorted(hypotheses, key=goal_similarity, reverse=True)

     predictions = []
     for rank, hyp in enumerate(ranked_hyps[:k]):
         tactic = "apply " + serapi_instance.get_first_var_in_hyp(hyp) + "."
         predictions.append(Prediction(tactic, .5 ** rank))
     return predictions
    def _encode_action(self, context: TacticContext, action: str) \
            -> Tuple[List[int], torch.FloatTensor]:
        """Encode a tactic string as discrete indices plus an argument vector.

        The action is split into a stem and an argument with
        ``serapi_instance.split_tactic``.  The stem is mapped to an index by
        ``encode_fpa_stem`` and the argument to an index by
        ``encode_fpa_arg``, using the context's hypotheses followed by its
        relevant lemmas as the premise list.

        The argument index selects one of three encodings:

        * ``0`` -- no argument: a zero vector.
        * ``1 .. max_length`` -- a goal token: the goal-token encoder output
          at that index, followed by zeros in place of premise features.
        * larger -- a premise: the hypothesis encoder output for that
          premise's type, followed by its premise features.

        Returns:
            ``([stem_idx, arg_type_idx], encoded_arg)`` where
            ``arg_type_idx`` is 0, 1 or 2 for the three cases above.
        """
        stem, argument = serapi_instance.split_tactic(action)
        stem_idx = encode_fpa_stem(self.dataloader_args, self.fpa_metadata,
                                   stem)
        # Premises are indexed over hypotheses first, then relevant lemmas.
        all_prems = context.hypotheses + context.relevant_lemmas
        arg_idx = encode_fpa_arg(self.dataloader_args, self.fpa_metadata,
                                 all_prems, context.goal, argument.strip())

        tokenized_goal = tokenize(self.dataloader_args, self.fpa_metadata,
                                  context.goal)
        premise_features_size = get_premise_features_size(
            self.dataloader_args, self.fpa_metadata)
        if arg_idx == 0:
            # No argument: all-zero vector.
            # NOTE(review): 128 is presumably the width of the encoder
            # outputs used in the other two branches -- confirm.
            arg_type_idx = 0
            encoded_arg = torch.zeros(128 + premise_features_size)
        elif arg_idx <= self.dataloader_args.max_length:
            # Goal token argument: take the goal-token encoder's output at
            # position arg_idx and pad with zeros where the premise features
            # would otherwise go.
            arg_type_idx = 1
            encoded_arg = torch.cat((self.predictor.goal_token_encoder(
                torch.LongTensor([stem_idx]), torch.LongTensor([
                    tokenized_goal
                ])).squeeze(0)[arg_idx].to(device=torch.device("cpu")),
                                     torch.zeros(premise_features_size)),
                                    dim=0)
        else:
            # Premise argument: indices above max_length address all_prems,
            # offset by max_length + 1.
            arg_type_idx = 2
            arg_hyp = all_prems[arg_idx -
                                (self.dataloader_args.max_length + 1)]
            entire_encoded_goal = self.predictor.entire_goal_encoder(
                torch.LongTensor([tokenized_goal]))
            tokenized_arg_hyp = tokenize(self.dataloader_args,
                                         self.fpa_metadata,
                                         serapi_instance.get_hyp_type(arg_hyp))
            # NOTE(review): concatenation along dim 0 assumes hyp_encoder
            # returns a 1-D tensor for a batch of one -- confirm.
            encoded_arg = torch.cat(
                (self.predictor.hyp_encoder(
                    torch.LongTensor([stem_idx]), entire_encoded_goal,
                    torch.LongTensor([tokenized_arg_hyp
                                      ])).to(device=torch.device("cpu")),
                 torch.FloatTensor(
                     get_premise_features(self.dataloader_args,
                                          self.fpa_metadata, context.goal,
                                          arg_hyp))),
                dim=0)

        return [stem_idx, arg_type_idx], encoded_arg
Example #9
0
 def __call__(self, context: TacticContext) -> int:
     """Return the keyword index of the closest hypothesis's head token.

     Hypothesis types and the goal are truncated with ``limitNumTokens`` to
     ``self.max_length``.  The closest hypothesis type is the one maximising
     the ``SequenceMatcher`` ratio against the goal times its symbol count.
     The result is the 1-based position of that type's first symbol in
     ``self.headKeywords``, or 0 when it is not a keyword or there are no
     hypotheses.
     """
     if not context.hypotheses:
         return 0

     hyp_types = []
     for hyp in context.hypotheses:
         hyp_types.append(
             limitNumTokens(serapi_instance.get_hyp_type(hyp),
                            self.max_length))
     goal = limitNumTokens(context.goal, self.max_length)

     def symbol_weighted_similarity(hyp_type):
         similarity = SequenceMatcher(None, goal, hyp_type).ratio()
         return similarity * len(get_symbols(hyp_type))

     closest_hyp_type = max(hyp_types, key=symbol_weighted_similarity)
     headToken = get_symbols(closest_hyp_type)[0]
     if headToken not in self.headKeywords:
         return 0
     # Index 0 is reserved for "not a keyword", so keywords start at 1.
     return self.headKeywords.index(headToken) + 1
Example #10
0
 def from_data(init_dataset: List[TacticContext],
               args: argparse.Namespace) -> 'TopLevelTokenInBestHyp':
     """Build a ``TopLevelTokenInBestHyp`` feature from a dataset.

     The first symbol of every hypothesis type in the dataset is counted.
     If ``args.load_head_keywords`` is set and that path exists, the keyword
     list is loaded from it with ``torch.load``; otherwise the
     ``args.num_head_keywords`` most common head tokens are used.  The
     chosen keywords are reported through ``eprint``, guarded by
     ``args.print_keywords``.
     """
     headTokenCounts: typing.Counter[str] = Counter()
     for _, _, hyps, _ in init_dataset:
         headTokenCounts.update(
             get_symbols(serapi_instance.get_hyp_type(hyp))[0]
             for hyp in hyps)

     keywords_path = args.load_head_keywords
     if keywords_path and Path2(keywords_path).exists():
         head_keywords = torch.load(keywords_path)
     else:
         most_common = headTokenCounts.most_common(args.num_head_keywords)
         head_keywords = [word for word, _ in most_common]
     result = TopLevelTokenInBestHyp(args, head_keywords)

     eprint("Hypothesis head keywords are {}".format(result.headKeywords),
            guard=args.print_keywords)
     return result
Example #11
0
 def _predictDistributions(self, in_datas : List[TacticContext]) -> torch.FloatTensor:
     """Run the model over a batch of tactic contexts.

     For each context the goal and the type of its closest hypothesis (per
     ``get_closest_hyp``) are tokenized and normalised to
     ``self.training_args.max_length``.  These are passed to ``self._model``
     together with the per-context vector and word features, and the model
     output is returned unchanged.
     """
     assert self._tokenizer
     assert self._embedding
     assert self.training_args
     max_length = self.training_args.max_length

     def to_normalized_tokens(term):
         return normalizeSentenceLength(self._tokenizer.toTokenList(term),
                                        max_length)

     goals_batch = [to_normalized_tokens(goal) for _, _, _, goal in in_datas]

     closest_hyps = [get_closest_hyp(context_hyps, goal, max_length)
                     for _, _, context_hyps, goal in in_datas]
     closest_hyp_types = [serapi_instance.get_hyp_type(hyp)
                          for hyp in closest_hyps]
     hyps_batch = [to_normalized_tokens(hyp_type)
                   for hyp_type in closest_hyp_types]

     word_features_batch = list(map(self._get_word_features, in_datas))
     vec_features_batch = list(map(self._get_vec_features, in_datas))

     return self._model(LongTensor(goals_batch),
                        LongTensor(hyps_batch),
                        FloatTensor(vec_features_batch),
                        LongTensor(word_features_batch))
Example #12
0
def get_closest_hyp_type(tokenizer : Tokenizer, max_length : int, context : TacticContext):
    """Tokenize the type of the hypothesis closest to the context's goal.

    The closest hypothesis is chosen by ``get_closest_hyp`` from
    ``context.hypotheses`` against ``context.goal``; its type is extracted
    with ``serapi_instance.get_hyp_type`` and converted with
    ``tokenizer.toTokenList``.
    """
    closest_hyp = get_closest_hyp(context.hypotheses, context.goal, max_length)
    closest_type = serapi_instance.get_hyp_type(closest_hyp)
    return tokenizer.toTokenList(closest_type)