def _encode_data(self, data : RawDataset, arg_values : Namespace) \
     -> Tuple[EncFeaturesDataset, Tuple[Tokenizer, Embedding,
                                        List[VecFeature], List[WordFeature]]]:
     stripped_data = [strip_scraped_output(dat) for dat in data]
     self._vec_feature_functions = [
         feature_constructor(stripped_data, arg_values) for  # type: ignore
         feature_constructor in vec_feature_constructors
     ]
     self._word_feature_functions = [
         feature_constructor(stripped_data, arg_values) for  # type: ignore
         feature_constructor in word_feature_constructors
     ]
     embedding, embedded_data = embed_data(data)
     tokenizer, tokenized_goals = tokenize_goals(embedded_data, arg_values)
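     # Each sample pairs the word/vec features of the goal context with the
     # length-normalized tokenized goal and the tactic label.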
     result_data = EncFeaturesDataset([
         EncFeaturesSample(
             self._get_vec_features(
                 TacticContext([], prev_tactics, hypotheses, goal)),
             self._get_word_features(
                 TacticContext([], prev_tactics, hypotheses, goal)),
             normalizeSentenceLength(tokenized_goal, arg_values.max_length),
             tactic)
         for (relevant_lemmas, prev_tactics, hypotheses, goal,
              tactic), tokenized_goal in zip(embedded_data, tokenized_goals)
     ])
     return result_data, (tokenizer, embedding, self._vec_feature_functions,
                          self._word_feature_functions)
Example #2
def commandLinePredict(predictor: EncDecRNNPredictor, k: int) -> None:
    sentence = ""
    next_line = sys.stdin.readline()
    while next_line != "+++++\n":
        sentence += next_line
        next_line = sys.stdin.readline()
    for result in predictor.predictKTactics(
            TacticContext([], [], [], sentence), k):
        print(result)
Example #3
    def gen_samples():
        for transition in transitions:
            if len(transition.before.fg_goals) == 0:
                continue
            context = TacticContext(transition.relevant_lemmas,
                                    transition.prev_tactics,
                                    transition.before.fg_goals[0].hypotheses,
                                    transition.before.fg_goals[0].goal)

            random_certainty = random.random()
            yield (context, transition.tactic,
                   random_certainty, random_certainty * 50)
Example #4
 def generate() -> Iterator[LabeledTransition]:
     for transition in transitions:
         if len(transition.before.fg_goals) == 0:
             context = TacticContext(transition.relevant_lemmas,
                                     transition.prev_tactics,
                                     [], "")
         else:
             context = TacticContext(
                 transition.relevant_lemmas,
                 transition.prev_tactics,
                 transition.before.fg_goals[0].hypotheses,
                 transition.before.fg_goals[0].goal)
         yield assign_reward(args,
                             transition.relevant_lemmas,
                             transition.prev_tactics,
                             context_r2py(transition.before),
                             context_r2py(transition.after),
                             transition.tactic,
                             certainty_of(predictor,
                                          args.num_predictions * 2,
                                          context,
                                          transition.tactic))
Example #5
def q_report(args: argparse.Namespace) -> None:
    num_originally_correct = 0
    num_correct = 0
    num_top3 = 0
    num_total = 0
    num_possible = 0

    predictor = predict_tactic.loadPredictorByFile(args.predictor_weights)
    q_estimator_name, *saved = torch.load(args.estimator_weights)
    q_estimator = FeaturesQEstimator(0, 0, 0)
    q_estimator.load_saved_state(*saved)

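    # For each scraped proof step, re-rank the predictor's top-k tactics by the
    # Q-estimator's scores and tally how often the correct tactic comes out on top.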
    for filename in args.test_files:
        points = dataloader.scraped_tactics_from_file(
            str(filename) + ".scrape", None)
        for point in points:
            context = TacticContext(point.relevant_lemmas, point.prev_tactics,
                                    point.prev_hyps, point.prev_goal)
            predictions = [
                p.prediction for p in predictor.predictKTactics(
                    context, args.num_predictions)
            ]
            q_choices = zip(
                q_estimator([(context, prediction)
                             for prediction in predictions]), predictions)
            ordered_actions = [
                p[1]
                for p in sorted(q_choices, key=lambda q: q[0], reverse=True)
            ]

            num_total += 1
            if point.tactic.strip() in predictions:
                num_possible += 1

            if ordered_actions[0] == point.tactic.strip():
                num_correct += 1

            if point.tactic.strip() in ordered_actions[:3]:
                num_top3 += 1

            if predictions[0] == point.tactic.strip():
                num_originally_correct += 1

    print(f"num_correct: {num_correct}")
    print(f"num_originally_correct: {num_originally_correct}")
    print(f"num_top3: {num_top3}")
    print(f"num_total: {num_total}")
    print(f"num_possible: {num_possible}")
Example #6
def mkHFSample(max_length : int,
               word_feature_functions : List[WordFeature],
               vec_feature_functions : List[VecFeature],
               zipped : Tuple[EmbeddedSample, List[int], List[int]]) \
    -> HypFeaturesSample:
    context, goal, best_hyp = zipped
    (relevant_lemmas, prev_tactic_list, hypotheses, goal_str, tactic) = context
    tac_context = TacticContext(relevant_lemmas, prev_tactic_list, hypotheses, goal_str)
    return HypFeaturesSample([feature(tac_context)
                              for feature in word_feature_functions],
                             [feature_val for feature in vec_feature_functions
                              for feature_val in feature(tac_context)],
                             normalizeSentenceLength(goal, max_length),
                             normalizeSentenceLength(best_hyp, max_length),
                             tactic)
def mkCopySample(max_length : int,
                 word_feature_functions : List[WordFeature],
                 vec_feature_functions : List[VecFeature],
                 zipped : Tuple[EmbeddedSample, List[int], int]) \
                 -> CopyArgSample:
    context, goal, arg_idx = zipped
    (relevant_lemmas, prev_tactic_list, hypotheses, goal_str,
     tactic_idx) = context
    tac_context = TacticContext(relevant_lemmas, prev_tactic_list, hypotheses,
                                goal_str)
    word_features = [
        feature(tac_context) for feature in word_feature_functions
    ]
    assert len(word_features) == 3
    return CopyArgSample(normalizeSentenceLength(goal, max_length),
                         word_features, [
                             feature_val for feature in vec_feature_functions
                             for feature_val in feature(tac_context)
                         ], tactic_idx, arg_idx)
Example #8
 def get_should_filter(data: MixedDataset) \
         -> Iterable[Tuple[ScrapedCommand, bool]]:
     list_data: List[ScrapedCommand] = list(data)
     extended_list: List[Optional[ScrapedCommand]] = \
         cast(List[Optional[ScrapedCommand]], list_data[1:]) + [None]
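     # Pair each command with its successor so the post-tactic context is
     # available to the filter.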
     for point, nextpoint in zip(list_data, extended_list):
         if isinstance(point, ScrapedTactic) \
            and not re.match(r"\s*[{}]\s*", point.tactic) and \
            point.context.focused_goal.strip() != "":
             if isinstance(nextpoint, ScrapedTactic):
                 context_after = strip_scraped_output(nextpoint)
             else:
                 context_after = TacticContext([], [], [], "")
             should_filter = not context_filter(strip_scraped_output(point),
                                                point.tactic, context_after,
                                                training_args)
             yield (point, should_filter)
         else:
             yield (point, True)
Example #9
    def generate() -> Iterator[Tuple[TacticContext, str, float, float]]:
        contexts_trunced = [truncate_tactic_context(
            transition.after_context,
            args.max_term_length)
                            for transition in transitions]
        prediction_lists = cast(features_polyarg_predictor
                                .FeaturesPolyargPredictor,
                                predictor) \
            .predictKTactics_batch(
                contexts_trunced,
                args.num_predictions,
                args.verbose)
        queries = [(truncate_tactic_context(transition.after_context,
                                            args.max_term_length),
                    prediction.prediction, prediction.certainty)
                   for transition, predictions in zip(transitions,
                                                      prediction_lists)
                   for prediction in predictions]
        estimate_lists_flattened = q_estimator(queries)
        estimate_lists = [
            estimate_lists_flattened[i:i + args.num_predictions]
            for i in range(0, len(estimate_lists_flattened),
                           args.num_predictions)]
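        # Compute the updated Q target for each transition: the immediate reward
        # plus the discounted best estimate over the successor context (or just
        # the reward when the proof is already finished).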
        for transition, estimates in zip(transitions, estimate_lists):
            before_ctxt = truncate_tactic_context(
                transition.before_context, args.max_term_length)

            if len(transition.after.all_goals) == 0:
                new_q = transition.reward
                assert new_q == 50
            else:
                estimated_future_q = \
                    args.time_discount * max(estimates)
                new_q = transition.reward + estimated_future_q

            yield TacticContext(
                transition.relevant_lemmas,
                transition.prev_tactics,
                before_ctxt.hypotheses,
                before_ctxt.goal), \
                transition.action, transition.original_certainty, new_q
Example #10
    def process_file(self, args : argparse.Namespace, file_idx : int, filename : str) \
        -> None:
        global gresult
        fresult = FileResult(filename)

        if self.debug:
            print("Preprocessing...")
        commands = self.get_commands(args, file_idx, filename)

        command_results: List[CommandResult] = []

        with serapi_instance.SerapiContext(self.coqargs, self.includes,
                                           self.prelude) as coq:
            coq.debug = self.debug
            nb_commands = len(commands)
            for i in range(nb_commands):
                command = commands[i]
                # print("Processing command {}/{}".format(str(i+1), str(nb_commands)))
                in_proof = (coq.proof_context
                            and not re.match(".*Proof.*", command.strip()))
                if re.match("[{}]", command):
                    coq.run_stmt(command)
                    continue
                if in_proof:
                    prev_tactics = coq.prev_tactics
                    initial_context = coq.proof_context
                    assert initial_context
                    hyps = coq.hypotheses
                    goals = coq.goals
                    relevant_lemmas = coq.local_lemmas
                    if self.baseline:
                        predictions_and_certainties = \
                            [(baseline_tactic + ".", 1)] * num_predictions
                        # no prediction loss is available in baseline mode
                        loss = 0
                    else:
                        predictions_and_certainties, loss = net.predictKTacticsWithLoss(
                            TacticContext(relevant_lemmas, prev_tactics, hyps,
                                          goals), num_predictions, command)

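                    # Run each predicted tactic in Coq, then run the actual
                    # command and grade every prediction against its result.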
                    prediction_runs = [
                        run_prediction(coq, prediction)
                        for prediction, certainty in predictions_and_certainties
                    ]

                    try:
                        coq.run_stmt(command)
                        actual_result_context = coq.proof_context
                        actual_result_goal = coq.goals
                        actual_result_hypotheses = coq.hypotheses
                        actual_result_lemmas = coq.local_lemmas
                        assert isinstance(actual_result_context, str)
                    except (AckError, CompletedError, CoqExn, BadResponse,
                            ParseError, LexError, TimeoutError):
                        print("In file {}:".format(filename))
                        raise

                    prediction_results = [
                        (prediction,
                         evaluate_prediction(fresult, initial_context, command,
                                             actual_result_context,
                                             prediction_run), certainty)
                        for prediction_run, (prediction, certainty) in zip(
                            prediction_runs, predictions_and_certainties)
                    ]
                    assert net.training_args
                    if self.cfilter(
                            TacticContext(relevant_lemmas, prev_tactics, hyps,
                                          goals), command,
                            TacticContext(actual_result_lemmas,
                                          prev_tactics + [command],
                                          actual_result_hypotheses,
                                          actual_result_goal),
                            net.training_args):
                        fresult.add_command_result([
                            pred for pred, ctxt, ex in prediction_runs
                        ], [
                            grade
                            for pred, grade, certainty in prediction_results
                        ], command, loss)

                        command_results.append(
                            (command, hyps, goals, prediction_results))
                    else:
                        command_results.append((command, ))
                else:
                    try:
                        coq.run_stmt(command)
                    except (AckError, CompletedError, CoqExn, BadResponse,
                            ParseError, LexError, TimeoutError):
                        print("In file {}:".format(filename))
                        raise
                    command_results.append((command, ))

        write_csv(fresult.details_filename(), self.output_dir, gresult.options,
                  command_results)

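        # Render a per-file HTML report (yattag) listing each command alongside
        # its predictions, grades, and certainties.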
        doc, tag, text, line = Doc().ttl()

        with tag('html'):
            details_header(tag, doc, text, filename)
            with tag('div', id='overlay', onclick='event.stopPropagation();'):
                with tag('div', id='predicted'):
                    pass
                with tag('div', id='context'):
                    pass
                with tag('div', id='stats'):
                    pass
                pass
            with tag('body',
                     onclick='deselectTactic()',
                     onload='setSelectedIdx()'), tag('pre'):
                for idx, command_result in enumerate(command_results):
                    if len(command_result) == 1:
                        with tag('code', klass='plaincommand'):
                            text(command_result[0])
                    else:
                        command, hyps, goal, prediction_results = \
                            cast(TacticResult, command_result)
                        predictions, grades, certainties = zip(
                            *prediction_results)
                        search_index = 0
                        for pidx, prediction_result in enumerate(
                                prediction_results):
                            prediction, grade, certainty = prediction_result
                            if (grade != "failedcommand"
                                    and grade != "superfailedcommand"):
                                search_index = pidx
                                break
                        with tag(
                                'span', ('data-hyps', "\n".join(hyps)),
                            ('data-goal', shorten_whitespace(goal)),
                            ('data-num-total', str(fresult.num_tactics)),
                            ('data-predictions',
                             to_list_string(cast(List[str], predictions))),
                            ('data-num-predicteds',
                             to_list_string([
                                 fresult.predicted_tactic_frequency.get(
                                     get_stem(prediction), 0)
                                 for prediction in cast(
                                     List[str], predictions)
                             ])),
                            ('data-num-corrects',
                             to_list_string([
                                 fresult.correctly_predicted_frequency.get(
                                     get_stem(prediction), 0)
                                 for prediction in cast(
                                     List[str], predictions)
                             ])),
                            ('data-certainties',
                             to_list_string(cast(List[float], certainties))),
                            ('data-num-actual-corrects',
                             fresult.correctly_predicted_frequency.get(
                                 get_stem(command), 0)),
                            ('data-num-actual-in-file',
                             fresult.actual_tactic_frequency.get(
                                 get_stem(command))),
                            ('data-actual-tactic', strip_comments(command)),
                            ('data-grades',
                             to_list_string(cast(List[str], grades))),
                            ('data-search-idx', search_index),
                                id='command-' + str(idx),
                                onmouseover='hoverTactic({})'.format(idx),
                                onmouseout='unhoverTactic()',
                                onclick=
                                'selectTactic({}); event.stopPropagation();'.
                                format(idx)):
                            doc.stag("br")
                            for pred_idx, prediction_result in enumerate(
                                    prediction_results):
                                prediction, grade, certainty = prediction_result
                                if search_index == pred_idx:
                                    with tag('code', klass=grade):
                                        text(" " + command.strip())
                                else:
                                    with tag('span', klass=grade):
                                        doc.asis(" ⬤")

        with open(
                "{}/{}.html".format(self.output_dir,
                                    fresult.details_filename()), "w") as fout:
            fout.write(doc.getvalue())

        gresult.add_file_result(fresult)
        rows.put(fresult)
Example #11
 def predictKTacticsWithLoss_batch(self,
                                   in_data : List[TacticContext],
                                   k : int, correct : List[str]) -> \
                                   Tuple[List[List[Prediction]], float]:
     return [self.predictKTactics(TacticContext([], [], [], ""), k)
             ] * len(in_data), 0.
Example #12
 def before_context(self) -> TacticContext:
     return TacticContext(self.relevant_lemmas, self.prev_tactics,
                          self.before.focused_hyps,
                          self.before.focused_goal)
Example #13
 def after_context(self) -> TacticContext:
     return TacticContext(self.relevant_lemmas, self.prev_tactics,
                          self.after.focused_hyps, self.after.focused_goal)