def _encode_data(self, data: RawDataset, arg_values: Namespace) \
    -> Tuple[EncFeaturesDataset,
             Tuple[Tokenizer, Embedding,
                   List[VecFeature], List[WordFeature]]]:
    stripped_data = [strip_scraped_output(dat) for dat in data]
    self._vec_feature_functions = [
        feature_constructor(stripped_data, arg_values) for  # type: ignore
        feature_constructor in vec_feature_constructors]
    self._word_feature_functions = [
        feature_constructor(stripped_data, arg_values) for  # type: ignore
        feature_constructor in word_feature_constructors]
    embedding, embedded_data = embed_data(data)
    tokenizer, tokenized_goals = tokenize_goals(embedded_data, arg_values)
    result_data = EncFeaturesDataset([
        EncFeaturesSample(
            self._get_vec_features(
                TacticContext([], prev_tactics, hypotheses, goal)),
            self._get_word_features(
                TacticContext([], prev_tactics, hypotheses, goal)),
            normalizeSentenceLength(tokenized_goal, arg_values.max_length),
            tactic)
        for (relevant_lemmas, prev_tactics, hypotheses, goal, tactic),
            tokenized_goal
        in zip(embedded_data, tokenized_goals)])
    return result_data, (tokenizer, embedding,
                         self._vec_feature_functions,
                         self._word_feature_functions)
def commandLinePredict(predictor: EncDecRNNPredictor, k: int) -> None:
    # Read the goal text from stdin until the "+++++" terminator line.
    sentence = ""
    next_line = sys.stdin.readline()
    while next_line != "+++++\n":
        sentence += next_line
        next_line = sys.stdin.readline()
    for result in predictor.predictKTactics(
            TacticContext([], [], [], sentence), k):
        print(result)
def gen_samples():
    for transition in transitions:
        if len(transition.before.fg_goals) == 0:
            continue
        context = TacticContext(transition.relevant_lemmas,
                                transition.prev_tactics,
                                transition.before.fg_goals[0].hypotheses,
                                transition.before.fg_goals[0].goal)
        random_certainty = random.random()
        yield (context, transition.tactic,
               random_certainty, random_certainty * 50)
def generate() -> Iterator[LabeledTransition]:
    for transition in transitions:
        if len(transition.before.fg_goals) == 0:
            context = TacticContext(transition.relevant_lemmas,
                                    transition.prev_tactics,
                                    [], "")
        else:
            context = TacticContext(
                transition.relevant_lemmas,
                transition.prev_tactics,
                transition.before.fg_goals[0].hypotheses,
                transition.before.fg_goals[0].goal)
        yield assign_reward(args,
                            transition.relevant_lemmas,
                            transition.prev_tactics,
                            context_r2py(transition.before),
                            context_r2py(transition.after),
                            transition.tactic,
                            certainty_of(predictor,
                                         args.num_predictions * 2,
                                         context,
                                         transition.tactic))
def q_report(args: argparse.Namespace) -> None:
    num_originally_correct = 0
    num_correct = 0
    num_top3 = 0
    num_total = 0
    num_possible = 0
    predictor = predict_tactic.loadPredictorByFile(args.predictor_weights)
    q_estimator_name, *saved = \
        torch.load(args.estimator_weights)
    q_estimator = FeaturesQEstimator(0, 0, 0)
    q_estimator.load_saved_state(*saved)

    for filename in args.test_files:
        points = dataloader.scraped_tactics_from_file(
            str(filename) + ".scrape", None)
        for point in points:
            context = TacticContext(point.relevant_lemmas,
                                    point.prev_tactics,
                                    point.prev_hyps,
                                    point.prev_goal)
            predictions = [p.prediction for p in
                           predictor.predictKTactics(
                               context, args.num_predictions)]
            # Re-rank the predictor's suggestions by their estimated Q values.
            q_choices = zip(q_estimator([(context, prediction)
                                         for prediction in predictions]),
                            predictions)
            ordered_actions = [p[1] for p in
                               sorted(q_choices,
                                      key=lambda q: q[0],
                                      reverse=True)]

            num_total += 1
            if point.tactic.strip() in predictions:
                num_possible += 1
            if ordered_actions[0] == point.tactic.strip():
                num_correct += 1
            if point.tactic.strip() in ordered_actions[:3]:
                num_top3 += 1
            if predictions[0] == point.tactic.strip():
                num_originally_correct += 1

    print(f"num_correct: {num_correct}")
    print(f"num_originally_correct: {num_originally_correct}")
    print(f"num_top3: {num_top3}")
    print(f"num_total: {num_total}")
    print(f"num_possible: {num_possible}")
def mkHFSample(max_length: int,
               word_feature_functions: List[WordFeature],
               vec_feature_functions: List[VecFeature],
               zipped: Tuple[EmbeddedSample, List[int], List[int]]) \
        -> HypFeaturesSample:
    context, goal, best_hyp = zipped
    (relevant_lemmas, prev_tactic_list, hypotheses,
     goal_str, tactic) = context
    tac_context = TacticContext(relevant_lemmas, prev_tactic_list,
                                hypotheses, goal_str)
    return HypFeaturesSample([feature(tac_context)
                              for feature in word_feature_functions],
                             [feature_val
                              for feature in vec_feature_functions
                              for feature_val in feature(tac_context)],
                             normalizeSentenceLength(goal, max_length),
                             normalizeSentenceLength(best_hyp, max_length),
                             tactic)
def mkCopySample(max_length: int,
                 word_feature_functions: List[WordFeature],
                 vec_feature_functions: List[VecFeature],
                 zipped: Tuple[EmbeddedSample, List[int], int]) \
        -> CopyArgSample:
    context, goal, arg_idx = zipped
    (relevant_lemmas, prev_tactic_list, hypotheses,
     goal_str, tactic_idx) = context
    tac_context = TacticContext(relevant_lemmas, prev_tactic_list,
                                hypotheses, goal_str)
    word_features = [feature(tac_context)
                     for feature in word_feature_functions]
    assert len(word_features) == 3
    return CopyArgSample(normalizeSentenceLength(goal, max_length),
                         word_features,
                         [feature_val
                          for feature in vec_feature_functions
                          for feature_val in feature(tac_context)],
                         tactic_idx, arg_idx)
def get_should_filter(data: MixedDataset) \
        -> Iterable[Tuple[ScrapedCommand, bool]]:
    list_data: List[ScrapedCommand] = list(data)
    extended_list: List[Optional[ScrapedCommand]] = \
        cast(List[Optional[ScrapedCommand]], list_data[1:]) + [None]
    for point, nextpoint in zip(list_data, extended_list):
        if isinstance(point, ScrapedTactic) \
           and not re.match(r"\s*[{}]\s*", point.tactic) and \
           point.context.focused_goal.strip() != "":
            if isinstance(nextpoint, ScrapedTactic):
                context_after = strip_scraped_output(nextpoint)
            else:
                context_after = TacticContext([], [], [], "")
            should_filter = not context_filter(strip_scraped_output(point),
                                               point.tactic,
                                               context_after,
                                               training_args)
            yield (point, should_filter)
        else:
            yield (point, True)
def generate() -> Iterator[Tuple[TacticContext, str, float, float]]:
    contexts_trunced = [truncate_tactic_context(
        transition.after_context, args.max_term_length)
        for transition in transitions]
    prediction_lists = cast(features_polyarg_predictor
                            .FeaturesPolyargPredictor, predictor) \
        .predictKTactics_batch(contexts_trunced,
                               args.num_predictions,
                               args.verbose)
    queries = [(truncate_tactic_context(transition.after_context,
                                        args.max_term_length),
                prediction.prediction, prediction.certainty)
               for transition, predictions in zip(transitions,
                                                  prediction_lists)
               for prediction in predictions]
    estimate_lists_flattened = q_estimator(queries)
    # Regroup the flat estimate list back into one list per transition.
    estimate_lists = [estimate_lists_flattened[i:i+args.num_predictions]
                      for i in range(0, len(estimate_lists_flattened),
                                     args.num_predictions)]
    for transition, estimates in zip(transitions, estimate_lists):
        before_ctxt = truncate_tactic_context(
            transition.before_context, args.max_term_length)
        if len(transition.after.all_goals) == 0:
            # No goals remain after the transition, so there is no future
            # value to estimate; the target is the terminal reward.
            new_q = transition.reward
            assert new_q == 50
        else:
            estimated_future_q = \
                args.time_discount * max(estimates)
            new_q = transition.reward + estimated_future_q
        yield (TacticContext(transition.relevant_lemmas,
                             transition.prev_tactics,
                             before_ctxt.hypotheses,
                             before_ctxt.goal),
               transition.action, transition.original_certainty, new_q)
def process_file(self, args: argparse.Namespace, file_idx: int,
                 filename: str) -> None:
    global gresult
    fresult = FileResult(filename)

    if self.debug:
        print("Preprocessing...")
    commands = self.get_commands(args, file_idx, filename)

    command_results: List[CommandResult] = []

    with serapi_instance.SerapiContext(self.coqargs,
                                       self.includes,
                                       self.prelude) as coq:
        coq.debug = self.debug
        nb_commands = len(commands)
        for i in range(nb_commands):
            command = commands[i]
            # print("Processing command {}/{}".format(str(i+1), str(nb_commands)))
            in_proof = (coq.proof_context and
                        not re.match(".*Proof.*", command.strip()))
            if re.match("[{}]", command):
                coq.run_stmt(command)
                continue
            if in_proof:
                prev_tactics = coq.prev_tactics
                initial_context = coq.proof_context
                assert initial_context
                hyps = coq.hypotheses
                goals = coq.goals
                relevant_lemmas = coq.local_lemmas
                if self.baseline:
                    # The baseline always suggests the same tactic with full
                    # certainty; it has no model loss to report.
                    predictions_and_certainties = \
                        [(baseline_tactic + ".", 1)] * num_predictions
                    loss = 0
                else:
                    predictions_and_certainties, loss = \
                        net.predictKTacticsWithLoss(
                            TacticContext(relevant_lemmas, prev_tactics,
                                          hyps, goals),
                            num_predictions, command)
                prediction_runs = [run_prediction(coq, prediction)
                                   for prediction, certainty
                                   in predictions_and_certainties]
                try:
                    coq.run_stmt(command)
                    actual_result_context = coq.proof_context
                    actual_result_goal = coq.goals
                    actual_result_hypotheses = coq.hypotheses
                    actual_result_lemmas = coq.local_lemmas
                    assert isinstance(actual_result_context, str)
                except (AckError, CompletedError, CoqExn, BadResponse,
                        ParseError, LexError, TimeoutError):
                    print("In file {}:".format(filename))
                    raise
                prediction_results = [
                    (prediction,
                     evaluate_prediction(fresult, initial_context,
                                         command, actual_result_context,
                                         prediction_run),
                     certainty)
                    for prediction_run, (prediction, certainty)
                    in zip(prediction_runs, predictions_and_certainties)]
                assert net.training_args
                if self.cfilter(TacticContext(relevant_lemmas, prev_tactics,
                                              hyps, goals),
                                command,
                                TacticContext(actual_result_lemmas,
                                              prev_tactics + [command],
                                              actual_result_hypotheses,
                                              actual_result_goal),
                                net.training_args):
                    fresult.add_command_result(
                        [pred for pred, ctxt, ex in prediction_runs],
                        [grade for pred, grade, certainty
                         in prediction_results],
                        command, loss)
                    command_results.append((command, hyps, goals,
                                            prediction_results))
                else:
                    command_results.append((command,))
            else:
                try:
                    coq.run_stmt(command)
                except (AckError, CompletedError, CoqExn, BadResponse,
                        ParseError, LexError, TimeoutError):
                    print("In file {}:".format(filename))
                    raise
                command_results.append((command,))

    write_csv(fresult.details_filename(), self.output_dir,
              gresult.options, command_results)

    doc, tag, text, line = Doc().ttl()
    with tag('html'):
        details_header(tag, doc, text, filename)
        with tag('div', id='overlay', onclick='event.stopPropagation();'):
            with tag('div', id='predicted'):
                pass
            with tag('div', id='context'):
                pass
            with tag('div', id='stats'):
                pass
        with tag('body', onclick='deselectTactic()',
                 onload='setSelectedIdx()'), tag('pre'):
            for idx, command_result in enumerate(command_results):
                if len(command_result) == 1:
                    with tag('code', klass='plaincommand'):
                        text(command_result[0])
                else:
                    command, hyps, goal, prediction_results = \
                        cast(TacticResult, command_result)
                    predictions, grades, certainties = zip(
                        *prediction_results)
                    # Pick the first prediction that didn't fail outright.
                    search_index = 0
                    for pidx, prediction_result in enumerate(
                            prediction_results):
                        prediction, grade, certainty = prediction_result
                        if (grade != "failedcommand" and
                                grade != "superfailedcommand"):
                            search_index = pidx
                            break
                    with tag('span',
                             ('data-hyps', "\n".join(hyps)),
                             ('data-goal', shorten_whitespace(goal)),
                             ('data-num-total', str(fresult.num_tactics)),
                             ('data-predictions',
                              to_list_string(cast(List[str], predictions))),
                             ('data-num-predicteds',
                              to_list_string([
                                  fresult.predicted_tactic_frequency.get(
                                      get_stem(prediction), 0)
                                  for prediction in cast(List[str],
                                                         predictions)])),
                             ('data-num-corrects',
                              to_list_string([
                                  fresult.correctly_predicted_frequency.get(
                                      get_stem(prediction), 0)
                                  for prediction in cast(List[str],
                                                         predictions)])),
                             ('data-certainties',
                              to_list_string(cast(List[float],
                                                  certainties))),
                             ('data-num-actual-corrects',
                              fresult.correctly_predicted_frequency.get(
                                  get_stem(command), 0)),
                             ('data-num-actual-in-file',
                              fresult.actual_tactic_frequency.get(
                                  get_stem(command))),
                             ('data-actual-tactic', strip_comments(command)),
                             ('data-grades',
                              to_list_string(cast(List[str], grades))),
                             ('data-search-idx', search_index),
                             id='command-' + str(idx),
                             onmouseover='hoverTactic({})'.format(idx),
                             onmouseout='unhoverTactic()',
                             onclick=('selectTactic({}); '
                                      'event.stopPropagation();'
                                      ).format(idx)):
                        doc.stag("br")
                        for idx, prediction_result in enumerate(
                                prediction_results):
                            prediction, grade, certainty = prediction_result
                            if search_index == idx:
                                with tag('code', klass=grade):
                                    text(" " + command.strip())
                            else:
                                with tag('span', klass=grade):
                                    doc.asis(" ⬤")

    with open("{}/{}.html".format(self.output_dir,
                                  fresult.details_filename()),
              "w") as fout:
        fout.write(doc.getvalue())
    gresult.add_file_result(fresult)
    rows.put(fresult)
def predictKTacticsWithLoss_batch(self,
                                  in_data: List[TacticContext],
                                  k: int, correct: List[str]) \
        -> Tuple[List[List[Prediction]], float]:
    return [self.predictKTactics(TacticContext([], [], [], ""), k)
            ] * len(in_data), 0.
def before_context(self) -> TacticContext:
    return TacticContext(self.relevant_lemmas,
                         self.prev_tactics,
                         self.before.focused_hyps,
                         self.before.focused_goal)
def after_context(self) -> TacticContext:
    return TacticContext(self.relevant_lemmas,
                         self.prev_tactics,
                         self.after.focused_hyps,
                         self.after.focused_goal)