def count_lengths(args: argparse.Namespace, filename: str):
    """Write a CSV of (lemma statement, proof length) rows for one file.

    Reads the scrape at ``<prelude>/<filename>.scrape`` and the commands of
    ``<prelude>/<filename>`` (or its ``.lin`` sibling when
    ``args.post_linear`` is set), and writes ``<prelude>/<filename>.csv``.

    A command that ``serapi_instance.possibly_starting_proof`` accepts only
    opens a proof when the scraped entry following its match is a
    ``ScrapedTactic``; otherwise that entry is pushed back onto the scrape
    iterator and the command is ignored.

    Inside a proof, every command counts as one step except those that start
    with ``{`` or ``}``, consist solely of ``-``/``*``/``+`` characters, or
    start with ``Proof.``.  When ``args.add_semis`` or ``args.post_linear``
    is set, the result of ``count_outside_matching`` for ``;`` outside
    ``{| ... |}`` is added to the step count as well.

    Returns:
        The path of the CSV file that was written.

    Raises:
        StopIteration: if the scrape runs out before an entry matching a
            proof-starting command is found.
    """
    print(f"Counting {filename}")
    full_filename = args.prelude + "/" + filename
    scraped_commands = list(
        read_all_text_data(Path2(full_filename + ".scrape")))
    scraped_iter = iter(scraped_commands)
    commands_path = (full_filename + ".lin" if args.post_linear
                     else full_filename)
    original_commands = serapi_instance.load_commands_preserve(
        args, 0, commands_path)

    # newline='' lets the csv module control line endings itself.
    with open(full_filename + ".csv", 'w', newline='') as fout:
        rowwriter = csv.writer(fout)
        lemma_statement = ""
        in_proof = False
        cur_len = 0
        for cmd in original_commands:
            if not in_proof:
                if not serapi_instance.possibly_starting_proof(cmd):
                    continue
                # Advance the scrape until it reaches this same command.
                normalized_command = norm(cmd)
                cur_scraped = norm(next(scraped_iter))
                while cur_scraped != normalized_command:
                    cur_scraped = norm(next(scraped_iter))
                try:
                    next_after_start = next(scraped_iter)
                except StopIteration:
                    next_after_start = ""
                if isinstance(next_after_start, ScrapedTactic):
                    lemma_statement = normalized_command
                    in_proof = True
                    cur_len = 0
                else:
                    # Not followed by a tactic: put the entry back so the
                    # next search still sees it.
                    scraped_iter = itertools.chain([next_after_start],
                                                   scraped_iter)
            elif serapi_instance.ending_proof(cmd):
                rowwriter.writerow([lemma_statement.strip(), cur_len])
                cur_len = -1
                in_proof = False
            else:
                assert cur_len >= 0
                normalized = norm(cmd)
                # Braces and bullets are structure, not proof steps.  The
                # hyphen is placed first in the class so it is a literal
                # character rather than forming a "*-+" range.
                if re.match(r"[{}]|[-*+]*$", normalized):
                    continue
                if re.match(r"Proof\.", normalized):
                    continue
                cur_len += 1
                if args.add_semis or args.post_linear:
                    cur_len += count_outside_matching(
                        r"\{\|", r"\|\}", ";", normalized)
    return full_filename + ".csv"
def main():
    """Command-line entry point: linearize each named proof file.

    For every positional filename, the commands of ``<prelude>/<filename>``
    are loaded, passed through ``preprocess_file_commands`` with a
    ``sertop --implicit`` command line, and the result is handed to
    ``serapi_instance.save_lin`` for that same path.
    """
    parser = argparse.ArgumentParser(description="linearize a set of files")
    parser.add_argument('--prelude', default=".")
    parser.add_argument('--hardfail', action='store_true')
    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('--skip-nochange-tac', action='store_true',
                        dest='skip_nochange_tac')
    parser.add_argument("--progress", action='store_true')
    parser.add_argument('filenames', nargs="+",
                        help="proof file name (*.v)")
    arg_values = parser.parse_args()

    coqargs = ["sertop", "--implicit"]

    for filename in arg_values.filenames:
        if arg_values.verbose:
            eprint("Linearizing {}".format(filename))
        # The prelude-qualified path is built once and reused for loading,
        # preprocessing and saving.
        local_filename = arg_values.prelude + "/" + filename
        original_commands = serapi_instance.load_commands_preserve(
            arg_values, 0, local_filename)
        fresh_commands = preprocess_file_commands(
            arg_values, 0, original_commands, coqargs,
            arg_values.prelude, local_filename, filename, False)
        serapi_instance.save_lin(fresh_commands, local_filename)
def get_file_commands(args: argparse.Namespace, file_idx: int,
                      filename: str) -> List[str]:
    """Return the commands for ``filename``, preferring a saved linearization.

    The file is looked up at ``args.prelude + "/" + filename``.  When
    ``try_load_lin`` returns ``None`` for that path, a warning is printed and
    the result of ``load_commands_preserve`` for the same path is returned
    instead.
    """
    path = args.prelude + "/" + filename
    linearized = try_load_lin(args, file_idx, path)
    if linearized is not None:
        return linearized

    print("Warning: this version of the reports can't linearize files! "
          "Using original commands.")
    return load_commands_preserve(args, file_idx, path)
def scrape_file(coqargs: List[str], args: argparse.Namespace, includes: str,
                file_tuple: Tuple[int, str]) -> Optional[str]:
    """Scrape one file, writing the output next to it as ``<file>.scrape``.

    ``file_tuple`` is ``(file_idx, filename)``; the file is read from
    ``args.prelude + "/" + filename``.  Output is first written to
    ``<file>.scrape.partial`` and moved to ``<file>.scrape`` once every
    command has been processed.  ``includes`` is not used by this function.

    Returns:
        The ``.scrape`` path on success, or when ``args.cont`` is set and
        that file can already be opened; the ``.scrape.partial`` path when a
        ``serapi_instance.TimeoutError`` interrupts processing; ``None`` when
        any other exception is caught and not re-raised.

    Raises:
        Exception: whatever was caught, re-raised when ``args.hardfail`` or
            ``args.hardfail_scrape`` is set or ``args.inputs`` has exactly
            one element.
    """
    sys.setrecursionlimit(4500)
    file_idx, filename = file_tuple
    source_path = args.prelude + "/" + filename
    result_file = source_path + ".scrape"
    temp_file = source_path + ".scrape.partial"

    if args.cont:
        # Opening the file only probes for its existence; a missing file
        # falls through to a fresh scrape.
        with contextlib.suppress(FileNotFoundError):
            with open(result_file, 'r'):
                if args.verbose:
                    eprint(f"Found existing scrape at {result_file}! Using it")
                return result_file

    try:
        if args.linearize:
            commands = serapi_instance.try_load_lin(args, file_idx,
                                                    source_path)
            if not commands:
                unlinearized = serapi_instance.load_commands_preserve(
                    args, 0, source_path)
                commands = linearize_semicolons.preprocess_file_commands(
                    args, file_idx, unlinearized,
                    coqargs, args.prelude,
                    source_path, filename,
                    args.skip_nochange_tac)
                serapi_instance.save_lin(commands, source_path)
        else:
            with Path2(source_path).open(mode='r') as vf:
                commands = serapi_instance.read_commands_preserve(
                    args, file_idx, vf.read())

        module_name = serapi_instance.get_module_from_filename(filename)
        with serapi_instance.SerapiContext(
                coqargs, module_name, args.prelude,
                args.relevant_lemmas == "hammer") as coq:
            coq.verbose = args.verbose
            try:
                with open(temp_file, 'w') as out:
                    for command in tqdm(commands, file=sys.stdout,
                                        disable=(not args.progress),
                                        position=file_idx * 2,
                                        desc="Scraping file", leave=False,
                                        dynamic_ncols=True,
                                        bar_format=mybarfmt):
                        process_statement(args, coq, command, out)
                # Promote the partial file only after the whole command list
                # has been processed and the file is closed.
                shutil.move(temp_file, result_file)
                return result_file
            except serapi_instance.TimeoutError:
                eprint("Command in {} timed out.".format(filename))
                return temp_file
    except Exception as e:
        eprint("FAILED: In file {}:".format(filename))
        eprint(e)
        if args.hardfail or len(args.inputs) == 1 or args.hardfail_scrape:
            raise e
    return None
def get_commands(self, args: argparse.Namespace, file_idx: int,
                 filename: str) -> List[str]:
    """Return the linearized commands for ``filename``, creating them if needed.

    The file is looked up at ``self.prelude + "/" + filename``.  If
    ``serapi_instance.try_load_lin`` returns something other than ``None``
    for that path, it is returned unchanged.  Otherwise the commands are
    loaded, run through ``linearize_semicolons.preprocess_file_commands``,
    handed to ``serapi_instance.save_lin`` for the same path, and returned.
    """
    lin_path = self.prelude + "/" + filename
    cached = serapi_instance.try_load_lin(args, file_idx, lin_path)
    if cached is not None:
        return cached

    raw_commands = serapi_instance.load_commands_preserve(
        args, file_idx, lin_path)
    # NOTE(review): the bare filename is passed before the prelude-qualified
    # path here -- confirm this matches preprocess_file_commands' parameter
    # order.
    linearized = linearize_semicolons.preprocess_file_commands(
        args, file_idx, raw_commands,
        self.coqargs, self.includes,
        filename, lin_path,
        self.skip_nochange_tac)
    serapi_instance.save_lin(linearized, lin_path)
    return linearized
def get_linearized(args: argparse.Namespace, coqargs: List[str],
                   bar_idx: int, filename: str) -> List[str]:
    """Return the linearized commands for ``filename``, creating them if needed.

    The file is looked up at ``args.prelude + "/" + filename``.  A result
    from ``serapi_instance.try_load_lin`` other than ``None`` is returned
    as-is.  Otherwise the commands are loaded, passed through
    ``preprocess_file_commands`` (with its final argument ``False``), handed
    to ``serapi_instance.save_lin`` for the same path, and returned.
    """
    lin_path = args.prelude + "/" + filename
    cached = serapi_instance.try_load_lin(args, bar_idx, lin_path)
    if cached is not None:
        return cached

    raw_commands = serapi_instance.load_commands_preserve(
        args, bar_idx, lin_path)
    linearized = preprocess_file_commands(
        args, bar_idx, raw_commands,
        coqargs, args.prelude,
        lin_path, filename, False)
    serapi_instance.save_lin(linearized, lin_path)
    return linearized
def main():
    """Command-line entry point: linearize each named proof file.

    The include flags are obtained by running
    ``make -C <prelude> print-includes`` and decoding its standard output as
    UTF-8.  For every positional filename, the commands of
    ``<prelude>/<filename>`` are loaded, passed through
    ``preprocess_file_commands`` together with those include flags, and the
    result is handed to ``serapi_instance.save_lin`` for that same path.
    """
    parser = argparse.ArgumentParser(description="linearize a set of files")
    parser.add_argument('--prelude', default=".")
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--hardfail', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--skip-nochange-tac', action='store_true',
                        dest='skip_nochange_tac')
    parser.add_argument("--progress", action='store_true')
    parser.add_argument('filenames', nargs="+",
                        help="proof file name (*.v)")
    arg_values = parser.parse_args()

    # The exit status of make is deliberately not checked: only whatever it
    # printed on stdout is used.
    includes = subprocess.run(
        ['make', '-C', arg_values.prelude, 'print-includes'],
        stdout=subprocess.PIPE).stdout.decode('utf-8')
    coqargs = ["sertop"]

    for filename in arg_values.filenames:
        if arg_values.verbose:
            eprint("Linearizing {}".format(filename))
        # The prelude-qualified path is built once and reused for loading,
        # preprocessing and saving.
        local_filename = arg_values.prelude + "/" + filename
        original_commands = serapi_instance.load_commands_preserve(
            arg_values, 0, local_filename)
        fresh_commands = preprocess_file_commands(
            arg_values, 0, original_commands, coqargs,
            includes, local_filename, filename, False)
        serapi_instance.save_lin(fresh_commands, local_filename)
def reinforce(args: argparse.Namespace) -> None:
    """Run reinforcement over one named proof or over whole environment files.

    Builds the replay memory from ``args.scrape_file``, loads the predictor
    from ``args.predictor_weights`` and creates a ``FeaturesQEstimator``.
    The estimator is restored from ``args.start_from`` when that is set, or
    pre-trained when ``args.pretrain`` is set.  A SIGINT handler saves the
    estimator weights to ``args.out_weights`` and then exits.

    With ``args.proof`` set, exactly one environment file is required; its
    commands are run until the lemma named ``args.proof`` is reached, and
    ``reinforce_lemma`` is called on it.  If the commands run out first, a
    message is printed and the function returns without saving.

    Otherwise every file in ``args.environment_files`` is walked proof by
    proof.  Proofs whose first proof-ending command is ``Defined.``, or whose
    statement matches ``\\s*Derive``, are skipped.  On a
    ``serapi_instance.CoqAnomaly`` the Coq context is recreated, the commands
    run so far are replayed, and the same lemma is tried again.  Weights are
    saved after each file.

    Raises:
        AssertionError: if ``args.proof`` is given with more than one
            environment file.
        serapi_instance.CoqAnomaly: re-raised when ``args.hardfail`` is set.
    """
    # Load the scraped (demonstrated) samples, the proof environment
    # commands, and the predictor
    replay_memory = assign_rewards(
        dataloader.tactic_transitions_from_file(args.scrape_file,
                                                args.buffer_size))
    predictor = predict_tactic.loadPredictorByFile(args.predictor_weights)

    q_estimator = FeaturesQEstimator(args.learning_rate,
                                     args.batch_step,
                                     args.gamma)
    signal.signal(
        signal.SIGINT,
        lambda signal, frame:
        progn(q_estimator.save_weights(
            args.out_weights, args),  # type: ignore
              exit()))
    if args.start_from:
        q_estimator_name, *saved = \
            torch.load(args.start_from)
        q_estimator.load_saved_state(*saved)
    elif args.pretrain:
        pre_train(args, q_estimator,
                  dataloader.tactic_transitions_from_file(
                      args.scrape_file, args.buffer_size * 10))

    # NOTE(review): these are fixed here and are separate from the
    # args.gamma given to the estimator above -- confirm that is intended.
    epsilon = 0.3
    gamma = 0.9

    if args.proof is not None:
        assert len(args.environment_files) == 1, \
            "Can't use multiple env files with --proof!"
        env_commands = serapi_instance.load_commands_preserve(
            args, 0, args.prelude / args.environment_files[0])
        with serapi_instance.SerapiContext(
                ["sertop", "--implicit"],
                serapi_instance.get_module_from_filename(
                    args.environment_files[0]),
                str(args.prelude)) as coq:
            coq.quiet = True
            coq.verbose = args.verbose
            rest_commands, run_commands = coq.run_into_next_proof(
                env_commands)
            lemma_statement = run_commands[-1]
            # Skip over proofs until the requested lemma is the current one.
            while coq.cur_lemma_name != args.proof:
                if not rest_commands:
                    # f-string so the lemma name is actually interpolated.
                    eprint(f"Couldn't find lemma {args.proof}! Exiting...")
                    return
                rest_commands, _ = coq.finish_proof(rest_commands)
                rest_commands, run_commands = coq.run_into_next_proof(
                    rest_commands)
                lemma_statement = run_commands[-1]
            reinforce_lemma(args, predictor, q_estimator, coq,
                            lemma_statement,
                            epsilon, gamma, replay_memory)
            q_estimator.save_weights(args.out_weights, args)
    else:
        for env_file in args.environment_files:
            env_commands = serapi_instance.load_commands_preserve(
                args, 0, args.prelude / env_file)
            # Number of proof endings, used as the progress-bar total.
            num_proofs = len([cmd for cmd in env_commands
                              if cmd.strip() == "Qed." or
                              cmd.strip() == "Defined."])
            rest_commands = env_commands
            all_run_commands: List[str] = []
            with tqdm(total=num_proofs, disable=(not args.progress),
                      leave=True, desc=env_file.stem) as pbar:
                # Each pass of this outer loop owns one Coq context; breaking
                # out of the inner loop with commands left over starts a new
                # context.
                while rest_commands:
                    with serapi_instance.SerapiContext(
                            ["sertop", "--implicit"],
                            serapi_instance.get_module_from_filename(
                                env_file),
                            str(args.prelude),
                            log_outgoing_messages=args.log_outgoing_messages) \
                            as coq:
                        coq.quiet = True
                        coq.verbose = args.verbose
                        # Replay everything run so far in the new context.
                        for command in all_run_commands:
                            coq.run_stmt(command)

                        while rest_commands:
                            rest_commands, run_commands = \
                                coq.run_into_next_proof(rest_commands)
                            if not rest_commands:
                                break
                            all_run_commands += run_commands[:-1]
                            lemma_statement = run_commands[-1]

                            # Check if the definition is
                            # proof-relevant. If it is, then finishing
                            # subgoals doesn't necessarily mean you've
                            # solved the problem, so don't try to
                            # train on it.
                            proof_relevant = False
                            for cmd in rest_commands:
                                if serapi_instance.ending_proof(cmd):
                                    if cmd.strip() == "Defined.":
                                        proof_relevant = True
                                    break
                            proof_relevant = proof_relevant or \
                                bool(re.match(r"\s*Derive", lemma_statement))
                            for sample in replay_memory:
                                sample.graph_node = None
                            if not proof_relevant:
                                try:
                                    reinforce_lemma(args, predictor,
                                                    q_estimator, coq,
                                                    lemma_statement,
                                                    epsilon, gamma,
                                                    replay_memory)
                                except serapi_instance.CoqAnomaly:
                                    if args.log_anomalies:
                                        with args.log_anomalies.open('a') as f:
                                            traceback.print_exc(file=f)
                                    if args.hardfail:
                                        eprint(
                                            "Hit an anomaly! "
                                            "Quitting due to --hardfail")
                                        raise
                                    eprint(
                                        "Hit an anomaly! "
                                        "Restarting coq instance")
                                    # Put the lemma back so the next context
                                    # tries it again.
                                    rest_commands.insert(0, lemma_statement)
                                    break
                            pbar.update(1)
                            rest_commands, run_commands = \
                                coq.finish_proof(rest_commands)
                            all_run_commands.append(lemma_statement)
                            all_run_commands += run_commands

            q_estimator.save_weights(args.out_weights, args)