def dump_data(self, rel_path: Union[str, List[str]], data: Any, fmt: IOUtils.Format, is_batched: bool = False, per_batch: int = 100, exist_ok: bool = False, ):
    """Serialize `data` to `rel_path` (relative to `self.data_dir`) in format `fmt`.

    :param rel_path: path relative to the data dir, either a string or path components.
    :param data: the object to dump; in batched mode it must support len() and slicing.
    :param fmt: the target serialization format.
    :param is_batched: if True, write the data as multiple "batch-<i>.<ext>" files in a directory.
    :param per_batch: number of items per batch file (batched mode only).
    :param exist_ok: if False, raise IOError when something already exists at the target path.
    """
    abs_path = self.data_dir / self.assemble_rel_path(rel_path)
    if abs_path.exists() and not exist_ok:
        LoggingUtils.log_and_raise(self.logger, f"Cannot rewrite existing data at {abs_path}", IOError)

    abs_path.parent.mkdir(parents=True, exist_ok=True)

    if is_batched:
        # Batched mode: the data need to be slice-able and sizable.
        # Replace anything at the target with a fresh directory of batch files.
        IOUtils.rm(abs_path)
        abs_path.mkdir(parents=True)
        num_batches = math.ceil(len(data) / per_batch)
        for batch_i in tqdm(range(num_batches)):
            begin = per_batch * batch_i
            chunk = data[begin : begin + per_batch]
            if self.is_json_format(fmt):
                chunk = IOUtils.jsonfy(chunk)
            IOUtils.dump(abs_path / f"batch-{batch_i}.{fmt.get_extension()}", chunk, fmt)
    else:
        payload = IOUtils.jsonfy(data) if self.is_json_format(fmt) else data
        IOUtils.dump(abs_path, payload, fmt)
    return
def clean_path(self, rel_path: Union[str, List[str]]):
    """Remove whatever exists at `rel_path` (relative to `self.data_dir`), if anything.

    :param rel_path: path relative to the data dir, either a string or path components.
    """
    abs_path = self.data_dir / self.assemble_rel_path(rel_path)
    if not abs_path.exists():
        return
    self.logger.info(f"Removing existing things at {abs_path}")
    IOUtils.rm(abs_path)
    return
def dump_ckpt(self, rel_path: Union[str, List[str]], obj: Any, ckpt_id: int, dump_func: Callable[[Any, str], NoReturn], ckpt_keep_max: int = 5, ) -> NoReturn:
    """Dump a checkpoint of `obj` under `rel_path` (relative to `self.data_dir`) and prune old ones.

    :param rel_path: path of the checkpoint directory, relative to the data dir.
    :param obj: the object to checkpoint.
    :param ckpt_id: numeric id of this checkpoint; used as the file name.
    :param dump_func: callback that writes `obj` to the given file path.
    :param ckpt_keep_max: keep at most this many most-recent checkpoints; -1 disables pruning.
    """
    abs_path = self.data_dir / self.assemble_rel_path(rel_path)
    abs_path.mkdir(parents=True, exist_ok=True)

    ckpt_file_name = str(abs_path / str(ckpt_id))
    dump_func(obj, ckpt_file_name)

    # Remove older checkpoints (-1 means keep everything).
    if ckpt_keep_max != -1:
        # NOTE(review): assumes every entry in the directory has an integer file name
        # (i.e., only checkpoints written by this method live here) — int() raises otherwise.
        existing_ids = [int(str(f.name)) for f in abs_path.iterdir()]
        # Fix: the original loop reused the name `ckpt_id`, shadowing and clobbering
        # the parameter; use a distinct loop variable for the ids being pruned.
        for old_id in sorted(existing_ids)[:-ckpt_keep_max]:
            IOUtils.rm(abs_path / str(old_id))
        # end for
    # end if
    return
def collect_lemmas_doc( cls, doc: CoqDocument, ast_sexp_list: List[SexpNode], serapi_options: str, ) -> List[Lemma]:
    """Collect the lemmas declared in one Coq document.

    :param doc: the parsed Coq document to scan.
    :param ast_sexp_list: one AST s-expression per sentence of `doc` (parallel lists).
    :param serapi_options: SerAPI options string; scanned with RE_PATH_TO_QUALIFIED_PREFIX
        to map the document's file path to its qualified-name prefix, and passed through
        to the external `sername` tool.
    :returns: the lemmas found, restricted to those for which `sername` produced a
        backend representation.
    """
    lemmas_doc: List[Lemma] = list()
    data_index = doc.get_data_index()

    # Maintain a stack of module
    modules: List[str] = list()

    # Prepare qualified name prefix
    qprefix_this_doc = "./" + doc.file_name[:-2]  # Remove .v
    for m in cls.RE_PATH_TO_QUALIFIED_PREFIX.finditer(serapi_options):
        path = m.group("path")
        if path != ".":
            path = "./" + path
        qprefix = m.group("qprefix")

        # First matching path mapping wins; rewrite the file path under its qprefix.
        if qprefix_this_doc.startswith(path):
            qprefix_this_doc = qprefix + qprefix_this_doc[len(path):]
            break
        # end if
    # end for
    # Normalize to Coq's dotted qualified-name convention.
    if qprefix_this_doc.startswith("./"):
        qprefix_this_doc = qprefix_this_doc[len("./"):]
    qprefix_this_doc = qprefix_this_doc.replace("/", ".")

    for sent_i, sent in enumerate(doc.sentences):
        ast_sexp = ast_sexp_list[sent_i]
        vernac = SexpAnalyzer.analyze_vernac(ast_sexp)

        if vernac.vernac_type in cls.VTYPES_MODULE_BEG:
            # Module/Section start: push the name onto the module stack.
            # (VernacExpr()(VernacDefineModule() ( ( v ( Id <module name>)) ...
            #  0           1 2                  20 21 22 220  2201 22011
            module_name = vernac.vernac_sexp[2][2][0][1][1].content_no_quote
            modules.append(module_name)
        elif vernac.vernac_type in cls.VTYPES_MODULE_END:
            # Module/Section end: pop if it matches the innermost open module.
            # (VernacExpr()(VernacEndSegment ( ( v ( Id <module name>)) ...
            #  0           1 2               20 21 210  2101 21011
            try:
                module_name = vernac.vernac_sexp[2][1][0][1][1].content_no_quote
            except:
                # Debug aid: show the unexpected s-expression before re-raising.
                print(vernac.vernac_sexp.pretty_format())
                raise
            # end try
            if len(modules) > 0 and module_name == modules[-1]:
                modules.pop()  # EndModule and EndSection share the same vernac type
        elif vernac.vernac_type in cls.VTYPES_LEMMA:
            # Lemma declaration: extract its name and build the fully qualified name.
            # (VernacExpr()(VernacStartTheoremProof Lemma ( ( ( ( ( v ( Id <lemma name>))
            #  0           1 2                      20    21 22 2200000 2200001 22000011
            lemma = Lemma()
            lemma.data_index = data_index
            lemma.name = vernac.vernac_sexp[2][2][0][0][0][0][1][1].content_no_quote
            lemma.qname = qprefix_this_doc + "." + ".".join(modules + [lemma.name])

            # Find lemma content, after the first token matching the lemma name
            tok_i = 0
            for tok in sent.tokens:
                if tok.content == lemma.name:
                    break
                tok_i += 1
            # end for
            if tok_i == len(sent.tokens):
                LoggingUtils.log_and_raise(cls.logger, f"Lemma name {lemma.name} didn't appear in the source code {sent.str_with_space()}", Exception)

            # Split the sentence: tokens before the name are the vernac command
            # (e.g. "Lemma"), tokens after it are the statement.
            lemma.vernac_command = sent.tokens[:tok_i]
            lemma.statement = sent.tokens[tok_i + 1:]
            lemma.ast_sexp = vernac.vernac_sexp

            lemmas_doc.append(lemma)
        # end if
    # end for

    # Use sername to get the backend representations
    # One qname per line is written to a temp file consumed by the external tool.
    lemma_qnames: str = "".join([l.qname + "\n" for l in lemmas_doc])
    lemma_qnames_file = BashUtils.get_temp_file()
    IOUtils.dump(lemma_qnames_file, lemma_qnames, IOUtils.Format.txt)
    lemma_qnames_backend_sexps_str: str = BashUtils.run(f"sername {serapi_options} --require-lib={qprefix_this_doc} {lemma_qnames_file}", expected_return_code=0).stdout
    IOUtils.rm(lemma_qnames_file)
    # Each output line has the shape "<qname>:<backend s-expression>".
    for qname_backend_sexp_str in lemma_qnames_backend_sexps_str.splitlines():
        qname, backend_sexp_str = qname_backend_sexp_str.split(":", 1)
        backend_sexp = SexpParser.parse(backend_sexp_str)
        for lemma in lemmas_doc:
            if lemma.qname == qname:
                lemma.backend_sexp = backend_sexp
                break
            # end if
        # end for
    # end for

    # Drop lemmas that sername could not resolve.
    # NOTE(review): assumes Lemma initializes backend_sexp to None — confirm in Lemma's definition.
    lemmas_doc = [l for l in lemmas_doc if l.backend_sexp is not None]

    return lemmas_doc