Beispiel #1
0
    def dump_data(self,
            rel_path: Union[str, List[str]],
            data: Any,
            fmt: IOUtils.Format,
            is_batched: bool = False,
            per_batch: int = 100,
            exist_ok: bool = False,
    ):
        """
        Dumps data below the data directory, either as one file or as a directory of batch files.

        :param rel_path: the relative location; resolved via assemble_rel_path against self.data_dir.
        :param data: the data to dump; in batched mode it needs to be slice-able and sizable.
        :param fmt: the format handed to IOUtils.dump; json formats (per is_json_format) are passed
            through IOUtils.jsonfy first.
        :param is_batched: if True, the target path becomes a directory that holds one
            batch-<i>.<extension> file per slice of per_batch items.
        :param per_batch: the maximum number of items per batch file; must be >= 1 in batched mode.
        :param exist_ok: if False, an error is reported (LoggingUtils.log_and_raise with IOError) when
            something already exists at the target path.
        """
        abs_path = self.data_dir / self.assemble_rel_path(rel_path)
        if abs_path.exists() and not exist_ok:
            LoggingUtils.log_and_raise(self.logger, f"Cannot rewrite existing data at {abs_path}", IOError)
        # end if

        # Reject an unusable batch size before touching the file system: zero would fail with a
        # ZeroDivisionError only after the existing data is removed, and a negative size would remove
        # the existing data and then write no batch at all.
        if is_batched and per_batch < 1:
            LoggingUtils.log_and_raise(self.logger, f"per_batch must be a positive integer, got {per_batch}", ValueError)
        # end if

        abs_path.parent.mkdir(parents=True, exist_ok=True)
        needs_jsonfy = self.is_json_format(fmt)

        if not is_batched:
            IOUtils.dump(abs_path, IOUtils.jsonfy(data) if needs_jsonfy else data, fmt)
            return
        # end if

        # In batched mode, the data need to be slice-able and sizable.
        # Whatever was at the target path is replaced by a fresh directory.
        IOUtils.rm(abs_path)
        abs_path.mkdir(parents=True)

        extension = fmt.get_extension()
        for batch_i in tqdm(range(math.ceil(len(data) / per_batch))):
            data_batch = data[per_batch * batch_i: per_batch * (batch_i + 1)]
            if needs_jsonfy:
                data_batch = IOUtils.jsonfy(data_batch)
            # end if
            IOUtils.dump(abs_path / f"batch-{batch_i}.{extension}", data_batch, fmt)
        # end for
        return
Beispiel #2
0
 def clean_path(self, rel_path: Union[str, List[str]]):
     """
     Removes whatever exists at the given relative path below the data directory.

     Nothing is logged or removed when the resolved path does not exist.

     :param rel_path: the relative location; resolved via assemble_rel_path against self.data_dir.
     """
     abs_path = self.data_dir / self.assemble_rel_path(rel_path)
     if not abs_path.exists():
         # Nothing to clean
         return
     # end if

     self.logger.info(f"Removing existing things at {abs_path}")
     IOUtils.rm(abs_path)
     return
Beispiel #3
0
    def dump_ckpt(self, rel_path: Union[str, List[str]], obj: Any, ckpt_id: int,
            dump_func: Callable[[Any, str], Any],
            ckpt_keep_max: int = 5,
    ) -> None:
        """
        Dumps a checkpoint into a checkpoint directory, then prunes the oldest checkpoints.

        The checkpoint is written by dump_func to <checkpoint directory>/<ckpt_id>. Every entry of the
        checkpoint directory is expected to be named by an integer checkpoint id; pruning removes the
        entries with the smallest ids first.

        :param rel_path: the relative location of the checkpoint directory; resolved via
            assemble_rel_path against self.data_dir, and created if missing.
        :param obj: the object to checkpoint; passed to dump_func as is.
        :param ckpt_id: the id of this checkpoint, used as the file name.
        :param dump_func: called as dump_func(obj, file_name) to write the checkpoint; its return
            value is ignored.
        :param ckpt_keep_max: the maximum number of checkpoints kept in the directory after this dump;
            0 removes all of them, -1 (or any negative number) disables pruning.
        """
        ckpt_dir = self.data_dir / self.assemble_rel_path(rel_path)
        ckpt_dir.mkdir(parents=True, exist_ok=True)

        dump_func(obj, str(ckpt_dir / str(ckpt_id)))

        # Negative limit means keeping every checkpoint
        if ckpt_keep_max < 0:
            return
        # end if

        # Remove older checkpoints.
        # The number of stale checkpoints is counted explicitly, because slicing with
        # [:-ckpt_keep_max] degenerates to [:0] (i.e., removes nothing) when ckpt_keep_max is 0.
        existing_ids = sorted(int(f.name) for f in ckpt_dir.iterdir())
        num_stale = max(len(existing_ids) - ckpt_keep_max, 0)
        for stale_id in existing_ids[:num_stale]:
            IOUtils.rm(ckpt_dir / str(stale_id))
        # end for
        return
Beispiel #4
0
    def collect_lemmas_doc(
        cls,
        doc: CoqDocument,
        ast_sexp_list: List[SexpNode],
        serapi_options: str,
    ) -> List[Lemma]:
        """
        Collects the lemmas declared in one document, together with their backend representations.

        The sentences of the document are scanned alongside their AST sexps to find lemma
        declarations and to track the enclosing modules, which gives each lemma a qualified name.
        The qualified names are then passed to the external "sername" command, whose output lines
        ("<qualified name>:<backend sexp>") are parsed and attached to the matching lemmas.

        :param doc: the document; doc.file_name is expected to end with ".v".
        :param ast_sexp_list: the AST sexps, one per sentence of the document, in the same order.
        :param serapi_options: the options passed to sername; also searched with
            RE_PATH_TO_QUALIFIED_PREFIX to map the document's path to its qualified name prefix.
        :return: the lemmas whose backend_sexp is set; the others are dropped
            (assumes a fresh Lemma has backend_sexp None -- TODO confirm).
        """
        lemmas_doc: List[Lemma] = list()
        data_index = doc.get_data_index()

        # Maintain a stack of module
        modules: List[str] = list()

        # Prepare qualified name prefix
        qprefix_this_doc = "./" + doc.file_name[:-2]  # Remove .v
        for m in cls.RE_PATH_TO_QUALIFIED_PREFIX.finditer(serapi_options):
            path = m.group("path")
            if path != ".":
                path = "./" + path
            # end if
            qprefix = m.group("qprefix")

            # Only the first matching path mapping is applied
            if qprefix_this_doc.startswith(path):
                qprefix_this_doc = qprefix + qprefix_this_doc[len(path):]
                break
            # end if
        # end for
        if qprefix_this_doc.startswith("./"):
            qprefix_this_doc = qprefix_this_doc[len("./"):]
        # end if
        qprefix_this_doc = qprefix_this_doc.replace("/", ".")

        for sent_i, sent in enumerate(doc.sentences):
            ast_sexp = ast_sexp_list[sent_i]
            vernac = SexpAnalyzer.analyze_vernac(ast_sexp)

            if vernac.vernac_type in cls.VTYPES_MODULE_BEG:
                # (VernacExpr()(VernacDefineModule()  (  (   v   ( Id <module name>)) ...
                #  0         1 2 20               21  22 220  2201    22011
                module_name = vernac.vernac_sexp[2][2][0][1][1].content_no_quote
                modules.append(module_name)
            elif vernac.vernac_type in cls.VTYPES_MODULE_END:
                # (VernacExpr()(VernacEndSegment  (  (   v   ( Id <module name>)) ...
                #  0         1 2 20               21 210  2101    21011
                try:
                    module_name = vernac.vernac_sexp[2][1][0][1][1].content_no_quote
                except Exception:
                    # Show the unexpected sexp to help debugging, then propagate the error
                    print(vernac.vernac_sexp.pretty_format())
                    raise
                # end try

                # EndModule and EndSection share the same vernac type, so only pop when the name
                # matches the innermost open module
                if len(modules) > 0 and module_name == modules[-1]:
                    modules.pop()
                # end if
            elif vernac.vernac_type in cls.VTYPES_LEMMA:
                # (VernacExpr()(VernacStartTheoremProof Lemma ( ( ( ( ( v (       Id <lemma name>))
                #  0         1 2 20                     21   22   2200000 2200001    22000011
                lemma = Lemma()
                lemma.data_index = data_index

                lemma.name = vernac.vernac_sexp[2][2][0][0][0][0][1][1].content_no_quote
                lemma.qname = qprefix_this_doc + "." + ".".join(modules + [lemma.name])

                # Find lemma content, after the first token matching the lemma name
                tok_i = next(
                    (i for i, tok in enumerate(sent.tokens) if tok.content == lemma.name),
                    len(sent.tokens),
                )
                if tok_i == len(sent.tokens):
                    LoggingUtils.log_and_raise(
                        cls.logger,
                        f"Lemma name {lemma.name} didn't appear in the source code {sent.str_with_space()}",
                        Exception)
                # end if

                lemma.vernac_command = sent.tokens[:tok_i]
                lemma.statement = sent.tokens[tok_i + 1:]
                lemma.ast_sexp = vernac.vernac_sexp

                lemmas_doc.append(lemma)
            # end if
        # end for

        # Use sername to get the backend representations
        lemma_qnames: str = "".join(l.qname + "\n" for l in lemmas_doc)
        lemma_qnames_file = BashUtils.get_temp_file()
        try:
            IOUtils.dump(lemma_qnames_file, lemma_qnames, IOUtils.Format.txt)
            lemma_qnames_backend_sexps_str: str = BashUtils.run(
                f"sername {serapi_options} --require-lib={qprefix_this_doc} {lemma_qnames_file}",
                expected_return_code=0).stdout
        finally:
            # Remove the temp file even if dumping or running sername failed
            IOUtils.rm(lemma_qnames_file)
        # end try

        # When several lemmas share a qualified name, only the first one receives backend sexps
        first_lemma_by_qname = dict()
        for lemma in lemmas_doc:
            first_lemma_by_qname.setdefault(lemma.qname, lemma)
        # end for

        for qname_backend_sexp_str in lemma_qnames_backend_sexps_str.splitlines():
            qname, backend_sexp_str = qname_backend_sexp_str.split(":", 1)
            backend_sexp = SexpParser.parse(backend_sexp_str)

            lemma = first_lemma_by_qname.get(qname)
            if lemma is not None:
                lemma.backend_sexp = backend_sexp
            # end if
        # end for

        lemmas_doc = [l for l in lemmas_doc if l.backend_sexp is not None]
        return lemmas_doc