Example #1
def test_write_file():
    d1 = {"a": "1", "b": "2"}
    jsonutil.write_file("/tmp/labm8.write_file.json", d1)
    d2 = jsonutil.read_file("/tmp/labm8.write_file.json")
    fs.rm("/tmp/labm8.write_file.json")

    jsonutil.write_file("/tmp/labm8.write_file2.json", d1)
    d3 = jsonutil.read_file("/tmp/labm8.write_file2.json")
    fs.rm("/tmp/labm8.write_file2.json")

    assert d1 == d2 == d3
Example #2
    def test_write_file(self):
        d1 = {
            "a": "1",
            "b": "2"
        }
        jsonutil.write_file("/tmp/labm8.write_file.json", d1)
        d2 = jsonutil.read_file("/tmp/labm8.write_file.json")
        fs.rm("/tmp/labm8.write_file.json")

        jsonutil.write_file("/tmp/labm8.write_file2.json", d1)
        d3 = jsonutil.read_file("/tmp/labm8.write_file2.json")
        fs.rm("/tmp/labm8.write_file2.json")

        self.assertEqual(d1, d2)
        self.assertEqual(d1, d3)
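
Examples #1 and #2 exercise the same write/read round trip, first as a plain pytest function and then as a unittest method. Below is a minimal sketch of the same round trip using pytest's tmp_path fixture instead of hard-coded /tmp paths, so no manual fs.rm cleanup is needed; it assumes jsonutil is importable from labm8 as in the examples above.

from labm8 import jsonutil


def test_write_file_roundtrip(tmp_path):
    # Sketch only: write a dict to a temporary JSON file and read it back.
    data = {"a": "1", "b": "2"}
    path = str(tmp_path / "roundtrip.json")
    jsonutil.write_file(path, data)
    assert jsonutil.read_file(path) == data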
Example #3
def test_read_file():
    a_str = """{
          "a": 1,  // this has comments
          "b": [1, 2, 3]
      } # end comment
      // begin with comment
      """
    system.echo(a_str, "/tmp/labm8.loaf.json")
    a = jsonutil.read_file("/tmp/labm8.loaf.json")
    assert a == {'a': 1, 'b': [1, 2, 3]}
Example #4
    def test_read_file(self):
        a_str = """{
            "a": 1,  // this has comments
            "b": [1, 2, 3]
        } # end comment
        // begin with comment
        """
        system.echo(a_str, "/tmp/labm8.loaf.json")
        a = jsonutil.read_file("/tmp/labm8.loaf.json")

        self.assertEqual(a["a"], 1)
        self.assertEqual(a["b"], [1, 2, 3])
        self.assertFalse("c" in a)
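
Examples #3 and #4 show that jsonutil.read_file tolerates // and # line comments, which plain json.loads rejects. A rough, illustrative sketch of how such a comment-stripping loader could work (this is not labm8's actual implementation, and it does not handle // or # inside string values):

import json
import re


def loads_with_comments(text: str):
    # Drop everything from // or # to the end of each line, then parse as JSON.
    stripped = re.sub(r"//[^\n]*|#[^\n]*", "", text)
    return json.loads(stripped)


assert loads_with_comments('{"a": 1, "b": [1, 2, 3]}  // trailing') == {"a": 1, "b": [1, 2, 3]}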
Example #5
    def cache(self, model: clgen.Model):
        """
        Return sampler cache.

        Parameters
        ----------
        model : clgen.Model
            CLgen model.

        Returns
        -------
        labm8.FSCache
            Cache.
        """
        sampler_model_hash = crypto.sha1_str(self.hash + model.hash)

        cache = clgen.mkcache("sampler", sampler_model_hash)

        # validate metadata against cache
        self.stats = {
            "time": 0,
            "progress": 0
        }
        meta = deepcopy(self.to_json())
        if cache.get("META"):
            cached_meta = jsonutil.read_file(cache["META"])

            if "stats" in cached_meta:
                self.stats = cached_meta["stats"]
                del cached_meta["stats"]

            if "created" in cached_meta["sampler"]:
                del cached_meta["sampler"]["created"]
            del meta["sampler"]["created"]

            if "min_samples" in cached_meta["sampler"]:
                del cached_meta["sampler"]["min_samples"]
            del meta["sampler"]["min_samples"]

            if "min_kernels" in cached_meta["sampler"]:
                del cached_meta["sampler"]["min_kernels"]
            del meta["sampler"]["min_kernels"]

            if meta != cached_meta:
                raise clgen.InternalError("sampler metadata mismatch")
        else:
            self._flush_meta(cache)

        return cache
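
The cache() method above follows a pattern that recurs in Examples #8 and #10: load the cached META, delete keys that are allowed to differ between runs (timestamps, stats, sampling thresholds), and require the remainder to match exactly. A generic sketch of that comparison step, using a hypothetical helper that is not part of clgen:

from copy import deepcopy


def meta_matches(current: dict, cached: dict, volatile=("created", "stats")) -> bool:
    # Compare two metadata dicts while ignoring keys that may legitimately differ.
    a, b = deepcopy(current), deepcopy(cached)
    for key in volatile:
        a.pop(key, None)
        b.pop(key, None)
    return a == b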
Example #6
def models() -> Iterator[Model]:
    """
    Iterate over all cached models.

    Returns
    -------
    Iterator[Model]
        An iterable over all cached models.
    """
    if fs.isdir(clgen.cachepath(), "model"):
        modeldirs = fs.ls(fs.path(clgen.cachepath(), "model"), abspaths=True)
        for modeldir in modeldirs:
            meta = jsonutil.read_file(fs.path(modeldir, "META"))
            model = Model.from_json(meta)
            yield model
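
models() lazily yields one Model per cached META file. A minimal usage sketch, assuming Model exposes the hash attribute used in the other examples:

# Print the hash of every cached model.
for model in models():
    print(model.hash)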
Example #7
        def _main() -> None:
            cache = clgen.cachepath()

            log.warning("Not Implemented: refresh corpuses")

            if fs.isdir(cache, "model"):
                cached_modeldirs = fs.ls(fs.path(cache, "model"), abspaths=True)
                for cached_modeldir in cached_modeldirs:
                    cached_model_id = fs.basename(cached_modeldir)
                    cached_meta = jsonutil.read_file(fs.path(cached_modeldir, "META"))

                    model = clgen.Model.from_json(cached_meta)

                    if cached_model_id != model.hash:
                        log.info(cached_model_id, '->', model.hash)

                        if fs.isdir(model.cache.path):
                            log.fatal("cache conflict", file=sys.stderr)

                        fs.mv(cached_modeldir, model.cache.path)

            log.warning("Not Implemented: refresh samplers")
Example #8
    def __init__(self, contentid: str, path: str=None, **opts):
        """
        Instantiate a corpus.

        If this is a new corpus, a number of files will be created, which may
        take some time.

        Parameters
        ----------
        contentid : str
            ID of corpus content.
        path : str, optional
            Path to corpus.
        **opts
            Keyword options.
        """
        # Validate options
        for key in opts.keys():
            if key not in DEFAULT_CORPUS_OPTS:
                raise clgen.UserError(
                    "Unsupported corpus option '{}'. Valid keys: {}".format(
                        key, ','.join(sorted(DEFAULT_CORPUS_OPTS.keys()))))

        self.opts = deepcopy(DEFAULT_CORPUS_OPTS)
        types.update(self.opts, opts)
        self.opts["id"] = contentid

        # check that contentid exists
        self.language = clgen.Language.from_str(opts.get("language"))
        if (path is None and
            not fs.isdir(clgen.cachepath("contentfiles", f"{self.language}-{contentid}"))):
            raise clgen.UserError("corpus {self.language}-{contentid} not found"
                                  .format(**vars()))

        self.contentid = contentid
        self.contentcache = clgen.mkcache("contentfiles", f"{self.language}-{contentid}")
        self.kernels_db = self.contentcache.keypath('kernels.db')

        self.hash = self._hash(contentid, self.opts)
        self.cache = clgen.mkcache("corpus", f"{self.language}-{self.hash}")

        log.debug("contentfiles {self.contentid}".format(**vars()))
        log.debug("corpus {hash}".format(hash=self.hash))

        # validate metadata against cache
        self.stats = {
            "preprocess_time": 0
        }
        meta = deepcopy(self.to_json())
        if self.cache.get("META"):
            cached_meta = jsonutil.read_file(self.cache["META"])
            self.stats = cached_meta["stats"]  # restore stats

            if "created" in cached_meta:
                del cached_meta["created"]
            del meta["created"]

            if "stats" in cached_meta:
                del cached_meta["stats"]
            del meta["stats"]

            if meta != cached_meta:
                raise clgen.InternalError("corpus metadata mismatch")
        else:
            self._flush_meta()

        with self.lock.acquire(replace_stale=True):
            self._create_files(path)
Example #9
    def test_read_file_bad_path(self):
        with self.assertRaises(fs.File404):
            jsonutil.read_file("/not/a/real/path")

        self.assertEqual({}, jsonutil.read_file("/not/a/real/path",
                                                must_exist=False))
Example #10
    def __init__(self, corpus: clgen.Corpus, **opts):
        """
        Instantiate model.

        Parameters
        ----------
        corpus : clgen.Corpus
            Corpus instance.
        **opts
            Training options.
        """
        assert isinstance(corpus, clgen.Corpus)

        def _hash(corpus: clgen.Corpus, opts: dict) -> str:
            """ compute model hash """
            hashopts = deepcopy(opts)
            del hashopts["created"]
            del hashopts["train_opts"]["epochs"]
            return crypto.sha1_list(corpus.hash, *types.dict_values(hashopts))

        # Validate options
        for key in opts:
            if key not in DEFAULT_MODEL_OPTS:
                raise clgen.UserError(
                    "Unsupported model option '{}'. Valid keys: {}".format(
                        key, ','.join(sorted(DEFAULT_MODEL_OPTS.keys()))))

        # set properties
        self.opts = types.update(deepcopy(DEFAULT_MODEL_OPTS), opts)
        self.corpus = corpus
        self.hash = _hash(self.corpus, self.opts)
        self.cache = clgen.mkcache("model", f"{corpus.language}-{self.hash}")

        log.debug("model", self.hash)

        # validate metadata against cache, and restore stats
        self.stats = {
            "epoch_times": [],
            "epoch_costs": [],
            "epoch_batches": []
        }
        meta = deepcopy(self.to_json())
        if self.cache.get("META"):
            cached_meta = jsonutil.read_file(self.cache["META"])
            self.stats = cached_meta["stats"]  # restore stats

            if "created" in cached_meta:
                del cached_meta["created"]
            del meta["created"]

            if "created" in cached_meta["corpus"]:
                del cached_meta["corpus"]["created"]
            del meta["corpus"]["created"]

            if "stats" in cached_meta:
                del cached_meta["stats"]
            del meta["stats"]

            if "epochs" in cached_meta["train_opts"]:
                del cached_meta["train_opts"]["epochs"]
            del meta["train_opts"]["epochs"]

            if meta != cached_meta:
                log.error("Computed META:", jsonutil.format_json(meta))
                raise clgen.InternalError(
                    "metadata mismatch in model %s" % self.cache["META"])
        else:
            self._flush_meta()
Example #11
def test_read_file_bad_path():
    with pytest.raises(fs.File404):
        jsonutil.read_file("/not/a/real/path")
    assert not jsonutil.read_file("/not/a/real/path", must_exist=False)
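
Examples #9 and #11 are the same error-handling test in both styles: reading a missing path raises fs.File404, unless must_exist=False is passed, in which case an empty dict is returned. That behaviour lends itself to loading optional configuration with defaults; a small usage sketch with a hypothetical config path:

from labm8 import jsonutil

defaults = {"verbose": False}
# read_file returns {} when the file is absent and must_exist=False.
settings = {**defaults, **jsonutil.read_file("/etc/myapp/config.json", must_exist=False)}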