Ejemplo n.º 1
0
def ProcessBatch(
    batch: typing.List[typing.Tuple[str, ]],
    db: grewe_features_db.Database,
    pool: multiprocessing.Pool,
):
    """Prettify a batch of sources, stage them as .cl files, and import features.

    Args:
        batch: Rows whose first element is a source string to process.
        db: Database that receives the extracted static features.
        pool: Worker pool used both for prettifying and for the import.
    """
    app.Log(1, "Formatting files")
    # Prettify in parallel; drop sources that failed (falsy results).
    pretty = pool.imap_unordered(_PrettifySource, [row[0] for row in batch])
    pretty = [src for src in pretty if src]

    with tempfile.TemporaryDirectory(prefix="phd_clgen_import_") as tmpdir:
        tmpdir = pathlib.Path(tmpdir)
        # Stage each prettified source as a 1-indexed .cl file in the temp dir.
        kernel_paths = []
        for index, src in enumerate(pretty, start=1):
            kernel_path = tmpdir / f"{index}.cl"
            kernel_path.write_text(src)
            kernel_paths.append(kernel_path)

        app.Log(1, "Importing files")
        success_count, new_row_count = db.ImportStaticFeaturesFromPaths(
            kernel_paths, FLAGS.origin, pool)
        app.Log(
            1,
            "Extracted features from %d of %d kernels, %d new rows",
            success_count,
            len(batch),
            new_row_count,
        )
Ejemplo n.º 2
0
def Sample(
    instance: clgen.Instance,
    db: grewe_features_db.Database,
    profiler: prof.AutoCsvProfiler,
    pool: multiprocessing.Pool,
):
    """Draw a batch of samples from the model and import their static features.

    Args:
        instance: The clgen instance providing the model and sampler.
        db: Database that receives the extracted static features.
        profiler: Profiler used to time each phase.
        pool: Worker pool (accepted for signature parity; unused here).
    """
    sample_observer = SampleObserver()
    with profiler.Profile(f"Create {FLAGS.batch_size} samples"):
        samples = instance.model.Sample(instance.sampler, [sample_observer])
    with tempfile.TemporaryDirectory(
            prefix="phd_experimental_deeplearning_") as tmpdir:
        tmpdir = pathlib.Path(tmpdir)
        with profiler.Profile(f"Create {FLAGS.batch_size} tempfiles"):
            # Materialize each observed sample as a temp file to import from.
            sample_paths = []
            for index, sample in enumerate(sample_observer.samples):
                sample_paths.append(
                    CreateTempFileFromSample(tmpdir, sample, index))
        with profiler.Profile() as import_timer:
            # NOTE(review): elsewhere this call is unpacked into two values;
            # here a single name is bound — confirm the Database API in use.
            num_successes = db.ImportStaticFeaturesFromPaths(
                sample_paths, FLAGS.origin)
            # Name the profile entry after the fact, once the count is known.
            import_timer.name = (
                f"Import {num_successes} / {FLAGS.batch_size} samples")