Esempio n. 1
0
def main():
    """
    Print a per-device summary of the job list, then enumerate every job.

    One log file per device is opened under "jobs/" and the mapping of
    device name to open file is passed to enum_job() together with the
    database and the unpacked columns of each row of the jobs table.
    """
    db = _db.Database(fs.path("joblist.db"))
    data = [row for row in
            db.execute("SELECT device,Count(*) AS count\n"
                       "FROM jobs\n"
                       "GROUP BY device\n"
                       "ORDER BY count")]
    io.info("Job list:")
    print(fmt.table(data, columns=("Device", "Jobs")))
    print()

    jobs = [row for row in db.execute("SELECT * FROM jobs")]

    fs.mkdir("jobs")
    # Each device is listed exactly once: a repeated key in a dict literal
    # would open (and truncate) the same file twice and leak the first
    # handle.
    logs = {
        "monza": open("jobs/monza.txt", "w"),
        "whz5": open("jobs/whz5.txt", "w"),
        "cec": open("jobs/cec.txt", "w"),
        "florence": open("jobs/florence.txt", "w"),
    }

    try:
        for job in jobs:
            enum_job(logs, db, *job)
    finally:
        # Flush and release the log files even if a job fails.
        for log in logs.values():
            log.close()

    lab.exit()
Esempio n. 2
0
def main():
  """
  Export CLSmith programs of one classification with the header inlined.

  Command line arguments are the result classification to select and the
  output directory. For every distinct program with a matching result, the
  '#include "CLSmith.h"' line of its source is replaced by the contents of
  the bundled header, and the result is written to <outdir>/<id>.cl.
  Programs whose output file already exists are skipped.
  """
  arg_parser = ArgumentParser(description=__description__)
  arg_parser.add_argument("classification")
  arg_parser.add_argument("outdir")
  args = arg_parser.parse_args()

  db.init("cc1")
  session = db.make_session()

  id_query = session.query(sql.distinct(CLSmithResult.program_id))
  matching = id_query.filter(
    CLSmithResult.classification == args.classification)
  program_ids = [row[0] for row in matching.all()]

  header = fs.read_file(dsmith.data_path("include", "clsmith.h"))

  fs.mkdir(args.outdir)

  for program_id in ProgressBar()(program_ids):
    outpath = fs.path(args.outdir, program_id + ".cl")

    # Already exported on a previous run.
    if fs.exists(outpath):
      continue

    program_query = session.query(CLSmithProgram)
    program = program_query.filter(CLSmithProgram.id == program_id).one()

    # Unpacking requires the include line to occur exactly once.
    before, after = program.src.split('#include "CLSmith.h"')
    inlined = before + header + after

    with open(outpath, "w") as outfile:
      print(inlined, file=outfile)
Esempio n. 3
0
def main():
    """
    Print a per-device summary of the job list, then enumerate every job.

    One log file per device is opened under "jobs/" and the mapping of
    device name to open file is passed to enum_job() together with the
    database and the unpacked columns of each row of the jobs table.
    """
    db = _db.Database(fs.path("joblist.db"))
    data = [
        row for row in db.execute("SELECT device,Count(*) AS count\n"
                                  "FROM jobs\n"
                                  "GROUP BY device\n"
                                  "ORDER BY count")
    ]
    io.info("Job list:")
    print(fmt.table(data, columns=("Device", "Jobs")))
    print()

    jobs = [row for row in db.execute("SELECT * FROM jobs")]

    fs.mkdir("jobs")
    # Each device is listed exactly once: a repeated key in a dict literal
    # would open (and truncate) the same file twice and leak the first
    # handle.
    logs = {
        "monza": open("jobs/monza.txt", "w"),
        "whz5": open("jobs/whz5.txt", "w"),
        "cec": open("jobs/cec.txt", "w"),
        "florence": open("jobs/florence.txt", "w"),
    }

    try:
        for job in jobs:
            enum_job(logs, db, *job)
    finally:
        # Flush and release the log files even if a job fails.
        for log in logs.values():
            log.close()

    lab.exit()
Esempio n. 4
0
    def __init__(self, path, tables=None, enable_traces=True):
        """
        Open a connection to a database file, creating tables as requested.

        Arguments:
            path (str): The path to the database file.
            tables (dictionary of {str: tuple of str}, optional): A dictionary
              of {name: schema} pairs, where a schema is a list of tuple
              pairs of the form: (name, type). Defaults to no tables.
            enable_traces (bool, optional): Enable traces for user
              defined functions and aggregates.
        """
        # A None default avoids sharing one mutable dict between all calls.
        if tables is None:
            tables = {}

        self.path = fs.path(path)

        # Create directory if needed.
        parent_dir = fs.dirname(path)
        if parent_dir:
            fs.mkdir(parent_dir)

        self.connection = sql.connect(self.path)

        for name, schema in six.iteritems(tables):
            self.create_table(name, schema)

        io.debug("Opened connection to '{0}'".format(self.path))

        # Register exit handler
        atexit.register(self.close)

        # Enable traces for user defined functions and aggregates. See:
        #
        # https://docs.python.org/2/library/sqlite3.html#sqlite3.enable_callback_tracebacks
        if enable_traces:
            sql.enable_callback_tracebacks(True)
Esempio n. 5
0
def train_and_save(model_desc, platform, source,
                   atomizer="CharacterAtomizer", maxlen=1024,
                   n_splits=10, split_i=0, seed=204):
  """
  Train and save a model unless a saved copy exists, then evaluate it.

  The model file path is derived from the model name, platform, source,
  atomizer name, maxlen, seed, n_splits and split_i. When no file exists
  at that path, the callbacks found in model_desc ("create_model",
  "train_fn", "save_fn") are used to build, train and store the model.

  Returns:
    Whatever load_and_test() returns for the same configuration.
  """
  np.random.seed(seed)

  name = model_desc["name"]
  path_template = "models/{name}/{platform}-{source}-{atomizer}:{maxlen}-{seed}-{n_splits}-{split_i}.model"
  outpath = path_template.format(
      name=name, platform=platform, source=source, atomizer=atomizer,
      maxlen=maxlen, seed=seed, n_splits=n_splits, split_i=split_i)

  model_is_cached = fs.exists(outpath)
  if not model_is_cached:
    build = model_desc.get("create_model", _nop)
    fit = model_desc.get("train_fn", _nop)
    store = model_desc["save_fn"]
    # The atomizer is given by name and resolved among module globals.
    atomizer_cls = globals().get(atomizer)

    # load training data
    data_desc = load_data_desc(platform=platform, source=source,
                               max_seq_len=maxlen, atomizer=atomizer_cls)
    train, test = get_training_data(data_desc, seed=seed, split_i=split_i,
                                    n_splits=n_splits)

    # create model
    model = build(seed=seed, data_desc=data_desc)

    # train model
    fit(model=model, train=train, seed=seed, platform=platform,
        source=source)

    fs.mkdir("models/{name}".format(name=name))
    store(outpath, model)
    print("model saved as", outpath)

  # evaluate model
  return load_and_test(model_desc, platform, source, n_splits=n_splits,
                       split_i=split_i, atomizer=atomizer, maxlen=maxlen,
                       seed=seed)
Esempio n. 6
0
    def __init__(self, name: str):
        """
        Create a named filesystem cache.

        The cache lives at the path formed from ROOT and the given name;
        fs.mkdir() is called on that path.
        """
        cache_dir = fs.path(ROOT, name)
        self.path = cache_dir
        self.name = name

        fs.mkdir(cache_dir)
Esempio n. 7
0
 def test_cp_dir(self):
     """Copying a directory tree recreates its nested directories."""
     # Start from a clean slate.
     for stale in ("/tmp/labm8", "/tmp/labm8.copy"):
         fs.rm(stale)
     fs.mkdir("/tmp/labm8/foo/bar")

     copy_exists = fs.exists("/tmp/labm8.copy")
     self._test(False, copy_exists)

     fs.cp("/tmp/labm8/", "/tmp/labm8.copy")

     for copied in ("/tmp/labm8.copy",
                    "/tmp/labm8.copy/foo",
                    "/tmp/labm8.copy/foo/bar"):
         self._test(True, fs.isdir(copied))
Esempio n. 8
0
def test_cp_dir():
    """Copying a directory tree recreates its nested directories."""
    # Start from a clean slate.
    for stale in ("/tmp/labm8", "/tmp/labm8.copy"):
        fs.rm(stale)
    fs.mkdir("/tmp/labm8/foo/bar")

    copy_exists = fs.exists("/tmp/labm8.copy")
    assert not copy_exists

    fs.cp("/tmp/labm8/", "/tmp/labm8.copy")

    for copied in ("/tmp/labm8.copy",
                   "/tmp/labm8.copy/foo",
                   "/tmp/labm8.copy/foo/bar"):
        assert fs.isdir(copied)
Esempio n. 9
0
def export_todir(s: session_t, table, dir: Path) -> None:
    """
    Write every row of a table to a directory as serialized protobufs.

    Each row is converted with toProtobuf(), serialized, and written to
    "<dir>/<sha1 of the serialized bytes>.pb". A progress bar sized by the
    table's row count tracks the export.
    """
    fs.mkdir(dir)
    rows = s.query(table)
    row_count = s.query(sql.sql.func.count(table.id)).scalar()
    progress = ProgressBar(max_value=row_count)
    for row in progress(rows):
        payload = row.toProtobuf().SerializeToString()
        digest = crypto.sha1(payload)
        with open(f"{dir}/{digest}.pb", "wb") as outfile:
            outfile.write(payload)
Esempio n. 10
0
    def __init__(self, root, escape_key=hash_key):
        """
        Set up a filesystem cache rooted at the given directory.

        Arguments:
            root (str): Path of the cache directory; fs.mkdir() is called
                on it.
            escape_key (fn, optional): Function to convert keys to file names.
        """
        self.path, self.escape_key = root, escape_key

        fs.mkdir(root)
Esempio n. 11
0
    def __init__(self, root, escape_key=hash_key):
        """
        Set up a filesystem cache rooted at the given directory.

        Arguments:
            root (str): Path of the cache directory. It is stored as a
                pathlib.Path and fs.mkdir() is called on it.
            escape_key (fn, optional): Function to convert keys to file names.
        """
        cache_root = pathlib.Path(root)
        self.path = cache_root
        self.escape_key = escape_key

        fs.mkdir(cache_root)
Esempio n. 12
0
 def test_rmtrash(self):
     """rmtrash() removes files and directory trees, and may be repeated."""
     tmp_file = "/tmp/labm8.tmp"
     tmp_dir = "/tmp/labm8.dir"
     nested_file = "/tmp/labm8.dir/foo/bar/baz"

     # Single file.
     system.echo("Hello, world!", tmp_file)
     self.assertTrue(fs.isfile(tmp_file))
     fs.rmtrash(tmp_file)
     self.assertFalse(fs.isfile(tmp_file))
     # Trashing or removing a path that is already gone must not fail.
     fs.rmtrash(tmp_file)
     fs.rm(tmp_file)

     # Directory tree.
     fs.rm(tmp_dir)
     fs.mkdir("/tmp/labm8.dir/foo/bar")
     system.echo("Hello, world!", nested_file)
     self.assertTrue(fs.isfile(nested_file))
     fs.rmtrash(tmp_dir)
     for gone in (nested_file, "/tmp/labm8.dir/"):
         self.assertFalse(fs.isfile(gone))
Esempio n. 13
0
def test_rm():
    """rm() removes files and directory trees, and may be repeated."""
    tmp_file = "/tmp/labm8.tmp"
    tmp_dir = "/tmp/labm8.dir"
    nested_file = "/tmp/labm8.dir/foo/bar/baz"

    # Single file.
    system.echo("Hello, world!", tmp_file)
    assert fs.isfile(tmp_file)
    fs.rm(tmp_file)
    assert not fs.isfile(tmp_file)
    # Removing a path that is already gone must not fail.
    for _ in range(2):
        fs.rm(tmp_file)

    # Directory tree.
    fs.rm(tmp_dir)
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    system.echo("Hello, world!", nested_file)
    assert fs.isfile(nested_file)
    fs.rm(tmp_dir)
    for gone in (nested_file, "/tmp/labm8.dir/"):
        assert not fs.isfile(gone)
Esempio n. 14
0
 def test_rm(self):
     """rm() removes files and directory trees, and may be repeated."""
     tmp_file = "/tmp/labm8.tmp"
     tmp_dir = "/tmp/labm8.dir"
     nested_file = "/tmp/labm8.dir/foo/bar/baz"

     # Single file.
     system.echo("Hello, world!", tmp_file)
     self._test(True, fs.isfile(tmp_file))
     fs.rm(tmp_file)
     self._test(False, fs.isfile(tmp_file))
     # Removing a path that is already gone must not fail.
     for _ in range(2):
         fs.rm(tmp_file)

     # Directory tree.
     fs.rm(tmp_dir)
     fs.mkdir("/tmp/labm8.dir/foo/bar")
     system.echo("Hello, world!", nested_file)
     self._test(True, fs.isfile(nested_file))
     fs.rm(tmp_dir)
     for gone in (nested_file, "/tmp/labm8.dir/"):
         self._test(False, fs.isfile(gone))
Esempio n. 15
0
def test_rmtrash():
    """rmtrash() removes files and directory trees, and may be repeated."""
    with tempfile.NamedTemporaryFile(prefix='labm8_') as f:
        assert fs.isfile(f.name)
        fs.rmtrash(f.name)
        assert not fs.isfile(f.name)
        # Trashing or removing a path that is already gone must not fail.
        fs.rmtrash(f.name)
        fs.rm(f.name)
    with tempfile.TemporaryDirectory() as d:
        fs.rm(d)
        fs.mkdir(d, "foo/bar")
        system.echo("Hello, world!", fs.path(d, "foo/bar/baz"))
        # The file lives under the temporary directory `d`; the temporary
        # file object `f` from the first part of the test is not a path.
        assert fs.isfile(d, "foo/bar/baz")
        fs.rmtrash(d)
        assert not fs.isfile(d, "foo/bar/baz")
        assert not fs.isdir(d)
Esempio n. 16
0
 def test_cp_over_dir(self):
     """Copying a directory onto an existing directory copies its files."""
     src_dir = "/tmp/labm8.tmp.src"
     src_file = "/tmp/labm8.tmp.src/foo"
     dst_dir = "/tmp/labm8.tmp.copy"
     dst_file = "/tmp/labm8.tmp.copy/foo"

     # Source holds one file; destination exists but is empty.
     fs.mkdir(src_dir)
     system.echo("Hello, world!", src_file)
     fs.rm(dst_dir)
     fs.mkdir(dst_dir)

     self._test(True, fs.isdir(src_dir))
     self._test(True, fs.isfile(src_file))
     self._test(True, fs.isdir(dst_dir))
     self._test(False, fs.isfile(dst_file))

     fs.cp(src_dir, "/tmp/labm8.tmp.copy/")

     # The source is untouched and the destination now holds the file.
     self._test(True, fs.isdir(src_dir))
     self._test(True, fs.isfile(src_file))
     self._test(True, fs.isdir(dst_dir))
     self._test(True, fs.isfile(dst_file))

     expected = fs.read(src_file)
     actual = fs.read(dst_file)
     self._test(expected, actual)
Esempio n. 17
0
def test_cp_over_dir():
    """Copying a directory onto an existing directory copies its files."""
    src_dir = "/tmp/labm8.tmp.src"
    src_file = "/tmp/labm8.tmp.src/foo"
    dst_dir = "/tmp/labm8.tmp.copy"
    dst_file = "/tmp/labm8.tmp.copy/foo"

    # Source holds one file; destination exists but is empty.
    fs.mkdir(src_dir)
    system.echo("Hello, world!", src_file)
    fs.rm(dst_dir)
    fs.mkdir(dst_dir)

    assert fs.isdir(src_dir)
    assert fs.isfile(src_file)
    assert fs.isdir(dst_dir)
    assert not fs.isfile(dst_file)

    fs.cp(src_dir, "/tmp/labm8.tmp.copy/")

    # The source is untouched and the destination now holds the file.
    assert fs.isdir(src_dir)
    assert fs.isfile(src_file)
    assert fs.isdir(dst_dir)
    assert fs.isfile(dst_file)

    expected = fs.read(src_file)
    actual = fs.read(dst_file)
    assert (expected == actual)
Esempio n. 18
0
    def __setitem__(self, key, value):
        """
        Move a file into the cache under the given key.

        Arguments:
            key: Key.
            value (str): Path of the file to move into the cache.

        Raises:
            ValueError: If "value" does not exist.
        """
        if fs.exists(value):
            destination = self.keypath(key)
            # Make sure the cache directory is present before moving.
            fs.mkdir(self.path)
            fs.mv(value, destination)
        else:
            raise ValueError(value)
Esempio n. 19
0
    def __setitem__(self, key, value):
        """
        Move a file into the cache under the given key.

        Arguments:
            key: Key.
            value (str): Path of the file to move into the cache.

        Raises:
            ValueError: If "value" does not exist.
        """
        if fs.exists(value):
            destination = self.keypath(key)
            # Make sure the cache directory is present before moving.
            fs.mkdir(self.path)
            fs.mv(value, destination)
        else:
            raise ValueError(value)
Esempio n. 20
0
def main():
    """
    Gather databases from experimental setups.

    The data directories are created first; the script then reports a
    fatal error unless the host name is "cec" before copying the local
    and remote databases.
    """
    fs.mkdir(experiment.DATA_ROOT)
    fs.mkdir(experiment.DB_DEST)

    running_on_cec = system.HOSTNAME == "cec"
    if not running_on_cec:
        io.fatal("script must be ran on machine `cec'")

    # TODO: Perform integrity checks. If they fail, transfer again.
    cp_loc("~/.omnitune/skelcl.db", "cec")
    cp_rmt("brendel.inf.ed.ac.uk", path="~/florence.db", name="florence")
    # Remaining hosts are copied with cp_rmt()'s default arguments.
    for host in ("dhcp-90-060", "monza", "tim", "whz5"):
        cp_rmt(host)
Esempio n. 21
0
def cachepath(*relative_path_components: list) -> str:
    """
    Build a path inside the file system cache.

    The cache root is taken from the CLGEN_CACHE environment variable,
    falling back to a per-version directory under ~/.cache/clgen.
    fs.mkdir() is called on the cache root before the path is built.

    Parameters
    ----------
    *relative_path_components
        Path components relative to the cache root.

    Returns
    -------
    str
        The result of fs.path() for the cache root joined with the given
        components.
    """
    default_root = f"~/.cache/clgen/{version_info.major}.{version_info.minor}.x"
    cache_root = os.environ.get("CLGEN_CACHE", default_root)

    fs.mkdir(cache_root)
    return fs.path(cache_root, *relative_path_components)
Esempio n. 22
0
def test_rm_glob():
    """rm() expands glob patterns unless called with glob=False."""
    file_1 = "/tmp/labm8.glob/1"
    file_2 = "/tmp/labm8.glob/2"
    file_abc = "/tmp/labm8.glob/abc"

    fs.mkdir("/tmp/labm8.glob")
    for target in (file_1, file_2, file_abc):
        system.echo("Hello, world!", target)

    # With globbing disabled the pattern matches nothing.
    fs.rm("/tmp/labm8.glob/a*", glob=False)
    for survivor in (file_1, file_2, file_abc):
        assert fs.isfile(survivor)

    # "a*" only matches "abc".
    fs.rm("/tmp/labm8.glob/a*")
    assert fs.isfile(file_1)
    assert fs.isfile(file_2)
    assert not fs.isfile(file_abc)

    # "*" matches everything that is left.
    fs.rm("/tmp/labm8.glob/*")
    for removed in (file_1, file_2, file_abc):
        assert not fs.isfile(removed)
Esempio n. 23
0
    def test_rm_glob(self):
        """rm() expands glob patterns unless called with glob=False."""
        file_1 = "/tmp/labm8.glob/1"
        file_2 = "/tmp/labm8.glob/2"
        file_abc = "/tmp/labm8.glob/abc"

        fs.mkdir("/tmp/labm8.glob")
        for target in (file_1, file_2, file_abc):
            system.echo("Hello, world!", target)

        # With globbing disabled the pattern matches nothing.
        fs.rm("/tmp/labm8.glob/a*", glob=False)
        for survivor in (file_1, file_2, file_abc):
            self._test(True, fs.isfile(survivor))

        # "a*" only matches "abc".
        fs.rm("/tmp/labm8.glob/a*")
        self._test(True, fs.isfile(file_1))
        self._test(True, fs.isfile(file_2))
        self._test(False, fs.isfile(file_abc))

        # "*" matches everything that is left.
        fs.rm("/tmp/labm8.glob/*")
        for removed in (file_1, file_2, file_abc):
            self._test(False, fs.isfile(removed))
Esempio n. 24
0
def visualise_regression_job(db, job):
    """
    Create the image directories for a regression job and plot its results.

    Four per-job directories are created under "img/", then one plot per
    category (runtime/speedup, regression/classification) is written to
    "img/<category>/<job>.png".
    """
    job_dirs = (
        "img/runtime_regression/{}/".format(job),
        "img/runtime_classification/{}/".format(job),
        "img/speedup_regression/{}/".format(job),
        "img/speedup_classification/{}/".format(job),
    )
    for job_dir in job_dirs:
        fs.mkdir(job_dir)

    # Line plot of all classifiers.
    runtime_regression_png = "img/runtime_regression/{}.png".format(job)
    visualise.runtime_regression(db, runtime_regression_png, job=job)

    runtime_classification_png = (
        "img/runtime_classification/{}.png".format(job))
    visualise.runtime_classification(db, runtime_classification_png, job=job)

    speedup_regression_png = "img/speedup_regression/{}.png".format(job)
    visualise.speedup_regression(db, speedup_regression_png, job=job)

    speedup_classification_png = (
        "img/speedup_classification/{}.png".format(job))
    visualise.speedup_classification(db, speedup_classification_png, job=job)
Esempio n. 25
0
def visualise_regression_job(db, job):
    """
    Create the image directories for a regression job and plot its results.

    Four per-job directories are created under "img/", then one plot per
    category (runtime/speedup, regression/classification) is written to
    "img/<category>/<job>.png".
    """
    job_dirs = (
        "img/runtime_regression/{}/".format(job),
        "img/runtime_classification/{}/".format(job),
        "img/speedup_regression/{}/".format(job),
        "img/speedup_classification/{}/".format(job),
    )
    for job_dir in job_dirs:
        fs.mkdir(job_dir)

    # Line plot of all classifiers.
    runtime_regression_png = "img/runtime_regression/{}.png".format(job)
    visualise.runtime_regression(db, runtime_regression_png, job=job)

    runtime_classification_png = (
        "img/runtime_classification/{}.png".format(job))
    visualise.runtime_classification(db, runtime_classification_png, job=job)

    speedup_regression_png = "img/speedup_regression/{}.png".format(job)
    visualise.speedup_regression(db, speedup_regression_png, job=job)

    speedup_classification_png = (
        "img/speedup_classification/{}.png".format(job))
    visualise.speedup_classification(db, speedup_classification_png, job=job)
Esempio n. 26
0
def write_file(path: str, contents: str) -> None:
    """
    Write text to a file, creating its parent directory when it has one.

    Arguments:
        path: Path of the file to write; it is opened in "w" mode.
        contents: Text to write.
    """
    parent_dir = fs.dirname(path)
    # A bare file name has no directory component, so nothing to create.
    if parent_dir:
        fs.mkdir(parent_dir)
    with open(path, 'w') as outfile:
        outfile.write(contents)
Esempio n. 27
0
def evaluate(model, device, data_folder, out_folder, embeddings,
             dense_layer_size, print_summary, num_epochs,
             batch_size) -> pd.DataFrame:
    """
    Cross-validate a device-mapping model and collect per-benchmark results.

    For each platform, the runtime data is read from
    "<data_folder>/cgo17-<platform>.csv", the sources are encoded and looked
    up in the L2-normalized embedding matrix, and the model is evaluated
    with 10-fold stratified cross-validation. Trained models and predictions
    are cached under "<out_folder>/models" and "<out_folder>/predictions";
    existing cache files are reused instead of retraining or re-predicting.

    Arguments:
        model: Model object. This function uses its `__name__`,
            `__basename__`, `init`, `restore`, `train`, `save` and `predict`
            members, plus `model.model.summary()` when print_summary is set.
        device: 'all' to evaluate both "amd" and "nvidia", otherwise the
            single platform name to evaluate.
        data_folder: Folder containing the cgo17 CSV files; also passed to
            encode_srcs().
        out_folder: Folder under which models, predictions and logs go.
        embeddings: Embedding matrix passed to tf.nn.l2_normalize.
        dense_layer_size: Forwarded to model.init().
        print_summary: If true, print the model summary after model.init().
        num_epochs: Forwarded to model.train() as `epochs`.
        batch_size: Forwarded to model.train() and model.predict().

    Returns:
        pd.DataFrame: One row per (platform, fold, test benchmark) with the
        columns "Model", "Platform", "Benchmark", "Benchmark Suite",
        "Oracle Mapping", "Predicted Mapping", "Correct?" and "Speedup",
        indexed from 1.
    """
    from sklearn.model_selection import StratifiedKFold

    # Create device list
    if device == 'all':
        device_list = ["amd", "nvidia"]
    else:
        device_list = [device]

    # Accumulates one dict per test benchmark across platforms and folds.
    data = []
    for i, platform in enumerate(device_list):
        platform_name = platform2str(platform)

        # Load runtime data
        data_file = os.path.join(data_folder, "cgo17-{}.csv".format(platform))
        print('\n--- Read data from', data_file)
        df = pd.read_csv(data_file)

        # Encode input source codes
        sequences, maxlen = encode_srcs(data_folder, df)

        # Load embeddings
        import tensorflow as tf  # for embeddings lookup
        embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)
        vocabulary_size, embedding_dimension = embedding_matrix_normalized.shape
        seq_ = tf.compat.v1.placeholder(dtype=tf.int32)
        # Tensor of shape (num_input_files, sequence length, embedding dimension)
        embedding_input_ = tf.compat.v1.nn.embedding_lookup(
            params=embedding_matrix_normalized, ids=seq_)

        # Make tf block less gpu memory
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True

        # Run the lookup once for all sequences; the result is indexed per
        # fold below.
        with tf.compat.v1.Session(config=config) as sess:
            embedding_input = sess.run(embedding_input_,
                                       feed_dict={seq_: sequences})

        # Values used for training & predictions
        aux_in = auxiliary_inputs(df)

        # Optimal mappings: 1 where the oracle device is "GPU", otherwise 0.
        y = np.array([1 if x == "GPU" else 0 for x in df["oracle"].values])
        y_1hot = encode_1hot(y)

        # 10-fold cross-validation
        # NOTE(review): `seed` is neither a parameter nor a local of this
        # function; presumably a module-level name - confirm it is defined.
        n_splits = 10
        kf = StratifiedKFold(n_splits=n_splits,
                             shuffle=True,
                             random_state=seed)
        for j, (train_index, test_index) in enumerate(kf.split(sequences, y)):
            print('--- Cross validation step [', j, '/ ', n_splits, ']')

            # NOTE(review): model_name is assigned but not used below.
            model_name = model.__name__
            model_basename = model.__basename__
            # Cache locations are unique per model, platform and fold.
            model_path = os.path.join(
                out_folder,
                "models/{model_basename}-{platform}-{j}.model".format(
                    model_basename=model_basename, platform=platform, j=j))
            predictions_path = os.path.join(
                out_folder,
                "predictions/{model_basename}-{platform}-{j}.result".format(
                    model_basename=model_basename, platform=platform, j=j))
            log_dir = os.path.join(out_folder, "logs")

            if fs.exists(predictions_path):
                # load result from cache
                # NOTE(review): pickle.load executes arbitrary code from the
                # file; only safe if the cache directory is trusted.
                print("\tFound predictions in", predictions_path,
                      ", skipping...")
                with open(predictions_path, 'rb') as infile:
                    p = pickle.load(infile)
            else:

                if fs.exists(model_path):
                    # restore trained model from cache
                    print("\n\tFound trained model in", model_path,
                          ", skipping...")
                    model.restore(model_path)
                else:

                    # Initialize model and print summary
                    model.init(seed=seed,
                               maxlen=maxlen,
                               embedding_dim=int(embedding_dimension),
                               dense_layer_size=dense_layer_size)
                    if print_summary:
                        model.model.summary()

                    # Train and cache a model
                    print('\n--- Training model... ')
                    model.train(df=df,
                                aux_in=aux_in[train_index],
                                sequences=embedding_input[train_index, :, :],
                                y=y[train_index],
                                y_1hot=y_1hot[train_index],
                                verbose=False,
                                epochs=num_epochs,
                                batch_size=batch_size,
                                log_dir=log_dir)
                    fs.mkdir(fs.dirname(model_path))
                    model.save(model_path)
                    print('\tsaved model to', model_path)

                # test model
                print('\n--- Testing model... ')
                p = model.predict(batch_size=batch_size,
                                  aux_in=aux_in[test_index],
                                  sequences=embedding_input[test_index, :, :],
                                  y=y[test_index],
                                  y_1hot=y_1hot[test_index],
                                  verbose=False)

                # cache results
                fs.mkdir(fs.dirname(predictions_path))
                with open(predictions_path, 'wb') as outfile:
                    pickle.dump(p, outfile)
                print('\tsaved predictions to', predictions_path)

            benchmarks = df['benchmark'].values[test_index]  # benchmarks names
            o = y[test_index]  # oracle device mappings (true values)
            correct = p == o  # predictions' correctness
            # runtimes of baseline mapping (CPU on AMD, GPU on NVIDIA)
            zero_r_dev = "runtime_cpu" if platform == "amd" else "runtime_gpu"
            zer_r_runtimes = df[zero_r_dev][test_index]
            # speedups of predictions: each prediction p_ indexes the
            # (runtime_cpu, runtime_gpu) pair, so 0 selects CPU and 1 GPU.
            runtimes = df[['runtime_cpu', 'runtime_gpu']].values[test_index]
            p_runtimes = [r[p_] for p_, r in zip(p, runtimes)]
            p_speedup = zer_r_runtimes / p_runtimes

            # sanity check
            assert (len(benchmarks) == len(o) == len(correct) == len(p) ==
                    len(p_speedup))

            # record results
            for benchmark_, o_, p_, correct_, p_speedup_ in zip(
                    benchmarks, o, p, correct, p_speedup):
                data.append({
                    "Model": model_basename,
                    "Platform": platform_name,
                    'Benchmark': escape_benchmark_name(benchmark_),
                    'Benchmark Suite': escape_suite_name(benchmark_),
                    "Oracle Mapping": o_,
                    "Predicted Mapping": p_,
                    "Correct?": correct_,
                    "Speedup": p_speedup_,
                })

    # Rows are indexed from 1 rather than 0.
    return pd.DataFrame(data,
                        index=range(1,
                                    len(data) + 1),
                        columns=[
                            "Model", "Platform", "Benchmark",
                            "Benchmark Suite", "Oracle Mapping",
                            "Predicted Mapping", "Correct?", "Speedup"
                        ])
Esempio n. 28
0
    def _init_tensorflow(self, infer: bool = False):
        """
        Deferred importing of tensorflow and initializing model for training
        or sampling.

        This is necessary for two reasons: first, the tensorflow graph is
        different for training and inference, so must be reset when switching
        between modes. Second, importing tensorflow takes a long time, so
        we only want to do it if we actually need to.

        Arguments:
            infer (bool): If True, initialize model for inference. If False,
                initialize model for training.

        Returns:
            module: imported TensorFlow module

        Raises:
            clgen.UserError: If self.model_type is not one of "lstm", "gru"
                or "rnn".
        """
        import tensorflow as tf
        from tensorflow.python.ops import rnn_cell
        from tensorflow.python.ops import seq2seq

        # Use self.tensorflow_state to mark whether or not model is configured
        # for training or inference.
        try:
            # Already configured for the requested mode: nothing to rebuild.
            if self.tensorflow_state == infer:
                return tf
        except AttributeError:
            # First call: tensorflow_state has not been set yet.
            pass

        # Map the configured model type name to its RNN cell class.
        self.cell_fn = {
            "lstm": rnn_cell.BasicLSTMCell,
            "gru": rnn_cell.GRUCell,
            "rnn": rnn_cell.BasicRNNCell
        }.get(self.model_type, None)
        if self.cell_fn is None:
            raise clgen.UserError("Unrecognized model type")

        # reset the graph when switching between training and inference
        tf.reset_default_graph()

        # corpus info: inference uses a batch size and sequence length of 1,
        # training uses the corpus settings.
        batch_size = 1 if infer else self.corpus.batch_size
        seq_length = 1 if infer else self.corpus.seq_length
        vocab_size = self.corpus.vocab_size

        fs.mkdir(self.cache.path)

        # Stack num_layers references to the same cell object.
        cell = self.cell_fn(self.rnn_size, state_is_tuple=True)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * self.num_layers,
                                                 state_is_tuple=True)
        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = self.cell.zero_state(batch_size, tf.float32)

        scope_name = 'rnnlm'
        with tf.variable_scope(scope_name):
            # Output projection from RNN state to vocabulary logits.
            softmax_w = tf.get_variable("softmax_w",
                                        [self.rnn_size, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])

            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [vocab_size, self.rnn_size])
                # NOTE(review): the axis-first argument order of tf.split
                # (and tf.concat below) looks like the pre-1.0 TensorFlow
                # API - confirm against the pinned TensorFlow version.
                inputs = tf.split(
                    1, seq_length,
                    tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            # Feed the embedding of the most likely symbol from the previous
            # output back in as the next input; only passed to the decoder
            # when infer is True.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if infer else None,
            scope=scope_name)
        output = tf.reshape(tf.concat(1, outputs), [-1, self.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * seq_length])], vocab_size)
        # Cost is the summed loss divided by batch size and sequence length.
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        self.final_state = last_state
        # Non-trainable variables for the learning rate and epoch counter.
        self.learning_rate = tf.Variable(0.0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        tvars = tf.trainable_variables()
        # Clip gradients by global norm before applying them.
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # set model status
        self.tensorflow_state = infer

        return tf
Esempio n. 29
0
def visualise_classification_job(db, job):
    """
    Plot the classification results of a job and write a LaTeX summary.

    Plots are written under "img/classification/<job>/" (plus the overview
    "img/classification/<job>.png"). The summary table has one row per
    (classifier, err_fn) pair of the job and is written to "<job>.tex"
    under experiment.TAB_ROOT.
    """
    basedir = "img/classification/{}/".format(job)

    for directory in (basedir, basedir + "classifiers", basedir + "err_fns"):
        fs.mkdir(directory)

    visualise.err_fn_performance(db, basedir + "err_fns.png", job=job)

    # Bar plot of all results.
    overview_png = "img/classification/{}.png".format(job)
    visualise.classification(db, overview_png, job=job)

    # Per-classifier plots, named by the classifier's position.
    for index, classifier in enumerate(db.classification_classifiers):
        classifier_png = basedir + "classifiers/{}.png".format(index)
        visualise.classifier_speedups(db, classifier, classifier_png, job=job)

    # Per-err_fn plots.
    for err_fn in db.err_fns:
        err_fn_png = basedir + "err_fns/{}.png".format(err_fn)
        visualise.err_fn_speedups(db, err_fn, err_fn_png, job=job, sort=True)

    # Results table.
    groups = db.execute(
        "SELECT classifier,err_fn,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? GROUP BY classifier,err_fn",
        (job,)
    )
    results = []
    for classifier, err_fn, count in groups:
        group_rows = db.execute(
            "SELECT correct,illegal,refused,performance,speedup\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (job, classifier, err_fn)
        )
        # Transpose the rows into one tuple per column.
        correct, illegal, refused, performance, speedup = zip(
            *list(group_rows))

        summary = [classifier, err_fn]
        # Percentage of results flagged correct / illegal / refused.
        summary.append((sum(correct) / count) * 100)
        summary.append((sum(illegal) / count) * 100)
        summary.append((sum(refused) / count) * 100)
        # Performance as min / geometric mean / max, in percent.
        summary.append(min(performance) * 100)
        summary.append(labmath.geomean(performance) * 100)
        summary.append(max(performance) * 100)
        # Speedup as min / geometric mean / max.
        summary.append(min(speedup))
        summary.append(labmath.geomean(speedup))
        summary.append(max(speedup))
        results.append(summary)

    def two_decimals(f):
        return "{:.2f}".format(f)

    # Replace each classifier identifier by its base name.
    for row in results:
        row[0] = ml.classifier_basename(row[0])

    columns = (
        "CLASSIFIER",
        "ERR_FN",
        "ACC %",
        "INV %",
        "REF %",
        "Omin %",
        "Oavg %",
        "Omax %",
        "Smin",
        "Savg",
        "Smax",
    )

    latex.table(results, output=fs.path(experiment.TAB_ROOT, job + ".tex"),
                columns=columns, float_format=two_decimals)
Esempio n. 30
0
 def test_mkdir(self):
     """mkdir() creates a directory that did not exist before."""
     target = "/tmp/labm8.dir"
     fs.rm(target)
     existed_before = fs.isdir(target)
     self._test(False, existed_before)
     fs.mkdir(target)
     exists_after = fs.isdir(target)
     self._test(True, exists_after)
def test_accuracy(model, embeddings, folder_data, samples_per_class,
                  folder_results, dense_layer_size, print_summary, num_epochs,
                  batch_size):
    """Score a classifier on the test split and save a confusion-matrix plot.

    The '.rec' files of every class (1..104) are collected from
    ``<folder_data>/seq_test/<class>``, encoded and padded. Top-k predictions
    are loaded from a cache file under ``folder_results`` when it exists;
    otherwise the model is initialised, its weights are loaded from
    ``FLAGS.out`` and the predictions are computed and cached. The top-k
    accuracy is printed and a confusion matrix is written to
    ``<folder_results>/models/conf_matr_<model name>.png``.

    Args:
        model: classifier object providing ``init``, ``load_weights`` and
            ``predict_topk``; its ``__name__`` is overwritten with
            ``FLAGS.model_name``.
        embeddings: embedding matrix, L2-normalised along axis 1 before use.
        folder_data: root data folder containing ``seq_test``.
        samples_per_class: unused here; kept for interface compatibility.
        folder_results: root folder for cached predictions and the plot.
        dense_layer_size: forwarded to ``model.init``.
        print_summary: print the model summary after loading the weights.
        num_epochs: unused here; kept for interface compatibility.
        batch_size: batch size used for prediction.
    """
    seed = 204

    num_classes = 104
    y_test = np.array([], dtype=np.int32)
    X_test = list()
    folder_data_test = os.path.join(folder_data, 'seq_test')
    print('Getting file names for', num_classes, 'classes from folders:')
    print(folder_data_test)
    for i in range(1, num_classes + 1):
        folder = os.path.join(folder_data_test, str(i))
        assert os.path.exists(folder), "Folder: " + folder + ' does not exist'
        print('\ttest      : Read file names from folder ', folder)
        listing = os.listdir(folder + '/')
        seq_files = [
            os.path.join(folder, f) for f in listing if f.endswith('.rec')
        ]
        assert len(seq_files) > 0, "No .rec files found in " + folder
        X_test += seq_files
        # Every file found in folder <i> is labelled with class i
        y_test = np.concatenate(
            [y_test,
             np.array([int(i)] * len(seq_files), dtype=np.int32)])

    # Only the index of the unknown token is needed from the dictionary
    folder_vocabulary = FLAGS.vocabulary_dir
    dictionary_pickle = os.path.join(folder_vocabulary, 'dic_pickle')
    print('\tLoading dictionary from file', dictionary_pickle)
    with open(dictionary_pickle, 'rb') as f:
        dictionary = pickle.load(f)
    unk_index = dictionary[rgx.unknown_token]
    del dictionary

    X_seq_test, maxlen_test = encode_srcs(X_test, 'testing', unk_index)
    maxlen = maxlen_test
    print('Max. sequence length overall:', maxlen)
    # A positive FLAGS.maxlen overrides the length measured on the test data
    if FLAGS.maxlen > 0:
        maxlen = FLAGS.maxlen
    print('Padding sequences to length', maxlen)
    X_seq_test = pad_src(X_seq_test, maxlen, unk_index)

    model.__name__ = FLAGS.model_name
    model_name = model.__name__
    predictions_path = os.path.join(
        folder_results,
        "predictions/{}_top{}.result".format(model_name, FLAGS.topk))

    if fs.exists(predictions_path):
        print("\tFound predictions in", predictions_path, ", skipping...")
        with open(predictions_path, 'rb') as infile:
            ind = pickle.load(infile)

    else:
        import tensorflow as tf
        embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)
        # The unpacking also checks that the matrix is two-dimensional
        _, embedding_dimension = embedding_matrix_normalized.shape
        print('EMB:\n', embedding_matrix_normalized)

        print('\n--- Initializing model...')
        model.init(seed=seed,
                   maxlen=maxlen,
                   embedding_dim=int(embedding_dimension),
                   num_classes=num_classes,
                   dense_layer_size=dense_layer_size,
                   embedding_matrix=embedding_matrix_normalized)
        model.load_weights(
            os.path.join(FLAGS.out, model.__name__ + '_weights.h5'))
        if print_summary:
            model.model.summary()

        print('\n--- Testing model...')
        ind, prob = model.predict_topk(X_seq_test, batch_size, FLAGS.topk)
        del prob
        fs.mkdir(fs.dirname(predictions_path))
        with open(predictions_path, 'wb') as outfile:
            pickle.dump(ind, outfile)
        print('\tsaved predictions to', predictions_path)

    # A sample counts as correct when any of its top-k predictions matches.
    # NOTE(review): assumes ``ind`` is (num_samples, topk) before the
    # transpose, so that ind[i] holds the i-th ranked prediction - confirm.
    accuracy = np.zeros_like(y_test)
    ind = np.transpose(np.array(ind))
    for i in range(FLAGS.topk):
        accuracy += np.array(ind[i]) == y_test
    print('\nTest top{} accuracy:'.format(FLAGS.topk),
          accuracy.sum() * 100.0 / len(accuracy), '%')

    from sklearn.metrics import confusion_matrix
    conf_matr = confusion_matrix(y_test, ind[0])

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    values = plt.imshow(conf_matr)
    ax.xaxis.tick_top()
    ax.xaxis.set_label_position('top')

    fig.colorbar(values)
    # confusion_matrix() puts the true labels on the rows and the predictions
    # on the columns; imshow() draws rows along y and columns along x.
    ax.set_xlabel('Предсказанные классы')
    ax.set_ylabel('Настоящие классы')
    conf_png = os.path.join(folder_results,
                            "models/conf_matr_{}.png".format(model_name))
    # The target folder is not created anywhere else in this function
    fs.mkdir(fs.dirname(conf_png))
    plt.savefig(conf_png)
    plt.close(fig)
Esempio n. 32
0
def evaluate(model, embeddings, folder_data, samples_per_class, folder_results, dense_layer_size, print_summary,
             num_epochs, batch_size):
    """Train (or restore) ``model`` on the classification data and score it on the test split.

    File names ending in '.rec' are collected from ``<folder_data>_train``, ``<folder_data>_val`` and
    ``<folder_data>_test``, one sub-folder per class (1..104). Cached predictions and cached models found
    under ``folder_results`` are reused instead of being recomputed.

    :param model: classifier object providing ``init``, ``train_gen``, ``save``, ``restore`` and ``predict_gen``
    :param embeddings: embedding matrix, L2-normalised along axis 1 before use
    :param folder_data: common prefix of the three data folders
    :param samples_per_class: number of training files sampled per class
    :param folder_results: root folder for cached models and predictions
    :param dense_layer_size: forwarded to ``model.init``
    :param print_summary: print the model summary after initialisation
    :param num_epochs: number of training epochs
    :param batch_size: batch size handed to the data generators
    :return: element-wise comparison of the predictions with the test labels
    """
    seed = 204  # fixed so that sampling and initialisation are reproducible
    vsamples_per_class = FLAGS.vsamples
    num_classes = 104

    def rec_files(class_folder, banner):
        """Announce ``class_folder`` with ``banner`` and list the '.rec' files it contains."""
        assert os.path.exists(class_folder), "Folder: " + class_folder + ' does not exist'
        print(banner, class_folder)
        return [os.path.join(class_folder, name) for name in os.listdir(class_folder + '/') if name[-4:] == '.rec']

    def labels(class_id, count):
        """Build ``count`` copies of the label of class ``class_id``."""
        return np.array([int(class_id)] * count, dtype=np.int32)

    # One list of file names and one label vector per split
    folder_data_train = folder_data + '_train'
    folder_data_val = folder_data + '_val'
    folder_data_test = folder_data + '_test'
    X_train, X_val, X_test = [], [], []
    y_train, y_val, y_test = np.empty(0), np.empty(0), np.empty(0)

    print('Getting file names for', num_classes, 'classes from folders:')
    for split_folder in (folder_data_train, folder_data_val, folder_data_test):
        print(split_folder)

    for class_id in range(1, num_classes + 1):
        class_dir = str(class_id)

        # Training split: always a fixed-size random sample
        folder = os.path.join(folder_data_train, class_dir)
        seq_files = rec_files(folder, '\ttraining  : Read file names from folder ')
        assert len(seq_files) >= samples_per_class, "Cannot sample " + str(samples_per_class) + " from " + str(
            len(seq_files)) + " files found in " + folder
        X_train.extend(resample(seq_files, replace=False, n_samples=samples_per_class, random_state=seed))
        y_train = np.concatenate([y_train, labels(class_id, samples_per_class)])

        # Validation split: sampled only when a positive sample count is configured
        folder = os.path.join(folder_data_val, class_dir)
        seq_files = rec_files(folder, '\tvalidation: Read file names from folder ')
        if vsamples_per_class > 0:
            assert len(seq_files) >= vsamples_per_class, "Cannot sample " + str(vsamples_per_class) + " from " + str(
                len(seq_files)) + " files found in " + folder
            X_val.extend(resample(seq_files, replace=False, n_samples=vsamples_per_class, random_state=seed))
            y_val = np.concatenate([y_val, labels(class_id, vsamples_per_class)])
        else:
            assert len(seq_files) > 0, "No .rec files found in" + folder
            X_val.extend(seq_files)
            y_val = np.concatenate([y_val, labels(class_id, len(seq_files))])

        # Test split: every file is used
        folder = os.path.join(folder_data_test, class_dir)
        seq_files = rec_files(folder, '\ttest      : Read file names from folder ')
        assert len(seq_files) > 0, "No .rec files found in" + folder
        X_test.extend(seq_files)
        y_test = np.concatenate([y_test, labels(class_id, len(seq_files))])

    # Only the index of the unknown token is needed from the dictionary
    dictionary_pickle = os.path.join(FLAGS.vocabulary_dir, 'dic_pickle')
    print('\tLoading dictionary from file', dictionary_pickle)
    with open(dictionary_pickle, 'rb') as dict_file:
        dictionary = pickle.load(dict_file)
    unk_index = dictionary[rgx.unknown_token]
    del dictionary

    # Encode all three splits, then pad them to the longest sequence seen
    X_seq_train, maxlen_train = encode_srcs(X_train, 'training', unk_index)
    X_seq_val, maxlen_val = encode_srcs(X_val, 'validation', unk_index)
    X_seq_test, maxlen_test = encode_srcs(X_test, 'testing', unk_index)
    maxlen = max(maxlen_train, maxlen_test, maxlen_val)
    print('Max. sequence length overall:', maxlen)
    print('Padding sequences')
    X_seq_train = pad_src(X_seq_train, maxlen, unk_index)
    X_seq_val = pad_src(X_seq_val, maxlen, unk_index)
    X_seq_test = pad_src(X_seq_test, maxlen, unk_index)

    # One-hot targets for training and validation
    print('YTRAIN\n', y_train)
    y_1hot_train = get_onehot(y_train, num_classes)
    y_1hot_val = get_onehot(y_val, num_classes)

    # Cache locations, keyed by the model name
    model_name = model.__name__
    model_path = os.path.join(folder_results,
                              "classifyapp/models/{}.model".format(model_name))
    predictions_path = os.path.join(folder_results,
                                    "classifyapp/predictions/{}.result".format(model_name))

    if not fs.exists(predictions_path):
        # No cached predictions: a model is needed
        import tensorflow as tf  # for embeddings lookup
        embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)
        _, embedding_dimension = embedding_matrix_normalized.shape
        print('XSEQ:\n', X_seq_train)
        print('EMB:\n', embedding_matrix_normalized)

        gen_test = EmbeddingPredictionSequence(batch_size, X_seq_test, embedding_matrix_normalized)

        if not fs.exists(model_path):
            # No cached model either: train one from scratch and cache it
            gen_train = EmbeddingSequence(batch_size, X_seq_train, y_1hot_train, embedding_matrix_normalized)
            gen_val = EmbeddingSequence(batch_size, X_seq_val, y_1hot_val, embedding_matrix_normalized)

            print('\n--- Initializing model...')
            model.init(seed=seed,
                       maxlen=maxlen,
                       embedding_dim=int(embedding_dimension),
                       num_classes=num_classes,
                       dense_layer_size=dense_layer_size)
            if print_summary:
                model.model.summary()
            print('\n--- Training model...')
            model.train_gen(train_generator=gen_train,
                            validation_generator=gen_val,
                            verbose=True,
                            epochs=num_epochs)

            fs.mkdir(fs.dirname(model_path))
            model.save(model_path)
            print('\tsaved model to', model_path)
        else:
            print("\n\tFound trained model in", model_path, ", skipping...")
            model.restore(model_path)

        # Predict on the test split and cache the outcome
        print('\n--- Testing model...')
        p = model.predict_gen(generator=gen_test)[0]

        fs.mkdir(fs.dirname(predictions_path))
        with open(predictions_path, 'wb') as outfile:
            pickle.dump(p, outfile)
        print('\tsaved predictions to', predictions_path)
    else:
        print("\tFound predictions in", predictions_path, ", skipping...")
        with open(predictions_path, 'rb') as infile:
            p = pickle.load(infile)

    # Element-wise correctness of the predictions
    return p == y_test
Esempio n. 33
0
def evaluate(model, device, data_folder, out_folder, embeddings,
             dense_layer_size, print_summary, num_epochs, batch_size):
    """Leave-one-out evaluation of ``model`` on the thread-coarsening data.

    For every requested platform the oracle and runtime tables are read from
    ``data_folder`` and the kernel sources are encoded and embedded. For each
    held-out kernel, a cached prediction or model under ``out_folder`` is
    reused when present; otherwise a model is trained and cached. The
    predicted coarsening factor is then compared with the oracle.

    Args:
        model: object providing ``init``, ``train``, ``save``, ``restore``
            and ``predict`` plus ``__name__`` / ``__basename__``.
        device: ``'all'`` for the four built-in platforms, otherwise a single
            platform name.
        data_folder: folder holding ``pact-2014-oracles.csv`` and
            ``pact-2014-runtimes.csv``.
        out_folder: root folder for cached models and predictions.
        embeddings: embedding matrix, L2-normalised along axis 1 before use.
        dense_layer_size: forwarded to ``model.init``.
        print_summary: print the model summary after initialisation.
        num_epochs: number of training epochs.
        batch_size: batch size for training and prediction.

    Returns:
        pandas.DataFrame with one row per (platform, kernel) and the columns
        Model, Platform, Kernel, Oracle-CF, Predicted-CF, Speedup, Oracle.
    """
    data = []

    # Create device list
    if device == 'all':
        device_list = ["Cypress", "Tahiti", "Fermi", "Kepler"]
    else:
        device_list = [device]

    for i, platform in enumerate(device_list):
        print(
            '\n------------------------------------------------------------------'
        )
        # Count against the real number of platforms; a single-device run
        # must not be reported as "1 / 4"
        print('--- Platform', platform, '[', i + 1, '/', len(device_list), ']')
        print(
            '------------------------------------------------------------------'
        )
        platform_name = platform2str(platform)

        # Read data
        oracle_file = os.path.join(data_folder, "pact-2014-oracles.csv")
        oracles = pd.read_csv(oracle_file)
        runtimes_file = os.path.join(data_folder, "pact-2014-runtimes.csv")
        df = pd.read_csv(runtimes_file)
        print('\tRead data from', oracle_file, '\n\tand', runtimes_file)

        # Extract data
        oracle_runtimes = np.array(
            [float(x) for x in oracles["runtime_" + platform]])
        y = np.array([int(x) for x in oracles["cf_" + platform]],
                     dtype=np.int32)
        y_1hot = get_onehot(oracles, platform)

        # Sorted kernel names; the test index of each fold selects from this
        # list, so build it once per platform instead of once per fold
        kernels = sorted(set(df["kernel"]))

        # Encode source codes
        X_seq, maxlen = encode_srcs(data_folder, df)

        # Embeddings
        import tensorflow as tf  # for embeddings lookup
        embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)
        _, embedding_dimension = embedding_matrix_normalized.shape
        seq_ = tf.placeholder(dtype=tf.int32)

        # Tensor of shape (num_input_files, sequence length, embbedding dimension)
        embedding_input_ = tf.nn.embedding_lookup(embedding_matrix_normalized,
                                                  seq_)

        # Make tf block less gpu memory
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            embedding_input = sess.run(embedding_input_,
                                       feed_dict={seq_: X_seq})

        # Leave-one-out cross-validation
        kf = KFold(n_splits=len(y), shuffle=False)

        for j, (train_index, test_index) in enumerate(kf.split(y)):
            print('--- Cross validation step [', j + 1, '/ ', len(y), ']')
            kernel = kernels[test_index[0]]
            # NOTE(review): recomputed on every fold although its arguments
            # do not change; hoist if get_magni_features is side-effect free.
            X_cc, _ = get_magni_features(df, oracles, platform)

            model_name = model.__name__
            model_basename = model.__basename__

            model_path = os.path.join(
                out_folder,
                "models/{model_basename}-{platform}-{j}.model".format(
                    model_basename=model_basename, platform=platform, j=j))
            predictions_path = os.path.join(
                out_folder,
                "predictions/{model_basename}-{platform}-{j}.result".format(
                    model_basename=model_basename, platform=platform, j=j))

            if fs.exists(predictions_path):
                # load result from cache
                print("\tFound predictions in", predictions_path,
                      ", skipping...")
                with open(predictions_path, 'rb') as infile:
                    p = pickle.load(infile)
            else:

                if fs.exists(model_path):
                    # load a trained model from cache
                    print("\n\tFound trained model in", model_path,
                          ", skipping...")
                    model.restore(model_path)
                else:

                    # Initialize model and print summary
                    print('\n--- Training model...')
                    # NOTE(review): ``seed`` is not defined in this function;
                    # presumably a module-level constant - verify.
                    model.init(seed, maxlen, int(embedding_dimension),
                               dense_layer_size)
                    if print_summary:
                        model.model.summary()

                    # Train and cache a model
                    model.train(sequences=embedding_input[train_index, :, :],
                                verbose=True,
                                y_1hot=y_1hot[train_index],
                                epochs=num_epochs,
                                batch_size=batch_size)

                    # cache the model
                    fs.mkdir(fs.dirname(model_path))
                    model.save(model_path)
                    print('\tsaved model to', model_path)

                # test model
                print('\n--- Testing model...')
                p = model.predict(sequences=embedding_input[test_index, :, :],
                                  batch_size=batch_size)[0]

                # The runtimes of some coarsening factors are not recorded in the data table. If that is the case for
                # the predicted cf, clamp it down to the highest cf for which the runtime is recorded
                p = min(p, 2**(len(X_cc[test_index[0]]) - 1))

                # cache the prediction
                fs.mkdir(fs.dirname(predictions_path))
                with open(predictions_path, 'wb') as outfile:
                    pickle.dump(p, outfile)
                print('\tsaved predictions to', predictions_path)

            o = y[test_index[0]]  # oracle prediction (true value)

            # get runtime without thread coarsening
            row = df[(df["kernel"] == kernel) & (df["cf"] == 1)]
            assert (len(row) == 1)  # sanity check
            # .iloc[0] extracts the scalar explicitly; float() on a
            # one-element Series is deprecated in recent pandas
            nocf_runtime = float(row["runtime_" + platform].iloc[0])

            # get runtime of prediction
            row = df[(df["kernel"] == kernel) & (df["cf"] == p)]
            assert (len(row) == 1)  # sanity check
            p_runtime = float(row["runtime_" + platform].iloc[0])

            # get runtime of oracle coarsening factor
            o_runtime = oracle_runtimes[test_index[0]]

            # speedup and % oracle
            p_speedup = nocf_runtime / p_runtime
            p_oracle = o_runtime / p_runtime

            # record result
            data.append({
                "Model": model_name,
                "Platform": platform_name,
                "Kernel": kernel,
                "Oracle-CF": o,
                "Predicted-CF": p,
                "Speedup": p_speedup,
                "Oracle": p_oracle
            })

    return pd.DataFrame(data,
                        columns=[
                            "Model", "Platform", "Kernel", "Oracle-CF",
                            "Predicted-CF", "Speedup", "Oracle"
                        ])
Esempio n. 34
0
def write_file(path, contents):
    """Write ``contents`` to the text file ``path``, creating its folder first."""
    parent = fs.dirname(path)
    fs.mkdir(parent)
    with open(path, 'w') as handle:
        handle.write(contents)
Esempio n. 35
0
def test_mkdir():
    """fs.mkdir() creates a directory that was absent beforehand."""
    target = "/tmp/labm8.dir"
    # Remove any leftover so that the "absent" check below is meaningful.
    fs.rm(target)
    assert not fs.isdir(target)
    fs.mkdir(target)
    assert fs.isdir(target)
Esempio n. 36
0
def test_mkdir_parents():
    """fs.mkdir() creates missing parent directories along with the leaf."""
    nested = "/tmp/labm8.dir/foo/bar"
    # Remove leftovers from other tests or earlier runs, so the precondition
    # below does not depend on test execution order.
    fs.rm("/tmp/labm8.dir")
    assert not fs.isdir(nested)
    fs.mkdir(nested)
    assert fs.isdir(nested)
Esempio n. 37
0
def test_mkdir_exists():
    """Calling fs.mkdir() on an existing directory leaves it in place."""
    target = "/tmp/labm8.dir/"
    fs.mkdir(target)
    assert fs.isdir(target)
    # Two further calls on the now-existing directory
    for _ in range(2):
        fs.mkdir(target)
    assert fs.isdir(target)
Esempio n. 38
0
 def test_mkdir_exists(self):
     """Calling fs.mkdir() on an existing directory leaves it in place."""
     target = "/tmp/labm8.dir/"
     fs.mkdir(target)
     self._test(True, fs.isdir(target))
     # Two further calls on the now-existing directory
     for _ in range(2):
         fs.mkdir(target)
     self._test(True, fs.isdir(target))
Esempio n. 39
0
 def test_mkdir_parents(self):
     """fs.mkdir() creates missing parent directories along with the leaf."""
     nested = "/tmp/labm8.dir/foo/bar"
     # Remove leftovers from other tests or earlier runs, so the precondition
     # below does not depend on test execution order.
     fs.rm("/tmp/labm8.dir")
     self._test(False, fs.isdir(nested))
     fs.mkdir(nested)
     self._test(True, fs.isdir(nested))
Esempio n. 40
0
def evaluate(model):
    """Leave-one-out evaluation of ``model`` on four platforms.

    Reads ``oracles``, ``df``, ``data_path`` and ``seed`` from the enclosing
    scope (they are not defined in this function). For each platform and each
    held-out kernel, a cached prediction or model under ``result_caseB/`` is
    reused when present; otherwise a model is trained on the remaining
    kernels and cached. The predicted coarsening factor is then compared with
    the oracle.

    Args:
        model: object providing ``init``, ``train``, ``save``, ``restore``
            and ``predict`` plus ``__name__`` / ``__basename__``.

    Returns:
        pandas.DataFrame with one row per (platform, kernel) and the columns
        Model, Platform, Kernel, Oracle-CF, Predicted-CF, Speedup, Oracle.
    """
    from progressbar import ProgressBar

    platforms = ["Cypress", "Tahiti", "Fermi", "Kepler"]

    # One progress step per fold, and one fold per oracle entry and platform,
    # so the maximum follows the data instead of a fixed constant
    total_steps = sum(len(oracles["cf_" + name]) for name in platforms)
    steps_done = 0
    bar = ProgressBar(maxval=total_steps)
    bar.start()
    data = []

    # Neither the embeddings file nor the kernel list depends on the platform
    embed = np.load(f"{data_path}caseb_128.npy")
    kernels = sorted(set(df["kernel"]))

    for platform in platforms:
        platform_name = platform2str(platform)

        # Oracle runtimes for this platform
        oracle_runtimes = np.array(
            [float(x) for x in oracles["runtime_" + platform]])
        # Labels (coarsening factors) for this platform
        y = np.array([int(x) for x in oracles["cf_" + platform]],
                     dtype=np.int32)
        # One-hot encoding of the labels
        y_1hot = get_onehot(oracles, platform)
        X_cc, _ = get_features(df, oracles, platform)

        kf = KFold(n_splits=len(y), shuffle=False)

        for j, (train_index, test_index) in enumerate(kf.split(y)):
            kernel = kernels[test_index[0]]

            model_name = model.__name__
            model_basename = model.__basename__

            model_path = f"result_caseB/modelb_caseB/{model_basename}-{platform}-{j}.model"
            predictions_path = f"result_caseB/predictionb_caseB/{model_basename}-{platform}-{j}.result"

            if fs.exists(predictions_path):
                # load result from cache
                with open(predictions_path, 'rb') as infile:
                    p = pickle.load(infile)
            else:
                if fs.exists(model_path):
                    # load a trained model from cache
                    model.restore(model_path)
                else:

                    # create a new model and train it
                    model.init(seed=seed)
                    model.train(
                        sequences=embed[train_index],
                        verbose=True,  # TODO
                        y_1hot=y_1hot[train_index])

                    # cache the model
                    fs.mkdir(fs.dirname(model_path))
                    model.save(model_path)
                # make prediction
                p = model.predict(sequences=np.array(embed[test_index[0]]))[0]

                # Clamp the prediction to a bound derived from the length of
                # this kernel's feature row
                p = min(p, 2**(len(X_cc[test_index[0]]) - 1))

                # cache the prediction
                fs.mkdir(fs.dirname(predictions_path))
                with open(predictions_path, 'wb') as outfile:
                    pickle.dump(p, outfile)

            # oracle prediction
            o = y[test_index[0]]

            # get runtime without thread coarsening
            row = df[(df["kernel"] == kernel) & (df["cf"] == 1)]
            assert (len(row) == 1)  # sanity check
            # .iloc[0] extracts the scalar explicitly; float() on a
            # one-element Series is deprecated in recent pandas
            nocf_runtime = float(row["runtime_" + platform].iloc[0])

            # get runtime of prediction
            row = df[(df["kernel"] == kernel) & (df["cf"] == p)]
            assert (len(row) == 1)  # sanity check
            p_runtime = float(row["runtime_" + platform].iloc[0])

            # get runtime of oracle coarsening factor
            o_runtime = oracle_runtimes[test_index[0]]
            # speedup and % oracle
            p_speedup = nocf_runtime / p_runtime
            p_oracle = o_runtime / p_runtime

            # record result
            data.append({
                "Model": model_name,
                "Platform": platform_name,
                "Kernel": kernel,
                "Oracle-CF": o,
                "Predicted-CF": p,
                "Speedup": p_speedup,
                "Oracle": p_oracle
            })

            steps_done += 1  # update progress bar
            bar.update(steps_done)

    return pd.DataFrame(data,
                        columns=[
                            "Model", "Platform", "Kernel", "Oracle-CF",
                            "Predicted-CF", "Speedup", "Oracle"
                        ])
Esempio n. 41
0
if __name__ == "__main__":
    # Command line: only the database host is configurable.
    parser = ArgumentParser(description=__doc__)
    parser.add_argument(
        "-H", "--hostname", type=str, default="cc1",
        help="MySQL database hostname")
    args = parser.parse_args()

    db.init(args.hostname)

    with Session(commit=False) as s:
        print("Exporting CLgen results ...")
        export_dir = "export/clgen/result"
        fs.mkdir(export_dir)

        # Resume support: each entry already in the export folder is named
        # after a result id, so those ids are skipped.
        done = {int(fs.basename(entry)) for entry in Path(export_dir).iterdir()}
        print(len(done), "done")
        ids = {record[0] for record in s.query(CLgenResult.id).all()}
        print(len(ids), "in total")
        todo = ids - done
        print(len(todo), "todo")

        for result_id in ProgressBar()(todo):
            query = s.query(CLgenResult).filter(CLgenResult.id == result_id)
            result = query.scalar()
Esempio n. 42
0
 def test_ls_empty_dir(self):
     """fs.ls() returns an empty list for a freshly created directory."""
     empty_dir = "/tmp/labm8.empty"
     fs.mkdir(empty_dir)
     listing = fs.ls(empty_dir)
     self._test([], listing)
     # Clean up the scratch directory
     fs.rm(empty_dir)
Esempio n. 43
0
if __name__ == "__main__":
    # Command line: only the database host is configurable.
    parser = ArgumentParser(description=__doc__)
    parser.add_argument(
        "-H", "--hostname", type=str, default="cc1",
        help="MySQL database hostname")
    args = parser.parse_args()

    db.init(args.hostname)
    session = db.make_session()

    # All CLSmith results whose classification is "w"
    clsmith_wrong_code_programs = (
        session.query(CLSmithResult)
        .filter(CLSmithResult.classification == "w"))

    # Output folders for the program sources and for their reports
    fs.mkdir("../data/difftest/unreduced/clsmith/wrong_code")
    fs.mkdir("../data/difftest/unreduced/clsmith/wrong_code/reports")

    for result in clsmith_wrong_code_programs:
        vendor = vendor_str(result.testbed.platform)
        program_path = f"../data/difftest/unreduced/clsmith/wrong_code/{vendor}-{result.program.id}.cl"
        report_path = f"../data/difftest/unreduced/clsmith/wrong_code/reports/{vendor}-{result.id}.txt"

        # Program source, named after the vendor and the program id
        with open(program_path, "w") as outfile:
            print(result.program.src, file=outfile)

        # Report, named after the vendor and the result id; its path is
        # echoed to stdout
        with open(report_path, "w") as outfile:
            print(outfile.name)
            print(generate_wrong_code_report(result), file=outfile)
Esempio n. 44
0
def main():
    db = _db.Database(experiment.ORACLE_PATH)
    ml.start()

    # Delete any old stuff.
    fs.rm(experiment.IMG_ROOT + "/*")
    fs.rm(experiment.TAB_ROOT + "/*")

    # Make directories
    fs.mkdir(experiment.TAB_ROOT)
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/bars"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/heatmap"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/trisurf"))

    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/datasets"))

    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/datasets"))

    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/datasets"))

    visualise.pie(db.num_scenarios_by_device,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_device"))
    visualise.pie(db.num_runtime_stats_by_device,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_device"))
    visualise.pie(db.num_scenarios_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_kernel,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_kernel"))
    visualise.pie(db.num_runtime_stats_by_kernel,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_kernel"))

    # Per-scenario plots
    for row in db.scenario_properties:
        scenario,device,kernel,north,south,east,west,max_wgsize,width,height,tout = row
        title = ("{device}: {kernel}[{n},{s},{e},{w}]\n"
                 "{width} x {height} {type}s"
                 .format(device=text.truncate(device, 18), kernel=kernel,
                         n=north, s=south, e=east, w=west,
                         width=width, height=height, type=tout))
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/heatmap/{id}.png".format(id=scenario))
        space = _space.ParamSpace.from_dict(db.perf_scenario(scenario))
        max_c = min(25, len(space.c))
        max_r = min(25, len(space.r))
        space.reshape(max_c=max_c, max_r=max_r)

        # Heatmaps.
        mask = _space.ParamSpace(space.c, space.r)
        for j in range(len(mask.r)):
            for i in range(len(mask.c)):
                if space.matrix[j][i] == 0:
                    r, c = space.r[j], space.c[i]
                    # TODO: Get values from refused_params table.
                    if r * c >= max_wgsize:
                        # Illegal
                        mask.matrix[j][i] = -1
                    else:
                        # Refused
                        db.execute("INSERT OR IGNORE INTO refused_params VALUES(?,?)",
                                   (scenario, hash_params(c, r)))
                        space.matrix[j][i] = -1
                        mask.matrix[j][i] = 1

        db.commit()
        new_order = list(reversed(range(space.matrix.shape[0])))
        data = space.matrix[:][new_order]

        figsize=(12,6)

        _, ax = plt.subplots(1, 2, figsize=figsize, sharey=True)
        sns.heatmap(data, ax=ax[0], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        ax[0].set_title(title)

        new_order = list(reversed(range(mask.matrix.shape[0])))
        data = mask.matrix[:][new_order]

        sns.heatmap(data, ax=ax[1], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        # Set labels.
        ax[0].set_ylabel("Rows")
        ax[0].set_xlabel("Columns")
        ax[1].set_ylabel("Rows")
        ax[1].set_xlabel("Columns")

        # plt.tight_layout()
        # plt.gcf().set_size_inches(*figsize, dpi=300)

        viz.finalise(output)

        # 3D bars.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/bars/{id}.png".format(id=scenario))
        space.bar3d(output=output, title=title, zlabel="Performance",
                    rotation=45)

        # Trisurfs.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/trisurf/{id}.png".format(id=scenario))
        space.trisurf(output=output, title=title, zlabel="Performance",
                      rotation=45)

    #####################
    # ML Visualisations #
    #####################
    #features_tab(db, experiment.TAB_ROOT)

    visualise_classification_job(db, "xval")
    visualise_classification_job(db, "arch")
    visualise_classification_job(db, "xval_real")
    visualise_classification_job(db, "synthetic_real")

    # Runtime regression accuracy.
    visualise_regression_job(db, "xval")
    visualise_regression_job(db, "arch")
    visualise_regression_job(db, "xval_real")
    visualise_regression_job(db, "synthetic_real")

    # Whole-dataset plots
    visualise.runtimes_variance(db, fs.path(experiment.IMG_ROOT,
                                            "runtime_variance.png"),
                                min_samples=30)
    visualise.num_samples(db, fs.path(experiment.IMG_ROOT,
                                      "num_samples.png"))
    visualise.runtimes_range(db, fs.path(experiment.IMG_ROOT,
                                         "runtimes_range.png"))
    visualise.max_speedups(db, fs.path(experiment.IMG_ROOT,
                                       "max_speedups.png"))
    visualise.kernel_performance(db, fs.path(experiment.IMG_ROOT,
                                             "kernel_performance.png"))
    visualise.device_performance(db, fs.path(experiment.IMG_ROOT,
                                             "device_performance.png"))
    visualise.dataset_performance(db, fs.path(experiment.IMG_ROOT,
                                              "dataset_performance.png"))
    visualise.num_params_vs_accuracy(db, fs.path(experiment.IMG_ROOT,
                                                 "num_params_vs_accuracy.png"))
    visualise.performance_vs_coverage(db,
                                      fs.path(experiment.IMG_ROOT,
                                              "performance_vs_coverage.png"))
    visualise.performance_vs_max_wgsize(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_max_wgsize.png")
    )
    visualise.performance_vs_wgsize(db, fs.path(experiment.IMG_ROOT,
                                                "performance_vs_wgsize.png"))
    visualise.performance_vs_wg_c(db, fs.path(experiment.IMG_ROOT,
                                              "performance_vs_wg_c.png"))
    visualise.performance_vs_wg_r(db, fs.path(experiment.IMG_ROOT,
                                              "performance_vs_wg_r.png"))
    visualise.max_wgsizes(db, fs.path(experiment.IMG_ROOT, "max_wgsizes.png"))
    visualise.oracle_speedups(db, fs.path(experiment.IMG_ROOT,
                                          "oracle_speedups.png"))

    visualise.coverage(db,
                       fs.path(experiment.IMG_ROOT, "coverage/coverage.png"))
    visualise.safety(db, fs.path(experiment.IMG_ROOT, "safety/safety.png"))
    visualise.oracle_wgsizes(db, fs.path(experiment.IMG_ROOT, "oracle/all.png"))

    # Per-device plots
    for i,device in enumerate(db.devices):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE device='{0}')"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}.png".format(i))
        visualise.coverage(db, output=output, where=where, title=device)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=device)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where, title=device)

        where = ("scenario IN (\n"
                 "    SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 "    SELECT id FROM scenarios WHERE kernel IN (\n"
                 "        SELECT id FROM kernel_names WHERE synthetic=0\n"
                 "    )\n"
                 ")"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}_real.png".format(i))
        visualise.coverage(db, output=output, where=where,
                           title=device + ", real")
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}_real.png".format(i))
        visualise.safety(db, output, where=where,
                         title=device + ", real")
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}_real.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where,
                                 title=device + ", real")


        where = ("scenario IN (\n"
                 "    SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 "    SELECT id FROM scenarios WHERE kernel IN (\n"
                 "        SELECT id FROM kernel_names WHERE synthetic=1\n"
                 "    )\n"
                 ")"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}_synthetic.png".format(i))
        visualise.coverage(db, output=output, where=where,
                           title=device + ", synthetic")
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}_synthetic.png".format(i))
        visualise.safety(db, output, where=where,
                         title=device + ", synthetic")
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}_synthetic.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where,
                                 title=device + ", synthetic")

    # Per-kernel plots
    for kernel,ids in db.lookup_named_kernels().iteritems():
        id_wrapped = ['"' + id + '"' for id in ids]
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE kernel IN ({0}))"
                 .format(",".join(id_wrapped)))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/kernels/{0}.png".format(kernel))
        visualise.coverage(db, output=output, where=where, title=kernel)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/kernels/{0}.png".format(kernel))
        visualise.safety(db, output=output, where=where, title=kernel)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/kernels/{0}.png".format(kernel))
        visualise.safety(db, output=output, where=where, title=kernel)

    # Per-dataset plots
    for i,dataset in enumerate(db.datasets):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE dataset='{0}')"
                 .format(dataset))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/datasets/{0}.png".format(i))
        visualise.coverage(db, output, where=where, title=dataset)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/datasets/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=dataset)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/datasets/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=dataset)

    ml.stop()
Esempio n. 45
0
def test_ls_empty_dir():
    """Check that fs.ls() gives a falsy result for an empty directory."""
    path = "/tmp/labm8.empty"
    fs.mkdir(path)
    try:
        assert not fs.ls(path)
    finally:
        # Remove the scratch directory even if the assertion fails (or ls
        # raises), so a failing run does not leave state behind in /tmp.
        fs.rm(path)
Esempio n. 46
0
#!/usr/bin/env python3.6

import sys

from progressbar import ProgressBar

from labm8 import crypto
from labm8 import fs

if __name__ == "__main__":
    # Split one input file into one file per sample: the text is cut on the
    # "// ==== START SAMPLE ====" marker and every non-empty sample is written
    # to <outdir>/<sha1 of sample>.txt.
    #
    # Usage: <script> <inpath> <outdir>
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <inpath> <outdir>")
    inpath, outdir = sys.argv[1], sys.argv[2]
    print(f"reading from {inpath} into {outdir}")

    # Explicit checks instead of `assert`: assertions are removed when Python
    # runs with -O, which would let bad paths through silently. sys.exit()
    # with a message prints it to stderr and exits with status 1.
    if not fs.isfile(inpath):
        sys.exit(f"error: input is not a file: {inpath}")
    if fs.exists(outdir) and not fs.isdir(outdir):
        sys.exit(f"error: output path exists and is not a directory: {outdir}")
    fs.mkdir(outdir)

    with open(inpath) as infile:
        text = infile.read()

    # Strip each chunk once and drop the ones that are empty after stripping
    # (e.g. whatever precedes the first marker).
    stripped = (chunk.strip() for chunk in text.split("// ==== START SAMPLE ===="))
    kernels = [kernel for kernel in stripped if kernel]
    print(len(kernels), "kernels")

    sha1s = [crypto.sha1_str(kernel) for kernel in kernels]
    # zip() is materialised into a list before being handed to ProgressBar,
    # as in the original; presumably so the bar can determine the total.
    for kernel, sha1 in ProgressBar()(list(zip(kernels, sha1s))):
        with open(f"{outdir}/{sha1}.txt", "w") as outfile:
            print(kernel, file=outfile)