def main():
    db = _db.Database(fs.path("joblist.db"))

    data = [row for row in db.execute("SELECT device,Count(*) AS count\n"
                                      "FROM jobs\n"
                                      "GROUP BY device\n"
                                      "ORDER BY count")]
    io.info("Job list:")
    print(fmt.table(data, columns=("Device", "Jobs")))
    print()

    jobs = [row for row in db.execute("SELECT * FROM jobs")]
    fs.mkdir("jobs")
    # One log file per device.
    logs = {
        "monza": open("jobs/monza.txt", "w"),
        "whz5": open("jobs/whz5.txt", "w"),
        "cec": open("jobs/cec.txt", "w"),
        "florence": open("jobs/florence.txt", "w"),
    }
    for job in jobs:
        enum_job(logs, db, *job)

    lab.exit()
def main():
    parser = ArgumentParser(description=__description__)
    parser.add_argument("classification")
    parser.add_argument("outdir")
    args = parser.parse_args()

    db.init("cc1")
    session = db.make_session()

    program_ids = [
        x[0] for x in session.query(sql.distinct(CLSmithResult.program_id))
        .filter(CLSmithResult.classification == args.classification).all()]

    header = fs.read_file(dsmith.data_path("include", "clsmith.h"))

    fs.mkdir(args.outdir)
    for program_id in ProgressBar()(program_ids):
        outpath = fs.path(args.outdir, program_id + ".cl")
        if not fs.exists(outpath):
            program = session.query(CLSmithProgram) \
                .filter(CLSmithProgram.id == program_id).one()
            pre, post = program.src.split('#include "CLSmith.h"')
            inlined = pre + header + post
            with open(outpath, "w") as outfile:
                print(inlined, file=outfile)
def __init__(self, path, tables={}, enable_traces=True):
    """
    Arguments:
        path (str): The path to the database file.
        tables (dictionary of {str: tuple of str}, optional): A dictionary
          of {name: schema} pairs, where a schema is a list of
          (name, type) tuples.
        enable_traces (bool, optional): Enable traces for user defined
          functions and aggregates.
    """
    self.path = fs.path(path)

    # Create directory if needed.
    parent_dir = fs.dirname(path)
    if parent_dir:
        fs.mkdir(parent_dir)

    self.connection = sql.connect(self.path)
    for name, schema in six.iteritems(tables):
        self.create_table(name, schema)

    io.debug("Opened connection to '{0}'".format(self.path))

    # Register exit handler
    atexit.register(self.close)

    # Enable traces for user defined functions and aggregates. See:
    #
    # https://docs.python.org/2/library/sqlite3.html#sqlite3.enable_callback_tracebacks
    if enable_traces:
        sql.enable_callback_tracebacks(True)
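# A minimal usage sketch of the constructor above, assuming the enclosing
# class is the `Database` used elsewhere in this file (e.g. `_db.Database`),
# with a hypothetical two-column "jobs" table in the {name: schema} format
# the docstring describes:
example = Database("/tmp/example.db",
                   tables={"jobs": [("device", "text"), ("njobs", "int")]})
example.connection.execute("INSERT INTO jobs VALUES (?,?)", ("cec", 1))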
def train_and_save(model_desc, platform, source,
                   atomizer="CharacterAtomizer", maxlen=1024,
                   n_splits=10, split_i=0, seed=204):
    np.random.seed(seed)

    name = model_desc["name"]
    outpath = ("models/{name}/{platform}-{source}-{atomizer}:{maxlen}-"
               "{seed}-{n_splits}-{split_i}.model".format(**vars()))
    if not fs.exists(outpath):
        create_fn = model_desc.get("create_model", _nop)
        train_fn = model_desc.get("train_fn", _nop)
        save_fn = model_desc["save_fn"]
        _atomizer = globals().get(atomizer)

        # load training data
        data_desc = load_data_desc(platform=platform, source=source,
                                   max_seq_len=maxlen, atomizer=_atomizer)
        train, test = get_training_data(data_desc, seed=seed,
                                        split_i=split_i, n_splits=n_splits)

        # create model
        model = create_fn(seed=seed, data_desc=data_desc)

        # train model
        train_fn(model=model, train=train, seed=seed,
                 platform=platform, source=source)

        fs.mkdir("models/{name}".format(**vars()))
        save_fn(outpath, model)
        print("model saved as", outpath)

    # evaluate model
    return load_and_test(model_desc, platform, source, n_splits=n_splits,
                         split_i=split_i, atomizer=atomizer, maxlen=maxlen,
                         seed=seed)
def __init__(self, name: str):
    """ Create filesystem cache. """
    self.path = fs.path(ROOT, name)
    self.name = name
    fs.mkdir(self.path)
def test_cp_dir(self):
    fs.rm("/tmp/labm8")
    fs.rm("/tmp/labm8.copy")
    fs.mkdir("/tmp/labm8/foo/bar")
    self._test(False, fs.exists("/tmp/labm8.copy"))
    fs.cp("/tmp/labm8/", "/tmp/labm8.copy")
    self._test(True, fs.isdir("/tmp/labm8.copy"))
    self._test(True, fs.isdir("/tmp/labm8.copy/foo"))
    self._test(True, fs.isdir("/tmp/labm8.copy/foo/bar"))
def test_cp_dir():
    fs.rm("/tmp/labm8")
    fs.rm("/tmp/labm8.copy")
    fs.mkdir("/tmp/labm8/foo/bar")
    assert not fs.exists("/tmp/labm8.copy")
    fs.cp("/tmp/labm8/", "/tmp/labm8.copy")
    assert fs.isdir("/tmp/labm8.copy")
    assert fs.isdir("/tmp/labm8.copy/foo")
    assert fs.isdir("/tmp/labm8.copy/foo/bar")
def export_todir(s: session_t, table, dir: Path) -> None:
    fs.mkdir(dir)
    q = s.query(table)
    num = s.query(sql.sql.func.count(table.id)).scalar()
    for result in ProgressBar(max_value=num)(q):
        buf = result.toProtobuf().SerializeToString()
        checksum = crypto.sha1(buf)
        with open(f"{dir}/{checksum}.pb", "wb") as f:
            f.write(buf)
def __init__(self, root, escape_key=hash_key):
    """
    Create filesystem cache.

    Arguments:
        root (str): Path of the cache root directory.
        escape_key (fn, optional): Function to convert keys to file names.
    """
    self.path = root
    self.escape_key = escape_key
    fs.mkdir(self.path)
def __init__(self, root, escape_key=hash_key):
    """
    Create filesystem cache.

    Arguments:
        root (str): Path of the cache root directory.
        escape_key (fn, optional): Function to convert keys to file names.
    """
    self.path = pathlib.Path(root)
    self.escape_key = escape_key
    fs.mkdir(self.path)
def test_rmtrash(self):
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    self.assertTrue(fs.isfile("/tmp/labm8.tmp"))
    fs.rmtrash("/tmp/labm8.tmp")
    self.assertFalse(fs.isfile("/tmp/labm8.tmp"))
    fs.rmtrash("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.dir")
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    system.echo("Hello, world!", "/tmp/labm8.dir/foo/bar/baz")
    self.assertTrue(fs.isfile("/tmp/labm8.dir/foo/bar/baz"))
    fs.rmtrash("/tmp/labm8.dir")
    self.assertFalse(fs.isfile("/tmp/labm8.dir/foo/bar/baz"))
    self.assertFalse(fs.isfile("/tmp/labm8.dir/"))
def test_rm():
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    assert fs.isfile("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.tmp")
    assert not fs.isfile("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.dir")
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    system.echo("Hello, world!", "/tmp/labm8.dir/foo/bar/baz")
    assert fs.isfile("/tmp/labm8.dir/foo/bar/baz")
    fs.rm("/tmp/labm8.dir")
    assert not fs.isfile("/tmp/labm8.dir/foo/bar/baz")
    assert not fs.isfile("/tmp/labm8.dir/")
def test_rm(self):
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    self._test(True, fs.isfile("/tmp/labm8.tmp"))
    fs.rm("/tmp/labm8.tmp")
    self._test(False, fs.isfile("/tmp/labm8.tmp"))
    fs.rm("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.dir")
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    system.echo("Hello, world!", "/tmp/labm8.dir/foo/bar/baz")
    self._test(True, fs.isfile("/tmp/labm8.dir/foo/bar/baz"))
    fs.rm("/tmp/labm8.dir")
    self._test(False, fs.isfile("/tmp/labm8.dir/foo/bar/baz"))
    self._test(False, fs.isfile("/tmp/labm8.dir/"))
def test_rmtrash():
    with tempfile.NamedTemporaryFile(prefix='labm8_') as f:
        assert fs.isfile(f.name)
        fs.rmtrash(f.name)
        assert not fs.isfile(f.name)
        fs.rmtrash(f.name)
        fs.rm(f.name)
    with tempfile.TemporaryDirectory() as d:
        fs.rm(d)
        fs.mkdir(d, "foo/bar")
        system.echo("Hello, world!", fs.path(d, "foo/bar/baz"))
        assert fs.isfile(d, "foo/bar/baz")
        fs.rmtrash(d)
        assert not fs.isfile(d, "foo/bar/baz")
        assert not fs.isdir(d)
def test_cp_over_dir(self):
    fs.mkdir("/tmp/labm8.tmp.src")
    system.echo("Hello, world!", "/tmp/labm8.tmp.src/foo")
    fs.rm("/tmp/labm8.tmp.copy")
    fs.mkdir("/tmp/labm8.tmp.copy")
    self._test(True, fs.isdir("/tmp/labm8.tmp.src"))
    self._test(True, fs.isfile("/tmp/labm8.tmp.src/foo"))
    self._test(True, fs.isdir("/tmp/labm8.tmp.copy"))
    self._test(False, fs.isfile("/tmp/labm8.tmp.copy/foo"))
    fs.cp("/tmp/labm8.tmp.src", "/tmp/labm8.tmp.copy/")
    self._test(True, fs.isdir("/tmp/labm8.tmp.src"))
    self._test(True, fs.isfile("/tmp/labm8.tmp.src/foo"))
    self._test(True, fs.isdir("/tmp/labm8.tmp.copy"))
    self._test(True, fs.isfile("/tmp/labm8.tmp.copy/foo"))
    self._test(fs.read("/tmp/labm8.tmp.src/foo"),
               fs.read("/tmp/labm8.tmp.copy/foo"))
def test_cp_over_dir():
    fs.mkdir("/tmp/labm8.tmp.src")
    system.echo("Hello, world!", "/tmp/labm8.tmp.src/foo")
    fs.rm("/tmp/labm8.tmp.copy")
    fs.mkdir("/tmp/labm8.tmp.copy")
    assert fs.isdir("/tmp/labm8.tmp.src")
    assert fs.isfile("/tmp/labm8.tmp.src/foo")
    assert fs.isdir("/tmp/labm8.tmp.copy")
    assert not fs.isfile("/tmp/labm8.tmp.copy/foo")
    fs.cp("/tmp/labm8.tmp.src", "/tmp/labm8.tmp.copy/")
    assert fs.isdir("/tmp/labm8.tmp.src")
    assert fs.isfile("/tmp/labm8.tmp.src/foo")
    assert fs.isdir("/tmp/labm8.tmp.copy")
    assert fs.isfile("/tmp/labm8.tmp.copy/foo")
    assert (fs.read("/tmp/labm8.tmp.src/foo") ==
            fs.read("/tmp/labm8.tmp.copy/foo"))
def __setitem__(self, key, value):
    """
    Emplace file in cache.

    Arguments:
        key: Key.
        value (str): Path of file to insert in cache.

    Raises:
        ValueError: If the file "value" does not exist.
    """
    if not fs.exists(value):
        raise ValueError(value)

    path = self.keypath(key)
    fs.mkdir(self.path)
    fs.mv(value, path)
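# A hedged usage sketch of __setitem__ above: emplacing a file moves it into
# the cache directory. The class name FSCache and the keypath/escape_key
# behavior are assumed from the cache constructors shown earlier; the paths
# are illustrative.
cache = FSCache("/tmp/labm8.cache")
system.echo("Hello, world!", "/tmp/payload.txt")
cache["my-key"] = "/tmp/payload.txt"  # /tmp/payload.txt is moved into the cache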
def main(): """ Gather databases from experimental setups. """ fs.mkdir(experiment.DATA_ROOT) fs.mkdir(experiment.DB_DEST) if system.HOSTNAME != "cec": io.fatal("script must be ran on machine `cec'") # TODO: Perform integrity checks. If they fail, transfer again. cp_loc("~/.omnitune/skelcl.db", "cec") cp_rmt("brendel.inf.ed.ac.uk", path="~/florence.db", name="florence") cp_rmt("dhcp-90-060") cp_rmt("monza") cp_rmt("tim") cp_rmt("whz5")
def cachepath(*relative_path_components: list) -> str:
    """
    Return path to file system cache.

    Parameters
    ----------
    *relative_path_components
        Relative path of cache.

    Returns
    -------
    str
        Absolute path of file system cache.
    """
    cache_root = os.environ.get(
        "CLGEN_CACHE",
        f"~/.cache/clgen/{version_info.major}.{version_info.minor}.x")
    fs.mkdir(cache_root)
    return fs.path(cache_root, *relative_path_components)
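# A usage sketch for cachepath() above: the cache root defaults to a
# versioned directory under ~/.cache/clgen and can be redirected with the
# CLGEN_CACHE environment variable. The paths here are illustrative.
os.environ["CLGEN_CACHE"] = "/tmp/clgen-cache"
checkpoint_dir = cachepath("model", "checkpoints")
# -> "/tmp/clgen-cache/model/checkpoints"; the root directory now exists.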
def test_rm_glob():
    fs.mkdir("/tmp/labm8.glob")
    system.echo("Hello, world!", "/tmp/labm8.glob/1")
    system.echo("Hello, world!", "/tmp/labm8.glob/2")
    system.echo("Hello, world!", "/tmp/labm8.glob/abc")
    fs.rm("/tmp/labm8.glob/a*", glob=False)
    assert fs.isfile("/tmp/labm8.glob/1")
    assert fs.isfile("/tmp/labm8.glob/2")
    assert fs.isfile("/tmp/labm8.glob/abc")
    fs.rm("/tmp/labm8.glob/a*")
    assert fs.isfile("/tmp/labm8.glob/1")
    assert fs.isfile("/tmp/labm8.glob/2")
    assert not fs.isfile("/tmp/labm8.glob/abc")
    fs.rm("/tmp/labm8.glob/*")
    assert not fs.isfile("/tmp/labm8.glob/1")
    assert not fs.isfile("/tmp/labm8.glob/2")
    assert not fs.isfile("/tmp/labm8.glob/abc")
def test_rm_glob(self):
    fs.mkdir("/tmp/labm8.glob")
    system.echo("Hello, world!", "/tmp/labm8.glob/1")
    system.echo("Hello, world!", "/tmp/labm8.glob/2")
    system.echo("Hello, world!", "/tmp/labm8.glob/abc")
    fs.rm("/tmp/labm8.glob/a*", glob=False)
    self._test(True, fs.isfile("/tmp/labm8.glob/1"))
    self._test(True, fs.isfile("/tmp/labm8.glob/2"))
    self._test(True, fs.isfile("/tmp/labm8.glob/abc"))
    fs.rm("/tmp/labm8.glob/a*")
    self._test(True, fs.isfile("/tmp/labm8.glob/1"))
    self._test(True, fs.isfile("/tmp/labm8.glob/2"))
    self._test(False, fs.isfile("/tmp/labm8.glob/abc"))
    fs.rm("/tmp/labm8.glob/*")
    self._test(False, fs.isfile("/tmp/labm8.glob/1"))
    self._test(False, fs.isfile("/tmp/labm8.glob/2"))
    self._test(False, fs.isfile("/tmp/labm8.glob/abc"))
def visualise_regression_job(db, job):
    runtimedir = "img/runtime_regression/{}/".format(job)
    runtimeclassificationdir = "img/runtime_classification/{}/".format(job)
    speedupdir = "img/speedup_regression/{}/".format(job)
    speedupclassificationdir = "img/speedup_classification/{}/".format(job)

    fs.mkdir(runtimedir)
    fs.mkdir(runtimeclassificationdir)
    fs.mkdir(speedupdir)
    fs.mkdir(speedupclassificationdir)

    # Line plot of all classifiers.
    visualise.runtime_regression(
        db, "img/runtime_regression/{}.png".format(job), job=job)
    visualise.runtime_classification(
        db, "img/runtime_classification/{}.png".format(job), job=job)
    visualise.speedup_regression(
        db, "img/speedup_regression/{}.png".format(job), job=job)
    visualise.speedup_classification(
        db, "img/speedup_classification/{}.png".format(job), job=job)
def write_file(path: str, contents: str) -> None:
    if fs.dirname(path):
        fs.mkdir(fs.dirname(path))
    with open(path, 'w') as outfile:
        outfile.write(contents)
def evaluate(model, device, data_folder, out_folder, embeddings,
             dense_layer_size, print_summary, num_epochs,
             batch_size) -> pd.DataFrame:
    from sklearn.model_selection import StratifiedKFold

    # Create device list
    if device == 'all':
        device_list = ["amd", "nvidia"]
    else:
        device_list = [device]

    data = []
    for i, platform in enumerate(device_list):
        platform_name = platform2str(platform)

        # Load runtime data
        data_file = os.path.join(data_folder, "cgo17-{}.csv".format(platform))
        print('\n--- Read data from', data_file)
        df = pd.read_csv(data_file)

        # Encode input source codes
        sequences, maxlen = encode_srcs(data_folder, df)

        # Load embeddings
        import tensorflow as tf  # for embeddings lookup
        embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)
        vocabulary_size, embedding_dimension = embedding_matrix_normalized.shape
        seq_ = tf.compat.v1.placeholder(dtype=tf.int32)

        # Tensor of shape (num_input_files, sequence length, embedding dimension)
        embedding_input_ = tf.compat.v1.nn.embedding_lookup(
            params=embedding_matrix_normalized, ids=seq_)

        # Make tf block less gpu memory
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.compat.v1.Session(config=config) as sess:
            embedding_input = sess.run(embedding_input_,
                                       feed_dict={seq_: sequences})

        # Values used for training & predictions
        aux_in = auxiliary_inputs(df)

        # Optimal mappings
        y = np.array([1 if x == "GPU" else 0 for x in df["oracle"].values])
        y_1hot = encode_1hot(y)

        # 10-fold cross-validation
        n_splits = 10
        kf = StratifiedKFold(n_splits=n_splits, shuffle=True,
                             random_state=seed)
        for j, (train_index, test_index) in enumerate(kf.split(sequences, y)):
            print('--- Cross validation step [', j, '/ ', n_splits, ']')

            model_name = model.__name__
            model_basename = model.__basename__
            model_path = os.path.join(
                out_folder,
                "models/{model_basename}-{platform}-{j}.model".format(
                    model_basename=model_basename, platform=platform, j=j))
            predictions_path = os.path.join(
                out_folder,
                "predictions/{model_basename}-{platform}-{j}.result".format(
                    model_basename=model_basename, platform=platform, j=j))
            log_dir = os.path.join(out_folder, "logs")

            if fs.exists(predictions_path):
                # load result from cache
                print("\tFound predictions in", predictions_path,
                      ", skipping...")
                with open(predictions_path, 'rb') as infile:
                    p = pickle.load(infile)
            else:
                if fs.exists(model_path):
                    # restore trained model from cache
                    print("\n\tFound trained model in", model_path,
                          ", skipping...")
                    model.restore(model_path)
                else:
                    # Initialize model and print summary
                    model.init(seed=seed,
                               maxlen=maxlen,
                               embedding_dim=int(embedding_dimension),
                               dense_layer_size=dense_layer_size)
                    if print_summary:
                        model.model.summary()

                    # Train and cache a model
                    print('\n--- Training model... ')
                    model.train(df=df,
                                aux_in=aux_in[train_index],
                                sequences=embedding_input[train_index, :, :],
                                y=y[train_index],
                                y_1hot=y_1hot[train_index],
                                verbose=False,
                                epochs=num_epochs,
                                batch_size=batch_size,
                                log_dir=log_dir)
                    fs.mkdir(fs.dirname(model_path))
                    model.save(model_path)
                    print('\tsaved model to', model_path)

                # test model
                print('\n--- Testing model... ')
                p = model.predict(batch_size=batch_size,
                                  aux_in=aux_in[test_index],
                                  sequences=embedding_input[test_index, :, :],
                                  y=y[test_index],
                                  y_1hot=y_1hot[test_index],
                                  verbose=False)

                # cache results
                fs.mkdir(fs.dirname(predictions_path))
                with open(predictions_path, 'wb') as outfile:
                    pickle.dump(p, outfile)
                print('\tsaved predictions to', predictions_path)

            benchmarks = df['benchmark'].values[test_index]  # benchmark names
            o = y[test_index]  # oracle device mappings (true values)
            correct = p == o  # predictions' correctness

            # runtimes of baseline mapping (CPU on AMD, GPU on NVIDIA)
            zero_r_dev = "runtime_cpu" if platform == "amd" else "runtime_gpu"
            zer_r_runtimes = df[zero_r_dev][test_index]
            # speedups of predictions
            runtimes = df[['runtime_cpu', 'runtime_gpu']].values[test_index]
            p_runtimes = [r[p_] for p_, r in zip(p, runtimes)]
            p_speedup = zer_r_runtimes / p_runtimes

            # sanity check
            assert (len(benchmarks) == len(o) == len(correct) == len(p) ==
                    len(p_speedup))

            # record results
            for benchmark_, o_, p_, correct_, p_speedup_ in zip(
                    benchmarks, o, p, correct, p_speedup):
                data.append({
                    "Model": model_basename,
                    "Platform": platform_name,
                    'Benchmark': escape_benchmark_name(benchmark_),
                    'Benchmark Suite': escape_suite_name(benchmark_),
                    "Oracle Mapping": o_,
                    "Predicted Mapping": p_,
                    "Correct?": correct_,
                    "Speedup": p_speedup_,
                })

    return pd.DataFrame(data, index=range(1, len(data) + 1), columns=[
        "Model", "Platform", "Benchmark", "Benchmark Suite", "Oracle Mapping",
        "Predicted Mapping", "Correct?", "Speedup"
    ])
def _init_tensorflow(self, infer: bool = False):
    """
    Deferred importing of tensorflow and initializing model for training
    or sampling.

    This is necessary for two reasons: first, the tensorflow graph is
    different for training and inference, so must be reset when switching
    between modes. Second, importing tensorflow takes a long time, so
    we only want to do it if we actually need to.

    Arguments:
        infer (bool): If True, initialize model for inference. If False,
            initialize model for training.

    Returns:
        module: imported TensorFlow module
    """
    import tensorflow as tf
    from tensorflow.python.ops import rnn_cell
    from tensorflow.python.ops import seq2seq

    # Use self.tensorflow_state to mark whether or not model is configured
    # for training or inference.
    try:
        if self.tensorflow_state == infer:
            return tf
    except AttributeError:
        pass

    self.cell_fn = {
        "lstm": rnn_cell.BasicLSTMCell,
        "gru": rnn_cell.GRUCell,
        "rnn": rnn_cell.BasicRNNCell
    }.get(self.model_type, None)
    if self.cell_fn is None:
        raise clgen.UserError("Unrecognized model type")

    # reset the graph when switching between training and inference
    tf.reset_default_graph()

    # corpus info:
    batch_size = 1 if infer else self.corpus.batch_size
    seq_length = 1 if infer else self.corpus.seq_length
    vocab_size = self.corpus.vocab_size

    fs.mkdir(self.cache.path)

    cell = self.cell_fn(self.rnn_size, state_is_tuple=True)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * self.num_layers,
                                             state_is_tuple=True)
    self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
    self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
    self.initial_state = self.cell.zero_state(batch_size, tf.float32)

    scope_name = 'rnnlm'
    with tf.variable_scope(scope_name):
        softmax_w = tf.get_variable("softmax_w",
                                    [self.rnn_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [vocab_size, self.rnn_size])
            inputs = tf.split(
                1, seq_length,
                tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope=scope_name)
    output = tf.reshape(tf.concat(1, outputs), [-1, self.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([batch_size * seq_length])],
        vocab_size)
    self.cost = tf.reduce_sum(loss) / batch_size / seq_length
    self.final_state = last_state
    self.learning_rate = tf.Variable(0.0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      self.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # set model status
    self.tensorflow_state = infer

    return tf
def visualise_classification_job(db, job):
    basedir = "img/classification/{}/".format(job)

    fs.mkdir(basedir)
    fs.mkdir(basedir + "classifiers")
    fs.mkdir(basedir + "err_fns")

    visualise.err_fn_performance(db, basedir + "err_fns.png", job=job)

    # Bar plot of all results.
    visualise.classification(db, "img/classification/{}.png".format(job),
                             job=job)

    # Per-classifier plots.
    for i, classifier in enumerate(db.classification_classifiers):
        visualise.classifier_speedups(
            db, classifier, basedir + "classifiers/{}.png".format(i), job=job)

    # Per-err_fn plots.
    for err_fn in db.err_fns:
        visualise.err_fn_speedups(
            db, err_fn, basedir + "err_fns/{}.png".format(err_fn),
            job=job, sort=True)

    # Results table.
    query = db.execute(
        "SELECT classifier,err_fn,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? GROUP BY classifier,err_fn", (job,)
    )
    results = []
    for classifier, err_fn, count in query:
        correct, illegal, refused, performance, speedup = zip(*[
            row for row in db.execute(
                "SELECT correct,illegal,refused,performance,speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            )
        ])
        results.append([
            classifier,
            err_fn,
            (sum(correct) / count) * 100,
            (sum(illegal) / count) * 100,
            (sum(refused) / count) * 100,
            min(performance) * 100,
            labmath.geomean(performance) * 100,
            max(performance) * 100,
            min(speedup),
            labmath.geomean(speedup),
            max(speedup)
        ])

    str_args = {
        "float_format": lambda f: "{:.2f}".format(f)
    }

    for i in range(len(results)):
        results[i][0] = ml.classifier_basename(results[i][0])

    columns = (
        "CLASSIFIER",
        "ERR_FN",
        "ACC %",
        "INV %",
        "REF %",
        "Omin %",
        "Oavg %",
        "Omax %",
        "Smin",
        "Savg",
        "Smax",
    )

    latex.table(results,
                output=fs.path(experiment.TAB_ROOT, job + ".tex"),
                columns=columns, **str_args)
def test_mkdir(self):
    fs.rm("/tmp/labm8.dir")
    self._test(False, fs.isdir("/tmp/labm8.dir"))
    fs.mkdir("/tmp/labm8.dir")
    self._test(True, fs.isdir("/tmp/labm8.dir"))
def test_accuracy(model, embeddings, folder_data, samples_per_class,
                  folder_results, dense_layer_size, print_summary,
                  num_epochs, batch_size):
    seed = 204
    num_classes = 104

    y_test = np.array([], dtype=np.int32)
    X_test = list()
    folder_data_test = os.path.join(folder_data, 'seq_test')

    print('Getting file names for', num_classes, 'classes from folders:')
    print(folder_data_test)
    for i in range(1, num_classes + 1):
        folder = os.path.join(folder_data_test, str(i))
        assert os.path.exists(folder), "Folder: " + folder + ' does not exist'
        print('\ttest : Read file names from folder ', folder)
        listing = os.listdir(folder + '/')
        seq_files = [
            os.path.join(folder, f) for f in listing if f[-4:] == '.rec'
        ]
        assert len(seq_files) > 0, "No .rec files found in" + folder
        X_test += seq_files
        y_test = np.concatenate(
            [y_test, np.array([int(i)] * len(seq_files), dtype=np.int32)])

    folder_vocabulary = FLAGS.vocabulary_dir
    dictionary_pickle = os.path.join(folder_vocabulary, 'dic_pickle')
    print('\tLoading dictionary from file', dictionary_pickle)
    with open(dictionary_pickle, 'rb') as f:
        dictionary = pickle.load(f)
    unk_index = dictionary[rgx.unknown_token]
    del dictionary

    X_seq_test, maxlen_test = encode_srcs(X_test, 'testing', unk_index)
    maxlen = maxlen_test
    print('Max. sequence length overall:', maxlen)
    if FLAGS.maxlen > 0:
        maxlen = FLAGS.maxlen
    print('Padding sequences to length', maxlen)
    X_seq_test = pad_src(X_seq_test, maxlen, unk_index)

    model.__name__ = FLAGS.model_name
    model_name = model.__name__
    model_path = os.path.join(folder_results,
                              "models/{}.model".format(model_name))
    predictions_path = os.path.join(
        folder_results,
        "predictions/{}_top{}.result".format(model_name, FLAGS.topk))

    if fs.exists(predictions_path):
        print("\tFound predictions in", predictions_path, ", skipping...")
        with open(predictions_path, 'rb') as infile:
            ind = pickle.load(infile)
    else:
        import tensorflow as tf
        embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)
        vocabulary_size, embedding_dimension = embedding_matrix_normalized.shape
        print('EMB:\n', embedding_matrix_normalized)

        print('\n--- Initializing model...')
        model.init(seed=seed,
                   maxlen=maxlen,
                   embedding_dim=int(embedding_dimension),
                   num_classes=num_classes,
                   dense_layer_size=dense_layer_size,
                   embedding_matrix=embedding_matrix_normalized)
        model.load_weights(
            os.path.join(FLAGS.out, model.__name__ + '_weights.h5'))
        if print_summary:
            model.model.summary()

        print('\n--- Testing model...')
        ind, prob = model.predict_topk(X_seq_test, batch_size, FLAGS.topk)
        del prob

        fs.mkdir(fs.dirname(predictions_path))
        with open(predictions_path, 'wb') as outfile:
            pickle.dump(ind, outfile)
        print('\tsaved predictions to', predictions_path)

    accuracy = np.zeros_like(y_test)
    ind = np.transpose(np.array(ind))
    for i in range(FLAGS.topk):
        accuracy += np.array(ind[i]) == y_test
    print('\nTest top{} accuracy:'.format(FLAGS.topk),
          sum(accuracy) * 100.0 / len(accuracy), '%')

    from sklearn.metrics import confusion_matrix
    conf_matr = confusion_matrix(y_test, ind[0])

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    values = plt.imshow(conf_matr)
    ax.xaxis.tick_top()
    ax.xaxis.set_label_position('top')
    fig.colorbar(values)
    ax.set_xlabel('True classes')
    ax.set_ylabel('Predicted classes')
    conf_png = os.path.join(folder_results,
                            "models/conf_matr_{}.png".format(model_name))
    plt.savefig(conf_png)
def evaluate(model, embeddings, folder_data, samples_per_class,
             folder_results, dense_layer_size, print_summary, num_epochs,
             batch_size):
    # Set seed for reproducibility
    seed = 204

    ########################################################################
    # Get data
    vsamples_per_class = FLAGS.vsamples

    # Data acquisition
    num_classes = 104
    y_train = np.empty(0)  # training
    X_train = list()
    folder_data_train = folder_data + '_train'
    y_val = np.empty(0)  # validation
    X_val = list()
    folder_data_val = folder_data + '_val'
    y_test = np.empty(0)  # testing
    X_test = list()
    folder_data_test = folder_data + '_test'

    print('Getting file names for', num_classes, 'classes from folders:')
    print(folder_data_train)
    print(folder_data_val)
    print(folder_data_test)
    for i in range(1, num_classes + 1):  # loop over classes
        # training: Read data file names
        folder = os.path.join(folder_data_train, str(i))
        assert os.path.exists(folder), "Folder: " + folder + ' does not exist'
        print('\ttraining : Read file names from folder ', folder)
        listing = os.listdir(folder + '/')
        seq_files = [os.path.join(folder, f) for f in listing
                     if f[-4:] == '.rec']

        # training: Randomly pick programs
        assert len(seq_files) >= samples_per_class, (
            "Cannot sample " + str(samples_per_class) + " from " +
            str(len(seq_files)) + " files found in " + folder)
        X_train += resample(seq_files, replace=False,
                            n_samples=samples_per_class, random_state=seed)
        y_train = np.concatenate(
            [y_train,
             np.array([int(i)] * samples_per_class, dtype=np.int32)])

        # validation: Read data file names
        folder = os.path.join(folder_data_val, str(i))
        assert os.path.exists(folder), "Folder: " + folder + ' does not exist'
        print('\tvalidation: Read file names from folder ', folder)
        listing = os.listdir(folder + '/')
        seq_files = [os.path.join(folder, f) for f in listing
                     if f[-4:] == '.rec']

        # validation: Randomly pick programs
        if vsamples_per_class > 0:
            assert len(seq_files) >= vsamples_per_class, (
                "Cannot sample " + str(vsamples_per_class) + " from " +
                str(len(seq_files)) + " files found in " + folder)
            X_val += resample(seq_files, replace=False,
                              n_samples=vsamples_per_class,
                              random_state=seed)
            y_val = np.concatenate(
                [y_val,
                 np.array([int(i)] * vsamples_per_class, dtype=np.int32)])
        else:
            assert len(seq_files) > 0, "No .rec files found in" + folder
            X_val += seq_files
            y_val = np.concatenate(
                [y_val,
                 np.array([int(i)] * len(seq_files), dtype=np.int32)])

        # test: Read data file names
        folder = os.path.join(folder_data_test, str(i))
        assert os.path.exists(folder), "Folder: " + folder + ' does not exist'
        print('\ttest : Read file names from folder ', folder)
        listing = os.listdir(folder + '/')
        seq_files = [os.path.join(folder, f) for f in listing
                     if f[-4:] == '.rec']
        assert len(seq_files) > 0, "No .rec files found in" + folder
        X_test += seq_files
        y_test = np.concatenate(
            [y_test, np.array([int(i)] * len(seq_files), dtype=np.int32)])

    # Load dictionary and cutoff statements
    folder_vocabulary = FLAGS.vocabulary_dir
    dictionary_pickle = os.path.join(folder_vocabulary, 'dic_pickle')
    print('\tLoading dictionary from file', dictionary_pickle)
    with open(dictionary_pickle, 'rb') as f:
        dictionary = pickle.load(f)
    unk_index = dictionary[rgx.unknown_token]
    del dictionary

    # Encode source codes and get max. sequence length
    X_seq_train, maxlen_train = encode_srcs(X_train, 'training', unk_index)
    X_seq_val, maxlen_val = encode_srcs(X_val, 'validation', unk_index)
    X_seq_test, maxlen_test = encode_srcs(X_test, 'testing', unk_index)
    maxlen = max(maxlen_train, maxlen_test, maxlen_val)
    print('Max. sequence length overall:', maxlen)
    print('Padding sequences')
    X_seq_train = pad_src(X_seq_train, maxlen, unk_index)
    X_seq_val = pad_src(X_seq_val, maxlen, unk_index)
    X_seq_test = pad_src(X_seq_test, maxlen, unk_index)

    # Get one-hot vectors for classification
    print('YTRAIN\n', y_train)
    y_1hot_train = get_onehot(y_train, num_classes)
    y_1hot_val = get_onehot(y_val, num_classes)

    ########################################################################
    # Setup paths
    model_name = model.__name__
    model_path = os.path.join(
        folder_results, "classifyapp/models/{}.model".format(model_name))
    predictions_path = os.path.join(
        folder_results,
        "classifyapp/predictions/{}.result".format(model_name))

    # If predictions have already been made with these embeddings, load them
    if fs.exists(predictions_path):
        print("\tFound predictions in", predictions_path, ", skipping...")
        with open(predictions_path, 'rb') as infile:
            p = pickle.load(infile)
    else:  # could not find predictions already computed with these embeddings
        # Embeddings
        import tensorflow as tf  # for embeddings lookup
        embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)
        vocabulary_size, embedding_dimension = embedding_matrix_normalized.shape
        print('XSEQ:\n', X_seq_train)
        print('EMB:\n', embedding_matrix_normalized)

        gen_test = EmbeddingPredictionSequence(batch_size, X_seq_test,
                                               embedding_matrix_normalized)

        # If models have already been made with these embeddings, load them
        if fs.exists(model_path):
            print("\n\tFound trained model in", model_path, ", skipping...")
            model.restore(model_path)
        else:  # could not find models already computed with these embeddings
            gen_train = EmbeddingSequence(batch_size, X_seq_train,
                                          y_1hot_train,
                                          embedding_matrix_normalized)
            gen_val = EmbeddingSequence(batch_size, X_seq_val, y_1hot_val,
                                        embedding_matrix_normalized)

            ################################################################
            # Train

            # Create a new model and train it
            print('\n--- Initializing model...')
            model.init(seed=seed,
                       maxlen=maxlen,
                       embedding_dim=int(embedding_dimension),
                       num_classes=num_classes,
                       dense_layer_size=dense_layer_size)
            if print_summary:
                model.model.summary()
            print('\n--- Training model...')
            model.train_gen(train_generator=gen_train,
                            validation_generator=gen_val,
                            verbose=True,
                            epochs=num_epochs)

            # Save the model
            fs.mkdir(fs.dirname(model_path))
            model.save(model_path)
            print('\tsaved model to', model_path)

        ####################################################################
        # Test

        # Test model
        print('\n--- Testing model...')
        p = model.predict_gen(generator=gen_test)[0]

        # cache the prediction
        fs.mkdir(fs.dirname(predictions_path))
        with open(predictions_path, 'wb') as outfile:
            pickle.dump(p, outfile)
        print('\tsaved predictions to', predictions_path)

    ########################################################################
    # Return accuracy
    accuracy = p == y_test  # prediction accuracy
    return accuracy
def evaluate(model, device, data_folder, out_folder, embeddings,
             dense_layer_size, print_summary, num_epochs, batch_size):
    data = []

    # Create device list
    if device == 'all':
        device_list = ["Cypress", "Tahiti", "Fermi", "Kepler"]
    else:
        device_list = [device]

    for i, platform in enumerate(device_list):
        print('\n------------------------------------------------------------------')
        print('--- Platform', platform, '[', i + 1, '/ 4 ]')
        print('------------------------------------------------------------------')
        platform_name = platform2str(platform)

        # Read data
        oracle_file = os.path.join(data_folder, "pact-2014-oracles.csv")
        oracles = pd.read_csv(oracle_file)
        runtimes_file = os.path.join(data_folder, "pact-2014-runtimes.csv")
        df = pd.read_csv(runtimes_file)
        print('\tRead data from', oracle_file, '\n\tand', runtimes_file)

        # Extract data
        oracle_runtimes = np.array(
            [float(x) for x in oracles["runtime_" + platform]])
        y = np.array([int(x) for x in oracles["cf_" + platform]],
                     dtype=np.int32)
        y_1hot = get_onehot(oracles, platform)

        # Encode source codes
        X_seq, maxlen = encode_srcs(data_folder, df)

        # Embeddings
        import tensorflow as tf  # for embeddings lookup
        embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)
        vocabulary_size, embedding_dimension = embedding_matrix_normalized.shape
        seq_ = tf.placeholder(dtype=tf.int32)

        # Tensor of shape (num_input_files, sequence length, embedding dimension)
        embedding_input_ = tf.nn.embedding_lookup(
            embedding_matrix_normalized, seq_)

        # Make tf block less gpu memory
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            embedding_input = sess.run(embedding_input_,
                                       feed_dict={seq_: X_seq})

        # Leave-one-out cross-validation
        kf = KFold(n_splits=len(y), shuffle=False)
        for j, (train_index, test_index) in enumerate(kf.split(y)):
            print('--- Cross validation step [', j + 1, '/ ', len(y), ']')
            kernel = sorted(set(df["kernel"]))[test_index[0]]
            X_cc, y_cc = get_magni_features(df, oracles, platform)

            model_name = model.__name__
            model_basename = model.__basename__
            model_path = os.path.join(
                out_folder,
                "models/{model_basename}-{platform}-{j}.model".format(
                    model_basename=model_basename, platform=platform, j=j))
            predictions_path = os.path.join(
                out_folder,
                "predictions/{model_basename}-{platform}-{j}.result".format(
                    model_basename=model_basename, platform=platform, j=j))

            if fs.exists(predictions_path):
                # load result from cache
                print("\tFound predictions in", predictions_path,
                      ", skipping...")
                with open(predictions_path, 'rb') as infile:
                    p = pickle.load(infile)
            else:
                if fs.exists(model_path):
                    # load a trained model from cache
                    print("\n\tFound trained model in", model_path,
                          ", skipping...")
                    model.restore(model_path)
                else:
                    # Initialize model and print summary
                    print('\n--- Training model...')
                    model.init(seed, maxlen, int(embedding_dimension),
                               dense_layer_size)
                    if print_summary:
                        model.model.summary()

                    # Train and cache a model
                    model.train(sequences=embedding_input[train_index, :, :],
                                verbose=True,
                                y_1hot=y_1hot[train_index],
                                epochs=num_epochs,
                                batch_size=batch_size)

                    # cache the model
                    fs.mkdir(fs.dirname(model_path))
                    model.save(model_path)
                    print('\tsaved model to', model_path)

                # test model
                print('\n--- Testing model...')
                p = model.predict(
                    sequences=embedding_input[test_index, :, :],
                    batch_size=batch_size)[0]

                # The runtimes of some coarsening factors are not recorded
                # in the data table. If that is the case for the predicted
                # cf, clamp it down to the highest cf for which the runtime
                # is recorded
                p = min(p, 2**(len(X_cc[test_index[0]]) - 1))

                # cache the prediction
                fs.mkdir(fs.dirname(predictions_path))
                with open(predictions_path, 'wb') as outfile:
                    pickle.dump(p, outfile)
                print('\tsaved predictions to', predictions_path)

            o = y[test_index[0]]  # oracle prediction (true value)
            correct = p == o  # predictions' correctness

            # get runtime without thread coarsening
            row = df[(df["kernel"] == kernel) & (df["cf"] == 1)]
            assert (len(row) == 1)  # sanity check
            nocf_runtime = float(row["runtime_" + platform])

            # get runtime of prediction
            row = df[(df["kernel"] == kernel) & (df["cf"] == p)]
            assert (len(row) == 1)  # sanity check
            p_runtime = float(row["runtime_" + platform])

            # get runtime of oracle coarsening factor
            o_runtime = oracle_runtimes[test_index[0]]

            # speedup and % oracle
            s_oracle = nocf_runtime / o_runtime
            p_speedup = nocf_runtime / p_runtime
            p_oracle = o_runtime / p_runtime

            # record result
            data.append({
                "Model": model_name,
                "Platform": platform_name,
                "Kernel": kernel,
                "Oracle-CF": o,
                "Predicted-CF": p,
                "Speedup": p_speedup,
                "Oracle": p_oracle
            })

    return pd.DataFrame(data, columns=[
        "Model", "Platform", "Kernel", "Oracle-CF", "Predicted-CF",
        "Speedup", "Oracle"
    ])
def write_file(path, contents):
    fs.mkdir(fs.dirname(path))
    with open(path, 'w') as outfile:
        outfile.write(contents)
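# A usage sketch for write_file() above: parent directories are created as
# needed before the file is written (the path is illustrative; note this
# variant, unlike the one earlier in this file, assumes the path has a
# non-empty directory component):
write_file("/tmp/labm8.demo/nested/dirs/hello.txt", "Hello, world!\n")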
def test_mkdir(): fs.rm("/tmp/labm8.dir") assert not fs.isdir("/tmp/labm8.dir") fs.mkdir("/tmp/labm8.dir") assert fs.isdir("/tmp/labm8.dir")
def test_mkdir_parents():
    assert not fs.isdir("/tmp/labm8.dir/foo/bar")
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    assert fs.isdir("/tmp/labm8.dir/foo/bar")
def test_mkdir_exists():
    fs.mkdir("/tmp/labm8.dir/")
    assert fs.isdir("/tmp/labm8.dir/")
    fs.mkdir("/tmp/labm8.dir/")
    fs.mkdir("/tmp/labm8.dir/")
    assert fs.isdir("/tmp/labm8.dir/")
def test_mkdir_exists(self):
    fs.mkdir("/tmp/labm8.dir/")
    self._test(True, fs.isdir("/tmp/labm8.dir/"))
    fs.mkdir("/tmp/labm8.dir/")
    fs.mkdir("/tmp/labm8.dir/")
    self._test(True, fs.isdir("/tmp/labm8.dir/"))
def test_mkdir_parents(self):
    self._test(False, fs.isdir("/tmp/labm8.dir/foo/bar"))
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    self._test(True, fs.isdir("/tmp/labm8.dir/foo/bar"))
def evaluate(model):
    from progressbar import ProgressBar
    progressbar = [0, ProgressBar(maxval=68)]
    progressbar[1].start()
    data = []
    X_seq = None  # defer sequence encoding (it's expensive)

    for i, platform in enumerate(["Cypress", "Tahiti", "Fermi", "Kepler"]):
        platform_name = platform2str(platform)

        # Read the oracle runtimes for each of the four platforms
        oracle_runtimes = np.array(
            [float(x) for x in oracles["runtime_" + platform]])
        # Read the labels (coarsening factors) for each platform
        y = np.array([int(x) for x in oracles["cf_" + platform]],
                     dtype=np.int32)
        # One-hot encode the six possible label values
        y_1hot = get_onehot(oracles, platform)
        X_cc, y_cc = get_features(df, oracles, platform)

        embed = np.load(f"{data_path}caseb_128.npy")

        kf = KFold(n_splits=len(y), shuffle=False)
        for j, (train_index, test_index) in enumerate(kf.split(y)):
            kernel = sorted(set(df["kernel"]))[test_index[0]]

            model_name = model.__name__
            model_basename = model.__basename__
            model_path = f"result_caseB/modelb_caseB/{model_basename}-{platform}-{j}.model"
            predictions_path = f"result_caseB/predictionb_caseB/{model_basename}-{platform}-{j}.result"

            if fs.exists(predictions_path):
                # load result from cache
                with open(predictions_path, 'rb') as infile:
                    p = pickle.load(infile)
            else:
                if fs.exists(model_path):
                    # load a trained model from cache
                    model.restore(model_path)
                else:
                    # create a new model and train it
                    model.init(seed=seed)
                    model.train(sequences=embed[train_index],
                                verbose=True,  # TODO
                                y_1hot=y_1hot[train_index])
                    # cache the model
                    fs.mkdir(fs.dirname(model_path))
                    model.save(model_path)

                # make prediction
                p = model.predict(sequences=np.array(embed[test_index[0]]))[0]
                p = min(p, 2**(len(X_cc[test_index[0]]) - 1))

                # cache the prediction
                fs.mkdir(fs.dirname(predictions_path))
                with open(predictions_path, 'wb') as outfile:
                    pickle.dump(p, outfile)

            # oracle prediction
            o = y[test_index[0]]
            correct = p == o

            # get runtime without thread coarsening
            row = df[(df["kernel"] == kernel) & (df["cf"] == 1)]
            assert (len(row) == 1)  # sanity check
            nocf_runtime = float(row["runtime_" + platform])

            # get runtime of prediction
            row = df[(df["kernel"] == kernel) & (df["cf"] == p)]
            assert (len(row) == 1)  # sanity check
            p_runtime = float(row["runtime_" + platform])

            # get runtime of oracle coarsening factor
            o_runtime = oracle_runtimes[test_index[0]]

            # speedup and % oracle
            s_oracle = nocf_runtime / o_runtime
            p_speedup = nocf_runtime / p_runtime
            p_oracle = o_runtime / p_runtime

            # record result
            data.append({
                "Model": model_name,
                "Platform": platform_name,
                "Kernel": kernel,
                "Oracle-CF": o,
                "Predicted-CF": p,
                "Speedup": p_speedup,
                "Oracle": p_oracle
            })

            progressbar[0] += 1  # update progress bar
            progressbar[1].update(progressbar[0])

    return pd.DataFrame(data, columns=[
        "Model", "Platform", "Kernel", "Oracle-CF", "Predicted-CF",
        "Speedup", "Oracle"
    ])
if __name__ == "__main__": parser = ArgumentParser(description=__doc__) parser.add_argument("-H", "--hostname", type=str, default="cc1", help="MySQL database hostname") args = parser.parse_args() db.init(args.hostname) with Session(commit=False) as s: # Export results # print("Exporting CLgen results ...") fs.mkdir("export/clgen/result") # Pick up where we left off done = set([ int(fs.basename(path)) for path in Path("export/clgen/result").iterdir() ]) print(len(done), "done") ids = set([x[0] for x in s.query(CLgenResult.id).all()]) print(len(ids), "in total") todo = ids - done print(len(todo), "todo") for result_id in ProgressBar()(todo): result = s.query(CLgenResult).filter( CLgenResult.id == result_id).scalar()
def test_ls_empty_dir(self):
    fs.mkdir("/tmp/labm8.empty")
    self._test([], fs.ls("/tmp/labm8.empty"))
    fs.rm("/tmp/labm8.empty")
if __name__ == "__main__": parser = ArgumentParser(description=__doc__) parser.add_argument("-H", "--hostname", type=str, default="cc1", help="MySQL database hostname") args = parser.parse_args() db.init(args.hostname) session = db.make_session() clsmith_wrong_code_programs = session.query(CLSmithResult) \ .filter(CLSmithResult.classification == "w") fs.mkdir("../data/difftest/unreduced/clsmith/wrong_code") fs.mkdir("../data/difftest/unreduced/clsmith/wrong_code/reports") for result in clsmith_wrong_code_programs: vendor = vendor_str(result.testbed.platform) with open( f"../data/difftest/unreduced/clsmith/wrong_code/{vendor}-{result.program.id}.cl", "w") as outfile: print(result.program.src, file=outfile) with open( f"../data/difftest/unreduced/clsmith/wrong_code/reports/{vendor}-{result.id}.txt", "w") as outfile: print(outfile.name) print(generate_wrong_code_report(result), file=outfile)
def main():
    db = _db.Database(experiment.ORACLE_PATH)
    ml.start()

    # Delete any old stuff.
    fs.rm(experiment.IMG_ROOT + "/*")
    fs.rm(experiment.TAB_ROOT + "/*")

    # Make directories
    fs.mkdir(experiment.TAB_ROOT)
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/bars"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/heatmap"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/trisurf"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/datasets"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/datasets"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/datasets"))

    visualise.pie(db.num_scenarios_by_device,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_device"))
    visualise.pie(db.num_runtime_stats_by_device,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_device"))
    visualise.pie(db.num_scenarios_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_kernel,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_kernel"))

    # Per-scenario plots
    for row in db.scenario_properties:
        (scenario, device, kernel, north, south, east, west, max_wgsize,
         width, height, tout) = row
        title = ("{device}: {kernel}[{n},{s},{e},{w}]\n"
                 "{width} x {height} {type}s"
                 .format(device=text.truncate(device, 18), kernel=kernel,
                         n=north, s=south, e=east, w=west,
                         width=width, height=height, type=tout))
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/heatmap/{id}.png".format(id=scenario))
        space = _space.ParamSpace.from_dict(db.perf_scenario(scenario))
        max_c = min(25, len(space.c))
        max_r = min(25, len(space.r))
        space.reshape(max_c=max_c, max_r=max_r)

        # Heatmaps.
        mask = _space.ParamSpace(space.c, space.r)
        for j in range(len(mask.r)):
            for i in range(len(mask.c)):
                if space.matrix[j][i] == 0:
                    r, c = space.r[j], space.c[i]
                    # TODO: Get values from refused_params table.
                    if r * c >= max_wgsize:
                        # Illegal
                        mask.matrix[j][i] = -1
                    else:
                        # Refused
                        db.execute(
                            "INSERT OR IGNORE INTO refused_params VALUES(?,?)",
                            (scenario, hash_params(c, r)))
                        space.matrix[j][i] = -1
                        mask.matrix[j][i] = 1
        db.commit()

        new_order = list(reversed(range(space.matrix.shape[0])))
        data = space.matrix[:][new_order]

        figsize = (12, 6)
        _, ax = plt.subplots(1, 2, figsize=figsize, sharey=True)
        sns.heatmap(data, ax=ax[0], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)
        ax[0].set_title(title)

        new_order = list(reversed(range(mask.matrix.shape[0])))
        data = mask.matrix[:][new_order]
        sns.heatmap(data, ax=ax[1], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        # Set labels.
        ax[0].set_ylabel("Rows")
        ax[0].set_xlabel("Columns")
        ax[1].set_ylabel("Rows")
        ax[1].set_xlabel("Columns")

        # plt.tight_layout()
        # plt.gcf().set_size_inches(*figsize, dpi=300)
        viz.finalise(output)

        # 3D bars.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/bars/{id}.png".format(id=scenario))
        space.bar3d(output=output, title=title, zlabel="Performance",
                    rotation=45)

        # Trisurfs.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/trisurf/{id}.png".format(id=scenario))
        space.trisurf(output=output, title=title, zlabel="Performance",
                      rotation=45)

    #####################
    # ML Visualisations #
    #####################
    # features_tab(db, experiment.TAB_ROOT)

    visualise_classification_job(db, "xval")
    visualise_classification_job(db, "arch")
    visualise_classification_job(db, "xval_real")
    visualise_classification_job(db, "synthetic_real")

    # Runtime regression accuracy.
    visualise_regression_job(db, "xval")
    visualise_regression_job(db, "arch")
    visualise_regression_job(db, "xval_real")
    visualise_regression_job(db, "synthetic_real")

    # Whole-dataset plots
    visualise.runtimes_variance(
        db, fs.path(experiment.IMG_ROOT, "runtime_variance.png"),
        min_samples=30)
    visualise.num_samples(db, fs.path(experiment.IMG_ROOT, "num_samples.png"))
    visualise.runtimes_range(
        db, fs.path(experiment.IMG_ROOT, "runtimes_range.png"))
    visualise.max_speedups(
        db, fs.path(experiment.IMG_ROOT, "max_speedups.png"))
    visualise.kernel_performance(
        db, fs.path(experiment.IMG_ROOT, "kernel_performance.png"))
    visualise.device_performance(
        db, fs.path(experiment.IMG_ROOT, "device_performance.png"))
    visualise.dataset_performance(
        db, fs.path(experiment.IMG_ROOT, "dataset_performance.png"))
    visualise.num_params_vs_accuracy(
        db, fs.path(experiment.IMG_ROOT, "num_params_vs_accuracy.png"))
    visualise.performance_vs_coverage(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_coverage.png"))
    visualise.performance_vs_max_wgsize(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_max_wgsize.png"))
    visualise.performance_vs_wgsize(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_wgsize.png"))
    visualise.performance_vs_wg_c(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_wg_c.png"))
    visualise.performance_vs_wg_r(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_wg_r.png"))
    visualise.max_wgsizes(db, fs.path(experiment.IMG_ROOT, "max_wgsizes.png"))
    visualise.oracle_speedups(
        db, fs.path(experiment.IMG_ROOT, "oracle_speedups.png"))

    visualise.coverage(
        db, fs.path(experiment.IMG_ROOT, "coverage/coverage.png"))
    visualise.safety(db, fs.path(experiment.IMG_ROOT, "safety/safety.png"))
    visualise.oracle_wgsizes(db, fs.path(experiment.IMG_ROOT, "oracle/all.png"))

    # Per-device plots
    for i, device in enumerate(db.devices):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE device='{0}')"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}.png".format(i))
        visualise.coverage(db, output=output, where=where, title=device)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=device)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where, title=device)

        where = ("scenario IN (\n"
                 "    SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 "    SELECT id FROM scenarios WHERE kernel IN (\n"
                 "        SELECT id FROM kernel_names WHERE synthetic=0\n"
                 "    )\n"
                 ")"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}_real.png".format(i))
        visualise.coverage(db, output=output, where=where,
                           title=device + ", real")
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}_real.png".format(i))
        visualise.safety(db, output, where=where, title=device + ", real")
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}_real.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where,
                                 title=device + ", real")

        where = ("scenario IN (\n"
                 "    SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 "    SELECT id FROM scenarios WHERE kernel IN (\n"
                 "        SELECT id FROM kernel_names WHERE synthetic=1\n"
                 "    )\n"
                 ")"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}_synthetic.png".format(i))
        visualise.coverage(db, output=output, where=where,
                           title=device + ", synthetic")
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}_synthetic.png".format(i))
        visualise.safety(db, output, where=where,
                         title=device + ", synthetic")
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}_synthetic.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where,
                                 title=device + ", synthetic")

    # Per-kernel plots
    for kernel, ids in db.lookup_named_kernels().iteritems():
        id_wrapped = ['"' + id + '"' for id in ids]
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE kernel IN ({0}))"
                 .format(",".join(id_wrapped)))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/kernels/{0}.png".format(kernel))
        visualise.coverage(db, output=output, where=where, title=kernel)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/kernels/{0}.png".format(kernel))
        visualise.safety(db, output=output, where=where, title=kernel)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/kernels/{0}.png".format(kernel))
        visualise.safety(db, output=output, where=where, title=kernel)

    # Per-dataset plots
    for i, dataset in enumerate(db.datasets):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE dataset='{0}')"
                 .format(dataset))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/datasets/{0}.png".format(i))
        visualise.coverage(db, output, where=where, title=dataset)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/datasets/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=dataset)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/datasets/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=dataset)

    ml.stop()
def test_ls_empty_dir():
    fs.mkdir("/tmp/labm8.empty")
    assert not fs.ls("/tmp/labm8.empty")
    fs.rm("/tmp/labm8.empty")
#!/usr/bin/env python3.6
import sys

from progressbar import ProgressBar

from labm8 import crypto
from labm8 import fs

if __name__ == "__main__":
    inpath = sys.argv[1]
    outdir = sys.argv[2]
    print(f"reading from {inpath} into {outdir}")

    assert fs.isfile(inpath)
    assert not fs.exists(outdir) or fs.isdir(outdir)
    fs.mkdir(outdir)

    with open(inpath) as infile:
        text = infile.read()

    kernels = text.split("// ==== START SAMPLE ====")
    kernels = [kernel.strip() for kernel in kernels if kernel.strip()]
    print(len(kernels), "kernels")

    sha1s = [crypto.sha1_str(kernel) for kernel in kernels]
    for kernel, sha1 in ProgressBar()(list(zip(kernels, sha1s))):
        with open(f"{outdir}/{sha1}.txt", "w") as outfile:
            print(kernel, file=outfile)