def call(self):
    """Run the wrapped callable; return (result, elapsed_ms), or None if the current thread is interrupted or dead."""
    current = Thread.currentThread()
    if not current.isAlive() or current.isInterrupted():
        return None
    began = System.nanoTime()
    value = self.fn(*self.args, **self.kwargs)
    elapsed_ms = (System.nanoTime() - began) / 1000000.0
    return value, elapsed_ms
def _isPaused(self, keycode):
    # Key-repeat state machine: the first poll of a held key is never
    # "paused"; later polls are paused until the configured delay/interval
    # elapses. self.keyRepeat is presumably (initial_delay_ms, interval_ms)
    # — repeat disabled when the delay entry is 0. TODO confirm against caller.
    if keycode not in self.keyHeld:
        self.keyHeld[keycode] = {'pressed': False, 'delay': False, 'time': 0}
    key = self.keyHeld[keycode]
    if not key['pressed']:
        # First observation of this key being down: deliver immediately.
        key['pressed'] = True
        paused = False
        if self.keyRepeat[0]:
            # Repeat enabled: start the initial-delay timer (ms since epoch).
            key['delay'] = True
            key['time'] = System.nanoTime() // 1000000
    else:
        paused = True
        if self.keyRepeat[0]:
            time = System.nanoTime() // 1000000
            if key['delay']:
                # Still inside the initial repeat delay.
                if time - key['time'] > self.keyRepeat[0]:
                    key['time'] = time
                    key['delay'] = False
                    paused = False
            elif time - key['time'] > self.keyRepeat[1]:
                # Repeat interval elapsed: deliver another key event.
                key['time'] = time
                paused = False
    return paused
def execute(self) -> object:
    """Launch one Black-Scholes kernel per option array, sync by reading back, and return the first result."""
    self.block_size = self._block_size["block_size_1d"]
    result = [0] * self.K
    # Call the kernels;
    start_comp = System.nanoTime()
    start = System.nanoTime()
    for k in range(self.K):
        self.execute_phase(f"bs_{k}",
                           self.bs_kernel(self.num_blocks, self.block_size),
                           self.x[k], self.y[k], self.size, R, V, T, K)
    if self.time_phases:
        start = System.nanoTime()
    # Reading back one element per array forces synchronization.
    for k in range(self.K):
        result[k] = self.y[k][0]
    end = System.nanoTime()
    if self.time_phases:
        self.benchmark.add_phase({"name": "sync",
                                  "time_sec": (end - start) / 1_000_000_000})
    self.benchmark.add_computation_time((end - start_comp) / 1_000_000_000)
    self.benchmark.add_to_benchmark("gpu_result", result[0])
    if self.benchmark.debug:
        BenchmarkResult.log_message(f"\tgpu result: {result[0]}")
    return result[0]
def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
    """Recompute the benchmark result on CPU and record the difference vs the GPU.

    :param gpu_result: scalar result read back from the GPU
    :param reinit: when True, force recomputation of the cached CPU result
    """
    # Recompute the CPU result only if necessary;
    start = System.nanoTime()
    if self.current_iter == 0 or reinit:
        # Re-initialize the random number generator with the same seed as the GPU to generate the same values;
        seed(self.random_seed)
        if self.benchmark.random_init:
            x_g = np.zeros(self.size)
            y_g = np.zeros(self.size)
            for i in range(self.size):
                x_g[i] = randint(0, 10)
                y_g[i] = randint(0, 10)
        else:
            x_g = 1 / np.linspace(1, self.size, self.size)
            y_g = 1 / np.linspace(1, self.size, self.size)
        x_g += 1
        y_g += 1
        self.cpu_result = x_g[0] + y_g[0]
    # FIX: convert nanoseconds to seconds, matching the "cpu_time_sec" key and
    # the "sec" label in the debug log (sibling benchmarks divide by 1e9).
    cpu_time = (System.nanoTime() - start) / 1_000_000_000
    difference = np.abs(self.cpu_result - gpu_result)
    self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
    self.benchmark.add_to_benchmark("cpu_gpu_res_difference", difference)
    if self.benchmark.debug:
        BenchmarkResult.log_message(f"\tcpu result: {self.cpu_result:.4f}, " +
                                    f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
def tick(self, framerate=0):
    """
    Call once per program cycle, returns ms since last call.
    An optional framerate will add pause to limit rate.
    """
    # Block while a repaint is pending so frame timing excludes it.
    while self._repaint_sync.get():
        try:
            self._thread.sleep(1)
        except InterruptedException:
            Thread.currentThread().interrupt()
            break
    self._time = System.nanoTime() // 1000000
    if framerate:
        pause_ms = (1000 // framerate) - (self._time - self._time_init)
        if pause_ms > 0:
            try:
                self._thread.sleep(pause_ms)
            except InterruptedException:
                Thread.currentThread().interrupt()
            self._time = System.nanoTime() // 1000000
    self._pos = self._pos - 1 if self._pos else 9
    self._time_diff[self._pos] = self._time - self._time_init
    self._time_init = self._time
    return self._time_diff[self._pos]
def tick(self, framerate=0):
    """
    Call once per program cycle, returns ms since last call.
    An optional framerate will add pause to limit rate.
    """
    # Wait for any pending repaint to finish before timing the frame.
    while self._repaint_sync.get():
        try:
            self._thread.sleep(1)
        except InterruptedException:
            Thread.currentThread().interrupt()
            break
    now = System.nanoTime() // 1000000
    if framerate:
        remaining = (1000 // framerate) - (now - self._time_init)
        if remaining > 0:
            try:
                self._thread.sleep(remaining)
            except InterruptedException:
                Thread.currentThread().interrupt()
            now = System.nanoTime() // 1000000
    self._time = now
    if self._pos:
        self._pos -= 1
    else:
        self._pos = 9
    self._time_diff[self._pos] = now - self._time_init
    self._time_init = now
    return self._time_diff[self._pos]
def execute(self) -> object:
    """Square both input vectors on the GPU, reduce them to one scalar, and return it."""
    self.block_size = self._block_size["block_size_1d"]
    start_comp = System.nanoTime()
    start = 0
    # A, B. Call the kernel. The 2 computations are independent, and can be done in parallel;
    self.execute_phase("square_1",
                       self.square_kernel(self.num_blocks, self.block_size),
                       self.x, self.x1, self.size)
    self.execute_phase("square_2",
                       self.square_kernel(self.num_blocks, self.block_size),
                       self.y, self.y1, self.size)
    # C. Compute the sum of the result;
    self.execute_phase("reduce",
                       self.reduce_kernel(self.num_blocks, self.block_size),
                       self.x1, self.y1, self.res, self.size)
    # Add a final sync step to measure the real computation time;
    if self.time_phases:
        start = System.nanoTime()
    result = self.res[0]
    end = System.nanoTime()
    if self.time_phases:
        self.benchmark.add_phase({"name": "sync",
                                  "time_sec": (end - start) / 1_000_000_000})
    self.benchmark.add_computation_time((end - start_comp) / 1_000_000_000)
    self.benchmark.add_to_benchmark("gpu_result", result)
    if self.benchmark.debug:
        BenchmarkResult.log_message(f"\tgpu result: {result:.4f}")
    return result
def delay(time):
    """
    **pyj2d.time.delay**
    Pause for given time (in ms). Return ms paused.
    """
    begin = System.nanoTime() / 1000000
    Thread.sleep(time)
    return (System.nanoTime() / 1000000) - begin
def func_call(self, *args, **kwargs) -> object:
    """Time one call of the wrapped function and record it as a benchmark phase."""
    t_start = System.nanoTime()
    out = func(self, *args, **kwargs)
    t_end = System.nanoTime()
    self.benchmark.add_phase({"name": phase_name,
                              "time_sec": (t_end - t_start) / 1_000_000_000})
    return out
def delay(time):
    """
    **pyj2d.time.delay**
    Pause for given time (in ms). Return ms paused.
    """
    t0 = System.nanoTime() / 1000000
    Thread.sleep(time)
    t1 = System.nanoTime() / 1000000
    return t1 - t0
def timeit(n_iterations, fn, *args, **kwargs):
    """Run fn n_iterations times and print min/max/mean wall time in ms."""
    times = []
    for _ in xrange(n_iterations):
        began = System.nanoTime()
        fn(*args, **kwargs)
        times.append(System.nanoTime() - began)
    print("min: %.2f ms, max: %.2f ms, mean: %.2f ms"
          % (min(times) / 1000000.0,
             max(times) / 1000000.0,
             sum(times) / (len(times) * 1000000.0)))
def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
    """Recompute the conjugate-gradient solve on CPU and compare it to the GPU result."""
    def spmv(ptr, idx, val, vec):
        # Sparse matrix-vector product over a CSR matrix (ptr/idx/val).
        res = np.zeros(len(ptr) - 1)
        for i in range(len(ptr) - 1):
            curr_sum = 0
            start = int(ptr[i])
            end = int(ptr[i + 1])
            for j in range(start, end):
                curr_sum += val[j] * vec[idx[j]]
            res[i] = curr_sum
        return res
    # Recompute the CPU result only if necessary;
    start = System.nanoTime()
    if self.current_iter == 0 or reinit:
        # Re-initialize the random number generator with the same seed as the GPU to generate the same values;
        seed(self.random_seed)
        # Initialize the support device arrays;
        N = self.size
        x = np.ones(N)
        # r = b - A * x
        r = np.array(self.b_cpu) - np.array(spmv(self.ptr_cpu, self.idx_cpu, self.val_cpu, x))
        p = r.copy()
        t1 = r.T.dot(r)
        # Main iteration;
        for i in range(self.num_iterations):
            y = spmv(self.ptr_cpu, self.idx_cpu, self.val_cpu, p)
            t2 = p.dot(y)
            alpha = t1 / t2
            t1_old = t1
            x += alpha * p
            r -= alpha * y
            t1 = r.T.dot(r)
            beta = t1 / t1_old
            p = r + beta * p
        self.cpu_result = x
    # NOTE(review): cpu_time is in nanoseconds although it is stored under
    # "cpu_time_sec" and logged as "sec" — confirm intended units.
    cpu_time = System.nanoTime() - start
    # Compare GPU and CPU results;
    difference = 0
    for i in range(self.size):
        difference += np.abs(self.cpu_result[i] - gpu_result[i])
    self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
    self.benchmark.add_to_benchmark("cpu_gpu_res_difference", str(difference))
    if self.benchmark.debug:
        BenchmarkResult.log_message(f"\tcpu result: [" + ", ".join([f"{x:.4f}" for x in self.cpu_result[:10]]) + "...]; " +
                                    f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
def run(self):
    """Fixed-period game loop: update, render, paint, then sleep off the rest
    of the frame budget; skip renders (update-only) when the loop falls behind.

    Port of the animation loop from "Killer Game Programming in Java".
    NOTE(review): sleep_time is derived from self.period and passed straight
    to Thread.sleep (ms) — confirm self.period is in milliseconds.
    """
    over_sleep_time = 0
    no_delays = 0
    excess = 0
    game_start_time = System.nanoTime()
    prev_stats_time = game_start_time
    before_time = game_start_time
    running = True
    while running:
        self.game_update()
        self.game_render()
        self.paint_screen()
        after_time = System.nanoTime()
        time_diff = after_time - before_time
        sleep_time = (self.period - time_diff) - over_sleep_time
        if sleep_time > 0:
            try:
                Thread.sleep(sleep_time)
            except InterruptedException as e:
                pass
            over_sleep_time = (System.nanoTime() - after_time) - sleep_time
        else:
            # Frame overran its budget: bank the overrun and yield
            # occasionally so other threads get CPU time.
            excess -= sleep_time
            over_sleep_time = 0
            no_delays += 1  # FIX: counter was never incremented, so yield never fired
            if no_delays >= self.NO_DELAYS_PER_YIELD:
                # FIX: 'Thread.yield()' is a Python syntax error ('yield' is a
                # keyword); Jython exposes the Java method as 'yield_'.
                Thread.yield_()
                no_delays = 0
        before_time = System.nanoTime()
        # Catch up on missed updates without rendering, up to a cap.
        skips = 0
        while excess > self.period and skips < self.MAX_FRAME_SKIPS:
            excess -= self.period
            self.game_update()
            skips += 1
        self.frames_skipped += skips
        self.store_stats()
    self.print_stats()
    System.exit(0)
def execute_cuda_benchmark(benchmark, size, block_size, exec_policy, num_iter, debug, prefetch=False, num_blocks=DEFAULT_NUM_BLOCKS, output_date=None):
    """Run one native CUDA benchmark binary via its CLI and log total wall time.

    :param benchmark: benchmark name, used for the command line and output file name
    :param size: input size forwarded to the binary
    :param block_size: dict with "block_size_1d" and "block_size_2d"
    :param exec_policy: execution policy forwarded to the binary
    :param num_iter: number of iterations
    :param debug: when True, log progress messages
    :param prefetch: when True, pass the "-r" prefetch flag
    :param num_blocks: number of GPU blocks
    :param output_date: timestamp for the result folder/file; generated if missing
    """
    if debug:
        BenchmarkResult.log_message("")
        BenchmarkResult.log_message("")
        BenchmarkResult.log_message("#" * 30)
        # FIX: the original log lines referenced undefined names
        # (i, tot_benchmarks, b, n); log the actual arguments instead.
        BenchmarkResult.log_message(f"benchmark={benchmark}, size={size},"
                                    f" block size={block_size}, "
                                    f" prefetch={prefetch}, "
                                    f" num blocks={num_blocks}, "
                                    f" exec policy={exec_policy}")
        BenchmarkResult.log_message("#" * 30)
        BenchmarkResult.log_message("")
        BenchmarkResult.log_message("")
    if not output_date:
        output_date = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    file_name = f"cuda_{output_date}_{benchmark}_{exec_policy}_{size}_{block_size['block_size_1d']}_{block_size['block_size_2d']}_{prefetch}_{num_iter}_{num_blocks}.csv"
    # Create a folder if it doesn't exist;
    output_folder_path = os.path.join(BenchmarkResult.DEFAULT_RES_FOLDER, output_date + "_cuda")
    if not os.path.exists(output_folder_path):
        if debug:
            BenchmarkResult.log_message(f"creating result folder: {output_folder_path}")
        os.mkdir(output_folder_path)
    output_path = os.path.join(output_folder_path, file_name)
    benchmark_cmd = CUDA_CMD.format(benchmark, exec_policy, size,
                                    block_size["block_size_1d"], block_size["block_size_2d"],
                                    num_iter, num_blocks,
                                    "-r" if prefetch else "", "-a", output_path)
    start = System.nanoTime()
    # FIX: stdout=subprocess.STDOUT is not a valid value for `stdout`
    # (STDOUT is only meaningful for `stderr`) and raises at runtime;
    # inherit the parent's stdout instead.
    result = subprocess.run(
        benchmark_cmd,
        shell=True,
        cwd=f"{os.getenv('GRCUDA_HOME')}/projects/resources/cuda/bin")
    result.check_returncode()
    end = System.nanoTime()
    if debug:
        BenchmarkResult.log_message(
            f"Benchmark total execution time: {(end - start) / 1_000_000_000:.2f} seconds")
def delay(self, time):
    """
    **pyj2d.time.delay**
    Pause for given time (in ms). Return ms paused.
    """
    begin = System.nanoTime() // 1000000
    try:
        Thread.sleep(time)
    except InterruptedException:
        Thread.currentThread().interrupt()
    now = System.nanoTime() // 1000000
    return now - begin
def delay(self, time):
    """
    **pyj2d.time.delay**
    Pause for given time (in ms). Return ms paused.
    """
    t0 = System.nanoTime()//1000000
    try:
        Thread.sleep(time)
    except InterruptedException:
        # Preserve the interrupt flag for callers that poll it.
        Thread.currentThread().interrupt()
    return (System.nanoTime()//1000000) - t0
def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
    """Recompute HITS (hubs & authorities) on CPU and compare with the GPU result."""
    def spmv(ptr, idx, val, vec):
        # Sparse matrix-vector product over a CSR matrix (ptr/idx/val).
        res = np.zeros(len(ptr) - 1)
        for i in range(len(ptr) - 1):
            curr_sum = 0
            start = int(ptr[i])
            end = int(ptr[i + 1])
            for j in range(start, end):
                curr_sum += val[j] * vec[idx[j]]
            res[i] = curr_sum
        return res
    # Recompute the CPU result only if necessary;
    start = System.nanoTime()
    if self.current_iter == 0 or reinit:
        # Re-initialize the random number generator with the same seed as the GPU to generate the same values;
        seed(self.random_seed)
        # Initialize the support device arrays;
        N = self.size
        auth1 = np.ones(N)
        hub1 = np.ones(N)
        # Main iteration;
        for i in range(self.num_iterations):
            # Authority;
            auth2 = spmv(self.ptr2_cpu, self.idx2_cpu, self.val2_cpu, hub1)
            auth2 = auth2 / np.sum(auth2)
            # Hubs
            hub2 = spmv(self.ptr_cpu, self.idx_cpu, self.val_cpu, auth1)
            hub2 = hub2 / np.sum(hub2)
            auth1 = auth2
            hub1 = hub2
        self.cpu_result = hub1 + auth1
    # NOTE(review): cpu_time is in nanoseconds although it is stored under
    # "cpu_time_sec" and logged as "sec" — confirm intended units.
    cpu_time = System.nanoTime() - start
    # Compare GPU and CPU results;
    difference = 0
    for i in range(self.size):
        difference += np.abs(self.cpu_result[i] - gpu_result[i])
    self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
    self.benchmark.add_to_benchmark("cpu_gpu_res_difference", str(difference))
    if self.benchmark.debug:
        BenchmarkResult.log_message(
            f"\tcpu result: [" + ", ".join([f"{x:.4f}" for x in self.cpu_result[:10]]) + "...]; " +
            f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
def run(fn, args, msg="", n_iterations=20): timings = [] for i in xrange(n_iterations): t0 = System.nanoTime() fn(*args) t1 = System.nanoTime() timings.append(t1 - t0) minimum = min(timings) maximum = max(timings) average = sum(timings) / float(len(timings)) print msg, "min:", minimum, "max:", maximum, "avg:", average
def timeIt(fn, n_iterations=10): elapsed_times = [] for i in range(n_iterations): t0 = System.nanoTime() fn() t1 = System.nanoTime() elapsed_times.append(t1 - t0) smallest = min(elapsed_times) largest = max(elapsed_times) average = sum(elapsed_times) / float(n_iterations) print "Elapsed time: min", smallest, "max", largest, "average", average return elapsed_time
def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
    """Recompute the naive-Bayes + ridge ensemble prediction on CPU and compare with the GPU result."""
    def softmax(X):
        # Row-wise softmax.
        return np.exp(X) / np.sum(np.exp(X), axis=1).reshape(X.shape[0], 1)

    def logsumexp(X):
        return np.log(np.sum(np.exp(X)))

    def naive_bayes_predict(X, feature_log_prob, log_class_prior):
        # Joint log-likelihood, normalized into class probabilities.
        jll = X.dot(feature_log_prob.T) + log_class_prior
        amax = np.amax(jll, axis=1)
        l = logsumexp(jll - np.atleast_2d(amax).T) + amax
        return np.exp(jll - np.atleast_2d(l).T)

    def normalize(X):
        # Column-wise standardization.
        return (X - np.mean(X, axis=0)) / np.std(X, axis=0)

    def ridge_pred(X, coef, intercept):
        return np.dot(X, coef.T) + intercept
    # Recompute the CPU result only if necessary;
    start = System.nanoTime()
    if self.current_iter == 0 or reinit:
        # Re-initialize the random number generator with the same seed as the GPU to generate the same values;
        seed(self.random_seed)
        r1_g = naive_bayes_predict(self.x_cpu, self.nb_feat_log_prob_cpu, self.nb_class_log_prior_cpu)
        r2_g = ridge_pred(normalize(self.x_cpu), self.ridge_coeff_cpu, self.ridge_intercept_cpu)
        # Ensemble: per-row argmax of the summed softmax scores.
        r_g = np.argmax(softmax(r1_g) + softmax(r2_g), axis=1)
        self.cpu_result = r_g
    # NOTE(review): cpu_time is in nanoseconds although it is stored under
    # "cpu_time_sec" and logged as "sec" — confirm intended units.
    cpu_time = System.nanoTime() - start
    # Compare GPU and CPU results;
    difference = 0
    for i in range(self.size):
        difference += np.abs(self.cpu_result[i] - gpu_result[i])
    self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
    self.benchmark.add_to_benchmark("cpu_gpu_res_difference", str(difference))
    if self.benchmark.debug:
        BenchmarkResult.log_message(
            f"\tcpu result: [" + ", ".join([f"{x:.4f}" for x in self.cpu_result[:10]]) + "...]; " +
            f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
def game_update(self):
    """Advance the game clocks and tick every component while the game is active."""
    self.gelapsed_after = System.nanoTime()
    clock = self.game_time
    clock.elapsed_game_time.set_span(self.gelapsed_before, self.gelapsed_after)
    clock.elapsed_real_time.set_span(self.gelapsed_before, self.gelapsed_after)
    clock.total_game_time.set_span(self.game_start_time, self.gelapsed_after)
    clock.total_real_time.set_span(self.game_start_time, self.gelapsed_after)
    self.gelapsed_before = System.nanoTime()
    if self.running and self.is_paused is not True and self.game_over is not True:
        for component in self.components.getComponents():
            component.update(clock)
        self.updates += 1
def get_ticks(self):
    """
    **pyj2d.time.get_ticks**
    Return ms since program start.
    """
    elapsed = (System.nanoTime() // 1000000) - self._time_init
    return elapsed
def get_ticks():
    """
    **pyj2d.time.get_ticks**
    Return ms since program start.
    """
    now_ms = System.nanoTime()/1000000
    return now_ms - _time_init
def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
    """Recompute Black-Scholes on CPU and compare the first price with the GPU result.

    :param gpu_result: scalar result read back from the GPU
    :param reinit: when True, force recomputation of the cached CPU result
    """
    def CND(X):
        """
        Cumulative normal distribution.
        Helper function used by BS(...).
        """
        (a1, a2, a3, a4, a5) = (0.31938153, -0.356563782, 1.781477937, -1.821255978, 1.330274429)
        L = np.absolute(X)
        K = np.float64(1.0) / (1.0 + 0.2316419 * L)
        w = 1.0 - 1.0 / math.sqrt(2 * np.pi) * np.exp(-L * L / 2.) * \
            (a1 * K + a2 * (K ** 2) + a3 * (K ** 3) + a4 * (K ** 4) + a5 * (K ** 5))
        # Mirror the approximation for negative arguments.
        mask = X < 0
        w = w * ~mask + (1.0 - w) * mask
        return w

    def BS(X, R, V, T, K):
        """Black Scholes Function."""
        d1_arr = (np.log(X / K) + (R + V * V / 2.) * T) / (V * math.sqrt(T))
        d2_arr = d1_arr - V * math.sqrt(T)
        w_arr = CND(d1_arr)
        w2_arr = CND(d2_arr)
        return X * w_arr - X * math.exp(-R * T) * w2_arr

    # Recompute the CPU result only if necessary;
    start = System.nanoTime()
    if self.current_iter == 0 or reinit:
        res = BS(np.array(self.x_tmp), R, V, T, K)
        self.cpu_result = res[0]
    # FIX: convert nanoseconds to seconds, matching the "cpu_time_sec" key and
    # the "sec" label in the debug log (sibling benchmarks divide by 1e9).
    cpu_time = (System.nanoTime() - start) / 1_000_000_000
    difference = np.abs(self.cpu_result - gpu_result)
    self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
    self.benchmark.add_to_benchmark("cpu_gpu_res_difference", difference)
    if self.benchmark.debug:
        BenchmarkResult.log_message(
            f"\tcpu result: {self.cpu_result:.4f}, " +
            f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
def __init__(self):
    """
    Return Clock.
    """
    now = System.nanoTime() / 1000000
    self.time = now
    self.time_init = now
    self.time_diff = [25 for _ in range(10)]
    self.pos = 0
    self.thread = Thread()
def __init__(self):
    """
    Return Clock.
    """
    self.time = System.nanoTime()/1000000
    self.time_init = self.time
    # Rolling window of the last ten frame durations, seeded at 25ms.
    self.time_diff = [25 for _ in range(10)]
    self.pos = 0
    self.thread = Thread()
def __init__(self):
    """Initialize the clock: current ms, baseline, and a 33ms-seeded 10-sample window."""
    now = System.nanoTime()//1000000
    self._time = now
    self._time_init = now
    self._time_diff = [33] * 10
    self._pos = 0
    self._thread = Thread()
def execute_phase(self, phase_name, function, *args) -> object:
    """
    Executes a single step of the benchmark, possibily measuring the time it takes
    :param phase_name: name of this benchmark step
    :param function: a function to execute
    :param args: arguments of the function
    :return: the result of the function
    """
    # Fast path: no timing requested.
    if not self.time_phases:
        return function(*args)
    begin = System.nanoTime()
    res = function(*args)
    finish = System.nanoTime()
    self.benchmark.add_phase({"name": phase_name,
                              "time_sec": (finish - begin) / 1_000_000_000})
    return res
def setupGUI(self, initialFilename):
    """Build the JES UI, apply saved preferences, open the initial file, and report startup issues."""
    self.gui = JESUI(self)
    self.gui.windowSetting(None)
    self.setHelpArray()
    self.gui.changeSkin(JESConfig.getInstance().getStringProperty(
        JESConfig.CONFIG_SKIN))
    self.gui.show()
    # Editor decorations follow the persisted boolean preferences.
    if JESConfig.getInstance().getBooleanProperty(JESConfig.CONFIG_BLOCK):
        self.gui.editor.removeBox()
    else:
        self.gui.editor.addBox()
    if JESConfig.getInstance().getBooleanProperty(JESConfig.CONFIG_GUTTER):
        self.gui.turnOnGutter()
    else:
        self.gui.turnOffGutter()
    # Install the bridges.
    self.terpControl = InterpreterControl(self.gui, self.interpreter)
    self.replBuffer = REPLBuffer(self.interpreter, self.gui.commandWindow)
    # Open or create the file.
    if initialFilename is None:
        self.fileManager.newFile()
    else:
        self.fileManager.readFile(initialFilename)
    # Startup complete!
    # The launcher records its start time (ns) in a system property; compute
    # total startup latency in seconds. (`long` implies Jython 2.)
    startTimeNS = System.getProperty("jes.starttimens")
    if startTimeNS is not None:
        self.startupTimeSec = ((System.nanoTime() - long(startTimeNS)) / 1000000000.0)
    # Show introduction window if settings could not be loaded (Either new
    # JES user or bad write permissions)
    config = JESConfig.getInstance()
    loadError = config.getLoadError()
    if loadError is not None:
        JOptionPane.showMessageDialog(
            self.gui,
            "Your JESConfig.properties file could not be opened!\n" +
            loadError.toString(),
            "JES Configuration", JOptionPane.ERROR_MESSAGE)
    elif config.wasMigrated():
        JOptionPane.showMessageDialog(
            self.gui,
            "Your settings were imported from JES 4.3.\n" +
            "JES doesn't use the JESConfig.txt file in " +
            "your home directory anymore, so you can delete it.",
            "JES Configuration", JOptionPane.INFORMATION_MESSAGE)
    elif not config.wasLoaded():
        # NOTE(review): introController is not defined in this method —
        # presumably a module-level global; verify.
        introController.show()
def callAndTime(function, *args, **kwargs):
    """Invoke function(*args, **kwargs), reporting its wall-clock duration to stderr.

    Re-raises any exception from the call after reporting how long it ran.
    """
    if not callable(function):
        print "callAndTime(function[, arguments...]): Input is not a function"
    name = getattr(function, "__name__", "The function")

    def showElapsedTime(start, end):
        # nanoTime delta rendered as whole ms plus the fractional remainder.
        return "%d.%06d milliseconds" % divmod(end - start, 1000000)

    startTime = System.nanoTime()
    try:
        rv = function(*args, **kwargs)
    except:
        endTime = System.nanoTime()
        print >> sys.stderr, "%s ran for %s and crashed" % (name, showElapsedTime(startTime, endTime))
        raise
    else:
        endTime = System.nanoTime()
        print >> sys.stderr, "%s ran in %s" % (name, showElapsedTime(startTime, endTime))
        return rv
def callAndTime(function, *args, **kwargs):
    """Invoke function(*args, **kwargs), reporting its wall-clock duration to stderr.

    Re-raises any exception from the call after reporting how long it ran.
    """
    if not callable(function):
        print "callAndTime(function[, arguments...]): Input is not a function"
    name = getattr(function, '__name__', 'The function')

    def showElapsedTime(start, end):
        # nanoTime delta rendered as whole ms plus the fractional remainder.
        return "%d.%06d milliseconds" % divmod(end - start, 1000000)

    startTime = System.nanoTime()
    try:
        rv = function(*args, **kwargs)
    except:
        endTime = System.nanoTime()
        print >> sys.stderr, "%s ran for %s and crashed" % (
            name, showElapsedTime(startTime, endTime))
        raise
    else:
        endTime = System.nanoTime()
        print >> sys.stderr, "%s ran in %s" % (
            name, showElapsedTime(startTime, endTime))
        return rv
def tick(self, framerate=0):
    """
    Call once per program cycle, returns ms since last call.
    An optional framerate will add pause to limit rate.
    """
    self.pos = self.pos + 1 if self.pos < 9 else 0
    now = System.nanoTime()/1000000
    self.time = now
    self.time_diff[self.pos] = now - self.time_init
    self.time_init = now
    if framerate:
        avg = sum(self.time_diff)/10
        pause = long(((1.0/framerate)*1000) - avg)
        if pause > 0:
            self.thread.sleep(pause)
    return self.time_diff[self.pos]
def tick(self, framerate=0):
    """
    Call once per program cycle, returns ms since last call.
    An optional framerate will add pause to limit rate.
    """
    if self.pos < 9:
        self.pos += 1
    else:
        self.pos = 0
    self.time = System.nanoTime() / 1000000
    delta = self.time - self.time_init
    self.time_diff[self.pos] = delta
    self.time_init = self.time
    if framerate:
        # Sleep off the remainder of the frame budget, judged by the
        # rolling average of the last ten frame durations.
        average = sum(self.time_diff) / 10
        budget = long(((1.0 / framerate) * 1000) - average)
        if budget > 0:
            self.thread.sleep(budget)
    return self.time_diff[self.pos]
def store_stats(self):
    """Accumulate frame statistics once per MAX_STAT_INTERVAL of game time.

    Updates the rolling FPS/UPS stores and the running averages. Port of the
    stats collection from "Killer Game Programming in Java".
    """
    self.frame_count += 1
    self.stats_interval += self.period
    if self.stats_interval >= self.MAX_STAT_INTERVAL:
        time_now = System.nanoTime()
        self.time_spend_in_game = time_now - self.game_start_time
        real_elapsed_time = time_now - self.prev_stats_time
        self.total_elapsed_time += real_elapsed_time
        self.total_frames_skipped += self.frames_skipped
        if self.total_elapsed_time > 0:
            actual_fps = self.frame_count / self.total_elapsed_time
            actual_ups = (self.frame_count + self.total_frames_skipped) / self.total_elapsed_time
            self.fps_store[self.stats_count % self.NUM_FPS] = actual_fps
            self.ups_store[self.stats_count % self.NUM_FPS] = actual_ups
        self.stats_count += 1
        # FIX: the original `i = 0` + `if i < self.NUM_FPS` only ever read
        # slot 0 (and left the totals undefined when it did not run); sum the
        # whole rolling store as in the Java original.
        total_fps = 0
        total_ups = 0
        for i in range(self.NUM_FPS):
            total_fps += self.fps_store[i]
            total_ups += self.ups_store[i]
        if self.stats_count < self.NUM_FPS:
            # Store not yet full: average over the samples collected so far.
            self.average_fps = total_fps / self.stats_count
            self.average_ups = total_ups / self.stats_count
        else:
            self.average_fps = total_fps / self.NUM_FPS
            self.average_ups = total_ups / self.NUM_FPS
        self.frames_skipped = 0
        self.prev_stats_time = time_now
        self.stats_interval = 0
def execute(self) -> object:
    """Run the two independent sum kernels, read back both results, and return their sum."""
    # A. B. Call the kernels. The 2 computations are independent, and can be done in parallel;
    for label, array in (("sum_1", self.x), ("sum_2", self.y)):
        t0 = System.nanoTime()
        self.sum_kernel(self.num_blocks, self.block_size)(array, self.size)
        t1 = System.nanoTime()
        self.benchmark.add_phase({"name": label, "time_sec": (t1 - t0) / 1_000_000_000})
    t0 = System.nanoTime()
    result_1 = self.x[0]
    result_2 = self.y[0]
    t1 = System.nanoTime()
    self.benchmark.add_phase({"name": "read_result", "time_sec": (t1 - t0) / 1_000_000_000})
    self.benchmark.add_to_benchmark("gpu_result", result_1 + result_2)
    if self.benchmark.debug:
        BenchmarkResult.log_message(f"\tgpu result: {result_1} {result_2}")
    return result_1 + result_2
def __init__(self):
    """Record program start (ms), expose Clock with a shared repaint flag, and create the timer registry."""
    self._time_init = System.nanoTime() // 1000000
    Clock._repaint_sync = AtomicBoolean(False)
    self.Clock = Clock
    self._timers = {}
def __init__(self):
    """Initialize timing state: current ms, baseline, and a 33ms-seeded window."""
    self._time = System.nanoTime() // 1000000
    self._time_init = self._time
    self._time_diff = [33] * 10
    self._pos = 0
    self._thread = Thread()
def time(self):
    """
    Return system time (in ms).
    """
    now_ms = System.nanoTime() / 1000000.0
    return now_ms
def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
    """Recompute the two-branch CNN forward pass on CPU and compare with the GPU scalar result."""
    def relu(x):
        return np.maximum(x, 0)

    def conv3d2(x, kernels, shape, K, k_out, stride=1, operator=relu):
        # Naive KxK(xL channels) convolution over a flattened NxMxL input,
        # producing k_out output channels, with `operator` as activation.
        N, M, L = shape
        out = np.zeros((N // stride) * (M // stride) * k_out)
        radius = K // 2
        for m in range(k_out):
            for i in range(0, int(np.ceil(N / stride)) - radius):
                for j in range(0, int(np.ceil(M / stride)) - radius):
                    res = 0
                    i_f = i * stride + radius
                    j_f = j * stride + radius
                    for k_i in range(-radius, radius + 1):
                        for k_j in range(-radius, radius + 1):
                            for l in range(L):
                                ni = i_f + k_i
                                nj = j_f + k_j
                                res += kernels[l + L * (k_j + radius + K * (k_i + radius + K * m))] * x[((ni * M) + nj) * L + l]
                    out[m + k_out * (j + M * i // stride)] = operator(res)
        return out

    def pooling(x, shape, K, stride):
        # KxK average pooling over a flattened NxMxL input.
        # NOTE(review): `N // pooling` divides by this function object, not an
        # int — this raises at runtime; likely should be `stride` or `K`.
        # Also `out` is allocated 3-D but indexed 1-D below; verify intent.
        N, M, L = shape
        out = np.zeros((N // pooling, M // pooling, L))
        radius = K // 2
        for i in range(0, int(np.ceil(N / stride)) - radius):
            for j in range(0, int(np.ceil(M / stride)) - radius):
                for l in range(L):
                    res = 0
                    i_f = i * stride + radius
                    j_f = j * stride + radius
                    for k_i in range(-radius, radius + 1):
                        for k_j in range(-radius, radius + 1):
                            ni = i_f + k_i
                            nj = j_f + k_j
                            res += x[((ni * M) + nj) * L + l]
                    out[l + L * (j + M * i // stride)] = res / K**2
        return out

    def gap2(x, shape):
        # Global average pooling: mean over the NxM grid, per channel.
        N, M, L = shape
        out = np.zeros(L)
        for n in range(N):
            for m in range(M):
                for i in range(L):
                    out[i] += x[i + L * (m + M * n)] / (N * M)
        return out

    def concat(x, y):
        # x and y have the same length;
        out = np.zeros(2 * len(x))
        for i in range(len(x)):
            out[i] = x[i]
            out[i + len(x)] = y[i]
        return out
    # Recompute the CPU result only if necessary;
    start = System.nanoTime()
    if self.current_iter == 0 or reinit:
        # Initialize weights;
        N = self.size
        kernel_1 = np.zeros(len(self.kernel_1))
        kernel_2 = np.zeros(len(self.kernel_2))
        kernel_3 = np.zeros(len(self.kernel_3))
        kernel_4 = np.zeros(len(self.kernel_4))
        dense_weights = np.zeros(len(self.dense_weights))
        # Random weights;
        # (copied element-wise from the GPU-side arrays into numpy buffers)
        for i in range(len(self.kernel_1)):
            kernel_1[i] = self.kernel_1[i]
            kernel_3[i] = self.kernel_3[i]
        for i in range(len(self.kernel_2)):
            kernel_2[i] = self.kernel_2[i]
            kernel_4[i] = self.kernel_4[i]
        for i in range(len(self.dense_weights)):
            dense_weights[i] = self.dense_weights[i]
        # First convolution (N,N,1) -> (N/stride,N/stride,kn1)
        x_1 = conv3d2(np.array(self.x_cpu), kernel_1, (N, N, self.channels), self.K, self.kn1, stride=self.stride)
        x_11 = pooling(x_1, (N // self.stride, N // self.stride, self.kn1), self.pooling, self.pooling)
        # Second convolution (N/stride,N/stride,kn1) -> (N/stride^2,N/stride^2,kn2)
        x_2 = conv3d2(x_11, kernel_2, (N // self.stride // self.pooling, N // self.stride // self.pooling, self.kn1), self.K, self.kn2, stride=self.stride)
        # First convolution (N,N,1) -> (N/stride,N/stride,kn1)
        y_1 = conv3d2(np.array(self.y_cpu), kernel_3, (N, N, self.channels), self.K, self.kn1, stride=self.stride)
        y_11 = pooling(y_1, (N // self.stride, N // self.stride, self.kn1), self.pooling, self.pooling)
        # Second convolution (N/stride,N/stride,kn1) -> (N/stride^2,N/stride^2,kn2)
        y_2 = conv3d2(y_11, kernel_4, (N // self.stride // self.pooling, N // self.stride // self.pooling, self.kn1), self.K, self.kn2, stride=self.stride)
        # Global average pooling 2D;
        # x_3 = gap2(x_2, (N // (self.stride * self.stride), N // (self.stride * self.stride), self.kn2))
        # y_3 = gap2(y_2, (N // (self.stride * self.stride), N // (self.stride * self.stride), self.kn2))
        # Concatenate;
        out = concat(x_2, y_2)
        # Final dense layer;
        self.cpu_result = out.dot(dense_weights[:len(out)])
        # self.cpu_result = x_1[:100]
    cpu_time = (System.nanoTime() - start) / 1_000_000_000
    # Compare GPU and CPU results;
    difference = np.abs(self.cpu_result - gpu_result)
    self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
    self.benchmark.add_to_benchmark("cpu_gpu_res_difference", str(difference))
    if self.benchmark.debug:
        # BenchmarkResult.log_message(
        #     f"\tcpu result: [" + ", ".join([f"{x:.2f}" for x in self.cpu_result[:100]]) + "...]"+
        #     f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
        BenchmarkResult.log_message(
            f"\tcpu result: {self.cpu_result:.4f}; " +
            f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
def get_time(self):
    """
    Get current time.
    """
    now_ms = System.nanoTime()/1000000
    return now_ms
#PyJ2D - Copyright (C) 2011 James Garnon from __future__ import division from java.lang import Thread, System __docformat__ = 'restructuredtext' _time_init = System.nanoTime()/1000000 class Clock(object): """ **pyj2d.time.Clock** * Clock.get_time * Clock.tick * Clock.tick_busy_loop * Clock.get_fps """ def __init__(self): """ Return Clock. """ self.time = System.nanoTime()/1000000 self.time_init = self.time self.time_diff = [25]*10 self.pos = 0 self.thread = Thread()
def execute(self) -> object:
    """Run the conjugate-gradient solver on the GPU and return the solution vector."""
    num_blocks_spmv = int(np.ceil(self.size / self.block_size))
    start_comp = System.nanoTime()
    start = 0
    # Initialization phase;
    # r = b - A * x
    self.execute_phase("spmv_init", self.spmv_full_kernel(num_blocks_spmv, self.block_size, 4 * self.block_size),
                       self.row_cnt_1, self.ptr, self.idx, self.val, self.x, self.r, self.size, -1, self.b)
    # p = r
    self.execute_phase("cpy_init", self.cpy_kernel(self.num_blocks_size, self.block_size),
                       self.p, self.r, self.size)
    # t1 = r^t * r
    self.execute_phase("norm_init", self.norm_kernel(self.num_blocks_size, self.block_size),
                       self.r, self.t1, self.size)
    for i in range(self.num_iterations):
        # t2 = p^t * A * p
        self.execute_phase(f"spmv_{i}", self.spmv_kernel(num_blocks_spmv, self.block_size, 4 * self.block_size),
                           self.row_cnt_2, self.ptr, self.idx, self.val, self.p, self.y, self.size)
        self.t2[0] = 0
        self.execute_phase(f"dp_{i}", self.dp_kernel(self.num_blocks_size, self.block_size),
                           self.p, self.y, self.t2, self.size)
        if self.time_phases:
            start = System.nanoTime()
        # Scalar updates on the host; reading t1/t2 presumably forces a sync
        # with the kernels above — TODO confirm against the runtime semantics.
        alpha = self.t1[0] / self.t2[0]
        old_r_norm_squared = self.t1[0]
        self.t1[0] = 0
        self.row_cnt_1[0] = 0.0
        self.row_cnt_2[0] = 0.0
        if self.time_phases:
            end = System.nanoTime()
            self.benchmark.add_phase({"name": f"alpha_{i}", "time_sec": (end - start) / 1_000_000_000})
        # Update x: x = x + alpha * p
        self.execute_phase(f"saxpy_x_{i}", self.saxpy_kernel(self.num_blocks_size, self.block_size),
                           self.x, self.x, self.p, alpha, self.size)
        # r = r - alpha * y
        self.execute_phase(f"saxpy_r_{i}", self.saxpy_kernel(self.num_blocks_size, self.block_size),
                           self.r, self.r, self.y, -1 * alpha, self.size)
        # t1 = r^t * r
        self.execute_phase(f"norm_{i}", self.norm_kernel(self.num_blocks_size, self.block_size),
                           self.r, self.t1, self.size)
        if self.time_phases:
            start = System.nanoTime()
        beta = self.t1[0] / old_r_norm_squared
        if self.time_phases:
            end = System.nanoTime()
            self.benchmark.add_phase({"name": f"beta_{i}", "time_sec": (end - start) / 1_000_000_000})
        # p = r + beta * p
        self.execute_phase(f"saxpy_p_{i}", self.saxpy_kernel(self.num_blocks_size, self.block_size),
                           self.p, self.r, self.p, beta, self.size)
    # Add a final sync step to measure the real computation time;
    if self.time_phases:
        start = System.nanoTime()
    # Reading one element forces synchronization before taking the end time.
    tmp1 = self.x[0]
    end = System.nanoTime()
    if self.time_phases:
        self.benchmark.add_phase({"name": "sync", "time_sec": (end - start) / 1_000_000_000})
    self.benchmark.add_computation_time((end - start_comp) / 1_000_000_000)
    # Compute GPU result;
    for i in range(self.size):
        self.gpu_result[i] = self.x[i]
    self.benchmark.add_to_benchmark("gpu_result", 0)
    if self.benchmark.debug:
        BenchmarkResult.log_message(f"\tgpu result: [" + ", ".join([f"{x:.4f}" for x in self.gpu_result[:10]]) + "...]")
    return self.gpu_result
def setupGUI(self, initialFilename):
    """Build the JES UI, apply saved preferences, open the initial file, and report startup issues."""
    self.gui = JESUI(self)
    self.gui.windowSetting(None)
    self.setHelpArray()
    self.gui.changeSkin(
        JESConfig.getInstance().getStringProperty(JESConfig.CONFIG_SKIN))
    self.gui.show()
    # Editor decorations follow the persisted boolean preferences.
    if JESConfig.getInstance().getBooleanProperty(JESConfig.CONFIG_BLOCK):
        self.gui.editor.removeBox()
    else:
        self.gui.editor.addBox()
    if JESConfig.getInstance().getBooleanProperty(JESConfig.CONFIG_GUTTER):
        self.gui.turnOnGutter()
    else:
        self.gui.turnOffGutter()
    # Install the bridges.
    self.terpControl = InterpreterControl(self.gui, self.interpreter)
    self.replBuffer = REPLBuffer(self.interpreter, self.gui.commandWindow)
    # Open or create the file.
    if initialFilename is None:
        self.fileManager.newFile()
    else:
        self.fileManager.readFile(initialFilename)
    # Startup complete!
    # The launcher records its start time (ns) in a system property; compute
    # total startup latency in seconds. (`long` implies Jython 2.)
    startTimeNS = System.getProperty("jes.starttimens")
    if startTimeNS is not None:
        self.startupTimeSec = (
            (System.nanoTime() - long(startTimeNS)) / 1000000000.0
        )
    # Show introduction window if settings could not be loaded (Either new
    # JES user or bad write permissions)
    config = JESConfig.getInstance()
    loadError = config.getLoadError()
    if loadError is not None:
        JOptionPane.showMessageDialog(
            self.gui,
            "Your JESConfig.properties file could not be opened!\n" +
            loadError.toString(),
            "JES Configuration", JOptionPane.ERROR_MESSAGE
        )
    elif config.wasMigrated():
        JOptionPane.showMessageDialog(
            self.gui,
            "Your settings were imported from JES 4.3.\n" +
            "JES doesn't use the JESConfig.txt file in " +
            "your home directory anymore, so you can delete it.",
            "JES Configuration", JOptionPane.INFORMATION_MESSAGE
        )
    elif not config.wasLoaded():
        # NOTE(review): introController is not defined in this method —
        # presumably a module-level global; verify.
        introController.show()
def __init__(self):
    """Record the construction time and wire up the shared Clock helper."""
    # Milliseconds (from Java's nanosecond clock) at construction time,
    # kept as the time origin for later elapsed-time calculations.
    now_ms = System.nanoTime() // 1000000
    self._time_init = now_ms
    # Expose the Clock class through the instance, and give it a fresh,
    # initially-cleared repaint synchronization flag.
    self.Clock = Clock
    self.Clock._repaint_sync = AtomicBoolean(False)
def execute(self) -> object:
    """Run the two-branch CNN benchmark on the GPU and return its scalar result.

    Pipeline per branch: conv -> pool -> conv, then the two branches are
    concatenated and reduced through a dense layer (dot product). Each
    stage is launched via ``execute_phase`` so per-phase timings can be
    recorded when ``self.time_phases`` is set.

    :return: the first element of the dense-layer output (``self.res[0]``).
    """
    self.block_size_1d = self._block_size["block_size_1d"]
    self.block_size_2d = self._block_size["block_size_2d"]
    self.num_blocks_per_processor = self.num_blocks
    # Wall-clock origin for the total computation time.
    start_comp = System.nanoTime()
    start = 0

    # Grid dimension for the 2D kernels. NOTE(review): true division, so
    # `a` is a float (and so are `a / 2`, `self.block_size_2d / 2` below);
    # presumably the GrCUDA runtime coerces these — confirm `//` was not
    # intended.
    a = self.num_blocks_per_processor / 2
    # Convolutions;
    # First convolution of each branch: x -> x1 (kernel_1) and y -> y1
    # (kernel_3). The last kernel-factory argument (4 * K^2 * kn1 * channels)
    # is presumably a shared-memory size in bytes — TODO confirm.
    self.execute_phase(
        "conv_x1",
        self.conv2d_kernel(
            (a, a), (self.block_size_2d, self.block_size_2d),
            4 * (self.K**2) * self.kn1 * self.channels),
        self.x1, self.x, self.kernel_1, self.size, self.size,
        self.channels, self.K, self.kn1, self.stride)
    self.execute_phase(
        "conv_y1",
        self.conv2d_kernel(
            (a, a), (self.block_size_2d, self.block_size_2d),
            4 * (self.K**2) * self.kn1 * self.channels),
        self.y1, self.y, self.kernel_3, self.size, self.size,
        self.channels, self.K, self.kn1, self.stride)

    # Pooling;
    # 3D pooling over each branch's first feature maps (x1 -> x11,
    # y1 -> y11); input side length is reduced by the conv stride.
    self.execute_phase(
        "pool_x1",
        self.pooling_kernel(
            (a / 2, a / 2, a / 2),
            (self.block_size_2d / 2, self.block_size_2d / 2,
             self.block_size_2d / 2)),
        self.x11, self.x1, self.size // self.stride,
        self.size // self.stride, self.kn1, self.pooling, self.pooling)
    self.execute_phase(
        "pool_y1",
        self.pooling_kernel(
            (a / 2, a / 2, a / 2),
            (self.block_size_2d / 2, self.block_size_2d / 2,
             self.block_size_2d / 2)),
        self.y11, self.y1, self.size // self.stride,
        self.size // self.stride, self.kn1, self.pooling, self.pooling)

    # Other convolutions;
    # Second convolution of each branch on the pooled maps (x11 -> x2 via
    # kernel_2, y11 -> y2 via kernel_4); side length shrinks by both the
    # stride and the pooling factor.
    self.execute_phase(
        "conv_x2",
        self.conv2d_kernel(
            (a, a), (self.block_size_2d, self.block_size_2d),
            4 * (self.K**2) * self.kn1 * self.kn2),
        self.x2, self.x11, self.kernel_2,
        self.size // self.stride // self.pooling,
        self.size // self.stride // self.pooling,
        self.kn1, self.K, self.kn2, self.stride)
    self.execute_phase(
        "conv_y2",
        self.conv2d_kernel(
            (a, a), (self.block_size_2d, self.block_size_2d),
            4 * (self.K**2) * self.kn1 * self.kn2),
        self.y2, self.y11, self.kernel_4,
        self.size // self.stride // self.pooling,
        self.size // self.stride // self.pooling,
        self.kn1, self.K, self.kn2, self.stride)

    # Global average pooling (disabled: the dense layer below consumes
    # x2/y2 directly);
    # self.execute_phase("gap_x",
    # self.gap_kernel((a, a), (self.block_size_2d, self.block_size_2d), 4 * self.kn2),
    # self.x3, self.x2, self.size // self.stride**2, self.size // self.stride**2, self.kn2)
    # self.execute_phase("gap_y",
    # self.gap_kernel((a, a), (self.block_size_2d, self.block_size_2d), 4 * self.kn2),
    # self.y3, self.y2, self.size // self.stride ** 2, self.size // self.stride ** 2, self.kn2)

    # Dense layer;
    # Concatenate the two branches into z, then reduce against the dense
    # weights into self.res.
    self.execute_phase(
        "concat",
        self.concat_kernel(self.num_blocks_per_processor,
                           self.block_size_1d),
        self.z, self.x2, self.y2, len(self.x2))
    self.execute_phase(
        "dot_product",
        self.dp_kernel(self.num_blocks_per_processor, self.block_size_1d),
        self.z, self.dense_weights, self.res, len(self.z))

    # Add a final sync step to measure the real computation time;
    # reading res[0] presumably forces synchronization with the device —
    # TODO confirm against the GrCUDA array semantics.
    if self.time_phases:
        start = System.nanoTime()
    # self.gpu_result = sigmoid(self.res[0])
    self.gpu_result = self.res[0]
    # self.gpu_result = [self.x1[i] for i in range(100)]
    end = System.nanoTime()
    if self.time_phases:
        self.benchmark.add_phase({
            "name": "sync",
            "time_sec": (end - start) / 1_000_000_000
        })
    # Total time in seconds, from the first kernel launch to the sync.
    self.benchmark.add_computation_time((end - start_comp) / 1_000_000_000)
    self.benchmark.add_to_benchmark("gpu_result", self.gpu_result)
    if self.benchmark.debug:
        BenchmarkResult.log_message(
            f"\tgpu result: {self.gpu_result:.4f}")
        # BenchmarkResult.log_message(
        #     f"\tgpu result: [" + ", ".join([f"{x:.2f}" for x in self.gpu_result[:100]]) + "...]")
    return self.gpu_result