Exemple #1
0
 def call(self):
     """
     Invoke the stored callable with the stored arguments and time it.

     Returns None without calling anything when the current thread is
     interrupted or not alive. Otherwise returns a tuple
     (result, elapsed time in milliseconds as a float).
     """
     current = Thread.currentThread()
     if not current.isInterrupted() and current.isAlive():
         began = System.nanoTime()
         outcome = self.fn(*self.args, **self.kwargs)
         # nanoTime() differences are nanoseconds; 1e6 ns per millisecond.
         elapsed_ms = (System.nanoTime() - began) / 1000000.0
         return outcome, elapsed_ms
     return None
Exemple #2
0
 def _isPaused(self, keycode):
     if keycode not in self.keyHeld:
         self.keyHeld[keycode] = {
             'pressed': False,
             'delay': False,
             'time': 0
         }
     key = self.keyHeld[keycode]
     if not key['pressed']:
         key['pressed'] = True
         paused = False
         if self.keyRepeat[0]:
             key['delay'] = True
             key['time'] = System.nanoTime() // 1000000
     else:
         paused = True
         if self.keyRepeat[0]:
             time = System.nanoTime() // 1000000
             if key['delay']:
                 if time - key['time'] > self.keyRepeat[0]:
                     key['time'] = time
                     key['delay'] = False
                     paused = False
             elif time - key['time'] > self.keyRepeat[1]:
                 key['time'] = time
                 paused = False
     return paused
Exemple #3
0
    def execute(self) -> object:
        """
        Launch the kernel self.K times, then read the first element of each output.

        The total elapsed time is recorded through
        benchmark.add_computation_time. When self.time_phases is set, the
        read-back alone is also recorded as a phase named "sync".

        :return: the first element read from self.y[0]
        """
        nanos_per_sec = 1_000_000_000
        self.block_size = self._block_size["block_size_1d"]

        # Call the kernels;
        comp_begin = System.nanoTime()
        sync_begin = System.nanoTime()
        for idx in range(self.K):
            kernel = self.bs_kernel(self.num_blocks, self.block_size)
            self.execute_phase(f"bs_{idx}", kernel, self.x[idx], self.y[idx],
                               self.size, R, V, T, K)

        # Restart the phase timer so that "sync" covers only the read-back;
        if self.time_phases:
            sync_begin = System.nanoTime()
        result = [self.y[idx][0] for idx in range(self.K)]
        sync_end = System.nanoTime()

        if self.time_phases:
            sync_phase = {
                "name": "sync",
                "time_sec": (sync_end - sync_begin) / nanos_per_sec,
            }
            self.benchmark.add_phase(sync_phase)
        self.benchmark.add_computation_time((sync_end - comp_begin) / nanos_per_sec)

        first = result[0]
        self.benchmark.add_to_benchmark("gpu_result", first)
        if self.benchmark.debug:
            BenchmarkResult.log_message(f"\tgpu result: {first}")

        return first
Exemple #4
0
    def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
        """
        Compute the reference result on the CPU and compare it with the GPU result.

        The CPU result is recomputed only on the first iteration or when
        reinit is true; otherwise the cached self.cpu_result is reused.
        The elapsed time (in seconds) and the absolute difference are stored
        in the benchmark, and logged when debugging is enabled.

        :param gpu_result: value produced by the GPU run
        :param reinit: force recomputation of the CPU result
        """
        # Recompute the CPU result only if necessary;
        start = System.nanoTime()
        if self.current_iter == 0 or reinit:
            # Re-initialize the random number generator with the same seed as the GPU to generate the same values;
            seed(self.random_seed)
            if self.benchmark.random_init:
                x_g = np.zeros(self.size)
                y_g = np.zeros(self.size)
                # Keep the element-by-element loop: the x/y draws are interleaved,
                # and changing the draw order would change the generated values;
                for i in range(self.size):
                    x_g[i] = randint(0, 10)
                    y_g[i] = randint(0, 10)
            else:
                x_g = 1 / np.linspace(1, self.size, self.size)
                y_g = 1 / np.linspace(1, self.size, self.size)

            x_g += 1
            y_g += 1
            self.cpu_result = x_g[0] + y_g[0]
        # System.nanoTime() counts nanoseconds; convert to seconds so the value
        # matches the "cpu_time_sec" key and the "sec" label in the log message;
        cpu_time = (System.nanoTime() - start) / 1_000_000_000
        difference = np.abs(self.cpu_result - gpu_result)
        self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
        self.benchmark.add_to_benchmark("cpu_gpu_res_difference", difference)
        if self.benchmark.debug:
            BenchmarkResult.log_message(f"\tcpu result: {self.cpu_result:.4f}, " +
                                        f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
Exemple #5
0
 def tick(self, framerate=0):
     """
     Call once per program cycle, returns ms since last call.
     An optional framerate will add pause to limit rate.
     """
     # Wait in 1 ms steps while the repaint flag is set; stop waiting if interrupted.
     while self._repaint_sync.get():
         try:
             self._thread.sleep(1)
         except InterruptedException:
             Thread.currentThread().interrupt()
             break

     now = System.nanoTime()//1000000
     self._time = now
     if framerate:
         # Milliseconds left in this frame's budget after the work already done.
         remaining = (1000//framerate) - (now-self._time_init)
         if remaining > 0:
             try:
                 self._thread.sleep(remaining)
             except InterruptedException:
                 Thread.currentThread().interrupt()
             self._time = System.nanoTime()//1000000

     # Step backwards through the 10-slot history, wrapping from 0 to 9.
     self._pos = self._pos - 1 if self._pos else 9
     elapsed = self._time-self._time_init
     self._time_diff[self._pos] = elapsed
     self._time_init = self._time
     return self._time_diff[self._pos]
Exemple #6
0
 def tick(self, framerate=0):
     """
     Call once per program cycle, returns ms since last call.
     An optional framerate will add pause to limit rate.
     """
     # Poll the repaint flag, sleeping 1 ms per check; give up on interruption.
     while self._repaint_sync.get():
         try:
             self._thread.sleep(1)
         except InterruptedException:
             Thread.currentThread().interrupt()
             break

     self._time = System.nanoTime() // 1000000
     if framerate:
         frame_budget = 1000 // framerate
         already_spent = self._time - self._time_init
         time_left = frame_budget - already_spent
         if time_left > 0:
             try:
                 self._thread.sleep(time_left)
             except InterruptedException:
                 Thread.currentThread().interrupt()
             # Re-read the clock so the pause is part of this tick's duration.
             self._time = System.nanoTime() // 1000000

     # The history index counts down and wraps to the last slot (9).
     if not self._pos:
         self._pos = 9
     else:
         self._pos -= 1
     slot = self._pos
     self._time_diff[slot] = self._time - self._time_init
     self._time_init = self._time
     return self._time_diff[slot]
Exemple #7
0
    def execute(self) -> object:
        """
        Run the two "square" phases and the "reduce" phase, then read back the result.

        The total elapsed time is recorded through
        benchmark.add_computation_time. When self.time_phases is set, the
        final read of self.res[0] is also recorded as a phase named "sync".

        :return: the value read from self.res[0]
        """
        nanos_per_sec = 1_000_000_000
        self.block_size = self._block_size["block_size_1d"]
        comp_begin = System.nanoTime()
        sync_begin = 0

        # A, B. Call the kernel. The 2 computations are independent, and can be done in parallel;
        square_a = self.square_kernel(self.num_blocks, self.block_size)
        self.execute_phase("square_1", square_a, self.x, self.x1, self.size)
        square_b = self.square_kernel(self.num_blocks, self.block_size)
        self.execute_phase("square_2", square_b, self.y, self.y1, self.size)

        # C. Compute the sum of the result;
        reducer = self.reduce_kernel(self.num_blocks, self.block_size)
        self.execute_phase("reduce", reducer, self.x1, self.y1, self.res, self.size)

        # Add a final sync step to measure the real computation time;
        if self.time_phases:
            sync_begin = System.nanoTime()
        result = self.res[0]
        sync_end = System.nanoTime()

        if self.time_phases:
            sync_phase = {
                "name": "sync",
                "time_sec": (sync_end - sync_begin) / nanos_per_sec,
            }
            self.benchmark.add_phase(sync_phase)
        self.benchmark.add_computation_time((sync_end - comp_begin) / nanos_per_sec)
        self.benchmark.add_to_benchmark("gpu_result", result)
        if self.benchmark.debug:
            BenchmarkResult.log_message(f"\tgpu result: {result:.4f}")

        return result
Exemple #8
0
def delay(time):
    """
    **pyj2d.time.delay**
    
    Sleep the current thread for the given number of ms. Return ms paused.
    """
    began_ms = System.nanoTime()/1000000
    Thread.sleep(time)
    finished_ms = System.nanoTime()/1000000
    return finished_ms - began_ms
Exemple #9
0
 def func_call(self, *args, **kwargs) -> object:
     """
     Call the wrapped func and record its duration as a benchmark phase.

     The phase is added to self.benchmark under the enclosing phase_name,
     with the elapsed time expressed in seconds.

     :return: whatever func returns
     """
     began = System.nanoTime()
     outcome = func(self, *args, **kwargs)
     elapsed_sec = (System.nanoTime() - began) / 1_000_000_000
     self.benchmark.add_phase({"name": phase_name, "time_sec": elapsed_sec})
     return outcome
Exemple #10
0
def delay(time):
    """
    **pyj2d.time.delay**
    
    Block for the requested time (in ms). Return ms actually paused.
    """
    before = System.nanoTime() / 1000000
    Thread.sleep(time)
    after = System.nanoTime() / 1000000
    return after - before
Exemple #11
0
def timeit(n_iterations, fn, *args, **kwargs):
    """
    Call fn(*args, **kwargs) n_iterations times and print timing statistics.

    Prints the minimum, maximum and mean duration of the calls in
    milliseconds. The return values of fn are discarded.

    :param n_iterations: number of timed calls; must be at least 1
    :param fn: callable to time
    :raises ValueError: if n_iterations is less than 1
    """
    # Without at least one sample min()/max() would fail with an unhelpful
    # "empty sequence" ValueError; fail early with a clear message instead.
    if n_iterations < 1:
        raise ValueError("n_iterations must be at least 1, got %r" % (n_iterations,))

    times = []
    for _ in xrange(n_iterations):
        t0 = System.nanoTime()
        fn(*args, **kwargs)
        t1 = System.nanoTime()
        times.append(t1 - t0)
    # Samples are nanoseconds; 1e6 ns per millisecond.
    print("min: %.2f ms, max: %.2f ms, mean: %.2f ms" %
          (min(times) / 1000000.0, max(times) / 1000000.0, sum(times) /
           (len(times) * 1000000.0)))
Exemple #12
0
    def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
        """
        Compute the reference solution on the CPU and compare it with the GPU result.

        The CPU result is recomputed only on the first iteration or when
        reinit is true. It runs self.num_iterations steps of an iterative
        solve that starts from x = 1 and uses the sparse matrix stored in
        ptr_cpu/idx_cpu/val_cpu.
        NOTE(review): the update pattern looks like conjugate gradient - confirm.
        The elapsed time (in seconds) and the summed absolute difference over
        the first self.size elements are stored in the benchmark.

        :param gpu_result: indexable result produced by the GPU run
        :param reinit: force recomputation of the CPU result
        """

        def spmv(ptr, idx, val, vec):
            # Sparse matrix-vector product: row i uses entries ptr[i]..ptr[i + 1] - 1
            # of val, with idx giving the position in vec for each entry;
            res = np.zeros(len(ptr) - 1)
            for i in range(len(ptr) - 1):
                curr_sum = 0
                start = int(ptr[i])
                end = int(ptr[i + 1])
                for j in range(start, end):
                    curr_sum += val[j] * vec[idx[j]]
                res[i] = curr_sum
            return res

        # Recompute the CPU result only if necessary;
        start = System.nanoTime()
        if self.current_iter == 0 or reinit:
            # Re-initialize the random number generator with the same seed as the GPU to generate the same values;
            seed(self.random_seed)
            # Initialize the support device arrays;
            N = self.size

            x = np.ones(N)
            # r = b - A * x
            r = np.array(self.b_cpu) - np.array(spmv(self.ptr_cpu, self.idx_cpu, self.val_cpu, x))
            p = r.copy()
            t1 = r.T.dot(r)

            # Main iteration;
            for i in range(self.num_iterations):
                y = spmv(self.ptr_cpu, self.idx_cpu, self.val_cpu, p)
                t2 = p.dot(y)
                alpha = t1 / t2
                t1_old = t1
                x += alpha * p
                r -= alpha * y
                t1 = r.T.dot(r)
                beta = t1 / t1_old
                p = r + beta * p

            self.cpu_result = x

        # System.nanoTime() counts nanoseconds; convert to seconds so the value
        # matches the "cpu_time_sec" key and the "sec" label in the log message;
        cpu_time = (System.nanoTime() - start) / 1_000_000_000

        # Compare GPU and CPU results;
        difference = 0
        for i in range(self.size):
            difference += np.abs(self.cpu_result[i] - gpu_result[i])

        self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
        self.benchmark.add_to_benchmark("cpu_gpu_res_difference", str(difference))
        if self.benchmark.debug:
            BenchmarkResult.log_message(f"\tcpu result: [" + ", ".join([f"{x:.4f}" for x in self.cpu_result[:10]])
                                        + "...]; " +
                                        f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
 def run(self):
     """
     Game loop: update, render and paint, then sleep off the rest of the period.

     Each cycle is measured with System.nanoTime(), so self.period and all
     local time values are in nanoseconds. When a cycle overruns, the
     overrun is accumulated in `excess` and paid back with extra
     game_update() calls (at most self.MAX_FRAME_SKIPS per cycle). After
     self.NO_DELAYS_PER_YIELD consecutive cycles without a sleep, the
     thread yields so other threads can run.
     """
     over_sleep_time = 0
     no_delays = 0
     excess = 0

     before_time = System.nanoTime()

     # NOTE(review): nothing in this method ever clears `running`, so the loop
     # ends only if a call below raises or exits; confirm whether a shared
     # flag (e.g. an attribute on self) was intended.
     running = True

     while running:
         self.game_update()
         self.game_render()
         self.paint_screen()

         after_time = System.nanoTime()
         time_diff = after_time - before_time
         sleep_time = (self.period - time_diff) - over_sleep_time

         if sleep_time > 0:
             try:
                 # sleep_time is in nanoseconds but Thread.sleep() expects
                 # milliseconds, so convert before sleeping.
                 Thread.sleep(int(sleep_time // 1000000))
             except InterruptedException:
                 # Best effort: an interrupted sleep just shortens this frame.
                 pass
             over_sleep_time = (System.nanoTime() - after_time) - sleep_time
         else:
             # The frame took longer than the period; remember the overrun.
             excess -= sleep_time
             over_sleep_time = 0

             # Count consecutive frames without a sleep (the counter was
             # previously never incremented, so it could not reach the limit).
             no_delays += 1
             if no_delays >= self.NO_DELAYS_PER_YIELD:
                 # `yield` is a Python keyword, so Thread.yield() cannot be
                 # written as a plain attribute access; look it up by name.
                 getattr(Thread, "yield")()
                 no_delays = 0

         before_time = System.nanoTime()

         # Catch up on missed updates without rendering.
         skips = 0
         while excess > self.period and skips < self.MAX_FRAME_SKIPS:
             excess -= self.period
             self.game_update()
             skips += 1

         self.frames_skipped += skips

         self.store_stats()

     self.print_stats()
     System.exit(0)
Exemple #14
0
def execute_cuda_benchmark(benchmark,
                           size,
                           block_size,
                           exec_policy,
                           num_iter,
                           debug,
                           prefetch=False,
                           num_blocks=DEFAULT_NUM_BLOCKS,
                           output_date=None):
    """
    Run one CUDA benchmark command as a subprocess, writing its results to a CSV file.

    The command is built from CUDA_CMD and executed through the shell in
    $GRCUDA_HOME/projects/resources/cuda/bin. The output file is placed in
    a "<output_date>_cuda" folder under BenchmarkResult.DEFAULT_RES_FOLDER.

    :param benchmark: benchmark name, passed to the command and used in the file name
    :param size: input size, passed to the command and used in the file name
    :param block_size: mapping with "block_size_1d" and "block_size_2d" entries
    :param exec_policy: execution policy, passed to the command
    :param num_iter: number of iterations, passed to the command
    :param debug: if true, log progress through BenchmarkResult.log_message
    :param prefetch: if true, "-r" is added to the command
    :param num_blocks: number of blocks, passed to the command
    :param output_date: timestamp used in folder and file names; defaults to the current time
    :raises subprocess.CalledProcessError: if the command exits with a non-zero status
    """
    if debug:
        BenchmarkResult.log_message("")
        BenchmarkResult.log_message("")
        BenchmarkResult.log_message("#" * 30)
        # NOTE(review): `i` and `tot_benchmarks` are not parameters; presumably
        # module-level progress counters maintained by the caller - confirm.
        BenchmarkResult.log_message(f"Benchmark {i + 1}/{tot_benchmarks}")
        # Log the values this call actually runs with, not caller-side globals.
        BenchmarkResult.log_message(f"benchmark={benchmark}, size={size},"
                                    f" block size={block_size}, "
                                    f" prefetch={prefetch}, "
                                    f" num blocks={num_blocks}, "
                                    f" exec policy={exec_policy}")
        BenchmarkResult.log_message("#" * 30)
        BenchmarkResult.log_message("")
        BenchmarkResult.log_message("")

    if not output_date:
        output_date = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    file_name = f"cuda_{output_date}_{benchmark}_{exec_policy}_{size}_{block_size['block_size_1d']}_{block_size['block_size_2d']}_{prefetch}_{num_iter}_{num_blocks}.csv"
    # Create a folder if it doesn't exist;
    output_folder_path = os.path.join(BenchmarkResult.DEFAULT_RES_FOLDER,
                                      output_date + "_cuda")
    if not os.path.exists(output_folder_path):
        if debug:
            BenchmarkResult.log_message(
                f"creating result folder: {output_folder_path}")
        # exist_ok tolerates another process creating the folder between the
        # check above and this call;
        os.makedirs(output_folder_path, exist_ok=True)
    output_path = os.path.join(output_folder_path, file_name)

    benchmark_cmd = CUDA_CMD.format(benchmark, exec_policy, size,
                                    block_size["block_size_1d"],
                                    block_size["block_size_2d"], num_iter,
                                    num_blocks, "-r" if prefetch else "", "-a",
                                    output_path)
    start = System.nanoTime()
    # subprocess.STDOUT is only meaningful as the `stderr` argument; it is not
    # a valid `stdout` target. Leaving stdout unset lets the child inherit this
    # process's standard output;
    result = subprocess.run(
        benchmark_cmd,
        shell=True,
        cwd=f"{os.getenv('GRCUDA_HOME')}/projects/resources/cuda/bin")
    result.check_returncode()
    end = System.nanoTime()
    if debug:
        BenchmarkResult.log_message(
            f"Benchmark total execution time: {(end - start) / 1_000_000_000:.2f} seconds"
        )
Exemple #15
0
 def delay(self, time):
     """
     **pyj2d.time.delay**
     
     Sleep for the given time (in ms). Return ms paused.
     If the sleep is interrupted, the thread's interrupt flag is set again.
     """
     began_ms = System.nanoTime() // 1000000
     try:
         Thread.sleep(time)
     except InterruptedException:
         Thread.currentThread().interrupt()
     finished_ms = System.nanoTime() // 1000000
     return finished_ms - began_ms
Exemple #16
0
 def delay(self, time):
     """
     **pyj2d.time.delay**
     
     Block for the requested time (in ms). Return ms actually paused.
     An interruption ends the sleep early and re-sets the interrupt flag.
     """
     before = System.nanoTime()//1000000
     try:
         Thread.sleep(time)
     except InterruptedException:
         Thread.currentThread().interrupt()
     after = System.nanoTime()//1000000
     return after - before
Exemple #17
0
    def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
        """
        Compute the reference hub + authority scores on the CPU and compare with the GPU result.

        The CPU result is recomputed only on the first iteration or when
        reinit is true. Each of the self.num_iterations steps multiplies the
        two sparse matrices (ptr2/idx2/val2 and ptr/idx/val) by the previous
        hub and authority vectors and normalizes each product by its sum.
        The elapsed time (in seconds) and the summed absolute difference over
        the first self.size elements are stored in the benchmark.

        :param gpu_result: indexable result produced by the GPU run
        :param reinit: force recomputation of the CPU result
        """
        def spmv(ptr, idx, val, vec):
            # Sparse matrix-vector product: row i uses entries ptr[i]..ptr[i + 1] - 1
            # of val, with idx giving the position in vec for each entry;
            res = np.zeros(len(ptr) - 1)
            for i in range(len(ptr) - 1):
                curr_sum = 0
                start = int(ptr[i])
                end = int(ptr[i + 1])
                for j in range(start, end):
                    curr_sum += val[j] * vec[idx[j]]
                res[i] = curr_sum
            return res

        # Recompute the CPU result only if necessary;
        start = System.nanoTime()
        if self.current_iter == 0 or reinit:
            # Re-initialize the random number generator with the same seed as the GPU to generate the same values;
            seed(self.random_seed)
            # Initialize the support device arrays;
            N = self.size

            auth1 = np.ones(N)
            hub1 = np.ones(N)

            # Main iteration;
            for i in range(self.num_iterations):
                # Authority;
                auth2 = spmv(self.ptr2_cpu, self.idx2_cpu, self.val2_cpu, hub1)
                auth2 = auth2 / np.sum(auth2)
                # Hubs
                hub2 = spmv(self.ptr_cpu, self.idx_cpu, self.val_cpu, auth1)
                hub2 = hub2 / np.sum(hub2)

                auth1 = auth2
                hub1 = hub2
            self.cpu_result = hub1 + auth1

        # System.nanoTime() counts nanoseconds; convert to seconds so the value
        # matches the "cpu_time_sec" key and the "sec" label in the log message;
        cpu_time = (System.nanoTime() - start) / 1_000_000_000

        # Compare GPU and CPU results;
        difference = 0
        for i in range(self.size):
            difference += np.abs(self.cpu_result[i] - gpu_result[i])

        self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
        self.benchmark.add_to_benchmark("cpu_gpu_res_difference",
                                        str(difference))
        if self.benchmark.debug:
            BenchmarkResult.log_message(
                f"\tcpu result: [" +
                ", ".join([f"{x:.4f}"
                           for x in self.cpu_result[:10]]) + "...]; " +
                f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
Exemple #18
0
def run(fn, args, msg="", n_iterations=20):
  timings = []
  for i in xrange(n_iterations):
    t0 = System.nanoTime()
    fn(*args)
    t1 = System.nanoTime()
    timings.append(t1 - t0)

  minimum = min(timings)
  maximum = max(timings)
  average = sum(timings) / float(len(timings))
  
  print msg, "min:", minimum, "max:", maximum, "avg:", average
def timeIt(fn, n_iterations=10):
  elapsed_times = []
  for i in range(n_iterations):
    t0 = System.nanoTime()
    fn()
    t1 = System.nanoTime()
    elapsed_times.append(t1 - t0)

  smallest = min(elapsed_times)
  largest =  max(elapsed_times)
  average =  sum(elapsed_times) / float(n_iterations)
  print "Elapsed time: min", smallest, "max", largest, "average", average
  return elapsed_time
Exemple #20
0
    def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
        """
        Compute the reference class predictions on the CPU and compare with the GPU result.

        The CPU result is recomputed only on the first iteration or when
        reinit is true. It is the argmax of the sum of two softmax outputs:
        one from a naive Bayes prediction on self.x_cpu and one from a ridge
        prediction on the normalized self.x_cpu.
        The elapsed time (in seconds) and the summed absolute difference over
        the first self.size elements are stored in the benchmark.

        :param gpu_result: indexable result produced by the GPU run
        :param reinit: force recomputation of the CPU result
        """
        def softmax(X):
            # Row-wise softmax;
            return np.exp(X) / np.sum(np.exp(X), axis=1).reshape(X.shape[0], 1)

        def logsumexp(X):
            # Log of the sum of exponentials over all elements of X;
            return np.log(np.sum(np.exp(X)))

        def naive_bayes_predict(X, feature_log_prob, log_class_prior):
            jll = X.dot(feature_log_prob.T) + log_class_prior
            amax = np.amax(jll, axis=1)
            l = logsumexp(jll - np.atleast_2d(amax).T) + amax

            return np.exp(jll - np.atleast_2d(l).T)

        def normalize(X):
            # Column-wise standardization (zero mean, unit standard deviation);
            return (X - np.mean(X, axis=0)) / np.std(X, axis=0)

        def ridge_pred(X, coef, intercept):
            return np.dot(X, coef.T) + intercept

        # Recompute the CPU result only if necessary;
        start = System.nanoTime()
        if self.current_iter == 0 or reinit:
            # Re-initialize the random number generator with the same seed as the GPU to generate the same values;
            seed(self.random_seed)

            r1_g = naive_bayes_predict(self.x_cpu, self.nb_feat_log_prob_cpu,
                                       self.nb_class_log_prior_cpu)
            r2_g = ridge_pred(normalize(self.x_cpu), self.ridge_coeff_cpu,
                              self.ridge_intercept_cpu)
            r_g = np.argmax(softmax(r1_g) + softmax(r2_g), axis=1)
            self.cpu_result = r_g

        # System.nanoTime() counts nanoseconds; convert to seconds so the value
        # matches the "cpu_time_sec" key and the "sec" label in the log message;
        cpu_time = (System.nanoTime() - start) / 1_000_000_000

        # Compare GPU and CPU results;
        difference = 0
        for i in range(self.size):
            difference += np.abs(self.cpu_result[i] - gpu_result[i])

        self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
        self.benchmark.add_to_benchmark("cpu_gpu_res_difference",
                                        str(difference))
        if self.benchmark.debug:
            BenchmarkResult.log_message(
                f"\tcpu result: [" +
                ", ".join([f"{x:.4f}"
                           for x in self.cpu_result[:10]]) + "...]; " +
                f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
 def game_update(self):
     """
     Refresh the game-time spans, then update every component if the game is active.

     The elapsed spans cover the interval since the previous call; the
     total spans cover the interval since self.game_start_time. Components
     are updated (and self.updates incremented) only while the game is
     running, not paused and not over.
     """
     self.gelapsed_after = System.nanoTime()

     clock = self.game_time
     for span in (clock.elapsed_game_time, clock.elapsed_real_time):
         span.set_span(self.gelapsed_before, self.gelapsed_after)
     for span in (clock.total_game_time, clock.total_real_time):
         span.set_span(self.game_start_time, self.gelapsed_after)

     self.gelapsed_before = System.nanoTime()

     # Nothing to update while stopped, paused or finished.
     if not self.running or self.is_paused is True or self.game_over is True:
         return

     for component in self.components.getComponents():
         component.update(self.game_time)
     self.updates += 1
Exemple #22
0
 def get_ticks(self):
     """
     **pyj2d.time.get_ticks**
     
     Return the ms elapsed since the stored start time (self._time_init).
     """
     now_ms = System.nanoTime() // 1000000
     return now_ms - self._time_init
Exemple #23
0
def get_ticks():
    """
    **pyj2d.time.get_ticks**
    
    Return the ms elapsed since the module-level start time (_time_init).
    """
    now_ms = System.nanoTime()/1000000
    return now_ms - _time_init
Exemple #24
0
    def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
        """
        Compute the reference Black-Scholes value on the CPU and compare with the GPU result.

        The CPU result is recomputed only on the first iteration or when
        reinit is true; only the first element of the computed array is kept.
        The elapsed time (in seconds) and the absolute difference are stored
        in the benchmark, and logged when debugging is enabled.

        :param gpu_result: value produced by the GPU run
        :param reinit: force recomputation of the CPU result
        """
        def CND(X):
            """
            Cumulative normal distribution.
            Helper function used by BS(...).
            """

            (a1, a2, a3, a4, a5) = (0.31938153, -0.356563782, 1.781477937,
                                    -1.821255978, 1.330274429)
            L = np.absolute(X)
            K = np.float64(1.0) / (1.0 + 0.2316419 * L)
            w = 1.0 - 1.0 / math.sqrt(2 * np.pi) * np.exp(-L * L / 2.) * \
                (a1 * K +
                 a2 * (K ** 2) +
                 a3 * (K ** 3) +
                 a4 * (K ** 4) +
                 a5 * (K ** 5))

            # Use w for non-negative inputs and 1 - w for negative ones;
            mask = X < 0
            w = w * ~mask + (1.0 - w) * mask

            return w

        def BS(X, R, V, T, K):
            """Black Scholes Function."""
            d1_arr = (np.log(X / K) +
                      (R + V * V / 2.) * T) / (V * math.sqrt(T))
            d2_arr = d1_arr - V * math.sqrt(T)
            w_arr = CND(d1_arr)
            w2_arr = CND(d2_arr)
            return X * w_arr - X * math.exp(-R * T) * w2_arr

        # Recompute the CPU result only if necessary;
        start = System.nanoTime()
        if self.current_iter == 0 or reinit:
            res = BS(np.array(self.x_tmp), R, V, T, K)
            self.cpu_result = res[0]
        # System.nanoTime() counts nanoseconds; convert to seconds so the value
        # matches the "cpu_time_sec" key and the "sec" label in the log message;
        cpu_time = (System.nanoTime() - start) / 1_000_000_000
        difference = np.abs(self.cpu_result - gpu_result)
        self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
        self.benchmark.add_to_benchmark("cpu_gpu_res_difference", difference)
        if self.benchmark.debug:
            BenchmarkResult.log_message(
                f"\tcpu result: {self.cpu_result:.4f}, " +
                f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
Exemple #25
0
 def __init__(self):
     """
     Set up the clock: start time in ms, a 10-entry history of frame
     times (each initially 25) and a Thread used for sleeping.
     """
     started_ms = System.nanoTime() / 1000000
     self.time = started_ms
     self.time_init = started_ms
     self.time_diff = [25 for _ in range(10)]
     self.pos = 0
     self.thread = Thread()
Exemple #26
0
 def __init__(self):
     """
     Create the clock state: the current time in ms (also used as the
     initial reference time), ten frame-time slots preset to 25, the
     slot index and a Thread object.
     """
     self.time = self.time_init = System.nanoTime()/1000000
     history_slots = 10
     self.time_diff = [25]*history_slots
     self.pos = 0
     self.thread = Thread()
Exemple #27
0
 def __init__(self):
     """
     Set up the clock: start time in ms, a 10-entry history of frame
     times (each initially 33) and a Thread used for sleeping.
     """
     started_ms = System.nanoTime()//1000000
     self._time = started_ms
     self._time_init = started_ms
     self._time_diff = [33] * 10
     self._pos = 0
     self._thread = Thread()
Exemple #28
0
 def execute_phase(self, phase_name, function, *args) -> object:
     """
     Execute a single step of the benchmark, optionally measuring how long it takes.
     When self.time_phases is set, the elapsed time in seconds is added to
     self.benchmark as a phase named phase_name.
     :param phase_name: name of this benchmark step
     :param function: a function to execute
     :param args: arguments of the function
     :return: the result of the function
     """
     # Untimed path: just run the step.
     if not self.time_phases:
         return function(*args)

     began = System.nanoTime()
     outcome = function(*args)
     finished = System.nanoTime()
     phase = {
         "name": phase_name,
         "time_sec": (finished - began) / 1_000_000_000,
     }
     self.benchmark.add_phase(phase)
     return outcome
Exemple #29
0
    def setupGUI(self, initialFilename):
        """
        Build and show the main window, then finish startup.

        Applies the configured skin, block-box and gutter settings, installs
        the interpreter bridges, opens initialFilename (or a new file when it
        is None), records the startup time if the "jes.starttimens" system
        property is set, and finally reports configuration load problems or
        shows the introduction window.
        """
        self.gui = JESUI(self)
        self.gui.windowSetting(None)

        self.setHelpArray()

        skinName = JESConfig.getInstance().getStringProperty(
            JESConfig.CONFIG_SKIN)
        self.gui.changeSkin(skinName)
        self.gui.show()

        blockSetting = JESConfig.getInstance().getBooleanProperty(
            JESConfig.CONFIG_BLOCK)
        if not blockSetting:
            self.gui.editor.addBox()
        else:
            self.gui.editor.removeBox()

        gutterSetting = JESConfig.getInstance().getBooleanProperty(
            JESConfig.CONFIG_GUTTER)
        if not gutterSetting:
            self.gui.turnOffGutter()
        else:
            self.gui.turnOnGutter()

        # Install the bridges.
        self.terpControl = InterpreterControl(self.gui, self.interpreter)
        self.replBuffer = REPLBuffer(self.interpreter, self.gui.commandWindow)

        # Open or create the file.
        if initialFilename is not None:
            self.fileManager.readFile(initialFilename)
        else:
            self.fileManager.newFile()

        # Startup complete!
        startTimeNS = System.getProperty("jes.starttimens")
        if startTimeNS is not None:
            elapsedNS = System.nanoTime() - long(startTimeNS)
            self.startupTimeSec = elapsedNS / 1000000000.0

        # Show introduction window if settings could not be loaded (Either new
        # JES user or bad write permissions)
        config = JESConfig.getInstance()
        loadError = config.getLoadError()

        if loadError is not None:
            errorText = ("Your JESConfig.properties file could not be opened!\n" +
                         loadError.toString())
            JOptionPane.showMessageDialog(
                self.gui, errorText, "JES Configuration",
                JOptionPane.ERROR_MESSAGE)
        elif config.wasMigrated():
            migratedText = ("Your settings were imported from JES 4.3.\n" +
                            "JES doesn't use the JESConfig.txt file in " +
                            "your home directory anymore, so you can delete it.")
            JOptionPane.showMessageDialog(
                self.gui, migratedText,
                "JES Configuration", JOptionPane.INFORMATION_MESSAGE)
        elif not config.wasLoaded():
            introController.show()
Exemple #30
0
def callAndTime(function, *args, **kwargs):
    if not callable(function):
        print "callAndTime(function[, arguments...]): Input is not a function"

    name = getattr(function, "__name__", "The function")

    def showElapsedTime(start, end):
        return "%d.%06d milliseconds" % divmod(end - start, 1000000)

    startTime = System.nanoTime()
    try:
        rv = function(*args, **kwargs)
    except:
        endTime = System.nanoTime()

        print >> sys.stderr, "%s ran for %s and crashed" % (name, showElapsedTime(startTime, endTime))
        raise
    else:
        endTime = System.nanoTime()

        print >> sys.stderr, "%s ran in %s" % (name, showElapsedTime(startTime, endTime))
        return rv
Exemple #31
0
def callAndTime(function, *args, **kwargs):
    if not callable(function):
        print "callAndTime(function[, arguments...]): Input is not a function"

    name = getattr(function, '__name__', 'The function')

    def showElapsedTime(start, end):
        return "%d.%06d milliseconds" % divmod(end - start, 1000000)

    startTime = System.nanoTime()
    try:
        rv = function(*args, **kwargs)
    except:
        endTime = System.nanoTime()

        print >> sys.stderr, "%s ran for %s and crashed" % (
            name, showElapsedTime(startTime, endTime))
        raise
    else:
        endTime = System.nanoTime()

        print >> sys.stderr, "%s ran in %s" % (
            name, showElapsedTime(startTime, endTime))
        return rv
Exemple #32
0
 def tick(self, framerate=0):
     """
     Mark one program cycle and return the ms elapsed since the last call.

     With a nonzero framerate, the call also sleeps so that the cycle rate
     stays near that value; the pause is based on the sum of the stored
     intervals divided by ten.
     """
     # Move to the next history slot, wrapping around after slot 9.
     self.pos = self.pos + 1 if self.pos < 9 else 0
     self.time = System.nanoTime()/1000000
     elapsed = self.time-self.time_init
     self.time_diff[self.pos] = elapsed
     self.time_init = self.time
     if framerate:
         mean_elapsed = sum(self.time_diff)/10
         # Target cycle length in ms minus the recent mean cycle length.
         pause = long( ((1.0/framerate)*1000) - mean_elapsed )
         if pause > 0:
             self.thread.sleep(pause)
     return self.time_diff[self.pos]
Exemple #33
0
 def tick(self, framerate=0):
     """
     Register one pass of the program loop.

     Returns the number of ms since the previous call. If a framerate is
     given, a pause is added first to keep the loop from running faster
     than that rate.
     """
     # Advance the write position in the interval history (slots 0 to 9).
     self.pos = 0 if not self.pos < 9 else self.pos + 1
     now = System.nanoTime() / 1000000
     self.time = now
     self.time_diff[self.pos] = (now - self.time_init)
     self.time_init = now
     if framerate:
         recent_mean = sum(self.time_diff) / 10
         frame_length = (1.0 / framerate) * 1000
         delay = long(frame_length - recent_mean)
         if delay > 0:
             self.thread.sleep(delay)
     return self.time_diff[self.pos]
 def store_stats(self):
     """
     Count one frame and periodically refresh the FPS/UPS statistics.

     Each call increments frame_count and adds period to stats_interval.
     Once stats_interval reaches MAX_STAT_INTERVAL, the frames per second
     and updates per second since the start are computed, stored in the
     fps_store/ups_store ring buffers and averaged into average_fps and
     average_ups; the per-interval counters are then reset.
     """
     self.frame_count += 1
     self.stats_interval += self.period

     if self.stats_interval < self.MAX_STAT_INTERVAL:
         return

     time_now = System.nanoTime()
     self.time_spend_in_game = time_now - self.game_start_time

     real_elapsed_time = time_now - self.prev_stats_time
     self.total_elapsed_time += real_elapsed_time

     self.total_frames_skipped += self.frames_skipped

     # Start from zero so both rates are defined even when no time has
     # elapsed yet.
     actual_fps = 0.0
     actual_ups = 0.0
     if self.total_elapsed_time > 0:
         # The elapsed time is accumulated from System.nanoTime() readings,
         # so it is converted to (float) seconds to get per-second rates.
         elapsed_sec = self.total_elapsed_time / 1000000000.0
         actual_fps = self.frame_count / elapsed_sec
         actual_ups = (self.frame_count + self.total_frames_skipped) / elapsed_sec

     slot = self.stats_count % self.NUM_FPS
     self.fps_store[slot] = actual_fps
     self.ups_store[slot] = actual_ups
     self.stats_count += 1

     # Sum every slot of the ring buffers, not just the first one.
     total_fps = 0.0
     total_ups = 0.0
     for i in range(self.NUM_FPS):
         total_fps += self.fps_store[i]
         total_ups += self.ups_store[i]

     # Until the buffers have been filled once, average only over the
     # samples stored so far.
     samples = min(self.stats_count, self.NUM_FPS)
     self.average_fps = total_fps / samples
     self.average_ups = total_ups / samples

     self.frames_skipped = 0
     self.prev_stats_time = time_now
     self.stats_interval = 0
Exemple #35
0
    def execute(self) -> object:
        """
        Run the sum kernel on x and on y, then read back and add the results.

        Each kernel call and the final read of the first element of both
        arrays is recorded as its own benchmark phase.

        :return: the first element of x plus the first element of y, read
            after both kernels were called
        """
        # The two computations are independent of each other; each call is
        # timed as a separate phase;
        for phase_name, data in (("sum_1", self.x), ("sum_2", self.y)):
            t_begin = System.nanoTime()
            self.sum_kernel(self.num_blocks, self.block_size)(data, self.size)
            t_end = System.nanoTime()
            self.benchmark.add_phase({"name": phase_name, "time_sec": (t_end - t_begin) / 1_000_000_000})

        # Reading the results is timed as a phase of its own;
        t_begin = System.nanoTime()
        result_1 = self.x[0]
        result_2 = self.y[0]
        t_end = System.nanoTime()
        self.benchmark.add_phase({"name": "read_result", "time_sec": (t_end - t_begin) / 1_000_000_000})

        total = result_1 + result_2
        self.benchmark.add_to_benchmark("gpu_result", total)
        if self.benchmark.debug:
            BenchmarkResult.log_message(f"\tgpu result: {result_1} {result_2}")

        return total
Exemple #36
0
 def __init__(self):
     """
     Store the creation time (System.nanoTime() floor-divided to ms), attach
     the Clock class with a cleared _repaint_sync flag, and start with an
     empty timer table.
     """
     self._time_init = System.nanoTime() // 1000000
     # The flag is set on the Clock class itself, which is then exposed as
     # an attribute of this instance.
     Clock._repaint_sync = AtomicBoolean(False)
     self.Clock = Clock
     self._timers = dict()
Exemple #37
0
 def __init__(self):
     """
     Initialise the timing state: the current and initial time in ms, a
     ten-entry interval history pre-filled with 33, the history position,
     and a Thread object.
     """
     now_ms = System.nanoTime() // 1000000
     self._time = now_ms
     self._time_init = now_ms
     self._time_diff = [33] * 10
     self._pos = 0
     self._thread = Thread()
Exemple #38
0
 def time(self):
     """
     Return the System.nanoTime() reading converted to ms, as a float.
     """
     nanos = System.nanoTime()
     return nanos / 1000000.0
Exemple #39
0
    def cpu_validation(self, gpu_result: object, reinit: bool) -> None:
        """
        Recompute the result on the CPU with NumPy and compare it to the GPU one.

        The CPU computation is redone only on the first iteration or when
        `reinit` is true; otherwise the previously stored `self.cpu_result`
        is reused. The elapsed time and the absolute difference between the
        two results are added to the benchmark.

        :param gpu_result: result of the GPU computation to validate
        :param reinit: if true, recompute the CPU result even after the
            first iteration
        """
        def relu(x):
            # Element-wise max(x, 0);
            return np.maximum(x, 0)

        def conv3d2(x, kernels, shape, K, k_out, stride=1, operator=relu):
            # Strided convolution of the flat input `x`, read as (N, M, L),
            # with `k_out` filters of K x K x L weights; `operator` is applied
            # to every output value and the result is a flat array;
            N, M, L = shape
            out = np.zeros((N // stride) * (M // stride) * k_out)
            radius = K // 2

            for m in range(k_out):
                for i in range(0, int(np.ceil(N / stride)) - radius):
                    for j in range(0, int(np.ceil(M / stride)) - radius):
                        res = 0
                        i_f = i * stride + radius
                        j_f = j * stride + radius
                        for k_i in range(-radius, radius + 1):
                            for k_j in range(-radius, radius + 1):
                                for l in range(L):
                                    ni = i_f + k_i
                                    nj = j_f + k_j
                                    res += kernels[
                                        l + L * (k_j + radius + K *
                                                 (k_i + radius + K * m))] * x[(
                                                     (ni * M) + nj) * L + l]
                        out[m + k_out * (j + M * i // stride)] = operator(res)
            return out

        def pooling(x, shape, K, stride):
            # Sum of each window of the flat input `x`, read as (N, M, L),
            # divided by K**2 and sampled every `stride` positions;
            N, M, L = shape
            # The output is written with a single flat index below and is
            # consumed as a flat array by the next convolution, so it is
            # allocated flat, like the output of conv3d2. (Dividing by the
            # name `pooling` here would divide by this function itself);
            out = np.zeros((N // stride) * (M // stride) * L)
            radius = K // 2
            for i in range(0, int(np.ceil(N / stride)) - radius):
                for j in range(0, int(np.ceil(M / stride)) - radius):
                    for l in range(L):
                        res = 0
                        i_f = i * stride + radius
                        j_f = j * stride + radius
                        for k_i in range(-radius, radius + 1):
                            for k_j in range(-radius, radius + 1):
                                ni = i_f + k_i
                                nj = j_f + k_j
                                res += x[((ni * M) + nj) * L + l]
                        out[l + L * (j + M * i // stride)] = res / K**2
            return out

        def gap2(x, shape):
            # Mean over the first two dimensions of the flat (N, M, L) input;
            # only referenced by the disabled code further down;
            N, M, L = shape
            out = np.zeros(L)
            for n in range(N):
                for m in range(M):
                    for i in range(L):
                        out[i] += x[i + L * (m + M * n)] / (N * M)
            return out

        def concat(x, y):
            # x and y have the same length;
            out = np.zeros(2 * len(x))
            for i in range(len(x)):
                out[i] = x[i]
                out[i + len(x)] = y[i]
            return out

        # Recompute the CPU result only if necessary;
        start = System.nanoTime()
        if self.current_iter == 0 or reinit:

            # Initialize weights;
            N = self.size
            kernel_1 = np.zeros(len(self.kernel_1))
            kernel_2 = np.zeros(len(self.kernel_2))
            kernel_3 = np.zeros(len(self.kernel_3))
            kernel_4 = np.zeros(len(self.kernel_4))
            dense_weights = np.zeros(len(self.dense_weights))
            # Copy the weights element by element into NumPy arrays; kernel_3
            # is indexed over the length of kernel_1, and kernel_4 over the
            # length of kernel_2;
            for i in range(len(self.kernel_1)):
                kernel_1[i] = self.kernel_1[i]
                kernel_3[i] = self.kernel_3[i]
            for i in range(len(self.kernel_2)):
                kernel_2[i] = self.kernel_2[i]
                kernel_4[i] = self.kernel_4[i]

            for i in range(len(self.dense_weights)):
                dense_weights[i] = self.dense_weights[i]

            # First convolution (N,N,1) -> (N/stride,N/stride,kn1)
            x_1 = conv3d2(np.array(self.x_cpu),
                          kernel_1, (N, N, self.channels),
                          self.K,
                          self.kn1,
                          stride=self.stride)
            x_11 = pooling(x_1, (N // self.stride, N // self.stride, self.kn1),
                           self.pooling, self.pooling)
            # Second convolution (N/stride,N/stride,kn1) -> (N/stride^2,N/stride^2,kn2)
            x_2 = conv3d2(x_11,
                          kernel_2,
                          (N // self.stride // self.pooling,
                           N // self.stride // self.pooling, self.kn1),
                          self.K,
                          self.kn2,
                          stride=self.stride)

            # Same pipeline for the second input;
            # First convolution (N,N,1) -> (N/stride,N/stride,kn1)
            y_1 = conv3d2(np.array(self.y_cpu),
                          kernel_3, (N, N, self.channels),
                          self.K,
                          self.kn1,
                          stride=self.stride)
            y_11 = pooling(y_1, (N // self.stride, N // self.stride, self.kn1),
                           self.pooling, self.pooling)
            # Second convolution (N/stride,N/stride,kn1) -> (N/stride^2,N/stride^2,kn2)
            y_2 = conv3d2(y_11,
                          kernel_4,
                          (N // self.stride // self.pooling,
                           N // self.stride // self.pooling, self.kn1),
                          self.K,
                          self.kn2,
                          stride=self.stride)

            # Global average pooling 2D;
            # x_3 = gap2(x_2, (N // (self.stride * self.stride), N // (self.stride * self.stride), self.kn2))
            # y_3 = gap2(y_2, (N // (self.stride * self.stride), N // (self.stride * self.stride), self.kn2))

            # Concatenate;
            out = concat(x_2, y_2)

            # Final dense layer;
            self.cpu_result = out.dot(dense_weights[:len(out)])
            # self.cpu_result = x_1[:100]

        # The time is recorded even when the cached result was reused;
        cpu_time = (System.nanoTime() - start) / 1_000_000_000

        # Compare GPU and CPU results;
        difference = np.abs(self.cpu_result - gpu_result)

        self.benchmark.add_to_benchmark("cpu_time_sec", cpu_time)
        self.benchmark.add_to_benchmark("cpu_gpu_res_difference",
                                        str(difference))
        if self.benchmark.debug:
            # BenchmarkResult.log_message(
            #     f"\tcpu result: [" + ", ".join([f"{x:.2f}" for x in self.cpu_result[:100]]) + "...]"+
            #                             f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
            BenchmarkResult.log_message(
                f"\tcpu result: {self.cpu_result:.4f}; " +
                f"difference: {difference:.4f}, time: {cpu_time:.4f} sec")
Exemple #40
0
 def get_time(self):
     """
     Return the System.nanoTime() reading divided by 1000000.
     """
     nanos = System.nanoTime()
     return nanos/1000000
Exemple #41
0
#PyJ2D - Copyright (C) 2011 James Garnon

from __future__ import division
from java.lang import Thread, System

# Markup format used by the docstrings in this module.
__docformat__ = 'restructuredtext'


# Timer reading, scaled from ns to ms, taken once when this module is loaded.
_time_init = System.nanoTime()/1000000


class Clock(object):
    """
    **pyj2d.time.Clock**

    * Clock.get_time
    * Clock.tick
    * Clock.tick_busy_loop
    * Clock.get_fps
    """

    def __init__(self):
        """
        Return Clock.

        The clock starts with the current time in ms, a ten-entry interval
        history pre-filled with 25, and a Thread object.
        """
        now = System.nanoTime()/1000000
        self.time = now
        self.time_init = now
        self.time_diff = [25 for _ in range(10)]
        self.pos = 0
        self.thread = Thread()
Exemple #42
0
    def execute(self) -> object:
        """
        Run the iterative solver kernels and return the resulting vector.

        After an initialization step, `num_iterations` rounds of the update
        sequence annotated below are executed (it looks like a conjugate
        gradient iteration -- confirm against the kernel sources). Host-side
        steps are recorded as phases when `time_phases` is set, and the
        total computation time is always recorded.

        :return: `self.gpu_result`, filled with a copy of `self.x`
        """
        num_blocks_spmv = int(np.ceil(self.size / self.block_size))
        # Launch configurations reused by the kernels below;
        spmv_config = (num_blocks_spmv, self.block_size, 4 * self.block_size)
        vector_config = (self.num_blocks_size, self.block_size)
        start_comp = System.nanoTime()
        start = 0

        # Initialization phase;
        # r = b - A * x
        self.execute_phase("spmv_init", self.spmv_full_kernel(*spmv_config),
                           self.row_cnt_1, self.ptr, self.idx, self.val, self.x, self.r, self.size, -1, self.b)
        # p = r
        self.execute_phase("cpy_init", self.cpy_kernel(*vector_config), self.p, self.r, self.size)
        # t1 = r^t * r
        self.execute_phase("norm_init", self.norm_kernel(*vector_config), self.r, self.t1, self.size)

        for step in range(self.num_iterations):
            # t2 = p^t * A * p
            self.execute_phase(f"spmv_{step}", self.spmv_kernel(*spmv_config),
                               self.row_cnt_2, self.ptr, self.idx, self.val, self.p, self.y, self.size)
            self.t2[0] = 0
            self.execute_phase(f"dp_{step}", self.dp_kernel(*vector_config), self.p, self.y, self.t2, self.size)

            # Host-side step: compute alpha and reset the accumulators;
            if self.time_phases:
                start = System.nanoTime()
            alpha = self.t1[0] / self.t2[0]
            prev_r_norm_squared = self.t1[0]
            self.t1[0] = 0
            self.row_cnt_1[0] = 0.0
            self.row_cnt_2[0] = 0.0
            if self.time_phases:
                end = System.nanoTime()
                self.benchmark.add_phase({"name": f"alpha_{step}", "time_sec": (end - start) / 1_000_000_000})

            # Update x: x = x + alpha * p
            self.execute_phase(f"saxpy_x_{step}", self.saxpy_kernel(*vector_config),
                               self.x, self.x, self.p, alpha, self.size)
            # r = r - alpha * y
            self.execute_phase(f"saxpy_r_{step}", self.saxpy_kernel(*vector_config),
                               self.r, self.r, self.y, -1 * alpha, self.size)
            # t1 = r^t * r
            self.execute_phase(f"norm_{step}", self.norm_kernel(*vector_config), self.r, self.t1, self.size)

            # Host-side step: compute beta from the new and previous t1;
            if self.time_phases:
                start = System.nanoTime()
            beta = self.t1[0] / prev_r_norm_squared
            if self.time_phases:
                end = System.nanoTime()
                self.benchmark.add_phase({"name": f"beta_{step}", "time_sec": (end - start) / 1_000_000_000})

            self.execute_phase(f"saxpy_p_{step}", self.saxpy_kernel(*vector_config),
                               self.p, self.r, self.p, beta, self.size)

        # Add a final sync step to measure the real computation time;
        if self.time_phases:
            start = System.nanoTime()
        sync_read = self.x[0]
        end = System.nanoTime()
        if self.time_phases:
            self.benchmark.add_phase({"name": "sync", "time_sec": (end - start) / 1_000_000_000})
        self.benchmark.add_computation_time((end - start_comp) / 1_000_000_000)
        # Compute GPU result;
        for k in range(self.size):
            self.gpu_result[k] = self.x[k]

        self.benchmark.add_to_benchmark("gpu_result", 0)
        if self.benchmark.debug:
            BenchmarkResult.log_message(f"\tgpu result: [" + ", ".join([f"{v:.4f}" for v in self.gpu_result[:10]]) + "...]")

        return self.gpu_result
Exemple #43
0
    def setupGUI(self, initialFilename):
        """
        Create and show the JES window, then open the starting file.

        Once the window is up, the startup time is recorded (when the
        "jes.starttimens" system property is set), and the user is told
        about a configuration load error or a settings migration, or is
        shown the introduction window.

        :param initialFilename: file to read into the editor, or None to
            start with a new file
        """
        self.gui = JESUI(self)
        self.gui.windowSetting(None)

        self.setHelpArray()

        skin = JESConfig.getInstance().getStringProperty(JESConfig.CONFIG_SKIN)
        self.gui.changeSkin(skin)
        self.gui.show()

        # Apply the block and gutter settings from the configuration.
        blockSetting = JESConfig.getInstance().getBooleanProperty(
            JESConfig.CONFIG_BLOCK)
        if not blockSetting:
            self.gui.editor.addBox()
        else:
            self.gui.editor.removeBox()

        gutterSetting = JESConfig.getInstance().getBooleanProperty(
            JESConfig.CONFIG_GUTTER)
        if not gutterSetting:
            self.gui.turnOffGutter()
        else:
            self.gui.turnOnGutter()

        # Install the bridges.
        self.terpControl = InterpreterControl(self.gui, self.interpreter)
        self.replBuffer = REPLBuffer(self.interpreter, self.gui.commandWindow)

        # Open or create the file.
        if initialFilename is not None:
            self.fileManager.readFile(initialFilename)
        else:
            self.fileManager.newFile()

        # Startup complete! Record how long it took, in seconds.
        startTimeNS = System.getProperty("jes.starttimens")
        if startTimeNS is not None:
            elapsedNS = System.nanoTime() - long(startTimeNS)
            self.startupTimeSec = elapsedNS / 1000000000.0

        # Show introduction window if settings could not be loaded (Either new
        # JES user or bad write permissions)
        config = JESConfig.getInstance()
        loadError = config.getLoadError()
        dialogTitle = "JES Configuration"

        if loadError is not None:
            message = ("Your JESConfig.properties file could not be opened!\n" +
                       loadError.toString())
            JOptionPane.showMessageDialog(
                self.gui, message, dialogTitle, JOptionPane.ERROR_MESSAGE)
        elif config.wasMigrated():
            message = ("Your settings were imported from JES 4.3.\n" +
                       "JES doesn't use the JESConfig.txt file in " +
                       "your home directory anymore, so you can delete it.")
            JOptionPane.showMessageDialog(
                self.gui, message, dialogTitle,
                JOptionPane.INFORMATION_MESSAGE)
        elif not config.wasLoaded():
            introController.show()
Exemple #44
0
 def __init__(self):
     """
     Store the creation time (System.nanoTime() floor-divided to ms) and
     attach the Clock class with a cleared _repaint_sync flag.
     """
     self._time_init = System.nanoTime()//1000000
     # The flag is set on the Clock class itself, which is then exposed as
     # an attribute of this instance.
     Clock._repaint_sync = AtomicBoolean(False)
     self.Clock = Clock
Exemple #45
0
    def execute(self) -> object:
        """
        Run the kernel pipeline on both inputs and return the final value.

        The x and y inputs each go through a convolution, a pooling step and
        a second convolution; the two outputs are concatenated and combined
        with the dense weights by a dot product. A final read of the result
        is timed as the "sync" phase when `time_phases` is set, and the
        total computation time is always recorded.

        :return: `self.gpu_result`, the first element of `self.res`
        """
        self.num_blocks_per_processor = self.num_blocks
        self.block_size_1d = self._block_size["block_size_1d"]
        self.block_size_2d = self._block_size["block_size_2d"]
        start_comp = System.nanoTime()
        start = 0

        a = self.num_blocks_per_processor / 2
        # Launch configurations shared by the kernels below;
        conv_grid = (a, a)
        conv_block = (self.block_size_2d, self.block_size_2d)
        conv1_config = (conv_grid, conv_block,
                        4 * (self.K**2) * self.kn1 * self.channels)
        conv2_config = (conv_grid, conv_block,
                        4 * (self.K**2) * self.kn1 * self.kn2)
        pool_config = ((a / 2, a / 2, a / 2),
                       (self.block_size_2d / 2, self.block_size_2d / 2,
                        self.block_size_2d / 2))
        linear_config = (self.num_blocks_per_processor, self.block_size_1d)
        # Input sizes seen by the pooling and by the second convolutions;
        pool_in_size = self.size // self.stride
        conv2_in_size = self.size // self.stride // self.pooling

        # Convolutions;
        self.execute_phase("conv_x1", self.conv2d_kernel(*conv1_config),
                           self.x1, self.x, self.kernel_1,
                           self.size, self.size, self.channels, self.K,
                           self.kn1, self.stride)
        self.execute_phase("conv_y1", self.conv2d_kernel(*conv1_config),
                           self.y1, self.y, self.kernel_3,
                           self.size, self.size, self.channels, self.K,
                           self.kn1, self.stride)
        # Pooling;
        self.execute_phase("pool_x1", self.pooling_kernel(*pool_config),
                           self.x11, self.x1,
                           pool_in_size, pool_in_size, self.kn1,
                           self.pooling, self.pooling)
        self.execute_phase("pool_y1", self.pooling_kernel(*pool_config),
                           self.y11, self.y1,
                           pool_in_size, pool_in_size, self.kn1,
                           self.pooling, self.pooling)
        # Other convolutions;
        self.execute_phase("conv_x2", self.conv2d_kernel(*conv2_config),
                           self.x2, self.x11, self.kernel_2,
                           conv2_in_size, conv2_in_size, self.kn1, self.K,
                           self.kn2, self.stride)
        self.execute_phase("conv_y2", self.conv2d_kernel(*conv2_config),
                           self.y2, self.y11, self.kernel_4,
                           conv2_in_size, conv2_in_size, self.kn1, self.K,
                           self.kn2, self.stride)

        # Dense layer;
        self.execute_phase("concat", self.concat_kernel(*linear_config),
                           self.z, self.x2, self.y2, len(self.x2))
        self.execute_phase("dot_product", self.dp_kernel(*linear_config),
                           self.z, self.dense_weights, self.res, len(self.z))

        # Add a final sync step to measure the real computation time;
        if self.time_phases:
            start = System.nanoTime()
        self.gpu_result = self.res[0]
        end = System.nanoTime()
        if self.time_phases:
            sync_phase = {
                "name": "sync",
                "time_sec": (end - start) / 1_000_000_000
            }
            self.benchmark.add_phase(sync_phase)
        self.benchmark.add_computation_time((end - start_comp) / 1_000_000_000)

        self.benchmark.add_to_benchmark("gpu_result", self.gpu_result)
        if self.benchmark.debug:
            BenchmarkResult.log_message(f"\tgpu result: {self.gpu_result:.4f}")

        return self.gpu_result