def compute_ratings(self, device_infos):
    """Compute per-dtype ratings for all devices and store this device's.

    For every entry of ``device_infos`` that has a "matrix_multiplication"
    section, the third element of the sequence stored under key "0" of each
    dtype is used as that device's measurement (presumably an elapsed
    time, smaller being better - confirm against the code that fills it).
    A rating is the smallest measurement seen for the dtype (capped at
    1.0e30) divided by the device's own measurement.

    The resulting table is emitted through ``self.debug``. When
    ``self.device_info.desc`` has an entry, its ratings are assigned to
    ``self.device_info.rating``.

    :param device_infos: mapping of device description to its info mapping.
    """
    measured = {}
    best = {}
    for name, info in sorted(device_infos.items()):
        kernels = info.get("matrix_multiplication")
        if kernels is None:
            continue
        per_type = {}
        measured[name] = per_type
        for dtype, typeinfo in kernels.items():
            entry = typeinfo.get("0")
            # Entries without a third element carry no measurement.
            if entry is not None and len(entry) >= 3:
                per_type[dtype] = entry[2]
                best[dtype] = min(best.get(dtype, 1.0e30), entry[2])

    table = prettytable.PrettyTable("device", " dtype", "rating")
    for column in ("device", " dtype"):
        table.align[column] = "l"

    rating = {}
    for name, per_type in sorted(measured.items()):
        scores = {}
        rating[name] = scores
        for dtype, value in sorted(per_type.items()):
            score = best[dtype] / value
            scores[dtype] = score
            table.add_row(name, dtype, "%.3f" % score)
    self.debug("Device ratings:\n%s", str(table))

    own = self.device_info
    if own.desc in rating:
        own.rating = rating[own.desc]
def __init__(self):
    """Select an OpenCL device and log the chosen configuration.

    Several environment variables are adjusted before the device is
    requested (see the inline workaround notes). ``DISPLAY`` is removed
    only for the duration of the device lookup and is put back afterwards,
    including when the lookup raises.

    If ``_get_some_device()`` returns a false value, initialization stops
    there and nothing is logged. Otherwise the performance values are
    filled in and a table with one row per dtype in
    ``opencl_types.dtypes`` is written via ``self.info``.
    """
    super(OpenCLDevice, self).__init__()
    self._blas = None
    self._id = None

    # Workaround for NVIDIA
    # (fixes incorrect behaviour with OpenCL binaries)
    if os.getenv("CUDA_CACHE_DISABLE") is None:
        os.putenv("CUDA_CACHE_DISABLE", "1")

    # Workaround for AMD
    # (fixes segmentation fault when accessed over ssh with X and
    # no X is running or when accessing locally and integrated
    # video device is used instead of AMD one)
    d = os.getenv("DISPLAY")
    if d is not None and d != os.getenv("COMPUTE"):
        os.unsetenv("DISPLAY")

    # Set 64-bit mode for AMD OpenCL by default
    if os.getenv("GPU_FORCE_64BIT_PTR") is None:
        os.putenv("GPU_FORCE_64BIT_PTR", "1")

    # Get the device
    try:
        res = self._get_some_device()
    finally:
        # Restore DISPLAY to enable drawing; done in ``finally`` so that a
        # failing device lookup does not leave the process without it.
        if d is not None:
            os.putenv("DISPLAY", d)
    if not res:
        return

    self._fill_device_info_performance_values()

    log_configs = "Selected the following OpenCL configuration:\n"
    table = prettytable.PrettyTable("device", " dtype", "rating",
                                    "BLOCK_SIZE", "VECTOR_OPT", "version")
    table.align["device"] = "l"
    table.align[" dtype"] = "l"
    table.align["BLOCK_SIZE"] = "l"
    table.align["VECTOR_OPT"] = "l"
    for dtype in sorted(opencl_types.dtypes.keys()):
        rating = self.device_info.rating.get(dtype)
        # A dtype without a rating gets an empty cell.
        if rating is None:
            rating = ""
        else:
            rating = "%.3f" % rating
        bs_vo = self.device_info.get_kernel_bs_vo(dtype=dtype)
        table.add_row(self.device_info.desc, dtype, rating,
                      bs_vo[0], bs_vo[1], self.device_info.version)
    self.info(log_configs + str(table))
def __init__(self):
    """Select a CUDA device and log a one-row summary of it.

    The summary lists the device name, its total memory divided by
    1048576, its compute capability formatted as "major.minor" and its
    PCI bus id, and is written via ``self.info``.
    """
    super(CUDADevice, self).__init__()
    self._context_ = None
    self._id = None
    self._blas_ = {}

    # Get the device
    self._get_some_device()

    table = prettytable.PrettyTable("device", "mem", "compute", "pci")
    for column, side in (("device", "l"), ("mem", "r"), ("pci", "l")):
        table.align[column] = side

    device = self.context.device
    name = device.name
    mem = device.total_mem // 1048576
    compute = "%d.%d" % device.compute_capability
    table.add_row(name, mem, compute, device.pci_bus_id)
    self.info("Selected the following CUDA device:\n" + str(table))
def _dump_unit_attributes(self, arrays=True):
    """Print a table of the instance attributes of every workflow unit.

    Units are taken from ``self.workflow.units_in_dependency_order``; each
    attribute in a unit's ``__dict__`` that is not listed in
    ``Workflow.HIDDEN_UNIT_ATTRS`` produces one row (unit index, unit
    class name, attribute name, value).

    :param arrays: when false, a value that has a length greater than 32
        and is neither ``str`` nor ``bytes`` is shown as a short
        "object of class ... of length ..." summary instead of its repr.
    """
    import veles.external.prettytable as prettytable
    from veles.workflow import Workflow

    self.debug("Dumping unit attributes of %s...", str(self.workflow))
    table = prettytable.PrettyTable("#", "unit", "attr", "value")
    table.align["#"] = "r"
    for column in ("unit", "attr", "value"):
        table.align[column] = "l"
    table.max_width["value"] = 100

    hidden = Workflow.HIDDEN_UNIT_ATTRS
    for index, unit in enumerate(self.workflow.units_in_dependency_order):
        unit_name = unit.__class__.__name__
        for attr, value in sorted(unit.__dict__.items()):
            if attr in hidden:
                continue
            summarize = (not arrays and hasattr(value, "__len__") and
                         len(value) > 32 and
                         not isinstance(value, (str, bytes)))
            text = ("object of class %s of length %d" % (
                repr(value.__class__.__name__), len(value))
                if summarize else repr(value))
            table.add_row(index, unit_name, attr, text)
    print(table)