Ejemplo n.º 1
0
    def _setup_sys(self):
        """Fill ``self.data`` with system metadata taken from ``self._settings``.

        Writes the keys ``os``, ``python``, ``heartbeatAt``, ``startedAt``,
        ``docker``, ``cuda``, ``args`` and ``state`` unconditionally.  The
        keys ``gpu`` / ``gpu_count`` are written only when the NVML queries
        succeed, and ``cpu_count`` only when the platform can report it.
        """
        self.data["os"] = self._settings._os
        self.data["python"] = self._settings._python
        # Both timestamps are naive UTC values rendered as ISO-8601 strings.
        self.data["heartbeatAt"] = datetime.utcnow().isoformat()
        self.data["startedAt"] = datetime.utcfromtimestamp(
            self._settings._start_time
        ).isoformat()

        self.data["docker"] = self._settings.docker

        try:
            pynvml.nvmlInit()
            gpu_name = pynvml.nvmlDeviceGetName(
                pynvml.nvmlDeviceGetHandleByIndex(0)
            )
            # Some NVML bindings return bytes and others return str; calling
            # .decode() on a str would raise AttributeError, which the
            # NVMLError handler below does not catch.
            if isinstance(gpu_name, bytes):
                gpu_name = gpu_name.decode("utf8")
            self.data["gpu"] = gpu_name
            self.data["gpu_count"] = pynvml.nvmlDeviceGetCount()
        except pynvml.NVMLError:
            # Best effort: if NVML fails, the GPU keys are simply left out.
            pass
        try:
            self.data["cpu_count"] = multiprocessing.cpu_count()
        except NotImplementedError:
            # cpu_count() could not be determined; leave the key out.
            pass

        self.data["cuda"] = self._settings._cuda
        self.data["args"] = self._settings._args
        self.data["state"] = "running"
Ejemplo n.º 2
0
 def __init__(self, pid=None, api=None, process_q=None, notify_q=None):
     """Initialise the stats collector state.

     Args:
         pid: Stored as ``self._pid``.
         api: Stored as ``self._api``.
         process_q: Passed to ``interface.BackendSender`` as ``process_queue``.
         notify_q: Passed to ``interface.BackendSender`` as ``notify_queue``.

     A daemon thread targeting ``self._thread_body`` is created here but is
     not started by this method.
     """
     try:
         pynvml.nvmlInit()
         self.gpu_count = pynvml.nvmlDeviceGetCount()
     except pynvml.NVMLError:
         # NVML could not be initialised or queried: report zero GPUs.
         self.gpu_count = 0
     self._pid = pid
     self._api = api
     self._interface = interface.BackendSender(
         process_queue=process_q,
         notify_queue=notify_q,
     )
     self.sampler = {}
     self.samples = 0
     self._shutdown = False
     if psutil:
         # Record the network byte counters as they are at construction time.
         net = psutil.net_io_counters()
         self.network_init = {
             "sent": net.bytes_sent,
             "recv": net.bytes_recv
         }
     else:
         wandb.termlog(
             "psutil not installed, only GPU stats will be reported.  Install with pip install psutil"
         )
     self._thread = threading.Thread(target=self._thread_body)
     self._thread.daemon = True
Ejemplo n.º 3
0
    def __init__(self, pid, interface):
        """Prepare stats-collection state for ``pid``, reporting via ``interface``.

        Queries NVML for the GPU count (0 when NVML raises), records the
        network byte counters when psutil is present, and tries to obtain a
        TPU profiler when ``tpu.is_tpu_available()`` is true.
        """
        try:
            pynvml.nvmlInit()
            detected_gpus = pynvml.nvmlDeviceGetCount()
        except pynvml.NVMLError:
            detected_gpus = 0
        self.gpu_count = detected_gpus

        # Plain bookkeeping attributes.
        self._pid = pid
        self._interface = interface
        self.sampler = {}
        self.samples = 0
        self._shutdown = False
        self._thread = None
        self._tpu_profiler = None
        self._telem = telemetry.TelemetryRecord()

        if not psutil:
            wandb.termlog(
                "psutil not installed, only GPU stats will be reported.  Install with pip install psutil"
            )
        else:
            # Record the network byte counters as they are at construction time.
            counters = psutil.net_io_counters()
            self.network_init = dict(
                sent=counters.bytes_sent,
                recv=counters.bytes_recv,
            )

        if not tpu.is_tpu_available():
            return
        try:
            self._tpu_profiler = tpu.get_profiler()
        except Exception as e:
            # Profiler setup is best effort: log the failure and carry on.
            failure_note = "Error initializing TPUProfiler: " + str(e)
            wandb.termlog(failure_note)
Ejemplo n.º 4
0
 def __init__(self, pid=None, api=None, interface=None):
     """Initialise collector state; no thread is created here.

     ``pid``, ``api`` and ``interface`` are stored on the instance as
     ``_pid``, ``_api`` and ``_interface``.  ``gpu_count`` comes from NVML
     and is 0 when NVML raises.  ``network_init`` is set only when psutil
     is available.
     """
     gpu_total = 0
     try:
         pynvml.nvmlInit()
         gpu_total = pynvml.nvmlDeviceGetCount()
     except pynvml.NVMLError:
         # Keep the zero default when NVML is unavailable.
         gpu_total = 0
     self.gpu_count = gpu_total

     self._pid, self._api, self._interface = pid, api, interface
     self.sampler = {}
     self.samples = 0
     self._shutdown = False

     if not psutil:
         wandb.termlog(
             "psutil not installed, only GPU stats will be reported.  Install with pip install psutil"
         )
     else:
         # Record the network byte counters as they are at construction time.
         io_snapshot = psutil.net_io_counters()
         baseline = {}
         baseline["sent"] = io_snapshot.bytes_sent
         baseline["recv"] = io_snapshot.bytes_recv
         self.network_init = baseline

     self._thread = None