Example #1
0
    def __init__(self, path):
        """Read the previous config at ``path`` and take an exclusive lock on it.

        The file is first parsed as JSON; an unreadable or undecodable file
        is treated as an empty config. The file is then reopened for writing
        and locked with a non-blocking ``flock``. While the lock is held by
        someone else, the ``pid`` stored in the previous config (if any) is
        sent SIGTERM and the attempt is repeated, up to 10 times.

        Args:
            path: Location of the config file.

        Raises:
            AssertionError: If the lock could not be acquired in any attempt.
        """
        import json
        import signal

        self._path = path
        # Set up front so the final check fails with an AssertionError rather
        # than an AttributeError when every attempt is blocked.
        self._locked = False
        try:
            with open(path) as f:
                old_data = json.load(f)
        except json.JSONDecodeError:
            logger.warning('Could not decode config')
            old_data = {}
        except OSError:
            logger.debug('No config file')
            old_data = {}

        for i in range(10):
            # NOTE: 'w+' truncates the file on open, i.e. before the lock is
            # known to be ours; the previous content was captured above.
            self._f = open(path, 'w+')
            try:
                fcntl.flock(self._f, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except BlockingIOError:
                # Release the handle of the failed attempt; the next attempt
                # opens a fresh one, so keeping it would leak a descriptor.
                self._f.close()
                pid = old_data.get('pid')
                if pid:
                    logger.info(
                        f'Config file is locked (try {i}). Killing previous instance {pid}'
                    )
                    try:
                        os.kill(pid, signal.SIGTERM)
                    except ProcessLookupError:
                        # Stale pid: that process is already gone, so there
                        # is nothing to terminate. Just retry.
                        logger.debug(f'Previous instance {pid} is not running')
                else:
                    logger.error('Config file is locked and no pid to kill')
                # Give the lock holder a moment to exit before retrying.
                time.sleep(.05)
            else:
                self._locked = True
                break
        assert self._locked, 'Could not acquire lock on config file'
Example #2
0
 def tb_watcher(self):
     """Wait for the tensorboard subprocess to exit and log the outcome.

     Blocks in ``communicate()`` until the process held in ``self.tb`` has
     terminated, then clears ``self.tb``. A zero exit code is logged at
     debug level; any other code is logged as a warning together with the
     captured stdout/stderr.
     """
     assert isinstance(self.tb, subprocess.Popen)
     captured_out, captured_err = self.tb.communicate()
     exit_code = self.tb.returncode
     # The process is gone; drop the handle.
     self.tb = None
     summary = 'tensorboard on {} for {} returned with code {}'.format(
         self.tb_port, self.path, exit_code)
     if exit_code != 0:
         logger.warning(
             f'{summary}\n out: {captured_out}\n err: {captured_err}')
     else:
         logger.debug(summary)
     logger.debug('tb watcher finished')
Example #3
0
    def tensorboard(self):
        """Report on (and if necessary start) a tensorboard for ``self.path``.

        Returns:
            A dict describing the state:

            * ``{'no_event_files': True}`` when no ``*.tfevents*`` file exists
              anywhere below ``self.path``; nothing is started.
            * Otherwise ``host``, ``port``, ``new``, ``available`` and
              ``no_event_files=False``. ``new`` is True when this call
              launched the subprocess, and False when an already running one
              only had its heartbeat timestamp (``self.tb_t``) refreshed.
        """
        import os

        # Join the pattern as separate path components: with plain string
        # concatenation a path lacking a trailing separator fuses with '**',
        # which then stops being recursive and also matches sibling
        # directories sharing the prefix. Escaping keeps glob metacharacters
        # in the directory name literal.
        pattern = os.path.join(glob.escape(self.path), '**', '*.tfevents*')
        # Only existence matters, so stop at the first match.
        has_event_files = next(glob.iglob(pattern, recursive=True),
                               None) is not None
        if not has_event_files:
            return dict(no_event_files=True)

        elif not self.tb:
            self.tb_port = get_free(self.server.port_pool)
            cmds = [
                'tensorboard', '--logdir', "{}".format(self.path), '--host',
                '0.0.0.0', '--port',
                str(self.tb_port)
            ]
            logger.debug('Start tensorboard with: ' + ' '.join(cmds))
            self.tb = subprocess.Popen(cmds,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       universal_newlines=True)
            # The watcher thread collects the exit status and output.
            Thread(target=self.tb_watcher, daemon=True).start()

            # NOTE(review): repeat_until is defined elsewhere; presumably it
            # retries the probe until it succeeds or the timeout elapses and
            # rebinds check_tb to the final result - confirm.
            @repeat_until(timeout=6.)
            def check_tb():
                try:
                    url = "http://{}:{}".format(self.host, self.tb_port)
                    r = requests.get(
                        url)  # requests.head not supported by tensorboard
                    available = r.status_code == 200
                    sleep(.3)
                    logger.debug('tb on {} status {}, {}'.format(
                        url, r.status_code, r.reason))
                    return available
                except requests.ConnectionError:
                    # Server is not accepting connections (yet).
                    return False

            if not check_tb:
                logger.warning('tb could not be started')

            self.tb_t = time()
            Thread(target=self.tb_killer, daemon=True).start()
            return dict(host=self.host,
                        port=self.tb_port,
                        new=True,
                        available=check_tb,
                        no_event_files=False)

        else:
            self.tb_t = time()  # heartbeat
            return dict(host=self.host,
                        port=self.tb_port,
                        new=False,
                        available=True,
                        no_event_files=False)
Example #4
0
    def tensorboard(self):
        """Return connection details for tensorboard, launching it on demand.

        When a subprocess is already held in ``self.tb`` only the heartbeat
        timestamp ``self.tb_t`` is refreshed. Otherwise a port is obtained,
        tensorboard is spawned on ``self.path``, and daemon threads running
        ``self.tb_watcher`` and ``self.tb_killer`` are started.

        Returns:
            A dict with ``host``, ``port``, ``new`` (True when this call
            spawned the process) and ``available``.
        """
        if self.tb:
            # Already running: record the heartbeat and report it.
            self.tb_t = time()
            return {
                'host': self.host,
                'port': self.tb_port,
                'new': False,
                'available': True,
            }

        self.tb_port = get_free_port(self.host)  # TODO: use self.host here?
        argv = [
            'tensorboard',
            '--logdir', format(self.path),
            '--host', '0.0.0.0',
            '--port', str(self.tb_port),
        ]
        print(' '.join(argv))
        # Capture both streams as text so the watcher can log them.
        self.tb = subprocess.Popen(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        watcher = Thread(target=self.tb_watcher, daemon=True)
        watcher.start()

        # NOTE(review): repeat_until is defined elsewhere; presumably it
        # retries the probe and rebinds check_tb to the outcome - confirm.
        @repeat_until(timeout=3.)
        def check_tb():
            try:
                url = "http://{}:{}".format(self.host, self.tb_port)
                # requests.head is not supported by tensorboard, hence GET.
                response = requests.get(url)
                available = response.status_code == 200
                sleep(.3)
                logger.debug('tb on {} status {}, {}'.format(
                    url, response.status_code, response.reason))
            except requests.ConnectionError:
                return False
            return available

        if not check_tb:
            logger.warning('tb could not be started')

        self.tb_t = time()
        killer = Thread(target=self.tb_killer, daemon=True)
        killer.start()
        return {
            'host': self.host,
            'port': self.tb_port,
            'new': True,
            'available': check_tb,
        }
Example #5
0
 def tb_watcher(self):
     """Wait for the tensorboard subprocess to exit and report how it ended.

     Blocks in ``communicate()`` until the process held in ``self.tb`` has
     terminated, then clears ``self.tb``. A zero exit code is logged at
     debug level; otherwise the summary and the captured stdout/stderr are
     logged as three separate warnings.
     """
     assert isinstance(self.tb, subprocess.Popen)
     captured_out, captured_err = self.tb.communicate()
     exit_code = self.tb.returncode
     # The process is gone; drop the handle.
     self.tb = None
     summary = 'tensorboard on {} for {} returned with code {}'.format(
         self.tb_port, self.path, exit_code)
     if exit_code != 0:
         logger.warning(summary)
         logger.warning('out: ' + captured_out)
         logger.warning('err: ' + captured_err)
     else:
         logger.debug(summary)
     print('watcher finish')