예제 #1
0
파일: task.py 프로젝트: zayedmohamed/DIGITS
    def on_status_update(self):
        """
        Publish this task's current status to SocketIO clients.

        Called when StatusCls.status.setter is used. A 'task update'
        event carrying the status and the rendered status history is
        emitted to the room identified by self.job_id; afterwards the
        job the scheduler returns for that id (if any) is asked to send
        its own status update.
        """
        from digits.webapp import app, socketio

        # Assemble the SocketIO payload
        payload = dict(
            task=self.html_id(),
            update='status',
            status=self.status.name,
            css=self.status.css,
            show=(self.status in [Status.RUN, Status.ERROR]),
            running=self.status.is_running(),
        )
        # The template is rendered inside an application context
        with app.app_context():
            rendered = flask.render_template(
                'status_updates.html',
                updates=self.status_history,
                exception=self.exception,
                traceback=self.traceback,
            )
        payload['html'] = rendered

        socketio.emit('task update', payload, namespace='/jobs', room=self.job_id)

        # Propagate the change to the job registered under self.job_id
        from digits.webapp import scheduler
        owner = scheduler.get_job(self.job_id)
        if not owner:
            return
        owner.on_status_update()
예제 #2
0
    def process_output(self, line):
        """
        Interpret one line of the task's output.

        Progress and category-distribution messages are forwarded to
        SocketIO clients, the total image count is stored on the task,
        and warnings/errors are passed on to the task logger.

        Returns False when preprocessing yields no message, True
        otherwise.
        """
        from digits.webapp import socketio

        _timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        progress_match = re.match(r'Processed (\d+)\/(\d+)', message)
        if progress_match:
            done, total = progress_match.groups()
            self.progress = float(done)/int(total)
            progress_update = {
                'task': self.html_id(),
                'update': 'progress',
                'percentage': int(round(100*self.progress)),
                'eta': utils.time_filters.print_time_diff(self.est_done()),
            }
            socketio.emit('task update', progress_update,
                          namespace='/jobs', room=self.job_id)
            return True

        # distribution (only tracked when a labels file is set)
        category_match = re.match(r'Category (\d+) has (\d+)', message)
        if category_match and self.labels_file is not None:
            if not hasattr(self, 'distribution') or self.distribution is None:
                self.distribution = {}

            category, count = category_match.groups()
            self.distribution[category] = int(count)

            data = self.distribution_data()
            if data:
                distribution_update = {
                    'task': self.html_id(),
                    'update': 'distribution',
                    'data': data,
                }
                socketio.emit('task update', distribution_update,
                              namespace='/jobs', room=self.job_id)
            return True

        # result
        total_match = re.match(r'Total images added: (\d+)', message)
        if total_match:
            self.entries_count = int(total_match.group(1))
            self.logger.debug(message)
            return True

        # anything else: log by severity, remember errors on the task
        if level == 'warning':
            self.logger.warning('%s: %s' % (self.name(), message))
        elif level in ['error', 'critical']:
            self.logger.error('%s: %s' % (self.name(), message))
            self.exception = message

        return True
예제 #3
0
    def add_job(self, job):
        """
        Register a job with the scheduler and announce it to clients.

        Returns False, after logging an error, when the scheduler is not
        running. Otherwise the job is stored in self.jobs under its id,
        an 'added' message is emitted to the 'job_management' room and
        True is returned.
        """
        if not self.running:
            logger.error('Scheduler not running. Cannot add job.')
            return False

        self.jobs[job.id()] = job

        # NOTE: the original author flagged the unconditional use of an
        # application context here as something still to be fixed properly.
        from digits.webapp import app, socketio
        with app.app_context():
            # tell the job_management room that the job was added
            announcement = dict(update='added', job_id=job.id())
            socketio.emit('job update', announcement,
                          namespace='/jobs', room='job_management')

        running_tests = 'DIGITS_MODE_TEST' in os.environ
        if not running_tests:
            # Let the scheduler do a little work before returning
            time.sleep(utils.wait_time())
        return True
예제 #4
0
파일: create_db.py 프로젝트: pfShawn/DIGITS
    def process_output(self, line):
        """
        Handle a single line of output and update the task accordingly.

        Recognized messages:
        "Processed X/Y"          -- updates self.progress and notifies clients
        "Category N has M"       -- updates self.distribution (needs a labels file)
        "Total images added: N"  -- stores self.entries_count

        Any other message is logged when its level is warning, error or
        critical; errors are also stored in self.exception.

        Returns False if no message could be extracted from the line,
        True otherwise.
        """
        from digits.webapp import socketio

        def notify(payload):
            # every notification goes to the room identified by self.job_id
            socketio.emit('task update', payload, namespace='/jobs', room=self.job_id)

        parsed = self.preprocess_output_digits(line)
        timestamp, level, message = parsed
        if not message:
            return False

        # progress
        found = re.match(r'Processed (\d+)\/(\d+)', message)
        if found:
            self.progress = float(found.group(1))/int(found.group(2))
            notify({
                'task': self.html_id(),
                'update': 'progress',
                'percentage': int(round(100*self.progress)),
                'eta': utils.time_filters.print_time_diff(self.est_done()),
            })
            return True

        # distribution
        found = re.match(r'Category (\d+) has (\d+)', message)
        if found and self.labels_file is not None:
            missing = not hasattr(self, 'distribution') or self.distribution is None
            if missing:
                self.distribution = {}

            self.distribution[found.group(1)] = int(found.group(2))

            data = self.distribution_data()
            if data:
                notify({
                    'task': self.html_id(),
                    'update': 'distribution',
                    'data': data,
                })
            return True

        # result
        found = re.match(r'Total images added: (\d+)', message)
        if found:
            self.entries_count = int(found.group(1))
            self.logger.debug(message)
            return True

        if level == 'warning':
            self.logger.warning('%s: %s' % (self.name(), message))
        elif level in ['error', 'critical']:
            self.logger.error('%s: %s' % (self.name(), message))
            self.exception = message

        return True
예제 #5
0
파일: job.py 프로젝트: 4QuantOSS/OpenDIGITS
    def on_status_update(self):
        """
        Notify SocketIO clients that this job's status changed.

        Called when StatusCls.status.setter is used. The same
        'job update' message is emitted to the room identified by
        self.id() and to the 'job_management' room. Once self.status is
        no longer running, the job's `event` (when the attribute exists)
        is set so that threads waiting for completion are released.
        """
        from digits.webapp import app, socketio

        # Call status_of_tasks() once so that 'status' and 'css' are
        # guaranteed to describe the same status object.
        tasks_status = self.status_of_tasks()
        message = {
            'update': 'status',
            'status': tasks_status.name,
            'css': tasks_status.css,
            'running': self.status.is_running(),
            'job_id': self.id(),
        }
        with app.app_context():
            message['html'] = flask.render_template('status_updates.html', updates=self.status_history)

        # send to the job's own room, then to the job_management room as well
        for room in (self.id(), 'job_management'):
            socketio.emit('job update',
                          message,
                          namespace='/jobs',
                          room=room,
                          )

        if not self.status.is_running() and hasattr(self, 'event'):
            # release threads that are waiting for job to complete
            self.event.set()
예제 #6
0
 def server_utilization_updater(self):
     """
     Periodically broadcast GPU utilization to the 'job_management' room.

     One device is looked up for every index in
     range(len(self.resources['gpus'])). The method then loops forever:
     each iteration emits a 'server update' SocketIO message listing,
     per device, its name, its index and whatever
     device_query.get_nvml_info() returns for it, and then sleeps for
     one second.

     Raises RuntimeError if the device for one of the indices cannot
     be loaded.
     """
     # Only socketio is needed here; the unused scheduler import was dropped.
     from digits.webapp import socketio
     from digits import device_query

     devices = []
     # range() of zero is already empty, so no separate "any GPUs?" guard
     for index in range(len(self.resources['gpus'])):
         device = device_query.get_device(index)
         if not device:
             raise RuntimeError(
                 'Failed to load gpu information for GPU #"%s"' % index)
         devices.append((index, device))

     while True:
         data_gpu = []
         for index, device in devices:
             update = {'name': device.name, 'index': index}
             nvml_info = device_query.get_nvml_info(index)
             if nvml_info is not None:
                 update.update(nvml_info)
             data_gpu.append(update)
         socketio.emit('server update', {
             'update': 'gpus_utilization',
             'data_gpu': data_gpu,
         },
                       namespace='/jobs',
                       room='job_management')
         gevent.sleep(1)
예제 #7
0
    def add_job(self, job):
        """
        Store a job in self.jobs and tell clients it was added.

        Returns True on success. When the scheduler is not running an
        error is logged, nothing is stored and False is returned.
        """
        if self.running:
            job_key = job.id()
            self.jobs[job_key] = job

            # NOTE: always entering an application context here is a
            # stop-gap the original author wanted to fix properly.
            from digits.webapp import app, socketio
            with app.app_context():
                # send message to job_management room that the job is added
                added_message = {
                    'update': 'added',
                    'job_id': job.id(),
                }
                socketio.emit('job update', added_message,
                              namespace='/jobs', room='job_management')

            if 'DIGITS_MODE_TEST' not in os.environ:
                # Give the scheduler a moment to work before returning
                pause = utils.wait_time()
                time.sleep(pause)
            return True

        logger.error('Scheduler not running. Cannot add job.')
        return False
예제 #8
0
    def on_status_update(self):
        """
        Broadcast the job's status over SocketIO.

        Called when StatusCls.status.setter is used. One 'job update'
        message is built and sent to two rooms: the one identified by
        self.id() and 'job_management'.
        """
        from digits.webapp import app, socketio

        payload = dict(
            update='status',
            status=self.status.name,
            css=self.status.css,
            running=self.status.is_running(),
            job_id=self.id(),
        )
        with app.app_context():
            payload['html'] = flask.render_template('status_updates.html', updates=self.status_history)

        # the job's own room first, then the job_management room as well
        for target_room in (self.id(), 'job_management'):
            socketio.emit('job update', payload, namespace='/jobs', room=target_room)
예제 #9
0
파일: job.py 프로젝트: code4101/DIGITS
    def on_status_update(self):
        """
        Send the job's updated status to SocketIO clients.

        Called when StatusCls.status.setter is used. A single
        'job update' message is emitted twice: to the room identified by
        self.id() and to 'job_management'. When self.status is no longer
        running and the job has an `event` attribute, that event is set
        to release threads waiting for the job to complete.
        """
        from digits.webapp import app, socketio

        # A single status_of_tasks() call keeps 'status' and 'css'
        # consistent with each other and avoids computing it twice.
        current = self.status_of_tasks()
        message = {
            'update': 'status',
            'status': current.name,
            'css': current.css,
            'running': self.status.is_running(),
            'job_id': self.id(),
        }
        with app.app_context():
            message['html'] = flask.render_template('status_updates.html', updates=self.status_history)

        socketio.emit('job update',
                      message,
                      namespace='/jobs',
                      room=self.id(),
                      )

        # send message to job_management room as well
        socketio.emit('job update',
                      message,
                      namespace='/jobs',
                      room='job_management',
                      )

        if not self.status.is_running() and hasattr(self, 'event'):
            # release threads that are waiting for job to complete
            self.event.set()
예제 #10
0
파일: job.py 프로젝트: maotong/DIGITS
    def on_status_update(self):
        """
        Push a 'job update' status message to SocketIO clients.

        Called when StatusCls.status.setter is used. The message holds
        the status name, its css value, the running flag, the job id and
        the rendered status history; it is delivered to the job's own
        room and to the 'job_management' room.
        """
        from digits.webapp import app, socketio

        message = {'update': 'status'}
        message['status'] = self.status.name
        message['css'] = self.status.css
        message['running'] = self.status.is_running()
        message['job_id'] = self.id()

        with app.app_context():
            history_html = flask.render_template('status_updates.html', updates=self.status_history)
            message['html'] = history_html

        def send_to(room):
            # identical event and payload, only the room differs
            socketio.emit('job update', message, namespace='/jobs', room=room)

        send_to(self.id())
        # send message to job_management room as well
        send_to('job_management')
예제 #11
0
파일: task.py 프로젝트: JD-accounts/DIGITS
    def on_status_update(self):
        """
        Emit a 'task update' status message and notify the related job.

        Called when StatusCls.status.setter is used. The message is sent
        to the room identified by self.job_id. If the scheduler returns
        a job for that id, its on_status_update() is invoked afterwards.
        """
        from digits.webapp import app, socketio

        # Send socketio updates
        message = {'task': self.html_id(), 'update': 'status'}
        message['status'] = self.status.name
        message['css'] = self.status.css
        message['show'] = (self.status in [Status.RUN, Status.ERROR])
        message['running'] = self.status.is_running()

        with app.app_context():
            template_args = dict(
                updates=self.status_history,
                exception=self.exception,
                traceback=self.traceback,
            )
            message['html'] = flask.render_template('status_updates.html', **template_args)

        socketio.emit('task update', message, namespace='/jobs', room=self.job_id)

        from digits.webapp import scheduler
        related_job = scheduler.get_job(self.job_id)
        if related_job:
            related_job.on_status_update()
예제 #12
0
    def process_output(self, line):
        """
        Record one output line in the create_db log and act on it.

        The raw line is appended to self.create_db_log and flushed.
        Progress, per-type category distribution and the number of
        images written are extracted from recognized messages; other
        messages are logged when their level is warning, error or
        critical.

        Returns False when no message could be extracted from the line,
        True otherwise.
        """
        from digits.webapp import socketio

        log = self.create_db_log
        log.write('%s\n' % line)
        log.flush()

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        progress_match = re.match(r'Processed (\d+)\/(\d+)', message)
        if progress_match:
            processed, total = progress_match.groups()
            self.progress = float(processed) / int(total)
            self.emit_progress_update()
            return True

        # distribution, keyed by type and then by category
        dist_match = re.match(r'Type (\d+): Category (\d+) has (\d+)', message)
        if dist_match and self.labels_file is not None:
            if not hasattr(self, 'distribution') or self.distribution is None:
                self.distribution = OrderedDict()

            type_id, category, count = (int(group) for group in dist_match.groups())
            if type_id not in self.distribution:
                self.distribution[type_id] = {}
            self.distribution[type_id][category] = count

            data = self.distribution_data()
            if data:
                payload = {
                    'task': self.html_id(),
                    'update': 'distribution',
                    'data': data,
                }
                socketio.emit('task update', payload, namespace='/jobs', room=self.job_id)
            return True

        # result
        written_match = re.match(r'(\d+) images written to database', message)
        if written_match:
            self.entries_count = int(written_match.group(1))
            self.logger.debug(message)
            return True

        if level == 'warning':
            self.logger.warning('%s: %s' % (self.name(), message))
        elif level in ['error', 'critical']:
            self.logger.error('%s: %s' % (self.name(), message))
            self.exception = message

        return True
예제 #13
0
파일: train.py 프로젝트: gheinrich/DIGITS
    def hw_socketio_updater(self, gpus):
        """
        Send SocketIO messages about hardware utilization to connected
        clients, forever.

        Each iteration gathers CPU/memory figures for the process held
        in self.p (when there is one), the information returned by
        device_query.get_nvml_info() for every requested GPU, renders
        them with the "models/gpu_utilization.html" template and emits
        the result as a "task update" to the room identified by
        self.job_id.

        Arguments:
        gpus -- a list of identifiers for the GPUs currently being used

        Raises RuntimeError if the device for one of the GPUs cannot be
        loaded.
        """
        from digits.webapp import app, socketio

        devices = []
        for index in (() if gpus is None else gpus):
            device = device_query.get_device(index)
            if not device:
                raise RuntimeError('Failed to load gpu information for GPU #"%s"' % index)
            devices.append((index, device))

        # this thread continues until killed in after_run()
        while True:
            # CPU (Non-GPU) Info
            data_cpu = {}
            process = getattr(self, "p", None)  # 'self.p' is the system call object
            if process is not None:
                pid = process.pid
                data_cpu["pid"] = pid
                try:
                    ps = psutil.Process(pid)
                    if ps.is_running():
                        # psutil releases before 2.x expose get_*-prefixed methods
                        legacy_api = psutil.version_info[0] < 2
                        if legacy_api:
                            data_cpu["cpu_pct"] = ps.get_cpu_percent(interval=1)
                            data_cpu["mem_pct"] = ps.get_memory_percent()
                            data_cpu["mem_used"] = ps.get_memory_info().rss
                        else:
                            data_cpu["cpu_pct"] = ps.cpu_percent(interval=1)
                            data_cpu["mem_pct"] = ps.memory_percent()
                            data_cpu["mem_used"] = ps.memory_info().rss
                except psutil.NoSuchProcess:
                    # In rare case of instant process crash or PID went zombie (report nothing)
                    pass

            data_gpu = []
            for index, device in devices:
                entry = dict(name=device.name, index=index)
                nvml_info = device_query.get_nvml_info(index)
                if nvml_info is not None:
                    entry.update(nvml_info)
                data_gpu.append(entry)

            with app.app_context():
                html = flask.render_template("models/gpu_utilization.html", data_gpu=data_gpu, data_cpu=data_cpu)

                payload = {"task": self.html_id(), "update": "gpu_utilization", "html": html}
                socketio.emit("task update", payload, namespace="/jobs", room=self.job_id)
            gevent.sleep(1)
예제 #14
0
파일: scheduler.py 프로젝트: flx42/DIGITS
    def delete_job(self, job):
        """
        Remove a job from the scheduler and delete its folder from disk.

        Arguments:
        job -- a Job instance or a job id (str or unicode)

        Returns True if the job was found in self.jobs and deleted, or
        if a folder with that id was found directly inside the
        configured jobs_dir and removed; False otherwise.

        Raises ValueError for any other argument type and
        errors.DeleteError when the job is a DatasetJob that at least
        one ModelJob depends on.
        """
        given_as_id = isinstance(job, str) or isinstance(job, unicode)
        if given_as_id:
            job_id = str(job)
        elif isinstance(job, Job):
            job_id = job.id()
        else:
            raise ValueError('called delete_job with a %s' % type(job))

        # try to find the job
        for position, candidate in enumerate(self.jobs):
            if candidate.id() != job_id:
                continue

            dependent_jobs = []
            if isinstance(candidate, DatasetJob):
                # check for dependencies
                for other in self.jobs:
                    if isinstance(other, ModelJob) and other.dataset_id == candidate.id():
                        logger.error(
                            'Cannot delete "%s" (%s) because "%s" (%s) depends on it.'
                            % (candidate.name(), candidate.id(), other.name(), other.id()))
                        dependent_jobs.append(other.name())

            if dependent_jobs:
                how_many = len(dependent_jobs)
                quoted_names = ', '.join(['"%s"' % name for name in dependent_jobs])
                error_message = 'Cannot delete "%s" because %d model%s depend%s on it: %s' % (
                    candidate.name(), how_many,
                    ('' if how_many == 1 else 's'),
                    ('s' if how_many == 1 else ''),
                    quoted_names)
                raise errors.DeleteError(error_message)

            # the method returns right after this pop, so the list is
            # not iterated any further once it has been modified
            self.jobs.pop(position)
            candidate.abort()
            if os.path.exists(candidate.dir()):
                shutil.rmtree(candidate.dir())
            logger.info('Job deleted.', job_id=job_id)

            from digits.webapp import socketio
            deleted_message = {
                'update': 'deleted',
                'job_id': candidate.id()
            }
            socketio.emit('job update', deleted_message,
                          namespace='/jobs', room='job_management')
            return True

        # see if the folder exists on disk; only a direct child of
        # jobs_dir is ever removed
        path = os.path.normpath(os.path.join(config_value('jobs_dir'), job_id))
        is_direct_child = os.path.dirname(path) == config_value('jobs_dir')
        if is_direct_child and os.path.exists(path):
            shutil.rmtree(path)
            return True

        return False
예제 #15
0
    def process_output(self, line):
        """
        Log and interpret a single line of output.

        The line is written to self.create_db_log first. Afterwards:
        "Processed X/Y" updates self.progress and triggers
        emit_progress_update(); "Type T: Category C has N" updates
        self.distribution[T][C] when a labels file is set;
        "N images written to database" stores self.entries_count.
        Remaining messages are logged according to their level, and
        error/critical messages are stored in self.exception.

        Returns False if the line contains no message, else True.
        """
        from digits.webapp import socketio

        self.create_db_log.write('%s\n' % line)
        self.create_db_log.flush()

        _, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        hit = re.match(r'Processed (\d+)\/(\d+)', message)
        if hit:
            numerator = float(hit.group(1))
            denominator = int(hit.group(2))
            self.progress = numerator/denominator
            self.emit_progress_update()
            return True

        # distribution
        hit = re.match(r'Type (\d+): Category (\d+) has (\d+)', message)
        if hit and self.labels_file is not None:
            if not hasattr(self, 'distribution') or self.distribution is None:
                self.distribution = OrderedDict()

            type_key = int(hit.group(1))
            category_key = int(hit.group(2))
            # create the per-type dict on first use, then record the count
            per_type = self.distribution.setdefault(type_key, {})
            per_type[category_key] = int(hit.group(3))

            data = self.distribution_data()
            if data:
                socketio.emit(
                    'task update',
                    {
                        'task': self.html_id(),
                        'update': 'distribution',
                        'data': data,
                    },
                    namespace='/jobs',
                    room=self.job_id,
                )
            return True

        # result
        hit = re.match(r'(\d+) images written to database', message)
        if hit:
            self.entries_count = int(hit.group(1))
            self.logger.debug(message)
            return True

        if level == 'warning':
            self.logger.warning('%s: %s' % (self.name(), message))
        elif level in ['error', 'critical']:
            self.logger.error('%s: %s' % (self.name(), message))
            self.exception = message

        return True
예제 #16
0
    def delete_job(self, job):
        """
        Delete a job: drop it from self.jobs, abort it and remove its
        folder from disk.

        Arguments:
        job -- a Job instance or a job id (str or unicode)

        Returns True if the job was found and deleted, or if a folder
        named after the id sits directly inside the configured jobs_dir
        and was removed. Returns False otherwise.

        Raises ValueError when called with another type, and
        errors.DeleteError when the job is a DatasetJob that one or
        more ModelJobs depend on.
        """
        given_as_id = isinstance(job, str) or isinstance(job, unicode)
        if not given_as_id and not isinstance(job, Job):
            raise ValueError('called delete_job with a %s' % type(job))
        job_id = str(job) if given_as_id else job.id()

        # try to find the job
        registered = self.jobs.get(job_id, None)
        if registered:
            blockers = []
            if isinstance(registered, DatasetJob):
                # check for dependencies
                for other in self.jobs.values():
                    if not isinstance(other, ModelJob):
                        continue
                    if other.dataset_id == registered.id():
                        logger.error('Cannot delete "%s" (%s) because "%s" (%s) depends on it.' %
                                     (registered.name(), registered.id(), other.name(), other.id()))
                        blockers.append(other.name())

            if blockers:
                how_many = len(blockers)
                error_message = 'Cannot delete "%s" because %d model%s depend%s on it: %s' % (
                    registered.name(),
                    how_many,
                    ('' if how_many == 1 else 's'),
                    ('s' if how_many == 1 else ''),
                    ', '.join(['"%s"' % name for name in blockers]))
                raise errors.DeleteError(error_message)

            self.jobs.pop(job_id, None)
            registered.abort()
            if os.path.exists(registered.dir()):
                shutil.rmtree(registered.dir())
            logger.info('Job deleted.', job_id=job_id)

            from digits.webapp import socketio
            notice = {
                'update': 'deleted',
                'job_id': registered.id()
            }
            socketio.emit('job update', notice, namespace='/jobs', room='job_management')
            return True

        # see if the folder exists on disk; it must be a direct child
        # of jobs_dir to be removed
        path = os.path.normpath(os.path.join(config_value('jobs_dir'), job_id))
        if os.path.dirname(path) == config_value('jobs_dir') and os.path.exists(path):
            shutil.rmtree(path)
            return True

        return False
예제 #17
0
파일: views.py 프로젝트: zjucsxxd/DIGITS
 def emit(self, progress):
     """
     Emit the progress to the client.

     Sends an 'update' event holding self._model_id and the given
     progress value to the 'job_management' room.
     """
     payload = dict(
         model_id=self._model_id,
         update='progress',
         progress=progress,
     )
     socketio.emit('update', payload, namespace='/jobs', room='job_management')
     # micro sleep so that emit is broadcast to the client
     time.sleep(0.001)
예제 #18
0
    def send_snapshot_update(self):
        """
        Emit a 'task update' SocketIO message carrying the snapshot list
        to the room identified by self.job_id.
        """
        # TODO: move to TrainTask
        from digits.webapp import socketio

        payload = {
            'task': self.html_id(),
            'update': 'snapshots',
            'data': self.snapshot_list(),
        }
        socketio.emit('task update', payload, namespace='/jobs', room=self.job_id)
예제 #19
0
    def send_snapshot_update(self):
        """
        Notify clients in this task's job room about the snapshot list.

        The message is a "task update" event whose "data" field holds
        the value returned by self.snapshot_list().
        """
        from digits.webapp import socketio

        message = dict(
            task=self.html_id(),
            update="snapshots",
            data=self.snapshot_list(),
        )
        socketio.emit("task update", message, namespace="/jobs", room=self.job_id)
예제 #20
0
    def send_snapshot_update(self):
        """
        Publish the current snapshot list over SocketIO.

        A 'task update' event with update type 'snapshots' is sent to
        the room identified by self.job_id.
        """
        # TODO: move to TrainTask
        from digits.webapp import socketio

        task_id = self.html_id()
        snapshots = self.snapshot_list()
        socketio.emit(
            'task update',
            {'task': task_id, 'update': 'snapshots', 'data': snapshots},
            namespace='/jobs',
            room=self.job_id,
        )
예제 #21
0
    def process_output(self, line):
        """
        Log and interpret one line of output.

        The raw line is appended to self.analyze_db_log and flushed.
        Recognized messages:
        "Progress: X/Y"                     -- updates self.progress and
                                               notifies SocketIO clients
        "Total entries: N"                  -- stores self.image_count
        "N entries found with shape AxBxC"  -- stores A, B and C as
                                               image_width, image_height
                                               and image_channels
        Any other message is logged when its level is warning, error or
        critical; error/critical messages are also stored in
        self.exception.

        Returns False when no message could be extracted from the line,
        True otherwise.
        """
        from digits.webapp import socketio

        self.analyze_db_log.write("%s\n" % line)
        self.analyze_db_log.flush()

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        match = re.match(r"Progress: (\d+)\/(\d+)", message)
        if match:
            self.progress = float(match.group(1)) / float(match.group(2))
            socketio.emit(
                "task update",
                {
                    "task": self.html_id(),
                    "update": "progress",
                    "percentage": int(round(100 * self.progress)),
                    "eta": utils.time_filters.print_time_diff(self.est_done()),
                },
                namespace="/jobs",
                room=self.job_id,
            )
            return True

        # total count
        match = re.match(r"Total entries: (\d+)", message)
        if match:
            self.image_count = int(match.group(1))
            return True

        # image dimensions
        match = re.match(r"(\d+) entries found with shape ((\d+)x(\d+)x(\d+))", message)
        if match:
            # group(1), the entry count, is not needed here; the unused
            # local that used to hold it was removed
            dims = match.group(2)
            self.image_width = int(match.group(3))
            self.image_height = int(match.group(4))
            self.image_channels = int(match.group(5))
            self.logger.debug("Images are %s" % dims)
            return True

        if level == "warning":
            self.logger.warning("%s: %s" % (self.name(), message))
            return True
        if level in ["error", "critical"]:
            self.logger.error("%s: %s" % (self.name(), message))
            self.exception = message
            return True

        return True
예제 #22
0
    def process_output(self, line):
        """
        Write one output line to the analyze_db log and act on it.

        Progress messages update self.progress and are forwarded to the
        room identified by self.job_id; the total entry count and the
        image dimensions are stored on the task; other messages are
        logged by level, with error/critical ones also stored in
        self.exception.

        Returns False when the line yields no message, True otherwise.
        """
        from digits.webapp import socketio

        self.analyze_db_log.write('%s\n' % line)
        self.analyze_db_log.flush()

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        match = re.match(r'Progress: (\d+)\/(\d+)', message)
        if match:
            self.progress = float(match.group(1))/float(match.group(2))
            socketio.emit('task update',
                    {
                        'task': self.html_id(),
                        'update': 'progress',
                        'percentage': int(round(100*self.progress)),
                        'eta': utils.time_filters.print_time_diff(self.est_done()),
                        },
                    namespace='/jobs',
                    room=self.job_id,
                    )
            return True

        # total count
        match = re.match(r'Total entries: (\d+)', message)
        if match:
            self.image_count = int(match.group(1))
            return True

        # image dimensions
        match = re.match(r'(\d+) entries found with shape ((\d+)x(\d+)x(\d+))', message)
        if match:
            # the leading entry count (group 1) is deliberately ignored;
            # it used to be parsed into a local that was never read
            dims = match.group(2)
            self.image_width = int(match.group(3))
            self.image_height = int(match.group(4))
            self.image_channels = int(match.group(5))
            self.logger.debug('Images are %s' % dims)
            return True

        if level == 'warning':
            self.logger.warning('%s: %s' % (self.name(), message))
            return True
        if level in ['error', 'critical']:
            self.logger.error('%s: %s' % (self.name(), message))
            self.exception = message
            return True

        return True
예제 #23
0
    def process_output(self, line):
        """
        Process a single line of output.

        Steps, in order:
        1. append the raw line to self.analyze_db_log and flush it;
        2. extract (timestamp, level, message) via
           preprocess_output_digits() and return False if there is no
           message;
        3. handle "Progress: X/Y", "Total entries: N" and
           "N entries found with shape AxBxC" messages;
        4. log any other message whose level is warning, error or
           critical (error/critical also set self.exception).

        Returns True for every line that produced a message.
        """
        from digits.webapp import socketio

        self.analyze_db_log.write('%s\n' % line)
        self.analyze_db_log.flush()

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        match = re.match(r'Progress: (\d+)\/(\d+)', message)
        if match:
            self.progress = float(match.group(1))/float(match.group(2))
            socketio.emit('task update',
                    {
                        'task': self.html_id(),
                        'update': 'progress',
                        'percentage': int(round(100*self.progress)),
                        'eta': utils.time_filters.print_time_diff(self.est_done()),
                        },
                    namespace='/jobs',
                    room=self.job_id,
                    )
            return True

        # total count
        match = re.match(r'Total entries: (\d+)', message)
        if match:
            self.image_count = int(match.group(1))
            return True

        # image dimensions
        match = re.match(r'(\d+) entries found with shape ((\d+)x(\d+)x(\d+))', message)
        if match:
            # only the shape is used; the unused local for the entry
            # count in group 1 has been dropped
            dims = match.group(2)
            self.image_width = int(match.group(3))
            self.image_height = int(match.group(4))
            self.image_channels = int(match.group(5))
            self.logger.debug('Images are %s' % dims)
            return True

        if level == 'warning':
            self.logger.warning('%s: %s' % (self.name(), message))
            return True
        if level in ['error', 'critical']:
            self.logger.error('%s: %s' % (self.name(), message))
            self.exception = message
            return True

        return True
예제 #24
0
파일: job.py 프로젝트: zhangxiaoli73/DIGITS
 def emit_attribute_changed(self, attribute, value):
     """
     Send a 'job update' message announcing a changed attribute of this job

     Arguments:
     attribute -- sent as the 'attribute' field of the message
     value -- sent as the 'value' field of the message
     """
     from digits.webapp import socketio

     payload = {
         'job_id': self.id(),
         'update': 'attribute',
         'attribute': attribute,
         'value': value,
     }
     # the message goes to the 'job_management' room of the '/jobs' namespace
     socketio.emit('job update', payload, namespace='/jobs', room='job_management')
예제 #25
0
파일: views.py 프로젝트: Dasona/DIGITS
 def emit(self, progress):
     """ send the given progress value for this model to the client """
     payload = {
         'model_id': self._model_id,
         'update': 'progress',
         'progress': progress,
     }
     socketio.emit('update', payload, namespace='/jobs', room='job_management')
     # very short pause so that the emit is broadcast to the client
     time.sleep(0.001)
예제 #26
0
파일: create_db.py 프로젝트: chintak/DIGITS
    def process_output(self, line):
        """
        Handle one line of output: append it to self.create_db_log, then
        parse progress, the per-category distribution and the final image
        count out of the message

        Arguments:
        line -- the output line to process

        Returns False if preprocess_output_digits() yields no message,
        True otherwise
        """
        from digits.webapp import socketio

        self.create_db_log.write("%s\n" % line)
        self.create_db_log.flush()

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        match = re.match(r"Processed (\d+)\/(\d+)", message)
        if match:
            total = int(match.group(2))
            # A total of zero carries no progress information; skip the
            # update instead of raising ZeroDivisionError
            if total:
                self.progress = float(match.group(1)) / total
                self.emit_progress_update()
            return True

        # distribution
        match = re.match(r"Category (\d+) has (\d+)", message)
        if match and self.labels_file is not None:
            # the dictionary is created lazily on the first category line
            if not hasattr(self, "distribution") or self.distribution is None:
                self.distribution = {}

            self.distribution[match.group(1)] = int(match.group(2))

            data = self.distribution_data()
            if data:
                socketio.emit(
                    "task update",
                    {"task": self.html_id(), "update": "distribution", "data": data},
                    namespace="/jobs",
                    room=self.job_id,
                )
            return True

        # result
        match = re.match(r"(\d+) images written to database", message)
        if match:
            self.entries_count = int(match.group(1))
            self.logger.debug(message)
            return True

        # anything else is only reported when it is a warning or an error
        if level == "warning":
            self.logger.warning("%s: %s" % (self.name(), message))
            return True
        if level in ["error", "critical"]:
            self.logger.error("%s: %s" % (self.name(), message))
            self.exception = message
            return True

        return True
예제 #27
0
    def process_output(self, line):
        """
        Handle one line of output: parse progress, the category total and
        the per-split counts out of the message

        Arguments:
        line -- the output line to process

        Returns False if preprocess_output_digits() yields no message,
        True otherwise
        """
        from digits.webapp import socketio

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        progress_match = re.match(
            r'Progress: ([-+]?[0-9]*\.?[0-9]+(e[-+]?[0-9]+)?)', message)
        if progress_match:
            self.progress = float(progress_match.group(1))
            payload = {
                'task': self.html_id(),
                'update': 'progress',
                'percentage': int(round(100 * self.progress)),
                'eta': utils.time_filters.print_time_diff(self.est_done()),
            }
            socketio.emit('task update', payload,
                          namespace='/jobs', room=self.job_id)
            return True

        # totals
        totals_match = re.match(r'Found (\d+) images in (\d+) categories',
                                message)
        if totals_match:
            self.label_count = int(totals_match.group(2))
            return True

        # splits
        split_match = re.match(r'Selected (\d+) for (\w+)', message)
        if split_match:
            amount, split_name = split_match.group(1), split_match.group(2)
            # the first prefix that fits decides which counter is set
            if split_name.startswith('training'):
                self.train_count = int(amount)
            elif split_name.startswith('validation'):
                self.val_count = int(amount)
            elif split_name.startswith('test'):
                self.test_count = int(amount)
            return True

        # unrecognized messages are logged only for warnings and errors
        if level == 'warning':
            self.logger.warning('%s: %s' % (self.name(), message))
        elif level in ('error', 'critical'):
            self.logger.error('%s: %s' % (self.name(), message))
            self.exception = message

        return True
예제 #28
0
    def update_distribution_graph(self):
        """
        Send the current distribution data to the job's room, if there is any
        """
        from digits.webapp import socketio

        data = self.distribution_data()
        if not data:
            # nothing to show yet
            return

        payload = {
            'task': self.html_id(),
            'update': 'distribution',
            'data': data,
        }
        socketio.emit('task update', payload,
                      namespace='/jobs', room=self.job_id)
예제 #29
0
 def emit_gpus_available(self):
     """
     Call socketio.emit with the total and remaining GPU counts
     """
     from digits.webapp import scheduler, socketio

     # the total comes from self.resources, the remainder from the scheduler
     total = len(self.resources['gpus'])
     remaining = sum(gpu.remaining() for gpu in scheduler.resources['gpus'])

     payload = {
         'update': 'gpus_available',
         'total_gpu_count': total,
         'remaining_gpu_count': remaining,
     }
     socketio.emit('server update', payload,
                   namespace='/jobs', room='job_management')
예제 #30
0
    def update_distribution_graph(self):
        """
        Emit a 'distribution' task update when distribution data is available
        """
        from digits.webapp import socketio

        distribution = self.distribution_data()
        if distribution:
            message = {
                'task': self.html_id(),
                'update': 'distribution',
                'data': distribution,
            }
            socketio.emit('task update', message,
                          namespace='/jobs', room=self.job_id)
예제 #31
0
 def emit_gpus_available(self):
     """
     Emit a 'server update' message with the total and remaining GPU counts
     """
     from digits.webapp import scheduler, socketio

     message = {'update': 'gpus_available'}
     # the total comes from self.resources, the remainder from the scheduler
     message['total_gpu_count'] = len(self.resources['gpus'])
     message['remaining_gpu_count'] = sum(
         resource.remaining() for resource in scheduler.resources['gpus'])

     socketio.emit('server update', message,
                   namespace='/jobs', room='job_management')
예제 #32
0
파일: job.py 프로젝트: zhangxiaoli73/DIGITS
    def emit_progress_update(self):
        """
        Send a 'job update' message carrying the job's progress as a
        whole-number percentage
        """
        progress = self.get_progress()

        from digits.webapp import socketio

        payload = {
            'job_id': self.id(),
            'update': 'progress',
            'percentage': int(round(100 * progress)),
        }
        socketio.emit('job update', payload,
                      namespace='/jobs', room='job_management')
예제 #33
0
    def process_output(self, line):
        """
        Handle one line of output: parse progress, the category total and
        the per-split counts out of the message

        Arguments:
        line -- the output line to process

        Returns False if preprocess_output_digits() yields no message,
        True otherwise
        """
        from digits.webapp import socketio

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        found = re.match(r'Progress: ([-+]?[0-9]*\.?[0-9]+(e[-+]?[0-9]+)?)', message)
        if found:
            self.progress = float(found.group(1))
            update = {
                'task': self.html_id(),
                'update': 'progress',
                'percentage': int(round(100*self.progress)),
                'eta': utils.time_filters.print_time_diff(self.est_done()),
            }
            socketio.emit('task update', update, namespace='/jobs', room=self.job_id)
            return True

        # totals
        found = re.match(r'Found (\d+) images in (\d+) categories', message)
        if found:
            self.label_count = int(found.group(2))
            return True

        # splits
        found = re.match(r'Selected (\d+) for (\w+)', message)
        if found:
            selected, split = found.group(1), found.group(2)
            # the first prefix that fits decides which counter is set
            if split.startswith('training'):
                self.train_count = int(selected)
            elif split.startswith('validation'):
                self.val_count = int(selected)
            elif split.startswith('test'):
                self.test_count = int(selected)
            return True

        # unrecognized messages are logged only for warnings and errors
        if level == 'warning':
            self.logger.warning('%s: %s' % (self.name(), message))
        elif level in ('error', 'critical'):
            self.logger.error('%s: %s' % (self.name(), message))
            self.exception = message

        return True
예제 #34
0
파일: job.py 프로젝트: aichemzee/DIGITS
    def on_status_update(self):
        """
        Run the parent handler, then send 'job reload_page' to this job's
        room once the status is no longer a running one
        """
        super(InferenceJob, self).on_status_update()

        from digits.webapp import socketio

        if self.status.is_running():
            return

        message = {
                'job_id': self.id(),
                }

        socketio.emit('job reload_page',
                message,
                namespace='/jobs',
                room=self.id(),
                )
예제 #35
0
파일: job.py 프로젝트: code4101/DIGITS
 def emit_attribute_changed(self, attribute, value):
     """
     Emit a 'job update' message describing one changed attribute

     Arguments:
     attribute -- sent as the 'attribute' field of the message
     value -- sent as the 'value' field of the message
     """
     from digits.webapp import socketio

     message = dict(job_id=self.id(), update='attribute')
     message['attribute'] = attribute
     message['value'] = value

     socketio.emit('job update', message,
                   namespace='/jobs', room='job_management')
예제 #36
0
파일: job.py 프로젝트: maotong/DIGITS
    def emit_progress_update(self):
        """
        Emit a 'job update' message with the rounded progress percentage
        """
        progress = self.get_progress()

        from digits.webapp import socketio

        message = {
            'job_id': self.id(),
            'update': 'progress',
            'percentage': int(round(100*progress)),
        }
        socketio.emit('job update', message,
                      namespace='/jobs', room='job_management')
예제 #37
0
    def save_train_output(self, *args):
        """
        Save output to self.train_outputs and, at most once every 5 seconds,
        send the combined graph and learning-rate graph data over socketio
        """
        from digits.webapp import socketio

        saved = self.save_output(self.train_outputs, *args)
        if not saved:
            return

        # throttle: skip the graph updates if the last one is under 5s old
        if self.last_train_update:
            elapsed = time.time() - self.last_train_update
            if elapsed < 5:
                return
        self.last_train_update = time.time()

        percent_done = round(100 * self.current_epoch / self.train_epochs, 2)
        self.logger.debug('Training %s%% complete.' % percent_done)

        # loss graph data
        combined = self.combined_graph_data()
        if combined:
            payload = {
                'task': self.html_id(),
                'update': 'combined_graph',
                'data': combined,
            }
            socketio.emit('task update', payload,
                          namespace='/jobs', room=self.job_id)

        # lr graph data
        learning_rates = self.lr_graph_data()
        if learning_rates:
            payload = {
                'task': self.html_id(),
                'update': 'lr_graph',
                'data': learning_rates,
            }
            socketio.emit('task update', payload,
                          namespace='/jobs', room=self.job_id)
예제 #38
0
    def gpu_socketio_updater(self, gpus):
        """
        This thread sends SocketIO messages about GPU utilization
        to connected clients

        Arguments:
        gpus -- a list of identifiers for the GPUs currently being used

        Raises RuntimeError if no device could be looked up for any of
        the given identifiers
        """
        from digits.webapp import app, socketio

        # keep only the identifiers for which a device is found
        devices = []
        for gpu_index in gpus:
            found = device_query.get_device(gpu_index)
            if not found:
                continue
            devices.append((gpu_index, found))

        if len(devices) == 0:
            raise RuntimeError('Failed to load gpu information for "%s"' %
                               gpus)

        # this thread continues until killed in after_run()
        while True:
            data = []
            for gpu_index, gpu in devices:
                entry = {'name': gpu.name, 'index': gpu_index}
                nvml_info = device_query.get_nvml_info(gpu_index)
                if nvml_info is not None:
                    entry.update(nvml_info)
                data.append(entry)

            with app.app_context():
                html = flask.render_template('models/gpu_utilization.html',
                                             data=data)
                payload = {
                    'task': self.html_id(),
                    'update': 'gpu_utilization',
                    'html': html,
                }
                socketio.emit('task update', payload,
                              namespace='/jobs', room=self.job_id)

            # wait one second before the next update
            gevent.sleep(1)
예제 #39
0
파일: train.py 프로젝트: gheinrich/DIGITS
    def save_val_output(self, *args):
        """
        Save output to self.val_outputs and send the combined graph data
        over socketio when there is any
        """
        from digits.webapp import socketio

        saved = self.save_output(self.val_outputs, *args)
        if not saved:
            return

        # loss graph data
        graph = self.combined_graph_data()
        if not graph:
            return

        payload = {
            "task": self.html_id(),
            "update": "combined_graph",
            "data": graph,
        }
        socketio.emit("task update", payload, namespace="/jobs", room=self.job_id)
예제 #40
0
    def gpu_socketio_updater(self, gpus):
        """
        This thread sends SocketIO messages about GPU utilization
        to connected clients

        Arguments:
        gpus -- a list of identifiers for the GPUs currently being used

        Raises RuntimeError if no device could be looked up for any of
        the given identifiers
        """
        from digits.webapp import app, socketio

        # pair every identifier with its device, dropping failed lookups
        devices = []
        for identifier in gpus:
            gpu = device_query.get_device(identifier)
            if gpu:
                devices += [(identifier, gpu)]

        if not devices:
            raise RuntimeError('Failed to load gpu information for "%s"' % gpus)

        # this thread continues until killed in after_run()
        while True:
            data = []

            for identifier, gpu in devices:
                info = dict(name=gpu.name, index=identifier)
                nvml_info = device_query.get_nvml_info(identifier)
                if nvml_info is not None:
                    info.update(nvml_info)
                data.append(info)

            with app.app_context():
                rendered = flask.render_template('models/gpu_utilization.html',
                                                 data=data)
                message = {
                    'task': self.html_id(),
                    'update': 'gpu_utilization',
                    'html': rendered,
                }
                socketio.emit('task update', message,
                              namespace='/jobs', room=self.job_id)

            # wait one second before the next update
            gevent.sleep(1)
예제 #41
0
파일: scheduler.py 프로젝트: flx42/DIGITS
    def add_job(self, job):
        """
        Add a job to self.jobs and announce it to the job_management room

        Arguments:
        job -- the job to add

        Returns False (after logging an error) if the scheduler is not
        running, True otherwise
        """
        if not self.running:
            logger.error('Scheduler not running. Cannot add job.')
            return False
        else:
            self.jobs.append(job)

            # Need to fix this properly
            # if True or flask._app_ctx_stack.top is not None:
            from digits.webapp import app
            with app.app_context():
                # send message to job_management room that the job is added
                import flask
                html = flask.render_template('job_row.html', job=job)

                # Convert the html into a list for the jQuery
                # DataTable.row.add() method.  This regex removes the <tr>
                # and <td> tags, and splits the string into one element
                # for each cell.
                # The patterns are raw strings so that "\s" reaches the
                # regex engine without relying on an invalid string escape.
                import re
                html = re.sub(r'<tr[^<]*>[\s\n\r]*<td[^<]*>[\s\n\r]*', '', html)
                html = re.sub(r'[\s\n\r]*</td>[\s\n\r]*</tr>', '', html)
                html = re.split(r'</td>[\s\n\r]*<td[^<]*>', html)

                from digits.webapp import socketio
                socketio.emit(
                    'job update',
                    {
                        'update': 'added',
                        'job_id': job.id(),
                        'html': html
                    },
                    namespace='/jobs',
                    room='job_management',
                )

            if 'DIGITS_MODE_TEST' not in os.environ:
                # Let the scheduler do a little work before returning
                time.sleep(utils.wait_time())
            return True
예제 #42
0
    def send_data_update(self, important=False):
        """
        Send socketio updates with the latest graph data

        Keyword arguments:
        important -- if False, only send this update if the last unimportant update was sent more than 5 seconds ago
        """
        from digits.webapp import socketio

        if not important:
            # throttle unimportant updates
            previous = self.last_unimportant_update
            if previous and (time.time() - previous) < 5:
                return
            self.last_unimportant_update = time.time()

        # loss graph data
        losses = self.loss_graph_data()
        if losses:
            payload = {
                'task': self.html_id(),
                'update': 'loss_graph',
                'data': losses,
            }
            socketio.emit('task update', payload,
                          namespace='/jobs', room=self.job_id)

        # lr graph data
        learning_rates = self.lr_graph_data()
        if learning_rates:
            payload = {
                'task': self.html_id(),
                'update': 'lr_graph',
                'data': learning_rates,
            }
            socketio.emit('task update', payload,
                          namespace='/jobs', room=self.job_id)
예제 #43
0
파일: scheduler.py 프로젝트: mersoy/DIGITS
    def add_job(self, job):
        """
        Add a job to self.jobs (keyed by its id) and announce it to the
        job_management room

        Arguments:
        job -- the job to add

        Returns False (after logging an error) if the scheduler is not
        running, True otherwise
        """
        if not self.running:
            logger.error('Scheduler not running. Cannot add job.')
            return False
        else:
            self.jobs[job.id()] = job

            # Need to fix this properly
            # if True or flask._app_ctx_stack.top is not None:
            from digits.webapp import app
            with app.app_context():
                # send message to job_management room that the job is added
                import flask
                html = flask.render_template('job_row.html', job = job)

                # Convert the html into a list for the jQuery
                # DataTable.row.add() method.  This regex removes the <tr>
                # and <td> tags, and splits the string into one element
                # for each cell.
                # The patterns are raw strings so that "\s" reaches the
                # regex engine without relying on an invalid string escape.
                import re
                html = re.sub(r'<tr[^<]*>[\s\n\r]*<td[^<]*>[\s\n\r]*', '', html)
                html = re.sub(r'[\s\n\r]*</td>[\s\n\r]*</tr>', '', html)
                html = re.split(r'</td>[\s\n\r]*<td[^<]*>', html)

                from digits.webapp import socketio
                socketio.emit('job update',
                              {
                                  'update': 'added',
                                  'job_id': job.id(),
                                  'html': html
                              },
                              namespace='/jobs',
                              room='job_management',
                          )

            if 'DIGITS_MODE_TEST' not in os.environ:
                # Let the scheduler do a little work before returning
                time.sleep(utils.wait_time())
            return True
예제 #44
0
    def emit_progress_update(self):
        """
        Call socketio.emit for task progress update, and trigger job progress update.
        """
        from digits.webapp import socketio

        payload = {
            'task': self.html_id(),
            'update': 'progress',
            'percentage': int(round(100*self.progress)),
            'eta': utils.time_filters.print_time_diff(self.est_done()),
        }
        socketio.emit('task update', payload, namespace='/jobs', room=self.job_id)

        from digits.webapp import scheduler

        # forward the update to the owning job, if the scheduler returns one
        owner = scheduler.get_job(self.job_id)
        if owner:
            owner.emit_progress_update()
예제 #45
0
파일: task.py 프로젝트: JD-accounts/DIGITS
    def emit_progress_update(self):
        """
        Emit this task's progress over socketio, then let the owning job
        emit its own progress update.
        """
        from digits.webapp import socketio

        percentage = int(round(100*self.progress))
        eta = utils.time_filters.print_time_diff(self.est_done())
        message = {
            'task': self.html_id(),
            'update': 'progress',
            'percentage': percentage,
            'eta': eta,
        }
        socketio.emit('task update', message, namespace='/jobs', room=self.job_id)

        from digits.webapp import scheduler

        # only forward when the scheduler returns a job for this task
        parent_job = scheduler.get_job(self.job_id)
        if not parent_job:
            return
        parent_job.emit_progress_update()
예제 #46
0
파일: train.py 프로젝트: wilsonmar/DIGITS
    def save_val_output(self, *args):
        """
        Save output to self.val_outputs and emit the combined graph data
        when there is any
        """
        from digits.webapp import socketio

        if self.save_output(self.val_outputs, *args):
            # loss graph data
            graph_data = self.combined_graph_data()
            if graph_data:
                message = {
                    'task': self.html_id(),
                    'update': 'combined_graph',
                    'data': graph_data,
                }
                socketio.emit('task update', message,
                              namespace='/jobs', room=self.job_id)
예제 #47
0
    def save_val_output(self, *args):
        """
        Save output to self.val_outputs, then send a 'combined_graph'
        task update if graph data is available
        """
        from digits.webapp import socketio

        stored = self.save_output(self.val_outputs, *args)
        if not stored:
            return

        # loss graph data
        combined = self.combined_graph_data()
        if not combined:
            return

        update = {
            'task': self.html_id(),
            'update': 'combined_graph',
            'data': combined,
        }
        socketio.emit('task update', update, namespace='/jobs', room=self.job_id)
예제 #48
0
파일: train.py 프로젝트: rupertsmall/DIGITS
    def save_train_output(self, *args):
        """
        Save output to self.train_outputs and, at most once every 5 seconds,
        emit the combined graph and learning-rate graph data
        """
        from digits.webapp import socketio

        stored = self.save_output(self.train_outputs, *args)
        if not stored:
            return

        # throttle: skip the graph updates if the last one is under 5s old
        if self.last_train_update:
            since_last = time.time() - self.last_train_update
            if since_last < 5:
                return
        self.last_train_update = time.time()

        completed = round(100 * self.current_epoch/self.train_epochs,2)
        self.logger.debug('Training %s%% complete.' % completed)

        # loss graph data
        combined = self.combined_graph_data()
        if combined:
            update = {
                'task': self.html_id(),
                'update': 'combined_graph',
                'data': combined,
            }
            socketio.emit('task update', update, namespace='/jobs', room=self.job_id)

        # lr graph data
        rates = self.lr_graph_data()
        if rates:
            update = {
                'task': self.html_id(),
                'update': 'lr_graph',
                'data': rates,
            }
            socketio.emit('task update', update, namespace='/jobs', room=self.job_id)
예제 #49
0
    def send_data_update(self, important=False):
        """
        Send socketio updates with the latest graph data

        Keyword arguments:
        important -- if False, only send this update if the last unimportant update was sent more than 5 seconds ago
        """
        # TODO: move to TrainTask
        from digits.webapp import socketio

        if not important:
            # throttle unimportant updates
            last_sent = self.last_unimportant_update
            if last_sent and (time.time() - last_sent) < 5:
                return
            self.last_unimportant_update = time.time()

        # loss graph data
        loss_data = self.loss_graph_data()
        if loss_data:
            update = {
                'task': self.html_id(),
                'update': 'loss_graph',
                'data': loss_data,
            }
            socketio.emit('task update', update, namespace='/jobs', room=self.job_id)

        # lr graph data
        lr_data = self.lr_graph_data()
        if lr_data:
            update = {
                'task': self.html_id(),
                'update': 'lr_graph',
                'data': lr_data,
            }
            socketio.emit('task update', update, namespace='/jobs', room=self.job_id)
예제 #50
0
    def after_run(self):
        """
        Run the parent hook and close the log; then, depending on the
        backend, announce that exploration is ready (lmdb) or rewrite the
        list of h5 files (hdf5); finally announce the mean image if a mean
        file is set
        """
        from digits.webapp import socketio

        super(CreateDbTask, self).after_run()
        self.create_db_log.close()

        if self.backend == 'hdf5':
            # add more path information to the list of h5 files
            with open(self.path(self.textfile)) as infile:
                entries = infile.readlines()
            with open(self.path(self.textfile), 'w') as outfile:
                for entry in entries:
                    # XXX this works because the model job will be in an adjacent folder
                    relocated = os.path.join(
                        '..', self.job_id, self.db_name, entry.strip())
                    outfile.write('%s\n' % relocated)
        elif self.backend == 'lmdb':
            payload = {
                'task': self.html_id(),
                'update': 'exploration-ready',
            }
            socketio.emit('task update', payload,
                          namespace='/jobs', room=self.job_id)

        if not self.mean_file:
            return

        payload = {
            'task': self.html_id(),
            'update': 'mean-image',
            # XXX Can't use url_for here because we don't have a request context
            'data': '/files/' + self.path('mean.jpg', relative=True),
        }
        socketio.emit('task update', payload,
                      namespace='/jobs', room=self.job_id)
예제 #51
0
파일: train.py 프로젝트: yanweifu/DIGITS
    def send_progress_update(self, epoch):
        """
        Sends socketio message about the current progress

        Arguments:
        epoch -- the current epoch; nothing is sent if it equals
                 self.current_epoch
        """
        from digits.webapp import socketio

        if self.current_epoch == epoch:
            return

        self.current_epoch = epoch
        # Force true division so that an integer epoch does not truncate
        # the ratio under Python 2
        self.progress = float(epoch)/self.train_epochs

        socketio.emit('task update',
                {
                    'task': self.html_id(),
                    'update': 'progress',
                    'percentage': int(round(100*self.progress)),
                    'eta': utils.time_filters.print_time_diff(self.est_done()),
                    },
                namespace='/jobs',
                room=self.job_id,
                )
예제 #52
0
    def send_iteration_update(self, it):
        """
        Sends socketio message about the current iteration

        Arguments:
        it -- the current iteration; nothing is sent if it equals
              self.current_iteration
        """
        from digits.webapp import socketio

        if it == self.current_iteration:
            return

        self.current_iteration = it
        # progress is the iteration as a fraction of solver.max_iter
        self.progress = float(it)/self.solver.max_iter

        payload = {
            'task': self.html_id(),
            'update': 'progress',
            'percentage': int(round(100*self.progress)),
            'eta': utils.time_filters.print_time_diff(self.est_done()),
        }
        socketio.emit('task update', payload, namespace='/jobs', room=self.job_id)
예제 #53
0
파일: train.py 프로젝트: rupertsmall/DIGITS
    def send_progress_update(self, epoch):
        """
        Sends socketio message about the current progress

        Arguments:
        epoch -- the current epoch; nothing is sent if it equals
                 self.current_epoch
        """
        from digits.webapp import socketio

        if self.current_epoch == epoch:
            return

        self.current_epoch = epoch
        # Force true division so that an integer epoch does not truncate
        # the ratio under Python 2
        self.progress = float(epoch)/self.train_epochs

        socketio.emit('task update',
                {
                    'task': self.html_id(),
                    'update': 'progress',
                    'percentage': int(round(100*self.progress)),
                    'eta': utils.time_filters.print_time_diff(self.est_done()),
                    },
                namespace='/jobs',
                room=self.job_id,
                )
예제 #54
0
    def send_iteration_update(self, it):
        """
        Sends socketio message about the current iteration

        Arguments:
        it -- the current iteration; nothing is sent if it equals
              self.current_iteration
        """
        from digits.webapp import socketio

        unchanged = (self.current_iteration == it)
        if unchanged:
            return

        self.current_iteration = it
        # progress is the iteration as a fraction of solver.max_iter
        self.progress = float(it)/self.solver.max_iter

        message = dict(task=self.html_id(), update='progress')
        message['percentage'] = int(round(100*self.progress))
        message['eta'] = utils.time_filters.print_time_diff(self.est_done())

        socketio.emit('task update', message, namespace='/jobs', room=self.job_id)
예제 #55
0
    def after_run(self):
        """
        Run the parent hook and close the log; then, depending on the
        backend, announce that exploration is ready (lmdb) or rewrite the
        list of h5 files (hdf5); finally announce the mean image if a mean
        file is set
        """
        from digits.webapp import socketio

        super(CreateDbTask, self).after_run()
        self.create_db_log.close()

        if self.backend == 'lmdb':
            message = {
                'task': self.html_id(),
                'update': 'exploration-ready',
            }
            socketio.emit('task update', message,
                          namespace='/jobs', room=self.job_id)

        elif self.backend == 'hdf5':
            # add more path information to the list of h5 files
            with open(self.path(self.textfile)) as infile:
                listed = infile.readlines()
            with open(self.path(self.textfile), 'w') as outfile:
                for name in listed:
                    # XXX this works because the model job will be in an adjacent folder
                    full_name = os.path.join(
                        '..', self.job_id, self.db_name, name.strip())
                    outfile.write('%s\n' % full_name)

        if self.mean_file:
            message = {
                'task': self.html_id(),
                'update': 'mean-image',
                # XXX Can't use url_for here because we don't have a request context
                'data': '/files/' + self.path('mean.jpg', relative=True),
            }
            socketio.emit('task update', message,
                          namespace='/jobs', room=self.job_id)
예제 #56
0
파일: weights.py 프로젝트: Lucaszw/DIGITS
    def process_output(self, line):
        """
        Parse one line of task output.

        Every line is appended to self.inference_log. Two messages are
        recognized:
        'Processed <done>/<total>' -- updates self.progress and emits a
            'job update' to the 'job_management' room
        'Saved data to <path>'     -- stores the stripped path in
            self.inference_data_filename

        Arguments:
        line -- the raw output line

        Returns True if the line matched one of the messages above,
        False otherwise (including when the preprocessed message is empty).
        """
        self.inference_log.write('%s\n' % line)
        self.inference_log.flush()

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # progress
        match = re.match(r'Processed (\d+)\/(\d+)', message)
        if match:
            done = int(match.group(1))
            total = int(match.group(2))
            if total == 0:
                # 'Processed 0/0' carries no ratio; dividing would raise
                # ZeroDivisionError, so accept the line and report nothing.
                return True

            self.progress = float(done) / total

            from digits.webapp import socketio
            task_info = {
                'task': self.html_id(),
                'update': 'progress',
                'data': {},
                'job_id': self.job_id,
                # Integer arithmetic gives the exact floor. Going through
                # the float (int(self.progress * 100)) turns e.g. 29/100
                # into 28 because 0.29 * 100 == 28.999999999999996.
                'percentage': (100 * done) // total,
            }

            # Update Job Board:
            socketio.emit(
                'job update',
                task_info,
                namespace='/jobs',
                room="job_management",
            )
            return True

        # path to weights data
        match = re.match(r'Saved data to (.*)', message)
        if match:
            self.inference_data_filename = match.group(1).strip()
            return True

        return False
예제 #57
0
    def hw_socketio_updater(self, gpus):
        """
        Loop forever, pushing hardware utilization to connected clients

        On every pass, CPU/memory figures for the process held in self.p
        (when there is one) and the NVML info of each requested GPU are
        rendered with the 'models/gpu_utilization.html' template and
        emitted as a 'gpu_utilization' task update.

        Arguments:
        gpus -- a list of identifiers for the GPUs currently being used

        Raises RuntimeError if the device information for one of the GPUs
        cannot be loaded.
        """
        from digits.webapp import app, socketio

        # Resolve each GPU once, before the loop starts
        tracked = []
        if gpus is not None:
            for gpu_id in gpus:
                gpu = device_query.get_device(gpu_id)
                if not gpu:
                    raise RuntimeError(
                        'Failed to load gpu information for GPU #"%s"' % gpu_id)
                tracked.append((gpu_id, gpu))

        # this thread continues until killed in after_run()
        while True:
            # CPU (Non-GPU) Info
            cpu_stats = {}
            if hasattr(self, 'p') and self.p is not None:
                # 'self.p' is the system call object
                pid = self.p.pid
                cpu_stats['pid'] = pid
                try:
                    proc = psutil.Process(pid)
                    if proc.is_running():
                        # psutil before 2.x exposes the same readings
                        # through get_* accessors
                        if psutil.version_info[0] < 2:
                            cpu_stats['cpu_pct'] = proc.get_cpu_percent(
                                interval=1)
                            cpu_stats['mem_pct'] = proc.get_memory_percent()
                            cpu_stats['mem_used'] = proc.get_memory_info().rss
                        else:
                            cpu_stats['cpu_pct'] = proc.cpu_percent(interval=1)
                            cpu_stats['mem_pct'] = proc.memory_percent()
                            cpu_stats['mem_used'] = proc.memory_info().rss
                except psutil.NoSuchProcess:
                    # In rare case of instant process crash or PID went zombie (report nothing)
                    pass

            gpu_stats = []
            for gpu_id, gpu in tracked:
                entry = {'name': gpu.name, 'index': gpu_id}
                nvml = device_query.get_nvml_info(gpu_id)
                if nvml is not None:
                    entry.update(nvml)
                gpu_stats.append(entry)

            with app.app_context():
                rendered = flask.render_template(
                    'models/gpu_utilization.html',
                    data_gpu=gpu_stats,
                    data_cpu=cpu_stats,
                )
                payload = {
                    'task': self.html_id(),
                    'update': 'gpu_utilization',
                    'html': rendered,
                }
                socketio.emit(
                    'task update',
                    payload,
                    namespace='/jobs',
                    room=self.job_id,
                )

            gevent.sleep(1)
예제 #58
0
    def save_train_output(self, *args):
        """
        Save output to self.train_outputs and push refreshed graphs

        Returns early when save_output() reports a falsy result, or when
        the previous graph update was sent less than 5 seconds ago.
        Otherwise the combined graph is emitted to this job's room, its
        first column to the 'job_management' room, and the learning-rate
        graph to this job's room.

        Arguments:
        args -- forwarded unchanged to self.save_output()
        """
        from digits.webapp import socketio

        def push(payload, room):
            # all graph messages share the event name and namespace
            socketio.emit(
                'task update',
                payload,
                namespace='/jobs',
                room=room,
            )

        if not self.save_output(self.train_outputs, *args):
            return

        # throttle: at most one round of graph updates every 5 seconds
        previous = self.last_train_update
        if previous and (time.time() - previous) < 5:
            return
        self.last_train_update = time.time()

        percent_done = round(100 * self.current_epoch / self.train_epochs, 2)
        self.logger.debug('Training %s%% complete.' % percent_done)

        # loss graph data
        combined = self.combined_graph_data()
        if combined:
            push(
                {
                    'task': self.html_id(),
                    'update': 'combined_graph',
                    'data': combined,
                },
                self.job_id,
            )

            if combined['columns']:
                # first column without its leading element: the Loss data
                # for the sparkline
                sparkline = combined['columns'][0][1:]
                push(
                    {
                        'task': self.html_id(),
                        'job_id': self.job_id,
                        'update': 'combined_graph',
                        'data': sparkline,
                    },
                    'job_management',
                )

        # lr graph data
        learning_rates = self.lr_graph_data()
        if learning_rates:
            push(
                {
                    'task': self.html_id(),
                    'update': 'lr_graph',
                    'data': learning_rates,
                },
                self.job_id,
            )
예제 #59
0
    def process_output(self, line):
        """
        Parse one line of task output.

        Every line is appended to self.inference_log. Three messages are
        recognized:
        'Error: <text>'            -- emits a 'task error' to the room of
            self.pretrained_model; the 'Error: ' prefix is stripped and
            parsing continues with the remaining text (no early return)
        'Processed <done>/<total>' -- updates self.progress, emits a
            'task update' to the pretrained model's room and a
            'job update' to the 'job_management' room
        'Saved data to <path>'     -- stores the stripped path in
            self.inference_data_filename

        Arguments:
        line -- the raw output line

        Returns True if the line matched the progress or completion
        message, False otherwise (including when the preprocessed message
        is empty).
        """
        self.inference_log.write('%s\n' % line)
        self.inference_log.flush()

        timestamp, level, message = self.preprocess_output_digits(line)
        if not message:
            return False

        # error
        match = re.match(r'Error: (\w+)', message)
        if match:
            message = message.replace('Error: ', '')
            from digits.webapp import socketio
            task_info = {
                'task': self.html_id(),
                'update': 'gradient_ascent',
                'data': {
                    'layer': self.layer,
                    'error': message,
                    'id': self.job_id
                }
            }
            # Update Layer Vis tool:
            socketio.emit(
                'task error',
                task_info,
                namespace='/jobs',
                room=self.pretrained_model.id(),
            )

        # progress
        match = re.match(r'Processed (\d+)\/(\d+)', message)
        if match:
            done = int(match.group(1))
            total = int(match.group(2))
            if total == 0:
                # 'Processed 0/0' carries no ratio; dividing would raise
                # ZeroDivisionError, so accept the line and report nothing.
                return True

            self.progress = float(done) / total

            from digits.webapp import socketio
            task_info = {
                'task': self.html_id(),
                'update': 'gradient_ascent',
                'data': {
                    'layer': self.layer,
                    'unit': done,
                    'progress': self.progress,
                    'id': self.job_id
                },
                'job_id': self.job_id,
                # Integer arithmetic gives the exact floor. Going through
                # the float (int(self.progress * 100)) turns e.g. 29/100
                # into 28 because 0.29 * 100 == 28.999999999999996.
                'percentage': (100 * done) // total,
            }

            # Update Layer Vis tool:
            socketio.emit(
                'task update',
                task_info,
                namespace='/jobs',
                room=self.pretrained_model.id(),
            )

            # Update Job Board: same payload, relabelled as plain progress
            task_info['update'] = 'progress'
            socketio.emit(
                'job update',
                task_info,
                namespace='/jobs',
                room="job_management",
            )

            return True

        # completion
        match = re.match(r'Saved data to (.*)', message)
        if match:
            self.inference_data_filename = match.group(1).strip()
            return True

        return False