def on_status_update(self):
    """Notify connected clients that this task's status changed.

    Called when StatusCls.status.setter is used.
    """
    from digits.webapp import app, socketio

    status = self.status
    message = {
        'task': self.html_id(),
        'update': 'status',
        'status': status.name,
        'css': status.css,
        'show': status in [Status.RUN, Status.ERROR],
        'running': status.is_running(),
    }

    # rendering a template requires an application context
    with app.app_context():
        message['html'] = flask.render_template(
            'status_updates.html',
            updates=self.status_history,
            exception=self.exception,
            traceback=self.traceback,
        )

    socketio.emit('task update', message,
                  namespace='/jobs',
                  room=self.job_id,
                  )

    # let the owning job react to the change as well
    from digits.webapp import scheduler
    job = scheduler.get_job(self.job_id)
    if job:
        job.on_status_update()
def process_output(self, line):
    # Parse one line of create_db tool output.
    # Returns True when the line was consumed (even if unrecognized),
    # False when preprocessing produced no message.
    from digits.webapp import socketio

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress
    match = re.match(r'Processed (\d+)\/(\d+)', message)
    if match:
        self.progress = float(match.group(1))/int(match.group(2))
        socketio.emit('task update', {
            'task': self.html_id(),
            'update': 'progress',
            'percentage': int(round(100*self.progress)),
            'eta': utils.time_filters.print_time_diff(self.est_done()),
        },
            namespace='/jobs',
            room=self.job_id,
        )
        return True

    # distribution
    match = re.match(r'Category (\d+) has (\d+)', message)
    if match and self.labels_file is not None:
        if not hasattr(self, 'distribution') or self.distribution is None:
            self.distribution = {}
        # NOTE(review): keys are category-id *strings* here; the
        # Type/Category variant elsewhere uses int keys — confirm intended
        self.distribution[match.group(1)] = int(match.group(2))
        data = self.distribution_data()
        if data:
            socketio.emit('task update', {
                'task': self.html_id(),
                'update': 'distribution',
                'data': data,
            },
                namespace='/jobs',
                room=self.job_id,
            )
        return True

    # result
    match = re.match(r'Total images added: (\d+)', message)
    if match:
        self.entries_count = int(match.group(1))
        self.logger.debug(message)
        return True

    if level == 'warning':
        self.logger.warning('%s: %s' % (self.name(), message))
        return True
    if level in ['error', 'critical']:
        self.logger.error('%s: %s' % (self.name(), message))
        self.exception = message
        return True

    # unrecognized messages are still treated as handled
    return True
def add_job(self, job):
    """
    Add a job to self.jobs
    """
    # guard clause: refuse while the scheduler is stopped
    if not self.running:
        logger.error('Scheduler not running. Cannot add job.')
        return False

    self.jobs[job.id()] = job

    # Need to fix this properly
    # if True or flask._app_ctx_stack.top is not None:
    from digits.webapp import app, socketio
    with app.app_context():
        # send message to job_management room that the job is added
        socketio.emit('job update',
                      {
                          'update': 'added',
                          'job_id': job.id(),
                      },
                      namespace='/jobs',
                      room='job_management',
                      )

    if 'DIGITS_MODE_TEST' not in os.environ:
        # Let the scheduler do a little work before returning
        time.sleep(utils.wait_time())
    return True
def on_status_update(self):
    """
    Called when StatusCls.status.setter is used
    """
    from digits.webapp import app, socketio

    message = {
        'update': 'status',
        # displayed status reflects the aggregate status of the job's tasks
        'status': self.status_of_tasks().name,
        'css': self.status_of_tasks().css,
        'running': self.status.is_running(),
        'job_id': self.id(),
    }
    # rendering a template requires an application context
    with app.app_context():
        message['html'] = flask.render_template('status_updates.html',
                                                updates=self.status_history)

    socketio.emit('job update',
                  message,
                  namespace='/jobs',
                  room=self.id(),
                  )

    # send message to job_management room as well
    socketio.emit('job update',
                  message,
                  namespace='/jobs',
                  room='job_management',
                  )

    if not self.status.is_running():
        if hasattr(self, 'event'):
            # release threads that are waiting for job to complete
            self.event.set()
def server_utilization_updater(self):
    """Background loop that broadcasts GPU utilization to the
    job_management room roughly once per second.

    Raises RuntimeError when a configured GPU cannot be queried.
    Runs forever (intended to be spawned as a greenlet).
    """
    # 'scheduler' was imported but never used here; drop it
    from digits.webapp import socketio
    from digits import device_query

    devices = []
    gpu_count = len(self.resources['gpus'])
    if gpu_count:
        for index in range(gpu_count):
            device = device_query.get_device(index)
            if device:
                devices.append((index, device))
            else:
                raise RuntimeError(
                    'Failed to load gpu information for GPU #"%s"' % index)

    while True:
        data_gpu = []
        for index, device in devices:
            update = {'name': device.name, 'index': index}
            nvml_info = device_query.get_nvml_info(index)
            if nvml_info is not None:
                update.update(nvml_info)
            data_gpu.append(update)

        socketio.emit('server update',
                      {
                          'update': 'gpus_utilization',
                          'data_gpu': data_gpu,
                      },
                      namespace='/jobs',
                      room='job_management')
        # yield to other greenlets between samples
        gevent.sleep(1)
def on_status_update(self):
    """Push this job's new status to its own room and to the
    job_management room.

    Called when StatusCls.status.setter is used.
    """
    from digits.webapp import app, socketio

    payload = {
        'update': 'status',
        'status': self.status.name,
        'css': self.status.css,
        'running': self.status.is_running(),
        'job_id': self.id(),
    }
    with app.app_context():
        payload['html'] = flask.render_template('status_updates.html',
                                                updates=self.status_history)

    # one message, two rooms: the job's own page and the job list
    for room in (self.id(), 'job_management'):
        socketio.emit('job update',
                      payload,
                      namespace='/jobs',
                      room=room,
                      )
def on_status_update(self):
    """
    Called when StatusCls.status.setter is used
    """
    from digits.webapp import app, socketio

    # Send socketio updates
    message = {
        'task': self.html_id(),
        'update': 'status',
        'status': self.status.name,
        'css': self.status.css,
        'show': (self.status in [Status.RUN, Status.ERROR]),
        'running': self.status.is_running(),
    }

    # rendering the status-history fragment requires an app context
    with app.app_context():
        message['html'] = flask.render_template('status_updates.html',
                                                updates = self.status_history,
                                                exception = self.exception,
                                                traceback = self.traceback,
                                                )

    socketio.emit('task update',
                  message,
                  namespace='/jobs',
                  room=self.job_id,
                  )

    # propagate the update to the owning job
    from digits.webapp import scheduler
    job = scheduler.get_job(self.job_id)
    if job:
        job.on_status_update()
def process_output(self, line):
    # Parse one line of create_db output; returns True when the line was
    # consumed (even if unrecognized), False when there was no message.
    from digits.webapp import socketio

    # mirror every raw line into the task's log file
    self.create_db_log.write('%s\n' % line)
    self.create_db_log.flush()

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress
    match = re.match(r'Processed (\d+)\/(\d+)', message)
    if match:
        self.progress = float(match.group(1)) / int(match.group(2))
        self.emit_progress_update()
        return True

    # distribution
    match = re.match(r'Type (\d+): Category (\d+) has (\d+)', message)
    if match and self.labels_file is not None:
        if not hasattr(self, 'distribution') or self.distribution is None:
            self.distribution = OrderedDict()
        # distribution is keyed by type id, then by category id
        if int(match.group(1)) not in self.distribution:
            self.distribution[int(match.group(1))] = {}
        self.distribution[int(match.group(1))][int(match.group(2))] = int(
            match.group(3))
        data = self.distribution_data()
        if data:
            socketio.emit(
                'task update',
                {
                    'task': self.html_id(),
                    'update': 'distribution',
                    'data': data,
                },
                namespace='/jobs',
                room=self.job_id,
            )
        return True

    # result
    match = re.match(r'(\d+) images written to database', message)
    if match:
        self.entries_count = int(match.group(1))
        self.logger.debug(message)
        return True

    if level == 'warning':
        self.logger.warning('%s: %s' % (self.name(), message))
        return True
    if level in ['error', 'critical']:
        self.logger.error('%s: %s' % (self.name(), message))
        self.exception = message
        return True

    # unrecognized messages are still treated as handled
    return True
def hw_socketio_updater(self, gpus):
    """
    This thread sends SocketIO messages about hardware utilization
    to connected clients

    Arguments:
    gpus -- a list of identifiers for the GPUs currently being used
    """
    from digits.webapp import app, socketio

    devices = []
    if gpus is not None:
        for index in gpus:
            device = device_query.get_device(index)
            if device:
                devices.append((index, device))
            else:
                raise RuntimeError('Failed to load gpu information for GPU #"%s"' % index)

    # this thread continues until killed in after_run()
    while True:
        # CPU (Non-GPU) Info
        data_cpu = {}
        if hasattr(self, "p") and self.p is not None:
            data_cpu["pid"] = self.p.pid
            try:
                ps = psutil.Process(self.p.pid)  # 'self.p' is the system call object
                if ps.is_running():
                    # psutil renamed its get_* accessors in version 2.0
                    if psutil.version_info[0] >= 2:
                        data_cpu["cpu_pct"] = ps.cpu_percent(interval=1)
                        data_cpu["mem_pct"] = ps.memory_percent()
                        data_cpu["mem_used"] = ps.memory_info().rss
                    else:
                        data_cpu["cpu_pct"] = ps.get_cpu_percent(interval=1)
                        data_cpu["mem_pct"] = ps.get_memory_percent()
                        data_cpu["mem_used"] = ps.get_memory_info().rss
            except psutil.NoSuchProcess:
                # In rare case of instant process crash or PID went zombie (report nothing)
                pass

        data_gpu = []
        for index, device in devices:
            update = {"name": device.name, "index": index}
            nvml_info = device_query.get_nvml_info(index)
            if nvml_info is not None:
                update.update(nvml_info)
            data_gpu.append(update)

        with app.app_context():
            html = flask.render_template("models/gpu_utilization.html",
                                         data_gpu=data_gpu,
                                         data_cpu=data_cpu)
            socketio.emit(
                "task update",
                {"task": self.html_id(), "update": "gpu_utilization", "html": html},
                namespace="/jobs",
                room=self.job_id,
            )
        gevent.sleep(1)
def delete_job(self, job):
    """
    Deletes an entire job folder from disk
    Returns True if the Job was found and deleted
    """
    # NOTE(review): 'unicode' exists only on Python 2 — this branch would
    # raise NameError on Python 3; confirm the target interpreter
    if isinstance(job, str) or isinstance(job, unicode):
        job_id = str(job)
    elif isinstance(job, Job):
        job_id = job.id()
    else:
        raise ValueError('called delete_job with a %s' % type(job))
    dependent_jobs = []
    # try to find the job
    for i, job in enumerate(self.jobs):
        if job.id() == job_id:
            if isinstance(job, DatasetJob):
                # check for dependencies
                for j in self.jobs:
                    if isinstance(j, ModelJob) and j.dataset_id == job.id():
                        logger.error(
                            'Cannot delete "%s" (%s) because "%s" (%s) depends on it.' %
                            (job.name(), job.id(), j.name(), j.id()))
                        dependent_jobs.append(j.name())
                if len(dependent_jobs) > 0:
                    error_message = 'Cannot delete "%s" because %d model%s depend%s on it: %s' % (
                        job.name(),
                        len(dependent_jobs),
                        ('s' if len(dependent_jobs) != 1 else ''),
                        ('s' if len(dependent_jobs) == 1 else ''),
                        ', '.join(
                            ['"%s"' % j for j in dependent_jobs]))
                    raise errors.DeleteError(error_message)
            # remove from the list, then stop the job and clean up on disk
            self.jobs.pop(i)
            job.abort()
            if os.path.exists(job.dir()):
                shutil.rmtree(job.dir())
            logger.info('Job deleted.', job_id=job_id)
            from digits.webapp import socketio
            socketio.emit(
                'job update',
                {
                    'update': 'deleted',
                    'job_id': job.id()
                },
                namespace='/jobs',
                room='job_management',
            )
            return True

    # see if the folder exists on disk
    path = os.path.join(config_value('jobs_dir'), job_id)
    path = os.path.normpath(path)
    # dirname check prevents deleting anything outside jobs_dir
    if os.path.dirname(path) == config_value(
            'jobs_dir') and os.path.exists(path):
        shutil.rmtree(path)
        return True

    return False
def process_output(self, line):
    # Parse one line of create_db output; returns True when the line was
    # consumed (even if unrecognized), False when there was no message.
    from digits.webapp import socketio

    # mirror every raw line into the task's log file
    self.create_db_log.write('%s\n' % line)
    self.create_db_log.flush()

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress
    match = re.match(r'Processed (\d+)\/(\d+)', message)
    if match:
        self.progress = float(match.group(1))/int(match.group(2))
        self.emit_progress_update()
        return True

    # distribution
    match = re.match(r'Type (\d+): Category (\d+) has (\d+)', message)
    if match and self.labels_file is not None:
        if not hasattr(self, 'distribution') or self.distribution is None:
            self.distribution = OrderedDict()
        # distribution is keyed by type id, then by category id
        if int(match.group(1)) not in self.distribution:
            self.distribution[int(match.group(1))] = {}
        self.distribution[int(match.group(1))][int(match.group(2))] = int(match.group(3))

        data = self.distribution_data()
        if data:
            socketio.emit('task update', {
                'task': self.html_id(),
                'update': 'distribution',
                'data': data,
            },
                namespace='/jobs',
                room=self.job_id,
            )
        return True

    # result
    match = re.match(r'(\d+) images written to database', message)
    if match:
        self.entries_count = int(match.group(1))
        self.logger.debug(message)
        return True

    if level == 'warning':
        self.logger.warning('%s: %s' % (self.name(), message))
        return True
    if level in ['error', 'critical']:
        self.logger.error('%s: %s' % (self.name(), message))
        self.exception = message
        return True

    # unrecognized messages are still treated as handled
    return True
def delete_job(self, job):
    """
    Deletes an entire job folder from disk
    Returns True if the Job was found and deleted
    """
    # NOTE(review): 'unicode' exists only on Python 2 — this branch would
    # raise NameError on Python 3; confirm the target interpreter
    if isinstance(job, str) or isinstance(job, unicode):
        job_id = str(job)
    elif isinstance(job, Job):
        job_id = job.id()
    else:
        raise ValueError('called delete_job with a %s' % type(job))
    dependent_jobs = []
    # try to find the job
    job = self.jobs.get(job_id, None)
    if job:
        if isinstance(job, DatasetJob):
            # check for dependencies
            for j in self.jobs.values():
                if isinstance(j, ModelJob) and j.dataset_id == job.id():
                    logger.error('Cannot delete "%s" (%s) because "%s" (%s) depends on it.' %
                                 (job.name(), job.id(), j.name(), j.id()))
                    dependent_jobs.append(j.name())
            if len(dependent_jobs) > 0:
                error_message = 'Cannot delete "%s" because %d model%s depend%s on it: %s' % (
                    job.name(),
                    len(dependent_jobs),
                    ('s' if len(dependent_jobs) != 1 else ''),
                    ('s' if len(dependent_jobs) == 1 else ''),
                    ', '.join(['"%s"' % j for j in dependent_jobs]))
                raise errors.DeleteError(error_message)
        # remove from the registry, then stop the job and clean up on disk
        self.jobs.pop(job_id, None)
        job.abort()
        if os.path.exists(job.dir()):
            shutil.rmtree(job.dir())
        logger.info('Job deleted.', job_id=job_id)
        from digits.webapp import socketio
        socketio.emit('job update', {
            'update': 'deleted',
            'job_id': job.id()
        },
            namespace='/jobs',
            room='job_management',
        )
        return True

    # see if the folder exists on disk
    path = os.path.join(config_value('jobs_dir'), job_id)
    path = os.path.normpath(path)
    # dirname check prevents deleting anything outside jobs_dir
    if os.path.dirname(path) == config_value('jobs_dir') and os.path.exists(path):
        shutil.rmtree(path)
        return True

    return False
def emit(self, progress):
    """
    emit the progress to the client
    """
    payload = {
        'model_id': self._model_id,
        'update': 'progress',
        'progress': progress,
    }
    socketio.emit('update', payload,
                  namespace='/jobs',
                  room='job_management')
    # micro sleep so that emit is broadcast to the client
    time.sleep(0.001)
def send_snapshot_update(self):
    """
    Sends socketio message about the snapshot list
    """
    # TODO: move to TrainTask
    from digits.webapp import socketio

    payload = {
        'task': self.html_id(),
        'update': 'snapshots',
        'data': self.snapshot_list(),
    }
    socketio.emit('task update', payload,
                  namespace='/jobs',
                  room=self.job_id)
def send_snapshot_update(self):
    """Broadcast the current snapshot list to this task's job room."""
    from digits.webapp import socketio

    socketio.emit("task update",
                  {
                      "task": self.html_id(),
                      "update": "snapshots",
                      "data": self.snapshot_list(),
                  },
                  namespace="/jobs",
                  room=self.job_id)
def process_output(self, line):
    """Parse one line of analyze_db output.

    Returns True when the line was handled (progress, totals, image
    dimensions, or a warning/error), False when preprocessing produced
    no message.
    """
    from digits.webapp import socketio

    # mirror every raw line into the task's log file
    self.analyze_db_log.write("%s\n" % line)
    self.analyze_db_log.flush()

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress
    match = re.match(r"Progress: (\d+)\/(\d+)", message)
    if match:
        self.progress = float(match.group(1)) / float(match.group(2))
        socketio.emit(
            "task update",
            {
                "task": self.html_id(),
                "update": "progress",
                "percentage": int(round(100 * self.progress)),
                "eta": utils.time_filters.print_time_diff(self.est_done()),
            },
            namespace="/jobs",
            room=self.job_id,
        )
        return True

    # total count
    match = re.match(r"Total entries: (\d+)", message)
    if match:
        self.image_count = int(match.group(1))
        return True

    # image dimensions (group(1), the per-shape entry count, is unused)
    match = re.match(r"(\d+) entries found with shape ((\d+)x(\d+)x(\d+))", message)
    if match:
        dims = match.group(2)
        self.image_width = int(match.group(3))
        self.image_height = int(match.group(4))
        self.image_channels = int(match.group(5))
        self.logger.debug("Images are %s" % dims)
        return True

    if level == "warning":
        self.logger.warning("%s: %s" % (self.name(), message))
        return True
    if level in ["error", "critical"]:
        self.logger.error("%s: %s" % (self.name(), message))
        self.exception = message
        return True

    return True
def process_output(self, line):
    """Parse one line of analyze_db output.

    Returns True when the line was handled (progress, totals, image
    dimensions, or a warning/error), False when preprocessing produced
    no message.
    """
    from digits.webapp import socketio

    # mirror every raw line into the task's log file
    self.analyze_db_log.write('%s\n' % line)
    self.analyze_db_log.flush()

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress
    match = re.match(r'Progress: (\d+)\/(\d+)', message)
    if match:
        self.progress = float(match.group(1))/float(match.group(2))
        socketio.emit('task update', {
            'task': self.html_id(),
            'update': 'progress',
            'percentage': int(round(100*self.progress)),
            'eta': utils.time_filters.print_time_diff(self.est_done()),
        },
            namespace='/jobs',
            room=self.job_id,
        )
        return True

    # total count
    match = re.match(r'Total entries: (\d+)', message)
    if match:
        self.image_count = int(match.group(1))
        return True

    # image dimensions (group(1), the per-shape entry count, is unused)
    match = re.match(r'(\d+) entries found with shape ((\d+)x(\d+)x(\d+))', message)
    if match:
        dims = match.group(2)
        self.image_width = int(match.group(3))
        self.image_height = int(match.group(4))
        self.image_channels = int(match.group(5))
        self.logger.debug('Images are %s' % dims)
        return True

    if level == 'warning':
        self.logger.warning('%s: %s' % (self.name(), message))
        return True
    if level in ['error', 'critical']:
        self.logger.error('%s: %s' % (self.name(), message))
        self.exception = message
        return True

    return True
def emit_attribute_changed(self, attribute, value):
    """
    Call socketio.emit for task job update
    """
    from digits.webapp import socketio

    payload = {
        'job_id': self.id(),
        'update': 'attribute',
        'attribute': attribute,
        'value': value,
    }
    socketio.emit('job update', payload,
                  namespace='/jobs',
                  room='job_management')
def emit(self, progress):
    """
    emit the progress to the client
    """
    message = {
        'model_id': self._model_id,
        'update': 'progress',
        'progress': progress,
    }
    socketio.emit('update', message,
                  namespace='/jobs',
                  room='job_management'
                  )
    # micro sleep so that emit is broadcast to the client
    time.sleep(0.001)
def process_output(self, line):
    # Parse one line of create_db output; returns True when the line was
    # consumed (even if unrecognized), False when there was no message.
    from digits.webapp import socketio

    # mirror every raw line into the task's log file
    self.create_db_log.write("%s\n" % line)
    self.create_db_log.flush()

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress
    match = re.match(r"Processed (\d+)\/(\d+)", message)
    if match:
        self.progress = float(match.group(1)) / int(match.group(2))
        self.emit_progress_update()
        return True

    # distribution
    match = re.match(r"Category (\d+) has (\d+)", message)
    if match and self.labels_file is not None:
        if not hasattr(self, "distribution") or self.distribution is None:
            self.distribution = {}
        # NOTE(review): keys are category-id *strings* as matched; the
        # Type/Category variant elsewhere uses int keys — confirm intended
        self.distribution[match.group(1)] = int(match.group(2))
        data = self.distribution_data()
        if data:
            socketio.emit(
                "task update",
                {"task": self.html_id(), "update": "distribution", "data": data},
                namespace="/jobs",
                room=self.job_id,
            )
        return True

    # result
    match = re.match(r"(\d+) images written to database", message)
    if match:
        self.entries_count = int(match.group(1))
        self.logger.debug(message)
        return True

    if level == "warning":
        self.logger.warning("%s: %s" % (self.name(), message))
        return True
    if level in ["error", "critical"]:
        self.logger.error("%s: %s" % (self.name(), message))
        self.exception = message
        return True

    # unrecognized messages are still treated as handled
    return True
def process_output(self, line):
    # Parse one line of parse_folder output; returns True when the line
    # was consumed (even if unrecognized), False when there was no message.
    from digits.webapp import socketio

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress (a decimal fraction, possibly in scientific notation)
    match = re.match(r'Progress: ([-+]?[0-9]*\.?[0-9]+(e[-+]?[0-9]+)?)', message)
    if match:
        self.progress = float(match.group(1))
        socketio.emit(
            'task update',
            {
                'task': self.html_id(),
                'update': 'progress',
                'percentage': int(round(100 * self.progress)),
                'eta': utils.time_filters.print_time_diff(self.est_done()),
            },
            namespace='/jobs',
            room=self.job_id,
        )
        return True

    # totals
    match = re.match(r'Found (\d+) images in (\d+) categories', message)
    if match:
        self.label_count = int(match.group(2))
        return True

    # splits
    match = re.match(r'Selected (\d+) for (\w+)', message)
    if match:
        if match.group(2).startswith('training'):
            self.train_count = int(match.group(1))
        elif match.group(2).startswith('validation'):
            self.val_count = int(match.group(1))
        elif match.group(2).startswith('test'):
            self.test_count = int(match.group(1))
        return True

    if level == 'warning':
        self.logger.warning('%s: %s' % (self.name(), message))
        return True
    if level in ['error', 'critical']:
        self.logger.error('%s: %s' % (self.name(), message))
        self.exception = message
        return True

    # unrecognized messages are still treated as handled
    return True
def update_distribution_graph(self):
    """Emit the latest category-distribution data (if any) to this
    task's job room."""
    from digits.webapp import socketio

    data = self.distribution_data()
    if not data:
        return
    socketio.emit('task update',
                  {
                      'task': self.html_id(),
                      'update': 'distribution',
                      'data': data,
                  },
                  namespace='/jobs',
                  room=self.job_id,
                  )
def emit_gpus_available(self):
    """
    Call socketio.emit gpu availability
    """
    from digits.webapp import scheduler, socketio

    total = len(self.resources['gpus'])
    remaining = sum(r.remaining() for r in scheduler.resources['gpus'])
    socketio.emit('server update',
                  {
                      'update': 'gpus_available',
                      'total_gpu_count': total,
                      'remaining_gpu_count': remaining,
                  },
                  namespace='/jobs',
                  room='job_management'
                  )
def emit_gpus_available(self):
    """
    Call socketio.emit gpu availability
    """
    from digits.webapp import scheduler, socketio

    payload = {
        'update': 'gpus_available',
        'total_gpu_count': len(self.resources['gpus']),
        'remaining_gpu_count': sum(r.remaining() for r in scheduler.resources['gpus']),
    }
    socketio.emit('server update', payload,
                  namespace='/jobs',
                  room='job_management'
                  )
def emit_progress_update(self):
    """
    Call socketio.emit for task job update, by considering task progress.
    """
    progress = self.get_progress()

    from digits.webapp import socketio
    percentage = int(round(100 * progress))
    socketio.emit('job update',
                  {
                      'job_id': self.id(),
                      'update': 'progress',
                      'percentage': percentage,
                  },
                  namespace='/jobs',
                  room='job_management')
def process_output(self, line):
    # Parse one line of parse_folder output; returns True when the line
    # was consumed (even if unrecognized), False when there was no message.
    from digits.webapp import socketio

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress (a decimal fraction, possibly in scientific notation)
    match = re.match(r'Progress: ([-+]?[0-9]*\.?[0-9]+(e[-+]?[0-9]+)?)', message)
    if match:
        self.progress = float(match.group(1))
        socketio.emit('task update', {
            'task': self.html_id(),
            'update': 'progress',
            'percentage': int(round(100*self.progress)),
            'eta': utils.time_filters.print_time_diff(self.est_done()),
        },
            namespace='/jobs',
            room=self.job_id,
        )
        return True

    # totals
    match = re.match(r'Found (\d+) images in (\d+) categories', message)
    if match:
        self.label_count = int(match.group(2))
        return True

    # splits
    match = re.match(r'Selected (\d+) for (\w+)', message)
    if match:
        if match.group(2).startswith('training'):
            self.train_count = int(match.group(1))
        elif match.group(2).startswith('validation'):
            self.val_count = int(match.group(1))
        elif match.group(2).startswith('test'):
            self.test_count = int(match.group(1))
        return True

    if level == 'warning':
        self.logger.warning('%s: %s' % (self.name(), message))
        return True
    if level in ['error', 'critical']:
        self.logger.error('%s: %s' % (self.name(), message))
        self.exception = message
        return True

    # unrecognized messages are still treated as handled
    return True
def on_status_update(self):
    """Extend the base status update: once the inference job stops
    running, tell connected clients to reload the page."""
    super(InferenceJob, self).on_status_update()

    # 'app' was imported but never used here; only socketio is needed
    from digits.webapp import socketio

    if not self.status.is_running():
        message = {
            'job_id': self.id(),
        }
        socketio.emit('job reload_page',
                      message,
                      namespace='/jobs',
                      room=self.id(),
                      )
def emit_attribute_changed(self, attribute, value):
    """Broadcast a single attribute change for this job to the
    job_management room."""
    from digits.webapp import socketio
    socketio.emit(
        'job update',
        {
            'job_id': self.id(),
            'update': 'attribute',
            'attribute': attribute,
            'value': value,
        },
        namespace='/jobs',
        room='job_management'
    )
def emit_progress_update(self):
    """
    Call socketio.emit for task job update, by considering task progress.
    """
    progress = self.get_progress()

    from digits.webapp import socketio
    payload = {
        'job_id': self.id(),
        'update': 'progress',
        'percentage': int(round(100 * progress)),
    }
    socketio.emit('job update', payload,
                  namespace='/jobs',
                  room='job_management'
                  )
def save_train_output(self, *args):
    """
    Save output to self.train_outputs
    """
    from digits.webapp import socketio

    if not self.save_output(self.train_outputs, *args):
        return

    # rate-limit graph updates to at most once every 5 seconds
    if self.last_train_update and (time.time() - self.last_train_update) < 5:
        return
    self.last_train_update = time.time()

    self.logger.debug(
        'Training %s%% complete.' % round(100 * self.current_epoch / self.train_epochs, 2))

    # loss graph data
    data = self.combined_graph_data()
    if data:
        socketio.emit(
            'task update',
            {
                'task': self.html_id(),
                'update': 'combined_graph',
                'data': data,
            },
            namespace='/jobs',
            room=self.job_id,
        )

    # lr graph data
    data = self.lr_graph_data()
    if data:
        socketio.emit(
            'task update',
            {
                'task': self.html_id(),
                'update': 'lr_graph',
                'data': data,
            },
            namespace='/jobs',
            room=self.job_id,
        )
def gpu_socketio_updater(self, gpus):
    """
    This thread sends SocketIO messages about GPU utilization
    to connected clients

    Arguments:
    gpus -- a list of identifiers for the GPUs currently being used
    """
    from digits.webapp import app, socketio

    devices = []
    for index in gpus:
        device = device_query.get_device(index)
        if device:
            devices.append((index, device))
    if not devices:
        raise RuntimeError('Failed to load gpu information for "%s"' % gpus)

    # this thread continues until killed in after_run()
    while True:
        data = []
        for index, device in devices:
            update = {'name': device.name, 'index': index}
            nvml_info = device_query.get_nvml_info(index)
            if nvml_info is not None:
                update.update(nvml_info)
            data.append(update)

        # rendering the template requires an application context
        with app.app_context():
            html = flask.render_template('models/gpu_utilization.html', data=data)
            socketio.emit(
                'task update',
                {
                    'task': self.html_id(),
                    'update': 'gpu_utilization',
                    'html': html,
                },
                namespace='/jobs',
                room=self.job_id,
            )
        gevent.sleep(1)
def save_val_output(self, *args):
    """
    Save output to self.val_outputs
    """
    from digits.webapp import socketio

    if not self.save_output(self.val_outputs, *args):
        return

    # loss graph data
    data = self.combined_graph_data()
    if not data:
        return
    socketio.emit(
        "task update",
        {"task": self.html_id(), "update": "combined_graph", "data": data},
        namespace="/jobs",
        room=self.job_id,
    )
def gpu_socketio_updater(self, gpus):
    """
    This thread sends SocketIO messages about GPU utilization
    to connected clients

    Arguments:
    gpus -- a list of identifiers for the GPUs currently being used
    """
    from digits.webapp import app, socketio

    devices = []
    for index in gpus:
        device = device_query.get_device(index)
        if device:
            devices.append((index, device))
    if not devices:
        raise RuntimeError('Failed to load gpu information for "%s"' % gpus)

    # this thread continues until killed in after_run()
    while True:
        data = []
        for index, device in devices:
            update = {'name': device.name, 'index': index}
            nvml_info = device_query.get_nvml_info(index)
            if nvml_info is not None:
                update.update(nvml_info)
            data.append(update)

        # rendering the template requires an application context
        with app.app_context():
            html = flask.render_template('models/gpu_utilization.html',
                                         data = data)
            socketio.emit('task update', {
                'task': self.html_id(),
                'update': 'gpu_utilization',
                'html': html,
            },
                namespace='/jobs',
                room=self.job_id,
            )
        gevent.sleep(1)
def add_job(self, job):
    """
    Add a job to self.jobs

    Returns True on success, False if the scheduler is not running.
    """
    if not self.running:
        logger.error('Scheduler not running. Cannot add job.')
        return False
    else:
        self.jobs.append(job)

        # Need to fix this properly
        # if True or flask._app_ctx_stack.top is not None:
        from digits.webapp import app
        with app.app_context():
            # send message to job_management room that the job is added
            import flask
            html = flask.render_template('job_row.html', job=job)

            # Convert the html into a list for the jQuery
            # DataTable.row.add() method. This regex removes the <tr>
            # and <td> tags, and splits the string into one element
            # for each cell.
            import re
            # raw strings so '\s' is a regex escape rather than a
            # (deprecated) string escape
            html = re.sub(r'<tr[^<]*>[\s\n\r]*<td[^<]*>[\s\n\r]*', '', html)
            html = re.sub(r'[\s\n\r]*</td>[\s\n\r]*</tr>', '', html)
            html = re.split(r'</td>[\s\n\r]*<td[^<]*>', html)

            from digits.webapp import socketio
            socketio.emit(
                'job update',
                {
                    'update': 'added',
                    'job_id': job.id(),
                    'html': html
                },
                namespace='/jobs',
                room='job_management',
            )

        if 'DIGITS_MODE_TEST' not in os.environ:
            # Let the scheduler do a little work before returning
            time.sleep(utils.wait_time())
        return True
def send_data_update(self, important=False):
    """
    Send socketio updates with the latest graph data

    Keyword arguments:
    important -- if False, only send this update if the last unimportant
        update was sent more than 5 seconds ago
    """
    from digits.webapp import socketio

    if not important:
        # rate-limit unimportant updates to one per 5 seconds
        if self.last_unimportant_update and (
                time.time() - self.last_unimportant_update) < 5:
            return
        self.last_unimportant_update = time.time()

    # loss graph data
    data = self.loss_graph_data()
    if data:
        socketio.emit(
            'task update',
            {
                'task': self.html_id(),
                'update': 'loss_graph',
                'data': data,
            },
            namespace='/jobs',
            room=self.job_id,
        )

    # lr graph data
    data = self.lr_graph_data()
    if data:
        socketio.emit(
            'task update',
            {
                'task': self.html_id(),
                'update': 'lr_graph',
                'data': data,
            },
            namespace='/jobs',
            room=self.job_id,
        )
def add_job(self, job):
    """
    Add a job to self.jobs

    Returns True on success, False if the scheduler is not running.
    """
    if not self.running:
        logger.error('Scheduler not running. Cannot add job.')
        return False
    else:
        self.jobs[job.id()] = job

        # Need to fix this properly
        # if True or flask._app_ctx_stack.top is not None:
        from digits.webapp import app
        with app.app_context():
            # send message to job_management room that the job is added
            import flask
            html = flask.render_template('job_row.html', job=job)

            # Convert the html into a list for the jQuery
            # DataTable.row.add() method. This regex removes the <tr>
            # and <td> tags, and splits the string into one element
            # for each cell.
            import re
            # raw strings so '\s' is a regex escape rather than a
            # (deprecated) string escape
            html = re.sub(r'<tr[^<]*>[\s\n\r]*<td[^<]*>[\s\n\r]*', '', html)
            html = re.sub(r'[\s\n\r]*</td>[\s\n\r]*</tr>', '', html)
            html = re.split(r'</td>[\s\n\r]*<td[^<]*>', html)

            from digits.webapp import socketio
            socketio.emit('job update', {
                'update': 'added',
                'job_id': job.id(),
                'html': html
            },
                namespace='/jobs',
                room='job_management',
            )

        if 'DIGITS_MODE_TEST' not in os.environ:
            # Let the scheduler do a little work before returning
            time.sleep(utils.wait_time())
        return True
def emit_progress_update(self):
    """
    Call socketio.emit for task progress update, and trigger job
    progress update.
    """
    from digits.webapp import scheduler, socketio

    socketio.emit('task update',
                  {
                      'task': self.html_id(),
                      'update': 'progress',
                      'percentage': int(round(100 * self.progress)),
                      'eta': utils.time_filters.print_time_diff(self.est_done()),
                  },
                  namespace='/jobs',
                  room=self.job_id,
                  )

    # bubble the update up to the owning job
    job = scheduler.get_job(self.job_id)
    if job:
        job.emit_progress_update()
def emit_progress_update(self):
    """
    Call socketio.emit for task progress update, and trigger job
    progress update.
    """
    from digits.webapp import socketio

    payload = {
        'task': self.html_id(),
        'update': 'progress',
        'percentage': int(round(100 * self.progress)),
        'eta': utils.time_filters.print_time_diff(self.est_done()),
    }
    socketio.emit('task update', payload,
                  namespace='/jobs',
                  room=self.job_id,
                  )

    # bubble the update up to the owning job
    from digits.webapp import scheduler
    job = scheduler.get_job(self.job_id)
    if job:
        job.emit_progress_update()
def save_val_output(self, *args):
    """
    Save output to self.val_outputs
    """
    from digits.webapp import socketio

    if not self.save_output(self.val_outputs, *args):
        return

    # loss graph data
    data = self.combined_graph_data()
    if data:
        payload = {
            'task': self.html_id(),
            'update': 'combined_graph',
            'data': data,
        }
        socketio.emit('task update', payload,
                      namespace='/jobs',
                      room=self.job_id,
                      )
def save_train_output(self, *args):
    """
    Save output to self.train_outputs
    """
    from digits.webapp import socketio

    if not self.save_output(self.train_outputs, *args):
        return

    # rate-limit graph updates to at most once every 5 seconds
    if self.last_train_update and (time.time() - self.last_train_update) < 5:
        return
    self.last_train_update = time.time()

    # NOTE(review): assumes current_epoch is a float — under Python 2 an
    # int epoch would truncate here; confirm
    self.logger.debug('Training %s%% complete.' % round(100 * self.current_epoch/self.train_epochs,2))

    # loss graph data
    data = self.combined_graph_data()
    if data:
        socketio.emit('task update', {
            'task': self.html_id(),
            'update': 'combined_graph',
            'data': data,
        },
            namespace='/jobs',
            room=self.job_id,
        )

    # lr graph data
    data = self.lr_graph_data()
    if data:
        socketio.emit('task update', {
            'task': self.html_id(),
            'update': 'lr_graph',
            'data': data,
        },
            namespace='/jobs',
            room=self.job_id,
        )
def send_data_update(self, important=False): """ Send socketio updates with the latest graph data Keyword arguments: important -- if False, only send this update if the last unimportant update was sent more than 5 seconds ago """ # TODO: move to TrainTask from digits.webapp import socketio if not important: if self.last_unimportant_update and (time.time() - self.last_unimportant_update) < 5: return self.last_unimportant_update = time.time() # loss graph data data = self.loss_graph_data() if data: socketio.emit('task update', { 'task': self.html_id(), 'update': 'loss_graph', 'data': data, }, namespace='/jobs', room=self.job_id, ) # lr graph data data = self.lr_graph_data() if data: socketio.emit('task update', { 'task': self.html_id(), 'update': 'lr_graph', 'data': data, }, namespace='/jobs', room=self.job_id, )
def after_run(self):
    # Post-run cleanup: close the task log and notify clients according
    # to the database backend that was used.
    from digits.webapp import socketio

    super(CreateDbTask, self).after_run()
    self.create_db_log.close()

    if self.backend == 'lmdb':
        socketio.emit(
            'task update',
            {
                'task': self.html_id(),
                'update': 'exploration-ready',
            },
            namespace='/jobs',
            room=self.job_id,
        )
    elif self.backend == 'hdf5':
        # add more path information to the list of h5 files
        lines = None
        with open(self.path(self.textfile)) as infile:
            lines = infile.readlines()
        with open(self.path(self.textfile), 'w') as outfile:
            for line in lines:
                # XXX this works because the model job will be in an adjacent folder
                outfile.write('%s\n' % os.path.join(
                    '..', self.job_id, self.db_name, line.strip()))

    if self.mean_file:
        socketio.emit(
            'task update',
            {
                'task': self.html_id(),
                'update': 'mean-image',
                # XXX Can't use url_for here because we don't have a request context
                'data': '/files/' + self.path('mean.jpg', relative=True),
            },
            namespace='/jobs',
            room=self.job_id,
        )
def send_progress_update(self, epoch):
    """
    Sends socketio message about the current progress

    Arguments:
    epoch -- the epoch the task has reached (may be fractional)
    """
    from digits.webapp import socketio

    if self.current_epoch == epoch:
        return

    self.current_epoch = epoch
    # Cast to float so the division cannot truncate to 0 when both
    # operands are ints under Python 2 — matches the explicit
    # float(it)/... idiom used by send_iteration_update.
    self.progress = float(epoch) / self.train_epochs
    socketio.emit('task update',
                  {
                      'task': self.html_id(),
                      'update': 'progress',
                      'percentage': int(round(100 * self.progress)),
                      'eta': utils.time_filters.print_time_diff(self.est_done()),
                  },
                  namespace='/jobs',
                  room=self.job_id,
                  )
def send_iteration_update(self, it):
    """
    Sends socketio message about the current iteration
    """
    from digits.webapp import socketio

    if self.current_iteration == it:
        return

    self.current_iteration = it
    self.progress = float(it) / self.solver.max_iter

    payload = {
        'task': self.html_id(),
        'update': 'progress',
        'percentage': int(round(100 * self.progress)),
        'eta': utils.time_filters.print_time_diff(self.est_done()),
    }
    socketio.emit('task update', payload,
                  namespace='/jobs',
                  room=self.job_id,
                  )
def after_run(self):
    """
    Post-run cleanup for database creation.

    Closes the log, announces lmdb databases as explorable, fixes
    up the hdf5 file list with relative path components, and points
    clients at the mean image when one exists.
    """
    from digits.webapp import socketio

    super(CreateDbTask, self).after_run()
    self.create_db_log.close()

    if self.backend == 'lmdb':
        socketio.emit('task update', {
            'task': self.html_id(),
            'update': 'exploration-ready',
        },
            namespace='/jobs',
            room=self.job_id,
        )
    elif self.backend == 'hdf5':
        # add more path information to the list of h5 files
        textfile_path = self.path(self.textfile)
        with open(textfile_path) as infile:
            original_lines = infile.readlines()
        with open(textfile_path, 'w') as outfile:
            # XXX this works because the model job will be in an adjacent folder
            for entry in original_lines:
                full_entry = os.path.join(
                    '..', self.job_id, self.db_name, entry.strip())
                outfile.write('%s\n' % full_entry)

    if self.mean_file:
        socketio.emit('task update', {
            'task': self.html_id(),
            'update': 'mean-image',
            # XXX Can't use url_for here because we don't have a request context
            'data': '/files/' + self.path('mean.jpg', relative=True),
        },
            namespace='/jobs',
            room=self.job_id,
        )
def process_output(self, line):
    """
    Parse one line of subprocess output.

    Logs the raw line, then recognizes progress and saved-data
    messages. Returns True when the line was handled, False
    otherwise.
    """
    self.inference_log.write('%s\n' % line)
    self.inference_log.flush()

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # progress -> broadcast to the job-management board
    progress_match = re.match(r'Processed (\d+)\/(\d+)', message)
    if progress_match:
        self.progress = (float(progress_match.group(1)) /
                         int(progress_match.group(2)))
        from digits.webapp import socketio
        # Update Job Board:
        socketio.emit(
            'job update',
            {
                'task': self.html_id(),
                'update': 'progress',
                'data': {},
                'job_id': self.job_id,
                'percentage': int(self.progress * 100),
            },
            namespace='/jobs',
            room="job_management",
        )
        return True

    # path to weights data
    saved_match = re.match(r'Saved data to (.*)', message)
    if saved_match:
        self.inference_data_filename = saved_match.group(1).strip()
        return True

    return False
def hw_socketio_updater(self, gpus):
    """
    This thread sends SocketIO messages about hardware utilization
    to connected clients

    Arguments:
    gpus -- a list of identifiers for the GPUs currently being used
    """
    from digits.webapp import app, socketio

    devices = []
    if gpus is not None:
        for index in gpus:
            device = device_query.get_device(index)
            if not device:
                raise RuntimeError(
                    'Failed to load gpu information for GPU #"%s"' % index)
            devices.append((index, device))

    # this thread continues until killed in after_run()
    while True:
        # CPU (Non-GPU) Info
        data_cpu = {}
        proc = getattr(self, 'p', None)
        if proc is not None:
            data_cpu['pid'] = proc.pid
            try:
                ps = psutil.Process(proc.pid)  # 'self.p' is the system call object
                if ps.is_running():
                    if psutil.version_info[0] >= 2:
                        # psutil 2.x API
                        data_cpu['cpu_pct'] = ps.cpu_percent(interval=1)
                        data_cpu['mem_pct'] = ps.memory_percent()
                        data_cpu['mem_used'] = ps.memory_info().rss
                    else:
                        # psutil 1.x API (get_* prefixed accessors)
                        data_cpu['cpu_pct'] = ps.get_cpu_percent(interval=1)
                        data_cpu['mem_pct'] = ps.get_memory_percent()
                        data_cpu['mem_used'] = ps.get_memory_info().rss
            except psutil.NoSuchProcess:
                # In rare case of instant process crash or PID went zombie
                # (report nothing)
                pass

        data_gpu = []
        for index, device in devices:
            entry = {'name': device.name, 'index': index}
            nvml_info = device_query.get_nvml_info(index)
            if nvml_info is not None:
                entry.update(nvml_info)
            data_gpu.append(entry)

        with app.app_context():
            html = flask.render_template('models/gpu_utilization.html',
                                         data_gpu=data_gpu,
                                         data_cpu=data_cpu)
            socketio.emit(
                'task update',
                {
                    'task': self.html_id(),
                    'update': 'gpu_utilization',
                    'html': html,
                },
                namespace='/jobs',
                room=self.job_id,
            )
        gevent.sleep(1)
def save_train_output(self, *args):
    """
    Save output to self.train_outputs and broadcast graph updates.

    Emits the combined graph to the task's own room, a loss
    sparkline to the job-management board, and the learning-rate
    graph; unthrottled updates are limited to one per five seconds.
    """
    from digits.webapp import socketio

    if not self.save_output(self.train_outputs, *args):
        return

    if self.last_train_update and (time.time() - self.last_train_update) < 5:
        return
    self.last_train_update = time.time()

    self.logger.debug(
        'Training %s%% complete.' %
        round(100 * self.current_epoch / self.train_epochs, 2))

    def emit(room, payload):
        # small local helper: all emissions share event name and namespace
        socketio.emit('task update', payload,
                      namespace='/jobs',
                      room=room)

    # loss graph data
    data = self.combined_graph_data()
    if data:
        emit(self.job_id, {
            'task': self.html_id(),
            'update': 'combined_graph',
            'data': data,
        })

        if data['columns']:
            # isolate the Loss column data for the sparkline
            sparkline = data['columns'][0][1:]
            emit('job_management', {
                'task': self.html_id(),
                'job_id': self.job_id,
                'update': 'combined_graph',
                'data': sparkline,
            })

    # lr graph data
    data = self.lr_graph_data()
    if data:
        emit(self.job_id, {
            'task': self.html_id(),
            'update': 'lr_graph',
            'data': data,
        })
def process_output(self, line):
    """
    Parse one line of gradient-ascent subprocess output.

    Recognizes error, progress, and completion messages; error
    lines are forwarded to the layer-visualization room but do not
    return early (the remaining patterns are still checked, as in
    the original flow). Returns True when a progress or completion
    line was handled, False otherwise.
    """
    self.inference_log.write('%s\n' % line)
    self.inference_log.flush()

    timestamp, level, message = self.preprocess_output_digits(line)
    if not message:
        return False

    # error
    error_match = re.match(r'Error: (\w+)', message)
    if error_match:
        message = message.replace('Error: ', '')
        from digits.webapp import socketio
        # Update Layer Vis tool:
        socketio.emit(
            'task error',
            {
                'task': self.html_id(),
                'update': 'gradient_ascent',
                'data': {
                    'layer': self.layer,
                    'error': message,
                    'id': self.job_id
                }
            },
            namespace='/jobs',
            room=self.pretrained_model.id(),
        )

    # progress
    progress_match = re.match(r'Processed (\d+)\/(\d+)', message)
    if progress_match:
        self.progress = (float(progress_match.group(1)) /
                         int(progress_match.group(2)))
        from digits.webapp import socketio
        task_info = {
            'task': self.html_id(),
            'update': 'gradient_ascent',
            'data': {
                'layer': self.layer,
                'unit': int(progress_match.group(1)),
                'progress': self.progress,
                'id': self.job_id
            },
            'job_id': self.job_id,
            'percentage': int(self.progress * 100)
        }
        # Update Layer Vis tool:
        socketio.emit('task update', task_info,
                      namespace='/jobs',
                      room=self.pretrained_model.id())
        # Update Job Board:
        task_info['update'] = 'progress'
        socketio.emit('job update', task_info,
                      namespace='/jobs',
                      room="job_management")
        # Update Satus:
        return True

    # completion
    saved_match = re.match(r'Saved data to (.*)', message)
    if saved_match:
        self.inference_data_filename = saved_match.group(1).strip()
        return True

    return False