def get_inference_time(self, samples_size=10, upload_options=None):
    """
    Benchmark onboard inference time for a classifier
    :param samples_size: how many samples to include in the benchmark
    :param upload_options: dict options for upload()
    :return: float inference time in microseconds
    """
    if upload_options is None:
        upload_options = {}

    with self.project.tmp_project() as tmp:
        template_folder = 'tf' if self.clf.is_tf() else 'sklearn'
        X, y = self.dataset.random(samples_size)
        sketch = jinja('benchmarks/%s/Runtime.jinja' % template_folder, {'X': X, 'y': y})
        ported = self.clf.port(classname='Classifier')

        tmp.files.add('%s.ino' % tmp.name, contents=sketch, exists_ok=True)
        tmp.files.add('Classifier.h', contents=ported, exists_ok=True)
        tmp.upload(**upload_options)

        # parse serial output
        # since we can miss the first response, try a few times
        for i in range(0, 3):
            response = tmp.serial.read_until('======', timeout=8)
            match = re.search(r'inference time = ([0-9.]+) micros', response)

            if match is not None:
                return float(match.group(1))

        raise BadBoardResponseError('Unexpected response during runtime inference time benchmark: %s' % response)
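# Usage sketch (hypothetical): `benchmarker` stands for an already-constructed
# instance of the class exposing get_inference_time() above, wired to a project,
# a dataset and a classifier; the constructor is not shown here.
micros = benchmarker.get_inference_time(samples_size=20)
print('on-board inference time: %.1f us' % micros)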
def add_eloquent_library(self):
    """Save the eloquent library into the sketch"""
    filepath = self.path_to('eloquent-arduino.h')
    self.log('Injecting eloquent-arduino library')

    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(jinja('magics/eloquent-arduino.h.jinja'))
def port(self, arena_size='1024 * 16', model_name='model', classname='NeuralNetwork', classmap=None):
    """
    Port Tf model to plain C++
    :param arena_size: int|str size of tensor arena (read Tf docs)
    :param model_name: str name of the exported model variable
    :param classname: str name of the exported class
    :param classmap: optional mapping from class indices to class names
    :return: str C++ code
    """
    return jinja('ml/classification/tensorflow/NeuralNetwork.jinja', {
        'classname': classname,
        'model_name': model_name,
        'model_data': port(self.sequential, variable_name=model_name, optimize=False),
        'num_inputs': self.num_inputs,
        'num_outputs': self.num_classes,
        'arena_size': arena_size,
        'classmap': classmap
    })
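# Usage sketch (hypothetical): assuming `nn` is an instance of the class above
# wrapping a trained Tf model, write the generated C++ to a header that an
# Arduino sketch can #include; the classmap values are placeholders.
cpp_code = nn.port(arena_size='1024 * 32', classname='NeuralNetwork', classmap={0: 'idle', 1: 'walking'})

with open('NeuralNetwork.h', 'w', encoding='utf-8') as file:
    file.write(cpp_code)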
def port(self, persist=False):
    """
    Port to plain C++
    :param persist: bool (passed to the template as 'true'/'false')
    :return: str C++ code
    """
    return jinja('third_party/snoopy/SnoopyStream.jinja', {
        'pipeline_ns': self.pipeline.name,
        'pipeline': self.uglify(self.pipeline.port(classname='Pipeline')),
        'clf': self.uglify(self.clf.port(classname='Classifier')),
        'voting': self.voting,
        'persist': 'true' if persist else 'false'
    })
def baseline(self):
    """
    Create an empty sketch to get the bare minimum resources needed
    :return: resources needed by the empty sketch
    """
    with self.project.tmp_project() as tmp:
        tmp.files.add('%s.ino' % tmp.name, contents=jinja('metrics/Baseline.jinja'), exists_ok=True)

        return self._benchmark_current(tmp)
def benchmark_baseline(cls, project, X):
    """
    Create an empty sketch to get the bare minimum resources needed
    """
    if cls._baseline is None:
        with project.tmp_project() as tmp:
            tmp.files.add('%s.ino' % tmp.name, contents=jinja('metrics/Baseline.jinja', {'X': X}), exists_ok=True)
            cls._baseline = CompileLogParser(project=tmp).info
def get_baseline(self, project, X):
    """
    Get resources for an empty sketch
    :param project: project to compile against
    :param X: sample input data for the baseline sketch
    :return: dict of resources
    """
    project.logger.debug('benchmarking empty sketch to get a baseline')

    with project.tmp_project() as tmp:
        tmp.files.add('%s.ino' % tmp.name, contents=jinja('metrics/Baseline.jinja', {'X': X}), exists_ok=True)

        return self._parse_resources(tmp)
def port(self):
    """
    Port to plain C++
    :return: str plain C++ code
    """
    env = {
        'num_windows': self.num_windows,
        'window_size': self.window_size,
        'axis': self.axis,
        'features': self.features,
        'offset': (self.num_windows - 1) * len(self.features) * self.axis
    }

    return jinja("RollingWindow.jinja", env, pretty=True)
def set_project(self, project):
    """
    Export class to Arduino sketch
    """
    ported_clf = port(self.clf, classname='Classifier', classmap=self.dataset.classmap, pretty=True)
    self.config.update(ported_clf=ported_clf, num_features=len(self.dataset.df.columns))
    contents = jinja('third_party/snoopy/snoopy.jinja', self.config)
    project.files.add('ML.h', contents=contents, exists_ok=True)
def port(self, classname='Pipeline'):
    """
    Port to C++
    :param classname: str name of the exported class
    :return: str C++ code
    """
    return jinja('ml/data/preprocessing/pipeline/Pipeline.jinja', {
        'ns': self.name,
        'classname': classname,
        'steps': self.steps,
        'input_dim': self.input_dim,
        'output_dim': max([self.output_dim, self.working_dim]),
        'working_dim': max([1, self.working_dim]),
        'includes': self.includes
    }, pretty=True)
def port(self, ns):
    """
    Port to plain C++
    :param ns: str namespace for the pipeline
    :return: str C++ code
    """
    template_name = type(self).__name__
    template_data = self.get_template_data()
    template_data.update(name=self.name, input_dim=self.input_dim, working_dim=self.working_dim, ns=ns)

    return self.postprocess_port(jinja('ml/data/preprocessing/pipeline/%s.jinja' % template_name, template_data))
def benchmark(self, clf, x=None, n_features=1):
    """
    Run the benchmark for a given classifier
    :param clf: classifier to benchmark
    :param x: sample input for the benchmarking sketch (generated at random if None)
    :param n_features: int number of features used to generate x when it is None
    :return: resources needed by the classifier
    """
    if x is None:
        x = np.random.random(n_features)

    with self.project.tmp_project() as tmp:
        sketch = jinja('metrics/Resources.jinja', {'x': x})
        ported = port(clf, classname='Classifier')

        tmp.files.add('%s.ino' % tmp.name, contents=sketch, exists_ok=True)
        tmp.files.add('Classifier.h', contents=ported, exists_ok=True)

        return self._benchmark_current(tmp)
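# Usage sketch (hypothetical): `resources_benchmark` stands for an instance of
# the class exposing benchmark() above; the scikit-learn classifier and the
# random training data are placeholders.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

X_train, y_train = np.random.random((50, 4)), np.random.randint(0, 2, 50)
clf = RandomForestClassifier(n_estimators=10).fit(X_train, y_train)
report = resources_benchmark.benchmark(clf, n_features=X_train.shape[1])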
def baseline(cls, project, dataset, samples_size=10):
    """
    Benchmark the baseline sketch for the current project and dataset
    """
    cache_key = (project.board.fqbn, dataset.name)

    if cache_key not in cls.cache:
        with project.tmp_project() as tmp:
            X, y = dataset.random(samples_size)
            sketch = jinja('metrics/Baseline.jinja', {'X': X})

            tmp.files.add('%s.ino' % tmp.name, contents=sketch, exists_ok=True)
            cls.cache[cache_key] = CompileLogParser(project=tmp).info

    return cls.cache[cache_key]
def benchmark(self, clf, X_test=None, y_test=None, n_features=1, n_samples=20, repeat=1, upload_options=None):
    """
    Benchmark on-line inference time for a classifier
    :param clf: classifier to benchmark
    :param X_test: test samples (generated at random if None)
    :param y_test: test labels (generated at random if None)
    :param n_features: int number of features used to generate X_test when it is None
    :param n_samples: int number of samples used to generate X_test when it is None
    :param repeat: int how many times to repeat the predictions
    :param upload_options: dict options for upload()
    :return: dict with inference_time and online_accuracy
    """
    if upload_options is None:
        upload_options = {}

    if X_test is None or y_test is None:
        assert n_features > 0, 'n_features MUST be positive when X_test is not set'
        assert n_samples > 0, 'n_samples MUST be positive when X_test is not set'
        X_test = np.random.random((n_samples, n_features))
        y_test = np.random.randint(0, 2, n_samples)

    with self.project.tmp_project() as tmp:
        # upload benchmarking sketch
        sketch = jinja('metrics/Runtime.jinja', {'X_test': X_test, 'y_test': y_test, 'repeat': repeat})
        ported = port(clf, classname='Classifier')

        tmp.files.add(tmp.ino_name, contents=sketch, exists_ok=True)
        tmp.files.add('Classifier.h', contents=ported, exists_ok=True)
        tmp.upload(**upload_options)

        # parse serial output
        # since we can miss the first response, try a few times
        for i in range(0, 3):
            response = tmp.serial.read_until('======', timeout=8)
            match = re.search(r'inference time = ([0-9.]+) micros[\s\S]+?Score = ([0-9.]+)', response)

            if match is not None:
                return {
                    'inference_time': float(match.group(1)),
                    'online_accuracy': float(match.group(2))
                }

        self.project.logger.error('Failed to parse response: %s' % response)
        raise BadBoardResponseError('Unexpected response during runtime inference time benchmark')
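# Usage sketch (hypothetical): `runtime_benchmark` stands for an instance of the
# class exposing benchmark() above; `clf`, `X_test` and `y_test` are placeholders
# for a fitted classifier and held-out data. The returned dict carries the two
# values parsed from the serial output.
results = runtime_benchmark.benchmark(clf, X_test=X_test, y_test=y_test, repeat=5)
print('inference time: %.1f us, on-board accuracy: %.2f' % (results['inference_time'], results['online_accuracy']))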
def benchmark_resources(self, X, project=eloquentarduino.project):
    """
    Compute resources needed to compile a sketch that uses this classifier
    :param X: sample input data for the benchmarking sketch
    :param project: project to compile against
    :return: dict of resources, net of the baseline
    """
    with project.tmp_project() as tmp:
        Classifier.benchmark_baseline(tmp, X)

        # compile a benchmarking sketch and get the resources needed
        template_folder = 'tf' if self.is_tf() else 'sklearn'
        sketch = jinja('benchmarks/%s/Resources.jinja' % template_folder, {'X': X})
        ported = self.port(classname='Classifier')

        tmp.files.add('%s.ino' % tmp.name, contents=sketch, exists_ok=True)
        tmp.files.add('Classifier.h', contents=ported, exists_ok=True)

        return CompileLogParser(project=tmp).sub(Classifier._baseline).info
def get_resources(self, samples_size=10):
    """
    Benchmark the resources for a classifier
    :param samples_size: how many samples to include in the benchmark (should match the baseline)
    :return: dict resources needed
    """
    with self.project.tmp_project() as tmp:
        template_folder = 'tf' if self.clf.is_tf() else 'sklearn'
        baseline_key = (self.project.board.fqbn, self.dataset.name)
        X, y = self.dataset.random(samples_size)
        sketch = jinja('benchmarks/%s/Resources.jinja' % template_folder, {'X': X})
        ported = self.clf.port(classname='Classifier')

        tmp.files.add('%s.ino' % tmp.name, contents=sketch, exists_ok=True)
        tmp.files.add('Classifier.h', contents=ported, exists_ok=True)

        return CompileLogParser(project=tmp).sub(Benchmarker.cache.get(baseline_key, None)).info
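# Usage sketch (hypothetical): `benchmarker` stands for an instance of the
# Benchmarker class referenced above; calling the baseline() classmethod first
# appears to populate the cache that get_resources() subtracts from.
Benchmarker.baseline(project, dataset, samples_size=10)
resources = benchmarker.get_resources(samples_size=10)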
def port(self, **kwargs):
    """
    Port to C++
    :return: str C++ code
    """
    classname = kwargs.get('classname', 'CascadingClassifier')
    simplex_classname = '%s_SimplexClassifier' % classname
    complex_classname = '%s_ComplexClassifier' % classname

    return jinja('cascading/CascadingClassifier.jinja', {
        'classname': classname,
        'classmap': self.classmap,
        'simplex_classname': simplex_classname,
        'complex_classname': complex_classname,
        'simplex_clf': port(self.simplex_clf, classname=simplex_classname),
        'complex_clf': port(self.complex_clf, classname=complex_classname),
        'depth': self.depth,
    })
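# Usage sketch (hypothetical): `cascade` stands for an instance of the cascading
# classifier above; the generated header bundles the simplex and complex
# classifiers under the given classname.
with open('CascadingClassifier.h', 'w', encoding='utf-8') as file:
    file.write(cascade.port(classname='CascadingClassifier'))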
def benchmark(self, port=None, project=None, inference_time=False, save_to=None,
              exists_ok=True, exists_overwrite=False, cv=3,
              before_upload=None, after_upload=None):
    """
    Benchmark offline accuracy, resources and (optionally) on-board inference time
    for the current board / dataset / classifier combination
    :param port: serial port of the board (required when inference_time is True)
    :param project: project to use (a new one is created if None)
    :param inference_time: bool whether to benchmark on-board inference time
    :param save_to: checkpoint file where benchmark results are saved
    :param exists_ok: bool return early if the result already exists
    :param exists_overwrite: bool overwrite an existing result
    :param cv: int number of cross-validation folds
    :param before_upload: callable invoked before uploading the sketch
    :param after_upload: callable invoked after uploading the sketch
    :return: dict of benchmark results
    """
    if inference_time:
        assert port is not None or (project is not None and project.board is not None and project.board.port is not None), 'You MUST set a port'

    save_to = CheckpointFile(save_to, keys=['board', 'dataset', 'clf'])

    if save_to.key_exists(self.key) and exists_ok:
        return

    if save_to.key_exists(self.key) and not exists_overwrite:
        raise BoardBenchmarkAlreadyExists(self.key)

    if project is None:
        project = Project()

    if inference_time and port:
        project.board.set_port(port)

    # benchmark offline accuracy
    X = self.dataset.X
    y = self.dataset.y
    idx = np.arange(len(X))[::(len(X) // 5)][:5]
    X_test = X[idx]
    y_test = y[idx]
    cross_results = cross_validate(self.classifier.generator(X, y), X, y, cv=cv, return_estimator=True)
    offline_accuracy = cross_results['test_score'].mean()
    clf = cross_results['estimator'][0]

    benchmark = {
        'board': self.board.name,
        'dataset': self.dataset.name,
        'clf': self.classifier.name,
        'fqbn': '',
        'cpu_speed': self.board.cpu_speed,
        'cpu_family': self.board.cpu_family,
        'n_samples': X.shape[0],
        'n_features': X.shape[1],
        'offline_accuracy': offline_accuracy,
        'inference_time': 0
    }

    with project.tmp_project() as tmp:
        tmp.board.set_model(self.board)
        benchmark['fqbn'] = tmp.board.fqbn

        cache_key = (self.board.name, self.dataset.name)

        if cache_key not in BoardBenchmark._cache:
            BoardBenchmark._cache[cache_key] = self.get_baseline(tmp, X_test)

        baseline = BoardBenchmark._cache.get(cache_key)
        sketch = jinja('metrics/Resources.jinja', {'X': X_test})
        ported = port_clf(clf, classname='Classifier')

        tmp.files.add('%s.ino' % tmp.name, contents=sketch, exists_ok=True)
        tmp.files.add('Classifier.h', contents=ported, exists_ok=True)

        resources = self._parse_resources(tmp)
        resources['flash_increment'] = resources['flash'] - baseline['flash']
        resources['memory_increment'] = resources['memory'] - baseline['memory']
        resources['flash_increment_percent'] = float(resources['flash_increment']) / resources['flash_max'] if resources['flash_max'] > 0 else 0
        resources['memory_increment_percent'] = float(resources['memory_increment']) / resources['memory_max'] if resources['memory_max'] > 0 else 0
        benchmark.update(resources)

        if inference_time:
            sketch = jinja('metrics/Runtime.jinja', {'X': X_test, 'y': y_test})
            ported = port_clf(clf, classname='Classifier')

            tmp.files.add(tmp.ino_name, contents=sketch, exists_ok=True)
            tmp.files.add('Classifier.h', contents=ported, exists_ok=True)

            if callable(before_upload):
                before_upload()

            tmp.upload(success_message='')

            if callable(after_upload):
                after_upload(tmp)

            benchmark.update(self._parse_inference_time(tmp))

        save_to.set(self.key, benchmark)

        return benchmark
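# Usage sketch (hypothetical): `board_benchmark` stands for an instance of the
# class exposing benchmark() above, built from a board, dataset and classifier
# descriptor; 'benchmarks.csv' and the serial port are placeholders.
row = board_benchmark.benchmark(port='/dev/ttyUSB0', inference_time=True, save_to='benchmarks.csv', cv=3)
print('offline accuracy: %.2f, flash increment: %d bytes' % (row['offline_accuracy'], row['flash_increment']))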
def jinja(self, template_name, **kwargs):
    """
    Get jinja template from current package
    """
    return jinja('on_device/%s/%s' % (self.package, template_name), kwargs)
def jinja(self, template, data=None):
    """Return the rendered Jinja2 template"""
    data = dict(data or {})
    data.update(input_dim=self.input_dim, output_dim=self.output_dim)

    return jinja('Pipeline/%s' % template, data)