def test_python_command_execution(self):
    """Check that the CLI yields the same Java source as the Python API.

    The estimator is exported through ``Porter.export``, compiled and
    moved aside. It is then pickled and ported a second time with
    ``python -m sklearn_porter``; both files must compare equal.

    NOTE(review): the comparison relies on the CLI writing its result to
    the path the first export was moved away from -- confirm.
    """
    # Start from an empty scratch directory.
    for shell_cmd in ('rm -rf tmp', 'mkdir tmp'):
        subp.call(shell_cmd.split())

    # Export through the Python API:
    cp_src = os.path.join('tmp', '{}.java'.format(self.tmp_fn))
    with open(cp_src, 'w') as java_file:
        java_source = Porter(self.mdl).export(
            method_name='predict', class_name=self.tmp_fn)
        java_file.write(java_source)

    # $ javac tmp/<tmp_fn>.java
    subp.call(['javac', cp_src])

    # Move the first export aside so the CLI can write a fresh file:
    # $ mv tmp/<tmp_fn>.java tmp/<tmp_fn>_2.java
    cp_dest = os.path.join('tmp', '{}_2.java'.format(self.tmp_fn))
    subp.call(['mv', cp_src, cp_dest])

    # Pickle the model as input for the CLI:
    pkl_path = os.path.join('tmp', '{}.pkl'.format(self.tmp_fn))
    joblib.dump(self.mdl, pkl_path)

    # Port the pickled model through the command line interface:
    subp.call(['python', '-m', 'sklearn_porter', '-i', pkl_path])

    # Both exports must have identical content:
    self.assertEqual(filecmp.cmp(cp_src, cp_dest), True)
def train(request):
    """Train a decision tree on the ARFF dataset and export it to PHP.

    Loads ``dataset_modificado.arff`` from the datasets directory below
    ``settings.BASE_DIR``, treats the last column of every row as the
    target and the remaining columns as float features, fits a
    ``DecisionTreeClassifier``, pickles it to ``tree_model.pkl`` and
    writes the PHP port to ``model_php.php`` in the models directory.

    Args:
        request: The incoming request object; it is not inspected.

    Returns:
        An ``HttpResponse`` with a short confirmation text.
    """
    data_dir = settings.BASE_DIR + "/phishing/app/ia/datasets/"
    models_dir = settings.BASE_DIR + "/phishing/app/ia/models/"

    # The unmodified "dataset.arff" in the same directory is the
    # alternative input that was used previously.
    data, _meta = arff.loadarff(data_dir + "dataset_modificado.arff")

    # Split each record into its features (all but last) and label (last).
    rows = data.tolist()
    dataset = np.asarray([row[:-1] for row in rows], dtype=np.float64)
    target = np.asarray([row[-1] for row in rows], dtype=str)

    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(dataset, target)
    joblib.dump(clf, models_dir + 'tree_model.pkl')

    # Build the model in PHP.
    porter = Porter(clf, language='php')
    output = porter.export(embedded=True)
    # Context manager guarantees the handle is closed even if write fails.
    with open(models_dir + 'model_php.php', "w") as php_file:
        php_file.write(output)

    return HttpResponse('Model builded')
def export_model_to_js(selector, filename):
    """Write the selector's fitted estimator as an exported JS module.

    The estimator found at ``selector.estimator_`` is ported to
    JavaScript with embedded data, prefixed with ``export `` and stored
    as ``filename`` inside the module-level ``args.out_dir`` directory.
    """
    js_source = Porter(selector.estimator_, language='js').export(
        embed_data=True)
    target_path = os.path.join(args.out_dir, filename)
    with open(target_path, mode='w+', encoding='utf8') as js_file:
        js_file.write('export ' + js_source)
def test_python_command_execution(self):
    """Compare the CLI export against the export made via the Python API.

    Steps: export and compile the estimator through ``Porter``, rename
    that file, pickle the estimator, port the pickle with
    ``python -m sklearn_porter ... --class_name Brain`` and finally
    compare both Java files.

    NOTE(review): the final comparison assumes the CLI writes to the path
    of the first (renamed) export -- confirm.
    """
    # Recreate the scratch directory.
    subp.call(['rm', '-rf', 'tmp'])
    subp.call(['mkdir', 'tmp'])

    java_name = '{}.java'.format(self.tmp_fn)
    cp_src = os.path.join('tmp', java_name)
    with open(cp_src, 'w') as handle:
        exporter = Porter(self.estimator)
        handle.write(exporter.export(method_name='predict',
                                     class_name=self.tmp_fn))

    # $ javac tmp/<tmp_fn>.java
    subp.call(['javac', cp_src])

    # Rename the first export for the later comparison:
    # $ mv tmp/<tmp_fn>.java tmp/<tmp_fn>_2.java
    renamed_name = '{}_2.java'.format(self.tmp_fn)
    cp_dest = os.path.join('tmp', renamed_name)
    subp.call(['mv', cp_src, cp_dest])

    # Dump the estimator so the CLI can load it:
    pickle_name = '{}.pkl'.format(self.tmp_fn)
    pkl_path = os.path.join('tmp', pickle_name)
    joblib.dump(self.estimator, pkl_path)

    # Port the estimator through the command line interface:
    cli_template = 'python -m sklearn_porter -i {} --class_name Brain'
    subp.call(cli_template.format(pkl_path).split())

    # Compare file contents:
    identical = filecmp.cmp(cp_src, cp_dest)
    self.assertEqual(identical, True)
def _port_model(self):
    """Fit the model and write its ported source to ``self.tmp_fn``.

    The ``tmp`` directory is removed and recreated first. The port uses
    ``self.LANGUAGE`` with class name ``Brain`` and method name ``foo``.
    """
    self.mdl.fit(self.X, self.y)

    # Reset the scratch directory.
    for shell_cmd in ('rm -rf tmp', 'mkdir tmp'):
        subp.call(shell_cmd.split())

    with open(self.tmp_fn, 'w') as handle:
        exporter = Porter(self.mdl, language=self.LANGUAGE)
        handle.write(exporter.export(class_name='Brain', method_name='foo'))
def _port_model(self):
    """Fit the model and store its port as ``tmp/<tmp_fn>.rb``.

    The ``tmp`` directory is wiped and recreated before the export. The
    target language is taken from ``self.LANGUAGE``.
    """
    self.mdl.fit(self.X, self.y)

    # Reset the scratch directory.
    subp.call(['rm', '-rf', 'tmp'])
    subp.call(['mkdir', 'tmp'])

    ruby_path = os.path.join('tmp', self.tmp_fn + '.rb')
    with open(ruby_path, 'w') as ruby_file:
        source = Porter(self.mdl, language=self.LANGUAGE).export(
            class_name='Brain', method_name='foo')
        ruby_file.write(source)
def _port_estimator(self):
    """Fit the estimator and write its port to ``tmp/<tmp_fn>.rb``.

    ``tmp`` is removed and recreated through ``Shell.call`` beforehand.
    The port uses ``self.LANGUAGE``, class ``Brain`` and method ``foo``.
    """
    self.estimator.fit(self.X, self.y)

    # Reset the scratch directory.
    for shell_cmd in ('rm -rf tmp', 'mkdir tmp'):
        Shell.call(shell_cmd)

    target = os.path.join('tmp', self.tmp_fn + '.rb')
    with open(target, 'w') as handle:
        exporter = Porter(self.estimator, language=self.LANGUAGE)
        source = exporter.export(class_name='Brain', method_name='foo')
        handle.write(source)
def main():
    """Command line entry point: port a pickled estimator to source code.

    Reads the options returned by ``parse_args(sys.argv[1:])``, loads the
    estimator from the ``input`` path with ``joblib``, ports it with
    ``Porter`` and then either prints the result (``pipe`` option) or
    writes it into the destination directory (unless ``data`` is set).

    The destination is the ``to`` option when it names an existing
    directory; otherwise it is the directory containing the pickle file.
    A pickle given as a bare file name therefore resolves to the current
    working directory.

    Exits with an error message when the input file does not exist or
    when porting raises, and with status 0 after printing in ``pipe``
    mode.
    """
    # Local import: only `isfile`, `isdir` and `sep` are known to be in
    # module scope.
    import os

    args = parse_args(sys.argv[1:])

    # Check input data:
    pkl_file_path = str(args.get('input'))
    if not isfile(pkl_file_path):
        exit_msg = 'No valid estimator in pickle ' \
                   'format was found at \'{}\'.'.format(pkl_file_path)
        sys.exit('Error: {}'.format(exit_msg))

    # Load data:
    # NOTE(security): joblib.load unpickles the file and can execute
    # arbitrary code; only run this on pickle files from a trusted source.
    estimator = joblib.load(pkl_file_path)

    # Determine the target programming language. The generic `language`
    # option is the default; the first truthy per-language option wins.
    language = str(args.get('language'))
    languages = ['c', 'java', 'js', 'go', 'php', 'ruby']
    for key in languages:
        if args.get(key):  # found explicit assignment
            language = key
            break

    # Define destination path:
    dest_dir = str(args.get('to'))
    if dest_dir == '' or not isdir(dest_dir):
        # Fall back to the directory that contains the pickle file.
        dest_dir = os.path.dirname(pkl_file_path)

    # Port estimator:
    try:
        class_name = args.get('class_name')
        method_name = args.get('method_name')
        with_export = bool(args.get('export'))
        with_checksum = bool(args.get('checksum'))
        porter = Porter(estimator, language=language)
        output = porter.export(class_name=class_name,
                               method_name=method_name,
                               export_dir=dest_dir,
                               export_data=with_export,
                               export_append_checksum=with_checksum,
                               details=True)
    except Exception as exception:
        # Top-level boundary: report any porting failure and exit.
        sys.exit('Error: {}'.format(str(exception)))
    else:
        # Print transpiled estimator to the console:
        if bool(args.get('pipe', False)):
            print(output.get('estimator'))
            sys.exit(0)

        only_data = bool(args.get('data'))
        if not only_data:
            filename = output.get('filename')
            # os.path.join keeps the path relative when dest_dir is empty;
            # plain concatenation with `sep` pointed at the filesystem
            # root in that case.
            dest_path = os.path.join(dest_dir, filename)
            # Save transpiled estimator:
            with open(dest_path, 'w') as file_:
                file_.write(output.get('estimator'))
def _port_model(self):
    """Port ``self.clf`` to Java inside ``temp`` and compile the result.

    The ``temp`` directory is recreated, the ported source is written to
    ``temp/<tmp_fn>.java`` and ``javac`` is run on that file.
    """
    # $ rm -rf temp && mkdir temp
    for shell_cmd in ('rm -rf temp', 'mkdir temp'):
        subp.call(shell_cmd.split())

    java_path = os.path.join('temp', '%s.java' % self.tmp_fn)
    with open(java_path, 'w') as java_file:
        exporter = Porter(method_name='predict', class_name=self.tmp_fn)
        java_file.write(exporter.port(self.clf))

    # $ javac temp/<tmp_fn>.java
    subp.call(['javac', java_path])
def _port_estimator(self):
    """Fit the estimator, port it to ``./tmp/<tmp_fn>.go`` and build it.

    ``tmp`` is recreated through ``Shell.call``; after the export
    ``go build`` is invoked with ``./tmp/<tmp_fn>`` as its output path.
    """
    self.estimator.fit(self.X, self.y)

    # Reset the scratch directory.
    for shell_cmd in ('rm -rf tmp', 'mkdir tmp'):
        Shell.call(shell_cmd)

    go_source = os.path.join('.', 'tmp', self.tmp_fn + '.go')
    go_binary = os.path.join('.', 'tmp', self.tmp_fn)

    with open(go_source, 'w') as handle:
        exporter = Porter(self.estimator, language=self.LANGUAGE)
        handle.write(exporter.export(class_name='Brain', method_name='foo'))

    # $ go build -o ./tmp/<tmp_fn> ./tmp/<tmp_fn>.go
    Shell.call('go build -o {} {}'.format(go_binary, go_source))
def _port_model(self):
    """Fit the model, port it to ``./tmp/<tmp_fn>.go`` and build it.

    The ``tmp`` directory is recreated first; the build command writes
    to ``./tmp/<tmp_fn>``.
    """
    self.mdl.fit(self.X, self.y)

    # Reset the scratch directory.
    subp.call(['rm', '-rf', 'tmp'])
    subp.call(['mkdir', 'tmp'])

    source_path = os.path.join('.', 'tmp', self.tmp_fn + '.go')
    binary_path = os.path.join('.', 'tmp', self.tmp_fn)

    with open(source_path, 'w') as go_file:
        go_code = Porter(self.mdl, language=self.LANGUAGE).export(
            class_name='Brain', method_name='foo')
        go_file.write(go_code)

    # $ go build -o ./tmp/<tmp_fn> ./tmp/<tmp_fn>.go
    build_cmd = 'go build -o {} {}'.format(binary_path, source_path)
    subp.call(build_cmd.split())
def _port_model(self):
    """Fit the model, port it to ``tmp/<tmp_fn>.java`` and compile it.

    The ``tmp`` directory is removed and recreated before the export.
    """
    self.mdl.fit(self.X, self.y)

    # Reset the scratch directory.
    for shell_cmd in ('rm -rf tmp', 'mkdir tmp'):
        subp.call(shell_cmd.split())

    java_path = os.path.join('tmp', self.tmp_fn + '.java')
    with open(java_path, 'w') as java_file:
        exporter = Porter(self.mdl, language=self.LANGUAGE)
        java_file.write(
            exporter.export(class_name='Brain', method_name='foo'))

    # $ javac tmp/<tmp_fn>.java
    subp.call(['javac', java_path])
def _port_model(self):
    """Fit the model, port it to ``tmp/<tmp_fn>.c`` and compile it.

    The ``tmp`` directory is recreated first. ``gcc`` is invoked with
    ``-lm`` and writes the binary to ``tmp/<tmp_fn>``.
    """
    self.mdl.fit(self.X, self.y)

    # $ rm -rf tmp && mkdir tmp
    for shell_cmd in ('rm -rf tmp', 'mkdir tmp'):
        subp.call(shell_cmd.split())

    c_path = os.path.join('tmp', self.tmp_fn + '.c')
    with open(c_path, 'w') as c_file:
        c_code = Porter(self.mdl, language='c').export(
            class_name='Brain', method_name='foo')
        c_file.write(c_code)

    # $ gcc tmp/<tmp_fn>.c -lm -o tmp/<tmp_fn>
    binary_path = 'tmp/' + self.tmp_fn
    subp.call(['gcc', c_path, '-lm', '-o', binary_path])
def _create_java_files(self):
    """Port ``self.clf`` to ``temp/<tmp_fn>.java`` and compile it.

    The ``temp`` directory is removed and recreated before the ported
    source is written; ``javac`` is run on the file afterwards.
    """
    # $ rm -rf temp && mkdir temp
    for shell_cmd in ('rm -rf temp', 'mkdir temp'):
        subp.call(shell_cmd.split())

    java_path = 'temp/%s.java' % (self.tmp_fn)
    with open(java_path, 'w') as java_file:
        exporter = Porter(method_name='predict', class_name=self.tmp_fn)
        java_file.write(exporter.port(self.clf))

    # $ javac temp/<tmp_fn>.java
    subp.call(['javac', java_path])
def _port_estimator(self):
    """Fit the estimator, port it to ``tmp/<tmp_fn>.c`` and compile it.

    ``tmp`` is recreated through ``Shell.call``. The compile step uses
    ``gcc`` with ``-std=c99 -lm`` and writes to ``tmp/<tmp_fn>``.
    """
    self.estimator.fit(self.X, self.y)

    # Reset the scratch directory.
    for shell_cmd in ('rm -rf tmp', 'mkdir tmp'):
        Shell.call(shell_cmd)

    c_path = os.path.join('tmp', self.tmp_fn + '.c')
    with open(c_path, 'w') as c_file:
        exporter = Porter(self.estimator, language=self.LANGUAGE)
        c_file.write(exporter.export(class_name='Brain', method_name='foo'))

    # $ gcc tmp/<tmp_fn>.c -std=c99 -lm -o tmp/<tmp_fn>
    Shell.call('gcc {} -std=c99 -lm -o tmp/{}'.format(c_path, self.tmp_fn))
def _port_model(self):
    """Fit the model, port it to ``tmp/<tmp_fn>.c`` and compile it.

    The ``tmp`` directory is recreated first. The compile step uses
    ``gcc`` with ``-std=c99 -lm`` and writes to ``tmp/<tmp_fn>``.
    """
    self.mdl.fit(self.X, self.y)

    # Reset the scratch directory.
    subp.call(['rm', '-rf', 'tmp'])
    subp.call(['mkdir', 'tmp'])

    source_path = os.path.join('tmp', self.tmp_fn + '.c')
    with open(source_path, 'w') as handle:
        c_code = Porter(self.mdl, language='c').export(
            class_name='Brain', method_name='foo')
        handle.write(c_code)

    # $ gcc tmp/<tmp_fn>.c -std=c99 -lm -o tmp/<tmp_fn>
    compile_cmd = 'gcc {} -std=c99 -lm -o tmp/{}'.format(
        source_path, self.tmp_fn)
    subp.call(compile_cmd.split())
def main(argv):
    """Evaluate models per output variable, or export the best one as C.

    Args:
        argv: ``[csv_path, mode, *variables]``. ``mode`` is ``'stats'``
            or ``'code'``. In ``'code'`` mode exactly one variable must
            be given; with no variables, ``output_vars(data)`` is used.

    For every variable a model is trained once per seed from ``seeds()``
    on a 67/33 train/test split. ``'stats'`` prints the mean accuracy
    over all seeds. ``'code'`` ports the most accurate model to C,
    prints the adapted source on stdout and an ``extern "C"`` declaration
    on stderr.

    Exits with status 1 for an unknown mode and 2 when ``'code'`` mode is
    not given exactly one variable; exits with a message when no model
    could be selected for export.
    """
    path = argv[0]
    data = pd.read_csv(path)

    mode = argv[1]
    if mode not in ['stats', 'code']:
        print('Invalid mode: {}'.format(mode))
        sys.exit(1)

    e_vars = argv[2:]
    if mode == 'code' and len(e_vars) != 1:
        print('Can only export code for one model')
        sys.exit(2)
    if len(e_vars) == 0:
        e_vars = output_vars(data)

    for var in e_vars:
        xs, ys = input_data(data), output_var(data, var)
        accs = []
        # Start below any reachable accuracy so the first trained model is
        # always selected; starting at 0 left `best_mod` unbound whenever
        # no model scored above 0.
        best_mod = None
        max_a = -1.0
        for s in seeds():
            train_xs, test_xs, train_ys, test_ys = train_test_split(
                xs, ys, test_size=0.33, random_state=s)
            mod = model(s).fit(train_xs, train_ys)
            acc = np.sum(mod.predict(test_xs) == test_ys) / float(len(test_ys))
            if acc > max_a:
                best_mod = mod
                max_a = acc
            accs.append(acc)

        if mode == 'stats':
            print("{}: {:.2f}%".format(var, 100 * sum(accs) / len(accs)))

        if mode == 'code':
            if best_mod is None:
                sys.exit('No model could be selected for {}'.format(var))
            func_name = 'predict_{}'.format(var)
            port = Porter(best_mod, language='c')
            code = port.export(embed_data=True)
            # Make the generated helpers file-local, then give the
            # prediction functions a per-variable name.
            code = code.replace('int predict_', 'static int predict_')
            code = code.replace('predict', func_name)
            # Drop the last ten lines of the generated source.
            # NOTE(review): presumably the generated main() -- confirm.
            code = "\n".join(code.split('\n')[:-10])
            print(code, file=sys.stdout)
            header_line = 'extern "C" int {} (float features[]);'.format(
                func_name)
            print(header_line, file=sys.stderr)
def _port_estimator(self, export_data=False):
    """Fit the estimator and write its port to ``self.tmp_fn``.

    Args:
        export_data: When true, the export is additionally asked to
            export the model data into the ``tmp`` directory.

    The ``tmp`` directory is removed and recreated before the export.
    """
    self.estimator.fit(self.X, self.y)

    # Reset the scratch directory.
    subp.call(['rm', '-rf', 'tmp'])
    subp.call(['mkdir', 'tmp'])

    export_kwargs = {'class_name': 'Brain', 'method_name': 'foo'}
    if export_data:
        export_kwargs.update(export_data=True, export_dir='tmp')

    with open(self.tmp_fn, 'w') as handle:
        exporter = Porter(self.estimator, language=self.LANGUAGE)
        handle.write(exporter.export(**export_kwargs))
def port_model(model, name="rf_from_sklearn"):
    """Persist ``model`` to disk and report the size of the written file.

    An ``XGBClassifier`` is saved through its booster under ``name``.
    Any other model is ported to C with embedded data and written to
    ``name + '.cpp'``.

    Args:
        model: The fitted model to persist.
        name: Output path; ``.cpp`` is appended for non-XGBoost models.
    """
    if not isinstance(model, XGBClassifier):
        c_source = Porter(model, language='c').export(embed_data=True)
        name = name + '.cpp'
        with open(name, "w") as cpp_file:
            cpp_file.write(c_source)
    else:
        model._Booster.save_model(name)

    size_in_bytes = os.stat(name).st_size
    message = f'model {name} saved, with size {size_in_bytes} in bytes: '
    print(bcolors.OKBLUE + message + bcolors.ENDC)
def _save_model_to_java(model):
    """Port ``model`` to Java and store it as ``cp_xnn/MLPClassifier.java``.

    An existing file at that path is removed first. Progress is reported
    on stdout. If the file is missing after writing, an error is printed
    and the process exits with a non-zero status.

    Args:
        model: The fitted estimator to port.
    """
    from sklearn_porter import Porter

    cpm_filename = "cp_xnn/MLPClassifier.java"
    print("preper new trained model {}".format(cpm_filename))

    # Remove a stale export so the check below cannot see an old file.
    if os.path.isfile(cpm_filename):
        os.unlink(cpm_filename)

    porter = Porter(model, language='java')
    output = porter.export()
    with open(cpm_filename, 'w+') as java_file:
        n = java_file.write(output)
    print("traned model saved in c: {} len: {}, wlen: {} ".format(
        cpm_filename, len(output), n))

    if not os.path.isfile(cpm_filename):
        print("Error: no training model saved")
        # Signal the failure to the caller; status 0 would claim success.
        sys.exit(1)
def setUp(self):
    """Create a JavaScript porter and port a brute-force 3-NN classifier."""
    super(KNeighborsClassifierTest, self).setUp()
    self.porter = Porter(language='js')
    knn_params = {
        'algorithm': 'brute',
        'n_neighbors': 3,
        'weights': 'uniform',
    }
    self._port_model(KNeighborsClassifier(**knn_params))
def test_model_within_optimizer(self):
    """Porter must accept an estimator wrapped in a fitted ``GridSearchCV``.

    A two-step pipeline (dimensionality reduction, ``LinearSVC``) is
    tuned on the digits dataset; constructing a Java ``Porter`` from the
    fitted search must not raise ``ValueError``.
    """
    pipeline = Pipeline([('reduce_dim', PCA()), ('classify', LinearSVC())])
    n_features_options = [2, 4, 8]
    c_options = [1, 10, 100, 1000]

    # First grid: decomposition based reducers.
    decomposition_grid = {
        'reduce_dim': [PCA(iterated_power=7), NMF()],
        'reduce_dim__n_components': n_features_options,
        'classify__C': c_options
    }
    # Second grid: univariate feature selection.
    selection_grid = {
        'reduce_dim': [SelectKBest(chi2)],
        'reduce_dim__k': n_features_options,
        'classify__C': c_options
    }

    grid = GridSearchCV(pipeline, cv=3, n_jobs=1,
                        param_grid=[decomposition_grid, selection_grid])
    digits = load_digits()
    grid.fit(digits.data, digits.target)

    try:
        Porter(grid, language='java')
    except ValueError:
        accepted = False
    else:
        accepted = True
    self.assertTrue(accepted)
def _port_estimator(self, export_data=False, embed_data=False):
    """Fit the estimator and write its port to ``self.tmp_fn``.

    Args:
        export_data: When true, the export is asked to export the model
            data into ``tmp``; ``embed_data`` is not forwarded then.
        embed_data: Forwarded to the export when ``export_data`` is
            false.

    The ``tmp`` directory is recreated through ``Shell.call`` first.
    """
    self.estimator.fit(self.X, self.y)

    # Reset the scratch directory.
    for shell_cmd in ('rm -rf tmp', 'mkdir tmp'):
        Shell.call(shell_cmd)

    export_kwargs = {'class_name': 'Brain', 'method_name': 'foo'}
    if export_data:
        export_kwargs.update(export_data=True, export_dir='tmp')
    else:
        export_kwargs['embed_data'] = embed_data

    with open(self.tmp_fn, 'w') as handle:
        exporter = Porter(self.estimator, language=self.LANGUAGE)
        handle.write(exporter.export(**export_kwargs))
def setUp(self):
    """Create a JavaScript porter and port an AdaBoost classifier.

    The ensemble uses 100 depth-4 decision trees with fixed random state.
    """
    super(AdaBoostClassifierTest, self).setUp()
    self.porter = Porter(language='js')
    weak_learner = DecisionTreeClassifier(max_depth=4, random_state=0)
    ensemble = AdaBoostClassifier(base_estimator=weak_learner,
                                  n_estimators=100,
                                  random_state=0)
    self._port_model(ensemble)
def setUp(self):
    """Create a Java porter and port an SGD-trained MLP classifier."""
    super(MLPClassifierTest, self).setUp()
    self.porter = Porter(language='java')
    mlp_params = dict(
        activation='identity',
        hidden_layer_sizes=50,
        max_iter=500,
        alpha=1e-4,
        solver='sgd',
        tol=1e-4,
        random_state=1,
        learning_rate_init=.1,
    )
    self._port_model(MLPClassifier(**mlp_params))
def classify(X_test, X_train, base_dir, samples_test, y_test, y_train, model,
             model_alias):
    """Train an RBF SVC on extracted features, export it and save results.

    Features for the train and test inputs are obtained through
    ``extract_features`` and flattened to one row per sample. The fitted
    classifier is dumped to ``fitted_print.sav`` and ported with
    ``Porter`` to ``java_svc.java``. Test labels, predictions, class
    probabilities and train labels are stored as ``.npy`` files, and the
    test sample names as JSON, in ``<base_dir>/features/<model_alias>``.
    The accuracy on the test set is printed.

    Args:
        X_test: Test inputs passed to ``extract_features``.
        X_train: Training inputs passed to ``extract_features``.
        base_dir: Root directory; outputs go below ``<base_dir>/features``.
        samples_test: JSON-serialisable names of the test samples.
        y_test: Test labels.
        y_train: Training labels.
        model: Feature extraction model forwarded to ``extract_features``.
        model_alias: Name used for the feature and output sub-directories.
    """
    base_dir = os.path.join(base_dir, 'features')
    file_helper.guarantee_path_preconditions(base_dir)

    resnet50features_train = extract_features(X_train, base_dir, 'train',
                                              model, model_alias)
    resnet50features_test = extract_features(X_test, base_dir, 'test',
                                             model, model_alias)

    print('extract featuers done, reshaping')
    # Flatten every sample's feature tensor into a single row.
    resnet50features_train = np.reshape(
        resnet50features_train, (resnet50features_train.shape[0], -1))
    resnet50features_test = np.reshape(
        resnet50features_test, (resnet50features_test.shape[0], -1))

    print('begin fit')
    # top_model = OneVsRestClassifier(BaggingClassifier(SVC(verbose=False), n_jobs=-1))
    # probability=True is required: SVC only offers predict_proba (used
    # below) when probability estimates are enabled at fit time.
    top_model = SVC(verbose=False, C=1., kernel='rbf', gamma=0.001,
                    probability=True)
    top_model.fit(resnet50features_train, y_train)
    joblib.dump(top_model, 'fitted_print.sav', protocol=2)

    porter = Porter(top_model)
    output = porter.export(export_data=True)
    with open("java_svc.java", "w") as java_file:
        java_file.write(output)

    print('begin predict')
    y_pred = top_model.predict(resnet50features_test)
    y_pred_proba = top_model.predict_proba(resnet50features_test)

    output_dir = os.path.join(base_dir, model_alias)
    file_helper.guarantee_path_preconditions(output_dir)

    np.save(os.path.join(output_dir, "y_test.npy"), y_test)
    np.save(os.path.join(output_dir, "y_pred.npy"), y_pred)
    np.save(os.path.join(output_dir, "y_pred_proba.npy"), y_pred_proba)
    np.save(os.path.join(output_dir, "y_train.npy"), y_train)

    with open(os.path.join(output_dir, "names_test.txt"), "w") as names_file:
        names_file.write(str(json.dumps(samples_test)) + "\n")

    #hter, apcer, bpcer = evaluate_hter.evaluate_predictions(output_dir)
    #print('HTER: ', hter)

    # Computing the average accuracy
    acc_score = accuracy_score(y_test, y_pred)
    print('acc_score: ', acc_score)
def _port_model(self, clf):
    """Fit ``clf``, write its JavaScript port and start the test run.

    Any previous model is cleared first. The ported source (class
    ``Brain``, method ``foo``) is written to ``self.tmp_fn`` before
    ``self._start_test()`` is called.
    """
    self._clear_model()
    self.clf = clf
    self.clf.fit(self.X, self.y)

    # $ mkdir tmp
    subp.call('mkdir tmp'.split())

    with open(self.tmp_fn, 'w') as js_file:
        exporter = Porter(self.clf, language='js')
        js_source = exporter.export(class_name='Brain', method_name='foo')
        js_file.write(js_source)

    self._start_test()
def export_learner(learner, folder="learners"):
    '''Export a learner as Java sources plus a dill dump of its attributes.

    Everything is written to ``<folder>/<slugified codename>``, which is
    created when missing: the ported classifier, the data mapper written
    by ``_export_mapper`` and ``learner.dill``. Attributes whose name
    starts with ``_Learner__`` are left out of the dump.
    '''
    outfolder = folder + "/" + slugify(learner.codename)
    if not os.path.isdir(outfolder):
        os.makedirs(outfolder)

    with open("{}/MLPClassifier.java".format(outfolder), "w") as java_file:
        exporter = Porter(learner.classifier, language='java')
        java_source = exporter.export(export_data=True, export_dir=outfolder)
        java_file.write(java_source)

    with open("{}/DataMapper.java".format(outfolder), "w") as mapper_file:
        _export_mapper(learner, out=mapper_file)

    with open("{}/learner.dill".format(outfolder), "wb") as dill_file:
        public_state = {}
        for attr_name, attr_value in learner.__dict__.items():
            # Skip the name-mangled private attributes of Learner.
            if attr_name.startswith('_Learner__'):
                continue
            public_state[attr_name] = attr_value
        dill.dump(public_state, dill_file)
def _port_estimator(self, export_data=False):
    """Fit the estimator, port it to ``tmp/<tmp_fn>.java`` and compile it.

    Args:
        export_data: When true, the export is asked to export the model
            data into ``tmp`` and compilation adds ``./gson.jar`` to the
            class path.

    The ``tmp`` directory is removed and recreated before the export.
    """
    self.estimator.fit(self.X, self.y)

    # Reset the scratch directory.
    subp.call(['rm', '-rf', 'tmp'])
    subp.call(['mkdir', 'tmp'])

    export_kwargs = {'class_name': 'Brain', 'method_name': 'foo'}
    if export_data:
        export_kwargs.update(export_data=True, export_dir='tmp')

    java_path = os.path.join('tmp', self.tmp_fn + '.java')
    with open(java_path, 'w') as java_file:
        exporter = Porter(self.estimator, language=self.LANGUAGE)
        java_file.write(exporter.export(**export_kwargs))

    if not export_data:
        compile_cmd = ['javac', java_path]
    else:
        compile_cmd = 'javac -cp ./gson.jar {}'.format(java_path).split()
    subp.call(compile_cmd)
def export_all():
    '''
    Port the pickled decision tree and random forest models to C.

    The pickles are read from ``../../ml-model/saved-models`` relative to
    this file; the C sources are written to an ``output`` directory next
    to this file, which is created when missing.
    '''
    here = os.path.dirname(__file__)
    ml_model_path = os.path.join(here, "../../ml-model")
    output_path = os.path.join(here, "output")
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    # (pickle path relative to the model directory, C file name)
    exports = (
        ("./saved-models/decision_tree_model.pickle", "decision_tree.c"),
        ("./saved-models/random_forest_model.pickle", "random_forest.c"),
    )
    for pickle_rel_path, c_name in exports:
        with open(os.path.join(ml_model_path, pickle_rel_path),
                  "rb") as fd_model:
            model = pickle.load(fd_model)
        with open(os.path.join(output_path, c_name), "w") as fd:
            c_source = Porter(model, language='C').export(embed_data=True)
            fd.write(c_source)
def _port_model(self, clf):
    """Fit ``clf``, write its Ruby port and start the test run.

    Any previous model is cleared first. The ported source (class
    ``Brain``, method ``foo``) is saved as ``tmp/<tmp_fn>.rb`` before
    ``self._start_test()`` is called.
    """
    self._clear_model()
    self.clf = clf
    self.clf.fit(self.X, self.y)

    # $ mkdir tmp
    subp.call('mkdir tmp'.split())

    # Save transpiled model:
    ruby_path = os.path.join('tmp', self.tmp_fn + '.rb')
    with open(ruby_path, 'w') as ruby_file:
        exporter = Porter(self.clf, language='ruby')
        ruby_source = exporter.export(class_name='Brain', method_name='foo')
        ruby_file.write(ruby_source)

    self._start_test()
# %% [markdown] # ### Train classifier # %% from sklearn import svm clf = svm.NuSVC(gamma=0.001, kernel='rbf', random_state=0) clf.fit(X, y) # %% [markdown] # ### Transpile classifier # %% from sklearn_porter import Porter porter = Porter(clf, language='js') output = porter.export() print(output) # %% [markdown] # ### Run classification in JavaScript # %% # Save classifier: # with open('NuSVC.js', 'w') as f: # f.write(output) # Run classification: # if hash node 2/dev/null; then # node NuSVC.js 1 2 3 4
# ### Train classifier # %% from sklearn.ensemble import RandomForestClassifier clf = RandomForestClassifier(n_estimators=15, max_depth=None, min_samples_split=2, random_state=0) clf.fit(X, y) # %% [markdown] # ### Transpile classifier # %% from sklearn_porter import Porter porter = Porter(clf, language='java') output = porter.export(embed_data=True) print(output) # %% [markdown] # ### Run classification in Java # %% # Save classifier: # with open('RandomForestClassifier.java', 'w') as f: # f.write(output) # Compile model: # $ javac -cp . RandomForestClassifier.java
# %% [markdown]
# ### Train classifier

# %%
# Import the public package: the private module `sklearn.tree.tree` was
# deprecated in scikit-learn 0.22 and removed in 0.24.
from sklearn import tree

# X and y are not defined in this cell; presumably an earlier cell loads
# them -- verify before running this section on its own.
clf = tree.DecisionTreeClassifier()
clf.fit(X, y)

# %% [markdown]
# ### Transpile classifier

# %%
from sklearn_porter import Porter

# Port the fitted tree to Java with the model data exported separately and
# show the generated source.
porter = Porter(clf, language='java')
output = porter.export(export_data=True)
print(output)

# %% [markdown]
# ### Run classification in Java

# %%
# Save classifier:
# with open('DecisionTreeClassifier.java', 'w') as f:
#     f.write(output)

# Check model data (shell):
# $ cat data.json