Ejemplo n.º 1
0
    def test_python_command_execution(self):
        """Compare the command line export with the Python API export.

        The model is exported through ``Porter.export``, compiled with
        ``javac`` and renamed; the pickled model is then ported by running
        ``python -m sklearn_porter`` and both Java files are compared.
        """
        # Start from an empty scratch directory:
        for shell_cmd in (['rm', '-rf', 'tmp'], ['mkdir', 'tmp']):
            subp.call(shell_cmd)

        # Reference source written through the Python API:
        cp_src = os.path.join('tmp', '{}.java'.format(self.tmp_fn))
        with open(cp_src, 'w') as java_file:
            exporter = Porter(self.mdl)
            java_file.write(exporter.export(method_name='predict',
                                            class_name=self.tmp_fn))
        subp.call(['javac', cp_src])

        # Move the reference aside (presumably the CLI run below writes its
        # output back to ``cp_src`` -- confirm against the CLI):
        cp_dest = os.path.join('tmp', '{}_2.java'.format(self.tmp_fn))
        subp.call(['mv', cp_src, cp_dest])

        # Pickle the model as input for the command line run:
        pkl_path = os.path.join('tmp', '{}.pkl'.format(self.tmp_fn))
        joblib.dump(self.mdl, pkl_path)

        # Port the pickled model through the command line:
        subp.call(['python', '-m', 'sklearn_porter', '-i', pkl_path])

        # Both files are expected to have identical contents:
        self.assertEqual(filecmp.cmp(cp_src, cp_dest), True)
Ejemplo n.º 2
0
def train(request):
    """Train a decision tree on the ARFF dataset and export it.

    Loads ``dataset_modificado.arff`` from the datasets directory below
    ``settings.BASE_DIR``, treats the last column of every row as the
    target and the remaining columns as features, fits a
    ``DecisionTreeClassifier``, dumps it with joblib and writes a PHP
    export of the model next to it.

    Args:
        request: The incoming request object (not read by this view).

    Returns:
        An ``HttpResponse`` confirming that the model was built.
    """
    data_dir = settings.BASE_DIR + "/phishing/app/ia/datasets/"
    models_dir = settings.BASE_DIR + "/phishing/app/ia/models/"
    # The metadata returned alongside the records is not needed here.
    data, _ = arff.loadarff(data_dir + "dataset_modificado.arff")

    rows = data.tolist()
    dataset = np.asarray([row[:-1] for row in rows], dtype=np.float64)
    target = np.asarray([row[-1] for row in rows], dtype=str)

    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(dataset, target)

    joblib.dump(clf, models_dir + 'tree_model.pkl')

    # Build the model in PHP:
    porter = Porter(clf, language='php')
    output = porter.export(embedded=True)
    # The context manager closes the file even if the write fails.
    with open(models_dir + 'model_php.php', "w") as php_file:
        php_file.write(output)

    return HttpResponse('Model builded')
Ejemplo n.º 3
0
def export_model_to_js(selector, filename):
    """Write the selector's estimator as a JavaScript export statement.

    The estimator held in ``selector.estimator_`` is exported through
    ``Porter`` with embedded data; the result, prefixed with ``export ``,
    is written to ``filename`` inside ``args.out_dir``.
    """
    js_source = Porter(selector.estimator_,
                       language='js').export(embed_data=True)
    target_path = os.path.join(args.out_dir, filename)
    with open(target_path, mode='w+', encoding='utf8') as js_file:
        js_file.write('export ' + js_source)
Ejemplo n.º 4
0
    def test_python_command_execution(self):
        """Compare the command line export with the Python API export.

        The estimator is exported through ``Porter.export``, compiled with
        ``javac`` and renamed; the pickled estimator is then ported by
        running ``python -m sklearn_porter`` and both Java files are
        compared.
        """
        # Start from an empty scratch directory:
        for shell_cmd in ('rm -rf tmp'.split(), 'mkdir tmp'.split()):
            subp.call(shell_cmd)

        # Reference source written through the Python API:
        cp_src = os.path.join('tmp', '{}.java'.format(self.tmp_fn))
        with open(cp_src, 'w') as java_file:
            exporter = Porter(self.estimator)
            java_file.write(exporter.export(method_name='predict',
                                            class_name=self.tmp_fn))
        subp.call(['javac', cp_src])

        # Move the reference aside (presumably the CLI run below writes its
        # output back to ``cp_src`` -- confirm against the CLI):
        cp_dest = os.path.join('tmp', '{}_2.java'.format(self.tmp_fn))
        subp.call(['mv', cp_src, cp_dest])

        # Pickle the estimator as input for the command line run:
        pkl_path = os.path.join('tmp', '{}.pkl'.format(self.tmp_fn))
        joblib.dump(self.estimator, pkl_path)

        # Port the pickled estimator through the command line:
        cli_template = 'python -m sklearn_porter -i {} --class_name Brain'
        subp.call(cli_template.format(pkl_path).split())

        # Both files are expected to have identical contents:
        self.assertEqual(filecmp.cmp(cp_src, cp_dest), True)
Ejemplo n.º 5
0
 def _port_model(self):
     """Fit the model and write its export to ``self.tmp_fn``."""
     self.mdl.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for shell_cmd in (['rm', '-rf', 'tmp'], ['mkdir', 'tmp']):
         subp.call(shell_cmd)
     with open(self.tmp_fn, 'w') as target:
         exporter = Porter(self.mdl, language=self.LANGUAGE)
         source = exporter.export(class_name='Brain', method_name='foo')
         target.write(source)
Ejemplo n.º 6
0
 def _port_model(self):
     """Fit the model and write its export to ``tmp/<tmp_fn>.rb``."""
     self.mdl.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for shell_cmd in ('rm -rf tmp'.split(), 'mkdir tmp'.split()):
         subp.call(shell_cmd)
     ruby_path = os.path.join('tmp', self.tmp_fn + '.rb')
     with open(ruby_path, 'w') as ruby_file:
         exporter = Porter(self.mdl, language=self.LANGUAGE)
         source = exporter.export(class_name='Brain', method_name='foo')
         ruby_file.write(source)
Ejemplo n.º 7
0
 def _port_estimator(self):
     """Fit the estimator and write its export to ``tmp/<tmp_fn>.rb``."""
     self.estimator.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for command in ('rm -rf tmp', 'mkdir tmp'):
         Shell.call(command)
     ruby_path = os.path.join('tmp', self.tmp_fn + '.rb')
     with open(ruby_path, 'w') as ruby_file:
         exporter = Porter(self.estimator, language=self.LANGUAGE)
         source = exporter.export(class_name='Brain', method_name='foo')
         ruby_file.write(source)
Ejemplo n.º 8
0
def main():
    """Transpile a pickled estimator from the command line.

    Options are read with ``parse_args(sys.argv[1:])``. The estimator is
    loaded from the ``input`` path with joblib, exported through
    ``Porter`` and then either printed (``pipe``) or written to the
    destination directory. The destination is the ``to`` option when it
    names an existing directory, otherwise the directory of the pickle
    file.

    The process exits with an error message when the input file does not
    exist or when the export raises.
    """
    args = parse_args(sys.argv[1:])

    # Check input data:
    pkl_file_path = str(args.get('input'))
    if not isfile(pkl_file_path):
        exit_msg = 'No valid estimator in pickle ' \
                   'format was found at \'{}\'.'.format(pkl_file_path)
        sys.exit('Error: {}'.format(exit_msg))

    # Load data:
    # NOTE(review): joblib.load unpickles the file, so it must only be
    # pointed at trusted input.
    estimator = joblib.load(pkl_file_path)

    # Determine the target programming language:
    language = str(args.get('language'))  # with default language
    for key in ('c', 'java', 'js', 'go', 'php', 'ruby'):
        if args.get(key):  # found explicit assignment
            language = key
            break

    # Define destination path:
    dest_dir = str(args.get('to'))
    if dest_dir == '' or not isdir(dest_dir):
        parent_parts = pkl_file_path.split(sep)[:-1]
        if parent_parts:
            dest_dir = sep.join(parent_parts)
        else:
            # A bare file name has no directory part. Use the current
            # directory; an empty string would make the output path
            # below start with the separator, i.e. the filesystem root.
            dest_dir = '.'

    # Port estimator:
    try:
        class_name = args.get('class_name')
        method_name = args.get('method_name')
        with_export = bool(args.get('export'))
        with_checksum = bool(args.get('checksum'))
        porter = Porter(estimator, language=language)
        output = porter.export(class_name=class_name, method_name=method_name,
                               export_dir=dest_dir, export_data=with_export,
                               export_append_checksum=with_checksum,
                               details=True)
    except Exception as exception:
        # Catch any exception and exit the process:
        sys.exit('Error: {}'.format(str(exception)))
    else:
        # Print transpiled estimator to the console:
        if bool(args.get('pipe', False)):
            print(output.get('estimator'))
            sys.exit(0)

        only_data = bool(args.get('data'))
        if not only_data:
            filename = output.get('filename')
            dest_path = dest_dir + sep + filename
            # Save transpiled estimator:
            with open(dest_path, 'w') as file_:
                file_.write(output.get('estimator'))
Ejemplo n.º 9
0
 def _port_model(self):
     """Write the ported classifier to ``temp`` and compile it with javac."""
     # Recreate an empty ``temp`` directory:
     for shell_cmd in (['rm', '-rf', 'temp'], ['mkdir', 'temp']):
         subp.call(shell_cmd)
     java_path = os.path.join('temp', '%s.java' % self.tmp_fn)
     with open(java_path, 'w') as java_file:
         exporter = Porter(method_name='predict', class_name=self.tmp_fn)
         java_file.write(exporter.port(self.clf))
     # Compile the written source file:
     subp.call(['javac', java_path])
Ejemplo n.º 10
0
 def _port_estimator(self):
     """Fit the estimator, write its Go export to ``tmp`` and build it."""
     self.estimator.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for command in ('rm -rf tmp', 'mkdir tmp'):
         Shell.call(command)
     go_source = os.path.join('.', 'tmp', self.tmp_fn + '.go')
     binary = os.path.join('.', 'tmp', self.tmp_fn)
     with open(go_source, 'w') as go_file:
         exporter = Porter(self.estimator, language=self.LANGUAGE)
         go_file.write(exporter.export(class_name='Brain',
                                       method_name='foo'))
     # Build the written source into ``binary``:
     Shell.call('go build -o {} {}'.format(binary, go_source))
Ejemplo n.º 11
0
 def _port_model(self):
     """Fit the model, write its Go export to ``tmp`` and build it."""
     self.mdl.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for shell_cmd in ('rm -rf tmp'.split(), 'mkdir tmp'.split()):
         subp.call(shell_cmd)
     go_source = os.path.join('.', 'tmp', self.tmp_fn + '.go')
     binary = os.path.join('.', 'tmp', self.tmp_fn)
     with open(go_source, 'w') as go_file:
         exporter = Porter(self.mdl, language=self.LANGUAGE)
         go_file.write(exporter.export(class_name='Brain',
                                       method_name='foo'))
     # Build the written source into ``binary``:
     build_cmd = 'go build -o {} {}'.format(binary, go_source)
     subp.call(build_cmd.split())
Ejemplo n.º 12
0
 def _port_model(self):
     """Fit the model, write its export to ``tmp`` and compile it."""
     self.mdl.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for shell_cmd in (['rm', '-rf', 'tmp'], ['mkdir', 'tmp']):
         subp.call(shell_cmd)
     java_path = os.path.join('tmp', self.tmp_fn + '.java')
     with open(java_path, 'w') as java_file:
         exporter = Porter(self.mdl, language=self.LANGUAGE)
         java_file.write(exporter.export(class_name='Brain',
                                         method_name='foo'))
     # Compile the written source file with javac:
     subp.call(['javac', java_path])
Ejemplo n.º 13
0
 def _port_model(self):
     """Fit the model, write its C export to ``tmp`` and compile it."""
     self.mdl.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for shell_cmd in (['rm', '-rf', 'tmp'], ['mkdir', 'tmp']):
         subp.call(shell_cmd)
     c_path = os.path.join('tmp', self.tmp_fn + '.c')
     with open(c_path, 'w') as c_file:
         exporter = Porter(self.mdl, language='c')
         c_file.write(exporter.export(class_name='Brain',
                                      method_name='foo'))
     # Compile with gcc, linking the math library, into tmp/<tmp_fn>:
     binary = 'tmp/' + self.tmp_fn
     subp.call(['gcc', c_path, '-lm', '-o', binary])
Ejemplo n.º 14
0
 def _create_java_files(self):
     """Write the ported classifier to ``temp`` and compile it with javac."""
     # Recreate an empty ``temp`` directory:
     for shell_cmd in (['rm', '-rf', 'temp'], ['mkdir', 'temp']):
         subp.call(shell_cmd)
     java_path = 'temp/%s.java' % (self.tmp_fn)
     with open(java_path, 'w') as java_file:
         exporter = Porter(method_name='predict', class_name=self.tmp_fn)
         java_file.write(exporter.port(self.clf))
     # Compile the written source file:
     subp.call(['javac', java_path])
Ejemplo n.º 15
0
 def _port_estimator(self):
     """Fit the estimator, write its export to ``tmp`` and compile it."""
     self.estimator.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for command in ('rm -rf tmp', 'mkdir tmp'):
         Shell.call(command)
     c_path = os.path.join('tmp', self.tmp_fn + '.c')
     with open(c_path, 'w') as c_file:
         exporter = Porter(self.estimator, language=self.LANGUAGE)
         c_file.write(exporter.export(class_name='Brain',
                                      method_name='foo'))
     # Compile with gcc (C99, math library) into tmp/<tmp_fn>:
     Shell.call('gcc {} -std=c99 -lm -o tmp/{}'.format(c_path, self.tmp_fn))
Ejemplo n.º 16
0
 def _port_model(self):
     """Fit the model, write its C export to ``tmp`` and compile it."""
     self.mdl.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for shell_cmd in ('rm -rf tmp'.split(), 'mkdir tmp'.split()):
         subp.call(shell_cmd)
     c_path = os.path.join('tmp', self.tmp_fn + '.c')
     with open(c_path, 'w') as c_file:
         exporter = Porter(self.mdl, language='c')
         c_file.write(exporter.export(class_name='Brain',
                                      method_name='foo'))
     # Compile with gcc (C99, math library) into tmp/<tmp_fn>:
     gcc_cmd = 'gcc {} -std=c99 -lm -o tmp/{}'.format(c_path, self.tmp_fn)
     subp.call(gcc_cmd.split())
Ejemplo n.º 17
0
def main(argv):
    """Evaluate models per output variable or export one model as C code.

    Args:
        argv: Argument list. ``argv[0]`` is the path of a CSV file,
            ``argv[1]`` is the mode (``'stats'`` or ``'code'``) and the
            remaining items name the output variables to process. With no
            variables given, ``output_vars(data)`` supplies them.

    In ``'stats'`` mode the mean test accuracy over all seeds is printed
    for each variable. In ``'code'`` mode exactly one variable is required;
    the most accurate model is exported to C, printed to stdout, and a
    matching ``extern "C"`` declaration is printed to stderr.

    Exits with status 1 for an unknown mode and with status 2 when
    ``'code'`` mode is not given exactly one variable.
    """
    path = argv[0]
    data = pd.read_csv(path)

    mode = argv[1]
    if mode not in ['stats', 'code']:
        print('Invalid mode: {}'.format(mode))
        sys.exit(1)

    e_vars = argv[2:]

    if mode == 'code' and len(e_vars) != 1:
        print('Can only export code for one model')
        sys.exit(2)

    if len(e_vars) == 0:
        e_vars = output_vars(data)

    for var in e_vars:
        xs, ys = input_data(data), output_var(data, var)

        accs = []
        # Start below any reachable accuracy so that the first trained
        # model is always recorded, even when every split scores 0.0
        # (otherwise ``best_mod`` would be unbound in 'code' mode).
        best_mod = None
        max_a = -1.0
        for s in seeds():
            train_xs, test_xs, train_ys, test_ys = train_test_split(
                xs, ys, test_size=0.33, random_state=s)

            mod = model(s).fit(train_xs, train_ys)
            acc = np.sum(mod.predict(test_xs) == test_ys) / float(len(test_ys))
            if acc > max_a:
                best_mod = mod
                max_a = acc
            accs.append(acc)

        if mode == 'stats':
            print("{}: {:.2f}%".format(var, 100 * sum(accs) / len(accs)))

        if mode == 'code':
            func_name = 'predict_{}'.format(var)

            port = Porter(best_mod, language='c')
            code = port.export(embed_data=True)
            # Give the generated helper functions internal linkage, then
            # rename every ``predict`` occurrence after the variable.
            code = code.replace('int predict_', 'static int predict_')
            code = code.replace('predict', func_name)
            # Drop the last ten lines of the generated source (presumably
            # the generated ``main`` function -- confirm against the
            # exporter's template).
            code = "\n".join(code.split('\n')[:-10])

            print(code, file=sys.stdout)

            header_line = 'extern "C" int {} (float features[]);'.format(
                func_name)
            print(header_line, file=sys.stderr)
Ejemplo n.º 18
0
 def _port_estimator(self, export_data=False):
     """Fit the estimator and write its export to ``self.tmp_fn``.

     With ``export_data`` set, the export is additionally asked to export
     the model data into the ``tmp`` directory.
     """
     self.estimator.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for shell_cmd in ('rm -rf tmp'.split(), 'mkdir tmp'.split()):
         subp.call(shell_cmd)
     with open(self.tmp_fn, 'w') as target:
         exporter = Porter(self.estimator, language=self.LANGUAGE)
         export_kwargs = {'class_name': 'Brain', 'method_name': 'foo'}
         if export_data:
             export_kwargs.update(export_data=True, export_dir='tmp')
         target.write(exporter.export(**export_kwargs))
Ejemplo n.º 19
0
def port_model(model, name="rf_from_sklearn"):
    """Save ``model`` to disk and print the size of the resulting file.

    An ``XGBClassifier`` is saved through its booster under ``name``; any
    other model is exported to C through ``Porter`` and written to
    ``name + '.cpp'``.
    """
    if not isinstance(model, XGBClassifier):
        c_source = Porter(model, language='c').export(embed_data=True)
        name += '.cpp'
        with open(name, "w") as source_file:
            source_file.write(c_source)
    else:
        model._Booster.save_model(name)

    statinfo = os.stat(name)
    message = (bcolors.OKBLUE +
               f'model {name} saved, with size {statinfo.st_size} in bytes: ' +
               bcolors.ENDC)
    print(message)
Ejemplo n.º 20
0
def _save_model_to_java(model):
    """Export ``model`` as Java source to ``cp_xnn/MLPClassifier.java``.

    An existing file at that path is removed first. If the file is missing
    after the write, an error is printed and the process exits with a
    non-zero status.
    """
    from sklearn_porter import Porter
    cpm_filename = "cp_xnn/MLPClassifier.java"
    print("preper new trained model  {}".format(cpm_filename))
    if os.path.isfile(cpm_filename):
        os.unlink(cpm_filename)
    porter = Porter(model, language='java')
    output = porter.export()
    with open(cpm_filename, 'w+') as java_file:
        n = java_file.write(output)
        print("traned model saved in c: {} len: {}, wlen: {} ".format(
            cpm_filename, len(output), n))
    if not os.path.isfile(cpm_filename):
        print("Error: no training model saved")
        # Report the failure through the exit status; 0 would signal
        # success to the calling process.
        sys.exit(1)
Ejemplo n.º 21
0
 def setUp(self):
     """Create a JavaScript porter and port a brute-force 3-NN model."""
     super(KNeighborsClassifierTest, self).setUp()
     self.porter = Porter(language='js')
     knn_params = dict(algorithm='brute', n_neighbors=3, weights='uniform')
     self._port_model(KNeighborsClassifier(**knn_params))
Ejemplo n.º 22
0
    def test_model_within_optimizer(self):
        """Check that ``Porter`` accepts a fitted ``GridSearchCV``.

        A two-step pipeline (dimensionality reduction + ``LinearSVC``) is
        tuned with ``GridSearchCV`` on the digits dataset; constructing a
        ``Porter`` from the fitted search must not raise ``ValueError``.
        """
        pipe = Pipeline([('reduce_dim', PCA()), ('classify', LinearSVC())])
        n_features_options = [2, 4, 8]
        c_options = [1, 10, 100, 1000]
        param_grid = [
            {
                'reduce_dim': [PCA(iterated_power=7),
                               NMF()],
                'reduce_dim__n_components': n_features_options,
                'classify__C': c_options
            },
            {
                'reduce_dim': [SelectKBest(chi2)],
                'reduce_dim__k': n_features_options,
                'classify__C': c_options
            },
        ]
        grid = GridSearchCV(pipe, cv=3, n_jobs=1, param_grid=param_grid)
        digits = load_digits()
        grid.fit(digits.data, digits.target)

        try:
            Porter(grid, language='java')
        except ValueError as error:
            # Fail with the reason instead of a bare ``assertTrue(False)``.
            self.fail('Porter rejected the fitted GridSearchCV: '
                      '{}'.format(error))
Ejemplo n.º 23
0
 def _port_estimator(self, export_data=False, embed_data=False):
     """Fit the estimator and write its export to ``self.tmp_fn``.

     With ``export_data`` set, the export is asked to export the model
     data into the ``tmp`` directory; otherwise ``embed_data`` is passed
     through to the export call.
     """
     self.estimator.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for command in ('rm -rf tmp', 'mkdir tmp'):
         Shell.call(command)
     with open(self.tmp_fn, 'w') as target:
         exporter = Porter(self.estimator, language=self.LANGUAGE)
         export_kwargs = {'class_name': 'Brain', 'method_name': 'foo'}
         if export_data:
             export_kwargs.update(export_data=True, export_dir='tmp')
         else:
             export_kwargs['embed_data'] = embed_data
         target.write(exporter.export(**export_kwargs))
 def setUp(self):
     """Create a JavaScript porter and port an AdaBoost classifier."""
     super(AdaBoostClassifierTest, self).setUp()
     self.porter = Porter(language='js')
     # Depth-limited decision tree used as the boosted base estimator:
     weak_learner = DecisionTreeClassifier(max_depth=4, random_state=0)
     boosted = AdaBoostClassifier(base_estimator=weak_learner,
                                  n_estimators=100,
                                  random_state=0)
     self._port_model(boosted)
Ejemplo n.º 25
0
 def setUp(self):
     """Create a Java porter and port an identity-activation MLP."""
     super(MLPClassifierTest, self).setUp()
     self.porter = Porter(language='java')
     mlp_params = dict(activation='identity', hidden_layer_sizes=50,
                       max_iter=500, alpha=1e-4, solver='sgd', tol=1e-4,
                       random_state=1, learning_rate_init=.1)
     self._port_model(MLPClassifier(**mlp_params))
Ejemplo n.º 26
0
def classify(X_test, X_train, base_dir, samples_test, y_test, y_train, model, model_alias):
    """Fit an SVC on extracted features, export it and save predictions.

    Features for the train and test samples are obtained with
    ``extract_features`` and flattened to one row per sample. An RBF
    ``SVC`` is fitted on the training features, dumped with joblib,
    exported through ``Porter`` to ``java_svc.java`` and used to predict
    the test samples. Labels, predictions, class probabilities and the
    JSON-encoded test sample names are saved below
    ``<base_dir>/features/<model_alias>``; the accuracy is printed.

    Args:
        X_test: Test samples passed to ``extract_features``.
        X_train: Training samples passed to ``extract_features``.
        base_dir: Base output directory; ``features`` is appended to it.
        samples_test: JSON-serialisable names of the test samples.
        y_test: Labels of the test samples.
        y_train: Labels of the training samples.
        model: Feature extraction model passed to ``extract_features``.
        model_alias: Name passed to ``extract_features`` and used for the
            output sub-directory.
    """
    base_dir = os.path.join(base_dir, 'features')
    file_helper.guarantee_path_preconditions(base_dir)

    resnet50features_train = extract_features(X_train, base_dir, 'train', model, model_alias)
    resnet50features_test = extract_features(X_test, base_dir, 'test', model, model_alias)

    print('extract featuers done, reshaping')
    # Flatten everything but the first (sample) axis:
    resnet50features_train = np.reshape(resnet50features_train, (resnet50features_train.shape[0], -1))
    resnet50features_test = np.reshape(resnet50features_test, (resnet50features_test.shape[0], -1))

    print('begin fit')
    # ``probability=True`` is required for the ``predict_proba`` call
    # below; without it scikit-learn raises AttributeError.
    top_model = SVC(verbose=False, C=1., kernel='rbf', gamma=0.001,
                    probability=True)
    top_model.fit(resnet50features_train, y_train)

    joblib.dump(top_model, 'fitted_print.sav', protocol=2)
    porter = Porter(top_model)
    output = porter.export(export_data=True)

    with open("java_svc.java", "w") as java_file:
        java_file.write(output)

    print('begin predict')
    y_pred = top_model.predict(resnet50features_test)
    y_pred_proba = top_model.predict_proba(resnet50features_test)

    output_dir = os.path.join(base_dir, model_alias)
    file_helper.guarantee_path_preconditions(output_dir)

    np.save(os.path.join(output_dir, "y_test.npy"), y_test)
    np.save(os.path.join(output_dir, "y_pred.npy"), y_pred)
    np.save(os.path.join(output_dir, "y_pred_proba.npy"), y_pred_proba)
    np.save(os.path.join(output_dir, "y_train.npy"), y_train)

    with open(os.path.join(output_dir, "names_test.txt"), "w") as names_file:
        names_file.write(str(json.dumps(samples_test)) + "\n")

    # Computing the average accuracy
    acc_score = accuracy_score(y_test, y_pred)
    print('acc_score: ', acc_score)
Ejemplo n.º 27
0
 def _port_model(self, clf):
     """Fit ``clf``, write its JavaScript export and start the test."""
     self._clear_model()
     self.clf = clf
     self.clf.fit(self.X, self.y)
     # Create the ``tmp`` directory:
     subp.call(['mkdir', 'tmp'])
     with open(self.tmp_fn, 'w') as js_file:
         js_file.write(
             Porter(self.clf, language='js').export(method_name='foo',
                                                    class_name='Brain'))
     self._start_test()
Ejemplo n.º 28
0
def export_learner(learner, folder="learners"):
    """Write a learner's Java export, data mapper and dill dump.

    Everything is written to ``<folder>/<slug of learner.codename>``,
    which is created when it does not exist yet. The dill dump contains
    the learner's instance attributes except those whose names start with
    ``_Learner__``.
    """
    outfolder = folder + "/" + slugify(learner.codename)
    if not os.path.isdir(outfolder):
        os.makedirs(outfolder)

    with open("{}/MLPClassifier.java".format(outfolder), "w") as java_file:
        exporter = Porter(learner.classifier, language='java')
        java_source = exporter.export(export_data=True, export_dir=outfolder)
        java_file.write(java_source)

    with open("{}/DataMapper.java".format(outfolder), "w") as mapper_file:
        _export_mapper(learner, out=mapper_file)

    with open("{}/learner.dill".format(outfolder), "wb") as dill_file:
        # Leave out the name-mangled private attributes of ``Learner``:
        public_state = {}
        for key, value in learner.__dict__.items():
            if key.startswith('_Learner__'):
                continue
            public_state[key] = value
        dill.dump(public_state, dill_file)
Ejemplo n.º 29
0
 def _port_estimator(self, export_data=False):
     """Fit the estimator, write its export to ``tmp`` and compile it.

     With ``export_data`` set, the export is asked to export the model
     data into ``tmp`` and the source is compiled with ``./gson.jar`` on
     the class path.
     """
     self.estimator.fit(self.X, self.y)
     # Recreate an empty ``tmp`` directory:
     for shell_cmd in ('rm -rf tmp'.split(), 'mkdir tmp'.split()):
         subp.call(shell_cmd)
     java_path = os.path.join('tmp', self.tmp_fn + '.java')
     with open(java_path, 'w') as java_file:
         exporter = Porter(self.estimator, language=self.LANGUAGE)
         export_kwargs = {'class_name': 'Brain', 'method_name': 'foo'}
         if export_data:
             export_kwargs.update(export_data=True, export_dir='tmp')
         java_file.write(exporter.export(**export_kwargs))
     # Pick the compiler invocation matching the export variant:
     if export_data:
         javac_cmd = 'javac -cp ./gson.jar {}'.format(java_path).split()
     else:
         javac_cmd = ['javac', java_path]
     subp.call(javac_cmd)
Ejemplo n.º 30
0
def export_all():
    """Export the pickled decision tree and random forest models to C.

    The pickles are read from ``../../ml-model/saved-models`` relative to
    this file; the C sources are written to an ``output`` directory next
    to this file, which is created when missing.
    """
    here = os.path.dirname(__file__)
    ml_model_path = os.path.join(here, "../../ml-model")
    output_path = os.path.join(here, "output")

    if not os.path.exists(output_path):
        os.mkdir(output_path)

    # (pickle path relative to ``ml_model_path``, C file name) per model:
    exports = (
        ("./saved-models/decision_tree_model.pickle", "decision_tree.c"),
        ("./saved-models/random_forest_model.pickle", "random_forest.c"),
    )
    for pickle_name, c_name in exports:
        with open(os.path.join(ml_model_path, pickle_name),
                  "rb") as fd_model:
            model = pickle.load(fd_model)

            with open(os.path.join(output_path, c_name), "w") as fd:
                exporter = Porter(model, language='C')
                fd.write(exporter.export(embed_data=True))
Ejemplo n.º 31
0
 def _port_model(self, clf):
     """Fit ``clf``, write its Ruby export to ``tmp`` and start the test."""
     self._clear_model()
     self.clf = clf
     self.clf.fit(self.X, self.y)
     # Create the ``tmp`` directory:
     subp.call(['mkdir', 'tmp'])
     # Save the exported model:
     ruby_path = os.path.join('tmp', self.tmp_fn + '.rb')
     with open(ruby_path, 'w') as ruby_file:
         ruby_file.write(
             Porter(self.clf, language='ruby').export(method_name='foo',
                                                      class_name='Brain'))
     self._start_test()
Ejemplo n.º 32
0
# %% [markdown]
# ### Train classifier

# %%
# NOTE(review): X and y are not defined in this excerpt -- presumably the
# feature matrix and label vector created by an earlier cell; confirm.
from sklearn import svm

# Fit a NuSVC with an RBF kernel on X and y.
clf = svm.NuSVC(gamma=0.001, kernel='rbf', random_state=0)
clf.fit(X, y)

# %% [markdown]
# ### Transpile classifier

# %%
from sklearn_porter import Porter

# Export the fitted classifier with 'js' as the target language and print
# the exported output.
porter = Porter(clf, language='js')
output = porter.export()

print(output)

# %% [markdown]
# ### Run classification in JavaScript

# %%
# Save classifier:
# with open('NuSVC.js', 'w') as f:
#     f.write(output)

# Run classification (shell snippet, only when `node` is available):
# if hash node 2>/dev/null; then
#     node NuSVC.js 1 2 3 4
# fi
# %% [markdown]
# ### Train classifier

# %%
# NOTE(review): X and y are not defined in this excerpt -- presumably the
# feature matrix and label vector created by an earlier cell; confirm.
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest of 15 trees without a depth limit on X and y.
clf = RandomForestClassifier(n_estimators=15, max_depth=None,
                             min_samples_split=2, random_state=0)
clf.fit(X, y)

# %% [markdown]
# ### Transpile classifier

# %%
from sklearn_porter import Porter

# Export the fitted classifier with 'java' as the target language,
# requesting embedded data, and print the exported output.
porter = Porter(clf, language='java')
output = porter.export(embed_data=True)

print(output)

# %% [markdown]
# ### Run classification in Java

# %%
# Save classifier:
# with open('RandomForestClassifier.java', 'w') as f:
#     f.write(output)

# Compile model (shell):
# $ javac -cp . RandomForestClassifier.java
# %% [markdown]
# ### Train classifier

# %%
# NOTE(review): X and y are not defined in this excerpt -- presumably the
# feature matrix and label vector created by an earlier cell; confirm.
# Import the public ``sklearn.tree`` package; the private
# ``sklearn.tree.tree`` submodule is deprecated and no longer importable
# in current scikit-learn releases.
from sklearn import tree

# Fit a decision tree with default settings on X and y.
clf = tree.DecisionTreeClassifier()
clf.fit(X, y)

# %% [markdown]
# ### Transpile classifier

# %%
from sklearn_porter import Porter

# Export the fitted classifier with 'java' as the target language,
# requesting exported model data, and print the exported output.
porter = Porter(clf, language='java')
output = porter.export(export_data=True)

print(output)

# %% [markdown]
# ### Run classification in Java

# %%
# Save classifier:
# with open('DecisionTreeClassifier.java', 'w') as f:
#     f.write(output)

# Check model data (shell):
# $ cat data.json