コード例 #1
0
ファイル: test_mlut.py プロジェクト: jouker2k/ml2rt
    def test_SparkMLGraph(self):
        """Check that a Spark ML model can be saved as ONNX and loaded back.

        The save is exercised three ways: with a prototype, with an explicit
        shape/dtype pair, and with explicit ``initial_types``.  Each variant
        must leave a file on disk that ``load_model`` accepts.

        Raises:
            RuntimeError: if the prototype's dtype is not ``np.float32``.
        """
        spark_model, prototype = get_spark_model_and_prototype()

        def assert_roundtrip(**save_kwargs):
            # Save, verify and load in one place.  The ``finally`` makes sure
            # the scratch file is removed even when a check fails, so a
            # failing run does not leave stray ``.onnx`` files behind.
            path = f'{time.time()}.onnx'
            try:
                save_sparkml(spark_model, path, **save_kwargs)
                assert os.path.exists(path)
                load_model(path)
            finally:
                if os.path.exists(path):
                    os.remove(path)

        # saving with prototype
        assert_roundtrip(prototype=prototype)

        # saving with shape and dtype
        shape = prototype.shape
        if prototype.dtype != np.float32:
            raise RuntimeError(
                "Test is not configured to run with another type")
        dtype = prototype.dtype
        assert_roundtrip(shape=shape, dtype=dtype)

        # saving with initial_types
        initial_types = utils.guess_onnx_tensortype(shape=shape, dtype=dtype)
        assert_roundtrip(initial_types=[initial_types])
コード例 #2
0
# Point PySpark at the installed pyspark package directory and make both the
# driver and the Python workers use the interpreter running this script.
python_bin = sys.executable
os.environ.update({
    "SPARK_HOME": pyspark.__path__[0],
    "PYSPARK_PYTHON": python_bin,
    "PYSPARK_DRIVER_PYTHON": python_bin,
})

session_builder = SparkSession.builder.appName("redisai_trial")
spark = session_builder.getOrCreate()

original_data = spark.read.format("libsvm").load("sample_libsvm_data.txt")
feature_count = 5


def _truncate_features(x):
    """Return a ``feature_count``-wide SparseVector from slots 125..129."""
    kept_values = x.toArray()[125:130]
    return SparseVector(feature_count, range(feature_count), kept_values)


# Expose the truncation as a SQL function returning a vector column.
spark.udf.register("truncateFeatures", _truncate_features, VectorUDT())
data = original_data.selectExpr(
    "label",
    "truncateFeatures(features) as features",
)

# Two-stage pipeline: index the feature vector, then fit a regression tree
# on the indexed column.
feature_indexer = VectorIndexer(
    inputCol="features",
    outputCol="indexedFeatures",
    maxCategories=4,
    handleInvalid='error',
)
dt = DecisionTreeRegressor(featuresCol="indexedFeatures")
pipeline = Pipeline(stages=[feature_indexer, dt])

# The pipeline is fitted on the whole dataset; no train/test split is made.
model = pipeline.fit(data)

# Describe the model input (one row of ``feature_count`` float32 values
# named 'features') and write the fitted pipeline out as ONNX.
featurestype = utils.guess_onnx_tensortype(
    node_name='features',
    dtype='float32',
    shape=(1, feature_count),
)
save_sparkml(
    model,
    'spark.onnx',
    initial_types=[featurestype],
    spark_session=spark,
)
コード例 #3
0
import os
import sys
import pyspark
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression, OneVsRest
from ml2rt import save_sparkml
from ml2rt import utils

# Point PySpark at the installed pyspark package directory and make both the
# driver and the Python workers use the interpreter running this script.
python_bin = sys.executable
os.environ.update({
    "SPARK_HOME": pyspark.__path__[0],
    "PYSPARK_PYTHON": python_bin,
    "PYSPARK_DRIVER_PYTHON": python_bin,
})
session_builder = SparkSession.builder.appName("redisai_trial")
spark = session_builder.getOrCreate()

libsvm_reader = spark.read.format("libsvm")
data = libsvm_reader.load('multiclass_classification_data.txt')

# One-vs-rest wrapper around a logistic regression base classifier.
lr = LogisticRegression(maxIter=100, tol=0.0001, regParam=0.01)
ovr = OneVsRest(classifier=lr)
model = ovr.fit(data)

# The input width is read from the second field of the first row.
first_row = data.first()
feature_count = first_row[1].size

# Describe the model input (one row of ``feature_count`` float32 values
# named 'features') and write the fitted model out as ONNX.
tensor_types = utils.guess_onnx_tensortype(
    node_name='features',
    dtype='float32',
    shape=(1, feature_count),
)
save_sparkml(model, 'spark.onnx', initial_types=[tensor_types])
コード例 #4
0
import os
import sys
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression
import pyspark
from ml2rt import save_sparkml

# Point PySpark at the installed pyspark package directory and make both the
# driver and the Python workers use the interpreter running this script.
python_bin = sys.executable
os.environ.update({
    "SPARK_HOME": pyspark.__path__[0],
    "PYSPARK_PYTHON": python_bin,
    "PYSPARK_DRIVER_PYTHON": python_bin,
})

session_builder = SparkSession.builder.appName("redisai_trial")
spark = session_builder.getOrCreate()

# label is input + 1: inputs 1.0 .. 6.0 paired with labels 2.0 .. 7.0
training_rows = [
    (float(value) + 1.0, Vectors.dense(float(value)))
    for value in range(1, 7)
]
data = spark.createDataFrame(training_rows, ["label", "features"])

lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal")
model = lr.fit(data)

# the name of the input is 'features'; its width comes from the fitted model
num_features = model.numFeatures

save_sparkml(
    model,
    'linear_regression.onnx',
    shape=(1, num_features),
    dtype='float32',
)