Example #1
0
    def test_dct(self):
        """Convert a Spark ML DCT transformer to ONNX and compare outputs."""
        # Single-row DataFrame with one dense-vector column named "vec".
        df = self.spark.createDataFrame(
            [(Vectors.dense([5.0, 8.0, 6.0]),)], ["vec"])
        dct = DCT(inverse=False, inputCol="vec", outputCol="resultVec")

        # The ONNX graph input name must match the transformer's inputCol.
        n_features = df.first()[0].size
        n_rows = df.count()
        model_onnx = convert_sparkml(
            dct, 'Sparkml DCT',
            [('vec', FloatTensorType([n_rows, n_features]))])
        self.assertTrue(model_onnx is not None)

        # Run the Spark model and flatten both input and expected output
        # into float32 numpy arrays for the ONNX runtime comparison.
        predicted = dct.transform(df)
        expected = predicted.toPandas().resultVec.apply(
            lambda v: pandas.Series(v.toArray())).values.astype(numpy.float32)
        data_np = df.toPandas().vec.apply(
            lambda v: pandas.Series(v.toArray())).values.astype(numpy.float32)
        paths = save_data_models(
            data_np, expected, dct, model_onnx, basename="SparkmlDCT")
        # paths[3] is the serialized ONNX model path.
        output, _ = run_onnx_model(['resultVec'], data_np, paths[3])
        compare_results(expected, output, decimal=5)
Example #2
0
def DCTTransform(df, hiperparameter):
    """Apply a Discrete Cosine Transform to a vector column of *df*.

    Parameters:
        df -- input dataset, an instance of pyspark.sql.DataFrame
        hiperparameter -- dict with keys 'inverse', 'inputCol' and
            'outputCol' used to configure the pyspark.ml.feature.DCT
            transformer.

    Returns:
        The transformed DataFrame: the input column is inputCol and the
        transformed output column is outputCol.
    """
    transformer = DCT(inverse=hiperparameter['inverse'],
                      inputCol=hiperparameter['inputCol'],
                      outputCol=hiperparameter['outputCol'])
    return transformer.transform(df)
Example #3
0
# _*_ coding:utf-8 _*_

'''
Discrete Cosine Transform(DCT)
'''

from pyspark.sql import SparkSession
from pyspark.ml.feature import DCT
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.appName("dct").getOrCreate()

# Three example rows, each holding one dense 4-element vector.
df = spark.createDataFrame(
    [(Vectors.dense([0.0, 1.0, -2.0, 3.0]),),
     (Vectors.dense([-1.0, 2.0, 4.0, -7.0]),),
     (Vectors.dense([14.0, -2.0, -5.0, 1.0]),)],
    ["features"])

# Forward (non-inverse) DCT of each input vector.
transformer = DCT(inverse=False, inputCol="features", outputCol="featuresDCT")

result = transformer.transform(df)

for row in result.select("featuresDCT").take(3):
    print(row)

result.show()
Example #4
0
#

from __future__ import print_function

# $example on$
from pyspark.ml.feature import DCT
from pyspark.ml.linalg import Vectors
# $example off$
from pyspark.sql import SparkSession

if __name__ == "__main__":
    # One SparkSession per example application.
    spark = (SparkSession
             .builder
             .appName("DCTExample")
             .getOrCreate())

    # $example on$
    df = spark.createDataFrame(
        [(Vectors.dense([0.0, 1.0, -2.0, 3.0]),),
         (Vectors.dense([-1.0, 2.0, 4.0, -7.0]),),
         (Vectors.dense([14.0, -2.0, -5.0, 1.0]),)],
        ["features"])

    # Forward DCT: features -> featuresDCT.
    transformer = DCT(inverse=False, inputCol="features",
                      outputCol="featuresDCT")

    transformer.transform(df).select("featuresDCT").show(truncate=False)
    # $example off$

    spark.stop()
from pyspark.ml.feature import DCT
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession

if __name__ == "__main__":

    spark = SparkSession.builder.appName("DCT").master("local").getOrCreate()

    # Three rows, each a dense 4-element vector in column "features".
    df = spark.createDataFrame([(Vectors.dense([0.0, 1.0, -2.0, 3.0]), ),
                                (Vectors.dense([-1.0, 2.0, 4.0, -7.0]), ),
                                (Vectors.dense([14.0, -2.0, -5.0, 1.0]), )],
                               ["features"])

    # BUG FIX: outputCol was "FeaturesDCT" while the select() below asked
    # for "featuresDCT" — Spark column names in select() are exact, so this
    # raised an AnalysisException. Use one consistent name.
    dct = DCT(inverse=False, inputCol="features", outputCol="featuresDCT")

    dct.transform(df).select("featuresDCT").show(truncate=False)

    spark.stop()
Example #6
0
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import DCT

# The Discrete Cosine Transform transforms a length N real-valued sequence in
# the time domain into another length N real-valued sequence in the frequency
# domain. A DCT class provides this functionality, implementing the DCT-II and
# scaling the result by sqrt(2) such that the representing matrix for the
# transform is unitary. No shift is applied to the transformed sequence (e.g.
# the 0th element of the transformed sequence is the 0th DCT coefficient and
# not the N/2th).

# PS: The obvious distinction between a DCT and a DFT is that the former uses
# only cosine functions, while the latter uses both cosines and sines (in the
# form of complex exponentials).

spark = SparkSession.builder.appName("DCT").getOrCreate()

# Sample input: three dense 4-element vectors in the "features" column.
df = spark.createDataFrame(
    [(Vectors.dense([0.0, 1.0, -2.0, 3.0]),),
     (Vectors.dense([-1.0, 2.0, 4.0, -7.0]),),
     (Vectors.dense([14.0, -2.0, -5.0, 1.0]),)],
    ["features"])

# Forward DCT-II of each input vector into "featuresDCT".
transformer = DCT(inverse=False, inputCol="features", outputCol="featuresDCT")

transformed = transformer.transform(df)
transformed.select("featuresDCT").show(truncate=False)
Example #7
0
# NOTE(review): the original imported Vectors from pyspark.mllib.linalg,
# but pyspark.ml transformers such as DCT require pyspark.ml.linalg
# vectors from Spark 2.0 onward — DCT.transform fails with a column-type
# error otherwise. Switched to the ml.linalg import; confirm the target
# Spark version is 2.0+ (on legacy 1.x this module did not exist).
from pyspark.ml.feature import DCT
from pyspark.ml.linalg import Vectors
from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext("local", "samp")
sqlContext = SQLContext(sc)
# Two rows, each one dense 3-element vector in the "features" column.
df = sqlContext.createDataFrame([(Vectors.dense([-2.0, 2.3, 0.0]), ),
                                 (Vectors.dense([1.0, 2.0, 3.0]), )],
                                ["features"])
# Forward DCT of each vector into "DCTfeatures".
dct = DCT(inputCol="features", outputCol="DCTfeatures", inverse=False)
dctmodel = dct.transform(df)
dctmodel.select("DCTfeatures").show()
"""OUTPUT
+--------------------+
|         DCTfeatures|
+--------------------+
|[0.17320508075688...|
|[3.46410161513775...|
+--------------------+"""