Example #1
    def test_model_transform(self):
        weight = Vectors.dense([3, 2, 1])

        densevec = Vectors.dense([4, 5, 6])
        sparsevec = Vectors.sparse(3, [0], [1])
        eprod = ElementwiseProduct(weight)
        self.assertEqual(eprod.transform(densevec), DenseVector([12, 10, 6]))
        self.assertEqual(eprod.transform(sparsevec), SparseVector(3, [0], [3]))
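These assertions check the component-wise (Hadamard) product of the weight vector with each input. As a quick cross-check outside the test suite, the same arithmetic in plain NumPy reproduces the expected dense result:

import numpy as np

weight = np.array([3, 2, 1])
densevec = np.array([4, 5, 6])

# ElementwiseProduct scales each component by the matching weight
print(weight * densevec)  # [12 10  6], matching DenseVector([12, 10, 6])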
Example #2
from pyspark.mllib.feature import ElementwiseProduct
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.stat import Statistics

# load data
rawdata = sc.textFile("hdfs:///user/training/mldata/concrete.csv")

# convert raw data to RDD of dense vectors
# need to convert the split values to float before casting to a dense vector
vecrdd = rawdata.map(lambda x: Vectors.dense([float(i) for i in x.split(',')]))

# create a dense vector of weights for an ElementwiseProduct transformer,
# which will be used to weight the values of vecrdd
weights = Vectors.dense([0.2, 0.1, 0.1, 0.1, 0.5, 0.5, 0.7, 0.9, 1.0])

# instantiate an ElementwiseProduct object initialized with the weights vector
ep = ElementwiseProduct(weights)

# transform vecrdd using the transform method of the ElementwiseProduct object
# to create an RDD of weighted values
# print the first element of each RDD to confirm the transformation was successful
weighted = ep.transform(vecrdd)

print(weighted.take(1))
print(vecrdd.take(1))

# call the colStats method of the Statistics object on vecrdd and print the
# mean, variance, and number of non-zero values
stats = Statistics.colStats(vecrdd)

print(stats.mean())
print(stats.variance())
print(stats.numNonzeros())
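Not in the original snippet, but a natural follow-up: running the same colStats call on the weighted RDD shows the effect of the ElementwiseProduct weights, since each column mean is scaled by its weight and each column variance by the squared weight.

# follow-up sketch: column statistics of the weighted RDD
wstats = Statistics.colStats(weighted)

print(wstats.mean())      # column means scaled by the weights
print(wstats.variance())  # column variances scaled by the squared weights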
Example #3
from pyspark import SparkContext
# $example on$
from pyspark.mllib.feature import ElementwiseProduct
from pyspark.mllib.linalg import Vectors
# $example off$

if __name__ == "__main__":
    sc = SparkContext(appName="ElementwiseProductExample")  # SparkContext

    # $example on$
    data = sc.textFile("data/mllib/kmeans_data.txt")
    parsedData = data.map(lambda x: [float(t) for t in x.split(" ")])

    # Create weight vector.
    transformingVector = Vectors.dense([0.0, 1.0, 2.0])
    transformer = ElementwiseProduct(transformingVector)

    # Batch transform
    transformedData = transformer.transform(parsedData)
    # Single-row transform
    transformedData2 = transformer.transform(parsedData.first())
    # $example off$

    print("transformedData:")
    for each in transformedData.collect():
        print(each)

    print("transformedData2:")
    for each in transformedData2:
        print(each)
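For comparison, the DataFrame-based pyspark.ml.feature module provides the same transformer. A minimal sketch, assuming an existing SparkSession named spark and an illustrative input vector:

from pyspark.ml.feature import ElementwiseProduct
from pyspark.ml.linalg import Vectors

df = spark.createDataFrame([(Vectors.dense([2.0, 1.0, 3.0]),)], ["vector"])
transformer = ElementwiseProduct(scalingVec=Vectors.dense([0.0, 1.0, 2.0]),
                                 inputCol="vector", outputCol="transformedVector")
transformer.transform(df).show()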
Example #4
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.feature import ElementwiseProduct

vec1 = Vectors.dense([10, 35, 50])
vec2 = Vectors.dense([1, 2, 3])
ep = ElementwiseProduct(vec2)

# the original snippet transformed an undefined RDD named vec_rdd;
# build it from vec1 (assuming an existing SparkContext sc) so the example runs
vec_rdd = sc.parallelize([vec1])
new_vec = ep.transform(vec_rdd)

new_vec.first()
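With these inputs, new_vec.first() returns DenseVector([10.0, 70.0, 150.0]): each component of vec1 multiplied by the matching weight in vec2 (10 * 1, 35 * 2, 50 * 3).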