def test_model_transform(self):
    weight = Vectors.dense([3, 2, 1])
    densevec = Vectors.dense([4, 5, 6])
    sparsevec = Vectors.sparse(3, [0], [1])
    eprod = ElementwiseProduct(weight)
    self.assertEqual(eprod.transform(densevec), DenseVector([12, 10, 6]))
    self.assertEqual(eprod.transform(sparsevec), SparseVector(3, [0], [3]))
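# A runnable variant of the assertions above (an added sketch, not part of the
# original test). It assumes an active SparkContext (e.g. a pyspark shell),
# since pyspark.mllib transformers delegate to the JVM. Note the sparse case:
# only the stored index 0 is scaled, so the result stays sparse.
from pyspark.mllib.feature import ElementwiseProduct
from pyspark.mllib.linalg import Vectors

ep = ElementwiseProduct(Vectors.dense([3, 2, 1]))
print(ep.transform(Vectors.dense([4, 5, 6])))     # DenseVector([12.0, 10.0, 6.0])
print(ep.transform(Vectors.sparse(3, [0], [1])))  # SparseVector(3, {0: 3.0})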
from pyspark.mllib.feature import ElementwiseProduct
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.stat import Statistics

# load data
rawdata = sc.textFile("hdfs:///user/training/mldata/concrete.csv")

# convert raw data to an RDD of dense vectors;
# the split values must be converted to float before building the dense vector
vecrdd = rawdata.map(lambda x: Vectors.dense([float(i) for i in x.split(',')]))

# create a dense vector of weights to be used with an ElementwiseProduct
# transformer to weight the values of vecrdd
weights = Vectors.dense([0.2, 0.1, 0.1, 0.1, 0.5, 0.5, 0.7, 0.9, 1.0])

# instantiate an ElementwiseProduct object initialized with the weights vector
ep = ElementwiseProduct(weights)

# transform vecrdd using the transform method of the ElementwiseProduct object
# to create an RDD of weighted values; print the first row of each RDD to
# confirm that the transformation was successful
weighted = ep.transform(vecrdd)
print(weighted.take(1))
print(vecrdd.take(1))

# call the colStats method of the Statistics object on vecrdd and print the
# mean, variance, and number of non-zero values
stats = Statistics.colStats(vecrdd)
print(stats.mean())
print(stats.variance())
print(stats.numNonzeros())
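# A follow-up sketch (an addition, not part of the original snippet): since
# ElementwiseProduct scales each column by a constant, the column means of the
# weighted RDD should equal the original means multiplied elementwise by the
# weights. This reuses the `weighted`, `stats`, and `weights` names defined
# above and is a quick sanity check on the transformation.
weighted_stats = Statistics.colStats(weighted)
print(weighted_stats.mean())              # expected: stats.mean() * weights
print(stats.mean() * weights.toArray())   # elementwise check via numpy arrays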
from pyspark import SparkContext
# $example on$
from pyspark.mllib.feature import ElementwiseProduct
from pyspark.mllib.linalg import Vectors
# $example off$

if __name__ == "__main__":
    sc = SparkContext(appName="ElementwiseProductExample")  # SparkContext

    # $example on$
    data = sc.textFile("data/mllib/kmeans_data.txt")
    parsedData = data.map(lambda x: [float(t) for t in x.split(" ")])

    # Create weight vector.
    transformingVector = Vectors.dense([0.0, 1.0, 2.0])
    transformer = ElementwiseProduct(transformingVector)

    # Batch transform
    transformedData = transformer.transform(parsedData)
    # Single-row transform
    transformedData2 = transformer.transform(parsedData.first())
    # $example off$

    print("transformedData:")
    for each in transformedData.collect():
        print(each)

    print("transformedData2:")
    for each in transformedData2:
        print(each)

    sc.stop()
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.feature import ElementwiseProduct

vec1 = Vectors.dense([10, 35, 50])
vec2 = Vectors.dense([1, 2, 3])

# the original snippet transformed vec_rdd without ever defining it;
# build it from vec1 (assumes an active SparkContext, e.g. a pyspark shell)
vec_rdd = sc.parallelize([vec1])

ep = ElementwiseProduct(vec2)
new_vec = ep.transform(vec_rdd)
new_vec.first()  # DenseVector([10.0, 70.0, 150.0])
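# An extra sketch (not in the original): transform also accepts a single
# vector and returns the elementwise (Hadamard) product directly, without
# going through an RDD, reusing `ep` and `vec1` from above.
print(ep.transform(vec1))  # DenseVector([10.0, 70.0, 150.0])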