def test_indexed_row_matrix_from_dataframe(self):
    from pyspark.sql.utils import IllegalArgumentException

    df = self.spark.createDataFrame([Row(int(0), Vectors.dense(1))])
    matrix = IndexedRowMatrix(df)
    self.assertEqual(matrix.numRows(), 1)
    self.assertEqual(matrix.numCols(), 1)
    with self.assertRaises(IllegalArgumentException):
        IndexedRowMatrix(df.drop("_1"))
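For context, a minimal standalone sketch of the two construction paths the test above exercises: building an IndexedRowMatrix from an RDD of IndexedRow objects and from a two-column DataFrame (integer index, vector). The SparkSession setup and the tiny two-row dataset are assumptions added for illustration, not part of the test.

from pyspark.sql import SparkSession, Row
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix

spark = SparkSession.builder.appName("indexed-row-matrix-demo").getOrCreate()
sc = spark.sparkContext

# 1) from an RDD of IndexedRow(index, vector)
rows = sc.parallelize([
    IndexedRow(0, Vectors.dense([1.0, 2.0])),
    IndexedRow(1, Vectors.dense([3.0, 4.0])),
])
mat_from_rdd = IndexedRowMatrix(rows)
print(mat_from_rdd.numRows(), mat_from_rdd.numCols())   # 2 2

# 2) from a DataFrame with an integer index column and a vector column,
#    which is what the test above checks (columns are inferred as _1, _2)
df = spark.createDataFrame([Row(0, Vectors.dense([1.0, 2.0])),
                            Row(1, Vectors.dense([3.0, 4.0]))])
mat_from_df = IndexedRowMatrix(df)
print(mat_from_df.numRows(), mat_from_df.numCols())     # 2 2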
import numpy as np
from pyspark.mllib.linalg import Vectors, DenseMatrix, Matrix
from pyspark.mllib.linalg.distributed import IndexedRowMatrix
from pyspark.mllib.feature import StandardScaler
from sklearn import datasets

# load the iris data and turn it into an RDD of dense vectors
# (sc is an existing SparkContext)
X = datasets.load_iris().data
X_rdd = sc.parallelize(X).map(lambda x: Vectors.dense(x))

# create the standardizer model for centering the dataset (mean removal only)
scaler = StandardScaler(withMean=True, withStd=False).fit(X_rdd)
X_sc = scaler.transform(X_rdd)

# create the IndexedRowMatrix from the RDD
X_rm = IndexedRowMatrix(X_sc.zipWithIndex().map(lambda x: (x[1], x[0])))

# compute the SVD factorization of the matrix: the first argument is k, the
# number of leading singular values to keep (here all columns), and the second
# is a boolean stating whether to compute U or not
svd_o = X_rm.computeSVD(X_rm.numCols(), True)

# svd_o.V is of shape n * k, not k * n (as in sklearn)
P_comps = svd_o.V.toArray().copy()
num_rows = X_rm.numRows()

# U is whitened and projected onto the principal-component subspace
S = svd_o.s.toArray()
eig_vals = S ** 2

# fix n_comp at 3 for this tutorial
# n_comp = np.argmax(np.cumsum(eig_vals) / eig_vals.sum() > 0.95) + 1
n_comp = 3
U = svd_o.U.rows.map(lambda x: (x.index, (np.sqrt(num_rows - 1) * x.vector).tolist()[0:n_comp]))

# K is our transformation matrix to obtain the projection onto the PC subspace;
# it is built from the right singular vectors (P_comps) here, since U above has
# been reassigned to an RDD and cannot be divided by S directly
K = (P_comps / S).T[:n_comp]
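A brief follow-up sketch, assuming K from the snippet above is the (n_comp x p) projection/whitening matrix and X_sc the centered RDD of vectors: applying K to the centered rows gives the PCA projection. Collecting to the driver is only a shortcut that works for the small iris dataset; for large data the distributed matrix APIs would be used instead.

# collect the centered rows locally and project them onto the first n_comp
# principal components with K
X_centered = np.array(X_sc.map(lambda v: v.toArray()).collect())
X_proj = X_centered.dot(K.T)            # shape: (num_rows, n_comp)
print(X_proj.shape)

# sanity check: the whitened components should be (nearly) uncorrelated
print(np.round(np.corrcoef(X_proj.T), 3))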
# imports assumed at module level
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix


def sparkComputeCost(self, input_file, x, y, theta):
    sc = SparkContext()

    # add the ones (bias) column while building the RDD; convert fields to floats
    idx = 0
    x_mat = sc.textFile(input_file) \
        .map(lambda line: [float(v) for v in ('1,' + line).split(",")[:-1]]) \
        .zipWithIndex()

    # need a SQLContext() to generate an IndexedRowMatrix from an RDD
    sqlContext = SQLContext(sc)

    x_mat = IndexedRowMatrix(
        x_mat.map(lambda row: IndexedRow(row[1], row[0]))
    ).toBlockMatrix()
    x_mat.cache()
    print("Matrix rows x cols")
    print(x_mat.numRows())
    print(x_mat.numCols())

    # theta as a column vector
    vec = sc.parallelize(theta) \
        .map(lambda line: [line]) \
        .zipWithIndex()
    vec = IndexedRowMatrix(
        vec.map(lambda row: IndexedRow(row[1], row[0]))
    ).toBlockMatrix()
    vec.cache()
    print("Vector rows x cols")
    print(vec.numRows())
    print(vec.numCols())

    # hypothesis h = X * theta
    h = x_mat.multiply(vec)
    h.cache()
    print("Hypothesis rows x cols")
    print(h.numRows())
    print(h.numCols())

    # target vector y (last field of each line)
    y_vec = sc.textFile(input_file) \
        .map(lambda line: [float(('1,' + line).split(",")[-1])]) \
        .zipWithIndex()
    y_vec = IndexedRowMatrix(
        y_vec.map(lambda row: IndexedRow(row[1], row[0]))
    ).toBlockMatrix()
    y_vec.cache()

    errors = h.subtract(y_vec).toLocalMatrix()
    print(sum(errors.toArray()))

    '''sparkSession = SparkSession \
        .builder \
        .appName('pyspark') \
        .getOrCreate()
    df = sparkSession.read.csv(input_file)
    df = df \
        .toDF(x, y) \
        .withColumn("Ones", psf.lit(1)) \
        .cache()
    df.select(x, 'Ones').show()'''
    '''sc = SparkContext('local', 'pyspark')
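The function above prints the raw sum of errors but never computes the squared-error cost itself. A minimal sketch of how that could be finished from the local error matrix, assuming the DenseMatrix returned by toLocalMatrix() above; the helper name and the usage line are illustrative, not part of the original code.

def squared_error_cost(errors_local_matrix):
    # errors_local_matrix: the DenseMatrix produced by toLocalMatrix() above
    e = errors_local_matrix.toArray().ravel()   # one residual per training row
    m = e.shape[0]                              # number of training examples
    return float((e ** 2).sum() / (2 * m))      # J(theta) = 1/(2m) * sum(e^2)

# hypothetical usage with the `errors` matrix computed inside the function:
# print(squared_error_cost(errors))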