def within_group_scatter(data: pyspark.sql.DataFrame, features, response, targets):
    """Compute the pooled within-group scatter matrix S_w.

    For each class label in *targets*, the sample covariance of the feature
    rows belonging to that class is scaled by (n_t - 1) — undoing the
    unbiased-covariance normalization — and the scaled matrices are summed.

    :param data: DataFrame containing the feature columns and the response column
    :param features: list of feature column names (defines p)
    :param response: name of the grouping/response column
    :param targets: iterable of class labels to group rows by
    :return: (p, p) numpy array where p = len(features)
    """
    p = len(features)
    sw = numpy.zeros((p, p))
    for target in targets:
        # Column-expression filter instead of a formatted SQL string:
        # the original "{} == '{}'".format(...) broke (and was injectable)
        # whenever a target label contained a quote character.
        df_t = data.filter(data[response] == target)
        X_t = RowMatrix(df_t.select(features).rdd.map(numpy.array))
        sw += X_t.computeCovariance().toArray() * (df_t.count() - 1)
    return sw
def get_gmm(self, k, sample_fraction=None, retry=True):
    """Return a GaussianMixtureModel with ``k`` components.

    For ``k == 1`` the single Gaussian is taken directly from the column
    summary statistics and covariance of the (optionally down-sampled)
    training data; for larger ``k`` an ML mixture model is fitted.
    """
    if k != 1:
        model = self.fit_ml_model(k, sample_fraction=sample_fraction, retry=retry)
        gaussians = [
            Gaussian(g.mean, g.cov.toArray())
            for g in model.gaussiansDF.collect()
        ]
        return GaussianMixtureModel(
            model.weights, gaussians, model.summary.logLikelihood
        )

    # Degenerate one-component case: no fitting needed, the mean and
    # covariance of the data describe the single Gaussian exactly.
    data = self.mllib_training_data
    if sample_fraction:
        data = data.sample(False, sample_fraction)
    matrix = RowMatrix(data)
    stats = matrix.computeColumnSummaryStatistics()
    gaussian = Gaussian(stats.mean(), matrix.computeCovariance().toArray())
    # No likelihood is available without a fitted model.
    return GaussianMixtureModel([1.0], [gaussian], None)
# NOTE(review): `rdd`, `nrows`, and `ncols` are defined earlier in the file
# (outside this chunk) — presumably from parsing the .mtx header; confirm.
print("ncol: %d, nrow %d" % (ncols, nrows))

# Build a coordinate matrix from the (i, j, value) entry tuples.
coord_mat = CoordinateMatrix(rdd.map(tuple))
print("num rows in matrix %d" % coord_mat.numRows())
print("finished using pyspark")

print("now use SparkSession")
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
# Space-delimited read of the same matrix-market file, letting Spark
# infer numeric column types.
reader = spark.read.option("delimiter", " ")
df_2 = reader.csv('./data/lpi_ceria3d_b.mtx', header=False, inferSchema=True)
df_2.printSchema()

# Re-interpret the rows as a RowMatrix so covariance and multiply
# become available.
row_mat = RowMatrix(df_2.rdd.map(tuple))
print("num rows in row matrix %d, num_cols %d" % (row_mat.numRows(), row_mat.numCols()))
print("print covariance")
print(row_mat.computeCovariance())

# Multiply by a fixed 3x1 dense column vector.
dm = Matrices.dense(3, 1, [4, 5, 6])
print("multiply row Matrix")
result = row_mat.multiply(dm)