Example #1
0
def finalizeMergeOnMasterNode(partsRDD):
    """Merge per-node PCA partial results on the master node.

    Args:
        partsRDD: Object whose ``collect()`` returns an iterable of
            ``(key, serialized_partial_result)`` pairs. The key is ignored.

    Returns:
        dict: ``'eigenvectors'`` and ``'eigenvalues'`` entries read from the
        finalized PCA result.
    """
    # Master-side PCA (correlation method) that delegates the covariance
    # computation to a sparse (fastCSR) covariance algorithm.
    master = pca.Distributed(step2Master, method=pca.correlationDense)
    sparse_covariance = covariance.Distributed(step2Master,
                                               method=covariance.fastCSR)
    master.parameter.covariance = sparse_covariance

    collected = partsRDD.collect()

    # Rebuild each partial result from its archive and register it as input
    for _, serialized in collected:
        archive = OutputDataArchive(serialized)
        partial = pca.PartialResult(pca.correlationDense)
        partial.deserialize(archive)
        master.input.add(pca.partialResults, partial)

    # Run the master step, then finalize to obtain the decomposition
    master.compute()
    final = master.finalizeCompute()

    eigenvectors = final.get(pca.eigenvectors)
    eigenvalues = final.get(pca.eigenvalues)
    return {'eigenvectors': eigenvectors, 'eigenvalues': eigenvalues}
def finalizeMergeOnMasterNode(partsRDD):
    """Merge per-node dense covariance partial results on the master node.

    Args:
        partsRDD: Object whose ``collect()`` returns an iterable of
            ``(key, serialized_partial_result)`` pairs. The key is ignored.

    Returns:
        dict: ``'covariance'`` and ``'mean'`` entries read from the finalized
        covariance result.
    """
    # Master-side algorithm for a dense variance-covariance matrix
    master = covariance.Distributed(step=step2Master,
                                    method=covariance.defaultDense)

    collected = partsRDD.collect()

    # Rebuild each partial result from its archive and register it as input
    for _, serialized in collected:
        archive = OutputDataArchive(serialized)
        partial = covariance.PartialResult()
        partial.deserialize(archive)
        master.input.add(covariance.partialResults, partial)

    # Run the master step, then finalize to obtain the results
    master.compute()
    final = master.finalizeCompute()

    return {
        'covariance': final.get(covariance.covariance),
        'mean': final.get(covariance.mean),
    }
def finalizeMergeOnMasterNode(partsRDD):
    """Merge per-node sparse covariance partial results on the master node.

    Args:
        partsRDD: Object whose ``collect()`` returns an iterable of
            ``(key, serialized_partial_result)`` pairs. The key is ignored.

    Returns:
        dict: ``'covariance'`` and ``'mean'`` entries read from the finalized
        covariance result.
    """
    # Master-side algorithm for a sparse (fastCSR) variance-covariance matrix
    master = covariance.Distributed(step=step2Master,
                                    method=covariance.fastCSR)

    collected = partsRDD.collect()

    # Deserialize every partial result and register it as input
    for _, serialized in collected:
        partial = deserializePartialResult(serialized, covariance)
        master.input.add(covariance.partialResults, partial)

    # Run the master step, then finalize to obtain the results
    master.compute()
    final = master.finalizeCompute()

    return {
        'covariance': final.get(covariance.covariance),
        'mean': final.get(covariance.mean),
    }
    def mapper(tup):
        """Run the local dense covariance step on one keyed data block.

        Args:
            tup: ``(key, serialized_table)`` pair; the second item is passed
                to ``deserializeNumericTable``.

        Returns:
            tuple: ``(key, serialized_partial_result)``.
        """
        key, serialized_table = tup

        # Local-node algorithm for a dense variance-covariance matrix
        local = covariance.Distributed(step=step1Local,
                                       method=covariance.defaultDense)

        # Feed the deserialized table to the algorithm
        table = deserializeNumericTable(serialized_table)
        local.input.set(covariance.data, table)

        # Compute the partial result and hand it back in serialized form
        partial = local.compute()
        return (key, serializeNumericTable(partial))
Example #5
0
    def mapper(tup):
        """Run the local PCA step on one keyed CSR data block.

        Args:
            tup: ``(key, serialized_csr_table)`` pair; the second item is
                passed to ``deserializeCSRNumericTable``.

        Returns:
            tuple: ``(key, serialized_partial_result)``.
        """
        key, serialized_csr = tup

        # Local-node PCA (correlation method) that delegates the covariance
        # computation to a sparse (fastCSR) covariance algorithm.
        local = pca.Distributed(step1Local, method=pca.correlationDense)
        sparse_covariance = covariance.Distributed(step1Local,
                                                   method=covariance.fastCSR)
        local.parameter.covariance = sparse_covariance

        # Feed the deserialized CSR table to the algorithm
        csr = deserializeCSRNumericTable(serialized_csr)
        local.input.setDataset(pca.data, csr)

        # Compute the partial result and hand it back in serialized form
        partial = local.compute()
        return (key, serializeNumericTable(partial))
    def mapper(tup):
        """Run the local sparse covariance step on one keyed CSR data block.

        Args:
            tup: ``(key, serialized_csr_table)`` pair; the second item is
                passed to ``deserializeCSRNumericTable``.

        Returns:
            tuple: ``(key, serialized_partial_result)``.
        """
        key, serialized_csr = tup

        # Local-node algorithm for a sparse (fastCSR) variance-covariance matrix
        local = covariance.Distributed(step=step1Local,
                                       method=covariance.fastCSR)

        # Feed the deserialized CSR table to the algorithm
        csr = deserializeCSRNumericTable(serialized_csr)
        local.input.set(covariance.data, csr)

        # Compute the partial result and hand it back in serialized form
        partial = local.compute()
        return (key, serializeNumericTable(partial))
Example #7
0
    # Local (step 1) part of a distributed covariance computation: each MPI
    # rank loads its own dataset file, computes a partial result and
    # serializes it into an archive.
    # NOTE(review): despite its name, comm_size holds the MPI.COMM_WORLD
    # communicator object itself, not a process count.
    comm_size = MPI.COMM_WORLD
    # Rank of this process; used below to pick this node's input file
    rankId = comm_size.Get_rank()

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileNames[rankId],
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Retrieve the input data
    dataSource.loadDataBlock()

    # Create an algorithm to compute a variance-covariance matrix on local nodes
    # (no method argument is passed, so the library's default method applies)
    localAlgorithm = covariance.Distributed(step1Local)

    # Set the input data set to the algorithm
    localAlgorithm.input.set(covariance.data, dataSource.getNumericTable())

    # Compute a variance-covariance matrix
    # (pres is this node's partial result, serialized below)
    pres = localAlgorithm.compute()

    # Serialize partial results required by step 2
    dataArch = InputDataArchive()

    pres.serialize(dataArch)
    # Size of the serialized archive (presumably in bytes -- TODO confirm)
    perNodeArchLength = dataArch.getSizeOfArchive()

    # Archive contents as an array
    nodeResults = dataArch.getArchiveAsArray()
Example #8
0
    jp(data_dir, 'covcormoments_csr_1.csv'),
    jp(data_dir, 'covcormoments_csr_2.csv'),
    jp(data_dir, 'covcormoments_csr_3.csv'),
    jp(data_dir, 'covcormoments_csr_4.csv')
]

if __name__ == "__main__":

    # Local (step 1) part of a distributed sparse covariance computation:
    # each MPI rank builds a table from its own dataset file, computes a
    # partial result and serializes it into an archive.
    # NOTE(review): despite its name, comm_size holds the MPI.COMM_WORLD
    # communicator object itself, not a process count.
    comm_size = MPI.COMM_WORLD
    # Rank of this process; used below to pick this node's input file
    rankId = comm_size.Get_rank()

    # Build this rank's input table from its dataset file with createSparseTable
    # (presumably a CSR numeric table, given the fastCSR method below -- TODO confirm)
    dataTable = createSparseTable(datasetFileNames[rankId])

    # Create an algorithm to compute a sparse variance-covariance matrix on local nodes
    localAlgorithm = covariance.Distributed(step1Local,
                                            method=covariance.fastCSR)

    # Set the input data set to the algorithm
    localAlgorithm.input.set(covariance.data, dataTable)

    # Compute a sparse variance-covariance matrix
    # (pres is this node's partial result, serialized below)
    pres = localAlgorithm.compute()

    # Serialize partial results required by step 2
    dataArch = InputDataArchive()
    pres.serialize(dataArch)
    # Size of the serialized archive (presumably in bytes -- TODO confirm)
    perNodeArchLength = dataArch.getSizeOfArchive()

    # Archive contents as an array
    nodeResults = dataArch.getArchiveAsArray()

    # Transfer partial results to step 2 on the root node