Beispiel #1
0
def run(factors, file, features, outpath):
    """
    Fit a factor analysis to a data set
    """

    from pybda.util.string import drop_suffix
    from pybda.logger import set_logger
    from pybda.spark_session import SparkSession
    from pybda.io.io import read_info, read_and_transmute
    from pybda.io.as_filename import as_logfile

    outpath = drop_suffix(outpath, "/")
    set_logger(as_logfile(outpath))

    with SparkSession() as spark:
        try:
            features = read_info(features)
            data = read_and_transmute(spark,
                                      file,
                                      features,
                                      assemble_features=False)
            fl = FactorAnalysis(spark, factors, features)
            trans = fl.fit_transform(data)
            trans.write(outpath)
        except Exception as e:
            logger.error("Some error: {}".format(str(e)))
Beispiel #2
0
def run(clusters, file, features, outpath):
    """
    Fit a kmeans-clustering to a data set.
    """

    from pybda.io.as_filename import as_logfile
    from pybda.logger import set_logger
    from pybda.spark_session import SparkSession
    from pybda.util.string import drop_suffix
    from pybda.io.io import read_info, read_and_transmute

    outfolder = drop_suffix(outpath, "/")
    set_logger(as_logfile(outpath))

    with SparkSession() as spark:
        try:
            features = read_info(features)
            data = read_and_transmute(spark, file, features)
            fit = KMeans(spark, clusters, features)
            fit = fit.fit(data, outfolder)
            fit.write(data, outfolder)
        except Exception as e:
            logger.error("Some error: {}".format(e))