Example #1
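This unit test checks that a LinearKalmanFilter with an identity process model and zero process noise converges to the ordinary least squares solution of y = a * x + b + r. The snippet is a method of a test class (self.spark is a SparkSession fixture from the test base), so its imports are not shown; a minimal sketch of what they would look like, assuming artan's documented package layout:

    import numpy as np
    from pyspark.ml.linalg import Vectors, Matrices
    from artan.filter import LinearKalmanFilter
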
    def test_ols_equivalence(self):
        # Simple OLS problem:
        # y = a * x + b + r
        # where r ~ N(0, 1)
        n = 40
        a = 0.27
        b = 1.2
        x = np.arange(0, n)
        r = np.random.normal(0, 1, n)
        y = (a * x + b + r).reshape(n, 1)
        features = x.reshape(n, 1)
        features = np.concatenate([features, np.ones_like(features)], axis=1)
        df = self.spark.createDataFrame(
            [(Vectors.dense(y[i]), Matrices.dense(1, 2, features[i]))
             for i in range(n)], ["measurement", "measurementModel"])
        lkf = LinearKalmanFilter()\
            .setInitialStateMean(Vectors.dense(0.0, 0.0))\
            .setMeasurementModelCol("measurementModel")\
            .setMeasurementCol("measurement")\
            .setInitialStateCovariance(Matrices.dense(2, 2, (np.eye(2)*10).reshape(4, 1)))\
            .setProcessModel(Matrices.dense(2, 2, np.eye(2).reshape(4, 1)))\
            .setProcessNoise(Matrices.dense(2, 2, np.zeros(4)))\
            .setMeasurementNoise(Matrices.dense(1, 1, [10E-5]))

        model = lkf.transform(df)
        state = model.filter(
            "stateIndex = {}".format(n)).collect()[0].state.mean.values

        # Check equivalence with least squares solution with numpy
        expected, _, _, _ = np.linalg.lstsq(features, y, rcond=None)
        np.testing.assert_array_almost_equal(state,
                                             expected.reshape(2),
                                             decimal=5)
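
The equivalence tested above can be reproduced in plain numpy: with an identity process model and zero process noise, each Kalman update reduces to a recursive least squares step. A minimal sketch under the same settings as the test (kalman_ols is a hypothetical helper, not part of artan):

    import numpy as np

    def kalman_ols(features, y, r=1e-4, p0=10.0):
        # Recursive least squares via Kalman updates: identity process
        # model, zero process noise, scalar measurement noise r.
        n_dim = features.shape[1]
        state = np.zeros((n_dim, 1))                 # initial state mean
        cov = np.eye(n_dim) * p0                     # initial state covariance
        for h, z in zip(features, y):
            h = h.reshape(1, n_dim)                  # per-row measurement model
            gain = cov @ h.T / (h @ cov @ h.T + r)   # Kalman gain
            state = state + gain * (z - h @ state)   # innovation update
            cov = (np.eye(n_dim) - gain @ h) @ cov   # covariance update
        return state.ravel()

With the large initial covariance and tiny measurement noise used in the test, kalman_ols(features, y) should match np.linalg.lstsq(features, y, rcond=None)[0] to the tested precision.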
Example #2
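This test exercises multiple model adaptive estimation (MMAE). The same regression data is fed under two state keys, giving a bank of two filters; enabling MMAE blends the bank's state estimates, weighting each filter by its measurement likelihood over a sliding window of 5. The blended estimate only roughly matches the batch least squares solution, which is why the assertion tolerance is decimal=0.
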
    def test_multiple_model_adaptive_filter(self):
        n = 100
        a = 0.27
        b = 1.2
        x = np.concatenate([np.arange(0, n), np.arange(0, n)])
        r = np.random.normal(0, 1, n * 2)
        y = (a * x + b + r).reshape(n * 2, 1)
        features = x.reshape(n * 2, 1)
        features = np.concatenate([features, np.ones_like(features)], axis=1)
        state_keys = ["1"] * n + ["2"] * n
        df = self.spark.createDataFrame(
            [(state_keys[i], Vectors.dense(y[i]), Matrices.dense(1, 2, features[i]))
             for i in range(n * 2)],
            ["state_key", "measurement", "measurementModel"])

        mmaeFilter = LinearKalmanFilter(2, 1)\
            .setStateKeyCol("state_key")\
            .setMeasurementModelCol("measurementModel")\
            .setMeasurementCol("measurement")\
            .setInitialCovariance(Matrices.dense(2, 2, (np.eye(2)*10).reshape(4, 1)))\
            .setProcessModel(Matrices.dense(2, 2, np.eye(2).reshape(4, 1)))\
            .setProcessNoise(Matrices.dense(2, 2, np.zeros(4)))\
            .setMeasurementNoise(Matrices.dense(1, 1, [1.0]))\
            .setSlidingLikelihoodWindow(5)\
            .setEnableMultipleModelAdaptiveEstimation()

        model = mmaeFilter.transform(df)
        state = model.filter(
            "stateIndex = {}".format(n)).collect()[0].state.values

        expected, _, _, _ = np.linalg.lstsq(features, y, rcond=None)
        np.testing.assert_array_almost_equal(state,
                                             expected.reshape(2),
                                             decimal=0)
Example #3
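This test demonstrates saving filter state in batch mode and resuming from it. A filter processes the first half of the measurements and its final state row is captured; a second run is then started from that state via setInitialStateDistributionCol, with the saved state attached to every remaining measurement through a cross join. The resumed run must end in the same state as a single pass over all measurements. Note that in the pyspark Params style the setters mutate and return the same instance, so filter, initial_filter and restarted_filter all refer to one object with progressively updated parameters.
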
    def test_batch_save_and_resume(self):
        n = 100
        ts = np.arange(0, n)
        zs = np.random.normal(0, 1, n) + ts

        split_point = n // 2
        initial = zs[:split_point]
        remaining = zs[split_point:]

        filter = LinearKalmanFilter()\
            .setMeasurementCol("measurement")\
            .setInitialStateMean(
                Vectors.dense([0.0, 0.0]))\
            .setInitialStateCovariance(
                Matrices.dense(2, 2, [1, 0, 0, 1]))\
            .setProcessModel(
                Matrices.dense(2, 2, [1, 0, 1, 1]))\
            .setProcessNoise(
                Matrices.dense(2, 2, [0.01, 0.0, 0.0, 0.01]))\
            .setMeasurementNoise(
                Matrices.dense(1, 1, [1]))\
            .setMeasurementModel(
                Matrices.dense(1, 2, [1, 0]))

        initial_filter = filter.setInitialStateCovariance(
            Matrices.dense(2, 2, [1000.0, 0.0, 0.0, 1000.0]))

        def create_df(m):
            return self.spark.createDataFrame([(Vectors.dense(m[i]), )
                                               for i in range(len(m))],
                                              ["measurement"])

        initial_measurements = create_df(initial)

        complete_measurements = create_df(zs)

        initial_state = initial_filter.transform(initial_measurements)\
            .filter("stateIndex == {}".format(len(initial)))\
            .select("stateKey", "state")

        complete_state = initial_filter.transform(complete_measurements) \
            .filter("stateIndex == {}".format(len(zs)))\
            .select("stateKey", "state")

        restarted_filter = filter\
            .setInitialStateDistributionCol("state")

        remaining_measurements = create_df(remaining)\
            .crossJoin(initial_state)

        restarted_state = restarted_filter.transform(remaining_measurements)\
            .filter("stateIndex == {}".format(n - split_point))\
            .select("stateKey", "state")

        assert (restarted_state.collect() == complete_state.collect())
Example #4
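This test round-trips the filter through disk persistence: the configured filter is saved to a temporary directory and loaded back with LinearKalmanFilter.load, and the loaded parameters must equal the originals. It assumes os and tempfile are imported at module level.
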
    def test_persistance(self):
        filter = LinearKalmanFilter() \
            .setStateSize(2) \
            .setInitialStateMean(Vectors.dense([0.0, 0.0])) \
            .setInitialStateCovariance(Matrices.dense(2, 2, [1.0, 0.0, 0.0, 0.0]))

        path = tempfile.mkdtemp()
        model_path = os.path.join(path, "lkf")
        filter.save(model_path)

        loaded = LinearKalmanFilter.load(model_path)
        assert (loaded.getInitialStateMean() == filter.getInitialStateMean())
        assert (loaded.getInitialStateCovariance() ==
                filter.getInitialStateCovariance())
        assert (loaded.getStateSize() == filter.getStateSize())
Example #5
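This example is a standalone streaming script: it reads Spark's rate source, assigns each row to one of num_states keys, synthesizes a noisy linear trend per key, and tracks it with a local linear trend Kalman filter, writing the filtered states to the console sink. Note that pyspark's Matrices.dense fills values in column-major order, so Matrices.dense(2, 2, [1.0, 0.0, 1.0, 1.0]) is the process model [[1, 1], [0, 1]] acting on a (level, trend) state, and Matrices.dense(1, 2, [1.0, 0.0]) observes the level.
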
    # The snippet begins mid-script; num_states is evidently read from the
    # first command line argument, mirroring mps below.
    num_states = int(sys.argv[1])
    mps = int(sys.argv[2])

    spark = SparkSession.builder.appName("RateSourceLKF").getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    noise_param = 1

    input_df = spark.readStream.format("rate").option("rowsPerSecond", mps).load()\
        .withColumn("mod", F.col("value") % num_states)\
        .withColumn("stateKey", F.col("mod").cast("String"))\
        .withColumn("trend", (F.col("value")/num_states).cast("Integer") + F.randn() * noise_param)

    lkf = LinearKalmanFilter(2, 1)\
        .setStateKeyCol("stateKey")\
        .setMeasurementCol("measurement")\
        .setInitialCovariance(Matrices.dense(2, 2, [10000.0, 0.0, 0.0, 10000.0]))\
        .setProcessModel(Matrices.dense(2, 2, [1.0, 0.0, 1.0, 1.0]))\
        .setProcessNoise(Matrices.dense(2, 2, [0.0001, 0.0, 0.0, 0.0001]))\
        .setMeasurementNoise(Matrices.dense(1, 1, [noise_param]))\
        .setMeasurementModel(Matrices.dense(1, 2, [1.0, 0.0]))

    assembler = VectorAssembler(inputCols=["trend"], outputCol="measurement")

    measurements = assembler.transform(input_df)
    query = lkf.transform(measurements)\
        .writeStream\
        .queryName("RateSourceLKF")\
        .outputMode("append")\
        .format("console")\
        .start()

    query.awaitTermination()
Example #6
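This example runs a streaming ordinary least squares fit over the rate source: each row carries a 1x3 measurement model built from the generated features, and the filter's three-dimensional state converges to the regression coefficients, which the console sink prints as modelParameters. The snippet begins mid-script; the udf definitions are reconstructed below from the surviving fragment and the calls that follow, and VectorUDT and MatrixUDT are assumed to come from pyspark.ml.linalg.
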
    # Reconstructed from the truncated fragment: udfs building the label
    # vector and the 1x3 measurement model row. The generating model
    # label = a * x + b * y + c + w is an assumption; the coefficients
    # a, b, c and noise_param are defined in the truncated part of the script.
    label_udf = F.udf(lambda x, y, w: Vectors.dense([a * x + b * y + c + w]),
                      VectorUDT())
    features_udf = F.udf(lambda x, y: Matrices.dense(1, 3, [x, y, 1.0]),
                         MatrixUDT())

    features = spark.readStream.format("rate").option("rowsPerSecond", mps).load()\
        .withColumn("mod", F.col("value") % num_states)\
        .withColumn("stateKey", F.col("mod").cast("String"))\
        .withColumn("x", (F.col("value")/num_states).cast("Integer").cast("Double"))\
        .withColumn("y", F.sqrt("x"))\
        .withColumn("w", F.randn(0) * noise_param)\
        .withColumn("label", label_udf("x", "y", "w"))\
        .withColumn("features", features_udf("x", "y"))

    lkf = LinearKalmanFilter()\
        .setStateKeyCol("stateKey")\
        .setMeasurementCol("label")\
        .setMeasurementModelCol("features") \
        .setInitialStateMean(Vectors.dense([0.0, 0.0, 0.0]))\
        .setInitialStateCovariance(Matrices.dense(3, 3, [10, 0, 0, 0, 10, 0, 0, 0, 10]))\
        .setProcessModel(Matrices.dense(3, 3, [1, 0, 0, 0, 1, 0, 0, 0, 1]))\
        .setProcessNoise(Matrices.dense(3, 3, [0] * 9))\
        .setMeasurementNoise(Matrices.dense(1, 1, [1]))

    truncate_udf = F.udf(lambda x: "[%.2f, %.2f, %.2f]" % (x[0], x[1], x[2]),
                         StringType())

    query = lkf.transform(features)\
        .select("stateKey", "stateIndex", truncate_udf("state.mean").alias("modelParameters"))\
        .writeStream\
        .queryName("LKFRateSourceOLS")\
        .outputMode("append")\
        .format("console")\
        .start()