예제 #1
0
    def test_sw_sax_limit_constant(self):
        """
        Test sliding window and SAX on a constant timeseries with two greater values
        """
        sax_info = ConfigSax(paa=10,
                             sequences_size=10,
                             with_mean=True,
                             with_std=True,
                             global_norm=False,
                             local_norm=False,
                             linear_filter=False,
                             recovery=0.5,
                             coefficients=[0.1, 0.9],
                             alphabet_size=5)

        spark_ctx = ScManager.get()

        result, _ = sliding_windows(ts_list=["specific_time_serie"],
                                    sax_info=sax_info,
                                    spark_ctx=spark_ctx)

        print("result={}".format(result.collect()))

        sax_result = run_sax_on_sequences(rdd_sequences_data=result,
                                          paa=sax_info.paa,
                                          alphabet_size=sax_info.alphabet_size)

        print("sax_word={}".format(sax_result.sax_word))
        # PAA_value = 0 => 'c'
        # PAA_value = 10 => 'e' or 'd'
        # PAA_value = -10 => 'a' or 'b'
        self.assertTrue(sax_result.sax_word is 'ccccccccae'
                        or sax_result.sax_word is 'ccccccccbd')
예제 #2
0
    def test_sliding_window_sax_basic(self):
        """
        Test the nominal case
        """
        sax_info = ConfigSax(paa=3,
                             sequences_size=6,
                             with_mean=True,
                             with_std=True,
                             global_norm=False,
                             local_norm=False,
                             linear_filter=False,
                             recovery=0.5,
                             coefficients=[0.1, 0.9],
                             alphabet_size=3)

        spark_ctx = ScManager.get()
        result, _ = sliding_windows(ts_list=["linear_time_serie"],
                                    sax_info=sax_info,
                                    spark_ctx=spark_ctx)

        sax_result = run_sax_on_sequences(rdd_sequences_data=result,
                                          paa=sax_info.paa,
                                          alphabet_size=sax_info.alphabet_size)

        # recovery = 0.5 and word_size = 3 => sax_result = 'aab abc bcc'
        self.assertEqual(sax_result.sax_word, 'aababcbcc')
예제 #3
0
    def test_sliding_window_filter(self):
        """
        Testing linear filter.
        """
        sax_info = ConfigSax(paa=3,
                             sequences_size=6,
                             with_mean=True,
                             with_std=True,
                             global_norm=False,
                             local_norm=False,
                             linear_filter=True,
                             recovery=0.5,
                             coefficients=[1, 0.5],
                             alphabet_size=6)

        spark_ctx = ScManager.get()
        # Test for linear sequences
        result, _ = sliding_windows(ts_list=["linear_time_serie"],
                                    sax_info=sax_info,
                                    spark_ctx=spark_ctx)

        result = result.collect()
        # all sequences are linear => no sequence
        self.assertEqual(len(result), 0)

        # Test for constant sequences with a maximum recovery (= 0 => no overlap between sequences)
        sax_info.coefficients = [0, 1]
        sax_info.recovery = 0
        result, _ = sliding_windows(ts_list=["ts_with_constant_pattern"],
                                    sax_info=sax_info,
                                    spark_ctx=spark_ctx)
        result = result.collect()
        LOGGER.info("result=%s", result)
        LOGGER.info("ts_init=%s", get_ts_mock("ts_with_constant_pattern"))
        # Sequence of 12 pts, recovery = 0 (no recovery) -> 2 sequences
        self.assertEqual(len(result), 2)
예제 #4
0
    def test_patterns(self):
        """
        Test the 'main' function of random projection.
        Setting all the param is quite long...
        """
        sax_info = ConfigSax(paa=4,
                             sequences_size=4,
                             with_mean=True,
                             with_std=True,
                             global_norm=True,
                             local_norm=True,
                             linear_filter=True,
                             recovery=1,
                             coefficients=[0.1, 0.9],
                             alphabet_size=4)

        ts_list = [{'tsuid': 'testPatternA'},
                   {'tsuid': 'tesPatternB'},
                   {'tsuid': 'testPatternC'},
                   {'tsuid': 'testPatternConstant'},
                   {'tsuid': 'testPatternLinear'},
                   {'tsuid': 'testPatternTooSmall'},
                   {'tsuid': 'testPatternTrivialMatch'},
                   {'tsuid': 'testPatternRealistic'}]

        # get all the tsuid (necessary for the format of the result)
        tsuid_list = []
        for ts_ref in ts_list:
            tsuid_list.append(ts_ref['tsuid'])

        COLLISION_INFO.nb_iterations = 10
        # set the recognition_info.min_value as done in 'main_random_projection'
        max_iterations = binom(sax_info.paa, COLLISION_INFO.index)
        RECOGNITION_INFO.min_value = int(0.05 * max_iterations)

        RECOGNITION_INFO.activate_spark = None

        # We have set the values

        result = random_projections(ts_list=tsuid_list,
                                    sax_info=sax_info,
                                    collision_info=COLLISION_INFO,
                                    recognition_info=RECOGNITION_INFO)

        LOGGER.info(len(result["patterns"]))
        self.assertTrue(len(result["patterns"]) > 0)
예제 #5
0
    def test_sax(self):
        """
        Test with no calculate the PAA (4 PAA for 4 points in a sequence) and the PAA are equidistants
        """
        sax_info = ConfigSax(paa=4,
                             sequences_size=4,
                             with_mean=True,
                             with_std=True,
                             global_norm=False,
                             local_norm=False,
                             linear_filter=False,
                             recovery=0.5,
                             coefficients=[0.1, 0.9],
                             alphabet_size=4)
        spark_ctx = ScManager.get()
        result, _ = sliding_windows(
            ts_list=["simple_sequences_ts0", "simple_sequences_ts1"],
            sax_info=sax_info,
            spark_ctx=spark_ctx)

        LOGGER.info("sliding_windows done!")

        sax_result = run_sax_on_sequences(rdd_sequences_data=result,
                                          paa=sax_info.paa,
                                          alphabet_size=sax_info.alphabet_size)

        result = result.collect()
        LOGGER.info("sax_result=%s", sax_result)
        LOGGER.info("result=%s", result)

        # the PAA : [[4, 4, 0, 2], [-2, 2, -2, 0]]
        self.assertEqual(sax_result.paa.collect(), [4, 4, 0, 2, -2, 2, -2, 0])
        # the result expected : 'ddbc acab'
        self.assertEqual(sax_result.sax_word, 'ddbcacab')

        # Test with calculate the PAA
        sax_info = ConfigSax(paa=4,
                             sequences_size=12,
                             with_mean=True,
                             with_std=True,
                             global_norm=False,
                             local_norm=False,
                             linear_filter=False,
                             recovery=0.5,
                             coefficients=[0.1, 0.9],
                             alphabet_size=4)

        result, _ = sliding_windows(
            ts_list=["sequences_1_ts0", "sequences_1_ts1"],
            sax_info=sax_info,
            spark_ctx=spark_ctx)

        sax_result = run_sax_on_sequences(rdd_sequences_data=result,
                                          paa=sax_info.paa,
                                          alphabet_size=sax_info.alphabet_size)

        # the PAA : [[1, 4, -2, 1], [4, -2, -3, -3]]
        self.assertEqual(sax_result.paa.collect(),
                         [1, 4, -2, 1, 4, -2, -3, -3])
        # the result expected : 'cdbc dbaa'
        self.assertEqual(sax_result.sax_word, 'cdbcdbaa')
예제 #6
0
    def test_sliding_window_norm(self):
        """
        Testing global and local norm.
        """
        epsilon = 1.0e-10
        # recovery = 1 (no recovery) -> 3 seq of 4 points (nb_points = 12)
        sax_info = ConfigSax(paa=3,
                             sequences_size=4,
                             with_mean=True,
                             with_std=True,
                             global_norm=True,
                             local_norm=False,
                             linear_filter=False,
                             recovery=0,
                             coefficients=[0.1, 1],
                             alphabet_size=6)

        spark_ctx = ScManager.get()
        # Test with global normalization : the timeseries is normalized
        result, coeff = sliding_windows(ts_list=["linear_time_serie"],
                                        sax_info=sax_info,
                                        spark_ctx=spark_ctx)

        result = result.collect()
        coeff = coeff.collect()
        # Check coeff : coeff is the mean and variance of each sequence

        # 12 points no recovery (recovery=0) -> 3 seq of 4 points
        self.assertEqual(len(coeff), 3)

        # ts_value is an array with the sequences values
        ts_value = np.array([])
        for i, _ in enumerate(result):
            # result[i] = (key, list([timestamps, values],[,],...))
            ts_value = np.concatenate((result[i][1][:, 1], ts_value))

        LOGGER.info("result=%s", result)
        # no recovery => 2 seq * 6 points = 12 values = npoints
        self.assertEqual(len(ts_value), 12)

        LOGGER.info("ts_std=%s", (ts_value.std()))
        LOGGER.info("ts_mean=%s", np.mean(ts_value))
        # global normalisation => ts_value have a standard deviation of 1 and mean if 0
        self.assertTrue(1 - epsilon < np.std(ts_value) < 1 + epsilon)
        self.assertTrue(-epsilon < np.mean(ts_value) < epsilon)

        # Test with local normalization : all the sequences are normalized
        sax_info.global_norm = False
        sax_info.local_norm = True
        sax_info.linear_filter = True

        # Recovery = 1 : maximum recovery
        sax_info.recovery = 1
        result, coeff = sliding_windows(ts_list=["ts_with_constant_pattern"],
                                        sax_info=sax_info,
                                        spark_ctx=spark_ctx)
        result = result.collect()

        # Verify that each sequence are normalized
        for i, _ in enumerate(result):
            # result[i] = (key, list([timestamps, values],[,],...))
            seq_value = result[i][1][:, 1]
            self.assertTrue(1 - epsilon < np.std(seq_value) < 1 + epsilon)
            self.assertTrue(-epsilon < np.mean(seq_value) < epsilon)
예제 #7
0
    def test_sliding_window_recovery(self):
        """
        Testing the recovery parameter.
        """
        sax_info = ConfigSax(paa=3,
                             sequences_size=6,
                             with_mean=True,
                             with_std=True,
                             global_norm=False,
                             local_norm=False,
                             linear_filter=False,
                             recovery=0.5,
                             coefficients=[1, 1],
                             alphabet_size=6)
        ts_name = ["linear_time_serie"]
        spark_ctx = ScManager.get()
        # Test with recovery = 0.5
        result, _ = sliding_windows(ts_list=ts_name,
                                    sax_info=sax_info,
                                    spark_ctx=spark_ctx)

        result = result.collect()
        # 2 sequences in the timeseries => 3 sequences at the end
        self.assertEqual(len(result), 3)

        # Test with MAX recovery
        # recovery = 1 (the maximum : 100 % <=> the next window start one point to the right)
        sax_info.recovery = 1.0
        result, _ = sliding_windows(ts_list=ts_name,
                                    sax_info=sax_info,
                                    spark_ctx=spark_ctx)
        result = result.collect()

        # remember that in 'sliding_window' function, we call 'get_ts_mock(ts_name)[0]'
        ts = get_ts_mock(ts_name)[0]
        ts_val_0 = list(ts[0:6][:, 1])
        ts_val_1 = list(ts[6:12][:, 1])
        timestamp_0 = list(ts[0:6][:, 0])
        timestamp_1 = list(ts[6:12][:, 0])

        # Check the timestamp and the values of the two sequences
        # result[i] = (key, list([timestamps, values],[,],...))

        # check ts value
        condition = (np.all(result[i][1][:, 1] in ts_val_0
                            for i in range(len(result)))
                     or np.all(result[i][1][:, 1] in ts_val_1
                               for i in range(len(result))))

        self.assertTrue(condition)

        # check timestamps
        condition = (np.all(result[i][1][:, 0] in timestamp_0
                            for i in range(len(result)))
                     or np.all(result[i][1][:, 0] in timestamp_1
                               for i in range(len(result))))
        self.assertTrue(condition)

        # Test with MINIMUM recovery
        # recovery = 0 (no recovery)
        sax_info.recovery = 0.01
        result2, _ = sliding_windows(ts_list=ts_name,
                                     sax_info=sax_info,
                                     spark_ctx=spark_ctx)
        result2 = result2.collect()
        # 2 sequences in the timeseries => 2 sequences
        self.assertEqual(len(result2), 2)
예제 #8
0
# Add logs to the unittest stdout
for the_logger in [SAX_LOGGER, RECOG_LOGGER, COLL_LOGGER, LOGGER, LOGGER]:
    the_logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(funcName)s:%(message)s')
    # Create another handler that will redirect log entries to STDOUT
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    stream_handler.setFormatter(formatter)
    the_logger.addHandler(stream_handler)

SAX_INFO = ConfigSax(paa=20,
                     sequences_size=1000,
                     with_mean=True,
                     with_std=True,
                     global_norm=False,
                     local_norm=True,
                     linear_filter=True,
                     recovery=0.8,
                     coefficients=[0.1, 0.9],
                     alphabet_size=10)

COLLISION_INFO = ConfigCollision(iterations=1, index=2, config_sax=SAX_INFO)

# Avoiding spark jobs here: already tested in test_recognition
RECOGNITION_INFO = ConfigRecognition(is_stopped_by_eq9=True,
                                     is_algo_method_global=True,
                                     min_value=1,
                                     iterations=10,
                                     radius=1.5,
                                     neighborhood_method=2,
                                     activate_spark=False)