Esempio n. 1
0
def sax(feature, n_bins=None):
    """Encode a feature matrix as a space-separated string of SAX words.

    ``feature`` is transposed so that each row of the working array is one
    series. Rows whose variance is below 0.01 are discarded, the remaining
    rows are SAX-transformed with the 'uniform' strategy, and the symbols of
    each row are concatenated into one word.

    :param feature: 2-D array exposing ``.T`` and ``.shape``.
    :param n_bins: number of SAX bins; when falsy, ``feature.shape[0]`` is
        used instead.
    :return: the per-row words joined by single spaces.
    """
    series = feature.T
    # Flag near-constant rows once instead of testing each index separately.
    near_constant = np.var(series, axis=1) < 0.01
    kept_rows = np.array(
        [row for row, drop in zip(series, near_constant) if not drop])

    bins = n_bins or feature.shape[0]
    encoder = SymbolicAggregateApproximation(n_bins=bins, strategy='uniform')

    words = (''.join(symbols) for symbols in encoder.transform(kept_rows))
    return ' '.join(words)
Esempio n. 2
0
    def _sax(self):
        """
        Apply Symbolic Aggregate Approximation to the stored PAA data.

        Builds a ``SymbolicAggregateApproximation`` (pyts package) with ``self.nb_bins``
        bins and the 'normal' strategy, then runs ``fit_transform`` on ``self.paa_data``.
        :return: nothing, the transformed data is stored in ``self.sax_data``.
        """
        transformer = SymbolicAggregateApproximation(
            n_bins=self.nb_bins,
            strategy='normal',
        )
        self.sax_data = transformer.fit_transform(self.paa_data)
Esempio n. 3
0
def ts3sax(_ts):
    """Return the 3-symbol SAX string of a series together with a bin map.

    The series is passed through ``normalise``, which also yields the pair
    ``(mu, sd)`` used to build the map. The normalised series is then
    SAX-encoded as a single sample with 3 bins and the 'normal' strategy.

    :param _ts: the raw time series handed to ``normalise``.
    :return: ``(sax_string, binmap)``; ``sax_string`` joins the symbols of
        the one encoded sample, ``binmap`` maps 'a', 'b' and 'c' to values
        derived from ``mu`` and ``sd``.
    """
    ts, (mu, sd) = normalise(_ts)

    # NOTE(review): if `norm` is scipy.stats.norm, ppf(1/3) is negative, so
    # 'a' lands above mu and 'c' below it -- confirm this orientation is the
    # intended one.
    first_tercile = norm.ppf(1 / 3)
    second_tercile = norm.ppf(2 / 3)
    binmap = {
        'a': mu - sd * first_tercile,
        'b': mu,
        'c': mu - sd * second_tercile,
    }

    encoder = SymbolicAggregateApproximation(n_bins=3, strategy='normal')
    # The transformer takes a 2-D input; the series is its only sample.
    single_sample = ts.reshape(1, -1)
    encoded = encoder.fit_transform(single_sample)
    return (''.join(encoded[0]), binmap)
Esempio n. 4
0
def sax_bop_windows(features, labels, n_bins=4):
    """Turn each window into per-row histograms of SAX symbols.

    Every 2-D slice ``features[i, :, :]`` is discretised with an ordinal,
    uniform-strategy SAX transformer, and each row of the result is replaced
    by the count of every symbol ``0 .. n_bins - 1``.

    :param features: 3-D array indexed as ``features[i, :, :]``.
    :param labels: sequence indexed by the same ``i``; copied through.
    :param n_bins: number of SAX bins, and therefore the histogram length.
    :return: ``(histograms, labels)`` as NumPy arrays.
    """
    # The transformer is configured identically for every window, so it is
    # built once and re-fitted per window by fit_transform.
    sax = SymbolicAggregateApproximation(n_bins=n_bins,
                                         alphabet='ordinal',
                                         strategy='uniform')
    new_features, new_labels = [], []
    for i in range(features.shape[0]):
        pattern_row = sax.fit_transform(features[i, :, :])
        # minlength pins every histogram to n_bins entries, so the width no
        # longer depends on the largest symbol that happens to occur in a row.
        new_features.append(
            [np.bincount(symbols, minlength=n_bins) for symbols in pattern_row])
        new_labels.append(labels[i])
    return np.array(new_features), np.array(new_labels)
Esempio n. 5
0
def shannon_entropy(current_observation: pd.DataFrame, raw_key: str):
    """Return the entropy of the SAX symbols of one DataFrame column.

    The column ``raw_key`` is SAX-encoded as a single sample with the
    transformer's default settings; the symbol counts, divided by the sample
    length, are passed to ``entropy``.

    :param current_observation: frame holding the column to analyse.
    :param raw_key: name of that column.
    :return: the entropy value; ``0`` when the transform rejects the sample
        with 'At least one sample is constant.'; ``None`` for any other
        ``ValueError``.
    """
    try:
        sax = SymbolicAggregateApproximation()
        time_series = [current_observation[raw_key].to_numpy()]
        symbols = sax.transform(time_series)
        counts = np.unique(symbols, return_counts=True)[1]
        return entropy(pk=counts / len(symbols[0]))
    except ValueError as err:
        # A constant series is treated as carrying no information.
        if str(err) == 'At least one sample is constant.':
            return 0
        return None
Esempio n. 6
0
class Data_Transformer():
    """Symbolic (SAX and SFA) encodings of the ``Xtr`` / ``Xte`` arrays.

    Everything is computed once, in the class body, from the module-level
    ``Xtr`` and ``Xte``; the transformers and the encoded arrays are exposed
    as class attributes.
    """
    # NOTE(review): assumes Xtr / Xte are the train / test splits -- confirm.
    # Each transformer is fitted on Xtr only and then applied to Xte, so both
    # sets are encoded with the same fitted parameters instead of the test
    # set being encoded by a transformer re-fitted on itself.
    SAX = SymbolicAggregateApproximation(strategy='uniform', alphabet='ordinal')
    Xtr_SAX = SAX.fit_transform(Xtr)
    Xte_SAX = SAX.transform(Xte)

    SFA = SymbolicFourierApproximation(alphabet='ordinal')
    Xtr_SFA = SFA.fit_transform(Xtr)
    Xte_SFA = SFA.transform(Xte)
Esempio n. 7
0
def sax(indices, time_point):
    """Count SAX symbols per time step across the selected rows.

    The entries ``globe.CONN_NORM[idx]`` for every ``idx`` in ``indices`` are
    stacked, passed through ``ss.resample`` with ``time_point`` (presumably
    scipy.signal.resample -- confirm), and SAX-encoded row by row with 8
    uniform bins. For each (time step, symbol) pair the number of rows
    showing that symbol at that step is counted.

    :param indices: iterable of indices into ``globe.CONN_NORM``.
    :param time_point: sample count handed to ``ss.resample``.
    :return: list of dicts with keys ``'time'``, ``'letter'`` and
        ``'value'``, ordered by letter and then by time. The table always
        covers at least 70 time steps and grows when the resampled data has
        more columns than that.
    """
    # fetch the data
    conn_matrix = np.vstack([globe.CONN_NORM[idx] for idx in indices])

    # downsample
    conn_norm_ds = np.transpose(
        ss.resample(np.transpose(conn_matrix), time_point))

    # SAX
    transformer = SymbolicAggregateApproximation(n_bins=8, strategy='uniform')

    # Map each SAX letter to its ordinal position.
    letter_dict = {letter: pos for pos, letter in enumerate('abcdefgh')}

    # The table used to be fixed at 70 steps, which raised KeyError for longer
    # series; keep 70 as the minimum so existing output is unchanged.
    n_steps = max(70, conn_norm_ds.shape[1])

    # initialize the sax dictionary solution
    data = {
        '{}_{}'.format(step, pos): {
            'time': '{}'.format(step),
            'letter': str(pos),
            'value': 0
        }
        for pos in letter_dict.values()
        for step in range(n_steps)
    }

    # apply SAX; bins are computed per row, so one call covers every row
    for symbols in transformer.transform(conn_norm_ds):  # ROI x time-point
        for step, symbol in enumerate(symbols):
            data['{}_{}'.format(step, letter_dict[symbol])]['value'] += 1

    return list(data.values())  # data is in the format that the observable expecting
import numpy as np
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
from scipy.stats import norm
from pyts.approximation import SymbolicAggregateApproximation
from numpy.testing import rundocs

# Demo script: SAX-encode random data and start plotting the first sample.
n_samples = 100  # number of samples: parameters for building the example dataset
n_timestamps = 24  # number of timestamps per sample

rgn = np.random.RandomState(41)
X = rgn.randn(n_samples, n_timestamps)  # generate random data

n_bins = 5  # number of quantization intervals
sax = SymbolicAggregateApproximation(n_bins=n_bins, strategy='normal')
X_sax = sax.fit_transform(X)

# compute the quantization interval edges for a Gaussian distribution
# ppf = percent point function (inverse cumulative distribution function)
# [1:-1] drops the 0 and 1 quantiles, leaving the n_bins - 1 interior edges
bins = norm.ppf(np.linspace(0, 1, n_bins + 1)[1:-1])

# True where a symbol of the first sample compares greater than the previous
# one; the first position is always True
bottom_bool = np.r_[True, X_sax[0, 1:] > X_sax[0, :-1]]

# draw the plot
plt.figure(figsize=(12, 8))
plt.plot(X[0], 'o--', label='Original')
for x, y, s, bottom in zip(range(n_timestamps), X[0], X_sax[0], bottom_bool):
    va = 'bottom' if bottom else 'top'
    plt.text(x,
             y,
Esempio n. 9
0
def test_actual_results(params, X, arr_desired):
    """Check that SAX built from ``params`` maps ``X`` onto ``arr_desired``."""
    transformer = SymbolicAggregateApproximation(**params)
    arr_actual = transformer.fit_transform(X)
    np.testing.assert_array_equal(arr_actual, arr_desired)
Esempio n. 10
0
def test_parameter_check(params, error, err_msg):
    """Check that ``transform`` raises ``error`` with ``err_msg`` for bad parameters."""
    transformer = SymbolicAggregateApproximation(**params)
    # The message is matched literally, so regex metacharacters are escaped.
    expected_pattern = re.escape(err_msg)
    with pytest.raises(error, match=expected_pattern):
        transformer.transform(X)
Esempio n. 11
0
]

# Gather the conn_norm entries selected by `indices`, both as a running sum
# (for the average) and as a list (for stacking).
cluster_summed = np.zeros_like(conn_norm[0])
cluster = []
# fetch the data
for idx in indices:
    cluster_summed = np.add(cluster_summed, conn_norm[idx])
    cluster.append(conn_norm[idx])
ROI = cluster_summed / len(indices)  # averaged within ROI

# one stacked row block per selected entry
conn_matrix = np.vstack(cluster)
# downsample
# NOTE(review): `ss` is presumably scipy.signal -- confirm; the transposes
# put the resampled axis first for the call and restore the layout afterwards.
conn_norm_ds = np.transpose(ss.resample(np.transpose(conn_matrix), time_point))

# SAX
transformer = SymbolicAggregateApproximation(n_bins=8, strategy='uniform')
# https://pyts.readthedocs.io/en/stable/modules/approximation.html
# strategy='uniform': all bins in each sample have identical widths,
# strategy='quantile': all bins in each sample have the same number of points,
# strategy='normal': bin edges are quantiles from a standard normal distribution.

# MAKE ONE TIME
# make a letter dict
# eight letters, matching n_bins=8 above; each maps to its position in the list
letter_list = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
letter_dict = {}
for i, l in enumerate(letter_list):
    letter_dict[l] = i

data = {}
# initialize the sax dictionary solution
for letter in letter_dict:
Esempio n. 12
0
# Rank the columns of a CSV file by the distance of their SAX words to the
# SAX word of the last column.
window = 24
alphabets = 7

input_path = "../path/of/input/file..."

# Load the table and transpose it so that every original column is one row.
df = pd.read_csv(input_path)
column_names = list(df.columns)
df = df.T
N = df.shape[1]
df_to_numpy = df.values

# PAA first, then SAX on the PAA output.
ppa_transformer = PiecewiseAggregateApproximation(window_size=window)
paa_output = ppa_transformer.transform(df_to_numpy)

sax_transformer = SymbolicAggregateApproximation(
    n_bins=alphabets,
    strategy='normal',
)
sax_output = sax_transformer.transform(paa_output)

# The last row is the reference every other row is compared against.
target = sax_output[-1]

sax = SAX_trans(window, alphabets)

mindist_with_target = [
    sax.compare_strings(candidate, target) for candidate in sax_output[:-1]
]

# argsort orders the candidates from smallest to largest distance.
MinDistances = np.asarray(mindist_with_target)
ranking = np.argsort(MinDistances)

result = [column_names[indx] for indx in ranking]
Esempio n. 13
0
import numpy as np
import matplotlib.pyplot as plt
from pyts.approximation import PiecewiseAggregateApproximation
from pyts.utils import segmentation
import pandas as pd

# 1. Approximation algorithms
# 1.1 Piecewise Aggregate Approximation
# pyts.approximation.PiecewiseAggregateApproximation
# PAA
n_samples, n_timestamps = 2, 10

# Fixed seed so the random demo data is reproducible.
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
print(X)

# Apply PAA with a window size of 2 and show the result.
windows_size = 2
paa = PiecewiseAggregateApproximation(window_size=windows_size)
X_paa = paa.transform(X)
print(X_paa)

# Call the segmentation helper with 10, a window size of 3 and no overlap,
# and print what it returns.
seg = segmentation(10, window_size=3, overlapping=False)
print(seg)

# 1.2 Symbolic Aggregate approXimation (SAX)

from pyts.approximation import SymbolicAggregateApproximation
# X is rebound here to a small hand-written two-sample dataset.
X = [[0, 4, 2, 1, 7, 6, 3, 5], [2, 5, 4, 5, 3, 4, 2, 3]]
sax = SymbolicAggregateApproximation()
print(sax.transform(X))