예제 #1
0
def sax(feature, n_bins=None):
    """Encode a feature matrix as space-separated SAX words.

    ``feature`` is transposed so that each of its columns becomes one series.
    Series whose variance is below 0.01 are dropped, and every remaining
    series is discretised with ``SymbolicAggregateApproximation`` using
    ``strategy='uniform'``.

    :param feature: 2-D numpy array; its columns are treated as the series.
    :param n_bins: number of bins; falls back to ``feature.shape[0]`` when
        falsy (``None`` or ``0``).
    :return: one word per retained series, joined by single spaces; an empty
        string when no series survives the variance filter.
    """
    series = feature.T
    # Boolean mask instead of testing `i not in const_idxs` for every row.
    # Written as the negation of the `< 0.01` test so that rows with a NaN
    # variance are still kept, exactly as before.
    keep = ~(np.var(series, axis=1) < 0.01)
    series = series[keep]
    if series.shape[0] == 0:
        # Nothing left to encode: avoid handing an empty array to the
        # transformer and return the empty join directly.
        return ''
    transformer = SymbolicAggregateApproximation(
        n_bins=(n_bins or feature.shape[0]), strategy='uniform')
    return ' '.join(''.join(word) for word in transformer.transform(series))
예제 #2
0
    def _sax(self):
        """
        Discretise ``self.paa_data`` with Symbolic Aggregate Approximation.

        Thin wrapper around pyts' ``SymbolicAggregateApproximation`` configured
        with ``self.nb_bins`` bins and the 'normal' strategy.

        :return: nothing; the symbolic output is stored in ``self.sax_data``.
        """
        transformer = SymbolicAggregateApproximation(
            n_bins=self.nb_bins,
            strategy='normal',
        )
        self.sax_data = transformer.fit_transform(self.paa_data)
예제 #3
0
def ts3sax(_ts):
    """Return the three-symbol SAX word of a single time series.

    The series is first passed through ``normalise``, which also yields the
    ``(mu, sd)`` pair used to build ``binmap``.

    :param _ts: the raw time series handed to ``normalise``.
    :return: tuple ``(word, binmap)`` where ``word`` is the SAX string and
        ``binmap`` maps the letters 'a', 'b', 'c' to values derived from
        ``mu``, ``sd`` and the 1/3 and 2/3 normal quantiles.
    """
    ts, (mu, sd) = normalise(_ts)

    # NOTE(review): norm.ppf(1 / 3) is negative, so 'a' ends up above mu and
    # 'c' below it -- confirm this orientation is what callers expect.
    lower_q = norm.ppf(1 / 3)
    upper_q = norm.ppf(2 / 3)
    binmap = dict(a=mu - sd * lower_q, b=mu, c=mu - sd * upper_q)

    encoder = SymbolicAggregateApproximation(n_bins=3, strategy='normal')
    # The transformer expects a 2-D input; wrap the series as one sample.
    single_sample = ts.reshape(1, -1)
    word = ''.join(encoder.fit_transform(single_sample)[0])
    return (word, binmap)
예제 #4
0
def sax_bop_windows(features, labels, n_bins=4):
    """Turn each window into per-series histograms of SAX symbols.

    Every ``features[i]`` (a 2-D window) is discretised with an ordinal,
    uniform-strategy ``SymbolicAggregateApproximation``; each row of the
    result is then replaced by the count of every symbol ``0..n_bins-1``.

    :param features: 3-D array indexed as ``features[i, :, :]``.
    :param labels: sequence indexable by ``0..features.shape[0]-1``.
    :param n_bins: alphabet size, and therefore the histogram length.
    :return: ``(histograms, labels)`` as numpy arrays; ``histograms`` has
        shape ``(features.shape[0], rows per window, n_bins)``.
    """
    # The transformer's configuration never changes, so build it once.
    sax = SymbolicAggregateApproximation(n_bins=n_bins,
                                         alphabet='ordinal',
                                         strategy='uniform')
    n_windows = features.shape[0]
    new_features = []
    for i in range(n_windows):
        symbols = sax.fit_transform(features[i, :, :])
        # minlength pins every histogram to n_bins entries, so the stacked
        # result is rectangular even if a row never reaches the top symbol.
        new_features.append(
            [np.bincount(row, minlength=n_bins) for row in symbols])
    new_labels = [labels[i] for i in range(n_windows)]
    return np.array(new_features), np.array(new_labels)
예제 #5
0
def shannon_entropy(current_observation: pd.DataFrame, raw_key: str):
    """Entropy of the SAX symbol distribution of one DataFrame column.

    The column ``raw_key`` is converted to a SAX word with a default
    ``SymbolicAggregateApproximation``; the relative frequency of each symbol
    is then passed to ``entropy``.

    :param current_observation: frame holding the series.
    :param raw_key: name of the column to analyse.
    :return: the entropy value; ``0`` when a ``ValueError`` with the exact
        message 'At least one sample is constant.' is raised; ``None`` for
        any other ``ValueError``.
    """
    try:
        encoder = SymbolicAggregateApproximation()
        # The transformer works on a batch, so wrap the column as one sample.
        batch = [current_observation[raw_key].to_numpy()]
        symbols = encoder.transform(batch)
        symbol_counts = np.unique(symbols, return_counts=True)[1]
        return entropy(pk=symbol_counts / len(symbols[0]))
    except ValueError as err:
        # A constant series is treated as carrying zero entropy.
        if str(err) == 'At least one sample is constant.':
            return 0
        return None
예제 #6
0
class Data_Transformer():
    """Symbolic encodings (SAX and SFA) of the train and test splits.

    All attributes are computed once, when the class body executes, from the
    ``Xtr`` and ``Xte`` names that must already be in scope. Each transformer
    is fitted on the training split only and then applied to the test split.
    """

    # Symbolic Aggregate Approximation with integer (ordinal) symbols.
    SAX = SymbolicAggregateApproximation(strategy='uniform', alphabet='ordinal')
    Xtr_SAX = SAX.fit_transform(Xtr)
    Xte_SAX = SAX.transform(Xte)

    # Symbolic Fourier Approximation with integer (ordinal) symbols.
    SFA = SymbolicFourierApproximation(alphabet='ordinal')
    Xtr_SFA = SFA.fit_transform(Xtr)
    # Reuse what was learned on the training split: re-fitting on the test
    # split would encode it with a different fit than the training features.
    Xte_SFA = SFA.transform(Xte)
예제 #7
0
파일: utils.py 프로젝트: nhunghoang/PRAGMA
def sax(indices, time_point):
    """Count SAX symbols per time step over a group of series.

    The entries ``globe.CONN_NORM[idx]`` for ``idx`` in ``indices`` are
    stacked, resampled to ``time_point`` samples and discretised into 8
    symbols with ``strategy='uniform'``. For every (time step, symbol) pair
    the number of series showing that symbol at that step is counted.

    :param indices: iterable of keys/indices into ``globe.CONN_NORM``.
    :param time_point: number of samples to resample each series to.
    :return: list of dicts ``{'time': str, 'letter': str, 'value': int}``,
        ordered symbol by symbol and, within a symbol, by time step. The
        grid has at least 70 time steps (the size originally hard-coded).
    """
    # fetch the data and stack it row-wise
    conn_matrix = np.vstack([globe.CONN_NORM[idx] for idx in indices])

    # downsample (resample is applied to the transposed matrix, then the
    # result is transposed back)
    conn_norm_ds = np.transpose(
        ss.resample(np.transpose(conn_matrix), time_point))

    # SAX
    transformer = SymbolicAggregateApproximation(n_bins=8, strategy='uniform')

    # map each output letter to its rank in the alphabet
    letter_dict = {letter: rank for rank, letter in enumerate('abcdefgh')}

    # Keep the 70-step grid for short series, but grow it when time_point is
    # larger so that every resampled step has a slot (a fixed 70 raised
    # KeyError for time_point > 70).
    n_slots = max(70, time_point)

    # initialize the sax dictionary solution with zero counts
    data = {}
    for rank in letter_dict.values():
        for step in range(n_slots):
            data['{}_{}'.format(step, rank)] = {
                'time': '{}'.format(step),
                'letter': str(rank),
                'value': 0,
            }

    # apply SAX to all rows at once and tally the symbol seen at each step
    for word in transformer.transform(conn_norm_ds):
        for step, letter in enumerate(word):
            data['{}_{}'.format(step, letter_dict[letter])]['value'] += 1

    # data is in the format that the observable is expecting
    return list(data.values())
예제 #8
0
import numpy as np
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
from scipy.stats import norm
from pyts.approximation import SymbolicAggregateApproximation
from numpy.testing import rundocs

n_samples = 100  # number of samples: parameters used to build the example dataset
n_timestamps = 24  # number of timestamps per sample

rgn = np.random.RandomState(41)
X = rgn.randn(n_samples, n_timestamps)  # generate random data

n_bins = 5  # number of quantization intervals
sax = SymbolicAggregateApproximation(n_bins=n_bins, strategy='normal')
X_sax = sax.fit_transform(X)

# compute the quantization interval edges for a Gaussian distribution
# ppf = percent point function (inverse of the cumulative distribution function)
bins = norm.ppf(np.linspace(0, 1, n_bins + 1)[1:-1])

# True for the first point and wherever the symbol of sample 0 is greater
# than the previous one; used below to pick the label's vertical alignment
bottom_bool = np.r_[True, X_sax[0, 1:] > X_sax[0, :-1]]

# draw the plot
plt.figure(figsize=(12, 8))
plt.plot(X[0], 'o--', label='Original')
for x, y, s, bottom in zip(range(n_timestamps), X[0], X_sax[0], bottom_bool):
    va = 'bottom' if bottom else 'top'
    plt.text(x,
             y,
예제 #9
0
def test_actual_results(params, X, arr_desired):
    """Check that SAX built from ``params`` maps ``X`` to ``arr_desired``."""
    transformer = SymbolicAggregateApproximation(**params)
    np.testing.assert_array_equal(transformer.fit_transform(X), arr_desired)
예제 #10
0
def test_parameter_check(params, error, err_msg):
    """Check that invalid ``params`` raise ``error`` with ``err_msg`` on transform."""
    transformer = SymbolicAggregateApproximation(**params)
    # The message is matched literally, so escape regex metacharacters.
    expected_pattern = re.escape(err_msg)
    with pytest.raises(error, match=expected_pattern):
        transformer.transform(X)
예제 #11
0
]

# Running element-wise sum of the selected entries, seeded with zeros shaped
# like the first entry of conn_norm.
cluster_summed = np.zeros_like(conn_norm[0])
cluster = []
# fetch the data: accumulate the sum and keep each selected entry
for idx in indices:
    cluster_summed = np.add(cluster_summed, conn_norm[idx])
    cluster.append(conn_norm[idx])
ROI = cluster_summed / len(indices)  # averaged within ROI

# Stack the selected entries row-wise into one matrix.
conn_matrix = np.vstack(cluster)
# downsample to time_point samples; the matrix is transposed before the
# resample call and transposed back afterwards
# (presumably ss is scipy.signal -- confirm against the imports)
conn_norm_ds = np.transpose(ss.resample(np.transpose(conn_matrix), time_point))

# SAX
transformer = SymbolicAggregateApproximation(n_bins=8, strategy='uniform')
# https://pyts.readthedocs.io/en/stable/modules/approximation.html
# strategy='uniform': all bins in each sample have identical widths,
# strategy='quantile': all bins in each sample have the same number of points,
# strategy='normal': bin edges are quantiles from a standard normal distribution.

# MAKE ONE TIME
# make a letter dict mapping each of the 8 letters to its position (0-7)
letter_list = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
letter_dict = {}
for i, l in enumerate(letter_list):
    letter_dict[l] = i

# Container filled by the code that follows.
data = {}
# initialize the sax dictionary solution
for letter in letter_dict:
예제 #12
0
# PAA window length and SAX alphabet size.
window = 24
alphabets = 7

input_path = "../path/of/input/file..."

# Load the table and transpose it so that every original column becomes one
# row, i.e. one series for the transformers below.
df = pd.read_csv(input_path)
column_names = list(df.columns)
df = df.T
N = df.shape[1]
df_to_numpy = df.values

# Piecewise aggregation first, then symbolisation of the reduced series.
ppa_transformer = PiecewiseAggregateApproximation(window_size=window)
paa_output = ppa_transformer.transform(df_to_numpy)

sax_transformer = SymbolicAggregateApproximation(
    n_bins=alphabets,
    strategy='normal',
)
sax_output = sax_transformer.transform(paa_output)

# The last series is the reference every other series is compared against.
target = sax_output[-1]

sax = SAX_trans(window, alphabets)

# Distance of every non-target series to the target
# (presumably the SAX MINDIST measure -- confirm against SAX_trans).
mindist_with_target = [
    sax.compare_strings(candidate, target) for candidate in sax_output[:-1]
]

# Rank the series from the smallest to the largest distance.
MinDistances = np.asarray(mindist_with_target)
ranking = np.argsort(MinDistances)

result = [column_names[indx] for indx in ranking]
예제 #13
0
import numpy as np
import matplotlib.pyplot as plt
from pyts.approximation import PiecewiseAggregateApproximation
from pyts.utils import segmentation
import pandas as pd

# 1. Approximation algorithms
# 1.1 Piecewise Aggregate Approximation (PAA)
# pyts.approximation.PiecewiseAggregateApproximation
n_samples = 2
n_timestamps = 10

# Fixed seed so the random demo data is the same on every run.
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
print(X)

windows_size = 2
paa = PiecewiseAggregateApproximation(window_size=windows_size)
X_paa = paa.transform(X)
print(X_paa)

# Segment bounds for a series of n_timestamps points, windows of 3, no overlap.
seg = segmentation(n_timestamps, window_size=3, overlapping=False)
print(seg)

# 1.2 Symbolic Aggregate approXimation (SAX)

from pyts.approximation import SymbolicAggregateApproximation
# X is rebound here to a small hand-written example with two series.
X = [
    [0, 4, 2, 1, 7, 6, 3, 5],
    [2, 5, 4, 5, 3, 4, 2, 3],
]
sax = SymbolicAggregateApproximation()
print(sax.transform(X))