コード例 #1
0
def ts_to_string(series, cuts):
    """A straightforward num-to-string conversion.

    >>> ts_to_string([-1, 0, 1], cuts_for_asize(3))
    'abc'

    >>> ts_to_string([1, -1, 1], cuts_for_asize(3))
    'cac'
    """

    series = np.array(series)
    a_size = len(cuts)
    sax = list()

    for i in range(series.shape[0]):
        num = series[i]

        # If the number is below 0, start from the bottom, otherwise from the top
        if num >= 0:
            j = a_size - 1
            while j > 0 and cuts[j] >= num:

                j = j - 1
            sax.append(idx2letter(j))
        else:
            j = 1
            while j < a_size and cuts[j] <= num:
                j = j + 1
            sax.append(idx2letter(j-1))

    return ''.join(sax)
コード例 #2
0
def ts_to_string(series, cuts):
    """A straightforward num-to-string conversion."""
    a_size = len(cuts)
    sax = list()
    for i in range(0, len(series)):
        num = series[i]
        # if teh number below 0, start from the bottom, or else from the top
        if (num >= 0):
            j = a_size - 1
            while ((j > 0) and (cuts[j] >= num)):
                j = j - 1
            sax.append(idx2letter(j))
        else:
            j = 1
            while (j < a_size and cuts[j] <= num):
                j = j + 1
            sax.append(idx2letter(j - 1))
    return ''.join(sax)
コード例 #3
0
def test_sizing():
    """Test idx to char."""
    assert 'a' == idx2letter(0)
    assert 'h' == idx2letter(7)
    assert 't' == idx2letter(19)

    with pytest.raises(ValueError, match=r'.* idx'):
        idx2letter(-1)

    with pytest.raises(ValueError, match=r'.* idx .*'):
        idx2letter(20)
コード例 #4
0
ファイル: sax.py プロジェクト: JingChufei/BIZSEER
def ts_to_string(series, cuts):
    """A straightforward num-to-string conversion."""

    # - series中的一个时刻的数值 num, 在alphabet cuts中找对应的位置, 根据其位置转化为字母

    a_size = len(cuts)
    sax = list()
    for i in range(0, len(series)):
        num = series[i]

        # num >= 0 从后往前找对应字母 j = a_size - 1
        if (num >= 0):
            j = a_size - 1
            while ((j > 0) and (cuts[j] >= num)):
                j = j - 1
            sax.append(idx2letter(j))

        # num < 0 从前往后找对应字母 j = 1
        else:
            j = 1
            while (j < a_size and cuts[j] <= num):
                j = j + 1
            sax.append(idx2letter(j - 1))
    return ''.join(sax)
コード例 #5
0
def sax_via_window(series, win_size, paa_size, alphabet_size=3,
                   nr_strategy='exact', znorm_threshold=0.01, sax_type='unidim'):
    """Simple via window conversion implementation.

    # SAX-ENERGY
    >>> sax_via_window([[1, 2, 3], [4, 5, 6]], win_size=1, paa_size=3, sax_type='energy', nr_strategy=None)['abc']
    [0, 1]

    >>> sax_via_window([[1, 2, 3, 4], [4, 5, 6, 7]], win_size=1, paa_size=4, sax_type='energy', nr_strategy=None)['aacc']
    [0, 1]

    >>> sax_via_window([[1, 2, 3, 4], [4, 5, 6, 7]], win_size=2, paa_size=4, sax_type='energy', nr_strategy=None)['aaccaacc']
    [0]

    # SAX-REPEAT
    >>> sax_via_window([[1, 2, 3], [4, 5, 6], [7, 8, 9]], win_size=2, paa_size=2, sax_type='repeat', nr_strategy=None)['ab']
    [0, 1]

    >>> sax_via_window([[1, 2, 3], [4, 5, 6], [7, 8, 9]], win_size=1, paa_size=1, sax_type='repeat', nr_strategy=None)['a']
    [0, 1, 2]

    # SAX-INDEPENDENT
    >>> sax_via_window([[1, 2, 3, 4], [4, 5, 6, 7]], win_size=2, paa_size=2, sax_type='independent', nr_strategy=None)['acacacac']
    [0]

    >>> sax_via_window([[1, 2], [4, 5], [7, 8]], win_size=2, paa_size=2, sax_type='independent', nr_strategy=None)['acac']
    [0, 1]

    >>> sax_via_window([[1, 2], [4, 8], [7, 5]], win_size=2, paa_size=2, sax_type='independent', nr_strategy=None)['acac']
    [0]

    >>> sax_via_window([[1, 2], [4, 8], [7, 5]], win_size=2, paa_size=2, sax_type='independent', nr_strategy=None)['acca']
    [1]

    """

    # Convert to numpy array.
    series = np.array(series)

    # Check on dimensions.
    if len(series.shape) > 2:
        raise ValueError('Please reshape time-series to stack dimensions along the 2nd dimension, so that the array shape is a 2-tuple.')

    # PAA size is the length of the PAA sequence.
    if sax_type != 'energy' and paa_size > win_size:
        raise ValueError('PAA size cannot be greater than the window size.')

    if sax_type == 'energy' and len(series.shape) == 1:
        raise ValueError('Must pass a multidimensional time-series to SAX-ENERGY.')

    # Breakpoints.
    cuts = cuts_for_asize(alphabet_size)

    # Dictionary mapping SAX words to indices.
    sax = defaultdict(list)

    if sax_type == 'repeat':
        # Maps indices to multi-dimensional SAX words.
        multidim_sax_dict = []

        # List of all the multi-dimensional SAX words.
        multidim_sax_list = []

        # Sliding window across time dimension.
        for i in range(series.shape[0] - win_size + 1):

            # Subsection starting at this index.
            sub_section = series[i: i + win_size]

            # Z-normalized subsection.
            if win_size == 1:
                zn = sub_section
            else:
                zn = znorm(sub_section, znorm_threshold)

            # PAA representation of subsection.
            paa_rep = paa(zn, paa_size, 'repeat')

            # SAX representation of subsection, but in terms of multi-dimensional vectors.
            multidim_sax = get_sax_list(paa_rep, cuts)

            # Update data-structures.
            multidim_sax_dict.append(multidim_sax)
            multidim_sax_list.extend(multidim_sax)

        # Cluster with k-means++.
        kmeans = KMeans(n_clusters=alphabet_size, random_state=0).fit(multidim_sax_list)

        # Cluster indices in sorted order.
        order = np.lexsort(np.rot90(kmeans.cluster_centers_))

        # Sliding window across time dimension.
        prev_word = ''
        for i in range(series.shape[0] - win_size + 1):

            # Map cluster indices to new SAX letters.
            curr_word_list = map(lambda cluster_index: idx2letter(order[cluster_index]), kmeans.predict(multidim_sax_dict[i]))
            curr_word = ''.join(curr_word_list)

            if '' != prev_word:
                if 'exact' == nr_strategy and prev_word == curr_word:
                    continue
                elif 'mindist' == nr_strategy and is_mindist_zero(prev_word, curr_word):
                    continue

            prev_word = curr_word

            sax[curr_word].append(i)

    else:
        # Sliding window across time dimension.
        prev_word = ''
        for i in range(series.shape[0] - win_size + 1):

            # Subsection starting at this index.
            sub_section = series[i: i + win_size]

            if sax_type == 'energy':
                curr_word = ''
                for energy_dist in sub_section:
                    # Normalize energy distribution.
                    energy_zn = znorm(energy_dist, znorm_threshold)

                    # PAA representation of energy distribution.
                    paa_rep = paa(energy_zn, paa_size, 'unidim')
                    # paa_rep = energy_zn

                    # SAX representation of the energy distribution.
                    energy_word = ts_to_string(paa_rep, cuts)

                    # Add to current word.
                    curr_word += energy_word

            elif sax_type == 'independent':
                curr_word = ''
                for dim in range(sub_section.shape[1]):
                    # Obtain the subsequence restricted to one dimension.
                    one_dimension_sub_section = sub_section[:, dim]

                    # Z-normalized subsection.
                    zn = znorm(one_dimension_sub_section, znorm_threshold)

                    # PAA representation of subsection.
                    paa_rep = paa(zn, paa_size, 'unidim')

                    # Get the SAX word - just a unidimensional SAX.
                    one_dim_word = ts_to_string(paa_rep, cuts)

                    # Add this dimensions' representation to the overall SAX word.
                    curr_word += one_dim_word

            else:
                # Z-normalized subsection.
                zn = znorm(sub_section, znorm_threshold)

                # PAA representation of subsection.
                paa_rep = paa(zn, paa_size, sax_type)

                # SAX representation of subsection.
                curr_word = ts_to_string(paa_rep, cuts)

            if '' != prev_word:
                if 'exact' == nr_strategy and prev_word == curr_word:
                    continue
                elif 'mindist' == nr_strategy and is_mindist_zero(prev_word, curr_word):
                    continue

            prev_word = curr_word

            sax[curr_word].append(i)

    return sax