def _min_subsequence_distance(values):
    """
    Find the smallest distance between a query and one batch of a series.

    The work item arrives as a single packed tuple so that the same
    function can be used for batch processing in single threaded or
    multi-processing mode.

    Parameters
    ----------
    values : tuple(iteration, batch_size, subsequence, query)
        Tuple packed values for parallelization.

    Returns
    -------
    A tuple ``(index, distance)``. ``index`` is the batch-local position of
    the smallest distance shifted by ``batch_size * iteration``;
    ``distance`` is the value found at that position.
    """
    batch_number, batch_size, subsequence, query = values
    profile = mts.mass2(subsequence, query)

    # position of the minimum, local to this batch
    local_best = np.argmin(profile)

    # shift the local position by the batches that came before this one
    # NOTE(review): assumes batches start batch_size apart - confirm in caller
    offset = batch_size * batch_number

    return (local_best + offset, profile[local_best])
Exemple #2
0
def test_top_k_discords():
    """Check top_k_discords against known results from the UCR use case.

    Loads the ``robot_dog.txt`` series and the ``carpet_query.txt`` query
    from the tests directory, computes their MASS2 distances and asserts
    that ``top_k_discords(distances, 2, 25)`` yields ``[12900, 2]``.
    """
    tests_dir = os.path.join(MODULE_PATH, '..', 'tests')
    series = np.loadtxt(os.path.join(tests_dir, 'robot_dog.txt'))
    query = np.loadtxt(os.path.join(tests_dir, 'carpet_query.txt'))

    profile = mts.mass2(series, query)
    discords = np.array(mts.top_k_discords(profile, 2, 25))

    assert np.array_equal(discords, np.array([12900, 2]))
    return start_point,end_point


if __name__ == '__main__':

    # ---- query: a slice of the main light curve -------------------------
    main_period = 6
    main_file = ut_mdf.getDataFromFile(
        fileName='light_curve_Gaia-DR2_49407521363733632_date20191129')

    # slice bounds come from getSublenght for the chosen period
    start_point, end_point = getSublenght(period=main_period,
                                          mdfData=main_file)

    subTimestamp = main_file["timestamp"][start_point:end_point]
    subInstance = main_file['instances'][start_point:end_point]
    sub_len = len(subTimestamp)

    # ---- search the target series for the query -------------------------
    # NOTE(review): target_file is not assigned anywhere in this block; it
    # must already exist at module level before this runs - confirm.
    distances = mts.mass2(target_file['instances'], subInstance)

    # best match = position of the smallest distance; only the real part of
    # the distance value is reported
    min_idx = np.argmin(distances)
    min_dis = distances.item(min_idx).real

    print(min_idx)
    print("distance = {}".format(min_dis))

    # ---- plot the target series with the query drawn over the match -----
    plt.figure(figsize=(25, 5))
    plt.plot(target_file['timestamp'], target_file['instances'])
    plt.plot(target_file['timestamp'][min_idx:min_idx + sub_len],
             subInstance,
             c='r')
    plt.ylabel('Flux')
    plt.title('TS data : {}'.format(target_file['fileName']))
    plt.show()
    plt.clf()
Exemple #4
0
# Two light curves: the series that is searched, and the one the query is
# cut from.
target_file = ut_mdf.getDataFromFile(fileName='light_curve_Gaia-DR2_49406353132632832_date20191129')
main_file = ut_mdf.getDataFromFile(fileName='light_curve_Gaia-DR2_49407521363733632_date20191129')

# The query is a fixed slice of the main light curve; the target is used whole.
query = main_file['instances'][765:2570]
ts = target_file['instances']
# Alternative input from plain text files:
# ts = np.loadtxt('ts.txt')
# query = np.loadtxt('query.txt')

# --- mass
# (this result is overwritten by the mass2 call right below)
distances = mts.mass(ts, query)

# --- mass2
distances = mts.mass2(ts, query)

# --- mass3
# distances = mts.mass3(ts, query, 256)

# --- mass2_batch
# Settings for a multi-threaded batch job that uses all cpu cores and returns
# the top 5 matches. batch_size partitions the time series for the
# subsequence similarity search; even in single threaded mode this is much
# more memory efficient on large time series than MASS2 on its own.
n_jobs = -1
top_matches = 5
batch_size = 10000
indices, distances = mts.mass2_batch(ts,
                                     query,
                                     batch_size,
    if i_ch == 2:
        axes[i_ax].set_xlabel('Time steps')

#%% ============== manual motifs
# Cell purpose: cut a fixed window out of one row of data0, compute its MASS2
# distances against the whole row, keep the positions returned by
# top_k_motifs, and order them by distance before plotting.
# NOTE(review): data0 is not defined in this cell; it is indexed as a 2-D
# array below - confirm where it is created.
#data0 = data.copy()

# 1-based row selector; it is used as (i_chh - 1) when indexing data0
i_chh = 1
# k and exclude_zone are passed straight to mts.top_k_motifs below
k = 10
exclude_zone = 300
# the query window covers sample indices t_s .. t_s + t_step - 1
t_s = 57000
t_step = 1000
t = range(t_s, t_s + t_step)
# `quary` (sic) is the query window; `target` is the full selected row
quary = data0[i_chh - 1, t]
target = data0[i_chh - 1, :]

distances = mts.mass2(target, quary)
#distances = np.array([abs(i) for i in distances])
#distances.sort()

# keep only the positions reported by top_k_motifs and their distances
found = mts.top_k_motifs(distances, k, exclude_zone)
indices = np.array(found)
distances = distances[found]

#indices, distances = mts.mass2_batch(target, quary, 1000, top_matches = k)

# reorder both arrays with the argsort of the selected distances
# NOTE: the comprehension variable `t` reuses the name of the range defined
# above; in Python 3 it is local to the comprehension and the range survives.
i_sort = np.argsort(distances)
distances, indices = [t[i_sort] for t in (distances, indices)]
# from here on `distances` is a plain list of absolute values
# NOTE(review): abs() suggests mass2 may return complex values - confirm.
distances = [abs(i) for i in distances]

# start a new figure; first panel of a 2-row, 1-column subplot grid
plt.figure()
plt.subplot(2, 1, 1)