Example #1
0
def randomly_warp_sequence(sequence,
                           max_number_of_extensions=10,
                           max_number_of_shrinks=10,
                           max_extend_length=4,
                           max_shrink_length=4,
                           may_reverse=True,
                           mutation_prob=0.01):
    """
    Return a randomly warped copy of `sequence`; the input itself is not modified.

    A random number of "extend" and "shrink" operations (each count drawn
    uniformly from 0 up to the respective maximum, inclusive) is applied in
    random order at random positions of the non-NaN part of the sequence.

    :param sequence: sequence to warp; it is copied with `np.copy` first
    :param max_number_of_extensions: upper bound (inclusive) on the number of extend operations
    :param max_number_of_shrinks: upper bound (inclusive) on the number of shrink operations
    :param max_extend_length: upper bound (inclusive) of the `k` passed to `extend_point`; `k` is at least 2
    :param max_shrink_length: upper bound (inclusive) of the `k` passed to `shrink_to_a_single_point`; `k` is at least 2
    :param may_reverse: if true, the result is reversed with probability 1/2
    :param mutation_prob: if greater than zero, `mutate_sequence` is called with it before every operation
    :return: the warped copy
    """
    warped = np.copy(sequence)

    # Draw the operation budgets up front (random.randint is inclusive on both ends).
    extensions_left = random.randint(0, max_number_of_extensions)
    shrinks_left = random.randint(0, max_number_of_shrinks)

    while extensions_left > 0 or shrinks_left > 0:
        if mutation_prob > 0:
            # Return value is ignored - presumably mutates `warped` in place; TODO confirm.
            mutate_sequence(warped, mutation_prob)

        valid_length = no_nans_len(warped)

        # Only consult the RNG when both kinds of operation are still pending.
        if extensions_left > 0 and shrinks_left > 0:
            action = random.choice(['extend', 'shrink'])
        elif extensions_left > 0:
            action = 'extend'
        else:
            action = 'shrink'

        # Minus one because random.randint includes its upper bound.
        position = random.randint(0, valid_length - 1)
        if action == 'extend':
            span = random.randint(2, max_extend_length)
            warped = extend_point(warped, position, span)
            extensions_left -= 1
        else:
            span = random.randint(2, max_shrink_length)
            warped = shrink_to_a_single_point(warped, position, span)
            shrinks_left -= 1

    # Coin flip for the optional reversal; no random draw when reversal is disabled.
    if may_reverse and random.choice([True, False]):
        warped = reverse_sequence(warped)

    return warped
Example #2
0
    def test_multidimensional_dtw_reverse(self):
        """Check that dtw_std with try_reverse=True aligns a sequence with a reversed counterpart."""
        # Each row of the transposed arrays is one two-dimensional point; the
        # trailing NaN point acts as padding that dtw_std is expected to ignore.
        first = np.array([[1, 2, 3, np.nan], [7, 8, 9, np.nan]]).T
        second = np.array([[10, 12, 14], [13, 15, 17]]).T

        # Feed the second sequence in reversed order.
        second = reverse_sequence(second)

        # With the reversal undone, DTW should pair the points as follows:
        #   (1,7) with (10,13): sqrt(81 + 36)  = sqrt(117)
        #   (2,8) with (12,15): sqrt(100 + 49) = sqrt(149)
        #   (3,9) with (14,17): sqrt(121 + 64) = sqrt(185)
        #
        # The plain euclidean metric is the one worth checking here, because
        # sqeuclidean would give the same result for a.T and b.T as well.
        expected = sqrt(117) + sqrt(149) + sqrt(185)
        actual = dtw_std(first, second, metric='euclidean', try_reverse=True)
        self.assertAlmostEqual(expected, actual)
Example #3
0
def randomly_warp_sequence(sequence, max_number_of_extensions=10, max_number_of_shrinks=10,
                           max_extend_length=4, max_shrink_length=4, may_reverse=True, mutation_prob=0.01):
    """
    Return a randomly warped copy of `sequence`; the input itself is not modified.

    A random number of "extend" and "shrink" operations (each count drawn
    uniformly from 0 up to the respective maximum, inclusive) is applied in
    random order at random positions of the non-NaN part of the sequence.

    :param sequence: sequence to warp; it is copied with `np.copy` first
    :param max_number_of_extensions: upper bound (inclusive) on the number of extend operations
    :param max_number_of_shrinks: upper bound (inclusive) on the number of shrink operations
    :param max_extend_length: upper bound (inclusive) of the `k` passed to `extend_point`; `k` is at least 2
    :param max_shrink_length: upper bound (inclusive) of the `k` passed to `shrink_to_a_single_point`; `k` is at least 2
    :param may_reverse: if true, the result is reversed with probability 1/2
    :param mutation_prob: if greater than zero, `mutate_sequence` is called with it before every operation
    :return: the warped copy
    """
    warped = np.copy(sequence)

    # Draw the operation budgets up front (random.randint is inclusive on both ends).
    extensions_left = random.randint(0, max_number_of_extensions)
    shrinks_left = random.randint(0, max_number_of_shrinks)

    while extensions_left > 0 or shrinks_left > 0:
        if mutation_prob > 0:
            # Return value is ignored - presumably mutates `warped` in place; TODO confirm.
            mutate_sequence(warped, mutation_prob)

        valid_length = no_nans_len(warped)

        # Only consult the RNG when both kinds of operation are still pending.
        if extensions_left > 0 and shrinks_left > 0:
            action = random.choice(['extend', 'shrink'])
        elif extensions_left > 0:
            action = 'extend'
        else:
            action = 'shrink'

        # Minus one because random.randint includes its upper bound.
        position = random.randint(0, valid_length - 1)
        if action == 'extend':
            span = random.randint(2, max_extend_length)
            warped = extend_point(warped, position, span)
            extensions_left -= 1
        else:
            span = random.randint(2, max_shrink_length)
            warped = shrink_to_a_single_point(warped, position, span)
            shrinks_left -= 1

    # Coin flip for the optional reversal; no random draw when reversal is disabled.
    if may_reverse and random.choice([True, False]):
        warped = reverse_sequence(warped)

    return warped
Example #4
0
    def test_multidimensional_dtw_reverse(self):
        """Check that dtw_std with try_reverse=True aligns a sequence with a reversed counterpart."""
        # Each row of the transposed arrays is one two-dimensional point; the
        # trailing NaN point acts as padding that dtw_std is expected to ignore.
        first = np.array([[1, 2, 3, np.nan], [7, 8, 9, np.nan]]).T
        second = np.array([[10, 12, 14], [13, 15, 17]]).T

        # Feed the second sequence in reversed order.
        second = reverse_sequence(second)

        # With the reversal undone, DTW should pair the points as follows:
        #   (1,7) with (10,13): sqrt(81 + 36)  = sqrt(117)
        #   (2,8) with (12,15): sqrt(100 + 49) = sqrt(149)
        #   (3,9) with (14,17): sqrt(121 + 64) = sqrt(185)
        #
        # The plain euclidean metric is the one worth checking here, because
        # sqeuclidean would give the same result for a.T and b.T as well.
        expected = sqrt(117) + sqrt(149) + sqrt(185)
        actual = dtw_std(first, second, metric='euclidean', try_reverse=True)
        self.assertAlmostEqual(expected, actual)
Example #5
0
def dtw_std(x,
            y,
            metric='sqeuclidean',
            dist_only=True,
            constraint=None,
            k=None,
            try_reverse=True,
            normalise=False,
            scale_first=False,
            *args,
            **kwargs):
    """
    Wrapper around MLPY's dtw_std that supports cleaning up of NaNs, and reversing of sequences.

    Both inputs are converted to float arrays and passed through `_strip_nans`
    before being aligned.

    :param x: first sequence
    :param y: second sequence
    :param metric: dtw metric to use `sqeuclidean`, `euclidean` or `cosine`
    :param dist_only: return distance only
    :param constraint: constraint of dtw (try `None` or `'slanted_band'`)
    :param k: parameter k needed for slanted band constraint
    :param try_reverse: Will also try reversing one sequence and keep whichever alignment has the smaller distance
        (the regular alignment wins ties)
    :param normalise: If set to true, the distance will be divided by the length of the longer sequence
        (measured after NaN removal, before any scaling)
    :param scale_first: If set to true, the shorter sequence will be scaled to the length of the longer sequence
        before DTW
    :param args: extra positional arguments forwarded to `mlpy_dtw_std`
    :param kwargs: extra keyword arguments forwarded to `mlpy_dtw_std`
    :return: the distance if `dist_only` is true, otherwise a `(dist, cost, path)` tuple
    """
    def _normalise(ans, max_len):
        # Only rescale when the caller asked for a normalised distance.
        if normalise:
            return ans / max_len
        else:
            return ans

    def _scaled_path(path, scaling_path, flip_paths):
        # Translate indices of the scaled sequence back through `scaling_path`.
        path_x = np.asarray([scaling_path[i] for i in path[0]])
        path_y = path[1]

        # Undo the x/y swap that was performed before scaling, if any.
        if flip_paths:
            path = (path_y, path_x)
        else:
            path = (path_x, path_y)

        return path

    def _reverse_path(path):
        # Mirror the indices: the largest index becomes 0 and vice versa.
        n = path.max()
        path = n - path
        return path

    # The builtin `float` replaces the `np.float` alias, which was deprecated
    # in NumPy 1.20 and removed in NumPy 1.24; the resulting dtype is the same.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    x = _strip_nans(x)
    y = _strip_nans(y)

    max_len = max(len(x), len(y))

    # Only meaningful when `scale_first` is set; initialised so they are always bound.
    scaling_path = None
    flip_paths = False
    if scale_first:
        # Make `x` the sequence that gets scaled; remember whether we swapped
        # so that returned paths can be put back in the caller's order.
        if len(x) >= len(y):
            x, y = y, x
            flip_paths = True

        x, scaling_path = uniform_scaling_to_length(x,
                                                    len(y),
                                                    output_scaling_path=True)

    regular_ans = mlpy_dtw_std(x,
                               y,
                               metric=metric,
                               dist_only=dist_only,
                               constraint=constraint,
                               k=k,
                               *args,
                               **kwargs)
    if not try_reverse:
        if dist_only:
            return _normalise(regular_ans, max_len)
        else:
            dist, cost, path = regular_ans
            dist = _normalise(dist, max_len)

            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)

            return dist, cost, path
    else:
        reverse_ans = mlpy_dtw_std(reverse_sequence(x),
                                   y,
                                   metric=metric,
                                   dist_only=dist_only,
                                   constraint=constraint,
                                   k=k,
                                   *args,
                                   **kwargs)
        if dist_only:
            return _normalise(min(regular_ans, reverse_ans), max_len)
        elif reverse_ans[0] >= regular_ans[0]:
            # Regular alignment is at least as good: return it unchanged.
            dist, cost, path = regular_ans
            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)
            return _normalise(dist, max_len), cost, path
        else:  # dist_only = False and reverse_ans is smaller
            dist, cost, path = reverse_ans
            # Express the x-side of the path in un-reversed indices.
            path_rev = (_reverse_path(path[0]), path[1])

            if scale_first:
                path_rev = _scaled_path(path_rev, scaling_path, flip_paths)

            # NOTE(review): the cost matrix is flipped left-right here; confirm
            # that this is the axis corresponding to the reversed sequence.
            cost = np.fliplr(cost)
            return _normalise(dist, max_len), cost, path_rev
Example #6
0
 def test_multi_dimension_with_nans_appended(self):
     """Reversing a 2-D sequence must keep the trailing NaN rows at the end."""
     padded = np.array([[1, 2], [3, 4], [np.nan, np.nan]])
     expected = np.array([[3, 4], [1, 2], [np.nan, np.nan]])
     assert_array_equal(expected, reverse_sequence(padded))
Example #7
0
 def test_single_dimension_with_nans_appended(self):
     """Reversing a 1-D sequence must keep the trailing NaNs at the end."""
     padded = np.array([1, 2, 3, 4, np.nan, np.nan])
     expected = np.array([4, 3, 2, 1, np.nan, np.nan])
     assert_array_equal(expected, reverse_sequence(padded))
Example #8
0
 def test_multi_dimension(self):
     """Reversing a 2-D sequence reverses the order of its rows."""
     points = np.array([[1, 2], [3, 4], [5, 6]])
     expected = np.array([[5, 6], [3, 4], [1, 2]])
     assert_array_equal(expected, reverse_sequence(points))
Example #9
0
 def test_single_dimension(self):
     """Reversing a 1-D sequence reverses the order of its elements."""
     values = np.array([1, 2, 3, 4, 5, 6])
     expected = np.array([6, 5, 4, 3, 2, 1])
     assert_array_equal(expected, reverse_sequence(values))
Example #10
0
 def test_multi_dimension_with_nans_appended(self):
     """Reversing a 2-D sequence must keep the trailing NaN rows at the end."""
     padded = np.array([[1, 2], [3, 4], [np.nan, np.nan]])
     expected = np.array([[3, 4], [1, 2], [np.nan, np.nan]])
     assert_array_equal(expected, reverse_sequence(padded))
Example #11
0
 def test_single_dimension_with_nans_appended(self):
     """Reversing a 1-D sequence must keep the trailing NaNs at the end."""
     padded = np.array([1, 2, 3, 4, np.nan, np.nan])
     expected = np.array([4, 3, 2, 1, np.nan, np.nan])
     assert_array_equal(expected, reverse_sequence(padded))
Example #12
0
 def test_multi_dimension(self):
     """Reversing a 2-D sequence reverses the order of its rows."""
     points = np.array([[1, 2], [3, 4], [5, 6]])
     expected = np.array([[5, 6], [3, 4], [1, 2]])
     assert_array_equal(expected, reverse_sequence(points))
Example #13
0
 def test_single_dimension(self):
     """Reversing a 1-D sequence reverses the order of its elements."""
     values = np.array([1, 2, 3, 4, 5, 6])
     expected = np.array([6, 5, 4, 3, 2, 1])
     assert_array_equal(expected, reverse_sequence(values))
Example #14
0
def dtw_std(x, y, metric='sqeuclidean', dist_only=True, constraint=None, k=None, try_reverse=True, normalise=False,
            scale_first=False, *args, **kwargs):
    """
    Wrapper around MLPY's dtw_std that supports cleaning up of NaNs, and reversing of sequences.

    Both inputs are converted to float arrays and passed through `_strip_nans`
    before being aligned.

    :param x: first sequence
    :param y: second sequence
    :param metric: dtw metric to use `sqeuclidean`, `euclidean` or `cosine`
    :param dist_only: return distance only
    :param constraint: constraint of dtw (try `None` or `'slanted_band'`)
    :param k: parameter k needed for slanted band constraint
    :param try_reverse: Will also try reversing one sequence and keep whichever alignment has the smaller distance
        (the regular alignment wins ties)
    :param normalise: If set to true, the distance will be divided by the length of the longer sequence
        (measured after NaN removal, before any scaling)
    :param scale_first: If set to true, the shorter sequence will be scaled to the length of the longer sequence
        before DTW
    :param args: extra positional arguments forwarded to `mlpy_dtw_std`
    :param kwargs: extra keyword arguments forwarded to `mlpy_dtw_std`
    :return: the distance if `dist_only` is true, otherwise a `(dist, cost, path)` tuple
    """
    def _normalise(ans, max_len):
        # Only rescale when the caller asked for a normalised distance.
        if normalise:
            return ans / max_len
        else:
            return ans

    def _scaled_path(path, scaling_path, flip_paths):
        # Translate indices of the scaled sequence back through `scaling_path`.
        path_x = np.asarray([scaling_path[i] for i in path[0]])
        path_y = path[1]

        # Undo the x/y swap that was performed before scaling, if any.
        if flip_paths:
            path = (path_y, path_x)
        else:
            path = (path_x, path_y)

        return path

    def _reverse_path(path):
        # Mirror the indices: the largest index becomes 0 and vice versa.
        n = path.max()
        path = n - path
        return path

    # The builtin `float` replaces the `np.float` alias, which was deprecated
    # in NumPy 1.20 and removed in NumPy 1.24; the resulting dtype is the same.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    x = _strip_nans(x)
    y = _strip_nans(y)

    max_len = max(len(x), len(y))

    # Only meaningful when `scale_first` is set; initialised so they are always bound.
    scaling_path = None
    flip_paths = False
    if scale_first:
        # Make `x` the sequence that gets scaled; remember whether we swapped
        # so that returned paths can be put back in the caller's order.
        if len(x) >= len(y):
            x, y = y, x
            flip_paths = True

        x, scaling_path = uniform_scaling_to_length(x, len(y), output_scaling_path=True)

    regular_ans = mlpy_dtw_std(x, y, metric=metric, dist_only=dist_only, constraint=constraint, k=k, *args, **kwargs)
    if not try_reverse:
        if dist_only:
            return _normalise(regular_ans, max_len)
        else:
            dist, cost, path = regular_ans
            dist = _normalise(dist, max_len)

            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)

            return dist, cost, path
    else:
        reverse_ans = mlpy_dtw_std(reverse_sequence(x), y, metric=metric, dist_only=dist_only, constraint=constraint, k=k, *args, **kwargs)
        if dist_only:
            return _normalise(min(regular_ans, reverse_ans), max_len)
        elif reverse_ans[0] >= regular_ans[0]:
            # Regular alignment is at least as good: return it unchanged.
            dist, cost, path = regular_ans
            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)
            return _normalise(dist, max_len), cost, path
        else:  # dist_only = False and reverse_ans is smaller
            dist, cost, path = reverse_ans
            # Express the x-side of the path in un-reversed indices.
            path_rev = (_reverse_path(path[0]), path[1])

            if scale_first:
                path_rev = _scaled_path(path_rev, scaling_path, flip_paths)

            # NOTE(review): the cost matrix is flipped left-right here; confirm
            # that this is the axis corresponding to the reversed sequence.
            cost = np.fliplr(cost)
            return _normalise(dist, max_len), cost, path_rev