def randomly_warp_sequence(sequence, max_number_of_extensions=10, max_number_of_shrinks=10, max_extend_length=4,
                           max_shrink_length=4, may_reverse=True, mutation_prob=0.01):
    """
    Return a randomly warped copy of `sequence`.

    A random number of "extend" and "shrink" operations (each drawn between zero and the
    respective maximum, inclusive) is applied in random order to a copy of the input.
    Before each operation `mutate_sequence` is called on the working copy when
    `mutation_prob` is positive. Finally, the result may be reversed with probability 1/2.

    :param sequence: sequence to warp; it is copied with `np.copy` first
    :param max_number_of_extensions: upper bound (inclusive) on the number of extend operations
    :param max_number_of_shrinks: upper bound (inclusive) on the number of shrink operations
    :param max_extend_length: upper bound (inclusive) for the `k` passed to `extend_point`
    :param max_shrink_length: upper bound (inclusive) for the `k` passed to `shrink_to_a_single_point`
    :param may_reverse: if true, the warped sequence is reversed on a coin flip
    :param mutation_prob: probability forwarded to `mutate_sequence`; mutation is skipped when not positive
    :return: the warped sequence
    """
    warped = np.copy(sequence)

    # Budgets of operations still to be applied; drawn up-front, extensions first.
    extensions_left = random.randint(0, max_number_of_extensions)
    shrinks_left = random.randint(0, max_number_of_shrinks)

    while extensions_left > 0 or shrinks_left > 0:
        if mutation_prob > 0:
            mutate_sequence(warped, mutation_prob)

        usable_length = no_nans_len(warped)

        # At least one budget is non-empty here; only toss a coin when both are.
        if extensions_left > 0 and shrinks_left > 0:
            action = random.choice(['extend', 'shrink'])
        elif extensions_left > 0:
            action = 'extend'
        else:
            action = 'shrink'

        # random.randint includes its upper bound, hence the "- 1".
        position = random.randint(0, usable_length - 1)

        if action == 'extend':
            amount = random.randint(2, max_extend_length)
            warped = extend_point(warped, position, amount)
            extensions_left -= 1
        else:
            amount = random.randint(2, max_shrink_length)
            warped = shrink_to_a_single_point(warped, position, amount)
            shrinks_left -= 1

    # The coin is only tossed when reversing is allowed at all.
    if may_reverse and random.choice([True, False]):
        warped = reverse_sequence(warped)

    return warped
def test_multidimensional_dtw_reverse(self):
    """dtw_std with try_reverse=True should see through a reversed two-dimensional sequence."""
    first = np.array([[1, 2, 3, np.nan], [7, 8, 9, np.nan]]).T
    # The second sequence is handed to DTW in reversed order.
    second = reverse_sequence(np.array([[10, 12, 14], [13, 15, 17]]).T)

    # DTW should match the points:
    # (1,7) to (10,13) (distance: sqrt(81 + 36 = 117))
    # (2,8) to (12,15) (distance: sqrt(100 + 49 = 149))
    # (3,9) to (14,17) (distance: sqrt(121 + 64 = 185))
    # Euclidean distance is the one worth testing for, as sqeuclidean would give
    # the same answer for a.T and b.T as well.
    expected_distance = sqrt(117) + sqrt(149) + sqrt(185)

    actual_distance = dtw_std(first, second, metric='euclidean', try_reverse=True)
    self.assertAlmostEqual(expected_distance, actual_distance)
def randomly_warp_sequence(sequence, max_number_of_extensions=10, max_number_of_shrinks=10, max_extend_length=4,
                           max_shrink_length=4, may_reverse=True, mutation_prob=0.01):
    """
    Return a randomly warped copy of `sequence`.

    A random number of "extend" and "shrink" operations (each drawn between zero and the
    respective maximum, inclusive) is applied in random order to a copy of the input.
    Before each operation `mutate_sequence` is called on the working copy when
    `mutation_prob` is positive. Finally, the result may be reversed with probability 1/2.

    :param sequence: sequence to warp; it is copied with `np.copy` first
    :param max_number_of_extensions: upper bound (inclusive) on the number of extend operations
    :param max_number_of_shrinks: upper bound (inclusive) on the number of shrink operations
    :param max_extend_length: upper bound (inclusive) for the `k` passed to `extend_point`
    :param max_shrink_length: upper bound (inclusive) for the `k` passed to `shrink_to_a_single_point`
    :param may_reverse: if true, the warped sequence is reversed on a coin flip
    :param mutation_prob: probability forwarded to `mutate_sequence`; mutation is skipped when not positive
    :return: the warped sequence
    """
    warped = np.copy(sequence)

    # Budgets of operations still to be applied; drawn up-front, extensions first.
    extensions_left = random.randint(0, max_number_of_extensions)
    shrinks_left = random.randint(0, max_number_of_shrinks)

    while extensions_left > 0 or shrinks_left > 0:
        if mutation_prob > 0:
            mutate_sequence(warped, mutation_prob)

        usable_length = no_nans_len(warped)

        # At least one budget is non-empty here; only toss a coin when both are.
        if extensions_left > 0 and shrinks_left > 0:
            action = random.choice(['extend', 'shrink'])
        elif extensions_left > 0:
            action = 'extend'
        else:
            action = 'shrink'

        # random.randint includes its upper bound, hence the "- 1".
        position = random.randint(0, usable_length - 1)

        if action == 'extend':
            amount = random.randint(2, max_extend_length)
            warped = extend_point(warped, position, amount)
            extensions_left -= 1
        else:
            amount = random.randint(2, max_shrink_length)
            warped = shrink_to_a_single_point(warped, position, amount)
            shrinks_left -= 1

    # The coin is only tossed when reversing is allowed at all.
    if may_reverse and random.choice([True, False]):
        warped = reverse_sequence(warped)

    return warped
def test_multidimensional_dtw_reverse(self):
    """dtw_std with try_reverse=True should see through a reversed two-dimensional sequence."""
    first = np.array([[1, 2, 3, np.nan], [7, 8, 9, np.nan]]).T
    # The second sequence is handed to DTW in reversed order.
    second = reverse_sequence(np.array([[10, 12, 14], [13, 15, 17]]).T)

    # DTW should match the points:
    # (1,7) to (10,13) (distance: sqrt(81 + 36 = 117))
    # (2,8) to (12,15) (distance: sqrt(100 + 49 = 149))
    # (3,9) to (14,17) (distance: sqrt(121 + 64 = 185))
    # Euclidean distance is the one worth testing for, as sqeuclidean would give
    # the same answer for a.T and b.T as well.
    expected_distance = sqrt(117) + sqrt(149) + sqrt(185)

    actual_distance = dtw_std(first, second, metric='euclidean', try_reverse=True)
    self.assertAlmostEqual(expected_distance, actual_distance)
def dtw_std(x, y, metric='sqeuclidean', dist_only=True, constraint=None, k=None, try_reverse=True, normalise=False,
            scale_first=False, *args, **kwargs):
    """
    Wrapper around MLPY's dtw_std that supports cleaning up of NaNs, and reversing of sequences.

    Both inputs are converted to float arrays and passed through `_strip_nans` before the
    underlying `mlpy_dtw_std` is called.

    :param x: first sequence (anything `np.asarray` accepts)
    :param y: second sequence (anything `np.asarray` accepts)
    :param metric: dtw metric to use `sqeuclidean`, `euclidean` or `cosine`
    :param dist_only: return distance only
    :param constraint: constraint of dtw (try `None` or `'slanted_band'`)
    :param k: parameter k needed for slanted band constraint
    :param try_reverse: Will try reversing one sequence as to get a better distance
    :param normalise: If set to true, distance will be divided by the length of the longer sequence
    :param scale_first: If set to true, the shorter sequence will be scaled to the length of the longer sequence
                        before DTW
    :param args: extra positional arguments forwarded to `mlpy_dtw_std`
    :param kwargs: extra keyword arguments forwarded to `mlpy_dtw_std`
    :return: the (optionally normalised) distance if `dist_only` is true,
             otherwise a `(distance, cost, path)` tuple
    """
    def _normalise(ans, max_len):
        # Divide by the length of the longer sequence only when normalisation was requested.
        if normalise:
            return ans / max_len
        else:
            return ans

    def _scaled_path(path, scaling_path, flip_paths):
        # Translate indices of the scaled sequence back through the scaling path, and undo
        # the x/y swap that was performed before scaling (if any).
        path_x = np.asarray([scaling_path[i] for i in path[0]])
        path_y = path[1]

        if flip_paths:
            path = (path_y, path_x)
        else:
            path = (path_x, path_y)

        return path

    def _reverse_path(path):
        # Mirror the indices around the largest index present in the path.
        n = path.max()
        path = n - path
        return path

    # `np.float` was only an alias of the builtin `float`; it was deprecated in NumPy 1.20 and
    # removed in NumPy 1.24, so the builtin is used directly.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    x = _strip_nans(x)
    y = _strip_nans(y)

    max_len = max(len(x), len(y))
    if scale_first:
        # Make sure `x` is the sequence that gets scaled, remembering whether the roles
        # were swapped so the returned path can be put back in the caller's order.
        if len(x) >= len(y):
            x, y = y, x
            flip_paths = True
        else:
            flip_paths = False

        x, scaling_path = uniform_scaling_to_length(x, len(y), output_scaling_path=True)

    regular_ans = mlpy_dtw_std(x, y, metric=metric, dist_only=dist_only, constraint=constraint, k=k,
                               *args, **kwargs)

    if not try_reverse:
        if dist_only:
            return _normalise(regular_ans, max_len)
        else:
            dist, cost, path = regular_ans
            dist = _normalise(dist, max_len)

            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)
            return dist, cost, path
    else:
        reverse_ans = mlpy_dtw_std(reverse_sequence(x), y, metric=metric, dist_only=dist_only,
                                   constraint=constraint, k=k, *args, **kwargs)
        if dist_only:
            return _normalise(min(regular_ans, reverse_ans), max_len)
        elif reverse_ans[0] >= regular_ans[0]:
            # The forward alignment is at least as good as the reversed one.
            dist, cost, path = regular_ans
            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)
            return _normalise(dist, max_len), cost, path
        else:  # dist_only = False and reverse_ans is smaller
            dist, cost, path = reverse_ans
            # Only x was reversed, so only the x component of the path is mirrored back.
            path_rev = (_reverse_path(path[0]), path[1])

            if scale_first:
                path_rev = _scaled_path(path_rev, scaling_path, flip_paths)

            # NOTE(review): x is the reversed sequence, yet the cost matrix is flipped left-right;
            # confirm this matches the axis layout of the matrix returned by mlpy_dtw_std.
            cost = np.fliplr(cost)
            return _normalise(dist, max_len), cost, path_rev
def test_multi_dimension_with_nans_appended(self):
    """Rows of a 2-D sequence are reversed while the trailing NaN row stays at the end."""
    padded = np.array([[1, 2], [3, 4], [np.nan, np.nan]])
    expected = np.array([[3, 4], [1, 2], [np.nan, np.nan]])

    assert_array_equal(expected, reverse_sequence(padded))
def test_single_dimension_with_nans_appended(self):
    """Values of a 1-D sequence are reversed while the trailing NaN padding stays at the end."""
    padded = np.array([1, 2, 3, 4, np.nan, np.nan])
    expected = np.array([4, 3, 2, 1, np.nan, np.nan])

    assert_array_equal(expected, reverse_sequence(padded))
def test_multi_dimension(self):
    """A 2-D sequence without NaNs has its rows reversed."""
    rows = np.array([[1, 2], [3, 4], [5, 6]])
    expected = np.array([[5, 6], [3, 4], [1, 2]])

    assert_array_equal(expected, reverse_sequence(rows))
def test_single_dimension(self):
    """A 1-D sequence without NaNs is reversed end to end."""
    values = np.array([1, 2, 3, 4, 5, 6])
    expected = np.array([6, 5, 4, 3, 2, 1])

    assert_array_equal(expected, reverse_sequence(values))
def test_multi_dimension_with_nans_appended(self):
    """Rows of a 2-D sequence are reversed while the trailing NaN row stays at the end."""
    padded = np.array([[1, 2], [3, 4], [np.nan, np.nan]])
    expected = np.array([[3, 4], [1, 2], [np.nan, np.nan]])

    assert_array_equal(expected, reverse_sequence(padded))
def test_single_dimension_with_nans_appended(self):
    """Values of a 1-D sequence are reversed while the trailing NaN padding stays at the end."""
    padded = np.array([1, 2, 3, 4, np.nan, np.nan])
    expected = np.array([4, 3, 2, 1, np.nan, np.nan])

    assert_array_equal(expected, reverse_sequence(padded))
def test_multi_dimension(self):
    """A 2-D sequence without NaNs has its rows reversed."""
    rows = np.array([[1, 2], [3, 4], [5, 6]])
    expected = np.array([[5, 6], [3, 4], [1, 2]])

    assert_array_equal(expected, reverse_sequence(rows))
def test_single_dimension(self):
    """A 1-D sequence without NaNs is reversed end to end."""
    values = np.array([1, 2, 3, 4, 5, 6])
    expected = np.array([6, 5, 4, 3, 2, 1])

    assert_array_equal(expected, reverse_sequence(values))