def test_sanity(self):
    # The DTW feature vector is flat and its length scales with the number
    # of points in the first ink (140 vs. 150 for the same second ink).
    v1 = compute_dtw_vector(self.ink1, self.ink3, alpha=0.5, penup_z=10)
    self.assertEqual(v1.shape, (140,))
    v2 = compute_dtw_vector(self.ink2, self.ink3, alpha=0.5, penup_z=10)
    self.assertEqual(v2.shape, (150,))
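# A minimal sketch (assumptions only, not the project's implementation) of
# what a DTW-based difference vector such as compute_dtw_vector might
# compute: align the second ink to the first with dynamic time warping and
# return the flattened per-point difference, so the output length is
# len(first ink) * n_features, matching the shapes asserted above. The
# pen-up column is assumed to be the last one here, and `alpha` (presumably
# a feature-weighting factor in the real code) is accepted but not modeled.
import numpy as np


def dtw_difference_vector(ink1, ink2, alpha=0.5, penup_z=10):
    a = ink1.astype(float)
    b = ink2.astype(float)
    # Emphasize the pen-up column so stroke boundaries dominate the alignment.
    a[:, -1] *= penup_z
    b[:, -1] *= penup_z

    # Standard DTW cost matrix with Euclidean point distances.
    n, m = len(a), len(b)
    cost = np.full((n + 1, m + 1), np.inf)
    cost[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            d = np.linalg.norm(a[i - 1] - b[j - 1])
            cost[i, j] = d + min(cost[i - 1, j], cost[i, j - 1],
                                 cost[i - 1, j - 1])

    # Backtrack, keeping one aligned point of ink2 for every point of ink1.
    aligned = np.zeros_like(ink1, dtype=float)
    i, j = n, m
    while i > 0 and j > 0:
        aligned[i - 1] = ink2[j - 1]
        step = np.argmin([cost[i - 1, j - 1], cost[i - 1, j], cost[i, j - 1]])
        if step == 0:
            i, j = i - 1, j - 1
        elif step == 1:
            i -= 1
        else:
            j -= 1

    return (aligned - ink1).ravel()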
def _find_centroid(obs, obs_weights, medoid_idx, distmat, ignore_outliers=True):
    """Refine the medoid into a weighted-average centroid of the observations."""
    n_features = obs[0].shape[1]
    medoid = obs[medoid_idx]

    # Ignore outliers by setting their weights to 0.
    if ignore_outliers:
        dist_mean = distmat[medoid_idx, :].mean()
        dist_std = distmat[medoid_idx, :].std()
        # Only remove outliers when the std is not too small.
        if dist_std > 1e-3:
            for i in range(len(obs)):
                if (distmat[medoid_idx, i] - dist_mean) / dist_std > 3.0:
                    obs_weights[i] = 0.0

    # Ignore examples that don't have the same number of strokes as the medoid.
    medoid_n_strokes = medoid[:, _PU_IDX].sum()
    for i, o in enumerate(obs):
        if o[:, _PU_IDX].sum() != medoid_n_strokes:
            obs_weights[i] = 0.0

    f = [compute_dtw_vector(medoid, ink) for ink in obs]
    feature_mat = np.vstack(f)
    feature_mat = np.nan_to_num(feature_mat)
    weighted_feature_mat = feature_mat * np.tile(obs_weights,
                                                 (feature_mat.shape[1], 1)).T

    # Reconstruct the weighted-average ink relative to the medoid.
    mean_ink = weighted_feature_mat.sum(axis=0) / obs_weights.sum()
    mean_ink = mean_ink.reshape((-1, n_features), order="C")
    mean_ink = mean_ink + medoid

    # Make sure pen-up is binary.
    mean_ink[:, _PU_IDX] = mean_ink[:, _PU_IDX].round()

    # If the number of pen-ups is off, fall back to the medoid.
    if mean_ink[:, _PU_IDX].sum() != medoid_n_strokes:
        return medoid

    # It seems like not updating the direction yields a better result.
    # return update_directions(mean_ink)
    return mean_ink
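# A hypothetical usage sketch, assuming _find_centroid, compute_dtw_vector,
# and _PU_IDX (the pen-up column index) from the module above are in scope.
# The toy inks, their 5-column (x, y, dx, dy, pen-up) layout, the L2-norm
# distance matrix, and the medoid-selection rule are illustrative
# assumptions, not taken from the project.
import numpy as np

rng = np.random.RandomState(0)


def make_toy_ink(n_points):
    # One single-stroke ink: random features with a pen-up on the last point.
    ink = rng.rand(n_points, 5)
    ink[:, _PU_IDX] = 0.0
    ink[-1, _PU_IDX] = 1.0
    return ink


obs = [make_toy_ink(n) for n in (28, 30, 29)]
obs_weights = np.ones(len(obs))

# Pairwise distances between observations; here simply the norm of the DTW
# difference vector.
distmat = np.zeros((len(obs), len(obs)))
for i in range(len(obs)):
    for j in range(len(obs)):
        distmat[i, j] = np.linalg.norm(compute_dtw_vector(obs[i], obs[j]))

# Pick the observation with the smallest total distance as the medoid, then
# refine it into a weighted-average centroid.
medoid_idx = int(distmat.sum(axis=1).argmin())
centroid = _find_centroid(obs, obs_weights, medoid_idx, distmat)
print(centroid.shape)  # e.g. (28, 5) if observation 0 is picked as the medoid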