def _perform_linkage(obs, max_cluster, distmat=None,
                     alpha=0.5, penup_z=10.0,
                     algorithm='complete', verbose=False):
    """Cluster observations with hierarchical linkage over DTW distances.

    Parameters
    ----------
    obs : sequence
        Observations to cluster. Only its length and its items (passed
        pairwise to ``compute_dtw_distance``) are used.
    max_cluster : int
        Maximum number of flat clusters to form.
    distmat : array, optional
        Precomputed square pairwise distance matrix. When omitted, it is
        computed from `obs` with ``compute_dtw_distance``.
    alpha, penup_z : float
        Forwarded to ``compute_dtw_distance`` when `distmat` is computed.
    algorithm : {'average', 'single', 'complete'}
        Linkage method handed to SciPy.
    verbose : bool
        Print a progress message before computing the distance matrix.

    Returns
    -------
    (labels, distmat)
        `labels` is an integer array with one cluster id per observation.
        `distmat` is the distance matrix that was used. When
        ``max_cluster == 1`` every label is 1 and the second item is
        ``None`` (even if a `distmat` was supplied), because no distances
        are needed.

    Raises
    ------
    ValueError
        If `algorithm` is not one of the supported linkage methods (only
        checked when ``max_cluster != 1``).
    """
    n = len(obs)

    if max_cluster == 1:
        # The builtin ``int`` replaces ``np.int``, which was merely an alias
        # and no longer exists in current NumPy releases.
        return (np.ones(n, dtype=int), None)

    if algorithm not in ('average', 'single', 'complete'):
        raise ValueError('algorithm must be either average, single, '
                         'or complete.')

    # calculate distance matrix
    if distmat is None:
        if verbose:
            # Parenthesised single-argument form prints the same text on
            # both Python 2 and Python 3.
            print("calculating distmat")
        distmat = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                # The distance is symmetric: fill both triangles at once.
                distmat[i, j] = compute_dtw_distance(obs[i], obs[j],
                                                     alpha=alpha,
                                                     penup_z=penup_z)
                distmat[j, i] = distmat[i, j]

    # SciPy's linkage expects the condensed (upper-triangular) form.
    condensed = distance.squareform(distmat)
    tree = sp_cluster.hierarchy.linkage(condensed, method=algorithm)
    labels = sp_cluster.hierarchy.fcluster(tree, t=max_cluster,
                                           criterion='maxclust')
    return (np.asarray(labels), distmat)
def test_simple(self):
    """Check DTW distances from ``ink1`` to each fixture ink.

    The distance from ``ink1`` to itself, to ``ink2`` and to ``ink3`` must
    match the reference values 0.000, 0.100 and 0.053 within 1e-3, using
    ``alpha=0.5`` and ``penup_z=10``.
    """
    def check_distance(other_ink, expected):
        # Every comparison starts from ink1 and shares the same settings.
        measured = compute_dtw_distance(self.ink1, other_ink,
                                        alpha=0.5, penup_z=10)
        self.assertAlmostEqual(measured, expected, delta=1e-3)

    check_distance(self.ink1, 0.000)
    check_distance(self.ink2, 0.100)
    check_distance(self.ink3, 0.053)
def score(self, obs):
    """Score an observation against the trained prototype.

    The score is the negated DTW distance between ``self.model`` and
    `obs` (computed with ``self.alpha``), divided by ``self.avg_dist``.

    Returns
    -------
    score
        The negated, normalized distance as a single value.
    """
    dtw_dist = compute_dtw_distance(self.model, obs, alpha=self.alpha)
    negated = -dtw_dist
    return negated / self.avg_dist
def train(self, obs, obs_weights=None, center_type="centroid",
          state_reduction=False, ignore_outliers=True):
    """Estimates the prototype from a set of observations.

    Parameters
    ----------
    obs : sequence of 2-D arrays
        Training inks. Each is indexed as ``ink[:, _PU_IDX]`` to read its
        pen-up column, and ``obs[0].shape[1]`` gives the feature count.
    obs_weights : array-like, optional
        One weight per observation; defaults to all ones. The input is
        copied, so the caller's array is never modified.
    center_type : {"centroid", "medoid"}
        "medoid" uses the observation with the smallest weighted average
        distance to the others; "centroid" builds a weighted-average ink
        around that medoid.
    state_reduction : bool
        When true, the model is post-processed by ``_state_reduction``.
    ignore_outliers : bool
        Centroid only: zero the weight of observations whose distance to
        the medoid is more than 3 standard deviations above the mean.

    Returns
    -------
    The negated average distance (``-self.avg_dist``).

    Raises
    ------
    ValueError
        If `center_type` is unknown or `obs` is empty.

    Notes
    -----
    Sets ``self.num_obs``, ``self.model``, ``self.avg_dist`` and
    ``self.total_weight``. Weights zeroed during centroid estimation also
    apply to ``avg_dist`` and ``total_weight``.
    """
    def _find_medoid(weights, distmat):
        # Scale row r by weights[r], then average each column.
        weighted_distmat = distmat * weights[:, np.newaxis]
        avg_distmat = weighted_distmat.sum(axis=0) / weights.sum()
        return avg_distmat.argmin()

    def _find_centroid(obs, weights, medoid_idx, distmat):
        # NOTE: `weights` is deliberately modified in place; the caller of
        # this helper reuses the adjusted weights afterwards.
        n_features = obs[0].shape[1]
        medoid = obs[medoid_idx]

        # Ignore outliers by setting their weights to 0
        if ignore_outliers:
            dist_to_medoid = distmat[medoid_idx, :]
            dist_mean = dist_to_medoid.mean()
            dist_std = dist_to_medoid.std()
            # only remove outliers when std is not too small
            if dist_std > 1e-3:
                is_outlier = (dist_to_medoid - dist_mean) / dist_std > 3.0
                weights[is_outlier] = 0.0

        # Ignore examples that don't have the same number of strokes
        medoid_n_strokes = medoid[:, _PU_IDX].sum()
        for i, ink in enumerate(obs):
            if ink[:, _PU_IDX].sum() != medoid_n_strokes:
                weights[i] = 0.0

        feature_mat = np.vstack(
            [compute_dtw_vector(medoid, ink) for ink in obs])
        feature_mat = np.nan_to_num(feature_mat)
        weighted_feature_mat = feature_mat * weights[:, np.newaxis]

        # reconstruct weighted-average ink
        mean_ink = weighted_feature_mat.sum(axis=0) / weights.sum()
        mean_ink = mean_ink.reshape((-1, n_features), order="C")
        mean_ink = mean_ink + medoid

        # make sure pen-up is binary
        mean_ink[:, _PU_IDX] = mean_ink[:, _PU_IDX].round()

        # number of penups is off, fallback to medoid
        if mean_ink[:, _PU_IDX].sum() != medoid_n_strokes:
            # Copy so the stored model never aliases a training sample,
            # matching the plain "medoid" branch.
            return medoid.copy()

        # It seems like not updating the direction yields a better
        # result, so update_directions(mean_ink) is intentionally not
        # applied here.
        return mean_ink

    n = len(obs)
    self.num_obs = n

    if obs_weights is None:
        obs_weights = np.ones(n)
    else:
        # np.array copies: the centroid step zeroes entries in place and
        # must not leak that change into the caller's array.
        obs_weights = np.array(obs_weights)

    if center_type not in ("medoid", "centroid"):
        raise ValueError("center_type should be either medoid or centroid.")

    if n == 0:
        raise ValueError("obs must contain at least one observation.")

    # calculate distance matrix
    distmat = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            # The distance is symmetric: fill both triangles at once.
            distmat[i, j] = compute_dtw_distance(obs[i], obs[j],
                                                 alpha=self.alpha)
            distmat[j, i] = distmat[i, j]

    # compute the center
    medoid_idx = _find_medoid(obs_weights, distmat)
    if center_type == "centroid":
        self.model = _find_centroid(obs, obs_weights, medoid_idx, distmat)
    else:
        self.model = obs[medoid_idx].copy()

    if state_reduction:
        self.model = _state_reduction(self.model, obs)

    self.avg_dist = _compute_avg_dist(self.model, obs, obs_weights,
                                      self.alpha)
    self.total_weight = obs_weights.sum()
    return -self.avg_dist