Ejemplo n.º 1
0
def _perform_linkage(obs, max_cluster, distmat=None, 
                     alpha=0.5, penup_z=10.0, 
                     algorithm='complete',
                     verbose=False):
    n = len(obs)
    
    if max_cluster == 1:
        return (np.ones(len(obs),dtype=np.int), None)

    if algorithm not in ('average','single','complete'):
        raise ValueError('algorithm must be either average, single, '
                         'or complte.')

    # calculate distance matrix
    if distmat is None:
        if verbose: print "calculating distmat"
        distmat = np.zeros((n,n))
        for i in range(n):
            for j in range(i+1,n):
                distmat[i,j] = compute_dtw_distance(obs[i],
                                                    obs[j],
                                                    alpha=alpha,
                                                    penup_z=penup_z)
                distmat[j,i] = distmat[i,j]

    z_avg = sp_cluster.hierarchy.linkage(distance.squareform(distmat), 
                                         method=algorithm)
    fc = sp_cluster.hierarchy.fcluster(z_avg,t=max_cluster,
                                       criterion='maxclust')
    return (np.asarray(fc), distmat)
Ejemplo n.º 2
0
 def test_simple(self):
     """Check the DTW distance from ``ink1`` to each fixture ink.

     Every distance is computed with ``alpha=0.5`` and ``penup_z=10`` and
     must match its expected value within an absolute tolerance of 1e-3.
     """
     # (fixture attribute, expected distance from ink1), in checking order.
     expected_by_ink = (
         ("ink1", 0.000),
         ("ink2", 0.100),
         ("ink3", 0.053),
     )
     for attr_name, expected in expected_by_ink:
         # The fixture is looked up lazily so a missing one only affects
         # its own check, as with separate assertions.
         actual = compute_dtw_distance(self.ink1,
                                       getattr(self, attr_name),
                                       alpha=0.5,
                                       penup_z=10)
         self.assertAlmostEqual(actual, expected, delta=1e-3)
Ejemplo n.º 3
0
    def score(self, obs):
        """Score an observation against the trained prototype.

        The score is the negated DTW distance between ``self.model`` and
        ``obs`` (computed with ``alpha=self.alpha``), divided by
        ``self.avg_dist``.

        Returns
        -------
        score
            A single value, ``-distance / self.avg_dist``. No tuple is
            returned.

        """
        raw_distance = compute_dtw_distance(self.model, obs, alpha=self.alpha)
        return -raw_distance / self.avg_dist
Ejemplo n.º 4
0
    def train(self, obs, obs_weights=None, center_type="centroid", state_reduction=False, ignore_outliers=True):
        """Estimate the prototype (``self.model``) from a set of observations.

        A pairwise DTW distance matrix is computed over ``obs`` and the
        weighted medoid is located. For ``center_type="medoid"`` a copy of
        the medoid becomes the model; for ``center_type="centroid"`` a
        weighted-average ink aligned to the medoid is built, falling back to
        the medoid when the averaged pen-up count is inconsistent.

        Parameters
        ----------
        obs : sequence of 2-D arrays
            Observations. ``obs[0].shape[1]`` is taken as the number of
            features and column ``_PU_IDX`` is read as the pen-up channel.
        obs_weights : array-like, optional
            One weight per observation; defaults to all ones. The caller's
            object is never modified: a private copy is used.
        center_type : {"centroid", "medoid"}
            Which kind of center to use as the model.
        state_reduction : bool
            When true, the model is passed through ``_state_reduction``.
        ignore_outliers : bool
            Centroid only. Observations whose distance to the medoid lies
            more than 3 standard deviations above the mean get weight zero.

        Returns
        -------
        The negated ``self.avg_dist`` (the value from ``_compute_avg_dist``).

        Raises
        ------
        ValueError
            If ``center_type`` is not recognised or ``obs`` is empty.

        Notes
        -----
        Sets ``self.num_obs``, ``self.model``, ``self.avg_dist`` and
        ``self.total_weight``. Weights zeroed while building the centroid
        (outliers, stroke-count mismatches) also affect ``self.avg_dist``
        and ``self.total_weight``.
        """

        def _find_medoid(weights, distmat):
            # Entry [i, j] of the product is weights[i] * distmat[i, j], so
            # the column sums are weighted average distances to each j.
            weighted_distmat = distmat * weights[:, np.newaxis]
            avg_dist_per_obs = weighted_distmat.sum(axis=0) / weights.sum()
            return avg_dist_per_obs.argmin()

        def _find_centroid(obs, weights, medoid_idx, distmat):
            # NOTE: ``weights`` is modified in place on purpose; it is the
            # private copy owned by train(), never the caller's array.
            n_features = obs[0].shape[1]
            medoid = obs[medoid_idx]

            # Ignore outliers by setting their weights to 0
            if ignore_outliers:
                dist_to_medoid = distmat[medoid_idx, :]
                dist_mean = dist_to_medoid.mean()
                dist_std = dist_to_medoid.std()
                # only remove outliers when std is not too small
                if dist_std > 1e-3:
                    z_scores = (dist_to_medoid - dist_mean) / dist_std
                    weights[z_scores > 3.0] = 0.0

            # Ignore examples that don't have the same number of strokes
            medoid_n_strokes = medoid[:, _PU_IDX].sum()
            for i, ink in enumerate(obs):
                if ink[:, _PU_IDX].sum() != medoid_n_strokes:
                    weights[i] = 0.0

            total_weight = weights.sum()
            if total_weight == 0:
                # Nothing left to average. The unguarded division produced
                # NaNs that ended in this same medoid fallback.
                return medoid.copy()

            feature_mat = np.vstack([compute_dtw_vector(medoid, ink)
                                     for ink in obs])
            feature_mat = np.nan_to_num(feature_mat)
            # Row i of feature_mat is scaled by weights[i].
            weighted_feature_mat = feature_mat * weights[:, np.newaxis]

            # reconstruct weighted-average ink
            mean_ink = weighted_feature_mat.sum(axis=0) / total_weight
            mean_ink = mean_ink.reshape((-1, n_features), order="C")
            mean_ink = mean_ink + medoid

            # make sure pen-up is binary
            mean_ink[:, _PU_IDX] = mean_ink[:, _PU_IDX].round()

            # number of penups is off, fallback to medoid
            if mean_ink[:, _PU_IDX].sum() != medoid_n_strokes:
                # Copy so the model never aliases the caller's observation,
                # consistent with the center_type="medoid" branch.
                return medoid.copy()

            # Directions are deliberately not recomputed: not updating them
            # seemed to yield a better result.
            return mean_ink

        n = len(obs)
        self.num_obs = n

        if obs_weights is None:
            obs_weights = np.ones(n)
        else:
            # np.array copies; np.asarray would hand back the caller's own
            # ndarray, which the centroid step would then zero in place.
            obs_weights = np.array(obs_weights)

        if center_type not in ("medoid", "centroid"):
            raise ValueError("center_type should be either medoid or centroid.")

        if n == 0:
            raise ValueError("obs must contain at least one observation.")

        # calculate distance matrix (symmetric, zero diagonal)
        dist_mat = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                dist_mat[i, j] = dist_mat[j, i] = compute_dtw_distance(
                    obs[i], obs[j], alpha=self.alpha)

        # compute the center
        medoid_idx = _find_medoid(obs_weights, dist_mat)
        if center_type == "centroid":
            self.model = _find_centroid(obs, obs_weights, medoid_idx, dist_mat)
        else:
            self.model = obs[medoid_idx].copy()

        if state_reduction:
            self.model = _state_reduction(self.model, obs)

        self.avg_dist = _compute_avg_dist(self.model, obs, obs_weights, self.alpha)

        self.total_weight = obs_weights.sum()

        return -self.avg_dist