def test_specific_user(self):
    """Linkage clustering restricted to user_1 should still classify accurately."""
    linkage = ClusterLinkage(self.user_ink_data, target_user_id='user_1')
    classifier = ClassifierDTW()
    classifier.train(linkage.clustered_data())
    accuracy, _, _ = classifier.test(self.label_ink_pairs)
    self.assertGreater(accuracy, 91.0)
def test_simple(self):
    """Default linkage clustering over all users should classify accurately."""
    linkage = ClusterLinkage(self.user_ink_data)
    classifier = ClassifierDTW()
    classifier.train(linkage.clustered_data())
    accuracy, _, _ = classifier.test(self.label_ink_pairs)
    self.assertGreater(accuracy, 92.0)
def test_state_reduction(self): cDTW = ClassifierDTW(alpha=0.5,min_cluster_size=10) cDTW.train(self.clustered_data,center_type='medoid', state_reduction=True) accuracy,_,_ = cDTW.test(self.label_ink_pairs) if VERBOSE: print accuracy, 87 self.assertGreater(accuracy, 87.0)
def test_simple(self):
    """K-means (DTW metric) clustering with centroid prototypes classifies well."""
    kmeans = ClusterKMeans(self.user_ink_data, algorithm='dtw')
    classifier = ClassifierDTW()
    classifier.train(kmeans.clustered_data(), center_type='centroid')
    accuracy, _, _ = classifier.test(self.label_ink_pairs)
    self.assertGreater(accuracy, 93.0)
def test_optimize(self):
    """Optimizing the cluster count before training should not hurt accuracy."""
    kmeans = ClusterKMeans(self.user_ink_data, algorithm='dtw')
    kmeans.optimize_cluster_num(self.label_ink_pairs, verbose=False)
    classifier = ClassifierDTW()
    classifier.train(kmeans.clustered_data(), center_type='centroid')
    accuracy, _, _ = classifier.test(self.label_ink_pairs)
    self.assertGreater(accuracy, 93.0)
def optimize_cluster_num(self, test_data, n_iter=30, threshold=0.001,
                         dview=None, verbose=False):
    """Greedily choose how many prototypes each label gets.

    Starts with one cluster per label, then repeatedly grows the single
    label whose extra prototype lowers the classification error the most
    on `test_data`, stopping after `n_iter` rounds, when no label helps,
    or when the error reduction falls below `threshold`.

    Args:
        test_data: label/ink pairs accepted by ClassifierDTW.test()
            (presumably the same format as elsewhere in this file —
            verify against callers).
        n_iter: maximum number of greedy growth iterations.
        threshold: minimum error-rate reduction required to continue.
        dview: optional parallel view forwarded to partitioning/testing.
        verbose: print progress when True.

    Returns:
        (error_rates, added_labels): error rate after each accepted step
        (index 0 is the 1-prototype-per-label baseline) and the label
        chosen at each step.

    Side effects:
        sets self.n_clusters to the final per-label cluster counts.
    """
    # start with 1 prototype for each label
    temp_n_clusters = np.ones(len(self.labels), dtype=np.int)
    cluster_info = self._partition_data(temp_n_clusters.tolist(), dview=dview)
    # distance matrices are computed once here and reused for every
    # later re-partitioning of the same label's data
    distmats = [distmat for _, distmat in cluster_info]
    trained_prototypes = _train_all_prototypes(self.weighted_ink_data,
                                               cluster_info, self.labels)
    curr_classifier = ClassifierDTW()
    curr_classifier.trained_prototypes = trained_prototypes
    # baseline error with one prototype per label
    (accuracy, _, _) = curr_classifier.test(test_data, dview=dview)
    error_rates = [1.0 - accuracy / 100.0]
    n_clusters = [temp_n_clusters.copy()]
    added_labels = []
    if verbose: print error_rates
    # compute all candidates: for each label, a prototype set trained with
    # one more cluster than it currently has
    candidates = []
    for i in range(len(self.labels)):
        partition, _ = _partition_subset(
            (self.weighted_ink_data[i], self.labels[i], temp_n_clusters[i] + 1),
            distmat=distmats[i], verbose=verbose)
        prototypes = _train_prototypes(self.weighted_ink_data[i], partition,
                                       self.labels[i])
        if verbose:
            print "candidate for %s has length %d"%(
                self.labels[i], len(prototypes))
        candidates.append(prototypes)
    for it in range(n_iter):
        if verbose: print "Iteration %d"%it
        # find the most beneficial class to increase its prototype count by 1
        min_error = 1.0
        min_idx = None
        best_classifier = None
        for i in range(len(self.labels)):
            if len(candidates[i]) < temp_n_clusters[i] + 1:
                # degenerated case: the candidate could not produce enough
                # prototypes, so there is no need to test it
                error = 1.0
            elif temp_n_clusters[i] + 1 > self.maxclust:
                # maxclust reached for this label
                error = 1.0
            else:
                test_classifier = ClassifierDTW()
                prototypes = curr_classifier.trained_prototypes
                # filter out all prototypes with the label, then swap in
                # the candidate prototype set for label i
                prototypes = [p for p in prototypes
                              if p.label != self.labels[i]]
                prototypes += candidates[i]
                test_classifier.trained_prototypes = prototypes
                (accuracy, _, _) = test_classifier.test(test_data, dview=dview)
                error = 1.0 - accuracy / 100.0
            if verbose: print "> %f"%error
            # NOTE: error == 1.0 (the degenerate branches) can never beat
            # min_error's initial 1.0, so best_classifier is only ever a
            # classifier that was actually built and tested above
            if (error < min_error):
                min_error = error
                min_idx = i
                best_classifier = test_classifier
        # Stop if no update found or error reduction is small
        if (min_idx is None) or (error_rates[-1] - min_error < threshold):
            if verbose: print "no improvement. done."
            break
        # choose the min_idx: commit the best candidate as the new state
        temp_n_clusters[min_idx] += 1
        error_rates.append(min_error)
        added_labels.append(self.labels[min_idx])
        n_clusters.append(temp_n_clusters.copy())
        curr_classifier = best_classifier
        if verbose:
            print error_rates
            print n_clusters
            print "choosing %s"%self.labels[min_idx]
        # replace the candidate if needed (only while still under maxclust)
        if (temp_n_clusters[min_idx] + 1 <= self.maxclust):
            partition, _ = _partition_subset(
                (self.weighted_ink_data[min_idx], self.labels[min_idx],
                 temp_n_clusters[min_idx] + 1),
                distmat=distmats[min_idx], verbose=verbose)
            prototypes = _train_prototypes(
                self.weighted_ink_data[min_idx], partition,
                self.labels[min_idx])
            if verbose:
                print "candidate for %s has length %d"%(
                    self.labels[min_idx], len(prototypes))
            candidates[min_idx] = prototypes
    self.n_clusters = n_clusters[-1]
    return (error_rates, added_labels)