def classify(self, query, HDorLD='HD'):
    """Label *query* by k-nearest-neighbour vote over the stored training data.

    HDorLD selects which training dictionary is searched ('HD' or 'LD').
    Returns the class name (dictionary key) of the winning neighbour group.
    """
    neighbours = []
    if HDorLD == 'HD':
        # DTW distance from the query to every HD training example
        for label, references in self.data.HDtraining.iteritems():
            for ref in references:
                neighbours.append((label, dtw.dist(query, ref)))
    if HDorLD == 'LD':
        # Unweighted multi-dim distance (faster than the weighted DTW variant)
        for label, references in self.data.LDtraining.iteritems():
            for ref in references:
                neighbours.append((label, dtw.dist(query, ref)))

    # The k nearest neighbours, ordered by increasing distance
    nearest = sorted(neighbours, key=lambda pair: pair[1])[:self.k]

    # Tally (votes, summed distance) per class among the k neighbours
    tally = {}
    for label, dist in nearest:
        votes, total = tally.get(label, (0, 0))
        tally[label] = (votes + 1, total + dist)

    # Winner: the class with the most votes; a tie goes to the group that is
    # closer on average (equal vote counts, so comparing summed distances is
    # the same as comparing averages).
    mostVotes = max(votes for votes, _ in tally.values())
    _, winner = min((total, label)
                    for label, (votes, total) in tally.items()
                    if votes == mostVotes)
    return winner
def classify(self, query, HDorLD='HD'):
    """Label *query* by k-nearest-neighbour vote over the stored training data.

    HDorLD selects which training dictionary is searched ('HD' or 'LD').
    Returns the class name (dictionary key) of the most common class among
    the k nearest neighbours; a tie is broken in favour of the class whose
    nearest neighbour appears earliest in the distance-sorted list.
    """
    l = []
    if HDorLD == 'HD':
        for key, value in self.data.HDtraining.iteritems():
            for reference in value:
                l.append((key, dtw.dist(query, reference)))  # Get HD distances to neighbours
    if HDorLD == 'LD':
        for key, value in self.data.LDtraining.iteritems():
            for reference in value:
                #l.append((key,dtw.getDTWdist2DweightedSum(query,reference,self.data.averageWeights))) # Slow, weighted multi-dim DTW
                l.append((key, dtw.dist(query, reference)))  # faster multi-dim distance without weights
    sl = sorted(l, key=lambda x: x[1])  # Sort neighbours by distance
    L = sl[:self.k]  # Take the nearest k neighbours
    # BUG FIX: the previous code passed whole (class, distance) tuples to
    # itertools.groupby without a key, so same-class neighbours with different
    # distances were never grouped together, and the trailing [0] returned a
    # (class, distance) tuple instead of the class name.
    labels = [cl for cl, _ in L]  # class labels, nearest first
    # Most common label; ties broken by the earlier (nearer) first occurrence
    return max(set(labels), key=lambda cl: (labels.count(cl), -labels.index(cl)))
def interClassDistance(taskIndex):
    """Worker for one (classA, classB) cell of the class-distance matrix.

    taskIndex encodes a (row, column) pair into the module-level ``arguments``
    table: row = taskIndex // numClasses, column = taskIndex % numClasses.
    Returns (taskIndex, average pairwise DTW distance between the two classes).
    """
    numClasses = len(dictOfClasses)  # hoisted: was computed twice per call
    # Floor division (//) keeps the row index an int; the old taskIndex/len(...)
    # relied on Python 2 integer-division semantics.
    (classA, classB, classAweights, classBweights) = \
        arguments[taskIndex // numClasses][taskIndex % numClasses]
    summedDistances = 0
    # The per-pair averaged ``weights`` list built by the old code fed only a
    # commented-out weighted-DTW call, so it was dead work on every inner
    # iteration and has been removed.  zip() is kept so any length mismatch
    # between a class and its weight list truncates exactly as before.
    for a, _ in zip(classA, classAweights):
        for b, _ in zip(classB, classBweights):
            summedDistances += dtw.dist(a, b)  # Faster method that doesn't use weights
    averageDistance = summedDistances / (len(classA) * len(classB))
    return (taskIndex, averageDistance)
def interClassDistance(classA, classB, classAweights=None, classBweights=None):
    """Average pairwise DTW distance between two classes of sequences.

    When both weight lists are supplied, each (a, b) pair is compared with the
    weighted multi-dimensional DTW, using the element-wise mean of the two
    weight vectors; otherwise the faster unweighted distance is used.
    Returns the mean of all len(classA) * len(classB) pairwise distances.
    """
    # BUG FIX: the old code always did zip(classA, classAweights) even when
    # the weights were left at their default of None, so the advertised
    # weights-free call raised a TypeError before the (classAweights == None)
    # test could ever take effect.
    useWeights = classAweights is not None and classBweights is not None
    summedDistances = 0
    if useWeights:
        for a, aw in zip(classA, classAweights):
            for b, bw in zip(classB, classBweights):
                # Element-wise mean of the two weight vectors
                weights = [float(sum(t)) / float(len(t)) for t in zip(aw, bw)]
                summedDistances += dtw.getDTWdist2DweightedSum(a, b, weights)
    else:
        for a in classA:
            for b in classB:
                summedDistances += dtw.dist(a, b)  # Faster, unweighted distance
    averageDistance = summedDistances / (len(classA) * len(classB))
    return averageDistance
def similarityMatrix(segments,segmentNames,title,savePlot=False): #UNTESTED print "Constructing similarity matrix" bar = progressbar.ProgressBar(maxval=len(segments)**2, widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]) bar.start() progressCount = 0 distances = [] for i,k in zip(range(len(segments)),reversed(range(len(segments)))): distances.append([]) for j in range(len(segments)): distances[i].append(dtw.dist(segments[k],segments[j])) progressCount+=1 bar.update(progressCount) bar.finish() plot.plotSimilarityMatrix(distances,segmentNames,title,savePlot)