def collisions(self, other):
    """Find the parts of ``other`` whose SAX word is close to this series.

    ``other`` is scanned from the start with a window that begins at
    ``self.window_min`` and, while no match is found, grows by
    ``self.window_step`` up to ``self.window_max``.  Each slice is compared
    to ``self.data_saxed`` via ``sax.distance``; a slice whose distance is
    ``<= self.dist_max`` is reported (and printed).

    Args:
        other: object that supports ``len(other.data)`` and slicing
            (``other[a:b]``); each slice must accept ``epsilon``,
            ``alphabet_size`` and ``word_size`` and expose ``data_saxed``.

    Returns:
        A list of strings formatted as ``"@<start>(<window>):<distance>"``,
        one per match, in scan order.
    """
    step = 5  # pointer advance used when the window cannot grow any more
    pointer = 0
    found = []
    window = self.window_min
    while pointer + window <= len(other.data):
        segment = other[pointer:pointer + window]
        # Copy our SAX parameters onto the slice before reading its
        # data_saxed (presumably derived from them -- TODO confirm).
        segment.epsilon = self.epsilon
        segment.alphabet_size = self.alphabet_size
        segment.word_size = self.word_size
        # NOTE(review): the third argument looks like a normalisation bound
        # for the distance -- confirm against sax.distance.
        d = sax.distance(
            self.data_saxed,
            segment.data_saxed,
            (self.alphabet_size - 1) * self.word_size,
        )
        if d <= self.dist_max:
            col = "@%s(%s):%s" % (pointer, window, d)
            print(col)
            found.append(col)
            # Resume right after the match with the smallest window.
            # `continue` is required: without it the stepping logic below
            # ran as well and immediately stepped/enlarged again, so the
            # minimum-size window directly after a match was never tested.
            pointer += window
            window = self.window_min
            continue
        if (self.window_step == 0
                or window + self.window_step > self.window_max
                or pointer + window + self.window_step > len(other.data)):
            # Window stepping is disabled, or the next window would exceed
            # window_max or run past the data: advance the pointer and
            # start again with the smallest window.
            pointer += step
            window = self.window_min
        else:
            # Try a larger window at the same position.
            window += self.window_step
    return found
def compare(self): pointer = 0 candidates = [] matches = [] lastrep = 0 while 1: if self.data.is_serial: #print "Pos: %s Len: %s"%(pointer, self.data.length) while pointer+self.lenmax > self.data.length: #print "Waiting for data. Pos: %s Len: %s"%(pointer, self.data.length) time.sleep(0.1) elif pointer > self.data.length: print "EOF" break if self.data.length-100 > lastrep: print "@ %s of %s"%(pointer, self.data.length) lastrep = self.data.length for window in self.windows: if pointer+window["length"] > self.data.length: continue slice = self.data[pointer:pointer+window["length"]] slice.epsilon = window["sax"].epsilon slice.alphabet_size = window["sax"].alphabet_size slice.word_size = window["sax"].word_size #print "[%s:%s]"%(pointer, pointer+window["length"]) #print slice.data_saxed #print window["length"] d = sax.distance(window["sax"].data_saxed, slice.data_saxed, (slice.alphabet_size-1)*slice.word_size) #print d if d <= window["sax"].dist_max: #We have found a candidate, yay! col = "Candidate for %s-%s [%s:%s] (%s) Len: %s"%(window["sax"].id1,window["sax"].id2,pointer,pointer+window["length"],d, window["length"]) print col for c in candidates: if c["sax"] == window["sax"] and c["d"]>d: #print "removing" #print len(candidates) candidates.remove(c) candidates.append({"start": pointer, "end": pointer+window["length"], "sax": window["sax"], "d": d}) if candidates: minend = min([c["end"] for c in candidates]) if minend <= pointer: match = candidates[0] for c in candidates: if c["d"] < match["d"]: match = c matches.append(match) candidates = [] self.matches_dict[match["sax"].id1] += 1 print "Match for %s-%s [%s:%s] (%s)"%(match["sax"].id1,match["sax"].id2,match["start"],match["end"], match["d"]) print self.matches_dict pointer = match["end"] else: pointer += self.step else: pointer += self.step print len(matches)