Example #1
File: smear.py Project: yggi/smear
    def collisions(self, other):
        """Scan ``other`` with a variable-size sliding window for SAX matches.

        At every position a slice of ``other`` is given this object's
        ``epsilon``, ``alphabet_size`` and ``word_size``; its ``data_saxed``
        is then compared with ``self.data_saxed`` through ``sax.distance``.
        A slice whose distance is at most ``self.dist_max`` is reported on
        stdout as ``@<pointer>(<window>):<distance>``.

        After each comparison exactly one of three things happens:

        * match: skip past the matched window and restart at ``window_min``;
        * no match and the window cannot grow (``window_step`` is 0, the next
          size would exceed ``window_max``, or it would run past the end of
          ``other.data``): advance by a fixed step and restart at
          ``window_min``;
        * otherwise: enlarge the window by ``window_step`` at the same
          position.

        Previously the "enlarge" statement lived inside the "advance" branch,
        so a non-matching window that could still grow changed neither the
        pointer nor the window size and the loop never terminated.

        Args:
            other: object exposing ``data`` (sized) and slice indexing that
                yields objects with settable ``epsilon``, ``alphabet_size``,
                ``word_size`` and a ``data_saxed`` attribute.

        Returns:
            The list ``r``. NOTE(review): nothing is ever appended to it, so
            callers always receive ``[]``; matches are only printed. Confirm
            whether the matches were meant to be collected.
        """
        step = 5  # fixed advance used when the window cannot be enlarged
        pointer = 0
        r = []
        window = self.window_min
        # Third argument to sax.distance; it depends only on self, so compute
        # it once. Presumably the largest possible distance -- confirm against
        # sax.distance.
        distance_arg = (self.alphabet_size - 1) * self.word_size

        while pointer + window <= len(other.data):
            segment = other[pointer:pointer + window]
            segment.epsilon = self.epsilon
            segment.alphabet_size = self.alphabet_size
            segment.word_size = self.word_size

            d = sax.distance(self.data_saxed, segment.data_saxed, distance_arg)

            if d <= self.dist_max:
                # Match found. The parenthesised single-argument print emits
                # the same text under Python 2 and Python 3.
                print("@%s(%s):%s" % (pointer, window, d))
                # Move one (current) window ahead and reset the window size.
                pointer += window
                window = self.window_min
            elif (self.window_step == 0
                  or window + self.window_step > self.window_max
                  or pointer + window + self.window_step > len(other.data)):
                # No window stepping, or the next window would be bigger than
                # window_max or run out of the data: move one step and reset.
                pointer += step
                window = self.window_min
            else:
                # Same position, larger window.
                window += self.window_step
        return r
Example #2
File: smear.py Project: yggi/smear
    def compare(self):
        # Walk through self.data with a moving pointer. At each position a
        # slice is compared against every entry of self.windows using
        # sax.distance. Slices within an entry's dist_max are printed and
        # stored as candidates; later the candidate with the smallest distance
        # is printed as a match and counted in self.matches_dict.
        #
        # NOTE(review): several statements below have no body in this copy
        # (marked individually), so the method does not parse as shown.
        # Recover the missing lines from the upstream file before changing
        # any logic here.
        pointer = 0
        candidates = []
        # NOTE(review): never appended to in the visible code; only its length
        # is printed after the main loop.
        matches = []

        # Value of self.data.length at the time of the last progress report.
        lastrep = 0
        # NOTE(review): no break/return is visible inside this loop.
        while 1:
            if self.data.is_serial:
                #print "Pos: %s Len: %s"%(pointer, self.data.length)
                # NOTE(review): this loop has no statement body here; judging
                # by the commented-out message it presumably waits until
                # lenmax more items are available -- confirm upstream.
                while pointer+self.lenmax > self.data.length:
                    #print "Waiting for data. Pos: %s Len: %s"%(pointer, self.data.length)
            elif pointer > self.data.length:
                # NOTE(review): only a message is printed; nothing visible
                # leaves the loop at end of data.
                print "EOF"

            # Progress report whenever the data length exceeds the last
            # reported length by more than 100.
            if self.data.length-100 > lastrep:
                    print "@ %s of %s"%(pointer, self.data.length)
                    lastrep = self.data.length

            for window in self.windows:
                # NOTE(review): this check (window would run past the
                # available data) has no body here.
                if pointer+window["length"] > self.data.length:
                # Take a slice of this window's length at the current position
                # and give it the SAX parameters of the window's reference
                # object.
                slice = self.data[pointer:pointer+window["length"]]
                slice.epsilon = window["sax"].epsilon
                slice.alphabet_size = window["sax"].alphabet_size
                slice.word_size = window["sax"].word_size
                #print "[%s:%s]"%(pointer, pointer+window["length"])
                #print slice.data_saxed
                #print window["length"]
                # Third argument is (alphabet_size-1)*word_size; presumably
                # the largest possible distance -- confirm against
                # sax.distance.
                d = sax.distance(window["sax"].data_saxed, slice.data_saxed, (slice.alphabet_size-1)*slice.word_size)
                #print d
                if d <= window["sax"].dist_max:
                    #We have found a candidate, yay!
                    col = "Candidate for %s-%s [%s:%s] (%s) Len: %s"%(window["sax"].id1,window["sax"].id2,pointer,pointer+window["length"],d, window["length"])
                    print col
                    # NOTE(review): this condition selects stored candidates
                    # for the same reference with a larger distance, but its
                    # body is missing; the commented-out "removing" hint
                    # suggests they were dropped -- confirm upstream.
                    for c in candidates:
                        if c["sax"] == window["sax"] and c["d"]>d:
                            #print "removing"
                            #print len(candidates)
                    candidates.append({"start": pointer, "end": pointer+window["length"], "sax": window["sax"], "d": d})

            if candidates:
                # Decide once the earliest-ending candidate no longer extends
                # beyond the current position.
                minend = min([c["end"] for c in candidates])
                if minend <= pointer:
                    # Pick the candidate with the smallest distance; on ties
                    # the earliest stored one wins.
                    match = candidates[0]
                    for c in candidates:
                        if c["d"] < match["d"]:
                            match = c
                    candidates = []
                    # Count the match under the reference's id1.
                    self.matches_dict[match["sax"].id1] += 1
                    print "Match for %s-%s [%s:%s] (%s)"%(match["sax"].id1,match["sax"].id2,match["start"],match["end"], match["d"])
                    print self.matches_dict
                    # Continue scanning from the end of the match.
                    pointer = match["end"]
                    # NOTE(review): as written, the pointer advances by
                    # self.step here and again on the next line, also after
                    # jumping to the match end; an `else:` may be missing on
                    # each level -- confirm upstream.
                    pointer += self.step
                pointer += self.step

        print len(matches)