class TestSkipListCreation(unittest.TestCase):
    """Checks that a SkipList holds appended values and builds skip pointers."""

    def setUp(self):
        """Build a fresh SkipList containing the integers 0 through 19."""
        self.lst = SkipList()
        for value in range(20):
            self.lst.append(value)

    def test_listCreation(self):
        """Walking from the root via ``next`` yields 0..19 in order."""
        node = self.lst.root
        for expected in range(20):
            self.assertEqual(node.val, expected)
            node = node.next

    def test_skipCreation(self):
        """After create_skips(), only the expected nodes carry pointers.

        A node is expected to carry pointers when its position matches the
        next expected skip position and a jump of default_skip_length()
        would still land inside the 20-element list; every other node must
        have ``pointers`` set to None.
        """
        self.lst.create_skips()
        # TODO::test that the *right* skip pointers were created, i.e.
        # test the targets for the skips!
        skip_len = self.lst.default_skip_length()
        expected_skip_at = 0
        node = self.lst.root
        for position in range(20):
            carries_skip = (
                position == expected_skip_at and position + skip_len < 20
            )
            if not carries_skip:
                self.assertIsNone(node.pointers)
            else:
                self.assertNotEqual(len(node.pointers), 0)
                expected_skip_at += skip_len
            node = node.next
def index_word(word, docId):
    """
    Record docId in the postings list of word.

    A word seen before gets docId appended to its existing postings list.
    A new word is assigned the current value of the module-level
    ``current_line`` as its slot in ``dictionary``, a new SkipList holding
    docId is inserted into ``postings`` at that slot, and ``current_line``
    is advanced by one.
    """
    global current_line

    # Known word: just extend its existing postings list.
    if word in dictionary:
        postings[dictionary[word]].append(docId)
        return

    # New word: claim the next slot and start a postings list for it.
    dictionary[word] = current_line
    fresh_postings = SkipList()
    fresh_postings.append(docId)
    postings.insert(current_line, fresh_postings)
    current_line += 1
def merge_two_list(self, la, lb, op):
    """
    Merge two postings lists according to a boolean operation.

    la, lb -- SkipList operands. For OR and AND the order does not matter.
              For NOT, la is ignored (it is never dereferenced, so a
              caller may pass None); the result is the postings of the
              "UNIVERSAL_SET" term minus the values in lb.
    op     -- an Operation member: Operation.OR, Operation.AND or
              Operation.NOT.

    Returns a new SkipList on which create_skips() has been called, or
    None if op is not one of the three operations above.

    OR and NOT are computed with built-in dict/set operations on the
    plain value lists and ordered by int(value); AND walks both linked
    lists in step, using skip pointers to jump ahead where possible.
    """

    def advance(node, bound):
        # Move past a node whose value is smaller than bound. Among the
        # node's skip pointers, take the last one (in list order) whose
        # target does not overshoot bound; if none qualifies, or the node
        # has no pointers, fall back to the adjacent node.
        landing = None
        if node.pointers is not None:
            for target in node.pointers:
                if target.val <= bound:
                    landing = target
        return node.next if landing is None else landing

    if op is Operation.OR:
        combined = la.get_list() + lb.get_list()
        # dict.fromkeys() drops duplicates. sorted() is used instead of
        # calling .sort() on dict.keys(), which is a view without a sort
        # method on Python 3.
        merged = SkipList(sorted(dict.fromkeys(combined), key=int))
        merged.create_skips()
        return merged

    if op is Operation.AND:
        merged = SkipList()
        nodea, nodeb = la.root, lb.root
        while nodea is not None and nodeb is not None:
            if nodea.val < nodeb.val:
                nodea = advance(nodea, nodeb.val)
            elif nodea.val > nodeb.val:
                nodeb = advance(nodeb, nodea.val)
            else:
                # Value present in both lists: keep it and step both sides.
                merged.append(nodea.val)
                nodea = nodea.next
                nodeb = nodeb.next
        merged.create_skips()
        return merged

    if op is Operation.NOT:
        universe = set(self.search_term("UNIVERSAL_SET").get_list())
        excluded = set(lb.get_list())
        merged = SkipList(sorted(universe - excluded, key=int))
        # Build skips here as the OR and AND branches do, so a NOT result
        # can be used as an operand of a later AND merge with skip
        # pointers available.
        merged.create_skips()
        return merged

    # Unrecognised operation: preserve the historical "no result" outcome.
    return None
def get_skipList(self, length):
    """
    Build a SkipList of ``length`` random integers in ascending order.

    Each value is drawn with random.randint(0, length * 4); the drawn
    values are sorted and appended one by one. Duplicates are possible.
    """
    values = sorted(random.randint(0, length * 4) for _ in range(length))
    result = SkipList()
    # TODO:swap out with SkipList(data)
    for value in values:
        result.append(value)
    return result