def search_run(size):
    """Time `size` lookups on a skip list filled with the integers 0..size-1.

    Only the lookup phase is timed; construction and insertion are excluded.

    Args:
        size: number of elements to insert and then look up.

    Returns:
        A dict with the keys 'structure', 'size', 'time' (elapsed
        ``time.clock()`` difference for the lookup phase), 'comparisons'
        (read from ``sl.comparisons``) and 'height' (read from
        ``sl.maxHeight``).

    Note:
        ``xrange`` and ``time.clock`` are Python 2 names (``time.clock`` was
        removed in Python 3.8); they are kept as in the original code.
    """
    # Create a skip list
    sl = SkipList()

    # Insert elements
    for i in xrange(size):
        sl.insert(i)

    # Now, get time
    start = time.clock()

    # Find elements: look up every inserted key once. Searching for `size`
    # here would repeat a single lookup of a key that was never inserted.
    for i in xrange(size):
        sl.find(i)

    # Get final time, and store
    end = time.clock()

    return {
        'structure': 'skip_list',
        'size': size,
        'time': end - start,
        'comparisons': sl.comparisons,
        'height': sl.maxHeight,
    }
class TestSkipList:
    """Checks SkipList appending, length, root/last tracking and skip pairs."""

    def setup(self):
        """Store a newly constructed SkipList on the test instance."""
        self.skip_list = SkipList()

    def test_root(self):
        """After one append, root holds the value and is also the last node."""
        sl = self.skip_list
        sl.append(3)

        eq_(sl.root.val(), 3)
        eq_(len(sl), 1)
        eq_(sl.root, sl.last)

    def test_append(self):
        """Appending 1..4 keeps 1 at the root, 4 at the end, length 4."""
        sl = self.skip_list
        for value in (1, 2, 3, 4):
            sl.append(value)

        eq_(sl.root.val(), 1)
        eq_(len(sl), 4)
        eq_(sl.last.val(), 4)

    def test_generate_skips(self):
        """With 0..29 appended, generate_skips yields the expected pairs."""
        sl = self.skip_list
        for value in range(30):
            sl.append(value)

        # Unpack every yielded item into exactly two parts and rebuild it as
        # a tuple so the comparison below is against a list of 2-tuples.
        observed = []
        for left, right in sl.generate_skips():
            observed.append((left, right))

        expected = [(1, 6), (6, 11), (11, 16), (16, 21), (21, 26)]
        eq_(observed, expected)
def build_dictionary(terms, dictionary, doc_id, postings):
    """Build a dictionary and respective postings list from the terms.

    For each term, either a new postings slot is allocated or ``doc_id`` is
    appended to the term's existing SkipList.

    Args:
        terms: iterable of terms found in the document ``doc_id``.
        dictionary: mapping ``term -> [count, slot]``; ``count`` starts at the
            length of the new SkipList and is incremented on each later
            append, ``slot`` is the term's index into ``postings``. Mutated
            in place.
        doc_id: identifier appended to each term's SkipList.
        postings: sequence of SkipList objects indexed by ``slot``. Mutated
            in place.

    Side effects:
        Advances the module-level ``pointer`` (the slot given to the next new
        term) by one for every term added to ``dictionary``.
    """
    global pointer
    for term in terms:
        if term not in dictionary:
            skip_list = SkipList()
            skip_list.append(doc_id)
            dictionary[term] = [len(skip_list), pointer]
            postings.insert(pointer, skip_list)
            # Only a newly allocated slot moves the next-free index forward.
            pointer += 1
        else:
            # Use the stored slot without touching the global `pointer`:
            # overwriting it would make the next new term land on an occupied
            # index and shift postings away from their recorded slots.
            entry = dictionary[term]
            postings[entry[1]].append(doc_id)
            entry[0] += 1
def main():
    """Exercise a SkipList: set six keys, print it, remove key 3, print again.

    Each key is stored with itself as the value, in the order 1, 2, 4, 3, 6, 5.
    Finally the result of ``sl.at(4)`` is printed.
    """
    sl = SkipList()
    for key in (1, 2, 4, 3, 6, 5):
        sl.set(key, key)
    print_skip_list(sl)

    sl.remove(3)
    print_skip_list(sl)

    looked_up = sl.at(4)
    print(looked_up)
def _time_searches(structure, items, runs):
    """Return per-item search times in milliseconds, averaged over `runs` passes."""
    times = [0] * len(items)
    for _ in range(runs):
        for index, item in enumerate(items):
            start = time.time() * 1000
            structure.search(item)
            end = time.time() * 1000
            times[index] += (end - start) / runs
    return times


def _summarize(times):
    """Return (mean, population standard deviation, maximum z-score) of `times`."""
    if not times:
        return 0.0, 0.0, 0.0
    mean = sum(times) / len(times)
    # Divide the summed squared deviations by N before taking the root;
    # without the division the value is not a standard deviation.
    variance = sum((mean - t) ** 2 for t in times) / len(times)
    std_dev = math.sqrt(variance)
    # All-equal timings give a zero deviation; report a z-score of 0 instead
    # of dividing by zero.
    max_z_score = (max(times) - mean) / std_dev if std_dev else 0.0
    return mean, std_dev, max_z_score


def _report(title, times):
    """Print `title`, the raw times, then mean, std deviation and max z-score."""
    mean, std_dev, max_z_score = _summarize(times)
    print(title)
    print(times)
    print(mean)
    print(std_dev)
    print(max_z_score)


def test():
    """Compare per-item search times of a SkipList and a TUSL built from it.

    Inserts the integers 0..NUMBER_OF_ITEMS-1 into a SkipList, times a search
    for every item over NUMBER_OF_RUNS passes, prints the timings with summary
    statistics, repeats the measurement on ``TUSL(skip_list)``, and finally
    shows one bar chart of query times per structure.
    """
    skip_list = SkipList()
    items = list(range(NUMBER_OF_ITEMS))
    for item in items:
        skip_list.insert(item)

    # Runs the experiment on the original skip list.
    skip_times = _time_searches(skip_list, items, NUMBER_OF_RUNS)
    _report("Skip List Data: ", skip_times)

    # Runs the experiment on the TUSL.
    tusl_list = TUSL(skip_list)
    tusl_times = _time_searches(tusl_list, items, NUMBER_OF_RUNS)
    _report("TUSL Data: ", tusl_times)

    # Creates charts showing the query times for each index.
    plt.figure(1, (12, 12))
    plt.bar(items, skip_times)
    plt.figure(2, (12, 12))
    plt.bar(items, tusl_times)
    plt.show()
def setup(self):
    """Store a newly constructed SkipList on ``self.skip_list``."""
    fresh_list = SkipList()
    self.skip_list = fresh_list