def main():
    """Parse CLI options, run hashtable commands from an input file, log results.

    Flags: -i/--input, -o/--output, -s/--table-size,
    -d/--doubling-threshold, -h/--halving-threshold.
    """
    import getopt
    import sys

    input_file = "./input.txt"
    output_file = "./output.txt"
    table_size = 4
    doubling_threshold = 0.75
    halving_threshold = 0.25

    # Bug fix: the original referenced undefined argument_list/options/
    # long_options (NameError). Define them from sys.argv and the flags
    # handled below; every short option takes a value, hence the ':'s.
    argument_list = sys.argv[1:]
    options = "i:o:s:d:h:"
    long_options = ["input=", "output=", "table-size=",
                    "doubling-threshold=", "halving-threshold="]

    try:
        arguments, values = getopt.getopt(argument_list, options, long_options)
        for currentArgument, currentValue in arguments:
            if currentArgument in ("-i", "--input"):
                input_file = currentValue
            elif currentArgument in ("-o", "--output"):
                output_file = currentValue
            elif currentArgument in ("-s", "--table-size"):
                table_size = int(currentValue)
            elif currentArgument in ("-d", "--doubling-threshold"):
                doubling_threshold = float(currentValue)
            elif currentArgument in ("-h", "--halving-threshold"):
                halving_threshold = float(currentValue)
    except getopt.error as err:
        # Best-effort: report the bad flag and continue with defaults,
        # matching the original's behavior.
        print(str(err))

    inputs = parse_input(input_file)
    logger = Logger()
    hashtable = Hashtable(table_size, logger,
                          grow_threshold=doubling_threshold,
                          shrink_threshold=halving_threshold)
    for cmd, value in inputs:
        hashtable.apply(cmd, value)
    logger.write(output_file)
def lagraHashtabell(atomlista):
    """Store every atom of the atom list in a hashtable keyed by its name.

    (Original docstring was Swedish: "Lagrar atomlistans element i en
    hashtabell".)
    """
    tabell = Hashtable(len(atomlista))
    for atom in atomlista:
        tabell.store(atom.namn, atom)
    return tabell
def test_unknown_key():
    """A lookup for a key that was never inserted yields None."""
    table = Hashtable()
    table.add("banana", 9)
    result = table.get("cucumber")
    assert result == None  # noqa: E711 — mirrors the original equality check
class Arcs:
    """In-memory registry of arcs deduplicated by a SHA-1 digest.

    A forward arc is stored under a non-negative index; the reversed arc's
    digest maps to the one's complement (~index) so orientation can be
    recovered from the sign.
    """

    def __init__(self, Q):
        # Hashtables sized at 10x the point count to keep collisions low.
        self.coincidences = Hashtable(Q * 10)
        self.arcsByPoint = Hashtable(Q * 10)
        self.pointsByPoint = Hashtable(Q * 10)
        self.arcs = {}      # str(index) -> arc (list of points)
        self.length = 0     # number of arcs pushed so far
        self.db = {}        # arc digest -> signed index

    def get_index(self, point):
        """Return the index recorded for *point*."""
        return self.pointsByPoint.get(point)

    def get_point_arcs(self, point):
        """Return the list of arcs that touch *point*."""
        return self.arcsByPoint.get(point)

    def coincidence_lines(self, point):
        """Return the coincidence entries recorded for *point*."""
        return self.coincidences.get(point)

    def peak(self, point):
        """Look at the coincidence entry for *point* via the table's peak()."""
        return self.coincidences.peak(point)

    def push(self, arc):
        """Append *arc* to storage and return the new arc count."""
        self.arcs[str(self.length)] = arc
        self.length += 1
        return self.length

    def map(self, func):
        """Return [func(arc)] for every stored arc, in insertion order."""
        return [func(self.arcs[str(i)]) for i in range(self.length)]

    def get_hash(self, arc):
        """Return the SHA-1 hex digest of the arc's string form.

        Bug fix: hashlib's update() requires bytes on Python 3, so the
        string is encoded first (the original passed str and raised
        TypeError).
        """
        digest = sha1()
        digest.update(str(arc).encode("utf-8"))
        return digest.hexdigest()

    def check(self, arcs):
        """Return the signed index for *arcs*, registering it if unseen.

        A digest hit returns the stored signed index. A miss stores the arc
        under the next index, records the reversed arc as ~index, and
        attaches the arc to its lesser endpoint.
        """
        a0 = arcs[0]
        a1 = arcs[-1]
        # Anchor the arc at whichever endpoint compares lesser.
        point = a0 if point_compare(a0, a1) < 0 else a1
        point_arcs = self.get_point_arcs(point)
        h = self.get_hash(arcs)
        if h in self.db:
            return int(self.db[h])
        index = self.length
        point_arcs.append(arcs)
        self.db[h] = index
        self.db[self.get_hash(list(reversed(arcs)))] = ~index
        self.push(arcs)
        return index
def test_add():
    """Adding a pair populates the bucket its key hashes to."""
    table = Hashtable()
    slot = table.hash("spam")
    assert table.buckets[slot] is None
    table.add("spam", "eggs")
    assert table.buckets[slot]
def __init__(self, Q):
    """Initialize point-indexed hashtables and empty in-memory arc storage."""
    buckets = Q * 10  # generous sizing to limit collisions
    self.coincidences = Hashtable(buckets)
    self.arcsByPoint = Hashtable(buckets)
    self.pointsByPoint = Hashtable(buckets)
    self.arcs = {}
    self.length = 0
    self.db = {}
def test_hashtable_contains_true():
    """contains() reports True for a key that was set."""
    table = Hashtable()
    table.set('listen', 1)
    assert table.contains('listen') == True  # noqa: E712 — mirrors original
def test_in_range_hash():
    """_hash() maps a key into the half-open range [0, table size)."""
    table = Hashtable()
    assert 0 <= table._hash('spam') < table._size
def test_hashtable_returns_null():
    """get() on a missing key yields this table's sentinel value, False."""
    table = Hashtable()
    table.set('glisten', 3)
    assert table.get('cat') == False  # noqa: E712 — mirrors original
def test_hashtable_add():
    """A value stored with set() comes back from get()."""
    table = Hashtable()
    table.set('glisten', 3)
    assert table.get('glisten') == 3
def repeated_word(sentence):
    """Return the first word that repeats (case-insensitively) in *sentence*.

    Punctuation is stripped from each word before comparison. Returns None
    implicitly when no word repeats (matches the original behavior).
    """
    # Strip any non-word, non-space characters (punctuation) from each token.
    pattern = r'[^\w\s]'
    words = [re.sub(pattern, '', w) for w in sentence.split()]
    seen = Hashtable()  # renamed from `hash`, which shadowed the builtin
    for word in words:
        key = word.upper()  # case-insensitive comparison key
        if seen.contains(key):
            return word
        # Bug fix: store under the uppercased KEY so the contains() check
        # above can ever match — the original called add(word, word.upper()),
        # storing the raw word as key while probing with the uppercase form.
        seen.add(key, word)
def __init__(self, Q):
    """Build point-indexed hashtables and shelve-backed arc/dedup storage."""
    size = Q * 10
    self.coincidences = Hashtable(size)
    self.arcsByPoint = Hashtable(size)
    self.pointsByPoint = Hashtable(size)
    # Arcs and the dedup index live in shelve files under fresh temp dirs.
    self.arc_db_path = mkdtemp() + '/arc_db'
    self.arcs = shelve.open(self.arc_db_path)
    self.length = 0
    self.storage_path = mkdtemp() + '/db'
    self.db = shelve.open(self.storage_path)
def test_hashtable_lookup_collision():
    """'listen' and 'silent' (likely colliding keys) stay independently retrievable."""
    table = Hashtable()
    table.set('listen', 1)
    table.set('silent', 2)
    assert table.get('listen') == 1
class Arcs:
    """Arc registry with SHA-1 based deduplication (in-memory dicts).

    Forward arcs receive a non-negative index; the digest of the reversed
    arc maps to ~index, encoding orientation in the sign.
    """

    def __init__(self, Q):
        # 10x the point count keeps the hashtables sparse.
        self.coincidences = Hashtable(Q * 10)
        self.arcsByPoint = Hashtable(Q * 10)
        self.pointsByPoint = Hashtable(Q * 10)
        self.arcs = {}      # str(index) -> arc
        self.length = 0     # arcs stored so far
        self.db = {}        # digest -> signed index

    def get_index(self, point):
        """Index recorded for *point*."""
        return self.pointsByPoint.get(point)

    def get_point_arcs(self, point):
        """Arcs touching *point*."""
        return self.arcsByPoint.get(point)

    def coincidence_lines(self, point):
        """Coincidence entries recorded for *point*."""
        return self.coincidences.get(point)

    def peak(self, point):
        """Non-consuming look at the coincidence entry for *point*."""
        return self.coincidences.peak(point)

    def push(self, arc):
        """Store *arc* under the next index; return the new count."""
        self.arcs[str(self.length)] = arc
        self.length += 1
        return self.length

    def map(self, func):
        """Apply *func* to every stored arc, preserving insertion order."""
        return [func(self.arcs[str(i)]) for i in range(self.length)]

    def get_hash(self, arc):
        """SHA-1 hex digest of str(arc).

        Bug fix: encode to bytes first — hashlib rejects str on Python 3.
        """
        digest = sha1()
        digest.update(str(arc).encode("utf-8"))
        return digest.hexdigest()

    def check(self, arcs):
        """Return the signed index of *arcs*, registering it when unseen."""
        a0 = arcs[0]
        a1 = arcs[-1]
        # Anchor at the lesser endpoint per point_compare ordering.
        point = a0 if point_compare(a0, a1) < 0 else a1
        point_arcs = self.get_point_arcs(point)
        h = self.get_hash(arcs)
        if h in self.db:
            return int(self.db[h])
        index = self.length
        point_arcs.append(arcs)
        self.db[h] = index
        self.db[self.get_hash(list(reversed(arcs)))] = ~index
        self.push(arcs)
        return index
def repeated_word_check(text_to_check):
    """Return the first repeated word in the text, ignoring case and punctuation.

    Returns the literal string 'No Words Repeated' when every word is unique.
    """
    normalized = text_to_check.lower().translate(
        str.maketrans('', '', string.punctuation))
    seen = Hashtable()
    for word in normalized.split():
        if seen.contains(word):
            return word
        seen.set(word, word)
    return 'No Words Repeated'
class Arcs:
    """Arc registry with SHA-1 deduplication, backed by shelve files on disk.

    Forward arcs get a non-negative index; the reversed arc's digest maps to
    ~index so orientation is encoded in the sign.
    """

    def __init__(self, Q):
        self.coincidences = Hashtable(Q * 10)
        self.arcsByPoint = Hashtable(Q * 10)
        self.pointsByPoint = Hashtable(Q * 10)
        # Arc bodies and the dedup index are shelve files in fresh temp dirs.
        self.arc_db_path = mkdtemp() + '/arc_db'
        self.arcs = shelve.open(self.arc_db_path)
        self.length = 0
        self.storage_path = mkdtemp() + '/db'
        self.db = shelve.open(self.storage_path)

    def get_index(self, point):
        """Index recorded for *point*."""
        return self.pointsByPoint.get(point)

    def get_point_arcs(self, point):
        """Arcs touching *point*."""
        return self.arcsByPoint.get(point)

    def coincidence_lines(self, point):
        """Coincidence entries recorded for *point*."""
        return self.coincidences.get(point)

    def peak(self, point):
        """Non-consuming look at the coincidence entry for *point*."""
        return self.coincidences.peak(point)

    def push(self, arc):
        """Store *arc* under the next index; return the new count."""
        self.arcs[str(self.length)] = arc
        self.length += 1
        return self.length

    def close(self):
        """Close both shelve files and delete their backing paths.

        NOTE(review): some dbm backends append a suffix (e.g. '.db') to the
        path passed to shelve.open, in which case remove() on the bare path
        would fail — confirm on the deployment platform.
        """
        self.db.close()
        remove(self.storage_path)
        self.arcs.close()
        remove(self.arc_db_path)

    def get_hash(self, arc):
        """SHA-1 hex digest of str(arc).

        Bug fix: encode to bytes first — hashlib.update() rejects str on
        Python 3 (the original passed str and raised TypeError).
        """
        digest = sha1()
        digest.update(str(arc).encode("utf-8"))
        return digest.hexdigest()

    def check(self, arcs):
        """Return the signed index of *arcs*, registering it when unseen."""
        a0 = arcs[0]
        a1 = arcs[-1]
        # Anchor at the lesser endpoint per point_compare ordering.
        point = a0 if point_compare(a0, a1) < 0 else a1
        point_arcs = self.get_point_arcs(point)
        h = self.get_hash(arcs)
        if h in self.db:
            return int(self.db[h])
        index = self.length
        point_arcs.append(arcs)
        self.db[h] = index
        self.db[self.get_hash(list(reversed(arcs)))] = ~index
        self.push(arcs)
        return index
def test_get_silent_and_listen():
    """Two anagram keys (which may collide) each retrieve their own value."""
    table = Hashtable()
    table.set('listen', 'to me')
    table.set('silent', 'so quiet')
    assert table.get('listen') == 'to me'
    assert table.get('silent') == 'so quiet'
def testSmallContains(self):
    """Every inserted key is found via `in` (__contains__) on a small table."""
    q = Hashtable(hashFunction, 3)
    # .items() instead of Py2-only .iteritems() so this runs on Python 3.
    for key, value in self.buildings.items():
        q[key] = value
    for key in self.buildings:
        self.assertIn(
            key, q,
            "membership in small hashtable: `in` keyword didn't work! check __contains__.\nkey:{}"
            .format(key))
def testLargeContains(self):
    """Every inserted key is found via `in` (__contains__) on a large table."""
    q = Hashtable(hashFunction, 800)
    # .items() instead of Py2-only .iteritems() so this runs on Python 3.
    for key, value in self.doubles.items():
        q[key] = value
    for key in self.doubles:
        self.assertIn(
            key, q,
            "membership in large hashtable: `in` keyword didn't work! check __contains__.\nkey:{}"
            .format(key))
def testSmallGetSetWithoutFunction(self):
    """Values survive a set/get round-trip on a small table with the test hash."""
    q = Hashtable(self.testingFunction, 3)
    # .items() instead of Py2-only .iteritems() so this runs on Python 3.
    for key, value in self.buildings.items():
        q[key] = value
    for key, expected in self.buildings.items():
        observed = q[key]
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(
            observed, expected,
            "small hashtable without your hash function: value changed after being added!\nkey:{}\nexpected value:{}\nobserved value:{}"
            .format(key, expected, observed))
def testLargeGetSetWithFunction(self):
    """Values survive a set/get round-trip on a large table with the real hash."""
    q = Hashtable(hashFunction, 800)
    # .items() instead of Py2-only .iteritems() so this runs on Python 3.
    for key, value in self.doubles.items():
        q[key] = value
    for key, expected in self.doubles.items():
        observed = q[key]
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(
            observed, expected,
            "large hashtable with your hash function: value changed after being added! check __getitem__/__setitem__\nkey:{}\nexpected value:{}\nobserved value:{}"
            .format(key, expected, observed))
def twoSumWithHash(self, nums, target):
    """Return indices [i, j] such that nums[i] + nums[j] == target.

    Single pass with a hashtable mapping value -> index; returns None
    implicitly when no pair sums to target.
    """
    table = Hashtable()
    for idx, num in enumerate(nums):
        complement = target - num
        if table.contains(complement):
            # Earlier index first: the complement was seen before idx.
            return [table.get(complement), idx]
        table.put(num, idx)
def testLargeLen(self):
    """len() of a large hashtable matches the number of items inserted."""
    q = Hashtable(hashFunction, 800)
    # .items() instead of Py2-only .iteritems() so this runs on Python 3.
    for key, value in self.doubles.items():
        q[key] = value
    size = len(q)  # renamed from `l`, easily confused with `1`
    self.assertIsInstance(size, type(len(self.doubles)),
                          "length of large hashtable: incorrect type!")
    self.assertLessEqual(
        size, len(self.doubles),
        "length of large hashtable: {} items is too many! expected {}; check __len__."
        .format(size, len(self.doubles)))
    self.assertGreaterEqual(
        size, len(self.doubles),
        "length of large hashtable: {} items is not enough! expected {}; check __len__."
        .format(size, len(self.doubles)))
def __init__(self, Q):
    """Create point-indexed hashtables plus shelve-backed arc and dedup stores."""
    buckets = Q * 10
    self.coincidences = Hashtable(buckets)
    self.arcsByPoint = Hashtable(buckets)
    self.pointsByPoint = Hashtable(buckets)
    # Arc bodies live in a shelve file inside a fresh temporary directory.
    self.arc_db_path = mkdtemp() + '/arc_db'
    self.arcs = shelve.open(self.arc_db_path)
    self.length = 0
    # The digest -> index map gets its own shelve file.
    self.storage_path = mkdtemp() + '/db'
    self.db = shelve.open(self.storage_path)
def tree_intersection(tree_1, tree_2=0):
    """Return values (as ints) that appear in both trees' pre-order traversals.

    NOTE(review): the default tree_2=0 is kept for interface compatibility,
    though 0 has no preOrder() — presumably callers always pass both trees.
    """
    common = []
    seen = Hashtable()
    # First pass: record every value found in tree_1.
    node = tree_1.preOrder().top
    while node:
        if not seen.contains(node.value):
            # Bug fix: the value must be the KEY so the contains() checks
            # can ever match — the original called add(0, value), storing
            # everything under the single key 0.
            seen.add(node.value, 0)
        node = node.next
    # Second pass: collect tree_2 values already recorded.
    node = tree_2.preOrder().top
    while node:
        if seen.contains(node.value):
            common.append(int(node.value))
        node = node.next
    return common
def test_add():
    """An added key/value pair is retrievable via get()."""
    table = Hashtable()
    table.add('Home', 'Kansas City')
    assert table.get('Home') == 'Kansas City'
def test_hashtable_instance():
    """A freshly constructed Hashtable is truthy."""
    assert Hashtable()
def test_get_empty():
    """get(None) on an empty table returns the KeyError class itself.

    NOTE(review): comparing against the class (not raising it) is what the
    original asserted — this table apparently returns KeyError as a sentinel.
    """
    table = Hashtable()
    assert table.get(None) == KeyError
def test_does_not_contain():
    """contains() is False for a key never added."""
    table = Hashtable()
    assert table.contains('Couch') == False  # noqa: E712 — mirrors original
def test_contains():
    """contains() is True for a key that was added."""
    table = Hashtable()
    table.add('Soda', 'MtnDew')
    assert table.contains('Soda') == True  # noqa: E712 — mirrors original
def test_get_in_list():
    """get() returns the value stored under the key."""
    table = Hashtable()
    table.add('Candy', 'Crunch')
    assert table.get('Candy') == 'Crunch'
def setUp(self):
    """Create a fresh Hashtable before each test."""
    self.ht = Hashtable()
def test_contains_false():
    """contains('roger') is expected to be False even after add('roger', 45).

    NOTE(review): this only passes if contains() checks something other than
    the key, or add() stores under a different key — confirm against the
    Hashtable implementation; the expectation looks inverted.
    """
    table = Hashtable()
    table.add('roger', 45)
    assert table.contains('roger') == False  # noqa: E712 — mirrors original
class TestHashtable(unittest.TestCase):
    """Unit tests for Hashtable put/get/overwrite/resize behaviour."""

    def setUp(self):
        """Fresh table for every test."""
        self.ht = Hashtable()

    def test_put(self):
        """put() grows the reported size from 0 to 1."""
        self.assertEqual(self.ht._size(), 0)
        self.ht.put('foo', 100)
        self.assertEqual(self.ht._size(), 1)

    def test_put_with_str_and_int(self):
        """Both str and int keys round-trip through put/get."""
        self.ht.put('foo', 100)
        self.ht.put(5, 'bar')
        self.assertEqual(self.ht.get('foo'), 100)
        self.assertEqual(self.ht.get(5), 'bar')

    def test_put_type_checking(self):
        """An unhashable key (list) makes put() raise TypeError."""
        self.assertRaises(TypeError, self.ht.put, [], 100)

    def test_put_overwrite(self):
        """Re-putting a key replaces its value without growing the table."""
        self.ht.put('foo', 100)
        self.ht.put('foo', 200)
        self.assertEqual(self.ht._size(), 1)
        stored = self.ht.get('foo')
        self.assertEqual(stored, 200)

    def test_put_resize(self):
        """put() invokes _resize (mocked out) as entries are added."""
        self.ht._resize = MagicMock()
        self.ht.put('foo', 100)
        self.ht.put('bar', 200)
        self.assertTrue(self.ht._resize.called)

    def test_get(self):
        """get() returns the value stored by put()."""
        self.ht.put('foo', 100)
        fetched = self.ht.get('foo')
        self.assertEqual(fetched, 100)
from hashtable import Hashtable
from hashtable import hashFunction
import csv

# Script: load city records, hash them by city name, and dump the size of
# every bucket to a CSV so the hash distribution can be inspected.

numbuck = 1000

# `with` guarantees both files are closed even if an exception is raised
# part-way through (the original closed them manually and leaked on error).
# newline='' is the documented requirement for csv.writer targets, avoiding
# spurious blank rows on Windows.
with open("uscities.txt", "r") as cities, \
        open("datafile.csv", "w", newline="") as datafile:
    c = csv.writer(datafile, dialect="excel")

    citylist = [line.split(",") for line in cities]

    hashy = Hashtable(hashFunction, numbuck)
    for lists in citylist:
        # lists[1] is presumably the city name, lists[4] the stored value
        # ("can be anything" per the original) — TODO confirm column layout.
        hashy[lists[1]] = lists[4]

    # One row per bucket: (bucket index, bucket size). enumerate replaces
    # the manual counter; the unused `datalist` local was dropped.
    for count, size in enumerate(hashy.getBucketSizes()):
        c.writerow([count, size])
joined_data = [] for bucket in left._buckets: if bucket: current = bucket.head while current: left_column = [current.data[0], current.data[1]] right_value = right.get(current.data[0]) left_column.append(right_value) current = current.next joined_data.append(left_column) return joined_data if __name__ == '__main__': hashmap1 = Hashtable() hashmap2 = Hashtable() # hashmap1.add('fond', 'enamored') # hashmap1.add('wrath', 'anger') # hashmap1.add('diligent', 'employed') # hashmap1.add('outfit', 'garb') # hashmap1.add('guide', 'usher') # hashmap2.add('fond', 'averse') # hashmap2.add('wrath', 'delight') # hashmap2.add('diligent', 'idle') # hashmap2.add('guide', 'follow') # hashmap2.add('flow', 'jam') # print(left_joiner(hashmap1, hashmap2))