def test_size(self):
    cache = LRUCache(size=5, expires=1000, region='Montreal')
    users = generate_content(n=10)
    for user in users:
        cache.set(user)
    # test the max capacity of the cache
    self.assertEqual(len(cache), 5)
    # users == 10, but we don't have 10 items in the cache
    self.assertNotEqual(len(cache), 10)
class LRUCache_test_multithreading_behaviour(unittest.TestCase):
    ''' Test suite to verify multithreaded behaviour '''

    def setUp(self):
        def thread_1(c):
            c.lock.acquire()
            time.sleep(0.01)
            c.lock.release()

        def thread_2(c):
            c.set(1, 10)

        def thread_3(c):
            c.get(1)

        self.c = LRUCache(10, 5)
        self.t1 = Thread(target=thread_1, args=[self.c])
        self.t2 = Thread(target=thread_2, args=[self.c])
        self.t3 = Thread(target=thread_3, args=[self.c])

    def tearDown(self):
        del self.c
        del self.t1
        del self.t2
        del self.t3

    def test_lockSet(self):
        ''' Lock on the LRUCache.set method '''
        self.t1.start()
        self.t2.start()
        self.assertEqual(self.t2.is_alive(), True)
        self.assertEqual(self.t1.is_alive(), True)
        self.t2.join()
        self.assertEqual(self.c.head.value, 10)

    def test_lockGet(self):
        ''' Lock on the LRUCache.get method '''
        self.c.set(1, 10)
        self.t1.start()
        self.t3.start()
        self.assertEqual(self.t3.is_alive(), True)
        self.assertEqual(self.t1.is_alive(), True)
        self.assertEqual(self.c.head.ttl, 5)
        self.t3.join()
        self.assertEqual(self.c.head.ttl, 4)
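# A minimal sketch of the locking behaviour the suite above assumes (an
# assumption about the cache under test, not its real implementation):
# LRUCache exposes a single `lock` that set() and get() acquire, so while
# thread_1 sleeps with the lock held, setters and getters stay alive,
# blocked on the same lock.
from threading import Lock

class LockedCacheSketch(object):  # hypothetical stand-in for LRUCache
    def __init__(self):
        self.lock = Lock()
        self.store = {}

    def set(self, key, value):
        with self.lock:  # blocks while another thread holds the lock
            self.store[key] = value

    def get(self, key):
        with self.lock:
            return self.store.get(key)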
def test_expiration(self):
    cache = LRUCache(size=1000, expires=3, region='Montreal')
    content = generate_content(n=10)
    for item in content:
        cache.set(item)
    self.assertEqual(len(cache), 10)

    time.sleep(3)
    cache.remove_expired()
    self.assertTrue(len(cache) == 0)

    # remove expired items
    item1 = content[0]
    cache.set(item1)
    time.sleep(3)
    for item in content[9:]:
        cache.set(item)
    cache.remove_expired()
    self.assertEqual(len(cache), 1)
from cache import LRUCache

cache = LRUCache(100)
cache.set('Jesse', 'Pinkman')
cache.set('Walter', 'White')
print(f'Cache contains: {cache.cache}')
cache.set('Jesse', 'James')
print(f'Cache contains: {cache.cache}')
print(f'Get method: {cache.get("Jesse")}')
cache.delete('Walter')
print(f'Cache contains: {cache.cache}')
print(f'Get method: {cache.get("Walter")}')

cache = LRUCache(2)
cache.set('Jesse', 'Pinkman')
print(f'Cache contains: {cache.cache}')
cache.set('Jesse', 's;khgdf')
print(f'Cache contains: {cache.cache}')
cache.set('Walter', 'White')
print(f'Cache contains: {cache.cache}')
cache.set('23', 'unknown')
# print(f'Cache contains: {cache.cache}')
class crawler(object):
    """Represents 'Googlebot'. Populates a database by crawling and indexing
    a subset of the Internet. This crawler keeps track of font sizes and
    makes it simpler to manage word ids and document ids."""

    def __init__(self, db_conn, url_file):
        """Initialize the crawler with a connection to the database to
        populate and with the file containing the list of seed URLs to
        begin indexing."""
        self._url_queue = []
        self.db_conn = db_conn
        self._liteMode = 1
        self._memory_cap = 50000
        self._doc_id_cache = LRUCache(self._memory_cap)
        self._word_id_cache = LRUCache(self._memory_cap)
        self._inverted_index = {}

        # Map the doc_id of each webpage to the page title and a short
        # description.
        self._document_index = defaultdict(lambda: ["", ""])

        # for page rank
        self._relation = []
        self._curr_relation = []

        # functions to call when entering and exiting specific tags
        self._enter = defaultdict(lambda *a, **ka: self._visit_ignore)
        self._exit = defaultdict(lambda *a, **ka: self._visit_ignore)

        # add a link to our graph, and indexing info to the related page
        self._enter['a'] = self._visit_a

        # record the currently indexed document's title and increase
        # the font size
        def visit_title(*args, **kargs):
            self._visit_title(*args, **kargs)
            self._increase_font_factor(7)(*args, **kargs)

        # increase the font size when we enter these tags
        self._enter['b'] = self._increase_font_factor(2)
        self._enter['strong'] = self._increase_font_factor(2)
        self._enter['i'] = self._increase_font_factor(1)
        self._enter['em'] = self._increase_font_factor(1)
        self._enter['h1'] = self._increase_font_factor(7)
        self._enter['h2'] = self._increase_font_factor(6)
        self._enter['h3'] = self._increase_font_factor(5)
        self._enter['h4'] = self._increase_font_factor(4)
        self._enter['h5'] = self._increase_font_factor(3)
        self._enter['title'] = visit_title

        # decrease the font size when we exit these tags
        self._exit['b'] = self._increase_font_factor(-2)
        self._exit['strong'] = self._increase_font_factor(-2)
        self._exit['i'] = self._increase_font_factor(-1)
        self._exit['em'] = self._increase_font_factor(-1)
        self._exit['h1'] = self._increase_font_factor(-7)
        self._exit['h2'] = self._increase_font_factor(-6)
        self._exit['h3'] = self._increase_font_factor(-5)
        self._exit['h4'] = self._increase_font_factor(-4)
        self._exit['h5'] = self._increase_font_factor(-3)
        self._exit['title'] = self._increase_font_factor(-7)

        # never go in and parse these tags
        self._ignored_tags = set([
            'meta', 'script', 'link', 'embed', 'iframe', 'frame',
            'noscript', 'object', 'svg', 'canvas', 'applet', 'frameset',
            'textarea', 'style', 'area', 'map', 'base', 'basefont', 'param',
        ])

        # set of words to ignore
        self._ignored_words = set([
            '', 'the', 'of', 'at', 'on', 'in', 'is', 'it',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
            'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
            'y', 'z', 'and', 'or',
        ])

        # TODO remove me in real version
        self._mock_next_doc_id = 1
        self._mock_next_word_id = 1

        # keep track of some info about the page we are currently parsing
        self._curr_depth = 0
        self._curr_url = ""
        self._curr_doc_id = 0
        self._font_size = 0
        self._curr_words = None

        # get all urls into the queue
        try:
            with open(url_file, 'r') as f:
                for line in f:
                    self._url_queue.append((self._fix_url(line.strip(), ""), 0))
        except IOError:
            pass

        # When initializing, by default crawl with a depth of 1.
        self.crawl(depth=1)

    # TODO remove me in real version
    def _mock_insert_document(self, url):
        """A function that pretends to insert a url into a document db table
        and then returns that newly inserted document's id."""
        ret_id = self._mock_next_doc_id
        self._mock_next_doc_id += 1
        return ret_id

    # TODO remove me in real version
    def _mock_insert_word(self, word):
        """A function that pretends to insert a word into the lexicon db
        table and then returns that newly inserted word's id."""
        ret_id = self._mock_next_word_id
        self._mock_next_word_id += 1
        return ret_id

    def word_id(self, word):
        """Get the word id of some specific word."""
        word_id_cached = self._word_id_cache.get(word)
        if word_id_cached is not None:
            return word_id_cached
        elif not self._liteMode:
            con = lite.connect(self.db_conn)
            cur = con.cursor()
            cur.execute(
                'CREATE TABLE IF NOT EXISTS lexicon(wordid INTEGER PRIMARY KEY, word text)'
            )
            cur.execute('SELECT * FROM lexicon WHERE word = ?', (word, ))
            result = cur.fetchone()
            con.close()
            if result != () and result is not None:
                return result[0]

        # TODO: 1) add the word to the lexicon, if that fails, then the
        #          word is in the lexicon
        #       2) query the lexicon for the id assigned to this word,
        #          store it in the word id cache, and return the id.
        word_id = self._mock_insert_word(word)
        evict = self._word_id_cache.set(word, word_id)
        if evict is not None:
            # an entry was evicted from the cache; persist it to the db
            try:
                con = lite.connect(self.db_conn)
                cur = con.cursor()
                cur.execute(
                    'CREATE TABLE IF NOT EXISTS lexicon(wordid INTEGER PRIMARY KEY, word text)'
                )
                cur.execute('INSERT INTO lexicon VALUES (?, ?)',
                            (evict[1], evict[0]))
                con.commit()
                con.close()
            except lite.IntegrityError as e:
                print "can't insert into db...", e
                if "UNIQUE" in str(e):
                    pass
        return word_id

    def document_id(self, url):
        """Get the document id for some url."""
        doc_id_cached = self._doc_id_cache.get(url)
        if doc_id_cached is not None:
            return doc_id_cached
        elif not self._liteMode:
            con = lite.connect(self.db_conn)
            cur = con.cursor()
            cur.execute(
                'CREATE TABLE IF NOT EXISTS docIndex(docid INTEGER PRIMARY KEY, url text)'
            )
            cur.execute('SELECT * FROM docIndex WHERE url = ?', (url, ))
            result = cur.fetchone()
            con.close()
            if result != () and result is not None:
                return result[0]

        # TODO: just like the word id cache, but for documents. if the
        #       document doesn't exist in the db then only insert the url
        #       and leave the rest to their defaults.
        doc_id = self._mock_insert_document(url)
        evict = self._doc_id_cache.set(url, doc_id)
        if evict is not None:
            try:
                con = lite.connect(self.db_conn)
                cur = con.cursor()
                cur.execute(
                    'CREATE TABLE IF NOT EXISTS docIndex(docid INTEGER PRIMARY KEY, url text)'
                )
                cur.execute('INSERT INTO docIndex VALUES (?, ?)',
                            (evict[1], evict[0]))
                con.commit()
                con.close()
            except lite.IntegrityError as e:
                print "can't insert into db...", e
if "UNIQUE" in str(e): pass return doc_id def _fix_url(self, curr_url, rel): """Given a url and either something relative to that url or another url, get a properly parsed url.""" rel_l = rel.lower() if rel_l.startswith("http://") or rel_l.startswith("https://"): curr_url, rel = rel, "" # compute the new url based on import curr_url = urlparse.urldefrag(curr_url)[0] parsed_url = urlparse.urlparse(curr_url) return urlparse.urljoin(parsed_url.geturl(), rel) def add_link(self, from_doc_id, to_doc_id): """Add a link into the database, or increase the number of links between two pages in the database.""" # TODO def _visit_title(self, elem): """Called when visiting the <title> tag.""" title_text = self._text_of(elem).strip() print "document title=" + repr(title_text) self._document_index[self._curr_doc_id][0] = title_text # TODO update document title for document id self._curr_doc_id def _visit_a(self, elem): """Called when visiting <a> tags.""" dest_url = self._fix_url(self._curr_url, attr(elem, "href")) # print "href="+repr(dest_url), \ # "title="+repr(attr(elem,"title")), \ # "alt="+repr(attr(elem,"alt")), \ # "text="+repr(self._text_of(elem)) # add the just found URL to the url queue self._url_queue.append((dest_url, self._curr_depth)) self._curr_relation.append(dest_url) # add a link entry into the database from the current document to the # other document self.add_link(self._curr_doc_id, self.document_id(dest_url)) # TODO add title/alt/text to index for destination url def _add_words_to_document(self): # TODO: knowing self._curr_doc_id and the list of all words and their # font sizes (in self._curr_words), add all the words into the # database for this document print " num words=" + str(len(self._curr_words)) def _increase_font_factor(self, factor): """Increade/decrease the current font size.""" def increase_it(elem): self._font_size += factor return increase_it def _visit_ignore(self, elem): """Ignore visiting this type of tag""" pass def _add_text(self, elem): """Add some text to the document. This records word ids and word font sizes into the self._curr_words list for later processing.""" words = WORD_SEPARATORS.split(elem.string.lower()) for word in words: word = word.strip() if word in self._ignored_words: continue self._curr_words.append((self.word_id(word), self._font_size)) def _text_of(self, elem): """Get the text inside some element without any tags.""" if isinstance(elem, Tag): text = [] for sub_elem in elem: text.append(self._text_of(sub_elem)) return " ".join(text) else: return elem.string def _index_document(self, soup): """Traverse the document in depth-first order and call functions when entering and leaving tags. When we come across some text, add it into the index. This handles ignoring tags that we have no business looking at.""" class DummyTag(object): next = False name = '' class NextTag(object): def __init__(self, obj): self.next = obj tag = soup.html stack = [DummyTag(), soup.html] text_line = 0 while tag and tag.next: tag = tag.next # html tag if isinstance(tag, Tag): if tag.parent != stack[-1]: self._exit[stack[-1].name.lower()](stack[-1]) stack.pop() tag_name = tag.name.lower() # ignore this tag and everything in it if tag_name in self._ignored_tags: if tag.nextSibling: tag = NextTag(tag.nextSibling) else: self._exit[stack[-1].name.lower()](stack[-1]) stack.pop() tag = NextTag(tag.parent.nextSibling) continue # enter the tag self._enter[tag_name](tag) stack.append(tag) # text (text, cdata, comments, etc.) 
            else:
                self._add_text(tag)
                text = tag.string.lower()
                # Use the first three non-empty lines in a page as the
                # page description.
                if text_line < 3 and text.strip():
                    self._document_index[self._curr_doc_id][1] += text
                    text_line += 1

    def _populate_inverted_index(self):
        """Populate the inverted index. For each word_id encountered in the
        current document, add the current document ID to the set of
        documents that contain the word.
        """
        if self._liteMode:
            # print self._curr_words
            for word, _ in self._curr_words:
                if word not in self._inverted_index:
                    self._inverted_index[word] = set()
                self._inverted_index[word].add(self._curr_doc_id)
        else:
            for word, _ in self._curr_words:
                con = lite.connect(self.db_conn)
                cur = con.cursor()
                cur.execute(
                    'CREATE TABLE IF NOT EXISTS invertedIndex(wordid INTEGER, docid INTEGER)'
                )
                cur.execute('INSERT INTO invertedIndex VALUES (?, ?)',
                            (word, self._curr_doc_id))
                con.commit()
                con.close()

    def crawl(self, depth=2, timeout=3):
        """Crawl the web!"""
        seen = set()

        while len(self._url_queue):
            url, depth_ = self._url_queue.pop()

            # skip this url; it's too deep
            if depth_ > depth:
                continue

            doc_id = self.document_id(url)

            # we've already seen this document
            if doc_id in seen:
                continue

            seen.add(doc_id)  # mark this document as visited

            socket = None
            try:
                socket = urllib2.urlopen(url, timeout=timeout)
                soup = BeautifulSoup(socket.read())

                self._curr_depth = depth_ + 1
                self._curr_url = url
                self._curr_doc_id = doc_id
                self._font_size = 0
                self._curr_words = []
                self._index_document(soup)
                self._add_words_to_document()
                self._populate_inverted_index()

                # build self._relation
                for item in self._curr_relation:
                    self._relation.append((self._curr_url, item))
                self._curr_relation = []
            except Exception as e:
                print e
            finally:
                if socket:
                    socket.close()

        self.insertdatabase()
        # drop the in-memory caches now that they have been flushed to the db
        self._word_id_cache = {}
        self._doc_id_cache = {}
        self.get_page_rank()

    def get_doc_id_cache(self):
        con = lite.connect(self.db_conn)
        cur = con.cursor()
        cur.execute('SELECT * FROM docIndex')
        result = cur.fetchall()
        dic = {}
        for item in result:
            dic[item[1]] = item[0]
        con.close()
        return dic

    def get_inverted_doc_id_cache(self):
        con = lite.connect(self.db_conn)
        cur = con.cursor()
        cur.execute('SELECT * FROM docIndex')
        result = cur.fetchall()
        dic = {}
        for item in result:
            dic[item[0]] = item[1]
        con.close()
        return dic

    def get_inverted_word_id_cache(self):
        con = lite.connect(self.db_conn)
        cur = con.cursor()
        cur.execute('SELECT * FROM lexicon')
        result = cur.fetchall()
        dic = {}
        for item in result:
            dic[item[0]] = item[1]
        con.close()
        return dic

    def get_inverted_index(self):
        """Retrieves an inverted index for crawled pages.

        Returns:
            A dict mapping each encountered word to the set of documents
            where they are found, in the form
            {word_id: set(doc_id1, doc_id2, ...)}.
        """
        if self._liteMode:
            return self._inverted_index
        else:
            con = lite.connect(self.db_conn)
            cur = con.cursor()
            cur.execute('SELECT * FROM invertedIndex')
            result = cur.fetchall()
            dic = {}
            for item in result:
                if item[0] not in dic:
                    dic[item[0]] = set()
                dic[item[0]].add(item[1])
            # print dic
            con.close()
            return dic

    def get_resolved_inverted_index(self):
        """Retrieves an inverted index for crawled pages with word IDs and
        doc IDs resolved to words and URLs.

        Returns:
            A dict mapping each encountered word to the set of documents
            where they are found, in the form {word: set(url1, url2, ...)}.
""" #inverted_index = self._inverted_index inverted_index = self.get_inverted_index() inverted_doc_id = self.get_inverted_doc_id_cache() inverted_word_id = self.get_inverted_word_id_cache() resolved_inverted_index = {} for word_id, doc_id_set in inverted_index.items(): word = inverted_word_id[word_id] url_set = set() for doc_id in doc_id_set: url_set.add(inverted_doc_id[doc_id]) resolved_inverted_index[word] = url_set return resolved_inverted_index def get_page_rank(self): # get the rank score of websites and write them into database table and print each row of the table relation = [] doc_id_cache = self.get_doc_id_cache() # self.relation is a list of tuples generated by crawler, which the first element in each tuple is the from url and second element is the to url for item in self._relation: # convert the urls to doc_ids to match the format of page_rank function fromid = doc_id_cache[item[0]] toid = doc_id_cache[item[1]] relation.append((fromid, toid)) # call page_rank function to calculate scores and returns a defaultdic pr = self.page_rank(relation) # insert the rankscore to pageRank table in database con = lite.connect(self.db_conn) cur = con.cursor() cur.execute( 'CREATE TABLE IF NOT EXISTS pageRank(docid INTEGER PRIMARY KEY, score real)' ) for item in pr: score = pr[item] cur.execute('INSERT INTO pageRank VALUES (?, ?)', (item, score)) cur.execute('SELECT * FROM pageRank') #print "pageRank Table:" #print "[docid, score ]" #print each row of the pageRank table in the database. #for row in cur: #print row con.commit() con.close() return pr def page_rank(self, links, num_iterations=20, initial_pr=1.0): from collections import defaultdict import numpy as np page_rank = defaultdict(lambda: float(initial_pr)) num_outgoing_links = defaultdict(float) incoming_link_sets = defaultdict(set) incoming_links = defaultdict(lambda: np.array([])) damping_factor = 0.85 # collect the number of outbound links and the set of all incoming documents # for every document for (from_id, to_id) in links: num_outgoing_links[int(from_id)] += 1.0 incoming_link_sets[to_id].add(int(from_id)) # convert each set of incoming links into a numpy array for doc_id in incoming_link_sets: incoming_links[doc_id] = np.array( [from_doc_id for from_doc_id in incoming_link_sets[doc_id]]) num_documents = float(len(num_outgoing_links)) lead = (1.0 - damping_factor) / num_documents partial_PR = np.vectorize( lambda doc_id: page_rank[doc_id] / num_outgoing_links[doc_id]) for _ in xrange(num_iterations): for doc_id in num_outgoing_links: tail = 0.0 if len(incoming_links[doc_id]): tail = damping_factor * partial_PR( incoming_links[doc_id]).sum() page_rank[doc_id] = lead + tail return page_rank def insertdatabase(self): # insert lexicon, docindex and inverted index into the database con = lite.connect(self.db_conn) cur = con.cursor() cur.execute( 'CREATE TABLE IF NOT EXISTS lexicon(wordid INTEGER PRIMARY KEY, word text UNIQUE)' ) for item in self._word_id_cache.map: word_id = self._word_id_cache.map[item][0] try: cur.execute('INSERT INTO lexicon VALUES (?, ?)', (word_id, item)) except lite.IntegrityError as e: print "can't insert into db...", e if "UNIQUE" in str(e): pass cur.execute( 'CREATE TABLE IF NOT EXISTS docIndex(docid INTEGER PRIMARY KEY, url text UNIQUE)' ) for item in self._doc_id_cache.map: doc_id = self._doc_id_cache.map[item][0] try: cur.execute('INSERT INTO docIndex VALUES (?, ?)', (doc_id, item)) except lite.IntegrityError as e: print "can't insert into db...", e if "UNIQUE" in str(e): pass con.commit() con.close() 
        return None
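# A minimal usage sketch under assumed file names ('dbFile.db' and
# 'urls.txt' are hypothetical): __init__ already calls crawl(depth=1), so
# after construction the resolved inverted index and the PageRank scores
# are available.
if __name__ == "__main__":
    bot = crawler('dbFile.db', 'urls.txt')
    print bot.get_resolved_inverted_index()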
class LRUCache_test_logic(unittest.TestCase):
    ''' Test suite to verify the cache logic '''

    def setUp(self):
        self.c0 = LRUCache()
        self.c1 = LRUCache(5, 3)

    def tearDown(self):
        del self.c0
        del self.c1

    def test_insertItem(self):
        ''' Verify correct setting of the node and the LRU moves '''
        # Insert one node and verify all the fields
        self.c0.set(1, 10, 2)
        self.assertEqual(self.c0.cache_nodes, 1)
        self.assertEqual(self.c0.head, self.c0.linkdict[1])
        self.assertEqual(self.c0.head, self.c0.tail)
        self.assertEqual(self.c0.head.left, None)
        self.assertEqual(self.c0.head.right, None)
        self.assertEqual(self.c0.head.key, 1)
        self.assertEqual(self.c0.head.value, 10)
        self.assertEqual(self.c0.head.ttl, 2)

        # Insert the first node and verify the ttl default
        self.c1.set(1, 10)
        first_node = self.c1.head
        self.assertEqual(self.c1.head, self.c1.linkdict[1])
        self.assertEqual(self.c1.head.ttl, self.c1.cache_ttl)
        self.assertEqual(self.c1.tail, first_node)

        # Insert a second node and verify the ttl override and the correct
        # links; list 2 -> 1
        self.c1.set(2, 20, 1)
        second_node = self.c1.head
        self.assertEqual(self.c1.head, self.c1.linkdict[2])
        self.assertEqual(self.c1.head.ttl, 1)
        self.assertEqual(self.c1.head.left, None)
        self.assertEqual(self.c1.head.right, first_node)
        self.assertEqual(self.c1.tail, first_node)

        # Add a third node
        # list 3 -> 2 -> 1
        self.c1.set(3, 30)
        self.assertEqual(self.c1.head, self.c1.linkdict[3])
        self.assertEqual(self.c1.head.left, None)
        self.assertEqual(self.c1.head.right, second_node)
        self.assertEqual(second_node.left, self.c1.head)
        self.assertEqual(second_node.right, first_node)
        self.assertEqual(first_node.left, second_node)
        self.assertEqual(first_node.right, None)
        self.assertEqual(self.c1.cache_nodes, 3)

    def test_updateItem(self):
        ''' Adding the same key causes an update and a move to the head of
        the list '''
        self.c1.set(1, 10)
        first_node = self.c1.head
        self.c1.set(2, 20)
        second_node = self.c1.head
        self.c1.set(1, 30, 5)
        # list 1 -> 2
        self.assertEqual(self.c1.head, first_node)
        self.assertEqual(self.c1.head.key, 1)
        self.assertEqual(self.c1.head.value, 30)
        self.assertEqual(self.c1.head.ttl, 5)
        self.assertEqual(self.c1.head.left, None)
        self.assertEqual(self.c1.head.right, second_node)
        self.assertEqual(second_node.left, first_node)
        self.assertEqual(second_node.right, None)
        self.assertEqual(self.c1.cache_nodes, 2)

    def test_getItem(self):
        ''' Getting an item causes either a move to the head of the list
        and a change of ttl, or eviction per ttl '''
        self.c0.set(1, 10, 2)
        n = self.c0.get(1)
        self.assertEqual(self.c0.head.key, 1)
        self.assertEqual(self.c0.head.ttl, 1)
        self.assertEqual(n, 10)
        n = self.c0.get(2)
        self.assertEqual(n, None)

        self.c1.set(1, 10)
        first_node = self.c1.head
        self.c1.set(2, 20, 5)
        second_node = self.c1.head
        self.c1.set(3, 30, 6)
        third_node = self.c1.head

        # pick from the tail 3 -> 2 -> 1
        n = self.c1.get(1)
        # list 1 -> 3 -> 2
        self.assertEqual(n, 10)
        self.assertEqual(self.c1.head, first_node)
        self.assertEqual(self.c1.tail, second_node)
        self.assertEqual(self.c1.head.left, None)
        self.assertEqual(self.c1.head.right, third_node)
        self.assertEqual(self.c1.tail.right, None)

        # pick from the middle 1 -> 3 -> 2
        n = self.c1.get(3)
        # list 3 -> 1 -> 2
        self.assertEqual(n, 30)
        self.assertEqual(self.c1.head, third_node)
        self.assertEqual(self.c1.head.right, first_node)
        self.assertEqual(self.c1.head.left, None)
        self.assertEqual(self.c1.tail, second_node)
        self.assertEqual(self.c1.tail.right, None)
        self.assertEqual(self.c1.tail.left, first_node)
        self.assertEqual(first_node.right, second_node)
        self.assertEqual(first_node.left, third_node)
    def test_evictionLRU(self):
        ''' If the cache is full, the tail has to be evicted '''
        for i in range(1, 7):
            self.c1.set(i, 10 * i)
        # list 6 -> 5 -> 4 -> 3 -> 2
        self.assertEqual(self.c1.tail.key, 2)
        n = self.c1.get(1)
        self.assertEqual(n, None)

    def test_evictionTTL(self):
        ''' If one value is read more than ttl times, it will be evicted '''
        self.c1.set(1, 10, 1)
        self.c1.set(2, 20, 1)
        for i in range(3, 6):
            self.c1.set(i, 10 * i)
        # list 5 -> 4 -> 3 -> 2 -> 1
        # evict a node from the middle of the list
        n = self.c1.get(2)
        self.assertEqual(self.c1.head.key, 5)
        self.assertEqual(self.c1.head.left, None)
        self.assertEqual(self.c1.head.right.key, 4)
        self.assertEqual(self.c1.tail.key, 1)
        self.assertEqual(self.c1.tail.right, None)
        n = self.c1.get(2)
        self.assertEqual(n, None)

        # list 5 -> 4 -> 3 -> 1
        # evict the tail node
        n = self.c1.get(1)
        self.assertEqual(self.c1.head.key, 5)
        self.assertEqual(self.c1.head.left, None)
        self.assertEqual(self.c1.head.right.key, 4)
        self.assertEqual(self.c1.tail.key, 3)
        self.assertEqual(self.c1.tail.right, None)
        n = self.c1.get(1)
        self.assertEqual(n, None)

        # list 5 -> 4 -> 3
        for i in range(3):
            n = self.c1.get(4)
            self.assertEqual(n, 40)
        n = self.c1.get(4)
        self.assertEqual(n, None)
        self.assertEqual(self.c1.head.key, 5)
        self.assertEqual(self.c1.tail.key, 3)
        self.assertEqual(self.c1.head.right.key, 3)
        self.assertEqual(self.c1.head.left, None)
        self.assertEqual(self.c1.tail.left.key, 5)
        self.assertEqual(self.c1.tail.right, None)
        self.assertEqual(self.c1.cache_nodes, 2)
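# A sketch of the node layout these assertions imply (an assumption, not
# the implementation under test): a doubly linked list ordered from head
# (most recently used) to tail (least recently used), where `left` points
# toward the head, `right` toward the tail, and each read decrements ttl
# until the node is evicted.
class NodeSketch(object):
    def __init__(self, key, value, ttl):
        self.key = key
        self.value = value
        self.ttl = ttl      # remaining reads before eviction
        self.left = None    # neighbour closer to the head
        self.right = None   # neighbour closer to the tail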
# tree.inodes['/']['home'].update({'3.jpg': d})
# print tree

# serialization
# f = open('tree', 'wb')
# f.write(tree.marshal())
# f.close()

# deserialization
# f = open('tree')
# tree_str = f.read()
# print tree.unmarshal(tree_str)

# upload_main_inode(tree.marshal())
# tree_str = download_from_vk(tree=True)
# print Tree.unmarshal(tree_str)

cache = LRUCache(capacity=66)
cache.set(a.id, (a.size, '/tmp/2.jpg'))
cache.set(b.id, b)
cache.set(c.id, c)
cache.set(c1.id, c1)

# hit the cache; c should be popped.
cache.get(a.id)
cache.get(b.id)
cache.get(c1.id)
cache.set(d.id, d)
cache.get(a.id)
class TestLRUCache(unittest.TestCase):
    def setUp(self):
        self.cache1 = LRUCache(100)
        self.cache2 = LRUCache(1)
        self.cache3 = LRUCache(2)
        self.cache4 = LRUCache(1)
        self.cache5 = LRUCache(2)

    def test_init(self):
        self.assertRaises(ValueError, LRUCache, 0)
        self.assertRaises(ValueError, LRUCache, -100)

    def test_get(self):
        self.cache2.set('1', '1')
        self.assertEqual(self.cache2.get('1'), '1')
        self.cache2.set('2', '2')
        self.assertEqual(self.cache2.get('1'), '')
        self.assertEqual(self.cache2.get('2'), '2')
        self.cache3.set('1', '1')
        self.cache3.set('2', '2')
        self.assertEqual(self.cache3.get('1'), '1')
        self.cache3.set('3', '3')
        self.assertEqual(self.cache3.get('1'), '1')
        self.assertEqual(self.cache3.get('2'), '')

    def test_set(self):
        self.cache4.set('1', '1')
        self.assertEqual(self.cache4.get('1'), '1')

    def test_delete(self):
        self.cache5.set('1', '1')
        self.cache5.set('2', '2')
        self.cache5.delete('1')
        self.assertEqual(self.cache5.get('1'), '')
        self.assertEqual(self.cache5.get('2'), '2')
        self.assertRaises(KeyError, self.cache5.delete, '1')
        self.assertRaises(KeyError, self.cache5.delete, '3')

    def test_from_task(self):
        self.cache1.set('Jesse', 'Pinkman')
        self.cache1.set('Walter', 'White')
        self.cache1.set('Jesse', 'James')
        self.assertEqual(self.cache1.get('Jesse'), 'James')
        self.cache1.delete('Walter')
        self.assertEqual(self.cache1.get('Walter'), '')
from cache import LRUCache

cache = LRUCache(1)
cache.set('Jesse', 'Pinkman')
cache.set('Walter', 'White')
cache.set('Jesse', 'James')
print(cache.get('Jesse'))
cache.delete('Walter')
print(cache.get('Walter'))
def test_lru(self):
    cache = LRUCache(10)
    cache.set("1", 1)
    cache.set("2", 2)
    cache.set("3", 3)
    self.assertEqual(cache.get("2"), 2)
    cache.set("4", 4)
    cache.set("5", 5)
    cache.set("6", 6)
    cache.set("7", 7)
    cache.set("8", 8)
    cache.set("9", 9)
    self.assertEqual(cache.get("8"), 8)
    cache.set("10", 10)
    cache.set("11", 11)
    self.assertEqual(cache.get("1"), None)
    self.assertEqual(cache.get("3"), 3)
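# One way to satisfy this test, sketched with collections.OrderedDict (an
# assumption; the real LRUCache under test may be implemented differently):
# get() refreshes recency and returns None on a miss, and set() evicts the
# least recently used key once capacity is exceeded.
from collections import OrderedDict

class LRUCacheSketch(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self.data = OrderedDict()

    def get(self, key):
        if key not in self.data:
            return None
        self.data.move_to_end(key)  # mark as most recently used
        return self.data[key]

    def set(self, key, value):
        if key in self.data:
            self.data.move_to_end(key)
        self.data[key] = value
        if len(self.data) > self.capacity:
            self.data.popitem(last=False)  # evict the least recently used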