class topic4:
    def __init__(self, c_hash, c_user, c_words):
        self.topic_count = 1
        self.l1 = LRU(c_hash)
        self.l2 = LRU(c_user)

    def set_hashLRU(self, l):
        self.set(self.l1, l)

    def set_userLRU(self, l):
        self.set(self.l2, l)

    def set(self, lru, l):
        for k in l:
            v = lru.get(k, 0)
            lru[k] = v + 1

    def set_cluster(self, hashtags, users, words):
        for k in hashtags:
            self.l1[k] = self.l1.get(k, 0) + 1
        for k in users:
            self.l2[k] = self.l2.get(k, 0) + 1
        self.topic_count += 1

    def get_similarity(self, hashtags, users, words):
        h_sum = 1
        u_sum = 1
        w_sum = 1
        h_match = 0
        h_ind = 0
        u_ind = 0
        w_ind = 0
        c = 0
        h1 = self.l1.get_size()
        u1 = self.l2.get_size()
        for h in hashtags:
            # l1_items = zip(*self.l1.items())
            h_sum += self.l1.get(h, 0)
            if (self.l1.has_key(h)):
                ind = self.l1.keys().index(h)
                h_ind += h1 - ind
                h_match += 1 if ind < 250 else 0
        for u in users:
            u_sum += self.l2.get(u, 0)
            if (self.l2.has_key(u)):
                u_ind += u1 - self.l2.keys().index(u)
        if (h_match != 0):
            c = h_match - 1
        # print(h_ind, h1, u_ind, u1, h_sum, u_sum)
        similarity = (h_ind / (h1 + 1)) * (h_sum / sum(self.l1.values() + [1])) \
            + (u_ind / (u1 + 1)) * (u_sum / sum(self.l2.values() + [1])) + c
        return similarity
class GameState(object, metaclass=Singleton):
    def __init__(self):
        self.is_finished = False
        self.active_player = None
        self.other_players = LRU(10)
        self.world_map = LRU(1000)

    @classmethod
    def get_instance(cls):
        return GameState()

    @guard_exception(False)
    @log_exception
    def parse_net_frame(self, net_frame):
        # parse current player
        ap = PlayerState.from_dict(net_frame["active_player"])
        if self.active_player is None:
            self.active_player = ap
        else:
            self.active_player.update(ap)

        # parse other aoi players
        ops = net_frame["other_aoi_players"]
        if len(ops) > self.other_players.get_size():
            self.other_players.set_size(len(ops))
        for op in ops:
            op = PlayerState.from_dict(op)
            if op.name not in self.other_players:
                self.other_players[op.name] = op
            else:
                self.other_players[op.name].update(op)

        # parse world map elements into the LRU-backed map
        wm = net_frame["world_map"]
        if len(wm) > self.world_map.get_size():
            self.world_map.set_size(len(wm))
        for item in wm:
            x, y, val = item
            self.world_map[(x, y)] = val
        return True

    def get_update(self, net_frame):
        ret = self.parse_net_frame(net_frame)
        if ret:
            return self
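# GameState relies on a Singleton metaclass (and guard_exception/log_exception decorators)
# defined elsewhere in the project. Below is a minimal sketch of one common way to write
# such a metaclass; the cache-by-class approach is an assumption, not the project's code.
class Singleton(type):
    """Metaclass that returns the same instance for every constructor call."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]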
def test_capacity_set(self):
    for size in SIZES:
        l = LRU(size)
        for i in range(size + 5):
            l[i] = str(i)
        l.set_size(size + 10)
        self.assertTrue(size + 10 == l.get_size())
        self.assertTrue(len(l) == size)
        for i in range(size + 20):
            l[i] = str(i)
        self.assertTrue(len(l) == size + 10)
        l.set_size(size + 10 - 1)
        self.assertTrue(len(l) == size + 10 - 1)
def test_capacity_get(self):
    for size in SIZES:
        l = LRU(size)
        self.assertTrue(size == l.get_size())
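# The two test methods above assume a surrounding unittest scaffold. A minimal sketch,
# assuming SIZES is a small list of capacities (the values below are illustrative) and
# that LRU comes from the lru-dict package:
import unittest
from lru import LRU

SIZES = [1, 2, 10, 1000]  # assumed capacities under test


class TestLRU(unittest.TestCase):
    # test_capacity_set and test_capacity_get from above would live here.

    def test_empty(self):
        l = LRU(1)
        self.assertEqual([], l.keys())
        self.assertEqual([], l.values())


if __name__ == '__main__':
    unittest.main()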
class FileServer(fileService_pb2_grpc.FileserviceServicer):
    def __init__(self, hostname, server_port, activeNodesChecker, shardingHandler, superNodeAddress):
        self.serverPort = server_port
        self.serverAddress = hostname + ":" + server_port
        self.activeNodesChecker = activeNodesChecker
        self.shardingHandler = shardingHandler
        self.hostname = hostname
        self.lru = LRU(5)
        self.superNodeAddress = superNodeAddress

    #
    # This service gets invoked when the user uploads a new file.
    #
    def UploadFile(self, request_iterator, context):
        print("Inside Server method ---------- UploadFile")
        data = bytes("", 'utf-8')
        username, filename = "", ""
        totalDataSize = 0
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()

        # List to store the info related to file location.
        metaData = []

        # If the node is the leader of the cluster.
        if (int(db.get("primaryStatus")) == 1):
            print("Inside primary upload")
            currDataSize = 0
            currDataBytes = bytes("", 'utf-8')
            seqNo = 1

            # Step 1:
            # Get the 2 least loaded nodes based on the CPU stats.
            # 'node' is where the actual data goes and 'node_replica' is where the replica will go.
            node, node_replica = self.getLeastLoadedNode()

            if (node == -1):
                return fileService_pb2.ack(
                    success=False, message="Error Saving File. No active nodes.")

            # Step 2:
            # Check whether the file already exists; if yes, return with the message 'File already exists'.
            for request in request_iterator:
                username, filename = request.username, request.filename
                print("Key is-----------------", username + "_" + filename)
                if (self.fileExists(username, filename) == 1):
                    print("sending neg ack")
                    return fileService_pb2.ack(
                        success=False,
                        message="File already exists for this user. Please rename or delete file first.")
                break

            # Step 3:
            # Make chunks of size 'UPLOAD_SHARD_SIZE' and start sending the data to the
            # least utilized node through gRPC streaming.
            currDataSize += sys.getsizeof(request.data)
            currDataBytes += request.data

            for request in request_iterator:
                if ((currDataSize + sys.getsizeof(request.data)) > UPLOAD_SHARD_SIZE):
                    response = self.sendDataToDestination(
                        currDataBytes, node, node_replica, username, filename,
                        seqNo, active_ip_channel_dict[node])
                    metaData.append([node, seqNo, node_replica])
                    currDataBytes = request.data
                    currDataSize = sys.getsizeof(request.data)
                    seqNo += 1
                    node, node_replica = self.getLeastLoadedNode()
                else:
                    currDataSize += sys.getsizeof(request.data)
                    currDataBytes += request.data

            if (currDataSize > 0):
                response = self.sendDataToDestination(
                    currDataBytes, node, node_replica, username, filename,
                    seqNo, active_ip_channel_dict[node])
                metaData.append([node, seqNo, node_replica])

            # Step 4:
            # Save the metadata on the primary node after the completion of sharding.
            if (response.success):
                db.saveMetaData(username, filename, metaData)
                db.saveUserFile(username, filename)

            # Step 5:
            # Make a gRPC call to replicate the metadata on all the other nodes.
            self.saveMetadataOnAllNodes(username, filename, metaData)

            return fileService_pb2.ack(success=True, message="Saved")

        # If the node is not the leader.
        else:
            print("Saving the data on my local db")
            sequenceNumberOfChunk = 0
            dataToBeSaved = bytes("", 'utf-8')

            # Gather all the data from the gRPC stream.
            for request in request_iterator:
                username, filename, sequenceNumberOfChunk = request.username, request.filename, request.seqNo
                dataToBeSaved += request.data
            key = username + "_" + filename + "_" + str(sequenceNumberOfChunk)

            # Save the data in the local DB.
            db.setData(key, dataToBeSaved)

            # After saving the chunk in the local DB, make a gRPC call to save the replica of
            # the chunk on a different node, but only if a replicaNode is present.
            if (request.replicaNode != ""):
                print("Sending replication to ", request.replicaNode)
                replica_channel = active_ip_channel_dict[request.replicaNode]
                t1 = Thread(target=self.replicateChunkData,
                            args=(replica_channel, dataToBeSaved, username,
                                  filename, sequenceNumberOfChunk))
                t1.start()
                # stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
                # response = stub.UploadFile(self.sendDataInStream(dataToBeSaved, username, filename, sequenceNumberOfChunk, ""))

            return fileService_pb2.ack(success=True, message="Saved")

    def replicateChunkData(self, replica_channel, dataToBeSaved, username, filename, sequenceNumberOfChunk):
        stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
        response = stub.UploadFile(
            self.sendDataInStream(dataToBeSaved, username, filename,
                                  sequenceNumberOfChunk, ""))

    # This helper method is responsible for sending the data to the destination node through a gRPC stream.
    def sendDataToDestination(self, currDataBytes, node, nodeReplica, username, filename, seqNo, channel):
        if (node == self.serverAddress):
            key = username + "_" + filename + "_" + str(seqNo)
            db.setData(key, currDataBytes)
            if (nodeReplica != ""):
                print("Sending replication to ", nodeReplica)
                active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
                replica_channel = active_ip_channel_dict[nodeReplica]
                stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
                response = stub.UploadFile(
                    self.sendDataInStream(currDataBytes, username, filename, seqNo, ""))
                return response
        else:
            print("Sending the UPLOAD_SHARD_SIZE to node :", node)
            stub = fileService_pb2_grpc.FileserviceStub(channel)
            response = stub.UploadFile(
                self.sendDataInStream(currDataBytes, username, filename, seqNo, nodeReplica))
            print("Response from uploadFile: ", response.message)
            return response

    # This helper method makes chunks of less than 4 MB and streams them through gRPC.
    # 4 MB is the maximum data packet size in gRPC while sending, which is why this is necessary.
    def sendDataInStream(self, dataBytes, username, filename, seqNo, replicaNode):
        chunk_size = 4000000
        start, end = 0, chunk_size
        while (True):
            chunk = dataBytes[start:end]
            if (len(chunk) == 0):
                break
            start = end
            end += chunk_size
            yield fileService_pb2.FileData(username=username,
                                           filename=filename,
                                           data=chunk,
                                           seqNo=seqNo,
                                           replicaNode=replicaNode)

    #
    # This service gets invoked when the user requests an uploaded file.
    #
    def DownloadFile(self, request, context):
        print("Inside Download")

        # If the node is the leader of the cluster.
        if (int(db.get("primaryStatus")) == 1):
            print("Inside primary download")

            # Check if the file exists.
            if (self.fileExists(request.username, request.filename) == 0):
                print("File does not exist")
                yield fileService_pb2.FileData(username=request.username,
                                               filename=request.filename,
                                               data=bytes("", 'utf-8'),
                                               seqNo=0)
                return

            # If the file is present in the cache, just fetch it and return.
            # No need to go to an individual node.
            if (self.lru.has_key(request.username + "_" + request.filename)):
                print("Fetching data from Cache")
                CHUNK_SIZE = 4000000
                fileName = request.username + "_" + request.filename
                filePath = self.lru[fileName]
                outfile = os.path.join(filePath, fileName)
                with open(outfile, 'rb') as infile:
                    while True:
                        chunk = infile.read(CHUNK_SIZE)
                        if not chunk:
                            break
                        yield fileService_pb2.FileData(username=request.username,
                                                       filename=request.filename,
                                                       data=chunk,
                                                       seqNo=1)

            # If the file is not present in the cache, fetch it from the individual nodes.
            else:
                print("Fetching the metadata")

                # Step 1: get the metadata, i.e. the location of the chunks.
                metaData = db.parseMetaData(request.username, request.filename)
                print(metaData)

                # Step 2: make gRPC calls and get the fileData from all the nodes.
                downloadHelper = DownloadHelper(self.hostname, self.serverPort,
                                                self.activeNodesChecker)
                data = downloadHelper.getDataFromNodes(request.username,
                                                       request.filename, metaData)
                print("Sending the data to client")

                # Step 3: send the file to the supernode using gRPC streaming.
                chunk_size = 4000000
                start, end = 0, chunk_size
                while (True):
                    chunk = data[start:end]
                    if (len(chunk) == 0):
                        break
                    start = end
                    end += chunk_size
                    yield fileService_pb2.FileData(username=request.username,
                                                   filename=request.filename,
                                                   data=chunk,
                                                   seqNo=request.seqNo)

                # Step 4: update the cache based on the LRU (least recently used) algorithm.
                self.saveInCache(request.username, request.filename, data)

        # If the node is not the leader, just fetch the file chunk from the local db
        # and stream it back to the leader.
        else:
            key = request.username + "_" + request.filename + "_" + str(request.seqNo)
            print(key)
            data = db.getFileData(key)
            chunk_size = 4000000
            start, end = 0, chunk_size
            while (True):
                chunk = data[start:end]
                if (len(chunk) == 0):
                    break
                start = end
                end += chunk_size
                yield fileService_pb2.FileData(username=request.username,
                                               filename=request.filename,
                                               data=chunk,
                                               seqNo=request.seqNo)

    # This service is responsible for fetching all the files of a user.
    def FileList(self, request, context):
        print("File List Called")
        userFiles = db.getUserFiles(request.username)
        return fileService_pb2.FileListResponse(Filenames=str(userFiles))

    # This helper method checks whether the file is present in the db or not.
    def fileExists(self, username, filename):
        print("isFile Present", db.keyExists(username + "_" + filename))
        return db.keyExists(username + "_" + filename)

    # This helper method returns the 2 least loaded nodes from the cluster.
    def getLeastLoadedNode(self):
        print("Ready to enter sharding handler")
        node, node_replica = self.shardingHandler.leastUtilizedNode()
        print("Least loaded node is :", node)
        print("Replica node - ", node_replica)
        return node, node_replica

    # This helper method replicates the metadata on all nodes.
    def saveMetadataOnAllNodes(self, username, filename, metadata):
        print("saveMetadataOnAllNodes")
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
        uniqueFileName = username + "_" + filename
        for ip, channel in active_ip_channel_dict.items():
            if (self.isChannelAlive(channel)):
                stub = fileService_pb2_grpc.FileserviceStub(channel)
                response = stub.MetaDataInfo(
                    fileService_pb2.MetaData(
                        filename=uniqueFileName,
                        seqValues=str(metadata).encode('utf-8')))
                print(response.message)

    # This service is responsible for saving the metadata in the local db.
    def MetaDataInfo(self, request, context):
        print("Inside Metadatainfo")
        fileName = request.filename
        seqValues = request.seqValues
        db.saveMetaDataOnOtherNodes(fileName, seqValues)
        ack_message = "Successfully saved the metadata on " + self.serverAddress
        return fileService_pb2.ack(success=True, message=ack_message)

    # This helper method checks whether the created channel is alive or not.
    def isChannelAlive(self, channel):
        try:
            grpc.channel_ready_future(channel).result(timeout=1)
        except grpc.FutureTimeoutError:
            # print("Connection timeout. Unable to connect to port ")
            return False
        return True

    # This helper method is responsible for updating the cache for faster lookups.
    def saveInCache(self, username, filename, data):
        if (len(self.lru.items()) >= self.lru.get_size()):
            fileToDel, path = self.lru.peek_last_item()
            os.remove(path + "/" + fileToDel)
        self.lru[username + "_" + filename] = "cache"
        filePath = os.path.join('cache', username + "_" + filename)
        saveFile = open(filePath, 'wb')
        saveFile.write(data)
        saveFile.close()

    # This service is responsible for sending the whole cluster's stats to the superNode.
    def getClusterStats(self, request, context):
        print("Inside getClusterStats")
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
        total_cpu_usage, total_disk_space, total_used_mem = 0.0, 0.0, 0.0
        total_nodes = 0
        for ip, channel in active_ip_channel_dict.items():
            if (self.isChannelAlive(channel)):
                stub = heartbeat_pb2_grpc.HearBeatStub(channel)
                stats = stub.isAlive(heartbeat_pb2.NodeInfo(ip="", port=""))
                total_cpu_usage = float(stats.cpu_usage)
                total_disk_space = float(stats.disk_space)
                total_used_mem = float(stats.used_mem)
                total_nodes += 1
        if (total_nodes == 0):
            return fileService_pb2.ClusterStats(cpu_usage=str(100.00),
                                                disk_space=str(100.00),
                                                used_mem=str(100.00))
        return fileService_pb2.ClusterStats(
            cpu_usage=str(total_cpu_usage / total_nodes),
            disk_space=str(total_disk_space / total_nodes),
            used_mem=str(total_used_mem / total_nodes))

    # This service is responsible for sending the leader info to the superNode as soon as the leader changes.
    def getLeaderInfo(self, request, context):
        channel = grpc.insecure_channel('{}'.format(self.superNodeAddress))
        stub = fileService_pb2_grpc.FileserviceStub(channel)
        response = stub.getLeaderInfo(
            fileService_pb2.ClusterInfo(ip=self.hostname,
                                        port=self.serverPort,
                                        clusterName="team1"))
        print(response.message)

    #
    # This service gets invoked when the user deletes a file.
    #
    def FileDelete(self, request, data):
        username = request.username
        filename = request.filename

        if (int(db.get("primaryStatus")) == 1):
            if (self.fileExists(username, filename) == 0):
                print("File does not exist")
                return fileService_pb2.ack(success=False,
                                           message="File does not exist")

            print("Fetching metadata from leader")
            metadata = db.parseMetaData(request.username, request.filename)
            print("Successfully retrieved metadata from leader")

            deleteHelper = DeleteHelper(self.hostname, self.serverPort,
                                        self.activeNodesChecker)
            deleteHelper.deleteFileChunksAndMetaFromNodes(username, filename, metadata)

            return fileService_pb2.ack(
                success=True,
                message="Successfully deleted file from the cluster")
        else:
            seqNo = -1
            try:
                seqNo = request.seqNo
            except:
                return fileService_pb2.ack(success=False, message="Internal Error")

            metaDataKey = username + "_" + filename
            dataChunkKey = username + "_" + filename + "_" + str(seqNo)

            if (db.keyExists(metaDataKey) == 1):
                print("FileDelete: Deleting the metadataEntry from local db :")
                db.deleteEntry(metaDataKey)
            if (db.keyExists(dataChunkKey)):
                print("FileDelete: Deleting the data chunk from local db: ")
                db.deleteEntry(dataChunkKey)

            return fileService_pb2.ack(
                success=True,
                message="Successfully deleted file from the cluster")

    #
    # This service gets invoked when the user wants to check whether a file is present.
    #
    def FileSearch(self, request, data):
        username, filename = request.username, request.filename

        if (self.fileExists(username, filename) == 1):
            return fileService_pb2.ack(success=True,
                                       message="File exists in the cluster.")
        else:
            return fileService_pb2.ack(
                success=False, message="File does not exist in the cluster.")

    #
    # This service gets invoked when the user wants to update a file.
    #
    def UpdateFile(self, request_iterator, context):
        username, filename = "", ""
        fileData = bytes("", 'utf-8')
        for request in request_iterator:
            fileData += request.data
            username, filename = request.username, request.filename

        def getFileChunks(fileData):
            # Maximum chunk size that can be sent
            CHUNK_SIZE = 4000000
            outfile = os.path.join('files', filename)
            sTime = time.time()
            start, end = 0, CHUNK_SIZE
            while True:
                chunk = fileData[start:end]
                if not chunk:
                    break
                start = end
                end += CHUNK_SIZE
                yield fileService_pb2.FileData(username=username,
                                               filename=filename,
                                               data=chunk,
                                               seqNo=1)
            print("Time for upload= ", time.time() - sTime)

        if (int(db.get("primaryStatus")) == 1):
            channel = grpc.insecure_channel('{}'.format(self.serverAddress))
            stub = fileService_pb2_grpc.FileserviceStub(channel)
            response1 = stub.FileDelete(
                fileService_pb2.FileInfo(username=username, filename=filename))

            if (response1.success):
                response2 = stub.UploadFile(getFileChunks(fileData))
                if (response2.success):
                    return fileService_pb2.ack(success=True,
                                               message="File successfully updated.")
                else:
                    return fileService_pb2.ack(success=False, message="Internal error.")
            else:
                return fileService_pb2.ack(success=False, message="Internal error.")
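# A sketch of how a servicer like this is usually wired into a gRPC server. The thread-pool
# size, the arguments passed to FileServer, and the bind address are assumptions; the
# registration helper is presumed to follow gRPC's generated add_<Service>Servicer_to_server
# naming for the Fileservice service.
from concurrent import futures
import grpc


def serve(hostname, port, activeNodesChecker, shardingHandler, superNodeAddress):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    servicer = FileServer(hostname, port, activeNodesChecker, shardingHandler, superNodeAddress)
    fileService_pb2_grpc.add_FileserviceServicer_to_server(servicer, server)
    server.add_insecure_port(hostname + ":" + port)
    server.start()
    server.wait_for_termination()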
class topic4:
    def __init__(self, c_hash, c_user, c_words):
        self.topic_count = 1
        # self.time = (self.first, self.last)
        self.l1 = LRU(c_hash)
        self.first = ""
        self.last = ""
        self.lats = []
        self.longs = []
        self.l2 = LRU(c_user)
        self.l3 = LRU(c_words)
        self.l4 = LRU(400)

    def set_hashLRU(self, l):
        self.set(self.l1, l)

    def set_userLRU(self, l):
        self.set(self.l2, l)

    def set_wordLRU(self, l):
        self.set(self.l3, l)

    def set(self, lru, l):
        for k in l:
            v = lru.get(k, 0)
            lru[k] = v + 1

    def set_cluster(self, hashtags, users, words, links, cords):
        for k in hashtags:
            self.l1[k] = self.l1.get(k, 0) + 1
        for k in users:
            self.l2[k] = self.l2.get(k, 0) + 1
        for k in words:
            self.l3[k] = self.l3.get(k, 0) + 1
        for k in links:
            self.l4[k] = self.l4.get(k, 0) + 1
        if (cords is not None):
            self.lats.append(cords["coordinates"][1])
            self.longs.append(cords["coordinates"][0])
        self.topic_count += 1

    def get_similarity(self, hashtags, users, words):
        h_sum = 1
        u_sum = 1
        w_sum = 1
        h_match = 0
        h_ind = 0
        u_ind = 0
        w_ind = 0
        c = 0
        h1 = self.l1.get_size()
        u1 = self.l2.get_size()
        w1 = self.l3.get_size()
        for h in hashtags:
            # l1_items = zip(*self.l1.items())
            h_sum += self.l1.get(h, 0)
            if (self.l1.has_key(h)):
                ind = self.l1.keys().index(h)
                h_ind += h1 - ind
                h_match += 1 if ind < 250 else 0
        for u in users:
            u_sum += self.l2.get(u, 0)
            if (self.l2.has_key(u)):
                u_ind += u1 - self.l2.keys().index(u)
        for w in words:
            w_sum += self.l3.get(w, 0)
            if (self.l3.has_key(w)):
                w_ind += w1 - self.l3.keys().index(w)
        if (h_match != 0):
            c = h_match - 1
        # print(h_ind, h1, u_ind, u1, w_ind, w1, h_sum, w_sum)
        similarity = (h_ind / (h1 + 1)) * (h_sum / sum(self.l1.values() + [1])) \
            + (u_ind / (u1 + 1)) * (u_sum / sum(self.l2.values() + [1])) \
            + (w_ind / (w1 + 1)) * (w_sum / sum(self.l3.values() + [1])) + c
        return similarity

    def flush1(self, cache, size):
        # Keep only the most recently used keys and reset their counts to 1.
        if (len(cache.keys()) > 5):
            tokens = reversed(cache.keys()[:5])
            cache.clear()
            for i in tokens:
                cache[i] = 1

    def flush(self):
        self.flush1(self.l1, 500)
        self.flush1(self.l2, 500)
        self.flush1(self.l3, 3500)
        self.topic_count = 1
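# A minimal sketch of how a topic4 cluster might be fed and queried. The capacities and
# token lists below are illustrative assumptions, not values from the original project.
from lru import LRU  # pip install lru-dict

topic = topic4(c_hash=500, c_user=500, c_words=3500)
topic.set_cluster(hashtags=["#python"], users=["alice"],
                  words=["cache", "lru"], links=[], cords=None)

# Higher scores mean the incoming tokens sit near the MRU end of the topic's LRU caches.
score = topic.get_similarity(["#python"], ["bob"], ["cache"])
print(score)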
l = LRU(5)              # (assumed setup implied by the output below: a capacity-5 LRU
for i in range(1, 6):   #  filled with l[i] = str(i) for i in 1..5)
    l[i] = str(i)

print(l.items())        # Would print [(5, '5'), (4, '4'), (3, '3'), (2, '2'), (1, '1')]

l[3]                    # Accessing an item makes it MRU
print(l.items())        # Would print [(3, '3'), (5, '5'), (4, '4'), (2, '2'), (1, '1')]
                        # Now 3 is in front

print(l.keys())         # Can get keys alone, in MRU order
                        # Would print [3, 5, 4, 2, 1]

del l[4]                # Delete an item
print(l.items())        # Would print [(3, '3'), (5, '5'), (2, '2'), (1, '1')]

print(l.get_size())     # Would print 5

l.set_size(3)
print(l.items())        # Would print [(3, '3'), (5, '5'), (2, '2')]
print(l.get_size())     # Would print 3

print(l.has_key(5))     # Would print True
print(2 in l)           # Would print True

print(l.get_stats())    # Would print (1, 0)
class Cache:
    """Class representing D3N."""

    # Replacement policies
    LRU = "LRU"
    LFU = "LFU"
    LRU_S = "LRU_S"
    FIFO = "FIFO"
    RAND = "RAND"

    # Write policies
    WRITE_BACK = "WB"
    WRITE_THROUGH = "WT"

    # Layer
    L1 = "L1"
    L2 = "L2"

    # Hashing schemes
    consistent = "consistent"
    rendezvous = "rendezvous"
    rr = "rr"

    def __init__(self, layer, size, replace_pol, write_pol, hash_ring,
                 hash_type, obj_size, full_size, logger):
        self._replace_pol = replace_pol  # Replacement policy
        self._write_pol = write_pol      # Write policy
        self._layer = layer              # Layer info
        self._size = size                # Cache size
        self.spaceLeft = size            # Cache size
        self._logger = logger
        self.hashmap = {}                # Mapping
        self.hash_ring = hash_ring
        self._hash_type = hash_type
        self._obj_size = obj_size

        if (self._size == 0):
            self.zerosize = True
            self._size = 1
        else:
            self.zerosize = False

        if (self._replace_pol == Cache.LRU):
            self.cache = LRU(self._size)
        elif (self._replace_pol == Cache.FIFO):
            self.cache = deque()
        elif (self._replace_pol == Cache.LRU_S):
            self.cache = LRU(self._size)
            self.shadow = LRU(full_size)
            self.hist = []
            for i in range(full_size):
                self.hist.append(0)

        # Statistics
        self._hit_count = 0
        self._miss_count = 0
        self._backend_bw = 0
        self._crossrack_bw = 0
        self._intrarack_bw = 0
        self.miss_lat = 0
        self.lat_count = 0

    def _insert1(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.shadow[key] = 1

            if (int(size) <= self.spaceLeft):
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)
            else:
                while (int(size) > self.spaceLeft):
                    self._evict()
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)

    def _insert(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.cache[key] = int(size)
                self.shadow[key] = int(size)
            elif (self._replace_pol == Cache.LRU):
                self.cache[key] = int(size)
            else:
                if (int(size) <= self.spaceLeft):
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)
                else:
                    while (int(size) > self.spaceLeft):
                        self._evict()
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)

    def read1(self, key, size):
        """Read an object from the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return None
        r = None
        if (self._replace_pol == Cache.LRU_S):
            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
            self.shadow[key] = 1

        if key in self.hashmap:
            if (self._replace_pol == Cache.LRU):
                self._update_use(key)
            elif (self._replace_pol == Cache.LRU_S):
                self._update_use(key)
            self._hit_count += 1
            r = 1
        else:
            self._miss_count += 1
        return r

    def read(self, key, size):
        """Read an object from the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return None
        r = None
        if (self._replace_pol == Cache.LRU_S):
            if self.cache.has_key(key):
                self._hit_count += 1
                self.cache[key] = self.cache[key]
                r = 1
            else:
                self._miss_count += 1
                if self.shadow.has_key(key):
                    count = 0
                    for i in self.shadow.keys():
                        if i == key:
                            self.hist[count] += 1
                            break
                        count += 1
                self.shadow[key] = 1
        else:
            if key in self.hashmap:
                if (self._replace_pol == Cache.LRU):
                    self._update_use(key)
                elif (self._replace_pol == Cache.LRU_S):
                    self._update_use(key)
                self._hit_count += 1
                r = 1
            else:
                self._miss_count += 1
        return r

    def checkKey(self, key):
        """Check whether an object is currently cached."""
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return 0
        r = 0
        if (self._replace_pol == Cache.LRU_S) or (self._replace_pol == Cache.LRU):
            if self.cache.has_key(key):
                r = 1
            else:
                r = 0
        return r

    def _evict(self):
        if (self._replace_pol == Cache.LRU):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.LRU_S):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.FIFO):
            id = self.cache.popleft()
        self.spaceLeft += int(self.hashmap[id])
        del self.hashmap[id]

    def _update_use(self, key):
        """Update the use of a cache."""
        if (self._replace_pol == Cache.LRU):
            self.cache[key] = self.hashmap[key]
        if (self._replace_pol == Cache.LRU_S):
            self.cache[key] = self.hashmap[key]

    def set_cache_size(self, size):
        new_size = self.cache.get_size() + int(size)
        self.cache.set_size(int(new_size))

    def set_backend_bw(self, value):
        self._backend_bw += value

    def set_crossrack_bw(self, value):
        self._crossrack_bw += value

    def set_intrarack_bw(self, value):
        self._intrarack_bw += value

    def get_backend_bw(self):
        return self._backend_bw

    def get_crossrack_bw(self):
        return self._crossrack_bw

    def get_intrarack_bw(self):
        return self._intrarack_bw

    def get_replace_pol(self):
        return self._replace_pol

    def get_hit_count(self):
        return self._hit_count

    def get_miss_count(self):
        return self._miss_count

    def get_available_space(self):
        return self.spaceLeft

    def get_replace_poll(self):
        return self._replace_pol

    def reset_shadow_cache(self):
        self.shadow.clear()

    def print_cache(self):
        print(self.cache)

    def get_l2_address(self, key):
        if (self._hash_type == Cache.consistent):
            return self.hash_ring.get_node(key)
        elif (self._hash_type == Cache.rendezvous):
            return self.hash_ring.find_node(key)
        elif (self._hash_type == Cache.rr):
            val = key.split("_")[1]
            res = int(val) % int(self.hash_ring)
            return res
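# A minimal sketch of driving this simulator cache. The constructor arguments below
# (a 100-unit L1 cache, a None hash ring, a stdlib logger) and the object keys/sizes
# are illustrative assumptions; _insert1 is used here because it also maintains the
# hashmap and spaceLeft accounting that read() relies on.
import logging
from collections import deque  # required by the FIFO policy
from lru import LRU

log = logging.getLogger("d3n")

c = Cache(layer=Cache.L1, size=100, replace_pol=Cache.LRU, write_pol=Cache.WRITE_BACK,
          hash_ring=None, hash_type=Cache.consistent, obj_size=10, full_size=1000,
          logger=log)

c._insert1("user1_42", 10)      # admit an object of size 10
print(c.read("user1_42", 10))   # 1    -> hit
print(c.read("user1_43", 10))   # None -> miss
print(c.get_hit_count(), c.get_miss_count())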
class Streamer:
    """ streamer for flows management """
    num_streamers = 0

    def __init__(self, source=None, capacity=128000, active_timeout=120,
                 inactive_timeout=60, user_metrics=None, user_classifiers=None,
                 enable_ndpi=True):
        Streamer.num_streamers += 1
        self.__exports = []
        self.source = source
        self.__flows = LRU(capacity, callback=emergency_callback)  # LRU cache
        self._capacity = self.__flows.get_size()  # Streamer capacity (default: 128000)
        self.active_timeout = active_timeout      # expiration active timeout
        self.inactive_timeout = inactive_timeout  # expiration inactive timeout
        self.current_flows = 0                    # counter for stored flows
        self.flows_number = 0
        self.current_tick = 0                     # current timestamp
        self.processed_packets = 0                # processed packets counter
        # Python dictionaries to hold current and archived flow records
        self.flow_cache = OrderedDict()
        self.user_classifiers = {}
        if user_classifiers is not None:
            try:
                classifier_iterator = iter(user_classifiers)
                for classifier in classifier_iterator:
                    if isinstance(classifier, NFStreamClassifier):
                        self.user_classifiers[classifier.name] = classifier
            except TypeError:
                self.user_classifiers[user_classifiers.name] = user_classifiers
        self.user_metrics = {}
        if enable_ndpi:
            ndpi_classifier = NDPIClassifier('ndpi')
            self.user_classifiers[ndpi_classifier.name] = ndpi_classifier
        if user_metrics is not None:
            self.user_metrics = user_metrics

    def _get_capacity(self):
        """ getter for capacity attribute """
        return self.__flows.get_size()

    def _set_capacity(self, new_size):
        """ setter for capacity size attribute """
        return self.__flows.set_size(new_size)

    capacity = property(_get_capacity, _set_capacity)

    def terminate(self):
        """ terminate all entries in Streamer """
        remaining_flows = True
        while remaining_flows:
            try:
                key, value = self.__flows.peek_last_item()
                value.export_reason = 2
                self.exporter(value)
            except TypeError:
                remaining_flows = False

        for classifier_name, classifier in self.user_classifiers.items():
            self.user_classifiers[classifier_name].on_exit()

    def exporter(self, flow):
        """ export method for a flow trigger_type: 0(inactive), 1(active), 2(flush) """
        # Look for the flow in the created classifiers
        for classifier_name, classifier in self.user_classifiers.items():
            # Terminate the flow in the respective classifiers
            self.user_classifiers[classifier_name].on_flow_terminate(flow)
        # Delete the flow register from the active flows collection
        del self.__flows[flow.key]
        # Decrease the number of active flows by 1
        self.current_flows -= 1
        # Add the expired flow register to the final flows collection
        self.__exports.append(flow)

    def inactive_watcher(self):
        """ inactive expiration management """
        remaining_inactives = True
        # While there are inactive flow registers
        while remaining_inactives:
            try:
                # Obtain the last flow register (Least Recently Used - LRU) in the variable value using its key
                key, value = self.__flows.peek_last_item()
                # Has the flow exceeded the inactive timeout (1 minute)?
                if (self.current_tick - value.end_time) >= (self.inactive_timeout * 1000):
                    # Set export reason to 0 (inactive) in the flow
                    value.export_reason = 0
                    # Export the flow to the final flows collection
                    self.exporter(value)
                # There are no flows that can be declared inactive yet
                else:
                    # Stop the inactive watcher until it is called again
                    remaining_inactives = False
            except TypeError:
                remaining_inactives = False

    def consume(self, pkt_info):
        """ consume a packet and update Streamer status """
        self.processed_packets += 1  # increment total processed packet counter
        # Obtain a flow hash key for identification of the flow
        key = get_flow_key(pkt_info)
        print("\nCONSUMING PACKET FROM FLOW:", key)
        # Is this packet from a registered flow?
        if key in self.__flows:
            print("FLOW FOUND - UPDATING STATISTICS")
            # Checking current status of the flow that the packet belongs to
            # -1 active flow - 0 inactive flow - 1 active flow timeout expired - 2 flush remaining flows in LRU
            # 3 FIN flag detected - 4 RST flag detected
            flow_status = self.__flows[key].update_and_check_flow_status(
                pkt_info, self.active_timeout, self.user_classifiers, self.user_metrics)
            # Has the active timeout of the flow register expired (2 minutes)?
            if (flow_status == 1):
                # Export the old flow register to the final collection and terminate this
                # flow process on the specified classifier
                self.exporter(self.__flows[key])
                # Create a new flow register for the current packet
                flow = Flow(pkt_info, self.user_classifiers, self.user_metrics, self.flow_cache)
                # Add the new flow to the active flows collection using the same hash key
                self.__flows[flow.key] = flow
                # Create the entry on the flow_cache with the flow key
                del self.flow_cache[flow.key]
                self.flow_cache[flow.key] = {}
                # Update the flow status on the collection
                flow.create_new_flow_record(pkt_info, self.user_classifiers, self.user_metrics)
            if (flow_status == 3):  # FIN FLAG DETECTED IN BOTH DIRECTIONS - EXPORTING FLOW
                self.exporter(self.__flows[key])
            if (flow_status == 4):  # RST FLAG FOUND - UPDATING BIDIRECTIONAL STATISTICS - EXPORTING FLOW
                self.exporter(self.__flows[key])
            if (flow_status == 5):  # FIN FLAG TIMER EXPIRED
                self.exporter(self.__flows[key])
                print("****FLOW EXPORTED")
                """
                expired_flow = self.__flows[key]
                print("****STARTING TCP TIMER")
                threading.Timer(20, self.export_incomplete_flow(expired_flow))
                """
        # This packet belongs to a new flow
        else:
            # Increase the count of current active flows
            print("FLOW NOT FOUND - CREATING NEW FLOW REGISTER")
            # Update flow counters
            self.current_flows += 1
            self.flows_number += 1
            # Create the new flow object
            flow = Flow(pkt_info, self.user_classifiers, self.user_metrics, self.flow_cache)
            # Add this new flow register to the LRU
            self.__flows[flow.key] = flow
            # Create the entry on the flow_cache with the flow key
            self.flow_cache[flow.key] = {}
            # Create the new bidirectional flow record
            flow.create_new_flow_record(pkt_info, self.user_classifiers, self.user_metrics)
            # Set the current start time on the streamer timer to keep control of the inactive flows
            self.current_tick = flow.start_time
        # Remove the Least Recently Used (LRU) flow record from the active flows collection
        # and export it to the final flows collection if its inactive timeout has been exceeded
        self.inactive_watcher()
        print("*******************PACKET CONSUMED - MOVING TO NEXT*********************************")

    """
    def export_incomplete_flow(self, expired_flow):
        print("##############################---TCP TIMER EXPIRED--#######################")
        # Look for the flow in the created classifiers
        self.flows_number += 1
        for classifier_name, classifier in self.user_classifiers.items():
            # Terminate the flow in the respective classifiers
            self.user_classifiers[classifier_name].on_flow_terminate(expired_flow)
        self.__exports.append(expired_flow)
        print("##############################---EXPIRED FLOW EXPORTED-----###############################")
    """

    def __iter__(self):
        # Create the packet information generator
        pkt_info_gen = Observer(source=self.source)
        # Extract each packet's information from the network interface or pcap file
        for pkt_info in pkt_info_gen:
            if pkt_info is not None:
                # Check if the packet belongs to an existent flow or create a new one
                self.consume(pkt_info)
                for export in self.__exports:
                    yield export
                self.__exports = []
        # Terminate the streamer
        self.terminate()
        for export in self.__exports:
            yield export
        self.__exports = []
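# A minimal sketch of how this streamer is typically driven. The pcap file name is a
# placeholder, and printing flow.key / flow.export_reason assumes only attributes the
# class above already sets on exported Flow registers.
streamer = Streamer(source="traffic.pcap", capacity=128000,
                    active_timeout=120, inactive_timeout=60)

for flow in streamer:
    # Each yielded object is a terminated flow register; export_reason is set by the streamer.
    print(flow.key, flow.export_reason)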
conn = sqlite3.connect('terms.db')
c = conn.cursor()
c.execute(
    '''CREATE TABLE IF NOT EXISTS rejected (word TEXT PRIMARY KEY, reason TEXT)''')
conn.commit()

c.execute('''SELECT COUNT(*) FROM rejected''')
print("\tthere are", c.fetchone()[0], "words blocked in database")
c.execute('''SELECT reason, COUNT(*) FROM rejected GROUP BY reason''')
for row in c.fetchall():
    print('\t\t', row[0], ':\t', row[1])

cache_rejected = LRU(50000)

# fill in the cache with entries
c.execute('''SELECT * FROM rejected ORDER BY RANDOM() LIMIT ?''',
          (int(cache_rejected.get_size() * 2 / 3), ))
for row in c:
    cache_rejected[row[0]] = row[1]
print("\tloaded", len(cache_rejected), "items in cache")


def is_word_rejected_db(token):
    global c
    global stats
    global cache_rejected
    # first try the cache
    if token.lemma_ in cache_rejected:
        stats['words rejected db (cache)'] = stats['words rejected db (cache)'] + 1
        return cache_rejected[token.lemma_]