Example #1
class topic4:
    def __init__(self, c_hash, c_user, c_words):
        self.topic_count =1
        self.l1 = LRU(c_hash)
        self.l2 = LRU(c_user)

    def set_hashLRU(self,l):
        self.set(self.l1, l)

    def set_userLRU(self,l):
        self.set(self.l2, l)



    def set(self, lru, l):
        for k in l:
            v = lru.get(k,0)
            lru[k]=v+1

    def set_cluster(self, hashtags, users, words):
        for k in hashtags:
            self.l1[k]=self.l1.get(k,0)+1
        for k in users:
            self.l2[k]=self.l2.get(k,0)+1

        self.topic_count+=1

    def get_similarity(self,hashtags,users,words):
        h_sum = 1
        u_sum = 1
        w_sum = 1
        h_match =0
        h_ind =0
        u_ind =0
        w_ind =0
        c=0
        h1 = self.l1.get_size()
        u1 = self.l2.get_size()
        for h in hashtags:
            # l1_items=zip(*self.l1.items())
            h_sum+= self.l1.get(h,0)
            if(self.l1.has_key(h)):
                ind = self.l1.keys().index(h)
                h_ind+= h1 - ind
                h_match+= 1 if ind<250 else 0
        for u in users:
            u_sum+= self.l2.get(u,0)
            if(self.l2.has_key(u)):
                u_ind+= u1 - self.l2.keys().index(u)

        if(h_match !=0):
            c = h_match -1
        # print(h_ind,h1,u_ind,u1,w_ind,w1, h_sum,w_sum,)
        similarity = (h_ind/(h1+1))*(h_sum/sum(self.l1.values() +[1])) + (u_ind/(u1+1))*(u_sum/sum(self.l2.values()+[1]))  +c
        return similarity
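
A minimal usage sketch for the class above, assuming LRU comes from the lru-dict package; the capacities and sample tokens below are hypothetical, not taken from the original caller:

from lru import LRU  # assumed import for the LRU cache used by topic4

# Hypothetical capacities and tokens, for illustration only.
topic = topic4(c_hash=500, c_user=500, c_words=3500)
topic.set_cluster(["#python"], ["alice"], ["cache"])

# Higher scores mean more overlap with the hashtags/users seen so far.
score = topic.get_similarity(["#python"], ["alice"], ["cache"])
print(score)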
Example #2
class GameState(object, metaclass=Singleton):
    def __init__(self):
        self.is_finished = False
        self.active_player = None
        self.other_players = LRU(10)
        self.world_map = LRU(1000)

    @classmethod
    def get_instance(cls):
        return cls()

    @guard_exception(False)
    @log_exception
    def parse_net_frame(self, net_frame):
        # parse current player
        ap = PlayerState.from_dict(net_frame["active_player"])
        if self.active_player is None:
            self.active_player = ap
        else:
            self.active_player.update(ap)

        # parse other aoi players
        ops = net_frame["other_aoi_players"]
        if len(ops) > self.other_players.get_size():
            self.other_players.set_size(len(ops))

        for op in ops:
            op = PlayerState.from_dict(op)
            if op.name not in self.other_players:
                self.other_players[op.name] = op
            else:
                self.other_players[op.name].update(op)

        # parse world map element
        wm = net_frame["world_map"]
        if len(wm) > self.world_map.get_size():
            self.world_map.set_size(len(wm))
        for item in wm:
            x, y, val = item
            self.world_map[(x, y)] = val

        return True

    def get_update(self, net_frame):
        ret = self.parse_net_frame(net_frame)
        if ret:
            return self
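
The GameState example above relies on a Singleton metaclass (plus guard_exception/log_exception decorators) that is not shown. As an assumption rather than the original code, a minimal metaclass compatible with the class GameState(object, metaclass=Singleton) declaration could look like this:

class Singleton(type):
    # Sketch only: hands back one shared instance per class.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        # Create the instance on first use, then keep returning the same object.
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]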
Example #3
def test_capacity_set(self):
    for size in SIZES:
        l = LRU(size)
        for i in range(size+5):
            l[i] = str(i)
        l.set_size(size+10)
        self.assertTrue(size+10 == l.get_size())
        self.assertTrue(len(l) == size)
        for i in range(size+20):
            l[i] = str(i)
        self.assertTrue(len(l) == size+10)
        l.set_size(size+10-1)
        self.assertTrue(len(l) == size+10-1)
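
The test above exercises lru-dict's resize semantics: get_size() reports the capacity, growing the capacity keeps the current entries, and shrinking evicts from the least recently used end until the cache fits. A small sketch of that behaviour (sizes chosen arbitrarily):

from lru import LRU

l = LRU(2)
l[1] = "1"
l[2] = "2"
l[3] = "3"        # capacity is 2, so key 1 is evicted
l.set_size(3)     # growing does not bring evicted items back; len(l) is still 2
l[4] = "4"        # now keys 2, 3 and 4 all fit
l.set_size(2)     # shrinking evicts the least recently used key, 2
print(l.items())  # [(4, '4'), (3, '3')]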
Example #4
def test_capacity_get(self):
    for size in SIZES:
        l = LRU(size)
        self.assertTrue(size == l.get_size())
Example #5
class FileServer(fileService_pb2_grpc.FileserviceServicer):
    def __init__(self, hostname, server_port, activeNodesChecker,
                 shardingHandler, superNodeAddress):
        self.serverPort = server_port
        self.serverAddress = hostname + ":" + server_port
        self.activeNodesChecker = activeNodesChecker
        self.shardingHandler = shardingHandler
        self.hostname = hostname
        self.lru = LRU(5)
        self.superNodeAddress = superNodeAddress

    #
    #   This service gets invoked when user uploads a new file.
    #
    def UploadFile(self, request_iterator, context):
        print("Inside Server method ---------- UploadFile")
        data = bytes("", 'utf-8')
        username, filename = "", ""
        totalDataSize = 0
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()

        # list to store the info related to file location.
        metaData = []

        # If the node is the leader of the cluster.
        if (int(db.get("primaryStatus")) == 1):
            print("Inside primary upload")
            currDataSize = 0
            currDataBytes = bytes("", 'utf-8')
            seqNo = 1

            # Step 1:
            # Get 2 least loaded nodes based on the CPU stats.
            # 'Node' is where the actual data goes and 'node_replica' is where replica will go.
            node, node_replica = self.getLeastLoadedNode()

            if (node == -1):
                return fileService_pb2.ack(
                    success=False,
                    message="Error Saving File. No active nodes.")

            # Step 2:
            # Check whether file already exists, if yes then return with message 'File already exists'.
            for request in request_iterator:
                username, filename = request.username, request.filename
                print("Key is-----------------", username + "_" + filename)
                if (self.fileExists(username, filename) == 1):
                    print("sending neg ack")
                    return fileService_pb2.ack(
                        success=False,
                        message=
                        "File already exists for this user. Please rename or delete file first."
                    )
                break

            # Step 3:
            # Make chunks of size 'UPLOAD_SHARD_SIZE' and start sending the data to the least utilized node through gRPC streaming.
            currDataSize += sys.getsizeof(request.data)
            currDataBytes += request.data

            for request in request_iterator:

                if ((currDataSize + sys.getsizeof(request.data)) >
                        UPLOAD_SHARD_SIZE):
                    response = self.sendDataToDestination(
                        currDataBytes, node, node_replica, username, filename,
                        seqNo, active_ip_channel_dict[node])
                    metaData.append([node, seqNo, node_replica])
                    currDataBytes = request.data
                    currDataSize = sys.getsizeof(request.data)
                    seqNo += 1
                    node, node_replica = self.getLeastLoadedNode()
                else:
                    currDataSize += sys.getsizeof(request.data)
                    currDataBytes += request.data

            if (currDataSize > 0):
                response = self.sendDataToDestination(
                    currDataBytes, node, node_replica, username, filename,
                    seqNo, active_ip_channel_dict[node])
                metaData.append([node, seqNo, node_replica])

            # Step 4:
            # Save the metadata on the primary node after the completion of sharding.
            if (response.success):
                db.saveMetaData(username, filename, metaData)
                db.saveUserFile(username, filename)

            # Step 5:
            # Make a gRPC call to replicate the metadata on all the other nodes.
            self.saveMetadataOnAllNodes(username, filename, metaData)

            return fileService_pb2.ack(success=True, message="Saved")

        # If the node is not the leader.
        else:
            print("Saving the data on my local db")
            sequenceNumberOfChunk = 0
            dataToBeSaved = bytes("", 'utf-8')

            # Gather all the data from gRPC stream
            for request in request_iterator:
                username, filename, sequenceNumberOfChunk = request.username, request.filename, request.seqNo
                dataToBeSaved += request.data
            key = username + "_" + filename + "_" + str(sequenceNumberOfChunk)

            # Save the data in local DB.
            db.setData(key, dataToBeSaved)

            # After saving the chunk in the local DB, make a gRPC call to save the replica of the chunk on different
            # node only if the replicaNode is present.
            if (request.replicaNode != ""):
                print("Sending replication to ", request.replicaNode)
                replica_channel = active_ip_channel_dict[request.replicaNode]
                t1 = Thread(target=self.replicateChunkData,
                            args=(
                                replica_channel,
                                dataToBeSaved,
                                username,
                                filename,
                                sequenceNumberOfChunk,
                            ))
                t1.start()
                # stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
                # response = stub.UploadFile(self.sendDataInStream(dataToBeSaved, username, filename, sequenceNumberOfChunk, ""))

            return fileService_pb2.ack(success=True, message="Saved")

    def replicateChunkData(self, replica_channel, dataToBeSaved, username,
                           filename, sequenceNumberOfChunk):
        stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
        response = stub.UploadFile(
            self.sendDataInStream(dataToBeSaved, username, filename,
                                  sequenceNumberOfChunk, ""))

    # This helper method is responsible for sending the data to destination node through gRPC stream.
    def sendDataToDestination(self, currDataBytes, node, nodeReplica, username,
                              filename, seqNo, channel):
        if (node == self.serverAddress):
            key = username + "_" + filename + "_" + str(seqNo)
            db.setData(key, currDataBytes)
            if (nodeReplica != ""):
                print("Sending replication to ", nodeReplica)
                active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
                replica_channel = active_ip_channel_dict[nodeReplica]
                stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
                response = stub.UploadFile(
                    self.sendDataInStream(currDataBytes, username, filename,
                                          seqNo, ""))
                return response
        else:
            print("Sending the UPLOAD_SHARD_SIZE to node :", node)
            stub = fileService_pb2_grpc.FileserviceStub(channel)
            response = stub.UploadFile(
                self.sendDataInStream(currDataBytes, username, filename, seqNo,
                                      nodeReplica))
            print("Response from uploadFile: ", response.message)
            return response

    # This helper method splits the data into chunks of less than 4 MB and streams them through gRPC.
    # 4 MB is the maximum message size gRPC allows per send, which is why chunking is necessary.
    def sendDataInStream(self, dataBytes, username, filename, seqNo,
                         replicaNode):
        chunk_size = 4000000
        start, end = 0, chunk_size
        while (True):
            chunk = dataBytes[start:end]
            if (len(chunk) == 0): break
            start = end
            end += chunk_size
            yield fileService_pb2.FileData(username=username,
                                           filename=filename,
                                           data=chunk,
                                           seqNo=seqNo,
                                           replicaNode=replicaNode)

    #
    #   This service gets invoked when user requests an uploaded file.
    #
    def DownloadFile(self, request, context):

        print("Inside Download")

        # If the node is the leader of the cluster.
        if (int(db.get("primaryStatus")) == 1):

            print("Inside primary download")

            # Check if file exists
            if (self.fileExists(request.username, request.filename) == 0):
                print("File does not exist")
                yield fileService_pb2.FileData(username=request.username,
                                               filename=request.filename,
                                               data=bytes("", 'utf-8'),
                                               seqNo=0)
                return

            # If the file is present in cache then just fetch it and return. No need to go to individual node.
            if (self.lru.has_key(request.username + "_" + request.filename)):
                print("Fetching data from Cache")
                CHUNK_SIZE = 4000000
                fileName = request.username + "_" + request.filename
                filePath = self.lru[fileName]
                outfile = os.path.join(filePath, fileName)

                with open(outfile, 'rb') as infile:
                    while True:
                        chunk = infile.read(CHUNK_SIZE)
                        if not chunk: break
                        yield fileService_pb2.FileData(
                            username=request.username,
                            filename=request.filename,
                            data=chunk,
                            seqNo=1)

            # If the file is not present in the cache, then fetch it from the individual node.
            else:
                print("Fetching the metadata")

                # Step 1: get metadata i.e. the location of chunks.
                metaData = db.parseMetaData(request.username, request.filename)

                print(metaData)

                #Step 2: make gRPC calls and get the fileData from all the nodes.
                downloadHelper = DownloadHelper(self.hostname, self.serverPort,
                                                self.activeNodesChecker)
                data = downloadHelper.getDataFromNodes(request.username,
                                                       request.filename,
                                                       metaData)
                print("Sending the data to client")

                #Step 3: send the file to supernode using gRPC streaming.
                chunk_size = 4000000
                start, end = 0, chunk_size
                while (True):
                    chunk = data[start:end]
                    if (len(chunk) == 0): break
                    start = end
                    end += chunk_size
                    yield fileService_pb2.FileData(username=request.username,
                                                   filename=request.filename,
                                                   data=chunk,
                                                   seqNo=request.seqNo)

                # Step 4: update the cache based on LRU(least recently used) algorithm.
                self.saveInCache(request.username, request.filename, data)

        # If the node is not the leader, then just fetch the fileChunk from the local db and stream it back to leader.
        else:
            key = request.username + "_" + request.filename + "_" + str(
                request.seqNo)
            print(key)
            data = db.getFileData(key)
            chunk_size = 4000000
            start, end = 0, chunk_size
            while (True):
                chunk = data[start:end]
                if (len(chunk) == 0): break
                start = end
                end += chunk_size
                yield fileService_pb2.FileData(username=request.username,
                                               filename=request.filename,
                                               data=chunk,
                                               seqNo=request.seqNo)

    # This service is responsible for fetching all the files of a user.
    def FileList(self, request, context):
        print("File List Called")
        userFiles = db.getUserFiles(request.username)
        return fileService_pb2.FileListResponse(Filenames=str(userFiles))

    # This helper method checks whether the file is present in db or not.
    def fileExists(self, username, filename):
        print("isFile Present", db.keyExists(username + "_" + filename))
        return db.keyExists(username + "_" + filename)

    # This helper method returns 2 least loaded nodes from the cluster.
    def getLeastLoadedNode(self):
        print("Ready to enter sharding handler")
        node, node_replica = self.shardingHandler.leastUtilizedNode()
        print("Least loaded node is :", node)
        print("Replica node - ", node_replica)
        return node, node_replica

    # This helper method replicates the metadata on all nodes.
    def saveMetadataOnAllNodes(self, username, filename, metadata):
        print("saveMetadataOnAllNodes")
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
        uniqueFileName = username + "_" + filename
        for ip, channel in active_ip_channel_dict.items():
            if (self.isChannelAlive(channel)):
                stub = fileService_pb2_grpc.FileserviceStub(channel)
                response = stub.MetaDataInfo(
                    fileService_pb2.MetaData(
                        filename=uniqueFileName,
                        seqValues=str(metadata).encode('utf-8')))
                print(response.message)

    # This service is responsible for saving the metadata on local db.
    def MetaDataInfo(self, request, context):
        print("Inside Metadatainfo")
        fileName = request.filename
        seqValues = request.seqValues
        db.saveMetaDataOnOtherNodes(fileName, seqValues)
        ack_message = "Successfully saved the metadata on " + self.serverAddress
        return fileService_pb2.ack(success=True, message=ack_message)

    # This helper method checks whether the created channel is alive or not.
    def isChannelAlive(self, channel):
        try:
            grpc.channel_ready_future(channel).result(timeout=1)
        except grpc.FutureTimeoutError:
            #print("Connection timeout. Unable to connect to port ")
            return False
        return True

    # This helper method is responsible for updating the cache for faster lookup.
    def saveInCache(self, username, filename, data):
        if (len(self.lru.items()) >= self.lru.get_size()):
            fileToDel, path = self.lru.peek_last_item()
            os.remove(path + "/" + fileToDel)

        self.lru[username + "_" + filename] = "cache"
        filePath = os.path.join('cache', username + "_" + filename)
        saveFile = open(filePath, 'wb')
        saveFile.write(data)
        saveFile.close()

    # This service is responsible for sending the whole cluster stats to superNode
    def getClusterStats(self, request, context):
        print("Inside getClusterStats")
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
        total_cpu_usage, total_disk_space, total_used_mem = 0.0, 0.0, 0.0
        total_nodes = 0
        for ip, channel in active_ip_channel_dict.items():
            if (self.isChannelAlive(channel)):
                stub = heartbeat_pb2_grpc.HearBeatStub(channel)
                stats = stub.isAlive(heartbeat_pb2.NodeInfo(ip="", port=""))
                total_cpu_usage += float(stats.cpu_usage)
                total_disk_space += float(stats.disk_space)
                total_used_mem += float(stats.used_mem)
                total_nodes += 1

        if (total_nodes == 0):
            return fileService_pb2.ClusterStats(cpu_usage=str(100.00),
                                                disk_space=str(100.00),
                                                used_mem=str(100.00))

        return fileService_pb2.ClusterStats(
            cpu_usage=str(total_cpu_usage / total_nodes),
            disk_space=str(total_disk_space / total_nodes),
            used_mem=str(total_used_mem / total_nodes))

    # This service is responsible for sending the leader info to superNode as soon as leader changes.
    def getLeaderInfo(self, request, context):
        channel = grpc.insecure_channel('{}'.format(self.superNodeAddress))
        stub = fileService_pb2_grpc.FileserviceStub(channel)
        response = stub.getLeaderInfo(
            fileService_pb2.ClusterInfo(ip=self.hostname,
                                        port=self.serverPort,
                                        clusterName="team1"))
        print(response.message)

    #
    #   This service gets invoked when user deletes a file.
    #
    def FileDelete(self, request, data):
        username = request.username
        filename = request.filename

        if (int(db.get("primaryStatus")) == 1):

            if (self.fileExists(username, filename) == 0):
                print("File does not exist")
                return fileService_pb2.ack(success=False,
                                           message="File does not exist")

            print("Fetching metadata from leader")
            metadata = db.parseMetaData(request.username, request.filename)
            print("Successfully retrieved metadata from leader")

            deleteHelper = DeleteHelper(self.hostname, self.serverPort,
                                        self.activeNodesChecker)
            deleteHelper.deleteFileChunksAndMetaFromNodes(
                username, filename, metadata)

            return fileService_pb2.ack(
                success=True,
                message="Successfully deleted file from the cluster")

        else:
            seqNo = -1

            try:
                seqNo = request.seqNo
            except:
                return fileService_pb2.ack(success=False,
                                           message="Internal Error")

            metaDataKey = username + "_" + filename
            dataChunkKey = username + "_" + filename + "_" + str(seqNo)

            if (db.keyExists(metaDataKey) == 1):
                print("FileDelete: Deleting the metadataEntry from local db :")
                db.deleteEntry(metaDataKey)
            if (db.keyExists(dataChunkKey)):
                print("FileDelete: Deleting the data chunk from local db: ")
                db.deleteEntry(dataChunkKey)

            return fileService_pb2.ack(
                success=True,
                message="Successfully deleted file from the cluster")

    #
    #   This service gets invoked when user wants to check if the file is present.
    #
    def FileSearch(self, request, data):
        username, filename = request.username, request.filename

        if (self.fileExists(username, filename) == 1):
            return fileService_pb2.ack(success=True,
                                       message="File exists in the cluster.")
        else:
            return fileService_pb2.ack(
                success=False, message="File does not exist in the cluster.")

    #
    #   This service gets invoked when user wants to update a file.
    #
    def UpdateFile(self, request_iterator, context):

        username, filename = "", ""
        fileData = bytes("", 'utf-8')

        for request in request_iterator:
            fileData += request.data
            username, filename = request.username, request.filename

        def getFileChunks(fileData):
            # Maximum chunk size that can be sent
            CHUNK_SIZE = 4000000

            sTime = time.time()

            # fileData is a bytes object, so slice it into chunks instead of calling read()
            start, end = 0, CHUNK_SIZE
            while True:
                chunk = fileData[start:end]
                if not chunk: break
                start = end
                end += CHUNK_SIZE

                yield fileService_pb2.FileData(username=username,
                                               filename=filename,
                                               data=chunk,
                                               seqNo=1)
            print("Time for upload= ", time.time() - sTime)

        if (int(db.get("primaryStatus")) == 1):
            channel = grpc.insecure_channel('{}'.format(self.serverAddress))
            stub = fileService_pb2_grpc.FileserviceStub(channel)

            response1 = stub.FileDelete(
                fileService_pb2.FileInfo(username=username, filename=filename))

            if (response1.success):
                response2 = stub.UploadFile(getFileChunks(fileData))
                if (response2.success):
                    return fileService_pb2.ack(
                        success=True, message="File successfully updated.")
                else:
                    return fileService_pb2.ack(success=False,
                                               message="Internal error.")
            else:
                return fileService_pb2.ack(success=False,
                                           message="Internal error.")
Example #6
class topic4:
    def __init__(self, c_hash, c_user, c_words):
        self.topic_count =1
        # self.time = (self.first,self.last)
        self.l1 = LRU(c_hash)
        self.first =""
        self.last=""
        self.lats=[]
        self.longs=[]
        self.l2 = LRU(c_user)
        self.l3 = LRU(c_words)
        self.l4 = LRU(400)
    def set_hashLRU(self,l):
        self.set(self.l1, l)

    def set_userLRU(self,l):
        self.set(self.l2, l)

    def set_wordLRU(self,l):
        self.set(self.l3, l)

    def set(self, lru, l):
        for k in l:
            v = lru.get(k,0)
            lru[k]=v+1

    def set_cluster(self, hashtags, users, words,links, cords):
        for k in hashtags:
            self.l1[k]=self.l1.get(k,0)+1
        for k in users:
            self.l2[k]=self.l2.get(k,0)+1
        for k in words:
            self.l3[k]=self.l3.get(k,0)+1
        for k in links:
            self.l4[k]=self.l4.get(k,0)+1
        if(cords is not None):
            self.lats.append(cords["coordinates"][1])
            self.longs.append(cords["coordinates"][0])
        self.topic_count+=1

    def get_similarity(self,hashtags,users,words):
        h_sum = 1
        u_sum = 1
        w_sum = 1
        h_match =0
        h_ind =0
        u_ind =0
        w_ind =0
        c=0
        h1 = self.l1.get_size()
        u1 = self.l2.get_size()
        w1 = self.l3.get_size()
        for h in hashtags:
            # l1_items=zip(*self.l1.items())
            h_sum+= self.l1.get(h,0)
            if(self.l1.has_key(h)):
                ind = self.l1.keys().index(h)
                h_ind+= h1 - ind
                h_match+= 1 if ind<250 else 0
        for u in users:
            u_sum+= self.l2.get(u,0)
            if(self.l2.has_key(u)):
                u_ind+= u1 - self.l2.keys().index(u)
        for w in words:
            w_sum+= self.l3.get(w,0)
            if(self.l3.has_key(w)):
                w_ind+= w1 - self.l3.keys().index(w)
        if(h_match !=0):
            c = h_match -1
        # print(h_ind,h1,u_ind,u1,w_ind,w1, h_sum,w_sum,)
        similarity = (h_ind/(h1+1))*(h_sum/sum(self.l1.values() +[1])) + (u_ind/(u1+1))*(u_sum/sum(self.l2.values()+[1])) + (w_ind/(w1+1))*(w_sum/sum(self.l3.values()+[1])) +c
        return similarity
    def flush1(self, cache, size):
        if(len(cache.keys())>5):
            # keep only the 5 most recently used keys, reinserted oldest-first
            tokens = reversed(cache.keys()[:5])
            cache.clear()
            for i in tokens:
                cache[i]=1


    def flush(self):
        self.flush1(self.l1,500)
        self.flush1(self.l2, 500)
        self.flush1(self.l3,3500)
        self.topic_count=1
Example #7
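The walkthrough below assumes an LRU of size 5 that already holds string values under the integer keys 1-5. The original setup lines are not included, but a minimal setup that would produce the output shown is:

from lru import LRU

l = LRU(5)           # cache that holds at most 5 items
for i in range(1, 6):
    l[i] = str(i)    # after this loop, 5 is the most recently used key
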
print(l.items())
# Would print [(5, '5'), (4, '4'), (3, '3'), (2, '2'), (1, '1')]

l[3]  # Accessing an item would make it MRU
print(l.items())
# Would print [(3, '3'), (5, '5'), (4, '4'), (2, '2'), (1, '1')]
# Now 3 is in front

print(l.keys())  # Can get keys alone in MRU order
# Would print [3, 5, 4, 2, 1]

del l[4]  # Delete an item
print(l.items())
# Would print [(3, '3'), (5, '5'), (2, '2'), (1, '1')]

print(l.get_size())
# Would print 5

l.set_size(3)
print(l.items())
# Would print [(3, '3'), (5, '5'), (2, '2')]
print(l.get_size())
# Would print 3
print(l.has_key(5))
# Would print True
print(2 in l)
# Would print True

print(l.get_stats())
# Would print (1, 0)
Example #8
class Cache:
    """Class representing D3N."""

    # Replacement policies
    LRU = "LRU"
    LFU = "LFU"
    LRU_S = "LRU_S"
    FIFO = "FIFO"
    RAND = "RAND"

    # Write policies
    WRITE_BACK = "WB"
    WRITE_THROUGH = "WT"

    # Layer
    L1 = "L1"
    L2 = "L2"

    consistent = "consistent"
    rendezvous = "rendezvous"
    rr = "rr"

    def __init__(self, layer, size, replace_pol, write_pol, hash_ring,
                 hash_type, obj_size, full_size, logger):
        self._replace_pol = replace_pol  # Replacement policy
        self._write_pol = write_pol  # Write policy
        self._layer = layer  # Layer info
        self._size = size  # Cache size
        self.spaceLeft = size  # Cache size
        self._logger = logger
        self.hashmap = {}  # Mapping
        self.hash_ring = hash_ring
        self._hash_type = hash_type
        self._obj_size = obj_size

        if (self._size == 0):
            self.zerosize = True
            self._size = 1
        else:
            self.zerosize = False

        if (self._replace_pol == Cache.LRU):
            self.cache = LRU(self._size)
        elif (self._replace_pol == Cache.FIFO):
            self.cache = deque()
        elif (self._replace_pol == Cache.LRU_S):
            self.cache = LRU(self._size)
            self.shadow = LRU(full_size)
            self.hist = []
            for i in range(full_size):
                self.hist.append(0)

        # Statistics
        self._hit_count = 0
        self._miss_count = 0
        self._backend_bw = 0
        self._crossrack_bw = 0
        self._intrarack_bw = 0
        self.miss_lat = 0
        self.lat_count = 0

    def _insert1(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.shadow[key] = 1

            if (int(size) <= self.spaceLeft):
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)
            else:
                while (int(size) > self.spaceLeft):
                    self._evict()
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)

    def _insert(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.cache[key] = int(size)
                self.shadow[key] = int(size)
            elif (self._replace_pol == Cache.LRU):
                self.cache[key] = int(size)
            else:
                if (int(size) <= self.spaceLeft):
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)
                else:
                    while (int(size) > self.spaceLeft):
                        self._evict()
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)

    def read1(self, key, size):
        """Read an object from the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return None
        r = None

        if (self._replace_pol == Cache.LRU_S):
            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
                self.shadow[key] = 1

        if key in self.hashmap:
            if (self._replace_pol == Cache.LRU):
                self._update_use(key)
            elif (self._replace_pol == Cache.LRU_S):
                self._update_use(key)
            self._hit_count += 1
            r = 1
        else:
            self._miss_count += 1
        return r

    def read(self, key, size):
        """Read an object from the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return None
        r = None

        if (self._replace_pol == Cache.LRU_S):
            if self.cache.has_key(key):
                self._hit_count += 1
                self.cache[key] = self.cache[key]
                r = 1
            else:
                self._miss_count += 1

            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
                self.shadow[key] = 1

        else:
            if key in self.hashmap:
                if (self._replace_pol == Cache.LRU):
                    self._update_use(key)
                elif (self._replace_pol == Cache.LRU_S):
                    self._update_use(key)
                self._hit_count += 1
                r = 1
            else:
                self._miss_count += 1
        return r

    def checkKey(self, key):
        """Check whether a key is present in the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return 0
        r = 0

        if (self._replace_pol == Cache.LRU_S) or (self._replace_pol
                                                  == Cache.LRU):
            if self.cache.has_key(key):
                r = 1
            else:
                r = 0
        return r

    def _evict(self):
        if (self._replace_pol == Cache.LRU):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.LRU_S):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.FIFO):
            id = self.cache.popleft()
        self.spaceLeft += int(self.hashmap[id])
        del self.hashmap[id]

    def _update_use(self, key):
        """Update the use of a cache."""
        if (self._replace_pol == Cache.LRU):
            self.cache[key] = self.hashmap[key]
        if (self._replace_pol == Cache.LRU_S):
            self.cache[key] = self.hashmap[key]

    def set_cache_size(self, size):
        new_size = self.cache.get_size() + int(size)
        self.cache.set_size(int(new_size))

    def set_backend_bw(self, value):
        self._backend_bw += value

    def set_crossrack_bw(self, value):
        self._crossrack_bw += value

    def set_intrarack_bw(self, value):
        self._intrarack_bw += value

    def get_backend_bw(self):
        return self._backend_bw

    def get_crossrack_bw(self):
        return self._crossrack_bw

    def get_intrarack_bw(self):
        return self._intrarack_bw

    def get_replace_pol(self):
        return self._replace_pol

    def get_hit_count(self):
        return self._hit_count

    def get_miss_count(self):
        return self._miss_count

    def get_available_space(self):
        return self.spaceLeft

    def get_replace_poll(self):
        return self._replace_pol

    def reset_shadow_cache(self):
        self.shadow.clear()

    def print_cache(self):
        print(self.cache)

    def get_l2_address(self, key):
        if (self._hash_type == Cache.consistent):
            return self.hash_ring.get_node(key)
        elif (self._hash_type == Cache.rendezvous):
            return self.hash_ring.find_node(key)
        elif (self._hash_type == Cache.rr):
            val = key.split("_")[1]
            res = int(val) % int(self.hash_ring)
            return res
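
A minimal usage sketch for the Cache class above; the constructor arguments are hypothetical, and lru-dict's LRU class is assumed to be imported in the module:

# Hypothetical parameters: an L1 cache of 1024 size units with LRU replacement
# and write-back; the hash ring and logger are not needed for the calls below.
cache = Cache(layer=Cache.L1, size=1024, replace_pol=Cache.LRU,
              write_pol=Cache.WRITE_BACK, hash_ring=None,
              hash_type=Cache.consistent, obj_size=4, full_size=2048,
              logger=None)

cache._insert("obj_1", 64)      # store a 64-unit object in the LRU
print(cache.checkKey("obj_1"))  # 1: the key is present in the cache
print(cache.get_replace_pol())  # "LRU"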
Example #9
class Streamer:
    """ streamer for flows management """
    num_streamers = 0

    def __init__(self,
                 source=None,
                 capacity=128000,
                 active_timeout=120,
                 inactive_timeout=60,
                 user_metrics=None,
                 user_classifiers=None,
                 enable_ndpi=True):

        Streamer.num_streamers += 1
        self.__exports = []
        self.source = source
        self.__flows = LRU(capacity, callback=emergency_callback)  # LRU cache
        self._capacity = self.__flows.get_size()  # Streamer capacity (default: 128000)
        self.active_timeout = active_timeout  # expiration active timeout
        self.inactive_timeout = inactive_timeout  # expiration inactive timeout
        self.current_flows = 0  # counter for stored flows
        self.flows_number = 0
        self.current_tick = 0  # current timestamp
        self.processed_packets = 0  # processed packets counter
        # Python dictionaries to hold current and archived flow records
        self.flow_cache = OrderedDict()
        self.user_classifiers = {}
        if user_classifiers is not None:
            try:
                classifier_iterator = iter(user_classifiers)
                for classifier in classifier_iterator:
                    if isinstance(classifier, NFStreamClassifier):
                        self.user_classifiers[classifier.name] = classifier
            except TypeError:
                self.user_classifiers[user_classifiers.name] = user_classifiers
        self.user_metrics = {}
        if enable_ndpi:
            ndpi_classifier = NDPIClassifier('ndpi')
            self.user_classifiers[ndpi_classifier.name] = ndpi_classifier
        if user_metrics is not None:
            self.user_metrics = user_metrics

    def _get_capacity(self):
        """ getter for capacity attribute """
        return self.__flows.get_size()

    def _set_capacity(self, new_size):
        """ setter for capacity size attribute """
        return self.__flows.set_size(new_size)

    capacity = property(_get_capacity, _set_capacity)

    def terminate(self):
        """ terminate all entries in Streamer """
        remaining_flows = True
        while remaining_flows:
            try:
                key, value = self.__flows.peek_last_item()
                value.export_reason = 2
                self.exporter(value)
            except TypeError:
                remaining_flows = False

        for classifier_name, classifier in self.user_classifiers.items():
            self.user_classifiers[classifier_name].on_exit()

    def exporter(self, flow):
        """ export method for a flow trigger_type:0(inactive), 1(active), 2(flush) """
        # Look for the flow in the created classifiers
        for classifier_name, classifier in self.user_classifiers.items():
            # Terminate the flow in the respective classifiers
            self.user_classifiers[classifier_name].on_flow_terminate(flow)
        # Delete the flow register from the active flows collection
        del self.__flows[flow.key]
        # Decrease the number of active flows by 1
        self.current_flows -= 1
        # Add the expired flow register to the final flows collection
        self.__exports.append(flow)

    def inactive_watcher(self):
        """ inactive expiration management """
        remaining_inactives = True
        # While there are inactive flow registers
        while remaining_inactives:
            try:
                # Obtain the last flow register (Least Recently Used - LRU) in the variable value using its key
                key, value = self.__flows.peek_last_item()
                # Has the flow exceeded the inactive timeout (1 minute)?
                if (self.current_tick -
                        value.end_time) >= (self.inactive_timeout * 1000):
                    # Set export reason to 0 (inactive) in the flow
                    value.export_reason = 0
                    # Export the flow to the final flows collection
                    self.exporter(value)
                # There are no flows that can be declared inactive yet
                else:
                    # Stop the inactive watcher until it is called again
                    remaining_inactives = False
            except TypeError:
                remaining_inactives = False

    def consume(self, pkt_info):
        """ consume a packet and update Streamer status """
        self.processed_packets += 1  # increment total processed packet counter
        # Obtain a flow hash key for identification of the flow
        key = get_flow_key(pkt_info)
        print("\nCONSUMING PACKET FROM FLOW:", key)
        # Is this packet from a registered flow?
        if key in self.__flows:
            print("FLOW FOUND - UPDATING STATISTICS")
            # Checking current status of the flow that the packet belongs to
            # -1 active flow - 0 inactive flow - 1 active flow timeout expired - 2 flush remaining flows in LRU
            # 3 FIN flag detected - 4 RST flag detected
            flow_status = self.__flows[key].update_and_check_flow_status(
                pkt_info, self.active_timeout, self.user_classifiers,
                self.user_metrics)

            #Has the active timeout of the flow register expired (2 minutes)?
            if (flow_status == 1):
                # Export the old flow register to the final collection and terminate this flow process on the specified classifier
                self.exporter(self.__flows[key])
                # Create a new flow register for the current packet
                flow = Flow(pkt_info, self.user_classifiers, self.user_metrics,
                            self.flow_cache)
                # Add the new flow to the active flows collection using the same Hash key
                self.__flows[flow.key] = flow
                # Reset the flow_cache entry for this flow key
                del self.flow_cache[flow.key]
                self.flow_cache[flow.key] = {}
                # Update the flow status on the collection
                flow.create_new_flow_record(pkt_info, self.user_classifiers,
                                            self.user_metrics)
            # FIN flag detected in both directions - exporting flow
            if (flow_status == 3):
                self.exporter(self.__flows[key])
            # RST flag found - updating bidirectional statistics - exporting flow
            if (flow_status == 4):
                self.exporter(self.__flows[key])
            if (flow_status == 5):  # FIN FLAG TIMER EXPIRED
                self.exporter(self.__flows[key])
                print("****FLOW EXPORTED")
                """
                expired_flow = self.__flows[key]
                print("****STARTING TCP TIMER")
                threading.Timer(20, self.export_incomplete_flow(expired_flow))
                """

        # This packet belongs to a new flow
        else:
            # Increase the count of current active flows
            print("FLOW NOT FOUND - CREATING NEW FLOW REGISTER")
            # Update flow counters
            self.current_flows += 1
            self.flows_number += 1
            # Create the new flow object
            flow = Flow(pkt_info, self.user_classifiers, self.user_metrics,
                        self.flow_cache)
            # Add this new flow register to the LRU
            self.__flows[flow.key] = flow
            # Create the entry on the flow_cache with the flow key
            self.flow_cache[flow.key] = {}
            # Create the new bidirectional flow record
            flow.create_new_flow_record(pkt_info, self.user_classifiers,
                                        self.user_metrics)
            # Set the current start time on the streamer timer to keep control of the inactive flows
            self.current_tick = flow.start_time
            # Remove the Least Recently Used (LRU) flow record from the active flows collection
            # and export it to the final flows collection if its inactive timeout has been exceeded
            self.inactive_watcher()
        print(
            "*******************PACKET CONSUMED - MOVING TO NEXT*********************************"
        )

    """
    def export_incomplete_flow(self, expired_flow):
        print("##############################---TCP TIMER EXPIRED--#######################")
        # Look for the flow in the created classifiers
        self.flows_number += 1
        for classifier_name, classifier in self.user_classifiers.items():
            # Terminate the flow in the respective classifiers
            self.user_classifiers[classifier_name].on_flow_terminate(expired_flow)
        self.__exports.append(expired_flow)
        print("##############################---EXPIRED FLOW EXPORTED-----###############################")
    """

    def __iter__(self):
        # Create the packet information generator
        pkt_info_gen = Observer(source=self.source)
        # Extract each packet information from the network interface or pcap file
        for pkt_info in pkt_info_gen:
            if pkt_info is not None:
                # Check if the packet belongs to an existent flow or create a new one
                self.consume(pkt_info)
                for export in self.__exports:
                    yield export
                self.__exports = []
        # Terminate the streamer
        self.terminate()
        for export in self.__exports:
            yield export
        self.__exports = []
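
The Streamer above builds its cache with LRU(capacity, callback=emergency_callback); lru-dict calls that callback with the evicted key/value pair when the cache overflows. The callback itself is not part of the snippet, so the following is only an assumed sketch of it, together with the iteration pattern the class exposes (the source and capacity values are hypothetical):

def emergency_callback(key, flow):
    # Assumed sketch: invoked by lru-dict when a flow is evicted because the
    # cache is full; a real implementation would export the flow rather than
    # just report it.
    print("LRU capacity reached, dropping flow:", key)


# Iterating a Streamer yields exported flows as packets are consumed.
for flow in Streamer(source="capture.pcap", capacity=1000):
    print(flow.key)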
Example #10
conn = sqlite3.connect('terms.db')
c = conn.cursor()
c.execute(
    '''CREATE TABLE IF NOT EXISTS rejected (word TEXT PRIMARY KEY, reason TEXT)'''
)
conn.commit()
c.execute('''SELECT COUNT(*) FROM rejected''')
print("\tthere are", c.fetchone()[0], "words blocked in database")
c.execute('''SELECT reason, COUNT(*) FROM rejected GROUP BY reason''')
for row in c.fetchall():
    print('\t\t', row[0], ':\t', row[1])

cache_rejected = LRU(50000)
# fill in the cache with entries
c.execute('''SELECT * FROM rejected ORDER BY RANDOM() LIMIT ?''',
          (int(cache_rejected.get_size() * 2 / 3), ))
for row in c:
    cache_rejected[row[0]] = row[1]
print("\tloaded", len(cache_rejected), "items in cache")


def is_word_rejected_db(token):
    global c
    global stats
    global cache_rejected

    # first try the cache
    if token.lemma_ in cache_rejected:
        stats['words rejected db (cache)'] += 1
        return cache_rejected[token.lemma_]