Example #1
 def test_has_key(self):
     for size in SIZES:
         l = LRU(size)
         for i in range(2*size):
             l[i] = str(i)
             self.assertTrue(l.has_key(i))
         for i in range(size, 2*size):
             self.assertTrue(l.has_key(i))
         for i in range(size):
             self.assertFalse(l.has_key(i))
Example #2
 def test_has_key(self):
     for size in SIZES:
         l = LRU(size)
         for i in xrange(2*size):
             l[i] = str(i)
             self.assertTrue(l.has_key(i))
         for i in xrange(size, 2*size):
             self.assertTrue(l.has_key(i))
         for i in xrange(size):
             self.assertFalse(l.has_key(i))
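Examples #1 and #2 are the same has_key() test; #2 is the Python 2 variant (xrange). Below is a minimal standalone sketch of the behaviour they assert, assuming the lru-dict package is installed; it also uses the `in` operator, which the library supports as well (see Example #9).

from lru import LRU

l = LRU(2)           # capacity of two entries
l[1] = "1"
l[2] = "2"
l[3] = "3"           # inserting a third key evicts the least recently used key (1)

assert 1 not in l    # evicted
assert 2 in l and 3 in l
assert l.has_key(3)  # the equivalent check used by the tests above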
Example #3
class topic4:
    def __init__(self, c_hash, c_user, c_words):
        self.topic_count =1
        self.l1 = LRU(c_hash)
        self.l2 = LRU(c_user)

    def set_hashLRU(self,l):
        self.set(self.l1, l)

    def set_userLRU(self,l):
        self.set(self.l2, l)

    def set(self, lru, l):
        for k in l:
            v = lru.get(k,0)
            lru[k]=v+1

    def set_cluster(self, hashtags, users, words):
        for k in hashtags:
            self.l1[k]=self.l1.get(k,0)+1
        for k in users:
            self.l2[k]=self.l2.get(k,0)+1

        self.topic_count+=1

    def get_similarity(self,hashtags,users,words):
        h_sum = 1
        u_sum = 1
        w_sum = 1
        h_match =0
        h_ind =0
        u_ind =0
        w_ind =0
        c=0
        h1 = self.l1.get_size()
        u1 = self.l2.get_size()
        for h in hashtags:
            # l1_items=zip(*self.l1.items())
            h_sum+= self.l1.get(h,0)
            if(self.l1.has_key(h)):
                ind = self.l1.keys().index(h)
                h_ind+= h1 - ind
                h_match+= 1 if ind<250 else 0
        for u in users:
            u_sum+= self.l2.get(u,0)
            if(self.l2.has_key(u)):
                u_ind+= u1 - self.l2.keys().index(u)

        if(h_match !=0):
            c = h_match -1
        # print(h_ind,h1,u_ind,u1,w_ind,w1, h_sum,w_sum,)
        similarity = (h_ind/(h1+1))*(h_sum/sum(self.l1.values() +[1])) + (u_ind/(u1+1))*(u_sum/sum(self.l2.values()+[1]))  +c
        return similarity
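A hypothetical usage sketch, assuming the topic4 class above is in scope, showing how set_cluster feeds the LRUs and how get_similarity then scores a new tweet's hashtags and users:

t = topic4(c_hash=500, c_user=500, c_words=500)
t.set_cluster(["#python", "#cache"], ["alice"], ["lru"])

# Hashtags and users seen before sit nearer the MRU end of their LRU, which
# raises both the rank terms (h_ind, u_ind) and the count terms (h_sum, u_sum).
score = t.get_similarity(["#python"], ["alice"], ["lru"])
print(score)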
Example #4
class Newton:
    '''
    area_ids - np.array with the ids of the areas.
    serialized_forests - str path to the folder with the serialized forests.
    serialized_tree - str path to the folder with the serialized tree.
    data_dir - str path to the folder with the data.
    n_forest_results - int number of results returned by the RF
    k - int number of nearest neighbours computed by the BallTree
    '''

    def __init__(self, area_ids, serialized_forests, serialized_tree, data_dir, cache=4, n_forest_results=3, k=5):
        self.area_ids = area_ids
        self.balltree = Tree(serialized_tree, data_dir)
        self.n_forest_results = n_forest_results
        self.k = k
        self.serialized_forests = serialized_forests
        self.cache = cache
        self.locks = {i: Lock() for i in area_ids}
        self.counters = {i: 0 for i in area_ids}
        self.active_forests = LRU(cache, callback=lambda key, value: clear(key, value, self.locks,self.counters))

    '''
    area_id - int  id of the area to recommend for
    scores - np.array (n,5) array of scores to recommend from
    returns np.array (n,n_forest_results,k) recommended careers
    '''

    def get_recs(self, area_id, scores):
        prediction = self.predict(area_id, scores, self.n_forest_results)
        recommendations = []
        for carreer_set in prediction:
            recommendations.append(self.balltree.query(carreer_set, self.k))
        return np.array(recommendations)

    def predict(self, area_id, scores, n_results):
        with self.locks[area_id]:
            self.counters[area_id] += 1
        if not self.active_forests.has_key(area_id):
            if get_mem_percentage() < 0.3:
                clear(self.active_forests.peek_last_item()[0], self.active_forests[self.active_forests.peek_last_item()[0]], self.locks,self.counters)
            self.active_forests[area_id] = Forest(area_id, self.serialized_forests)
            # print(get_mem_percentage())
        forest = self.active_forests[area_id]
        prediction = forest.get_class(forest.query(scores, n_results))
        with self.locks[area_id]:
            self.counters[area_id] -= 1
        # print(self.active_forests.items())
        return prediction

    def filter_recs(self, user, carreers):
        pass
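The active_forests LRU above relies on an eviction callback to release the lock and counter of a forest that falls out of the cache. A small standalone sketch of that mechanism (the names below are illustrative, not this module's real API):

from lru import LRU

def release(area_id, forest):
    # stand-in for the clear(key, value, locks, counters) callback used above
    print("released forest for area", area_id)

active_forests = LRU(2, callback=release)
for area_id in (10, 11, 12):             # the third insert evicts area 10
    active_forests[area_id] = object()   # stand-in for Forest(area_id, ...)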
Example #5
class complex_cache:
    def __init__(self, size, type): # the number of items
        self.size = size # actual size of the cache
        self.lru = LRU(size)

        self.hits = 0.0
        self.reqs = 0.0
        self.cache_stack_size = 0 # how much of the cache is occupied


    def place(self, request):
        # request is a tuple (timestamp, username)
        self.reqs += 1 
        if self.lru.has_key(request[-1]): 
            self.lru[request[-1]] = self.lru[request[-1]] + 1
            
            self.hits += 1            
        else:
            if self.cache_stack_size + 1 > self.size: 
                print "evict an item: "+str(self.lru.peek_last_item())
                self.cache_stack_size -= 1
                
            self.lru[request[-1]] = 1
            self.cache_stack_size += 1
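A minimal sketch of how complex_cache above could be driven, assuming the class is in scope (its unused `type` argument accepts any placeholder):

cache = complex_cache(size=2, type="lru")
for user in ["alice", "bob", "alice", "carol"]:
    cache.place((0, user))          # requests are (timestamp, username) tuples
print(cache.hits / cache.reqs)      # 0.25: only the repeated "alice" request hits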
Example #6
class StarboardEntries:
    """A way of managing starboard entries.
    Sort of like an ORM, but also not fully."""

    _pool: asyncpg.Pool = attr.ib()
    # note: entry cache isn't really a dict, but for typehinting purposes this works
    _entry_cache: typing.Dict[int, StarboardEntry] = attr.ib()
    _sql_loop_task: asyncio.Task = attr.ib()
    _sql_queries: cclass.SetUpdateAsyncQueue = attr.ib()

    def __init__(self, pool: asyncpg.Pool, cache_size: int = 200):
        self._pool = pool
        self._entry_cache = LRU(
            cache_size
        )  # the 200 should be raised as the bot grows bigger
        self._sql_queries = cclass.SetUpdateAsyncQueue()

        loop = asyncio.get_event_loop()
        self._sql_loop_task = loop.create_task(self._sql_loop())

    def stop(self):
        """Stops the SQL task loop."""
        self._sql_loop_task.cancel()

    async def _sql_loop(self):
        """Actually runs SQL updating, hopefully one after another.

        Saves speed on adding, deleting, and updating by offloading
        this step here."""
        try:
            while True:
                entry = await self._sql_queries.get()
                logging.getLogger("discord").debug(f"Running {entry.query}.")
                await self._pool.execute(entry.query, timeout=60, *entry.args)
                self._sql_queries.task_done()
        except asyncio.CancelledError:
            pass

    def _get_required_from_entry(self, entry: StarboardEntry):
        """Transforms data into the form needed for databases."""
        return (
            entry.ori_mes_id,
            entry.ori_chan_id,
            entry.star_var_id,
            entry.starboard_id,
            entry.author_id,
            list(entry.ori_reactors),
            list(entry.var_reactors),
            entry.guild_id,
            entry.forced,
            entry.frozen,
            entry.trashed,
        )

    def _str_builder_to_insert(
        self, str_builder: typing.List[str], entry: StarboardEntry
    ):
        """Takes data from a string builder list and eventually
        puts the data needed into the _sql_queries variable."""
        query = "".join(str_builder)
        args = self._get_required_from_entry(entry)
        self._sql_queries.put_nowait(StarboardSQLEntry(query, args))

    def _handle_upsert(self, entry: StarboardEntry):
        """Upserts an entry by using an INSERT with an ON CONFLICT cause.
        This is a PostgreSQL-specific feature, so that's nice!"""
        str_builder = [
            "INSERT INTO starboard(ori_mes_id, ori_chan_id, star_var_id, ",
            "starboard_id, author_id, ori_reactors, var_reactors, ",
            "guild_id, forced, frozen, trashed) VALUES($1, $2, $3, $4, ",
            "$5, $6, $7, $8, $9, $10, $11) ON CONFLICT (ori_mes_id) DO UPDATE ",
            "SET ori_chan_id = $2, star_var_id = $3, starboard_id = $4, ",
            "author_id = $5, ori_reactors = $6, var_reactors = $7, guild_id = $8, ",
            "forced = $9, frozen = $10, trashed = $11",
        ]
        self._str_builder_to_insert(str_builder, entry)

    def upsert(self, entry: StarboardEntry):
        """Either adds or updates an entry in the collection of entries."""
        temp_dict = {entry.ori_mes_id: entry}
        if entry.star_var_id:
            temp_dict[entry.star_var_id] = entry

        self._entry_cache.update(**temp_dict)  # type: ignore this is valid i promise
        self._handle_upsert(entry)

    def delete(self, entry_id: int):
        """Removes an entry from the collection of entries."""
        self._entry_cache.pop(entry_id, None)
        self._sql_queries.put_nowait(
            StarboardSQLEntry("DELETE FROM starboard WHERE ori_mes_id = $1", [entry_id])
        )

    async def get(
        self, entry_id: int, check_for_var: bool = False
    ) -> typing.Optional[StarboardEntry]:
        """Gets an entry from the collection of entries."""
        entry = None

        if self._entry_cache.has_key(entry_id):  # type: ignore
            entry = self._entry_cache[entry_id]
        else:
            entry = discord.utils.find(
                lambda e: e and e.star_var_id == entry_id, self._entry_cache.values()
            )

        if not entry:
            async with self._pool.acquire() as conn:
                data = await conn.fetchrow(
                    f"SELECT * FROM starboard WHERE ori_mes_id = {entry_id} OR"
                    f" star_var_id = {entry_id}"
                )
                if data:
                    entry = StarboardEntry.from_row(data)
                    self._entry_cache[entry_id] = entry

        if entry and check_for_var and not entry.star_var_id:
            return None

        return entry

    async def select_query(self, query: str):
        """Selects the starboard database directly for entries based on the query."""
        async with self._pool.acquire() as conn:
            data = await conn.fetch(f"SELECT * FROM starboard WHERE {query}")

            if not data:
                return None
            return tuple(StarboardEntry.from_row(row) for row in data)

    async def raw_query(self, query: str):
        """Runs the raw query against the pool, assuming the results are starboard entries."""
        async with self._pool.acquire() as conn:
            data = await conn.fetch(query)

            if not data:
                return None
            return tuple(StarboardEntry.from_row(row) for row in data)

    async def super_raw_query(self, query: str):
        """You want a raw query? You'll get one."""
        async with self._pool.acquire() as conn:
            return await conn.fetch(query)

    async def query_entries(
        self, seperator: str = "AND", **conditions: typing.Dict[str, str]
    ) -> typing.Optional[typing.Tuple[StarboardEntry, ...]]:
        """Queries entries based on conditions provided.

        For example, you could do `query_entries(guild_id=143425)` to get
        entries with that guild id."""
        sql_conditions: list[str] = [
            f"{key} = {value}" for key, value in conditions.items()
        ]
        combined_statements = f" {seperator} ".join(sql_conditions)

        async with self._pool.acquire() as conn:
            data = await conn.fetch(
                f"SELECT * FROM starboard WHERE {combined_statements}"
            )

            if not data:
                return None
            return tuple(StarboardEntry.from_row(row) for row in data)

    async def get_random(self, guild_id: int) -> typing.Optional[StarboardEntry]:
        """Gets a random entry from a guild."""
        # query adapted from
        # https://github.com/Rapptz/RoboDanny/blob/1fb95d76d1b7685e2e2ff950e11cddfc96efbfec/cogs/stars.py#L1082
        query = """SELECT *
                   FROM starboard
                   WHERE guild_id=$1
                   AND star_var_id IS NOT NULL
                   OFFSET FLOOR(RANDOM() * (
                       SELECT COUNT(*)
                       FROM starboard
                       WHERE guild_id=$1
                       AND star_var_id IS NOT NULL
                   ))
                   LIMIT 1
                """

        async with self._pool.acquire() as conn:
            data = await conn.fetchrow(query, guild_id)
            if not data:
                return None
            return StarboardEntry.from_row(data)
Example #7
class FileServer(fileService_pb2_grpc.FileserviceServicer):
    def __init__(self, hostname, server_port, activeNodesChecker,
                 shardingHandler, superNodeAddress):
        self.serverPort = server_port
        self.serverAddress = hostname + ":" + server_port
        self.activeNodesChecker = activeNodesChecker
        self.shardingHandler = shardingHandler
        self.hostname = hostname
        self.lru = LRU(5)
        self.superNodeAddress = superNodeAddress

    #
    #   This service gets invoked when user uploads a new file.
    #
    def UploadFile(self, request_iterator, context):
        print("Inside Server method ---------- UploadFile")
        data = bytes("", 'utf-8')
        username, filename = "", ""
        totalDataSize = 0
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()

        # list to store the info related to file location.
        metaData = []

        # If the node is the leader of the cluster.
        if (int(db.get("primaryStatus")) == 1):
            print("Inside primary upload")
            currDataSize = 0
            currDataBytes = bytes("", 'utf-8')
            seqNo = 1

            # Step 1:
            # Get 2 least loaded nodes based on the CPU stats.
            # 'Node' is where the actual data goes and 'node_replica' is where replica will go.
            node, node_replica = self.getLeastLoadedNode()

            if (node == -1):
                return fileService_pb2.ack(
                    success=False,
                    message="Error Saving File. No active nodes.")

            # Step 2:
            # Check whether file already exists, if yes then return with message 'File already exists'.
            for request in request_iterator:
                username, filename = request.username, request.filename
                print("Key is-----------------", username + "_" + filename)
                if (self.fileExists(username, filename) == 1):
                    print("sending neg ack")
                    return fileService_pb2.ack(
                        success=False,
                        message=
                        "File already exists for this user. Please rename or delete file first."
                    )
                break

            # Step 3:
            # Make chunks of size 'UPLOAD_SHARD_SIZE' and start sending the data to the least utilized node through gRPC streaming.
            currDataSize += sys.getsizeof(request.data)
            currDataBytes += request.data

            for request in request_iterator:

                if ((currDataSize + sys.getsizeof(request.data)) >
                        UPLOAD_SHARD_SIZE):
                    response = self.sendDataToDestination(
                        currDataBytes, node, node_replica, username, filename,
                        seqNo, active_ip_channel_dict[node])
                    metaData.append([node, seqNo, node_replica])
                    currDataBytes = request.data
                    currDataSize = sys.getsizeof(request.data)
                    seqNo += 1
                    node, node_replica = self.getLeastLoadedNode()
                else:
                    currDataSize += sys.getsizeof(request.data)
                    currDataBytes += request.data

            if (currDataSize > 0):
                response = self.sendDataToDestination(
                    currDataBytes, node, node_replica, username, filename,
                    seqNo, active_ip_channel_dict[node])
                metaData.append([node, seqNo, node_replica])

            # Step 4:
            # Save the metadata on the primary node after the completion of sharding.
            if (response.success):
                db.saveMetaData(username, filename, metaData)
                db.saveUserFile(username, filename)

            # Step 5:
            # Make a gRPC call to replicate the metadata on all the other nodes.
            self.saveMetadataOnAllNodes(username, filename, metaData)

            return fileService_pb2.ack(success=True, message="Saved")

        # If the node is not the leader.
        else:
            print("Saving the data on my local db")
            sequenceNumberOfChunk = 0
            dataToBeSaved = bytes("", 'utf-8')

            # Gather all the data from gRPC stream
            for request in request_iterator:
                username, filename, sequenceNumberOfChunk = request.username, request.filename, request.seqNo
                dataToBeSaved += request.data
            key = username + "_" + filename + "_" + str(sequenceNumberOfChunk)

            # Save the data in local DB.
            db.setData(key, dataToBeSaved)

            # After saving the chunk in the local DB, make a gRPC call to save the replica of the chunk on different
            # node only if the replicaNode is present.
            if (request.replicaNode != ""):
                print("Sending replication to ", request.replicaNode)
                replica_channel = active_ip_channel_dict[request.replicaNode]
                t1 = Thread(target=self.replicateChunkData,
                            args=(
                                replica_channel,
                                dataToBeSaved,
                                username,
                                filename,
                                sequenceNumberOfChunk,
                            ))
                t1.start()
                # stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
                # response = stub.UploadFile(self.sendDataInStream(dataToBeSaved, username, filename, sequenceNumberOfChunk, ""))

            return fileService_pb2.ack(success=True, message="Saved")

    def replicateChunkData(self, replica_channel, dataToBeSaved, username,
                           filename, sequenceNumberOfChunk):
        stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
        response = stub.UploadFile(
            self.sendDataInStream(dataToBeSaved, username, filename,
                                  sequenceNumberOfChunk, ""))

    # This helper method is responsible for sending the data to destination node through gRPC stream.
    def sendDataToDestination(self, currDataBytes, node, nodeReplica, username,
                              filename, seqNo, channel):
        if (node == self.serverAddress):
            key = username + "_" + filename + "_" + str(seqNo)
            db.setData(key, currDataBytes)
            if (nodeReplica != ""):
                print("Sending replication to ", nodeReplica)
                active_ip_channel_dict = self.activeNodesChecker.getActiveChannels(
                )
                replica_channel = active_ip_channel_dict[nodeReplica]
                stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
                response = stub.UploadFile(
                    self.sendDataInStream(currDataBytes, username, filename,
                                          seqNo, ""))
                return response
        else:
            print("Sending the UPLOAD_SHARD_SIZE to node :", node)
            stub = fileService_pb2_grpc.FileserviceStub(channel)
            response = stub.UploadFile(
                self.sendDataInStream(currDataBytes, username, filename, seqNo,
                                      nodeReplica))
            print("Response from uploadFile: ", response.message)
            return response

    # This helper method actually makes chunks of less than 4MB and streams them through gRPC.
    # 4 MB is the max data packet size in gRPC while sending. That's why it is necessary.
    def sendDataInStream(self, dataBytes, username, filename, seqNo,
                         replicaNode):
        chunk_size = 4000000
        start, end = 0, chunk_size
        while (True):
            chunk = dataBytes[start:end]
            if (len(chunk) == 0): break
            start = end
            end += chunk_size
            yield fileService_pb2.FileData(username=username,
                                           filename=filename,
                                           data=chunk,
                                           seqNo=seqNo,
                                           replicaNode=replicaNode)

    #
    #   This service gets invoked when user requests an uploaded file.
    #
    def DownloadFile(self, request, context):

        print("Inside Download")

        # If the node is the leader of the cluster.
        if (int(db.get("primaryStatus")) == 1):

            print("Inside primary download")

            # Check if file exists
            if (self.fileExists(request.username, request.filename) == 0):
                print("File does not exist")
                yield fileService_pb2.FileData(username=request.username,
                                               filename=request.filename,
                                               data=bytes("", 'utf-8'),
                                               seqNo=0)
                return

            # If the file is present in cache then just fetch it and return. No need to go to individual node.
            if (self.lru.has_key(request.username + "_" + request.filename)):
                print("Fetching data from Cache")
                CHUNK_SIZE = 4000000
                fileName = request.username + "_" + request.filename
                filePath = self.lru[fileName]
                outfile = os.path.join(filePath, fileName)

                with open(outfile, 'rb') as infile:
                    while True:
                        chunk = infile.read(CHUNK_SIZE)
                        if not chunk: break
                        yield fileService_pb2.FileData(
                            username=request.username,
                            filename=request.filename,
                            data=chunk,
                            seqNo=1)

            # If the file is not present in the cache, then fetch it from the individual node.
            else:
                print("Fetching the metadata")

                # Step 1: get metadata i.e. the location of chunks.
                metaData = db.parseMetaData(request.username, request.filename)

                print(metaData)

                #Step 2: make gRPC calls and get the fileData from all the nodes.
                downloadHelper = DownloadHelper(self.hostname, self.serverPort,
                                                self.activeNodesChecker)
                data = downloadHelper.getDataFromNodes(request.username,
                                                       request.filename,
                                                       metaData)
                print("Sending the data to client")

                #Step 3: send the file to supernode using gRPC streaming.
                chunk_size = 4000000
                start, end = 0, chunk_size
                while (True):
                    chunk = data[start:end]
                    if (len(chunk) == 0): break
                    start = end
                    end += chunk_size
                    yield fileService_pb2.FileData(username=request.username,
                                                   filename=request.filename,
                                                   data=chunk,
                                                   seqNo=request.seqNo)

                # Step 4: update the cache based on LRU(least recently used) algorithm.
                self.saveInCache(request.username, request.filename, data)

        # If the node is not the leader, then just fetch the fileChunk from the local db and stream it back to leader.
        else:
            key = request.username + "_" + request.filename + "_" + str(
                request.seqNo)
            print(key)
            data = db.getFileData(key)
            chunk_size = 4000000
            start, end = 0, chunk_size
            while (True):
                chunk = data[start:end]
                if (len(chunk) == 0): break
                start = end
                end += chunk_size
                yield fileService_pb2.FileData(username=request.username,
                                               filename=request.filename,
                                               data=chunk,
                                               seqNo=request.seqNo)

    # This service is responsible fetching all the files.
    def FileList(self, request, context):
        print("File List Called")
        userFiles = db.getUserFiles(request.username)
        return fileService_pb2.FileListResponse(Filenames=str(userFiles))

    # This helper method checks whether the file is present in db or not.
    def fileExists(self, username, filename):
        print("isFile Present", db.keyExists(username + "_" + filename))
        return db.keyExists(username + "_" + filename)

    # This helper method returns 2 least loaded nodes from the cluster.
    def getLeastLoadedNode(self):
        print("Ready to enter sharding handler")
        node, node_replica = self.shardingHandler.leastUtilizedNode()
        print("Least loaded node is :", node)
        print("Replica node - ", node_replica)
        return node, node_replica

    # This helper method replicates the metadata on all nodes.
    def saveMetadataOnAllNodes(self, username, filename, metadata):
        print("saveMetadataOnAllNodes")
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
        uniqueFileName = username + "_" + filename
        for ip, channel in active_ip_channel_dict.items():
            if (self.isChannelAlive(channel)):
                stub = fileService_pb2_grpc.FileserviceStub(channel)
                response = stub.MetaDataInfo(
                    fileService_pb2.MetaData(
                        filename=uniqueFileName,
                        seqValues=str(metadata).encode('utf-8')))
                print(response.message)

    # This service is responsible for saving the metadata on local db.
    def MetaDataInfo(self, request, context):
        print("Inside Metadatainfo")
        fileName = request.filename
        seqValues = request.seqValues
        db.saveMetaDataOnOtherNodes(fileName, seqValues)
        ack_message = "Successfully saved the metadata on " + self.serverAddress
        return fileService_pb2.ack(success=True, message=ack_message)

    # This helper method checks whether the created channel is alive or not
    def isChannelAlive(self, channel):
        try:
            grpc.channel_ready_future(channel).result(timeout=1)
        except grpc.FutureTimeoutError:
            #print("Connection timeout. Unable to connect to port ")
            return False
        return True

    # This helper method is responsible for updating the cache for faster lookup.
    def saveInCache(self, username, filename, data):
        if (len(self.lru.items()) >= self.lru.get_size()):
            fileToDel, path = self.lru.peek_last_item()
            os.remove(path + "/" + fileToDel)

        self.lru[username + "_" + filename] = "cache"
        filePath = os.path.join('cache', username + "_" + filename)
        saveFile = open(filePath, 'wb')
        saveFile.write(data)
        saveFile.close()

    # This service is responsible for sending the whole cluster stats to superNode
    def getClusterStats(self, request, context):
        print("Inside getClusterStats")
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
        total_cpu_usage, total_disk_space, total_used_mem = 0.0, 0.0, 0.0
        total_nodes = 0
        for ip, channel in active_ip_channel_dict.items():
            if (self.isChannelAlive(channel)):
                stub = heartbeat_pb2_grpc.HearBeatStub(channel)
                stats = stub.isAlive(heartbeat_pb2.NodeInfo(ip="", port=""))
                total_cpu_usage = float(stats.cpu_usage)
                total_disk_space = float(stats.disk_space)
                total_used_mem = float(stats.used_mem)
                total_nodes += 1

        if (total_nodes == 0):
            return fileService_pb2.ClusterStats(cpu_usage=str(100.00),
                                                disk_space=str(100.00),
                                                used_mem=str(100.00))

        return fileService_pb2.ClusterStats(
            cpu_usage=str(total_cpu_usage / total_nodes),
            disk_space=str(total_disk_space / total_nodes),
            used_mem=str(total_used_mem / total_nodes))

    # This service is responsible for sending the leader info to superNode as soon as leader changes.
    def getLeaderInfo(self, request, context):
        channel = grpc.insecure_channel('{}'.format(self.superNodeAddress))
        stub = fileService_pb2_grpc.FileserviceStub(channel)
        response = stub.getLeaderInfo(
            fileService_pb2.ClusterInfo(ip=self.hostname,
                                        port=self.serverPort,
                                        clusterName="team1"))
        print(response.message)

    #
    #   This service gets invoked when user deletes a file.
    #
    def FileDelete(self, request, data):
        username = request.username
        filename = request.filename

        if (int(db.get("primaryStatus")) == 1):

            if (self.fileExists(username, filename) == 0):
                print("File does not exist")
                return fileService_pb2.ack(success=False,
                                           message="File does not exist")

            print("Fetching metadata from leader")
            metadata = db.parseMetaData(request.username, request.filename)
            print("Successfully retrieved metadata from leader")

            deleteHelper = DeleteHelper(self.hostname, self.serverPort,
                                        self.activeNodesChecker)
            deleteHelper.deleteFileChunksAndMetaFromNodes(
                username, filename, metadata)

            return fileService_pb2.ack(
                success=True,
                message="Successfully deleted file from the cluster")

        else:
            seqNo = -1

            try:
                seqNo = request.seqNo
            except:
                return fileService_pb2.ack(success=False,
                                           message="Internal Error")

            metaDataKey = username + "_" + filename
            dataChunkKey = username + "_" + filename + "_" + str(seqNo)

            if (db.keyExists(metaDataKey) == 1):
                print("FileDelete: Deleting the metadataEntry from local db :")
                db.deleteEntry(metaDataKey)
            if (db.keyExists(dataChunkKey)):
                print("FileDelete: Deleting the data chunk from local db: ")
                db.deleteEntry(dataChunkKey)

            return fileService_pb2.ack(
                success=True,
                message="Successfully deleted file from the cluster")

    #
    #   This service gets invoked when user wants to check if the file is present.
    #
    def FileSearch(self, request, data):
        username, filename = request.username, request.filename

        if (self.fileExists(username, filename) == 1):
            return fileService_pb2.ack(success=True,
                                       message="File exists in the cluster.")
        else:
            return fileService_pb2.ack(
                success=False, message="File does not exist in the cluster.")

    #
    #   This service gets invoked when user wants to update a file.
    #
    def UpdateFile(self, request_iterator, context):

        username, filename = "", ""
        fileData = bytes("", 'utf-8')

        for request in request_iterator:
            fileData += request.data
            username, filename = request.username, request.filename

        def getFileChunks(fileData):
            # Maximum chunk size that can be sent over gRPC
            CHUNK_SIZE = 4000000

            sTime = time.time()

            # fileData is a bytes object, so slice it into chunks instead of read()
            start, end = 0, CHUNK_SIZE
            while True:
                chunk = fileData[start:end]
                if not chunk: break
                start = end
                end += CHUNK_SIZE

                yield fileService_pb2.FileData(username=username,
                                               filename=filename,
                                               data=chunk,
                                               seqNo=1)
            print("Time for upload= ", time.time() - sTime)

        if (int(db.get("primaryStatus")) == 1):
            channel = grpc.insecure_channel('{}'.format(self.serverAddress))
            stub = fileService_pb2_grpc.FileserviceStub(channel)

            response1 = stub.FileDelete(
                fileService_pb2.FileInfo(username=username, filename=filename))

            if (response1.success):
                response2 = stub.UploadFile(getFileChunks(fileData))
                if (response2.success):
                    return fileService_pb2.ack(
                        success=True, message="File suceessfully updated.")
                else:
                    return fileService_pb2.ack(success=False,
                                               message="Internal error.")
            else:
                return fileService_pb2.ack(success=False,
                                           message="Internal error.")
Example #8
class topic4:
    def __init__(self, c_hash, c_user, c_words):
        self.topic_count =1
        # self.time = (self.first,self.last)
        self.l1 = LRU(c_hash)
        self.first =""
        self.last=""
        self.lats=[]
        self.longs=[]
        self.l2 = LRU(c_user)
        self.l3 = LRU(c_words)
        self.l4 = LRU(400)
    def set_hashLRU(self,l):
        self.set(self.l1, l)

    def set_userLRU(self,l):
        self.set(self.l2, l)

    def set_wordLRU(self,l):
        self.set(self.l3, l)

    def set(self, lru, l):
        for k in l:
            v = lru.get(k,0)
            lru[k]=v+1

    def set_cluster(self, hashtags, users, words,links, cords):
        for k in hashtags:
            self.l1[k]=self.l1.get(k,0)+1
        for k in users:
            self.l2[k]=self.l2.get(k,0)+1
        for k in words:
            self.l3[k]=self.l3.get(k,0)+1
        for k in links:
            self.l4[k]=self.l4.get(k,0)+1
        if(cords is not None):
            self.lats.append(cords["coordinates"][1])
            self.longs.append(cords["coordinates"][0])
        self.topic_count+=1

    def get_similarity(self,hashtags,users,words):
        h_sum = 1
        u_sum = 1
        w_sum = 1
        h_match =0
        h_ind =0
        u_ind =0
        w_ind =0
        c=0
        h1 = self.l1.get_size()
        u1 = self.l2.get_size()
        w1 = self.l3.get_size()
        for h in hashtags:
            # l1_items=zip(*self.l1.items())
            h_sum+= self.l1.get(h,0)
            if(self.l1.has_key(h)):
                ind = self.l1.keys().index(h)
                h_ind+= h1 - ind
                h_match+= 1 if ind<250 else 0
        for u in users:
            u_sum+= self.l2.get(u,0)
            if(self.l2.has_key(u)):
                u_ind+= u1 - self.l2.keys().index(u)
        for w in words:
            w_sum+= self.l3.get(w,0)
            if(self.l3.has_key(w)):
                w_ind+= w1 - self.l3.keys().index(w)
        if(h_match !=0):
            c = h_match -1
        # print(h_ind,h1,u_ind,u1,w_ind,w1, h_sum,w_sum,)
        similarity = (h_ind/(h1+1))*(h_sum/sum(self.l1.values() +[1])) + (u_ind/(u1+1))*(u_sum/sum(self.l2.values()+[1])) + (w_ind/(w1+1))*(w_sum/sum(self.l3.values()+[1])) +c
        return similarity
    def flush1(self, cache, size):
        # Keep only the five most recently used keys and reset their counts;
        # reversed() preserves their relative order when they are re-inserted.
        if len(cache.keys()) > 5:
            tokens = reversed(cache.keys()[:5])
            cache.clear()
            for i in tokens:
                cache[i] = 1


    def flush(self):
        self.flush1(self.l1,500)
        self.flush1(self.l2, 500)
        self.flush1(self.l3,3500)
        self.topic_count=1
Example #9
print(l.keys())  # Can get keys alone in MRU order
# Would print [3, 5, 4, 2, 1]

del l[4]  # Delete an item
print(l.items())
# Would print [(3, '3'), (5, '5'), (2, '2'), (1, '1')]

print(l.get_size())
# Would print 5

l.set_size(3)
print(l.items())
# Would print [(3, '3'), (5, '5'), (2, '2')]
print(l.get_size())
# Would print 3
print(l.has_key(5))
# Would print True
print(2 in l)
# Would print True

print(l.get_stats())
# Would print (1, 0)

l.update({5: '0'})  # Update an item
print(l.items())
# Would print [(5, '0'), (3, '3'), (2, '2')]

l.clear()
print(l.items())
# Would print []
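As a companion to the usage snippet above, a short sketch of peek_first_item() and peek_last_item(), which several examples on this page use to pick eviction victims; both return a (key, value) pair without changing the recency order:

from lru import LRU

l = LRU(3)
l[1] = "1"
l[2] = "2"
l[3] = "3"
print(l.peek_first_item())   # (3, '3')  most recently used
print(l.peek_last_item())    # (1, '1')  least recently used, next to be evicted
_ = l[1]                     # reading key 1 makes it most recently used again
print(l.peek_first_item())   # (1, '1')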
Example #10
class Cache:
    """Class representing D3N."""

    # Replacement policies
    LRU = "LRU"
    LFU = "LFU"
    LRU_S = "LRU_S"
    FIFO = "FIFO"
    RAND = "RAND"

    # Write policies
    WRITE_BACK = "WB"
    WRITE_THROUGH = "WT"

    # Layer
    L1 = "L1"
    L2 = "L2"

    consistent = "consistent"
    rendezvous = "rendezvous"
    rr = "rr"

    def __init__(self, layer, size, replace_pol, write_pol, hash_ring,
                 hash_type, obj_size, full_size, logger):
        self._replace_pol = replace_pol  # Replacement policy
        self._write_pol = write_pol  # Write policy
        self._layer = layer  # Layer info
        self._size = size  # Cache size
        self.spaceLeft = size  # Cache size
        self._logger = logger
        self.hashmap = {}  # Mapping
        self.hash_ring = hash_ring
        self._hash_type = hash_type
        self._obj_size = obj_size

        if (self._size == 0):
            self.zerosize = True
            self._size = 1
        else:
            self.zerosize = False

        if (self._replace_pol == Cache.LRU):
            self.cache = LRU(self._size)
        elif (self._replace_pol == Cache.FIFO):
            self.cache = deque()
        elif (self._replace_pol == Cache.LRU_S):
            self.cache = LRU(self._size)
            self.shadow = LRU(full_size)
            self.hist = []
            for i in range(full_size):
                self.hist.append(0)

        # Statistics
        self._hit_count = 0
        self._miss_count = 0
        self._backend_bw = 0
        self._crossrack_bw = 0
        self._intrarack_bw = 0
        self.miss_lat = 0
        self.lat_count = 0

    def _insert1(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.shadow[key] = 1

            if (int(size) <= self.spaceLeft):
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)
            else:
                while (int(size) > self.spaceLeft):
                    self._evict()
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)

    def _insert(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.cache[key] = int(size)
                self.shadow[key] = int(size)
            elif (self._replace_pol == Cache.LRU):
                self.cache[key] = int(size)
            else:
                if (int(size) <= self.spaceLeft):
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)
                else:
                    while (int(size) > self.spaceLeft):
                        self._evict()
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)

    def read1(self, key, size):
        """Read an object from the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize:
            return None
        r = None

        if (self._replace_pol == Cache.LRU_S):
            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
                self.shadow[key] = 1

        if key in self.hashmap:
            if (self._replace_pol == Cache.LRU):
                self._update_use(key)
            elif (self._replace_pol == Cache.LRU_S):
                self._update_use(key)
            self._hit_count += 1
            r = 1
        else:
            self._miss_count += 1
        return r

    def read(self, key, size):
        """Read an object from the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize:
            return None
        r = None

        if (self._replace_pol == Cache.LRU_S):
            if self.cache.has_key(key):
                self._hit_count += 1
                self.cache[key] = self.cache[key]
                r = 1
            else:
                self._miss_count += 1

            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
                self.shadow[key] = 1

        else:
            if key in self.hashmap:
                if (self._replace_pol == Cache.LRU):
                    self._update_use(key)
                elif (self._replace_pol == Cache.LRU_S):
                    self._update_use(key)
                self._hit_count += 1
                r = 1
            else:
                self._miss_count += 1
        return r

    def checkKey(self, key):
        """Check whether a key is currently cached."""
        if self._layer == "BE":
            return 1
        if self.zerosize:
            return 0
        r = 0

        if (self._replace_pol == Cache.LRU_S) or (self._replace_pol
                                                  == Cache.LRU):
            if self.cache.has_key(key):
                r = 1
            else:
                r = 0
        return r

    def _evict(self):
        if (self._replace_pol == Cache.LRU):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.LRU_S):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.FIFO):
            id = self.cache.popleft()
        self.spaceLeft += int(self.hashmap[id])
        del self.hashmap[id]

    def _update_use(self, key):
        """Update the use of a cache."""
        if (self._replace_pol == Cache.LRU):
            self.cache[key] = self.hashmap[key]
        if (self._replace_pol == Cache.LRU_S):
            self.cache[key] = self.hashmap[key]

    def set_cache_size(self, size):
        new_size = self.cache.get_size() + int(size)
        self.cache.set_size(int(new_size))

    def set_backend_bw(self, value):
        self._backend_bw += value

    def set_crossrack_bw(self, value):
        self._crossrack_bw += value

    def set_intrarack_bw(self, value):
        self._intrarack_bw += value

    def get_backend_bw(self):
        return self._backend_bw

    def get_crossrack_bw(self):
        return self._crossrack_bw

    def get_intrarack_bw(self):
        return self._intrarack_bw

    def get_replace_pol(self):
        return self._replace_pol

    def get_hit_count(self):
        return self._hit_count

    def get_miss_count(self):
        return self._miss_count

    def get_available_space(self):
        return self.spaceLeft

    def get_replace_poll(self):
        return self._replace_pol

    def reset_shadow_cache(self):
        self.shadow.clear()

    def print_cache(self):
        print(self.cache)

    def get_l2_address(self, key):
        if (self._hash_type == Cache.consistent):
            return self.hash_ring.get_node(key)
        elif (self._hash_type == Cache.rendezvous):
            return self.hash_ring.find_node(key)
        elif (self._hash_type == Cache.rr):
            val = key.split("_")[1]
            res = int(val) % int(self.hash_ring)
            return res
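The LRU_S policy above pairs the real cache with a full-size shadow LRU and a hist list: every re-reference records the position (stack distance) at which the key would have hit, which is what the read()/read1() loops over shadow.keys() compute. A standalone sketch of that bookkeeping:

from lru import LRU

full_size = 4
shadow = LRU(full_size)
hist = [0] * full_size

def touch(key):
    if shadow.has_key(key):
        hist[shadow.keys().index(key)] += 1   # depth (in MRU order) of this hit
    shadow[key] = 1                           # insert/move the key to the MRU position

for k in ["a", "b", "a", "c", "b"]:
    touch(k)
print(hist)   # [0, 1, 1, 0]: "a" re-hit at depth 1, "b" at depth 2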
Example #11
class ImpalaLogger(object):
    def __init__(self,
                 nodes: list,
                 elasticsearch: Elasticsearch = None,
                 lru_size: int = 5000):
        self.nodes = nodes
        self.queries_logged = LRU(lru_size)
        self.elasticsearch = elasticsearch

    def run(self):
        """
        Gather the required information from every node: first load the
        queries, then fetch the metadata from each query's profile.

        :return:
        """
        queries = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            futures = {
                executor.submit(self.query_retriever, node, 2): node
                for node in self.nodes
            }

            for future in concurrent.futures.as_completed(futures):
                node = futures[future]
                try:
                    retrieved_queries = future.result()

                    for query in retrieved_queries:
                        if not self.queries_logged.has_key(query.query_id):
                            self.queries_logged[query.query_id] = True
                            query.node = node
                            queries.append(query)

                except Exception as e:
                    print("Something went wrong {}".format(e))

        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            futures = {
                executor.submit(self.query_profiler, query, 2): query
                for query in queries
            }

            for future in concurrent.futures.as_completed(futures):
                try:
                    query = future.result()

                    # send to elastic
                    self.elasticsearch.index(index='impala_queries',
                                             doc_type='query',
                                             id=query.query_id,
                                             body=query.to_dict())

                    print("[{}] Query {} indexed".format(
                        query.start_time, query.query_id))

                except Exception as e:
                    print('Something went wrong {}'.format(e))

    @staticmethod
    def query_retriever(node: str, timeout: int = 1) -> list:
        url = "{schema}{ip}/{path}".format(schema='http://',
                                           ip=node,
                                           path='queries')

        request = requests.get(url, timeout=timeout)
        if request.status_code != 200:
            return False

        parser = ImpalaQueryLogParser(request.text)
        queries = parser.queries

        if not queries:
            return []

        return queries

    @staticmethod
    def query_profiler(query: Query, timeout: int = 1) -> Query:
        url = "{schema}{ip}/query_profile?query_id={query_id}".format(
            schema='http://', ip=query.node, query_id=query.query_id)

        request = requests.get(url, timeout=timeout)
        if request.status_code != 200:
            return False

        return ImpalaQueryLogParser(request.text).extract_profile(query)
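ImpalaLogger above uses its queries_logged LRU purely as a bounded "already seen" set, so repeated query ids are skipped while memory stays capped. A minimal sketch of that deduplication pattern (the ids are made up):

from lru import LRU

seen = LRU(5000)          # same idea as self.queries_logged above

def is_new(query_id):
    if seen.has_key(query_id):
        return False
    seen[query_id] = True
    return True

print(is_new("q-1"), is_new("q-1"))   # True False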